# Source: unifolm-world-model-action/scripts/evaluation/simulate_sparse_head_execution.py
# Snapshot: 2026-03-16 10:30:54 +08:00 (381 lines, 16 KiB, Python)

import argparse
import json
from pathlib import Path
import numpy as np
import pandas as pd
# Head-execution schedules keyed by scheme name. Each value lists the
# zero-based step indices (within a 50-step run, i.e. 0..49) at which the head
# is executed. The dict is embedded verbatim in summary.json, so the key order
# below is part of the output format.
DEFAULT_SCHEMES = {
    # Reference schedule: every step.
    "dense": [*range(50)],
    # Evenly strided schedules (note: these stop before step 49).
    "sparse_10": [*range(0, 50, 5)],
    "sparse_5": [*range(0, 50, 10)],
    # Hand-picked schedules that concentrate executions near the last steps.
    "tail_heavy_10": [0, 5, 10, 20, 30, 38, 43, 46, 48, 49],
    "tail_only_6": [40, 43, 46, 47, 48, 49],
    # Evenly spread schedules that include both the first and the last step.
    "sparse_8": [0, 7, 14, 21, 28, 35, 42, 49],
    "sparse_4": [0, 16, 32, 49],
    # Tail-focused counterparts used in the uniform-vs-tail comparison.
    "tail_only_4": [40, 43, 46, 49],
    "tail_heavy_6": [0, 32, 40, 44, 47, 49],
    "tail_heavy_8": [0, 10, 20, 30, 38, 43, 46, 49],
}
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the sparse-head simulation script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point relies
            on. Passing an explicit list allows programmatic use and testing
            without touching global interpreter state.

    Returns:
        Namespace with ``root_dir`` (str, default ``"."``), ``output_dir``
        (str or ``None``) and ``target_threshold`` (float, default ``0.95``).
    """
    parser = argparse.ArgumentParser(
        description="Offline sparse head execution simulation from existing DDIM logs."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        default=".",
        help="Repository root or directory under which case outputs are stored.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default=None,
        help="Directory to write sparse-head simulation outputs. Defaults to <root_dir>/sparse_head_simulation.",
    )
    parser.add_argument(
        "--target_threshold",
        type=float,
        default=0.95,
        help="Target cosine threshold used by experiment 1.2.",
    )
    return parser.parse_args(argv)
def discover_stepwise_logs(root_dir: Path) -> list[Path]:
    """Return every stepwise log found under *root_dir*, in sorted order.

    Only files laid out as
    ``<root_dir>/unitree_*/case*/output/inference/stepwise_log.csv`` match.
    """
    pattern = "unitree_*/case*/output/inference/stepwise_log.csv"
    log_paths = list(root_dir.glob(pattern))
    log_paths.sort()
    return log_paths
def load_stepwise_table(stepwise_paths: list[Path]) -> pd.DataFrame:
    """Read all stepwise logs into one table tagged with dataset and case.

    The dataset and case names are taken from the path itself: they are the
    5th- and 4th-from-last path components, i.e. the two directories above
    ``output/inference/stepwise_log.csv``.

    Args:
        stepwise_paths: CSV files to load.

    Returns:
        The row-wise concatenation of all files (fresh 0..n-1 index) with
        ``dataset`` and ``case`` columns set, and the known metric columns
        converted to numbers; unparseable cells become NaN.

    Raises:
        FileNotFoundError: If no file was loaded.
    """
    numeric_columns = (
        "step",
        "step_time_s",
        "latent_delta",
        "action_delta",
        "state_delta",
        "action_cosine_vs_full50",
        "state_cosine_vs_full50",
        "latent_l2_vs_full50",
    )

    per_log_tables = []
    for log_path in stepwise_paths:
        path_parts = log_path.parts
        table = pd.read_csv(log_path)
        table["dataset"] = path_parts[-5]
        table["case"] = path_parts[-4]
        per_log_tables.append(table)

    if len(per_log_tables) == 0:
        raise FileNotFoundError("No stepwise_log.csv files found.")

    combined = pd.concat(per_log_tables, ignore_index=True)
    for name in numeric_columns:
        # Coerce rather than fail so a single malformed cell does not abort
        # the whole analysis.
        combined[name] = pd.to_numeric(combined[name], errors="coerce")
    return combined
def simulate_schemes(stepwise_df: pd.DataFrame,
                     schemes: dict[str, list[int]],
                     total_steps: int = 50) -> pd.DataFrame:
    """Simulate each sparse head-execution scheme on every logged round.

    For every (dataset, case, sample_id, scene, pass_type, round_id) group and
    every scheme, the simulated final output is the logged output at the
    scheme's last checkpoint; its cosine against the full run is read straight
    from the ``*_cosine_vs_full50`` columns.

    Args:
        stepwise_df: Stepwise log table with a one-based ``step`` column, the
            six grouping columns and the two ``*_cosine_vs_full50`` columns.
        schemes: Scheme name -> zero-based checkpoint indices. Indices outside
            ``0 .. total_steps - 1`` are ignored, duplicates are counted once,
            and schemes left with no checkpoint are skipped.
        total_steps: Number of steps of the dense run. Defaults to 50, the
            value that was previously hard-coded.

    Returns:
        One row per (group, scheme). If the group's log has no entry for the
        last checkpoint (e.g. a truncated log) the cosine columns are NaN
        instead of raising ``KeyError``. The column set is the same even when
        there are no rows, so downstream group-bys keep working.

    Raises:
        ValueError: If ``total_steps`` is smaller than 1.
    """
    if total_steps < 1:
        raise ValueError(f"total_steps must be >= 1, got {total_steps}")

    group_columns = [
        "dataset",
        "case",
        "sample_id",
        "scene",
        "pass_type",
        "round_id",
    ]
    output_columns = group_columns + [
        "scheme",
        "head_exec_steps_zero_based",
        "head_exec_count",
        "head_compute_saving_ratio",
        "final_checkpoint_zero_based",
        "final_checkpoint_one_based",
        "final_action_cosine_vs_dense",
        "final_state_cosine_vs_dense",
    ]

    # Scheme normalisation does not depend on the data: do it once instead of
    # once per group. The set removes duplicates so they do not inflate the
    # execution count.
    last_valid_step = total_steps - 1
    normalized_schemes = {}
    for scheme_name, checkpoints in schemes.items():
        valid = sorted({c for c in checkpoints if 0 <= c <= last_valid_step})
        if valid:
            normalized_schemes[scheme_name] = valid

    rows = []
    # NOTE(review): pandas' groupby drops groups whose key contains NaN by
    # default -- confirm that rows with a missing key column may be ignored.
    for keys, group in stepwise_df.groupby(group_columns):
        group = group.sort_values("step")
        # Logged steps are one-based; checkpoints are zero-based.
        zero_based_steps = group["step"] - 1
        action_curve = dict(zip(zero_based_steps, group["action_cosine_vs_full50"]))
        state_curve = dict(zip(zero_based_steps, group["state_cosine_vs_full50"]))

        for scheme_name, checkpoints in normalized_schemes.items():
            last_checkpoint = checkpoints[-1]
            row = dict(zip(group_columns, keys))
            row.update({
                "scheme": scheme_name,
                "head_exec_steps_zero_based": json.dumps(checkpoints),
                "head_exec_count": len(checkpoints),
                "head_compute_saving_ratio": 1.0 - len(checkpoints) / total_steps,
                "final_checkpoint_zero_based": last_checkpoint,
                "final_checkpoint_one_based": last_checkpoint + 1,
                # Missing step -> NaN, consistent with how unparseable log
                # cells are already represented.
                "final_action_cosine_vs_dense": action_curve.get(last_checkpoint, np.nan),
                "final_state_cosine_vs_dense": state_curve.get(last_checkpoint, np.nan),
            })
            rows.append(row)
    return pd.DataFrame(rows, columns=output_columns)
def summarize_scheme_results(simulation_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Aggregate per-round simulation rows at two levels.

    Returns:
        ``(overall, per_case)`` where ``overall`` holds mean/median/std of the
        metric columns per (scheme, pass_type) -- its columns are two-level
        (metric, statistic) -- and ``per_case`` holds the plain mean of the
        metric columns per (dataset, case, scheme, pass_type).
    """
    metric_columns = [
        "final_action_cosine_vs_dense",
        "final_state_cosine_vs_dense",
        "head_exec_count",
        "head_compute_saving_ratio",
    ]

    by_scheme = simulation_df.groupby(["scheme", "pass_type"])
    overall = by_scheme[metric_columns].agg(["mean", "median", "std"])
    overall = overall.reset_index()

    by_case = simulation_df.groupby(["dataset", "case", "scheme", "pass_type"])
    per_case = by_case[metric_columns].mean()
    per_case = per_case.reset_index()

    return overall, per_case
def _earliest_checkpoint_zero_based(group: pd.DataFrame, column: str,
                                    threshold: float) -> float:
    """Return the zero-based step of the first row with ``column >= threshold``.

    *group* must already be sorted by ``step``. Returns NaN when no row
    reaches the threshold (NaN cells never compare as reaching it).
    """
    hit_steps = group.loc[group[column] >= threshold, "step"]
    if hit_steps.empty:
        return np.nan
    return float(hit_steps.iloc[0] - 1)


def compute_min_steps_needed(stepwise_df: pd.DataFrame,
                             threshold: float,
                             total_steps: int = 50) -> pd.DataFrame:
    """Find, per logged round, the earliest step whose cosine meets *threshold*.

    Args:
        stepwise_df: Stepwise log table with a one-based ``step`` column, the
            six grouping columns and the two ``*_cosine_vs_full50`` columns.
        threshold: Cosine value a step must reach (``>=``) to count as a hit.
        total_steps: Number of steps of the dense run, used for the saving
            ratio. Defaults to 50, which reproduces the previously hard-coded
            ratio of 0.98.

    Returns:
        One row per (dataset, case, sample_id, scene, pass_type, round_id).
        When a hit exists, the reported minimum number of head executions is
        1 (a single execution at the earliest qualifying step) and the saving
        ratio is ``1 - 1 / total_steps``; otherwise these fields and the
        earliest-checkpoint field are NaN. The column set is the same even
        when there are no rows.

    Raises:
        ValueError: If ``total_steps`` is smaller than 1.
    """
    if total_steps < 1:
        raise ValueError(f"total_steps must be >= 1, got {total_steps}")

    group_columns = [
        "dataset",
        "case",
        "sample_id",
        "scene",
        "pass_type",
        "round_id",
    ]
    output_columns = group_columns + [
        "target_threshold",
        "action_min_head_steps_needed",
        "state_min_head_steps_needed",
        "action_earliest_checkpoint_zero_based",
        "state_earliest_checkpoint_zero_based",
        "action_max_head_compute_saving_ratio",
        "state_max_head_compute_saving_ratio",
    ]
    # Saving achieved by running the head once out of total_steps.
    single_exec_saving = 1.0 - 1.0 / total_steps

    rows = []
    for keys, group in stepwise_df.groupby(group_columns):
        ordered = group.sort_values("step")
        action_step = _earliest_checkpoint_zero_based(
            ordered, "action_cosine_vs_full50", threshold)
        state_step = _earliest_checkpoint_zero_based(
            ordered, "state_cosine_vs_full50", threshold)
        action_reached = pd.notna(action_step)
        state_reached = pd.notna(state_step)

        row = dict(zip(group_columns, keys))
        row.update({
            "target_threshold": threshold,
            "action_min_head_steps_needed": 1.0 if action_reached else np.nan,
            "state_min_head_steps_needed": 1.0 if state_reached else np.nan,
            "action_earliest_checkpoint_zero_based": action_step,
            "state_earliest_checkpoint_zero_based": state_step,
            "action_max_head_compute_saving_ratio":
                single_exec_saving if action_reached else np.nan,
            "state_max_head_compute_saving_ratio":
                single_exec_saving if state_reached else np.nan,
        })
        rows.append(row)
    return pd.DataFrame(rows, columns=output_columns)
def compare_tail_vs_uniform(simulation_df: pd.DataFrame) -> pd.DataFrame:
    """Pair uniform and tail-focused schemes on the same rounds and diff them.

    Three fixed pairings are evaluated. For each, rows of the two schemes are
    inner-joined on (dataset, case, sample_id, scene, pass_type, round_id), so
    only rounds present for both schemes produce an output row.

    Returns:
        One row per (pairing, round) with both schemes' execution counts and
        final cosines, the tail-minus-uniform differences, and boolean flags
        that are true only when the tail scheme is strictly better.
    """
    pairings = (
        ("sparse_8", "tail_heavy_8", "budget_8_uniform_vs_tail"),
        ("sparse_4", "tail_only_4", "budget_4_uniform_vs_tail"),
        ("sparse_10", "tail_heavy_10", "budget_10_uniform_vs_tail"),
    )
    join_keys = [
        "dataset",
        "case",
        "sample_id",
        "scene",
        "pass_type",
        "round_id",
    ]

    records = []
    for uniform_scheme, tail_scheme, comparison_name in pairings:
        scheme_column = simulation_df["scheme"]
        uniform_rows = simulation_df[scheme_column == uniform_scheme].copy()
        tail_rows = simulation_df[scheme_column == tail_scheme].copy()
        paired = uniform_rows.merge(
            tail_rows,
            how="inner",
            on=join_keys,
            suffixes=("_uniform", "_tail"),
        )

        for _, pair in paired.iterrows():
            uniform_action = pair["final_action_cosine_vs_dense_uniform"]
            tail_action = pair["final_action_cosine_vs_dense_tail"]
            uniform_state = pair["final_state_cosine_vs_dense_uniform"]
            tail_state = pair["final_state_cosine_vs_dense_tail"]

            record = {"comparison": comparison_name}
            for key in join_keys:
                record[key] = pair[key]
            record["uniform_scheme"] = uniform_scheme
            record["tail_scheme"] = tail_scheme
            record["uniform_head_exec_count"] = pair["head_exec_count_uniform"]
            record["tail_head_exec_count"] = pair["head_exec_count_tail"]
            record["uniform_action_cosine_vs_dense"] = uniform_action
            record["tail_action_cosine_vs_dense"] = tail_action
            record["uniform_state_cosine_vs_dense"] = uniform_state
            record["tail_state_cosine_vs_dense"] = tail_state
            record["tail_minus_uniform_action_cosine"] = tail_action - uniform_action
            record["tail_minus_uniform_state_cosine"] = tail_state - uniform_state
            record["tail_better_action"] = tail_action > uniform_action
            record["tail_better_state"] = tail_state > uniform_state
            records.append(record)
    return pd.DataFrame(records)
def build_summary_payload(simulation_df: pd.DataFrame,
                          min_steps_df: pd.DataFrame,
                          tail_compare_df: pd.DataFrame,
                          target_threshold: float) -> dict:
    """Condense the three experiment tables into one nested summary dict.

    Args:
        simulation_df: Per-round scheme simulation rows (experiment 1.1).
        min_steps_df: Per-round earliest-checkpoint rows (experiment 1.2).
        tail_compare_df: Per-round uniform-vs-tail rows (experiment 1.3).
        target_threshold: Threshold recorded under ``config``.

    Returns:
        Dict with the keys ``config``, ``experiment_1_1``, ``experiment_1_2``
        and ``experiment_1_3`` (in that order), holding plain Python floats,
        ints and lists.
    """

    def clean_numeric(column: str) -> pd.Series:
        # Numeric view of a min_steps_df column with missing values removed.
        return pd.to_numeric(min_steps_df[column], errors="coerce").dropna()

    def mean_and_median(column: str) -> dict:
        return {
            "mean": float(clean_numeric(column).mean()),
            "median": float(clean_numeric(column).median()),
        }

    def sorted_unique(column: str) -> list:
        return sorted(clean_numeric(column).unique().tolist())

    # Experiment 1.1: per-scheme quality. Execution count and saving ratio are
    # read from the first row of each scheme's group.
    per_scheme = {}
    for scheme, rows in simulation_df.groupby("scheme"):
        action_cosine = rows["final_action_cosine_vs_dense"]
        state_cosine = rows["final_state_cosine_vs_dense"]
        per_scheme[scheme] = {
            "num_rows": int(len(rows)),
            "head_exec_count": float(rows["head_exec_count"].iloc[0]),
            "head_compute_saving_ratio": float(
                rows["head_compute_saving_ratio"].iloc[0]),
            "final_action_cosine_vs_dense_mean": float(action_cosine.mean()),
            "final_action_cosine_vs_dense_median": float(action_cosine.median()),
            "final_state_cosine_vs_dense_mean": float(state_cosine.mean()),
            "final_state_cosine_vs_dense_median": float(state_cosine.median()),
        }

    # Experiment 1.2: earliest checkpoint that reaches the threshold.
    earliest_checkpoint = {
        "action_earliest_checkpoint_zero_based": mean_and_median(
            "action_earliest_checkpoint_zero_based"),
        "state_earliest_checkpoint_zero_based": mean_and_median(
            "state_earliest_checkpoint_zero_based"),
        "min_head_steps_needed_action_unique": sorted_unique(
            "action_min_head_steps_needed"),
        "min_head_steps_needed_state_unique": sorted_unique(
            "state_min_head_steps_needed"),
    }

    # Experiment 1.3: how often, and by how much, the tail scheme wins.
    per_comparison = {}
    for comparison, rows in tail_compare_df.groupby("comparison"):
        per_comparison[comparison] = {
            "num_rows": int(len(rows)),
            "tail_better_action_share": float(rows["tail_better_action"].mean()),
            "tail_better_state_share": float(rows["tail_better_state"].mean()),
            "tail_minus_uniform_action_cosine_mean": float(
                rows["tail_minus_uniform_action_cosine"].mean()),
            "tail_minus_uniform_state_cosine_mean": float(
                rows["tail_minus_uniform_state_cosine"].mean()),
        }

    payload: dict[str, dict] = {
        "config": {
            "target_threshold": target_threshold,
            "schemes": DEFAULT_SCHEMES,
        },
        "experiment_1_1": per_scheme,
        "experiment_1_2": earliest_checkpoint,
        "experiment_1_3": per_comparison,
    }
    return payload
def write_markdown_report(path: Path, payload: dict) -> None:
    """Render the summary payload as a Markdown report and write it to *path*.

    The report has a short preamble followed by one section per experiment;
    experiments 1.1 and 1.3 get one sub-section per scheme / comparison. The
    file is written as UTF-8 and overwritten if it already exists.
    """
    experiment_1_2 = payload["experiment_1_2"]
    action_mean = experiment_1_2["action_earliest_checkpoint_zero_based"]["mean"]
    state_mean = experiment_1_2["state_earliest_checkpoint_zero_based"]["mean"]
    action_unique = experiment_1_2["min_head_steps_needed_action_unique"]
    state_unique = experiment_1_2["min_head_steps_needed_state_unique"]
    threshold = payload["config"]["target_threshold"]

    report = []
    report.append("# Sparse Head Execution Simulation")
    report.append("")
    report.append("This report uses zero-order hold over logged stepwise action/state outputs.")
    report.append("For a sparse scheme, the final output at step 49 is approximated by the most recent checkpoint output.")
    report.append("")
    report.append("## Experiment 1.1")
    report.append("")

    for scheme, stats in payload["experiment_1_1"].items():
        action_avg = stats["final_action_cosine_vs_dense_mean"]
        action_med = stats["final_action_cosine_vs_dense_median"]
        state_avg = stats["final_state_cosine_vs_dense_mean"]
        state_med = stats["final_state_cosine_vs_dense_median"]
        report += [
            f"### {scheme}",
            "",
            f"- Head exec count: {stats['head_exec_count']:.0f}",
            f"- Head compute saving ratio: {stats['head_compute_saving_ratio']:.4f}",
            f"- Final action cosine vs dense: mean={action_avg:.4f}, median={action_med:.4f}",
            f"- Final state cosine vs dense: mean={state_avg:.4f}, median={state_med:.4f}",
            "",
        ]

    report += [
        "## Experiment 1.2",
        "",
        f"- Target threshold: {threshold}",
        f"- Action earliest checkpoint mean: {action_mean:.4f}",
        f"- State earliest checkpoint mean: {state_mean:.4f}",
        f"- Unique min head steps needed for action: {action_unique}",
        f"- Unique min head steps needed for state: {state_unique}",
        "",
        "## Experiment 1.3",
        "",
    ]

    for comparison, stats in payload["experiment_1_3"].items():
        report += [
            f"### {comparison}",
            "",
            f"- Tail better action share: {stats['tail_better_action_share']:.4f}",
            f"- Tail better state share: {stats['tail_better_state_share']:.4f}",
            f"- Mean tail-minus-uniform action cosine: {stats['tail_minus_uniform_action_cosine_mean']:.4f}",
            f"- Mean tail-minus-uniform state cosine: {stats['tail_minus_uniform_state_cosine_mean']:.4f}",
            "",
        ]

    with open(path, "w", encoding="utf-8") as file:
        # Every line, including the last one, is newline-terminated.
        file.writelines(f"{line}\n" for line in report)
def main() -> None:
    """Run the whole offline simulation and write every output artifact.

    Discovers and loads the stepwise logs under the root directory, runs the
    three experiments, then writes five CSV tables, ``summary.json`` and
    ``report.md`` into the output directory (created if missing).
    """
    args = parse_args()
    root_dir = Path(args.root_dir).resolve()
    if args.output_dir:
        output_dir = Path(args.output_dir).resolve()
    else:
        output_dir = root_dir / "sparse_head_simulation"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Load inputs.
    stepwise_df = load_stepwise_table(discover_stepwise_logs(root_dir))

    # Run the experiments.
    simulation_df = simulate_schemes(stepwise_df, DEFAULT_SCHEMES)
    scheme_overall_df, scheme_per_case_df = summarize_scheme_results(simulation_df)
    min_steps_df = compute_min_steps_needed(stepwise_df, args.target_threshold)
    tail_compare_df = compare_tail_vs_uniform(simulation_df)
    summary_payload = build_summary_payload(
        simulation_df,
        min_steps_df,
        tail_compare_df,
        args.target_threshold,
    )

    # Write the tables, then the JSON summary, then the Markdown report.
    csv_outputs = (
        ("scheme_simulation_per_round.csv", simulation_df),
        ("scheme_simulation_overall.csv", scheme_overall_df),
        ("scheme_simulation_per_case.csv", scheme_per_case_df),
        ("min_head_steps_needed.csv", min_steps_df),
        ("tail_vs_uniform.csv", tail_compare_df),
    )
    for file_name, table in csv_outputs:
        table.to_csv(output_dir / file_name, index=False)

    with open(output_dir / "summary.json", "w", encoding="utf-8") as file:
        json.dump(summary_payload, file, indent=2, ensure_ascii=False)
    write_markdown_report(output_dir / "report.md", summary_payload)

    print(f"Sparse head simulation written to: {output_dir}")


# Run the simulation only when executed as a script, not on import.
if __name__ == "__main__":
    main()