149 lines
5.9 KiB
Python
149 lines
5.9 KiB
Python
import argparse
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
# Inclusive step bounds of the "mid" window used when summarising blocks.
MID_STEP_MIN, MID_STEP_MAX = 15, 35

# Inclusive step bounds of the "tail" window used when summarising blocks.
TAIL_STEP_MIN, TAIL_STEP_MAX = 40, 49
|
|
|
|
|
|
def describe(series: pd.Series) -> dict[str, float]:
    """Summarise the numeric content of *series*.

    Entries that cannot be parsed as numbers are coerced to NaN and dropped
    before any statistic is computed.

    Returns:
        A dict with the keys ``count``, ``mean``, ``median`` and ``p90``
        (the 0.90 quantile). When no numeric value remains, ``count`` is
        ``0.0`` and the other three entries are NaN.
    """
    summary = {'count': 0.0, 'mean': np.nan, 'median': np.nan, 'p90': np.nan}
    values = pd.to_numeric(series, errors='coerce').dropna()
    if len(values) > 0:
        summary.update(
            count=float(len(values)),
            mean=float(values.mean()),
            median=float(values.median()),
            p90=float(values.quantile(0.90)),
        )
    return summary
|
|
|
|
|
|
def make_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the aggregation script.

    Returns:
        A parser exposing ``--input_csv`` (required) and ``--output_dir``
        (optional; ``None`` when omitted, in which case ``main`` writes to an
        ``analysis`` directory beside the input CSV).
    """
    parser = argparse.ArgumentParser(
        description="Aggregate backbone block profiling results.")
    parser.add_argument("--input_csv",
                        type=str,
                        required=True,
                        help="Path to backbone_block_log.csv")
    # The help text states the default that main() actually applies: an
    # "analysis" sub-directory of the input's parent, not the parent itself.
    parser.add_argument("--output_dir",
                        type=str,
                        default=None,
                        help="Directory to store summaries; defaults to an "
                             "'analysis' folder beside the input CSV.")
    return parser
|
|
|
|
|
|
# Columns that together identify one backbone block in the profiling log.
_BLOCK_KEYS = ['block_stage', 'block_name', 'block_index']

# Measurement columns that are coerced to numeric and averaged per step.
_NUMERIC_COLS = [
    'forward_time_ms',
    'l2_delta_vs_prev',
    'rel_l2_delta_vs_prev',
    'cosine_vs_prev',
    'l2_delta_vs_full50',
    'cosine_vs_full50',
]

# Named aggregations (output column -> (source column, function)) shared by
# the per-block summary and the per-stage summary.
_OVERALL_MEANS = {
    'mean_forward_time_ms': ('forward_time_ms', 'mean'),
    'mean_rel_l2_delta_vs_prev': ('rel_l2_delta_vs_prev', 'mean'),
    'mean_cosine_vs_prev': ('cosine_vs_prev', 'mean'),
    'mean_l2_delta_vs_full50': ('l2_delta_vs_full50', 'mean'),
    'mean_cosine_vs_full50': ('cosine_vs_full50', 'mean'),
}

# Column the block summary and the candidate list are ranked by.
_TAIL_RANK_COL = 'tail_mean_rel_l2_delta_vs_prev'


def _load_block_log(csv_path: Path) -> pd.DataFrame:
    """Read the profiling CSV, verify its columns and coerce metric columns."""
    frame = pd.read_csv(csv_path)
    required = _BLOCK_KEYS + ['step', 'pass_type'] + _NUMERIC_COLS
    missing = [col for col in required if col not in frame.columns]
    if missing:
        # Fail up front with every missing name instead of a bare KeyError
        # from whichever column happens to be accessed first.
        raise KeyError(
            f"{csv_path} is missing required columns: {', '.join(missing)}")
    for col in _NUMERIC_COLS:
        frame[col] = pd.to_numeric(frame[col], errors='coerce')
    return frame


def _step_window(df: pd.DataFrame, first: int, last: int) -> pd.DataFrame:
    """Return the rows whose ``step`` lies in the inclusive range [first, last]."""
    return df[df['step'].between(first, last)]


def _window_block_means(window_df: pd.DataFrame, prefix: str) -> pd.DataFrame:
    """Per-block means of the stability/timing metrics, prefixed by window name."""
    return window_df.groupby(_BLOCK_KEYS).agg(**{
        f'{prefix}_mean_rel_l2_delta_vs_prev': ('rel_l2_delta_vs_prev', 'mean'),
        f'{prefix}_mean_cosine_vs_prev': ('cosine_vs_prev', 'mean'),
        f'{prefix}_mean_forward_time_ms': ('forward_time_ms', 'mean'),
    }).reset_index()


def _build_block_summary(df: pd.DataFrame, mid_df: pd.DataFrame,
                         tail_df: pd.DataFrame) -> pd.DataFrame:
    """Combine overall, mid-window and tail-window means into one row per block."""
    summary = df.groupby(_BLOCK_KEYS).agg(**_OVERALL_MEANS).reset_index()
    for prefix, window_df in (('mid', mid_df), ('tail', tail_df)):
        # Left merge keeps blocks that have no rows inside the window (NaN).
        summary = summary.merge(_window_block_means(window_df, prefix),
                                on=_BLOCK_KEYS,
                                how='left')
    # Smallest tail change first; ties broken by the slowest block first.
    return summary.sort_values([_TAIL_RANK_COL, 'mean_forward_time_ms'],
                               ascending=[True, False])


def _build_stage_summary(windows: list[tuple[str, pd.DataFrame]]) -> pd.DataFrame:
    """Per-stage metric means for each named step window, stacked vertically."""
    parts = []
    for window_name, window_df in windows:
        grouped = window_df.groupby('block_stage').agg(
            **_OVERALL_MEANS).reset_index()
        grouped.insert(0, 'window', window_name)
        parts.append(grouped)
    return pd.concat(parts, ignore_index=True)


def _metric_line(df: pd.DataFrame, column: str, digits: int) -> str:
    """Format mean/median/p90 of one column as a Markdown bullet."""
    # describe() is evaluated once per metric rather than once per statistic.
    stats = describe(df[column])
    return (f"- `{column}`: mean={stats['mean']:.{digits}f}, "
            f"median={stats['median']:.{digits}f}, "
            f"p90={stats['p90']:.{digits}f}")


def _render_report(df: pd.DataFrame, block_summary: pd.DataFrame) -> str:
    """Render the Markdown report from the raw log and the sorted block summary."""
    first_step, last_step = df['step'].min(), df['step'].max()
    if pd.isna(first_step) or pd.isna(last_step):
        # A log without usable step values has no range to print.
        step_range = "n/a"
    else:
        step_range = f"{int(first_step)}-{int(last_step)}"

    # str() keeps sorting and joining safe when pass types are not all strings.
    pass_types = sorted(str(value) for value in df['pass_type'].dropna().unique())

    lines = [
        "# Backbone Block Profiling Report",
        "",
        "## Dataset Overview",
        "",
        f"- Rows: {len(df)}",
        f"- Blocks: {df['block_name'].nunique()}",
        f"- Steps: {step_range}",
        f"- Pass types: {', '.join(pass_types)}",
        "",
        "## Block Timing",
        "",
        _metric_line(df, 'forward_time_ms', 4),
        "",
        "## Stability",
        "",
        _metric_line(df, 'rel_l2_delta_vs_prev', 6),
        _metric_line(df, 'cosine_vs_prev', 6),
        "",
        "## Top Cache Candidates",
        "",
    ]

    # block_summary is already ranked, so the top rows are the candidates.
    # Blocks without tail-window data (NaN) cannot be ranked and are left out.
    candidates = block_summary.dropna(subset=[_TAIL_RANK_COL]).head(10)
    for _, row in candidates.iterrows():
        lines.append(
            f"- `{row['block_name']}` ({row['block_stage']}): "
            f"tail_rel_l2={row[_TAIL_RANK_COL]:.6f}, "
            f"mean_forward_time_ms={row['mean_forward_time_ms']:.4f}"
        )
    if candidates.empty:
        lines.append("- (no blocks have data in the tail step window)")

    return "\n".join(lines)


def main() -> None:
    """Aggregate a backbone block profiling log into CSV summaries and a report.

    Reads the CSV named by ``--input_csv`` and writes four files into the
    output directory (``--output_dir``, or ``analysis`` beside the input):
    ``block_step_aggregate.csv``, ``block_summary.csv``, ``stage_summary.csv``
    and ``backbone_profile_report.md``.

    Raises:
        FileNotFoundError: If the input CSV does not exist.
        KeyError: If the CSV lacks any column the aggregation needs.
    """
    args = make_parser().parse_args()
    input_path = Path(args.input_csv)
    if not input_path.exists():
        raise FileNotFoundError(f"Missing file: {input_path}")

    # Load and validate before touching the filesystem so that a bad input
    # does not leave an empty output directory behind.
    df = _load_block_log(input_path)

    if args.output_dir:
        output_dir = Path(args.output_dir)
    else:
        output_dir = input_path.parent / "analysis"
    output_dir.mkdir(parents=True, exist_ok=True)

    step_aggregate = df.groupby(
        _BLOCK_KEYS + ['step'])[_NUMERIC_COLS].mean().reset_index()
    step_aggregate.to_csv(output_dir / 'block_step_aggregate.csv', index=False)

    mid_df = _step_window(df, MID_STEP_MIN, MID_STEP_MAX)
    tail_df = _step_window(df, TAIL_STEP_MIN, TAIL_STEP_MAX)

    block_summary = _build_block_summary(df, mid_df, tail_df)
    block_summary.to_csv(output_dir / 'block_summary.csv', index=False)

    stage_summary_df = _build_stage_summary(
        [('all', df), ('mid', mid_df), ('tail', tail_df)])
    stage_summary_df.to_csv(output_dir / 'stage_summary.csv', index=False)

    report = _render_report(df, block_summary)
    (output_dir / 'backbone_profile_report.md').write_text(report,
                                                           encoding='utf-8')
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the aggregation only when executed as a script, not when imported.
    main()
|