xtask/summarize_bench/mod.rs
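//! Summarizes raw benchmark data files into per-commit and per-system
//! summary JSON files.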
pub(crate) mod data;

use std::{
    collections::{BTreeMap, hash_map::Entry},
    fs::File,
    hash::BuildHasherDefault,
    path::{Path, PathBuf},
    time::{Duration, UNIX_EPOCH},
};

use anyhow::{Context, anyhow};
use chrono::{DateTime, Utc};
use indexmap::IndexSet;
use rustc_hash::{FxHashMap, FxHasher};
use walkdir::WalkDir;

use self::data::Benchmark;
use crate::summarize_bench::data::{BaseBenchmarks, CStats};

type FxIndexSet<T> = IndexSet<T, BuildHasherDefault<FxHasher>>;

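/// Metadata describing a single raw benchmark data file, parsed from its path.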
#[derive(Debug)]
struct BenchDataFile {
    path: PathBuf,
    sha: String,
    timestamp: u64,
    system: String,
}

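// A raw data file is expected to live at `<system>/<timestamp>-<sha>/<key>/<file>`;
// the system, timestamp, and sha are recovered from the enclosing directory names.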
impl<'a> TryFrom<&'a Path> for BenchDataFile {
    type Error = &'static str;

    fn try_from(path: &'a Path) -> Result<Self, Self::Error> {
        let key_dir = path.parent().ok_or("invalid structure")?;
        let sha_dir = key_dir.parent().ok_or("invalid structure")?;
        let system_dir = sha_dir.parent().ok_or("invalid structure")?;
        let (timestamp, sha) = sha_dir
            .file_name()
            .ok_or("invalid filename")?
            .to_str()
            .ok_or("invalid chars in file name")?
            .split_once('-')
            .ok_or("missing dash in timestamp-sha directory")?;
        Ok(Self {
            path: path.to_path_buf(),
            sha: sha.to_string(),
            timestamp: timestamp.parse().map_err(|_| "unable to parse timestamp")?,
            system: system_dir
                .file_name()
                .ok_or("invalid filename")?
                .to_str()
                .ok_or("invalid chars in file name")?
                .to_string(),
        })
    }
}

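/// Walks `path` for files whose names end in `raw.json`, groups them by system
/// and commit sha, normalizes runs that share benchmarks, and writes one summary
/// JSON file per commit plus a `latest` summary per system.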
pub fn process_all(path: PathBuf) {
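    // system -> sha -> raw benchmark data files recorded for that commit.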
    let mut map = FxHashMap::default();
    for entry in WalkDir::new(&path)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|e| {
            e.file_type().is_file()
                && e.file_name().to_str().map(|n| n.ends_with("raw.json")) == Some(true)
        })
    {
        let data_file_result: Result<BenchDataFile, _> = entry.path().try_into();
        if let Ok(data_file) = data_file_result {
            let inner_map: &mut FxHashMap<_, _> = map.entry(data_file.system.clone()).or_default();
            let items: &mut Vec<BenchDataFile> =
                inner_map.entry(data_file.sha.clone()).or_default();
            items.push(data_file);
        }
    }
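    // Summarize each system separately; `latest` tracks, per benchmark name, the
    // result from the most recently benchmarked commit.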
    for (system, inner_map) in map {
        let mut latest: FxHashMap<String, (u64, String, Benchmark)> = FxHashMap::default();
        for (sha, data_files) in inner_map {
            let min_ts = data_files.iter().map(|d| d.timestamp).min().unwrap();
            let max_ts = data_files.iter().map(|d| d.timestamp).max().unwrap();
            let mut items = data_files
                .iter()
                .map(|data_file| {
                    let file = File::open(&data_file.path).unwrap();
                    let reader = std::io::BufReader::new(file);
                    let data: BaseBenchmarks = serde_json::from_reader(reader)
                        .with_context(|| anyhow!("unable to read {}", data_file.path.display()))
                        .unwrap();
                    data
                })
                .collect::<Vec<_>>();
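            // Group results by benchmark name, remembering which run each came from.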
            let mut by_name = FxHashMap::default();
            for (i, data) in items.iter().enumerate() {
                for (name, bench) in data.benchmarks.iter() {
                    let list: &mut Vec<_> = by_name.entry(name).or_default();
                    list.push((i, bench));
                }
            }
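            // For benchmarks that appear in more than one run, accumulate per-run
            // correction factors relative to the cross-run average mean, presumably
            // to compensate for machine-speed differences between runs.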
            let mut normalization_state = items.iter().map(|_| (0.0, 0)).collect::<Vec<_>>();
            for list in by_name.values().filter(|l| l.len() > 1) {
                let avg = list
                    .iter()
                    .map(|(_, b)| b.estimates.mean.point_estimate)
                    .sum::<f64>()
                    / list.len() as f64;
                for (i, b) in list {
                    let correction = avg / b.estimates.mean.point_estimate;
                    let (sum, count) = &mut normalization_state[*i];
                    *sum += correction;
                    *count += 1;
                }
            }
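            // Rescale every statistic of a run by its averaged correction factor;
            // runs that share at most one benchmark with the others are left untouched.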
            for (i, benches) in items.iter_mut().enumerate() {
                let (sum, count) = normalization_state[i];
                if count <= 1 {
                    continue;
                }
                let correction = sum / count as f64;
                for bench in benches.benchmarks.values_mut() {
                    fn correct(s: &mut CStats, f: f64) {
                        s.point_estimate *= f;
                        s.standard_error *= f;
                        s.confidence_interval.lower_bound *= f;
                        s.confidence_interval.upper_bound *= f;
                    }
                    correct(&mut bench.estimates.mean, correction);
                    correct(&mut bench.estimates.median, correction);
                    correct(&mut bench.estimates.median_abs_dev, correction);
                    if let Some(slope) = bench.estimates.slope.as_mut() {
                        correct(slope, correction);
                    }
                    correct(&mut bench.estimates.std_dev, correction);
                }
            }
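            // Build the per-commit summary, labeled with the short sha.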
            let baseline = &sha[..7];
            let mut latest_for_sha = BaseBenchmarks {
                name: baseline.to_string(),
                benchmarks: BTreeMap::new(),
            };
            let all_benchmark_keys = items
                .iter()
                .flat_map(|b| b.benchmarks.keys())
                .collect::<FxIndexSet<_>>();
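            // For every benchmark name, keep the newest result among this commit's
            // runs, and fold it into the system-wide `latest` map, which prefers the
            // most recently benchmarked commit.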
            for key in all_benchmark_keys {
                let (ts, bench) = items
                    .iter()
                    .enumerate()
                    .map(|(i, b)| (data_files[i].timestamp, b))
                    .filter_map(|(ts, benches)| benches.benchmarks.get(key).map(|b| (ts, b)))
                    .max_by_key(|(ts, _)| *ts)
                    .unwrap();
                let ts = UNIX_EPOCH + Duration::from_secs(ts);
                let ts = DateTime::<Utc>::from(ts);
                let ts = ts.format("%Y-%m-%d %H:%M");
                let key_ts = format!("{key} ({ts})");
                let key_ts_sha = format!("{key} ({ts}, {})", &sha[..7]);
                match latest.entry(key.to_string()) {
                    Entry::Occupied(mut e) => {
                        if e.get().0 < min_ts {
                            e.insert((min_ts, key_ts_sha, bench.clone()));
                        }
                    }
                    Entry::Vacant(e) => {
                        e.insert((min_ts, key_ts_sha, bench.clone()));
                    }
                }
                latest_for_sha.benchmarks.insert(key_ts, bench.clone());
            }
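            // Write the per-commit summary into the system directory as
            // `<min_ts>-<max_ts>-<sha>.json`.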
            let latest_for_sha_path = data_files
                .first()
                .unwrap()
                .path
                .parent()
                .unwrap()
                .parent()
                .unwrap()
                .parent()
                .unwrap()
                .join(format!("{min_ts}-{max_ts}-{sha}.json"));
            let file = File::create(&latest_for_sha_path).unwrap();
            let writer = std::io::BufWriter::new(file);
            serde_json::to_writer_pretty(writer, &latest_for_sha).unwrap();
            println!("{}", latest_for_sha_path.display());
        }

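        // Write the system-wide summary of the newest result per benchmark to
        // `<root>/<system>.json`.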
        let latest_for_system = BaseBenchmarks {
            name: "latest".to_string(),
            benchmarks: latest
                .into_values()
                .map(|(_, key, bench)| (key, bench))
                .collect(),
        };
        let latest_path = path.join(format!("{system}.json"));
        let file = File::create(&latest_path).unwrap();
        let writer = std::io::BufWriter::new(file);
        serde_json::to_writer_pretty(writer, &latest_for_system).unwrap();
        println!("{}", latest_path.display());
    }
}