xtask/summarize_bench/
mod.rs

1pub(crate) mod data;
2
3use std::{
4    collections::{BTreeMap, hash_map::Entry},
5    fs::File,
6    hash::BuildHasherDefault,
7    path::{Path, PathBuf},
8    time::{Duration, UNIX_EPOCH},
9};
10
11use anyhow::{Context, anyhow};
12use chrono::{DateTime, Utc};
13use indexmap::IndexSet;
14use rustc_hash::{FxHashMap, FxHasher};
15use walkdir::WalkDir;
16
17use self::data::Benchmark;
18use crate::summarize_bench::data::{BaseBenchmarks, CStats};
19
20type FxIndexSet<T> = IndexSet<T, BuildHasherDefault<FxHasher>>;
21
/// Location and identity of a single benchmark data file on disk.
///
/// Instances are derived from a path shaped like
/// `<system>/<timestamp>-<sha>/<key>/<file>` (see the `TryFrom<&Path>` impl).
#[derive(Debug)]
struct BenchDataFile {
    /// Path of the data file itself, exactly as it was passed in.
    path: PathBuf,
    /// Text after the first `-` in the `<timestamp>-<sha>` directory name.
    sha: String,
    /// Number parsed from the text before the first `-` in that directory name.
    timestamp: u64,
    /// Name of the directory that contains the `<timestamp>-<sha>` directory.
    system: String,
}

impl TryFrom<&Path> for BenchDataFile {
    type Error = &'static str;

    /// Derives system, timestamp and SHA from the three directories above `path`.
    ///
    /// # Errors
    ///
    /// Returns a static message when the path has fewer than three parents, when
    /// a directory name is missing or not valid UTF-8, when the second parent's
    /// name contains no `-`, or when the part before the `-` is not a `u64`.
    fn try_from(path: &Path) -> Result<Self, Self::Error> {
        /// One level up, or the "invalid structure" error.
        fn parent_of(p: &Path) -> Result<&Path, &'static str> {
            p.parent().ok_or("invalid structure")
        }

        /// Final path component of `dir` as UTF-8 text.
        fn dir_name(dir: &Path) -> Result<&str, &'static str> {
            dir.file_name()
                .ok_or("invalid filename")?
                .to_str()
                .ok_or("invalid chars in file name")
        }

        // file -> key dir -> timestamp-sha dir -> system dir
        let sha_dir = parent_of(parent_of(path)?)?;
        let system_dir = parent_of(sha_dir)?;

        let (ts_text, sha_text) = dir_name(sha_dir)?
            .split_once('-')
            .ok_or("missing dash in timestamp-sha directory")?;
        let timestamp = ts_text
            .parse::<u64>()
            .map_err(|_| "unable to parse timestamp")?;
        // The system name is resolved last so timestamp errors are reported first.
        let system = dir_name(system_dir)?.to_string();

        Ok(Self {
            path: path.to_path_buf(),
            sha: sha_text.to_string(),
            timestamp,
            system,
        })
    }
}
57
58pub fn process_all(path: PathBuf) {
59    let mut map = FxHashMap::default();
60    for entry in WalkDir::new(&path)
61        .into_iter()
62        .filter_map(Result::ok)
63        .filter(|e| {
64            e.file_type().is_file()
65                && e.file_name().to_str().map(|n| n.ends_with("raw.json")) == Some(true)
66        })
67    {
68        let data_file_result: Result<BenchDataFile, _> = entry.path().try_into();
69        if let Ok(data_file) = data_file_result {
70            let inner_map: &mut FxHashMap<_, _> = map.entry(data_file.system.clone()).or_default();
71            let items: &mut Vec<BenchDataFile> =
72                inner_map.entry(data_file.sha.clone()).or_default();
73            items.push(data_file);
74        }
75    }
76    for (system, inner_map) in map {
77        let mut latest: FxHashMap<String, (u64, String, Benchmark)> = FxHashMap::default();
78        for (sha, data_files) in inner_map {
79            let min_ts = data_files.iter().map(|d| d.timestamp).min().unwrap();
80            let max_ts = data_files.iter().map(|d| d.timestamp).max().unwrap();
81            let mut items = data_files
82                .iter()
83                .map(|data_file| {
84                    let file = File::open(&data_file.path).unwrap();
85                    let reader = std::io::BufReader::new(file);
86                    let data: BaseBenchmarks = serde_json::from_reader(reader)
87                        .with_context(|| anyhow!("unable to read {}", data_file.path.display()))
88                        .unwrap();
89                    data
90                })
91                .collect::<Vec<_>>();
92            let mut by_name = FxHashMap::default();
93            for (i, data) in items.iter().enumerate() {
94                for (name, bench) in data.benchmarks.iter() {
95                    let list: &mut Vec<_> = by_name.entry(name).or_default();
96                    list.push((i, bench));
97                }
98            }
99            let mut normalization_state = items.iter().map(|_| (0.0, 0)).collect::<Vec<_>>();
100            for list in by_name.values().filter(|l| l.len() > 1) {
101                let avg = list
102                    .iter()
103                    .map(|(_, b)| b.estimates.mean.point_estimate)
104                    .sum::<f64>()
105                    / list.len() as f64;
106                for (i, b) in list {
107                    let correction = avg / b.estimates.mean.point_estimate;
108                    let (sum, count) = &mut normalization_state[*i];
109                    *sum += correction;
110                    *count += 1;
111                }
112            }
113            for (i, benches) in items.iter_mut().enumerate() {
114                let (sum, count) = normalization_state[i];
115                if count <= 1 {
116                    continue;
117                }
118                let correction = sum / count as f64;
119                for bench in benches.benchmarks.values_mut() {
120                    fn correct(s: &mut CStats, f: f64) {
121                        s.point_estimate *= f;
122                        s.standard_error *= f;
123                        s.confidence_interval.lower_bound *= f;
124                        s.confidence_interval.upper_bound *= f;
125                    }
126                    correct(&mut bench.estimates.mean, correction);
127                    correct(&mut bench.estimates.median, correction);
128                    correct(&mut bench.estimates.median_abs_dev, correction);
129                    if let Some(slope) = bench.estimates.slope.as_mut() {
130                        correct(slope, correction);
131                    }
132                    correct(&mut bench.estimates.std_dev, correction);
133                }
134                // let data_file = &data_files[i];
135                // benches.name = format!("{}-{sha}", data_file.timestamp);
136                // let normalized =
137                // data_file.path.parent().unwrap().join("normalized.json");
138                // let file = File::create(&normalized).unwrap();
139                // let writer = std::io::BufWriter::new(file);
140                // serde_json::to_writer_pretty(writer, benches).unwrap();
141            }
142            let baseline = &sha[..7];
143            let mut latest_for_sha = BaseBenchmarks {
144                name: baseline.to_string(),
145                benchmarks: BTreeMap::new(),
146            };
147            let all_benchmark_keys = items
148                .iter()
149                .flat_map(|b| b.benchmarks.keys())
150                .collect::<FxIndexSet<_>>();
151            for key in all_benchmark_keys {
152                let (ts, bench) = items
153                    .iter()
154                    .enumerate()
155                    .map(|(i, b)| (data_files[i].timestamp, b))
156                    .filter_map(|(ts, benches)| benches.benchmarks.get(key).map(|b| (ts, b)))
157                    .max_by_key(|(ts, _)| *ts)
158                    .unwrap();
159                let ts = UNIX_EPOCH + Duration::from_secs(ts);
160                let ts = DateTime::<Utc>::from(ts);
161                let ts = ts.format("%Y-%m-%d %H:%M");
162                let key_ts = format!("{key} ({ts})");
163                let key_ts_sha = format!("{key} ({ts}, {})", &sha[..7]);
164                match latest.entry(key.to_string()) {
165                    Entry::Occupied(mut e) => {
166                        if e.get().0 < min_ts {
167                            e.insert((min_ts, key_ts_sha, bench.clone()));
168                        }
169                    }
170                    Entry::Vacant(e) => {
171                        e.insert((min_ts, key_ts_sha, bench.clone()));
172                    }
173                }
174                latest_for_sha.benchmarks.insert(key_ts, bench.clone());
175            }
176            let latest_for_sha_path = data_files
177                .first()
178                .unwrap()
179                .path
180                .parent()
181                .unwrap()
182                .parent()
183                .unwrap()
184                .parent()
185                .unwrap()
186                .join(format!("{min_ts}-{max_ts}-{sha}.json"));
187            let file = File::create(&latest_for_sha_path).unwrap();
188            let writer = std::io::BufWriter::new(file);
189            serde_json::to_writer_pretty(writer, &latest_for_sha).unwrap();
190            println!("{}", latest_for_sha_path.display());
191        }
192
193        let latest_for_system = BaseBenchmarks {
194            name: "latest".to_string(),
195            benchmarks: latest
196                .into_values()
197                .map(|(_, key, bench)| (key, bench))
198                .collect(),
199        };
200        let latest_path = path.join(format!("{system}.json"));
201        let file = File::create(&latest_path).unwrap();
202        let writer = std::io::BufWriter::new(file);
203        serde_json::to_writer_pretty(writer, &latest_for_system).unwrap();
204        println!("{}", latest_path.display());
205    }
206}