1use std::{
14 collections::{BTreeMap, HashSet},
15 fs::{self, File},
16 path::{Path, PathBuf},
17};
18
19use anyhow::{Context, Result, bail};
20use byteorder::{BE, ReadBytesExt};
21use lzzzz::lz4::decompress;
22use memmap2::Mmap;
23use turbo_persistence::{
24 BLOCK_HEADER_SIZE, checksum_block,
25 meta_file::MetaFile,
26 mmap_helper::advise_mmap_for_persistence,
27 static_sorted_file::{
28 BLOCK_TYPE_FIXED_KEY_NO_HASH, BLOCK_TYPE_FIXED_KEY_WITH_HASH, BLOCK_TYPE_KEY_NO_HASH,
29 BLOCK_TYPE_KEY_WITH_HASH, KEY_BLOCK_ENTRY_TYPE_BLOB, KEY_BLOCK_ENTRY_TYPE_DELETED,
30 KEY_BLOCK_ENTRY_TYPE_INLINE_MIN, KEY_BLOCK_ENTRY_TYPE_MEDIUM, KEY_BLOCK_ENTRY_TYPE_SMALL,
31 },
32};
33
/// Size in bytes of the header at the start of every key block: a 1-byte
/// block type followed by a 24-bit big-endian entry count (see
/// `parse_key_block_header`, which reads exactly these 4 bytes).
const KEY_BLOCK_HEADER_SIZE: usize = 4;
36
/// Aggregated size and compression statistics for one category of blocks
/// (index, key, or value blocks), possibly accumulated across many SST files.
#[derive(Default, Debug, Clone)]
struct BlockSizeInfo {
    // Bytes as stored on disk (the compressed payload size for compressed blocks).
    stored_size: u64,
    // Bytes after decompression; equals `stored_size` for uncompressed blocks.
    actual_size: u64,
    // Number of blocks that were stored compressed.
    compressed_count: u64,
    // Number of blocks stored without compression.
    uncompressed_count: u64,
}
49
50impl BlockSizeInfo {
51 fn add(&mut self, stored: u64, actual: u64, was_compressed: bool) {
52 self.stored_size += stored;
53 self.actual_size += actual;
54 if was_compressed {
55 self.compressed_count += 1;
56 } else {
57 self.uncompressed_count += 1;
58 }
59 }
60
61 fn total_count(&self) -> u64 {
62 self.compressed_count + self.uncompressed_count
63 }
64
65 fn merge(&mut self, other: &BlockSizeInfo) {
66 self.stored_size += other.stored_size;
67 self.actual_size += other.actual_size;
68 self.compressed_count += other.compressed_count;
69 self.uncompressed_count += other.uncompressed_count;
70 }
71}
72
73#[derive(Default, Debug, Clone)]
75struct SstStats {
76 entry_type_counts: BTreeMap<u8, u64>,
78 total_entries: u64,
80
81 index_blocks: BlockSizeInfo,
83 key_blocks: BlockSizeInfo,
85 variable_key_blocks: BlockSizeInfo,
87 fixed_key_blocks: BlockSizeInfo,
89 value_blocks: BlockSizeInfo,
91
92 block_directory_size: u64,
94
95 inline_value_bytes: u64,
97 small_value_refs: u64, medium_value_refs: u64, blob_refs: u64, deleted_count: u64, file_size: u64,
104}
105
106impl SstStats {
107 fn merge(&mut self, other: &SstStats) {
108 for (ty, count) in &other.entry_type_counts {
109 *self.entry_type_counts.entry(*ty).or_insert(0) += count;
110 }
111 self.total_entries += other.total_entries;
112 self.index_blocks.merge(&other.index_blocks);
113 self.key_blocks.merge(&other.key_blocks);
114 self.variable_key_blocks.merge(&other.variable_key_blocks);
115 self.fixed_key_blocks.merge(&other.fixed_key_blocks);
116 self.value_blocks.merge(&other.value_blocks);
117 self.block_directory_size += other.block_directory_size;
118 self.inline_value_bytes += other.inline_value_bytes;
119 self.small_value_refs += other.small_value_refs;
120 self.medium_value_refs += other.medium_value_refs;
121 self.blob_refs += other.blob_refs;
122 self.deleted_count += other.deleted_count;
123 self.file_size += other.file_size;
124 }
125}
126
/// Identity of one SST file as listed in a meta file.
struct SstInfo {
    // Sequence number; the on-disk filename is formatted as `{:08}.sst`.
    sequence_number: u32,
    // Number of blocks in the file, including the trailing index block.
    block_count: u16,
}
132
133fn track_entry_type(stats: &mut SstStats, entry_type: u8) {
135 *stats.entry_type_counts.entry(entry_type).or_insert(0) += 1;
136 stats.total_entries += 1;
137
138 match entry_type {
139 KEY_BLOCK_ENTRY_TYPE_SMALL => {
140 stats.small_value_refs += 1;
141 }
142 KEY_BLOCK_ENTRY_TYPE_BLOB => {
143 stats.blob_refs += 1;
144 }
145 KEY_BLOCK_ENTRY_TYPE_DELETED => {
146 stats.deleted_count += 1;
147 }
148 KEY_BLOCK_ENTRY_TYPE_MEDIUM => {
149 stats.medium_value_refs += 1;
150 }
151 ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
152 let inline_size = (ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN) as u64;
153 stats.inline_value_bytes += inline_size;
154 }
155 _ => {}
156 }
157}
158
159fn entry_type_description(ty: u8) -> String {
160 match ty {
161 KEY_BLOCK_ENTRY_TYPE_SMALL => "small value (in value block)".to_string(),
162 KEY_BLOCK_ENTRY_TYPE_BLOB => "blob reference".to_string(),
163 KEY_BLOCK_ENTRY_TYPE_DELETED => "deleted/tombstone".to_string(),
164 KEY_BLOCK_ENTRY_TYPE_MEDIUM => "medium value".to_string(),
165 ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
166 let inline_size = ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN;
167 format!("inline {} bytes", inline_size)
168 }
169 _ => format!("unknown type {}", ty),
170 }
171}
172
/// Maps a key-family id to its display name; ids outside the known range
/// become "Unknown".
fn family_name(family: u32) -> &'static str {
    const NAMES: [&str; 4] = ["Infra", "TaskMeta", "TaskData", "TaskCache"];
    NAMES.get(family as usize).copied().unwrap_or("Unknown")
}
182
/// Formats an integer with comma separators every three digits
/// (e.g. 1234567 -> "1,234,567").
fn format_number(n: u64) -> String {
    // Work on the reversed digits so groups of three can be counted from the
    // least significant end, then reverse back at the end.
    let reversed: Vec<char> = n.to_string().chars().rev().collect();
    let mut out: Vec<char> = Vec::with_capacity(reversed.len() + reversed.len() / 3);
    for (i, &digit) in reversed.iter().enumerate() {
        if i > 0 && i % 3 == 0 {
            out.push(',');
        }
        out.push(digit);
    }
    out.iter().rev().collect()
}
195
/// Formats a byte count with a binary-unit suffix (B/KB/MB/GB), using two
/// decimal places for anything at or above one kibibyte.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;
    match bytes {
        b if b >= GIB => format!("{:.2} GB", b as f64 / GIB as f64),
        b if b >= MIB => format!("{:.2} MB", b as f64 / MIB as f64),
        b if b >= KIB => format!("{:.2} KB", b as f64 / KIB as f64),
        b => format!("{} B", b),
    }
}
207
/// Scans `db_path` for `*.meta` files and returns, per key family, the list
/// of SST files (sequence number + block count) those meta files reference.
fn collect_sst_info(db_path: &Path) -> Result<BTreeMap<u32, Vec<SstInfo>>> {
    // Directory entries that fail to read are silently skipped (best-effort
    // listing); a completely unreadable directory fails via `?`.
    let mut meta_files: Vec<PathBuf> = fs::read_dir(db_path)?
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|path| path.extension().is_some_and(|ext| ext == "meta"))
        .collect();

    // Sort paths so meta files are processed in a deterministic order.
    meta_files.sort();

    if meta_files.is_empty() {
        bail!("No .meta files found in {}", db_path.display());
    }

    let mut family_sst_info: BTreeMap<u32, Vec<SstInfo>> = BTreeMap::new();

    for meta_path in &meta_files {
        // The file stem is parsed as the sequence number.
        // NOTE(review): a non-numeric stem silently falls back to sequence 0
        // here — confirm meta filenames are always numeric.
        let filename = meta_path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
        let seq_num: u32 = filename.parse().unwrap_or(0);

        let meta_file = MetaFile::open(db_path, seq_num)
            .with_context(|| format!("Failed to open {}", meta_path.display()))?;

        let family = meta_file.family();

        // Every entry in the meta file describes one SST file of this family.
        for entry in meta_file.entries() {
            family_sst_info.entry(family).or_default().push(SstInfo {
                sequence_number: entry.sequence_number(),
                block_count: entry.block_count(),
            });
        }
    }

    Ok(family_sst_info)
}
244
/// One block read from an SST file, already checksum-verified and
/// decompressed when necessary.
struct RawBlock {
    // Usable block bytes (decompressed, or raw if stored uncompressed).
    data: Box<[u8]>,
    // On-disk payload size, excluding the block header.
    compressed_size: u64,
    // Size after decompression; equals `compressed_size` for uncompressed blocks.
    actual_size: u64,
    // Whether the block was stored compressed on disk.
    was_compressed: bool,
}
252
/// Reads block `block_index` from a mapped SST file, verifies its checksum,
/// and decompresses it when its header marks it as compressed.
///
/// `block_offsets_start` is the file offset of the trailing block-offset
/// directory (one big-endian `u32` end offset per block); `sequence_number`
/// is only used for error messages.
fn read_block(
    mmap: &Mmap,
    block_offsets_start: usize,
    block_index: u16,
    sequence_number: u32,
) -> Result<RawBlock> {
    let offset = block_offsets_start + block_index as usize * size_of::<u32>();

    // Each directory entry stores a block's end offset; a block starts where
    // the previous one ended (or at 0 for the very first block).
    let block_start = if block_index == 0 {
        0
    } else {
        (&mmap[offset - size_of::<u32>()..offset]).read_u32::<BE>()? as usize
    };
    let block_end = (&mmap[offset..offset + size_of::<u32>()]).read_u32::<BE>()? as usize;

    // Block header: a u32 uncompressed length (0 means "stored uncompressed")
    // followed by a u32 checksum over the stored payload.
    let uncompressed_length =
        (&mmap[block_start..block_start + size_of::<u32>()]).read_u32::<BE>()?;
    let expected_checksum = (&mmap
        [block_start + size_of::<u32>()..block_start + BLOCK_HEADER_SIZE])
        .read_u32::<BE>()?;
    let compressed_data = &mmap[block_start + BLOCK_HEADER_SIZE..block_end];
    let compressed_size = compressed_data.len() as u64;

    let was_compressed = uncompressed_length > 0;
    let actual_size = if was_compressed {
        uncompressed_length as u64
    } else {
        compressed_size
    };

    // The checksum covers the stored (possibly compressed) payload, so verify
    // before attempting decompression.
    let actual_checksum = checksum_block(compressed_data);
    if actual_checksum != expected_checksum {
        bail!(
            "Cache corruption detected: checksum mismatch in block {} of {:08}.sst (expected \
             {:08x}, got {:08x})",
            block_index,
            sequence_number,
            expected_checksum,
            actual_checksum
        );
    }

    let data = if was_compressed {
        let mut buffer = vec![0u8; uncompressed_length as usize];
        let bytes_written = decompress(compressed_data, &mut buffer)?;
        // A length mismatch here indicates a writer bug rather than plain
        // corruption (the checksum already passed), hence the assert.
        assert_eq!(
            bytes_written, uncompressed_length as usize,
            "Decompressed length does not match expected"
        );
        buffer.into_boxed_slice()
    } else {
        // Uncompressed: copy the mapped bytes so `data` owns its storage.
        Box::from(compressed_data)
    };

    Ok(RawBlock {
        data,
        compressed_size,
        actual_size,
        was_compressed,
    })
}
315
316fn parse_key_block_indices(index_block: &[u8]) -> HashSet<u16> {
320 assert!(index_block.len() >= 4, "Index block too small");
321 let mut data = &index_block[1..]; let first_block = data.read_u16::<BE>().unwrap();
323 let mut indices = HashSet::new();
324 indices.insert(first_block);
325 const ENTRY_SIZE: usize = size_of::<u64>() + size_of::<u16>();
326 let entry_count = data.len() / ENTRY_SIZE;
327 for i in 0..entry_count {
328 let block_index = (&data[i * ENTRY_SIZE + 8..]).read_u16::<BE>().unwrap();
329 indices.insert(block_index);
330 }
331 indices
332}
333
/// Parsed 4-byte (or 6-byte, for fixed blocks) key block header.
enum KeyBlockHeader {
    // Variable-size keys: each entry's type byte is read from its own
    // 4-byte entry header (see `iter_key_block_entry_types`).
    Variable { entry_count: u32 },
    // Fixed-size keys: one `value_type` byte applies to every entry.
    Fixed { entry_count: u32, value_type: u8 },
}
339
340fn parse_key_block_header(block: &[u8]) -> Result<KeyBlockHeader> {
342 assert!(block.len() >= 4, "Key block too small");
343 let block_type = block[0];
344 let entry_count = ((block[1] as u32) << 16) | ((block[2] as u32) << 8) | (block[3] as u32);
345 match block_type {
346 BLOCK_TYPE_KEY_WITH_HASH | BLOCK_TYPE_KEY_NO_HASH => {
347 Ok(KeyBlockHeader::Variable { entry_count })
348 }
349 BLOCK_TYPE_FIXED_KEY_WITH_HASH | BLOCK_TYPE_FIXED_KEY_NO_HASH => {
350 assert!(block.len() >= 6, "Fixed key block header too small");
351 Ok(KeyBlockHeader::Fixed {
352 entry_count,
353 value_type: block[5],
354 })
355 }
356 _ => bail!("Invalid key block type: {block_type}"),
357 }
358}
359
360fn iter_key_block_entry_types(
365 header: KeyBlockHeader,
366 block: &[u8],
367) -> impl Iterator<Item = u8> + '_ {
368 let (entry_count, fixed_type) = match header {
369 KeyBlockHeader::Variable { entry_count } => (entry_count, None),
370 KeyBlockHeader::Fixed {
371 entry_count,
372 value_type,
373 } => (entry_count, Some(value_type)),
374 };
375 (0..entry_count).map(move |i| {
376 if let Some(vt) = fixed_type {
377 vt
378 } else {
379 let header_offset = KEY_BLOCK_HEADER_SIZE + i as usize * 4;
382 block[header_offset]
383 }
384 })
385}
386
/// Reads every block of one SST file and accumulates entry-type and
/// block-size statistics for it.
fn analyze_sst_file(db_path: &Path, info: &SstInfo) -> Result<SstStats> {
    let filename = format!("{:08}.sst", info.sequence_number);
    let path = db_path.join(&filename);

    let file = File::open(&path).with_context(|| format!("Failed to open {}", filename))?;
    let file_size = file.metadata()?.len();
    // SAFETY: mapping is unsound if the file is mutated concurrently; this
    // tool assumes the database is not being written while it runs.
    let mmap = unsafe { Mmap::map(&file)? };
    advise_mmap_for_persistence(&mmap)?;

    let mut stats = SstStats {
        // The block directory is a trailing array of one u32 offset per block.
        block_directory_size: info.block_count as u64 * size_of::<u32>() as u64,
        file_size,
        ..Default::default()
    };

    let block_offsets_start = mmap.len() - (info.block_count as usize * size_of::<u32>());

    // The last block is the index block; it identifies which of the remaining
    // blocks are key blocks (everything else is counted as a value block).
    let index_block_index = info.block_count - 1;
    let index_raw = read_block(
        &mmap,
        block_offsets_start,
        index_block_index,
        info.sequence_number,
    )?;
    let key_block_indices = parse_key_block_indices(&index_raw.data);

    stats.index_blocks.add(
        index_raw.compressed_size,
        index_raw.actual_size,
        index_raw.was_compressed,
    );

    for block_index in 0..index_block_index {
        // Unreadable blocks are reported as warnings but don't abort the file.
        let raw = match read_block(
            &mmap,
            block_offsets_start,
            block_index,
            info.sequence_number,
        ) {
            Ok(raw) => raw,
            Err(e) => {
                eprintln!(
                    "Warning: Failed to read block {} in {:08}.sst: {}",
                    block_index, info.sequence_number, e
                );
                continue;
            }
        };

        // Not referenced by the index block => treat it as a value block.
        if !key_block_indices.contains(&block_index) {
            stats
                .value_blocks
                .add(raw.compressed_size, raw.actual_size, raw.was_compressed);
            continue;
        }

        let block: &[u8] = &raw.data;

        stats
            .key_blocks
            .add(raw.compressed_size, raw.actual_size, raw.was_compressed);

        let key_block_header = parse_key_block_header(block).with_context(|| {
            format!(
                "Warning: key block {} in {:08}.sst has unexpected block type {}",
                block_index, info.sequence_number, block[0]
            )
        })?;
        // Also tally each key block into its variable/fixed sub-category.
        match key_block_header {
            KeyBlockHeader::Variable { .. } => {
                stats.variable_key_blocks.add(
                    raw.compressed_size,
                    raw.actual_size,
                    raw.was_compressed,
                );
            }
            KeyBlockHeader::Fixed { .. } => {
                stats.fixed_key_blocks.add(
                    raw.compressed_size,
                    raw.actual_size,
                    raw.was_compressed,
                );
            }
        };

        for entry_type in iter_key_block_entry_types(key_block_header, block) {
            track_entry_type(&mut stats, entry_type);
        }
    }

    Ok(stats)
}
485
486fn print_block_stats(name: &str, info: &BlockSizeInfo) {
487 let total = info.total_count();
488 if total == 0 {
489 println!(" {}: none", name);
490 return;
491 }
492
493 let all_uncompressed = info.compressed_count == 0;
495 let all_compressed = info.uncompressed_count == 0;
496
497 if all_uncompressed {
498 println!(
500 " {}: {} blocks (uncompressed), {}",
501 name,
502 format_number(total),
503 format_bytes(info.actual_size),
504 );
505 } else if all_compressed {
506 let savings_pct = if info.actual_size > 0 {
508 ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
509 } else {
510 0.0
511 };
512 let savings_str = if savings_pct < 0.0 {
513 format!("{:.0}% overhead", -savings_pct)
514 } else {
515 format!("{:.0}% savings", savings_pct)
516 };
517 println!(
518 " {}: {} blocks, stored: {}, actual: {} ({})",
519 name,
520 format_number(total),
521 format_bytes(info.stored_size),
522 format_bytes(info.actual_size),
523 savings_str,
524 );
525 } else {
526 let savings_pct = if info.actual_size > 0 {
528 ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
529 } else {
530 0.0
531 };
532 let savings_str = if savings_pct < 0.0 {
533 format!("{:.0}% overhead", -savings_pct)
534 } else {
535 format!("{:.0}% savings", savings_pct)
536 };
537 println!(
538 " {}: {} blocks ({} compressed, {} uncompressed)",
539 name,
540 format_number(total),
541 format_number(info.compressed_count),
542 format_number(info.uncompressed_count),
543 );
544 println!(
545 " stored: {}, actual: {} ({})",
546 format_bytes(info.stored_size),
547 format_bytes(info.actual_size),
548 savings_str,
549 );
550 }
551}
552
553fn print_entry_histogram(stats: &SstStats, prefix: &str) {
554 if stats.entry_type_counts.is_empty() {
555 return;
556 }
557 println!("{}Entry Type Histogram:", prefix);
558 for (ty, count) in &stats.entry_type_counts {
559 let pct = (*count as f64 / stats.total_entries as f64) * 100.0;
560 let bar_len = (pct / 2.0) as usize;
562 let bar: String = "█".repeat(bar_len.min(40));
563 println!(
564 "{} type {:3}: {:>12} ({:5.1}%) │{}│ {}",
565 prefix,
566 ty,
567 format_number(*count),
568 pct,
569 bar,
570 entry_type_description(*ty),
571 );
572 }
573}
574
575fn print_value_storage(stats: &SstStats, prefix: &str) {
576 println!("{}Value Storage:", prefix);
577 if stats.inline_value_bytes > 0 {
578 let inline_count: u64 = stats
579 .entry_type_counts
580 .iter()
581 .filter(|(ty, _)| **ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN)
582 .map(|(_, count)| count)
583 .sum();
584 println!(
585 "{} Inline: {} entries, {} total",
586 prefix,
587 format_number(inline_count),
588 format_bytes(stats.inline_value_bytes)
589 );
590 }
591 if stats.small_value_refs > 0 {
592 println!(
593 "{} Small (value block refs): {} entries",
594 prefix,
595 format_number(stats.small_value_refs)
596 );
597 }
598 if stats.medium_value_refs > 0 {
599 println!(
600 "{} Medium (dedicated blocks): {} entries",
601 prefix,
602 format_number(stats.medium_value_refs)
603 );
604 }
605 if stats.blob_refs > 0 {
606 println!(
607 "{} Blob (external files): {} entries",
608 prefix,
609 format_number(stats.blob_refs)
610 );
611 }
612 if stats.deleted_count > 0 {
613 println!(
614 "{} Deleted: {} entries",
615 prefix,
616 format_number(stats.deleted_count)
617 );
618 }
619}
620
/// Prints the detailed per-SST report used in verbose mode, framed with
/// Unicode box-drawing characters.
fn print_sst_details(seq_num: u32, stats: &SstStats) {
    println!(
        "\n ┌─ SST {:08}.sst ─────────────────────────────────────────────────────",
        seq_num
    );
    println!(
        " │ Entries: {}, File size: {}",
        format_number(stats.total_entries),
        format_bytes(stats.file_size)
    );

    // Per-file overhead currently consists only of the block directory.
    let overhead = stats.block_directory_size;
    let overhead_pct = if stats.file_size > 0 {
        (overhead as f64 / stats.file_size as f64) * 100.0
    } else {
        0.0
    };
    println!(" │");
    println!(
        " │ Per-file Overhead: {} ({:.1}% of file)",
        format_bytes(overhead),
        overhead_pct
    );
    println!(
        " │ Block directory: {}",
        format_bytes(stats.block_directory_size)
    );

    println!(" │");
    println!(" │ Block Statistics:");
    print!(" │ ");
    print_block_stats("Index blocks", &stats.index_blocks);
    print!(" │ ");
    print_block_stats("Key blocks", &stats.key_blocks);
    // Only show the variable/fixed split when both kinds are present.
    if stats.variable_key_blocks.total_count() > 0 && stats.fixed_key_blocks.total_count() > 0 {
        print!(" │ ");
        print_block_stats("Variable", &stats.variable_key_blocks);
        print!(" │ ");
        print_block_stats("Fixed", &stats.fixed_key_blocks);
    } else if stats.fixed_key_blocks.total_count() > 0 {
        println!(" │ (all fixed-size)");
    }
    print!(" │ ");
    print_block_stats("Value blocks", &stats.value_blocks);

    if !stats.entry_type_counts.is_empty() {
        println!(" │");
        print_entry_histogram(stats, " │ ");
    }

    println!(" │");
    print_value_storage(stats, " │ ");

    println!(" └───────────────────────────────────────────────────────────────────────────");
}
680
/// Prints the aggregate report for one key family: totals, averages, overhead,
/// block statistics, the entry-type histogram, and the value-storage breakdown.
fn print_family_summary(family: u32, sst_count: usize, stats: &SstStats) {
    println!("═══════════════════════════════════════════════════════════════════════════════");
    println!("Family {} ({}):", family, family_name(family));
    println!("═══════════════════════════════════════════════════════════════════════════════");

    println!(
        " SST files: {}, Total entries: {}",
        format_number(sst_count as u64),
        format_number(stats.total_entries)
    );
    println!(" Total file size: {}", format_bytes(stats.file_size));

    if sst_count > 0 {
        // Integer division is fine for these display-only averages.
        let avg_file_size = stats.file_size / sst_count as u64;
        let avg_keys_per_file = stats.total_entries / sst_count as u64;
        let total_key_blocks = stats.key_blocks.total_count();
        let avg_keys_per_block = if total_key_blocks > 0 {
            stats.total_entries as f64 / total_key_blocks as f64
        } else {
            0.0
        };

        println!();
        println!(" Averages:");
        println!(" File size: {}", format_bytes(avg_file_size));
        println!(" Keys per file: {}", format_number(avg_keys_per_file));
        println!(" Keys per key block: {:.1}", avg_keys_per_block);
    }

    // Per-file overhead currently consists only of the block directories.
    let total_overhead = stats.block_directory_size;
    let overhead_pct = if stats.file_size > 0 {
        (total_overhead as f64 / stats.file_size as f64) * 100.0
    } else {
        0.0
    };
    println!();
    println!(
        " Per-file Overhead (total): {} ({:.1}% of total file size)",
        format_bytes(total_overhead),
        overhead_pct
    );
    println!(
        " Block directories: {}",
        format_bytes(stats.block_directory_size)
    );
    if sst_count > 0 {
        println!(
            " Average per file: {}",
            format_bytes(stats.block_directory_size / sst_count as u64)
        );
    }

    println!();
    println!(" Block Statistics:");
    print!(" ");
    print_block_stats("Index blocks", &stats.index_blocks);
    print!(" ");
    print_block_stats("Key blocks", &stats.key_blocks);
    // Only show the variable/fixed split when both kinds are present.
    if stats.variable_key_blocks.total_count() > 0 && stats.fixed_key_blocks.total_count() > 0 {
        print!(" ");
        print_block_stats("Variable", &stats.variable_key_blocks);
        print!(" ");
        print_block_stats("Fixed", &stats.fixed_key_blocks);
    } else if stats.fixed_key_blocks.total_count() > 0 {
        println!(" (all fixed-size)");
    }
    print!(" ");
    print_block_stats("Value blocks", &stats.value_blocks);

    println!();
    print_entry_histogram(stats, " ");

    println!();
    print_value_storage(stats, " ");

    println!();
}
761
762fn main() -> Result<()> {
763 let args: Vec<String> = std::env::args().collect();
764
765 let mut db_path: Option<PathBuf> = None;
767 let mut verbose = false;
768
769 let mut i = 1;
770 while i < args.len() {
771 match args[i].as_str() {
772 "--verbose" | "-v" => verbose = true,
773 arg if !arg.starts_with('-') => {
774 if db_path.is_none() {
775 db_path = Some(PathBuf::from(arg));
776 }
777 }
778 _ => {
779 eprintln!("Unknown option: {}", args[i]);
780 std::process::exit(1);
781 }
782 }
783 i += 1;
784 }
785
786 let db_path = match db_path {
787 Some(p) => p,
788 None => {
789 eprintln!("Usage: {} [OPTIONS] <db_directory>", args[0]);
790 eprintln!();
791 eprintln!("Inspects turbo-persistence SST files to report entry type statistics.");
792 eprintln!();
793 eprintln!("Options:");
794 eprintln!(" -v, --verbose Show per-SST file details (default: family totals only)");
795 eprintln!();
796 eprintln!("Entry types:");
797 eprintln!(" 0: Small value (stored in separate value block)");
798 eprintln!(" 1: Blob reference");
799 eprintln!(" 2: Deleted/tombstone");
800 eprintln!(" 3: Medium value");
801 eprintln!(" 8+: Inline value (size = type - 8)");
802 eprintln!();
803 eprintln!("For TaskCache (family 3), values are 4-byte TaskIds.");
804 eprintln!("Expected entry type is 12 (8 + 4) for inline optimization.");
805 std::process::exit(1);
806 }
807 };
808
809 if !db_path.is_dir() {
810 bail!("Not a directory: {}", db_path.display());
811 }
812
813 let family_sst_info = collect_sst_info(&db_path)?;
815
816 let total_sst_count: usize = family_sst_info.values().map(|v| v.len()).sum();
817 println!(
818 "Analyzing {} SST files in {}\n",
819 format_number(total_sst_count as u64),
820 db_path.display()
821 );
822
823 for (family, sst_list) in &family_sst_info {
825 let mut family_stats = SstStats::default();
826 let mut sst_stats_list: Vec<(u32, SstStats)> = Vec::new();
827
828 for info in sst_list {
829 match analyze_sst_file(&db_path, info) {
830 Ok(stats) => {
831 family_stats.merge(&stats);
832 if verbose {
833 sst_stats_list.push((info.sequence_number, stats));
834 }
835 }
836 Err(e) => {
837 eprintln!(
838 "Warning: Failed to analyze {:08}.sst: {}",
839 info.sequence_number, e
840 );
841 }
842 }
843 }
844
845 print_family_summary(*family, sst_list.len(), &family_stats);
847
848 if verbose && !sst_stats_list.is_empty() {
850 println!(" Per-SST Details:");
851 for (seq_num, stats) in &sst_stats_list {
852 print_sst_details(*seq_num, stats);
853 }
854 println!();
855 }
856 }
857
858 Ok(())
859}