Skip to main content

sst_inspect/
sst_inspect.rs

1//! SST file inspector binary for turbo-persistence databases.
2//!
3//! This tool inspects SST files to report entry type statistics per family,
4//! useful for verifying that inline value optimization is being used.
5//!
6//! Entry types:
7//! - 0: Small value (stored in value block)
8//! - 1: Blob reference
9//! - 2: Deleted/tombstone
10//! - 3: Medium value
11//! - 8-255: Inline value where (type - 8) = value byte count
12
13use std::{
14    collections::{BTreeMap, HashSet},
15    fs::{self, File},
16    path::{Path, PathBuf},
17};
18
19use anyhow::{Context, Result, bail};
20use byteorder::{BE, ReadBytesExt};
21use lzzzz::lz4::decompress;
22use memmap2::Mmap;
23use turbo_persistence::{
24    BLOCK_HEADER_SIZE, checksum_block,
25    meta_file::MetaFile,
26    mmap_helper::advise_mmap_for_persistence,
27    static_sorted_file::{
28        BLOCK_TYPE_FIXED_KEY_NO_HASH, BLOCK_TYPE_FIXED_KEY_WITH_HASH, BLOCK_TYPE_KEY_NO_HASH,
29        BLOCK_TYPE_KEY_WITH_HASH, KEY_BLOCK_ENTRY_TYPE_BLOB, KEY_BLOCK_ENTRY_TYPE_DELETED,
30        KEY_BLOCK_ENTRY_TYPE_INLINE_MIN, KEY_BLOCK_ENTRY_TYPE_MEDIUM, KEY_BLOCK_ENTRY_TYPE_SMALL,
31    },
32};
33
/// Byte length of the header that starts every key block:
/// a 1-byte block type followed by a 3-byte entry count.
const KEY_BLOCK_HEADER_SIZE: usize = 1 + 3;
36
/// Running size and compression counters for one category of blocks.
#[derive(Default, Debug, Clone)]
struct BlockSizeInfo {
    /// Bytes the blocks occupy on disk (after compression, if any).
    stored_size: u64,
    /// Bytes the blocks occupy once decompressed.
    actual_size: u64,
    /// How many recorded blocks were compressed.
    compressed_count: u64,
    /// How many recorded blocks were stored uncompressed.
    uncompressed_count: u64,
}

impl BlockSizeInfo {
    /// Records a single block: accumulates both sizes and bumps the counter
    /// matching `was_compressed`.
    fn add(&mut self, stored: u64, actual: u64, was_compressed: bool) {
        self.stored_size += stored;
        self.actual_size += actual;
        let counter = if was_compressed {
            &mut self.compressed_count
        } else {
            &mut self.uncompressed_count
        };
        *counter += 1;
    }

    /// Number of blocks recorded so far, compressed or not.
    fn total_count(&self) -> u64 {
        self.uncompressed_count + self.compressed_count
    }

    /// Adds every counter of `other` onto `self`.
    fn merge(&mut self, other: &BlockSizeInfo) {
        let BlockSizeInfo {
            stored_size,
            actual_size,
            compressed_count,
            uncompressed_count,
        } = other;
        self.stored_size += stored_size;
        self.actual_size += actual_size;
        self.compressed_count += compressed_count;
        self.uncompressed_count += uncompressed_count;
    }
}
72
73/// Statistics for a single SST file
74#[derive(Default, Debug, Clone)]
75struct SstStats {
76    /// Count of entries by type
77    entry_type_counts: BTreeMap<u8, u64>,
78    /// Total entries
79    total_entries: u64,
80
81    /// Index block sizes
82    index_blocks: BlockSizeInfo,
83    /// Key block sizes (all types combined)
84    key_blocks: BlockSizeInfo,
85    /// Variable-size key blocks (types 1/2)
86    variable_key_blocks: BlockSizeInfo,
87    /// Fixed-size key blocks (types 3/4)
88    fixed_key_blocks: BlockSizeInfo,
89    /// Value block sizes (small values)
90    value_blocks: BlockSizeInfo,
91
92    /// Block directory size (block_count * 4 bytes at end of file)
93    block_directory_size: u64,
94
95    /// Value sizes by type (inline values track actual bytes)
96    inline_value_bytes: u64,
97    small_value_refs: u64,  // Count of references to value blocks
98    medium_value_refs: u64, // Count of references to medium values
99    blob_refs: u64,         // Count of blob references
100    deleted_count: u64,     // Count of deleted entries
101
102    /// File size in bytes
103    file_size: u64,
104}
105
106impl SstStats {
107    fn merge(&mut self, other: &SstStats) {
108        for (ty, count) in &other.entry_type_counts {
109            *self.entry_type_counts.entry(*ty).or_insert(0) += count;
110        }
111        self.total_entries += other.total_entries;
112        self.index_blocks.merge(&other.index_blocks);
113        self.key_blocks.merge(&other.key_blocks);
114        self.variable_key_blocks.merge(&other.variable_key_blocks);
115        self.fixed_key_blocks.merge(&other.fixed_key_blocks);
116        self.value_blocks.merge(&other.value_blocks);
117        self.block_directory_size += other.block_directory_size;
118        self.inline_value_bytes += other.inline_value_bytes;
119        self.small_value_refs += other.small_value_refs;
120        self.medium_value_refs += other.medium_value_refs;
121        self.blob_refs += other.blob_refs;
122        self.deleted_count += other.deleted_count;
123        self.file_size += other.file_size;
124    }
125}
126
/// What a meta file records about one SST file.
#[derive(Debug, Clone)]
struct SstInfo {
    /// Sequence number of the SST; the file on disk is named `{:08}.sst` after it.
    sequence_number: u32,
    /// Number of blocks in the SST, which is also the number of 4-byte entries
    /// in the block directory at the end of the file.
    block_count: u16,
}
132
133/// Accumulates statistics for a single entry of the given type.
134fn track_entry_type(stats: &mut SstStats, entry_type: u8) {
135    *stats.entry_type_counts.entry(entry_type).or_insert(0) += 1;
136    stats.total_entries += 1;
137
138    match entry_type {
139        KEY_BLOCK_ENTRY_TYPE_SMALL => {
140            stats.small_value_refs += 1;
141        }
142        KEY_BLOCK_ENTRY_TYPE_BLOB => {
143            stats.blob_refs += 1;
144        }
145        KEY_BLOCK_ENTRY_TYPE_DELETED => {
146            stats.deleted_count += 1;
147        }
148        KEY_BLOCK_ENTRY_TYPE_MEDIUM => {
149            stats.medium_value_refs += 1;
150        }
151        ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
152            let inline_size = (ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN) as u64;
153            stats.inline_value_bytes += inline_size;
154        }
155        _ => {}
156    }
157}
158
159fn entry_type_description(ty: u8) -> String {
160    match ty {
161        KEY_BLOCK_ENTRY_TYPE_SMALL => "small value (in value block)".to_string(),
162        KEY_BLOCK_ENTRY_TYPE_BLOB => "blob reference".to_string(),
163        KEY_BLOCK_ENTRY_TYPE_DELETED => "deleted/tombstone".to_string(),
164        KEY_BLOCK_ENTRY_TYPE_MEDIUM => "medium value".to_string(),
165        ty if ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN => {
166            let inline_size = ty - KEY_BLOCK_ENTRY_TYPE_INLINE_MIN;
167            format!("inline {} bytes", inline_size)
168        }
169        _ => format!("unknown type {}", ty),
170    }
171}
172
/// Display name for a family id; ids outside the known table map to `"Unknown"`.
fn family_name(family: u32) -> &'static str {
    const KNOWN_FAMILIES: [&str; 4] = ["Infra", "TaskMeta", "TaskData", "TaskCache"];

    usize::try_from(family)
        .ok()
        .and_then(|idx| KNOWN_FAMILIES.get(idx))
        .copied()
        .unwrap_or("Unknown")
}
182
/// Renders `n` in decimal with a comma between every group of three digits.
fn format_number(n: u64) -> String {
    let digits = n.to_string();
    let mut grouped = String::with_capacity(digits.len() + digits.len() / 3);

    // `rchunks` groups from the least-significant end, so only the leading
    // group can be shorter than three digits; `rev` restores reading order.
    for (idx, group) in digits.as_bytes().rchunks(3).rev().enumerate() {
        if idx > 0 {
            grouped.push(',');
        }
        grouped.extend(group.iter().map(|&digit| char::from(digit)));
    }

    grouped
}
195
/// Renders a byte count using binary (1024-based) units labelled B / KB / MB / GB.
///
/// Values below 1024 are printed as an exact integer; larger values get two
/// decimals in the largest unit that does not exceed them.
fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * 1024;
    const GIB: u64 = MIB * 1024;

    let scaled = |unit: u64| bytes as f64 / unit as f64;

    match bytes {
        b if b >= GIB => format!("{:.2} GB", scaled(GIB)),
        b if b >= MIB => format!("{:.2} MB", scaled(MIB)),
        b if b >= KIB => format!("{:.2} KB", scaled(KIB)),
        _ => format!("{} B", bytes),
    }
}
207
208/// Collect SST info from all meta files in the database directory
209fn collect_sst_info(db_path: &Path) -> Result<BTreeMap<u32, Vec<SstInfo>>> {
210    let mut meta_files: Vec<PathBuf> = fs::read_dir(db_path)?
211        .filter_map(|entry| entry.ok())
212        .map(|entry| entry.path())
213        .filter(|path| path.extension().is_some_and(|ext| ext == "meta"))
214        .collect();
215
216    meta_files.sort();
217
218    if meta_files.is_empty() {
219        bail!("No .meta files found in {}", db_path.display());
220    }
221
222    let mut family_sst_info: BTreeMap<u32, Vec<SstInfo>> = BTreeMap::new();
223
224    for meta_path in &meta_files {
225        // Extract sequence number from filename
226        let filename = meta_path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
227        let seq_num: u32 = filename.parse().unwrap_or(0);
228
229        let meta_file = MetaFile::open(db_path, seq_num)
230            .with_context(|| format!("Failed to open {}", meta_path.display()))?;
231
232        let family = meta_file.family();
233
234        for entry in meta_file.entries() {
235            family_sst_info.entry(family).or_default().push(SstInfo {
236                sequence_number: entry.sequence_number(),
237                block_count: entry.block_count(),
238            });
239        }
240    }
241
242    Ok(family_sst_info)
243}
244
/// A single block as read from an SST file, together with its size bookkeeping.
#[derive(Debug)]
struct RawBlock {
    /// Block payload: decompressed when the block was compressed, otherwise a copy of the
    /// stored bytes.
    data: Box<[u8]>,
    /// Size of the payload as stored on disk (excluding the block header).
    compressed_size: u64,
    /// Size of the payload after decompression; equals `compressed_size` for blocks that
    /// were stored uncompressed.
    actual_size: u64,
    /// Whether the block was stored compressed.
    was_compressed: bool,
}
252
253/// Reads, checksums, and decompresses a single block from the mmap.
254fn read_block(
255    mmap: &Mmap,
256    block_offsets_start: usize,
257    block_index: u16,
258    sequence_number: u32,
259) -> Result<RawBlock> {
260    let offset = block_offsets_start + block_index as usize * size_of::<u32>();
261
262    let block_start = if block_index == 0 {
263        0
264    } else {
265        (&mmap[offset - size_of::<u32>()..offset]).read_u32::<BE>()? as usize
266    };
267    let block_end = (&mmap[offset..offset + size_of::<u32>()]).read_u32::<BE>()? as usize;
268
269    let uncompressed_length =
270        (&mmap[block_start..block_start + size_of::<u32>()]).read_u32::<BE>()?;
271    let expected_checksum = (&mmap
272        [block_start + size_of::<u32>()..block_start + BLOCK_HEADER_SIZE])
273        .read_u32::<BE>()?;
274    let compressed_data = &mmap[block_start + BLOCK_HEADER_SIZE..block_end];
275    let compressed_size = compressed_data.len() as u64;
276
277    let was_compressed = uncompressed_length > 0;
278    let actual_size = if was_compressed {
279        uncompressed_length as u64
280    } else {
281        compressed_size
282    };
283
284    let actual_checksum = checksum_block(compressed_data);
285    if actual_checksum != expected_checksum {
286        bail!(
287            "Cache corruption detected: checksum mismatch in block {} of {:08}.sst (expected \
288             {:08x}, got {:08x})",
289            block_index,
290            sequence_number,
291            expected_checksum,
292            actual_checksum
293        );
294    }
295
296    let data = if was_compressed {
297        let mut buffer = vec![0u8; uncompressed_length as usize];
298        let bytes_written = decompress(compressed_data, &mut buffer)?;
299        assert_eq!(
300            bytes_written, uncompressed_length as usize,
301            "Decompressed length does not match expected"
302        );
303        buffer.into_boxed_slice()
304    } else {
305        Box::from(compressed_data)
306    };
307
308    Ok(RawBlock {
309        data,
310        compressed_size,
311        actual_size,
312        was_compressed,
313    })
314}
315
/// Parses an index block to extract all referenced key block indices.
///
/// Index block format: `[1B type][2B first_block][N * (8B hash + 2B block_index)]`,
/// with all integers big-endian. Trailing bytes that do not form a complete entry are
/// ignored.
///
/// # Panics
///
/// Panics when `index_block` is shorter than the 3-byte header. A header-only block
/// (`N == 0`) is valid and yields just `first_block`.
fn parse_key_block_indices(index_block: &[u8]) -> HashSet<u16> {
    const HEADER_SIZE: usize = 1 + std::mem::size_of::<u16>();
    const HASH_SIZE: usize = std::mem::size_of::<u64>();
    const ENTRY_SIZE: usize = HASH_SIZE + std::mem::size_of::<u16>();

    assert!(index_block.len() >= HEADER_SIZE, "Index block too small");
    let (header, entries) = index_block.split_at(HEADER_SIZE);

    // header[0] is the block type byte; the first key block index follows it.
    let first_block = u16::from_be_bytes([header[1], header[2]]);

    let mut indices = HashSet::with_capacity(1 + entries.len() / ENTRY_SIZE);
    indices.insert(first_block);
    indices.extend(
        entries
            .chunks_exact(ENTRY_SIZE)
            // Skip the 8-byte hash; the block index is the last two bytes of the entry.
            .map(|entry| u16::from_be_bytes([entry[HASH_SIZE], entry[HASH_SIZE + 1]])),
    );
    indices
}
333
/// Parsed header of a key block.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum KeyBlockHeader {
    /// Variable-size key block: every entry carries its own entry type.
    Variable {
        /// Number of entries in the block.
        entry_count: u32,
    },
    /// Fixed-size key block: all entries share one entry type.
    Fixed {
        /// Number of entries in the block.
        entry_count: u32,
        /// Entry type shared by every entry of the block.
        value_type: u8,
    },
}
339
340/// Parses the header of a key block from the full decompressed block data.
341fn parse_key_block_header(block: &[u8]) -> Result<KeyBlockHeader> {
342    assert!(block.len() >= 4, "Key block too small");
343    let block_type = block[0];
344    let entry_count = ((block[1] as u32) << 16) | ((block[2] as u32) << 8) | (block[3] as u32);
345    match block_type {
346        BLOCK_TYPE_KEY_WITH_HASH | BLOCK_TYPE_KEY_NO_HASH => {
347            Ok(KeyBlockHeader::Variable { entry_count })
348        }
349        BLOCK_TYPE_FIXED_KEY_WITH_HASH | BLOCK_TYPE_FIXED_KEY_NO_HASH => {
350            assert!(block.len() >= 6, "Fixed key block header too small");
351            Ok(KeyBlockHeader::Fixed {
352                entry_count,
353                value_type: block[5],
354            })
355        }
356        _ => bail!("Invalid key block type: {block_type}"),
357    }
358}
359
360/// Iterates over entry type bytes in a key block.
361///
362/// For variable-size key blocks, reads byte 0 of each 4-byte offset table entry.
363/// For fixed-size key blocks, yields the single `value_type` repeated `entry_count` times.
364fn iter_key_block_entry_types(
365    header: KeyBlockHeader,
366    block: &[u8],
367) -> impl Iterator<Item = u8> + '_ {
368    let (entry_count, fixed_type) = match header {
369        KeyBlockHeader::Variable { entry_count } => (entry_count, None),
370        KeyBlockHeader::Fixed {
371            entry_count,
372            value_type,
373        } => (entry_count, Some(value_type)),
374    };
375    (0..entry_count).map(move |i| {
376        if let Some(vt) = fixed_type {
377            vt
378        } else {
379            // Variable block: offset table starts at byte 4 (after 1B type + 3B count),
380            // each entry is 4 bytes, first byte is the entry type.
381            let header_offset = KEY_BLOCK_HEADER_SIZE + i as usize * 4;
382            block[header_offset]
383        }
384    })
385}
386
387/// Analyze an SST file and return entry type statistics
388fn analyze_sst_file(db_path: &Path, info: &SstInfo) -> Result<SstStats> {
389    let filename = format!("{:08}.sst", info.sequence_number);
390    let path = db_path.join(&filename);
391
392    let file = File::open(&path).with_context(|| format!("Failed to open {}", filename))?;
393    let file_size = file.metadata()?.len();
394    let mmap = unsafe { Mmap::map(&file)? };
395    advise_mmap_for_persistence(&mmap)?;
396
397    let mut stats = SstStats {
398        block_directory_size: info.block_count as u64 * size_of::<u32>() as u64,
399        file_size,
400        ..Default::default()
401    };
402
403    let block_offsets_start = mmap.len() - (info.block_count as usize * size_of::<u32>());
404
405    // Read the index block (always the last block) first to learn which blocks are key blocks.
406    // Without this, we'd have to guess block types from their first byte, which is wrong for
407    // value blocks (they have no type header and their data can start with any byte).
408    let index_block_index = info.block_count - 1;
409    let index_raw = read_block(
410        &mmap,
411        block_offsets_start,
412        index_block_index,
413        info.sequence_number,
414    )?;
415    let key_block_indices = parse_key_block_indices(&index_raw.data);
416
417    stats.index_blocks.add(
418        index_raw.compressed_size,
419        index_raw.actual_size,
420        index_raw.was_compressed,
421    );
422
423    // Now iterate through all blocks, using the key block set for classification.
424    for block_index in 0..index_block_index {
425        let raw = match read_block(
426            &mmap,
427            block_offsets_start,
428            block_index,
429            info.sequence_number,
430        ) {
431            Ok(raw) => raw,
432            Err(e) => {
433                eprintln!(
434                    "Warning: Failed to read block {} in {:08}.sst: {}",
435                    block_index, info.sequence_number, e
436                );
437                continue;
438            }
439        };
440
441        if !key_block_indices.contains(&block_index) {
442            // Value block — no type header, just raw data.
443            stats
444                .value_blocks
445                .add(raw.compressed_size, raw.actual_size, raw.was_compressed);
446            continue;
447        }
448
449        let block: &[u8] = &raw.data;
450
451        stats
452            .key_blocks
453            .add(raw.compressed_size, raw.actual_size, raw.was_compressed);
454
455        let key_block_header = parse_key_block_header(block).with_context(|| {
456            format!(
457                "Warning: key block {} in {:08}.sst has unexpected block type {}",
458                block_index, info.sequence_number, block[0]
459            )
460        })?;
461        match key_block_header {
462            KeyBlockHeader::Variable { .. } => {
463                stats.variable_key_blocks.add(
464                    raw.compressed_size,
465                    raw.actual_size,
466                    raw.was_compressed,
467                );
468            }
469            KeyBlockHeader::Fixed { .. } => {
470                stats.fixed_key_blocks.add(
471                    raw.compressed_size,
472                    raw.actual_size,
473                    raw.was_compressed,
474                );
475            }
476        };
477
478        for entry_type in iter_key_block_entry_types(key_block_header, block) {
479            track_entry_type(&mut stats, entry_type);
480        }
481    }
482
483    Ok(stats)
484}
485
486fn print_block_stats(name: &str, info: &BlockSizeInfo) {
487    let total = info.total_count();
488    if total == 0 {
489        println!("    {}: none", name);
490        return;
491    }
492
493    // Determine compression status
494    let all_uncompressed = info.compressed_count == 0;
495    let all_compressed = info.uncompressed_count == 0;
496
497    if all_uncompressed {
498        // All blocks uncompressed - just show size
499        println!(
500            "    {}: {} blocks (uncompressed), {}",
501            name,
502            format_number(total),
503            format_bytes(info.actual_size),
504        );
505    } else if all_compressed {
506        // All blocks compressed - show stored vs actual with savings
507        let savings_pct = if info.actual_size > 0 {
508            ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
509        } else {
510            0.0
511        };
512        let savings_str = if savings_pct < 0.0 {
513            format!("{:.0}% overhead", -savings_pct)
514        } else {
515            format!("{:.0}% savings", savings_pct)
516        };
517        println!(
518            "    {}: {} blocks, stored: {}, actual: {} ({})",
519            name,
520            format_number(total),
521            format_bytes(info.stored_size),
522            format_bytes(info.actual_size),
523            savings_str,
524        );
525    } else {
526        // Mixed - show breakdown
527        let savings_pct = if info.actual_size > 0 {
528            ((info.actual_size as f64 - info.stored_size as f64) / info.actual_size as f64) * 100.0
529        } else {
530            0.0
531        };
532        let savings_str = if savings_pct < 0.0 {
533            format!("{:.0}% overhead", -savings_pct)
534        } else {
535            format!("{:.0}% savings", savings_pct)
536        };
537        println!(
538            "    {}: {} blocks ({} compressed, {} uncompressed)",
539            name,
540            format_number(total),
541            format_number(info.compressed_count),
542            format_number(info.uncompressed_count),
543        );
544        println!(
545            "          stored: {}, actual: {} ({})",
546            format_bytes(info.stored_size),
547            format_bytes(info.actual_size),
548            savings_str,
549        );
550    }
551}
552
553fn print_entry_histogram(stats: &SstStats, prefix: &str) {
554    if stats.entry_type_counts.is_empty() {
555        return;
556    }
557    println!("{}Entry Type Histogram:", prefix);
558    for (ty, count) in &stats.entry_type_counts {
559        let pct = (*count as f64 / stats.total_entries as f64) * 100.0;
560        // Visual bar
561        let bar_len = (pct / 2.0) as usize;
562        let bar: String = "█".repeat(bar_len.min(40));
563        println!(
564            "{}  type {:3}: {:>12} ({:5.1}%) │{}│ {}",
565            prefix,
566            ty,
567            format_number(*count),
568            pct,
569            bar,
570            entry_type_description(*ty),
571        );
572    }
573}
574
575fn print_value_storage(stats: &SstStats, prefix: &str) {
576    println!("{}Value Storage:", prefix);
577    if stats.inline_value_bytes > 0 {
578        let inline_count: u64 = stats
579            .entry_type_counts
580            .iter()
581            .filter(|(ty, _)| **ty >= KEY_BLOCK_ENTRY_TYPE_INLINE_MIN)
582            .map(|(_, count)| count)
583            .sum();
584        println!(
585            "{}  Inline: {} entries, {} total",
586            prefix,
587            format_number(inline_count),
588            format_bytes(stats.inline_value_bytes)
589        );
590    }
591    if stats.small_value_refs > 0 {
592        println!(
593            "{}  Small (value block refs): {} entries",
594            prefix,
595            format_number(stats.small_value_refs)
596        );
597    }
598    if stats.medium_value_refs > 0 {
599        println!(
600            "{}  Medium (dedicated blocks): {} entries",
601            prefix,
602            format_number(stats.medium_value_refs)
603        );
604    }
605    if stats.blob_refs > 0 {
606        println!(
607            "{}  Blob (external files): {} entries",
608            prefix,
609            format_number(stats.blob_refs)
610        );
611    }
612    if stats.deleted_count > 0 {
613        println!(
614            "{}  Deleted: {} entries",
615            prefix,
616            format_number(stats.deleted_count)
617        );
618    }
619}
620
621fn print_sst_details(seq_num: u32, stats: &SstStats) {
622    println!(
623        "\n  ┌─ SST {:08}.sst ─────────────────────────────────────────────────────",
624        seq_num
625    );
626    println!(
627        "  │ Entries: {}, File size: {}",
628        format_number(stats.total_entries),
629        format_bytes(stats.file_size)
630    );
631
632    // Per-file overhead
633    let overhead = stats.block_directory_size;
634    let overhead_pct = if stats.file_size > 0 {
635        (overhead as f64 / stats.file_size as f64) * 100.0
636    } else {
637        0.0
638    };
639    println!("  │");
640    println!(
641        "  │ Per-file Overhead: {} ({:.1}% of file)",
642        format_bytes(overhead),
643        overhead_pct
644    );
645    println!(
646        "  │   Block directory: {}",
647        format_bytes(stats.block_directory_size)
648    );
649
650    // Block statistics
651    println!("  │");
652    println!("  │ Block Statistics:");
653    print!("  │   ");
654    print_block_stats("Index blocks", &stats.index_blocks);
655    print!("  │   ");
656    print_block_stats("Key blocks", &stats.key_blocks);
657    if stats.variable_key_blocks.total_count() > 0 && stats.fixed_key_blocks.total_count() > 0 {
658        print!("  │       ");
659        print_block_stats("Variable", &stats.variable_key_blocks);
660        print!("  │       ");
661        print_block_stats("Fixed", &stats.fixed_key_blocks);
662    } else if stats.fixed_key_blocks.total_count() > 0 {
663        println!("  │       (all fixed-size)");
664    }
665    print!("  │   ");
666    print_block_stats("Value blocks", &stats.value_blocks);
667
668    // Entry type histogram
669    if !stats.entry_type_counts.is_empty() {
670        println!("  │");
671        print_entry_histogram(stats, "  │ ");
672    }
673
674    // Value storage summary
675    println!("  │");
676    print_value_storage(stats, "  │ ");
677
678    println!("  └───────────────────────────────────────────────────────────────────────────");
679}
680
681fn print_family_summary(family: u32, sst_count: usize, stats: &SstStats) {
682    println!("═══════════════════════════════════════════════════════════════════════════════");
683    println!("Family {} ({}):", family, family_name(family));
684    println!("═══════════════════════════════════════════════════════════════════════════════");
685
686    println!(
687        "  SST files: {}, Total entries: {}",
688        format_number(sst_count as u64),
689        format_number(stats.total_entries)
690    );
691    println!("  Total file size: {}", format_bytes(stats.file_size));
692
693    // Averages
694    if sst_count > 0 {
695        let avg_file_size = stats.file_size / sst_count as u64;
696        let avg_keys_per_file = stats.total_entries / sst_count as u64;
697        let total_key_blocks = stats.key_blocks.total_count();
698        let avg_keys_per_block = if total_key_blocks > 0 {
699            stats.total_entries as f64 / total_key_blocks as f64
700        } else {
701            0.0
702        };
703
704        println!();
705        println!("  Averages:");
706        println!("    File size: {}", format_bytes(avg_file_size));
707        println!("    Keys per file: {}", format_number(avg_keys_per_file));
708        println!("    Keys per key block: {:.1}", avg_keys_per_block);
709    }
710
711    // Per-file overhead
712    let total_overhead = stats.block_directory_size;
713    let overhead_pct = if stats.file_size > 0 {
714        (total_overhead as f64 / stats.file_size as f64) * 100.0
715    } else {
716        0.0
717    };
718    println!();
719    println!(
720        "  Per-file Overhead (total): {} ({:.1}% of total file size)",
721        format_bytes(total_overhead),
722        overhead_pct
723    );
724    println!(
725        "    Block directories: {}",
726        format_bytes(stats.block_directory_size)
727    );
728    if sst_count > 0 {
729        println!(
730            "      Average per file: {}",
731            format_bytes(stats.block_directory_size / sst_count as u64)
732        );
733    }
734
735    println!();
736    println!("  Block Statistics:");
737    print!("  ");
738    print_block_stats("Index blocks", &stats.index_blocks);
739    print!("  ");
740    print_block_stats("Key blocks", &stats.key_blocks);
741    if stats.variable_key_blocks.total_count() > 0 && stats.fixed_key_blocks.total_count() > 0 {
742        // Only show breakdown when both types are present
743        print!("      ");
744        print_block_stats("Variable", &stats.variable_key_blocks);
745        print!("      ");
746        print_block_stats("Fixed", &stats.fixed_key_blocks);
747    } else if stats.fixed_key_blocks.total_count() > 0 {
748        println!("      (all fixed-size)");
749    }
750    print!("  ");
751    print_block_stats("Value blocks", &stats.value_blocks);
752
753    println!();
754    print_entry_histogram(stats, "  ");
755
756    println!();
757    print_value_storage(stats, "  ");
758
759    println!();
760}
761
762fn main() -> Result<()> {
763    let args: Vec<String> = std::env::args().collect();
764
765    // Parse arguments
766    let mut db_path: Option<PathBuf> = None;
767    let mut verbose = false;
768
769    let mut i = 1;
770    while i < args.len() {
771        match args[i].as_str() {
772            "--verbose" | "-v" => verbose = true,
773            arg if !arg.starts_with('-') => {
774                if db_path.is_none() {
775                    db_path = Some(PathBuf::from(arg));
776                }
777            }
778            _ => {
779                eprintln!("Unknown option: {}", args[i]);
780                std::process::exit(1);
781            }
782        }
783        i += 1;
784    }
785
786    let db_path = match db_path {
787        Some(p) => p,
788        None => {
789            eprintln!("Usage: {} [OPTIONS] <db_directory>", args[0]);
790            eprintln!();
791            eprintln!("Inspects turbo-persistence SST files to report entry type statistics.");
792            eprintln!();
793            eprintln!("Options:");
794            eprintln!("  -v, --verbose    Show per-SST file details (default: family totals only)");
795            eprintln!();
796            eprintln!("Entry types:");
797            eprintln!("  0: Small value (stored in separate value block)");
798            eprintln!("  1: Blob reference");
799            eprintln!("  2: Deleted/tombstone");
800            eprintln!("  3: Medium value");
801            eprintln!("  8+: Inline value (size = type - 8)");
802            eprintln!();
803            eprintln!("For TaskCache (family 3), values are 4-byte TaskIds.");
804            eprintln!("Expected entry type is 12 (8 + 4) for inline optimization.");
805            std::process::exit(1);
806        }
807    };
808
809    if !db_path.is_dir() {
810        bail!("Not a directory: {}", db_path.display());
811    }
812
813    // Collect SST info grouped by family
814    let family_sst_info = collect_sst_info(&db_path)?;
815
816    let total_sst_count: usize = family_sst_info.values().map(|v| v.len()).sum();
817    println!(
818        "Analyzing {} SST files in {}\n",
819        format_number(total_sst_count as u64),
820        db_path.display()
821    );
822
823    // Analyze and report by family
824    for (family, sst_list) in &family_sst_info {
825        let mut family_stats = SstStats::default();
826        let mut sst_stats_list: Vec<(u32, SstStats)> = Vec::new();
827
828        for info in sst_list {
829            match analyze_sst_file(&db_path, info) {
830                Ok(stats) => {
831                    family_stats.merge(&stats);
832                    if verbose {
833                        sst_stats_list.push((info.sequence_number, stats));
834                    }
835                }
836                Err(e) => {
837                    eprintln!(
838                        "Warning: Failed to analyze {:08}.sst: {}",
839                        info.sequence_number, e
840                    );
841                }
842            }
843        }
844
845        // Print family summary
846        print_family_summary(*family, sst_list.len(), &family_stats);
847
848        // Print per-SST details in verbose mode
849        if verbose && !sst_stats_list.is_empty() {
850            println!("  Per-SST Details:");
851            for (seq_num, stats) in &sst_stats_list {
852                print_sst_details(*seq_num, stats);
853            }
854            println!();
855        }
856    }
857
858    Ok(())
859}