// turbo_tasks_hash/base38.rs

/// URL-safe, filesystem-safe alphabet for hash encoding: `0-9 a-z _ -`
///
/// The byte at index `d` is the character emitted for digit value `d`.
///
/// All 38 characters are RFC 3986 unreserved characters and are safe on
/// case-insensitive filesystems (macOS HFS+/APFS, Windows NTFS).
///
/// `~` and `.` are intentionally excluded despite being RFC 3986 unreserved:
/// they are blocked by common Nginx hardening rules (`block_common_exploits.conf`)
/// and enterprise WAF configurations, causing 403 errors when sequences like
/// `~~` or `...` appear in asset filenames (false-positive path traversal matches).
const BASE38_CHARS: &[u8; 38] = b"0123456789abcdefghijklmnopqrstuvwxyz_-";

/// Radix of the encoding, derived from the alphabet length so the two cannot
/// drift apart.
const BASE: u128 = BASE38_CHARS.len() as u128;
13
/// Computes the number of base-`base` digits needed to represent every value
/// of an unsigned integer that is `bits` bits wide: the smallest `n` such that
/// `base^n > 2^bits - 1`.
///
/// This is a `const fn` so the result can be used to size fixed-width buffers
/// at compile time.
///
/// # Panics
///
/// Panics (a compile error when evaluated in a const context) if:
/// - `base < 2`: no power of 0 or 1 ever exceeds the target, so the search
///   would never terminate;
/// - `bits > 128`: the search is carried out in `u128` arithmetic and cannot
///   answer for wider integers.
const fn digits_for_bits(base: u128, bits: u32) -> usize {
    assert!(base >= 2, "digits_for_bits: base must be at least 2");
    assert!(bits <= 128, "digits_for_bits: bits must not exceed 128");

    // Largest value that must be representable. `1u128 << 128` would overflow,
    // so the full-width case is spelled out explicitly.
    let max_value = if bits == 128 {
        u128::MAX
    } else {
        (1u128 << bits) - 1
    };

    // Invariant at the top of each iteration: `power == base^n`.
    let mut power: u128 = 1;
    let mut n: usize = 0;
    while power <= max_value {
        n += 1;
        power = match power.checked_mul(base) {
            Some(next) => next,
            // base^n >= 2^128 > max_value, so `n` digits are already enough.
            None => break,
        };
    }
    n
}
39
40/// Number of base38 characters needed to represent a 64-bit value without
41/// information loss.
42pub const BASE38_LEN_64: usize = digits_for_bits(BASE, 64);
43
44/// Number of base38 characters needed to represent a 128-bit value without
45/// information loss.
46pub const BASE38_LEN_128: usize = digits_for_bits(BASE, 128);
47
48// Verify our const computation matches the expected values.
49const _: () = assert!(BASE38_LEN_64 == 13);
50const _: () = assert!(BASE38_LEN_128 == 25);
51
52/// Encodes a value into a fixed-width base38 string by repeatedly dividing by
53/// 38.
54fn encode_base38_fixed<const N: usize>(mut n: u128) -> String {
55    let mut buf = [b'0'; N];
56    for i in (0..N).rev() {
57        buf[i] = BASE38_CHARS[(n % 38) as usize];
58        n /= 38;
59    }
60    // SAFETY: BASE38_CHARS only contains ASCII bytes.
61    unsafe { String::from_utf8_unchecked(buf.to_vec()) }
62}
63
64/// Encodes a 64-bit unsigned integer into a fixed-width 13-character base38
65/// string.
66pub fn encode_base38(n: u64) -> String {
67    encode_base38_fixed::<BASE38_LEN_64>(n as u128)
68}
69
70/// Encodes a 128-bit unsigned integer into a fixed-width 25-character base38
71/// string.
72pub fn encode_base38_128(n: u128) -> String {
73    encode_base38_fixed::<BASE38_LEN_128>(n)
74}