turbo_tasks_hash/base38.rs
/// The 38-symbol alphabet used for hash encoding: digits `0-9`, lowercase
/// letters `a-z`, then `_` and `-`.
///
/// Every symbol is an RFC 3986 unreserved character and is safe to use on
/// case-insensitive filesystems (macOS HFS+/APFS, Windows NTFS).
///
/// `~` and `.` are RFC 3986 unreserved as well, but are deliberately left out:
/// common Nginx hardening rules (`block_common_exploits.conf`) and enterprise
/// WAF configurations block them, so sequences such as `~~` or `...` in asset
/// filenames produce false-positive path traversal matches and 403 errors.
const BASE38_CHARS: &[u8; 38] = b"0123456789abcdefghijklmnopqrstuvwxyz_-";

/// Radix of the encoding, taken from the length of the alphabet.
const BASE: u128 = BASE38_CHARS.len() as u128;
13
/// Returns the number of base-`base` digits needed to represent every value of
/// an unsigned integer that is `bits` wide: the smallest `n` such that
/// `base^n > 2^bits - 1`.
///
/// # Panics
///
/// Panics (as a compile error when evaluated in a const context) if
/// `base < 2`, because no finite digit count exists, or if `bits > 128`,
/// because the computation is carried out in `u128`.
const fn digits_for_bits(base: u128, bits: u32) -> usize {
    assert!(base >= 2, "digits_for_bits: base must be at least 2");
    assert!(bits <= 128, "digits_for_bits: bits must not exceed 128");

    // Largest value that has to be representable. `1u128 << 128` would
    // overflow, so the full-width case is spelled out as `u128::MAX`.
    let max_value = if bits == 128 {
        u128::MAX
    } else {
        (1u128 << bits) - 1
    };

    // Invariant at the loop head: `power == base^n`.
    let mut power: u128 = 1;
    let mut n: usize = 0;
    while power <= max_value {
        n += 1;
        power = match power.checked_mul(base) {
            Some(next) => next,
            // base^n no longer fits in u128, so base^n >= 2^128 > max_value.
            None => break,
        };
    }
    n
}
39
40/// Number of base38 characters needed to represent a 64-bit value without
41/// information loss.
42pub const BASE38_LEN_64: usize = digits_for_bits(BASE, 64);
43
44/// Number of base38 characters needed to represent a 128-bit value without
45/// information loss.
46pub const BASE38_LEN_128: usize = digits_for_bits(BASE, 128);
47
48// Verify our const computation matches the expected values.
49const _: () = assert!(BASE38_LEN_64 == 13);
50const _: () = assert!(BASE38_LEN_128 == 25);
51
52/// Encodes a value into a fixed-width base38 string by repeatedly dividing by
53/// 38.
54fn encode_base38_fixed<const N: usize>(mut n: u128) -> String {
55 let mut buf = [b'0'; N];
56 for i in (0..N).rev() {
57 buf[i] = BASE38_CHARS[(n % 38) as usize];
58 n /= 38;
59 }
60 // SAFETY: BASE38_CHARS only contains ASCII bytes.
61 unsafe { String::from_utf8_unchecked(buf.to_vec()) }
62}
63
64/// Encodes a 64-bit unsigned integer into a fixed-width 13-character base38
65/// string.
66pub fn encode_base38(n: u64) -> String {
67 encode_base38_fixed::<BASE38_LEN_64>(n as u128)
68}
69
70/// Encodes a 128-bit unsigned integer into a fixed-width 25-character base38
71/// string.
72pub fn encode_base38_128(n: u128) -> String {
73 encode_base38_fixed::<BASE38_LEN_128>(n)
74}