Skip to main content

turbopack_ecmascript/
magic_identifier.rs

1use std::{
2    borrow::Cow,
3    fmt::{Display, Write},
4};
5
6use once_cell::sync::Lazy;
7use regex::{Captures, Regex, Replacer};
8
/// Encodes an arbitrary string into a magic identifier of the form
/// `__TURBOPACK__<encoded content>__`.
///
/// Encoding rules, applied per character of `content`:
/// - ASCII letters and digits are copied verbatim.
/// - A space becomes `__`.
/// - An underscore is copied verbatim unless the output built so far already
///   ends with `_`; in that case it is hex-encoded like any other character.
/// - A `$` outside of a hex run becomes `$$`.
/// - Any other character up to U+00FF is written as exactly two lowercase hex
///   digits inside a `$...$` run; consecutive such characters share one run.
/// - Any character above U+00FF is written as `_<hex code point>$`, preceded
///   by `$` unless a hex run is already open.
///
/// NOTE(review): the `unmangle` function in this file reads `$$` as an empty
/// hex run, so a literal `$` does not round-trip through it — confirm whether
/// that is intended.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // True while a `$<hex digits>` run has been opened but not yet closed.
    let mut hex_mode = false;
    for c in content.chars() {
        if matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ') {
            if hex_mode {
                // Close the pending hex run before emitting plain text.
                r.push('$');
                hex_mode = false;
            }
            if c == ' ' {
                r += "__";
            } else {
                r.push(c);
            }
        } else if c == '_' && (!r.ends_with('_') || hex_mode) {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            r += "_";
        } else if c == '$' && !hex_mode {
            r += "$$";
        } else if matches!(c, '\0'..='\u{ff}') {
            if !hex_mode {
                r.push('$');
                hex_mode = true;
            }
            // Zero-pad to two digits: a plain width of 2 pads with a space,
            // which would put a space into the identifier for every character
            // below 0x10 and break the fixed two-digit framing of the run.
            write!(r, "{:02x}", c as u8).unwrap();
        } else {
            if !hex_mode {
                r.push('$');
            }
            // The long form carries its own terminating `$`, so the run is
            // closed afterwards.
            write!(r, "_{:x}$", c as u32).unwrap();
            hex_mode = false;
        }
    }
    if hex_mode {
        r.push('$');
    }
    r += "__";
    r
}
51
/// Decodes a magic identifier into a string.
///
/// Returns `None` if the identifier is not mangled, i.e. if it lacks the
/// `__TURBOPACK__` prefix or the `__` suffix, has empty content, or contains a
/// sequence that is not a valid encoding (unexpected character, incomplete or
/// unterminated hex run, or a long-hex value that is not a valid `char`).
/// Malformed input never panics.
pub fn unmangle(identifier: &str) -> Option<String> {
    // Strip the prefix first and the suffix from the remainder, so the two
    // markers can never overlap (the bare prefix itself also ends with `__`).
    let content = identifier
        .strip_prefix("__TURBOPACK__")?
        .strip_suffix("__")?;

    if content.is_empty() {
        return None;
    }

    enum Mode {
        /// Plain text.
        Text,
        /// A single `_` has been seen; the next character decides its meaning.
        Underscore,
        /// Inside a `$...` run of two-digit hex bytes.
        Hex,
        /// Inside a `_<hex code point>$` sequence.
        LongHex,
    }
    let mut mode = Mode::Text;
    let mut output = String::new();
    // Collects the hex digits of the byte / code point currently being read.
    let mut buffer = String::with_capacity(2);
    // True until the current hex run has seen its first digit; `$$` (an empty
    // run) is how a literal `$` is written.
    let mut hex_run_is_empty = true;
    for char in content.chars() {
        match mode {
            Mode::Text => match char {
                '_' => mode = Mode::Underscore,
                '$' => {
                    hex_run_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => output.push(c),
                _ => return None,
            },
            Mode::Underscore => match char {
                // `__` encodes a space.
                '_' => {
                    output.push(' ');
                    mode = Mode::Text;
                }
                '$' => {
                    output.push('_');
                    hex_run_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => {
                    output.push('_');
                    output.push(c);
                    mode = Mode::Text;
                }
                _ => return None,
            },
            Mode::Hex => {
                // A complete two-digit byte is flushed before looking at the
                // next character.
                if buffer.len() == 2 {
                    let byte = u8::from_str_radix(&buffer, 16).ok()?;
                    output.push(byte as char);
                    buffer.clear();
                }
                match char {
                    // `_` and `$` are only valid on a byte boundary; with a
                    // dangling single digit they fall through to `None`.
                    '_' if buffer.is_empty() => mode = Mode::LongHex,
                    '$' if buffer.is_empty() => {
                        if hex_run_is_empty {
                            output.push('$');
                        }
                        mode = Mode::Text;
                    }
                    c if c.is_ascii_hexdigit() => {
                        hex_run_is_empty = false;
                        buffer.push(c);
                    }
                    _ => return None,
                }
            }
            Mode::LongHex => match char {
                '$' => {
                    // Reject empty/overflowing values and code points that are
                    // not valid `char`s (e.g. surrogates) instead of panicking.
                    let code = u32::from_str_radix(&buffer, 16).ok()?;
                    output.push(std::char::from_u32(code)?);
                    buffer.clear();
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => buffer.push(c),
                _ => return None,
            },
        }
    }
    match mode {
        Mode::Text => Some(output),
        // A single underscore directly before the `__` suffix is a literal
        // trailing underscore.
        Mode::Underscore => {
            output.push('_');
            Some(output)
        }
        // The input ended in the middle of a hex sequence.
        Mode::Hex | Mode::LongHex => None,
    }
}
143
144/// Decode all magic identifiers in a string.
145pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
146    static IDENTIFIER_REGEX: Lazy<Regex> =
147        Lazy::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
148
149    struct Rep<T: Fn(String) -> O, O: Display>(T);
150
151    impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
152        fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
153            let matched = caps.get(0).unwrap().as_str();
154            let unmangled = unmangle(matched).unwrap();
155            write!(dst, "{}", self.0(unmangled)).unwrap();
156        }
157    }
158
159    IDENTIFIER_REGEX.replace_all(text, Rep(magic))
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// `(plain text, mangled identifier)` pairs shared by the encode and
    /// decode tests.
    const CASES: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    /// Each plain string must mangle to its expected identifier.
    #[test]
    fn test_encode() {
        for &(plain, mangled) in CASES {
            assert_eq!(mangle(plain), mangled);
        }
    }

    /// Each identifier must decode back to its plain string; a string without
    /// the magic prefix is rejected.
    #[test]
    fn test_decode() {
        assert_eq!(unmangle("foobar"), None);
        for &(plain, mangled) in CASES {
            assert_eq!(unmangle(mangled), Some(plain.to_string()));
        }
    }

    /// Identifiers embedded in surrounding text are replaced in place.
    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let wrap_in_braces = |s| format!("{{{s}}}");
        assert_eq!(
            unmangle_identifiers(input, wrap_in_braces),
            "Hello {Hello World} {Hello_World}"
        );
    }
}