Skip to main content

turbopack_ecmascript/
magic_identifier.rs

1use std::{
2    borrow::Cow,
3    fmt::{Display, Write},
4    sync::LazyLock,
5};
6
7use regex::{Captures, Regex, Replacer};
8use swc_core::atoms::Atom;
9use turbo_rcstr::RcStr;
10
11pub static MAGIC_IDENTIFIER_DEFAULT_EXPORT: LazyLock<RcStr> =
12    LazyLock::new(|| RcStr::from(mangle("default export")));
13
14pub static MAGIC_IDENTIFIER_DEFAULT_EXPORT_ATOM: LazyLock<Atom> =
15    LazyLock::new(|| Atom::from(mangle("default export")));
16
/// Encodes arbitrary text as a magic identifier of the form
/// `__TURBOPACK__<encoded content>__`.
///
/// Each character of `content` is encoded as follows:
/// - ASCII letters and digits are copied unchanged.
/// - A space becomes `__`.
/// - A `_` is copied unchanged, unless the output built so far already ends
///   in `_` (which would be ambiguous with an encoded space); in that case it
///   is hex-escaped like any other byte.
/// - A `$` outside of a hex section becomes `$$`.
/// - Any other character up to U+00FF is written as exactly two lowercase hex
///   digits inside a `$…$` section; consecutive such characters share one
///   section (e.g. `///` becomes `$2f2f2f$`).
/// - Characters above U+00FF are written as `_<hex code point>$`, preceded by
///   `$` if no hex section is currently open; they always close the section.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // True while a `$…` section of two-digit hex bytes is open and still needs
    // its closing `$`.
    let mut hex_mode = false;
    for c in content.chars() {
        if matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ') {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            if c == ' ' {
                r.push_str("__");
            } else {
                r.push(c);
            }
        } else if c == '_' && (!r.ends_with('_') || hex_mode) {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            r.push('_');
        } else if c == '$' && !hex_mode {
            r.push_str("$$");
        } else if matches!(c, '\0'..='\u{ff}') {
            if !hex_mode {
                r.push('$');
                hex_mode = true;
            }
            // Zero-pad to two digits: a space-padded width would put a literal
            // space into the identifier for bytes below 0x10, and the decoder
            // consumes hex bytes strictly two digits at a time.
            write!(r, "{:02x}", c as u8).unwrap();
        } else {
            if !hex_mode {
                r.push('$');
            }
            write!(r, "_{:x}$", c as u32).unwrap();
            hex_mode = false;
        }
    }
    if hex_mode {
        r.push('$');
    }
    r.push_str("__");
    r
}
59
/// Decodes a magic identifier into a string.
///
/// Returns `None` if the identifier is not mangled, i.e. if it lacks the
/// `__TURBOPACK__` prefix or the trailing `__`, has no content between them,
/// or if that content is malformed (unexpected character, incomplete or
/// unterminated hex section, or a hex escape that is not a valid `char`).
///
/// Decoding rules for the content:
/// - ASCII letters and digits stand for themselves.
/// - `__` is a space; a single `_` is an underscore.
/// - `$…$` is a hex section: each pair of hex digits is one character in the
///   range U+0000..=U+00FF, and `_<hex>$` is a full code point which also ends
///   the section.
/// - An empty section (`$$`) is a literal `$`.
pub fn unmangle(identifier: &str) -> Option<String> {
    // Stripping (instead of slicing by fixed offsets) cannot panic when the
    // prefix and suffix overlap, e.g. for the bare string `__TURBOPACK__`.
    let content = identifier
        .strip_prefix("__TURBOPACK__")?
        .strip_suffix("__")?;

    if content.is_empty() {
        return None;
    }

    enum Mode {
        /// Plain text outside of any escape.
        Text,
        /// A single `_` was seen; the next character decides what it means.
        Underscore,
        /// Inside a `$…$` section of two-digit hex bytes.
        Hex,
        /// Inside a `_<hex>$` code point escape.
        LongHex,
    }
    let mut mode = Mode::Text;
    let mut output = String::new();
    let mut buffer = String::with_capacity(2);
    // True while the current `$…` section has not contained anything yet, so
    // that an immediately following `$` can be recognized as a literal `$`.
    let mut section_is_empty = true;
    for ch in content.chars() {
        match mode {
            Mode::Text => match ch {
                '_' => mode = Mode::Underscore,
                '$' => {
                    section_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => output.push(c),
                _ => return None,
            },
            Mode::Underscore => match ch {
                '_' => {
                    output.push(' ');
                    mode = Mode::Text;
                }
                '$' => {
                    output.push('_');
                    section_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => {
                    output.push('_');
                    output.push(c);
                    mode = Mode::Text;
                }
                _ => return None,
            },
            Mode::Hex => match ch {
                '_' => {
                    // A dangling single hex digit means the input is malformed.
                    if !buffer.is_empty() {
                        return None;
                    }
                    section_is_empty = false;
                    mode = Mode::LongHex;
                }
                '$' => {
                    if !buffer.is_empty() {
                        return None;
                    }
                    if section_is_empty {
                        // `$$` is how a literal `$` is encoded.
                        output.push('$');
                    }
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => {
                    buffer.push(c);
                    section_is_empty = false;
                    if buffer.len() == 2 {
                        let byte = u8::from_str_radix(&buffer, 16).ok()?;
                        output.push(byte as char);
                        buffer.clear();
                    }
                }
                _ => return None,
            },
            Mode::LongHex => match ch {
                '$' => {
                    let code = u32::from_str_radix(&buffer, 16).ok()?;
                    // Surrogates and values above U+10FFFF are not chars.
                    output.push(char::from_u32(code)?);
                    buffer.clear();
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => buffer.push(c),
                _ => return None,
            },
        }
    }
    match mode {
        Mode::Text => Some(output),
        // The content ended with a single `_` (e.g. the encoding of `Hello_`),
        // which is a literal underscore.
        Mode::Underscore => {
            output.push('_');
            Some(output)
        }
        // The input ended in the middle of an escape.
        Mode::Hex | Mode::LongHex => None,
    }
}
151
152/// Decode all magic identifiers in a string.
153pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
154    static IDENTIFIER_REGEX: LazyLock<Regex> =
155        LazyLock::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
156
157    struct Rep<T: Fn(String) -> O, O: Display>(T);
158
159    impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
160        fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
161            let matched = caps.get(0).unwrap().as_str();
162            let unmangled = unmangle(matched).unwrap();
163            write!(dst, "{}", self.0(unmangled)).unwrap();
164        }
165    }
166
167    IDENTIFIER_REGEX.replace_all(text, Rep(magic))
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// `(plain text, magic identifier)` pairs checked in both directions.
    const CASES: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    /// Mangling each plain text yields the expected identifier.
    #[test]
    fn test_encode() {
        for &(plain, mangled) in CASES {
            assert_eq!(mangle(plain), mangled);
        }
    }

    /// Unmangling each identifier yields the original text, and a string
    /// without the magic prefix is rejected.
    #[test]
    fn test_decode() {
        assert_eq!(unmangle("foobar"), None);
        for &(plain, mangled) in CASES {
            assert_eq!(unmangle(mangled), Some(plain.to_string()));
        }
    }

    /// Identifiers embedded in surrounding text are replaced in place.
    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let decoded = unmangle_identifiers(input, |s| format!("{{{s}}}"));
        assert_eq!(decoded, "Hello {Hello World} {Hello_World}");
    }
}