turbopack_ecmascript/
magic_identifier.rs

1use std::{
2    borrow::Cow,
3    fmt::{Display, Write},
4};
5
6use once_cell::sync::Lazy;
7use regex::{Captures, Regex, Replacer};
8
/// Encodes arbitrary text into a magic identifier of the form `__TURBOPACK__<encoded>__`.
///
/// The encoded part only contains ASCII letters, digits, `_` and `$`:
/// - ASCII letters and digits are copied verbatim.
/// - A space becomes `__`.
/// - An `_` is copied verbatim unless the output already ends with `_`; in that case it is
///   hex-escaped so it cannot be mistaken for an encoded space.
/// - A `$` becomes `$$` (or the hex byte `24` while a hex section is open).
/// - Any other character up to U+00FF is written as exactly two lowercase hex digits inside a
///   `$...$` section; consecutive such characters share one section.
/// - Characters above U+00FF are written as `_<hex code point>$`, preceded by `$` unless a hex
///   section is already open.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // True while a section of two-digit hex bytes is open and still needs its closing `$`.
    let mut hex_mode = false;
    for c in content.chars() {
        if matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ') {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            if c == ' ' {
                r += "__";
            } else {
                r.push(c);
            }
        } else if c == '_' && (!r.ends_with('_') || hex_mode) {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            r += "_";
        } else if c == '$' && !hex_mode {
            r += "$$";
        } else if matches!(c, '\0'..='\u{ff}') {
            if !hex_mode {
                r.push('$');
                hex_mode = true;
            }
            // Zero-pad to two digits: the decoder reads hex bytes in pairs, and space padding
            // would put a space into the identifier for bytes below 0x10.
            write!(r, "{:02x}", c as u8).unwrap();
        } else {
            if !hex_mode {
                r.push('$');
            }
            // The trailing `$` both ends the code point and closes any open hex section.
            write!(r, "_{:x}$", c as u32).unwrap();
            hex_mode = false;
        }
    }
    if hex_mode {
        r.push('$');
    }
    r += "__";
    r
}
51
/// Decodes a magic identifier into a string.
///
/// `identifier` must have the shape `__TURBOPACK__<content>__`, where `<content>` is non-empty
/// and consists only of ASCII letters, digits, `_` and `$`. Any other input is returned
/// unchanged.
///
/// Decoding rules for `<content>`:
/// - `__` becomes a space, a single `_` stays `_` (including one at the very end).
/// - `$$` becomes a literal `$`.
/// - `$xx...$` is a run of two-digit hex bytes, each decoded to the character with that value.
/// - `_<hex>$` inside a hex section is a full code point; values that are not valid Unicode
///   scalar values are skipped instead of panicking.
///
/// Malformed hex sections are tolerated in release builds but trip a `debug_assert!` in debug
/// builds.
pub fn unmangle(identifier: &str) -> String {
    // Equivalent to matching `^__TURBOPACK__([a-zA-Z0-9_$]+)__$`, without needing a regex.
    let Some(content) = identifier
        .strip_prefix("__TURBOPACK__")
        .and_then(|rest| rest.strip_suffix("__"))
        .filter(|content| {
            !content.is_empty()
                && content
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '$'))
        })
    else {
        return identifier.to_string();
    };

    enum Mode {
        Text,
        Underscore,
        Hex,
        LongHex,
    }
    let mut mode = Mode::Text;
    let mut output = String::new();
    let mut buffer = String::with_capacity(2);
    // True right after a `$` opened a hex section, before anything was read from it. A closing
    // `$` in that state means the pair was an escaped dollar sign, not an empty hex section.
    let mut hex_untouched = false;
    for char in content.chars() {
        match mode {
            Mode::Text => match char {
                '_' => mode = Mode::Underscore,
                '$' => {
                    mode = Mode::Hex;
                    hex_untouched = true;
                }
                c => output.push(c),
            },
            Mode::Underscore => match char {
                '_' => {
                    output.push(' ');
                    mode = Mode::Text;
                }
                '$' => {
                    output.push('_');
                    mode = Mode::Hex;
                    hex_untouched = true;
                }
                c => {
                    output.push('_');
                    output.push(c);
                    mode = Mode::Text;
                }
            },
            Mode::Hex => {
                // A complete byte is flushed lazily, when the next character arrives.
                if buffer.len() == 2 {
                    if let Ok(byte) = u8::from_str_radix(&buffer, 16) {
                        output.push(byte as char);
                    }
                    buffer.clear();
                }
                match char {
                    '_' => {
                        debug_assert!(buffer.is_empty());
                        mode = Mode::LongHex;
                    }
                    '$' => {
                        debug_assert!(buffer.is_empty());
                        if hex_untouched {
                            output.push('$');
                        }
                        mode = Mode::Text;
                    }
                    c => {
                        buffer.push(c);
                    }
                }
                hex_untouched = false;
            }
            Mode::LongHex => {
                debug_assert!(char != '_');
                match char {
                    '$' => {
                        // Skip values that do not parse or are not Unicode scalar values
                        // (e.g. surrogates) rather than panicking on foreign input.
                        let decoded = u32::from_str_radix(&buffer, 16)
                            .ok()
                            .and_then(char::from_u32);
                        if let Some(decoded) = decoded {
                            output.push(decoded);
                        }
                        buffer.clear();
                        mode = Mode::Text;
                    }
                    c => {
                        buffer.push(c);
                    }
                }
            }
        }
    }
    if matches!(mode, Mode::Underscore) {
        // The content ended with a single `_` that was still waiting for its follow-up.
        output.push('_');
    } else {
        debug_assert!(matches!(mode, Mode::Text));
    }
    output
}
135
136/// Decode all magic identifiers in a string.
137pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
138    static IDENTIFIER_REGEX: Lazy<Regex> =
139        Lazy::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
140
141    struct Rep<T: Fn(String) -> O, O: Display>(T);
142
143    impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
144        fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
145            write!(dst, "{}", self.0(unmangle(caps.get(0).unwrap().as_str()))).unwrap();
146        }
147    }
148
149    IDENTIFIER_REGEX.replace_all(text, Rep(magic))
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs of `(plain text, magic identifier)` that must convert in both directions.
    const ROUND_TRIPS: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    #[test]
    fn test_encode() {
        for &(plain, mangled) in ROUND_TRIPS {
            assert_eq!(mangle(plain), mangled, "mangling {plain:?}");
        }
    }

    #[test]
    fn test_decode() {
        for &(plain, mangled) in ROUND_TRIPS {
            assert_eq!(unmangle(mangled), plain, "unmangling {mangled:?}");
        }
    }

    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let wrapped = unmangle_identifiers(input, |s| format!("{{{s}}}"));
        assert_eq!(wrapped, "Hello {Hello World} {Hello_World}");
    }
}