turbopack_ecmascript/
magic_identifier.rs1use std::{
2 borrow::Cow,
3 fmt::{Display, Write},
4 sync::LazyLock,
5};
6
7use regex::{Captures, Regex, Replacer};
8use swc_core::atoms::Atom;
9use turbo_rcstr::RcStr;
10
11pub static MAGIC_IDENTIFIER_DEFAULT_EXPORT: LazyLock<RcStr> =
12 LazyLock::new(|| RcStr::from(mangle("default export")));
13
14pub static MAGIC_IDENTIFIER_DEFAULT_EXPORT_ATOM: LazyLock<Atom> =
15 LazyLock::new(|| Atom::from(mangle("default export")));
16
/// Encodes `content` as a magic identifier of the form `__TURBOPACK__<encoded>__`.
///
/// The encoded part only ever contains ASCII letters, digits, `_` and `$`:
///
/// - ASCII letters and digits are copied unchanged.
/// - A space becomes `__`.
/// - A `_` is copied unchanged unless the output already ends with `_`, in
///   which case it is hex-escaped so that it cannot be confused with the
///   `__` used for a space.
/// - A `$` outside of a hex run becomes `$$`.
/// - Any other character up to U+00FF is written as two lowercase hex digits
///   inside a `$...$` run; consecutive such characters share one run.
/// - Any character above U+00FF is written as `_<hex code point>$`, preceded
///   by `$` when no hex run is currently open.
///
/// NOTE(review): `unmangle` in this file reads `$$` as an empty hex run, so a
/// literal `$` is not recovered when decoding — confirm whether that is intended.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // True while a `$`-delimited run of two-digit hex escapes is open.
    let mut hex_mode = false;
    for c in content.chars() {
        if matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ') {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            if c == ' ' {
                r.push_str("__");
            } else {
                r.push(c);
            }
        } else if c == '_' && (!r.ends_with('_') || hex_mode) {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            r.push('_');
        } else if c == '$' && !hex_mode {
            r.push_str("$$");
        } else if matches!(c, '\0'..='\u{ff}') {
            if !hex_mode {
                r.push('$');
                hex_mode = true;
            }
            // Zero-pad to exactly two digits: a bare width of 2 pads with a
            // space, which would put an invalid character into the identifier
            // for every code point below 0x10 (tab, newline, ...).
            write!(r, "{:02x}", c as u8).unwrap();
        } else {
            if !hex_mode {
                r.push('$');
            }
            // The trailing `$` closes the run, so the next escape reopens one.
            write!(r, "_{:x}$", c as u32).unwrap();
            hex_mode = false;
        }
    }
    if hex_mode {
        r.push('$');
    }
    r.push_str("__");
    r
}
59
/// Decodes a magic identifier of the form `__TURBOPACK__<encoded>__` back
/// into the text it was produced from.
///
/// Returns `None` when `identifier` does not have the `__TURBOPACK__` prefix
/// and `__` suffix, when the encoded part is empty, or when the encoded part
/// is malformed: an unexpected character, an incomplete or empty hex escape,
/// an unterminated `$` run, or a code point that is not a valid `char`.
///
/// Decoding rules for the encoded part:
///
/// - ASCII letters and digits stand for themselves.
/// - `__` is a space; a single `_` (including one at the very end) is `_`.
/// - `$` opens a run of two-digit hex escapes, closed by another `$`.
/// - Inside such a run, `_<hex>$` is a full code point and closes the run.
pub fn unmangle(identifier: &str) -> Option<String> {
    // Strip the prefix first and the suffix from what is left, so the two can
    // never overlap (slicing by fixed offsets would panic on e.g. the bare
    // string `__TURBOPACK__`, which both starts with the prefix and ends
    // with the suffix).
    let content = identifier
        .strip_prefix("__TURBOPACK__")?
        .strip_suffix("__")?;

    if content.is_empty() {
        return None;
    }

    enum Mode {
        /// Plain characters.
        Text,
        /// A single `_` has been seen; the next character decides its meaning.
        Underscore,
        /// Inside a `$...$` run of two-digit hex escapes.
        Hex,
        /// Inside a `_<hex>$` code point escape.
        LongHex,
    }

    let mut mode = Mode::Text;
    let mut output = String::new();
    // Pending hex digits of the escape currently being read.
    let mut buffer = String::with_capacity(2);
    for ch in content.chars() {
        match mode {
            Mode::Text => match ch {
                '_' => mode = Mode::Underscore,
                '$' => mode = Mode::Hex,
                c if c.is_ascii_alphanumeric() => output.push(c),
                _ => return None,
            },
            Mode::Underscore => match ch {
                '_' => {
                    output.push(' ');
                    mode = Mode::Text;
                }
                '$' => {
                    output.push('_');
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => {
                    output.push('_');
                    output.push(c);
                    mode = Mode::Text;
                }
                _ => return None,
            },
            Mode::Hex => match ch {
                c if c.is_ascii_hexdigit() => {
                    buffer.push(c);
                    if buffer.len() == 2 {
                        let byte = u8::from_str_radix(&buffer, 16).ok()?;
                        output.push(byte as char);
                        buffer.clear();
                    }
                }
                // A lone hex digit cannot form a byte; reject it instead of
                // letting it leak into the next escape.
                '_' | '$' if !buffer.is_empty() => return None,
                '_' => mode = Mode::LongHex,
                '$' => mode = Mode::Text,
                _ => return None,
            },
            Mode::LongHex => match ch {
                '$' => {
                    // Fails for an empty or overlong digit sequence and for
                    // values that are not Unicode scalar values (surrogates,
                    // anything above U+10FFFF).
                    let code = u32::from_str_radix(&buffer, 16).ok()?;
                    output.push(char::from_u32(code)?);
                    buffer.clear();
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => buffer.push(c),
                _ => return None,
            },
        }
    }

    match mode {
        Mode::Text => Some(output),
        // A trailing single underscore is a literal `_` (this is what
        // `mangle` emits for content ending in `_`).
        Mode::Underscore => {
            output.push('_');
            Some(output)
        }
        // The `$` run was never closed.
        Mode::Hex | Mode::LongHex => None,
    }
}
151
152pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
154 static IDENTIFIER_REGEX: LazyLock<Regex> =
155 LazyLock::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
156
157 struct Rep<T: Fn(String) -> O, O: Display>(T);
158
159 impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
160 fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
161 let matched = caps.get(0).unwrap().as_str();
162 let unmangled = unmangle(matched).unwrap();
163 write!(dst, "{}", self.0(unmangled)).unwrap();
164 }
165 }
166
167 IDENTIFIER_REGEX.replace_all(text, Rep(magic))
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// `(plain content, magic identifier)` pairs checked in both directions.
    const CASES: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    /// Every plain string mangles to its expected identifier.
    #[test]
    fn test_encode() {
        for &(plain, mangled) in CASES {
            assert_eq!(mangle(plain), mangled);
        }
    }

    /// Every identifier unmangles to its plain string; a string without the
    /// magic prefix is rejected.
    #[test]
    fn test_decode() {
        assert_eq!(unmangle("foobar"), None);
        for &(plain, mangled) in CASES {
            assert_eq!(unmangle(mangled), Some(plain.to_string()));
        }
    }

    /// Identifiers embedded in surrounding text are replaced in place.
    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let decoded = unmangle_identifiers(input, |s| format!("{{{s}}}"));
        assert_eq!(decoded, "Hello {Hello World} {Hello_World}");
    }
}