turbopack_ecmascript/
magic_identifier.rs1use std::{
2 borrow::Cow,
3 fmt::{Display, Write},
4};
5
6use once_cell::sync::Lazy;
7use regex::{Captures, Regex, Replacer};
8
/// Encodes arbitrary text as a magic identifier of the form
/// `__TURBOPACK__<payload>__`.
///
/// The payload is produced one character at a time:
///
/// * ASCII letters and digits are copied verbatim.
/// * A space becomes `__`.
/// * `_` is copied verbatim, unless the output built so far already ends in
///   `_` (which would read back as a space); in that case it is hex-escaped.
/// * `$` outside of a hex run becomes `$$`.
/// * Any other character up to U+00FF becomes two lowercase, zero-padded hex
///   digits inside a `$…$` run. Adjacent escaped characters share one run.
/// * Characters above U+00FF become `_<hex code point>$`, preceded by `$` when
///   no hex run is open. This always closes the run.
///
/// NOTE(review): an underscore immediately followed by a space encodes to
/// `___`, which `unmangle` reads back as space-then-underscore. Changing that
/// would alter the encoding, so it is left as is here.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // True while a `$…` run of two-digit hex escapes is open and still needs
    // its closing `$`.
    let mut hex_mode = false;

    for c in content.chars() {
        match c {
            '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ' => {
                if hex_mode {
                    r.push('$');
                    hex_mode = false;
                }
                if c == ' ' {
                    r.push_str("__");
                } else {
                    r.push(c);
                }
            }
            // A literal underscore is only safe when it cannot merge with a
            // preceding `_` into the `__` that stands for a space. Closing a
            // hex run first guarantees the output ends in `$`.
            '_' if hex_mode || !r.ends_with('_') => {
                if hex_mode {
                    r.push('$');
                    hex_mode = false;
                }
                r.push('_');
            }
            '$' if !hex_mode => r.push_str("$$"),
            '\0'..='\u{ff}' => {
                if !hex_mode {
                    r.push('$');
                    hex_mode = true;
                }
                // Zero-pad to exactly two digits: the decoder consumes hex
                // digits in pairs, and space padding would not even be a
                // valid identifier character.
                write!(r, "{:02x}", c as u8).expect("writing to a String cannot fail");
            }
            _ => {
                if !hex_mode {
                    r.push('$');
                }
                // The long form carries its own terminator, so the run ends.
                write!(r, "_{:x}$", c as u32).expect("writing to a String cannot fail");
                hex_mode = false;
            }
        }
    }

    if hex_mode {
        r.push('$');
    }
    r.push_str("__");
    r
}
51
/// Decodes a magic identifier of the form `__TURBOPACK__<payload>__` back into
/// the text it was created from.
///
/// Payload grammar, as accepted here:
///
/// * ASCII letters and digits stand for themselves.
/// * `__` is a space; a lone `_` is a literal underscore.
/// * `$$` is a literal `$`.
/// * `$hh…$` is a run of two-digit hex escapes, each a character in
///   U+0000..=U+00FF.
/// * `_h…$` inside a `$` run is a hex code point of arbitrary length and ends
///   the run.
///
/// Returns `None` when the prefix or suffix is missing, the payload is empty,
/// the payload contains a character outside `[A-Za-z0-9_$]`, or an escape is
/// malformed (odd number of hex digits, unterminated run, or a long escape
/// that is not a valid Unicode scalar value).
pub fn unmangle(identifier: &str) -> Option<String> {
    // Stripping the prefix and then the suffix, rather than slicing at fixed
    // offsets, rejects inputs such as `__TURBOPACK__` where both overlap.
    let content = identifier
        .strip_prefix("__TURBOPACK__")?
        .strip_suffix("__")?;

    if content.is_empty() {
        return None;
    }

    enum Mode {
        /// Plain text; nothing pending.
        Text,
        /// A single `_` has been read and not yet resolved.
        Underscore,
        /// Inside a `$…$` run of two-digit escapes.
        Hex,
        /// Inside a `_…$` code point escape.
        LongHex,
    }

    let mut mode = Mode::Text;
    let mut output = String::new();
    let mut buffer = String::with_capacity(8);
    // True while the current `$` run has not seen any digit or long escape,
    // which is how `$$` (a literal dollar sign) is told apart from the end of
    // a real run.
    let mut run_is_empty = true;

    for ch in content.chars() {
        match mode {
            Mode::Text => match ch {
                '_' => mode = Mode::Underscore,
                '$' => {
                    run_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => output.push(c),
                _ => return None,
            },
            Mode::Underscore => match ch {
                '_' => {
                    output.push(' ');
                    mode = Mode::Text;
                }
                '$' => {
                    output.push('_');
                    run_is_empty = true;
                    mode = Mode::Hex;
                }
                c if c.is_ascii_alphanumeric() => {
                    output.push('_');
                    output.push(c);
                    mode = Mode::Text;
                }
                _ => return None,
            },
            Mode::Hex => match ch {
                // Both switches are only legal on a digit-pair boundary; with
                // a dangling digit they fall through to the `None` arm.
                '_' if buffer.is_empty() => {
                    run_is_empty = false;
                    mode = Mode::LongHex;
                }
                '$' if buffer.is_empty() => {
                    if run_is_empty {
                        output.push('$');
                    }
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => {
                    buffer.push(c);
                    run_is_empty = false;
                    if buffer.len() == 2 {
                        let byte = u8::from_str_radix(&buffer, 16).ok()?;
                        output.push(char::from(byte));
                        buffer.clear();
                    }
                }
                _ => return None,
            },
            Mode::LongHex => match ch {
                '$' => {
                    // Empty, overflowing, surrogate and out-of-range values
                    // are all rejected instead of panicking.
                    let code = u32::from_str_radix(&buffer, 16).ok()?;
                    output.push(std::char::from_u32(code)?);
                    buffer.clear();
                    mode = Mode::Text;
                }
                c if c.is_ascii_hexdigit() => buffer.push(c),
                _ => return None,
            },
        }
    }

    match mode {
        Mode::Text => {}
        // The payload ended on a single underscore (e.g. from `mangle("a_")`).
        Mode::Underscore => output.push('_'),
        // An escape was opened but never closed.
        Mode::Hex | Mode::LongHex => return None,
    }
    Some(output)
}
143
144pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
146 static IDENTIFIER_REGEX: Lazy<Regex> =
147 Lazy::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
148
149 struct Rep<T: Fn(String) -> O, O: Display>(T);
150
151 impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
152 fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
153 let matched = caps.get(0).unwrap().as_str();
154 let unmangled = unmangle(matched).unwrap();
155 write!(dst, "{}", self.0(unmangled)).unwrap();
156 }
157 }
158
159 IDENTIFIER_REGEX.replace_all(text, Rep(magic))
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// `(plain text, mangled identifier)` pairs checked in both directions.
    const FIXTURES: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    #[test]
    fn test_encode() {
        for &(plain, mangled) in FIXTURES {
            assert_eq!(mangle(plain), mangled);
        }
    }

    #[test]
    fn test_decode() {
        // Anything without the magic prefix is not an identifier at all.
        assert_eq!(unmangle("foobar"), None);

        for &(plain, mangled) in FIXTURES {
            assert_eq!(unmangle(mangled), Some(plain.to_string()));
        }
    }

    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let replaced = unmangle_identifiers(input, |s| format!("{{{s}}}"));
        assert_eq!(replaced, "Hello {Hello World} {Hello_World}");
    }
}