turbopack_ecmascript/
magic_identifier.rs1use std::{
2 borrow::Cow,
3 fmt::{Display, Write},
4};
5
6use once_cell::sync::Lazy;
7use regex::{Captures, Regex, Replacer};
8
/// Encodes arbitrary text as a magic identifier of the form `__TURBOPACK__…__`.
///
/// Each character of `content` is encoded as follows:
/// - ASCII letters and digits are copied verbatim.
/// - A space is written as `__`.
/// - `_` is copied verbatim when the output so far does not end in `_`;
///   otherwise it is hex-escaped like any other byte-sized character, so a
///   literal `_` can never be mistaken for half of an encoded space.
/// - `$` outside of a hex run is written as `$$`.
/// - Any other character up to U+00FF is written as exactly two lowercase hex
///   digits inside a `$…$` run; adjacent escapes share one run.
/// - Characters above U+00FF are written as `_<hex code point>$`, which also
///   terminates the surrounding run.
pub fn mangle(content: &str) -> String {
    let mut r = "__TURBOPACK__".to_string();
    // `true` while `r` ends inside an open `$…` run of two-digit hex escapes.
    let mut hex_mode = false;
    for c in content.chars() {
        if matches!(c, '0'..='9' | 'A'..='Z' | 'a'..='z' | ' ') {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            if c == ' ' {
                r += "__";
            } else {
                r.push(c);
            }
        } else if c == '_' && (!r.ends_with('_') || hex_mode) {
            if hex_mode {
                r.push('$');
                hex_mode = false;
            }
            r.push('_');
        } else if c == '$' && !hex_mode {
            r += "$$";
        } else if matches!(c, '\0'..='\u{ff}') {
            if !hex_mode {
                r.push('$');
                hex_mode = true;
            }
            // Zero-pad to two digits: a space-padded escape (e.g. `" a"` for
            // `\n`) would put a space into the identifier and break the
            // fixed two-digit width of the escapes in a run.
            write!(r, "{:02x}", u32::from(c)).expect("writing to a String cannot fail");
        } else {
            if !hex_mode {
                r.push('$');
            }
            // The long form carries its own terminating `$`.
            write!(r, "_{:x}$", u32::from(c)).expect("writing to a String cannot fail");
            hex_mode = false;
        }
    }
    if hex_mode {
        r.push('$');
    }
    r += "__";
    r
}
51
/// Decodes a magic identifier produced by `mangle` back into its text.
///
/// `identifier` must consist of the `__TURBOPACK__` prefix, a non-empty body
/// made only of ASCII letters, digits, `_` and `$`, and a closing `__`.
/// Anything else is returned unchanged.
///
/// Escapes that cannot be decoded (non-hex digits, code points that are not
/// valid `char`s) are skipped instead of panicking.
pub fn unmangle(identifier: &str) -> String {
    #[derive(Clone, Copy)]
    enum Mode {
        /// Plain characters are copied through.
        Text,
        /// Inside a run of this many consecutive `_`.
        Underscore(usize),
        /// Inside a `$…$` run; `pristine` until something follows the opening `$`.
        Hex { pristine: bool },
        /// Inside a `_<hex code point>$` escape.
        LongHex,
    }

    /// Emits the text encoded by `run` consecutive underscores.
    ///
    /// `mangle` writes a literal `_` only when its output does not already
    /// end in `_`, and writes each space as `__`; an odd run is therefore one
    /// literal `_` followed by spaces, and an even run is spaces only.
    fn push_underscore_run(output: &mut String, run: usize) {
        if run % 2 == 1 {
            output.push('_');
        }
        output.extend(std::iter::repeat(' ').take(run / 2));
    }

    // Same acceptance rule as the anchored pattern
    // `^__TURBOPACK__([a-zA-Z0-9_$]+)__$`, without needing a regex.
    let Some(content) = identifier
        .strip_prefix("__TURBOPACK__")
        .and_then(|rest| rest.strip_suffix("__"))
        .filter(|body| {
            !body.is_empty()
                && body
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '$'))
        })
    else {
        return identifier.to_string();
    };

    let mut mode = Mode::Text;
    let mut output = String::new();
    // Pending hex digits of the escape currently being read.
    let mut buffer = String::with_capacity(2);
    for ch in content.chars() {
        mode = match mode {
            Mode::Text => match ch {
                '_' => Mode::Underscore(1),
                '$' => Mode::Hex { pristine: true },
                c => {
                    output.push(c);
                    Mode::Text
                }
            },
            Mode::Underscore(run) => match ch {
                '_' => Mode::Underscore(run + 1),
                c => {
                    push_underscore_run(&mut output, run);
                    if c == '$' {
                        Mode::Hex { pristine: true }
                    } else {
                        output.push(c);
                        Mode::Text
                    }
                }
            },
            Mode::Hex { pristine } => {
                // Two-digit escapes are flushed lazily, when the character
                // after them arrives.
                if buffer.len() == 2 {
                    if let Ok(byte) = u8::from_str_radix(&buffer, 16) {
                        output.push(char::from(byte));
                    }
                    buffer.clear();
                }
                match ch {
                    '_' => {
                        debug_assert!(buffer.is_empty());
                        Mode::LongHex
                    }
                    '$' => {
                        debug_assert!(buffer.is_empty());
                        // Do not let a dangling digit leak into a later escape.
                        buffer.clear();
                        if pristine {
                            // `$$` is how `mangle` writes a literal `$`.
                            output.push('$');
                        }
                        Mode::Text
                    }
                    c => {
                        buffer.push(c);
                        Mode::Hex { pristine: false }
                    }
                }
            }
            Mode::LongHex => {
                debug_assert!(ch != '_');
                if ch == '$' {
                    // Skip values that are not Unicode scalar values
                    // (surrogates, out of range) rather than panic.
                    let decoded = u32::from_str_radix(&buffer, 16)
                        .ok()
                        .and_then(char::from_u32);
                    if let Some(decoded) = decoded {
                        output.push(decoded);
                    }
                    buffer.clear();
                    Mode::Text
                } else {
                    buffer.push(ch);
                    Mode::LongHex
                }
            }
        };
    }

    match mode {
        Mode::Text => {}
        // The body ended in an underscore run (e.g. the text ended in `_`).
        Mode::Underscore(run) => push_underscore_run(&mut output, run),
        Mode::Hex { .. } | Mode::LongHex => {
            debug_assert!(false, "unterminated `$` escape in magic identifier");
        }
    }
    output
}
135
136pub fn unmangle_identifiers<T: Display>(text: &str, magic: impl Fn(String) -> T) -> Cow<'_, str> {
138 static IDENTIFIER_REGEX: Lazy<Regex> =
139 Lazy::new(|| Regex::new(r"__TURBOPACK__[a-zA-Z0-9_$]+__").unwrap());
140
141 struct Rep<T: Fn(String) -> O, O: Display>(T);
142
143 impl<T: Fn(String) -> O, O: Display> Replacer for Rep<T, O> {
144 fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
145 write!(dst, "{}", self.0(unmangle(caps.get(0).unwrap().as_str()))).unwrap();
146 }
147 }
148
149 IDENTIFIER_REGEX.replace_all(text, Rep(magic))
150}
151
#[cfg(test)]
mod tests {
    use super::*;

    /// `(plain text, magic identifier)` pairs checked in both directions.
    const ROUND_TRIPS: &[(&str, &str)] = &[
        ("Hello World", "__TURBOPACK__Hello__World__"),
        ("Hello_World", "__TURBOPACK__Hello_World__"),
        ("Hello__World", "__TURBOPACK__Hello_$5f$World__"),
        ("Hello___World", "__TURBOPACK__Hello_$5f$_World__"),
        ("Hello/World", "__TURBOPACK__Hello$2f$World__"),
        ("Hello///World", "__TURBOPACK__Hello$2f2f2f$World__"),
        ("Hello/_World", "__TURBOPACK__Hello$2f$_World__"),
        ("Hello_/_World", "__TURBOPACK__Hello_$2f$_World__"),
        ("Hello😀World", "__TURBOPACK__Hello$_1f600$World__"),
        ("Hello/😀/World", "__TURBOPACK__Hello$2f_1f600$$2f$World__"),
        ("Hello😀😀World", "__TURBOPACK__Hello$_1f600$$_1f600$World__"),
    ];

    #[test]
    fn test_encode() {
        for &(plain, mangled) in ROUND_TRIPS {
            assert_eq!(mangle(plain), mangled);
        }
    }

    #[test]
    fn test_decode() {
        for &(plain, mangled) in ROUND_TRIPS {
            assert_eq!(unmangle(mangled), plain);
        }
    }

    #[test]
    fn test_unmangle_identifiers() {
        let input = "Hello __TURBOPACK__Hello__World__ __TURBOPACK__Hello_World__";
        let rewritten = unmangle_identifiers(input, |s| format!("{{{s}}}"));
        assert_eq!(rewritten, "Hello {Hello World} {Hello_World}");
    }
}