1#![feature(arbitrary_self_types_pointers)]
2
3use std::vec;
4
5use anyhow::{Result, bail};
6
7pub fn register() {
8 turbo_tasks::register();
9 include!(concat!(env!("OUT_DIR"), "/register.rs"));
10}
11
12#[derive(Debug, Clone)]
16#[turbo_tasks::value(eq = "manual", shared)]
17#[serde(into = "RegexForm", try_from = "RegexForm")]
18pub struct EsRegex {
19 #[turbo_tasks(trace_ignore)]
20 delegate: EsRegexImpl,
21 pub pattern: String,
24 pub flags: String,
25}
26
27#[derive(Debug, Clone)]
28enum EsRegexImpl {
29 Regex(regex::Regex),
30 Regress(regress::Regex),
31}
32
33impl PartialEq for EsRegex {
38 fn eq(&self, other: &Self) -> bool {
39 self.pattern == other.pattern && self.flags == other.flags
40 }
41}
42impl Eq for EsRegex {}
43
44impl TryFrom<RegexForm> for EsRegex {
45 type Error = anyhow::Error;
46
47 fn try_from(value: RegexForm) -> std::result::Result<Self, Self::Error> {
48 EsRegex::new(&value.pattern, &value.flags)
49 }
50}
51
52#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
54struct RegexForm {
55 pattern: String,
56 flags: String,
57}
58
59impl From<EsRegex> for RegexForm {
60 fn from(value: EsRegex) -> Self {
61 Self {
62 pattern: value.pattern,
63 flags: value.flags,
64 }
65 }
66}
67
68impl EsRegex {
69 pub fn new(pattern: &str, flags: &str) -> Result<Self> {
72 let pattern = pattern.replace("\\/", "/");
74
75 let mut applied_flags = String::new();
76 for flag in flags.chars() {
77 match flag {
78 'd' => {}
80 'g' => {}
82 'i' => applied_flags.push('i'),
84 'm' => applied_flags.push('m'),
86 's' => applied_flags.push('s'),
88 'u' => applied_flags.push('u'),
90 'y' => {}
92 _ => bail!("unsupported flag `{flag}` in regex: `{pattern}` with flags: `{flags}`"),
93 }
94 }
95
96 let regex = if !applied_flags.is_empty() {
97 regex::Regex::new(&format!("(?{applied_flags}){pattern}"))
98 } else {
99 regex::Regex::new(&pattern)
100 };
101
102 let delegate = match regex {
103 Ok(reg) => Ok(EsRegexImpl::Regex(reg)),
104 Err(_e) => {
105 match regress::Regex::with_flags(&pattern, regress::Flags::from(flags)) {
108 Ok(reg) => Ok(EsRegexImpl::Regress(reg)),
109 Err(e) => Err(e),
111 }
112 }
113 }?;
114 Ok(Self {
115 delegate,
116 pattern,
117 flags: flags.to_string(),
118 })
119 }
120
121 pub fn is_match(&self, haystack: &str) -> bool {
123 match &self.delegate {
124 EsRegexImpl::Regex(r) => r.is_match(haystack),
125 EsRegexImpl::Regress(r) => r.find(haystack).is_some(),
126 }
127 }
128
129 pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
140 let delegate = match &self.delegate {
141 EsRegexImpl::Regex(r) => CapturesImpl::Regex {
142 captures: r.captures(haystack)?,
143 idx: 0,
144 },
145 EsRegexImpl::Regress(r) => {
146 let re_match = r.find(haystack)?;
147 CapturesImpl::Regress {
148 captures_iter: re_match.captures.into_iter(),
149 haystack,
150 match_range: Some(re_match.range),
151 }
152 }
153 };
154 Some(Captures { delegate })
155 }
156}
157
158pub struct Captures<'h> {
159 delegate: CapturesImpl<'h>,
160}
161
162enum CapturesImpl<'h> {
163 Regex {
170 captures: regex::Captures<'h>,
171 idx: usize,
172 },
173 Regress {
175 captures_iter: vec::IntoIter<Option<regress::Range>>,
176 haystack: &'h str,
177 match_range: Option<regress::Range>,
178 },
179}
180
181impl<'h> Iterator for Captures<'h> {
182 type Item = Option<&'h str>;
183
184 fn next(&mut self) -> Option<Self::Item> {
185 match &mut self.delegate {
186 CapturesImpl::Regex { captures, idx } => {
187 if *idx >= captures.len() {
188 None
189 } else {
190 let capture = Some(captures.get(*idx).map(|sub_match| sub_match.as_str()));
191 *idx += 1;
192 capture
193 }
194 }
195 CapturesImpl::Regress {
196 captures_iter,
197 haystack,
198 match_range,
199 } => {
200 if let Some(range) = match_range.take() {
201 Some(Some(&haystack[range]))
203 } else {
204 Some(captures_iter.next()?.map(|range| &haystack[range]))
205 }
206 }
207 }
208 }
209}
210
211#[cfg(test)]
212mod tests {
213 use super::{EsRegex, EsRegexImpl};
214
215 #[test]
216 fn round_trip_serialize() {
217 let regex = EsRegex::new("[a-z]", "i").unwrap();
218 let serialized = serde_json::to_string(®ex).unwrap();
219 let parsed = serde_json::from_str::<EsRegex>(&serialized).unwrap();
220 assert_eq!(regex, parsed);
221 }
222
223 #[test]
224 fn es_regex_matches_simple() {
225 let regex = EsRegex::new("a", "").unwrap();
226 assert!(matches!(regex.delegate, EsRegexImpl::Regex { .. }));
227 assert!(regex.is_match("a"));
228 }
229
230 #[test]
231 fn es_regex_matches_negative_lookahead() {
232 let regex = EsRegex::new("a(?!b)", "").unwrap();
234 assert!(matches!(regex.delegate, EsRegexImpl::Regress { .. }));
235 assert!(!regex.is_match("ab"));
236 assert!(regex.is_match("ac"));
237 }
238
239 #[test]
240 fn invalid_regex() {
241 assert!(matches!(EsRegex::new("*", ""), Err { .. }))
245 }
246
247 #[test]
248 fn captures_with_regex() {
249 let regex = EsRegex::new(r"(notmatched)|(\d{4})-(\d{2})-(\d{2})", "").unwrap();
250 assert!(matches!(regex.delegate, EsRegexImpl::Regex { .. }));
251
252 let captures = regex.captures("Today is 2024-01-15");
253 assert!(captures.is_some());
254 let caps: Vec<_> = captures.unwrap().collect();
255 assert_eq!(caps.len(), 5); assert_eq!(caps[0], Some("2024-01-15")); assert_eq!(caps[1], None); assert_eq!(caps[2], Some("2024")); assert_eq!(caps[3], Some("01")); assert_eq!(caps[4], Some("15")); }
262
263 #[test]
264 fn captures_with_regress() {
265 let regex = EsRegex::new(r"(\w+)(?=baz)", "").unwrap();
266 assert!(matches!(regex.delegate, EsRegexImpl::Regress { .. }));
267
268 let captures = regex.captures("foobar");
269 assert!(captures.is_none());
270
271 let captures = regex.captures("foobaz");
272 assert!(captures.is_some());
273 let caps: Vec<_> = captures.unwrap().collect();
274 assert_eq!(caps.len(), 2); assert_eq!(caps[0], Some("foo")); assert_eq!(caps[1], Some("foo")); }
278}