1#![feature(arbitrary_self_types_pointers)]
2
3use std::vec;
4
5use anyhow::{Result, bail};
6use bincode::{
7 Decode, Encode,
8 de::Decoder,
9 enc::Encoder,
10 error::{DecodeError, EncodeError},
11 impl_borrow_decode,
12};
13
/// A regular expression built from a pattern string and a string of
/// ECMAScript-style flag characters.
///
/// The compiled matcher lives in `delegate`. The `pattern` and `flags` strings
/// are kept alongside it because they, and not the compiled matcher, are what
/// the `PartialEq` and bincode `Encode` implementations in this file operate
/// on.
#[derive(Debug, Clone)]
#[turbo_tasks::value(eq = "manual", shared, serialization = "custom")]
pub struct EsRegex {
    // Compiled form of the expression; either backend listed in `EsRegexImpl`.
    #[turbo_tasks(trace_ignore)]
    delegate: EsRegexImpl,
    /// The pattern as stored by [`EsRegex::new`], i.e. after its `\/` → `/`
    /// rewrite has been applied to the caller's input.
    pub pattern: String,
    /// The flags string exactly as it was passed to [`EsRegex::new`].
    pub flags: String,
}
27
/// The compiled matcher behind an [`EsRegex`].
///
/// `EsRegex::new` tries the `regex` crate first and only builds a `regress`
/// matcher when the `regex` crate rejects the pattern.
#[derive(Debug, Clone)]
enum EsRegexImpl {
    /// Pattern compiled by the `regex` crate.
    Regex(regex::Regex),
    /// Pattern compiled by the `regress` crate (fallback).
    Regress(regress::Regex),
}
33
34impl PartialEq for EsRegex {
39 fn eq(&self, other: &Self) -> bool {
40 self.pattern == other.pattern && self.flags == other.flags
41 }
42}
43impl Eq for EsRegex {}
44
45impl Encode for EsRegex {
46 fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
47 self.pattern.encode(encoder)?;
48 self.flags.encode(encoder)?;
49 Ok(())
50 }
51}
52
53impl<Context> Decode<Context> for EsRegex {
54 fn decode<D: Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, DecodeError> {
55 let pattern: String = Decode::decode(decoder)?;
56 let flags: String = Decode::decode(decoder)?;
57 EsRegex::new(&pattern, &flags).map_err(|err| DecodeError::OtherString(err.to_string()))
59 }
60}
61
62impl_borrow_decode!(EsRegex);
63
64impl EsRegex {
65 pub fn new(pattern: &str, flags: &str) -> Result<Self> {
68 let pattern = pattern.replace("\\/", "/");
70
71 let mut applied_flags = String::new();
72 for flag in flags.chars() {
73 match flag {
74 'd' => {}
76 'g' => {}
78 'i' => applied_flags.push('i'),
80 'm' => applied_flags.push('m'),
82 's' => applied_flags.push('s'),
84 'u' => applied_flags.push('u'),
86 'y' => {}
88 _ => bail!("unsupported flag `{flag}` in regex: `{pattern}` with flags: `{flags}`"),
89 }
90 }
91
92 let regex = if !applied_flags.is_empty() {
93 regex::Regex::new(&format!("(?{applied_flags}){pattern}"))
94 } else {
95 regex::Regex::new(&pattern)
96 };
97
98 let delegate = match regex {
99 Ok(reg) => Ok(EsRegexImpl::Regex(reg)),
100 Err(_e) => {
101 match regress::Regex::with_flags(&pattern, regress::Flags::from(flags)) {
104 Ok(reg) => Ok(EsRegexImpl::Regress(reg)),
105 Err(e) => Err(e),
107 }
108 }
109 }?;
110 Ok(Self {
111 delegate,
112 pattern,
113 flags: flags.to_string(),
114 })
115 }
116
117 pub fn is_match(&self, haystack: &str) -> bool {
119 match &self.delegate {
120 EsRegexImpl::Regex(r) => r.is_match(haystack),
121 EsRegexImpl::Regress(r) => r.find(haystack).is_some(),
122 }
123 }
124
125 pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
136 let delegate = match &self.delegate {
137 EsRegexImpl::Regex(r) => CapturesImpl::Regex {
138 captures: r.captures(haystack)?,
139 idx: 0,
140 },
141 EsRegexImpl::Regress(r) => {
142 let re_match = r.find(haystack)?;
143 CapturesImpl::Regress {
144 captures_iter: re_match.captures.into_iter(),
145 haystack,
146 match_range: Some(re_match.range),
147 }
148 }
149 };
150 Some(Captures { delegate })
151 }
152}
153
/// Iterator over the capture groups of a single match, as returned by
/// [`EsRegex::captures`].
///
/// Each item is `Some(slice)` for a group that matched and `None` for a group
/// that did not; slices borrow from the haystack for `'h`.
pub struct Captures<'h> {
    // Backend-specific iteration state.
    delegate: CapturesImpl<'h>,
}
157
/// Backend-specific state behind [`Captures`].
enum CapturesImpl<'h> {
    /// State for a match produced by the `regex` crate.
    Regex {
        // Capture set returned by `regex::Regex::captures`.
        captures: regex::Captures<'h>,
        // Index of the next group to yield; iteration stops at `captures.len()`.
        idx: usize,
    },
    /// State for a match produced by `regress`.
    Regress {
        // Ranges of the match's capture groups, in the order `regress` returned them.
        captures_iter: vec::IntoIter<Option<regress::Range>>,
        // Text the ranges index into.
        haystack: &'h str,
        // Range of the whole match; taken (set to `None`) once it has been yielded.
        match_range: Option<regress::Range>,
    },
}
176
177impl<'h> Iterator for Captures<'h> {
178 type Item = Option<&'h str>;
179
180 fn next(&mut self) -> Option<Self::Item> {
181 match &mut self.delegate {
182 CapturesImpl::Regex { captures, idx } => {
183 if *idx >= captures.len() {
184 None
185 } else {
186 let capture = Some(captures.get(*idx).map(|sub_match| sub_match.as_str()));
187 *idx += 1;
188 capture
189 }
190 }
191 CapturesImpl::Regress {
192 captures_iter,
193 haystack,
194 match_range,
195 } => {
196 if let Some(range) = match_range.take() {
197 Some(Some(&haystack[range]))
199 } else {
200 Some(captures_iter.next()?.map(|range| &haystack[range]))
201 }
202 }
203 }
204 }
205}
206
#[cfg(test)]
mod tests {
    use super::{EsRegex, EsRegexImpl};

    /// Encoding and then decoding yields an equal value and consumes every byte.
    #[test]
    fn round_trip_bincode() {
        let regex = EsRegex::new("[a-z]", "i").unwrap();
        let config = bincode::config::standard();
        let encoded = bincode::encode_to_vec(&regex, config).unwrap();
        let (decoded, len) = bincode::decode_from_slice::<EsRegex, _>(&encoded, config).unwrap();
        assert_eq!(regex, decoded);
        assert_eq!(len, encoded.len());
    }

    /// A plain pattern is handled by the `regex` backend.
    #[test]
    fn es_regex_matches_simple() {
        let regex = EsRegex::new("a", "").unwrap();
        assert!(matches!(regex.delegate, EsRegexImpl::Regex { .. }));
        assert!(regex.is_match("a"));
    }

    /// A negative lookahead is expected to end up on the `regress` backend.
    #[test]
    fn es_regex_matches_negative_lookahead() {
        let regex = EsRegex::new("a(?!b)", "").unwrap();
        assert!(matches!(regex.delegate, EsRegexImpl::Regress { .. }));
        assert!(!regex.is_match("ab"));
        assert!(regex.is_match("ac"));
    }

    /// A pattern rejected by both backends produces an error.
    #[test]
    fn invalid_regex() {
        assert!(EsRegex::new("*", "").is_err());
    }

    #[test]
    fn captures_with_regex() {
        let regex = EsRegex::new(r"(notmatched)|(\d{4})-(\d{2})-(\d{2})", "").unwrap();
        assert!(matches!(regex.delegate, EsRegexImpl::Regex { .. }));

        let captures = regex.captures("Today is 2024-01-15");
        assert!(captures.is_some());
        let caps: Vec<_> = captures.unwrap().collect();
        // Whole match plus four groups.
        assert_eq!(caps.len(), 5);
        assert_eq!(caps[0], Some("2024-01-15"));
        // The first alternative did not participate in the match.
        assert_eq!(caps[1], None);
        assert_eq!(caps[2], Some("2024"));
        assert_eq!(caps[3], Some("01"));
        assert_eq!(caps[4], Some("15"));
    }

    #[test]
    fn captures_with_regress() {
        let regex = EsRegex::new(r"(\w+)(?=baz)", "").unwrap();
        assert!(matches!(regex.delegate, EsRegexImpl::Regress { .. }));

        let captures = regex.captures("foobar");
        assert!(captures.is_none());

        let captures = regex.captures("foobaz");
        assert!(captures.is_some());
        let caps: Vec<_> = captures.unwrap().collect();
        // Whole match plus one group; the lookahead text is not part of either.
        assert_eq!(caps.len(), 2);
        assert_eq!(caps[0], Some("foo"));
        assert_eq!(caps[1], Some("foo"));
    }
}