turbo_esregex/
lib.rs

1#![feature(arbitrary_self_types_pointers)]
2
3use anyhow::{Result, bail};
4
/// Runs the registration code for this crate.
///
/// Calls `turbo_tasks::register()` first and then inlines the contents of the
/// `register.rs` file located in the build's `OUT_DIR`.
// NOTE(review): `register.rs` is presumably emitted by this crate's build script — confirm.
pub fn register() {
    turbo_tasks::register();
    include!(concat!(env!("OUT_DIR"), "/register.rs"));
}
9
/// A simple regular expression implementation following ecmascript semantics
///
/// Delegates to the `regex` crate when possible and `regress` otherwise.
///
/// Equality is implemented manually (`eq = "manual"`), and serde goes through
/// `RegexForm` in both directions, so only `pattern` and `flags` are serialized
/// and the delegate is rebuilt on deserialization.
#[derive(Debug, Clone)]
#[turbo_tasks::value(eq = "manual", shared)]
#[serde(into = "RegexForm", try_from = "RegexForm")]
pub struct EsRegex {
    // The compiled regex that actually performs the matching.
    #[turbo_tasks(trace_ignore)]
    delegate: EsRegexImpl,
    // The inputs this regex was built from, kept to support equality and
    // serialization. `pattern` is stored as processed by `EsRegex::new`
    // (escaped slashes rewritten to `/`); `flags` is stored exactly as given.
    pattern: String,
    flags: String,
}
24
25#[derive(Debug, Clone)]
26enum EsRegexImpl {
27    Regex(regex::Regex),
28    Regress(regress::Regex),
29}
30
31/// Equality uses the source inputs since our delegate regex impls don't support
32/// equality natively.
33/// NOTE: there are multiple 'equivalent' ways to write a regex and this
34/// approach does _not_ attempt to equate them.
35impl PartialEq for EsRegex {
36    fn eq(&self, other: &Self) -> bool {
37        self.pattern == other.pattern && self.flags == other.flags
38    }
39}
40impl Eq for EsRegex {}
41
42impl TryFrom<RegexForm> for EsRegex {
43    type Error = anyhow::Error;
44
45    fn try_from(value: RegexForm) -> std::result::Result<Self, Self::Error> {
46        EsRegex::new(&value.pattern, &value.flags)
47    }
48}
49
50/// This is the serializable form for the `EsRegex` struct
51#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
52struct RegexForm {
53    pattern: String,
54    flags: String,
55}
56
57impl From<EsRegex> for RegexForm {
58    fn from(value: EsRegex) -> Self {
59        Self {
60            pattern: value.pattern,
61            flags: value.flags,
62        }
63    }
64}
65
66impl EsRegex {
67    /// Support ecmascript style regular expressions by selecting the `regex` crate when possible
68    /// and using regress when not.
69    pub fn new(pattern: &str, flags: &str) -> Result<Self> {
70        // rust regex doesn't allow escaped slashes, but they are necessary in js
71        let pattern = pattern.replace("\\/", "/");
72
73        let mut applied_flags = String::new();
74        for flag in flags.chars() {
75            match flag {
76                // indices for substring matches: not relevant for the regex itself
77                'd' => {}
78                // global: default in rust, ignore
79                'g' => {}
80                // case-insensitive: letters match both upper and lower case
81                'i' => applied_flags.push('i'),
82                // multi-line mode: ^ and $ match begin/end of line
83                'm' => applied_flags.push('m'),
84                // allow . to match \n
85                's' => applied_flags.push('s'),
86                // Unicode support (enabled by default)
87                'u' => applied_flags.push('u'),
88                // sticky search: not relevant for the regex itself
89                'y' => {}
90                _ => bail!("unsupported flag `{flag}` in regex: `{pattern}` with flags: `{flags}`"),
91            }
92        }
93
94        let regex = if !applied_flags.is_empty() {
95            regex::Regex::new(&format!("(?{applied_flags}){pattern}"))
96        } else {
97            regex::Regex::new(&pattern)
98        };
99
100        let delegate = match regex {
101            Ok(reg) => Ok(EsRegexImpl::Regex(reg)),
102            Err(_e) => {
103                // We failed to parse as an regex:Regex, try using regress. Regress uses the es
104                // flags format so we can pass the original flags value.
105                match regress::Regex::with_flags(&pattern, regress::Flags::from(flags)) {
106                    Ok(reg) => Ok(EsRegexImpl::Regress(reg)),
107                    // Propogate the error as is, regress has useful error messages.
108                    Err(e) => Err(e),
109                }
110            }
111        }?;
112        Ok(Self {
113            delegate,
114            pattern,
115            flags: flags.to_string(),
116        })
117    }
118
119    /// Returns true if there is any match for this regex in the `haystac`
120    pub fn is_match(&self, haystack: &str) -> bool {
121        match &self.delegate {
122            EsRegexImpl::Regex(r) => r.is_match(haystack),
123            EsRegexImpl::Regress(r) => r.find(haystack).is_some(),
124        }
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::{EsRegex, EsRegexImpl};

    /// Serializing to JSON and parsing it back yields an equal regex.
    #[test]
    fn round_trip_serialize() {
        let original = EsRegex::new("[a-z]", "i").unwrap();
        let json = serde_json::to_string(&original).unwrap();
        let restored: EsRegex = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }

    /// A trivial pattern is handled by the `regex` crate.
    #[test]
    fn es_regex_matches_simple() {
        let simple = EsRegex::new("a", "").unwrap();
        assert!(matches!(simple.delegate, EsRegexImpl::Regex(_)));
        assert!(simple.is_match("a"));
    }

    /// Negative lookahead is not supported by the regex crate, so the
    /// pattern has to end up on the regress delegate.
    #[test]
    fn es_regex_matches_negative_lookahead() {
        let lookahead = EsRegex::new("a(?!b)", "").unwrap();
        assert!(matches!(lookahead.delegate, EsRegexImpl::Regress(_)));
        assert!(!lookahead.is_match("ab"));
        assert!(lookahead.is_match("ac"));
    }

    /// `*` is invalid since there is nothing being repeated. The error
    /// message is not asserted on, since it comes from the underlying
    /// implementations.
    #[test]
    fn invalid_regex() {
        assert!(EsRegex::new("*", "").is_err());
    }
}