Skip to main content

turbo_tasks_fs/
glob.rs

1use std::fmt::Display;
2
3use anyhow::{Result, bail};
4use bincode::{
5    Decode, Encode,
6    de::Decoder,
7    enc::Encoder,
8    error::{DecodeError, EncodeError},
9    impl_borrow_decode,
10};
11use regex::bytes::{Regex, RegexBuilder};
12use turbo_rcstr::{RcStr, rcstr};
13use turbo_tasks::{Vc, trace::TraceRawVcs};
14
15use crate::globset::parse;
16
17// Examples:
18// - file.js = File(file.js)
19// - *.js = AnyFile, File(.js)
20// - file*.js = File(file), AnyFile, File(.js)
21// - dir/file.js = File(dir), PathSeparator, File(file.js)
22// - **/*.js = AnyDirectories, PathSeparator, AnyFile, File(.js)
23// - {a/**,*}/file = Alternatives([File(a), PathSeparator, AnyDirectories], [AnyFile]),
24//   PathSeparator, File(file)
25
26// Note: a/**/b does match a/b, so we need some special logic about path
27// separators
28
/// A glob pattern together with the regexes compiled from it.
///
/// Equality is implemented manually (`eq = "manual"`) and serialization is custom
/// (`serialization = "custom"`): only `glob` and `opts` are encoded, and decoding re-parses the
/// pattern to rebuild the regexes.
#[turbo_tasks::value(eq = "manual", serialization = "custom")]
#[derive(Debug, Clone)]
pub struct Glob {
    /// The source pattern exactly as it was handed to [`Glob::parse`].
    glob: RcStr,
    /// The options the pattern was parsed with.
    #[turbo_tasks(trace_ignore)]
    opts: GlobOptions,
    /// Regex used by [`Glob::matches`] to test complete paths.
    #[turbo_tasks(trace_ignore)]
    regex: Regex,
    /// Regex used by [`Glob::can_match_in_directory`] to test directory paths.
    #[turbo_tasks(trace_ignore)]
    directory_match_regex: Regex,
}
40
41impl PartialEq for Glob {
42    fn eq(&self, other: &Self) -> bool {
43        self.glob == other.glob
44    }
45}
46
47impl Eq for Glob {}
48
49impl Display for Glob {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        write!(f, "Glob({})", self.glob)
52    }
53}
54
55impl Encode for Glob {
56    fn encode<E: Encoder>(&self, encoder: &mut E) -> Result<(), EncodeError> {
57        self.glob.encode(encoder)?;
58        self.opts.encode(encoder)?;
59        Ok(())
60    }
61}
62
63impl<Context> Decode<Context> for Glob {
64    fn decode<D: Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, DecodeError> {
65        let glob = RcStr::decode(decoder)?;
66        let opts = GlobOptions::decode(decoder)?;
67        Glob::parse(glob, opts).map_err(|err| DecodeError::OtherString(err.to_string()))
68    }
69}
70
71impl_borrow_decode!(Glob);
72
/// Options that control how a glob pattern is parsed by [`Glob::parse`].
#[turbo_tasks::task_input]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default, TraceRawVcs, Debug, Encode, Decode)]
pub struct GlobOptions {
    /// Whether the glob is a partial match.
    /// Allows glob to match any part of the given string(s).
    /// NOTE: this means that a pattern like `node_modules/package_name` with `contains: true`
    /// will match `foo_node_modules/package_name_bar`. If you want to match a _directory_ named
    /// `node_modules/package_name` you should use `**/node_modules/package_name/**`.
    ///
    /// Defaults to `false` (via the derived `Default`).
    pub contains: bool,
}
83
84impl Glob {
85    // Returns true if the glob matches the given path.
86    pub fn matches(&self, path: &str) -> bool {
87        self.regex.is_match(path.as_bytes())
88    }
89
90    // Returns true if the glob might match a filename underneath this `path` where the
91    // path represents a directory.
92    pub fn can_match_in_directory(&self, path: &str) -> bool {
93        debug_assert!(
94            !path.ends_with('/'),
95            "Path should be a directory name and not end with /"
96        );
97        self.directory_match_regex.is_match(path.as_bytes())
98    }
99
100    pub fn parse(input: RcStr, opts: GlobOptions) -> Result<Glob> {
101        let (glob_re, directory_match_re) = parse(&input, opts)?;
102        let regex = new_regex(glob_re.as_str());
103        let directory_match_regex = new_regex(directory_match_re.as_str());
104
105        Ok(Glob {
106            glob: input,
107            opts,
108            regex,
109            directory_match_regex,
110        })
111    }
112}
113
114#[turbo_tasks::value_impl]
115impl Glob {
116    #[turbo_tasks::function]
117    pub fn new(glob: RcStr, opts: GlobOptions) -> Result<Vc<Self>> {
118        Ok(Self::cell(Glob::parse(glob, opts)?))
119    }
120
121    #[turbo_tasks::function]
122    pub async fn alternatives(globs: Vec<Vc<Glob>>) -> Result<Vc<Self>> {
123        match globs.len() {
124            0 => Ok(Glob::new(rcstr!(""), GlobOptions::default())),
125            1 => Ok(globs.into_iter().next().unwrap()),
126            _ => {
127                let mut new_glob = String::new();
128                new_glob.push('{');
129                let mut opts = None;
130                for (index, glob) in globs.iter().enumerate() {
131                    if index > 0 {
132                        new_glob.push(',');
133                    }
134                    let glob = &*glob.await?;
135                    if let Some(old_opts) = opts {
136                        if old_opts != glob.opts {
137                            bail!(
138                                "Cannot compose globs with different options via the \
139                                 `alternatives` function."
140                            )
141                        }
142                    } else {
143                        opts = Some(glob.opts);
144                    }
145                    new_glob.push_str(&glob.glob);
146                }
147                new_glob.push('}');
148                // The loop must have iterated at least once, so the options must be initialized.
149                Ok(Glob::new(new_glob.into(), opts.unwrap()))
150            }
151        }
152    }
153}
154
155fn new_regex(pattern: &str) -> Regex {
156    RegexBuilder::new(pattern)
157        .dot_matches_new_line(true)
158        .build()
159        .expect("A successfully parsed glob should produce a valid regex")
160}
161
#[cfg(test)]
mod tests {
    use rstest::*;

    use super::*;

    // Parses `pattern` with `opts`. `#[track_caller]` keeps the panic location of an invalid
    // pattern at the test that asked for it.
    #[track_caller]
    fn parse_glob(pattern: &str, opts: GlobOptions) -> Glob {
        Glob::parse(RcStr::from(pattern), opts).unwrap()
    }

    // Patterns that must match the full path with default options.
    #[rstest]
    #[case::file("file.js", "file.js")]
    #[case::dir_and_file("../public/äöüščří.png", "../public/äöüščří.png")]
    #[case::dir_and_file("dir/file.js", "dir/file.js")]
    #[case::file_braces("file.{ts,js}", "file.js")]
    #[case::dir_and_file_braces("dir/file.{ts,js}", "dir/file.js")]
    #[case::dir_and_file_dir_braces("{dir,other}/file.{ts,js}", "dir/file.js")]
    #[case::star("*.js", "file.js")]
    #[case::dir_star("dir/*.js", "dir/file.js")]
    #[case::globstar("**/*.js", "file.js")]
    #[case::globstar("**/*.js", "dir/file.js")]
    #[case::globstar("**/*.js", "dir/sub/file.js")]
    #[case::globstar("**/**/*.js", "file.js")]
    #[case::globstar("**/**/*.js", "dir/sub/file.js")]
    #[case::globstar("**", "/foo")]
    #[case::globstar("**", "foo")]
    #[case::star("*", "foo")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/sub/file.js")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/a/sub/file.js")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/a/b/sub/file.js")]
    #[case::globstar_in_dir(
        "**/next/dist/**/*.shared-runtime.js",
        "next/dist/shared/lib/app-router-context.shared-runtime.js"
    )]
    #[case::star_dir(
        "**/*/next/dist/server/next.js",
        "node_modules/next/dist/server/next.js"
    )]
    #[case::node_modules_root("**/node_modules/**", "node_modules/next/dist/server/next.js")]
    #[case::node_modules_root_package(
        "**/node_modules/next/**",
        "node_modules/next/dist/server/next.js"
    )]
    #[case::node_modules_nested(
        "**/node_modules/**",
        "apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_nested_package(
        "**/node_modules/regenerate-unicode-properties/**",
        "apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm(
        "**/node_modules/**",
        "node_modules/.pnpm/regenerate-unicode-properties@9.0.0/node_modules/\
         regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm_package(
        "**/node_modules/{regenerate,regenerate-unicode-properties}/**",
        "node_modules/.pnpm/regenerate-unicode-properties@9.0.0/node_modules/\
         regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm_prefixed_package(
        "**/node_modules/{@blockfrost/blockfrost-js,@highlight-run/node,@libsql/client,@jpg-store/\
         lucid-cardano,@mikro-orm/core,@mikro-orm/knex,@prisma/client,@sentry/nextjs,@sentry/node,\
         @swc/core,argon2,autoprefixer,bcrypt,better-sqlite3,canvas,cpu-features,cypress,eslint,\
         express,next-seo,node-pty,payload,pg,playwright,postcss,prettier,prisma,puppeteer,rimraf,\
         sharp,shiki,sqlite3,tailwindcss,ts-node,typescript,vscode-oniguruma,webpack,websocket,@\
         aws-sdk/client-dynamodb,@aws-sdk/lib-dynamodb}/**",
        "node_modules/.pnpm/@aws-sdk+lib-dynamodb@3.445.0_@aws-sdk+client-dynamodb@3.445.0/\
         node_modules/@aws-sdk/lib-dynamodb/dist-es/index.js"
    )]
    #[case::alternatives_nested1("{a,b/c,d/e/{f,g/h}}", "a")]
    #[case::alternatives_nested2("{a,b/c,d/e/{f,g/h}}", "b/c")]
    #[case::alternatives_nested3("{a,b/c,d/e/{f,g/h}}", "d/e/f")]
    #[case::alternatives_nested4("{a,b/c,d/e/{f,g/h}}", "d/e/g/h")]
    #[case::alternatives_empty1("react{,-dom}", "react")]
    #[case::alternatives_empty2("react{,-dom}", "react-dom")]
    #[case::alternatives_chars("[abc]", "b")]
    fn glob_match(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions::default());

        println!("{glob:?} {path}");

        assert!(glob.matches(path));
    }

    // Patterns that must NOT match the full path with default options.
    #[rstest]
    #[case::early_end("*.raw", "hello.raw.js")]
    #[case::early_end(
        "**/next/dist/esm/*.shared-runtime.js",
        "next/dist/shared/lib/app-router-context.shared-runtime.js"
    )]
    #[case::star("*", "/foo")]
    fn glob_not_matching(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions::default());

        println!("{glob:?} {path}");

        assert!(!glob.matches(path));
    }

    // Directories that may still contain a match for the pattern.
    #[rstest]
    #[case::dir_and_file_partial("dir/file.js", "dir")]
    #[case::dir_star_partial("dir/*.js", "dir")]
    #[case::globstar_partial("**/**/*.js", "dir")]
    #[case::globstar_partial("**/**/*.js", "dir/sub")]
    #[case::globstar_partial("**/**/*.js", "dir/sub/file.js")] // This demonstrates some ambiguity in naming. `file.js` might be a directory name.
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b/sub")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b/sub/file.js")]
    fn glob_can_match_directory(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions::default());

        println!("{glob:?} {path}");

        assert!(glob.can_match_in_directory(path));
    }

    // Directories that cannot contain a match for the pattern.
    #[rstest]
    #[case::dir_and_file_partial("dir/file.js", "dir/file.js")] // even if there was a dir, named `file.js` we know the glob wasn't intended to match it.
    #[case::alternatives_chars("[abc]", "b")]
    fn glob_not_can_match_directory(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions::default());

        println!("{glob:?} {path}");

        assert!(!glob.can_match_in_directory(path));
    }

    // With `contains: true` the pattern may match any part of the path.
    #[rstest]
    #[case::star("*", "/foo")]
    #[case::star("*", "foo")]
    #[case::star("*", "foo/bar")]
    #[case::prefix("foo/*", "bar/foo/baz")]
    // This is a possibly surprising case.
    #[case::dir_match("node_modules/foo", "my_node_modules/foobar")]
    fn partial_glob_match(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions { contains: true });

        println!("{glob:?} {path}");

        assert!(glob.matches(path));
    }

    // Even with `contains: true` these patterns must not match.
    #[rstest]
    #[case::literal("foo", "bar")]
    #[case::suffix("*.js", "foo.ts")]
    #[case::prefix("foo/*", "bar")]
    // This is a possibly surprising case
    #[case::dir_match("/node_modules/", "node_modules/")]
    fn partial_glob_not_matching(#[case] glob: &str, #[case] path: &str) {
        let glob = parse_glob(glob, GlobOptions { contains: true });

        println!("{glob:?} {path}");

        assert!(!glob.matches(path));
    }
}