turbo_tasks_fs/
glob.rs

1use std::fmt::Display;
2
3use anyhow::{Result, bail};
4use regex::bytes::{Regex, RegexBuilder};
5use serde::{Deserialize, Serialize};
6use turbo_rcstr::{RcStr, rcstr};
7use turbo_tasks::{TaskInput, Vc, trace::TraceRawVcs};
8
9use crate::globset::parse;
10
11// Examples:
12// - file.js = File(file.js)
13// - *.js = AnyFile, File(.js)
14// - file*.js = File(file), AnyFile, File(.js)
15// - dir/file.js = File(dir), PathSeparator, File(file.js)
16// - **/*.js = AnyDirectories, PathSeparator, AnyFile, File(.js)
17// - {a/**,*}/file = Alternatives([File(a), PathSeparator, AnyDirectories], [AnyFile]),
18//   PathSeparator, File(file)
19
20// Note: a/**/b does match a/b, so we need some special logic about path
21// separators
22
23#[turbo_tasks::value(eq = "manual")]
24#[derive(Debug, Clone)]
25#[serde(into = "GlobForm", try_from = "GlobForm")]
26pub struct Glob {
27    glob: String,
28    #[turbo_tasks(trace_ignore)]
29    opts: GlobOptions,
30    #[turbo_tasks(trace_ignore)]
31    regex: Regex,
32    #[turbo_tasks(trace_ignore)]
33    directory_match_regex: Regex,
34}
35impl PartialEq for Glob {
36    fn eq(&self, other: &Self) -> bool {
37        self.glob == other.glob
38    }
39}
40impl Eq for Glob {}
41
42impl Display for Glob {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        write!(f, "Glob({})", self.glob)
45    }
46}
47#[derive(
48    Serialize, Deserialize, Copy, Clone, PartialEq, Eq, Hash, Default, TaskInput, TraceRawVcs, Debug,
49)]
50pub struct GlobOptions {
51    /// Whether the glob is a partial match.
52    /// Allows glob to match any part of the given string(s).
53    /// NOTE: this means that a pattern like `node_modules/package_name` with `contains:true` will
54    /// match `foo_node_modules/package_name_bar` If you want to match a _directory_ named
55    /// `node_modules/package_name` you should use `**/node_modules/package_name/**`
56    pub contains: bool,
57}
58
59#[derive(Serialize, Deserialize)]
60struct GlobForm {
61    glob: String,
62    opts: GlobOptions,
63}
64impl From<Glob> for GlobForm {
65    fn from(value: Glob) -> Self {
66        Self {
67            glob: value.glob,
68            opts: value.opts,
69        }
70    }
71}
72impl TryFrom<GlobForm> for Glob {
73    type Error = anyhow::Error;
74    fn try_from(value: GlobForm) -> Result<Self, Self::Error> {
75        Glob::parse(&value.glob, value.opts)
76    }
77}
78
79impl Glob {
80    // Returns true if the glob matches the given path.
81    pub fn matches(&self, path: &str) -> bool {
82        self.regex.is_match(path.as_bytes())
83    }
84
85    // Returns true if the glob might match a filename underneath this `path` where the
86    // path represents a directory.
87    pub fn can_match_in_directory(&self, path: &str) -> bool {
88        debug_assert!(
89            !path.ends_with('/'),
90            "Path should be a directory name and not end with /"
91        );
92        self.directory_match_regex.is_match(path.as_bytes())
93    }
94
95    pub fn parse(input: &str, opts: GlobOptions) -> Result<Glob> {
96        let (glob_re, directory_match_re) = parse(input, opts)?;
97        let regex = new_regex(glob_re.as_str());
98        let directory_match_regex = new_regex(directory_match_re.as_str());
99
100        Ok(Glob {
101            glob: input.to_string(),
102            opts,
103            regex,
104            directory_match_regex,
105        })
106    }
107}
108
109impl TryFrom<&str> for Glob {
110    type Error = anyhow::Error;
111
112    fn try_from(value: &str) -> Result<Self, Self::Error> {
113        Glob::parse(value, GlobOptions::default())
114    }
115}
116
117#[turbo_tasks::value_impl]
118impl Glob {
119    #[turbo_tasks::function]
120    pub fn new(glob: RcStr, opts: GlobOptions) -> Result<Vc<Self>> {
121        Ok(Self::cell(Glob::parse(glob.as_str(), opts)?))
122    }
123
124    #[turbo_tasks::function]
125    pub async fn alternatives(globs: Vec<Vc<Glob>>) -> Result<Vc<Self>> {
126        match globs.len() {
127            0 => Ok(Glob::new(rcstr!(""), GlobOptions::default())),
128            1 => Ok(globs.into_iter().next().unwrap()),
129            _ => {
130                let mut new_glob = String::new();
131                new_glob.push('{');
132                let mut opts = None;
133                for (index, glob) in globs.iter().enumerate() {
134                    if index > 0 {
135                        new_glob.push(',');
136                    }
137                    let glob = &*glob.await?;
138                    if let Some(old_opts) = opts {
139                        if old_opts != glob.opts {
140                            bail!(
141                                "Cannot compose globs with different options via the \
142                                 `alternatives` function."
143                            )
144                        }
145                    } else {
146                        opts = Some(glob.opts);
147                    }
148                    new_glob.push_str(&glob.glob);
149                }
150                new_glob.push('}');
151                // The loop must have iterated at least once, so the options must be initialized.
152                Ok(Glob::new(new_glob.into(), opts.unwrap()))
153            }
154        }
155    }
156}
157
158fn new_regex(pattern: &str) -> Regex {
159    RegexBuilder::new(pattern)
160        .dot_matches_new_line(true)
161        .build()
162        .expect("A successfully parsed glob should produce a valid regex")
163}
164
#[cfg(test)]
mod tests {
    use rstest::*;

    use super::{Glob, GlobOptions};

    /// Parses `pattern` with `opts`, panicking (at the caller's location) on an invalid glob.
    #[track_caller]
    fn compile(pattern: &str, opts: GlobOptions) -> Glob {
        Glob::parse(pattern, opts).unwrap()
    }

    /// Full-match globs that must match the given path.
    #[rstest]
    #[case::file("file.js", "file.js")]
    #[case::dir_and_file("../public/äöüščří.png", "../public/äöüščří.png")]
    #[case::dir_and_file("dir/file.js", "dir/file.js")]
    #[case::file_braces("file.{ts,js}", "file.js")]
    #[case::dir_and_file_braces("dir/file.{ts,js}", "dir/file.js")]
    #[case::dir_and_file_dir_braces("{dir,other}/file.{ts,js}", "dir/file.js")]
    #[case::star("*.js", "file.js")]
    #[case::dir_star("dir/*.js", "dir/file.js")]
    #[case::globstar("**/*.js", "file.js")]
    #[case::globstar("**/*.js", "dir/file.js")]
    #[case::globstar("**/*.js", "dir/sub/file.js")]
    #[case::globstar("**/**/*.js", "file.js")]
    #[case::globstar("**/**/*.js", "dir/sub/file.js")]
    #[case::globstar("**", "/foo")]
    #[case::globstar("**", "foo")]
    #[case::star("*", "foo")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/sub/file.js")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/a/sub/file.js")]
    #[case::globstar_in_dir("dir/**/sub/file.js", "dir/a/b/sub/file.js")]
    #[case::globstar_in_dir(
        "**/next/dist/**/*.shared-runtime.js",
        "next/dist/shared/lib/app-router-context.shared-runtime.js"
    )]
    #[case::star_dir(
        "**/*/next/dist/server/next.js",
        "node_modules/next/dist/server/next.js"
    )]
    #[case::node_modules_root("**/node_modules/**", "node_modules/next/dist/server/next.js")]
    #[case::node_modules_root_package(
        "**/node_modules/next/**",
        "node_modules/next/dist/server/next.js"
    )]
    #[case::node_modules_nested(
        "**/node_modules/**",
        "apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_nested_package(
        "**/node_modules/regenerate-unicode-properties/**",
        "apps/some-app/node_modules/regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm(
        "**/node_modules/**",
        "node_modules/.pnpm/regenerate-unicode-properties@9.0.0/node_modules/\
         regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm_package(
        "**/node_modules/{regenerate,regenerate-unicode-properties}/**",
        "node_modules/.pnpm/regenerate-unicode-properties@9.0.0/node_modules/\
         regenerate-unicode-properties/Script_Extensions/Osage.js"
    )]
    #[case::node_modules_pnpm_prefixed_package(
        "**/node_modules/{@blockfrost/blockfrost-js,@highlight-run/node,@libsql/client,@jpg-store/\
         lucid-cardano,@mikro-orm/core,@mikro-orm/knex,@prisma/client,@sentry/nextjs,@sentry/node,\
         @swc/core,argon2,autoprefixer,bcrypt,better-sqlite3,canvas,cpu-features,cypress,eslint,\
         express,next-seo,node-pty,payload,pg,playwright,postcss,prettier,prisma,puppeteer,rimraf,\
         sharp,shiki,sqlite3,tailwindcss,ts-node,typescript,vscode-oniguruma,webpack,websocket,@\
         aws-sdk/client-dynamodb,@aws-sdk/lib-dynamodb}/**",
        "node_modules/.pnpm/@aws-sdk+lib-dynamodb@3.445.0_@aws-sdk+client-dynamodb@3.445.0/\
         node_modules/@aws-sdk/lib-dynamodb/dist-es/index.js"
    )]
    #[case::alternatives_nested1("{a,b/c,d/e/{f,g/h}}", "a")]
    #[case::alternatives_nested2("{a,b/c,d/e/{f,g/h}}", "b/c")]
    #[case::alternatives_nested3("{a,b/c,d/e/{f,g/h}}", "d/e/f")]
    #[case::alternatives_nested4("{a,b/c,d/e/{f,g/h}}", "d/e/g/h")]
    #[case::alternatives_empty1("react{,-dom}", "react")]
    #[case::alternatives_empty2("react{,-dom}", "react-dom")]
    #[case::alternatives_chars("[abc]", "b")]
    fn glob_match(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions::default());
        println!("{glob:?} {path}");

        let matched = glob.matches(path);
        assert!(matched);
    }

    /// Full-match globs that must NOT match the given path.
    #[rstest]
    #[case::early_end("*.raw", "hello.raw.js")]
    #[case::early_end(
        "**/next/dist/esm/*.shared-runtime.js",
        "next/dist/shared/lib/app-router-context.shared-runtime.js"
    )]
    #[case::star("*", "/foo")]
    fn glob_not_matching(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions::default());
        println!("{glob:?} {path}");

        let matched = glob.matches(path);
        assert!(!matched);
    }

    /// Directories in which the glob could still match something.
    #[rstest]
    #[case::dir_and_file_partial("dir/file.js", "dir")]
    #[case::dir_star_partial("dir/*.js", "dir")]
    #[case::globstar_partial("**/**/*.js", "dir")]
    #[case::globstar_partial("**/**/*.js", "dir/sub")]
    // This demonstrates some ambiguity in naming. `file.js` might be a directory name.
    #[case::globstar_partial("**/**/*.js", "dir/sub/file.js")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b/sub")]
    #[case::globstar_in_dir_partial("dir/**/sub/file.js", "dir/a/b/sub/file.js")]
    fn glob_can_match_directory(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions::default());
        println!("{glob:?} {path}");

        let reachable = glob.can_match_in_directory(path);
        assert!(reachable);
    }

    /// Directories in which the glob can never match anything.
    #[rstest]
    // Even if there was a dir named `file.js`, we know the glob wasn't intended to match it.
    #[case::dir_and_file_partial("dir/file.js", "dir/file.js")]
    #[case::alternatives_chars("[abc]", "b")]
    fn glob_not_can_match_directory(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions::default());
        println!("{glob:?} {path}");

        let reachable = glob.can_match_in_directory(path);
        assert!(!reachable);
    }

    /// `contains: true` globs that must match somewhere inside the given path.
    #[rstest]
    #[case::star("*", "/foo")]
    #[case::star("*", "foo")]
    #[case::star("*", "foo/bar")]
    #[case::prefix("foo/*", "bar/foo/baz")]
    // This is a possibly surprising case.
    #[case::dir_match("node_modules/foo", "my_node_modules/foobar")]
    fn partial_glob_match(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions { contains: true });
        println!("{glob:?} {path}");

        let matched = glob.matches(path);
        assert!(matched);
    }

    /// `contains: true` globs that must NOT match anywhere inside the given path.
    #[rstest]
    #[case::literal("foo", "bar")]
    #[case::suffix("*.js", "foo.ts")]
    #[case::prefix("foo/*", "bar")]
    // This is a possibly surprising case
    #[case::dir_match("/node_modules/", "node_modules/")]
    fn partial_glob_not_matching(#[case] glob: &str, #[case] path: &str) {
        let glob = compile(glob, GlobOptions { contains: true });
        println!("{glob:?} {path}");

        let matched = glob.matches(path);
        assert!(!matched);
    }
}