turbo_tasks_fs/
read_glob.rs

1use anyhow::{Result, bail};
2use futures::try_join;
3use rustc_hash::FxHashMap;
4use turbo_rcstr::RcStr;
5use turbo_tasks::{Completion, ResolvedVc, TryJoinIterExt, Vc};
6
7use crate::{
8    DirectoryContent, DirectoryEntry, FileSystem, FileSystemPath, LinkContent, LinkType, glob::Glob,
9};
10
11#[turbo_tasks::value]
12#[derive(Default, Debug)]
13pub struct ReadGlobResult {
14    pub results: FxHashMap<RcStr, DirectoryEntry>,
15    pub inner: FxHashMap<RcStr, ResolvedVc<ReadGlobResult>>,
16}
17
18/// Reads matches of a glob pattern. Symlinks are not resolved (and returned as-is)
19///
20/// DETERMINISM: Result is in random order. Either sort result or do not depend
21/// on the order.
22#[turbo_tasks::function(fs)]
23pub async fn read_glob(directory: FileSystemPath, glob: Vc<Glob>) -> Result<Vc<ReadGlobResult>> {
24    read_glob_internal("", directory, glob).await
25}
26
27#[turbo_tasks::function(fs)]
28async fn read_glob_inner(
29    prefix: RcStr,
30    directory: FileSystemPath,
31    glob: Vc<Glob>,
32) -> Result<Vc<ReadGlobResult>> {
33    read_glob_internal(&prefix, directory, glob).await
34}
35
36// The `prefix` represents the relative directory path where symlinks are not resolve.
37async fn read_glob_internal(
38    prefix: &str,
39    directory: FileSystemPath,
40    glob: Vc<Glob>,
41) -> Result<Vc<ReadGlobResult>> {
42    let dir = directory.read_dir().await?;
43    let mut result = ReadGlobResult::default();
44    let glob_value = glob.await?;
45    let handle_file = |result: &mut ReadGlobResult,
46                       entry_path: &RcStr,
47                       segment: &RcStr,
48                       entry: &DirectoryEntry| {
49        if glob_value.matches(entry_path) {
50            result.results.insert(segment.clone(), entry.clone());
51        }
52    };
53    let handle_dir = async |result: &mut ReadGlobResult,
54                            entry_path: RcStr,
55                            segment: &RcStr,
56                            path: &FileSystemPath| {
57        if glob_value.can_match_in_directory(&entry_path) {
58            result.inner.insert(
59                segment.clone(),
60                read_glob_inner(entry_path, path.clone(), glob)
61                    .to_resolved()
62                    .await?,
63            );
64        }
65        anyhow::Ok(())
66    };
67
68    match &*dir {
69        DirectoryContent::Entries(entries) => {
70            for (segment, entry) in entries.iter() {
71                let entry_path: RcStr = if prefix.is_empty() {
72                    segment.clone()
73                } else {
74                    format!("{prefix}/{segment}").into()
75                };
76
77                match entry {
78                    DirectoryEntry::File(_) => {
79                        handle_file(&mut result, &entry_path, segment, entry);
80                    }
81                    DirectoryEntry::Directory(path) => {
82                        // Add the directory to `results` if it is a whole match of the glob
83                        handle_file(&mut result, &entry_path, segment, entry);
84                        // Recursively handle the directory
85                        handle_dir(&mut result, entry_path, segment, path).await?;
86                    }
87                    DirectoryEntry::Symlink(path) => {
88                        if let LinkContent::Link { link_type, .. } = &*path.read_link().await? {
89                            if link_type.contains(LinkType::DIRECTORY) {
90                                // Ensure that there are no infinite link loops, but don't resolve
91                                resolve_symlink_safely(entry.clone()).await?;
92
93                                // Add the directory to `results` if it is a whole match of the glob
94                                handle_file(&mut result, &entry_path, segment, entry);
95                                // Recursively handle the directory
96                                handle_dir(&mut result, entry_path, segment, path).await?;
97                            } else {
98                                handle_file(&mut result, &entry_path, segment, entry);
99                            }
100                        }
101                    }
102                    DirectoryEntry::Other(_) | DirectoryEntry::Error => continue,
103                }
104            }
105        }
106        DirectoryContent::NotFound => {}
107    }
108    Ok(ReadGlobResult::cell(result))
109}
110
111/// Resolve a symlink checking for recursion.
112async fn resolve_symlink_safely(entry: DirectoryEntry) -> Result<DirectoryEntry> {
113    let resolved_entry = entry.clone().resolve_symlink().await?;
114    if resolved_entry != entry && matches!(&resolved_entry, DirectoryEntry::Directory(_)) {
115        // We followed a symlink to a directory
116        // To prevent an infinite loop, which in the case of turbo-tasks would simply
117        // exhaust RAM or go into an infinite loop with the GC we need to check for a
118        // recursive symlink, we need to check for recursion.
119
120        // Recursion can only occur if the symlink is a directory and points to an
121        // ancestor of the current path, which can be detected via a simple prefix
122        // match.
123        let source_path = entry.path().unwrap();
124        if source_path.is_inside_or_equal(&resolved_entry.clone().path().unwrap()) {
125            bail!(
126                "'{}' is a symlink causes that causes an infinite loop!",
127                source_path.path.to_string()
128            )
129        }
130    }
131    Ok(resolved_entry)
132}
133
134/// Traverses all directories that match the given `glob`.
135///
136/// This ensures that the calling task will be invalidated
137/// whenever the directories or contents of the directories change,
138///  but unlike read_glob doesn't accumulate data.
139#[turbo_tasks::function(fs)]
140pub async fn track_glob(
141    directory: FileSystemPath,
142    glob: Vc<Glob>,
143    include_dot_files: bool,
144) -> Result<Vc<Completion>> {
145    track_glob_internal("", directory, glob, include_dot_files).await
146}
147
148#[turbo_tasks::function(fs)]
149async fn track_glob_inner(
150    prefix: RcStr,
151    directory: FileSystemPath,
152    glob: Vc<Glob>,
153    include_dot_files: bool,
154) -> Result<Vc<Completion>> {
155    track_glob_internal(&prefix, directory, glob, include_dot_files).await
156}
157
158async fn track_glob_internal(
159    prefix: &str,
160    directory: FileSystemPath,
161    glob: Vc<Glob>,
162    include_dot_files: bool,
163) -> Result<Vc<Completion>> {
164    let dir = directory.read_dir().await?;
165    let glob_value = glob.await?;
166    let fs = directory.fs().to_resolved().await?;
167    let mut reads = Vec::new();
168    let mut completions = Vec::new();
169    let mut types = Vec::new();
170    match &*dir {
171        DirectoryContent::Entries(entries) => {
172            for (segment, entry) in entries.iter() {
173                if !include_dot_files && segment.starts_with('.') {
174                    continue;
175                }
176                // This is redundant with logic inside of `read_dir` but here we track it separately
177                // so we don't follow symlinks.
178                let entry_path = if prefix.is_empty() {
179                    segment.clone()
180                } else {
181                    format!("{prefix}/{segment}").into()
182                };
183
184                match resolve_symlink_safely(entry.clone()).await? {
185                    DirectoryEntry::Directory(path) => {
186                        if glob_value.can_match_in_directory(&entry_path) {
187                            completions.push(track_glob_inner(
188                                entry_path,
189                                path.clone(),
190                                glob,
191                                include_dot_files,
192                            ));
193                        }
194                    }
195                    DirectoryEntry::File(path) => {
196                        if glob_value.matches(&entry_path) {
197                            reads.push(fs.read(path.clone()))
198                        }
199                    }
200                    DirectoryEntry::Symlink(symlink_path) => unreachable!(
201                        "resolve_symlink_safely() should have resolved all symlinks, but found \
202                         unresolved symlink at path: '{}'. Found path: '{}'. Please report this \
203                         as a bug.",
204                        entry_path, symlink_path
205                    ),
206                    DirectoryEntry::Other(path) => {
207                        if glob_value.matches(&entry_path) {
208                            types.push(path.get_type())
209                        }
210                    }
211                    DirectoryEntry::Error => {}
212                }
213            }
214        }
215        DirectoryContent::NotFound => {}
216    }
217    try_join!(
218        reads.iter().try_join(),
219        types.iter().try_join(),
220        completions.iter().try_join()
221    )?;
222    Ok(Completion::new())
223}
224
#[cfg(test)]
pub mod tests {

    use std::{
        fs::{File, create_dir},
        io::prelude::*,
    };
    // `symlink` only exists on Unix. It (and the other names that only the `#[cfg(unix)]` tests
    // use) must not be imported unconditionally, otherwise `read_glob_basic` could not be built
    // on other platforms.
    #[cfg(unix)]
    use std::{collections::HashMap, os::unix::fs::symlink};

    use turbo_rcstr::{RcStr, rcstr};
    use turbo_tasks::{Completion, Vc};
    #[cfg(unix)]
    use turbo_tasks::{ReadRef, apply_effects};
    use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage};

    use crate::{
        DirectoryEntry, DiskFileSystem, FileContent, FileSystem, FileSystemPath,
        glob::{Glob, GlobOptions},
    };

    /// `read_glob` on a plain directory tree: matches are grouped per directory, and
    /// subdirectories show up both as a match and as a nested result.
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn read_glob_basic() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            // Create a simple directory with a file and a subdirectory containing another file:
            // foo
            // sub/bar
            let path = scratch.path();
            File::create_new(path.join("foo"))
                .unwrap()
                .write_all(b"foo")
                .unwrap();
            create_dir(path.join("sub")).unwrap();
            File::create_new(path.join("sub/bar"))
                .unwrap()
                .write_all(b"bar")
                .unwrap();
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = DiskFileSystem::new(rcstr!("temp"), path);
            let root = fs.root().await?;
            let read_dir = root
                .read_glob(Glob::new(rcstr!("**"), GlobOptions::default()))
                .await?;
            assert_eq!(read_dir.results.len(), 2);
            assert_eq!(
                read_dir.results.get("foo"),
                Some(&DirectoryEntry::File(root.join("foo")?))
            );
            assert_eq!(
                read_dir.results.get("sub"),
                Some(&DirectoryEntry::Directory(root.join("sub")?))
            );
            assert_eq!(read_dir.inner.len(), 1);
            let inner = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(inner.results.len(), 1);
            assert_eq!(
                inner.results.get("bar"),
                Some(&DirectoryEntry::File(root.join("sub/bar")?))
            );
            assert_eq!(inner.inner.len(), 0);

            // Now with a more specific pattern
            let read_dir = root
                .read_glob(Glob::new(rcstr!("**/bar"), GlobOptions::default()))
                .await?;
            assert_eq!(read_dir.results.len(), 0);
            assert_eq!(read_dir.inner.len(), 1);
            let inner = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(inner.results.len(), 1);
            assert_eq!(
                inner.results.get("bar"),
                Some(&DirectoryEntry::File(root.join("sub/bar")?))
            );

            assert_eq!(inner.inner.len(), 0);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    /// `read_glob` reports symlinked files as symlinks and descends into symlinked directories
    /// without rewriting the paths to the link targets.
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn read_glob_symlinks() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            // root.js
            // dir/index.js
            // sub/foo.js
            // sub/link-foo.js -> sub/foo.js
            // sub/link-root.js -> root.js
            // sub/dir -> dir
            let path = scratch.path();
            create_dir(path.join("sub")).unwrap();
            let foo = path.join("sub/foo.js");
            File::create_new(&foo).unwrap().write_all(b"foo").unwrap();
            symlink(&foo, path.join("sub/link-foo.js")).unwrap();

            let root = path.join("root.js");
            File::create_new(&root).unwrap().write_all(b"root").unwrap();
            symlink(&root, path.join("sub/link-root.js")).unwrap();

            let dir = path.join("dir");
            create_dir(&dir).unwrap();
            File::create_new(dir.join("index.js"))
                .unwrap()
                .write_all(b"dir index")
                .unwrap();
            symlink(&dir, path.join("sub/dir")).unwrap();
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = DiskFileSystem::new(rcstr!("temp"), path);
            let root = fs.root().await?;
            // Symlinked files
            let read_dir = root
                .read_glob(Glob::new(rcstr!("sub/*.js"), GlobOptions::default()))
                .await?;
            assert_eq!(read_dir.results.len(), 0);
            let inner = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(
                inner.results,
                HashMap::from_iter([
                    (
                        "link-foo.js".into(),
                        DirectoryEntry::Symlink(root.join("sub/link-foo.js")?),
                    ),
                    (
                        "link-root.js".into(),
                        DirectoryEntry::Symlink(root.join("sub/link-root.js")?),
                    ),
                    (
                        "foo.js".into(),
                        DirectoryEntry::File(root.join("sub/foo.js")?),
                    ),
                ])
            );
            assert_eq!(inner.inner.len(), 0);

            // A symlinked folder
            let read_dir = root
                .read_glob(Glob::new(rcstr!("sub/dir/*"), GlobOptions::default()))
                .await?;
            assert_eq!(read_dir.results.len(), 0);
            let inner_sub = &*read_dir.inner.get("sub").unwrap().await?;
            assert_eq!(inner_sub.results.len(), 0);
            let inner_sub_dir = &*inner_sub.inner.get("dir").unwrap().await?;
            assert_eq!(
                inner_sub_dir.results,
                HashMap::from_iter([(
                    "index.js".into(),
                    DirectoryEntry::File(root.join("sub/dir/index.js")?),
                )])
            );
            assert_eq!(inner_sub_dir.inner.len(), 0);

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    /// Test helper: deletes `path` by writing `FileContent::NotFound` to it.
    #[turbo_tasks::function(operation)]
    pub async fn delete(path: FileSystemPath) -> anyhow::Result<()> {
        path.write(FileContent::NotFound.cell()).await?;
        Ok(())
    }

    /// Test helper: writes the bytes of `contents` to `path`.
    #[turbo_tasks::function(operation)]
    pub async fn write(path: FileSystemPath, contents: RcStr) -> anyhow::Result<()> {
        path.write(
            FileContent::Content(crate::File::from_bytes(contents.to_string().into_bytes())).cell(),
        )
        .await?;
        Ok(())
    }

    /// Test helper: tracks the glob `**` below `path`, with dot files excluded.
    #[turbo_tasks::function(operation)]
    pub fn track_star_star_glob(path: FileSystemPath) -> Vc<Completion> {
        path.track_glob(Glob::new(rcstr!("**"), GlobOptions::default()), false)
    }

    /// `track_glob` is only invalidated by changes to entries it actually tracks, including
    /// the targets of symlinks.
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn track_glob_invalidations() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();

        // Create a directory with a file, a subdirectory with another file, a dot directory
        // and a symlink to a file outside of the directory:
        // link_target.js
        // dir/foo
        // dir/link.js -> link_target.js
        // dir/sub/bar
        // dir/sub/.vim/.gitignore
        let path = scratch.path();
        let dir = path.join("dir");
        create_dir(&dir).unwrap();
        File::create_new(dir.join("foo"))
            .unwrap()
            .write_all(b"foo")
            .unwrap();
        create_dir(dir.join("sub")).unwrap();
        File::create_new(dir.join("sub/bar"))
            .unwrap()
            .write_all(b"bar")
            .unwrap();
        // Add a dot directory containing a dotfile
        create_dir(dir.join("sub/.vim")).unwrap();
        let gitignore = dir.join("sub/.vim/.gitignore");
        File::create_new(&gitignore)
            .unwrap()
            .write_all(b"ignore")
            .unwrap();
        // put a link in the dir that points at a file in the root.
        let link_target = path.join("link_target.js");
        File::create_new(&link_target)
            .unwrap()
            .write_all(b"link_target")
            .unwrap();
        symlink(&link_target, dir.join("link.js")).unwrap();

        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = Vc::upcast::<Box<dyn FileSystem>>(DiskFileSystem::new(rcstr!("temp"), path));
            let dir = fs.root().await?.join("dir")?;
            let read_dir = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            // Delete a file that we shouldn't be tracking
            let delete_result = delete(fs.root().await?.join("dir/sub/.vim/.gitignore")?);
            delete_result.read_strongly_consistent().await?;
            apply_effects(delete_result).await?;

            // The very same result is returned, i.e. nothing was recomputed.
            let read_dir2 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;
            assert!(ReadRef::ptr_eq(&read_dir, &read_dir2));

            // Delete a file that we should be tracking
            let delete_result = delete(fs.root().await?.join("dir/foo")?);
            delete_result.read_strongly_consistent().await?;
            apply_effects(delete_result).await?;

            let read_dir2 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            assert!(!ReadRef::ptr_eq(&read_dir, &read_dir2));

            // Modify a symlink target file
            let write_result = write(
                fs.root().await?.join("link_target.js")?,
                rcstr!("new_contents"),
            );
            write_result.read_strongly_consistent().await?;
            apply_effects(write_result).await?;
            let read_dir3 = track_star_star_glob(dir.clone())
                .read_strongly_consistent()
                .await?;

            assert!(!ReadRef::ptr_eq(&read_dir3, &read_dir2));

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    /// `track_glob` fails with a descriptive error instead of following a symlink loop.
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn track_glob_symlinks_loop() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            // Create a subdirectory with one file and a symlink that points back at the
            // subdirectory itself:
            // sub/foo.js
            // sub/link -> sub
            let path = scratch.path();
            let sub = &path.join("sub");
            create_dir(sub).unwrap();
            let foo = sub.join("foo.js");
            File::create_new(&foo).unwrap().write_all(b"foo").unwrap();
            // put a link in sub that points back at its parent directory
            symlink(sub, sub.join("link")).unwrap();
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = Vc::upcast::<Box<dyn FileSystem>>(DiskFileSystem::new(rcstr!("temp"), path));
            let err = fs
                .root()
                .await?
                .track_glob(Glob::new(rcstr!("**"), GlobOptions::default()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(
                "'sub/link' is a symlink causes that causes an infinite loop!",
                err.root_cause().to_string()
            );

            // Tracking the same glob a second time reports the same error
            let err = fs
                .root()
                .await?
                .track_glob(Glob::new(rcstr!("**"), GlobOptions::default()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(
                "'sub/link' is a symlink causes that causes an infinite loop!",
                err.root_cause().to_string()
            );

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }

    /// `read_glob` fails with a descriptive error instead of following a symlink loop, and so
    /// does a subsequent `track_glob`.
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn read_glob_symlinks_loop() {
        crate::register();
        let scratch = tempfile::tempdir().unwrap();
        {
            // Create a subdirectory with one file and a symlink that points back at the
            // subdirectory itself:
            // sub/foo.js
            // sub/link -> sub
            let path = scratch.path();
            let sub = &path.join("sub");
            create_dir(sub).unwrap();
            let foo = sub.join("foo.js");
            File::create_new(&foo).unwrap().write_all(b"foo").unwrap();
            // put a link in sub that points back at its parent directory
            symlink(sub, sub.join("link")).unwrap();
        }
        let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new(
            BackendOptions::default(),
            noop_backing_storage(),
        ));
        let path: RcStr = scratch.path().to_str().unwrap().into();
        tt.run_once(async {
            let fs = Vc::upcast::<Box<dyn FileSystem>>(DiskFileSystem::new(rcstr!("temp"), path));
            let err = fs
                .root()
                .await?
                .read_glob(Glob::new(rcstr!("**"), GlobOptions::default()))
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(
                "'sub/link' is a symlink causes that causes an infinite loop!",
                err.root_cause().to_string()
            );

            // Same when calling track glob
            let err = fs
                .root()
                .await?
                .track_glob(Glob::new(rcstr!("**"), GlobOptions::default()), false)
                .await
                .expect_err("Should have detected an infinite loop");

            assert_eq!(
                "'sub/link' is a symlink causes that causes an infinite loop!",
                err.root_cause().to_string()
            );

            anyhow::Ok(())
        })
        .await
        .unwrap();
    }
}