aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Kotur <qtr@google.com> 2020-12-21 17:28:16 +0100
committer Jakub Kotur <qtr@google.com> 2021-03-05 16:52:58 +0100
commit f453f93fa719a0ab7609d4dd85ae1310680a7750 (patch)
tree 426594cde11fa04273e12c2d0f61446f477d0c8e
parent f393400a7721f7df03b86c8466b80201bef7d062 (diff)
download walkdir-f453f93fa719a0ab7609d4dd85ae1310680a7750.tar.gz
Initial import of walkdir-2.3.1.
Bug: 155309706 Change-Id: I70e0eb3b8ce7d8c6cbc3538c1893b5ce40c55ab3
-rw-r--r-- .cargo_vcs_info.json 5
-rw-r--r-- .github/workflows/ci.yml 88
-rw-r--r-- .gitignore 10
-rw-r--r-- Cargo.toml 41
-rw-r--r-- Cargo.toml.orig 34
-rw-r--r-- README.md 139
-rw-r--r-- compare/nftw.c 25
-rw-r--r-- compare/walk.py 10
-rw-r--r-- rustfmt.toml 2
-rw-r--r-- src/dent.rs 375
-rw-r--r-- src/error.rs 265
-rw-r--r-- src/lib.rs 1125
-rw-r--r-- src/tests/mod.rs 4
-rw-r--r-- src/tests/recursive.rs 980
-rw-r--r-- src/tests/util.rs 252
-rw-r--r-- src/util.rs 25
16 files changed, 3380 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..95abad6
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+ "git": {
+ "sha1": "00df609016d7e75b3730fcf0cef25fc0efe14204"
+ }
+}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..b284608
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,88 @@
+name: ci
+on:
+ pull_request:
+ push:
+ branches:
+ - master
+ schedule:
+ - cron: '00 01 * * *'
+jobs:
+ test:
+ name: test
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ build:
+ - pinned
+ - pinned-win
+ - stable
+ - beta
+ - nightly
+ - macos
+ - win-msvc
+ - win-gnu
+ include:
+ - build: pinned
+ os: ubuntu-18.04
+ rust: 1.34.0
+ - build: pinned-win
+ os: windows-2019
+ rust: 1.34.0
+ - build: stable
+ os: ubuntu-18.04
+ rust: stable
+ - build: beta
+ os: ubuntu-18.04
+ rust: beta
+ - build: nightly
+ os: ubuntu-18.04
+ rust: nightly
+ - build: macos
+ os: macos-latest
+ rust: stable
+ - build: win-msvc
+ os: windows-2019
+ rust: stable
+ - build: win-gnu
+ os: windows-2019
+ rust: stable-x86_64-gnu
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v1
+ with:
+ fetch-depth: 1
+ - name: Install Rust
+ uses: actions-rs/toolchain@v1
+ with:
+ toolchain: ${{ matrix.rust }}
+ profile: minimal
+ - run: cargo build --verbose
+ - run: cargo doc --verbose
+ - if: startsWith(matrix.build, 'pinned-') == false
+ run: cargo test --verbose
+ - if: matrix.build == 'nightly'
+ run: |
+ set -x
+ cargo generate-lockfile -Z minimal-versions
+ cargo build --verbose
+ cargo test --verbose
+
+ rustfmt:
+ name: rustfmt
+ runs-on: ubuntu-18.04
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v1
+ with:
+ fetch-depth: 1
+ - name: Install Rust
+ uses: actions-rs/toolchain@v1
+ with:
+ toolchain: stable
+ profile: minimal
+ components: rustfmt
+ - name: Install rustfmt
+ run: rustup component add rustfmt
+ - name: Check formatting
+ run: |
+ cargo fmt --all -- --check
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d63756d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+.*.swp
+doc
+tags
+examples/ss10pusa.csv
+build
+target
+Cargo.lock
+scratch*
+bench_large/huge
+tmp
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..313d6f4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,41 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+edition = "2018"
+name = "walkdir"
+version = "2.3.1"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+exclude = ["/ci/*", "/.travis.yml", "/appveyor.yml"]
+description = "Recursively walk a directory."
+homepage = "https://github.com/BurntSushi/walkdir"
+documentation = "https://docs.rs/walkdir/"
+readme = "README.md"
+keywords = ["directory", "recursive", "walk", "iterator"]
+categories = ["filesystem"]
+license = "Unlicense/MIT"
+repository = "https://github.com/BurntSushi/walkdir"
+[dependencies.same-file]
+version = "1.0.1"
+[dev-dependencies.doc-comment]
+version = "0.3"
+[target."cfg(windows)".dependencies.winapi]
+version = "0.3"
+features = ["std", "winnt"]
+
+[target."cfg(windows)".dependencies.winapi-util]
+version = "0.1.1"
+[badges.appveyor]
+repository = "BurntSushi/walkdir"
+
+[badges.travis-ci]
+repository = "BurntSushi/walkdir"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..94fc562
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,34 @@
+[package]
+name = "walkdir"
+version = "2.3.1" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = "Recursively walk a directory."
+documentation = "https://docs.rs/walkdir/"
+homepage = "https://github.com/BurntSushi/walkdir"
+repository = "https://github.com/BurntSushi/walkdir"
+readme = "README.md"
+keywords = ["directory", "recursive", "walk", "iterator"]
+categories = ["filesystem"]
+license = "Unlicense/MIT"
+exclude = ["/ci/*", "/.travis.yml", "/appveyor.yml"]
+edition = "2018"
+
+[badges]
+travis-ci = { repository = "BurntSushi/walkdir" }
+appveyor = { repository = "BurntSushi/walkdir" }
+
+[workspace]
+members = ["walkdir-list"]
+
+[dependencies]
+same-file = "1.0.1"
+
+[target.'cfg(windows)'.dependencies.winapi]
+version = "0.3"
+features = ["std", "winnt"]
+
+[target.'cfg(windows)'.dependencies.winapi-util]
+version = "0.1.1"
+
+[dev-dependencies]
+doc-comment = "0.3"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2b49506
--- /dev/null
+++ b/README.md
@@ -0,0 +1,139 @@
+walkdir
+=======
+A cross platform Rust library for efficiently walking a directory recursively.
+Comes with support for following symbolic links, controlling the number of
+open file descriptors and efficient mechanisms for pruning the entries in the
+directory tree.
+
+[![Build status](https://github.com/BurntSushi/walkdir/workflows/ci/badge.svg)](https://github.com/BurntSushi/walkdir/actions)
+[![](http://meritbadge.herokuapp.com/walkdir)](https://crates.io/crates/walkdir)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+### Documentation
+
+[docs.rs/walkdir](https://docs.rs/walkdir/)
+
+### Usage
+
+To use this crate, add `walkdir` as a dependency to your project's
+`Cargo.toml`:
+
+```toml
+[dependencies]
+walkdir = "2"
+```
+
+### Example
+
+The following code recursively iterates over the directory given and prints
+the path for each entry:
+
+```rust,no_run
+use walkdir::WalkDir;
+
+for entry in WalkDir::new("foo") {
+ let entry = entry.unwrap();
+ println!("{}", entry.path().display());
+}
+```
+
+Or, if you'd like to iterate over all entries and ignore any errors that may
+arise, use `filter_map`. For example, the code below silently skips directories
+that the owner of the running process does not have permission to access:
+
+```rust,no_run
+use walkdir::WalkDir;
+
+for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
+ println!("{}", entry.path().display());
+}
+```
+
+### Example: follow symbolic links
+
+The same code as above, except `follow_links` is enabled:
+
+```rust,no_run
+use walkdir::WalkDir;
+
+for entry in WalkDir::new("foo").follow_links(true) {
+ let entry = entry.unwrap();
+ println!("{}", entry.path().display());
+}
+```
+
+### Example: skip hidden files and directories efficiently on unix
+
+This uses the `filter_entry` iterator adapter to avoid yielding hidden files
+and directories efficiently:
+
+```rust,no_run
+use walkdir::{DirEntry, WalkDir};
+
+fn is_hidden(entry: &DirEntry) -> bool {
+ entry.file_name()
+ .to_str()
+ .map(|s| s.starts_with("."))
+ .unwrap_or(false)
+}
+
+let walker = WalkDir::new("foo").into_iter();
+for entry in walker.filter_entry(|e| !is_hidden(e)) {
+ let entry = entry.unwrap();
+ println!("{}", entry.path().display());
+}
+```
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version is `1.34.0`.
+
+The current policy is that the minimum Rust version required to use this crate
+can be increased in minor version updates. For example, if `crate 1.0` requires
+Rust 1.20.0, then `crate 1.0.z` for all values of `z` will also require Rust
+1.20.0 or newer. However, `crate 1.y` for `y > 0` may require a newer minimum
+version of Rust.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust.
+
+### Performance
+
+The short story is that performance is comparable with `find` and glibc's
+`nftw` on both a warm and cold file cache. In fact, I cannot observe any
+performance difference after running `find /`, `walkdir /` and `nftw /` on my
+local file system (SSD, ~3 million entries). More precisely, I am reasonably
+confident that this crate makes as few system calls and close to as few
+allocations as possible.
+
+I haven't recorded any benchmarks, but here are some things you can try with a
+local checkout of `walkdir`:
+
+```sh
+# The directory you want to recursively walk:
+DIR=$HOME
+
+# If you want to observe perf on a cold file cache, run this before *each*
+# command:
+sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
+
+# To warm the caches
+find $DIR
+
+# Test speed of `find` on warm cache:
+time find $DIR
+
+# Compile and test speed of `walkdir` crate:
+cargo build --release --example walkdir
+time ./target/release/examples/walkdir $DIR
+
+# Compile and test speed of glibc's `nftw`:
+gcc -O3 -o nftw ./compare/nftw.c
+time ./nftw $DIR
+
+# For shits and giggles, test speed of Python's (2 or 3) os.walk:
+time python ./compare/walk.py $DIR
+```
+
+On my system, the performance of `walkdir`, `find` and `nftw` is comparable.
diff --git a/compare/nftw.c b/compare/nftw.c
new file mode 100644
index 0000000..7d36e2f
--- /dev/null
+++ b/compare/nftw.c
@@ -0,0 +1,25 @@
+#define _XOPEN_SOURCE 500
+#include <ftw.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+static int
+display_info(const char *fpath, const struct stat *sb,
+ int tflag, struct FTW *ftwbuf)
+{
+ printf("%s\n", fpath);
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int flags = FTW_PHYS;
+ if (nftw((argc < 2) ? "." : argv[1], display_info, 20, flags) == -1) {
+ perror("nftw");
+ exit(EXIT_FAILURE);
+ }
+ exit(EXIT_SUCCESS);
+}
diff --git a/compare/walk.py b/compare/walk.py
new file mode 100644
index 0000000..303d323
--- /dev/null
+++ b/compare/walk.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import, division, print_function
+
+import os
+import sys
+
+for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
+ for n in dirnames:
+ print(os.path.join(dirpath, n))
+ for n in filenames:
+ print(os.path.join(dirpath, n))
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..aa37a21
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,2 @@
+max_width = 79
+use_small_heuristics = "max"
diff --git a/src/dent.rs b/src/dent.rs
new file mode 100644
index 0000000..a28ed3d
--- /dev/null
+++ b/src/dent.rs
@@ -0,0 +1,375 @@
+use std::ffi::OsStr;
+use std::fmt;
+use std::fs::{self, FileType};
+use std::path::{Path, PathBuf};
+
+use crate::error::Error;
+use crate::Result;
+
+/// A directory entry.
+///
+/// This is the type of value that is yielded from the iterators defined in
+/// this crate.
+///
+/// On Unix systems, this type implements the [`DirEntryExt`] trait, which
+/// provides efficient access to the inode number of the directory entry.
+///
+/// # Differences with `std::fs::DirEntry`
+///
+/// This type mostly mirrors the type by the same name in [`std::fs`]. There
+/// are some differences however:
+///
+/// * All recursive directory iterators must inspect the entry's type.
+/// Therefore, the value is stored and its access is guaranteed to be cheap and
+/// successful.
+/// * [`path`] and [`file_name`] return borrowed variants.
+/// * If [`follow_links`] was enabled on the originating iterator, then all
+/// operations except for [`path`] operate on the link target. Otherwise, all
+/// operations operate on the symbolic link.
+///
+/// [`std::fs`]: https://doc.rust-lang.org/stable/std/fs/index.html
+/// [`path`]: #method.path
+/// [`file_name`]: #method.file_name
+/// [`follow_links`]: struct.WalkDir.html#method.follow_links
+/// [`DirEntryExt`]: trait.DirEntryExt.html
+pub struct DirEntry {
+ /// The path as reported by the [`fs::ReadDir`] iterator (even if it's a
+ /// symbolic link).
+ ///
+ /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
+ path: PathBuf,
+ /// The file type. Necessary for recursive iteration, so store it.
+ ty: FileType,
+ /// Is set when this entry was created from a symbolic link and the user
+ /// expects the iterator to follow symbolic links.
+ follow_link: bool,
+ /// The depth at which this entry was generated relative to the root.
+ depth: usize,
+ /// The underlying inode number (Unix only).
+ #[cfg(unix)]
+ ino: u64,
+ /// The underlying metadata (Windows only). We store this on Windows
+ /// because this comes for free while reading a directory.
+ ///
+ /// We use this to determine whether an entry is a directory or not, which
+ /// works around a bug in Rust's standard library:
+ /// https://github.com/rust-lang/rust/issues/46484
+ #[cfg(windows)]
+ metadata: fs::Metadata,
+}
+
+impl DirEntry {
+ /// The full path that this entry represents.
+ ///
+ /// The full path is created by joining the parents of this entry up to the
+ /// root initially given to [`WalkDir::new`] with the file name of this
+ /// entry.
+ ///
+ /// Note that this *always* returns the path reported by the underlying
+ /// directory entry, even when symbolic links are followed. To get the
+ /// target path, use [`path_is_symlink`] to (cheaply) check if this entry
+ /// corresponds to a symbolic link, and [`std::fs::read_link`] to resolve
+ /// the target.
+ ///
+ /// [`WalkDir::new`]: struct.WalkDir.html#method.new
+ /// [`path_is_symlink`]: struct.DirEntry.html#method.path_is_symlink
+ /// [`std::fs::read_link`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html
+ pub fn path(&self) -> &Path {
+ &self.path
+ }
+
+ /// The full path that this entry represents.
+ ///
+ /// Analogous to [`path`], but moves ownership of the path.
+ ///
+ /// [`path`]: struct.DirEntry.html#method.path
+ pub fn into_path(self) -> PathBuf {
+ self.path
+ }
+
+ /// Returns `true` if and only if this entry was created from a symbolic
+ /// link. This is unaffected by the [`follow_links`] setting.
+ ///
+ /// When `true`, the value returned by the [`path`] method is a
+ /// symbolic link name. To get the full target path, you must call
+ /// [`std::fs::read_link(entry.path())`].
+ ///
+ /// [`path`]: struct.DirEntry.html#method.path
+ /// [`follow_links`]: struct.WalkDir.html#method.follow_links
+ /// [`std::fs::read_link(entry.path())`]: https://doc.rust-lang.org/stable/std/fs/fn.read_link.html
+ pub fn path_is_symlink(&self) -> bool {
+ self.ty.is_symlink() || self.follow_link
+ }
+
+ /// Return the metadata for the file that this entry points to.
+ ///
+ /// This will follow symbolic links if and only if the [`WalkDir`] value
+ /// has [`follow_links`] enabled.
+ ///
+ /// # Platform behavior
+ ///
+ /// By default this calls [`std::fs::symlink_metadata`] (on Windows, the
+ /// metadata stored in the entry is returned instead). If this entry is a
+ /// symbolic link and [`follow_links`] is enabled, then
+ /// [`std::fs::metadata`] is called.
+ ///
+ /// # Errors
+ ///
+ /// Similar to [`std::fs::metadata`], returns errors for path values that
+ /// the program does not have permissions to access or if the path does not
+ /// exist.
+ ///
+ /// [`WalkDir`]: struct.WalkDir.html
+ /// [`follow_links`]: struct.WalkDir.html#method.follow_links
+ /// [`std::fs::metadata`]: https://doc.rust-lang.org/std/fs/fn.metadata.html
+ /// [`std::fs::symlink_metadata`]: https://doc.rust-lang.org/stable/std/fs/fn.symlink_metadata.html
+ pub fn metadata(&self) -> Result<fs::Metadata> {
+ self.metadata_internal()
+ }
+
+ #[cfg(windows)]
+ fn metadata_internal(&self) -> Result<fs::Metadata> {
+ if self.follow_link {
+ fs::metadata(&self.path)
+ } else {
+ Ok(self.metadata.clone())
+ }
+ .map_err(|err| Error::from_entry(self, err))
+ }
+
+ #[cfg(not(windows))]
+ fn metadata_internal(&self) -> Result<fs::Metadata> {
+ if self.follow_link {
+ fs::metadata(&self.path)
+ } else {
+ fs::symlink_metadata(&self.path)
+ }
+ .map_err(|err| Error::from_entry(self, err))
+ }
+
+ /// Return the file type for the file that this entry points to.
+ ///
+ /// If this is a symbolic link and [`follow_links`] is `true`, then this
+ /// returns the type of the target.
+ ///
+ /// This never makes any system calls.
+ ///
+ /// [`follow_links`]: struct.WalkDir.html#method.follow_links
+ pub fn file_type(&self) -> fs::FileType {
+ self.ty
+ }
+
+ /// Return the file name of this entry.
+ ///
+ /// If this entry has no file name (e.g., `/`), then the full path is
+ /// returned.
+ pub fn file_name(&self) -> &OsStr {
+ self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
+ }
+
+ /// Returns the depth at which this entry was created relative to the root.
+ ///
+ /// The smallest depth is `0` and always corresponds to the path given
+ /// to the `new` function on `WalkDir`. Its direct descendants have depth
+ /// `1`, and their descendants have depth `2`, and so on.
+ pub fn depth(&self) -> usize {
+ self.depth
+ }
+
+ /// Returns true if and only if this entry points to a directory.
+ ///
+ /// This works around a bug in Rust's standard library:
+ /// https://github.com/rust-lang/rust/issues/46484
+ #[cfg(windows)]
+ pub(crate) fn is_dir(&self) -> bool {
+ use std::os::windows::fs::MetadataExt;
+ use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;
+ self.metadata.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0
+ }
+
+ /// Returns true if and only if this entry points to a directory.
+ #[cfg(not(windows))]
+ pub(crate) fn is_dir(&self) -> bool {
+ self.ty.is_dir()
+ }
+
+ #[cfg(windows)]
+ pub(crate) fn from_entry(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ) -> Result<DirEntry> {
+ let path = ent.path();
+ let ty = ent
+ .file_type()
+ .map_err(|err| Error::from_path(depth, path.clone(), err))?;
+ let md = ent
+ .metadata()
+ .map_err(|err| Error::from_path(depth, path.clone(), err))?;
+ Ok(DirEntry {
+ path: path,
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ metadata: md,
+ })
+ }
+
+ #[cfg(unix)]
+ pub(crate) fn from_entry(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ) -> Result<DirEntry> {
+ use std::os::unix::fs::DirEntryExt;
+
+ let ty = ent
+ .file_type()
+ .map_err(|err| Error::from_path(depth, ent.path(), err))?;
+ Ok(DirEntry {
+ path: ent.path(),
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ ino: ent.ino(),
+ })
+ }
+
+ #[cfg(not(any(unix, windows)))]
+ pub(crate) fn from_entry(
+ depth: usize,
+ ent: &fs::DirEntry,
+ ) -> Result<DirEntry> {
+ let ty = ent
+ .file_type()
+ .map_err(|err| Error::from_path(depth, ent.path(), err))?;
+ Ok(DirEntry {
+ path: ent.path(),
+ ty: ty,
+ follow_link: false,
+ depth: depth,
+ })
+ }
+
+ #[cfg(windows)]
+ pub(crate) fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ follow: bool,
+ ) -> Result<DirEntry> {
+ let md = if follow {
+ fs::metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ } else {
+ fs::symlink_metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ };
+ Ok(DirEntry {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: follow,
+ depth: depth,
+ metadata: md,
+ })
+ }
+
+ #[cfg(unix)]
+ pub(crate) fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ follow: bool,
+ ) -> Result<DirEntry> {
+ use std::os::unix::fs::MetadataExt;
+
+ let md = if follow {
+ fs::metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ } else {
+ fs::symlink_metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ };
+ Ok(DirEntry {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: follow,
+ depth: depth,
+ ino: md.ino(),
+ })
+ }
+
+ #[cfg(not(any(unix, windows)))]
+ pub(crate) fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ follow: bool,
+ ) -> Result<DirEntry> {
+ let md = if follow {
+ fs::metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ } else {
+ fs::symlink_metadata(&pb)
+ .map_err(|err| Error::from_path(depth, pb.clone(), err))?
+ };
+ Ok(DirEntry {
+ path: pb,
+ ty: md.file_type(),
+ follow_link: follow,
+ depth: depth,
+ })
+ }
+}
+
+impl Clone for DirEntry {
+ #[cfg(windows)]
+ fn clone(&self) -> DirEntry {
+ DirEntry {
+ path: self.path.clone(),
+ ty: self.ty,
+ follow_link: self.follow_link,
+ depth: self.depth,
+ metadata: self.metadata.clone(),
+ }
+ }
+
+ #[cfg(unix)]
+ fn clone(&self) -> DirEntry {
+ DirEntry {
+ path: self.path.clone(),
+ ty: self.ty,
+ follow_link: self.follow_link,
+ depth: self.depth,
+ ino: self.ino,
+ }
+ }
+
+ #[cfg(not(any(unix, windows)))]
+ fn clone(&self) -> DirEntry {
+ DirEntry {
+ path: self.path.clone(),
+ ty: self.ty,
+ follow_link: self.follow_link,
+ depth: self.depth,
+ }
+ }
+}
+
+impl fmt::Debug for DirEntry {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "DirEntry({:?})", self.path)
+ }
+}
+
+/// Unix-specific extension methods for `walkdir::DirEntry`
+#[cfg(unix)]
+pub trait DirEntryExt {
+ /// Returns the underlying `d_ino` field in the contained `dirent`
+ /// structure.
+ fn ino(&self) -> u64;
+}
+
+#[cfg(unix)]
+impl DirEntryExt for DirEntry {
+ /// Returns the underlying `d_ino` field in the contained `dirent`
+ /// structure.
+ fn ino(&self) -> u64 {
+ self.ino
+ }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..3fb619c
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,265 @@
+use std::error;
+use std::fmt;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use crate::DirEntry;
+
+/// An error produced by recursively walking a directory.
+///
+/// This error type is a light wrapper around [`std::io::Error`]. In
+/// particular, it adds the following information:
+///
+/// * The depth at which the error occurred in the file tree, relative to the
+/// root.
+/// * The path, if any, associated with the IO error.
+/// * An indication that a loop occurred when following symbolic links. In this
+/// case, there is no underlying IO error.
+///
+/// To maintain good ergonomics, this type has a
+/// [`impl From<Error> for std::io::Error`][impl] defined which preserves the original context.
+/// This allows you to use an [`io::Result`] with methods in this crate if you don't care about
+/// accessing the underlying error data in a structured form.
+///
+/// [`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
+/// [impl]: struct.Error.html#impl-From%3CError%3E
+#[derive(Debug)]
+pub struct Error {
+ depth: usize,
+ inner: ErrorInner,
+}
+
+#[derive(Debug)]
+enum ErrorInner {
+ Io { path: Option<PathBuf>, err: io::Error },
+ Loop { ancestor: PathBuf, child: PathBuf },
+}
+
+impl Error {
+ /// Returns the path associated with this error if one exists.
+ ///
+ /// For example, if an error occurred while opening a directory handle,
+ /// the error will include the path passed to [`std::fs::read_dir`].
+ ///
+ /// [`std::fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
+ pub fn path(&self) -> Option<&Path> {
+ match self.inner {
+ ErrorInner::Io { path: None, .. } => None,
+ ErrorInner::Io { path: Some(ref path), .. } => Some(path),
+ ErrorInner::Loop { ref child, .. } => Some(child),
+ }
+ }
+
+ /// Returns the path at which a cycle was detected.
+ ///
+ /// If no cycle was detected, [`None`] is returned.
+ ///
+ /// A cycle is detected when a directory entry is equivalent to one of
+ /// its ancestors.
+ ///
+ /// To get the path to the child directory entry in the cycle, use the
+ /// [`path`] method.
+ ///
+ /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
+ /// [`path`]: struct.Error.html#method.path
+ pub fn loop_ancestor(&self) -> Option<&Path> {
+ match self.inner {
+ ErrorInner::Loop { ref ancestor, .. } => Some(ancestor),
+ _ => None,
+ }
+ }
+
+ /// Returns the depth at which this error occurred relative to the root.
+ ///
+ /// The smallest depth is `0` and always corresponds to the path given to
+ /// the [`new`] function on [`WalkDir`]. Its direct descendants have depth
+ /// `1`, and their descendants have depth `2`, and so on.
+ ///
+ /// [`new`]: struct.WalkDir.html#method.new
+ /// [`WalkDir`]: struct.WalkDir.html
+ pub fn depth(&self) -> usize {
+ self.depth
+ }
+
+ /// Inspect the original [`io::Error`] if there is one.
+ ///
+ /// [`None`] is returned if the [`Error`] doesn't correspond to an
+ /// [`io::Error`]. This might happen, for example, when the error was
+ /// produced because a cycle was found in the directory tree while
+ /// following symbolic links.
+ ///
+ /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To
+ /// obtain an owned value, use [`into_io_error`] instead.
+ ///
+ /// > This is the original [`io::Error`] and is _not_ the same as
+ /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the
+ /// > error.
+ ///
+ /// # Example
+ ///
+ /// ```rust,no_run
+ /// use std::io;
+ /// use std::path::Path;
+ ///
+ /// use walkdir::WalkDir;
+ ///
+ /// for entry in WalkDir::new("foo") {
+ /// match entry {
+ /// Ok(entry) => println!("{}", entry.path().display()),
+ /// Err(err) => {
+ /// let path = err.path().unwrap_or(Path::new("")).display();
+ /// println!("failed to access entry {}", path);
+ /// if let Some(inner) = err.io_error() {
+ /// match inner.kind() {
+ /// io::ErrorKind::InvalidData => {
+ /// println!(
+ /// "entry contains invalid data: {}",
+ /// inner)
+ /// }
+ /// io::ErrorKind::PermissionDenied => {
+ /// println!(
+ /// "Missing permission to read entry: {}",
+ /// inner)
+ /// }
+ /// _ => {
+ /// println!(
+ /// "Unexpected error occurred: {}",
+ /// inner)
+ /// }
+ /// }
+ /// }
+ /// }
+ /// }
+ /// }
+ /// ```
+ ///
+ /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None
+ /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+ /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html
+ /// [`Error`]: struct.Error.html
+ /// [`into_io_error`]: struct.Error.html#method.into_io_error
+ /// [impl]: struct.Error.html#impl-From%3CError%3E
+ pub fn io_error(&self) -> Option<&io::Error> {
+ match self.inner {
+ ErrorInner::Io { ref err, .. } => Some(err),
+ ErrorInner::Loop { .. } => None,
+ }
+ }
+
+ /// Similar to [`io_error`] except consumes self to convert to the original
+ /// [`io::Error`] if one exists.
+ ///
+ /// [`io_error`]: struct.Error.html#method.io_error
+ /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+ pub fn into_io_error(self) -> Option<io::Error> {
+ match self.inner {
+ ErrorInner::Io { err, .. } => Some(err),
+ ErrorInner::Loop { .. } => None,
+ }
+ }
+
+ pub(crate) fn from_path(
+ depth: usize,
+ pb: PathBuf,
+ err: io::Error,
+ ) -> Self {
+ Error {
+ depth: depth,
+ inner: ErrorInner::Io { path: Some(pb), err: err },
+ }
+ }
+
+ pub(crate) fn from_entry(dent: &DirEntry, err: io::Error) -> Self {
+ Error {
+ depth: dent.depth(),
+ inner: ErrorInner::Io {
+ path: Some(dent.path().to_path_buf()),
+ err: err,
+ },
+ }
+ }
+
+ pub(crate) fn from_io(depth: usize, err: io::Error) -> Self {
+ Error { depth: depth, inner: ErrorInner::Io { path: None, err: err } }
+ }
+
+ pub(crate) fn from_loop(
+ depth: usize,
+ ancestor: &Path,
+ child: &Path,
+ ) -> Self {
+ Error {
+ depth: depth,
+ inner: ErrorInner::Loop {
+ ancestor: ancestor.to_path_buf(),
+ child: child.to_path_buf(),
+ },
+ }
+ }
+}
+
+impl error::Error for Error {
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ match self.inner {
+ ErrorInner::Io { ref err, .. } => err.description(),
+ ErrorInner::Loop { .. } => "file system loop found",
+ }
+ }
+
+ fn cause(&self) -> Option<&dyn error::Error> {
+ self.source()
+ }
+
+ fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+ match self.inner {
+ ErrorInner::Io { ref err, .. } => Some(err),
+ ErrorInner::Loop { .. } => None,
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.inner {
+ ErrorInner::Io { path: None, ref err } => err.fmt(f),
+ ErrorInner::Io { path: Some(ref path), ref err } => write!(
+ f,
+ "IO error for operation on {}: {}",
+ path.display(),
+ err
+ ),
+ ErrorInner::Loop { ref ancestor, ref child } => write!(
+ f,
+ "File system loop found: \
+ {} points to an ancestor {}",
+ child.display(),
+ ancestor.display()
+ ),
+ }
+ }
+}
+
+impl From<Error> for io::Error {
+ /// Convert the [`Error`] to an [`io::Error`], preserving the original
+ /// [`Error`] as the ["inner error"]. Note that this also makes the display
+ /// of the error include the context.
+ ///
+ /// This is different from [`into_io_error`] which returns the original
+ /// [`io::Error`].
+ ///
+ /// [`Error`]: struct.Error.html
+ /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+ /// ["inner error"]: https://doc.rust-lang.org/std/io/struct.Error.html#method.into_inner
+ /// [`into_io_error`]: struct.Error.html#method.into_io_error
+ fn from(walk_err: Error) -> io::Error {
+ let kind = match walk_err {
+ Error { inner: ErrorInner::Io { ref err, .. }, .. } => err.kind(),
+ Error { inner: ErrorInner::Loop { .. }, .. } => {
+ io::ErrorKind::Other
+ }
+ };
+ io::Error::new(kind, walk_err)
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..5132dd5
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,1125 @@
+/*!
+Crate `walkdir` provides an efficient and cross platform implementation
+of recursive directory traversal. Several options are exposed to control
+iteration, such as whether to follow symbolic links (default off), limit the
+maximum number of simultaneous open file descriptors and the ability to
+efficiently skip descending into directories.
+
+To use this crate, add `walkdir` as a dependency to your project's
+`Cargo.toml`:
+
+```toml
+[dependencies]
+walkdir = "2"
+```
+
+# From the top
+
+The [`WalkDir`] type builds iterators. The [`DirEntry`] type describes values
+yielded by the iterator. Finally, the [`Error`] type is a small wrapper around
+[`std::io::Error`] with additional information, such as if a loop was detected
+while following symbolic links (not enabled by default).
+
+[`WalkDir`]: struct.WalkDir.html
+[`DirEntry`]: struct.DirEntry.html
+[`Error`]: struct.Error.html
+[`std::io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html
+
+# Example
+
+The following code recursively iterates over the directory given and prints
+the path for each entry:
+
+```no_run
+use walkdir::WalkDir;
+# use walkdir::Error;
+
+# fn try_main() -> Result<(), Error> {
+for entry in WalkDir::new("foo") {
+ println!("{}", entry?.path().display());
+}
+# Ok(())
+# }
+```
+
+Or, if you'd like to iterate over all entries and ignore any errors that
+may arise, use [`filter_map`]. (For example, the code below silently skips
+directories that the owner of the running process does not have permission
+to access.)
+
+```no_run
+use walkdir::WalkDir;
+
+for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
+ println!("{}", entry.path().display());
+}
+```
+
+[`filter_map`]: https://doc.rust-lang.org/stable/std/iter/trait.Iterator.html#method.filter_map
+
+# Example: follow symbolic links
+
+The same code as above, except [`follow_links`] is enabled:
+
+```no_run
+use walkdir::WalkDir;
+# use walkdir::Error;
+
+# fn try_main() -> Result<(), Error> {
+for entry in WalkDir::new("foo").follow_links(true) {
+ println!("{}", entry?.path().display());
+}
+# Ok(())
+# }
+```
+
+[`follow_links`]: struct.WalkDir.html#method.follow_links
+
+# Example: skip hidden files and directories on unix
+
+This uses the [`filter_entry`] iterator adapter to avoid yielding hidden files
+and directories efficiently (i.e. without recursing into hidden directories):
+
+```no_run
+use walkdir::{DirEntry, WalkDir};
+# use walkdir::Error;
+
+fn is_hidden(entry: &DirEntry) -> bool {
+ entry.file_name()
+ .to_str()
+ .map(|s| s.starts_with("."))
+ .unwrap_or(false)
+}
+
+# fn try_main() -> Result<(), Error> {
+let walker = WalkDir::new("foo").into_iter();
+for entry in walker.filter_entry(|e| !is_hidden(e)) {
+ println!("{}", entry?.path().display());
+}
+# Ok(())
+# }
+```
+
+[`filter_entry`]: struct.IntoIter.html#method.filter_entry
+*/
+
+#![deny(missing_docs)]
+#![allow(unknown_lints)]
+
+#[cfg(test)]
+doc_comment::doctest!("../README.md");
+
+use std::cmp::{min, Ordering};
+use std::fmt;
+use std::fs::{self, ReadDir};
+use std::io;
+use std::path::{Path, PathBuf};
+use std::result;
+use std::vec;
+
+use same_file::Handle;
+
+pub use crate::dent::DirEntry;
+#[cfg(unix)]
+pub use crate::dent::DirEntryExt;
+pub use crate::error::Error;
+
+mod dent;
+mod error;
+#[cfg(test)]
+mod tests;
+mod util;
+
+/// Like try, but for iterators that return [`Option<Result<_, _>>`].
+///
+/// [`Option<Result<_, _>>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
+macro_rules! itry {
+ ($e:expr) => {
+ match $e {
+ Ok(v) => v,
+ Err(err) => return Some(Err(From::from(err))),
+ }
+ };
+}
+
+/// A result type for walkdir operations.
+///
+/// Note that this result type embeds the error type in this crate. This
+/// is only useful if you care about the additional information provided by
+/// the error (such as the path associated with the error or whether a loop
+/// was detected). If you want things to Just Work, then you can use
+/// [`io::Result`] instead since the error type in this package will
+/// automatically convert to an `io::Error` when using the [`try!`] macro.
+///
+/// [`io::Result`]: https://doc.rust-lang.org/stable/std/io/type.Result.html
+/// [`try!`]: https://doc.rust-lang.org/stable/std/macro.try.html
+pub type Result<T> = ::std::result::Result<T, Error>;
+
+/// A builder to create an iterator for recursively walking a directory.
+///
+/// Results are returned in depth first fashion, with directories yielded
+/// before their contents. If [`contents_first`] is true, contents are yielded
+/// before their directories. The order is unspecified but if [`sort_by`] is
+/// given, directory entries are sorted according to this function. Directory
+/// entries `.` and `..` are always omitted.
+///
+/// If an error occurs at any point during iteration, then it is returned in
+/// place of its corresponding directory entry and iteration continues as
+/// normal. If an error occurs while opening a directory for reading, then it
+/// is not descended into (but the error is still yielded by the iterator).
+/// Iteration may be stopped at any time. When the iterator is destroyed, all
+/// resources associated with it are freed.
+///
+/// [`contents_first`]: struct.WalkDir.html#method.contents_first
+/// [`sort_by`]: struct.WalkDir.html#method.sort_by
+///
+/// # Usage
+///
+/// This type implements [`IntoIterator`] so that it may be used as the subject
+/// of a `for` loop. You may need to call [`into_iter`] explicitly if you want
+/// to use iterator adapters such as [`filter_entry`].
+///
+/// Idiomatic use of this type should use method chaining to set desired
+/// options. For example, this only shows entries with a depth of `1`, `2` or
+/// `3` (relative to `foo`):
+///
+/// ```no_run
+/// use walkdir::WalkDir;
+/// # use walkdir::Error;
+///
+/// # fn try_main() -> Result<(), Error> {
+/// for entry in WalkDir::new("foo").min_depth(1).max_depth(3) {
+/// println!("{}", entry?.path().display());
+/// }
+/// # Ok(())
+/// # }
+/// ```
+///
+/// [`IntoIterator`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html
+/// [`into_iter`]: https://doc.rust-lang.org/stable/std/iter/trait.IntoIterator.html#tymethod.into_iter
+/// [`filter_entry`]: struct.IntoIter.html#method.filter_entry
+///
+/// Note that the iterator by default includes the top-most directory. Since
+/// this is the only directory yielded with depth `0`, it is easy to ignore it
+/// with the [`min_depth`] setting:
+///
+/// ```no_run
+/// use walkdir::WalkDir;
+/// # use walkdir::Error;
+///
+/// # fn try_main() -> Result<(), Error> {
+/// for entry in WalkDir::new("foo").min_depth(1) {
+/// println!("{}", entry?.path().display());
+/// }
+/// # Ok(())
+/// # }
+/// ```
+///
+/// [`min_depth`]: struct.WalkDir.html#method.min_depth
+///
+/// This will only return descendents of the `foo` directory and not `foo`
+/// itself.
+///
+/// # Loops
+///
+/// This iterator (like most/all recursive directory iterators) assumes that
+/// no loops can be made with *hard* links on your file system. In particular,
+/// this would require creating a hard link to a directory such that it creates
+/// a loop. On most platforms, this operation is illegal.
+///
+/// Note that when following symbolic/soft links, loops are detected and an
+/// error is reported.
+#[derive(Debug)]
+pub struct WalkDir {
+ opts: WalkDirOptions,
+ root: PathBuf,
+}
+
+struct WalkDirOptions {
+ follow_links: bool,
+ max_open: usize,
+ min_depth: usize,
+ max_depth: usize,
+ sorter: Option<
+ Box<
+ dyn FnMut(&DirEntry, &DirEntry) -> Ordering
+ + Send
+ + Sync
+ + 'static,
+ >,
+ >,
+ contents_first: bool,
+ same_file_system: bool,
+}
+
+impl fmt::Debug for WalkDirOptions {
+ fn fmt(
+ &self,
+ f: &mut fmt::Formatter<'_>,
+ ) -> result::Result<(), fmt::Error> {
+ let sorter_str = if self.sorter.is_some() {
+ // FnMut isn't `Debug`
+ "Some(...)"
+ } else {
+ "None"
+ };
+ f.debug_struct("WalkDirOptions")
+ .field("follow_links", &self.follow_links)
+ .field("max_open", &self.max_open)
+ .field("min_depth", &self.min_depth)
+ .field("max_depth", &self.max_depth)
+ .field("sorter", &sorter_str)
+ .field("contents_first", &self.contents_first)
+ .field("same_file_system", &self.same_file_system)
+ .finish()
+ }
+}
+
+impl WalkDir {
+ /// Create a builder for a recursive directory iterator starting at the
+ /// file path `root`. If `root` is a directory, then it is the first item
+ /// yielded by the iterator. If `root` is a file, then it is the first
+ /// and only item yielded by the iterator. If `root` is a symlink, then it
+ /// is always followed for the purposes of directory traversal. (A root
+ /// `DirEntry` still obeys its documentation with respect to symlinks and
+ /// the `follow_links` setting.)
+ pub fn new<P: AsRef<Path>>(root: P) -> Self {
+ WalkDir {
+ opts: WalkDirOptions {
+ follow_links: false,
+ max_open: 10,
+ min_depth: 0,
+ max_depth: ::std::usize::MAX,
+ sorter: None,
+ contents_first: false,
+ same_file_system: false,
+ },
+ root: root.as_ref().to_path_buf(),
+ }
+ }
+
+ /// Set the minimum depth of entries yielded by the iterator.
+ ///
+ /// The smallest depth is `0` and always corresponds to the path given
+ /// to the `new` function on this type. Its direct descendents have depth
+ /// `1`, and their descendents have depth `2`, and so on.
+ pub fn min_depth(mut self, depth: usize) -> Self {
+ self.opts.min_depth = depth;
+ if self.opts.min_depth > self.opts.max_depth {
+ self.opts.min_depth = self.opts.max_depth;
+ }
+ self
+ }
+
+ /// Set the maximum depth of entries yielded by the iterator.
+ ///
+ /// The smallest depth is `0` and always corresponds to the path given
+ /// to the `new` function on this type. Its direct descendents have depth
+ /// `1`, and their descendents have depth `2`, and so on.
+ ///
+ /// Note that this will not simply filter the entries of the iterator, but
+ /// it will actually avoid descending into directories when the depth is
+ /// exceeded.
+ pub fn max_depth(mut self, depth: usize) -> Self {
+ self.opts.max_depth = depth;
+ if self.opts.max_depth < self.opts.min_depth {
+ self.opts.max_depth = self.opts.min_depth;
+ }
+ self
+ }
+
+ /// Follow symbolic links. By default, this is disabled.
+ ///
+ /// When `yes` is `true`, symbolic links are followed as if they were
+ /// normal directories and files. If a symbolic link is broken or is
+ /// involved in a loop, an error is yielded.
+ ///
+ /// When enabled, the yielded [`DirEntry`] values represent the target of
+ /// the link while the path corresponds to the link. See the [`DirEntry`]
+ /// type for more details.
+ ///
+ /// [`DirEntry`]: struct.DirEntry.html
+ pub fn follow_links(mut self, yes: bool) -> Self {
+ self.opts.follow_links = yes;
+ self
+ }
+
+ /// Set the maximum number of simultaneously open file descriptors used
+ /// by the iterator.
+ ///
+ /// `n` must be greater than or equal to `1`. If `n` is `0`, then it is set
+ /// to `1` automatically. If this is not set, then it defaults to some
+ /// reasonably low number.
+ ///
+ /// This setting has no impact on the results yielded by the iterator
+ /// (even when `n` is `1`). Instead, this setting represents a trade off
+ /// between scarce resources (file descriptors) and memory. Namely, when
+ /// the maximum number of file descriptors is reached and a new directory
+ /// needs to be opened to continue iteration, then a previous directory
+ /// handle is closed and has its unyielded entries stored in memory. In
+ /// practice, this is a satisfying trade off because it scales with respect
+ /// to the *depth* of your file tree. Therefore, low values (even `1`) are
+ /// acceptable.
+ ///
+ /// Note that this value does not impact the number of system calls made by
+ /// an exhausted iterator.
+ ///
+ /// # Platform behavior
+ ///
+ /// On Windows, if `follow_links` is enabled, then this limit is not
+ /// respected. In particular, the maximum number of file descriptors opened
+ /// is proportional to the depth of the directory tree traversed.
+ pub fn max_open(mut self, mut n: usize) -> Self {
+ if n == 0 {
+ n = 1;
+ }
+ self.opts.max_open = n;
+ self
+ }
+
+ /// Set a function for sorting directory entries.
+ ///
+ /// If a compare function is set, the resulting iterator will return all
+ /// paths in sorted order. The compare function will be called to compare
+ /// entries from the same directory.
+ ///
+ /// ```rust,no_run
+ /// use std::cmp;
+ /// use std::ffi::OsString;
+ /// use walkdir::WalkDir;
+ ///
+ /// WalkDir::new("foo").sort_by(|a,b| a.file_name().cmp(b.file_name()));
+ /// ```
+ pub fn sort_by<F>(mut self, cmp: F) -> Self
+ where
+ F: FnMut(&DirEntry, &DirEntry) -> Ordering + Send + Sync + 'static,
+ {
+ self.opts.sorter = Some(Box::new(cmp));
+ self
+ }
+
+ /// Yield a directory's contents before the directory itself. By default,
+ /// this is disabled.
+ ///
+ /// When `yes` is `false` (as is the default), the directory is yielded
+ /// before its contents are read. This is useful when, e.g. you want to
+ /// skip processing of some directories.
+ ///
+ /// When `yes` is `true`, the iterator yields the contents of a directory
+ /// before yielding the directory itself. This is useful when, e.g. you
+ /// want to recursively delete a directory.
+ ///
+ /// # Example
+ ///
+ /// Assume the following directory tree:
+ ///
+ /// ```text
+ /// foo/
+ /// abc/
+ /// qrs
+ /// tuv
+ /// def/
+ /// ```
+ ///
+ /// With contents_first disabled (the default), the following code visits
+ /// the directory tree in depth-first order:
+ ///
+ /// ```no_run
+ /// use walkdir::WalkDir;
+ ///
+ /// for entry in WalkDir::new("foo") {
+ /// let entry = entry.unwrap();
+ /// println!("{}", entry.path().display());
+ /// }
+ ///
+ /// // foo
+ /// // foo/abc
+ /// // foo/abc/qrs
+ /// // foo/abc/tuv
+ /// // foo/def
+ /// ```
+ ///
+ /// With contents_first enabled:
+ ///
+ /// ```no_run
+ /// use walkdir::WalkDir;
+ ///
+ /// for entry in WalkDir::new("foo").contents_first(true) {
+ /// let entry = entry.unwrap();
+ /// println!("{}", entry.path().display());
+ /// }
+ ///
+ /// // foo/abc/qrs
+ /// // foo/abc/tuv
+ /// // foo/abc
+ /// // foo/def
+ /// // foo
+ /// ```
+ pub fn contents_first(mut self, yes: bool) -> Self {
+ self.opts.contents_first = yes;
+ self
+ }
+
+ /// Do not cross file system boundaries.
+ ///
+ /// When this option is enabled, directory traversal will not descend into
+ /// directories that are on a different file system from the root path.
+ ///
+ /// Currently, this option is only supported on Unix and Windows. If this
+ /// option is used on an unsupported platform, then directory traversal
+ /// will immediately return an error and will not yield any entries.
+ pub fn same_file_system(mut self, yes: bool) -> Self {
+ self.opts.same_file_system = yes;
+ self
+ }
+}
+
+impl IntoIterator for WalkDir {
+ type Item = Result<DirEntry>;
+ type IntoIter = IntoIter;
+
+ fn into_iter(self) -> IntoIter {
+ IntoIter {
+ opts: self.opts,
+ start: Some(self.root),
+ stack_list: vec![],
+ stack_path: vec![],
+ oldest_opened: 0,
+ depth: 0,
+ deferred_dirs: vec![],
+ root_device: None,
+ }
+ }
+}
+
+/// An iterator for recursively descending into a directory.
+///
+/// A value with this type must be constructed with the [`WalkDir`] type, which
+/// uses a builder pattern to set options such as min/max depth, max open file
+/// descriptors and whether the iterator should follow symbolic links. After
+/// constructing a `WalkDir`, call [`.into_iter()`] at the end of the chain.
+///
+/// The order of elements yielded by this iterator is unspecified.
+///
+/// [`WalkDir`]: struct.WalkDir.html
+/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
+#[derive(Debug)]
+pub struct IntoIter {
+ /// Options specified in the builder. Depths, max fds, etc.
+ opts: WalkDirOptions,
+ /// The start path.
+ ///
+ /// This is only `Some(...)` at the beginning. After the first iteration,
+ /// this is always `None`.
+ start: Option<PathBuf>,
+ /// A stack of open (up to max fd) or closed handles to directories.
+ /// An open handle is a plain [`fs::ReadDir`] while a closed handle is
+ /// a `Vec<fs::DirEntry>` corresponding to the as-of-yet unconsumed entries.
+ ///
+ /// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
+ stack_list: Vec<DirList>,
+ /// A stack of file paths.
+ ///
+ /// This is *only* used when [`follow_links`] is enabled. In all other
+ /// cases this stack is empty.
+ ///
+ /// [`follow_links`]: struct.WalkDir.html#method.follow_links
+ stack_path: Vec<Ancestor>,
+ /// An index into `stack_list` that points to the oldest open directory
+ /// handle. If the maximum fd limit is reached and a new directory needs to
+ /// be read, the handle at this index is closed before the new directory is
+ /// opened.
+ oldest_opened: usize,
+ /// The current depth of iteration (the length of the stack at the
+ /// beginning of each iteration).
+ depth: usize,
+ /// A list of DirEntries corresponding to directories, that are
+ /// yielded after their contents have been fully yielded. This is only
+ /// used when `contents_first` is enabled.
+ deferred_dirs: Vec<DirEntry>,
+ /// The device of the root file path when the first call to `next` was
+ /// made.
+ ///
+ /// If the `same_file_system` option isn't enabled, then this is always
+ /// `None`. Conversely, if it is enabled, this is always `Some(...)` after
+ /// handling the root path.
+ root_device: Option<u64>,
+}
+
+/// An ancestor is an item in the directory tree traversed by walkdir, and is
+/// used to check for loops in the tree when traversing symlinks.
+#[derive(Debug)]
+struct Ancestor {
+ /// The path of this ancestor.
+ path: PathBuf,
+ /// An open file to this ancestor. This is only used on Windows where
+ /// opening a file handle appears to be quite expensive, so we choose to
+ /// cache it. This comes at the cost of not respecting the file descriptor
+ /// limit set by the user.
+ #[cfg(windows)]
+ handle: Handle,
+}
+
+impl Ancestor {
+ /// Create a new ancestor from the given directory path.
+ #[cfg(windows)]
+ fn new(dent: &DirEntry) -> io::Result<Ancestor> {
+ let handle = Handle::from_path(dent.path())?;
+ Ok(Ancestor { path: dent.path().to_path_buf(), handle: handle })
+ }
+
+ /// Create a new ancestor from the given directory path.
+ #[cfg(not(windows))]
+ fn new(dent: &DirEntry) -> io::Result<Ancestor> {
+ Ok(Ancestor { path: dent.path().to_path_buf() })
+ }
+
+ /// Returns true if and only if the given open file handle corresponds to
+ /// the same directory as this ancestor.
+ #[cfg(windows)]
+ fn is_same(&self, child: &Handle) -> io::Result<bool> {
+ Ok(child == &self.handle)
+ }
+
+ /// Returns true if and only if the given open file handle corresponds to
+ /// the same directory as this ancestor.
+ #[cfg(not(windows))]
+ fn is_same(&self, child: &Handle) -> io::Result<bool> {
+ Ok(child == &Handle::from_path(&self.path)?)
+ }
+}
+
+/// A sequence of unconsumed directory entries.
+///
+/// This represents the opened or closed state of a directory handle. When
+/// open, future entries are read by iterating over the raw `fs::ReadDir`.
+/// When closed, all future entries are read into memory. Iteration then
+/// proceeds over a [`Vec<fs::DirEntry>`].
+///
+/// [`fs::ReadDir`]: https://doc.rust-lang.org/stable/std/fs/struct.ReadDir.html
+/// [`Vec<fs::DirEntry>`]: https://doc.rust-lang.org/stable/std/vec/struct.Vec.html
+#[derive(Debug)]
+enum DirList {
+ /// An opened handle.
+ ///
+ /// This includes the depth of the handle itself.
+ ///
+ /// If there was an error with the initial [`fs::read_dir`] call, then it
+ /// is stored here. (We use an [`Option<...>`] to make yielding the error
+ /// exactly once simpler.)
+ ///
+ /// [`fs::read_dir`]: https://doc.rust-lang.org/stable/std/fs/fn.read_dir.html
+ /// [`Option<...>`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html
+ Opened { depth: usize, it: result::Result<ReadDir, Option<Error>> },
+ /// A closed handle.
+ ///
+ /// All remaining directory entries are read into memory.
+ Closed(vec::IntoIter<Result<DirEntry>>),
+}
+
+impl Iterator for IntoIter {
+ type Item = Result<DirEntry>;
+ /// Advances the iterator and returns the next value.
+ ///
+ /// # Errors
+ ///
+ /// If the iterator fails to retrieve the next value, this method returns
+ /// an error value. The error will be wrapped in an `Option::Some`.
+ fn next(&mut self) -> Option<Result<DirEntry>> {
+ if let Some(start) = self.start.take() {
+ if self.opts.same_file_system {
+ let result = util::device_num(&start)
+ .map_err(|e| Error::from_path(0, start.clone(), e));
+ self.root_device = Some(itry!(result));
+ }
+ let dent = itry!(DirEntry::from_path(0, start, false));
+ if let Some(result) = self.handle_entry(dent) {
+ return Some(result);
+ }
+ }
+ while !self.stack_list.is_empty() {
+ self.depth = self.stack_list.len();
+ if let Some(dentry) = self.get_deferred_dir() {
+ return Some(Ok(dentry));
+ }
+ if self.depth > self.opts.max_depth {
+ // If we've exceeded the max depth, pop the current dir
+ // so that we don't descend.
+ self.pop();
+ continue;
+ }
+ // Unwrap is safe here because we've verified above that
+ // `self.stack_list` is not empty
+ let next = self
+ .stack_list
+ .last_mut()
+ .expect("BUG: stack should be non-empty")
+ .next();
+ match next {
+ None => self.pop(),
+ Some(Err(err)) => return Some(Err(err)),
+ Some(Ok(dent)) => {
+ if let Some(result) = self.handle_entry(dent) {
+ return Some(result);
+ }
+ }
+ }
+ }
+ if self.opts.contents_first {
+ self.depth = self.stack_list.len();
+ if let Some(dentry) = self.get_deferred_dir() {
+ return Some(Ok(dentry));
+ }
+ }
+ None
+ }
+}
+
+impl IntoIter {
+ /// Skips the current directory.
+ ///
+ /// This causes the iterator to stop traversing the contents of the
+ /// directory currently being read. This means any remaining entries in
+ /// that directory will be skipped (including sub-directories).
+ ///
+ /// Note that the ergonomics of this method are questionable since it
+ /// borrows the iterator mutably. Namely, you must write out the looping
+ /// condition manually. For example, to skip hidden entries efficiently on
+ /// unix systems:
+ ///
+ /// ```no_run
+ /// use walkdir::{DirEntry, WalkDir};
+ ///
+ /// fn is_hidden(entry: &DirEntry) -> bool {
+ /// entry.file_name()
+ /// .to_str()
+ /// .map(|s| s.starts_with("."))
+ /// .unwrap_or(false)
+ /// }
+ ///
+ /// let mut it = WalkDir::new("foo").into_iter();
+ /// loop {
+ /// let entry = match it.next() {
+ /// None => break,
+ /// Some(Err(err)) => panic!("ERROR: {}", err),
+ /// Some(Ok(entry)) => entry,
+ /// };
+ /// if is_hidden(&entry) {
+ /// if entry.file_type().is_dir() {
+ /// it.skip_current_dir();
+ /// }
+ /// continue;
+ /// }
+ /// println!("{}", entry.path().display());
+ /// }
+ /// ```
+ ///
+ /// You may find it more convenient to use the [`filter_entry`] iterator
+ /// adapter. (See its documentation for the same example functionality as
+ /// above.)
+ ///
+ /// [`filter_entry`]: #method.filter_entry
+ pub fn skip_current_dir(&mut self) {
+ if !self.stack_list.is_empty() {
+ self.pop();
+ }
+ }
+
+ /// Yields only entries which satisfy the given predicate and skips
+ /// descending into directories that do not satisfy the given predicate.
+ ///
+ /// The predicate is applied to all entries. If the predicate is
+ /// true, iteration carries on as normal. If the predicate is false, the
+ /// entry is ignored and if it is a directory, it is not descended into.
+ ///
+ /// This is often more convenient to use than [`skip_current_dir`]. For
+ /// example, to skip hidden files and directories efficiently on unix
+ /// systems:
+ ///
+ /// ```no_run
+ /// use walkdir::{DirEntry, WalkDir};
+ /// # use walkdir::Error;
+ ///
+ /// fn is_hidden(entry: &DirEntry) -> bool {
+ /// entry.file_name()
+ /// .to_str()
+ /// .map(|s| s.starts_with("."))
+ /// .unwrap_or(false)
+ /// }
+ ///
+ /// # fn try_main() -> Result<(), Error> {
+ /// for entry in WalkDir::new("foo")
+ /// .into_iter()
+ /// .filter_entry(|e| !is_hidden(e)) {
+ /// println!("{}", entry?.path().display());
+ /// }
+ /// # Ok(())
+ /// # }
+ /// ```
+ ///
+ /// Note that the iterator will still yield errors for reading entries that
+ /// may not satisfy the predicate.
+ ///
+ /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
+ /// passed to this predicate.
+ ///
+ /// Note that if the iterator has `contents_first` enabled, then this
+ /// method is no different than calling the standard `Iterator::filter`
+ /// method (because directory entries are yielded after they've been
+ /// descended into).
+ ///
+ /// [`skip_current_dir`]: #method.skip_current_dir
+ /// [`min_depth`]: struct.WalkDir.html#method.min_depth
+ /// [`max_depth`]: struct.WalkDir.html#method.max_depth
+ pub fn filter_entry<P>(self, predicate: P) -> FilterEntry<Self, P>
+ where
+ P: FnMut(&DirEntry) -> bool,
+ {
+ FilterEntry { it: self, predicate: predicate }
+ }
+
+ fn handle_entry(
+ &mut self,
+ mut dent: DirEntry,
+ ) -> Option<Result<DirEntry>> {
+ if self.opts.follow_links && dent.file_type().is_symlink() {
+ dent = itry!(self.follow(dent));
+ }
+ let is_normal_dir = !dent.file_type().is_symlink() && dent.is_dir();
+ if is_normal_dir {
+ if self.opts.same_file_system && dent.depth() > 0 {
+ if itry!(self.is_same_file_system(&dent)) {
+ itry!(self.push(&dent));
+ }
+ } else {
+ itry!(self.push(&dent));
+ }
+ } else if dent.depth() == 0 && dent.file_type().is_symlink() {
+ // As a special case, if we are processing a root entry, then we
+ // always follow it even if it's a symlink and follow_links is
+ // false. We are careful to not let this change the semantics of
+ // the DirEntry however. Namely, the DirEntry should still respect
+ // the follow_links setting. When it's disabled, it should report
+ // itself as a symlink. When it's enabled, it should always report
+ // itself as the target.
+ let md = itry!(fs::metadata(dent.path()).map_err(|err| {
+ Error::from_path(dent.depth(), dent.path().to_path_buf(), err)
+ }));
+ if md.file_type().is_dir() {
+ itry!(self.push(&dent));
+ }
+ }
+ if is_normal_dir && self.opts.contents_first {
+ self.deferred_dirs.push(dent);
+ None
+ } else if self.skippable() {
+ None
+ } else {
+ Some(Ok(dent))
+ }
+ }
+
+ fn get_deferred_dir(&mut self) -> Option<DirEntry> {
+ if self.opts.contents_first {
+ if self.depth < self.deferred_dirs.len() {
+ // Unwrap is safe here because we've guaranteed that
+ // `self.deferred_dirs.len()` can never be less than 1
+ let deferred: DirEntry = self
+ .deferred_dirs
+ .pop()
+ .expect("BUG: deferred_dirs should be non-empty");
+ if !self.skippable() {
+ return Some(deferred);
+ }
+ }
+ }
+ None
+ }
+
+ fn push(&mut self, dent: &DirEntry) -> Result<()> {
+ // Make room for another open file descriptor if we've hit the max.
+ let free =
+ self.stack_list.len().checked_sub(self.oldest_opened).unwrap();
+ if free == self.opts.max_open {
+ self.stack_list[self.oldest_opened].close();
+ }
+ // Open a handle to reading the directory's entries.
+ let rd = fs::read_dir(dent.path()).map_err(|err| {
+ Some(Error::from_path(self.depth, dent.path().to_path_buf(), err))
+ });
+ let mut list = DirList::Opened { depth: self.depth, it: rd };
+ if let Some(ref mut cmp) = self.opts.sorter {
+ let mut entries: Vec<_> = list.collect();
+ entries.sort_by(|a, b| match (a, b) {
+ (&Ok(ref a), &Ok(ref b)) => cmp(a, b),
+ (&Err(_), &Err(_)) => Ordering::Equal,
+ (&Ok(_), &Err(_)) => Ordering::Greater,
+ (&Err(_), &Ok(_)) => Ordering::Less,
+ });
+ list = DirList::Closed(entries.into_iter());
+ }
+ if self.opts.follow_links {
+ let ancestor = Ancestor::new(&dent)
+ .map_err(|err| Error::from_io(self.depth, err))?;
+ self.stack_path.push(ancestor);
+ }
+ // We push this after stack_path since creating the Ancestor can fail.
+ // If it fails, then we return the error and won't descend.
+ self.stack_list.push(list);
+ // If we had to close out a previous directory stream, then we need to
+ // increment our index to the oldest still-open stream. We do this only
+ // after adding to our stack, in order to ensure that the oldest_opened
+ // index remains valid. The worst that can happen is that an already
+ // closed stream will be closed again, which is a no-op.
+ //
+ // We could move the close of the stream above into this if-body, but
+ // then we would have more than the maximum number of file descriptors
+ // open at a particular point in time.
+ if free == self.opts.max_open {
+ // Unwrap is safe here because self.oldest_opened is guaranteed to
+ // never be greater than `self.stack_list.len()`, which implies
+ // that the subtraction won't underflow and that adding 1 will
+ // never overflow.
+ self.oldest_opened = self.oldest_opened.checked_add(1).unwrap();
+ }
+ Ok(())
+ }
+
+ fn pop(&mut self) {
+ self.stack_list.pop().expect("BUG: cannot pop from empty stack");
+ if self.opts.follow_links {
+ self.stack_path.pop().expect("BUG: list/path stacks out of sync");
+ }
+ // If everything in the stack is already closed, then there is
+ // room for at least one more open descriptor and it will
+ // always be at the top of the stack.
+ self.oldest_opened = min(self.oldest_opened, self.stack_list.len());
+ }
+
+ fn follow(&self, mut dent: DirEntry) -> Result<DirEntry> {
+ dent =
+ DirEntry::from_path(self.depth, dent.path().to_path_buf(), true)?;
+ // The only way a symlink can cause a loop is if it points
+ // to a directory. Otherwise, it always points to a leaf
+ // and we can omit any loop checks.
+ if dent.is_dir() {
+ self.check_loop(dent.path())?;
+ }
+ Ok(dent)
+ }
+
+ fn check_loop<P: AsRef<Path>>(&self, child: P) -> Result<()> {
+ let hchild = Handle::from_path(&child)
+ .map_err(|err| Error::from_io(self.depth, err))?;
+ for ancestor in self.stack_path.iter().rev() {
+ let is_same = ancestor
+ .is_same(&hchild)
+ .map_err(|err| Error::from_io(self.depth, err))?;
+ if is_same {
+ return Err(Error::from_loop(
+ self.depth,
+ &ancestor.path,
+ child.as_ref(),
+ ));
+ }
+ }
+ Ok(())
+ }
+
+ fn is_same_file_system(&mut self, dent: &DirEntry) -> Result<bool> {
+ let dent_device = util::device_num(dent.path())
+ .map_err(|err| Error::from_entry(dent, err))?;
+ Ok(self
+ .root_device
+ .map(|d| d == dent_device)
+ .expect("BUG: called is_same_file_system without root device"))
+ }
+
+ fn skippable(&self) -> bool {
+ self.depth < self.opts.min_depth || self.depth > self.opts.max_depth
+ }
+}
+
+impl DirList {
+ fn close(&mut self) {
+ if let DirList::Opened { .. } = *self {
+ *self = DirList::Closed(self.collect::<Vec<_>>().into_iter());
+ }
+ }
+}
+
+impl Iterator for DirList {
+ type Item = Result<DirEntry>;
+
+ #[inline(always)]
+ fn next(&mut self) -> Option<Result<DirEntry>> {
+ match *self {
+ DirList::Closed(ref mut it) => it.next(),
+ DirList::Opened { depth, ref mut it } => match *it {
+ Err(ref mut err) => err.take().map(Err),
+ Ok(ref mut rd) => rd.next().map(|r| match r {
+ Ok(r) => DirEntry::from_entry(depth + 1, &r),
+ Err(err) => Err(Error::from_io(depth + 1, err)),
+ }),
+ },
+ }
+ }
+}
+
+/// A recursive directory iterator that skips entries.
+///
+/// Values of this type are created by calling [`.filter_entry()`] on an
+/// `IntoIter`, which is formed by calling [`.into_iter()`] on a `WalkDir`.
+///
+/// Directories that fail the predicate `P` are skipped. Namely, they are
+/// never yielded and never descended into.
+///
+/// Entries that are skipped with the [`min_depth`] and [`max_depth`] options
+/// are not passed through this filter.
+///
+/// If opening a handle to a directory resulted in an error, then it is yielded
+/// and no corresponding call to the predicate is made.
+///
+/// Type parameter `I` refers to the underlying iterator and `P` refers to the
+/// predicate, which is usually `FnMut(&DirEntry) -> bool`.
+///
+/// [`.filter_entry()`]: struct.IntoIter.html#method.filter_entry
+/// [`.into_iter()`]: struct.WalkDir.html#into_iter.v
+/// [`min_depth`]: struct.WalkDir.html#method.min_depth
+/// [`max_depth`]: struct.WalkDir.html#method.max_depth
+#[derive(Debug)]
+pub struct FilterEntry<I, P> {
+ // The underlying iterator that entries are pulled from.
+ it: I,
+ // Called on each successfully retrieved entry to decide whether it is
+ // kept (true) or skipped (false).
+ predicate: P,
+}
+
+impl<P> Iterator for FilterEntry<IntoIter, P>
+where
+    P: FnMut(&DirEntry) -> bool,
+{
+    type Item = Result<DirEntry>;
+
+    /// Advances the iterator and returns the next entry accepted by the
+    /// predicate.
+    ///
+    /// Rejected entries are dropped silently; a rejected directory is also
+    /// not descended into.
+    ///
+    /// # Errors
+    ///
+    /// If the underlying iterator fails to retrieve the next value, the
+    /// error is returned wrapped in an `Option::Some` without consulting the
+    /// predicate.
+    fn next(&mut self) -> Option<Result<DirEntry>> {
+        while let Some(result) = self.it.next() {
+            let dent = itry!(result);
+            if (self.predicate)(&dent) {
+                return Some(Ok(dent));
+            }
+            // Rejected: make sure a directory's contents are skipped too.
+            if dent.is_dir() {
+                self.it.skip_current_dir();
+            }
+        }
+        None
+    }
+}
+
+impl<P> FilterEntry<IntoIter, P>
+where
+    P: FnMut(&DirEntry) -> bool,
+{
+    /// Wraps this iterator in a further filter: only entries accepted by
+    /// `predicate` are yielded, and directories rejected by it are not
+    /// descended into.
+    ///
+    /// The predicate is applied to all entries. When it returns true,
+    /// iteration carries on as normal. When it returns false, the entry is
+    /// ignored and, if it is a directory, its contents are never visited.
+    ///
+    /// This is often more convenient to use than [`skip_current_dir`]. For
+    /// example, to skip hidden files and directories efficiently on unix
+    /// systems:
+    ///
+    /// ```no_run
+    /// use walkdir::{DirEntry, WalkDir};
+    /// # use walkdir::Error;
+    ///
+    /// fn is_hidden(entry: &DirEntry) -> bool {
+    ///     entry.file_name()
+    ///          .to_str()
+    ///          .map(|s| s.starts_with("."))
+    ///          .unwrap_or(false)
+    /// }
+    ///
+    /// # fn try_main() -> Result<(), Error> {
+    /// for entry in WalkDir::new("foo")
+    ///                      .into_iter()
+    ///                      .filter_entry(|e| !is_hidden(e)) {
+    ///     println!("{}", entry?.path().display());
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    ///
+    /// Note that errors for reading entries are still yielded, even for
+    /// entries that may not satisfy the predicate.
+    ///
+    /// Note that entries skipped with [`min_depth`] and [`max_depth`] are not
+    /// passed to this predicate.
+    ///
+    /// Note that if the iterator has `contents_first` enabled, then this
+    /// method is no different than calling the standard `Iterator::filter`
+    /// method (because directory entries are yielded after they've been
+    /// descended into).
+    ///
+    /// [`skip_current_dir`]: #method.skip_current_dir
+    /// [`min_depth`]: struct.WalkDir.html#method.min_depth
+    /// [`max_depth`]: struct.WalkDir.html#method.max_depth
+    pub fn filter_entry(self, predicate: P) -> FilterEntry<Self, P> {
+        FilterEntry { it: self, predicate }
+    }
+
+    /// Skips the current directory.
+    ///
+    /// The call is forwarded to the wrapped iterator. It causes the iterator
+    /// to stop traversing the contents of the least recently yielded
+    /// directory, so any remaining entries in that directory are skipped
+    /// (including sub-directories).
+    ///
+    /// Note that the ergonomics of this method are questionable since it
+    /// borrows the iterator mutably. Namely, you must write out the looping
+    /// condition manually. For example, to skip hidden entries efficiently on
+    /// unix systems:
+    ///
+    /// ```no_run
+    /// use walkdir::{DirEntry, WalkDir};
+    ///
+    /// fn is_hidden(entry: &DirEntry) -> bool {
+    ///     entry.file_name()
+    ///          .to_str()
+    ///          .map(|s| s.starts_with("."))
+    ///          .unwrap_or(false)
+    /// }
+    ///
+    /// let mut it = WalkDir::new("foo").into_iter();
+    /// loop {
+    ///     let entry = match it.next() {
+    ///         None => break,
+    ///         Some(Err(err)) => panic!("ERROR: {}", err),
+    ///         Some(Ok(entry)) => entry,
+    ///     };
+    ///     if is_hidden(&entry) {
+    ///         if entry.file_type().is_dir() {
+    ///             it.skip_current_dir();
+    ///         }
+    ///         continue;
+    ///     }
+    ///     println!("{}", entry.path().display());
+    /// }
+    /// ```
+    ///
+    /// You may find it more convenient to use the [`filter_entry`] iterator
+    /// adapter. (See its documentation for the same example functionality as
+    /// above.)
+    ///
+    /// [`filter_entry`]: #method.filter_entry
+    pub fn skip_current_dir(&mut self) {
+        self.it.skip_current_dir();
+    }
+}
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
new file mode 100644
index 0000000..ebf952d
--- /dev/null
+++ b/src/tests/mod.rs
@@ -0,0 +1,4 @@
+#[macro_use]
+mod util;
+
+mod recursive;
diff --git a/src/tests/recursive.rs b/src/tests/recursive.rs
new file mode 100644
index 0000000..bbb1ce1
--- /dev/null
+++ b/src/tests/recursive.rs
@@ -0,0 +1,980 @@
+use std::fs;
+use std::path::PathBuf;
+
+use crate::tests::util::Dir;
+use crate::WalkDir;
+
+// Compile-time check that the walker types are both `Send` and `Sync`.
+#[test]
+fn send_sync_traits() {
+    use crate::{FilterEntry, IntoIter};
+
+    fn assert_send_sync<T: Send + Sync>() {}
+
+    assert_send_sync::<WalkDir>();
+    assert_send_sync::<IntoIter>();
+    assert_send_sync::<FilterEntry<IntoIter, u8>>();
+}
+
+// Walking an empty directory yields only the root itself, at depth 0.
+#[test]
+fn empty() {
+    let tmp = Dir::tmp();
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(1, found.len());
+    let root = &found[0];
+    assert!(root.file_type().is_dir());
+    assert!(!root.path_is_symlink());
+    assert_eq!(0, root.depth());
+    assert_eq!(tmp.path(), root.path());
+    assert_eq!(tmp.path().file_name().unwrap(), root.file_name());
+}
+
+// Same as `empty`, but with symlink following switched on.
+#[test]
+fn empty_follow() {
+    let tmp = Dir::tmp();
+    let walker = WalkDir::new(tmp.path()).follow_links(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(1, found.len());
+    let root = &found[0];
+    assert!(root.file_type().is_dir());
+    assert!(!root.path_is_symlink());
+    assert_eq!(0, root.depth());
+    assert_eq!(tmp.path(), root.path());
+    assert_eq!(tmp.path().file_name().unwrap(), root.file_name());
+}
+
+// Rooting the walk at a regular file yields just that file, at depth 0.
+#[test]
+fn empty_file() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path().join("a")));
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(1, found.len());
+    let file = &found[0];
+    assert!(file.file_type().is_file());
+    assert!(!file.path_is_symlink());
+    assert_eq!(0, file.depth());
+    assert_eq!(tmp.join("a"), file.path());
+    assert_eq!("a", file.file_name());
+}
+
+// Same as `empty_file`, but with symlink following switched on.
+#[test]
+fn empty_file_follow() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+
+    let walker = WalkDir::new(tmp.path().join("a")).follow_links(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(1, found.len());
+    let file = &found[0];
+    assert!(file.file_type().is_file());
+    assert!(!file.path_is_symlink());
+    assert_eq!(0, file.depth());
+    assert_eq!(tmp.join("a"), file.path());
+    assert_eq!("a", file.file_name());
+}
+
+// A single sub-directory is reported as the second entry, at depth 1.
+#[test]
+fn one_dir() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(2, found.len());
+    let child = &found[1];
+    assert_eq!(tmp.join("a"), child.path());
+    assert_eq!(1, child.depth());
+    assert_eq!("a", child.file_name());
+    assert!(child.file_type().is_dir());
+}
+
+// A single file is reported as the second entry, at depth 1.
+#[test]
+fn one_file() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(2, found.len());
+    let child = &found[1];
+    assert_eq!(tmp.join("a"), child.path());
+    assert_eq!(1, child.depth());
+    assert_eq!("a", child.file_name());
+    assert!(child.file_type().is_file());
+}
+
+// A directory containing one file yields root, the directory and the file.
+#[test]
+fn one_dir_one_file() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo");
+    tmp.touch("foo/a");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let foo = tmp.join("foo");
+    let file = foo.join("a");
+    let expected = vec![tmp.path().to_path_buf(), foo, file];
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// Several files inside one directory are all reported.
+#[test]
+fn many_files() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo");
+    tmp.touch_all(&["foo/a", "foo/b", "foo/c"]);
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let foo = tmp.join("foo");
+    let mut expected = vec![tmp.path().to_path_buf(), foo.clone()];
+    expected.extend(["a", "b", "c"].iter().map(|name| foo.join(name)));
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// Several sub-directories inside one directory are all reported.
+#[test]
+fn many_dirs() {
+    let tmp = Dir::tmp();
+    for sub in ["foo/a", "foo/b", "foo/c"].iter() {
+        tmp.mkdirp(*sub);
+    }
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let foo = tmp.join("foo");
+    let mut expected = vec![tmp.path().to_path_buf(), foo.clone()];
+    expected.extend(["a", "b", "c"].iter().map(|name| foo.join(name)));
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// A mix of files and sub-directories in one directory is fully reported.
+#[test]
+fn many_mixed() {
+    let tmp = Dir::tmp();
+    for sub in ["foo/a", "foo/c", "foo/e"].iter() {
+        tmp.mkdirp(*sub);
+    }
+    tmp.touch_all(&["foo/b", "foo/d", "foo/f"]);
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let foo = tmp.join("foo");
+    let mut expected = vec![tmp.path().to_path_buf(), foo.clone()];
+    expected.extend(
+        ["a", "b", "c", "d", "e", "f"].iter().map(|name| foo.join(name)),
+    );
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// A 26-level deep chain of directories with a file at the bottom is walked
+// in full.
+#[test]
+fn nested() {
+    let nested =
+        PathBuf::from("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z");
+    let tmp = Dir::tmp();
+    tmp.mkdirp(&nested);
+    tmp.touch(nested.join("A"));
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    // Root first, then every prefix of `nested` (a, a/b, ...), then the file.
+    let mut expected = vec![tmp.path().to_path_buf()];
+    let mut prefix = PathBuf::new();
+    for component in nested.components() {
+        prefix.push(component);
+        expected.push(tmp.join(&prefix));
+    }
+    expected.push(tmp.join(&nested).join("A"));
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// Same deep chain as `nested`, but walked with `max_open(1)`.
+#[test]
+fn nested_small_max_open() {
+    let nested =
+        PathBuf::from("a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z");
+    let tmp = Dir::tmp();
+    tmp.mkdirp(&nested);
+    tmp.touch(nested.join("A"));
+
+    let walker = WalkDir::new(tmp.path()).max_open(1);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    // Root first, then every prefix of `nested` (a, a/b, ...), then the file.
+    let mut expected = vec![tmp.path().to_path_buf()];
+    let mut prefix = PathBuf::new();
+    for component in nested.components() {
+        prefix.push(component);
+        expected.push(tmp.join(&prefix));
+    }
+    expected.push(tmp.join(&nested).join("A"));
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// Two sibling directories, each holding two files, are both walked.
+#[test]
+fn siblings() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo");
+    tmp.mkdirp("bar");
+    tmp.touch_all(&["foo/a", "foo/b"]);
+    tmp.touch_all(&["bar/a", "bar/b"]);
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let mut expected = vec![tmp.path().to_path_buf()];
+    for parent in ["bar", "foo"].iter() {
+        let parent = tmp.join(parent);
+        expected.push(parent.clone());
+        expected.push(parent.join("a"));
+        expected.push(parent.join("b"));
+    }
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// Root is a symlink to a file and links are not followed: the root entry is
+// reported as a symlink.
+#[test]
+fn sym_root_file_nofollow() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+    tmp.symlink_file("a", "a-link");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.join("a-link")));
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(1, sorted.len());
+    let lnk = &sorted[0];
+
+    assert_eq!(tmp.join("a-link"), lnk.path());
+    assert!(lnk.path_is_symlink());
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+    assert_eq!(0, lnk.depth());
+
+    let ty = lnk.file_type();
+    assert!(ty.is_symlink());
+    assert!(!ty.is_file());
+    assert!(!ty.is_dir());
+
+    let md = lnk.metadata().unwrap();
+    assert!(md.file_type().is_symlink());
+    assert!(!md.is_file());
+    assert!(!md.is_dir());
+}
+
+// Root is a symlink to a file and links are followed: the root entry keeps
+// the link's path but reports the type and metadata of the target file.
+#[test]
+fn sym_root_file_follow() {
+    let dir = Dir::tmp();
+    dir.touch("a");
+    dir.symlink_file("a", "a-link");
+
+    let wd = WalkDir::new(dir.join("a-link")).follow_links(true);
+    let r = dir.run_recursive(wd);
+    r.assert_no_errors();
+
+    let ents = r.sorted_ents();
+    // Pin the entry count like the sibling `sym_root_*` tests do, so that an
+    // unexpected extra (or missing) entry fails here with a clear message
+    // instead of slipping through or panicking on the index below.
+    assert_eq!(1, ents.len());
+    let link = &ents[0];
+
+    assert_eq!(dir.join("a-link"), link.path());
+
+    assert!(link.path_is_symlink());
+
+    assert_eq!(dir.join("a"), fs::read_link(link.path()).unwrap());
+
+    assert_eq!(0, link.depth());
+
+    assert!(!link.file_type().is_symlink());
+    assert!(link.file_type().is_file());
+    assert!(!link.file_type().is_dir());
+
+    assert!(!link.metadata().unwrap().file_type().is_symlink());
+    assert!(link.metadata().unwrap().is_file());
+    assert!(!link.metadata().unwrap().is_dir());
+}
+
+// Root is a symlink to a directory and links are not followed: the root is
+// reported as a symlink, yet its contents are still listed.
+#[test]
+fn sym_root_dir_nofollow() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a");
+    tmp.symlink_dir("a", "a-link");
+    tmp.touch("a/zzz");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.join("a-link")));
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(2, sorted.len());
+    let lnk = &sorted[0];
+
+    assert_eq!(tmp.join("a-link"), lnk.path());
+    assert!(lnk.path_is_symlink());
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+    assert_eq!(0, lnk.depth());
+
+    let ty = lnk.file_type();
+    assert!(ty.is_symlink());
+    assert!(!ty.is_file());
+    assert!(!ty.is_dir());
+
+    let md = lnk.metadata().unwrap();
+    assert!(md.file_type().is_symlink());
+    assert!(!md.is_file());
+    assert!(!md.is_dir());
+
+    let inner = &sorted[1];
+    assert_eq!(tmp.join("a-link").join("zzz"), inner.path());
+    assert!(!inner.path_is_symlink());
+}
+
+// Root is a symlink to a directory and links are followed: the root is
+// reported as a directory and its contents are listed.
+#[test]
+fn sym_root_dir_follow() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a");
+    tmp.symlink_dir("a", "a-link");
+    tmp.touch("a/zzz");
+
+    let walker = WalkDir::new(tmp.join("a-link")).follow_links(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(2, sorted.len());
+    let lnk = &sorted[0];
+
+    assert_eq!(tmp.join("a-link"), lnk.path());
+    assert!(lnk.path_is_symlink());
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+    assert_eq!(0, lnk.depth());
+
+    let ty = lnk.file_type();
+    assert!(!ty.is_symlink());
+    assert!(!ty.is_file());
+    assert!(ty.is_dir());
+
+    let md = lnk.metadata().unwrap();
+    assert!(!md.file_type().is_symlink());
+    assert!(!md.is_file());
+    assert!(md.is_dir());
+
+    let inner = &sorted[1];
+    assert_eq!(tmp.join("a-link").join("zzz"), inner.path());
+    assert!(!inner.path_is_symlink());
+}
+
+// A symlink to a file found during the walk is reported as a symlink when
+// links are not followed.
+#[test]
+fn sym_file_nofollow() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+    tmp.symlink_file("a", "a-link");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(3, sorted.len());
+    let target = &sorted[1];
+    let lnk = &sorted[2];
+
+    assert_eq!(tmp.join("a"), target.path());
+    assert_eq!(tmp.join("a-link"), lnk.path());
+
+    assert!(!target.path_is_symlink());
+    assert!(lnk.path_is_symlink());
+
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+
+    assert_eq!(1, target.depth());
+    assert_eq!(1, lnk.depth());
+
+    assert!(target.file_type().is_file());
+    let lnk_ty = lnk.file_type();
+    assert!(lnk_ty.is_symlink());
+    assert!(!lnk_ty.is_file());
+    assert!(!lnk_ty.is_dir());
+
+    assert!(target.metadata().unwrap().is_file());
+    let lnk_md = lnk.metadata().unwrap();
+    assert!(lnk_md.file_type().is_symlink());
+    assert!(!lnk_md.is_file());
+    assert!(!lnk_md.is_dir());
+}
+
+// A symlink to a file found during the walk is reported as a regular file
+// when links are followed.
+#[test]
+fn sym_file_follow() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+    tmp.symlink_file("a", "a-link");
+
+    let walker = WalkDir::new(tmp.path()).follow_links(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(3, sorted.len());
+    let target = &sorted[1];
+    let lnk = &sorted[2];
+
+    assert_eq!(tmp.join("a"), target.path());
+    assert_eq!(tmp.join("a-link"), lnk.path());
+
+    assert!(!target.path_is_symlink());
+    assert!(lnk.path_is_symlink());
+
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+
+    assert_eq!(1, target.depth());
+    assert_eq!(1, lnk.depth());
+
+    assert!(target.file_type().is_file());
+    let lnk_ty = lnk.file_type();
+    assert!(!lnk_ty.is_symlink());
+    assert!(lnk_ty.is_file());
+    assert!(!lnk_ty.is_dir());
+
+    assert!(target.metadata().unwrap().is_file());
+    let lnk_md = lnk.metadata().unwrap();
+    assert!(!lnk_md.file_type().is_symlink());
+    assert!(lnk_md.is_file());
+    assert!(!lnk_md.is_dir());
+}
+
+// A symlink to a directory found during the walk is reported as a symlink
+// (and not descended into) when links are not followed.
+#[test]
+fn sym_dir_nofollow() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a");
+    tmp.symlink_dir("a", "a-link");
+    tmp.touch("a/zzz");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(4, sorted.len());
+    let target = &sorted[1];
+    let lnk = &sorted[3];
+
+    assert_eq!(tmp.join("a"), target.path());
+    assert_eq!(tmp.join("a-link"), lnk.path());
+
+    assert!(!target.path_is_symlink());
+    assert!(lnk.path_is_symlink());
+
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+
+    assert_eq!(1, target.depth());
+    assert_eq!(1, lnk.depth());
+
+    assert!(target.file_type().is_dir());
+    let lnk_ty = lnk.file_type();
+    assert!(lnk_ty.is_symlink());
+    assert!(!lnk_ty.is_file());
+    assert!(!lnk_ty.is_dir());
+
+    assert!(target.metadata().unwrap().is_dir());
+    let lnk_md = lnk.metadata().unwrap();
+    assert!(lnk_md.file_type().is_symlink());
+    assert!(!lnk_md.is_file());
+    assert!(!lnk_md.is_dir());
+}
+
+// A symlink to a directory found during the walk is reported as a directory
+// and descended into when links are followed.
+#[test]
+fn sym_dir_follow() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a");
+    tmp.symlink_dir("a", "a-link");
+    tmp.touch("a/zzz");
+
+    let walker = WalkDir::new(tmp.path()).follow_links(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let sorted = res.sorted_ents();
+    assert_eq!(5, sorted.len());
+    let target = &sorted[1];
+    let lnk = &sorted[3];
+
+    assert_eq!(tmp.join("a"), target.path());
+    assert_eq!(tmp.join("a-link"), lnk.path());
+
+    assert!(!target.path_is_symlink());
+    assert!(lnk.path_is_symlink());
+
+    assert_eq!(tmp.join("a"), fs::read_link(lnk.path()).unwrap());
+
+    assert_eq!(1, target.depth());
+    assert_eq!(1, lnk.depth());
+
+    assert!(target.file_type().is_dir());
+    let lnk_ty = lnk.file_type();
+    assert!(!lnk_ty.is_symlink());
+    assert!(!lnk_ty.is_file());
+    assert!(lnk_ty.is_dir());
+
+    assert!(target.metadata().unwrap().is_dir());
+    let lnk_md = lnk.metadata().unwrap();
+    assert!(!lnk_md.file_type().is_symlink());
+    assert!(!lnk_md.is_file());
+    assert!(lnk_md.is_dir());
+
+    // The file inside the directory is seen once per route.
+    let target_zzz = &sorted[2];
+    let lnk_zzz = &sorted[4];
+    assert_eq!(tmp.join("a").join("zzz"), target_zzz.path());
+    assert_eq!(tmp.join("a-link").join("zzz"), lnk_zzz.path());
+    assert!(!target_zzz.path_is_symlink());
+    assert!(!lnk_zzz.path_is_symlink());
+}
+
+// A symlink pointing back at an ancestor is harmless when links are not
+// followed.
+#[test]
+fn sym_noloop() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b/c");
+    tmp.symlink_dir("a", "a/b/c/a-link");
+
+    let res = tmp.run_recursive(WalkDir::new(tmp.path()));
+    // Without following symlinks there is no loop to run into.
+    res.assert_no_errors();
+
+    let found = res.ents();
+    assert_eq!(5, found.len());
+}
+
+// When following links, a symlink pointing back at an ancestor is reported
+// as a loop error naming both the link and the ancestor.
+#[test]
+fn sym_loop_detect() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b/c");
+    tmp.symlink_dir("a", "a/b/c/a-link");
+
+    let walker = WalkDir::new(tmp.path()).follow_links(true);
+    let res = tmp.run_recursive(walker);
+
+    let sorted = res.sorted_ents();
+    let failures = res.errs();
+    assert_eq!(4, sorted.len());
+    assert_eq!(1, failures.len());
+
+    let failure = &failures[0];
+
+    let link_path = tmp.join("a/b/c/a-link");
+    assert_eq!(Some(&*link_path), failure.path());
+
+    let ancestor_path = tmp.join("a");
+    assert_eq!(Some(&*ancestor_path), failure.loop_ancestor());
+
+    assert_eq!(4, failure.depth());
+    assert!(failure.io_error().is_none());
+}
+
+#[test]
+fn sym_self_loop_no_error() {
+ let dir = Dir::tmp();
+ dir.symlink_file("a", "a");
+
+ let wd = WalkDir::new(dir.path());
+ let r = dir.run_recursive(wd);
+ // No errors occur because even though the symlink points to nowhere, it
+ // is never followed, and thus no error occurs.
+ r.assert_no_errors();
+ assert_eq!(2, r.ents().len());
+
+ let ent = &r.ents()[1];
+ assert_eq!(dir.join("a"), ent.path());
+ assert!(ent.path_is_symlink());
+
+ assert!(ent.file_type().is_symlink());
+ assert!(!ent.file_type().is_file());
+ assert!(!ent.file_type().is_dir());
+
+ assert!(ent.metadata().unwrap().file_type().is_symlink());
+ assert!(!ent.metadata().unwrap().file_type().is_file());
+ assert!(!ent.metadata().unwrap().file_type().is_dir());
+}
+
+#[test]
+fn sym_file_self_loop_io_error() {
+ let dir = Dir::tmp();
+ dir.symlink_file("a", "a");
+
+ let wd = WalkDir::new(dir.path()).follow_links(true);
+ let r = dir.run_recursive(wd);
+
+ let (ents, errs) = (r.sorted_ents(), r.errs());
+ assert_eq!(1, ents.len());
+ assert_eq!(1, errs.len());
+
+ let err = &errs[0];
+
+ let expected = dir.join("a");
+ assert_eq!(Some(&*expected), err.path());
+ assert_eq!(1, err.depth());
+ assert!(err.loop_ancestor().is_none());
+ assert!(err.io_error().is_some());
+}
+
+#[test]
+fn sym_dir_self_loop_io_error() {
+ let dir = Dir::tmp();
+ dir.symlink_dir("a", "a");
+
+ let wd = WalkDir::new(dir.path()).follow_links(true);
+ let r = dir.run_recursive(wd);
+
+ let (ents, errs) = (r.sorted_ents(), r.errs());
+ assert_eq!(1, ents.len());
+ assert_eq!(1, errs.len());
+
+ let err = &errs[0];
+
+ let expected = dir.join("a");
+ assert_eq!(Some(&*expected), err.path());
+ assert_eq!(1, err.depth());
+ assert!(err.loop_ancestor().is_none());
+ assert!(err.io_error().is_some());
+}
+
+// `min_depth(1)` drops the root and keeps everything below it.
+#[test]
+fn min_depth_1() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b");
+
+    let walker = WalkDir::new(tmp.path()).min_depth(1);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let a = tmp.join("a");
+    let b = a.join("b");
+    assert_eq!(vec![a, b], res.sorted_paths());
+}
+
+// `min_depth(2)` drops the root and its direct children.
+#[test]
+fn min_depth_2() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b");
+
+    let walker = WalkDir::new(tmp.path()).min_depth(2);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let b = tmp.join("a").join("b");
+    assert_eq!(vec![b], res.sorted_paths());
+}
+
+// `max_depth(0)` yields nothing but the root.
+#[test]
+fn max_depth_0() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b");
+
+    let walker = WalkDir::new(tmp.path()).max_depth(0);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let root = tmp.path().to_path_buf();
+    assert_eq!(vec![root], res.sorted_paths());
+}
+
+// `max_depth(1)` yields the root and its direct children only.
+#[test]
+fn max_depth_1() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b");
+
+    let walker = WalkDir::new(tmp.path()).max_depth(1);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let root = tmp.path().to_path_buf();
+    let a = tmp.join("a");
+    assert_eq!(vec![root, a], res.sorted_paths());
+}
+
+// `max_depth(2)` reaches two levels below the root.
+#[test]
+fn max_depth_2() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b");
+
+    let walker = WalkDir::new(tmp.path()).max_depth(2);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let root = tmp.path().to_path_buf();
+    let a = tmp.join("a");
+    let b = a.join("b");
+    assert_eq!(vec![root, a, b], res.sorted_paths());
+}
+
+// FIXME: This test seems wrong. It should return nothing!
+//
+// As written it pins the current behaviour: asking for `min_depth(3)` and
+// then `max_depth(2)` still yields the single entry at depth 3.
+#[test]
+fn min_max_depth_diff_nada() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b/c");
+
+    let walker = WalkDir::new(tmp.path()).min_depth(3).max_depth(2);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let c = tmp.join("a").join("b").join("c");
+    assert_eq!(vec![c], res.sorted_paths());
+}
+
+// Equal `min_depth` and `max_depth` select exactly one level.
+#[test]
+fn min_max_depth_diff_0() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b/c");
+
+    let walker = WalkDir::new(tmp.path()).min_depth(2).max_depth(2);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let b = tmp.join("a").join("b");
+    assert_eq!(vec![b], res.sorted_paths());
+}
+
+// `min_depth(1)` with `max_depth(2)` selects levels one and two.
+#[test]
+fn min_max_depth_diff_1() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("a/b/c");
+
+    let walker = WalkDir::new(tmp.path()).min_depth(1).max_depth(2);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    let a = tmp.join("a");
+    let b = a.join("b");
+    assert_eq!(vec![a, b], res.sorted_paths());
+}
+
+// With `contents_first(true)` a directory is yielded after its contents.
+#[test]
+fn contents_first() {
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+
+    let walker = WalkDir::new(tmp.path()).contents_first(true);
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    // Unsorted on purpose: the yield order is what is being checked.
+    let file = tmp.join("a");
+    let root = tmp.path().to_path_buf();
+    assert_eq!(vec![file, root], res.paths());
+}
+
+// Calling `skip_current_dir` right after `bar` is yielded keeps `bar`
+// itself but hides everything beneath it.
+#[test]
+fn skip_current_dir() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo/bar/baz");
+    tmp.mkdirp("quux");
+
+    let mut seen = vec![];
+    let mut walker = WalkDir::new(tmp.path()).into_iter();
+    loop {
+        let ent = match walker.next() {
+            None => break,
+            Some(result) => result.unwrap(),
+        };
+        seen.push(ent.path().to_path_buf());
+        if ent.file_name() == "bar" {
+            walker.skip_current_dir();
+        }
+    }
+    seen.sort();
+
+    let foo = tmp.join("foo");
+    let expected = vec![
+        tmp.path().to_path_buf(),
+        foo.clone(),
+        foo.join("bar"),
+        tmp.join("quux"),
+    ];
+    assert_eq!(expected, seen);
+}
+
+// `filter_entry` drops `baz` and, with it, everything beneath `baz`.
+#[test]
+fn filter_entry() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo/bar/baz/abc");
+    tmp.mkdirp("quux");
+
+    let filtered = WalkDir::new(tmp.path())
+        .into_iter()
+        .filter_entry(|ent| ent.file_name() != "baz");
+    let res = tmp.run_recursive(filtered);
+    res.assert_no_errors();
+
+    let foo = tmp.join("foo");
+    let expected = vec![
+        tmp.path().to_path_buf(),
+        foo.clone(),
+        foo.join("bar"),
+        tmp.join("quux"),
+    ];
+    assert_eq!(expected, res.sorted_paths());
+}
+
+// A reverse-by-name comparator makes `quux` come out before `foo`.
+#[test]
+fn sort() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo/bar/baz/abc");
+    tmp.mkdirp("quux");
+
+    let walker = WalkDir::new(tmp.path())
+        .sort_by(|a, b| a.file_name().cmp(b.file_name()).reverse());
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    // Unsorted on purpose: the yield order is what is being checked.
+    let foo = tmp.join("foo");
+    let bar = foo.join("bar");
+    let baz = bar.join("baz");
+    let abc = baz.join("abc");
+    let expected =
+        vec![tmp.path().to_path_buf(), tmp.join("quux"), foo, bar, baz, abc];
+    assert_eq!(expected, res.paths());
+}
+
+// Same as `sort`, but with `max_open(1)`.
+#[test]
+fn sort_max_open() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo/bar/baz/abc");
+    tmp.mkdirp("quux");
+
+    let walker = WalkDir::new(tmp.path())
+        .max_open(1)
+        .sort_by(|a, b| a.file_name().cmp(b.file_name()).reverse());
+    let res = tmp.run_recursive(walker);
+    res.assert_no_errors();
+
+    // Unsorted on purpose: the yield order is what is being checked.
+    let foo = tmp.join("foo");
+    let bar = foo.join("bar");
+    let baz = bar.join("baz");
+    let abc = baz.join("abc");
+    let expected =
+        vec![tmp.path().to_path_buf(), tmp.join("quux"), foo, bar, baz, abc];
+    assert_eq!(expected, res.paths());
+}
+
+// With `same_file_system(true)` a followed symlink into `/sys` is listed
+// but not descended into.
+#[cfg(target_os = "linux")]
+#[test]
+fn same_file_system() {
+    use std::path::Path;
+
+    // Setting up a separate mounted volume inside a test is questionable,
+    // so an existing one is probed for instead. Without it there is nothing
+    // to check.
+    if !Path::new("/sys").is_dir() {
+        return;
+    }
+
+    let tmp = Dir::tmp();
+    tmp.touch("a");
+    tmp.symlink_dir("/sys", "sys-link");
+
+    // Both walks below are expected to produce exactly these paths.
+    let expected =
+        vec![tmp.path().to_path_buf(), tmp.join("a"), tmp.join("sys-link")];
+
+    // Sanity check first: nothing surprising without following symlinks.
+    let plain = tmp.run_recursive(WalkDir::new(tmp.path()));
+    plain.assert_no_errors();
+    assert_eq!(expected, plain.sorted_paths());
+
+    // ... now follow symlinks and ensure we don't descend into /sys.
+    let walker =
+        WalkDir::new(tmp.path()).same_file_system(true).follow_links(true);
+    let followed = tmp.run_recursive(walker);
+    followed.assert_no_errors();
+    assert_eq!(expected, followed.sorted_paths());
+}
+
+// Tests that skip_current_dir doesn't destroy internal invariants.
+//
+// The test passes as long as none of the calls below panic.
+//
+// See: https://github.com/BurntSushi/walkdir/issues/118
+#[test]
+fn regression_skip_current_dir() {
+    let tmp = Dir::tmp();
+    tmp.mkdirp("foo/a/b");
+    tmp.mkdirp("foo/1/2");
+
+    let mut walker = WalkDir::new(tmp.path()).max_open(1).into_iter();
+    for _ in 0..4 {
+        walker.next();
+    }
+
+    walker.skip_current_dir();
+    walker.skip_current_dir();
+    walker.next();
+}
diff --git a/src/tests/util.rs b/src/tests/util.rs
new file mode 100644
index 0000000..fdf06f5
--- /dev/null
+++ b/src/tests/util.rs
@@ -0,0 +1,252 @@
+use std::env;
+use std::error;
+use std::fs::{self, File};
+use std::io;
+use std::path::{Path, PathBuf};
+use std::result;
+
+use crate::{DirEntry, Error};
+
+/// Create a boxed error from a `format!`-like syntax.
+///
+/// The arguments are passed to `format!` unchanged and the resulting
+/// `String` is converted into a `Box<dyn Error + Send + Sync>`.
+///
+/// The expansion names the error trait by its absolute path, so the macro
+/// works at call sites that do not have `std::error` in scope.
+#[macro_export]
+macro_rules! err {
+    ($($tt:tt)*) => {
+        Box::<dyn ::std::error::Error + Send + Sync>::from(format!($($tt)*))
+    }
+}
+
+/// A convenient result type alias whose error is a boxed, thread-safe
+/// `std::error::Error` trait object (the type that `err!` produces).
+pub type Result<T> = result::Result<T, Box<dyn error::Error + Send + Sync>>;
+
+/// Everything collected from running a recursive directory iterator over a
+/// single directory: the entries it yielded and the errors it yielded.
+#[derive(Debug)]
+pub struct RecursiveResults {
+    ents: Vec<DirEntry>,
+    errs: Vec<Error>,
+}
+
+impl RecursiveResults {
+    /// Return every error encountered during traversal.
+    pub fn errs(&self) -> &[Error] {
+        self.errs.as_slice()
+    }
+
+    /// Panic, listing the collected errors, unless traversal was error free.
+    pub fn assert_no_errors(&self) {
+        let errs = &self.errs;
+        assert!(
+            errs.is_empty(),
+            "expected to find no errors, but found: {:?}",
+            errs
+        );
+    }
+
+    /// Return the successfully retrieved directory entries, in the order in
+    /// which they were retrieved.
+    pub fn ents(&self) -> &[DirEntry] {
+        self.ents.as_slice()
+    }
+
+    /// Return the path of every successfully retrieved directory entry, in
+    /// retrieval order.
+    ///
+    /// Paths that correspond to an error are not included.
+    pub fn paths(&self) -> Vec<PathBuf> {
+        let mut paths = Vec::with_capacity(self.ents.len());
+        for ent in &self.ents {
+            paths.push(ent.path().to_path_buf());
+        }
+        paths
+    }
+
+    /// Return a copy of the successfully retrieved directory entries, sorted
+    /// lexicographically by their full file path.
+    pub fn sorted_ents(&self) -> Vec<DirEntry> {
+        let mut sorted = self.ents.to_vec();
+        sorted.sort_by(|lhs, rhs| Ord::cmp(lhs.path(), rhs.path()));
+        sorted
+    }
+
+    /// Return the path of every successfully retrieved directory entry,
+    /// sorted lexicographically.
+    ///
+    /// Paths that correspond to an error are not included.
+    pub fn sorted_paths(&self) -> Vec<PathBuf> {
+        self.sorted_ents().into_iter().map(DirEntry::into_path).collect()
+    }
+}
+
+/// A scratch directory in which a test can build a file tree.
+///
+/// Paths passed to the methods on this type are joined onto this
+/// directory's path before use.
+#[derive(Debug)]
+pub struct Dir {
+    dir: TempDir,
+}
+
+impl Dir {
+    /// Create a new, empty temporary directory.
+    ///
+    /// Panics if the underlying `TempDir` cannot be created.
+    pub fn tmp() -> Dir {
+        Dir { dir: TempDir::new().unwrap() }
+    }
+
+    /// Return the path to this directory.
+    pub fn path(&self) -> &Path {
+        self.dir.path()
+    }
+
+    /// Return the given path joined onto the path to this directory.
+    pub fn join<P: AsRef<Path>>(&self, path: P) -> PathBuf {
+        let base: &Path = self.dir.path();
+        base.join(path)
+    }
+
+    /// Drain the given iterator, splitting its items into the directory
+    /// entries it yielded and the errors it yielded. The relative order
+    /// within each group is preserved.
+    pub fn run_recursive<I>(&self, it: I) -> RecursiveResults
+    where
+        I: IntoIterator<Item = result::Result<DirEntry, Error>>,
+    {
+        let (mut ents, mut errs) = (vec![], vec![]);
+        for item in it {
+            match item {
+                Ok(ent) => ents.push(ent),
+                Err(err) => errs.push(err),
+            }
+        }
+        RecursiveResults { ents, errs }
+    }
+
+    /// Create a directory at the given path, creating every missing
+    /// intermediate directory along the way.
+    ///
+    /// Panics if the directory cannot be created.
+    pub fn mkdirp<P: AsRef<Path>>(&self, path: P) {
+        let target = self.join(path);
+        let made: Result<()> = fs::create_dir_all(&target).map_err(|e| {
+            err!("failed to create directory {}: {}", target.display(), e)
+        });
+        made.unwrap();
+    }
+
+    /// Create an empty file at the given path. All ancestor directories
+    /// must already exist.
+    ///
+    /// Panics if the file cannot be created.
+    pub fn touch<P: AsRef<Path>>(&self, path: P) {
+        let target = self.join(path);
+        let made: Result<File> = File::create(&target).map_err(|e| {
+            err!("failed to create file {}: {}", target.display(), e)
+        });
+        made.unwrap();
+    }
+
+    /// Create an empty file at each of the given paths. All ancestor
+    /// directories must already exist.
+    pub fn touch_all<P: AsRef<Path>>(&self, paths: &[P]) {
+        paths.iter().for_each(|p| self.touch(p));
+    }
+
+    /// Create a file symlink named `link_name` that points at `src`.
+    ///
+    /// Both paths are joined onto this directory first. Panics if the
+    /// symlink cannot be created.
+    pub fn symlink_file<P1: AsRef<Path>, P2: AsRef<Path>>(
+        &self,
+        src: P1,
+        link_name: P2,
+    ) {
+        #[cfg(windows)]
+        fn link(original: &Path, new_link: &Path) -> io::Result<()> {
+            std::os::windows::fs::symlink_file(original, new_link)
+        }
+
+        #[cfg(unix)]
+        fn link(original: &Path, new_link: &Path) -> io::Result<()> {
+            std::os::unix::fs::symlink(original, new_link)
+        }
+
+        let target = self.join(src);
+        let link_path = self.join(link_name);
+        let linked: Result<()> = link(&target, &link_path).map_err(|e| {
+            err!(
+                "failed to symlink file {} with target {}: {}",
+                target.display(),
+                link_path.display(),
+                e
+            )
+        });
+        linked.unwrap()
+    }
+
+    /// Create a directory symlink named `link_name` that points at `src`.
+    ///
+    /// Both paths are joined onto this directory first (joining an absolute
+    /// `src` yields that absolute path). Panics if the symlink cannot be
+    /// created.
+    pub fn symlink_dir<P1: AsRef<Path>, P2: AsRef<Path>>(
+        &self,
+        src: P1,
+        link_name: P2,
+    ) {
+        #[cfg(windows)]
+        fn link(original: &Path, new_link: &Path) -> io::Result<()> {
+            std::os::windows::fs::symlink_dir(original, new_link)
+        }
+
+        #[cfg(unix)]
+        fn link(original: &Path, new_link: &Path) -> io::Result<()> {
+            std::os::unix::fs::symlink(original, new_link)
+        }
+
+        let target = self.join(src);
+        let link_path = self.join(link_name);
+        let linked: Result<()> = link(&target, &link_path).map_err(|e| {
+            err!(
+                "failed to symlink directory {} with target {}: {}",
+                target.display(),
+                link_path.display(),
+                e
+            )
+        });
+        linked.unwrap()
+    }
+}
+
+/// A simple wrapper for creating a temporary directory that is automatically
+/// deleted when it's dropped.
+///
+/// We use this in lieu of tempfile because tempfile brings in too many
+/// dependencies.
+#[derive(Debug)]
+pub struct TempDir(PathBuf);
+
+impl Drop for TempDir {
+    /// Recursively delete the directory.
+    ///
+    /// A failed cleanup panics so that leaked directories get noticed, but
+    /// only when the thread is not already unwinding. Panicking during an
+    /// unwind (for example after a failed test assertion) would abort the
+    /// process and hide the original failure.
+    fn drop(&mut self) {
+        if let Err(e) = fs::remove_dir_all(&self.0) {
+            if !std::thread::panicking() {
+                panic!("failed to remove {}: {}", self.0.display(), e);
+            }
+        }
+    }
+}
+
+impl TempDir {
+    /// Create a new empty temporary directory under the system's configured
+    /// temporary directory.
+    ///
+    /// Candidate directories are named `rust-walkdir/<n>`, where `<n>` comes
+    /// from a process-wide counter. Each candidate is created with
+    /// `fs::create_dir`, which fails if the directory already exists, so a
+    /// directory owned by another `TempDir` (in this or any other process)
+    /// is never handed out a second time.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the `rust-walkdir` parent cannot be created, if
+    /// creating a candidate fails for any reason other than it already
+    /// existing, or if every attempt hit an existing path.
+    pub fn new() -> Result<TempDir> {
+        use std::sync::atomic::{AtomicUsize, Ordering};
+
+        const TRIES: usize = 100;
+        static COUNTER: AtomicUsize = AtomicUsize::new(0);
+
+        let parent = env::temp_dir().join("rust-walkdir");
+        fs::create_dir_all(&parent).map_err(|e| {
+            err!("failed to create {}: {}", parent.display(), e)
+        })?;
+        for _ in 0..TRIES {
+            let count = COUNTER.fetch_add(1, Ordering::SeqCst);
+            let path = parent.join(count.to_string());
+            // Creation doubles as the existence check, so there is no window
+            // between "does it exist?" and "create it" for another process
+            // to claim the same name.
+            match fs::create_dir(&path) {
+                Ok(()) => return Ok(TempDir(path)),
+                Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => {
+                    continue
+                }
+                Err(e) => {
+                    return Err(err!(
+                        "failed to create {}: {}",
+                        path.display(),
+                        e
+                    ));
+                }
+            }
+        }
+        Err(err!("failed to create temp dir after {} tries", TRIES))
+    }
+
+    /// Return the underlying path to this temporary directory.
+    pub fn path(&self) -> &Path {
+        &self.0
+    }
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..b9fcad8
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,25 @@
+use std::io;
+use std::path::Path;
+
+/// Return the ID of the device containing the file at `path`.
+///
+/// Unix implementation: reads the file's metadata and returns its `dev()`
+/// value. Any I/O error from the metadata lookup is returned unchanged.
+#[cfg(unix)]
+pub fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+    use std::os::unix::fs::MetadataExt;
+
+    let md = path.as_ref().metadata()?;
+    Ok(md.dev())
+}
+
+/// Return a number identifying the volume that holds the file at `path`.
+///
+/// Windows implementation: opens a handle to the path and returns the
+/// volume serial number from the handle's file information. An error from
+/// either step is returned unchanged.
+#[cfg(windows)]
+pub fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
+    use winapi_util::{file, Handle};
+
+    let handle = Handle::from_path_any(path)?;
+    let info = file::information(handle)?;
+    Ok(info.volume_serial_number())
+}
+
+/// Fallback for platforms that are neither unix nor windows.
+///
+/// Always returns an error, because no device number lookup is implemented
+/// for such platforms. The path argument is ignored.
+#[cfg(not(any(unix, windows)))]
+pub fn device_num<P: AsRef<Path>>(_: P) -> io::Result<u64> {
+    let unsupported = io::Error::new(
+        io::ErrorKind::Other,
+        "walkdir: same_file_system option not supported on this platform",
+    );
+    Err(unsupported)
+}