aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElie Kheirallah <khei@google.com>2022-11-21 22:32:37 +0000
committerAutomerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>2022-11-21 22:32:37 +0000
commitcd8d5711271bb0469c4b8c9dc47a4001ae0a48a2 (patch)
tree538bf65de32794e15b24be044d278411c116bd70
parent66db8983a6a1b6803ca598776467fc67d143c2a9 (diff)
parent99912074b61175871348de10f21426578b3b3447 (diff)
downloaduserfaultfd-cd8d5711271bb0469c4b8c9dc47a4001ae0a48a2.tar.gz
Initial import of userfaultfd-0.5.0 to AOSP am: 99912074b6
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/userfaultfd/+/2310309 Change-Id: Ic5d33f35d50b04301e99ac370689c536f7a79af3 Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rwxr-xr-xCHANGELOG.md36
-rw-r--r--Cargo.toml43
-rw-r--r--Cargo.toml.orig22
l---------LICENSE1
-rw-r--r--LICENSE-APACHE176
-rw-r--r--LICENSE-MIT23
-rw-r--r--METADATA20
-rw-r--r--MODULE_LICENSE_APACHE20
-rw-r--r--OWNERS5
-rw-r--r--README.md11
-rw-r--r--SECURITY.md9
-rw-r--r--examples/manpage.rs136
-rw-r--r--src/builder.rs162
-rw-r--r--src/error.rs56
-rw-r--r--src/event.rs148
-rw-r--r--src/lib.rs664
-rw-r--r--src/raw.rs25
-rw-r--r--tests/manpage_example.rs8
18 files changed, 1545 insertions, 0 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100755
index 0000000..b2621e9
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,36 @@
+### Unreleased
+
+- Added `Uffd::read_events` that can read multiple events from the userfaultfd file descriptor.
+
+### 0.3.1 (2021-02-17)
+
+- Added support for the `UFFD_FEATURE_THREAD_ID` flag when compiled with the `linux4_14` Cargo
+ feature.
+
+### 0.3.0 (2021-02-03)
+
+- Update `bindgen` dependency of `userfaultfd-sys` to `0.57`. Thank you @jgowans
+
+### 0.2.1 (2020-11-20)
+
+- Make `ReadWrite` public. Thank you @electroCutie
+
+### 0.2.0 (2020-04-10)
+
+- Removed the compile-time Linux version check, and replaced it with a Cargo feature.
+
+ The Linux version check was overly restrictive, even on systems that did have the right kernel
+ version installed but had older headers in `/usr/include/linux`. Beyond that, this check made it
+ more difficult to compile on a different host than what's targeted.
+
+ There is now a `linux4_14` feature flag on `userfaultfd-sys`, which turns on and tests the extra
+ constants available in that version. Since `userfaultfd` did not make use of any of those newer
+ features, it doesn't have a feature flag yet.
+
+ Applications should take care when initializing with `UffdBuilder` to specify the features and
+ ioctls they require, so that an unsupported version will be detected at runtime.
+
+
+### 0.1.0 (2020-04-07)
+
+- Initial public release of userfaultfd-rs.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..a188797
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,43 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+name = "userfaultfd"
+version = "0.5.0"
+authors = ["Adam C. Foltzer <acfoltzer@fastly.com>"]
+edition = "2018"
+license = "MIT OR Apache-2.0"
+description = "Rust bindings for the Linux userfaultfd functionality"
+repository = "https://github.com/fastly/userfaultfd-rs"
+readme = "README.md"
+
+[dependencies.bitflags]
+version = "1.0"
+
+[dependencies.cfg-if]
+version = "^1.0.0"
+
+[dependencies.libc]
+version = "0.2.65"
+
+[dependencies.nix]
+version = "0.23"
+
+[dependencies.thiserror]
+version = "1.0.4"
+
+[dependencies.userfaultfd-sys]
+version = "^0.4.0"
+
+[features]
+default = []
+linux4_14 = ["userfaultfd-sys/linux4_14"]
+linux5_7 = ["userfaultfd-sys/linux5_7"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..153efe6
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,22 @@
+[package]
+name = "userfaultfd"
+version = "0.5.0"
+authors = ["Adam C. Foltzer <acfoltzer@fastly.com>"]
+edition = "2018"
+license = "MIT OR Apache-2.0"
+description = "Rust bindings for the Linux userfaultfd functionality"
+repository = "https://github.com/fastly/userfaultfd-rs"
+readme = "README.md"
+
+[dependencies]
+bitflags = "1.0"
+cfg-if = "^1.0.0"
+libc = "0.2.65"
+nix = "0.23"
+thiserror = "1.0.4"
+userfaultfd-sys = { path = "userfaultfd-sys", version = "^0.4.0" }
+
+[features]
+default = []
+linux4_14 = ["userfaultfd-sys/linux4_14"]
+linux5_7 = ["userfaultfd-sys/linux5_7"]
diff --git a/LICENSE b/LICENSE
new file mode 120000
index 0000000..6b579aa
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1 @@
+LICENSE-APACHE \ No newline at end of file
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
new file mode 100644
index 0000000..1b5ec8b
--- /dev/null
+++ b/LICENSE-APACHE
@@ -0,0 +1,176 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/LICENSE-MIT b/LICENSE-MIT
new file mode 100644
index 0000000..31aa793
--- /dev/null
+++ b/LICENSE-MIT
@@ -0,0 +1,23 @@
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..9ffa4db
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,20 @@
+name: "userfaultfd"
+description: "Rust bindings for the Linux userfaultfd functionality"
+third_party {
+ url {
+ type: HOMEPAGE
+ value: "https://crates.io/crates/userfaultfd"
+ }
+ url {
+ type: ARCHIVE
+ value: "https://static.crates.io/crates/userfaultfd/userfaultfd-0.5.0.crate"
+ }
+ version: "0.5.0"
+ # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same.
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 11
+ day: 16
+ }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..3abd431
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1,5 @@
+include platform/prebuilts/rust:master:/OWNERS
+devinmoore@google.com
+fmayle@google.com
+khei@google.com
+smoreland@google.com
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d485743
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+![Build](https://github.com/fastly/userfaultfd-rs/workflows/Rust/badge.svg)
+
+# Userfaultfd-rs
+Rust bindings for Linux's userfaultfd functionality.
+
+## License
+
+This software is distributed under the terms of both the MIT license and the Apache License (Version 2.0).
+
+See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT).
+
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..555ec87
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,9 @@
+## Report a security issue
+
+Fastly welcomes security reports and is committed to providing prompt attention to security issues. Security issues should be reported privately via [Fastly’s security issue reporting process](https://www.fastly.com/security/report-security-issue).
+
+## Security advisories
+
+Remediation of security vulnerabilities is prioritized. The project team endeavors to coordinate remediation with third-party stakeholders, and is committed to transparency in the disclosure process. The team announces security issues via release notes as well as the [RustSec advisory database](https://github.com/RustSec/advisory-db) (i.e. `cargo-audit`) on a best-effort basis.
+
+Note that communications related to security issues in Fastly-maintained OSS as described here are distinct from [Fastly Security Advisories](https://www.fastly.com/security-advisories).
diff --git a/examples/manpage.rs b/examples/manpage.rs
new file mode 100644
index 0000000..f8d6848
--- /dev/null
+++ b/examples/manpage.rs
@@ -0,0 +1,136 @@
+//! Port of the example from the `userfaultfd` manpage.
+use libc::{self, c_void};
+use nix::poll::{poll, PollFd, PollFlags};
+use nix::sys::mman::{mmap, MapFlags, ProtFlags};
+use nix::unistd::{sysconf, SysconfVar};
+use std::env;
+use std::os::unix::io::AsRawFd;
+use std::ptr;
+use userfaultfd::{Event, Uffd, UffdBuilder};
+
+/// Handles page faults for the memory registered on `uffd`.
+///
+/// Runs forever: each iteration polls the descriptor, reads one event, and resolves the fault by
+/// copying a scratch page (refilled with a letter that changes per fault) over the faulting page.
+///
+/// # Panics
+///
+/// Panics if any system call fails, if no event is ready after `poll`, or if an event other than
+/// a page fault is read.
+fn fault_handler_thread(uffd: Uffd) {
+    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize;
+
+    // Create a page that will be copied into the faulting region
+
+    // SAFETY: an anonymous private mapping at a kernel-chosen address aliases no existing memory.
+    let page = unsafe {
+        mmap(
+            ptr::null_mut(),
+            page_size,
+            ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
+            MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS,
+            -1,
+            0,
+        )
+        .expect("mmap")
+    };
+
+    // Loop, handling incoming events on the userfaultfd file descriptor
+
+    // Counted as `usize`: a `u8` counter would overflow (and panic in debug builds) once more
+    // than 255 faults have been handled.
+    let mut fault_cnt: usize = 0;
+    loop {
+        // See what poll() tells us about the userfaultfd
+
+        // poll() writes the returned events into the array it is given. `PollFd` is `Copy`, so
+        // the array must be a named binding that is inspected afterwards; building a temporary
+        // array from a separate `PollFd` would leave that original value untouched.
+        let mut pollfds = [PollFd::new(uffd.as_raw_fd(), PollFlags::POLLIN)];
+        let nready = poll(&mut pollfds, -1).expect("poll");
+
+        println!("\nfault_handler_thread():");
+        let revents = pollfds[0].revents().unwrap();
+        println!(
+            " poll() returns: nready = {}; POLLIN = {}; POLLERR = {}",
+            nready,
+            revents.contains(PollFlags::POLLIN),
+            revents.contains(PollFlags::POLLERR),
+        );
+
+        // Read an event from the userfaultfd
+        let event = uffd
+            .read_event()
+            .expect("read uffd_msg")
+            .expect("uffd_msg ready");
+
+        // We expect only one kind of event; verify that assumption
+
+        if let Event::Pagefault { addr, .. } = event {
+            // Display info about the page-fault event
+
+            println!(" UFFD_EVENT_PAGEFAULT event: {:?}", event);
+
+            // Copy the page pointed to by 'page' into the faulting region. Vary the contents that are
+            // copied in, so that it is more obvious that each fault is handled separately.
+
+            // The remainder is always below 20, so narrowing it to `u8` cannot truncate.
+            let fill = b'A' + (fault_cnt % 20) as u8;
+            // SAFETY: `page` is the start of the `page_size`-byte read/write mapping created
+            // above, and nothing else in this function holds a reference into it.
+            for c in unsafe { std::slice::from_raw_parts_mut(page as *mut u8, page_size) } {
+                *c = fill;
+            }
+            fault_cnt += 1;
+
+            // Round the faulting address down to the start of its page.
+            let dst = (addr as usize & !(page_size - 1)) as *mut c_void;
+            let copy = unsafe { uffd.copy(page, dst, page_size, true).expect("uffd copy") };
+
+            println!(" (uffdio_copy.copy returned {})", copy);
+        } else {
+            panic!("Unexpected event on userfaultfd");
+        }
+    }
+}
+
+/// Maps `<num_pages>` pages of anonymous memory, registers them with a userfaultfd object,
+/// starts the fault-handling thread, and then reads through the mapping to trigger faults.
+///
+/// Panics if the argument is missing or not a number, or if any setup call fails.
+fn main() {
+    let num_pages: usize = env::args()
+        .nth(1)
+        .expect("Usage: manpage <num_pages>")
+        .parse()
+        .unwrap();
+
+    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize;
+    let len = num_pages * page_size;
+
+    // Set up the userfaultfd object that the handler thread will service.
+
+    let uffd = UffdBuilder::new()
+        .close_on_exec(true)
+        .non_blocking(true)
+        .user_mode_only(true)
+        .create()
+        .expect("uffd creation");
+
+    // Reserve the region as a private anonymous mapping. Nothing is allocated for it yet; the
+    // pages get their contents from the userfaultfd handler the first time they are touched.
+
+    let addr = unsafe {
+        mmap(
+            ptr::null_mut(),
+            len,
+            ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
+            MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS,
+            -1,
+            0,
+        )
+        .expect("mmap")
+    };
+
+    println!("Address returned by mmap() = {:p}", addr);
+
+    // Hand the whole mapping to the userfaultfd object so that accesses to pages that have not
+    // been faulted in yet are reported to it.
+
+    uffd.register(addr, len).expect("uffd.register()");
+
+    // From here on the handler thread owns the userfaultfd object.
+    let _handler = std::thread::spawn(move || fault_handler_thread(uffd));
+
+    // Read one byte every 1024 bytes across the mapping, which faults in every page of the
+    // region. Starting at offset 0xf keeps the faulting addresses off page boundaries, which
+    // exercises the address rounding done in fault_handler_thread().
+    let pause = std::time::Duration::from_millis(100);
+
+    for offset in (0xf..len).step_by(1024) {
+        let ptr = (addr as usize + offset) as *mut u8;
+        let c = unsafe { *ptr };
+        println!("Read address {:p} in main(): {:?}", ptr, c as char);
+        std::thread::sleep(pause);
+    }
+}
diff --git a/src/builder.rs b/src/builder.rs
new file mode 100644
index 0000000..b89efb4
--- /dev/null
+++ b/src/builder.rs
@@ -0,0 +1,162 @@
+use crate::error::{Error, Result};
+use crate::raw;
+use crate::{IoctlFlags, Uffd};
+use bitflags::bitflags;
+use nix::errno::Errno;
+
+// `FeatureFlags` is defined twice below and exactly one definition is compiled, selected by the
+// crate's Cargo features. With `linux4_14` or `linux5_7` enabled the type also carries `SIGBUS`
+// and `THREAD_ID`; otherwise only the first seven flags exist. Every flag takes its value from
+// the matching `UFFD_FEATURE_*` constant in `raw`.
+cfg_if::cfg_if! {
+ if #[cfg(any(feature = "linux5_7", feature = "linux4_14"))] {
+ bitflags! {
+ /// Used with `UffdBuilder` to determine which features are available in the current kernel.
+ pub struct FeatureFlags: u64 {
+ const PAGEFAULT_FLAG_WP = raw::UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ const EVENT_FORK = raw::UFFD_FEATURE_EVENT_FORK;
+ const EVENT_REMAP = raw::UFFD_FEATURE_EVENT_REMAP;
+ const EVENT_REMOVE = raw::UFFD_FEATURE_EVENT_REMOVE;
+ const MISSING_HUGETLBFS = raw::UFFD_FEATURE_MISSING_HUGETLBFS;
+ const MISSING_SHMEM = raw::UFFD_FEATURE_MISSING_SHMEM;
+ const EVENT_UNMAP = raw::UFFD_FEATURE_EVENT_UNMAP;
+ const SIGBUS = raw::UFFD_FEATURE_SIGBUS;
+ const THREAD_ID = raw::UFFD_FEATURE_THREAD_ID;
+ }
+ }
+ } else {
+ bitflags! {
+ /// Used with `UffdBuilder` to determine which features are available in the current kernel.
+ pub struct FeatureFlags: u64 {
+ const PAGEFAULT_FLAG_WP = raw::UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ const EVENT_FORK = raw::UFFD_FEATURE_EVENT_FORK;
+ const EVENT_REMAP = raw::UFFD_FEATURE_EVENT_REMAP;
+ const EVENT_REMOVE = raw::UFFD_FEATURE_EVENT_REMOVE;
+ const MISSING_HUGETLBFS = raw::UFFD_FEATURE_MISSING_HUGETLBFS;
+ const MISSING_SHMEM = raw::UFFD_FEATURE_MISSING_SHMEM;
+ const EVENT_UNMAP = raw::UFFD_FEATURE_EVENT_UNMAP;
+ }
+ }
+ }
+}
+/// A builder for initializing `Uffd` objects.
+///
+/// ```
+/// use userfaultfd::UffdBuilder;
+///
+/// let uffd = UffdBuilder::new()
+/// .close_on_exec(true)
+/// .non_blocking(true)
+/// .user_mode_only(true)
+/// .create();
+/// assert!(uffd.is_ok());
+/// ```
+pub struct UffdBuilder {
+ /// When set, `create()` adds `O_CLOEXEC` to the flags of the userfaultfd call.
+ close_on_exec: bool,
+ /// When set, `create()` adds `O_NONBLOCK` to the flags of the userfaultfd call.
+ non_blocking: bool,
+ /// When set, `create()` adds `UFFD_USER_MODE_ONLY` to the flags of the userfaultfd call.
+ user_mode_only: bool,
+ /// Features accumulated by `require_features()`; sent in the `features` field of the
+ /// `UFFDIO_API` request.
+ req_features: FeatureFlags,
+ /// Ioctls accumulated by `require_ioctls()`; `create()` fails unless the API handshake
+ /// reports all of them.
+ req_ioctls: IoctlFlags,
+}
+
+impl UffdBuilder {
+    /// Create a new builder with no required features or ioctls, `close_on_exec` and
+    /// `non_blocking` both set to `false`, and `user_mode_only` set to `true`.
+    pub fn new() -> UffdBuilder {
+        UffdBuilder {
+            close_on_exec: false,
+            non_blocking: false,
+            user_mode_only: true,
+            req_features: FeatureFlags::empty(),
+            req_ioctls: IoctlFlags::empty(),
+        }
+    }
+
+    /// Enable the close-on-exec flag for the new userfaultfd object (see the description of
+    /// `O_CLOEXEC` in [`open(2)`](http://man7.org/linux/man-pages/man2/open.2.html)).
+    pub fn close_on_exec(&mut self, close_on_exec: bool) -> &mut Self {
+        self.close_on_exec = close_on_exec;
+        self
+    }
+
+    /// Enable non-blocking operation for the userfaultfd object.
+    ///
+    /// If this is set to `false`, `Uffd::read_event()` will block until an event is available to
+    /// read. Otherwise, it will immediately return `None` if no event is available.
+    pub fn non_blocking(&mut self, non_blocking: bool) -> &mut Self {
+        self.non_blocking = non_blocking;
+        self
+    }
+
+    /// Enable user-mode only flag for the userfaultfd object.
+    ///
+    /// If set to `false`, the process must have the `CAP_SYS_PTRACE` capability starting with Linux 5.11
+    /// or object creation will fail with EPERM. When set to `true`, userfaultfd can't be used
+    /// to handle kernel-mode page faults such as when kernel tries copying data to userspace.
+    ///
+    /// When used with kernels older than 5.11, this has no effect; the process doesn't need
+    /// `CAP_SYS_PTRACE` and can handle kernel-mode page faults.
+    pub fn user_mode_only(&mut self, user_mode_only: bool) -> &mut Self {
+        self.user_mode_only = user_mode_only;
+        self
+    }
+
+    /// Add a requirement that a particular feature or set of features is available.
+    ///
+    /// Requirements accumulate across calls. If a required feature is unavailable,
+    /// `UffdBuilder.create()` will return an error.
+    pub fn require_features(&mut self, feature: FeatureFlags) -> &mut Self {
+        self.req_features |= feature;
+        self
+    }
+
+    /// Add a requirement that a particular ioctl or set of ioctls is available.
+    ///
+    /// Requirements accumulate across calls. If a required ioctl is unavailable,
+    /// `UffdBuilder.create()` will return an error.
+    pub fn require_ioctls(&mut self, ioctls: IoctlFlags) -> &mut Self {
+        self.req_ioctls |= ioctls;
+        self
+    }
+
+    /// Create a `Uffd` object with the current settings of this builder.
+    ///
+    /// # Errors
+    ///
+    /// - `Error::SystemError` if the userfaultfd call or the `UFFDIO_API` ioctl fails.
+    /// - `Error::UnrecognizedIoctls` if the kernel reports ioctl bits unknown to `IoctlFlags`.
+    /// - `Error::UnsupportedIoctls` if an ioctl added with `require_ioctls()` is not reported as
+    ///   available; the payload is the set that is available.
+    pub fn create(&self) -> Result<Uffd> {
+        // first do the syscall to get the file descriptor
+        let mut flags = 0;
+        if self.close_on_exec {
+            flags |= libc::O_CLOEXEC;
+        }
+        if self.non_blocking {
+            flags |= libc::O_NONBLOCK;
+        }
+
+        if self.user_mode_only {
+            flags |= raw::UFFD_USER_MODE_ONLY as i32;
+        }
+
+        // SAFETY: the call receives only an integer flag word; no pointers are passed.
+        let fd = match Errno::result(unsafe { raw::userfaultfd(flags) }) {
+            Ok(fd) => fd,
+            // setting the USER_MODE_ONLY flag on kernel pre-5.11 causes it to return EINVAL.
+            // If the user asks for the flag, we first try with it set, and if kernel gives
+            // EINVAL we try again without the flag set.
+            Err(Errno::EINVAL) if self.user_mode_only => Errno::result(unsafe {
+                raw::userfaultfd(flags & !raw::UFFD_USER_MODE_ONLY as i32)
+            })?,
+            Err(e) => return Err(e.into()),
+        };
+
+        // Wrap the fd up so that a failure in this function body closes it with the drop.
+        let uffd = Uffd { fd };
+
+        // then do the UFFDIO_API ioctl to set up and ensure features and other ioctls are available
+        let mut api = raw::uffdio_api {
+            api: raw::UFFD_API,
+            features: self.req_features.bits(),
+            ioctls: 0,
+        };
+        // SAFETY: `api` is a live local that is exclusively borrowed for the duration of the call.
+        unsafe {
+            raw::api(uffd.fd, &mut api as *mut raw::uffdio_api)?;
+        }
+        let supported =
+            IoctlFlags::from_bits(api.ioctls).ok_or(Error::UnrecognizedIoctls(api.ioctls))?;
+        if supported.contains(self.req_ioctls) {
+            Ok(uffd)
+        } else {
+            Err(Error::UnsupportedIoctls(supported))
+        }
+    }
+}
+
+impl Default for UffdBuilder {
+    /// Equivalent to [`UffdBuilder::new`]. Note that `user_mode_only` starts out `true`, which
+    /// is why this impl is written by hand rather than derived.
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..5cd8926
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,56 @@
+use crate::IoctlFlags;
+use nix::errno::Errno;
+use thiserror::Error;
+
+/// Shorthand for a `std::result::Result` whose error type is this crate's [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// Errors for this crate.
+///
+/// Several of these errors contain an underlying `Errno` value; see
+/// [`userfaultfd(2)`](http://man7.org/linux/man-pages/man2/userfaultfd.2.html) and
+/// [`ioctl_userfaultfd(2)`](http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html) for more
+/// details on how to interpret these errors.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// Copy ioctl failure with `errno` value.
+    // The errno is rendered in the message, as for `ZeropageFailed`: it is not marked as a
+    // `#[source]`, so otherwise it would be lost whenever the error is displayed.
+    #[error("Copy failed: {0}")]
+    CopyFailed(Errno),
+
+    /// Copy ioctl failure with copied length.
+    #[error("Copy partially succeeded")]
+    PartiallyCopied(usize),
+
+    /// Failure to read a full `uffd_msg` struct from the underlying file descriptor.
+    #[error("Incomplete uffd_msg; read only {read}/{expected} bytes")]
+    IncompleteMsg { read: usize, expected: usize },
+
+    /// Generic system error.
+    ///
+    /// The wrapped `nix` error is exposed through `std::error::Error::source`.
+    #[error("System error")]
+    SystemError(#[source] nix::Error),
+
+    /// End-of-file was read from the underlying file descriptor.
+    #[error("EOF when reading file descriptor")]
+    ReadEof,
+
+    /// An unrecognized event code was found in a `uffd_msg` struct.
+    #[error("Unrecognized event in uffd_msg: {0}")]
+    UnrecognizedEvent(u8),
+
+    /// An unrecognized ioctl bit was set in the result of API initialization or registration.
+    #[error("Unrecognized ioctl flags: {0}")]
+    UnrecognizedIoctls(u64),
+
+    /// Requested ioctls were not available when initializing the API.
+    #[error("Requested ioctls unsupported; supported: {0:?}")]
+    UnsupportedIoctls(IoctlFlags),
+
+    /// Zeropage ioctl failure with `errno` value.
+    #[error("Zeropage failed: {0}")]
+    ZeropageFailed(Errno),
+}
+
+impl From<nix::Error> for Error {
+ fn from(e: nix::Error) -> Error {
+ Error::SystemError(e)
+ }
+}
diff --git a/src/event.rs b/src/event.rs
new file mode 100644
index 0000000..8ea75f2
--- /dev/null
+++ b/src/event.rs
@@ -0,0 +1,148 @@
+use crate::error::{Error, Result};
+use crate::raw;
+use crate::Uffd;
+use libc::c_void;
+#[cfg(feature = "linux4_14")]
+use nix::unistd::Pid;
+use std::os::unix::io::{FromRawFd, RawFd};
+
+/// Whether a page fault event was for a read or write.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum ReadWrite {
+ Read,
+ Write,
+}
+
+/// The kind of fault for a page fault event.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum FaultKind {
+ /// The fault was a read or write on a missing page.
+ Missing,
+ /// The fault was a write on a write-protected page.
+ #[cfg(feature = "linux5_7")]
+ WriteProtected,
+}
+
+/// Events from the userfaultfd object that are read by `Uffd::read_event()`.
+#[derive(Debug)]
+pub enum Event {
+ /// A pagefault event.
+ Pagefault {
+ /// The kind of fault.
+ kind: FaultKind,
+ /// Whether the fault is on a read or a write.
+ rw: ReadWrite,
+ /// The address that triggered the fault.
+ addr: *mut c_void,
+ /// The thread that triggered the fault, if [`FeatureFlags::THREAD_ID`] is enabled.
+ ///
+ /// If the thread ID feature is not enabled, the value of this field is undefined. It would
+ /// not be undefined behavior to use it, strictly speaking, but the [`Pid`] will not
+ /// necessarily point to a real thread.
+ ///
+ /// This requires this crate to be compiled with the `linux4_14` feature.
+ #[cfg(feature = "linux4_14")]
+ thread_id: Pid,
+ },
+ /// Generated when the faulting process invokes `fork(2)` (or `clone(2)` without the `CLONE_VM`
+ /// flag).
+ Fork {
+ /// The `Uffd` object created for the child by `fork(2)`
+ uffd: Uffd,
+ },
+ /// Generated when the faulting process invokes `mremap(2)`.
+ Remap {
+ /// The original address of the memory range that was remapped.
+ from: *mut c_void,
+ /// The new address of the memory range that was remapped.
+ to: *mut c_void,
+ /// The original length of the memory range that was remapped.
+ len: usize,
+ },
+ /// Generated when the faulting process invokes `madvise(2)` with `MADV_DONTNEED` or
+ /// `MADV_REMOVE` advice.
+ Remove {
+ /// The start address of the memory range that was freed.
+ start: *mut c_void,
+ /// The end address of the memory range that was freed.
+ end: *mut c_void,
+ },
+ /// Generated when the faulting process unmaps a memory range, either explicitly using
+ /// `munmap(2)` or implicitly during `mmap(2)` or `mremap(2)`.
+ Unmap {
+ /// The start address of the memory range that was unmapped.
+ start: *mut c_void,
+ /// The end address of the memory range that was unmapped.
+ end: *mut c_void,
+ },
+}
+
+impl Event {
+ pub(crate) fn from_uffd_msg(msg: &raw::uffd_msg) -> Result<Event> {
+ match msg.event {
+ raw::UFFD_EVENT_PAGEFAULT => {
+ let pagefault = unsafe { msg.arg.pagefault };
+ cfg_if::cfg_if!(
+ if #[cfg(feature = "linux5_7")] {
+ let kind = if pagefault.flags & raw::UFFD_PAGEFAULT_FLAG_WP != 0 {
+ FaultKind::WriteProtected
+ } else {
+ FaultKind::Missing
+ };
+ } else {
+ let kind = FaultKind::Missing;
+ }
+ );
+
+ let rw = if pagefault.flags & raw::UFFD_PAGEFAULT_FLAG_WRITE == 0 {
+ ReadWrite::Read
+ } else {
+ ReadWrite::Write
+ };
+ // Converting the ptid to i32 is safe because the maximum pid in
+ // Linux is 2^22, which is about 4 million.
+ //
+ // Reference:
+ // https://github.com/torvalds/linux/blob/2d338201d5311bcd79d42f66df4cecbcbc5f4f2c/include/linux/threads.h
+ #[cfg(feature = "linux4_14")]
+ let thread_id = Pid::from_raw(unsafe { pagefault.feat.ptid } as i32);
+ Ok(Event::Pagefault {
+ kind,
+ rw,
+ addr: pagefault.address as *mut c_void,
+ #[cfg(feature = "linux4_14")]
+ thread_id,
+ })
+ }
+ raw::UFFD_EVENT_FORK => {
+ let fork = unsafe { msg.arg.fork };
+ Ok(Event::Fork {
+ uffd: unsafe { Uffd::from_raw_fd(fork.ufd as RawFd) },
+ })
+ }
+ raw::UFFD_EVENT_REMAP => {
+ let remap = unsafe { msg.arg.remap };
+ Ok(Event::Remap {
+ from: remap.from as *mut c_void,
+ to: remap.to as *mut c_void,
+ len: remap.len as usize,
+ })
+ }
+ raw::UFFD_EVENT_REMOVE => {
+ let remove = unsafe { msg.arg.remove };
+ Ok(Event::Remove {
+ start: remove.start as *mut c_void,
+ end: remove.end as *mut c_void,
+ })
+ }
+ raw::UFFD_EVENT_UNMAP => {
+ let remove = unsafe { msg.arg.remove };
+ Ok(Event::Unmap {
+ start: remove.start as *mut c_void,
+ end: remove.end as *mut c_void,
+ })
+ }
+ _ => Err(Error::UnrecognizedEvent(msg.event)),
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..9a7641b
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,664 @@
+//! A Linux mechanism for handling page faults in user space.
+//!
+//! The main way to interact with this library is to create a `Uffd` object with a `UffdBuilder`,
+//! then use the methods of `Uffd` from a worker thread.
+//!
+//! See [`userfaultfd(2)`](http://man7.org/linux/man-pages/man2/userfaultfd.2.html) and
+//! [`ioctl_userfaultfd(2)`](http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html) for more
+//! details.
+
+mod builder;
+mod error;
+mod event;
+mod raw;
+
+pub use crate::builder::{FeatureFlags, UffdBuilder};
+pub use crate::error::{Error, Result};
+pub use crate::event::{Event, FaultKind, ReadWrite};
+
+use bitflags::bitflags;
+use libc::{self, c_void};
+use nix::errno::Errno;
+use nix::unistd::read;
+use std::mem;
+use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
+
+/// Represents an opaque buffer where userfaultfd events are stored.
+///
+/// This is used in conjunction with [`Uffd::read_events`].
+pub struct EventBuffer(Vec<raw::uffd_msg>);
+
+impl EventBuffer {
+ /// Creates a new buffer for `size` number of events.
+ ///
+ /// [`Uffd::read_events`] will read up to this many events at a time.
+ pub fn new(size: usize) -> Self {
+ Self(vec![unsafe { mem::zeroed() }; size])
+ }
+}
+
+/// The userfaultfd object.
+///
+/// The userspace representation of the object is a file descriptor, so this type implements
+/// `AsRawFd`, `FromRawFd`, and `IntoRawFd`. These methods should be used with caution, but can be
+/// essential for using functions like `poll` on a worker thread.
+#[derive(Debug)]
+pub struct Uffd {
+ fd: RawFd,
+}
+
+impl Drop for Uffd {
+ fn drop(&mut self) {
+ unsafe { libc::close(self.fd) };
+ }
+}
+
+impl AsRawFd for Uffd {
+ fn as_raw_fd(&self) -> RawFd {
+ self.fd
+ }
+}
+
+impl IntoRawFd for Uffd { // hands ownership of the descriptor to the caller
+ fn into_raw_fd(self) -> RawFd {
+ mem::ManuallyDrop::new(self).fd // suppress `Drop`, which would close the fd being returned
+ }
+}
+
+impl FromRawFd for Uffd {
+ unsafe fn from_raw_fd(fd: RawFd) -> Self {
+ Uffd { fd }
+ }
+}
+
+bitflags! {
+ /// The registration mode used when registering an address range with `Uffd`.
+ pub struct RegisterMode: u64 {
+ /// Registers the range for missing page faults.
+ const MISSING = raw::UFFDIO_REGISTER_MODE_MISSING;
+ /// Registers the range for write faults.
+ #[cfg(feature = "linux5_7")]
+ const WRITE_PROTECT = raw::UFFDIO_REGISTER_MODE_WP;
+ }
+}
+
+impl Uffd {
+ /// Register a memory address range with the userfaultfd object, and returns the `IoctlFlags`
+ /// that are available for the selected range.
+ ///
+ /// This method only registers the given range for missing page faults.
+ pub fn register(&self, start: *mut c_void, len: usize) -> Result<IoctlFlags> {
+ self.register_with_mode(start, len, RegisterMode::MISSING)
+ }
+
+ /// Register a memory address range with the userfaultfd object for the given mode and
+ /// returns the `IoctlFlags` that are available for the selected range.
+ pub fn register_with_mode(
+ &self,
+ start: *mut c_void,
+ len: usize,
+ mode: RegisterMode,
+ ) -> Result<IoctlFlags> {
+ let mut register = raw::uffdio_register {
+ range: raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ },
+ mode: mode.bits(),
+ ioctls: 0,
+ };
+ unsafe {
+ raw::register(self.as_raw_fd(), &mut register as *mut raw::uffdio_register)?;
+ }
+ IoctlFlags::from_bits(register.ioctls).ok_or(Error::UnrecognizedIoctls(register.ioctls))
+ }
+
+ /// Unregister a memory address range from the userfaultfd object.
+ pub fn unregister(&self, start: *mut c_void, len: usize) -> Result<()> {
+ let mut range = raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ };
+ unsafe {
+ raw::unregister(self.as_raw_fd(), &mut range as *mut raw::uffdio_range)?;
+ }
+ Ok(())
+ }
+
+ /// Atomically copy a continuous memory chunk into the userfaultfd-registered range, and return
+ /// the number of bytes that were successfully copied.
+ ///
+ /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory
+ /// range.
+ pub unsafe fn copy(
+ &self,
+ src: *const c_void,
+ dst: *mut c_void,
+ len: usize,
+ wake: bool,
+ ) -> Result<usize> {
+ let mut copy = raw::uffdio_copy {
+ src: src as u64,
+ dst: dst as u64,
+ len: len as u64,
+ mode: if wake {
+ 0
+ } else {
+ raw::UFFDIO_COPY_MODE_DONTWAKE
+ },
+ copy: 0,
+ };
+
+ let _ =
+ raw::copy(self.as_raw_fd(), &mut copy as *mut raw::uffdio_copy).map_err(|errno| {
+ match errno {
+ Errno::EAGAIN => Error::PartiallyCopied(copy.copy as usize),
+ _ => Error::CopyFailed(errno),
+ }
+ })?;
+ if copy.copy < 0 {
+ // shouldn't ever get here, as errno should be caught above
+ Err(Error::CopyFailed(Errno::from_i32(-copy.copy as i32)))
+ } else {
+ Ok(copy.copy as usize)
+ }
+ }
+
+ /// Zero out a memory address range registered with userfaultfd, and return the number of bytes
+ /// that were successfully zeroed.
+ ///
+ /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory
+ /// address range.
+ pub unsafe fn zeropage(&self, start: *mut c_void, len: usize, wake: bool) -> Result<usize> {
+ let mut zeropage = raw::uffdio_zeropage {
+ range: raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ },
+ mode: if wake {
+ 0
+ } else {
+ raw::UFFDIO_ZEROPAGE_MODE_DONTWAKE
+ },
+ zeropage: 0,
+ };
+
+ let _ = raw::zeropage(self.as_raw_fd(), &mut zeropage as &mut raw::uffdio_zeropage)
+ .map_err(Error::ZeropageFailed)?;
+ if zeropage.zeropage < 0 {
+ // shouldn't ever get here, as errno should be caught above
+ Err(Error::ZeropageFailed(Errno::from_i32(
+ -zeropage.zeropage as i32,
+ )))
+ } else {
+ Ok(zeropage.zeropage as usize)
+ }
+ }
+
+ /// Wake up the thread waiting for page fault resolution on the specified memory address range.
+ pub fn wake(&self, start: *mut c_void, len: usize) -> Result<()> {
+ let mut range = raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ };
+ unsafe {
+ raw::wake(self.as_raw_fd(), &mut range as *mut raw::uffdio_range)?;
+ }
+ Ok(())
+ }
+
+ /// Makes a range write-protected.
+ #[cfg(feature = "linux5_7")]
+ pub fn write_protect(&self, start: *mut c_void, len: usize) -> Result<()> {
+ let mut ioctl = raw::uffdio_writeprotect {
+ range: raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ },
+ mode: raw::UFFDIO_WRITEPROTECT_MODE_WP,
+ };
+
+ unsafe {
+ raw::write_protect(
+ self.as_raw_fd(),
+ &mut ioctl as *mut raw::uffdio_writeprotect,
+ )?;
+ }
+
+ Ok(())
+ }
+
+ /// Removes the write-protection for a range.
+ ///
+ /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory
+ /// address range.
+ #[cfg(feature = "linux5_7")]
+ pub fn remove_write_protection(
+ &self,
+ start: *mut c_void,
+ len: usize,
+ wake: bool,
+ ) -> Result<()> {
+ let mut ioctl = raw::uffdio_writeprotect {
+ range: raw::uffdio_range {
+ start: start as u64,
+ len: len as u64,
+ },
+ mode: if wake {
+ 0
+ } else {
+ raw::UFFDIO_WRITEPROTECT_MODE_DONTWAKE
+ },
+ };
+
+ unsafe {
+ raw::write_protect(
+ self.as_raw_fd(),
+ &mut ioctl as *mut raw::uffdio_writeprotect,
+ )?;
+ }
+
+ Ok(())
+ }
+
+ /// Read an `Event` from the userfaultfd object.
+ ///
+ /// If the `Uffd` object was created with `non_blocking` set to `false`, this will block until
+ /// an event is successfully read (returning `Some(event)`), or an error is returned.
+ ///
+ /// If `non_blocking` was `true`, this will immediately return `None` if no event is ready to
+ /// read.
+ ///
+ /// Note that while this method doesn't require a mutable reference to the `Uffd` object, it
+ /// does consume bytes (thread-safely) from the underlying file descriptor.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// # use userfaultfd::{Uffd, Result};
+ /// fn read_event(uffd: &Uffd) -> Result<()> {
+ /// // Read a single event
+ /// match uffd.read_event()? {
+ /// Some(e) => {
+ /// // Do something with the event
+ /// },
+ /// None => {
+ /// // This was a non-blocking read and the descriptor was not ready for read
+ /// },
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn read_event(&self) -> Result<Option<Event>> {
+ let mut buf = [unsafe { std::mem::zeroed() }; 1];
+ let mut iter = self.read(&mut buf)?;
+ let event = iter.next().transpose()?;
+ assert!(iter.next().is_none());
+ Ok(event)
+ }
+
+ /// Read multiple events from the userfaultfd object using the given event buffer.
+ ///
+ /// If the `Uffd` object was created with `non_blocking` set to `false`, this will block until
+ /// an event is successfully read or an error is returned.
+ ///
+ /// If `non_blocking` was `true`, this will immediately return an empty iterator if the file
+ /// descriptor is not ready for reading.
+ ///
+ /// # Examples
+ ///
+ /// ```rust
+ /// # use userfaultfd::{Uffd, EventBuffer};
+ /// fn read_events(uffd: &Uffd) -> userfaultfd::Result<()> {
+ /// // Read up to 100 events at a time
+ /// let mut buf = EventBuffer::new(100);
+ /// for event in uffd.read_events(&mut buf)? {
+ /// let event = event?;
+ /// // Do something with the event...
+ /// }
+ /// Ok(())
+ /// }
+ /// ```
+ pub fn read_events<'a>(
+ &self,
+ buf: &'a mut EventBuffer,
+ ) -> Result<impl Iterator<Item = Result<Event>> + 'a> {
+ self.read(&mut buf.0)
+ }
+
+ fn read<'a>( // shared backend of `read_event` and `read_events`
+ &self,
+ msgs: &'a mut [raw::uffd_msg],
+ ) -> Result<impl Iterator<Item = Result<Event>> + 'a> {
+ const MSG_SIZE: usize = std::mem::size_of::<raw::uffd_msg>();
+
+ let buf = unsafe { // reinterpret the message slice as one byte buffer for `read`
+ std::slice::from_raw_parts_mut(msgs.as_mut_ptr() as _, msgs.len() * MSG_SIZE)
+ };
+
+ let count = match read(self.as_raw_fd(), buf) { // number of whole messages received
+ Err(e) if e == Errno::EAGAIN => 0, // nothing ready to read: yield an empty iterator
+ Err(e) => return Err(Error::SystemError(e)),
+ Ok(0) => return Err(Error::ReadEof),
+ Ok(bytes_read) => {
+ let remainder = bytes_read % MSG_SIZE;
+ if remainder != 0 { // a trailing partial message is reported as an error
+ return Err(Error::IncompleteMsg {
+ read: remainder,
+ expected: MSG_SIZE,
+ });
+ }
+
+ bytes_read / MSG_SIZE
+ }
+ };
+
+ Ok(msgs.iter().take(count).map(|msg| Event::from_uffd_msg(msg))) // decode lazily, one per item
+ }
+}
+
+bitflags! {
+ /// Used with `UffdBuilder` and `Uffd::register()` to determine which operations are available.
+ pub struct IoctlFlags: u64 {
+ const REGISTER = 1 << raw::_UFFDIO_REGISTER;
+ const UNREGISTER = 1 << raw::_UFFDIO_UNREGISTER;
+ const WAKE = 1 << raw::_UFFDIO_WAKE;
+ const COPY = 1 << raw::_UFFDIO_COPY;
+ const ZEROPAGE = 1 << raw::_UFFDIO_ZEROPAGE;
+ #[cfg(feature = "linux5_7")]
+ const WRITE_PROTECT = 1 << raw::_UFFDIO_WRITEPROTECT;
+ const API = 1 << raw::_UFFDIO_API;
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::ptr;
+ use std::thread;
+
+ #[test]
+ fn test_read_event() -> Result<()> {
+ const PAGE_SIZE: usize = 4096;
+
+ unsafe {
+ let uffd = UffdBuilder::new().close_on_exec(true).create()?;
+
+ let mapping = libc::mmap(
+ ptr::null_mut(),
+ PAGE_SIZE,
+ libc::PROT_READ | libc::PROT_WRITE,
+ libc::MAP_PRIVATE | libc::MAP_ANON,
+ -1,
+ 0,
+ );
+
+ assert_ne!(mapping, libc::MAP_FAILED); // mmap(2) reports failure as MAP_FAILED, not NULL
+
+ uffd.register(mapping, PAGE_SIZE)?;
+
+ let ptr = mapping as usize; // pass the address as an integer so the closure can be moved
+ let thread = thread::spawn(move || {
+ let ptr = ptr as *mut u8;
+ *ptr = 1;
+ });
+
+ match uffd.read_event()? {
+ Some(Event::Pagefault {
+ rw: ReadWrite::Write,
+ addr,
+ ..
+ }) => {
+ assert_eq!(addr, mapping);
+ uffd.zeropage(addr, PAGE_SIZE, true)?;
+ }
+ _ => panic!("unexpected event"),
+ }
+
+ thread.join().expect("failed to join thread");
+
+ uffd.unregister(mapping, PAGE_SIZE)?;
+
+ assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0);
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_nonblocking_read_event() -> Result<()> {
+ const PAGE_SIZE: usize = 4096;
+
+ unsafe {
+ let uffd = UffdBuilder::new()
+ .close_on_exec(true)
+ .non_blocking(true)
+ .create()?;
+
+ let mapping = libc::mmap(
+ ptr::null_mut(),
+ PAGE_SIZE,
+ libc::PROT_READ | libc::PROT_WRITE,
+ libc::MAP_PRIVATE | libc::MAP_ANON,
+ -1,
+ 0,
+ );
+
+ assert_ne!(mapping, libc::MAP_FAILED); // mmap(2) reports failure as MAP_FAILED, not NULL
+
+ uffd.register(mapping, PAGE_SIZE)?;
+
+ assert!(uffd.read_event()?.is_none()); // no fault has been triggered yet
+
+ let ptr = mapping as usize;
+ let thread = thread::spawn(move || {
+ let ptr = ptr as *mut u8;
+ *ptr = 1;
+ });
+
+ loop {
+ match uffd.read_event()? {
+ Some(Event::Pagefault {
+ rw: ReadWrite::Write,
+ addr,
+ ..
+ }) => {
+ assert_eq!(addr, mapping);
+ uffd.zeropage(addr, PAGE_SIZE, true)?;
+ break;
+ }
+ Some(_) => panic!("unexpected event"),
+ None => thread::sleep(std::time::Duration::from_millis(50)), // poll until the fault shows up
+ }
+ }
+
+ thread.join().expect("failed to join thread");
+
+ uffd.unregister(mapping, PAGE_SIZE)?;
+
+ assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0);
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_read_events() -> Result<()> {
+ unsafe {
+ const MAX_THREADS: usize = 5;
+ const PAGE_SIZE: usize = 4096;
+ const MEM_SIZE: usize = PAGE_SIZE * MAX_THREADS;
+
+ let uffd = UffdBuilder::new().close_on_exec(true).create()?;
+
+ let mapping = libc::mmap(
+ ptr::null_mut(),
+ MEM_SIZE,
+ libc::PROT_READ | libc::PROT_WRITE,
+ libc::MAP_PRIVATE | libc::MAP_ANON,
+ -1,
+ 0,
+ );
+
+ assert_ne!(mapping, libc::MAP_FAILED); // mmap(2) reports failure as MAP_FAILED, not NULL
+
+ uffd.register(mapping, MEM_SIZE)?;
+
+ // As accessing the memory will suspend each thread with a page fault event,
+ // there is no way to signal that the operations the test thread is waiting on to
+ // complete have been performed.
+ //
+ // Therefore, this is inherently racy. The best we can do is simply sleep-wait for
+ // all threads to have signaled that the operation is *about to be performed*.
+ let mut seen = [false; MAX_THREADS];
+ let mut threads = Vec::new();
+ for i in 0..MAX_THREADS {
+ let seen = &mut seen[i] as *mut _ as usize;
+ let ptr = (mapping as *mut u8).add(PAGE_SIZE * i) as usize;
+ threads.push(thread::spawn(move || {
+ let seen = seen as *mut bool;
+ let ptr = ptr as *mut u8;
+ *seen = true;
+ *ptr = 1;
+ }));
+ }
+
+ loop {
+ // Sleep even if all threads have "signaled", just in case any
+ // thread is preempted prior to faulting the memory access.
+ // Still, there's no guarantee that the call to `read_events` below will
+ // read all the events at once, but this should be "good enough".
+ let done = seen.iter().all(|b| *b);
+ thread::sleep(std::time::Duration::from_millis(50));
+ if done {
+ break;
+ }
+ }
+
+ // Read all the events at once
+ let mut buf = EventBuffer::new(MAX_THREADS);
+ let mut iter = uffd.read_events(&mut buf)?;
+
+ let mut seen = [false; MAX_THREADS];
+ for _ in 0..MAX_THREADS {
+ match iter
+ .next()
+ .transpose()?
+ .expect("failed to read all events; potential race condition was hit")
+ {
+ Event::Pagefault {
+ rw: ReadWrite::Write,
+ addr,
+ ..
+ } => {
+ let index = (addr as usize - mapping as usize) / PAGE_SIZE;
+ assert_eq!(seen[index], false);
+ seen[index] = true;
+ uffd.zeropage(addr, PAGE_SIZE, true)?;
+ }
+ _ => panic!("unexpected event"),
+ }
+ }
+
+ assert!(seen.iter().all(|b| *b));
+
+ for thread in threads {
+ thread.join().expect("failed to join thread");
+ }
+
+ uffd.unregister(mapping, MEM_SIZE)?;
+
+ assert_eq!(libc::munmap(mapping, MEM_SIZE), 0);
+ }
+
+ Ok(())
+ }
+
+ #[cfg(feature = "linux5_7")]
+ #[test]
+ fn test_write_protect() -> Result<()> {
+ const PAGE_SIZE: usize = 4096;
+
+ unsafe {
+ let uffd = UffdBuilder::new()
+ .require_features(FeatureFlags::PAGEFAULT_FLAG_WP)
+ .close_on_exec(true)
+ .create()?;
+
+ let mapping = libc::mmap(
+ ptr::null_mut(),
+ PAGE_SIZE,
+ libc::PROT_READ | libc::PROT_WRITE,
+ libc::MAP_PRIVATE | libc::MAP_ANON,
+ -1,
+ 0,
+ );
+
+ assert_ne!(mapping, libc::MAP_FAILED); // mmap(2) reports failure as MAP_FAILED, not NULL
+
+ // This test uses both missing and write-protect modes for a reason.
+ // The `uffdio_writeprotect` ioctl can only be used on a range *after*
+ // the missing fault is handled, it seems. This means we either need to
+ // read/write the page *before* we protect it or handle the missing
+ // page fault by changing the protection level *after* we zero the page.
+ assert!(uffd
+ .register_with_mode(
+ mapping,
+ PAGE_SIZE,
+ RegisterMode::MISSING | RegisterMode::WRITE_PROTECT
+ )?
+ .contains(IoctlFlags::WRITE_PROTECT));
+
+ let ptr = mapping as usize;
+ let thread = thread::spawn(move || {
+ let ptr = ptr as *mut u8;
+ *ptr = 1;
+ *ptr = 2;
+ });
+
+ loop {
+ match uffd.read_event()? {
+ Some(Event::Pagefault {
+ kind,
+ rw: ReadWrite::Write,
+ addr,
+ ..
+ }) => match kind {
+ FaultKind::WriteProtected => {
+ assert_eq!(addr, mapping);
+ assert_eq!(*(addr as *const u8), 0);
+ // Remove the protection and wake the page
+ uffd.remove_write_protection(mapping, PAGE_SIZE, true)?;
+ break;
+ }
+ FaultKind::Missing => {
+ assert_eq!(addr, mapping);
+ uffd.zeropage(mapping, PAGE_SIZE, false)?;
+
+ // Technically, we already know it was a write that triggered
+ // the missing page fault, so there's little point in immediately
+ // write-protecting the page to cause another fault; in the real
+ // world, a missing fault with `rw` being `ReadWrite::Write` would
+ // be enough to mark the page as "dirty". For this test, however,
+ // we do it this way to ensure a write-protected fault is read.
+ assert_eq!(*(addr as *const u8), 0);
+ uffd.write_protect(mapping, PAGE_SIZE)?;
+ uffd.wake(mapping, PAGE_SIZE)?;
+ }
+ },
+ _ => panic!("unexpected event"),
+ }
+ }
+
+ thread.join().expect("failed to join thread");
+
+ assert_eq!(*(mapping as *const u8), 2);
+
+ uffd.unregister(mapping, PAGE_SIZE)?;
+
+ assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0);
+ }
+
+ Ok(())
+ }
+}
diff --git a/src/raw.rs b/src/raw.rs
new file mode 100644
index 0000000..332c459
--- /dev/null
+++ b/src/raw.rs
@@ -0,0 +1,25 @@
+use libc::{c_int, c_long, syscall, SYS_userfaultfd, INT_MAX};
+pub use userfaultfd_sys::*;
+
+pub unsafe fn userfaultfd(flags: c_int) -> c_int { // thin wrapper over the raw `SYS_userfaultfd` syscall
+ let fd = syscall(SYS_userfaultfd, flags as c_long); // `syscall` yields a `c_long`
+ if fd > INT_MAX as c_long {
+ panic!("fd doesn't fit in a c_int");
+ } else {
+ fd as c_int // any value <= INT_MAX, including a negative return, is narrowed here
+ }
+}
+
+nix::ioctl_readwrite!(api, UFFDIO, _UFFDIO_API, uffdio_api);
+nix::ioctl_readwrite!(register, UFFDIO, _UFFDIO_REGISTER, uffdio_register);
+nix::ioctl_read!(unregister, UFFDIO, _UFFDIO_UNREGISTER, uffdio_range);
+nix::ioctl_read!(wake, UFFDIO, _UFFDIO_WAKE, uffdio_range);
+nix::ioctl_readwrite!(copy, UFFDIO, _UFFDIO_COPY, uffdio_copy);
+nix::ioctl_readwrite!(zeropage, UFFDIO, _UFFDIO_ZEROPAGE, uffdio_zeropage);
+#[cfg(feature = "linux5_7")]
+nix::ioctl_readwrite!(
+ write_protect,
+ UFFDIO,
+ _UFFDIO_WRITEPROTECT,
+ uffdio_writeprotect
+);
diff --git a/tests/manpage_example.rs b/tests/manpage_example.rs
new file mode 100644
index 0000000..49a061f
--- /dev/null
+++ b/tests/manpage_example.rs
@@ -0,0 +1,8 @@
+#[test]
+fn run_manpage_example() { // runs the `manpage` example through cargo and checks its exit status
+ let output = std::process::Command::new("cargo")
+ .args(&["run", "--example", "manpage", "--", "3"])
+ .output()
+ .expect("manpage example failed to start");
+ assert!(output.status.success(), "manpage example failed"); // message is printed only on failure
+}