From 99912074b61175871348de10f21426578b3b3447 Mon Sep 17 00:00:00 2001 From: Elie Kheirallah Date: Mon, 14 Nov 2022 22:26:46 +0000 Subject: Initial import of userfaultfd-0.5.0 to AOSP Bug: 258476625 Test: N/A Change-Id: Ie8184f2fb756cb5fe8f22450805bbebbbf8faf1d --- CHANGELOG.md | 36 +++ Cargo.toml | 43 +++ Cargo.toml.orig | 22 ++ LICENSE | 1 + LICENSE-APACHE | 176 +++++++++++++ LICENSE-MIT | 23 ++ METADATA | 20 ++ MODULE_LICENSE_APACHE2 | 0 OWNERS | 5 + README.md | 11 + SECURITY.md | 9 + examples/manpage.rs | 136 ++++++++++ src/builder.rs | 162 ++++++++++++ src/error.rs | 56 ++++ src/event.rs | 148 +++++++++++ src/lib.rs | 664 +++++++++++++++++++++++++++++++++++++++++++++++ src/raw.rs | 25 ++ tests/manpage_example.rs | 8 + 18 files changed, 1545 insertions(+) create mode 100755 CHANGELOG.md create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 120000 LICENSE create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 METADATA create mode 100644 MODULE_LICENSE_APACHE2 create mode 100644 OWNERS create mode 100644 README.md create mode 100644 SECURITY.md create mode 100644 examples/manpage.rs create mode 100644 src/builder.rs create mode 100644 src/error.rs create mode 100644 src/event.rs create mode 100644 src/lib.rs create mode 100644 src/raw.rs create mode 100644 tests/manpage_example.rs diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100755 index 0000000..b2621e9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,36 @@ +### Unreleased + +- Added `Uffd::read_events` that can read multiple events from the userfaultfd file descriptor. + +### 0.3.1 (2021-02-17) + +- Added support for the `UFFD_FEATURE_THREAD_ID` flag when compiled with the `linux4_14` Cargo + feature. + +### 0.3.0 (2021-02-03) + +- Update `bindgen` dependency of `userfaultfd-sys` to `0.57`. Thank you @jgowans + +### 0.2.1 (2020-11-20) + +- Make `ReadWrite` public. 
Thank you @electroCutie + +### 0.2.0 (2020-04-10) + +- Removed the compile-time Linux version check, and replaced it with a Cargo feature. + + The Linux version check was overly restrictive, even on systems that did have the right kernel + version installed but had older headers in `/usr/include/linux`. Beyond that, this check made it + more difficult to compile on a different host than what's targeted. + + There is now a `linux4_14` feature flag on `userfaultfd-sys`, which turns on and tests the extra + constants available in that version. Since `userfaultfd` did not make use of any of those newer + features, it doesn't have a feature flag yet. + + Applications should take care when initializing with `UffdBuilder` to specify the features and + ioctls they require, so that an unsupported version will be detected at runtime. + + +### 0.1.0 (2020-04-07) + +- Initial public release of userfaultfd-rs. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a188797 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,43 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +name = "userfaultfd" +version = "0.5.0" +authors = ["Adam C. 
Foltzer "] +edition = "2018" +license = "MIT OR Apache-2.0" +description = "Rust bindings for the Linux userfaultfd functionality" +repository = "https://github.com/fastly/userfaultfd-rs" +readme = "README.md" + +[dependencies.bitflags] +version = "1.0" + +[dependencies.cfg-if] +version = "^1.0.0" + +[dependencies.libc] +version = "0.2.65" + +[dependencies.nix] +version = "0.23" + +[dependencies.thiserror] +version = "1.0.4" + +[dependencies.userfaultfd-sys] +version = "^0.4.0" + +[features] +default = [] +linux4_14 = ["userfaultfd-sys/linux4_14"] +linux5_7 = ["userfaultfd-sys/linux5_7"] diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..153efe6 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,22 @@ +[package] +name = "userfaultfd" +version = "0.5.0" +authors = ["Adam C. Foltzer "] +edition = "2018" +license = "MIT OR Apache-2.0" +description = "Rust bindings for the Linux userfaultfd functionality" +repository = "https://github.com/fastly/userfaultfd-rs" +readme = "README.md" + +[dependencies] +bitflags = "1.0" +cfg-if = "^1.0.0" +libc = "0.2.65" +nix = "0.23" +thiserror = "1.0.4" +userfaultfd-sys = { path = "userfaultfd-sys", version = "^0.4.0" } + +[features] +default = [] +linux4_14 = ["userfaultfd-sys/linux4_14"] +linux5_7 = ["userfaultfd-sys/linux5_7"] diff --git a/LICENSE b/LICENSE new file mode 120000 index 0000000..6b579aa --- /dev/null +++ b/LICENSE @@ -0,0 +1 @@ +LICENSE-APACHE \ No newline at end of file diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..1b5ec8b --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/METADATA b/METADATA new file mode 100644 index 0000000..9ffa4db --- /dev/null +++ b/METADATA @@ -0,0 +1,20 @@ +name: "userfaultfd" +description: "Rust bindings for the Linux userfaultfd functionality" +third_party { + url { + type: HOMEPAGE + value: "https://crates.io/crates/userfaultfd" + } + url { + type: ARCHIVE + value: "https://static.crates.io/crates/userfaultfd/userfaultfd-0.5.0.crate" + } + version: "0.5.0" + # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. 
+ license_type: NOTICE + last_upgrade_date { + year: 2022 + month: 11 + day: 16 + } +} diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2 new file mode 100644 index 0000000..e69de29 diff --git a/OWNERS b/OWNERS new file mode 100644 index 0000000..3abd431 --- /dev/null +++ b/OWNERS @@ -0,0 +1,5 @@ +include platform/prebuilts/rust:master:/OWNERS +devinmoore@google.com +fmayle@google.com +khei@google.com +smoreland@google.com diff --git a/README.md b/README.md new file mode 100644 index 0000000..d485743 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +![Build](https://github.com/fastly/userfaultfd-rs/workflows/Rust/badge.svg) + +# Userfaultfd-rs +Rust bindings for Linux's userfaultfd functionality. + +## License + +This software is distributed under the terms of both the MIT license and the Apache License (Version 2.0). + +See [LICENSE-APACHE](LICENSE-APACHE) and [LICENSE-MIT](LICENSE-MIT). + diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..555ec87 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,9 @@ +## Report a security issue + +Fastly welcomes security reports and is committed to providing prompt attention to security issues. Security issues should be reported privately via [Fastly’s security issue reporting process](https://www.fastly.com/security/report-security-issue). + +## Security advisories + +Remediation of security vulnerabilities is prioritized. The project team endeavors to coordinate remediation with third-party stakeholders, and is committed to transparency in the disclosure process. The team announces security issues via release notes as well as the [RustSec advisory database](https://github.com/RustSec/advisory-db) (i.e. `cargo-audit`) on a best-effort basis. + +Note that communications related to security issues in Fastly-maintained OSS as described here are distinct from [Fastly Security Advisories](https://www.fastly.com/security-advisories). 
diff --git a/examples/manpage.rs b/examples/manpage.rs new file mode 100644 index 0000000..f8d6848 --- /dev/null +++ b/examples/manpage.rs @@ -0,0 +1,136 @@ +//! Port of the example from the `userfaultfd` manpage. +use libc::{self, c_void}; +use nix::poll::{poll, PollFd, PollFlags}; +use nix::sys::mman::{mmap, MapFlags, ProtFlags}; +use nix::unistd::{sysconf, SysconfVar}; +use std::env; +use std::os::unix::io::AsRawFd; +use std::ptr; +use userfaultfd::{Event, Uffd, UffdBuilder}; + +fn fault_handler_thread(uffd: Uffd) { + let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize; + + // Create a page that will be copied into the faulting region + + let page = unsafe { + mmap( + ptr::null_mut(), + page_size, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS, + -1, + 0, + ) + .expect("mmap") + }; + + // Loop, handling incoming events on the userfaultfd file descriptor + + let mut fault_cnt = 0; + loop { + // See what poll() tells us about the userfaultfd + + let mut pollfds = [PollFd::new(uffd.as_raw_fd(), PollFlags::POLLIN)]; + let nready = poll(&mut pollfds, -1).expect("poll"); + + println!("\nfault_handler_thread():"); + let revents = pollfds[0].revents().unwrap(); // read the entry poll() actually filled in + println!( + " poll() returns: nready = {}; POLLIN = {}; POLLERR = {}", + nready, + revents.contains(PollFlags::POLLIN), + revents.contains(PollFlags::POLLERR), + ); + + // Read an event from the userfaultfd + let event = uffd + .read_event() + .expect("read uffd_msg") + .expect("uffd_msg ready"); + + // We expect only one kind of event; verify that assumption + + if let Event::Pagefault { addr, .. } = event { + // Display info about the page-fault event + + println!(" UFFD_EVENT_PAGEFAULT event: {:?}", event); + + // Copy the page pointed to by 'page' into the faulting region. Vary the contents that are + // copied in, so that it is more obvious that each fault is handled separately. 
+ + for c in unsafe { std::slice::from_raw_parts_mut(page as *mut u8, page_size) } { + *c = b'A' + fault_cnt % 20; + } + fault_cnt += 1; + + let dst = (addr as usize & !(page_size as usize - 1)) as *mut c_void; + let copy = unsafe { uffd.copy(page, dst, page_size, true).expect("uffd copy") }; + + println!(" (uffdio_copy.copy returned {})", copy); + } else { + panic!("Unexpected event on userfaultfd"); + } + } +} + +fn main() { + let num_pages = env::args() + .nth(1) + .expect("Usage: manpage ") + .parse::() + .unwrap(); + + let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize; + let len = num_pages * page_size; + + // Create and enable userfaultfd object + + let uffd = UffdBuilder::new() + .close_on_exec(true) + .non_blocking(true) + .user_mode_only(true) + .create() + .expect("uffd creation"); + + // Create a private anonymous mapping. The memory will be demand-zero paged--that is, not yet + // allocated. When we actually touch the memory, it will be allocated via the userfaultfd. + + let addr = unsafe { + mmap( + ptr::null_mut(), + len, + ProtFlags::PROT_READ | ProtFlags::PROT_WRITE, + MapFlags::MAP_PRIVATE | MapFlags::MAP_ANONYMOUS, + -1, + 0, + ) + .expect("mmap") + }; + + println!("Address returned by mmap() = {:p}", addr); + + // Register the memory range of the mapping we just created for handling by the userfaultfd + // object. In mode, we request to track missing pages (i.e., pages that have not yet been + // faulted in). + + uffd.register(addr, len).expect("uffd.register()"); + + // Create a thread that will process the userfaultfd events + let _s = std::thread::spawn(move || fault_handler_thread(uffd)); + + // Main thread now touches memory in the mapping, touching locations 1024 bytes apart. This will + // trigger userfaultfd events for all pages in the region. 
+ + // Ensure that faulting address is not on a page boundary, in order to test that we correctly + // handle that case in fault_handling_thread() + let mut l = 0xf; + + while l < len { + let ptr = (addr as usize + l) as *mut u8; + let c = unsafe { *ptr }; + println!("Read address {:p} in main(): {:?}", ptr, c as char); + l += 1024; + std::thread::sleep(std::time::Duration::from_micros(100000)); + } +} diff --git a/src/builder.rs b/src/builder.rs new file mode 100644 index 0000000..b89efb4 --- /dev/null +++ b/src/builder.rs @@ -0,0 +1,162 @@ +use crate::error::{Error, Result}; +use crate::raw; +use crate::{IoctlFlags, Uffd}; +use bitflags::bitflags; +use nix::errno::Errno; + +cfg_if::cfg_if! { + if #[cfg(any(feature = "linux5_7", feature = "linux4_14"))] { + bitflags! { + /// Used with `UffdBuilder` to determine which features are available in the current kernel. + pub struct FeatureFlags: u64 { + const PAGEFAULT_FLAG_WP = raw::UFFD_FEATURE_PAGEFAULT_FLAG_WP; + const EVENT_FORK = raw::UFFD_FEATURE_EVENT_FORK; + const EVENT_REMAP = raw::UFFD_FEATURE_EVENT_REMAP; + const EVENT_REMOVE = raw::UFFD_FEATURE_EVENT_REMOVE; + const MISSING_HUGETLBFS = raw::UFFD_FEATURE_MISSING_HUGETLBFS; + const MISSING_SHMEM = raw::UFFD_FEATURE_MISSING_SHMEM; + const EVENT_UNMAP = raw::UFFD_FEATURE_EVENT_UNMAP; + const SIGBUS = raw::UFFD_FEATURE_SIGBUS; + const THREAD_ID = raw::UFFD_FEATURE_THREAD_ID; + } + } + } else { + bitflags! { + /// Used with `UffdBuilder` to determine which features are available in the current kernel. 
+ pub struct FeatureFlags: u64 { + const PAGEFAULT_FLAG_WP = raw::UFFD_FEATURE_PAGEFAULT_FLAG_WP; + const EVENT_FORK = raw::UFFD_FEATURE_EVENT_FORK; + const EVENT_REMAP = raw::UFFD_FEATURE_EVENT_REMAP; + const EVENT_REMOVE = raw::UFFD_FEATURE_EVENT_REMOVE; + const MISSING_HUGETLBFS = raw::UFFD_FEATURE_MISSING_HUGETLBFS; + const MISSING_SHMEM = raw::UFFD_FEATURE_MISSING_SHMEM; + const EVENT_UNMAP = raw::UFFD_FEATURE_EVENT_UNMAP; + } + } + } +} +/// A builder for initializing `Uffd` objects. +/// +/// ``` +/// use userfaultfd::UffdBuilder; +/// +/// let uffd = UffdBuilder::new() +/// .close_on_exec(true) +/// .non_blocking(true) +/// .user_mode_only(true) +/// .create(); +/// assert!(uffd.is_ok()); +/// ``` +pub struct UffdBuilder { + close_on_exec: bool, + non_blocking: bool, + user_mode_only: bool, + req_features: FeatureFlags, + req_ioctls: IoctlFlags, +} + +impl UffdBuilder { + /// Create a new builder with no required features or ioctls, `close_on_exec` and + /// `non_blocking` both set to `false`, and `user_mode_only` set to `true`. + pub fn new() -> UffdBuilder { + UffdBuilder { + close_on_exec: false, + non_blocking: false, + user_mode_only: true, + req_features: FeatureFlags::empty(), + req_ioctls: IoctlFlags::empty(), + } + } + + /// Enable the close-on-exec flag for the new userfaultfd object (see the description of + /// `O_CLOEXEC` in [`open(2)`](http://man7.org/linux/man-pages/man2/open.2.html)). + pub fn close_on_exec(&mut self, close_on_exec: bool) -> &mut Self { + self.close_on_exec = close_on_exec; + self + } + + /// Enable non-blocking operation for the userfaultfd object. + /// + /// If this is set to `false`, `Uffd::read_event()` will block until an event is available to + /// read. Otherwise, it will immediately return `None` if no event is available. + pub fn non_blocking(&mut self, non_blocking: bool) -> &mut Self { + self.non_blocking = non_blocking; + self + } + + /// Enable user-mode only flag for the userfaultfd object. 
+ /// + /// If set to `false`, the process must have the `CAP_SYS_PTRACE` capability starting with Linux 5.11 + /// or object creation will fail with EPERM. When set to `true`, userfaultfd can't be used + /// to handle kernel-mode page faults such as when kernel tries copying data to userspace. + /// + /// When used with kernels older than 5.11, this has no effect; the process doesn't need + /// `CAP_SYS_PTRACE` and can handle kernel-mode page faults. + pub fn user_mode_only(&mut self, user_mode_only: bool) -> &mut Self { + self.user_mode_only = user_mode_only; + self + } + + /// Add a requirement that a particular feature or set of features is available. + /// + /// If a required feature is unavailable, `UffdBuilder.create()` will return an error. + pub fn require_features(&mut self, feature: FeatureFlags) -> &mut Self { + self.req_features |= feature; + self + } + + /// Add a requirement that a particular ioctl or set of ioctls is available. + /// + /// If a required ioctl is unavailable, `UffdBuilder.create()` will return an error. + pub fn require_ioctls(&mut self, ioctls: IoctlFlags) -> &mut Self { + self.req_ioctls |= ioctls; + self + } + + /// Create a `Uffd` object with the current settings of this builder. + pub fn create(&self) -> Result<Uffd> { + // first do the syscall to get the file descriptor + let mut flags = 0; + if self.close_on_exec { + flags |= libc::O_CLOEXEC; + } + if self.non_blocking { + flags |= libc::O_NONBLOCK; + } + + if self.user_mode_only { + flags |= raw::UFFD_USER_MODE_ONLY as i32; + } + + let fd = match Errno::result(unsafe { raw::userfaultfd(flags) }) { + Ok(fd) => fd, + // setting the USER_MODE_ONLY flag on kernel pre-5.11 causes it to return EINVAL. + // If the user asks for the flag, we first try with it set, and if kernel gives + // EINVAL we try again without the flag set. 
+ Err(Errno::EINVAL) if self.user_mode_only => Errno::result(unsafe { + raw::userfaultfd(flags & !raw::UFFD_USER_MODE_ONLY as i32) + })?, + Err(e) => return Err(e.into()), + }; + + // Wrap the fd up so that a failure in this function body closes it with the drop. + let uffd = Uffd { fd }; + + // then do the UFFDIO_API ioctl to set up and ensure features and other ioctls are available + let mut api = raw::uffdio_api { + api: raw::UFFD_API, + features: self.req_features.bits(), + ioctls: 0, + }; + unsafe { + raw::api(uffd.fd, &mut api as *mut raw::uffdio_api)?; + } + let supported = + IoctlFlags::from_bits(api.ioctls).ok_or(Error::UnrecognizedIoctls(api.ioctls))?; + if !supported.contains(self.req_ioctls) { + Err(Error::UnsupportedIoctls(supported)) + } else { + Ok(uffd) + } + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..5cd8926 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,56 @@ +use crate::IoctlFlags; +use nix::errno::Errno; +use thiserror::Error; + +pub type Result<T> = std::result::Result<T, Error>; + +/// Errors for this crate. +/// +/// Several of these errors contain an underlying `Errno` value; see +/// [`userfaultfd(2)`](http://man7.org/linux/man-pages/man2/userfaultfd.2.html) and +/// [`ioctl_userfaultfd(2)`](http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html) for more +/// details on how to interpret these errors. +#[derive(Debug, Error)] +pub enum Error { + /// Copy ioctl failure with `errno` value. + #[error("Copy failed")] + CopyFailed(Errno), + + /// Copy ioctl failure with copied length. + #[error("Copy partially succeeded")] + PartiallyCopied(usize), + + /// Failure to read a full `uffd_msg` struct from the underlying file descriptor. + #[error("Incomplete uffd_msg; read only {read}/{expected} bytes")] + IncompleteMsg { read: usize, expected: usize }, + + /// Generic system error. + #[error("System error")] + SystemError(#[source] nix::Error), + + /// End-of-file was read from the underlying file descriptor. 
+ #[error("EOF when reading file descriptor")] + ReadEof, + + /// An unrecognized event code was found in a `uffd_msg` struct. + #[error("Unrecognized event in uffd_msg: {0}")] + UnrecognizedEvent(u8), + + /// An unrecognized ioctl bit was set in the result of API initialization or registration. + #[error("Unrecognized ioctl flags: {0}")] + UnrecognizedIoctls(u64), + + /// Requested ioctls were not available when initializing the API. + #[error("Requested ioctls unsupported; supported: {0:?}")] + UnsupportedIoctls(IoctlFlags), + + /// Zeropage ioctl failure with `errno` value. + #[error("Zeropage failed: {0}")] + ZeropageFailed(Errno), +} + +impl From<nix::Error> for Error { + fn from(e: nix::Error) -> Error { + Error::SystemError(e) + } +} diff --git a/src/event.rs b/src/event.rs new file mode 100644 index 0000000..8ea75f2 --- /dev/null +++ b/src/event.rs @@ -0,0 +1,148 @@ +use crate::error::{Error, Result}; +use crate::raw; +use crate::Uffd; +use libc::c_void; +#[cfg(feature = "linux4_14")] +use nix::unistd::Pid; +use std::os::unix::io::{FromRawFd, RawFd}; + +/// Whether a page fault event was for a read or write. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum ReadWrite { + Read, + Write, +} + +/// The kind of fault for a page fault event. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum FaultKind { + /// The fault was a read or write on a missing page. + Missing, + /// The fault was a write on a write-protected page. + #[cfg(feature = "linux5_7")] + WriteProtected, +} + +/// Events from the userfaultfd object that are read by `Uffd::read_event()`. +#[derive(Debug)] +pub enum Event { + /// A pagefault event. + Pagefault { + /// The kind of fault. + kind: FaultKind, + /// Whether the fault is on a read or a write. + rw: ReadWrite, + /// The address that triggered the fault. + addr: *mut c_void, + /// The thread that triggered the fault, if [`FeatureFlags::THREAD_ID`] is enabled. + /// + /// If the thread ID feature is not enabled, the value of this field is undefined. 
It would + /// not be undefined behavior to use it, strictly speaking, but the [`Pid`] will not + /// necessarily point to a real thread. + /// + /// This requires this crate to be compiled with the `linux4_14` feature. + #[cfg(feature = "linux4_14")] + thread_id: Pid, + }, + /// Generated when the faulting process invokes `fork(2)` (or `clone(2)` without the `CLONE_VM` + /// flag). + Fork { + /// The `Uffd` object created for the child by `fork(2)` + uffd: Uffd, + }, + /// Generated when the faulting process invokes `mremap(2)`. + Remap { + /// The original address of the memory range that was remapped. + from: *mut c_void, + /// The new address of the memory range that was remapped. + to: *mut c_void, + /// The original length of the memory range that was remapped. + len: usize, + }, + /// Generated when the faulting process invokes `madvise(2)` with `MADV_DONTNEED` or + /// `MADV_REMOVE` advice. + Remove { + /// The start address of the memory range that was freed. + start: *mut c_void, + /// The end address of the memory range that was freed. + end: *mut c_void, + }, + /// Generated when the faulting process unmaps a memory range, either explicitly using + /// `munmap(2)` or implicitly during `mmap(2)` or `mremap(2)`. + Unmap { + /// The start address of the memory range that was unmapped. + start: *mut c_void, + /// The end address of the memory range that was unmapped. 
+ end: *mut c_void, + }, +} + +impl Event { + pub(crate) fn from_uffd_msg(msg: &raw::uffd_msg) -> Result { + match msg.event { + raw::UFFD_EVENT_PAGEFAULT => { + let pagefault = unsafe { msg.arg.pagefault }; + cfg_if::cfg_if!( + if #[cfg(feature = "linux5_7")] { + let kind = if pagefault.flags & raw::UFFD_PAGEFAULT_FLAG_WP != 0 { + FaultKind::WriteProtected + } else { + FaultKind::Missing + }; + } else { + let kind = FaultKind::Missing; + } + ); + + let rw = if pagefault.flags & raw::UFFD_PAGEFAULT_FLAG_WRITE == 0 { + ReadWrite::Read + } else { + ReadWrite::Write + }; + // Converting the ptid to i32 is safe because the maximum pid in + // Linux is 2^22, which is about 4 million. + // + // Reference: + // https://github.com/torvalds/linux/blob/2d338201d5311bcd79d42f66df4cecbcbc5f4f2c/include/linux/threads.h + #[cfg(feature = "linux4_14")] + let thread_id = Pid::from_raw(unsafe { pagefault.feat.ptid } as i32); + Ok(Event::Pagefault { + kind, + rw, + addr: pagefault.address as *mut c_void, + #[cfg(feature = "linux4_14")] + thread_id, + }) + } + raw::UFFD_EVENT_FORK => { + let fork = unsafe { msg.arg.fork }; + Ok(Event::Fork { + uffd: unsafe { Uffd::from_raw_fd(fork.ufd as RawFd) }, + }) + } + raw::UFFD_EVENT_REMAP => { + let remap = unsafe { msg.arg.remap }; + Ok(Event::Remap { + from: remap.from as *mut c_void, + to: remap.to as *mut c_void, + len: remap.len as usize, + }) + } + raw::UFFD_EVENT_REMOVE => { + let remove = unsafe { msg.arg.remove }; + Ok(Event::Remove { + start: remove.start as *mut c_void, + end: remove.end as *mut c_void, + }) + } + raw::UFFD_EVENT_UNMAP => { + let remove = unsafe { msg.arg.remove }; + Ok(Event::Unmap { + start: remove.start as *mut c_void, + end: remove.end as *mut c_void, + }) + } + _ => Err(Error::UnrecognizedEvent(msg.event)), + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..9a7641b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,664 @@ +//! A Linux mechanism for handling page faults in user space. 
+//! +//! The main way to interact with this library is to create a `Uffd` object with a `UffdBuilder`, +//! then use the methods of `Uffd` from a worker thread. +//! +//! See [`userfaultfd(2)`](http://man7.org/linux/man-pages/man2/userfaultfd.2.html) and +//! [`ioctl_userfaultfd(2)`](http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html) for more +//! details. + +mod builder; +mod error; +mod event; +mod raw; + +pub use crate::builder::{FeatureFlags, UffdBuilder}; +pub use crate::error::{Error, Result}; +pub use crate::event::{Event, FaultKind, ReadWrite}; + +use bitflags::bitflags; +use libc::{self, c_void}; +use nix::errno::Errno; +use nix::unistd::read; +use std::mem; +use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; + +/// Represents an opaque buffer where userfaultfd events are stored. +/// +/// This is used in conjunction with [`Uffd::read_events`]. +pub struct EventBuffer(Vec); + +impl EventBuffer { + /// Creates a new buffer for `size` number of events. + /// + /// [`Uffd::read_events`] will read up to this many events at a time. + pub fn new(size: usize) -> Self { + Self(vec![unsafe { mem::zeroed() }; size]) + } +} + +/// The userfaultfd object. +/// +/// The userspace representation of the object is a file descriptor, so this type implements +/// `AsRawFd`, `FromRawFd`, and `IntoRawFd`. These methods should be used with caution, but can be +/// essential for using functions like `poll` on a worker thread. +#[derive(Debug)] +pub struct Uffd { + fd: RawFd, +} + +impl Drop for Uffd { + fn drop(&mut self) { + unsafe { libc::close(self.fd) }; + } +} + +impl AsRawFd for Uffd { + fn as_raw_fd(&self) -> RawFd { + self.fd + } +} + +impl IntoRawFd for Uffd { + fn into_raw_fd(self) -> RawFd { + self.fd + } +} + +impl FromRawFd for Uffd { + unsafe fn from_raw_fd(fd: RawFd) -> Self { + Uffd { fd } + } +} + +bitflags! { + /// The registration mode used when registering an address range with `Uffd`. 
+ pub struct RegisterMode: u64 { + /// Registers the range for missing page faults. + const MISSING = raw::UFFDIO_REGISTER_MODE_MISSING; + /// Registers the range for write faults. + #[cfg(feature = "linux5_7")] + const WRITE_PROTECT = raw::UFFDIO_REGISTER_MODE_WP; + } +} + +impl Uffd { + /// Register a memory address range with the userfaultfd object, and return the `IoctlFlags` + /// that are available for the selected range. + /// + /// This method only registers the given range for missing page faults. + pub fn register(&self, start: *mut c_void, len: usize) -> Result { + self.register_with_mode(start, len, RegisterMode::MISSING) + } + + /// Register a memory address range with the userfaultfd object for the given mode and + /// return the `IoctlFlags` that are available for the selected range. + pub fn register_with_mode( + &self, + start: *mut c_void, + len: usize, + mode: RegisterMode, + ) -> Result { + let mut register = raw::uffdio_register { + range: raw::uffdio_range { + start: start as u64, + len: len as u64, + }, + mode: mode.bits(), + ioctls: 0, + }; + unsafe { + raw::register(self.as_raw_fd(), &mut register as *mut raw::uffdio_register)?; + } + IoctlFlags::from_bits(register.ioctls).ok_or(Error::UnrecognizedIoctls(register.ioctls)) + } + + /// Unregister a memory address range from the userfaultfd object. + pub fn unregister(&self, start: *mut c_void, len: usize) -> Result<()> { + let mut range = raw::uffdio_range { + start: start as u64, + len: len as u64, + }; + unsafe { + raw::unregister(self.as_raw_fd(), &mut range as *mut raw::uffdio_range)?; + } + Ok(()) + } + + /// Atomically copy a contiguous memory chunk into the userfaultfd-registered range, and return + /// the number of bytes that were successfully copied. + /// + /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory + /// range.
+ pub unsafe fn copy( + &self, + src: *const c_void, + dst: *mut c_void, + len: usize, + wake: bool, + ) -> Result { + let mut copy = raw::uffdio_copy { + src: src as u64, + dst: dst as u64, + len: len as u64, + mode: if wake { + 0 + } else { + raw::UFFDIO_COPY_MODE_DONTWAKE + }, + copy: 0, + }; + + let _ = + raw::copy(self.as_raw_fd(), &mut copy as *mut raw::uffdio_copy).map_err(|errno| { + match errno { + Errno::EAGAIN => Error::PartiallyCopied(copy.copy as usize), + _ => Error::CopyFailed(errno), + } + })?; + if copy.copy < 0 { + // shouldn't ever get here, as errno should be caught above + Err(Error::CopyFailed(Errno::from_i32(-copy.copy as i32))) + } else { + Ok(copy.copy as usize) + } + } + + /// Zero out a memory address range registered with userfaultfd, and return the number of bytes + /// that were successfully zeroed. + /// + /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory + /// address range. + pub unsafe fn zeropage(&self, start: *mut c_void, len: usize, wake: bool) -> Result { + let mut zeropage = raw::uffdio_zeropage { + range: raw::uffdio_range { + start: start as u64, + len: len as u64, + }, + mode: if wake { + 0 + } else { + raw::UFFDIO_ZEROPAGE_MODE_DONTWAKE + }, + zeropage: 0, + }; + + let _ = raw::zeropage(self.as_raw_fd(), &mut zeropage as &mut raw::uffdio_zeropage) + .map_err(Error::ZeropageFailed)?; + if zeropage.zeropage < 0 { + // shouldn't ever get here, as errno should be caught above + Err(Error::ZeropageFailed(Errno::from_i32( + -zeropage.zeropage as i32, + ))) + } else { + Ok(zeropage.zeropage as usize) + } + } + + /// Wake up the thread waiting for page fault resolution on the specified memory address range. 
+ pub fn wake(&self, start: *mut c_void, len: usize) -> Result<()> { + let mut range = raw::uffdio_range { + start: start as u64, + len: len as u64, + }; + unsafe { + raw::wake(self.as_raw_fd(), &mut range as *mut raw::uffdio_range)?; + } + Ok(()) + } + + /// Makes a range write-protected. + #[cfg(feature = "linux5_7")] + pub fn write_protect(&self, start: *mut c_void, len: usize) -> Result<()> { + let mut ioctl = raw::uffdio_writeprotect { + range: raw::uffdio_range { + start: start as u64, + len: len as u64, + }, + mode: raw::UFFDIO_WRITEPROTECT_MODE_WP, + }; + + unsafe { + raw::write_protect( + self.as_raw_fd(), + &mut ioctl as *mut raw::uffdio_writeprotect, + )?; + } + + Ok(()) + } + + /// Removes the write-protection for a range. + /// + /// If `wake` is `true`, wake up the thread waiting for page fault resolution on the memory + /// address range. + #[cfg(feature = "linux5_7")] + pub fn remove_write_protection( + &self, + start: *mut c_void, + len: usize, + wake: bool, + ) -> Result<()> { + let mut ioctl = raw::uffdio_writeprotect { + range: raw::uffdio_range { + start: start as u64, + len: len as u64, + }, + mode: if wake { + 0 + } else { + raw::UFFDIO_WRITEPROTECT_MODE_DONTWAKE + }, + }; + + unsafe { + raw::write_protect( + self.as_raw_fd(), + &mut ioctl as *mut raw::uffdio_writeprotect, + )?; + } + + Ok(()) + } + + /// Read an `Event` from the userfaultfd object. + /// + /// If the `Uffd` object was created with `non_blocking` set to `false`, this will block until + /// an event is successfully read (returning `Some(event)`), or an error is returned. + /// + /// If `non_blocking` was `true`, this will immediately return `None` if no event is ready to + /// read. + /// + /// Note that while this method doesn't require a mutable reference to the `Uffd` object, it + /// does consume bytes (thread-safely) from the underlying file descriptor.
+ /// + /// # Examples + /// + /// ```rust + /// # use userfaultfd::{Uffd, Result}; + /// fn read_event(uffd: &Uffd) -> Result<()> { + /// // Read a single event + /// match uffd.read_event()? { + /// Some(e) => { + /// // Do something with the event + /// }, + /// None => { + /// // This was a non-blocking read and the descriptor was not ready for read + /// }, + /// } + /// Ok(()) + /// } + /// ``` + pub fn read_event(&self) -> Result> { + let mut buf = [unsafe { std::mem::zeroed() }; 1]; + let mut iter = self.read(&mut buf)?; + let event = iter.next().transpose()?; + assert!(iter.next().is_none()); + Ok(event) + } + + /// Read multiple events from the userfaultfd object using the given event buffer. + /// + /// If the `Uffd` object was created with `non_blocking` set to `false`, this will block until + /// an event is successfully read or an error is returned. + /// + /// If `non_blocking` was `true`, this will immediately return an empty iterator if the file + /// descriptor is not ready for reading. + /// + /// # Examples + /// + /// ```rust + /// # use userfaultfd::{Uffd, EventBuffer}; + /// fn read_events(uffd: &Uffd) -> userfaultfd::Result<()> { + /// // Read up to 100 events at a time + /// let mut buf = EventBuffer::new(100); + /// for event in uffd.read_events(&mut buf)? { + /// let event = event?; + /// // Do something with the event... 
+ /// } + /// Ok(()) + /// } + /// ``` + pub fn read_events<'a>( + &self, + buf: &'a mut EventBuffer, + ) -> Result> + 'a> { + self.read(&mut buf.0) + } + + fn read<'a>( + &self, + msgs: &'a mut [raw::uffd_msg], + ) -> Result> + 'a> { + const MSG_SIZE: usize = std::mem::size_of::(); + + let buf = unsafe { + std::slice::from_raw_parts_mut(msgs.as_mut_ptr() as _, msgs.len() * MSG_SIZE) + }; + + let count = match read(self.as_raw_fd(), buf) { + Err(e) if e == Errno::EAGAIN => 0, + Err(e) => return Err(Error::SystemError(e)), + Ok(0) => return Err(Error::ReadEof), + Ok(bytes_read) => { + let remainder = bytes_read % MSG_SIZE; + if remainder != 0 { + return Err(Error::IncompleteMsg { + read: remainder, + expected: MSG_SIZE, + }); + } + + bytes_read / MSG_SIZE + } + }; + + Ok(msgs.iter().take(count).map(|msg| Event::from_uffd_msg(msg))) + } +} + +bitflags! { + /// Used with `UffdBuilder` and `Uffd::register()` to determine which operations are available. + pub struct IoctlFlags: u64 { + const REGISTER = 1 << raw::_UFFDIO_REGISTER; + const UNREGISTER = 1 << raw::_UFFDIO_UNREGISTER; + const WAKE = 1 << raw::_UFFDIO_WAKE; + const COPY = 1 << raw::_UFFDIO_COPY; + const ZEROPAGE = 1 << raw::_UFFDIO_ZEROPAGE; + #[cfg(feature = "linux5_7")] + const WRITE_PROTECT = 1 << raw::_UFFDIO_WRITEPROTECT; + const API = 1 << raw::_UFFDIO_API; + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::ptr; + use std::thread; + + #[test] + fn test_read_event() -> Result<()> { + const PAGE_SIZE: usize = 4096; + + unsafe { + let uffd = UffdBuilder::new().close_on_exec(true).create()?; + + let mapping = libc::mmap( + ptr::null_mut(), + PAGE_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANON, + -1, + 0, + ); + + assert!(!mapping.is_null()); + + uffd.register(mapping, PAGE_SIZE)?; + + let ptr = mapping as usize; + let thread = thread::spawn(move || { + let ptr = ptr as *mut u8; + *ptr = 1; + }); + + match uffd.read_event()? 
{ + Some(Event::Pagefault { + rw: ReadWrite::Write, + addr, + .. + }) => { + assert_eq!(addr, mapping); + uffd.zeropage(addr, PAGE_SIZE, true)?; + } + _ => panic!("unexpected event"), + } + + thread.join().expect("failed to join thread"); + + uffd.unregister(mapping, PAGE_SIZE)?; + + assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0); + } + + Ok(()) + } + + #[test] + fn test_nonblocking_read_event() -> Result<()> { + const PAGE_SIZE: usize = 4096; + + unsafe { + let uffd = UffdBuilder::new() + .close_on_exec(true) + .non_blocking(true) + .create()?; + + let mapping = libc::mmap( + ptr::null_mut(), + PAGE_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANON, + -1, + 0, + ); + + assert!(!mapping.is_null()); + + uffd.register(mapping, PAGE_SIZE)?; + + assert!(uffd.read_event()?.is_none()); + + let ptr = mapping as usize; + let thread = thread::spawn(move || { + let ptr = ptr as *mut u8; + *ptr = 1; + }); + + loop { + match uffd.read_event()? { + Some(Event::Pagefault { + rw: ReadWrite::Write, + addr, + .. 
+ }) => { + assert_eq!(addr, mapping); + uffd.zeropage(addr, PAGE_SIZE, true)?; + break; + } + Some(_) => panic!("unexpected event"), + None => thread::sleep(std::time::Duration::from_millis(50)), + } + } + + thread.join().expect("failed to join thread"); + + uffd.unregister(mapping, PAGE_SIZE)?; + + assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0); + } + + Ok(()) + } + + #[test] + fn test_read_events() -> Result<()> { + unsafe { + const MAX_THREADS: usize = 5; + const PAGE_SIZE: usize = 4096; + const MEM_SIZE: usize = PAGE_SIZE * MAX_THREADS; + + let uffd = UffdBuilder::new().close_on_exec(true).create()?; + + let mapping = libc::mmap( + ptr::null_mut(), + MEM_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANON, + -1, + 0, + ); + + assert!(!mapping.is_null()); + + uffd.register(mapping, MEM_SIZE)?; + + // As accessing the memory will suspend each thread with a page fault event, + // there is no way to signal that the operations the test thread is waiting on to + // complete have been performed. + // + // Therefore, this is inherently racy. The best we can do is simply sleep-wait for + // all threads to have signaled that the operation is *about to be performed*. + let mut seen = [false; MAX_THREADS]; + let mut threads = Vec::new(); + for i in 0..MAX_THREADS { + let seen = &mut seen[i] as *mut _ as usize; + let ptr = (mapping as *mut u8).add(PAGE_SIZE * i) as usize; + threads.push(thread::spawn(move || { + let seen = seen as *mut bool; + let ptr = ptr as *mut u8; + *seen = true; + *ptr = 1; + })); + } + + loop { + // Sleep even if all threads have "signaled", just in case any + // thread is preempted prior to faulting the memory access. + // Still, there's no guarantee that the call to `read_events` below will + // read all the events at once, but this should be "good enough". 
+ let done = seen.iter().all(|b| *b); + thread::sleep(std::time::Duration::from_millis(50)); + if done { + break; + } + } + + // Read all the events at once + let mut buf = EventBuffer::new(MAX_THREADS); + let mut iter = uffd.read_events(&mut buf)?; + + let mut seen = [false; MAX_THREADS]; + for _ in 0..MAX_THREADS { + match iter + .next() + .transpose()? + .expect("failed to read all events; potential race condition was hit") + { + Event::Pagefault { + rw: ReadWrite::Write, + addr, + .. + } => { + let index = (addr as usize - mapping as usize) / PAGE_SIZE; + assert_eq!(seen[index], false); + seen[index] = true; + uffd.zeropage(addr, PAGE_SIZE, true)?; + } + _ => panic!("unexpected event"), + } + } + + assert!(seen.iter().all(|b| *b)); + + for thread in threads { + thread.join().expect("failed to join thread"); + } + + uffd.unregister(mapping, MEM_SIZE)?; + + assert_eq!(libc::munmap(mapping, MEM_SIZE), 0); + } + + Ok(()) + } + + #[cfg(feature = "linux5_7")] + #[test] + fn test_write_protect() -> Result<()> { + const PAGE_SIZE: usize = 4096; + + unsafe { + let uffd = UffdBuilder::new() + .require_features(FeatureFlags::PAGEFAULT_FLAG_WP) + .close_on_exec(true) + .create()?; + + let mapping = libc::mmap( + ptr::null_mut(), + PAGE_SIZE, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANON, + -1, + 0, + ); + + assert!(!mapping.is_null()); + + // This test uses both missing and write-protect modes for a reason. + // The `uffdio_writeprotect` ioctl can only be used on a range *after* + // the missing fault is handled, it seems. This means we either need to + // read/write the page *before* we protect it or handle the missing + // page fault by changing the protection level *after* we zero the page. + assert!(uffd + .register_with_mode( + mapping, + PAGE_SIZE, + RegisterMode::MISSING | RegisterMode::WRITE_PROTECT + )? 
+ .contains(IoctlFlags::WRITE_PROTECT)); + + let ptr = mapping as usize; + let thread = thread::spawn(move || { + let ptr = ptr as *mut u8; + *ptr = 1; + *ptr = 2; + }); + + loop { + match uffd.read_event()? { + Some(Event::Pagefault { + kind, + rw: ReadWrite::Write, + addr, + .. + }) => match kind { + FaultKind::WriteProtected => { + assert_eq!(addr, mapping); + assert_eq!(*(addr as *const u8), 0); + // Remove the protection and wake the page + uffd.remove_write_protection(mapping, PAGE_SIZE, true)?; + break; + } + FaultKind::Missing => { + assert_eq!(addr, mapping); + uffd.zeropage(mapping, PAGE_SIZE, false)?; + + // Technically, we already know it was a write that triggered + // the missing page fault, so there's little point in immediately + // write-protecting the page to cause another fault; in the real + // world, a missing fault with `rw` being `ReadWrite::Write` would + // be enough to mark the page as "dirty". For this test, however, + // we do it this way to ensure a write-protected fault is read. 
+ assert_eq!(*(addr as *const u8), 0); + uffd.write_protect(mapping, PAGE_SIZE)?; + uffd.wake(mapping, PAGE_SIZE)?; + } + }, + _ => panic!("unexpected event"), + } + } + + thread.join().expect("failed to join thread"); + + assert_eq!(*(mapping as *const u8), 2); + + uffd.unregister(mapping, PAGE_SIZE)?; + + assert_eq!(libc::munmap(mapping, PAGE_SIZE), 0); + } + + Ok(()) + } +} diff --git a/src/raw.rs b/src/raw.rs new file mode 100644 index 0000000..332c459 --- /dev/null +++ b/src/raw.rs @@ -0,0 +1,25 @@ +use libc::{c_int, c_long, syscall, SYS_userfaultfd, INT_MAX}; +pub use userfaultfd_sys::*; + +pub unsafe fn userfaultfd(flags: c_int) -> c_int { + let fd = syscall(SYS_userfaultfd, flags as c_long); + if fd > INT_MAX as c_long { + panic!("fd doesn't fit in a c_int"); + } else { + fd as c_int + } +} + +nix::ioctl_readwrite!(api, UFFDIO, _UFFDIO_API, uffdio_api); +nix::ioctl_readwrite!(register, UFFDIO, _UFFDIO_REGISTER, uffdio_register); +nix::ioctl_read!(unregister, UFFDIO, _UFFDIO_UNREGISTER, uffdio_range); +nix::ioctl_read!(wake, UFFDIO, _UFFDIO_WAKE, uffdio_range); +nix::ioctl_readwrite!(copy, UFFDIO, _UFFDIO_COPY, uffdio_copy); +nix::ioctl_readwrite!(zeropage, UFFDIO, _UFFDIO_ZEROPAGE, uffdio_zeropage); +#[cfg(feature = "linux5_7")] +nix::ioctl_readwrite!( + write_protect, + UFFDIO, + _UFFDIO_WRITEPROTECT, + uffdio_writeprotect +); diff --git a/tests/manpage_example.rs b/tests/manpage_example.rs new file mode 100644 index 0000000..49a061f --- /dev/null +++ b/tests/manpage_example.rs @@ -0,0 +1,8 @@ +#[test] +fn run_manpage_example() { + let output = std::process::Command::new("cargo") + .args(&["run", "--example", "manpage", "--", "3"]) + .output() + .expect("manpage example failed to start"); + assert!(output.status.success(), "manpage example succeeded"); +} -- cgit v1.2.3