From ea7c3077bda876b7967351e7047f800be3e45c1d Mon Sep 17 00:00:00 2001 From: Jeongik Cha Date: Thu, 14 Sep 2023 16:36:26 +0900 Subject: Import virtio-queue Bug: 277909042 Test: build Change-Id: Iff2cf057637648f5e1a67190678b83ce530edc33 --- .cargo_vcs_info.json | 6 + Android.bp | 19 + CHANGELOG.md | 134 ++++ Cargo.toml | 54 ++ Cargo.toml.orig | 28 + LICENSE | 30 + LICENSE-BSD-3-Clause | 27 + LICENSE_APACHE | 30 + METADATA | 19 + MODULE_LICENSE_APACHE2 | 0 MODULE_LICENSE_BSD | 0 OWNERS | 1 + README.md | 222 ++++++ benches/main.rs | 21 + benches/queue/mod.rs | 85 +++ cargo2android.json | 6 + docs/TESTING.md | 29 + docs/images/descriptor.png | Bin 0 -> 32100 bytes docs/images/queue.png | Bin 0 -> 86591 bytes src/chain.rs | 504 ++++++++++++++ src/defs.rs | 38 ++ src/descriptor.rs | 276 ++++++++ src/lib.rs | 267 ++++++++ src/mock.rs | 500 ++++++++++++++ src/queue.rs | 1597 ++++++++++++++++++++++++++++++++++++++++++++ src/queue_sync.rs | 358 ++++++++++ src/state.rs | 119 ++++ 27 files changed, 4370 insertions(+) create mode 100644 .cargo_vcs_info.json create mode 100644 Android.bp create mode 100644 CHANGELOG.md create mode 100644 Cargo.toml create mode 100644 Cargo.toml.orig create mode 100644 LICENSE create mode 100644 LICENSE-BSD-3-Clause create mode 100644 LICENSE_APACHE create mode 100644 METADATA create mode 100644 MODULE_LICENSE_APACHE2 create mode 100644 MODULE_LICENSE_BSD create mode 100644 OWNERS create mode 100644 README.md create mode 100644 benches/main.rs create mode 100644 benches/queue/mod.rs create mode 100644 cargo2android.json create mode 100644 docs/TESTING.md create mode 100644 docs/images/descriptor.png create mode 100644 docs/images/queue.png create mode 100644 src/chain.rs create mode 100644 src/defs.rs create mode 100644 src/descriptor.rs create mode 100644 src/lib.rs create mode 100644 src/mock.rs create mode 100644 src/queue.rs create mode 100644 src/queue_sync.rs create mode 100644 src/state.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..3486f5b --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "854e098e4871f2ea487b1853bc21f02d6c1e8057" + }, + "path_in_vcs": "crates/virtio-queue" +} \ No newline at end of file diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000..83d3ed5 --- /dev/null +++ b/Android.bp @@ -0,0 +1,19 @@ +// This file is generated by cargo2android.py --config cargo2android.json. +// Do not modify this file as changes will be overridden on upgrade. + + + +rust_library_host { + name: "libvirtio_queue", + crate_name: "virtio_queue", + cargo_env_compat: true, + cargo_pkg_version: "0.9.0", + srcs: ["src/lib.rs"], + edition: "2021", + rustlibs: [ + "liblog_rust", + "libvirtio_bindings", + "libvm_memory_android", + "libvmm_sys_util", + ], +} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..74f074a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,134 @@ +# v0.9.0 + +## Changed +- Updated vm-memory from 0.11.0 to 0.12.0. + +# v0.8.0 + +## Changed +- Terminate iterating descriptor chains that are longer than 2^32 bytes. +- Updated vm-memory from 0.10.0 to 0.11.0. +- Updated virtio-bindings from 0.1.0 to 0.2.0. + +# v0.7.1 + +## Fixed +- Skip indirect descriptor address alignment check, the virtio spec has + no alignment requirement on this, see `2.6.5.3 Indirect Descriptors` + and `2.7.7 Indirect Flag: Scatter-Gather Support` in virtio 1.0. +- Update the `add_desc_chains` mock function such that it works on big endian + hosts as well. 
+- Check that the queue is ready for processing requests when calling the + iterator functions. For now the checks are limited to the avail address and + the ready fields, but should be extended in the future to account for other + fields that could signal an invalid queue. This behavior can be triggered + by doing a `reset` followed by a `pop_descriptor_chain`. + +# v0.7.0 + +## Changed + +- Updated vmm-sys-util from 0.10.0 to 0.11.0. +- Updated vm-memory from 0.9.0 to 0.10.0. + +# v0.6.1 + +## Fixed +- Return an error if the number of available descriptor chains exposed by the + driver exceeds the queue size. This way we avoid potential hanging and + Denial-of-Service in the VMM, that was possible before by iterating multiple + times over the same chains. + +# v0.6.0 + +## Added +- Derive `Eq` for structures that derive `PartialEq`. + +## Changed +- Use `add_desc_chains` in tests +- Update dependencies: `vm-memory` from `0.8.0` to `0.9.0` and `log` from `0.4.6` to `0.4.17`. +- Upgrade to Rust 2021 edition. + +# v0.5.0 + +## Added +- Added getters and setters for the Virtio Queue fields. +- Added the `state` method for retrieving the `QueueState` of a `Queue`. + +## Fixed +- Validate the state of the Virtio Queue when restoring from state and return errors on invalid + input. + +## Removed +- Removed the wrapper over the Virtio Queue that was wrapping the Guest Memory. VMMs can define + this wrapper if needed, but this is no longer provided as part of virtio-queue crate so that the + naming scheme can be simplified. As a consequence, a couple of functions now receive the + memory as a parameter (more details in the Changed section). +- Removed `num_added` field from the `QueueState` because this is an implementation detail of + the notification suppression feature and thus should not be part of the state. +- Removed `QueueGuard` and `lock_with_memory`. + +## Changed +- `QueueState` is now renamed to `Queue`. +- `QueueStateSync` is now renamed to `QueueSync`. +- The `QueueState` structure now represents the state of the `Queue` without any implementation + details. This can be used for implementing save/restore. +- Initializing a `Queue` now returns an error in case the `max_size` is invalid. +- The `Queue` fields are now private and can be updated only through the dedicated setters. +- The following Queue methods now receive the memory as a parameter: `iter`, `is_valid`, + `add_used`, `needs_notification`, `enable_notification`, `disable_notification`, `avail_idx`, + `used_idx`. +- Use the constant definition from the `virtio-queue` crate. + +# v0.4.0 + +## Fixed +- [[#173]](https://github.com/rust-vmm/vm-virtio/pull/173) Fix potential division by zero in + iterator when the queue size is 0. + +## Changed +- [[#162]](https://github.com/rust-vmm/vm-virtio/pull/162) Added error handling in the mock + interface and the ability to create multiple descriptor chains for testing in order to + support running fuzzing. +- [[#174]](https://github.com/rust-vmm/vm-virtio/pull/174) Updated the `avail_idx` and `used_idx` + documentation to specify when these functions panic. + + +# v0.3.0 + +## Added +- [[#148]](https://github.com/rust-vmm/vm-virtio/pull/148): `QueueStateOwnedT` trait that stands + for queue objects which are exclusively owned and accessed by a single thread of execution. 
+- [[#148]](https://github.com/rust-vmm/vm-virtio/pull/148): Added the `pop_descriptor_chain` + method, which can be used to consume descriptor chains from the available ring without + using an iterator, to `QueueStateT` and `QueueGuard`. Also added `go_to_previous_position()` + to `QueueGuard`, which enables decrementing the next available index by one position, which + effectively undoes the consumption of a descriptor chain in some use cases. +- [[#151]](https://github.com/rust-vmm/vm-virtio/pull/151): Added `MockSplitQueue::add_desc_chain()`, + which places a descriptor chain at the specified offset in the descriptor table. +- [[#153]](https://github.com/rust-vmm/vm-virtio/pull/153): Added `QueueStateT::size()` to return + the size of the queue. + +## Changed +- The minimum version of the `vm-memory` dependency is now `v0.8.0` +- [[#161]](https://github.com/rust-vmm/vm-virtio/pull/161): Improve the efficiency of `needs_notification` + +## Removed +- [[#153]](https://github.com/rust-vmm/vm-virtio/pull/153): `#[derive(Clone)]` for `QueueState` + +# v0.2.0 + +## Added + +- *Testing Interface*: Added the possibility to initialize a mock descriptor + chain from a list of descriptors. +- Added setters and getters for the queue fields required for extending the + `Queue` in VMMs. + +## Fixed + +- Apply the appropriate endianness conversion on `used_idx`. + +# v0.1.0 + +This is the first release of the crate. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7ef9e3e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,54 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2021" +name = "virtio-queue" +version = "0.9.0" +authors = ["The Chromium OS Authors"] +description = "virtio queue implementation" +readme = "README.md" +keywords = ["virtio"] +license = "Apache-2.0 OR BSD-3-Clause" +repository = "https://github.com/rust-vmm/vm-virtio" +resolver = "1" + +[[bench]] +name = "main" +harness = false + +[dependencies.log] +version = "0.4.17" + +[dependencies.virtio-bindings] +version = "0.2.1" + +[dependencies.vm-memory] +version = "0.12.0" + +[dependencies.vmm-sys-util] +version = "0.11.0" + +[dev-dependencies.criterion] +version = "0.3.0" + +[dev-dependencies.memoffset] +version = "0.7.1" + +[dev-dependencies.vm-memory] +version = "0.12.0" +features = [ + "backend-mmap", + "backend-atomic", +] + +[features] +test-utils = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..79ba46f --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,28 @@ +[package] +name = "virtio-queue" +version = "0.9.0" +authors = ["The Chromium OS Authors"] +description = "virtio queue implementation" +repository = "https://github.com/rust-vmm/vm-virtio" +keywords = ["virtio"] +readme = "README.md" +license = "Apache-2.0 OR BSD-3-Clause" +edition = "2021" + +[features] +test-utils = [] + +[dependencies] +vm-memory = "0.12.0" +vmm-sys-util = "0.11.0" +log = "0.4.17" +virtio-bindings = { path="../virtio-bindings", version = "0.2.1" } + +[dev-dependencies] +criterion = "0.3.0" +vm-memory = { version = "0.12.0", features = ["backend-mmap", "backend-atomic"] } +memoffset = "0.7.1" + +[[bench]] +name = "main" +harness = false diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ebfb55c --- /dev/null +++ b/LICENSE @@ -0,0 +1,30 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + diff --git a/LICENSE-BSD-3-Clause b/LICENSE-BSD-3-Clause new file mode 100644 index 0000000..8bafca3 --- /dev/null +++ b/LICENSE-BSD-3-Clause @@ -0,0 +1,27 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSE_APACHE b/LICENSE_APACHE new file mode 100644 index 0000000..ebfb55c --- /dev/null +++ b/LICENSE_APACHE @@ -0,0 +1,30 @@ +// Copyright 2017 The Chromium OS Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+---
+
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..51ab4f1
--- /dev/null
+++ b/METADATA
+name: "virtio-queue"
+description: "virtio queue implementation"
+third_party {
+  identifier {
+    type: "crates.io"
+    value: "https://crates.io/crates/virtio-queue"
+  }
+  identifier {
+    type: "Archive"
+    value: "https://static.crates.io/crates/virtio-queue/virtio-queue-0.9.0.crate"
+  }
+  version: "0.9.0"
+  license_type: NOTICE
+  last_upgrade_date {
+    year: 2023
+    month: 8
+    day: 23
+  }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
diff --git a/MODULE_LICENSE_BSD b/MODULE_LICENSE_BSD
new file mode 100644
index 0000000..e69de29
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..45dc4dd
--- /dev/null
+++ b/OWNERS
+include platform/prebuilts/rust:master:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0adc1b1
--- /dev/null
+++ b/README.md
+# virtio-queue
+
+The `virtio-queue` crate provides a virtio device implementation for a virtio
+queue, a virtio descriptor, and a chain of such descriptors.
+The specification defines two virtio queue formats: split virtqueues and
+packed virtqueues. The `virtio-queue` crate supports only the
+[split virtqueues](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-240006)
+format.
+The virtio-queue API is meant to be consumed by virtio device
+implementations (such as the block device or vsock device).
+The main abstraction is the `Queue`. The crate also defines a state object
+for the queue, `QueueState`.
+
+## Usage
+
+Let’s take a concrete example of how a device works with a queue, using the
+MMIO bus.
+
+First, it is important to mention that the mandatory parts of the virtio
+interface are the following:
+
+- the device status field → provides an indication of
+  [the completed steps](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-100001)
+  of the device initialization routine,
+- the feature bits →
+  [the features](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-100001)
+  understood by the driver and the device,
+- [notifications](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-170003),
+- one or more
+  [virtqueues](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-230005)
+  → the mechanism for data transport between the driver and device.
+
+Each virtqueue consists of three parts:
+
+- Descriptor Table,
+- Available Ring,
+- Used Ring.
+
+Before booting the virtual machine (VM), the VMM performs the following
+setup:
+
+1. initialize an array of `Queue`s using the `Queue` constructor.
+2. register the device on the MMIO bus, so that the driver can later send
+   read/write requests from/to the MMIO space; some of those requests also
+   set up the queues’ state.
+3. perform other pre-boot configuration, such as registering an fd for the
+   interrupt assigned to the device; this fd is later used by the device to
+   inform the driver that it has information to communicate.
+
+After the VM boots, the driver starts sending read/write requests to
+configure things like:
+
+* the supported features;
+* queue parameters. The following setters are used for queue setup:
+    * `set_size` → set the size of the queue.
+    * `set_ready` → configure the queue to the `ready for processing` state.
+    * `set_desc_table_address`, `set_avail_ring_address`,
+      `set_used_ring_address` → configure the guest address of the
+      constituent parts of the queue.
+    * `set_event_idx` → called as part of feature negotiation in the
+      `virtio-device` crate; it enables or disables the
+      VIRTIO_F_RING_EVENT_IDX feature.
+* the device activation. As part of this activation, the device can also
+  create a queue handler, which can later be used to process the queue.
+
+Once the queues are ready, the device can be used. A minimal sketch of this
+setup is shown below.
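+In the sketch, the queue size and the guest addresses are placeholder values
+(a real VMM receives them from the driver through MMIO configuration
+writes), and `setup_queue` is an illustrative helper, not part of the crate:
+
+```rust
+use virtio_queue::{Queue, QueueT};
+
+fn setup_queue() -> Result<Queue, virtio_queue::Error> {
+    // The maximum queue size is advertised to the driver, which may
+    // configure a smaller actual size via `set_size`.
+    let mut q = Queue::new(256)?;
+
+    // Values that would normally arrive as MMIO writes from the driver.
+    q.set_size(256);
+    q.set_desc_table_address(Some(0x1000), None);
+    q.set_avail_ring_address(Some(0x2000), None);
+    q.set_used_ring_address(Some(0x3000), None);
+    q.set_event_idx(true);
+    q.set_ready(true);
+
+    Ok(q)
+}
+```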
+The steady state operation of a virtio device follows a model where the
+driver produces descriptor chains which are consumed by the device, and both
+parties need to be notified when new elements have been placed on the
+associated ring to avoid busy polling. The precise notification mechanism is
+left up to the VMM that incorporates the devices and queues (it usually
+involves things like MMIO VM exits and interrupt injection into the guest).
+The queue implementation is agnostic to the notification mechanism in use,
+and it exposes methods and functionality (such as iterators) that are called
+from the outside in response to a notification event.
+
+### Data transmission using virtqueues
+
+The basic principle of how the queues are used by the device/driver is the
+following, as shown in the diagram below as well:
+
+1. when the guest driver has a new request (buffer), it allocates free
+   descriptor(s) for the buffer in the descriptor table, chaining as
+   necessary.
+2. the driver adds a new entry, with the head index of the descriptor chain
+   describing the request, in the available ring entries.
+3. the driver increments the `idx` with the number of new entries; the
+   diagram shows the simple use case of only one new entry.
+4. the driver sends an available buffer notification to the device if such
+   notifications are not suppressed.
+5. the device will at some point consume that request, by first reading the
+   `idx` field from the available ring. This can be done directly with
+   `Queue::avail_idx`, but we do not recommend that consumers of the crate
+   use it, because it is already called behind the scenes by the iterator
+   over all available descriptor chain heads.
+6. the device gets the index of the descriptor chain(s) corresponding to the
+   read `idx` value.
+7. the device reads the corresponding descriptor(s) from the descriptor
+   table.
+8. the device adds a new entry in the used ring by using `Queue::add_used`;
+   the entry is defined in the spec as `virtq_used_elem`, and in
+   `virtio-queue` as `VirtqUsedElem`. This structure holds both the index of
+   the descriptor chain and the number of bytes that were written to memory
+   as part of serving the request.
+9. the device increments the `idx` of the used ring; this is done as part of
+   the `Queue::add_used` call mentioned above.
+10. the device sends a used buffer notification to the driver if such
+    notifications are not suppressed.
+
+![queue](https://raw.githubusercontent.com/rust-vmm/vm-virtio/main/crates/virtio-queue/docs/images/queue.png)
+
+A descriptor stores four fields, with the first two, `addr` and `len`,
+pointing to the data in memory to which the descriptor refers, as shown in
+the diagram below. The `flags` field indicates, for example, whether the
+buffer is device readable or writable, or whether another descriptor is
+chained after this one (VIRTQ_DESC_F_NEXT flag set). The `next` field stores
+the index of the next descriptor if VIRTQ_DESC_F_NEXT is set.
+
+![descriptor](https://raw.githubusercontent.com/rust-vmm/vm-virtio/main/crates/virtio-queue/docs/images/descriptor.png)
+**Requirements for device implementation**
+
+* Abstractions from virtio-queue such as `DescriptorChain` can be used to
+  parse descriptors provided by the driver, which represent input or output
+  memory areas for device I/O. A descriptor is essentially an
+  (address, length) pair, which is subsequently used by the device model
+  operation. We do not check the validity of the descriptors, and instead
+  expect any validations to happen when the device implementation is
+  attempting to access the corresponding areas. Early checks can add
+  non-negligible additional costs, and exclusively relying upon them may
+  lead to time-of-check-to-time-of-use race conditions.
+* The device should validate, before reading/writing a buffer, that it is
+  device-readable/device-writable.
+
+## Design
+
+`QueueT` is a trait that allows different implementations of a `Queue`
+object for single-threaded and multi-threaded contexts. The implementations
+provided in `virtio-queue` are:
+
+1. `Queue` → used in single-threaded contexts.
+2. `QueueSync` → used in multi-threaded contexts; it is simply a wrapper
+   over an `Arc<Mutex<Queue>>`.
+
+Besides the above abstractions, the `virtio-queue` crate also provides the
+following:
+
+* `Descriptor` → mostly offers accessors for the members of the
+  `Descriptor`.
+* `DescriptorChain` → provides accessors for the `DescriptorChain`’s members
+  and an `Iterator` implementation for iterating over the `DescriptorChain`;
+  there is also an abstraction for iterating over just the device-readable
+  or just the device-writable descriptors (`DescriptorChainRwIter`).
+* `AvailIter` → a consuming iterator over all available descriptor chain
+  heads in the queue.
+
+## Save/Restore Queue
+
+The `Queue` allows saving its state through the `state` function, which
+returns a `QueueState`. `Queue` objects can be created from a previously
+saved state by using `QueueState::try_from`. The VMM should check for errors
+when restoring a `Queue` from a previously saved state.
+
+### Notification suppression
+
+A big part of the `virtio-queue` crate consists of the notification
+suppression support. As already mentioned, the driver can send an available
+buffer notification to the device when there are new entries in the
+available ring, and the device can send a used buffer notification to the
+driver when there are new entries in the used ring. There are cases when
+sending a notification each time one of these events happens is not
+efficient; for example, while the driver is processing the used ring, it
+does not need to receive another used buffer notification. The mechanism for
+suppressing the notifications is detailed in the following sections of the
+specification:
+- [Used Buffer Notification Suppression](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-400007),
+- [Available Buffer Notification Suppression](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-4800010).
+
+The `Queue` abstraction proposes the following sequence of steps for
+processing new available ring entries:
+
+1. the device first disables notifications, using
+   `Queue::disable_notification`, to make the driver aware that it is
+   processing the available ring and does not want interruptions.
+   Notifications are disabled by the device either if VIRTIO_F_EVENT_IDX is
+   not negotiated and VIRTQ_USED_F_NO_NOTIFY is set in the `flags` field of
+   the used ring, or if VIRTIO_F_EVENT_IDX is negotiated and the
+   `avail_event` value is not updated, i.e. it remains set to the latest
+   `idx` value of the available ring that was already notified by the
+   driver.
+2. the device processes the new entries by using the `AvailIter` iterator.
+3. the device can now re-enable notifications, by using
+   `Queue::enable_notification`. Notifications are enabled by the device
+   either if VIRTIO_F_EVENT_IDX is not negotiated and 0 is set in the
+   `flags` field of the used ring, or if VIRTIO_F_EVENT_IDX is negotiated
+   and the `avail_event` value is set to the smallest `idx` value of the
+   available ring that was not already notified by the driver. This way the
+   device makes sure that it won’t miss any notification.
+
+The above steps should be done in a loop, to also handle the less likely
+case where the driver added new entries just before we re-enabled
+notifications; a sketch of this loop is shown below.
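+In the sketch, `process_queue` stands in for the device-specific handling of
+the available ring (for example, the function sketched earlier in this
+document):
+
+```rust
+use virtio_queue::{Queue, QueueT};
+use vm_memory::GuestMemoryMmap;
+
+fn handle_available_notification(
+    q: &mut Queue,
+    mem: &GuestMemoryMmap,
+) -> Result<(), virtio_queue::Error> {
+    loop {
+        // Step 1: suppress further available buffer notifications.
+        q.disable_notification(mem)?;
+
+        // Step 2: drain the available ring.
+        process_queue(q, mem)?;
+
+        // Step 3: re-enable notifications. `enable_notification` returns
+        // true if the driver published new entries in the meantime, in
+        // which case we go around the loop again.
+        if !q.enable_notification(mem)? {
+            break;
+        }
+    }
+    Ok(())
+}
+```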
+For notifications from the device to the driver, the `Queue` provides the
+`needs_notification` method, which should be called each time the device
+adds a new entry to the used ring. Depending on the `used_event` value and
+on the last used value (`signalled_used`), `needs_notification` returns true
+to let the device know it should send a notification to the guest.
+
+## Assumptions
+
+We assume the users of the `Queue` implementation won’t attempt to use the
+queue before checking that the `ready` bit is set. This can be verified by
+calling `Queue::is_valid`, which, besides this, also checks that the three
+queue parts are valid memory regions.
+We assume consumers will use `AvailIter::go_to_previous_position` only in
+single-threaded contexts.
+We assume the users will consume the entries from the available ring in the
+way recommended by the documentation, i.e. the device disables the
+notifications, processes the entries, and then re-enables notifications.
+
+## License
+
+This project is licensed under either of
+
+- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0
+- [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause)
diff --git a/benches/main.rs b/benches/main.rs
new file mode 100644
index 0000000..18266c4
--- /dev/null
+++ b/benches/main.rs
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+extern crate criterion;
+
+mod queue;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use queue::benchmark_queue;
+
+criterion_group! {
+    name = benches;
+    config = Criterion::default().sample_size(200).measurement_time(std::time::Duration::from_secs(20));
+    targets = benchmark_queue
+}
+
+criterion_main! {
+    benches,
+}
diff --git a/benches/queue/mod.rs b/benches/queue/mod.rs
new file mode 100644
index 0000000..2d24d49
--- /dev/null
+++ b/benches/queue/mod.rs
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+use criterion::{black_box, BatchSize, Criterion};
+use virtio_queue::{Queue, QueueOwnedT, QueueT};
+use vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
+
+use virtio_queue::mock::MockSplitQueue;
+
+pub fn benchmark_queue(c: &mut Criterion) {
+    fn walk_queue<M: GuestMemory>(q: &mut Queue, mem: &M) -> (usize, usize) {
+        let mut num_chains = 0;
+        let mut num_descriptors = 0;
+
+        q.iter(mem).unwrap().for_each(|chain| {
+            num_chains += 1;
+            chain.for_each(|_| num_descriptors += 1);
+        });
+
+        (num_chains, num_descriptors)
+    }
+
+    fn bench_queue<S, R>(c: &mut Criterion, bench_name: &str, setup: S, mut routine: R)
+    where
+        S: FnMut() -> Queue + Clone,
+        R: FnMut(Queue),
+    {
+        c.bench_function(bench_name, move |b| {
+            b.iter_batched(
+                setup.clone(),
+                |q| routine(black_box(q)),
+                BatchSize::SmallInput,
+            )
+        });
+    }
+
+    let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x0), 0x1_0000_0000)]).unwrap();
+
+    let queue_with_chains = |num_chains, len, indirect| {
+        let mut mq = MockSplitQueue::new(&mem, 256);
+        for _ in 0..num_chains {
+            if indirect {
+                mq.add_indirect_chain(len).unwrap();
+            } else {
+                mq.add_chain(len).unwrap();
+            }
+        }
+        mq.create_queue().unwrap()
+    };
+
+    let empty_queue = || {
+        let mq = MockSplitQueue::new(&mem, 256);
+        mq.create_queue().unwrap()
+    };
+
+    for indirect in [false, true].iter().copied() {
+        bench_queue(
+            c,
+            &format!("single chain (indirect={})", indirect),
+            || queue_with_chains(1, 128, indirect),
+            |mut q| {
+                let (num_chains, num_descriptors) = walk_queue(&mut q, &mem);
+                assert_eq!(num_chains, 1);
+                assert_eq!(num_descriptors, 128);
+            },
+        );
+
+        bench_queue(
+            c,
+            &format!("multiple chains (indirect={})", indirect),
+            || queue_with_chains(128, 1, indirect),
+            |mut q| {
+                let (num_chains, num_descriptors) = walk_queue(&mut q, &mem);
+                assert_eq!(num_chains, 128);
+                assert_eq!(num_descriptors, 128);
+            },
+        );
+    }
+
+    bench_queue(c, "add used", empty_queue, |mut q| {
+        for _ in 0..128 {
+            q.add_used(&mem, 123, 0x1000).unwrap();
+        }
+    });
+}
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..3c5fa7e
--- /dev/null
+++ b/cargo2android.json
+{
+  "run": true,
+  "dep-suffixes": {
+    "vm_memory": "_android"
+  }
+}
\ No newline at end of file
diff --git a/docs/TESTING.md b/docs/TESTING.md
new file mode 100644
index 0000000..6bcbb48
--- /dev/null
+++ b/docs/TESTING.md
+# Testing
+
+The `virtio-queue` crate is tested using:
+- unit tests - defined in their corresponding modules,
+- performance tests - defined in the [benches](../benches) directory. For
+  now, the benchmarks are not run as part of the CI, but they can be run
+  locally.
+
+The crate provides a mocking framework for the driver side of a virtio
+queue in the [mock](../src/mock.rs) module.
+This module is compiled only when the `test-utils` feature is enabled. To
+run all the unit tests (which include the documentation examples) and the
+performance tests in this crate, you need to specify the `test-utils`
+feature; otherwise the build fails.
+
+```bash
+cargo test --features test-utils
+cargo bench --features test-utils
+cargo test --doc --features test-utils
+```
+
+The mocking framework and the helpers it provides can also be used in other
+crates, for example to test a specific device implementation. To be able to
+use these test utilities, add the following to the `[dev-dependencies]`
+section of your `Cargo.toml`:
+
+```toml
+[dev-dependencies]
+virtio-queue = { version = "0.1.0", features = ["test-utils"] }
+```
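+With these pieces in place, a test can drive a queue without a guest. The
+sketch below relies on `MockSplitQueue::add_chain` writing a descriptor
+chain and publishing it on the available ring, as the benchmarks in this
+crate do; the test name and the asserted counts are illustrative only:
+
+```rust
+use virtio_queue::mock::MockSplitQueue;
+use virtio_queue::{Queue, QueueOwnedT};
+use vm_memory::{GuestAddress, GuestMemoryMmap};
+
+#[test]
+fn test_process_one_chain() {
+    let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+    let mut vq = MockSplitQueue::new(&mem, 16);
+
+    // Simulate the driver publishing one two-descriptor chain.
+    vq.add_chain(2).unwrap();
+
+    // The device under test consumes it through the `Queue` abstraction.
+    let mut q: Queue = vq.create_queue().unwrap();
+    assert_eq!(q.iter(&mem).unwrap().count(), 1);
+}
+```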
diff --git a/docs/images/descriptor.png b/docs/images/descriptor.png
new file mode 100644
index 0000000..5056751
Binary files /dev/null and b/docs/images/descriptor.png differ
diff --git a/docs/images/queue.png b/docs/images/queue.png
new file mode 100644
index 0000000..c3eaaf5
Binary files /dev/null and b/docs/images/queue.png differ
diff --git a/src/chain.rs b/src/chain.rs
new file mode 100644
index 0000000..45e0f17
--- /dev/null
+++ b/src/chain.rs
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::fmt::{self, Debug};
+use std::mem::size_of;
+use std::ops::Deref;
+
+use vm_memory::{Address, Bytes, GuestAddress, GuestMemory};
+
+use crate::{Descriptor, Error};
+use virtio_bindings::bindings::virtio_ring::VRING_DESC_ALIGN_SIZE;
+
+/// A virtio descriptor chain.
+#[derive(Clone, Debug)]
+pub struct DescriptorChain<M> {
+    mem: M,
+    desc_table: GuestAddress,
+    queue_size: u16,
+    head_index: u16,
+    next_index: u16,
+    ttl: u16,
+    yielded_bytes: u32,
+    is_indirect: bool,
+}
+
+impl<M> DescriptorChain<M>
+where
+    M: Deref,
+    M::Target: GuestMemory,
+{
+    fn with_ttl(
+        mem: M,
+        desc_table: GuestAddress,
+        queue_size: u16,
+        ttl: u16,
+        head_index: u16,
+    ) -> Self {
+        DescriptorChain {
+            mem,
+            desc_table,
+            queue_size,
+            head_index,
+            next_index: head_index,
+            ttl,
+            is_indirect: false,
+            yielded_bytes: 0,
+        }
+    }
+
+    /// Create a new `DescriptorChain` instance.
+    ///
+    /// # Arguments
+    /// * `mem` - the `GuestMemory` object that can be used to access the buffers pointed to by
+    ///   the descriptor chain.
+    /// * `desc_table` - the address of the descriptor table.
+    /// * `queue_size` - the size of the queue, which is also the maximum size of a descriptor
+    ///   chain.
+    /// * `head_index` - the descriptor index of the chain head.
+    pub(crate) fn new(mem: M, desc_table: GuestAddress, queue_size: u16, head_index: u16) -> Self {
+        Self::with_ttl(mem, desc_table, queue_size, queue_size, head_index)
+    }
+
+    /// Get the descriptor index of the chain head.
+    pub fn head_index(&self) -> u16 {
+        self.head_index
+    }
+
+    /// Return a `GuestMemory` object that can be used to access the buffers pointed to by the
+    /// descriptor chain.
+    pub fn memory(&self) -> &M::Target {
+        self.mem.deref()
+    }
+
+    /// Return an iterator that only yields the readable descriptors in the chain.
+    pub fn readable(self) -> DescriptorChainRwIter<M> {
+        DescriptorChainRwIter {
+            chain: self,
+            writable: false,
+        }
+    }
+
+    /// Return an iterator that only yields the writable descriptors in the chain.
+    pub fn writable(self) -> DescriptorChainRwIter<M> {
+        DescriptorChainRwIter {
+            chain: self,
+            writable: true,
+        }
+    }
+
+    // Alters the internal state of the `DescriptorChain` to switch iterating over an
+    // indirect descriptor table defined by `desc`.
+    fn switch_to_indirect_table(&mut self, desc: Descriptor) -> Result<(), Error> {
+        // Check that the VIRTQ_DESC_F_INDIRECT flag (i.e., is_indirect) is not set inside
+        // an indirect descriptor.
+        // (see VIRTIO Spec, Section 2.6.5.3.1 Driver Requirements: Indirect Descriptors)
+        if self.is_indirect {
+            return Err(Error::InvalidIndirectDescriptor);
+        }
+
+        // Alignment requirements for vring elements start from virtio 1.0,
+        // but this is not necessary for the address of the indirect descriptor.
+        if desc.len() & (VRING_DESC_ALIGN_SIZE - 1) != 0 {
+            return Err(Error::InvalidIndirectDescriptorTable);
+        }
+
+        // It is safe to do a plain division since we checked above that desc.len() is a
+        // multiple of VRING_DESC_ALIGN_SIZE, and VRING_DESC_ALIGN_SIZE is != 0.
+        let table_len = desc.len() / VRING_DESC_ALIGN_SIZE;
+        if table_len > u32::from(u16::MAX) {
+            return Err(Error::InvalidIndirectDescriptorTable);
+        }
+
+        self.desc_table = desc.addr();
+        // try_from cannot fail as we've checked table_len above
+        self.queue_size = u16::try_from(table_len).expect("invalid table_len");
+        self.next_index = 0;
+        self.ttl = self.queue_size;
+        self.is_indirect = true;
+
+        Ok(())
+    }
+}
+
+impl<M> Iterator for DescriptorChain<M>
+where
+    M: Deref,
+    M::Target: GuestMemory,
+{
+    type Item = Descriptor;
+
+    /// Return the next descriptor in this descriptor chain, if there is one.
+    ///
+    /// Note that this is distinct from the next descriptor chain returned by
+    /// [`AvailIter`](struct.AvailIter.html), which is the head of the next
+    /// _available_ descriptor chain.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.ttl == 0 || self.next_index >= self.queue_size {
+            return None;
+        }
+
+        let desc_addr = self
+            .desc_table
+            // The multiplication cannot overflow a u64 since we are multiplying a u16 with a
+            // small number.
+            .checked_add(self.next_index as u64 * size_of::<Descriptor>() as u64)?;
+
+        // The guest device driver should not touch the descriptor once submitted, so it's safe
+        // to use read_obj() here.
+        let desc = self.mem.read_obj::<Descriptor>(desc_addr).ok()?;
+
+        if desc.refers_to_indirect_table() {
+            self.switch_to_indirect_table(desc).ok()?;
+            return self.next();
+        }
+
+        // Constructing a chain that is longer than 2^32 bytes is illegal,
+        // so let's terminate the iteration if something violated this.
+        // (VIRTIO v1.2, 2.7.5.2: "Drivers MUST NOT add a descriptor chain
+        // longer than 2^32 bytes in total;")
+        match self.yielded_bytes.checked_add(desc.len()) {
+            Some(yielded_bytes) => self.yielded_bytes = yielded_bytes,
+            None => return None,
+        };
+
+        if desc.has_next() {
+            self.next_index = desc.next();
+            // It's ok to decrement `self.ttl` here because we check at the start of the method
+            // that it's greater than 0.
+            self.ttl -= 1;
+        } else {
+            self.ttl = 0;
+        }
+
+        Some(desc)
+    }
+}
+
+/// An iterator for readable or writable descriptors.
+#[derive(Clone)]
+pub struct DescriptorChainRwIter<M> {
+    chain: DescriptorChain<M>,
+    writable: bool,
+}
+
+impl<M> Iterator for DescriptorChainRwIter<M>
+where
+    M: Deref,
+    M::Target: GuestMemory,
+{
+    type Item = Descriptor;
+
+    /// Return the next readable/writeable descriptor (depending on the `writable` value) in this
+    /// descriptor chain, if there is one.
+    ///
+    /// Note that this is distinct from the next descriptor chain returned by
+    /// [`AvailIter`](struct.AvailIter.html), which is the head of the next
+    /// _available_ descriptor chain.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match self.chain.next() {
+                Some(v) => {
+                    if v.is_write_only() == self.writable {
+                        return Some(v);
+                    }
+                }
+                None => return None,
+            }
+        }
+    }
+}
+
+// We can't derive Debug, because rustc doesn't generate the `M::T: Debug` constraint
+impl<M> Debug for DescriptorChainRwIter<M>
+where
+    M: Debug,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("DescriptorChainRwIter")
+            .field("chain", &self.chain)
+            .field("writable", &self.writable)
+            .finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::mock::{DescriptorTable, MockSplitQueue};
+    use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT};
+    use vm_memory::GuestMemoryMmap;
+
+    #[test]
+    fn test_checked_new_descriptor_chain() {
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 16);
+
+        assert!(vq.end().0 < 0x1000);
+
+        // index >= queue_size
+        assert!(
+            DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 16)
+                .next()
+                .is_none()
+        );
+
+        // desc_table address is way off
+        assert!(
+            DescriptorChain::<&GuestMemoryMmap>::new(m, GuestAddress(0x00ff_ffff_ffff), 16, 0)
+                .next()
+                .is_none()
+        );
+
+        {
+            // The first desc has a normal len and the next_descriptor flag is set,
+            // but the index of the next descriptor is too large.
+            let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 16);
+            vq.desc_table().store(0, desc).unwrap();
+
+            let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0);
+            c.next().unwrap();
+            assert!(c.next().is_none());
+        }
+
+        // Finally, let's test an ok chain.
+        {
+            let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 1);
+            vq.desc_table().store(0, desc).unwrap();
+
+            let desc = Descriptor::new(0x2000, 0x1000, 0, 0);
+            vq.desc_table().store(1, desc).unwrap();
+
+            let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0);
+
+            assert_eq!(
+                c.memory() as *const GuestMemoryMmap,
+                m as *const GuestMemoryMmap
+            );
+
+            assert_eq!(c.desc_table, vq.start());
+            assert_eq!(c.queue_size, 16);
+            assert_eq!(c.ttl, c.queue_size);
+
+            let desc = c.next().unwrap();
+            assert_eq!(desc.addr(), GuestAddress(0x1000));
+            assert_eq!(desc.len(), 0x1000);
+            assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+            assert_eq!(desc.next(), 1);
+            assert_eq!(c.ttl, c.queue_size - 1);
+
+            assert!(c.next().is_some());
+            // The descriptor above was the last from the chain, so `ttl` should be 0 now.
+            assert_eq!(c.ttl, 0);
+            assert!(c.next().is_none());
+            assert_eq!(c.ttl, 0);
+        }
+    }
+
+    #[test]
+    fn test_ttl_wrap_around() {
+        const QUEUE_SIZE: u16 = 16;
+
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x100000)]).unwrap();
+        let vq = MockSplitQueue::new(m, QUEUE_SIZE);
+
+        // Populate the entire descriptor table with entries. Only the last one should not have
+        // the VIRTQ_DESC_F_NEXT set.
+        for i in 0..QUEUE_SIZE - 1 {
+            let desc = Descriptor::new(
+                0x1000 * (i + 1) as u64,
+                0x1000,
+                VRING_DESC_F_NEXT as u16,
+                i + 1,
+            );
+            vq.desc_table().store(i, desc).unwrap();
+        }
+        let desc = Descriptor::new((0x1000 * 16) as u64, 0x1000, 0, 0);
+        vq.desc_table().store(QUEUE_SIZE - 1, desc).unwrap();
+
+        let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), QUEUE_SIZE, 0);
+        assert_eq!(c.ttl, c.queue_size);
+
+        // Validate that `ttl` wraps around even when the entire descriptor table is populated.
+ for i in 0..QUEUE_SIZE { + let _desc = c.next().unwrap(); + assert_eq!(c.ttl, c.queue_size - i - 1); + } + assert!(c.next().is_none()); + } + + #[test] + fn test_new_from_indirect_descriptor() { + // This is testing that chaining an indirect table works as expected. It is also a negative + // test for the following requirement from the spec: + // `A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags.`. In + // case the driver is setting both of these flags, we check that the device doesn't panic. + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let dtable = vq.desc_table(); + + // Create a chain with one normal descriptor and one pointing to an indirect table. + let desc = Descriptor::new(0x6000, 0x1000, VRING_DESC_F_NEXT as u16, 1); + dtable.store(0, desc).unwrap(); + // The spec forbids setting both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags. We do + // not currently enforce this rule, we just ignore the VIRTQ_DESC_F_NEXT flag. + let desc = Descriptor::new( + 0x7000, + 0x1000, + (VRING_DESC_F_INDIRECT | VRING_DESC_F_NEXT) as u16, + 2, + ); + dtable.store(1, desc).unwrap(); + let desc = Descriptor::new(0x8000, 0x1000, 0, 0); + dtable.store(2, desc).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = DescriptorChain::new(m, vq.start(), 16, 0); + + // create an indirect table with 4 chained descriptors + let idtable = DescriptorTable::new(m, GuestAddress(0x7000), 4); + for i in 0..4u16 { + let desc: Descriptor = if i < 3 { + Descriptor::new(0x1000 * i as u64, 0x1000, VRING_DESC_F_NEXT as u16, i + 1) + } else { + Descriptor::new(0x1000 * i as u64, 0x1000, 0, 0) + }; + idtable.store(i, desc).unwrap(); + } + + assert_eq!(c.head_index(), 0); + // Consume the first descriptor. + c.next().unwrap(); + + // The chain logic hasn't parsed the indirect descriptor yet. + assert!(!c.is_indirect); + + // Try to iterate through the indirect descriptor chain. + for i in 0..4 { + let desc = c.next().unwrap(); + assert!(c.is_indirect); + if i < 3 { + assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16); + assert_eq!(desc.next(), i + 1); + } + } + // Even though we added a new descriptor after the one that is pointing to the indirect + // table, this descriptor won't be available when parsing the chain. + assert!(c.next().is_none()); + } + + #[test] + fn test_indirect_descriptor_address_noaligned() { + // Alignment requirements for vring elements start from virtio 1.0, + // but this is not necessary for address of indirect descriptor. + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let dtable = vq.desc_table(); + + // Create a chain with a descriptor pointing to an indirect table with unaligned address. + let desc = Descriptor::new( + 0x7001, + 0x1000, + (VRING_DESC_F_INDIRECT | VRING_DESC_F_NEXT) as u16, + 2, + ); + dtable.store(0, desc).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = DescriptorChain::new(m, vq.start(), 16, 0); + + // Create an indirect table with 4 chained descriptors. + let idtable = DescriptorTable::new(m, GuestAddress(0x7001), 4); + for i in 0..4u16 { + let desc: Descriptor = if i < 3 { + Descriptor::new(0x1000 * i as u64, 0x1000, VRING_DESC_F_NEXT as u16, i + 1) + } else { + Descriptor::new(0x1000 * i as u64, 0x1000, 0, 0) + }; + idtable.store(i, desc).unwrap(); + } + + // Try to iterate through the indirect descriptor chain. 
+ for i in 0..4 { + let desc = c.next().unwrap(); + assert!(c.is_indirect); + if i < 3 { + assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16); + assert_eq!(desc.next(), i + 1); + } + } + } + + #[test] + fn test_indirect_descriptor_err() { + // We are testing here different misconfigurations of the indirect table. For these error + // case scenarios, the iterator over the descriptor chain won't return a new descriptor. + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an invalid indirect table: len not a + // multiple of descriptor size. + let desc = Descriptor::new(0x1000, 0x1001, VRING_DESC_F_INDIRECT as u16, 0); + vq.desc_table().store(0, desc).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0); + + assert!(c.next().is_none()); + } + + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an invalid indirect table: table len > + // u16::MAX. + let desc = Descriptor::new( + 0x1000, + (u16::MAX as u32 + 1) * VRING_DESC_ALIGN_SIZE, + VRING_DESC_F_INDIRECT as u16, + 0, + ); + vq.desc_table().store(0, desc).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0); + + assert!(c.next().is_none()); + } + + { + let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + + // Create a chain with a descriptor pointing to an indirect table. + let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_INDIRECT as u16, 0); + vq.desc_table().store(0, desc).unwrap(); + // It's ok for an indirect descriptor to have flags = 0. + let desc = Descriptor::new(0x3000, 0x1000, 0, 0); + m.write_obj(desc, GuestAddress(0x1000)).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0); + assert!(c.next().is_some()); + + // But it's not allowed to have an indirect descriptor that points to another indirect + // table. + let desc = Descriptor::new(0x3000, 0x1000, VRING_DESC_F_INDIRECT as u16, 0); + m.write_obj(desc, GuestAddress(0x1000)).unwrap(); + + let mut c: DescriptorChain<&GuestMemoryMmap> = + DescriptorChain::new(m, vq.start(), 16, 0); + + assert!(c.next().is_none()); + } + } +} diff --git a/src/defs.rs b/src/defs.rs new file mode 100644 index 0000000..3ef2b37 --- /dev/null +++ b/src/defs.rs @@ -0,0 +1,38 @@ +// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +//! Virtio queue related constant definitions + +/// Size of used ring header: flags (u16) + idx (u16) +pub(crate) const VIRTQ_USED_RING_HEADER_SIZE: u64 = 4; + +/// Size of the used ring metadata: header + avail_event (le16). +/// +/// The total size of the used ring is: +/// VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size. +pub(crate) const VIRTQ_USED_RING_META_SIZE: u64 = VIRTQ_USED_RING_HEADER_SIZE + 2; + +/// Size of one element in the used ring, id (le32) + len (le32). +pub(crate) const VIRTQ_USED_ELEMENT_SIZE: u64 = 8; + +/// Size of available ring header: flags(u16) + idx(u16) +pub(crate) const VIRTQ_AVAIL_RING_HEADER_SIZE: u64 = 4; + +/// Size of the available ring metadata: header + used_event (le16). 
+///
+/// The total size of the available ring is:
+/// VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size.
+pub(crate) const VIRTQ_AVAIL_RING_META_SIZE: u64 = VIRTQ_AVAIL_RING_HEADER_SIZE + 2;
+
+/// Size of one element in the available ring (le16).
+pub(crate) const VIRTQ_AVAIL_ELEMENT_SIZE: u64 = 2;
+
+/// Default guest physical address for descriptor table.
+pub(crate) const DEFAULT_DESC_TABLE_ADDR: u64 = 0x0;
+
+/// Default guest physical address for available ring.
+pub(crate) const DEFAULT_AVAIL_RING_ADDR: u64 = 0x0;
+
+/// Default guest physical address for used ring.
+pub(crate) const DEFAULT_USED_RING_ADDR: u64 = 0x0;
diff --git a/src/descriptor.rs b/src/descriptor.rs
new file mode 100644
index 0000000..7f1564b
--- /dev/null
+++ b/src/descriptor.rs
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use vm_memory::{ByteValued, GuestAddress, Le16, Le32, Le64};
+
+use virtio_bindings::bindings::virtio_ring::{
+    VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT, VRING_DESC_F_WRITE,
+};
+
+/// A virtio descriptor, with C representation.
+///
+/// # Example
+///
+/// ```rust
+/// # use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
+/// # use virtio_queue::mock::MockSplitQueue;
+/// use virtio_queue::{Descriptor, Queue, QueueOwnedT};
+/// use vm_memory::{GuestAddress, GuestMemoryMmap};
+///
+/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue {
+/// #     let vq = MockSplitQueue::new(m, 16);
+/// #     let mut q = vq.create_queue().unwrap();
+/// #
+/// #     // We have only one chain: (0, 1).
+/// #     let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 1);
+/// #     vq.desc_table().store(0, desc).unwrap();
+/// #     let desc = Descriptor::new(0x2000, 0x1000, VRING_DESC_F_WRITE as u16, 0);
+/// #     vq.desc_table().store(1, desc).unwrap();
+/// #
+/// #     vq.avail().ring().ref_at(0).unwrap().store(u16::to_le(0));
+/// #     vq.avail().idx().store(u16::to_le(1));
+/// #     q
+/// # }
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// // Populate the queue with descriptor chains and update the available ring accordingly.
+/// let mut queue = populate_queue(m);
+/// let mut i = queue.iter(m).unwrap();
+/// let mut c = i.next().unwrap();
+///
+/// // Get the first descriptor and access its fields.
+/// let desc = c.next().unwrap();
+/// let _addr = desc.addr();
+/// let _len = desc.len();
+/// let _flags = desc.flags();
+/// let _next = desc.next();
+/// let _is_write_only = desc.is_write_only();
+/// let _has_next = desc.has_next();
+/// let _refers_to_ind_table = desc.refers_to_indirect_table();
+/// ```
+// Note that the `ByteValued` implementation of this structure expects the `Descriptor` to store
+// only plain old data types.
+#[repr(C)]
+#[derive(Default, Clone, Copy, Debug)]
+pub struct Descriptor {
+    /// Guest physical address of device specific data.
+    addr: Le64,
+
+    /// Length of device specific data.
+    len: Le32,
+
+    /// Includes next, write, and indirect bits.
+    flags: Le16,
+
+    /// Index into the descriptor table of the next descriptor if flags has the `next` bit set.
+    next: Le16,
+}
+
+#[allow(clippy::len_without_is_empty)]
+impl Descriptor {
+    /// Return the guest physical address of the descriptor buffer.
+    pub fn addr(&self) -> GuestAddress {
+        GuestAddress(self.addr.into())
+    }
+
+    /// Return the length of the descriptor buffer.
+    pub fn len(&self) -> u32 {
+        self.len.into()
+    }
+
+    /// Return the flags for this descriptor, including next, write and indirect bits.
+    pub fn flags(&self) -> u16 {
+        self.flags.into()
+    }
+
+    /// Return the value stored in the `next` field of the descriptor.
+    pub fn next(&self) -> u16 {
+        self.next.into()
+    }
+
+    /// Check whether this descriptor refers to a buffer containing an indirect descriptor table.
+    pub fn refers_to_indirect_table(&self) -> bool {
+        self.flags() & VRING_DESC_F_INDIRECT as u16 != 0
+    }
+
+    /// Check whether the `VIRTQ_DESC_F_NEXT` is set for the descriptor.
+    pub fn has_next(&self) -> bool {
+        self.flags() & VRING_DESC_F_NEXT as u16 != 0
+    }
+
+    /// Check if the driver designated this as a write only descriptor.
+    ///
+    /// If this is false, this descriptor is read only.
+    /// Write only means the emulated device can write and the driver can read.
+    pub fn is_write_only(&self) -> bool {
+        self.flags() & VRING_DESC_F_WRITE as u16 != 0
+    }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+impl Descriptor {
+    /// Create a new descriptor.
+    ///
+    /// # Arguments
+    /// * `addr` - the guest physical address of the descriptor buffer.
+    /// * `len` - the length of the descriptor buffer.
+    /// * `flags` - the `flags` for the descriptor.
+    /// * `next` - the `next` field of the descriptor.
+    pub fn new(addr: u64, len: u32, flags: u16, next: u16) -> Self {
+        Descriptor {
+            addr: addr.into(),
+            len: len.into(),
+            flags: flags.into(),
+            next: next.into(),
+        }
+    }
+
+    /// Set the guest physical address of the descriptor buffer.
+    pub fn set_addr(&mut self, addr: u64) {
+        self.addr = addr.into();
+    }
+
+    /// Set the length of the descriptor buffer.
+    pub fn set_len(&mut self, len: u32) {
+        self.len = len.into();
+    }
+
+    /// Set the flags for this descriptor.
+    pub fn set_flags(&mut self, flags: u16) {
+        self.flags = flags.into();
+    }
+
+    /// Set the value stored in the `next` field of the descriptor.
+    pub fn set_next(&mut self, next: u16) {
+        self.next = next.into();
+    }
+}
+
+// SAFETY: This is safe because `Descriptor` contains only wrappers over POD types and
+// all accesses through safe `vm-memory` API will validate any garbage that could be
+// included in there.
+unsafe impl ByteValued for Descriptor {}
+
+/// Represents the contents of an element from the used virtqueue ring.
+// Note that the `ByteValued` implementation of this structure expects the `VirtqUsedElem` to
+// store only plain old data types.
+#[repr(C)]
+#[derive(Clone, Copy, Default, Debug)]
+pub struct VirtqUsedElem {
+    id: Le32,
+    len: Le32,
+}
+
+impl VirtqUsedElem {
+    /// Create a new `VirtqUsedElem` instance.
+    ///
+    /// # Arguments
+    /// * `id` - the index of the used descriptor chain.
+    /// * `len` - the total length of the descriptor chain which was used (written to).
+    pub(crate) fn new(id: u32, len: u32) -> Self {
+        VirtqUsedElem {
+            id: id.into(),
+            len: len.into(),
+        }
+    }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+#[allow(clippy::len_without_is_empty)]
+impl VirtqUsedElem {
+    /// Get the index of the used descriptor chain.
+    pub fn id(&self) -> u32 {
+        self.id.into()
+    }
+
+    /// Get the `len` field of the used ring entry.
+    pub fn len(&self) -> u32 {
+        self.len.into()
+    }
+}
+
+// SAFETY: This is safe because `VirtqUsedElem` contains only wrappers over POD types
+// and all accesses through safe `vm-memory` API will validate any garbage that could be
+// included in there.
+unsafe impl ByteValued for VirtqUsedElem {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use memoffset::offset_of;
+    use std::mem::{align_of, size_of};
+
+    #[test]
+    fn test_descriptor_offset() {
+        assert_eq!(size_of::<Descriptor>(), 16);
+        assert_eq!(offset_of!(Descriptor, addr), 0);
+        assert_eq!(offset_of!(Descriptor, len), 8);
+        assert_eq!(offset_of!(Descriptor, flags), 12);
+        assert_eq!(offset_of!(Descriptor, next), 14);
+        assert!(align_of::<Descriptor>() <= 16);
+    }
+
+    #[test]
+    fn test_descriptor_getter_setter() {
+        let mut desc = Descriptor::new(0, 0, 0, 0);
+
+        desc.set_addr(0x1000);
+        assert_eq!(desc.addr(), GuestAddress(0x1000));
+        desc.set_len(0x2000);
+        assert_eq!(desc.len(), 0x2000);
+        desc.set_flags(VRING_DESC_F_NEXT as u16);
+        assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+        assert!(desc.has_next());
+        assert!(!desc.is_write_only());
+        assert!(!desc.refers_to_indirect_table());
+        desc.set_flags(VRING_DESC_F_WRITE as u16);
+        assert_eq!(desc.flags(), VRING_DESC_F_WRITE as u16);
+        assert!(!desc.has_next());
+        assert!(desc.is_write_only());
+        assert!(!desc.refers_to_indirect_table());
+        desc.set_flags(VRING_DESC_F_INDIRECT as u16);
+        assert_eq!(desc.flags(), VRING_DESC_F_INDIRECT as u16);
+        assert!(!desc.has_next());
+        assert!(!desc.is_write_only());
+        assert!(desc.refers_to_indirect_table());
+        desc.set_next(3);
+        assert_eq!(desc.next(), 3);
+    }
+
+    #[test]
+    fn test_descriptor_copy() {
+        let e1 = Descriptor::new(1, 2, VRING_DESC_F_NEXT as u16, 3);
+        let mut e2 = Descriptor::default();
+
+        e2.as_mut_slice().copy_from_slice(e1.as_slice());
+        assert_eq!(e1.addr(), e2.addr());
+        assert_eq!(e1.len(), e2.len());
+        assert_eq!(e1.flags(), e2.flags());
+        assert_eq!(e1.next(), e2.next());
+    }
+
+    #[test]
+    fn test_used_elem_offset() {
+        assert_eq!(offset_of!(VirtqUsedElem, id), 0);
+        assert_eq!(offset_of!(VirtqUsedElem, len), 4);
+        assert_eq!(size_of::<VirtqUsedElem>(), 8);
+    }
+
+    #[test]
+    fn test_used_elem_copy() {
+        let e1 = VirtqUsedElem::new(3, 15);
+        let mut e2 = VirtqUsedElem::new(0, 0);
+
+        e2.as_mut_slice().copy_from_slice(e1.as_slice());
+        assert_eq!(e1.id, e2.id);
+        assert_eq!(e1.len, e2.len);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..0e27935
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,267 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+//! Virtio queue API for backend device drivers to access virtio queues.
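+//!
+//! As a brief orientation, here is a minimal, illustrative sketch (mirroring the more
+//! complete `Queue` example in `queue.rs`) of how a backend typically creates a queue,
+//! applies the configuration the driver would write over the bus, and validates it:
+//!
+//! ```rust
+//! use virtio_queue::{Queue, QueueT};
+//! use vm_memory::{GuestAddress, GuestMemoryMmap};
+//!
+//! let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+//! let mut queue = Queue::new(1024).unwrap();
+//! // These writes normally come from the driver via PCI/MMIO; they are done by hand here.
+//! queue.set_size(8);
+//! queue.set_desc_table_address(Some(0x1000), None);
+//! queue.set_avail_ring_address(Some(0x2000), None);
+//! queue.set_used_ring_address(Some(0x3000), None);
+//! queue.set_ready(true);
+//! assert!(queue.is_valid(&mem));
+//! ```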
+
+#![deny(missing_docs)]
+
+use std::fmt::{self, Debug, Display};
+use std::num::Wrapping;
+use std::ops::{Deref, DerefMut};
+use std::sync::atomic::Ordering;
+
+use log::error;
+use vm_memory::{GuestMemory, GuestMemoryError};
+
+pub use self::chain::{DescriptorChain, DescriptorChainRwIter};
+pub use self::descriptor::{Descriptor, VirtqUsedElem};
+pub use self::queue::{AvailIter, Queue};
+pub use self::queue_sync::QueueSync;
+pub use self::state::QueueState;
+
+pub mod defs;
+#[cfg(any(test, feature = "test-utils"))]
+pub mod mock;
+
+mod chain;
+mod descriptor;
+mod queue;
+mod queue_sync;
+mod state;
+
+/// Virtio Queue related errors.
+#[derive(Debug)]
+pub enum Error {
+    /// Address overflow.
+    AddressOverflow,
+    /// Failed to access guest memory.
+    GuestMemory(GuestMemoryError),
+    /// Invalid indirect descriptor.
+    InvalidIndirectDescriptor,
+    /// Invalid indirect descriptor table.
+    InvalidIndirectDescriptorTable,
+    /// Invalid descriptor chain.
+    InvalidChain,
+    /// Invalid descriptor index.
+    InvalidDescriptorIndex,
+    /// Invalid max_size.
+    InvalidMaxSize,
+    /// Invalid Queue Size.
+    InvalidSize,
+    /// Invalid alignment of descriptor table address.
+    InvalidDescTableAlign,
+    /// Invalid alignment of available ring address.
+    InvalidAvailRingAlign,
+    /// Invalid alignment of used ring address.
+    InvalidUsedRingAlign,
+    /// Invalid available ring index.
+    InvalidAvailRingIndex,
+    /// The queue is not ready for operation.
+    QueueNotReady,
+}
+
+impl Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::Error::*;
+
+        match self {
+            AddressOverflow => write!(f, "address overflow"),
+            GuestMemory(_) => write!(f, "error accessing guest memory"),
+            InvalidChain => write!(f, "invalid descriptor chain"),
+            InvalidIndirectDescriptor => write!(f, "invalid indirect descriptor"),
+            InvalidIndirectDescriptorTable => write!(f, "invalid indirect descriptor table"),
+            InvalidDescriptorIndex => write!(f, "invalid descriptor index"),
+            InvalidMaxSize => write!(f, "invalid queue maximum size"),
+            InvalidSize => write!(f, "invalid queue size"),
+            InvalidDescTableAlign => write!(
+                f,
+                "virtio queue descriptor table breaks alignment constraints"
+            ),
+            InvalidAvailRingAlign => write!(
+                f,
+                "virtio queue available ring breaks alignment constraints"
+            ),
+            InvalidUsedRingAlign => {
+                write!(f, "virtio queue used ring breaks alignment constraints")
+            }
+            InvalidAvailRingIndex => write!(
+                f,
+                "invalid available ring index (more descriptors to process than queue size)"
+            ),
+            QueueNotReady => write!(f, "trying to process requests on a queue that's not ready"),
+        }
+    }
+}
+
+impl std::error::Error for Error {}
+
+/// Trait for objects returned by `QueueT::lock()`.
+pub trait QueueGuard<'a> {
+    /// Type for guard returned by `Self::lock()`.
+    type G: DerefMut<Target = Queue>;
+}
+
+/// Trait to access and manipulate a virtio queue.
+///
+/// To optimize for performance, different implementations of the `QueueT` trait may be
+/// provided for single-threaded context and multi-threaded context.
+///
+/// Using Higher-Rank Trait Bounds (HRTBs) to effectively define an associated type that has a
+/// lifetime parameter, without tagging the `QueueT` trait with a lifetime as well.
+pub trait QueueT: for<'a> QueueGuard<'a> {
+    /// Construct an empty virtio queue state object with the given `max_size`.
+    ///
+    /// Returns an error if `max_size` is invalid.
+    fn new(max_size: u16) -> Result<Self, Error>
+    where
+        Self: Sized;
+
+    /// Check whether the queue configuration is valid.
+    fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool;
+
+    /// Reset the queue to the initial state.
+    fn reset(&mut self);
+
+    /// Get an exclusive reference to the underlying `Queue` object.
+    ///
+    /// Logically this method will acquire the underlying lock protecting the `Queue` Object.
+    /// The lock will be released when the returned object gets dropped.
+    fn lock(&mut self) -> <Self as QueueGuard>::G;
+
+    /// Get the maximum size of the virtio queue.
+    fn max_size(&self) -> u16;
+
+    /// Get the actual size configured by the guest.
+    fn size(&self) -> u16;
+
+    /// Configure the queue size for the virtio queue.
+    fn set_size(&mut self, size: u16);
+
+    /// Check whether the queue is ready to be processed.
+    fn ready(&self) -> bool;
+
+    /// Configure the queue to `ready for processing` state.
+    fn set_ready(&mut self, ready: bool);
+
+    /// Set the descriptor table address for the queue.
+    ///
+    /// The descriptor table address is 64-bit, the corresponding part will be updated if `low`
+    /// and/or `high` is `Some` and valid.
+    fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+    /// Set the available ring address for the queue.
+    ///
+    /// The available ring address is 64-bit, the corresponding part will be updated if `low`
+    /// and/or `high` is `Some` and valid.
+    fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+    /// Set the used ring address for the queue.
+    ///
+    /// The used ring address is 64-bit, the corresponding part will be updated if `low`
+    /// and/or `high` is `Some` and valid.
+    fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+    /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing.
+    fn set_event_idx(&mut self, enabled: bool);
+
+    /// Read the `idx` field from the available ring.
+    ///
+    /// # Panics
+    ///
+    /// Panics if order is Release or AcqRel.
+    fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+    where
+        M: GuestMemory + ?Sized;
+
+    /// Read the `idx` field from the used ring.
+    ///
+    /// # Panics
+    ///
+    /// Panics if order is Release or AcqRel.
+    fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>;
+
+    /// Put a used descriptor head into the used ring.
+    fn add_used<M: GuestMemory>(&mut self, mem: &M, head_index: u16, len: u32)
+        -> Result<(), Error>;
+
+    /// Enable notification events from the guest driver.
+    ///
+    /// Return true if one or more descriptors can be consumed from the available ring after
+    /// notifications were enabled (and thus it's possible there will be no corresponding
+    /// notification).
+    fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error>;
+
+    /// Disable notification events from the guest driver.
+    fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error>;
+
+    /// Check whether a notification to the guest is needed.
+    ///
+    /// Please note this method has side effects: once it returns `true`, it considers the
+    /// driver will actually be notified, remember the associated index in the used ring, and
+    /// won't return `true` again until the driver updates `used_event` and/or the notification
+    /// conditions hold once more.
+    fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error>;
+
+    /// Return the index of the next entry in the available ring.
+    fn next_avail(&self) -> u16;
+
+    /// Set the index of the next entry in the available ring.
+    fn set_next_avail(&mut self, next_avail: u16);
+
+    /// Return the index for the next descriptor in the used ring.
+    fn next_used(&self) -> u16;
+
+    /// Set the index for the next descriptor in the used ring.
+    fn set_next_used(&mut self, next_used: u16);
+
+    /// Return the address of the descriptor table.
+    fn desc_table(&self) -> u64;
+
+    /// Return the address of the available ring.
+    fn avail_ring(&self) -> u64;
+
+    /// Return the address of the used ring.
+    fn used_ring(&self) -> u64;
+
+    /// Checks whether `VIRTIO_F_RING_EVENT_IDX` is negotiated.
+    ///
+    /// This getter is only returning the correct value after the device passes the `FEATURES_OK`
+    /// status.
+    fn event_idx_enabled(&self) -> bool;
+
+    /// Pop and return the next available descriptor chain, or `None` when there are no more
+    /// descriptor chains available.
+    ///
+    /// This enables the consumption of available descriptor chains in a "one at a time"
+    /// manner, without having to hold a borrow after the method returns.
+    fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+    where
+        M: Clone + Deref,
+        M::Target: GuestMemory;
+}
+
+/// Trait to access and manipulate a Virtio queue that's known to be exclusively accessed
+/// by a single execution thread.
+pub trait QueueOwnedT: QueueT {
+    /// Get a consuming iterator over all available descriptor chain heads offered by the driver.
+    ///
+    /// # Arguments
+    /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers.
+    fn iter<M>(&mut self, mem: M) -> Result<AvailIter<'_, M>, Error>
+    where
+        M: Deref,
+        M::Target: GuestMemory;
+
+    /// Undo the last advancement of the next available index field by decrementing its
+    /// value by one.
+    fn go_to_previous_position(&mut self);
+}
diff --git a/src/mock.rs b/src/mock.rs
new file mode 100644
index 0000000..d026f80
--- /dev/null
+++ b/src/mock.rs
@@ -0,0 +1,500 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+//! Utilities used by unit tests and benchmarks for mocking the driver side
+//! of the virtio protocol.
+
+use std::marker::PhantomData;
+use std::mem::size_of;
+
+use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestUsize};
+
+use crate::defs::{VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE};
+use crate::{Descriptor, DescriptorChain, Error, Queue, QueueOwnedT, QueueT, VirtqUsedElem};
+use std::fmt::{self, Debug, Display};
+use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT};
+
+/// Mock related errors.
+#[derive(Debug)]
+pub enum MockError {
+    /// Cannot create the Queue object due to invalid parameters.
+    InvalidQueueParams(Error),
+    /// Invalid Ref index
+    InvalidIndex,
+    /// Invalid next avail
+    InvalidNextAvail,
+}
+
+impl Display for MockError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::MockError::*;
+
+        match self {
+            InvalidQueueParams(_) => write!(f, "cannot create queue due to invalid parameter"),
+            InvalidIndex => write!(
+                f,
+                "invalid index for pointing to an address in a region when defining a Ref object"
+            ),
+            InvalidNextAvail => write!(
+                f,
+                "invalid next available descriptor chain head in the queue"
+            ),
+        }
+    }
+}
+
+impl std::error::Error for MockError {}
+
+/// Wrapper struct used for accessing a particular address of a GuestMemory area.
+pub struct Ref<'a, M, T> {
+    mem: &'a M,
+    addr: GuestAddress,
+    phantom: PhantomData<*const T>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> Ref<'a, M, T> {
+    fn new(mem: &'a M, addr: GuestAddress) -> Self {
+        Ref {
+            mem,
+            addr,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Read an object of type T from the underlying memory found at self.addr.
+    pub fn load(&self) -> T {
+        self.mem.read_obj(self.addr).unwrap()
+    }
+
+    /// Write an object of type T to the underlying memory found at self.addr.
+    pub fn store(&self, val: T) {
+        self.mem.write_obj(val, self.addr).unwrap()
+    }
+}
+
+/// Wrapper struct used for accessing a subregion of a GuestMemory area.
+pub struct ArrayRef<'a, M, T> {
+    mem: &'a M,
+    addr: GuestAddress,
+    len: usize,
+    phantom: PhantomData<*const T>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> ArrayRef<'a, M, T> {
+    fn new(mem: &'a M, addr: GuestAddress, len: usize) -> Self {
+        ArrayRef {
+            mem,
+            addr,
+            len,
+            phantom: PhantomData,
+        }
+    }
+
+    /// Return a `Ref` object pointing to an address defined by a particular
+    /// index offset in the region.
+    pub fn ref_at(&self, index: usize) -> Result<Ref<'a, M, T>, MockError> {
+        if index >= self.len {
+            return Err(MockError::InvalidIndex);
+        }
+
+        let addr = self
+            .addr
+            .checked_add((index * size_of::<T>()) as u64)
+            .unwrap();
+
+        Ok(Ref::new(self.mem, addr))
+    }
+}
+
+/// Represents a virtio queue ring. The only difference between the used and available rings
+/// is the ring element type.
+pub struct SplitQueueRing<'a, M, T: ByteValued> {
+    flags: Ref<'a, M, u16>,
+    // The value stored here should more precisely be a `Wrapping<u16>`, but that would require a
+    // `ByteValued` impl for this type, which is not provided in vm-memory. Implementing the trait
+    // here would require defining a wrapper for `Wrapping<u16>` and that would be too much for a
+    // mock framework that is only used in tests.
+    idx: Ref<'a, M, u16>,
+    ring: ArrayRef<'a, M, T>,
+    // `used_event` for `AvailRing`, `avail_event` for `UsedRing`.
+    event: Ref<'a, M, u16>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> SplitQueueRing<'a, M, T> {
+    /// Create a new `SplitQueueRing` instance
+    pub fn new(mem: &'a M, base: GuestAddress, len: u16) -> Self {
+        let event_addr = base
+            .checked_add(4)
+            .and_then(|a| a.checked_add((size_of::<T>() * len as usize) as u64))
+            .unwrap();
+
+        let split_queue_ring = SplitQueueRing {
+            flags: Ref::new(mem, base),
+            idx: Ref::new(mem, base.checked_add(2).unwrap()),
+            ring: ArrayRef::new(mem, base.checked_add(4).unwrap(), len as usize),
+            event: Ref::new(mem, event_addr),
+        };
+
+        split_queue_ring.flags.store(0);
+        split_queue_ring.idx.store(0);
+        split_queue_ring.event.store(0);
+
+        split_queue_ring
+    }
+
+    /// Return the starting address of the `SplitQueueRing`.
+    pub fn start(&self) -> GuestAddress {
+        self.ring.addr
+    }
+
+    /// Return the end address of the `SplitQueueRing`.
+    pub fn end(&self) -> GuestAddress {
+        self.start()
+            .checked_add(self.ring.len as GuestUsize)
+            .unwrap()
+    }
+
+    /// Return a reference to the idx field.
+    pub fn idx(&self) -> &Ref<'a, M, u16> {
+        &self.idx
+    }
+
+    /// Return a reference to the ring field.
+    pub fn ring(&self) -> &ArrayRef<'a, M, T> {
+        &self.ring
+    }
+}
+
+/// The available ring is used by the driver to offer buffers to the device.
+pub type AvailRing<'a, M> = SplitQueueRing<'a, M, u16>;
+/// The used ring is where the device returns buffers once it is done with them.
+pub type UsedRing<'a, M> = SplitQueueRing<'a, M, VirtqUsedElem>;
+
+/// Refers to the buffers the driver is using for the device.
+pub struct DescriptorTable<'a, M> {
+    table: ArrayRef<'a, M, Descriptor>,
+    len: u16,
+    free_descriptors: Vec<u16>,
+}
+
+impl<'a, M: GuestMemory> DescriptorTable<'a, M> {
+    /// Create a new `DescriptorTable` instance
+    pub fn new(mem: &'a M, addr: GuestAddress, len: u16) -> Self {
+        let table = ArrayRef::new(mem, addr, len as usize);
+        let free_descriptors = (0..len).rev().collect();
+
+        DescriptorTable {
+            table,
+            len,
+            free_descriptors,
+        }
+    }
+
+    /// Read one descriptor from the specified index.
+    pub fn load(&self, index: u16) -> Result<Descriptor, MockError> {
+        self.table
+            .ref_at(index as usize)
+            .map(|load_ref| load_ref.load())
+    }
+
+    /// Write one descriptor at the specified index.
+    pub fn store(&self, index: u16, value: Descriptor) -> Result<(), MockError> {
+        self.table
+            .ref_at(index as usize)
+            .map(|store_ref| store_ref.store(value))
+    }
+
+    /// Return the total size of the DescriptorTable in bytes.
+    pub fn total_size(&self) -> u64 {
+        (self.len as usize * size_of::<Descriptor>()) as u64
+    }
+
+    /// Create a chain of descriptors.
+    pub fn build_chain(&mut self, len: u16) -> Result<u16, MockError> {
+        let indices = self
+            .free_descriptors
+            .iter()
+            .copied()
+            .rev()
+            .take(usize::from(len))
+            .collect::<Vec<u16>>();
+
+        assert_eq!(indices.len(), len as usize);
+
+        for (pos, index_value) in indices.iter().copied().enumerate() {
+            // Addresses and lens constant for now.
+            let mut desc = Descriptor::new(0x1000, 0x1000, 0, 0);
+
+            // It's not the last descriptor in the chain.
+            if pos < indices.len() - 1 {
+                desc.set_flags(VRING_DESC_F_NEXT as u16);
+                desc.set_next(indices[pos + 1]);
+            } else {
+                desc.set_flags(0);
+            }
+            self.store(index_value, desc)?;
+        }
+
+        Ok(indices[0])
+    }
+}
+
+trait GuestAddressExt {
+    fn align_up(&self, x: GuestUsize) -> GuestAddress;
+}
+
+impl GuestAddressExt for GuestAddress {
+    fn align_up(&self, x: GuestUsize) -> GuestAddress {
+        Self((self.0 + (x - 1)) & !(x - 1))
+    }
+}
+
+/// A mock version of the virtio queue implemented from the perspective of the driver.
+pub struct MockSplitQueue<'a, M> {
+    mem: &'a M,
+    len: u16,
+    desc_table_addr: GuestAddress,
+    desc_table: DescriptorTable<'a, M>,
+    avail_addr: GuestAddress,
+    avail: AvailRing<'a, M>,
+    used_addr: GuestAddress,
+    used: UsedRing<'a, M>,
+    indirect_addr: GuestAddress,
+}
+
+impl<'a, M: GuestMemory> MockSplitQueue<'a, M> {
+    /// Create a new `MockSplitQueue` instance with 0 as the default guest
+    /// physical starting address.
+    pub fn new(mem: &'a M, len: u16) -> Self {
+        Self::create(mem, GuestAddress(0), len)
+    }
+
+    /// Create a new `MockSplitQueue` instance.
+    pub fn create(mem: &'a M, start: GuestAddress, len: u16) -> Self {
+        const AVAIL_ALIGN: GuestUsize = 2;
+        const USED_ALIGN: GuestUsize = 4;
+
+        let desc_table_addr = start;
+        let desc_table = DescriptorTable::new(mem, desc_table_addr, len);
+
+        let avail_addr = start
+            .checked_add(16 * len as GuestUsize)
+            .unwrap()
+            .align_up(AVAIL_ALIGN);
+        let avail = AvailRing::new(mem, avail_addr, len);
+
+        let used_addr = avail.end().align_up(USED_ALIGN);
+        let used = UsedRing::new(mem, used_addr, len);
+
+        let indirect_addr = GuestAddress(0x3000_0000);
+
+        MockSplitQueue {
+            mem,
+            len,
+            desc_table_addr,
+            desc_table,
+            avail_addr,
+            avail,
+            used_addr,
+            used,
+            indirect_addr,
+        }
+    }
+
+    /// Return the starting address of the queue.
+    pub fn start(&self) -> GuestAddress {
+        self.desc_table_addr
+    }
+
+    /// Return the end address of the queue.
+    pub fn end(&self) -> GuestAddress {
+        self.used.end()
+    }
+
+    /// Descriptor table accessor.
+    pub fn desc_table(&self) -> &DescriptorTable<'a, M> {
+        &self.desc_table
+    }
+
+    /// Available ring accessor.
+    pub fn avail(&self) -> &AvailRing<M> {
+        &self.avail
+    }
+
+    /// Used ring accessor.
+    pub fn used(&self) -> &UsedRing<M> {
+        &self.used
+    }
+
+    /// Return the starting address of the descriptor table.
+    pub fn desc_table_addr(&self) -> GuestAddress {
+        self.desc_table_addr
+    }
+
+    /// Return the starting address of the available ring.
+    pub fn avail_addr(&self) -> GuestAddress {
+        self.avail_addr
+    }
+
+    /// Return the starting address of the used ring.
+    pub fn used_addr(&self) -> GuestAddress {
+        self.used_addr
+    }
+
+    fn update_avail_idx(&mut self, value: u16) -> Result<(), MockError> {
+        let avail_idx = self.avail.idx.load();
+        self.avail.ring.ref_at(avail_idx as usize)?.store(value);
+        self.avail.idx.store(avail_idx.wrapping_add(1));
+        Ok(())
+    }
+
+    fn alloc_indirect_chain(&mut self, len: u16) -> Result<GuestAddress, MockError> {
+        // To simplify things for now, we round up the table len to a multiple of 16. When this is
+        // no longer the case, we should make sure the starting address of the descriptor table
+        // we're creating below is properly aligned.
+
+        let table_len = if len % 16 == 0 {
+            len
+        } else {
+            16 * (len / 16 + 1)
+        };
+
+        let mut table = DescriptorTable::new(self.mem, self.indirect_addr, table_len);
+        let head_descriptor_index = table.build_chain(len)?;
+        // When building indirect descriptor tables, the descriptor at index 0 is supposed to be
+        // first in the resulting chain. Just making sure our logic actually makes that happen.
+        assert_eq!(head_descriptor_index, 0);
+
+        let table_addr = self.indirect_addr;
+        self.indirect_addr = self.indirect_addr.checked_add(table.total_size()).unwrap();
+        Ok(table_addr)
+    }
+
+    /// Add a descriptor chain to the table.
+    pub fn add_chain(&mut self, len: u16) -> Result<(), MockError> {
+        self.desc_table
+            .build_chain(len)
+            .and_then(|head_idx| self.update_avail_idx(head_idx))
+    }
+
+    /// Add an indirect descriptor chain to the table.
+    pub fn add_indirect_chain(&mut self, len: u16) -> Result<(), MockError> {
+        let head_idx = self.desc_table.build_chain(1)?;
+
+        // We just allocate the indirect table and forget about it for now.
+        let indirect_addr = self.alloc_indirect_chain(len)?;
+
+        let mut desc = self.desc_table.load(head_idx)?;
+        desc.set_flags(VRING_DESC_F_INDIRECT as u16);
+        desc.set_addr(indirect_addr.raw_value());
+        desc.set_len(u32::from(len) * size_of::<Descriptor>() as u32);
+
+        self.desc_table.store(head_idx, desc)?;
+        self.update_avail_idx(head_idx)
+    }
+
+    /// Creates a new `Queue`, using the underlying memory regions represented
+    /// by the `MockSplitQueue`.
+    pub fn create_queue<Q: QueueT>(&self) -> Result<Q, Error> {
+        let mut q = Q::new(self.len)?;
+        q.set_size(self.len);
+        q.set_ready(true);
+        // we cannot directly set the u64 address, we need to compose it from low & high.
+        q.set_desc_table_address(
+            Some(self.desc_table_addr.0 as u32),
+            Some((self.desc_table_addr.0 >> 32) as u32),
+        );
+        q.set_avail_ring_address(
+            Some(self.avail_addr.0 as u32),
+            Some((self.avail_addr.0 >> 32) as u32),
+        );
+        q.set_used_ring_address(
+            Some(self.used_addr.0 as u32),
+            Some((self.used_addr.0 >> 32) as u32),
+        );
+        Ok(q)
+    }
+
+    /// Writes multiple descriptor chains to the memory object of the queue, at the beginning of
+    /// the descriptor table, and returns the first `DescriptorChain` available.
+    pub fn build_multiple_desc_chains(
+        &self,
+        descs: &[Descriptor],
+    ) -> Result<DescriptorChain<&M>, MockError> {
+        self.add_desc_chains(descs, 0)?;
+        self.create_queue::<Queue>()
+            .map_err(MockError::InvalidQueueParams)?
+            .iter(self.mem)
+            .unwrap()
+            .next()
+            .ok_or(MockError::InvalidNextAvail)
+    }
+
+    /// Writes a single descriptor chain to the memory object of the queue, at the beginning of the
+    /// descriptor table, and returns the associated `DescriptorChain` object.
+    // This method ensures the next flags and values are set properly for the desired chain, but
+    // keeps the other characteristics of the input descriptors (`addr`, `len`, other flags).
+    // TODO: make this function work with a generic queue. For now that's not possible because
+    // we cannot create the descriptor chain from an iterator as iterator is not implemented for
+    // a generic T, just for `Queue`.
+    pub fn build_desc_chain(&self, descs: &[Descriptor]) -> Result<DescriptorChain<&M>, MockError> {
+        let mut modified_descs: Vec<Descriptor> = Vec::with_capacity(descs.len());
+        for (idx, desc) in descs.iter().enumerate() {
+            let (flags, next) = if idx == descs.len() - 1 {
+                // Clear the NEXT flag if it was set. The value of the next field of the
+                // Descriptor doesn't matter at this point.
+                (desc.flags() & !VRING_DESC_F_NEXT as u16, 0)
+            } else {
+                // Ensure that the next flag is set and that we are referring to the following
+                // descriptor. This ignores any value actually present in `desc.next`.
+                (desc.flags() | VRING_DESC_F_NEXT as u16, idx as u16 + 1)
+            };
+            modified_descs.push(Descriptor::new(desc.addr().0, desc.len(), flags, next));
+        }
+        self.build_multiple_desc_chains(&modified_descs[..])
+    }
+
+    /// Adds descriptor chains to the memory object of the queue.
+    // `descs` represents a slice of `Descriptor` objects which are used to populate the chains, and
+    // `offset` is the index in the descriptor table where the chains should be added.
+    // The descriptor chain related information is written in memory starting with address 0.
+    // The `addr` fields of the input descriptors should start at a sufficiently
+    // greater location (e.g. 1MiB, or `0x10_0000`).
+    pub fn add_desc_chains(&self, descs: &[Descriptor], offset: u16) -> Result<(), MockError> {
+        let mut new_entries = 0;
+        let avail_idx: u16 = self
+            .mem
+            .read_obj::<u16>(self.avail_addr().unchecked_add(2))
+            .map(u16::from_le)
+            .unwrap();
+
+        for (idx, desc) in descs.iter().enumerate() {
+            let i = idx as u16 + offset;
+            self.desc_table().store(i, *desc)?;
+
+            if idx == 0 || descs[idx - 1].flags() & VRING_DESC_F_NEXT as u16 != 1 {
+                // Update the available ring position.
+                self.mem
+                    .write_obj(
+                        u16::to_le(i),
+                        self.avail_addr().unchecked_add(
+                            VIRTQ_AVAIL_RING_HEADER_SIZE
+                                + (avail_idx + new_entries) as u64 * VIRTQ_AVAIL_ELEMENT_SIZE,
+                        ),
+                    )
+                    .unwrap();
+                new_entries += 1;
+            }
+        }
+
+        // Increment `avail_idx`.
+        self.mem
+            .write_obj(
+                u16::to_le(avail_idx + new_entries),
+                self.avail_addr().unchecked_add(2),
+            )
+            .unwrap();
+
+        Ok(())
+    }
+}
diff --git a/src/queue.rs b/src/queue.rs
new file mode 100644
index 0000000..4a69b13
--- /dev/null
+++ b/src/queue.rs
@@ -0,0 +1,1597 @@
+// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+// Copyright © 2019 Intel Corporation.
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::mem::size_of;
+use std::num::Wrapping;
+use std::ops::Deref;
+use std::sync::atomic::{fence, Ordering};
+
+use vm_memory::{Address, Bytes, GuestAddress, GuestMemory};
+
+use crate::defs::{
+    DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR,
+    VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE, VIRTQ_AVAIL_RING_META_SIZE,
+    VIRTQ_USED_ELEMENT_SIZE, VIRTQ_USED_RING_HEADER_SIZE, VIRTQ_USED_RING_META_SIZE,
+};
+use crate::{
+    error, Descriptor, DescriptorChain, Error, QueueGuard, QueueOwnedT, QueueState, QueueT,
+    VirtqUsedElem,
+};
+use virtio_bindings::bindings::virtio_ring::VRING_USED_F_NO_NOTIFY;
+
+/// The maximum queue size as defined in the Virtio Spec.
+pub const MAX_QUEUE_SIZE: u16 = 32768;
+
+/// Struct to maintain information and manipulate a virtio queue.
+///
+/// # Example
+///
+/// ```rust
+/// use virtio_queue::{Queue, QueueOwnedT, QueueT};
+/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+///
+/// let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// let mut queue = Queue::new(1024).unwrap();
+///
+/// // First, the driver sets up the queue; this setup is done via writes on the bus (PCI, MMIO).
+/// queue.set_size(8);
+/// queue.set_desc_table_address(Some(0x1000), None);
+/// queue.set_avail_ring_address(Some(0x2000), None);
+/// queue.set_used_ring_address(Some(0x3000), None);
+/// queue.set_event_idx(true);
+/// queue.set_ready(true);
+/// // The user should check if the queue is valid before starting to use it.
+/// assert!(queue.is_valid(&m));
+///
+/// // Here the driver would add entries in the available ring and then update the `idx` field of
+/// // the available ring (address = 0x2000 + 2).
+/// m.write_obj(3, GuestAddress(0x2002));
+///
+/// loop {
+///     queue.disable_notification(&m).unwrap();
+///
+///     // Consume entries from the available ring.
+///     while let Some(chain) = queue.iter(&m).unwrap().next() {
+///         // Process the descriptor chain, and then add an entry in the used ring and optionally
+///         // notify the driver.
+///         queue.add_used(&m, chain.head_index(), 0x100).unwrap();
+///
+///         if queue.needs_notification(&m).unwrap() {
+///             // Here we would notify the driver it has new entries in the used ring to consume.
+///         }
+///     }
+///     if !queue.enable_notification(&m).unwrap() {
+///         break;
+///     }
+/// }
+///
+/// // We can reset the queue at some point.
+/// queue.reset();
+/// // The queue should not be ready after reset.
+/// assert!(!queue.ready());
+/// ```
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct Queue {
+    /// The maximum size in elements offered by the device.
+    max_size: u16,
+
+    /// Tail position of the available ring.
+    next_avail: Wrapping<u16>,
+
+    /// Head position of the used ring.
+    next_used: Wrapping<u16>,
+
+    /// VIRTIO_F_RING_EVENT_IDX negotiated.
+    event_idx_enabled: bool,
+
+    /// The number of descriptor chains placed in the used ring via `add_used`
+    /// since the last time `needs_notification` was called on the associated queue.
+    num_added: Wrapping<u16>,
+
+    /// The queue size in elements the driver selected.
+    size: u16,
+
+    /// Indicates if the queue is finished with configuration.
+    ready: bool,
+
+    /// Guest physical address of the descriptor table.
+    desc_table: GuestAddress,
+
+    /// Guest physical address of the available ring.
+    avail_ring: GuestAddress,
+
+    /// Guest physical address of the used ring.
+    used_ring: GuestAddress,
+}
+
+impl Queue {
+    /// Equivalent of [`QueueT::set_size`] returning an error in case of invalid size.
+    ///
+    /// This should not be directly used, as the preferred method is part of the [`QueueT`]
+    /// interface. This is a convenience function for implementing save/restore capabilities.
+    pub fn try_set_size(&mut self, size: u16) -> Result<(), Error> {
+        if size > self.max_size() || size == 0 || (size & (size - 1)) != 0 {
+            return Err(Error::InvalidSize);
+        }
+        self.size = size;
+        Ok(())
+    }
+
+    /// Tries to set the descriptor table address. In case of an invalid value, the address is
+    /// not updated.
+    ///
+    /// This should not be directly used, as the preferred method is
+    /// [`QueueT::set_desc_table_address`]. This is a convenience function for implementing
+    /// save/restore capabilities.
+    pub fn try_set_desc_table_address(&mut self, desc_table: GuestAddress) -> Result<(), Error> {
+        if desc_table.mask(0xf) != 0 {
+            return Err(Error::InvalidDescTableAlign);
+        }
+        self.desc_table = desc_table;
+
+        Ok(())
+    }
+
+    /// Tries to update the available ring address. In case of an invalid value, the address is
+    /// not updated.
+    ///
+    /// This should not be directly used, as the preferred method is
+    /// [`QueueT::set_avail_ring_address`]. This is a convenience function for implementing
+    /// save/restore capabilities.
+    pub fn try_set_avail_ring_address(&mut self, avail_ring: GuestAddress) -> Result<(), Error> {
+        if avail_ring.mask(0x1) != 0 {
+            return Err(Error::InvalidAvailRingAlign);
+        }
+        self.avail_ring = avail_ring;
+        Ok(())
+    }
+
+    /// Tries to update the used ring address. In case of an invalid value, the address is not
+    /// updated.
+    ///
+    /// This should not be directly used, as the preferred method is
+    /// [`QueueT::set_used_ring_address`]. This is a convenience function for implementing
+    /// save/restore capabilities.
+    pub fn try_set_used_ring_address(&mut self, used_ring: GuestAddress) -> Result<(), Error> {
+        if used_ring.mask(0x3) != 0 {
+            return Err(Error::InvalidUsedRingAlign);
+        }
+        self.used_ring = used_ring;
+        Ok(())
+    }
+
+    /// Returns the state of the `Queue`.
+    ///
+    /// This is useful for implementing save/restore capabilities.
+    /// The state does not have support for serialization, but this can be
+    /// added by VMMs locally through the use of a
+    /// [remote type](https://serde.rs/remote-derive.html).
+    ///
+    /// Alternatively, a version aware and serializable/deserializable QueueState
+    /// is available in the `virtio-queue-ser` crate.
+    pub fn state(&self) -> QueueState {
+        QueueState {
+            max_size: self.max_size,
+            next_avail: self.next_avail(),
+            next_used: self.next_used(),
+            event_idx_enabled: self.event_idx_enabled,
+            size: self.size,
+            ready: self.ready,
+            desc_table: self.desc_table(),
+            avail_ring: self.avail_ring(),
+            used_ring: self.used_ring(),
+        }
+    }
+
+    // Helper method that writes `val` to the `avail_event` field of the used ring, using
+    // the provided ordering.
+    fn set_avail_event<M: GuestMemory>(
+        &self,
+        mem: &M,
+        val: u16,
+        order: Ordering,
+    ) -> Result<(), Error> {
+        // This can not overflow an u64 since it is working with relatively small numbers compared
+        // to u64::MAX.
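+        //
+        // For reference (per the split-ring layout in the virtio spec), the used ring is:
+        // flags (2 bytes) | idx (2 bytes) | `size` elements of 8 bytes each | avail_event
+        // (2 bytes), so the offset computed below points right past the last ring element.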
+        let avail_event_offset =
+            VIRTQ_USED_RING_HEADER_SIZE + VIRTQ_USED_ELEMENT_SIZE * u64::from(self.size);
+        let addr = self
+            .used_ring
+            .checked_add(avail_event_offset)
+            .ok_or(Error::AddressOverflow)?;
+
+        mem.store(u16::to_le(val), addr, order)
+            .map_err(Error::GuestMemory)
+    }
+
+    // Set the value of the `flags` field of the used ring, applying the specified ordering.
+    fn set_used_flags<M: GuestMemory>(
+        &mut self,
+        mem: &M,
+        val: u16,
+        order: Ordering,
+    ) -> Result<(), Error> {
+        mem.store(u16::to_le(val), self.used_ring, order)
+            .map_err(Error::GuestMemory)
+    }
+
+    // Write the appropriate values to enable or disable notifications from the driver.
+    //
+    // Every access in this method uses `Relaxed` ordering because a fence is added by the caller
+    // when appropriate.
+    fn set_notification<M: GuestMemory>(&mut self, mem: &M, enable: bool) -> Result<(), Error> {
+        if enable {
+            if self.event_idx_enabled {
+                // We call `set_avail_event` using the `next_avail` value, instead of reading
+                // and using the current `avail_idx` to avoid missing notifications. More
+                // details in `enable_notification`.
+                self.set_avail_event(mem, self.next_avail.0, Ordering::Relaxed)
+            } else {
+                self.set_used_flags(mem, 0, Ordering::Relaxed)
+            }
+        } else if !self.event_idx_enabled {
+            self.set_used_flags(mem, VRING_USED_F_NO_NOTIFY as u16, Ordering::Relaxed)
+        } else {
+            // Notifications are effectively disabled by default after triggering once when
+            // `VIRTIO_F_EVENT_IDX` is negotiated, so we don't do anything in that case.
+            Ok(())
+        }
+    }
+
+    // Return the value present in the used_event field of the avail ring.
+    //
+    // If the VIRTIO_F_EVENT_IDX feature bit is not negotiated, the flags field in the available
+    // ring offers a crude mechanism for the driver to inform the device that it doesn’t want
+    // interrupts when buffers are used. Otherwise virtq_avail.used_event is a more performant
+    // alternative where the driver specifies how far the device can progress before interrupting.
+    //
+    // Neither of these interrupt suppression methods are reliable, as they are not synchronized
+    // with the device, but they serve as useful optimizations. So we only ensure access to the
+    // virtq_avail.used_event is atomic, but do not need to synchronize with other memory accesses.
+    fn used_event<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+        // This can not overflow an u64 since it is working with relatively small numbers compared
+        // to u64::MAX.
+        let used_event_offset =
+            VIRTQ_AVAIL_RING_HEADER_SIZE + u64::from(self.size) * VIRTQ_AVAIL_ELEMENT_SIZE;
+        let used_event_addr = self
+            .avail_ring
+            .checked_add(used_event_offset)
+            .ok_or(Error::AddressOverflow)?;
+
+        mem.load(used_event_addr, order)
+            .map(u16::from_le)
+            .map(Wrapping)
+            .map_err(Error::GuestMemory)
+    }
+}
+
+impl<'a> QueueGuard<'a> for Queue {
+    type G = &'a mut Self;
+}
+
+impl QueueT for Queue {
+    fn new(max_size: u16) -> Result<Self, Error> {
+        // We need to check that the max size is a power of 2 because we're setting this as the
+        // queue size, and the valid queue sizes are a power of 2 as per the specification.
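+        //
+        // Illustration of the check below: `x & (x - 1)` clears the lowest set bit, so it is
+        // zero only for powers of two, e.g. 256 & 255 == 0 (accepted), while
+        // 384 & 383 == 256 != 0 (rejected).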
+        if max_size == 0 || max_size > MAX_QUEUE_SIZE || (max_size & (max_size - 1)) != 0 {
+            return Err(Error::InvalidMaxSize);
+        }
+        Ok(Queue {
+            max_size,
+            size: max_size,
+            ready: false,
+            desc_table: GuestAddress(DEFAULT_DESC_TABLE_ADDR),
+            avail_ring: GuestAddress(DEFAULT_AVAIL_RING_ADDR),
+            used_ring: GuestAddress(DEFAULT_USED_RING_ADDR),
+            next_avail: Wrapping(0),
+            next_used: Wrapping(0),
+            event_idx_enabled: false,
+            num_added: Wrapping(0),
+        })
+    }
+
+    fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool {
+        let queue_size = self.size as u64;
+        let desc_table = self.desc_table;
+        // The multiplication can not overflow an u64 since we are multiplying an u16 with a
+        // small number.
+        let desc_table_size = size_of::<Descriptor>() as u64 * queue_size;
+        let avail_ring = self.avail_ring;
+        // The operations below can not overflow an u64 since they're working with relatively small
+        // numbers compared to u64::MAX.
+        let avail_ring_size = VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size;
+        let used_ring = self.used_ring;
+        let used_ring_size = VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size;
+
+        if !self.ready {
+            error!("attempt to use virtio queue that is not marked ready");
+            false
+        } else if desc_table
+            .checked_add(desc_table_size)
+            .map_or(true, |v| !mem.address_in_range(v))
+        {
+            error!(
+                "virtio queue descriptor table goes out of bounds: start:0x{:08x} size:0x{:08x}",
+                desc_table.raw_value(),
+                desc_table_size
+            );
+            false
+        } else if avail_ring
+            .checked_add(avail_ring_size)
+            .map_or(true, |v| !mem.address_in_range(v))
+        {
+            error!(
+                "virtio queue available ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
+                avail_ring.raw_value(),
+                avail_ring_size
+            );
+            false
+        } else if used_ring
+            .checked_add(used_ring_size)
+            .map_or(true, |v| !mem.address_in_range(v))
+        {
+            error!(
+                "virtio queue used ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
+                used_ring.raw_value(),
+                used_ring_size
+            );
+            false
+        } else {
+            true
+        }
+    }
+
+    fn reset(&mut self) {
+        self.ready = false;
+        self.size = self.max_size;
+        self.desc_table = GuestAddress(DEFAULT_DESC_TABLE_ADDR);
+        self.avail_ring = GuestAddress(DEFAULT_AVAIL_RING_ADDR);
+        self.used_ring = GuestAddress(DEFAULT_USED_RING_ADDR);
+        self.next_avail = Wrapping(0);
+        self.next_used = Wrapping(0);
+        self.num_added = Wrapping(0);
+        self.event_idx_enabled = false;
+    }
+
+    fn lock(&mut self) -> <Self as QueueGuard>::G {
+        self
+    }
+
+    fn max_size(&self) -> u16 {
+        self.max_size
+    }
+
+    fn size(&self) -> u16 {
+        self.size
+    }
+
+    fn set_size(&mut self, size: u16) {
+        if self.try_set_size(size).is_err() {
+            error!("virtio queue with invalid size: {}", size);
+        }
+    }
+
+    fn ready(&self) -> bool {
+        self.ready
+    }
+
+    fn set_ready(&mut self, ready: bool) {
+        self.ready = ready;
+    }
+
+    fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        let low = low.unwrap_or(self.desc_table.0 as u32) as u64;
+        let high = high.unwrap_or((self.desc_table.0 >> 32) as u32) as u64;
+
+        let desc_table = GuestAddress((high << 32) | low);
+        if self.try_set_desc_table_address(desc_table).is_err() {
+            error!("virtio queue descriptor table breaks alignment constraints");
+        }
+    }
+
+    fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        let low = low.unwrap_or(self.avail_ring.0 as u32) as u64;
+        let high = high.unwrap_or((self.avail_ring.0 >> 32) as u32) as u64;
+
+        let avail_ring = GuestAddress((high << 32) | low);
+        if self.try_set_avail_ring_address(avail_ring).is_err() {
+            error!("virtio queue available ring breaks alignment constraints");
+        }
+    }
+
+    fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        let low = low.unwrap_or(self.used_ring.0 as u32) as u64;
+        let high = high.unwrap_or((self.used_ring.0 >> 32) as u32) as u64;
+
+        let used_ring = GuestAddress((high << 32) | low);
+        if self.try_set_used_ring_address(used_ring).is_err() {
+            error!("virtio queue used ring breaks alignment constraints");
+        }
+    }
+
+    fn set_event_idx(&mut self, enabled: bool) {
+        self.event_idx_enabled = enabled;
+    }
+
+    fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+    where
+        M: GuestMemory + ?Sized,
+    {
+        let addr = self
+            .avail_ring
+            .checked_add(2)
+            .ok_or(Error::AddressOverflow)?;
+
+        mem.load(addr, order)
+            .map(u16::from_le)
+            .map(Wrapping)
+            .map_err(Error::GuestMemory)
+    }
+
+    fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+        let addr = self
+            .used_ring
+            .checked_add(2)
+            .ok_or(Error::AddressOverflow)?;
+
+        mem.load(addr, order)
+            .map(u16::from_le)
+            .map(Wrapping)
+            .map_err(Error::GuestMemory)
+    }
+
+    fn add_used<M: GuestMemory>(
+        &mut self,
+        mem: &M,
+        head_index: u16,
+        len: u32,
+    ) -> Result<(), Error> {
+        if head_index >= self.size {
+            error!(
+                "attempted to add out of bounds descriptor to used ring: {}",
+                head_index
+            );
+            return Err(Error::InvalidDescriptorIndex);
+        }
+
+        let next_used_index = u64::from(self.next_used.0 % self.size);
+        // This can not overflow an u64 since it is working with relatively small numbers compared
+        // to u64::MAX.
+        let offset = VIRTQ_USED_RING_HEADER_SIZE + next_used_index * VIRTQ_USED_ELEMENT_SIZE;
+        let addr = self
+            .used_ring
+            .checked_add(offset)
+            .ok_or(Error::AddressOverflow)?;
+        mem.write_obj(VirtqUsedElem::new(head_index.into(), len), addr)
+            .map_err(Error::GuestMemory)?;
+
+        self.next_used += Wrapping(1);
+        self.num_added += Wrapping(1);
+
+        mem.store(
+            u16::to_le(self.next_used.0),
+            self.used_ring
+                .checked_add(2)
+                .ok_or(Error::AddressOverflow)?,
+            Ordering::Release,
+        )
+        .map_err(Error::GuestMemory)
+    }
+
+    // TODO: Turn this into a doc comment/example.
+    // With the current implementation, a common way of consuming entries from the available ring
+    // while also leveraging notification suppression is to use a loop, for example:
+    //
+    // loop {
+    //     // We have to explicitly disable notifications if `VIRTIO_F_EVENT_IDX` has not been
+    //     // negotiated.
+    //     self.disable_notification()?;
+    //
+    //     for chain in self.iter()? {
+    //         // Do something with each chain ...
+    //         // Let's assume we process all available chains here.
+    //     }
+    //
+    //     // If `enable_notification` returns `true`, the driver has added more entries to the
+    //     // available ring.
+    //     if !self.enable_notification()? {
+    //         break;
+    //     }
+    // }
+    fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+        self.set_notification(mem, true)?;
+        // Ensures the following read is not reordered before any previous write operation.
+        fence(Ordering::SeqCst);
+
+        // We double check here to avoid the situation where the available ring has been updated
+        // just before we re-enabled notifications, and it's possible to miss one. We compare the
+        // current `avail_idx` value to `self.next_avail` because it's where we stopped processing
+        // entries. There are situations where we intentionally avoid processing everything in the
+        // available ring (which will cause this method to return `true`), but in that case we'll
+        // probably not re-enable notifications as we already know there are pending entries.
+        self.avail_idx(mem, Ordering::Relaxed)
+            .map(|idx| idx != self.next_avail)
+    }
+
+    fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error> {
+        self.set_notification(mem, false)
+    }
+
+    fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+        let used_idx = self.next_used;
+
+        // Complete all the writes in add_used() before reading the event.
+        fence(Ordering::SeqCst);
+
+        // The VRING_AVAIL_F_NO_INTERRUPT flag isn't supported yet.
+
+        // When the `EVENT_IDX` feature is negotiated, the driver writes into `used_event`
+        // a value that's used by the device to determine whether a notification must
+        // be submitted after adding a descriptor chain to the used ring. According to the
+        // standard, the notification must be sent when `next_used == used_event + 1`, but
+        // various device model implementations rely on an inequality instead, most likely
+        // to also support use cases where a bunch of descriptor chains are added to the used
+        // ring first, and only afterwards the `needs_notification` logic is called. For example,
+        // the approach based on `num_added` below is taken from the Linux Kernel implementation
+        // (i.e. https://elixir.bootlin.com/linux/v5.15.35/source/drivers/virtio/virtio_ring.c#L661)
+
+        // The `old` variable below is used to determine the value of `next_used` from when
+        // `needs_notification` was called last (each `needs_notification` call resets `num_added`
+        // to zero, while each `add_used` call increments it by one). Then, the logic below
+        // uses wrapped arithmetic to see whether `used_event` can be found between `old` and
+        // `next_used` in the circular sequence space of the used ring.
+        if self.event_idx_enabled {
+            let used_event = self.used_event(mem, Ordering::Relaxed)?;
+            let old = used_idx - self.num_added;
+            self.num_added = Wrapping(0);
+
+            return Ok(used_idx - used_event - Wrapping(1) < used_idx - old);
+        }
+
+        Ok(true)
+    }
+
+    fn next_avail(&self) -> u16 {
+        self.next_avail.0
+    }
+
+    fn set_next_avail(&mut self, next_avail: u16) {
+        self.next_avail = Wrapping(next_avail);
+    }
+
+    fn next_used(&self) -> u16 {
+        self.next_used.0
+    }
+
+    fn set_next_used(&mut self, next_used: u16) {
+        self.next_used = Wrapping(next_used);
+    }
+
+    fn desc_table(&self) -> u64 {
+        self.desc_table.0
+    }
+
+    fn avail_ring(&self) -> u64 {
+        self.avail_ring.0
+    }
+
+    fn used_ring(&self) -> u64 {
+        self.used_ring.0
+    }
+
+    fn event_idx_enabled(&self) -> bool {
+        self.event_idx_enabled
+    }
+
+    fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+    where
+        M: Clone + Deref,
+        M::Target: GuestMemory,
+    {
+        // Default, iter-based impl. Will be subsequently improved.
+        match self.iter(mem) {
+            Ok(mut iter) => iter.next(),
+            Err(e) => {
+                error!("Iterator error {}", e);
+                None
+            }
+        }
+    }
+}
+
+impl QueueOwnedT for Queue {
+    fn iter<M>(&mut self, mem: M) -> Result<AvailIter<'_, M>, Error>
+    where
+        M: Deref,
+        M::Target: GuestMemory,
+    {
+        // We're checking here that a reset did not happen without re-initializing the queue.
+        // TODO: In the future we might want to also check that the other parameters in the
+        // queue are valid.
+        if !self.ready || self.avail_ring == GuestAddress(0) {
+            return Err(Error::QueueNotReady);
+        }
+
+        self.avail_idx(mem.deref(), Ordering::Acquire)
+            .map(move |idx| AvailIter::new(mem, idx, self))?
+    }
+
+    fn go_to_previous_position(&mut self) {
+        self.next_avail -= Wrapping(1);
+    }
+}
+
+/// Consuming iterator over all available descriptor chain heads in the queue.
+///
+/// # Example
+///
+/// ```rust
+/// # use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
+/// # use virtio_queue::mock::MockSplitQueue;
+/// use virtio_queue::{Descriptor, Queue, QueueOwnedT};
+/// use vm_memory::{GuestAddress, GuestMemoryMmap};
+///
+/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue {
+/// #     let vq = MockSplitQueue::new(m, 16);
+/// #     let mut q: Queue = vq.create_queue().unwrap();
+/// #
+/// #     // The chains are (0, 1), (2, 3, 4) and (5, 6).
+/// #     let mut descs = Vec::new();
+/// #     for i in 0..7 {
+/// #         let flags = match i {
+/// #             1 | 6 => 0,
+/// #             2 | 5 => VRING_DESC_F_NEXT | VRING_DESC_F_WRITE,
+/// #             4 => VRING_DESC_F_WRITE,
+/// #             _ => VRING_DESC_F_NEXT,
+/// #         };
+/// #
+/// #         descs.push(Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags as u16, i + 1));
+/// #     }
+/// #
+/// #     vq.add_desc_chains(&descs, 0).unwrap();
+/// #     q
+/// # }
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// // Populate the queue with descriptor chains and update the available ring accordingly.
+/// let mut queue = populate_queue(m);
+/// let mut i = queue.iter(m).unwrap();
+///
+/// {
+///     let mut c = i.next().unwrap();
+///     let _first_head_index = c.head_index();
+///     // We should have two descriptors in the first chain.
+///     let _desc1 = c.next().unwrap();
+///     let _desc2 = c.next().unwrap();
+/// }
+///
+/// {
+///     let c = i.next().unwrap();
+///     let _second_head_index = c.head_index();
+///
+///     let mut iter = c.writable();
+///     // We should have two writable descriptors in the second chain.
+///     let _desc1 = iter.next().unwrap();
+///     let _desc2 = iter.next().unwrap();
+/// }
+///
+/// {
+///     let c = i.next().unwrap();
+///     let _third_head_index = c.head_index();
+///
+///     let mut iter = c.readable();
+///     // We should have one readable descriptor in the third chain.
+///     let _desc1 = iter.next().unwrap();
+/// }
+/// // Let's go back one position in the available ring.
+/// i.go_to_previous_position();
+/// // We should be able to access again the third descriptor chain.
+/// let c = i.next().unwrap();
+/// let _third_head_index = c.head_index();
+/// ```
+#[derive(Debug)]
+pub struct AvailIter<'b, M> {
+    mem: M,
+    desc_table: GuestAddress,
+    avail_ring: GuestAddress,
+    queue_size: u16,
+    last_index: Wrapping<u16>,
+    next_avail: &'b mut Wrapping<u16>,
+}
+
+impl<'b, M> AvailIter<'b, M>
+where
+    M: Deref,
+    M::Target: GuestMemory,
+{
+    /// Create a new instance of `AvailIter`.
+    ///
+    /// # Arguments
+    /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers.
+    /// * `idx` - the index of the available ring entry where the driver would put the next
+    ///           available descriptor chain.
+    /// * `queue` - the `Queue` object from which the needed data to create the `AvailIter` can
+    ///             be retrieved.
+    pub(crate) fn new(mem: M, idx: Wrapping<u16>, queue: &'b mut Queue) -> Result<Self, Error> {
+        // The number of descriptor chain heads to process should always
+        // be smaller or equal to the queue size, as the driver should
+        // never ask the VMM to process an available ring entry more than
+        // once. Checking and reporting such incorrect driver behavior
+        // can prevent potential hanging and Denial-of-Service from
+        // happening on the VMM side.
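+        //
+        // The check below relies on wrapping arithmetic; e.g. with `idx == Wrapping(2)` and
+        // `queue.next_avail == Wrapping(65534)`, `(idx - queue.next_avail).0 == 4`, i.e. the
+        // number of chains the driver has exposed but the device has not yet processed.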
+        if (idx - queue.next_avail).0 > queue.size {
+            return Err(Error::InvalidAvailRingIndex);
+        }
+
+        Ok(AvailIter {
+            mem,
+            desc_table: queue.desc_table,
+            avail_ring: queue.avail_ring,
+            queue_size: queue.size,
+            last_index: idx,
+            next_avail: &mut queue.next_avail,
+        })
+    }
+
+    /// Goes back one position in the available descriptor chain offered by the driver.
+    ///
+    /// Rust does not support bidirectional iterators. This is the only way to revert the effect
+    /// of an iterator increment on the queue.
+    ///
+    /// Note: this method assumes there's only one thread manipulating the queue, so it should only
+    /// be invoked in single-threaded context.
+    pub fn go_to_previous_position(&mut self) {
+        *self.next_avail -= Wrapping(1);
+    }
+}
+
+impl<'b, M> Iterator for AvailIter<'b, M>
+where
+    M: Clone + Deref,
+    M::Target: GuestMemory,
+{
+    type Item = DescriptorChain<M>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if *self.next_avail == self.last_index {
+            return None;
+        }
+
+        // These two operations can not overflow an u64 since they're working with relatively small
+        // numbers compared to u64::MAX.
+        let elem_off =
+            u64::from(self.next_avail.0.checked_rem(self.queue_size)?) * VIRTQ_AVAIL_ELEMENT_SIZE;
+        let offset = VIRTQ_AVAIL_RING_HEADER_SIZE + elem_off;
+
+        let addr = self.avail_ring.checked_add(offset)?;
+        let head_index: u16 = self
+            .mem
+            .load(addr, Ordering::Acquire)
+            .map(u16::from_le)
+            .map_err(|_| error!("Failed to read from memory {:x}", addr.raw_value()))
+            .ok()?;
+
+        *self.next_avail += Wrapping(1);
+
+        Some(DescriptorChain::new(
+            self.mem.clone(),
+            self.desc_table,
+            self.queue_size,
+            head_index,
+        ))
+    }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+// It is convenient for tests to implement `PartialEq`, but it is not a
+// proper implementation as `GuestMemory` errors cannot implement `PartialEq`.
+impl PartialEq for Error { + fn eq(&self, other: &Self) -> bool { + format!("{}", &self) == format!("{}", other) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::defs::{DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR}; + use crate::mock::MockSplitQueue; + use crate::Descriptor; + use virtio_bindings::bindings::virtio_ring::{ + VRING_DESC_F_NEXT, VRING_DESC_F_WRITE, VRING_USED_F_NO_NOTIFY, + }; + + use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryMmap}; + + #[test] + fn test_queue_is_valid() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(m, 16); + let mut q: Queue = vq.create_queue().unwrap(); + + // q is currently valid + assert!(q.is_valid(m)); + + // shouldn't be valid when not marked as ready + q.set_ready(false); + assert!(!q.ready()); + assert!(!q.is_valid(m)); + q.set_ready(true); + + // shouldn't be allowed to set a size > max_size + q.set_size(q.max_size() << 1); + assert_eq!(q.size, q.max_size()); + + // or set the size to 0 + q.set_size(0); + assert_eq!(q.size, q.max_size()); + + // or set a size which is not a power of 2 + q.set_size(11); + assert_eq!(q.size, q.max_size()); + + // but should be allowed to set a size if 0 < size <= max_size and size is a power of two + q.set_size(4); + assert_eq!(q.size, 4); + q.size = q.max_size(); + + // shouldn't be allowed to set an address that breaks the alignment constraint + q.set_desc_table_address(Some(0xf), None); + assert_eq!(q.desc_table.0, vq.desc_table_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_desc_table_address(Some(0xffff_fff0), None); + assert_eq!(q.desc_table.0, 0xffff_fff0); + // but shouldn't be valid + assert!(!q.is_valid(m)); + // but should be allowed to set a valid description table address + q.set_desc_table_address(Some(0x10), None); + assert_eq!(q.desc_table.0, 0x10); + assert!(q.is_valid(m)); + let addr = vq.desc_table_addr().0; + q.set_desc_table_address(Some(addr as u32), Some((addr >> 32) as u32)); + + // shouldn't be allowed to set an address that breaks the alignment constraint + q.set_avail_ring_address(Some(0x1), None); + assert_eq!(q.avail_ring.0, vq.avail_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_avail_ring_address(Some(0xffff_fffe), None); + assert_eq!(q.avail_ring.0, 0xffff_fffe); + // but shouldn't be valid + assert!(!q.is_valid(m)); + // but should be allowed to set a valid available ring address + q.set_avail_ring_address(Some(0x2), None); + assert_eq!(q.avail_ring.0, 0x2); + assert!(q.is_valid(m)); + let addr = vq.avail_addr().0; + q.set_avail_ring_address(Some(addr as u32), Some((addr >> 32) as u32)); + + // shouldn't be allowed to set an address that breaks the alignment constraint + q.set_used_ring_address(Some(0x3), None); + assert_eq!(q.used_ring.0, vq.used_addr().0); + // should be allowed to set an aligned out of bounds address + q.set_used_ring_address(Some(0xffff_fffc), None); + assert_eq!(q.used_ring.0, 0xffff_fffc); + // but shouldn't be valid + assert!(!q.is_valid(m)); + // but should be allowed to set a valid used ring address + q.set_used_ring_address(Some(0x4), None); + assert_eq!(q.used_ring.0, 0x4); + let addr = vq.used_addr().0; + q.set_used_ring_address(Some(addr as u32), Some((addr >> 32) as u32)); + assert!(q.is_valid(m)); + } + + #[test] + fn test_add_used() { + let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let vq = MockSplitQueue::new(mem, 
16);
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(0));
+        assert_eq!(u16::from_le(vq.used().idx().load()), 0);
+
+        // index too large
+        assert!(q.add_used(mem, 16, 0x1000).is_err());
+        assert_eq!(u16::from_le(vq.used().idx().load()), 0);
+
+        // should be ok
+        q.add_used(mem, 1, 0x1000).unwrap();
+        assert_eq!(q.next_used, Wrapping(1));
+        assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(1));
+        assert_eq!(u16::from_le(vq.used().idx().load()), 1);
+
+        let x = vq.used().ring().ref_at(0).unwrap().load();
+        assert_eq!(x.id(), 1);
+        assert_eq!(x.len(), 0x1000);
+    }
+
+    #[test]
+    fn test_reset_queue() {
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 16);
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        q.set_size(8);
+        // The address set by `MockSplitQueue` for the descriptor table is DEFAULT_DESC_TABLE_ADDR,
+        // so let's change it for testing the reset.
+        q.set_desc_table_address(Some(0x5000), None);
+        // Same for `event_idx_enabled`, `next_avail`, `next_used` and `num_added`.
+        q.set_event_idx(true);
+        q.set_next_avail(2);
+        q.set_next_used(4);
+        q.num_added = Wrapping(15);
+        assert_eq!(q.size, 8);
+        // `create_queue` also marks the queue as ready.
+        assert!(q.ready);
+        assert_ne!(q.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR));
+        assert_ne!(q.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR));
+        assert_ne!(q.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR));
+        assert_ne!(q.next_avail, Wrapping(0));
+        assert_ne!(q.next_used, Wrapping(0));
+        assert_ne!(q.num_added, Wrapping(0));
+        assert!(q.event_idx_enabled);
+
+        q.reset();
+        assert_eq!(q.size, 16);
+        assert!(!q.ready);
+        assert_eq!(q.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR));
+        assert_eq!(q.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR));
+        assert_eq!(q.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR));
+        assert_eq!(q.next_avail, Wrapping(0));
+        assert_eq!(q.next_used, Wrapping(0));
+        assert_eq!(q.num_added, Wrapping(0));
+        assert!(!q.event_idx_enabled);
+    }
+
+    #[test]
+    fn test_needs_notification() {
+        let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let qsize = 16;
+        let vq = MockSplitQueue::new(mem, qsize);
+        let mut q: Queue = vq.create_queue().unwrap();
+        let avail_addr = vq.avail_addr();
+
+        // It should always return true when EVENT_IDX isn't enabled.
+        for i in 0..qsize {
+            q.next_used = Wrapping(i);
+            assert!(q.needs_notification(mem).unwrap());
+        }
+
+        mem.write_obj::<u16>(
+            u16::to_le(4),
+            avail_addr.unchecked_add(4 + qsize as u64 * 2),
+        )
+        .unwrap();
+        q.set_event_idx(true);
+
+        // Incrementing up to this value causes an `u16` to wrap back to 0.
+        let wrap = u32::from(u16::MAX) + 1;
+
+        for i in 0..wrap + 12 {
+            q.next_used = Wrapping(i as u16);
+            // Let's test wrapping around the maximum index value as well.
+            // `num_added` needs to be at least `1` to represent the fact that new descriptor
+            // chains have been added to the used ring since the last time `needs_notification`
+            // returned.
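+            //
+            // With `used_event` written as 4 above, a notification is expected exactly when
+            // `next_used` reaches `used_event + 1` (i == 5), and once more after the index
+            // wraps (i == 5 + wrap).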
+            q.num_added = Wrapping(1);
+            let expected = i == 5 || i == (5 + wrap);
+            assert_eq!((q.needs_notification(mem).unwrap(), i), (expected, i));
+        }
+
+        mem.write_obj::<u16>(
+            u16::to_le(8),
+            avail_addr.unchecked_add(4 + qsize as u64 * 2),
+        )
+        .unwrap();
+
+        // Returns `false` because the current `used_event` value is behind both `next_used` and
+        // the value of `next_used` at the time when `needs_notification` last returned (which is
+        // computed based on `num_added` as described in the comments for `needs_notification`).
+        assert!(!q.needs_notification(mem).unwrap());
+
+        mem.write_obj::<u16>(
+            u16::to_le(15),
+            avail_addr.unchecked_add(4 + qsize as u64 * 2),
+        )
+        .unwrap();
+
+        q.num_added = Wrapping(1);
+        assert!(!q.needs_notification(mem).unwrap());
+
+        q.next_used = Wrapping(15);
+        q.num_added = Wrapping(1);
+        assert!(!q.needs_notification(mem).unwrap());
+
+        q.next_used = Wrapping(16);
+        q.num_added = Wrapping(1);
+        assert!(q.needs_notification(mem).unwrap());
+
+        // Calling `needs_notification` again immediately returns `false`.
+        assert!(!q.needs_notification(mem).unwrap());
+
+        mem.write_obj::<u16>(
+            u16::to_le(u16::MAX - 3),
+            avail_addr.unchecked_add(4 + qsize as u64 * 2),
+        )
+        .unwrap();
+        q.next_used = Wrapping(u16::MAX - 2);
+        q.num_added = Wrapping(1);
+        // Returns `true` because, when looking at the circular sequence of indices of the used
+        // ring, the value we wrote in `used_event` appears between the "old" value of `next_used`
+        // (i.e. `next_used` - `num_added`) and the current `next_used`, thus suggesting that we
+        // need to notify the driver.
+        assert!(q.needs_notification(mem).unwrap());
+    }
+
+    #[test]
+    fn test_enable_disable_notification() {
+        let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(mem, 16);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+        let used_addr = vq.used_addr();
+
+        assert!(!q.event_idx_enabled);
+
+        q.enable_notification(mem).unwrap();
+        let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, 0);
+
+        q.disable_notification(mem).unwrap();
+        let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, VRING_USED_F_NO_NOTIFY as u16);
+
+        q.enable_notification(mem).unwrap();
+        let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, 0);
+
+        q.set_event_idx(true);
+        let avail_addr = vq.avail_addr();
+        mem.write_obj::<u16>(u16::to_le(2), avail_addr.unchecked_add(2))
+            .unwrap();
+
+        assert!(q.enable_notification(mem).unwrap());
+        q.next_avail = Wrapping(2);
+        assert!(!q.enable_notification(mem).unwrap());
+
+        mem.write_obj::<u16>(u16::to_le(8), avail_addr.unchecked_add(2))
+            .unwrap();
+
+        assert!(q.enable_notification(mem).unwrap());
+        q.next_avail = Wrapping(8);
+        assert!(!q.enable_notification(mem).unwrap());
+    }
+
+    #[test]
+    fn test_consume_chains_with_notif() {
+        let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(mem, 16);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        // q is currently valid.
+        assert!(q.is_valid(mem));
+
+        // The chains are (0, 1), (2, 3, 4), (5, 6), (7, 8), (9, 10, 11, 12).
+        let mut descs = Vec::new();
+        for i in 0..13 {
+            let flags = match i {
+                1 | 4 | 6 | 8 | 12 => 0,
+                _ => VRING_DESC_F_NEXT,
+            };
+
+            descs.push(Descriptor::new(
+                (0x1000 * (i + 1)) as u64,
+                0x1000,
+                flags as u16,
+                i + 1,
+            ));
+        }
+
+        vq.add_desc_chains(&descs, 0).unwrap();
+        // Update the index of the chain that can be consumed to not be the last one.
+        // This enables us to consume chains in multiple iterations as opposed to consuming
+        // all the driver written chains at once.
+        vq.avail().idx().store(u16::to_le(2));
+        // No descriptor chains are consumed at this point.
+        assert_eq!(q.next_avail(), 0);
+
+        let mut i = 0;
+
+        loop {
+            i += 1;
+            q.disable_notification(mem).unwrap();
+
+            while let Some(chain) = q.iter(mem).unwrap().next() {
+                // Process the descriptor chain, and then add entries to the
+                // used ring.
+                let head_index = chain.head_index();
+                let mut desc_len = 0;
+                chain.for_each(|d| {
+                    if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+                        desc_len += d.len();
+                    }
+                });
+                q.add_used(mem, head_index, desc_len).unwrap();
+            }
+            if !q.enable_notification(mem).unwrap() {
+                break;
+            }
+        }
+        // The chains should be consumed in a single loop iteration because there's nothing
+        // updating the `idx` field of the available ring in the meantime.
+        assert_eq!(i, 1);
+        // The next chain that can be consumed should have index 2.
+        assert_eq!(q.next_avail(), 2);
+        assert_eq!(q.next_used(), 2);
+        // Let the device know it can consume one more chain.
+        vq.avail().idx().store(u16::to_le(3));
+        i = 0;
+
+        loop {
+            i += 1;
+            q.disable_notification(mem).unwrap();
+
+            while let Some(chain) = q.iter(mem).unwrap().next() {
+                // Process the descriptor chain, and then add entries to the
+                // used ring.
+                let head_index = chain.head_index();
+                let mut desc_len = 0;
+                chain.for_each(|d| {
+                    if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+                        desc_len += d.len();
+                    }
+                });
+                q.add_used(mem, head_index, desc_len).unwrap();
+            }
+
+            // For simplicity, the test updates the `idx` value of the available ring here;
+            // ideally this would happen on a separate thread. Because of this update, the
+            // loop has to be iterated one more time to consume the newly available
+            // descriptor chains.
+            vq.avail().idx().store(u16::to_le(4));
+            if !q.enable_notification(mem).unwrap() {
+                break;
+            }
+        }
+        assert_eq!(i, 2);
+        // The next chain that can be consumed should have index 4.
+        assert_eq!(q.next_avail(), 4);
+        assert_eq!(q.next_used(), 4);
+
+        // Set an `idx` that is bigger than the number of entries added in the ring.
+        // This is an allowed scenario, but the indexes of the chain will have unexpected values.
+        vq.avail().idx().store(u16::to_le(7));
+        loop {
+            q.disable_notification(mem).unwrap();
+
+            while let Some(chain) = q.iter(mem).unwrap().next() {
+                // Process the descriptor chain, and then add entries to the
+                // used ring.
+                let head_index = chain.head_index();
+                let mut desc_len = 0;
+                chain.for_each(|d| {
+                    if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+                        desc_len += d.len();
+                    }
+                });
+                q.add_used(mem, head_index, desc_len).unwrap();
+            }
+            if !q.enable_notification(mem).unwrap() {
+                break;
+            }
+        }
+        assert_eq!(q.next_avail(), 7);
+        assert_eq!(q.next_used(), 7);
+    }
+
+    #[test]
+    fn test_invalid_avail_idx() {
+        // This is a negative test for the following MUST from the spec: `A driver MUST NOT
+        // decrement the available idx on a virtqueue (ie. there is no way to “unexpose”
+        // buffers).`. We validate that for this misconfiguration, the device does not panic.
+        let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(mem, 16);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        // q is currently valid.
+        assert!(q.is_valid(mem));
+
+        // The chains are (0, 1), (2, 3, 4), (5, 6).
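+        // Descriptors 1, 4 and 6 below do not set VRING_DESC_F_NEXT, so each of them ends a
+        // chain; this is what splits the seven descriptors into the three chains listed
+        // above.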
+        let mut descs = Vec::new();
+        for i in 0..7 {
+            let flags = match i {
+                1 | 4 | 6 => 0,
+                _ => VRING_DESC_F_NEXT,
+            };
+
+            descs.push(Descriptor::new(
+                (0x1000 * (i + 1)) as u64,
+                0x1000,
+                flags as u16,
+                i + 1,
+            ));
+        }
+
+        vq.add_desc_chains(&descs, 0).unwrap();
+        // Let the device know it can consume chains with the index < 3.
+        vq.avail().idx().store(u16::to_le(3));
+        // No descriptor chains are consumed at this point.
+        assert_eq!(q.next_avail(), 0);
+        assert_eq!(q.next_used(), 0);
+
+        loop {
+            q.disable_notification(mem).unwrap();
+
+            while let Some(chain) = q.iter(mem).unwrap().next() {
+                // Process the descriptor chain, and then add entries to the
+                // used ring.
+                let head_index = chain.head_index();
+                let mut desc_len = 0;
+                chain.for_each(|d| {
+                    if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+                        desc_len += d.len();
+                    }
+                });
+                q.add_used(mem, head_index, desc_len).unwrap();
+            }
+            if !q.enable_notification(mem).unwrap() {
+                break;
+            }
+        }
+        // The next chain that can be consumed should have index 3.
+        assert_eq!(q.next_avail(), 3);
+        assert_eq!(q.avail_idx(mem, Ordering::Acquire).unwrap(), Wrapping(3));
+        assert_eq!(q.next_used(), 3);
+        assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(3));
+        assert!(q.lock().ready());
+
+        // Decrement `idx`, which the spec forbids. We don't enforce this rule, but we should
+        // test that we don't panic if the driver decrements it anyway.
+        vq.avail().idx().store(u16::to_le(1));
+        // Invalid available ring index
+        assert!(q.iter(mem).is_err());
+    }
+
+    #[test]
+    fn test_iterator_and_avail_idx() {
+        // This test ensures that constructing a descriptor chain iterator succeeds with
+        // valid available ring indexes and produces an error with invalid ones.
+        let queue_size = 2;
+        let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(mem, queue_size);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        // q is currently valid.
+        assert!(q.is_valid(mem));
+
+        // Create descriptors to fill up the queue
+        let mut descs = Vec::new();
+        for i in 0..queue_size {
+            descs.push(Descriptor::new(
+                (0x1000 * (i + 1)) as u64,
+                0x1000,
+                0_u16,
+                i + 1,
+            ));
+        }
+        vq.add_desc_chains(&descs, 0).unwrap();
+
+        // Set the `next_avail` index to `u16::MAX` to test the wrapping scenarios.
+        q.set_next_avail(u16::MAX);
+
+        // When the number of chains exposed by the driver is equal to or less than the queue
+        // size, the available ring index is valid and constructs an iterator successfully.
+        let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size);
+        vq.avail().idx().store(u16::to_le(avail_idx.0));
+        assert!(q.iter(mem).is_ok());
+        let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size - 1);
+        vq.avail().idx().store(u16::to_le(avail_idx.0));
+        assert!(q.iter(mem).is_ok());
+
+        // When the number of chains exposed by the driver is larger than the queue size, the
+        // available ring index is invalid and constructing an iterator produces an error.
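+        // A ring with `queue_size` entries can expose at most `queue_size` chains at a time,
+        // so an `idx` that runs `queue_size + 1` ahead of `next_avail` can only come from a
+        // misbehaving driver and has to be rejected.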
+        let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size + 1);
+        vq.avail().idx().store(u16::to_le(avail_idx.0));
+        assert!(q.iter(mem).is_err());
+    }
+
+    #[test]
+    fn test_descriptor_and_iterator() {
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 16);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        // q is currently valid
+        assert!(q.is_valid(m));
+
+        // the chains are (0, 1), (2, 3, 4) and (5, 6)
+        let mut descs = Vec::new();
+        for j in 0..7 {
+            let flags = match j {
+                1 | 6 => 0,
+                2 | 5 => VRING_DESC_F_NEXT | VRING_DESC_F_WRITE,
+                4 => VRING_DESC_F_WRITE,
+                _ => VRING_DESC_F_NEXT,
+            };
+
+            descs.push(Descriptor::new(
+                (0x1000 * (j + 1)) as u64,
+                0x1000,
+                flags as u16,
+                j + 1,
+            ));
+        }
+
+        vq.add_desc_chains(&descs, 0).unwrap();
+
+        let mut i = q.iter(m).unwrap();
+
+        {
+            let c = i.next().unwrap();
+            assert_eq!(c.head_index(), 0);
+
+            let mut iter = c;
+            assert!(iter.next().is_some());
+            assert!(iter.next().is_some());
+            assert!(iter.next().is_none());
+            assert!(iter.next().is_none());
+        }
+
+        {
+            let c = i.next().unwrap();
+            assert_eq!(c.head_index(), 2);
+
+            let mut iter = c.writable();
+            assert!(iter.next().is_some());
+            assert!(iter.next().is_some());
+            assert!(iter.next().is_none());
+            assert!(iter.next().is_none());
+        }
+
+        {
+            let c = i.next().unwrap();
+            assert_eq!(c.head_index(), 5);
+
+            let mut iter = c.readable();
+            assert!(iter.next().is_some());
+            assert!(iter.next().is_none());
+            assert!(iter.next().is_none());
+        }
+    }
+
+    #[test]
+    fn test_iterator() {
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 16);
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        q.size = q.max_size;
+        q.desc_table = vq.desc_table_addr();
+        q.avail_ring = vq.avail_addr();
+        q.used_ring = vq.used_addr();
+        assert!(q.is_valid(m));
+
+        {
+            // requesting an iterator on a queue that is not ready should return an error
+            q.ready = false;
+            assert!(q.iter(m).is_err());
+        }
+
+        q.ready = true;
+
+        // now let's create two simple descriptor chains
+        // the chains are (0, 1) and (2, 3, 4)
+        {
+            let mut descs = Vec::new();
+            for j in 0..5u16 {
+                let flags = match j {
+                    1 | 4 => 0,
+                    _ => VRING_DESC_F_NEXT,
+                };
+
+                descs.push(Descriptor::new(
+                    (0x1000 * (j + 1)) as u64,
+                    0x1000,
+                    flags as u16,
+                    j + 1,
+                ));
+            }
+            vq.add_desc_chains(&descs, 0).unwrap();
+
+            let mut i = q.iter(m).unwrap();
+
+            {
+                let mut c = i.next().unwrap();
+                assert_eq!(c.head_index(), 0);
+
+                c.next().unwrap();
+                assert!(c.next().is_some());
+                assert!(c.next().is_none());
+                assert_eq!(c.head_index(), 0);
+            }
+
+            {
+                let mut c = i.next().unwrap();
+                assert_eq!(c.head_index(), 2);
+
+                c.next().unwrap();
+                c.next().unwrap();
+                c.next().unwrap();
+                assert!(c.next().is_none());
+                assert_eq!(c.head_index(), 2);
+            }
+
+            // also test go_to_previous_position() works as expected
+            {
+                assert!(i.next().is_none());
+                i.go_to_previous_position();
+                let mut c = q.iter(m).unwrap().next().unwrap();
+                c.next().unwrap();
+                c.next().unwrap();
+                c.next().unwrap();
+                assert!(c.next().is_none());
+            }
+        }
+
+        // Test that iterating some broken descriptor chain does not exceed
+        // 2^32 bytes in total (VIRTIO spec version 1.2, 2.7.5.2:
+        // Drivers MUST NOT add a descriptor chain longer than 2^32 bytes in
+        // total)
+        {
+            let descs = vec![
+                Descriptor::new(0x1000, 0xffff_ffff, VRING_DESC_F_NEXT as u16, 1),
+                Descriptor::new(0x1000, 0x1234_5678, 0, 2),
+            ];
+            vq.add_desc_chains(&descs, 0).unwrap();
+            let mut yielded_bytes_by_iteration = 0_u32;
+            for d in q.iter(m).unwrap().next().unwrap() {
+                yielded_bytes_by_iteration = yielded_bytes_by_iteration
+                    .checked_add(d.len())
+                    .expect("iterator should not yield more than 2^32 bytes");
+            }
+        }
+
+        // Same as above, but test with a descriptor which is self-referential
+        {
+            let descs = vec![Descriptor::new(
+                0x1000,
+                0xffff_ffff,
+                VRING_DESC_F_NEXT as u16,
+                0,
+            )];
+            vq.add_desc_chains(&descs, 0).unwrap();
+            let mut yielded_bytes_by_iteration = 0_u32;
+            for d in q.iter(m).unwrap().next().unwrap() {
+                yielded_bytes_by_iteration = yielded_bytes_by_iteration
+                    .checked_add(d.len())
+                    .expect("iterator should not yield more than 2^32 bytes");
+            }
+        }
+    }
+
+    #[test]
+    fn test_regression_iterator_division() {
+        // This is a regression test checking that the iterator does not try to divide
+        // by 0 when the queue size is 0.
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 1);
+        // This input was generated by the fuzzer, both for the `Queue` and the `Descriptor`.
+        let descriptors: Vec<Descriptor> = vec![Descriptor::new(
+            14178673876262995140,
+            3301229764,
+            50372,
+            50372,
+        )];
+        vq.build_desc_chain(&descriptors).unwrap();
+
+        let mut q = Queue {
+            max_size: 38,
+            next_avail: Wrapping(0),
+            next_used: Wrapping(0),
+            event_idx_enabled: false,
+            num_added: Wrapping(0),
+            size: 0,
+            ready: false,
+            desc_table: GuestAddress(12837708984796196),
+            avail_ring: GuestAddress(0),
+            used_ring: GuestAddress(9943947977301164032),
+        };
+
+        assert!(q.pop_descriptor_chain(m).is_none());
+    }
+
+    #[test]
+    fn test_setters_error_cases() {
+        assert_eq!(Queue::new(15).unwrap_err(), Error::InvalidMaxSize);
+        let mut q = Queue::new(16).unwrap();
+
+        let expected_val = q.desc_table.0;
+        assert_eq!(
+            q.try_set_desc_table_address(GuestAddress(0xf)).unwrap_err(),
+            Error::InvalidDescTableAlign
+        );
+        assert_eq!(q.desc_table(), expected_val);
+
+        let expected_val = q.avail_ring.0;
+        assert_eq!(
+            q.try_set_avail_ring_address(GuestAddress(0x1)).unwrap_err(),
+            Error::InvalidAvailRingAlign
+        );
+        assert_eq!(q.avail_ring(), expected_val);
+
+        let expected_val = q.used_ring.0;
+        assert_eq!(
+            q.try_set_used_ring_address(GuestAddress(0x3)).unwrap_err(),
+            Error::InvalidUsedRingAlign
+        );
+        assert_eq!(q.used_ring(), expected_val);
+
+        let expected_val = q.size;
+        assert_eq!(q.try_set_size(15).unwrap_err(), Error::InvalidSize);
+        assert_eq!(q.size(), expected_val)
+    }
+
+    #[test]
+    // This is a regression test for a fuzzing finding. If the driver requests a reset of the
+    // device, but then does not re-initialize the queue, a subsequent call to process
+    // a request should yield no descriptors to process. Before this fix we were processing
+    // descriptors that were added to the queue before the reset, and ended up processing 255
+    // descriptors per chain.
+    fn test_regression_timeout_after_reset() {
+        // The input below was generated by libfuzzer and adapted for this test.
+        let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap();
+        let vq = MockSplitQueue::new(m, 1024);
+
+        // The descriptor values below come straight from that fuzzer input.
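+        // The exact addresses, lengths and flags are not meaningful on their own; the
+        // descriptors only need to be present in guest memory so that a regressed
+        // implementation would wrongly find something to consume after the reset.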
+        let descriptors: Vec<Descriptor> = vec![
+            Descriptor::new(21508325467, 0, 1, 4),
+            Descriptor::new(2097152, 4096, 3, 0),
+            Descriptor::new(18374686479672737792, 4294967295, 65535, 29),
+            Descriptor::new(76842670169653248, 1114115, 0, 0),
+            Descriptor::new(16, 983040, 126, 3),
+            Descriptor::new(897648164864, 0, 0, 0),
+            Descriptor::new(111669149722, 0, 0, 0),
+        ];
+        vq.build_multiple_desc_chains(&descriptors).unwrap();
+
+        let mut q: Queue = vq.create_queue().unwrap();
+
+        // Setting the queue to ready should not allow consuming descriptors after reset.
+        q.reset();
+        q.set_ready(true);
+        let mut counter = 0;
+        while let Some(mut desc_chain) = q.pop_descriptor_chain(m) {
+            // This loop body should never run: after a reset no descriptor chains are
+            // expected, and `counter` records any that are wrongly yielded.
+            while desc_chain.next().is_some() {
+                counter += 1;
+            }
+        }
+        assert_eq!(counter, 0);
+
+        // Setting the avail_addr to valid should not allow consuming descriptors after reset.
+        q.reset();
+        q.set_avail_ring_address(Some(0x1000), None);
+        assert_eq!(q.avail_ring, GuestAddress(0x1000));
+        counter = 0;
+        while let Some(mut desc_chain) = q.pop_descriptor_chain(m) {
+            // This loop body should never run: after a reset no descriptor chains are
+            // expected, and `counter` records any that are wrongly yielded.
+            while desc_chain.next().is_some() {
+                counter += 1;
+            }
+        }
+        assert_eq!(counter, 0);
+    }
+}
diff --git a/src/queue_sync.rs b/src/queue_sync.rs
new file mode 100644
index 0000000..6e666be
--- /dev/null
+++ b/src/queue_sync.rs
@@ -0,0 +1,358 @@
+// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::num::Wrapping;
+use std::ops::Deref;
+use std::sync::atomic::Ordering;
+use std::sync::{Arc, Mutex, MutexGuard};
+
+use vm_memory::GuestMemory;
+
+use crate::{DescriptorChain, Error, Queue, QueueGuard, QueueT};
+
+/// Struct to maintain information and manipulate the state of a virtio queue in a
+/// multi-threaded context.
+///
+/// # Example
+///
+/// ```rust
+/// use virtio_queue::{Queue, QueueSync, QueueT};
+/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+///
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// let mut queue = QueueSync::new(1024).unwrap();
+///
+/// // First, the driver sets up the queue; this setup is done via writes on the bus (PCI, MMIO).
+/// queue.set_size(8);
+/// queue.set_desc_table_address(Some(0x1000), None);
+/// queue.set_avail_ring_address(Some(0x2000), None);
+/// queue.set_used_ring_address(Some(0x3000), None);
+/// queue.set_ready(true);
+/// // The user should check if the queue is valid before starting to use it.
+/// assert!(queue.is_valid(m.memory()));
+///
+/// // The memory object is not embedded in the `QueueSync`, so we have to pass it as a
+/// // parameter to the methods that access the guest memory. Examples would be:
+/// queue.add_used(m.memory(), 1, 0x100).unwrap();
+/// queue.needs_notification(m.memory()).unwrap();
+/// ```
+#[derive(Clone, Debug)]
+pub struct QueueSync {
+    state: Arc<Mutex<Queue>>,
+}
+
+impl QueueSync {
+    fn lock_state(&self) -> MutexGuard<Queue> {
+        // Do not expect poisoned lock.
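+        // A poisoned lock would mean that another thread panicked while holding the queue
+        // state, in which case there is no consistent state left to recover; propagating
+        // the panic via `unwrap` is therefore the intended behavior here.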
+        self.state.lock().unwrap()
+    }
+}
+
+impl<'a> QueueGuard<'a> for QueueSync {
+    type G = MutexGuard<'a, Queue>;
+}
+
+impl QueueT for QueueSync {
+    fn new(max_size: u16) -> Result<Self, Error> {
+        Ok(QueueSync {
+            state: Arc::new(Mutex::new(Queue::new(max_size)?)),
+        })
+    }
+
+    fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool {
+        self.lock_state().is_valid(mem)
+    }
+
+    fn reset(&mut self) {
+        self.lock_state().reset();
+    }
+
+    fn lock(&mut self) -> <Self as QueueGuard>::G {
+        self.lock_state()
+    }
+
+    fn max_size(&self) -> u16 {
+        self.lock_state().max_size()
+    }
+
+    fn size(&self) -> u16 {
+        self.lock_state().size()
+    }
+
+    fn set_size(&mut self, size: u16) {
+        self.lock_state().set_size(size);
+    }
+
+    fn ready(&self) -> bool {
+        self.lock_state().ready()
+    }
+
+    fn set_ready(&mut self, ready: bool) {
+        self.lock_state().set_ready(ready)
+    }
+
+    fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        self.lock_state().set_desc_table_address(low, high);
+    }
+
+    fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        self.lock_state().set_avail_ring_address(low, high);
+    }
+
+    fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+        self.lock_state().set_used_ring_address(low, high);
+    }
+
+    fn set_event_idx(&mut self, enabled: bool) {
+        self.lock_state().set_event_idx(enabled);
+    }
+
+    fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+    where
+        M: GuestMemory + ?Sized,
+    {
+        self.lock_state().avail_idx(mem, order)
+    }
+
+    fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+        self.lock_state().used_idx(mem, order)
+    }
+
+    fn add_used<M: GuestMemory>(
+        &mut self,
+        mem: &M,
+        head_index: u16,
+        len: u32,
+    ) -> Result<(), Error> {
+        self.lock_state().add_used(mem, head_index, len)
+    }
+
+    fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+        self.lock_state().enable_notification(mem)
+    }
+
+    fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error> {
+        self.lock_state().disable_notification(mem)
+    }
+
+    fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+        self.lock_state().needs_notification(mem)
+    }
+
+    fn next_avail(&self) -> u16 {
+        self.lock_state().next_avail()
+    }
+
+    fn set_next_avail(&mut self, next_avail: u16) {
+        self.lock_state().set_next_avail(next_avail);
+    }
+
+    fn next_used(&self) -> u16 {
+        self.lock_state().next_used()
+    }
+
+    fn set_next_used(&mut self, next_used: u16) {
+        self.lock_state().set_next_used(next_used);
+    }
+
+    fn desc_table(&self) -> u64 {
+        self.lock_state().desc_table()
+    }
+
+    fn avail_ring(&self) -> u64 {
+        self.lock_state().avail_ring()
+    }
+
+    fn used_ring(&self) -> u64 {
+        self.lock_state().used_ring()
+    }
+
+    fn event_idx_enabled(&self) -> bool {
+        self.lock_state().event_idx_enabled()
+    }
+
+    fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+    where
+        M: Clone + Deref,
+        M::Target: GuestMemory,
+    {
+        self.lock_state().pop_descriptor_chain(mem)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::defs::{DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR};
+    use std::sync::Barrier;
+    use virtio_bindings::bindings::virtio_ring::VRING_USED_F_NO_NOTIFY;
+    use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+
+    #[test]
+    fn test_queue_state_sync() {
+        let mut q = QueueSync::new(0x1000).unwrap();
+        let mut q2 = q.clone();
+        let q3 = q.clone();
+        let barrier = Arc::new(Barrier::new(3));
+        let b2 = barrier.clone();
+        let b3 = barrier.clone();
+
+        let t1 = std::thread::spawn(move || {
+            {
+                let guard = q2.lock();
+                assert!(!guard.ready());
+            }
+            b2.wait();
+            b2.wait();
+            {
+                let
guard = q2.lock(); + assert!(guard.ready()); + } + }); + + let t2 = std::thread::spawn(move || { + assert!(!q3.ready()); + b3.wait(); + b3.wait(); + assert!(q3.ready()); + }); + + barrier.wait(); + q.set_ready(true); + barrier.wait(); + + t1.join().unwrap(); + t2.join().unwrap(); + } + + #[test] + fn test_state_sync_add_used() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mut q = QueueSync::new(0x100).unwrap(); + + q.set_desc_table_address(Some(0x1000), None); + q.set_avail_ring_address(Some(0x2000), None); + q.set_used_ring_address(Some(0x3000), None); + q.set_event_idx(true); + q.set_ready(true); + assert!(q.is_valid(m.memory())); + assert_eq!(q.lock().size(), 0x100); + + assert_eq!(q.max_size(), 0x100); + assert_eq!(q.size(), 0x100); + q.set_size(0x80); + assert_eq!(q.size(), 0x80); + assert_eq!(q.max_size(), 0x100); + q.set_next_avail(5); + assert_eq!(q.next_avail(), 5); + q.set_next_used(3); + assert_eq!(q.next_used(), 3); + assert_eq!( + q.avail_idx(m.memory(), Ordering::Acquire).unwrap(), + Wrapping(0) + ); + assert_eq!( + q.used_idx(m.memory(), Ordering::Acquire).unwrap(), + Wrapping(0) + ); + + assert_eq!(q.next_used(), 3); + + // index too large + assert!(q.add_used(m.memory(), 0x200, 0x1000).is_err()); + assert_eq!(q.next_used(), 3); + + // should be ok + q.add_used(m.memory(), 1, 0x1000).unwrap(); + assert_eq!(q.next_used(), 4); + assert_eq!( + q.used_idx(m.memory(), Ordering::Acquire).unwrap(), + Wrapping(4) + ); + } + + #[test] + fn test_sync_state_reset_queue() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mut q = QueueSync::new(0x100).unwrap(); + + q.set_desc_table_address(Some(0x1000), None); + q.set_avail_ring_address(Some(0x2000), None); + q.set_used_ring_address(Some(0x3000), None); + q.set_event_idx(true); + q.set_next_avail(2); + q.set_next_used(2); + q.set_size(0x8); + q.set_ready(true); + assert!(q.is_valid(m.memory())); + + q.needs_notification(m.memory()).unwrap(); + + assert_eq!(q.lock_state().size(), 0x8); + assert!(q.lock_state().ready()); + assert_ne!(q.lock_state().desc_table(), DEFAULT_DESC_TABLE_ADDR); + assert_ne!(q.lock_state().avail_ring(), DEFAULT_AVAIL_RING_ADDR); + assert_ne!(q.lock_state().used_ring(), DEFAULT_USED_RING_ADDR); + assert_ne!(q.lock_state().next_avail(), 0); + assert_ne!(q.lock_state().next_used(), 0); + assert!(q.lock_state().event_idx_enabled()); + + q.reset(); + assert_eq!(q.lock_state().size(), 0x100); + assert!(!q.lock_state().ready()); + assert_eq!(q.lock_state().desc_table(), DEFAULT_DESC_TABLE_ADDR); + assert_eq!(q.lock_state().avail_ring(), DEFAULT_AVAIL_RING_ADDR); + assert_eq!(q.lock_state().used_ring(), DEFAULT_USED_RING_ADDR); + assert_eq!(q.lock_state().next_avail(), 0); + assert_eq!(q.lock_state().next_used(), 0); + assert!(!q.lock_state().event_idx_enabled()); + } + + #[test] + fn test_enable_disable_notification() { + let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); + let mem = m.memory(); + let mut q = QueueSync::new(0x100).unwrap(); + + q.set_desc_table_address(Some(0x1000), None); + assert_eq!(q.desc_table(), 0x1000); + q.set_avail_ring_address(Some(0x2000), None); + assert_eq!(q.avail_ring(), 0x2000); + q.set_used_ring_address(Some(0x3000), None); + assert_eq!(q.used_ring(), 0x3000); + q.set_ready(true); + assert!(q.is_valid(mem)); + + let used_addr = GuestAddress(q.lock_state().used_ring()); + + assert!(!q.event_idx_enabled()); + q.enable_notification(mem).unwrap(); + let v = 
m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, 0);
+
+        q.disable_notification(m.memory()).unwrap();
+        let v = m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, VRING_USED_F_NO_NOTIFY as u16);
+
+        q.enable_notification(mem).unwrap();
+        let v = m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+        assert_eq!(v, 0);
+
+        q.set_event_idx(true);
+        let avail_addr = GuestAddress(q.lock_state().avail_ring());
+        m.write_obj::<u16>(u16::to_le(2), avail_addr.unchecked_add(2))
+            .unwrap();
+
+        assert!(q.enable_notification(mem).unwrap());
+        q.lock_state().set_next_avail(2);
+        assert!(!q.enable_notification(mem).unwrap());
+
+        m.write_obj::<u16>(u16::to_le(8), avail_addr.unchecked_add(2))
+            .unwrap();
+
+        assert!(q.enable_notification(mem).unwrap());
+        q.lock_state().set_next_avail(8);
+        assert!(!q.enable_notification(mem).unwrap());
+    }
+}
diff --git a/src/state.rs b/src/state.rs
new file mode 100644
index 0000000..aa22fb3
--- /dev/null
+++ b/src/state.rs
@@ -0,0 +1,119 @@
+use crate::{Error, Queue, QueueT};
+use vm_memory::GuestAddress;
+
+/// Representation of the `Queue` state.
+///
+/// The `QueueState` represents the pure state of the `Queue` without tracking any implementation
+/// details of the queue. The goal of this design is to minimize the changes required to the
+/// state, and thus the transitions required between states when upgrading or downgrading.
+///
+/// In practice this means that the `QueueState` consists solely of POD (Plain Old Data).
+///
+/// As this structure has all of its fields public, it is considered untrusted. A validated
+/// queue can be created from the state by calling the associated `try_from` function.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct QueueState {
+    /// The maximum size in elements offered by the device.
+    pub max_size: u16,
+    /// Tail position of the available ring.
+    pub next_avail: u16,
+    /// Head position of the used ring.
+    pub next_used: u16,
+    /// VIRTIO_F_RING_EVENT_IDX negotiated.
+    pub event_idx_enabled: bool,
+    /// The queue size in elements the driver selected.
+    pub size: u16,
+    /// Indicates if the queue is finished with configuration.
+    pub ready: bool,
+    /// Guest physical address of the descriptor table.
+    pub desc_table: u64,
+    /// Guest physical address of the available ring.
+    pub avail_ring: u64,
+    /// Guest physical address of the used ring.
+    pub used_ring: u64,
+}
+
+impl TryFrom<QueueState> for Queue {
+    type Error = Error;
+
+    fn try_from(q_state: QueueState) -> Result<Self, Self::Error> {
+        let mut q = Queue::new(q_state.max_size)?;
+
+        q.set_next_avail(q_state.next_avail);
+        q.set_next_used(q_state.next_used);
+        q.set_event_idx(q_state.event_idx_enabled);
+        q.try_set_size(q_state.size)?;
+        q.set_ready(q_state.ready);
+        q.try_set_desc_table_address(GuestAddress(q_state.desc_table))?;
+        q.try_set_avail_ring_address(GuestAddress(q_state.avail_ring))?;
+        q.try_set_used_ring_address(GuestAddress(q_state.used_ring))?;
+
+        Ok(q)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn create_valid_queue_state() -> QueueState {
+        let queue = Queue::new(16).unwrap();
+        queue.state()
+    }
+
+    #[test]
+    fn test_empty_queue_state() {
+        let max_size = 16;
+        let queue = Queue::new(max_size).unwrap();
+
+        // Saving the state of a queue on which we didn't do any operation is ok.
+        // Same for restore.
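+        // A round trip through `QueueState` is expected to be lossless: the restored queue
+        // below compares equal to the one we saved.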
+ let queue_state = queue.state(); + let restored_q = Queue::try_from(queue_state).unwrap(); + assert_eq!(queue, restored_q); + } + + #[test] + fn test_invalid_queue_state() { + // Let's generate a state that we know is valid so we can just alter one field at a time. + let mut q_state = create_valid_queue_state(); + + // Test invalid max_size. + // Size too small. + q_state.max_size = 0; + assert!(Queue::try_from(q_state).is_err()); + // Size too big. + q_state.max_size = u16::MAX; + assert!(Queue::try_from(q_state).is_err()); + // Size not a power of 2. + q_state.max_size = 15; + assert!(Queue::try_from(q_state).is_err()); + + // Test invalid size. + let mut q_state = create_valid_queue_state(); + // Size too small. + q_state.size = 0; + assert!(Queue::try_from(q_state).is_err()); + // Size too big. + q_state.size = u16::MAX; + assert!(Queue::try_from(q_state).is_err()); + // Size not a power of 2. + q_state.size = 15; + assert!(Queue::try_from(q_state).is_err()); + + // Test invalid desc_table. + let mut q_state = create_valid_queue_state(); + q_state.desc_table = 0xf; + assert!(Queue::try_from(q_state).is_err()); + + // Test invalid avail_ring. + let mut q_state = create_valid_queue_state(); + q_state.avail_ring = 0x1; + assert!(Queue::try_from(q_state).is_err()); + + // Test invalid used_ring. + let mut q_state = create_valid_queue_state(); + q_state.used_ring = 0x3; + assert!(Queue::try_from(q_state).is_err()); + } +} -- cgit v1.2.3