author    Jeongik Cha <jeongik@google.com>  2023-09-14 16:36:26 +0900
committer Jeongik Cha <jeongik@google.com>  2023-09-27 03:10:20 +0900
commit    ea7c3077bda876b7967351e7047f800be3e45c1d (patch)
tree      9b65c4db00f75679179e1a232d119c6f629e9145
parent    056cb17499ad02cd758a44ca44005833755e222d (diff)
download  virtio-queue-ea7c3077bda876b7967351e7047f800be3e45c1d.tar.gz
Import virtio-queue
Bug: 277909042
Test: build
Change-Id: Iff2cf057637648f5e1a67190678b83ce530edc33
-rw-r--r--  .cargo_vcs_info.json          6
-rw-r--r--  Android.bp                   19
-rw-r--r--  CHANGELOG.md                134
-rw-r--r--  Cargo.toml                   54
-rw-r--r--  Cargo.toml.orig              28
-rw-r--r--  LICENSE                      30
-rw-r--r--  LICENSE-BSD-3-Clause         27
-rw-r--r--  LICENSE_APACHE               30
-rw-r--r--  METADATA                     19
-rw-r--r--  MODULE_LICENSE_APACHE2        0
-rw-r--r--  MODULE_LICENSE_BSD            0
-rw-r--r--  OWNERS                        1
-rw-r--r--  README.md                   222
-rw-r--r--  benches/main.rs              21
-rw-r--r--  benches/queue/mod.rs         85
-rw-r--r--  cargo2android.json            6
-rw-r--r--  docs/TESTING.md              29
-rw-r--r--  docs/images/descriptor.png    bin 0 -> 32100 bytes
-rw-r--r--  docs/images/queue.png         bin 0 -> 86591 bytes
-rw-r--r--  src/chain.rs                504
-rw-r--r--  src/defs.rs                  38
-rw-r--r--  src/descriptor.rs           276
-rw-r--r--  src/lib.rs                  267
-rw-r--r--  src/mock.rs                 500
-rw-r--r--  src/queue.rs               1597
-rw-r--r--  src/queue_sync.rs           358
-rw-r--r--  src/state.rs                119
27 files changed, 4370 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..3486f5b
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,6 @@
+{
+ "git": {
+ "sha1": "854e098e4871f2ea487b1853bc21f02d6c1e8057"
+ },
+ "path_in_vcs": "crates/virtio-queue"
+}
\ No newline at end of file
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..83d3ed5
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,19 @@
+// This file is generated by cargo2android.py --config cargo2android.json.
+// Do not modify this file as changes will be overridden on upgrade.
+
+
+
+rust_library_host {
+ name: "libvirtio_queue",
+ crate_name: "virtio_queue",
+ cargo_env_compat: true,
+ cargo_pkg_version: "0.9.0",
+ srcs: ["src/lib.rs"],
+ edition: "2021",
+ rustlibs: [
+ "liblog_rust",
+ "libvirtio_bindings",
+ "libvm_memory_android",
+ "libvmm_sys_util",
+ ],
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..74f074a
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,134 @@
+# v0.9.0
+
+## Changed
+- Updated vm-memory from 0.11.0 to 0.12.0.
+
+# v0.8.0
+
+## Changed
+- Terminate iterating descriptor chains that are longer than 2^32 bytes.
+- Updated vm-memory from 0.10.0 to 0.11.0.
+- Updated virtio-bindings from 0.1.0 to 0.2.0.
+
+# v0.7.1
+
+## Fixed
+- Skip indirect descriptor address alignment check, the virtio spec has
+ no alignment requirement on this, see `2.6.5.3 Indirect Descriptors`
+ and `2.7.7 Indirect Flag: Scatter-Gather Support` in virtio 1.0.
+- Update the `add_desc_chains` mock function such that it works on big endian
+ hosts as well.
+- Check that the queue is ready for processing requests when calling the
+ iterator functions. For now the checks are limited to the avail address and
+ the ready fields, but should be extended in the future to account for other
+ fields that could signal an invalid queue. This behavior can be triggered
+ by doing a `reset` followed by a `pop_descriptor_chain`.
+
+# v0.7.0
+
+## Changed
+
+- Updated vmm-sys-util from 0.10.0 to 0.11.0.
+- Updated vm-memory from 0.9.0 to 0.10.0.
+
+# v0.6.1
+
+## Fixed
+- Return an error if the number of available descriptor chains exposed by the
+ driver exceeds the queue size. This way we avoid potential hanging and
+ Denial-of-Service in the VMM, that was possible before by iterating multiple
+ times over the same chains.
+
+# v0.6.0
+
+## Added
+- Derive `Eq` for structures that derive `PartialEq`.
+
+## Changed
+- Use `add_desc_chains` in tests
+- Update dependencies: `vm-memory` from `0.8.0` to `0.9.0` and `log` from `0.4.6` to `0.4.17`.
+- Upgrade to Rust 2021 edition.
+
+# v0.5.0
+
+## Added
+- Added getters and setters for the Virtio Queue fields.
+- Added the `state` method for retrieving the `QueueState` of a `Queue`.
+
+## Fixed
+- Validate the state of the Virtio Queue when restoring from state and return errors on invalid
+ input.
+
+## Removed
+- Removed the wrapper over the Virtio Queue that was wrapping the Guest Memory. VMMs can define
+ this wrapper if needed, but this is no longer provided as part of virtio-queue crate so that the
+ naming scheme can be simplified. As a consequence, a couple of functions now receive the
+ memory as a parameter (more details in the Changed section).
+- Removed `num_added` field from the `QueueState` because this is an implementation detail of
+ the notification suppression feature and thus should not be part of the state.
+- Removed `QueueGuard` and `lock_with_memory`.
+
+## Changed
+- `QueueState` is now renamed to `Queue`.
+- `QueueStateSync` is now renamed to `QueueSync`.
+- The `QueueState` structure now represents the state of the `Queue` without any implementation
+ details. This can be used for implementing save/restore.
+- Initializing a `Queue` now returns an error in case the `max_size` is invalid.
+- The `Queue` fields are now private and can be updated only through the dedicated setters.
+- The following Queue methods now receive the memory as a parameter: `iter`, `is_valid`,
+ `add_used`, `needs_notification`, `enable_notification`, `disable_notification`, `avail_idx`,
+ `used_idx`.
+- Use the constant definition from the `virtio-queue` crate.
+
+# v0.4.0
+
+## Fixed
+- [[#173]](https://github.com/rust-vmm/vm-virtio/pull/173) Fix potential division by zero in
+ iterator when the queue size is 0.
+
+## Changed
+- [[#162]](https://github.com/rust-vmm/vm-virtio/pull/162) Added error handling in the mock
+ interface and the ability to create multiple descriptor chains for testing in order to
+ support running fuzzing.
+- [[#174]](https://github.com/rust-vmm/vm-virtio/pull/174) Updated the `avail_idx` and `used_idx`
+ documentation to specify when these functions panic.
+
+
+# v0.3.0
+
+## Added
+- [[#148]](https://github.com/rust-vmm/vm-virtio/pull/148): `QueueStateOwnedT` trait that stands
+ for queue objects which are exclusively owned and accessed by a single thread of execution.
+- [[#148]](https://github.com/rust-vmm/vm-virtio/pull/148): Added the `pop_descriptor_chain`
+ method, which can be used to consume descriptor chains from the available ring without
+ using an iterator, to `QueueStateT` and `QueueGuard`. Also added `go_to_previous_position()`
+ to `QueueGuard`, which enables decrementing the next available index by one position, which
+ effectively undoes the consumption of a descriptor chain in some use cases.
+- [[#151]](https://github.com/rust-vmm/vm-virtio/pull/151): Added `MockSplitQueue::add_desc_chain()`,
+ which places a descriptor chain at the specified offset in the descriptor table.
+- [[#153]](https://github.com/rust-vmm/vm-virtio/pull/153): Added `QueueStateT::size()` to return
+ the size of the queue.
+
+## Changed
+- The minimum version of the `vm-memory` dependency is now `v0.8.0`
+- [[#161]](https://github.com/rust-vmm/vm-virtio/pull/161): Improve the efficiency of `needs_notification`
+
+## Removed
+- [[#153]](https://github.com/rust-vmm/vm-virtio/pull/153): `#[derive(Clone)]` for `QueueState`
+
+# v0.2.0
+
+## Added
+
+- *Testing Interface*: Added the possibility to initialize a mock descriptor
+ chain from a list of descriptors.
+- Added setters and getters for the queue fields required for extending the
+ `Queue` in VMMs.
+
+## Fixed
+
+- Apply the appropriate endianness conversion on `used_idx`.
+
+# v0.1.0
+
+This is the first release of the crate.
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..7ef9e3e
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,54 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "virtio-queue"
+version = "0.9.0"
+authors = ["The Chromium OS Authors"]
+description = "virtio queue implementation"
+readme = "README.md"
+keywords = ["virtio"]
+license = "Apache-2.0 OR BSD-3-Clause"
+repository = "https://github.com/rust-vmm/vm-virtio"
+resolver = "1"
+
+[[bench]]
+name = "main"
+harness = false
+
+[dependencies.log]
+version = "0.4.17"
+
+[dependencies.virtio-bindings]
+version = "0.2.1"
+
+[dependencies.vm-memory]
+version = "0.12.0"
+
+[dependencies.vmm-sys-util]
+version = "0.11.0"
+
+[dev-dependencies.criterion]
+version = "0.3.0"
+
+[dev-dependencies.memoffset]
+version = "0.7.1"
+
+[dev-dependencies.vm-memory]
+version = "0.12.0"
+features = [
+ "backend-mmap",
+ "backend-atomic",
+]
+
+[features]
+test-utils = []
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..79ba46f
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,28 @@
+[package]
+name = "virtio-queue"
+version = "0.9.0"
+authors = ["The Chromium OS Authors"]
+description = "virtio queue implementation"
+repository = "https://github.com/rust-vmm/vm-virtio"
+keywords = ["virtio"]
+readme = "README.md"
+license = "Apache-2.0 OR BSD-3-Clause"
+edition = "2021"
+
+[features]
+test-utils = []
+
+[dependencies]
+vm-memory = "0.12.0"
+vmm-sys-util = "0.11.0"
+log = "0.4.17"
+virtio-bindings = { path="../virtio-bindings", version = "0.2.1" }
+
+[dev-dependencies]
+criterion = "0.3.0"
+vm-memory = { version = "0.12.0", features = ["backend-mmap", "backend-atomic"] }
+memoffset = "0.7.1"
+
+[[bench]]
+name = "main"
+harness = false
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ebfb55c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---
+
diff --git a/LICENSE-BSD-3-Clause b/LICENSE-BSD-3-Clause
new file mode 100644
index 0000000..8bafca3
--- /dev/null
+++ b/LICENSE-BSD-3-Clause
@@ -0,0 +1,27 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSE_APACHE b/LICENSE_APACHE
new file mode 100644
index 0000000..ebfb55c
--- /dev/null
+++ b/LICENSE_APACHE
@@ -0,0 +1,30 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---
+
diff --git a/METADATA b/METADATA
new file mode 100644
index 0000000..51ab4f1
--- /dev/null
+++ b/METADATA
@@ -0,0 +1,19 @@
+name: "virtio-queue"
+description: "virtio queue implementation"
+third_party {
+ identifier {
+ type: "crates.io"
+ value: "https://crates.io/crates/virtio-queue"
+ }
+ identifier {
+ type: "Archive"
+ value: "https://static.crates.io/crates/virtio-queue/virtio-queue-0.9.0.crate"
+ }
+ version: "0.9.0"
+ license_type: NOTICE
+ last_upgrade_date {
+ year: 2023
+ month: 8
+ day: 23
+ }
+}
diff --git a/MODULE_LICENSE_APACHE2 b/MODULE_LICENSE_APACHE2
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_APACHE2
diff --git a/MODULE_LICENSE_BSD b/MODULE_LICENSE_BSD
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/MODULE_LICENSE_BSD
diff --git a/OWNERS b/OWNERS
new file mode 100644
index 0000000..45dc4dd
--- /dev/null
+++ b/OWNERS
@@ -0,0 +1 @@
+include platform/prebuilts/rust:master:/OWNERS
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0adc1b1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,222 @@
+# virtio-queue
+
+The `virtio-queue` crate provides a virtio device implementation for a virtio
+queue, a virtio descriptor and a chain of such descriptors.
+Two formats of virtio queues are defined in the specification: split virtqueues
+and packed virtqueues. The `virtio-queue` crate offers support only for the
+[split virtqueues](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-240006)
+format.
+The purpose of the virtio-queue API is to be consumed by virtio device
+implementations (such as the block device or vsock device).
+The main abstraction is the `Queue`. The crate also defines a state object
+for the queue, i.e. `QueueState`.
+
+## Usage
+
+Let’s take a concrete example of how a device would work with a queue, using
+the MMIO bus.
+
+First, it is important to mention that the mandatory parts of the virtio
+interface are the following:
+
+- the device status field → provides an indication of
+ [the completed steps](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-100001)
+ of the device initialization routine,
+- the feature bits →
+ [the features](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-100001)
+ the driver/device understand(s),
+- [notifications](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-170003),
+- one or more
+ [virtqueues](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-230005)
+ → the mechanism for data transport between the driver and device.
+
+Each virtqueue consists of three parts:
+
+- Descriptor Table,
+- Available Ring,
+- Used Ring.
+
+Before booting the virtual machine (VM), the VMM does the following setup:
+
+1. initialize an array of Queues using the Queue constructor.
+2. register the device to the MMIO bus, so that the driver can later send
+ read/write requests from/to the MMIO space; some of those requests also set
+ up the queues’ state.
+3. other pre-boot configurations, such as registering an fd for the interrupt
+ assigned to the device; this fd will later be used by the device to inform
+ the driver that it has information to communicate.
+
+After the boot of the VM, the driver starts sending read/write requests to
+configure things like:
+
+* the supported features;
+* queue parameters. The following setters are used for the queue setup:
+ * `set_size` → sets the size of the queue.
+ * `set_ready` → configures the queue to the `ready for processing` state.
+ * `set_desc_table_address`, `set_avail_ring_address`,
+ `set_used_ring_address` → configure the guest addresses of the constituent
+ parts of the queue.
+ * `set_event_idx` → called as part of the feature negotiation in the
+ `virtio-device` crate; it enables or disables the
+ VIRTIO_F_RING_EVENT_IDX feature.
+* the device activation. As part of this activation, the device can also create
+ a queue handler, which can later be used to process the queue.
+
+Once the queues are ready, the device can be used.
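+
+For illustration, the sketch below shows how such a configuration request could
+be applied on the VMM side. It assumes the driver-provided guest addresses
+arrive split into low/high 32-bit halves (as written through the MMIO
+registers) and that the address setters accept those halves as optional
+arguments; error handling and the MMIO plumbing are elided, and the names are
+illustrative rather than part of the crate API.
+
+```rust
+use virtio_queue::{Queue, QueueT};
+
+// Hypothetical helper: `desc_table`, `avail_ring` and `used_ring` are the guest
+// physical addresses written by the driver through the MMIO registers.
+fn configure_queue(
+    desc_table: u64,
+    avail_ring: u64,
+    used_ring: u64,
+) -> Result<Queue, virtio_queue::Error> {
+    // `max_size` is what the device advertises; the driver may pick a smaller size.
+    let mut q = Queue::new(256)?;
+    q.set_size(256);
+    // Assumption: the setters take the low/high 32-bit halves of the guest address.
+    q.set_desc_table_address(Some(desc_table as u32), Some((desc_table >> 32) as u32));
+    q.set_avail_ring_address(Some(avail_ring as u32), Some((avail_ring >> 32) as u32));
+    q.set_used_ring_address(Some(used_ring as u32), Some((used_ring >> 32) as u32));
+    // Only if VIRTIO_F_RING_EVENT_IDX was negotiated.
+    q.set_event_idx(true);
+    q.set_ready(true);
+    Ok(q)
+}
+```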
+
+The steady state operation of a virtio device follows a model where the driver
+produces descriptor chains which are consumed by the device, and both parties
+need to be notified when new elements have been placed on the associated ring to
+avoid busy polling. The precise notification mechanism is left up to the VMM
+that incorporates the devices and queues (it usually involves things like MMIO
+vm exits and interrupt injection into the guest). The queue implementation is
+agnostic to the notification mechanism in use, and it exposes methods and
+functionality (such as iterators) that are called from the outside in response
+to a notification event.
+
+### Data transmission using virtqueues
+
+The basic principle of how the queues are used by the device/driver is the
+following, as shown in the diagram below:
+
+1. when the guest driver has a new request (buffer), it allocates free
+ descriptor(s) for the buffer in the descriptor table, chaining as necessary.
+2. the driver adds a new entry with the head index of the descriptor chain
+ describing the request, in the available ring entries.
+3. the driver increments the `idx` with the number of new entries; the diagram
+ shows the simple use case of only one new entry.
+4. the driver sends an available buffer notification to the device if such
+ notifications are not suppressed.
+5. the device will at some point consume that request, by first reading the
+ `idx` field from the available ring. This can be done directly with
+ `Queue::avail_idx`, but we do not recommend that consumers of the crate
+ use it, because it is already called behind the scenes by the iterator
+ over all available descriptor chain heads.
+6. the device gets the index of the descriptor chain(s) corresponding to the
+ read `idx` value.
+7. the device reads the corresponding descriptor(s) from the descriptor table.
+8. the device adds a new entry in the used ring by using `Queue::add_used`; the
+ entry is defined in the spec as `virtq_used_elem`, and in `virtio-queue` as
+ `VirtqUsedElem`. This structure holds both the index of the descriptor
+ chain and the number of bytes that were written to memory as part of
+ serving the request.
+9. the device increments the `idx` from the used ring; this is done as part of
+ the `Queue::add_used` that was mentioned above.
+10. the device sends a used buffer notification to the driver if such
+ notifications are not suppressed.
+
+![queue](https://raw.githubusercontent.com/rust-vmm/vm-virtio/main/crates/virtio-queue/docs/images/queue.png)
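+
+A rough sketch of the device side of steps 5 through 9 is shown below, using
+the iterator and `Queue::add_used` APIs; `queue` and `mem` stand for an already
+configured `Queue` and the guest memory object, and the actual buffer accesses
+are elided.
+
+```rust
+use virtio_queue::{Queue, QueueOwnedT, QueueT};
+use vm_memory::GuestMemoryMmap;
+
+fn process_available(queue: &mut Queue, mem: &GuestMemoryMmap) {
+    // Steps 5/6: `iter` reads the available ring `idx` behind the scenes and
+    // yields one `DescriptorChain` per new available entry.
+    let mut completed = Vec::new();
+    for chain in queue.iter(mem).unwrap() {
+        let head_index = chain.head_index();
+        let mut written = 0u32;
+        // Step 7: walk the descriptors of the chain.
+        for desc in chain {
+            // A real device would read from / write to the buffer at
+            // `desc.addr()` with length `desc.len()` here.
+            if desc.is_write_only() {
+                written += desc.len();
+            }
+        }
+        completed.push((head_index, written));
+    }
+    // Steps 8/9: publish the consumed chains on the used ring; `add_used`
+    // also increments the used ring `idx`.
+    for (head_index, written) in completed {
+        queue.add_used(mem, head_index, written).unwrap();
+    }
+}
+```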
+
+A descriptor stores four fields, with the first two, `addr` and `len`,
+pointing to the data in memory to which the descriptor refers, as shown in the
+diagram below. The `flags` field indicates, for example, whether the
+buffer is device readable or writable, or whether another descriptor is chained
+after this one (VIRTQ_DESC_F_NEXT flag set). The `next` field stores the index
+of the next descriptor if VIRTQ_DESC_F_NEXT is set.
+
+![descriptor](https://raw.githubusercontent.com/rust-vmm/vm-virtio/main/crates/virtio-queue/docs/images/descriptor.png)
+
+**Requirements for device implementation**
+
+* Abstractions from virtio-queue such as `DescriptorChain` can be used to parse
+ descriptors provided by the driver, which represent input or output memory
+ areas for device I/O. A descriptor is essentially an (address, length) pair,
+ which is subsequently used by the device model operation. We do not check the
+ validity of the descriptors, and instead expect any validations to happen
+ when the device implementation is attempting to access the corresponding
+ areas. Early checks can add non-negligible additional costs, and exclusively
+ relying upon them may lead to time-of-check-to-time-of-use race conditions.
+* Before reading from or writing to a buffer, the device should validate that
+ the buffer is device-readable or device-writable, respectively.
+
+## Design
+
+`QueueT` is a trait that allows different queue implementations, for both
+single-threaded and multi-threaded contexts. The
+implementations provided in `virtio-queue` are:
+
+1. `Queue` → used in single-threaded contexts.
+2. `QueueSync` → used in multi-threaded contexts; it is simply
+ a wrapper over an `Arc<Mutex<Queue>>`.
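+
+For instance, choosing between the two could look like the sketch below; it
+assumes `QueueSync` can be cloned to obtain additional handles to the same
+underlying queue, which follows from it being a wrapper over an `Arc`.
+
+```rust
+use virtio_queue::{Queue, QueueSync, QueueT};
+
+fn main() -> Result<(), virtio_queue::Error> {
+    // Single-threaded context: the device owns the queue directly.
+    let _queue = Queue::new(256)?;
+
+    // Multi-threaded context: `QueueSync` wraps an `Arc<Mutex<Queue>>`, so
+    // cloning it yields another handle to the same queue state that can be
+    // moved to a worker thread.
+    let queue_sync = QueueSync::new(256)?;
+    let _for_worker_thread = queue_sync.clone();
+    Ok(())
+}
+```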
+
+Besides the above abstractions, the `virtio-queue` crate also provides the
+following ones:
+
+* `Descriptor` → which mostly offers accessors for the members of the
+ `Descriptor`.
+* `DescriptorChain` → provides accessors for the `DescriptorChain`’s members
+ and an `Iterator` implementation for iterating over the `DescriptorChain`;
+ there is also an abstraction for iterating over just the device readable or
+ just the device writable descriptors (`DescriptorChainRwIter`).
+* `AvailIter` - is a consuming iterator over all available descriptor chain
+ heads in the queue.
+
+## Save/Restore Queue
+
+The `Queue` allows saving the state through the `state` function which returns
+a `QueueState`. `Queue` objects can be created from a previously saved state by
+using `QueueState::try_from`. The VMM should check for errors when restoring
+a `Queue` from a previously saved state.
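+
+A minimal sketch of that flow is shown below; it assumes the restore direction
+goes through the standard `TryFrom` conversion between `QueueState` and
+`Queue`, with the crate's `Error` as the error type.
+
+```rust
+use virtio_queue::{Queue, QueueState, QueueT};
+
+// Snapshot: `state` captures only the externally visible queue state
+// (size, ready bit, ring addresses, next avail/used indices, ...).
+fn snapshot(q: &Queue) -> QueueState {
+    q.state()
+}
+
+// Restore: the conversion validates the saved fields, so the VMM must check
+// the result instead of assuming the saved state is well formed.
+fn restore(saved: QueueState) -> Result<Queue, virtio_queue::Error> {
+    Queue::try_from(saved)
+}
+```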
+
+### Notification suppression
+
+A big part of the `virtio-queue` crate consists of the notification suppression
+support. As already mentioned, the driver can send an available buffer
+notification to the device when there are new entries in the available ring,
+and the device can send a used buffer notification to the driver when there are
+new entries in the used ring. There might be cases when sending a notification
+each time these scenarios happen is not efficient, for example when the driver
+is processing the used ring, it would not need to receive another used buffer
+notification. The mechanism for suppressing the notifications is detailed in
+the following sections from the specification:
+- [Used Buffer Notification Suppression](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-400007),
+- [Available Buffer Notification Suppression](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-4800010).
+
+The `Queue` abstraction proposes the following sequence of steps for
+processing new available ring entries:
+
+1. the device first disables the notifications to make the driver aware it is
+ processing the available ring and does not want interruptions, by using
+ `Queue::disable_notification`. Notifications are disabled by the device
+ either if VIRTIO_F_EVENT_IDX is not negotiated, and VIRTQ_USED_F_NO_NOTIFY
+ is set in the `flags` field of the used ring, or if VIRTIO_F_EVENT_IDX is
+ negotiated, and `avail_event` value is not updated, i.e. it remains set to
+ the latest `idx` value of the available ring that was already notified by
+ the driver.
+2. the device processes the new entries by using the `AvailIter` iterator.
+3. the device can enable the notifications now, by using
+ `Queue::enable_notification`. Notifications are enabled by the device either
+ if VIRTIO_F_EVENT_IDX is not negotiated, and 0 is set in the `flags` field
+ of the used ring, or if VIRTIO_F_EVENT_IDX is negotiated, and `avail_event`
+ value is set to the smallest `idx` value of the available ring that was not
+ already notified by the driver. This way the device makes sure that it won’t
+ miss any notification.
+
+The above steps should be done in a loop to also handle the less likely case
+where the driver added new entries just before we re-enabled notifications.
+
+For the notifications sent by the device to the driver, the `Queue` provides
+the `needs_notification` method, which should be called each time the device
+adds a new entry to the used ring.
+Depending on the `used_event` value and on the last used value
+(`signalled_used`), `needs_notification` returns true to let the device know it
+should send a notification to the guest.
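+
+Put together, a minimal sketch of both directions is shown below. It reuses the
+hypothetical `process_available` helper from the data transmission section
+above, and assumes `enable_notification` returns `true` when the driver added
+new entries while notifications were disabled (so another pass is needed); the
+exact return types should be checked against the crate documentation.
+
+```rust
+use virtio_queue::{Queue, QueueT};
+use vm_memory::GuestMemoryMmap;
+
+// Driver → device direction: drain the available ring with notifications
+// suppressed, looping until nothing new shows up.
+fn handle_available_notification(queue: &mut Queue, mem: &GuestMemoryMmap) {
+    loop {
+        // Step 1: tell the driver we are processing and do not want notifications.
+        queue.disable_notification(mem).unwrap();
+
+        // Step 2: process everything currently on the available ring
+        // (e.g. with the `process_available` sketch from above).
+        process_available(queue, mem);
+
+        // Step 3: re-enable notifications; loop again if new entries were added
+        // in the meantime, so that none of them are missed.
+        if !queue.enable_notification(mem).unwrap() {
+            break;
+        }
+    }
+}
+
+// Device → driver direction: after adding entries to the used ring, only signal
+// the guest when `needs_notification` says so.
+fn maybe_signal_used_buffer(queue: &mut Queue, mem: &GuestMemoryMmap) {
+    if queue.needs_notification(mem).unwrap() {
+        // e.g. trigger the interrupt / write to the irqfd registered for this queue
+    }
+}
+```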
+
+## Assumptions
+
+We assume the users of the `Queue` implementation won’t attempt to use the
+queue before checking that the `ready` bit is set. This can be verified by
+calling `Queue::is_valid` which, besides this, also checks that the three
+queue parts are valid memory regions.
+We assume consumers will use `AvailIter::go_to_previous_position` only in
+single-threaded contexts.
+We assume the users will consume the entries from the available ring in the
+recommended way from the documentation, i.e. device starts processing the
+available ring entries, disables the notifications, processes the entries,
+and then re-enables notifications.
+
+## License
+
+This project is licensed under either of
+
+- [Apache License](http://www.apache.org/licenses/LICENSE-2.0), Version 2.0
+- [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause)
diff --git a/benches/main.rs b/benches/main.rs
new file mode 100644
index 0000000..18266c4
--- /dev/null
+++ b/benches/main.rs
@@ -0,0 +1,21 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+extern crate criterion;
+
+mod queue;
+
+use criterion::{criterion_group, criterion_main, Criterion};
+
+use queue::benchmark_queue;
+
+criterion_group! {
+ name = benches;
+ config = Criterion::default().sample_size(200).measurement_time(std::time::Duration::from_secs(20));
+ targets = benchmark_queue
+}
+
+criterion_main! {
+ benches,
+}
diff --git a/benches/queue/mod.rs b/benches/queue/mod.rs
new file mode 100644
index 0000000..2d24d49
--- /dev/null
+++ b/benches/queue/mod.rs
@@ -0,0 +1,85 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+use criterion::{black_box, BatchSize, Criterion};
+use virtio_queue::{Queue, QueueOwnedT, QueueT};
+use vm_memory::{GuestAddress, GuestMemory, GuestMemoryMmap};
+
+use virtio_queue::mock::MockSplitQueue;
+
+pub fn benchmark_queue(c: &mut Criterion) {
+ fn walk_queue<M: GuestMemory>(q: &mut Queue, mem: &M) -> (usize, usize) {
+ let mut num_chains = 0;
+ let mut num_descriptors = 0;
+
+ q.iter(mem).unwrap().for_each(|chain| {
+ num_chains += 1;
+ chain.for_each(|_| num_descriptors += 1);
+ });
+
+ (num_chains, num_descriptors)
+ }
+
+ fn bench_queue<S, R>(c: &mut Criterion, bench_name: &str, setup: S, mut routine: R)
+ where
+ S: FnMut() -> Queue + Clone,
+ R: FnMut(Queue),
+ {
+ c.bench_function(bench_name, move |b| {
+ b.iter_batched(
+ setup.clone(),
+ |q| routine(black_box(q)),
+ BatchSize::SmallInput,
+ )
+ });
+ }
+
+ let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x0), 0x1_0000_0000)]).unwrap();
+
+ let queue_with_chains = |num_chains, len, indirect| {
+ let mut mq = MockSplitQueue::new(&mem, 256);
+ for _ in 0..num_chains {
+ if indirect {
+ mq.add_indirect_chain(len).unwrap();
+ } else {
+ mq.add_chain(len).unwrap();
+ }
+ }
+ mq.create_queue().unwrap()
+ };
+
+ let empty_queue = || {
+ let mq = MockSplitQueue::new(&mem, 256);
+ mq.create_queue().unwrap()
+ };
+
+ for indirect in [false, true].iter().copied() {
+ bench_queue(
+ c,
+ &format!("single chain (indirect={})", indirect),
+ || queue_with_chains(1, 128, indirect),
+ |mut q| {
+ let (num_chains, num_descriptors) = walk_queue(&mut q, &mem);
+ assert_eq!(num_chains, 1);
+ assert_eq!(num_descriptors, 128);
+ },
+ );
+
+ bench_queue(
+ c,
+ &format!("multiple chains (indirect={})", indirect),
+ || queue_with_chains(128, 1, indirect),
+ |mut q| {
+ let (num_chains, num_descriptors) = walk_queue(&mut q, &mem);
+ assert_eq!(num_chains, 128);
+ assert_eq!(num_descriptors, 128);
+ },
+ );
+ }
+
+ bench_queue(c, "add used", empty_queue, |mut q| {
+ for _ in 0..128 {
+ q.add_used(&mem, 123, 0x1000).unwrap();
+ }
+ });
+}
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..3c5fa7e
--- /dev/null
+++ b/cargo2android.json
@@ -0,0 +1,6 @@
+{
+ "run": true,
+ "dep-suffixes": {
+ "vm_memory": "_android"
+ }
+ }
\ No newline at end of file
diff --git a/docs/TESTING.md b/docs/TESTING.md
new file mode 100644
index 0000000..6bcbb48
--- /dev/null
+++ b/docs/TESTING.md
@@ -0,0 +1,29 @@
+# Testing
+
+The `virtio-queue` crate is tested using:
+- unit tests - defined in their corresponding modules,
+- performance tests - defined in the [benches](../benches) directory. For now,
+ the benchmarks are not run as part of the CI, but they can be run locally.
+
+The crate provides a mocking framework for the driver side of a virtio queue,
+in the [mock](../src/mock.rs) module.
+This module is compiled only when using the `test-utils` feature. To run all
+the unit tests (which include the documentation examples) and the performance
+tests in this crate, you need to specify the `test-utils` feature; otherwise
+the build fails.
+
+```bash
+cargo test --features test-utils
+cargo bench --features test-utils
+cargo test --doc --features test-utils
+```
+
+The mocking framework and the helpers it provides can be used in other crates
+as well in order to test, for example, a specific device implementation. To be
+able to use these test utilities, add the following to your `Cargo.toml` in the
+`[dev-dependencies]` section:
+
+```toml
+[dev-dependencies]
+virtio-queue = { version = "0.1.0", features = ["test-utils"] }
+```
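+
+For example, a device crate test could drive its queue processing code through
+the mock as in the sketch below (illustrative only; it relies on the
+`MockSplitQueue::new`, `add_chain` and `create_queue` helpers that this crate's
+own benchmarks use):
+
+```rust
+use virtio_queue::mock::MockSplitQueue;
+use virtio_queue::{Queue, QueueOwnedT, QueueT};
+use vm_memory::{GuestAddress, GuestMemoryMmap};
+
+#[test]
+fn consumes_one_chain() {
+    let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+    let mut vq = MockSplitQueue::new(&mem, 16);
+
+    // Driver side (mocked): build a 3-descriptor chain and publish it on the
+    // available ring.
+    vq.add_chain(3).unwrap();
+    let mut q: Queue = vq.create_queue().unwrap();
+
+    // Device side under test: consume the chain and mark it as used.
+    let chain = q.iter(&mem).unwrap().next().unwrap();
+    let head_index = chain.head_index();
+    assert_eq!(chain.count(), 3);
+    q.add_used(&mem, head_index, 0).unwrap();
+}
+```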
diff --git a/docs/images/descriptor.png b/docs/images/descriptor.png
new file mode 100644
index 0000000..5056751
--- /dev/null
+++ b/docs/images/descriptor.png
Binary files differ
diff --git a/docs/images/queue.png b/docs/images/queue.png
new file mode 100644
index 0000000..c3eaaf5
--- /dev/null
+++ b/docs/images/queue.png
Binary files differ
diff --git a/src/chain.rs b/src/chain.rs
new file mode 100644
index 0000000..45e0f17
--- /dev/null
+++ b/src/chain.rs
@@ -0,0 +1,504 @@
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::fmt::{self, Debug};
+use std::mem::size_of;
+use std::ops::Deref;
+
+use vm_memory::{Address, Bytes, GuestAddress, GuestMemory};
+
+use crate::{Descriptor, Error};
+use virtio_bindings::bindings::virtio_ring::VRING_DESC_ALIGN_SIZE;
+
+/// A virtio descriptor chain.
+#[derive(Clone, Debug)]
+pub struct DescriptorChain<M> {
+ mem: M,
+ desc_table: GuestAddress,
+ queue_size: u16,
+ head_index: u16,
+ next_index: u16,
+ ttl: u16,
+ yielded_bytes: u32,
+ is_indirect: bool,
+}
+
+impl<M> DescriptorChain<M>
+where
+ M: Deref,
+ M::Target: GuestMemory,
+{
+ fn with_ttl(
+ mem: M,
+ desc_table: GuestAddress,
+ queue_size: u16,
+ ttl: u16,
+ head_index: u16,
+ ) -> Self {
+ DescriptorChain {
+ mem,
+ desc_table,
+ queue_size,
+ head_index,
+ next_index: head_index,
+ ttl,
+ is_indirect: false,
+ yielded_bytes: 0,
+ }
+ }
+
+ /// Create a new `DescriptorChain` instance.
+ ///
+ /// # Arguments
+ /// * `mem` - the `GuestMemory` object that can be used to access the buffers pointed to by the
+ /// descriptor chain.
+ /// * `desc_table` - the address of the descriptor table.
+ /// * `queue_size` - the size of the queue, which is also the maximum size of a descriptor
+ /// chain.
+ /// * `head_index` - the descriptor index of the chain head.
+ pub(crate) fn new(mem: M, desc_table: GuestAddress, queue_size: u16, head_index: u16) -> Self {
+ Self::with_ttl(mem, desc_table, queue_size, queue_size, head_index)
+ }
+
+ /// Get the descriptor index of the chain head.
+ pub fn head_index(&self) -> u16 {
+ self.head_index
+ }
+
+ /// Return a `GuestMemory` object that can be used to access the buffers pointed to by the
+ /// descriptor chain.
+ pub fn memory(&self) -> &M::Target {
+ self.mem.deref()
+ }
+
+ /// Return an iterator that only yields the readable descriptors in the chain.
+ pub fn readable(self) -> DescriptorChainRwIter<M> {
+ DescriptorChainRwIter {
+ chain: self,
+ writable: false,
+ }
+ }
+
+ /// Return an iterator that only yields the writable descriptors in the chain.
+ pub fn writable(self) -> DescriptorChainRwIter<M> {
+ DescriptorChainRwIter {
+ chain: self,
+ writable: true,
+ }
+ }
+
+ // Alters the internal state of the `DescriptorChain` to switch iterating over an
+ // indirect descriptor table defined by `desc`.
+ fn switch_to_indirect_table(&mut self, desc: Descriptor) -> Result<(), Error> {
+ // Check the VIRTQ_DESC_F_INDIRECT flag (i.e., is_indirect) is not set inside
+ // an indirect descriptor.
+ // (see VIRTIO Spec, Section 2.6.5.3.1 Driver Requirements: Indirect Descriptors)
+ if self.is_indirect {
+ return Err(Error::InvalidIndirectDescriptor);
+ }
+
+ // Alignment requirements for vring elements start from virtio 1.0,
+ // but this is not necessary for address of indirect descriptor.
+ if desc.len() & (VRING_DESC_ALIGN_SIZE - 1) != 0 {
+ return Err(Error::InvalidIndirectDescriptorTable);
+ }
+
+ // It is safe to do a plain division since we checked above that desc.len() is a multiple of
+ // VRING_DESC_ALIGN_SIZE, and VRING_DESC_ALIGN_SIZE is != 0.
+ let table_len = desc.len() / VRING_DESC_ALIGN_SIZE;
+ if table_len > u32::from(u16::MAX) {
+ return Err(Error::InvalidIndirectDescriptorTable);
+ }
+
+ self.desc_table = desc.addr();
+ // try_from cannot fail as we've checked table_len above
+ self.queue_size = u16::try_from(table_len).expect("invalid table_len");
+ self.next_index = 0;
+ self.ttl = self.queue_size;
+ self.is_indirect = true;
+
+ Ok(())
+ }
+}
+
+impl<M> Iterator for DescriptorChain<M>
+where
+ M: Deref,
+ M::Target: GuestMemory,
+{
+ type Item = Descriptor;
+
+ /// Return the next descriptor in this descriptor chain, if there is one.
+ ///
+ /// Note that this is distinct from the next descriptor chain returned by
+ /// [`AvailIter`](struct.AvailIter.html), which is the head of the next
+ /// _available_ descriptor chain.
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.ttl == 0 || self.next_index >= self.queue_size {
+ return None;
+ }
+
+ let desc_addr = self
+ .desc_table
+ // The multiplication cannot overflow a u64 since we are multiplying a u16 by a
+ // small number.
+ .checked_add(self.next_index as u64 * size_of::<Descriptor>() as u64)?;
+
+ // The guest device driver should not touch the descriptor once submitted, so it's safe
+ // to use read_obj() here.
+ let desc = self.mem.read_obj::<Descriptor>(desc_addr).ok()?;
+
+ if desc.refers_to_indirect_table() {
+ self.switch_to_indirect_table(desc).ok()?;
+ return self.next();
+ }
+
+ // constructing a chain that is longer than 2^32 bytes is illegal,
+ // let's terminate the iteration if something violated this.
+ // (VIRTIO v1.2, 2.7.5.2: "Drivers MUST NOT add a descriptor chain
+ // longer than 2^32 bytes in total;")
+ match self.yielded_bytes.checked_add(desc.len()) {
+ Some(yielded_bytes) => self.yielded_bytes = yielded_bytes,
+ None => return None,
+ };
+
+ if desc.has_next() {
+ self.next_index = desc.next();
+ // It's ok to decrement `self.ttl` here because we check at the start of the method
+ // that it's greater than 0.
+ self.ttl -= 1;
+ } else {
+ self.ttl = 0;
+ }
+
+ Some(desc)
+ }
+}
+
+/// An iterator for readable or writable descriptors.
+#[derive(Clone)]
+pub struct DescriptorChainRwIter<M> {
+ chain: DescriptorChain<M>,
+ writable: bool,
+}
+
+impl<M> Iterator for DescriptorChainRwIter<M>
+where
+ M: Deref,
+ M::Target: GuestMemory,
+{
+ type Item = Descriptor;
+
+ /// Return the next readable/writeable descriptor (depending on the `writable` value) in this
+ /// descriptor chain, if there is one.
+ ///
+ /// Note that this is distinct from the next descriptor chain returned by
+ /// [`AvailIter`](struct.AvailIter.html), which is the head of the next
+ /// _available_ descriptor chain.
+ fn next(&mut self) -> Option<Self::Item> {
+ loop {
+ match self.chain.next() {
+ Some(v) => {
+ if v.is_write_only() == self.writable {
+ return Some(v);
+ }
+ }
+ None => return None,
+ }
+ }
+ }
+}
+
+// We can't derive Debug, because rustc doesn't generate the `M::Target: Debug` constraint
+impl<M> Debug for DescriptorChainRwIter<M>
+where
+ M: Debug,
+{
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ f.debug_struct("DescriptorChainRwIter")
+ .field("chain", &self.chain)
+ .field("writable", &self.writable)
+ .finish()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::mock::{DescriptorTable, MockSplitQueue};
+ use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT};
+ use vm_memory::GuestMemoryMmap;
+
+ #[test]
+ fn test_checked_new_descriptor_chain() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ assert!(vq.end().0 < 0x1000);
+
+ // index >= queue_size
+ assert!(
+ DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 16)
+ .next()
+ .is_none()
+ );
+
+ // desc_table address is way off
+ assert!(
+ DescriptorChain::<&GuestMemoryMmap>::new(m, GuestAddress(0x00ff_ffff_ffff), 16, 0)
+ .next()
+ .is_none()
+ );
+
+ {
+ // the first desc has a normal len, and the next_descriptor flag is set
+ // but the index of the next descriptor is too large
+ let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 16);
+ vq.desc_table().store(0, desc).unwrap();
+
+ let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0);
+ c.next().unwrap();
+ assert!(c.next().is_none());
+ }
+
+ // finally, let's test an ok chain
+ {
+ let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 1);
+ vq.desc_table().store(0, desc).unwrap();
+
+ let desc = Descriptor::new(0x2000, 0x1000, 0, 0);
+ vq.desc_table().store(1, desc).unwrap();
+
+ let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), 16, 0);
+
+ assert_eq!(
+ c.memory() as *const GuestMemoryMmap,
+ m as *const GuestMemoryMmap
+ );
+
+ assert_eq!(c.desc_table, vq.start());
+ assert_eq!(c.queue_size, 16);
+ assert_eq!(c.ttl, c.queue_size);
+
+ let desc = c.next().unwrap();
+ assert_eq!(desc.addr(), GuestAddress(0x1000));
+ assert_eq!(desc.len(), 0x1000);
+ assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+ assert_eq!(desc.next(), 1);
+ assert_eq!(c.ttl, c.queue_size - 1);
+
+ assert!(c.next().is_some());
+ // The descriptor above was the last from the chain, so `ttl` should be 0 now.
+ assert_eq!(c.ttl, 0);
+ assert!(c.next().is_none());
+ assert_eq!(c.ttl, 0);
+ }
+ }
+
+ #[test]
+ fn test_ttl_wrap_around() {
+ const QUEUE_SIZE: u16 = 16;
+
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x100000)]).unwrap();
+ let vq = MockSplitQueue::new(m, QUEUE_SIZE);
+
+ // Populate the entire descriptor table with entries. Only the last one should not have the
+ // VIRTQ_DESC_F_NEXT set.
+ for i in 0..QUEUE_SIZE - 1 {
+ let desc = Descriptor::new(
+ 0x1000 * (i + 1) as u64,
+ 0x1000,
+ VRING_DESC_F_NEXT as u16,
+ i + 1,
+ );
+ vq.desc_table().store(i, desc).unwrap();
+ }
+ let desc = Descriptor::new((0x1000 * 16) as u64, 0x1000, 0, 0);
+ vq.desc_table().store(QUEUE_SIZE - 1, desc).unwrap();
+
+ let mut c = DescriptorChain::<&GuestMemoryMmap>::new(m, vq.start(), QUEUE_SIZE, 0);
+ assert_eq!(c.ttl, c.queue_size);
+
+ // Validate that `ttl` wraps around even when the entire descriptor table is populated.
+ for i in 0..QUEUE_SIZE {
+ let _desc = c.next().unwrap();
+ assert_eq!(c.ttl, c.queue_size - i - 1);
+ }
+ assert!(c.next().is_none());
+ }
+
+ #[test]
+ fn test_new_from_indirect_descriptor() {
+ // This is testing that chaining an indirect table works as expected. It is also a negative
+ // test for the following requirement from the spec:
+ // `A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags.`. In
+ // case the driver is setting both of these flags, we check that the device doesn't panic.
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+ let dtable = vq.desc_table();
+
+ // Create a chain with one normal descriptor and one pointing to an indirect table.
+ let desc = Descriptor::new(0x6000, 0x1000, VRING_DESC_F_NEXT as u16, 1);
+ dtable.store(0, desc).unwrap();
+ // The spec forbids setting both VIRTQ_DESC_F_INDIRECT and VIRTQ_DESC_F_NEXT in flags. We do
+ // not currently enforce this rule, we just ignore the VIRTQ_DESC_F_NEXT flag.
+ let desc = Descriptor::new(
+ 0x7000,
+ 0x1000,
+ (VRING_DESC_F_INDIRECT | VRING_DESC_F_NEXT) as u16,
+ 2,
+ );
+ dtable.store(1, desc).unwrap();
+ let desc = Descriptor::new(0x8000, 0x1000, 0, 0);
+ dtable.store(2, desc).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> = DescriptorChain::new(m, vq.start(), 16, 0);
+
+ // create an indirect table with 4 chained descriptors
+ let idtable = DescriptorTable::new(m, GuestAddress(0x7000), 4);
+ for i in 0..4u16 {
+ let desc: Descriptor = if i < 3 {
+ Descriptor::new(0x1000 * i as u64, 0x1000, VRING_DESC_F_NEXT as u16, i + 1)
+ } else {
+ Descriptor::new(0x1000 * i as u64, 0x1000, 0, 0)
+ };
+ idtable.store(i, desc).unwrap();
+ }
+
+ assert_eq!(c.head_index(), 0);
+ // Consume the first descriptor.
+ c.next().unwrap();
+
+ // The chain logic hasn't parsed the indirect descriptor yet.
+ assert!(!c.is_indirect);
+
+ // Try to iterate through the indirect descriptor chain.
+ for i in 0..4 {
+ let desc = c.next().unwrap();
+ assert!(c.is_indirect);
+ if i < 3 {
+ assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+ assert_eq!(desc.next(), i + 1);
+ }
+ }
+ // Even though we added a new descriptor after the one that is pointing to the indirect
+ // table, this descriptor won't be available when parsing the chain.
+ assert!(c.next().is_none());
+ }
+
+ #[test]
+ fn test_indirect_descriptor_address_noaligned() {
+ // Alignment requirements for vring elements start from virtio 1.0,
+ // but this is not necessary for address of indirect descriptor.
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+ let dtable = vq.desc_table();
+
+ // Create a chain with a descriptor pointing to an indirect table with unaligned address.
+ let desc = Descriptor::new(
+ 0x7001,
+ 0x1000,
+ (VRING_DESC_F_INDIRECT | VRING_DESC_F_NEXT) as u16,
+ 2,
+ );
+ dtable.store(0, desc).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> = DescriptorChain::new(m, vq.start(), 16, 0);
+
+ // Create an indirect table with 4 chained descriptors.
+ let idtable = DescriptorTable::new(m, GuestAddress(0x7001), 4);
+ for i in 0..4u16 {
+ let desc: Descriptor = if i < 3 {
+ Descriptor::new(0x1000 * i as u64, 0x1000, VRING_DESC_F_NEXT as u16, i + 1)
+ } else {
+ Descriptor::new(0x1000 * i as u64, 0x1000, 0, 0)
+ };
+ idtable.store(i, desc).unwrap();
+ }
+
+ // Try to iterate through the indirect descriptor chain.
+ for i in 0..4 {
+ let desc = c.next().unwrap();
+ assert!(c.is_indirect);
+ if i < 3 {
+ assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+ assert_eq!(desc.next(), i + 1);
+ }
+ }
+ }
+
+ #[test]
+ fn test_indirect_descriptor_err() {
+ // We are testing here different misconfigurations of the indirect table. For these error
+ // case scenarios, the iterator over the descriptor chain won't return a new descriptor.
+ {
+ let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ // Create a chain with a descriptor pointing to an invalid indirect table: len not a
+ // multiple of descriptor size.
+ let desc = Descriptor::new(0x1000, 0x1001, VRING_DESC_F_INDIRECT as u16, 0);
+ vq.desc_table().store(0, desc).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> =
+ DescriptorChain::new(m, vq.start(), 16, 0);
+
+ assert!(c.next().is_none());
+ }
+
+ {
+ let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ // Create a chain with a descriptor pointing to an invalid indirect table: table len >
+ // u16::MAX.
+ let desc = Descriptor::new(
+ 0x1000,
+ (u16::MAX as u32 + 1) * VRING_DESC_ALIGN_SIZE,
+ VRING_DESC_F_INDIRECT as u16,
+ 0,
+ );
+ vq.desc_table().store(0, desc).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> =
+ DescriptorChain::new(m, vq.start(), 16, 0);
+
+ assert!(c.next().is_none());
+ }
+
+ {
+ let m = &GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ // Create a chain with a descriptor pointing to an indirect table.
+ let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_INDIRECT as u16, 0);
+ vq.desc_table().store(0, desc).unwrap();
+ // It's ok for an indirect descriptor to have flags = 0.
+ let desc = Descriptor::new(0x3000, 0x1000, 0, 0);
+ m.write_obj(desc, GuestAddress(0x1000)).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> =
+ DescriptorChain::new(m, vq.start(), 16, 0);
+ assert!(c.next().is_some());
+
+ // But it's not allowed to have an indirect descriptor that points to another indirect
+ // table.
+ let desc = Descriptor::new(0x3000, 0x1000, VRING_DESC_F_INDIRECT as u16, 0);
+ m.write_obj(desc, GuestAddress(0x1000)).unwrap();
+
+ let mut c: DescriptorChain<&GuestMemoryMmap> =
+ DescriptorChain::new(m, vq.start(), 16, 0);
+
+ assert!(c.next().is_none());
+ }
+ }
+}
diff --git a/src/defs.rs b/src/defs.rs
new file mode 100644
index 0000000..3ef2b37
--- /dev/null
+++ b/src/defs.rs
@@ -0,0 +1,38 @@
+// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+//! Virtio queue related constant definitions
+
+/// Size of used ring header: flags (u16) + idx (u16)
+pub(crate) const VIRTQ_USED_RING_HEADER_SIZE: u64 = 4;
+
+/// Size of the used ring metadata: header + avail_event (le16).
+///
+/// The total size of the used ring is:
+/// VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size.
+pub(crate) const VIRTQ_USED_RING_META_SIZE: u64 = VIRTQ_USED_RING_HEADER_SIZE + 2;
+
+/// Size of one element in the used ring, id (le32) + len (le32).
+pub(crate) const VIRTQ_USED_ELEMENT_SIZE: u64 = 8;
+
+/// Size of available ring header: flags(u16) + idx(u16)
+pub(crate) const VIRTQ_AVAIL_RING_HEADER_SIZE: u64 = 4;
+
+/// Size of the available ring metadata: header + used_event (le16).
+///
+/// The total size of the available ring is:
+/// VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size.
+pub(crate) const VIRTQ_AVAIL_RING_META_SIZE: u64 = VIRTQ_AVAIL_RING_HEADER_SIZE + 2;
+
+/// Size of one element in the available ring (le16).
+pub(crate) const VIRTQ_AVAIL_ELEMENT_SIZE: u64 = 2;
+
+/// Default guest physical address for descriptor table.
+pub(crate) const DEFAULT_DESC_TABLE_ADDR: u64 = 0x0;
+
+/// Default guest physical address for available ring.
+pub(crate) const DEFAULT_AVAIL_RING_ADDR: u64 = 0x0;
+
+/// Default guest physical address for used ring.
+pub(crate) const DEFAULT_USED_RING_ADDR: u64 = 0x0;
diff --git a/src/descriptor.rs b/src/descriptor.rs
new file mode 100644
index 0000000..7f1564b
--- /dev/null
+++ b/src/descriptor.rs
@@ -0,0 +1,276 @@
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use vm_memory::{ByteValued, GuestAddress, Le16, Le32, Le64};
+
+use virtio_bindings::bindings::virtio_ring::{
+ VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT, VRING_DESC_F_WRITE,
+};
+
+/// A virtio descriptor, with a C-compatible representation.
+///
+/// # Example
+///
+/// ```rust
+/// # use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
+/// # use virtio_queue::mock::MockSplitQueue;
+/// use virtio_queue::{Descriptor, Queue, QueueOwnedT};
+/// use vm_memory::{GuestAddress, GuestMemoryMmap};
+///
+/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue {
+/// # let vq = MockSplitQueue::new(m, 16);
+/// # let mut q = vq.create_queue().unwrap();
+/// #
+/// # // We have only one chain: (0, 1).
+/// # let desc = Descriptor::new(0x1000, 0x1000, VRING_DESC_F_NEXT as u16, 1);
+/// # vq.desc_table().store(0, desc);
+/// # let desc = Descriptor::new(0x2000, 0x1000, VRING_DESC_F_WRITE as u16, 0);
+/// # vq.desc_table().store(1, desc);
+/// #
+/// # vq.avail().ring().ref_at(0).unwrap().store(u16::to_le(0));
+/// # vq.avail().idx().store(u16::to_le(1));
+/// # q
+/// # }
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// // Populate the queue with descriptor chains and update the available ring accordingly.
+/// let mut queue = populate_queue(m);
+/// let mut i = queue.iter(m).unwrap();
+/// let mut c = i.next().unwrap();
+///
+/// // Get the first descriptor and access its fields.
+/// let desc = c.next().unwrap();
+/// let _addr = desc.addr();
+/// let _len = desc.len();
+/// let _flags = desc.flags();
+/// let _next = desc.next();
+/// let _is_write_only = desc.is_write_only();
+/// let _has_next = desc.has_next();
+/// let _refers_to_ind_table = desc.refers_to_indirect_table();
+/// ```
+// Note that the `ByteValued` implementation of this structure expects the `Descriptor` to store
+// only plain old data types.
+#[repr(C)]
+#[derive(Default, Clone, Copy, Debug)]
+pub struct Descriptor {
+ /// Guest physical address of device specific data.
+ addr: Le64,
+
+ /// Length of device specific data.
+ len: Le32,
+
+ /// Includes next, write, and indirect bits.
+ flags: Le16,
+
+ /// Index into the descriptor table of the next descriptor if flags has the `next` bit set.
+ next: Le16,
+}
+
+#[allow(clippy::len_without_is_empty)]
+impl Descriptor {
+ /// Return the guest physical address of the descriptor buffer.
+ pub fn addr(&self) -> GuestAddress {
+ GuestAddress(self.addr.into())
+ }
+
+ /// Return the length of the descriptor buffer.
+ pub fn len(&self) -> u32 {
+ self.len.into()
+ }
+
+ /// Return the flags for this descriptor, including next, write and indirect bits.
+ pub fn flags(&self) -> u16 {
+ self.flags.into()
+ }
+
+ /// Return the value stored in the `next` field of the descriptor.
+ pub fn next(&self) -> u16 {
+ self.next.into()
+ }
+
+ /// Check whether this descriptor refers to a buffer containing an indirect descriptor table.
+ pub fn refers_to_indirect_table(&self) -> bool {
+ self.flags() & VRING_DESC_F_INDIRECT as u16 != 0
+ }
+
+ /// Check whether the `VIRTQ_DESC_F_NEXT` is set for the descriptor.
+ pub fn has_next(&self) -> bool {
+ self.flags() & VRING_DESC_F_NEXT as u16 != 0
+ }
+
+ /// Check if the driver designated this as a write only descriptor.
+ ///
+ /// If this is false, this descriptor is read only.
+ /// Write only means the emulated device can write and the driver can read.
+ pub fn is_write_only(&self) -> bool {
+ self.flags() & VRING_DESC_F_WRITE as u16 != 0
+ }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+impl Descriptor {
+ /// Create a new descriptor.
+ ///
+ /// # Arguments
+ /// * `addr` - the guest physical address of the descriptor buffer.
+ /// * `len` - the length of the descriptor buffer.
+ /// * `flags` - the `flags` for the descriptor.
+ /// * `next` - the `next` field of the descriptor.
+ pub fn new(addr: u64, len: u32, flags: u16, next: u16) -> Self {
+ Descriptor {
+ addr: addr.into(),
+ len: len.into(),
+ flags: flags.into(),
+ next: next.into(),
+ }
+ }
+
+ /// Set the guest physical address of the descriptor buffer.
+ pub fn set_addr(&mut self, addr: u64) {
+ self.addr = addr.into();
+ }
+
+ /// Set the length of the descriptor buffer.
+ pub fn set_len(&mut self, len: u32) {
+ self.len = len.into();
+ }
+
+ /// Set the flags for this descriptor.
+ pub fn set_flags(&mut self, flags: u16) {
+ self.flags = flags.into();
+ }
+
+ /// Set the value stored in the `next` field of the descriptor.
+ pub fn set_next(&mut self, next: u16) {
+ self.next = next.into();
+ }
+}
+
+// SAFETY: This is safe because `Descriptor` contains only wrappers over POD types and
+// all accesses through safe `vm-memory` API will validate any garbage that could be
+// included in there.
+unsafe impl ByteValued for Descriptor {}
+
+/// Represents the contents of an element from the used virtqueue ring.
+// Note that the `ByteValued` implementation of this structure expects the `VirtqUsedElem` to store
+// only plain old data types.
+#[repr(C)]
+#[derive(Clone, Copy, Default, Debug)]
+pub struct VirtqUsedElem {
+ id: Le32,
+ len: Le32,
+}
+
+impl VirtqUsedElem {
+ /// Create a new `VirtqUsedElem` instance.
+ ///
+ /// # Arguments
+ /// * `id` - the index of the used descriptor chain.
+ /// * `len` - the total length of the descriptor chain which was used (written to).
+ pub(crate) fn new(id: u32, len: u32) -> Self {
+ VirtqUsedElem {
+ id: id.into(),
+ len: len.into(),
+ }
+ }
+}
+
+#[cfg(any(test, feature = "test-utils"))]
+#[allow(clippy::len_without_is_empty)]
+impl VirtqUsedElem {
+ /// Get the index of the used descriptor chain.
+ pub fn id(&self) -> u32 {
+ self.id.into()
+ }
+
+ /// Get `length` field of the used ring entry.
+ pub fn len(&self) -> u32 {
+ self.len.into()
+ }
+}
+
+// SAFETY: This is safe because `VirtqUsedElem` contains only wrappers over POD types
+// and all accesses through safe `vm-memory` API will validate any garbage that could be
+// included in there.
+unsafe impl ByteValued for VirtqUsedElem {}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use memoffset::offset_of;
+ use std::mem::{align_of, size_of};
+
+ #[test]
+ fn test_descriptor_offset() {
+ assert_eq!(size_of::<Descriptor>(), 16);
+ assert_eq!(offset_of!(Descriptor, addr), 0);
+ assert_eq!(offset_of!(Descriptor, len), 8);
+ assert_eq!(offset_of!(Descriptor, flags), 12);
+ assert_eq!(offset_of!(Descriptor, next), 14);
+ assert!(align_of::<Descriptor>() <= 16);
+ }
+
+ #[test]
+ fn test_descriptor_getter_setter() {
+ let mut desc = Descriptor::new(0, 0, 0, 0);
+
+ desc.set_addr(0x1000);
+ assert_eq!(desc.addr(), GuestAddress(0x1000));
+ desc.set_len(0x2000);
+ assert_eq!(desc.len(), 0x2000);
+ desc.set_flags(VRING_DESC_F_NEXT as u16);
+ assert_eq!(desc.flags(), VRING_DESC_F_NEXT as u16);
+ assert!(desc.has_next());
+ assert!(!desc.is_write_only());
+ assert!(!desc.refers_to_indirect_table());
+ desc.set_flags(VRING_DESC_F_WRITE as u16);
+ assert_eq!(desc.flags(), VRING_DESC_F_WRITE as u16);
+ assert!(!desc.has_next());
+ assert!(desc.is_write_only());
+ assert!(!desc.refers_to_indirect_table());
+ desc.set_flags(VRING_DESC_F_INDIRECT as u16);
+ assert_eq!(desc.flags(), VRING_DESC_F_INDIRECT as u16);
+ assert!(!desc.has_next());
+ assert!(!desc.is_write_only());
+ assert!(desc.refers_to_indirect_table());
+ desc.set_next(3);
+ assert_eq!(desc.next(), 3);
+ }
+
+ #[test]
+ fn test_descriptor_copy() {
+ let e1 = Descriptor::new(1, 2, VRING_DESC_F_NEXT as u16, 3);
+ let mut e2 = Descriptor::default();
+
+ e2.as_mut_slice().copy_from_slice(e1.as_slice());
+ assert_eq!(e1.addr(), e2.addr());
+ assert_eq!(e1.len(), e2.len());
+ assert_eq!(e1.flags(), e2.flags());
+ assert_eq!(e1.next(), e2.next());
+ }
+
+ #[test]
+ fn test_used_elem_offset() {
+ assert_eq!(offset_of!(VirtqUsedElem, id), 0);
+ assert_eq!(offset_of!(VirtqUsedElem, len), 4);
+ assert_eq!(size_of::<VirtqUsedElem>(), 8);
+ }
+
+ #[test]
+ fn test_used_elem_copy() {
+ let e1 = VirtqUsedElem::new(3, 15);
+ let mut e2 = VirtqUsedElem::new(0, 0);
+
+ e2.as_mut_slice().copy_from_slice(e1.as_slice());
+ assert_eq!(e1.id, e2.id);
+ assert_eq!(e1.len, e2.len);
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..0e27935
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,267 @@
+// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// Copyright © 2019 Intel Corporation
+//
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+//! Virtio queue API for backend device drivers to access virtio queues.
+
+#![deny(missing_docs)]
+
+use std::fmt::{self, Debug, Display};
+use std::num::Wrapping;
+use std::ops::{Deref, DerefMut};
+use std::sync::atomic::Ordering;
+
+use log::error;
+use vm_memory::{GuestMemory, GuestMemoryError};
+
+pub use self::chain::{DescriptorChain, DescriptorChainRwIter};
+pub use self::descriptor::{Descriptor, VirtqUsedElem};
+pub use self::queue::{AvailIter, Queue};
+pub use self::queue_sync::QueueSync;
+pub use self::state::QueueState;
+
+pub mod defs;
+#[cfg(any(test, feature = "test-utils"))]
+pub mod mock;
+
+mod chain;
+mod descriptor;
+mod queue;
+mod queue_sync;
+mod state;
+
+/// Virtio Queue related errors.
+#[derive(Debug)]
+pub enum Error {
+ /// Address overflow.
+ AddressOverflow,
+ /// Failed to access guest memory.
+ GuestMemory(GuestMemoryError),
+ /// Invalid indirect descriptor.
+ InvalidIndirectDescriptor,
+ /// Invalid indirect descriptor table.
+ InvalidIndirectDescriptorTable,
+ /// Invalid descriptor chain.
+ InvalidChain,
+ /// Invalid descriptor index.
+ InvalidDescriptorIndex,
+ /// Invalid max_size.
+ InvalidMaxSize,
+ /// Invalid Queue Size.
+ InvalidSize,
+ /// Invalid alignment of descriptor table address.
+ InvalidDescTableAlign,
+ /// Invalid alignment of available ring address.
+ InvalidAvailRingAlign,
+ /// Invalid alignment of used ring address.
+ InvalidUsedRingAlign,
+ /// Invalid available ring index.
+ InvalidAvailRingIndex,
+ /// The queue is not ready for operation.
+ QueueNotReady,
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use self::Error::*;
+
+ match self {
+ AddressOverflow => write!(f, "address overflow"),
+ GuestMemory(_) => write!(f, "error accessing guest memory"),
+ InvalidChain => write!(f, "invalid descriptor chain"),
+ InvalidIndirectDescriptor => write!(f, "invalid indirect descriptor"),
+ InvalidIndirectDescriptorTable => write!(f, "invalid indirect descriptor table"),
+ InvalidDescriptorIndex => write!(f, "invalid descriptor index"),
+ InvalidMaxSize => write!(f, "invalid queue maximum size"),
+ InvalidSize => write!(f, "invalid queue size"),
+ InvalidDescTableAlign => write!(
+ f,
+ "virtio queue descriptor table breaks alignment constraints"
+ ),
+ InvalidAvailRingAlign => write!(
+ f,
+ "virtio queue available ring breaks alignment constraints"
+ ),
+ InvalidUsedRingAlign => {
+ write!(f, "virtio queue used ring breaks alignment constraints")
+ }
+ InvalidAvailRingIndex => write!(
+ f,
+ "invalid available ring index (more descriptors to process than queue size)"
+ ),
+ QueueNotReady => write!(f, "trying to process requests on a queue that's not ready"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+/// Trait for objects returned by `QueueT::lock()`.
+pub trait QueueGuard<'a> {
+ /// Type for guard returned by `Self::lock()`.
+ type G: DerefMut<Target = Queue>;
+}
+
+/// Trait to access and manipulate a virtio queue.
+///
+/// To optimize for performance, different implementations of the `QueueT` trait may be
+/// provided for single-threaded context and multi-threaded context.
+///
+/// This trait uses Higher-Rank Trait Bounds (HRTBs) to effectively define an associated type
+/// that has a lifetime parameter, without tagging the `QueueT` trait with a lifetime as well.
+pub trait QueueT: for<'a> QueueGuard<'a> {
+ /// Construct an empty virtio queue state object with the given `max_size`.
+ ///
+ /// Returns an error if `max_size` is invalid.
+ fn new(max_size: u16) -> Result<Self, Error>
+ where
+ Self: Sized;
+
+ /// Check whether the queue configuration is valid.
+ fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool;
+
+ /// Reset the queue to the initial state.
+ fn reset(&mut self);
+
+ /// Get an exclusive reference to the underlying `Queue` object.
+ ///
+ /// Logically this method will acquire the underlying lock protecting the `Queue` object.
+ /// The lock will be released when the returned object gets dropped.
+ fn lock(&mut self) -> <Self as QueueGuard>::G;
+
+ /// Get the maximum size of the virtio queue.
+ fn max_size(&self) -> u16;
+
+ /// Get the actual size configured by the guest.
+ fn size(&self) -> u16;
+
+ /// Configure the queue size for the virtio queue.
+ fn set_size(&mut self, size: u16);
+
+ /// Check whether the queue is ready to be processed.
+ fn ready(&self) -> bool;
+
+ /// Configure the queue to `ready for processing` state.
+ fn set_ready(&mut self, ready: bool);
+
+ /// Set the descriptor table address for the queue.
+ ///
+ /// The descriptor table address is 64-bit; the corresponding part will be updated if `low`
+ /// and/or `high` is `Some` and valid.
+ fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+ /// Set the available ring address for the queue.
+ ///
+ /// The available ring address is 64-bit; the corresponding part will be updated if `low`
+ /// and/or `high` is `Some` and valid.
+ fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+ /// Set the used ring address for the queue.
+ ///
+ /// The used ring address is 64-bit; the corresponding part will be updated if `low`
+ /// and/or `high` is `Some` and valid.
+ fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>);
+
+ /// Enable/disable the VIRTIO_F_RING_EVENT_IDX feature for interrupt coalescing.
+ fn set_event_idx(&mut self, enabled: bool);
+
+ /// Read the `idx` field from the available ring.
+ ///
+ /// # Panics
+ ///
+ /// Panics if order is Release or AcqRel.
+ fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+ where
+ M: GuestMemory + ?Sized;
+
+ /// Read the `idx` field from the used ring.
+ ///
+ /// # Panics
+ ///
+ /// Panics if order is Release or AcqRel.
+ fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>;
+
+ /// Put a used descriptor head into the used ring.
+ fn add_used<M: GuestMemory>(&mut self, mem: &M, head_index: u16, len: u32)
+ -> Result<(), Error>;
+
+ /// Enable notification events from the guest driver.
+ ///
+ /// Return true if one or more descriptors can be consumed from the available ring after
+ /// notifications were enabled (and thus it's possible there will be no corresponding
+ /// notification).
+ fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error>;
+
+ /// Disable notification events from the guest driver.
+ fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error>;
+
+ /// Check whether a notification to the guest is needed.
+ ///
+ /// Please note this method has side effects: once it returns `true`, it assumes the
+ /// driver will actually be notified, remembers the associated index in the used ring, and
+ /// won't return `true` again until the driver updates `used_event` and/or the notification
+ /// conditions hold once more.
+ fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error>;
+
+ /// Return the index of the next entry in the available ring.
+ fn next_avail(&self) -> u16;
+
+ /// Set the index of the next entry in the available ring.
+ fn set_next_avail(&mut self, next_avail: u16);
+
+ /// Return the index for the next descriptor in the used ring.
+ fn next_used(&self) -> u16;
+
+ /// Set the index for the next descriptor in the used ring.
+ fn set_next_used(&mut self, next_used: u16);
+
+ /// Return the address of the descriptor table.
+ fn desc_table(&self) -> u64;
+
+ /// Return the address of the available ring.
+ fn avail_ring(&self) -> u64;
+
+ /// Return the address of the used ring.
+ fn used_ring(&self) -> u64;
+
+ /// Checks whether `VIRTIO_F_RING_EVENT_IDX` is negotiated.
+ ///
+ /// This getter only returns the correct value after the device passes the `FEATURES_OK`
+ /// status.
+ fn event_idx_enabled(&self) -> bool;
+
+ /// Pop and return the next available descriptor chain, or `None` when there are no more
+ /// descriptor chains available.
+ ///
+ /// This enables the consumption of available descriptor chains in a "one at a time"
+ /// manner, without having to hold a borrow after the method returns.
+ fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+ where
+ M: Clone + Deref,
+ M::Target: GuestMemory;
+}
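+
+// Illustrative sketch (not part of the imported sources): device code can stay generic over
+// the queue implementation by bounding on `QueueT` and using `lock()` to reach the
+// underlying `Queue`, whatever the threading model. The helper below is hypothetical:
+//
+//     fn configure<Q: QueueT>(q: &mut Q, size: u16, ready: bool) {
+//         let mut guard = q.lock();
+//         guard.set_size(size);
+//         guard.set_ready(ready);
+//     }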
+
+/// Trait to access and manipulate a Virtio queue that's known to be exclusively accessed
+/// by a single execution thread.
+pub trait QueueOwnedT: QueueT {
+ /// Get a consuming iterator over all available descriptor chain heads offered by the driver.
+ ///
+ /// # Arguments
+ /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers.
+ fn iter<M>(&mut self, mem: M) -> Result<AvailIter<'_, M>, Error>
+ where
+ M: Deref,
+ M::Target: GuestMemory;
+
+ /// Undo the last advancement of the next available index field by decrementing its
+ /// value by one.
+ fn go_to_previous_position(&mut self);
+}
diff --git a/src/mock.rs b/src/mock.rs
new file mode 100644
index 0000000..d026f80
--- /dev/null
+++ b/src/mock.rs
@@ -0,0 +1,500 @@
+// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
+
+//! Utilities used by unit tests and benchmarks for mocking the driver side
+//! of the virtio protocol.
+
+use std::marker::PhantomData;
+use std::mem::size_of;
+
+use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestUsize};
+
+use crate::defs::{VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE};
+use crate::{Descriptor, DescriptorChain, Error, Queue, QueueOwnedT, QueueT, VirtqUsedElem};
+use std::fmt::{self, Debug, Display};
+use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_INDIRECT, VRING_DESC_F_NEXT};
+
+/// Mock related errors.
+#[derive(Debug)]
+pub enum MockError {
+ /// Cannot create the Queue object due to invalid parameters.
+ InvalidQueueParams(Error),
+ /// Invalid Ref index
+ InvalidIndex,
+ /// Invalid next avail
+ InvalidNextAvail,
+}
+
+impl Display for MockError {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use self::MockError::*;
+
+ match self {
+ InvalidQueueParams(_) => write!(f, "cannot create queue due to invalid parameter"),
+ InvalidIndex => write!(
+ f,
+ "invalid index for pointing to an address in a region when defining a Ref object"
+ ),
+ InvalidNextAvail => write!(
+ f,
+ "invalid next available descriptor chain head in the queue"
+ ),
+ }
+ }
+}
+
+impl std::error::Error for MockError {}
+
+/// Wrapper struct used for accessing a particular address of a GuestMemory area.
+pub struct Ref<'a, M, T> {
+ mem: &'a M,
+ addr: GuestAddress,
+ phantom: PhantomData<*const T>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> Ref<'a, M, T> {
+ fn new(mem: &'a M, addr: GuestAddress) -> Self {
+ Ref {
+ mem,
+ addr,
+ phantom: PhantomData,
+ }
+ }
+
+ /// Read an object of type T from the underlying memory found at self.addr.
+ pub fn load(&self) -> T {
+ self.mem.read_obj(self.addr).unwrap()
+ }
+
+ /// Write an object of type T to the underlying memory found at self.addr.
+ pub fn store(&self, val: T) {
+ self.mem.write_obj(val, self.addr).unwrap()
+ }
+}
+
+/// Wrapper struct used for accessing a subregion of a GuestMemory area.
+pub struct ArrayRef<'a, M, T> {
+ mem: &'a M,
+ addr: GuestAddress,
+ len: usize,
+ phantom: PhantomData<*const T>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> ArrayRef<'a, M, T> {
+ fn new(mem: &'a M, addr: GuestAddress, len: usize) -> Self {
+ ArrayRef {
+ mem,
+ addr,
+ len,
+ phantom: PhantomData,
+ }
+ }
+
+ /// Return a `Ref` object pointing to an address defined by a particular
+ /// index offset in the region.
+ pub fn ref_at(&self, index: usize) -> Result<Ref<'a, M, T>, MockError> {
+ if index >= self.len {
+ return Err(MockError::InvalidIndex);
+ }
+
+ let addr = self
+ .addr
+ .checked_add((index * size_of::<T>()) as u64)
+ .unwrap();
+
+ Ok(Ref::new(self.mem, addr))
+ }
+}
+
+/// Represents a virtio queue ring. The only difference between the used and available rings
+/// is the ring element type.
+pub struct SplitQueueRing<'a, M, T: ByteValued> {
+ flags: Ref<'a, M, u16>,
+ // The value stored here should more precisely be a `Wrapping<u16>`, but that would require a
+ // `ByteValued` impl for this type, which is not provided in vm-memory. Implementing the trait
+ // here would require defining a wrapper for `Wrapping<u16>` and that would be too much for a
+ // mock framework that is only used in tests.
+ idx: Ref<'a, M, u16>,
+ ring: ArrayRef<'a, M, T>,
+ // `used_event` for `AvailRing`, `avail_event` for `UsedRing`.
+ event: Ref<'a, M, u16>,
+}
+
+impl<'a, M: GuestMemory, T: ByteValued> SplitQueueRing<'a, M, T> {
+ /// Create a new `SplitQueueRing` instance
+ pub fn new(mem: &'a M, base: GuestAddress, len: u16) -> Self {
+ let event_addr = base
+ .checked_add(4)
+ .and_then(|a| a.checked_add((size_of::<u16>() * len as usize) as u64))
+ .unwrap();
+
+ let split_queue_ring = SplitQueueRing {
+ flags: Ref::new(mem, base),
+ idx: Ref::new(mem, base.checked_add(2).unwrap()),
+ ring: ArrayRef::new(mem, base.checked_add(4).unwrap(), len as usize),
+ event: Ref::new(mem, event_addr),
+ };
+
+ split_queue_ring.flags.store(0);
+ split_queue_ring.idx.store(0);
+ split_queue_ring.event.store(0);
+
+ split_queue_ring
+ }
+
+ /// Return the starting address of the `SplitQueueRing`.
+ pub fn start(&self) -> GuestAddress {
+ self.ring.addr
+ }
+
+ /// Return the end address of the `SplitQueueRing`.
+ pub fn end(&self) -> GuestAddress {
+ self.start()
+ .checked_add(self.ring.len as GuestUsize)
+ .unwrap()
+ }
+
+ /// Return a reference to the idx field.
+ pub fn idx(&self) -> &Ref<'a, M, u16> {
+ &self.idx
+ }
+
+ /// Return a reference to the ring field.
+ pub fn ring(&self) -> &ArrayRef<'a, M, T> {
+ &self.ring
+ }
+}
+
+/// The available ring is used by the driver to offer buffers to the device.
+pub type AvailRing<'a, M> = SplitQueueRing<'a, M, u16>;
+/// The used ring is where the device returns buffers once it is done with them.
+pub type UsedRing<'a, M> = SplitQueueRing<'a, M, VirtqUsedElem>;
+
+/// Refers to the buffers the driver is using for the device.
+pub struct DescriptorTable<'a, M> {
+ table: ArrayRef<'a, M, Descriptor>,
+ len: u16,
+ free_descriptors: Vec<u16>,
+}
+
+impl<'a, M: GuestMemory> DescriptorTable<'a, M> {
+ /// Create a new `DescriptorTable` instance
+ pub fn new(mem: &'a M, addr: GuestAddress, len: u16) -> Self {
+ let table = ArrayRef::new(mem, addr, len as usize);
+ let free_descriptors = (0..len).rev().collect();
+
+ DescriptorTable {
+ table,
+ len,
+ free_descriptors,
+ }
+ }
+
+ /// Read one descriptor from the specified index.
+ pub fn load(&self, index: u16) -> Result<Descriptor, MockError> {
+ self.table
+ .ref_at(index as usize)
+ .map(|load_ref| load_ref.load())
+ }
+
+ /// Write one descriptor at the specified index.
+ pub fn store(&self, index: u16, value: Descriptor) -> Result<(), MockError> {
+ self.table
+ .ref_at(index as usize)
+ .map(|store_ref| store_ref.store(value))
+ }
+
+ /// Return the total size of the DescriptorTable in bytes.
+ pub fn total_size(&self) -> u64 {
+ (self.len as usize * size_of::<Descriptor>()) as u64
+ }
+
+ /// Create a chain of descriptors.
+ pub fn build_chain(&mut self, len: u16) -> Result<u16, MockError> {
+ let indices = self
+ .free_descriptors
+ .iter()
+ .copied()
+ .rev()
+ .take(usize::from(len))
+ .collect::<Vec<_>>();
+
+ assert_eq!(indices.len(), len as usize);
+
+ for (pos, index_value) in indices.iter().copied().enumerate() {
+ // Addresses and lengths are constant for now.
+ let mut desc = Descriptor::new(0x1000, 0x1000, 0, 0);
+
+ // It's not the last descriptor in the chain.
+ if pos < indices.len() - 1 {
+ desc.set_flags(VRING_DESC_F_NEXT as u16);
+ desc.set_next(indices[pos + 1]);
+ } else {
+ desc.set_flags(0);
+ }
+ self.store(index_value, desc)?;
+ }
+
+ Ok(indices[0])
+ }
+}
+
+trait GuestAddressExt {
+ fn align_up(&self, x: GuestUsize) -> GuestAddress;
+}
+
+impl GuestAddressExt for GuestAddress {
+ fn align_up(&self, x: GuestUsize) -> GuestAddress {
+ Self((self.0 + (x - 1)) & !(x - 1))
+ }
+}
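+
+// Worked example (informational note, not from the imported sources): with the formula in
+// `align_up` above, aligning 0x1001 up to a 4-byte boundary gives
+// (0x1001 + 3) & !3 = 0x1004, while an already aligned 0x1000 stays at 0x1000.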
+
+/// A mock version of the virtio queue implemented from the perspective of the driver.
+pub struct MockSplitQueue<'a, M> {
+ mem: &'a M,
+ len: u16,
+ desc_table_addr: GuestAddress,
+ desc_table: DescriptorTable<'a, M>,
+ avail_addr: GuestAddress,
+ avail: AvailRing<'a, M>,
+ used_addr: GuestAddress,
+ used: UsedRing<'a, M>,
+ indirect_addr: GuestAddress,
+}
+
+impl<'a, M: GuestMemory> MockSplitQueue<'a, M> {
+ /// Create a new `MockSplitQueue` instance with 0 as the default guest
+ /// physical starting address.
+ pub fn new(mem: &'a M, len: u16) -> Self {
+ Self::create(mem, GuestAddress(0), len)
+ }
+
+ /// Create a new `MockSplitQueue` instance.
+ pub fn create(mem: &'a M, start: GuestAddress, len: u16) -> Self {
+ const AVAIL_ALIGN: GuestUsize = 2;
+ const USED_ALIGN: GuestUsize = 4;
+
+ let desc_table_addr = start;
+ let desc_table = DescriptorTable::new(mem, desc_table_addr, len);
+
+ let avail_addr = start
+ .checked_add(16 * len as GuestUsize)
+ .unwrap()
+ .align_up(AVAIL_ALIGN);
+ let avail = AvailRing::new(mem, avail_addr, len);
+
+ let used_addr = avail.end().align_up(USED_ALIGN);
+ let used = UsedRing::new(mem, used_addr, len);
+
+ let indirect_addr = GuestAddress(0x3000_0000);
+
+ MockSplitQueue {
+ mem,
+ len,
+ desc_table_addr,
+ desc_table,
+ avail_addr,
+ avail,
+ used_addr,
+ used,
+ indirect_addr,
+ }
+ }
+
+ /// Return the starting address of the queue.
+ pub fn start(&self) -> GuestAddress {
+ self.desc_table_addr
+ }
+
+ /// Return the end address of the queue.
+ pub fn end(&self) -> GuestAddress {
+ self.used.end()
+ }
+
+ /// Descriptor table accessor.
+ pub fn desc_table(&self) -> &DescriptorTable<'a, M> {
+ &self.desc_table
+ }
+
+ /// Available ring accessor.
+ pub fn avail(&self) -> &AvailRing<M> {
+ &self.avail
+ }
+
+ /// Used ring accessor.
+ pub fn used(&self) -> &UsedRing<M> {
+ &self.used
+ }
+
+ /// Return the starting address of the descriptor table.
+ pub fn desc_table_addr(&self) -> GuestAddress {
+ self.desc_table_addr
+ }
+
+ /// Return the starting address of the available ring.
+ pub fn avail_addr(&self) -> GuestAddress {
+ self.avail_addr
+ }
+
+ /// Return the starting address of the used ring.
+ pub fn used_addr(&self) -> GuestAddress {
+ self.used_addr
+ }
+
+ fn update_avail_idx(&mut self, value: u16) -> Result<(), MockError> {
+ let avail_idx = self.avail.idx.load();
+ self.avail.ring.ref_at(avail_idx as usize)?.store(value);
+ self.avail.idx.store(avail_idx.wrapping_add(1));
+ Ok(())
+ }
+
+ fn alloc_indirect_chain(&mut self, len: u16) -> Result<GuestAddress, MockError> {
+ // To simplify things for now, we round up the table len to a multiple of 16. When this is
+ // no longer the case, we should make sure the starting address of the descriptor table
+ // we're creating below is properly aligned.
+
+ let table_len = if len % 16 == 0 {
+ len
+ } else {
+ 16 * (len / 16 + 1)
+ };
+
+ let mut table = DescriptorTable::new(self.mem, self.indirect_addr, table_len);
+ let head_descriptor_index = table.build_chain(len)?;
+ // When building indirect descriptor tables, the descriptor at index 0 is supposed to be
+ // first in the resulting chain. Just making sure our logic actually makes that happen.
+ assert_eq!(head_descriptor_index, 0);
+
+ let table_addr = self.indirect_addr;
+ self.indirect_addr = self.indirect_addr.checked_add(table.total_size()).unwrap();
+ Ok(table_addr)
+ }
+
+ /// Add a descriptor chain to the table.
+ pub fn add_chain(&mut self, len: u16) -> Result<(), MockError> {
+ self.desc_table
+ .build_chain(len)
+ .and_then(|head_idx| self.update_avail_idx(head_idx))
+ }
+
+ /// Add an indirect descriptor chain to the table.
+ pub fn add_indirect_chain(&mut self, len: u16) -> Result<(), MockError> {
+ let head_idx = self.desc_table.build_chain(1)?;
+
+ // We just allocate the indirect table and forget about it for now.
+ let indirect_addr = self.alloc_indirect_chain(len)?;
+
+ let mut desc = self.desc_table.load(head_idx)?;
+ desc.set_flags(VRING_DESC_F_INDIRECT as u16);
+ desc.set_addr(indirect_addr.raw_value());
+ desc.set_len(u32::from(len) * size_of::<Descriptor>() as u32);
+
+ self.desc_table.store(head_idx, desc)?;
+ self.update_avail_idx(head_idx)
+ }
+
+ /// Creates a new `Queue`, using the underlying memory regions represented
+ /// by the `MockSplitQueue`.
+ pub fn create_queue<Q: QueueT>(&self) -> Result<Q, Error> {
+ let mut q = Q::new(self.len)?;
+ q.set_size(self.len);
+ q.set_ready(true);
+ // we cannot directly set the u64 address, we need to compose it from low & high.
+ q.set_desc_table_address(
+ Some(self.desc_table_addr.0 as u32),
+ Some((self.desc_table_addr.0 >> 32) as u32),
+ );
+ q.set_avail_ring_address(
+ Some(self.avail_addr.0 as u32),
+ Some((self.avail_addr.0 >> 32) as u32),
+ );
+ q.set_used_ring_address(
+ Some(self.used_addr.0 as u32),
+ Some((self.used_addr.0 >> 32) as u32),
+ );
+ Ok(q)
+ }
+
+ /// Writes multiple descriptor chains to the memory object of the queue, at the beginning of
+ /// the descriptor table, and returns the first `DescriptorChain` available.
+ pub fn build_multiple_desc_chains(
+ &self,
+ descs: &[Descriptor],
+ ) -> Result<DescriptorChain<&M>, MockError> {
+ self.add_desc_chains(descs, 0)?;
+ self.create_queue::<Queue>()
+ .map_err(MockError::InvalidQueueParams)?
+ .iter(self.mem)
+ .unwrap()
+ .next()
+ .ok_or(MockError::InvalidNextAvail)
+ }
+
+ /// Writes a single descriptor chain to the memory object of the queue, at the beginning of the
+ /// descriptor table, and returns the associated `DescriptorChain` object.
+ // This method ensures the next flags and values are set properly for the desired chain, but
+ // keeps the other characteristics of the input descriptors (`addr`, `len`, other flags).
+ // TODO: make this function work with a generic queue. For now that's not possible because
+ // we cannot create the descriptor chain from an iterator as iterator is not implemented for
+ // a generic T, just for `Queue`.
+ pub fn build_desc_chain(&self, descs: &[Descriptor]) -> Result<DescriptorChain<&M>, MockError> {
+ let mut modified_descs: Vec<Descriptor> = Vec::with_capacity(descs.len());
+ for (idx, desc) in descs.iter().enumerate() {
+ let (flags, next) = if idx == descs.len() - 1 {
+ // Clear the NEXT flag if it was set. The value of the next field of the
+ // Descriptor doesn't matter at this point.
+ (desc.flags() & !VRING_DESC_F_NEXT as u16, 0)
+ } else {
+ // Ensure that the next flag is set and that we are referring to the following
+ // descriptor. This ignores any value actually present in `desc.next`.
+ (desc.flags() | VRING_DESC_F_NEXT as u16, idx as u16 + 1)
+ };
+ modified_descs.push(Descriptor::new(desc.addr().0, desc.len(), flags, next));
+ }
+ self.build_multiple_desc_chains(&modified_descs[..])
+ }
+
+ /// Adds descriptor chains to the memory object of the queue.
+ // `descs` represents a slice of `Descriptor` objects which are used to populate the chains, and
+ // `offset` is the index in the descriptor table where the chains should be added.
+ // The descriptor chain related information is written in memory starting with address 0.
+ // The `addr` fields of the input descriptors should start at a sufficiently
+ // greater location (e.g. 1 MiB, or `0x10_0000`).
+ pub fn add_desc_chains(&self, descs: &[Descriptor], offset: u16) -> Result<(), MockError> {
+ let mut new_entries = 0;
+ let avail_idx: u16 = self
+ .mem
+ .read_obj::<u16>(self.avail_addr().unchecked_add(2))
+ .map(u16::from_le)
+ .unwrap();
+
+ for (idx, desc) in descs.iter().enumerate() {
+ let i = idx as u16 + offset;
+ self.desc_table().store(i, *desc)?;
+
+ if idx == 0 || descs[idx - 1].flags() & VRING_DESC_F_NEXT as u16 != 1 {
+ // Update the available ring position.
+ self.mem
+ .write_obj(
+ u16::to_le(i),
+ self.avail_addr().unchecked_add(
+ VIRTQ_AVAIL_RING_HEADER_SIZE
+ + (avail_idx + new_entries) as u64 * VIRTQ_AVAIL_ELEMENT_SIZE,
+ ),
+ )
+ .unwrap();
+ new_entries += 1;
+ }
+ }
+
+ // Increment `avail_idx`.
+ self.mem
+ .write_obj(
+ u16::to_le(avail_idx + new_entries),
+ self.avail_addr().unchecked_add(2),
+ )
+ .unwrap();
+
+ Ok(())
+ }
+}
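+
+// Illustrative sketch (not part of the imported sources): a typical unit test drives the
+// mock from the driver side and then consumes the chains from the device side. The exact
+// assertions are hypothetical:
+//
+//     use vm_memory::{GuestAddress, GuestMemoryMmap};
+//
+//     let mem = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+//     let mut vq = MockSplitQueue::new(&mem, 16);
+//     vq.add_chain(3).unwrap();                      // the "driver" offers a 3-descriptor chain
+//     let mut q: Queue = vq.create_queue().unwrap(); // device-side view of the same queue
+//     let chain = q.pop_descriptor_chain(&mem).unwrap();
+//     assert_eq!(chain.count(), 3);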
diff --git a/src/queue.rs b/src/queue.rs
new file mode 100644
index 0000000..4a69b13
--- /dev/null
+++ b/src/queue.rs
@@ -0,0 +1,1597 @@
+// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// Copyright (C) 2020-2021 Alibaba Cloud. All rights reserved.
+// Copyright © 2019 Intel Corporation.
+// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE-BSD-3-Clause file.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::mem::size_of;
+use std::num::Wrapping;
+use std::ops::Deref;
+use std::sync::atomic::{fence, Ordering};
+
+use vm_memory::{Address, Bytes, GuestAddress, GuestMemory};
+
+use crate::defs::{
+ DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR,
+ VIRTQ_AVAIL_ELEMENT_SIZE, VIRTQ_AVAIL_RING_HEADER_SIZE, VIRTQ_AVAIL_RING_META_SIZE,
+ VIRTQ_USED_ELEMENT_SIZE, VIRTQ_USED_RING_HEADER_SIZE, VIRTQ_USED_RING_META_SIZE,
+};
+use crate::{
+ error, Descriptor, DescriptorChain, Error, QueueGuard, QueueOwnedT, QueueState, QueueT,
+ VirtqUsedElem,
+};
+use virtio_bindings::bindings::virtio_ring::VRING_USED_F_NO_NOTIFY;
+
+/// The maximum queue size as defined in the Virtio Spec.
+pub const MAX_QUEUE_SIZE: u16 = 32768;
+
+/// Struct to maintain information and manipulate a virtio queue.
+///
+/// # Example
+///
+/// ```rust
+/// use virtio_queue::{Queue, QueueOwnedT, QueueT};
+/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+///
+/// let m = GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// let mut queue = Queue::new(1024).unwrap();
+///
+/// // First, the driver sets up the queue; this set up is done via writes on the bus (PCI, MMIO).
+/// queue.set_size(8);
+/// queue.set_desc_table_address(Some(0x1000), None);
+/// queue.set_avail_ring_address(Some(0x2000), None);
+/// queue.set_used_ring_address(Some(0x3000), None);
+/// queue.set_event_idx(true);
+/// queue.set_ready(true);
+/// // The user should check if the queue is valid before starting to use it.
+/// assert!(queue.is_valid(&m));
+///
+/// // Here the driver would add entries in the available ring and then update the `idx` field of
+/// // the available ring (address = 0x2000 + 2).
+/// m.write_obj(3, GuestAddress(0x2002));
+///
+/// loop {
+/// queue.disable_notification(&m).unwrap();
+///
+/// // Consume entries from the available ring.
+/// while let Some(chain) = queue.iter(&m).unwrap().next() {
+/// // Process the descriptor chain, and then add an entry in the used ring and optionally
+/// // notify the driver.
+/// queue.add_used(&m, chain.head_index(), 0x100).unwrap();
+///
+/// if queue.needs_notification(&m).unwrap() {
+/// // Here we would notify the driver it has new entries in the used ring to consume.
+/// }
+/// }
+/// if !queue.enable_notification(&m).unwrap() {
+/// break;
+/// }
+/// }
+///
+/// // We can reset the queue at some point.
+/// queue.reset();
+/// // The queue should not be ready after reset.
+/// assert!(!queue.ready());
+/// ```
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct Queue {
+ /// The maximum size in elements offered by the device.
+ max_size: u16,
+
+ /// Tail position of the available ring.
+ next_avail: Wrapping<u16>,
+
+ /// Head position of the used ring.
+ next_used: Wrapping<u16>,
+
+ /// VIRTIO_F_RING_EVENT_IDX negotiated.
+ event_idx_enabled: bool,
+
+ /// The number of descriptor chains placed in the used ring via `add_used`
+ /// since the last time `needs_notification` was called on the associated queue.
+ num_added: Wrapping<u16>,
+
+ /// The queue size in elements the driver selected.
+ size: u16,
+
+ /// Indicates if the queue is finished with configuration.
+ ready: bool,
+
+ /// Guest physical address of the descriptor table.
+ desc_table: GuestAddress,
+
+ /// Guest physical address of the available ring.
+ avail_ring: GuestAddress,
+
+ /// Guest physical address of the used ring.
+ used_ring: GuestAddress,
+}
+
+impl Queue {
+ /// Equivalent of [`QueueT::set_size`] returning an error in case of invalid size.
+ ///
+ /// This should not be directly used, as the preferred method is part of the [`QueueT`]
+ /// interface. This is a convenience function for implementing save/restore capabilities.
+ pub fn try_set_size(&mut self, size: u16) -> Result<(), Error> {
+ if size > self.max_size() || size == 0 || (size & (size - 1)) != 0 {
+ return Err(Error::InvalidSize);
+ }
+ self.size = size;
+ Ok(())
+ }
+
+ /// Tries to set the descriptor table address. In case of an invalid value, the address is
+ /// not updated.
+ ///
+ /// This should not be directly used, as the preferred method is
+ /// [`QueueT::set_desc_table_address`]. This is a convenience function for implementing
+ /// save/restore capabilities.
+ pub fn try_set_desc_table_address(&mut self, desc_table: GuestAddress) -> Result<(), Error> {
+ if desc_table.mask(0xf) != 0 {
+ return Err(Error::InvalidDescTableAlign);
+ }
+ self.desc_table = desc_table;
+
+ Ok(())
+ }
+
+ /// Tries to update the available ring address. In case of an invalid value, the address is
+ /// not updated.
+ ///
+ /// This should not be directly used, as the preferred method is
+ /// [`QueueT::set_avail_ring_address`]. This is a convenience function for implementing
+ /// save/restore capabilities.
+ pub fn try_set_avail_ring_address(&mut self, avail_ring: GuestAddress) -> Result<(), Error> {
+ if avail_ring.mask(0x1) != 0 {
+ return Err(Error::InvalidAvailRingAlign);
+ }
+ self.avail_ring = avail_ring;
+ Ok(())
+ }
+
+ /// Tries to update the used ring address. In case of an invalid value, the address is not
+ /// updated.
+ ///
+ /// This should not be directly used, as the preferred method is
+ /// [`QueueT::set_used_ring_address`]. This is a convenience function for implementing
+ /// save/restore capabilities.
+ pub fn try_set_used_ring_address(&mut self, used_ring: GuestAddress) -> Result<(), Error> {
+ if used_ring.mask(0x3) != 0 {
+ return Err(Error::InvalidUsedRingAlign);
+ }
+ self.used_ring = used_ring;
+ Ok(())
+ }
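+
+ // Informational note (not from the imported sources): the masks above encode the split
+ // virtqueue alignment rules: the descriptor table must be 16-byte aligned (mask 0xf),
+ // the available ring 2-byte aligned (mask 0x1), and the used ring 4-byte aligned
+ // (mask 0x3). For example, a descriptor table at 0x1008 is rejected because
+ // 0x1008 & 0xf == 0x8, while 0x1010 passes the check.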
+
+ /// Returns the state of the `Queue`.
+ ///
+ /// This is useful for implementing save/restore capabilities.
+ /// The state does not have support for serialization, but this can be
+ /// added by VMMs locally through the use of a
+ /// [remote type](https://serde.rs/remote-derive.html).
+ ///
+ /// Alternatively, a version aware and serializable/deserializable QueueState
+ /// is available in the `virtio-queue-ser` crate.
+ pub fn state(&self) -> QueueState {
+ QueueState {
+ max_size: self.max_size,
+ next_avail: self.next_avail(),
+ next_used: self.next_used(),
+ event_idx_enabled: self.event_idx_enabled,
+ size: self.size,
+ ready: self.ready,
+ desc_table: self.desc_table(),
+ avail_ring: self.avail_ring(),
+ used_ring: self.used_ring(),
+ }
+ }
+
+ // Helper method that writes `val` to the `avail_event` field of the used ring, using
+ // the provided ordering.
+ fn set_avail_event<M: GuestMemory>(
+ &self,
+ mem: &M,
+ val: u16,
+ order: Ordering,
+ ) -> Result<(), Error> {
+ // This can not overflow an u64 since it is working with relatively small numbers compared
+ // to u64::MAX.
+ let avail_event_offset =
+ VIRTQ_USED_RING_HEADER_SIZE + VIRTQ_USED_ELEMENT_SIZE * u64::from(self.size);
+ let addr = self
+ .used_ring
+ .checked_add(avail_event_offset)
+ .ok_or(Error::AddressOverflow)?;
+
+ mem.store(u16::to_le(val), addr, order)
+ .map_err(Error::GuestMemory)
+ }
+
+ // Set the value of the `flags` field of the used ring, applying the specified ordering.
+ fn set_used_flags<M: GuestMemory>(
+ &mut self,
+ mem: &M,
+ val: u16,
+ order: Ordering,
+ ) -> Result<(), Error> {
+ mem.store(u16::to_le(val), self.used_ring, order)
+ .map_err(Error::GuestMemory)
+ }
+
+ // Write the appropriate values to enable or disable notifications from the driver.
+ //
+ // Every access in this method uses `Relaxed` ordering because a fence is added by the caller
+ // when appropriate.
+ fn set_notification<M: GuestMemory>(&mut self, mem: &M, enable: bool) -> Result<(), Error> {
+ if enable {
+ if self.event_idx_enabled {
+ // We call `set_avail_event` using the `next_avail` value, instead of reading
+ // and using the current `avail_idx` to avoid missing notifications. More
+ // details in `enable_notification`.
+ self.set_avail_event(mem, self.next_avail.0, Ordering::Relaxed)
+ } else {
+ self.set_used_flags(mem, 0, Ordering::Relaxed)
+ }
+ } else if !self.event_idx_enabled {
+ self.set_used_flags(mem, VRING_USED_F_NO_NOTIFY as u16, Ordering::Relaxed)
+ } else {
+ // Notifications are effectively disabled by default after triggering once when
+ // `VIRTIO_F_EVENT_IDX` is negotiated, so we don't do anything in that case.
+ Ok(())
+ }
+ }
+
+ // Return the value present in the used_event field of the avail ring.
+ //
+ // If the VIRTIO_F_EVENT_IDX feature bit is not negotiated, the flags field in the available
+ // ring offers a crude mechanism for the driver to inform the device that it doesn’t want
+ // interrupts when buffers are used. Otherwise virtq_avail.used_event is a more performant
+ // alternative where the driver specifies how far the device can progress before interrupting.
+ //
+ // Neither of these interrupt suppression methods are reliable, as they are not synchronized
+ // with the device, but they serve as useful optimizations. So we only ensure access to the
+ // virtq_avail.used_event is atomic, but do not need to synchronize with other memory accesses.
+ fn used_event<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+ // This can not overflow an u64 since it is working with relatively small numbers compared
+ // to u64::MAX.
+ let used_event_offset =
+ VIRTQ_AVAIL_RING_HEADER_SIZE + u64::from(self.size) * VIRTQ_AVAIL_ELEMENT_SIZE;
+ let used_event_addr = self
+ .avail_ring
+ .checked_add(used_event_offset)
+ .ok_or(Error::AddressOverflow)?;
+
+ mem.load(used_event_addr, order)
+ .map(u16::from_le)
+ .map(Wrapping)
+ .map_err(Error::GuestMemory)
+ }
+}
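+
+// Worked example (informational note, not from the imported sources): for a queue of size 8,
+// `used_event` sits right after the available ring entries, at avail_ring + 4 + 2 * 8 =
+// avail_ring + 20, and `avail_event` right after the used ring entries, at
+// used_ring + 4 + 8 * 8 = used_ring + 68; these are exactly the offsets computed by
+// `used_event()` and `set_avail_event()` above.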
+
+impl<'a> QueueGuard<'a> for Queue {
+ type G = &'a mut Self;
+}
+
+impl QueueT for Queue {
+ fn new(max_size: u16) -> Result<Self, Error> {
+ // We need to check that the max size is a power of 2 because we're setting this as the
+ // queue size, and valid queue sizes are powers of 2 as per the specification.
+ if max_size == 0 || max_size > MAX_QUEUE_SIZE || (max_size & (max_size - 1)) != 0 {
+ return Err(Error::InvalidMaxSize);
+ }
+ Ok(Queue {
+ max_size,
+ size: max_size,
+ ready: false,
+ desc_table: GuestAddress(DEFAULT_DESC_TABLE_ADDR),
+ avail_ring: GuestAddress(DEFAULT_AVAIL_RING_ADDR),
+ used_ring: GuestAddress(DEFAULT_USED_RING_ADDR),
+ next_avail: Wrapping(0),
+ next_used: Wrapping(0),
+ event_idx_enabled: false,
+ num_added: Wrapping(0),
+ })
+ }
+
+ fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool {
+ let queue_size = self.size as u64;
+ let desc_table = self.desc_table;
+ // The multiplication can not overflow an u64 since we are multiplying an u16 with a
+ // small number.
+ let desc_table_size = size_of::<Descriptor>() as u64 * queue_size;
+ let avail_ring = self.avail_ring;
+ // The operations below can not overflow an u64 since they're working with relatively small
+ // numbers compared to u64::MAX.
+ let avail_ring_size = VIRTQ_AVAIL_RING_META_SIZE + VIRTQ_AVAIL_ELEMENT_SIZE * queue_size;
+ let used_ring = self.used_ring;
+ let used_ring_size = VIRTQ_USED_RING_META_SIZE + VIRTQ_USED_ELEMENT_SIZE * queue_size;
+
+ if !self.ready {
+ error!("attempt to use virtio queue that is not marked ready");
+ false
+ } else if desc_table
+ .checked_add(desc_table_size)
+ .map_or(true, |v| !mem.address_in_range(v))
+ {
+ error!(
+ "virtio queue descriptor table goes out of bounds: start:0x{:08x} size:0x{:08x}",
+ desc_table.raw_value(),
+ desc_table_size
+ );
+ false
+ } else if avail_ring
+ .checked_add(avail_ring_size)
+ .map_or(true, |v| !mem.address_in_range(v))
+ {
+ error!(
+ "virtio queue available ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
+ avail_ring.raw_value(),
+ avail_ring_size
+ );
+ false
+ } else if used_ring
+ .checked_add(used_ring_size)
+ .map_or(true, |v| !mem.address_in_range(v))
+ {
+ error!(
+ "virtio queue used ring goes out of bounds: start:0x{:08x} size:0x{:08x}",
+ used_ring.raw_value(),
+ used_ring_size
+ );
+ false
+ } else {
+ true
+ }
+ }
+
+ fn reset(&mut self) {
+ self.ready = false;
+ self.size = self.max_size;
+ self.desc_table = GuestAddress(DEFAULT_DESC_TABLE_ADDR);
+ self.avail_ring = GuestAddress(DEFAULT_AVAIL_RING_ADDR);
+ self.used_ring = GuestAddress(DEFAULT_USED_RING_ADDR);
+ self.next_avail = Wrapping(0);
+ self.next_used = Wrapping(0);
+ self.num_added = Wrapping(0);
+ self.event_idx_enabled = false;
+ }
+
+ fn lock(&mut self) -> <Self as QueueGuard>::G {
+ self
+ }
+
+ fn max_size(&self) -> u16 {
+ self.max_size
+ }
+
+ fn size(&self) -> u16 {
+ self.size
+ }
+
+ fn set_size(&mut self, size: u16) {
+ if self.try_set_size(size).is_err() {
+ error!("virtio queue with invalid size: {}", size);
+ }
+ }
+
+ fn ready(&self) -> bool {
+ self.ready
+ }
+
+ fn set_ready(&mut self, ready: bool) {
+ self.ready = ready;
+ }
+
+ fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ let low = low.unwrap_or(self.desc_table.0 as u32) as u64;
+ let high = high.unwrap_or((self.desc_table.0 >> 32) as u32) as u64;
+
+ let desc_table = GuestAddress((high << 32) | low);
+ if self.try_set_desc_table_address(desc_table).is_err() {
+ error!("virtio queue descriptor table breaks alignment constraints");
+ }
+ }
+
+ fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ let low = low.unwrap_or(self.avail_ring.0 as u32) as u64;
+ let high = high.unwrap_or((self.avail_ring.0 >> 32) as u32) as u64;
+
+ let avail_ring = GuestAddress((high << 32) | low);
+ if self.try_set_avail_ring_address(avail_ring).is_err() {
+ error!("virtio queue available ring breaks alignment constraints");
+ }
+ }
+
+ fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ let low = low.unwrap_or(self.used_ring.0 as u32) as u64;
+ let high = high.unwrap_or((self.used_ring.0 >> 32) as u32) as u64;
+
+ let used_ring = GuestAddress((high << 32) | low);
+ if self.try_set_used_ring_address(used_ring).is_err() {
+ error!("virtio queue used ring breaks alignment constraints");
+ }
+ }
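+
+ // Worked example (informational note, not from the imported sources): the three setters
+ // above rebuild the 64-bit guest address from the two 32-bit halves written on the bus,
+ // so `set_desc_table_address(Some(0x4000), Some(0x1))` yields
+ // (0x1 << 32) | 0x4000 = 0x1_0000_4000, while passing `None` for a half keeps the
+ // previously programmed value of that half.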
+
+ fn set_event_idx(&mut self, enabled: bool) {
+ self.event_idx_enabled = enabled;
+ }
+
+ fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+ where
+ M: GuestMemory + ?Sized,
+ {
+ let addr = self
+ .avail_ring
+ .checked_add(2)
+ .ok_or(Error::AddressOverflow)?;
+
+ mem.load(addr, order)
+ .map(u16::from_le)
+ .map(Wrapping)
+ .map_err(Error::GuestMemory)
+ }
+
+ fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+ let addr = self
+ .used_ring
+ .checked_add(2)
+ .ok_or(Error::AddressOverflow)?;
+
+ mem.load(addr, order)
+ .map(u16::from_le)
+ .map(Wrapping)
+ .map_err(Error::GuestMemory)
+ }
+
+ fn add_used<M: GuestMemory>(
+ &mut self,
+ mem: &M,
+ head_index: u16,
+ len: u32,
+ ) -> Result<(), Error> {
+ if head_index >= self.size {
+ error!(
+ "attempted to add out of bounds descriptor to used ring: {}",
+ head_index
+ );
+ return Err(Error::InvalidDescriptorIndex);
+ }
+
+ let next_used_index = u64::from(self.next_used.0 % self.size);
+ // This can not overflow an u64 since it is working with relatively small numbers compared
+ // to u64::MAX.
+ let offset = VIRTQ_USED_RING_HEADER_SIZE + next_used_index * VIRTQ_USED_ELEMENT_SIZE;
+ let addr = self
+ .used_ring
+ .checked_add(offset)
+ .ok_or(Error::AddressOverflow)?;
+ mem.write_obj(VirtqUsedElem::new(head_index.into(), len), addr)
+ .map_err(Error::GuestMemory)?;
+
+ self.next_used += Wrapping(1);
+ self.num_added += Wrapping(1);
+
+ mem.store(
+ u16::to_le(self.next_used.0),
+ self.used_ring
+ .checked_add(2)
+ .ok_or(Error::AddressOverflow)?,
+ Ordering::Release,
+ )
+ .map_err(Error::GuestMemory)
+ }
+
+ // TODO: Turn this into a doc comment/example.
+ // With the current implementation, a common way of consuming entries from the available ring
+ // while also leveraging notification suppression is to use a loop, for example:
+ //
+ // loop {
+ // // We have to explicitly disable notifications if `VIRTIO_F_EVENT_IDX` has not been
+ // // negotiated.
+ // self.disable_notification()?;
+ //
+ // for chain in self.iter()? {
+ // // Do something with each chain ...
+ // // Let's assume we process all available chains here.
+ // }
+ //
+ // // If `enable_notification` returns `true`, the driver has added more entries to the
+ // // available ring.
+ // if !self.enable_notification()? {
+ // break;
+ // }
+ // }
+ fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+ self.set_notification(mem, true)?;
+ // Ensures the following read is not reordered before any previous write operation.
+ fence(Ordering::SeqCst);
+
+ // We double check here to avoid the situation where the available ring has been updated
+ // just before we re-enabled notifications, and it's possible to miss one. We compare the
+ // current `avail_idx` value to `self.next_avail` because it's where we stopped processing
+ // entries. There are situations where we intentionally avoid processing everything in the
+ // available ring (which will cause this method to return `true`), but in that case we'll
+ // probably not re-enable notifications as we already know there are pending entries.
+ self.avail_idx(mem, Ordering::Relaxed)
+ .map(|idx| idx != self.next_avail)
+ }
+
+ fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error> {
+ self.set_notification(mem, false)
+ }
+
+ fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+ let used_idx = self.next_used;
+
+ // Complete all the writes in add_used() before reading the event.
+ fence(Ordering::SeqCst);
+
+ // The VRING_AVAIL_F_NO_INTERRUPT flag isn't supported yet.
+
+ // When the `EVENT_IDX` feature is negotiated, the driver writes into `used_event`
+ // a value that's used by the device to determine whether a notification must
+ // be submitted after adding a descriptor chain to the used ring. According to the
+ // standard, the notification must be sent when `next_used == used_event + 1`, but
+ // various device model implementations rely on an inequality instead, most likely
+ // to also support use cases where a bunch of descriptor chains are added to the used
+ // ring first, and only afterwards the `needs_notification` logic is called. For example,
+ // the approach based on `num_added` below is taken from the Linux Kernel implementation
+ // (i.e. https://elixir.bootlin.com/linux/v5.15.35/source/drivers/virtio/virtio_ring.c#L661)
+
+ // The `old` variable below is used to determine the value of `next_used` from when
+ // `needs_notification` was called last (each `needs_notification` call resets `num_added`
+ // to zero, while each `add_used` call increments it by one). Then, the logic below
+ // uses wrapped arithmetic to see whether `used_event` can be found between `old` and
+ // `next_used` in the circular sequence space of the used ring.
+ if self.event_idx_enabled {
+ let used_event = self.used_event(mem, Ordering::Relaxed)?;
+ let old = used_idx - self.num_added;
+ self.num_added = Wrapping(0);
+
+ return Ok(used_idx - used_event - Wrapping(1) < used_idx - old);
+ }
+
+ Ok(true)
+ }
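+
+ // Worked example (informational note, not from the imported sources): suppose the driver
+ // wrote used_event = 5, the previous `needs_notification` call saw next_used = 4, and two
+ // chains were added since (num_added = 2, next_used = 6). Then old = 6 - 2 = 4 and the
+ // check is 6 - 5 - 1 = 0 < 6 - 4 = 2, so a notification is needed. With used_event = 7
+ // the left-hand side wraps around to a large u16 value, the comparison fails, and the
+ // notification is suppressed until the driver's threshold is crossed.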
+
+ fn next_avail(&self) -> u16 {
+ self.next_avail.0
+ }
+
+ fn set_next_avail(&mut self, next_avail: u16) {
+ self.next_avail = Wrapping(next_avail);
+ }
+
+ fn next_used(&self) -> u16 {
+ self.next_used.0
+ }
+
+ fn set_next_used(&mut self, next_used: u16) {
+ self.next_used = Wrapping(next_used);
+ }
+
+ fn desc_table(&self) -> u64 {
+ self.desc_table.0
+ }
+
+ fn avail_ring(&self) -> u64 {
+ self.avail_ring.0
+ }
+
+ fn used_ring(&self) -> u64 {
+ self.used_ring.0
+ }
+
+ fn event_idx_enabled(&self) -> bool {
+ self.event_idx_enabled
+ }
+
+ fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+ where
+ M: Clone + Deref,
+ M::Target: GuestMemory,
+ {
+ // Default, iter-based impl. Will be subsequently improved.
+ match self.iter(mem) {
+ Ok(mut iter) => iter.next(),
+ Err(e) => {
+ error!("Iterator error {}", e);
+ None
+ }
+ }
+ }
+}
+
+impl QueueOwnedT for Queue {
+ fn iter<M>(&mut self, mem: M) -> Result<AvailIter<'_, M>, Error>
+ where
+ M: Deref,
+ M::Target: GuestMemory,
+ {
+ // We're checking here that a reset did not happen without re-initializing the queue.
+ // TODO: In the future we might want to also check that the other parameters in the
+ // queue are valid.
+ if !self.ready || self.avail_ring == GuestAddress(0) {
+ return Err(Error::QueueNotReady);
+ }
+
+ self.avail_idx(mem.deref(), Ordering::Acquire)
+ .map(move |idx| AvailIter::new(mem, idx, self))?
+ }
+
+ fn go_to_previous_position(&mut self) {
+ self.next_avail -= Wrapping(1);
+ }
+}
+
+/// Consuming iterator over all available descriptor chain heads in the queue.
+///
+/// # Example
+///
+/// ```rust
+/// # use virtio_bindings::bindings::virtio_ring::{VRING_DESC_F_NEXT, VRING_DESC_F_WRITE};
+/// # use virtio_queue::mock::MockSplitQueue;
+/// use virtio_queue::{Descriptor, Queue, QueueOwnedT};
+/// use vm_memory::{GuestAddress, GuestMemoryMmap};
+///
+/// # fn populate_queue(m: &GuestMemoryMmap) -> Queue {
+/// # let vq = MockSplitQueue::new(m, 16);
+/// # let mut q: Queue = vq.create_queue().unwrap();
+/// #
+/// # // The chains are (0, 1), (2, 3, 4) and (5, 6).
+/// # let mut descs = Vec::new();
+/// # for i in 0..7 {
+/// # let flags = match i {
+/// # 1 | 6 => 0,
+/// # 2 | 5 => VRING_DESC_F_NEXT | VRING_DESC_F_WRITE,
+/// # 4 => VRING_DESC_F_WRITE,
+/// # _ => VRING_DESC_F_NEXT,
+/// # };
+/// #
+/// # descs.push(Descriptor::new((0x1000 * (i + 1)) as u64, 0x1000, flags as u16, i + 1));
+/// # }
+/// #
+/// # vq.add_desc_chains(&descs, 0).unwrap();
+/// # q
+/// # }
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// // Populate the queue with descriptor chains and update the available ring accordingly.
+/// let mut queue = populate_queue(m);
+/// let mut i = queue.iter(m).unwrap();
+///
+/// {
+/// let mut c = i.next().unwrap();
+/// let _first_head_index = c.head_index();
+/// // We should have two descriptors in the first chain.
+/// let _desc1 = c.next().unwrap();
+/// let _desc2 = c.next().unwrap();
+/// }
+///
+/// {
+/// let c = i.next().unwrap();
+/// let _second_head_index = c.head_index();
+///
+/// let mut iter = c.writable();
+/// // We should have two writable descriptors in the second chain.
+/// let _desc1 = iter.next().unwrap();
+/// let _desc2 = iter.next().unwrap();
+/// }
+///
+/// {
+/// let c = i.next().unwrap();
+/// let _third_head_index = c.head_index();
+///
+/// let mut iter = c.readable();
+/// // We should have one readable descriptor in the third chain.
+/// let _desc1 = iter.next().unwrap();
+/// }
+/// // Let's go back one position in the available ring.
+/// i.go_to_previous_position();
+/// // We should be able to access again the third descriptor chain.
+/// let c = i.next().unwrap();
+/// let _third_head_index = c.head_index();
+/// ```
+#[derive(Debug)]
+pub struct AvailIter<'b, M> {
+ mem: M,
+ desc_table: GuestAddress,
+ avail_ring: GuestAddress,
+ queue_size: u16,
+ last_index: Wrapping<u16>,
+ next_avail: &'b mut Wrapping<u16>,
+}
+
+impl<'b, M> AvailIter<'b, M>
+where
+ M: Deref,
+ M::Target: GuestMemory,
+{
+ /// Create a new instance of `AvailIter`.
+ ///
+ /// # Arguments
+ /// * `mem` - the `GuestMemory` object that can be used to access the queue buffers.
+ /// * `idx` - the index of the available ring entry where the driver would put the next
+ /// available descriptor chain.
+ /// * `queue` - the `Queue` object from which the needed data to create the `AvailIter` can
+ /// be retrieved.
+ pub(crate) fn new(mem: M, idx: Wrapping<u16>, queue: &'b mut Queue) -> Result<Self, Error> {
+ // The number of descriptor chain heads to process should always
+ // be smaller than or equal to the queue size, as the driver should
+ // never ask the VMM to process an available ring entry more than
+ // once. Checking and reporting such incorrect driver behavior
+ // can prevent potential hanging and Denial-of-Service from
+ // happening on the VMM side.
+ if (idx - queue.next_avail).0 > queue.size {
+ return Err(Error::InvalidAvailRingIndex);
+ }
+
+ Ok(AvailIter {
+ mem,
+ desc_table: queue.desc_table,
+ avail_ring: queue.avail_ring,
+ queue_size: queue.size,
+ last_index: idx,
+ next_avail: &mut queue.next_avail,
+ })
+ }
+
+ /// Goes back one position in the available descriptor chain offered by the driver.
+ ///
+ /// Rust does not support bidirectional iterators. This is the only way to revert the effect
+ /// of an iterator increment on the queue.
+ ///
+ /// Note: this method assumes there's only one thread manipulating the queue, so it should only
+ /// be invoked in single-threaded context.
+ pub fn go_to_previous_position(&mut self) {
+ *self.next_avail -= Wrapping(1);
+ }
+}
+
+impl<'b, M> Iterator for AvailIter<'b, M>
+where
+ M: Clone + Deref,
+ M::Target: GuestMemory,
+{
+ type Item = DescriptorChain<M>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if *self.next_avail == self.last_index {
+ return None;
+ }
+
+ // These two operations can not overflow an u64 since they're working with relatively small
+ // numbers compared to u64::MAX.
+ let elem_off =
+ u64::from(self.next_avail.0.checked_rem(self.queue_size)?) * VIRTQ_AVAIL_ELEMENT_SIZE;
+ let offset = VIRTQ_AVAIL_RING_HEADER_SIZE + elem_off;
+
+ let addr = self.avail_ring.checked_add(offset)?;
+ let head_index: u16 = self
+ .mem
+ .load(addr, Ordering::Acquire)
+ .map(u16::from_le)
+ .map_err(|_| error!("Failed to read from memory {:x}", addr.raw_value()))
+ .ok()?;
+
+ *self.next_avail += Wrapping(1);
+
+ Some(DescriptorChain::new(
+ self.mem.clone(),
+ self.desc_table,
+ self.queue_size,
+ head_index,
+ ))
+ }
+}
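+
+// Worked example (informational note, not from the imported sources): with a queue of size 16
+// and next_avail = 18, `next()` above reads ring slot 18 % 16 = 2, i.e. the head index is
+// loaded from avail_ring + 4 + 2 * 2 = avail_ring + 8, matching the
+// `VIRTQ_AVAIL_RING_HEADER_SIZE + elem_off` computation.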
+
+#[cfg(any(test, feature = "test-utils"))]
+// It is convenient for tests to implement `PartialEq`, but it is not a
+// proper implementation as `GuestMemory` errors cannot implement `PartialEq`.
+impl PartialEq for Error {
+ fn eq(&self, other: &Self) -> bool {
+ format!("{}", &self) == format!("{}", other)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::defs::{DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR};
+ use crate::mock::MockSplitQueue;
+ use crate::Descriptor;
+ use virtio_bindings::bindings::virtio_ring::{
+ VRING_DESC_F_NEXT, VRING_DESC_F_WRITE, VRING_USED_F_NO_NOTIFY,
+ };
+
+ use vm_memory::{Address, Bytes, GuestAddress, GuestMemoryMmap};
+
+ #[test]
+ fn test_queue_is_valid() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // q is currently valid
+ assert!(q.is_valid(m));
+
+ // shouldn't be valid when not marked as ready
+ q.set_ready(false);
+ assert!(!q.ready());
+ assert!(!q.is_valid(m));
+ q.set_ready(true);
+
+ // shouldn't be allowed to set a size > max_size
+ q.set_size(q.max_size() << 1);
+ assert_eq!(q.size, q.max_size());
+
+ // or set the size to 0
+ q.set_size(0);
+ assert_eq!(q.size, q.max_size());
+
+ // or set a size which is not a power of 2
+ q.set_size(11);
+ assert_eq!(q.size, q.max_size());
+
+ // but should be allowed to set a size if 0 < size <= max_size and size is a power of two
+ q.set_size(4);
+ assert_eq!(q.size, 4);
+ q.size = q.max_size();
+
+ // shouldn't be allowed to set an address that breaks the alignment constraint
+ q.set_desc_table_address(Some(0xf), None);
+ assert_eq!(q.desc_table.0, vq.desc_table_addr().0);
+ // should be allowed to set an aligned out of bounds address
+ q.set_desc_table_address(Some(0xffff_fff0), None);
+ assert_eq!(q.desc_table.0, 0xffff_fff0);
+ // but shouldn't be valid
+ assert!(!q.is_valid(m));
+ // but should be allowed to set a valid descriptor table address
+ q.set_desc_table_address(Some(0x10), None);
+ assert_eq!(q.desc_table.0, 0x10);
+ assert!(q.is_valid(m));
+ let addr = vq.desc_table_addr().0;
+ q.set_desc_table_address(Some(addr as u32), Some((addr >> 32) as u32));
+
+ // shouldn't be allowed to set an address that breaks the alignment constraint
+ q.set_avail_ring_address(Some(0x1), None);
+ assert_eq!(q.avail_ring.0, vq.avail_addr().0);
+ // should be allowed to set an aligned out of bounds address
+ q.set_avail_ring_address(Some(0xffff_fffe), None);
+ assert_eq!(q.avail_ring.0, 0xffff_fffe);
+ // but shouldn't be valid
+ assert!(!q.is_valid(m));
+ // but should be allowed to set a valid available ring address
+ q.set_avail_ring_address(Some(0x2), None);
+ assert_eq!(q.avail_ring.0, 0x2);
+ assert!(q.is_valid(m));
+ let addr = vq.avail_addr().0;
+ q.set_avail_ring_address(Some(addr as u32), Some((addr >> 32) as u32));
+
+ // shouldn't be allowed to set an address that breaks the alignment constraint
+ q.set_used_ring_address(Some(0x3), None);
+ assert_eq!(q.used_ring.0, vq.used_addr().0);
+ // should be allowed to set an aligned out of bounds address
+ q.set_used_ring_address(Some(0xffff_fffc), None);
+ assert_eq!(q.used_ring.0, 0xffff_fffc);
+ // but shouldn't be valid
+ assert!(!q.is_valid(m));
+ // but should be allowed to set a valid used ring address
+ q.set_used_ring_address(Some(0x4), None);
+ assert_eq!(q.used_ring.0, 0x4);
+ let addr = vq.used_addr().0;
+ q.set_used_ring_address(Some(addr as u32), Some((addr >> 32) as u32));
+ assert!(q.is_valid(m));
+ }
+
+ #[test]
+ fn test_add_used() {
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(mem, 16);
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(0));
+ assert_eq!(u16::from_le(vq.used().idx().load()), 0);
+
+ // index too large
+ assert!(q.add_used(mem, 16, 0x1000).is_err());
+ assert_eq!(u16::from_le(vq.used().idx().load()), 0);
+
+ // should be ok
+ q.add_used(mem, 1, 0x1000).unwrap();
+ assert_eq!(q.next_used, Wrapping(1));
+ assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(1));
+ assert_eq!(u16::from_le(vq.used().idx().load()), 1);
+
+ let x = vq.used().ring().ref_at(0).unwrap().load();
+ assert_eq!(x.id(), 1);
+ assert_eq!(x.len(), 0x1000);
+ }
+
+ #[test]
+ fn test_reset_queue() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ q.set_size(8);
+ // The address set by `MockSplitQueue` for the descriptor table is DEFAULT_DESC_TABLE_ADDR,
+ // so let's change it for testing the reset.
+ q.set_desc_table_address(Some(0x5000), None);
+ // Same for `event_idx_enabled`, `next_avail`, `next_used` and `num_added`.
+ q.set_event_idx(true);
+ q.set_next_avail(2);
+ q.set_next_used(4);
+ q.num_added = Wrapping(15);
+ assert_eq!(q.size, 8);
+ // `create_queue` also marks the queue as ready.
+ assert!(q.ready);
+ assert_ne!(q.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR));
+ assert_ne!(q.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR));
+ assert_ne!(q.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR));
+ assert_ne!(q.next_avail, Wrapping(0));
+ assert_ne!(q.next_used, Wrapping(0));
+ assert_ne!(q.num_added, Wrapping(0));
+ assert!(q.event_idx_enabled);
+
+ q.reset();
+ assert_eq!(q.size, 16);
+ assert!(!q.ready);
+ assert_eq!(q.desc_table, GuestAddress(DEFAULT_DESC_TABLE_ADDR));
+ assert_eq!(q.avail_ring, GuestAddress(DEFAULT_AVAIL_RING_ADDR));
+ assert_eq!(q.used_ring, GuestAddress(DEFAULT_USED_RING_ADDR));
+ assert_eq!(q.next_avail, Wrapping(0));
+ assert_eq!(q.next_used, Wrapping(0));
+ assert_eq!(q.num_added, Wrapping(0));
+ assert!(!q.event_idx_enabled);
+ }
+
+ #[test]
+ fn test_needs_notification() {
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let qsize = 16;
+ let vq = MockSplitQueue::new(mem, qsize);
+ let mut q: Queue = vq.create_queue().unwrap();
+ let avail_addr = vq.avail_addr();
+
+ // It should always return true when EVENT_IDX isn't enabled.
+ for i in 0..qsize {
+ q.next_used = Wrapping(i);
+ assert!(q.needs_notification(mem).unwrap());
+ }
+
+ mem.write_obj::<u16>(
+ u16::to_le(4),
+ avail_addr.unchecked_add(4 + qsize as u64 * 2),
+ )
+ .unwrap();
+ q.set_event_idx(true);
+
+ // Incrementing up to this value causes a `u16` to wrap back to 0.
+ let wrap = u32::from(u16::MAX) + 1;
+
+ for i in 0..wrap + 12 {
+ q.next_used = Wrapping(i as u16);
+ // Let's test wrapping around the maximum index value as well.
+ // `num_added` needs to be at least `1` to represent the fact that new descriptor
+ // chains have been added to the used ring since the last time `needs_notification`
+ // returned.
+ q.num_added = Wrapping(1);
+ let expected = i == 5 || i == (5 + wrap);
+ assert_eq!((q.needs_notification(mem).unwrap(), i), (expected, i));
+ }
+
+ mem.write_obj::<u16>(
+ u16::to_le(8),
+ avail_addr.unchecked_add(4 + qsize as u64 * 2),
+ )
+ .unwrap();
+
+ // Returns `false` because the current `used_event` value is behind both `next_used` and
+ // the value of `next_used` at the time when `needs_notification` last returned (which is
+ // computed based on `num_added` as described in the comments for `needs_notification`).
+ assert!(!q.needs_notification(mem).unwrap());
+
+ mem.write_obj::<u16>(
+ u16::to_le(15),
+ avail_addr.unchecked_add(4 + qsize as u64 * 2),
+ )
+ .unwrap();
+
+ q.num_added = Wrapping(1);
+ assert!(!q.needs_notification(mem).unwrap());
+
+ q.next_used = Wrapping(15);
+ q.num_added = Wrapping(1);
+ assert!(!q.needs_notification(mem).unwrap());
+
+ q.next_used = Wrapping(16);
+ q.num_added = Wrapping(1);
+ assert!(q.needs_notification(mem).unwrap());
+
+ // Calling `needs_notification` again immediately returns `false`.
+ assert!(!q.needs_notification(mem).unwrap());
+
+ mem.write_obj::<u16>(
+ u16::to_le(u16::MAX - 3),
+ avail_addr.unchecked_add(4 + qsize as u64 * 2),
+ )
+ .unwrap();
+ q.next_used = Wrapping(u16::MAX - 2);
+ q.num_added = Wrapping(1);
+ // Returns `true` because, when looking at the circular sequence of indices of the used ring,
+ // the value we wrote in the `used_event` appears between the "old" value of `next_used`
+ // (i.e. `next_used` - `num_added`) and the current `next_used`, thus suggesting that we
+ // need to notify the driver.
+ assert!(q.needs_notification(mem).unwrap());
+ }
+
+ #[test]
+ fn test_enable_disable_notification() {
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(mem, 16);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+ let used_addr = vq.used_addr();
+
+ assert!(!q.event_idx_enabled);
+
+ q.enable_notification(mem).unwrap();
+ let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, 0);
+
+ q.disable_notification(mem).unwrap();
+ let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, VRING_USED_F_NO_NOTIFY as u16);
+
+ q.enable_notification(mem).unwrap();
+ let v = mem.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, 0);
+
+ q.set_event_idx(true);
+ let avail_addr = vq.avail_addr();
+ mem.write_obj::<u16>(u16::to_le(2), avail_addr.unchecked_add(2))
+ .unwrap();
+
+ assert!(q.enable_notification(mem).unwrap());
+ q.next_avail = Wrapping(2);
+ assert!(!q.enable_notification(mem).unwrap());
+
+ mem.write_obj::<u16>(u16::to_le(8), avail_addr.unchecked_add(2))
+ .unwrap();
+
+ assert!(q.enable_notification(mem).unwrap());
+ q.next_avail = Wrapping(8);
+ assert!(!q.enable_notification(mem).unwrap());
+ }
+
+ #[test]
+ fn test_consume_chains_with_notif() {
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(mem, 16);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // q is currently valid.
+ assert!(q.is_valid(mem));
+
+ // The chains are (0, 1), (2, 3, 4), (5, 6), (7, 8), (9, 10, 11, 12).
+ let mut descs = Vec::new();
+ for i in 0..13 {
+ let flags = match i {
+ 1 | 4 | 6 | 8 | 12 => 0,
+ _ => VRING_DESC_F_NEXT,
+ };
+
+ descs.push(Descriptor::new(
+ (0x1000 * (i + 1)) as u64,
+ 0x1000,
+ flags as u16,
+ i + 1,
+ ));
+ }
+
+ vq.add_desc_chains(&descs, 0).unwrap();
+ // Update the index of the chain that can be consumed to not be the last one.
+ // This enables us to consume chains in multiple iterations as opposed to consuming
+ // all the driver-written chains at once.
+ vq.avail().idx().store(u16::to_le(2));
+ // No descriptor chains are consumed at this point.
+ assert_eq!(q.next_avail(), 0);
+
+ let mut i = 0;
+
+ loop {
+ i += 1;
+ q.disable_notification(mem).unwrap();
+
+ while let Some(chain) = q.iter(mem).unwrap().next() {
+ // Process the descriptor chain, and then add entries to the
+ // used ring.
+ let head_index = chain.head_index();
+ let mut desc_len = 0;
+ chain.for_each(|d| {
+ if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+ desc_len += d.len();
+ }
+ });
+ q.add_used(mem, head_index, desc_len).unwrap();
+ }
+ if !q.enable_notification(mem).unwrap() {
+ break;
+ }
+ }
+ // The chains should be consumed in a single loop iteration because there's nothing updating
+ // the `idx` field of the available ring in the meantime.
+ assert_eq!(i, 1);
+ // The next chain that can be consumed should have index 2.
+ assert_eq!(q.next_avail(), 2);
+ assert_eq!(q.next_used(), 2);
+ // Let the device know it can consume one more chain.
+ vq.avail().idx().store(u16::to_le(3));
+ i = 0;
+
+ loop {
+ i += 1;
+ q.disable_notification(mem).unwrap();
+
+ while let Some(chain) = q.iter(mem).unwrap().next() {
+ // Process the descriptor chain, and then add entries to the
+ // used ring.
+ let head_index = chain.head_index();
+ let mut desc_len = 0;
+ chain.for_each(|d| {
+ if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+ desc_len += d.len();
+ }
+ });
+ q.add_used(mem, head_index, desc_len).unwrap();
+ }
+
+ // For the simplicity of the test we update the `idx` value of the available ring here.
+ // Ideally this would be done on a separate thread.
+ // Because of this update, the loop should be iterated again to consume the new
+ // available descriptor chains.
+ vq.avail().idx().store(u16::to_le(4));
+ if !q.enable_notification(mem).unwrap() {
+ break;
+ }
+ }
+ assert_eq!(i, 2);
+ // The next chain that can be consumed should have index 4.
+ assert_eq!(q.next_avail(), 4);
+ assert_eq!(q.next_used(), 4);
+
+ // Set an `idx` that is bigger than the number of entries added in the ring.
+ // This is an allowed scenario, but the indexes of the chain will have unexpected values.
+ vq.avail().idx().store(u16::to_le(7));
+ loop {
+ q.disable_notification(mem).unwrap();
+
+ while let Some(chain) = q.iter(mem).unwrap().next() {
+ // Process the descriptor chain, and then add entries to the
+ // used ring.
+ let head_index = chain.head_index();
+ let mut desc_len = 0;
+ chain.for_each(|d| {
+ if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+ desc_len += d.len();
+ }
+ });
+ q.add_used(mem, head_index, desc_len).unwrap();
+ }
+ if !q.enable_notification(mem).unwrap() {
+ break;
+ }
+ }
+ assert_eq!(q.next_avail(), 7);
+ assert_eq!(q.next_used(), 7);
+ }
+
+ #[test]
+ fn test_invalid_avail_idx() {
+ // This is a negative test for the following MUST from the spec: `A driver MUST NOT
+ // decrement the available idx on a virtqueue (ie. there is no way to “unexpose” buffers).`.
+ // We validate that for this misconfiguration, the device does not panic.
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(mem, 16);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // q is currently valid.
+ assert!(q.is_valid(mem));
+
+ // The chains are (0, 1), (2, 3, 4), (5, 6).
+ let mut descs = Vec::new();
+ for i in 0..7 {
+ let flags = match i {
+ 1 | 4 | 6 => 0,
+ _ => VRING_DESC_F_NEXT,
+ };
+
+ descs.push(Descriptor::new(
+ (0x1000 * (i + 1)) as u64,
+ 0x1000,
+ flags as u16,
+ i + 1,
+ ));
+ }
+
+ vq.add_desc_chains(&descs, 0).unwrap();
+ // Let the device know it can consume chains with the index < 3.
+ vq.avail().idx().store(u16::to_le(3));
+ // No descriptor chains are consumed at this point.
+ assert_eq!(q.next_avail(), 0);
+ assert_eq!(q.next_used(), 0);
+
+ loop {
+ q.disable_notification(mem).unwrap();
+
+ while let Some(chain) = q.iter(mem).unwrap().next() {
+ // Process the descriptor chain, and then add entries to the
+ // used ring.
+ let head_index = chain.head_index();
+ let mut desc_len = 0;
+ chain.for_each(|d| {
+ if d.flags() as u32 & VRING_DESC_F_WRITE == VRING_DESC_F_WRITE {
+ desc_len += d.len();
+ }
+ });
+ q.add_used(mem, head_index, desc_len).unwrap();
+ }
+ if !q.enable_notification(mem).unwrap() {
+ break;
+ }
+ }
+ // The next chain that can be consumed should have index 3.
+ assert_eq!(q.next_avail(), 3);
+ assert_eq!(q.avail_idx(mem, Ordering::Acquire).unwrap(), Wrapping(3));
+ assert_eq!(q.next_used(), 3);
+ assert_eq!(q.used_idx(mem, Ordering::Acquire).unwrap(), Wrapping(3));
+ assert!(q.lock().ready());
+
+ // Decrement `idx`, which is forbidden. We don't enforce this, but we should
+ // test that we don't panic if the driver decrements it.
+ vq.avail().idx().store(u16::to_le(1));
+ // Invalid available ring index
+ assert!(q.iter(mem).is_err());
+ }
+
+ #[test]
+ fn test_iterator_and_avail_idx() {
+ // This test ensures that constructing a descriptor chain iterator succeeds
+ // with valid available ring indexes, while it produces an error with invalid
+ // indexes.
+ let queue_size = 2;
+ let mem = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(mem, queue_size);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // q is currently valid.
+ assert!(q.is_valid(mem));
+
+ // Create descriptors to fill up the queue
+ let mut descs = Vec::new();
+ for i in 0..queue_size {
+ descs.push(Descriptor::new(
+ (0x1000 * (i + 1)) as u64,
+ 0x1000,
+ 0_u16,
+ i + 1,
+ ));
+ }
+ vq.add_desc_chains(&descs, 0).unwrap();
+
+ // Set the `next_avail` index to `u16::MAX` to test the wrapping scenarios
+ q.set_next_avail(u16::MAX);
+
+ // When the number of chains exposed by the driver is equal to or less than the queue
+ // size, the available ring index is valid and constructing an iterator succeeds.
+ let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size);
+ vq.avail().idx().store(u16::to_le(avail_idx.0));
+ assert!(q.iter(mem).is_ok());
+ let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size - 1);
+ vq.avail().idx().store(u16::to_le(avail_idx.0));
+ assert!(q.iter(mem).is_ok());
+
+ // When the number of chains exposed by the driver is larger than the queue size, the
+ // available ring index is invalid and constructing an iterator produces an error.
+ let avail_idx = Wrapping(q.next_avail()) + Wrapping(queue_size + 1);
+ vq.avail().idx().store(u16::to_le(avail_idx.0));
+ assert!(q.iter(mem).is_err());
+ }
+
+ #[test]
+ fn test_descriptor_and_iterator() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // q is currently valid
+ assert!(q.is_valid(m));
+
+ // the chains are (0, 1), (2, 3, 4) and (5, 6)
+ let mut descs = Vec::new();
+ for j in 0..7 {
+ let flags = match j {
+ 1 | 6 => 0,
+ 2 | 5 => VRING_DESC_F_NEXT | VRING_DESC_F_WRITE,
+ 4 => VRING_DESC_F_WRITE,
+ _ => VRING_DESC_F_NEXT,
+ };
+
+ descs.push(Descriptor::new(
+ (0x1000 * (j + 1)) as u64,
+ 0x1000,
+ flags as u16,
+ j + 1,
+ ));
+ }
+
+ vq.add_desc_chains(&descs, 0).unwrap();
+
+ let mut i = q.iter(m).unwrap();
+
+ {
+ let c = i.next().unwrap();
+ assert_eq!(c.head_index(), 0);
+
+ let mut iter = c;
+ assert!(iter.next().is_some());
+ assert!(iter.next().is_some());
+ assert!(iter.next().is_none());
+ assert!(iter.next().is_none());
+ }
+
+ {
+ let c = i.next().unwrap();
+ assert_eq!(c.head_index(), 2);
+
+ let mut iter = c.writable();
+ assert!(iter.next().is_some());
+ assert!(iter.next().is_some());
+ assert!(iter.next().is_none());
+ assert!(iter.next().is_none());
+ }
+
+ {
+ let c = i.next().unwrap();
+ assert_eq!(c.head_index(), 5);
+
+ let mut iter = c.readable();
+ assert!(iter.next().is_some());
+ assert!(iter.next().is_none());
+ assert!(iter.next().is_none());
+ }
+ }
+
+ #[test]
+ fn test_iterator() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 16);
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ q.size = q.max_size;
+ q.desc_table = vq.desc_table_addr();
+ q.avail_ring = vq.avail_addr();
+ q.used_ring = vq.used_addr();
+ assert!(q.is_valid(m));
+
+ {
+ // an invalid queue should return an error when constructing the iterator
+ q.ready = false;
+ assert!(q.iter(m).is_err());
+ }
+
+ q.ready = true;
+
+ // now let's create two simple descriptor chains
+ // the chains are (0, 1) and (2, 3, 4)
+ {
+ let mut descs = Vec::new();
+ for j in 0..5u16 {
+ let flags = match j {
+ 1 | 4 => 0,
+ _ => VRING_DESC_F_NEXT,
+ };
+
+ descs.push(Descriptor::new(
+ (0x1000 * (j + 1)) as u64,
+ 0x1000,
+ flags as u16,
+ j + 1,
+ ));
+ }
+ vq.add_desc_chains(&descs, 0).unwrap();
+
+ let mut i = q.iter(m).unwrap();
+
+ {
+ let mut c = i.next().unwrap();
+ assert_eq!(c.head_index(), 0);
+
+ c.next().unwrap();
+ assert!(c.next().is_some());
+ assert!(c.next().is_none());
+ assert_eq!(c.head_index(), 0);
+ }
+
+ {
+ let mut c = i.next().unwrap();
+ assert_eq!(c.head_index(), 2);
+
+ c.next().unwrap();
+ c.next().unwrap();
+ c.next().unwrap();
+ assert!(c.next().is_none());
+ assert_eq!(c.head_index(), 2);
+ }
+
+ // also test that go_to_previous_position() works as expected
+ {
+ assert!(i.next().is_none());
+ i.go_to_previous_position();
+ let mut c = q.iter(m).unwrap().next().unwrap();
+ c.next().unwrap();
+ c.next().unwrap();
+ c.next().unwrap();
+ assert!(c.next().is_none());
+ }
+ }
+
+ // Test that iterating some broken descriptor chain does not exceed
+ // 2^32 bytes in total (VIRTIO spec version 1.2, 2.7.5.2:
+ // Drivers MUST NOT add a descriptor chain longer than 2^32 bytes in
+ // total)
+ {
+ let descs = vec![
+ Descriptor::new(0x1000, 0xffff_ffff, VRING_DESC_F_NEXT as u16, 1),
+ Descriptor::new(0x1000, 0x1234_5678, 0, 2),
+ ];
+ vq.add_desc_chains(&descs, 0).unwrap();
+ let mut yielded_bytes_by_iteration = 0_u32;
+ for d in q.iter(m).unwrap().next().unwrap() {
+ yielded_bytes_by_iteration = yielded_bytes_by_iteration
+ .checked_add(d.len())
+ .expect("iterator should not yield more than 2^32 bytes");
+ }
+ }
+
+ // Same as above, but test with a descriptor which is self-referential
+ {
+ let descs = vec![Descriptor::new(
+ 0x1000,
+ 0xffff_ffff,
+ VRING_DESC_F_NEXT as u16,
+ 0,
+ )];
+ vq.add_desc_chains(&descs, 0).unwrap();
+ let mut yielded_bytes_by_iteration = 0_u32;
+ for d in q.iter(m).unwrap().next().unwrap() {
+ yielded_bytes_by_iteration = yielded_bytes_by_iteration
+ .checked_add(d.len())
+ .expect("iterator should not yield more than 2^32 bytes");
+ }
+ }
+ }
+
+ #[test]
+ fn test_regression_iterator_division() {
+ // This is a regression test checking that the iterator does not try to divide
+ // by 0 when the queue size is 0
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 1);
+ // This input was generated by the fuzzer, both for the `Queue` and the `Descriptor`
+ let descriptors: Vec<Descriptor> = vec![Descriptor::new(
+ 14178673876262995140,
+ 3301229764,
+ 50372,
+ 50372,
+ )];
+ vq.build_desc_chain(&descriptors).unwrap();
+
+ let mut q = Queue {
+ max_size: 38,
+ next_avail: Wrapping(0),
+ next_used: Wrapping(0),
+ event_idx_enabled: false,
+ num_added: Wrapping(0),
+ size: 0,
+ ready: false,
+ desc_table: GuestAddress(12837708984796196),
+ avail_ring: GuestAddress(0),
+ used_ring: GuestAddress(9943947977301164032),
+ };
+
+ assert!(q.pop_descriptor_chain(m).is_none());
+ }
+
+ #[test]
+ fn test_setters_error_cases() {
+ assert_eq!(Queue::new(15).unwrap_err(), Error::InvalidMaxSize);
+ let mut q = Queue::new(16).unwrap();
+
+ let expected_val = q.desc_table.0;
+ assert_eq!(
+ q.try_set_desc_table_address(GuestAddress(0xf)).unwrap_err(),
+ Error::InvalidDescTableAlign
+ );
+ assert_eq!(q.desc_table(), expected_val);
+
+ let expected_val = q.avail_ring.0;
+ assert_eq!(
+ q.try_set_avail_ring_address(GuestAddress(0x1)).unwrap_err(),
+ Error::InvalidAvailRingAlign
+ );
+ assert_eq!(q.avail_ring(), expected_val);
+
+ let expected_val = q.used_ring.0;
+ assert_eq!(
+ q.try_set_used_ring_address(GuestAddress(0x3)).unwrap_err(),
+ Error::InvalidUsedRingAlign
+ );
+ assert_eq!(q.used_ring(), expected_val);
+
+ let expected_val = q.size;
+ assert_eq!(q.try_set_size(15).unwrap_err(), Error::InvalidSize);
+ assert_eq!(q.size(), expected_val)
+ }
+
+ #[test]
+ // This is a regression test for a fuzzing finding. If the driver requests a reset of the
+ // device, but then does not re-initialize the queue, a subsequent call to process
+ // a request should yield no descriptors to process. Before this fix we were processing
+ // descriptors that were added to the queue before, and we were ending up processing 255
+ // descriptors per chain.
+ fn test_regression_timeout_after_reset() {
+ // The input below was generated by libfuzzer and adapted for this test.
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x0), 0x10000)]).unwrap();
+ let vq = MockSplitQueue::new(m, 1024);
+
+ // This input below was generated by the fuzzer.
+ let descriptors: Vec<Descriptor> = vec![
+ Descriptor::new(21508325467, 0, 1, 4),
+ Descriptor::new(2097152, 4096, 3, 0),
+ Descriptor::new(18374686479672737792, 4294967295, 65535, 29),
+ Descriptor::new(76842670169653248, 1114115, 0, 0),
+ Descriptor::new(16, 983040, 126, 3),
+ Descriptor::new(897648164864, 0, 0, 0),
+ Descriptor::new(111669149722, 0, 0, 0),
+ ];
+ vq.build_multiple_desc_chains(&descriptors).unwrap();
+
+ let mut q: Queue = vq.create_queue().unwrap();
+
+ // Setting the queue to ready should not allow consuming descriptors after reset.
+ q.reset();
+ q.set_ready(true);
+ let mut counter = 0;
+ while let Some(mut desc_chain) = q.pop_descriptor_chain(m) {
+ // this loop is here to check that there are no side effects
+ // in terms of memory & execution time.
+ while desc_chain.next().is_some() {
+ counter += 1;
+ }
+ }
+ assert_eq!(counter, 0);
+
+ // Setting the avail_addr to a valid address should not allow consuming descriptors after reset.
+ q.reset();
+ q.set_avail_ring_address(Some(0x1000), None);
+ assert_eq!(q.avail_ring, GuestAddress(0x1000));
+ counter = 0;
+ while let Some(mut desc_chain) = q.pop_descriptor_chain(m) {
+ // this loop is here to check that there are no side effects
+ // in terms of memory & execution time.
+ while desc_chain.next().is_some() {
+ counter += 1;
+ }
+ }
+ assert_eq!(counter, 0);
+ }
+}
diff --git a/src/queue_sync.rs b/src/queue_sync.rs
new file mode 100644
index 0000000..6e666be
--- /dev/null
+++ b/src/queue_sync.rs
@@ -0,0 +1,358 @@
+// Copyright (C) 2021 Alibaba Cloud. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
+
+use std::num::Wrapping;
+use std::ops::Deref;
+use std::sync::atomic::Ordering;
+use std::sync::{Arc, Mutex, MutexGuard};
+
+use vm_memory::GuestMemory;
+
+use crate::{DescriptorChain, Error, Queue, QueueGuard, QueueT};
+
+/// Struct to maintain information and manipulate the state of a virtio queue in a
+/// multi-threaded context.
+///
+/// # Example
+///
+/// ```rust
+/// use virtio_queue::{Queue, QueueSync, QueueT};
+/// use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+///
+/// let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+/// let mut queue = QueueSync::new(1024).unwrap();
+///
+/// // First, the driver sets up the queue; this setup is done via writes on the bus (PCI, MMIO).
+/// queue.set_size(8);
+/// queue.set_desc_table_address(Some(0x1000), None);
+/// queue.set_avail_ring_address(Some(0x2000), None);
+/// queue.set_used_ring_address(Some(0x3000), None);
+/// queue.set_ready(true);
+/// // The user should check if the queue is valid before starting to use it.
+/// assert!(queue.is_valid(m.memory()));
+///
+/// // The memory object is not embedded in the `QueueSync`, so we have to pass it as a
+/// // parameter to the methods that access the guest memory. Examples would be:
+/// queue.add_used(m.memory(), 1, 0x100).unwrap();
+/// queue.needs_notification(m.memory()).unwrap();
+/// ```
+#[derive(Clone, Debug)]
+pub struct QueueSync {
+ state: Arc<Mutex<Queue>>,
+}
+
+impl QueueSync {
+ fn lock_state(&self) -> MutexGuard<Queue> {
+ // Do not expect poisoned lock.
+ self.state.lock().unwrap()
+ }
+}
+
+impl<'a> QueueGuard<'a> for QueueSync {
+ type G = MutexGuard<'a, Queue>;
+}
+
+impl QueueT for QueueSync {
+ fn new(max_size: u16) -> Result<Self, Error> {
+ Ok(QueueSync {
+ state: Arc::new(Mutex::new(Queue::new(max_size)?)),
+ })
+ }
+
+ fn is_valid<M: GuestMemory>(&self, mem: &M) -> bool {
+ self.lock_state().is_valid(mem)
+ }
+
+ fn reset(&mut self) {
+ self.lock_state().reset();
+ }
+
+ fn lock(&mut self) -> <Self as QueueGuard>::G {
+ self.lock_state()
+ }
+
+ fn max_size(&self) -> u16 {
+ self.lock_state().max_size()
+ }
+
+ fn size(&self) -> u16 {
+ self.lock_state().size()
+ }
+
+ fn set_size(&mut self, size: u16) {
+ self.lock_state().set_size(size);
+ }
+
+ fn ready(&self) -> bool {
+ self.lock_state().ready()
+ }
+
+ fn set_ready(&mut self, ready: bool) {
+ self.lock_state().set_ready(ready)
+ }
+
+ fn set_desc_table_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ self.lock_state().set_desc_table_address(low, high);
+ }
+
+ fn set_avail_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ self.lock_state().set_avail_ring_address(low, high);
+ }
+
+ fn set_used_ring_address(&mut self, low: Option<u32>, high: Option<u32>) {
+ self.lock_state().set_used_ring_address(low, high);
+ }
+
+ fn set_event_idx(&mut self, enabled: bool) {
+ self.lock_state().set_event_idx(enabled);
+ }
+
+ fn avail_idx<M>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error>
+ where
+ M: GuestMemory + ?Sized,
+ {
+ self.lock_state().avail_idx(mem, order)
+ }
+
+ fn used_idx<M: GuestMemory>(&self, mem: &M, order: Ordering) -> Result<Wrapping<u16>, Error> {
+ self.lock_state().used_idx(mem, order)
+ }
+
+ fn add_used<M: GuestMemory>(
+ &mut self,
+ mem: &M,
+ head_index: u16,
+ len: u32,
+ ) -> Result<(), Error> {
+ self.lock_state().add_used(mem, head_index, len)
+ }
+
+ fn enable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+ self.lock_state().enable_notification(mem)
+ }
+
+ fn disable_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<(), Error> {
+ self.lock_state().disable_notification(mem)
+ }
+
+ fn needs_notification<M: GuestMemory>(&mut self, mem: &M) -> Result<bool, Error> {
+ self.lock_state().needs_notification(mem)
+ }
+
+ fn next_avail(&self) -> u16 {
+ self.lock_state().next_avail()
+ }
+
+ fn set_next_avail(&mut self, next_avail: u16) {
+ self.lock_state().set_next_avail(next_avail);
+ }
+
+ fn next_used(&self) -> u16 {
+ self.lock_state().next_used()
+ }
+
+ fn set_next_used(&mut self, next_used: u16) {
+ self.lock_state().set_next_used(next_used);
+ }
+
+ fn desc_table(&self) -> u64 {
+ self.lock_state().desc_table()
+ }
+
+ fn avail_ring(&self) -> u64 {
+ self.lock_state().avail_ring()
+ }
+
+ fn used_ring(&self) -> u64 {
+ self.lock_state().used_ring()
+ }
+
+ fn event_idx_enabled(&self) -> bool {
+ self.lock_state().event_idx_enabled()
+ }
+
+ fn pop_descriptor_chain<M>(&mut self, mem: M) -> Option<DescriptorChain<M>>
+ where
+ M: Clone + Deref,
+ M::Target: GuestMemory,
+ {
+ self.lock_state().pop_descriptor_chain(mem)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::defs::{DEFAULT_AVAIL_RING_ADDR, DEFAULT_DESC_TABLE_ADDR, DEFAULT_USED_RING_ADDR};
+ use std::sync::Barrier;
+ use virtio_bindings::bindings::virtio_ring::VRING_USED_F_NO_NOTIFY;
+ use vm_memory::{Address, Bytes, GuestAddress, GuestAddressSpace, GuestMemoryMmap};
+
+ #[test]
+ fn test_queue_state_sync() {
+ let mut q = QueueSync::new(0x1000).unwrap();
+ let mut q2 = q.clone();
+ let q3 = q.clone();
+ let barrier = Arc::new(Barrier::new(3));
+ let b2 = barrier.clone();
+ let b3 = barrier.clone();
+
+ let t1 = std::thread::spawn(move || {
+ {
+ let guard = q2.lock();
+ assert!(!guard.ready());
+ }
+ b2.wait();
+ b2.wait();
+ {
+ let guard = q2.lock();
+ assert!(guard.ready());
+ }
+ });
+
+ let t2 = std::thread::spawn(move || {
+ assert!(!q3.ready());
+ b3.wait();
+ b3.wait();
+ assert!(q3.ready());
+ });
+
+ barrier.wait();
+ q.set_ready(true);
+ barrier.wait();
+
+ t1.join().unwrap();
+ t2.join().unwrap();
+ }
+
+ #[test]
+ fn test_state_sync_add_used() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let mut q = QueueSync::new(0x100).unwrap();
+
+ q.set_desc_table_address(Some(0x1000), None);
+ q.set_avail_ring_address(Some(0x2000), None);
+ q.set_used_ring_address(Some(0x3000), None);
+ q.set_event_idx(true);
+ q.set_ready(true);
+ assert!(q.is_valid(m.memory()));
+ assert_eq!(q.lock().size(), 0x100);
+
+ assert_eq!(q.max_size(), 0x100);
+ assert_eq!(q.size(), 0x100);
+ q.set_size(0x80);
+ assert_eq!(q.size(), 0x80);
+ assert_eq!(q.max_size(), 0x100);
+ q.set_next_avail(5);
+ assert_eq!(q.next_avail(), 5);
+ q.set_next_used(3);
+ assert_eq!(q.next_used(), 3);
+ assert_eq!(
+ q.avail_idx(m.memory(), Ordering::Acquire).unwrap(),
+ Wrapping(0)
+ );
+ assert_eq!(
+ q.used_idx(m.memory(), Ordering::Acquire).unwrap(),
+ Wrapping(0)
+ );
+
+ assert_eq!(q.next_used(), 3);
+
+ // index too large
+ assert!(q.add_used(m.memory(), 0x200, 0x1000).is_err());
+ assert_eq!(q.next_used(), 3);
+
+ // should be ok
+ q.add_used(m.memory(), 1, 0x1000).unwrap();
+ assert_eq!(q.next_used(), 4);
+ assert_eq!(
+ q.used_idx(m.memory(), Ordering::Acquire).unwrap(),
+ Wrapping(4)
+ );
+ }
+
+ #[test]
+ fn test_sync_state_reset_queue() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let mut q = QueueSync::new(0x100).unwrap();
+
+ q.set_desc_table_address(Some(0x1000), None);
+ q.set_avail_ring_address(Some(0x2000), None);
+ q.set_used_ring_address(Some(0x3000), None);
+ q.set_event_idx(true);
+ q.set_next_avail(2);
+ q.set_next_used(2);
+ q.set_size(0x8);
+ q.set_ready(true);
+ assert!(q.is_valid(m.memory()));
+
+ q.needs_notification(m.memory()).unwrap();
+
+ assert_eq!(q.lock_state().size(), 0x8);
+ assert!(q.lock_state().ready());
+ assert_ne!(q.lock_state().desc_table(), DEFAULT_DESC_TABLE_ADDR);
+ assert_ne!(q.lock_state().avail_ring(), DEFAULT_AVAIL_RING_ADDR);
+ assert_ne!(q.lock_state().used_ring(), DEFAULT_USED_RING_ADDR);
+ assert_ne!(q.lock_state().next_avail(), 0);
+ assert_ne!(q.lock_state().next_used(), 0);
+ assert!(q.lock_state().event_idx_enabled());
+
+ q.reset();
+ assert_eq!(q.lock_state().size(), 0x100);
+ assert!(!q.lock_state().ready());
+ assert_eq!(q.lock_state().desc_table(), DEFAULT_DESC_TABLE_ADDR);
+ assert_eq!(q.lock_state().avail_ring(), DEFAULT_AVAIL_RING_ADDR);
+ assert_eq!(q.lock_state().used_ring(), DEFAULT_USED_RING_ADDR);
+ assert_eq!(q.lock_state().next_avail(), 0);
+ assert_eq!(q.lock_state().next_used(), 0);
+ assert!(!q.lock_state().event_idx_enabled());
+ }
+
+ #[test]
+ fn test_enable_disable_notification() {
+ let m = &GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap();
+ let mem = m.memory();
+ let mut q = QueueSync::new(0x100).unwrap();
+
+ q.set_desc_table_address(Some(0x1000), None);
+ assert_eq!(q.desc_table(), 0x1000);
+ q.set_avail_ring_address(Some(0x2000), None);
+ assert_eq!(q.avail_ring(), 0x2000);
+ q.set_used_ring_address(Some(0x3000), None);
+ assert_eq!(q.used_ring(), 0x3000);
+ q.set_ready(true);
+ assert!(q.is_valid(mem));
+
+ let used_addr = GuestAddress(q.lock_state().used_ring());
+
+ assert!(!q.event_idx_enabled());
+ q.enable_notification(mem).unwrap();
+ let v = m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, 0);
+
+ q.disable_notification(m.memory()).unwrap();
+ let v = m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, VRING_USED_F_NO_NOTIFY as u16);
+
+ q.enable_notification(mem).unwrap();
+ let v = m.read_obj::<u16>(used_addr).map(u16::from_le).unwrap();
+ assert_eq!(v, 0);
+
+ q.set_event_idx(true);
+ let avail_addr = GuestAddress(q.lock_state().avail_ring());
+ m.write_obj::<u16>(u16::to_le(2), avail_addr.unchecked_add(2))
+ .unwrap();
+
+ assert!(q.enable_notification(mem).unwrap());
+ q.lock_state().set_next_avail(2);
+ assert!(!q.enable_notification(mem).unwrap());
+
+ m.write_obj::<u16>(u16::to_le(8), avail_addr.unchecked_add(2))
+ .unwrap();
+
+ assert!(q.enable_notification(mem).unwrap());
+ q.lock_state().set_next_avail(8);
+ assert!(!q.enable_notification(mem).unwrap());
+ }
+}
diff --git a/src/state.rs b/src/state.rs
new file mode 100644
index 0000000..aa22fb3
--- /dev/null
+++ b/src/state.rs
@@ -0,0 +1,119 @@
+use crate::{Error, Queue, QueueT};
+use vm_memory::GuestAddress;
+
+/// Representation of the `Queue` state.
+///
+/// The `QueueState` represents the pure state of the `queue` without tracking any implementation
+/// details of the queue. The goal with this design is to minimize the changes required to the
+/// state, and thus the required transitions between states when upgrading or downgrading.
+///
+/// In practice this means that the `QueueState` consists solely of POD (Plain Old Data).
+///
+/// As this structure has all its fields public, it is considered untrusted. A validated
+/// queue can be created from the state by calling the associated `try_from` function.
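+///
+/// # Example
+///
+/// A minimal sketch, not taken from the upstream documentation, of saving a queue's state and
+/// restoring a validated queue from it. It assumes `Queue`, `QueueState` and `QueueT` are
+/// re-exported at the crate root, as in the other examples in this crate.
+///
+/// ```rust
+/// use virtio_queue::{Queue, QueueState, QueueT};
+///
+/// // Create a queue and snapshot its plain-old-data state.
+/// let queue = Queue::new(16).unwrap();
+/// let state: QueueState = queue.state();
+///
+/// // Rebuild a validated queue from the untrusted state; invalid fields would yield an error.
+/// let restored = Queue::try_from(state).unwrap();
+/// assert_eq!(queue, restored);
+/// ```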
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct QueueState {
+ /// The maximum size in elements offered by the device.
+ pub max_size: u16,
+ /// Tail position of the available ring.
+ pub next_avail: u16,
+ /// Head position of the used ring.
+ pub next_used: u16,
+ /// VIRTIO_F_RING_EVENT_IDX negotiated.
+ pub event_idx_enabled: bool,
+ /// The queue size in elements the driver selected.
+ pub size: u16,
+ /// Indicates if the queue is finished with configuration.
+ pub ready: bool,
+ /// Guest physical address of the descriptor table.
+ pub desc_table: u64,
+ /// Guest physical address of the available ring.
+ pub avail_ring: u64,
+ /// Guest physical address of the used ring.
+ pub used_ring: u64,
+}
+
+impl TryFrom<QueueState> for Queue {
+ type Error = Error;
+
+ fn try_from(q_state: QueueState) -> Result<Self, Self::Error> {
+ let mut q = Queue::new(q_state.max_size)?;
+
+ q.set_next_avail(q_state.next_avail);
+ q.set_next_used(q_state.next_used);
+ q.set_event_idx(q_state.event_idx_enabled);
+ q.try_set_size(q_state.size)?;
+ q.set_ready(q_state.ready);
+ q.try_set_desc_table_address(GuestAddress(q_state.desc_table))?;
+ q.try_set_avail_ring_address(GuestAddress(q_state.avail_ring))?;
+ q.try_set_used_ring_address(GuestAddress(q_state.used_ring))?;
+
+ Ok(q)
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn create_valid_queue_state() -> QueueState {
+ let queue = Queue::new(16).unwrap();
+ queue.state()
+ }
+
+ #[test]
+ fn test_empty_queue_state() {
+ let max_size = 16;
+ let queue = Queue::new(max_size).unwrap();
+
+ // Saving the state of a queue on which we didn't do any operation is ok.
+ // Same for restore.
+ let queue_state = queue.state();
+ let restored_q = Queue::try_from(queue_state).unwrap();
+ assert_eq!(queue, restored_q);
+ }
+
+ #[test]
+ fn test_invalid_queue_state() {
+ // Let's generate a state that we know is valid so we can just alter one field at a time.
+ let mut q_state = create_valid_queue_state();
+
+ // Test invalid max_size.
+ // Size too small.
+ q_state.max_size = 0;
+ assert!(Queue::try_from(q_state).is_err());
+ // Size too big.
+ q_state.max_size = u16::MAX;
+ assert!(Queue::try_from(q_state).is_err());
+ // Size not a power of 2.
+ q_state.max_size = 15;
+ assert!(Queue::try_from(q_state).is_err());
+
+ // Test invalid size.
+ let mut q_state = create_valid_queue_state();
+ // Size too small.
+ q_state.size = 0;
+ assert!(Queue::try_from(q_state).is_err());
+ // Size too big.
+ q_state.size = u16::MAX;
+ assert!(Queue::try_from(q_state).is_err());
+ // Size not a power of 2.
+ q_state.size = 15;
+ assert!(Queue::try_from(q_state).is_err());
+
+ // Test invalid desc_table.
+ let mut q_state = create_valid_queue_state();
+ q_state.desc_table = 0xf;
+ assert!(Queue::try_from(q_state).is_err());
+
+ // Test invalid avail_ring.
+ let mut q_state = create_valid_queue_state();
+ q_state.avail_ring = 0x1;
+ assert!(Queue::try_from(q_state).is_err());
+
+ // Test invalid used_ring.
+ let mut q_state = create_valid_queue_state();
+ q_state.used_ring = 0x3;
+ assert!(Queue::try_from(q_state).is_err());
+ }
+}