Define communication messages of vhost-user spec

Only basic messages are defined, and the vhost-user spec is also under development. So feel free to add needed messages. Signed-off-by: Liu Jiang <gerry@linux.alibaba.com> Signed-off-by: Daniel Prilik <daniel@prilik.com>
author: Liu Jiang <gerry@linux.alibaba.com> 2019-04-26 15:44:31 +0800
committer: Andreea Florescu <andreea.florescu15@gmail.com> 2020-09-04 17:59:53 +0300
commit: 560a28589c49b69450a7c6ceef3a817155af050c (patch)
tree: 7d27fd51963599de51f0b292a1725331ce294682
parent: 4f0ca8c8608959d8380e01257913e2a66a88118c (diff)
download: vmm_vhost-560a28589c49b69450a7c6ceef3a817155af050c.tar.gz
5 files changed, 970 insertions, 5 deletions
diff --git a/Cargo.toml b/Cargo.toml
index eb42432..8c676b3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,8 +9,11 @@ license = "Apache-2.0 or BSD-3-Clause"
 default = []
 vhost-vsock = []
 vhost-kern = ["vm-memory"]
+vhost-user-master = []
+vhost-user-slave = []
 
 [dependencies]
+bitflags = ">=1.0.1"
 libc = ">=0.2.39"
 
 vmm-sys-util = ">=0.3.1"
diff --git a/src/lib.rs b/src/lib.rs
index cf78056..8b7d428 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,6 +32,11 @@
 
 #![deny(missing_docs)]
 
+#[cfg_attr(
+    any(feature = "vhost-user-master", feature = "vhost-user-slave"),
+    macro_use
+)]
+extern crate bitflags;
 extern crate libc;
 #[cfg(feature = "vhost-kern")]
 extern crate vm_memory;
@@ -43,6 +48,8 @@ pub use backend::*;
 
 #[cfg(feature = "vhost-kern")]
 pub mod vhost_kern;
+#[cfg(any(feature = "vhost-user-master", feature = "vhost-user-slave"))]
+pub mod vhost_user;
 #[cfg(feature = "vhost-vsock")]
 pub mod vsock;
 
@@ -73,6 +80,9 @@ pub enum Error {
     IoctlError(std::io::Error),
     /// Error from IO subsystem.
     IOError(std::io::Error),
+    #[cfg(any(feature = "vhost-user-master", feature = "vhost-user-slave"))]
+    /// Error from the vhost-user subsystem.
+    VhostUserProtocol(crate::vhost_user::Error),
 }
 
 impl std::fmt::Display for Error {
@@ -91,9 +101,18 @@ impl std::fmt::Display for Error {
             Error::VhostOpen(e) => write!(f, "failure in opening vhost file: {}", e),
             #[cfg(feature = "vhost-kern")]
             Error::IoctlError(e) => write!(f, "failure in vhost ioctl: {}", e),
+            #[cfg(any(feature = "vhost-user-master", feature = "vhost-user-slave"))]
+            Error::VhostUserProtocol(e) => write!(f, "vhost-user error: {}", e),
         }
     }
 }
 
+#[cfg(any(feature = "vhost-user-master", feature = "vhost-user-slave"))]
+impl std::convert::From<crate::vhost_user::Error> for Error {
+    fn from(err: crate::vhost_user::Error) -> Self {
+        Error::VhostUserProtocol(err)
+    }
+}
+
 /// Result of vhost operations
 pub type Result<T> = std::result::Result<T, Error>;
diff --git a/src/vhost_kern/mod.rs b/src/vhost_kern/mod.rs
index 51fd671..5644118 100644
--- a/src/vhost_kern/mod.rs
+++ b/src/vhost_kern/mod.rs
@@ -47,15 +47,15 @@ pub trait VhostKernBackend<'a>: AsRawFd {
     fn mem(&self) -> &Self::M;
 
     /// Check whether the ring configuration is valid.
-    fn is_valid(
-        &self,
-        config_data: &VringConfigData
-    ) -> bool {
+    fn is_valid(&self, config_data: &VringConfigData) -> bool {
         let queue_size = config_data.queue_size;
         let desc_table_size = 16 * u64::from(queue_size) as GuestUsize;
         let avail_ring_size = 6 + 2 * u64::from(queue_size) as GuestUsize;
         let used_ring_size = 6 + 8 * u64::from(queue_size) as GuestUsize;
-        if queue_size > config_data.queue_max_size || queue_size == 0 || (queue_size & (queue_size - 1)) != 0 {
+        if queue_size > config_data.queue_max_size
+            || queue_size == 0
+            || (queue_size & (queue_size - 1)) != 0
+        {
             false
         } else if GuestAddress(config_data.desc_table_addr)
             .checked_add(desc_table_size)
diff --git a/src/vhost_user/message.rs b/src/vhost_user/message.rs
new file mode 100644
index 0000000..51b99b1
--- /dev/null
+++ b/src/vhost_user/message.rs
@@ -0,0 +1,818 @@
+// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Define communication messages for the vhost-user protocol.
+//!
+//! For message definition, please refer to the [vhost-user spec](https://github.com/qemu/qemu/blob/f7526eece29cd2e36a63b6703508b24453095eb8/docs/interop/vhost-user.txt).
+
+#![allow(dead_code)]
+#![allow(non_camel_case_types)]
+
+use std::fmt::Debug;
+use std::marker::PhantomData;
+
+use VringConfigData;
+
+/// The vhost-user specification uses a field of u32 to store message length.
+/// On the other hand, preallocated buffers are needed to receive messages from the Unix domain
+/// socket. To preallocating a 4GB buffer for each vhost-user message is really just an overhead.
+/// Among all defined vhost-user messages, only the VhostUserConfig and VhostUserMemory has variable
+/// message size. For the VhostUserConfig, a maximum size of 4K is enough because the user
+/// configuration space for virtio devices is (4K - 0x100) bytes at most. For the VhostUserMemory,
+/// 4K should be enough too because it can support 255 memory regions at most.
+pub const MAX_MSG_SIZE: usize = 0x1000;
+
+/// The VhostUserMemory message has variable message size and variable number of attached file
+/// descriptors. Each user memory region entry in the message payload occupies 32 bytes,
+/// so setting maximum number of attached file descriptors based on the maximum message size.
+/// But rust only implements Default and AsMut traits for arrays with 0 - 32 entries, so further
+/// reduce the maximum number...
+// pub const MAX_ATTACHED_FD_ENTRIES: usize = (MAX_MSG_SIZE - 8) / 32;
+pub const MAX_ATTACHED_FD_ENTRIES: usize = 32;
+
+/// Starting position (inclusion) of the device configuration space in virtio devices.
+pub const VHOST_USER_CONFIG_OFFSET: u32 = 0x100;
+
+/// Ending position (exclusion) of the device configuration space in virtio devices.
+pub const VHOST_USER_CONFIG_SIZE: u32 = 0x1000;
+
+/// Maximum number of vrings supported.
+pub const VHOST_USER_MAX_VRINGS: u64 = 0xFFu64;
+
+pub(super) trait Req:
+    Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Into<u32>
+{
+    fn is_valid(&self) -> bool;
+}
+
+/// Type of requests sending from masters to slaves.
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum MasterReq {
+    /// Null operation.
+    NOOP = 0,
+    /// Get from the underlying vhost implementation the features bit mask.
+    GET_FEATURES = 1,
+    /// Enable features in the underlying vhost implementation using a bit mask.
+    SET_FEATURES = 2,
+    /// Set the current Master as an owner of the session.
+    SET_OWNER = 3,
+    /// No longer used.
+    RESET_OWNER = 4,
+    /// Set the memory map regions on the slave so it can translate the vring addresses.
+    SET_MEM_TABLE = 5,
+    /// Set logging shared memory space.
+    SET_LOG_BASE = 6,
+    /// Set the logging file descriptor, which is passed as ancillary data.
+    SET_LOG_FD = 7,
+    /// Set the size of the queue.
+    SET_VRING_NUM = 8,
+    /// Set the addresses of the different aspects of the vring.
+    SET_VRING_ADDR = 9,
+    /// Set the base offset in the available vring.
+    SET_VRING_BASE = 10,
+    /// Get the available vring base offset.
+    GET_VRING_BASE = 11,
+    /// Set the event file descriptor for adding buffers to the vring.
+    SET_VRING_KICK = 12,
+    /// Set the event file descriptor to signal when buffers are used.
+    SET_VRING_CALL = 13,
+    /// Set the event file descriptor to signal when error occurs.
+    SET_VRING_ERR = 14,
+    /// Get the protocol feature bit mask from the underlying vhost implementation.
+    GET_PROTOCOL_FEATURES = 15,
+    /// Enable protocol features in the underlying vhost implementation.
+    SET_PROTOCOL_FEATURES = 16,
+    /// Query how many queues the backend supports.
+    GET_QUEUE_NUM = 17,
+    /// Signal slave to enable or disable corresponding vring.
+    SET_VRING_ENABLE = 18,
+    /// Ask vhost user backend to broadcast a fake RARP to notify the migration is terminated
+    /// for guest that does not support GUEST_ANNOUNCE.
+    SEND_RARP = 19,
+    /// Set host MTU value exposed to the guest.
+    NET_SET_MTU = 20,
+    /// Set the socket file descriptor for slave initiated requests.
+    SET_SLAVE_REQ_FD = 21,
+    /// Send IOTLB messages with struct vhost_iotlb_msg as payload.
+    IOTLB_MSG = 22,
+    /// Set the endianness of a VQ for legacy devices.
+    SET_VRING_ENDIAN = 23,
+    /// Fetch the contents of the virtio device configuration space.
+    GET_CONFIG = 24,
+    /// Change the contents of the virtio device configuration space.
+    SET_CONFIG = 25,
+    /// Create a session for crypto operation.
+    CREATE_CRYPTO_SESSION = 26,
+    /// Close a session for crypto operation.
+    CLOSE_CRYPTO_SESSION = 27,
+    /// Advise slave that a migration with postcopy enabled is underway.
+    POSTCOPY_ADVISE = 28,
+    /// Advise slave that a transition to postcopy mode has happened.
+    POSTCOPY_LISTEN = 29,
+    /// Advise that postcopy migration has now completed.
+    POSTCOPY_END = 30,
+    /// Get a shared buffer from slave.
+    GET_INFLIGHT_FD = 31,
+    /// Send the shared inflight buffer back to slave
+    SET_INFLIGHT_FD = 32,
+    /// Upper bound of valid commands.
+    MAX_CMD = 33,
+}
+
+impl Into<u32> for MasterReq {
+    fn into(self) -> u32 {
+        self as u32
+    }
+}
+
+impl Req for MasterReq {
+    fn is_valid(&self) -> bool {
+        (*self > MasterReq::NOOP) && (*self < MasterReq::MAX_CMD)
+    }
+}
+
+/// Type of requests sending from slaves to masters.
+#[repr(u32)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum SlaveReq {
+    /// Null operation.
+    NOOP = 0,
+    /// Send IOTLB messages with struct vhost_iotlb_msg as payload.
+    IOTLB_MSG = 1,
+    /// Notify that the virtio device's configuration space has changed.
+    CONFIG_CHANGE_MSG = 2,
+    /// Set host notifier for a specified queue.
+    VRING_HOST_NOTIFIER_MSG = 3,
+    /// Virtio-fs draft: map file content into the window.
+    FS_MAP = 4,
+    /// Virtio-fs draft: unmap file content from the window.
+    FS_UNMAP = 5,
+    /// Virtio-fs draft: sync file content.
+    FS_SYNC = 6,
+    /// Upper bound of valid commands.
+    MAX_CMD = 7,
+}
+
+impl Into<u32> for SlaveReq {
+    fn into(self) -> u32 {
+        self as u32
+    }
+}
+
+impl Req for SlaveReq {
+    fn is_valid(&self) -> bool {
+        (*self > SlaveReq::NOOP) && (*self < SlaveReq::MAX_CMD)
+    }
+}
+
+/// Vhost message Validator.
+pub trait VhostUserMsgValidator {
+    /// Validate message syntax only.
+    /// It doesn't validate message semantics such as protocol version number and dependency
+    /// on feature flags etc.
+    fn is_valid(&self) -> bool {
+        true
+    }
+}
+
+// Bit mask for common message flags.
+bitflags! {
+    /// Common message flags for vhost-user requests and replies.
+    pub struct VhostUserHeaderFlag: u32 {
+        /// Bits[0..2] is message version number.
+        const VERSION = 0x3;
+        /// Mark message as reply.
+        const REPLY = 0x4;
+        /// Sender anticipates a reply message from the peer.
+        const NEED_REPLY = 0x8;
+        /// All valid bits.
+        const ALL_FLAGS = 0xc;
+        /// All reserved bits.
+        const RESERVED_BITS = !0xf;
+    }
+}
+
+/// Common message header for vhost-user requests and replies.
+/// A vhost-user message consists of 3 header fields and an optional payload. All numbers are in the
+/// machine native byte order.
+#[allow(safe_packed_borrows)]
+#[repr(packed)]
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub(super) struct VhostUserMsgHeader<R: Req> {
+    request: u32,
+    flags: u32,
+    size: u32,
+    _r: PhantomData<R>,
+}
+
+impl<R: Req> VhostUserMsgHeader<R> {
+    /// Create a new instance of `VhostUserMsgHeader`.
+    pub fn new(request: R, flags: u32, size: u32) -> Self {
+        // Default to protocol version 1
+        let fl = (flags & VhostUserHeaderFlag::ALL_FLAGS.bits()) | 0x1;
+        VhostUserMsgHeader {
+            request: request.into(),
+            flags: fl,
+            size,
+            _r: PhantomData,
+        }
+    }
+
+    /// Get message type.
+    pub fn get_code(&self) -> R {
+        // It's safe because R is marked as repr(u32).
+        unsafe { std::mem::transmute_copy::<u32, R>(&self.request) }
+    }
+
+    /// Set message type.
+    pub fn set_code(&mut self, request: R) {
+        self.request = request.into();
+    }
+
+    /// Get message version number.
+    pub fn get_version(&self) -> u32 {
+        self.flags & 0x3
+    }
+
+    /// Set message version number.
+    pub fn set_version(&mut self, ver: u32) {
+        self.flags &= !0x3;
+        self.flags |= ver & 0x3;
+    }
+
+    /// Check whether it's a reply message.
+    pub fn is_reply(&self) -> bool {
+        (self.flags & VhostUserHeaderFlag::REPLY.bits()) != 0
+    }
+
+    /// Mark message as reply.
+    pub fn set_reply(&mut self, is_reply: bool) {
+        if is_reply {
+            self.flags |= VhostUserHeaderFlag::REPLY.bits();
+        } else {
+            self.flags &= !VhostUserHeaderFlag::REPLY.bits();
+        }
+    }
+
+    /// Check whether reply for this message is requested.
+    pub fn is_need_reply(&self) -> bool {
+        (self.flags & VhostUserHeaderFlag::NEED_REPLY.bits()) != 0
+    }
+
+    /// Mark that reply for this message is needed.
+    pub fn set_need_reply(&mut self, need_reply: bool) {
+        if need_reply {
+            self.flags |= VhostUserHeaderFlag::NEED_REPLY.bits();
+        } else {
+            self.flags &= !VhostUserHeaderFlag::NEED_REPLY.bits();
+        }
+    }
+
+    /// Check whether it's the reply message for the request `req`.
+    pub fn is_reply_for(&self, req: &VhostUserMsgHeader<R>) -> bool {
+        self.is_reply() && !req.is_reply() && self.get_code() == req.get_code()
+    }
+
+    /// Get message size.
+    pub fn get_size(&self) -> u32 {
+        self.size
+    }
+
+    /// Set message size.
+    pub fn set_size(&mut self, size: u32) {
+        self.size = size;
+    }
+}
+
+impl<R: Req> Default for VhostUserMsgHeader<R> {
+    fn default() -> Self {
+        VhostUserMsgHeader {
+            request: 0,
+            flags: 0x1,
+            size: 0,
+            _r: PhantomData,
+        }
+    }
+}
+
+impl<T: Req> VhostUserMsgValidator for VhostUserMsgHeader<T> {
+    #[allow(clippy::if_same_then_else)]
+    fn is_valid(&self) -> bool {
+        if !self.get_code().is_valid() {
+            return false;
+        } else if self.size as usize > MAX_MSG_SIZE {
+            return false;
+        } else if self.get_version() != 0x1 {
+            return false;
+        } else if (self.flags & VhostUserHeaderFlag::RESERVED_BITS.bits()) != 0 {
+            return false;
+        }
+        true
+    }
+}
+
+// Bit mask for transport specific flags in VirtIO feature set defined by vhost-user.
+bitflags! {
+    /// Transport specific flags in VirtIO feature set defined by vhost-user.
+    pub struct VhostUserVirtioFeatures: u64 {
+        /// Feature flag for the protocol feature.
+        const PROTOCOL_FEATURES = 0x4000_0000;
+    }
+}
+
+// Bit mask for vhost-user protocol feature flags.
+bitflags! {
+    /// Vhost-user protocol feature flags.
+    pub struct VhostUserProtocolFeatures: u64 {
+        /// Support multiple queues.
+        const MQ = 0x0000_0001;
+        /// Support logging through shared memory fd.
+        const LOG_SHMFD = 0x0000_0002;
+        /// Support broadcasting fake RARP packet.
+        const RARP = 0x0000_0004;
+        /// Support sending reply messages for requests with NEED_REPLY flag set.
+        const REPLY_ACK = 0x0000_0008;
+        /// Support setting MTU for virtio-net devices.
+        const MTU = 0x0000_0010;
+        /// Allow the slave to send requests to the master by an optional communication channel.
+        const SLAVE_REQ = 0x0000_0020;
+        /// Support setting slave endian by SET_VRING_ENDIAN.
+        const CROSS_ENDIAN = 0x0000_0040;
+        /// Support crypto operations.
+        const CRYPTO_SESSION = 0x0000_0080;
+        /// Support sending userfault_fd from slaves to masters.
+        const PAGEFAULT = 0x0000_0100;
+        /// Support Virtio device configuration.
+        const CONFIG = 0x0000_0200;
+        /// Allow the slave to send fds (at most 8 descriptors in each message) to the master.
+        const SLAVE_SEND_FD = 0x0000_0400;
+        /// Allow the slave to register a host notifier.
+        const HOST_NOTIFIER = 0x0000_0800;
+    }
+}
+
+/// A generic message to encapsulate a 64-bit value.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserU64 {
+    /// The encapsulated 64-bit common value.
+    pub value: u64,
+}
+
+impl VhostUserU64 {
+    /// Create a new instance.
+    pub fn new(value: u64) -> Self {
+        VhostUserU64 { value }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserU64 {}
+
+/// Memory region descriptor for the SET_MEM_TABLE request.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserMemory {
+    /// Number of memory regions in the payload.
+    pub num_regions: u32,
+    /// Padding for alignment.
+    pub padding1: u32,
+}
+
+impl VhostUserMemory {
+    /// Create a new instance.
+    pub fn new(cnt: u32) -> Self {
+        VhostUserMemory {
+            num_regions: cnt,
+            padding1: 0,
+        }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserMemory {
+    #[allow(clippy::if_same_then_else)]
+    fn is_valid(&self) -> bool {
+        if self.padding1 != 0 {
+            return false;
+        } else if self.num_regions == 0 || self.num_regions > MAX_ATTACHED_FD_ENTRIES as u32 {
+            return false;
+        }
+        true
+    }
+}
+
+/// Memory region descriptors as payload for the SET_MEM_TABLE request.
+#[repr(packed)]
+#[derive(Default, Clone, Copy)]
+pub struct VhostUserMemoryRegion {
+    /// Guest physical address of the memory region.
+    pub guest_phys_addr: u64,
+    /// Size of the memory region.
+    pub memory_size: u64,
+    /// Virtual address in the current process.
+    pub user_addr: u64,
+    /// Offset where region starts in the mapped memory.
+    pub mmap_offset: u64,
+}
+
+impl VhostUserMemoryRegion {
+    /// Create a new instance.
+    pub fn new(guest_phys_addr: u64, memory_size: u64, user_addr: u64, mmap_offset: u64) -> Self {
+        VhostUserMemoryRegion {
+            guest_phys_addr,
+            memory_size,
+            user_addr,
+            mmap_offset,
+        }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserMemoryRegion {
+    fn is_valid(&self) -> bool {
+        if self.memory_size == 0
+            || self.guest_phys_addr.checked_add(self.memory_size).is_none()
+            || self.user_addr.checked_add(self.memory_size).is_none()
+            || self.mmap_offset.checked_add(self.memory_size).is_none()
+        {
+            return false;
+        }
+        true
+    }
+}
+
+/// Payload of the VhostUserMemory message.
+pub type VhostUserMemoryPayload = Vec<VhostUserMemoryRegion>;
+
+/// Vring state descriptor.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserVringState {
+    /// Vring index.
+    pub index: u32,
+    /// A common 32bit value to encapsulate vring state etc.
+    pub num: u32,
+}
+
+impl VhostUserVringState {
+    /// Create a new instance.
+    pub fn new(index: u32, num: u32) -> Self {
+        VhostUserVringState { index, num }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserVringState {}
+
+// Bit mask for vring address flags.
+bitflags! {
+    /// Flags for vring address.
+    pub struct VhostUserVringAddrFlags: u32 {
+        /// Support log of vring operations.
+        /// Modifications to "used" vring should be logged.
+        const VHOST_VRING_F_LOG = 0x1;
+    }
+}
+
+/// Vring address descriptor.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserVringAddr {
+    /// Vring index.
+    pub index: u32,
+    /// Vring flags defined by VhostUserVringAddrFlags.
+    pub flags: u32,
+    /// Ring address of the vring descriptor table.
+    pub descriptor: u64,
+    /// Ring address of the vring used ring.
+    pub used: u64,
+    /// Ring address of the vring available ring.
+    pub available: u64,
+    /// Guest address for logging.
+    pub log: u64,
+}
+
+impl VhostUserVringAddr {
+    /// Create a new instance.
+    pub fn new(
+        index: u32,
+        flags: VhostUserVringAddrFlags,
+        descriptor: u64,
+        used: u64,
+        available: u64,
+        log: u64,
+    ) -> Self {
+        VhostUserVringAddr {
+            index,
+            flags: flags.bits(),
+            descriptor,
+            used,
+            available,
+            log,
+        }
+    }
+
+    /// Create a new instance from `VringConfigData`.
+    #[cfg_attr(feature = "cargo-clippy", allow(clippy::identity_conversion))]
+    pub fn from_config_data(index: u32, config_data: &VringConfigData) -> Self {
+        let log_addr = config_data.log_addr.unwrap_or(0);
+        VhostUserVringAddr {
+            index,
+            flags: config_data.flags,
+            descriptor: config_data.desc_table_addr,
+            used: config_data.used_ring_addr,
+            available: config_data.avail_ring_addr,
+            log: log_addr,
+        }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserVringAddr {
+    #[allow(clippy::if_same_then_else)]
+    fn is_valid(&self) -> bool {
+        if (self.flags & !VhostUserVringAddrFlags::all().bits()) != 0 {
+            return false;
+        } else if self.descriptor & 0xf != 0 {
+            return false;
+        } else if self.available & 0x1 != 0 {
+            return false;
+        } else if self.used & 0x3 != 0 {
+            return false;
+        }
+        true
+    }
+}
+
+// Bit mask for the vhost-user device configuration message.
+bitflags! {
+    /// Flags for the device configuration message.
+    pub struct VhostUserConfigFlags: u32 {
+        /// TODO: seems the vhost-user spec has refined the definition, EMPTY is removed.
+        const EMPTY = 0x0;
+        /// Vhost master messages used for writable fields
+        const WRITABLE = 0x1;
+        /// Mark that message is part of an ongoing live-migration operation.
+        const LIVE_MIGRATION = 0x2;
+    }
+}
+
+/// Message to read/write device configuration space.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserConfig {
+    /// Offset of virtio device's configuration space.
+    pub offset: u32,
+    /// Configuration space access size in bytes.
+    pub size: u32,
+    /// Flags for the device configuration operation.
+    pub flags: u32,
+}
+
+impl VhostUserConfig {
+    /// Create a new instance.
+    pub fn new(offset: u32, size: u32, flags: VhostUserConfigFlags) -> Self {
+        VhostUserConfig {
+            offset,
+            size,
+            flags: flags.bits(),
+        }
+    }
+}
+
+impl VhostUserMsgValidator for VhostUserConfig {
+    #[allow(clippy::if_same_then_else)]
+    fn is_valid(&self) -> bool {
+        if (self.flags & !VhostUserConfigFlags::all().bits()) != 0 {
+            return false;
+        } else if self.offset < VHOST_USER_CONFIG_OFFSET
+            || self.offset >= VHOST_USER_CONFIG_SIZE
+            || self.size == 0
+            || self.size > (VHOST_USER_CONFIG_SIZE - VHOST_USER_CONFIG_OFFSET)
+            || self.size + self.offset > VHOST_USER_CONFIG_SIZE
+        {
+            return false;
+        }
+        true
+    }
+}
+
+/// Payload for the VhostUserConfig message.
+pub type VhostUserConfigPayload = Vec<u8>;
+
+/*
+ * TODO: support dirty log, live migration and IOTLB operations.
+#[repr(packed)]
+pub struct VhostUserVringArea {
+    pub index: u32,
+    pub flags: u32,
+    pub size: u64,
+    pub offset: u64,
+}
+
+#[repr(packed)]
+pub struct VhostUserLog {
+    pub size: u64,
+    pub offset: u64,
+}
+
+#[repr(packed)]
+pub struct VhostUserIotlb {
+    pub iova: u64,
+    pub size: u64,
+    pub user_addr: u64,
+    pub permission: u8,
+    pub optype: u8,
+}
+*/
+
+// Bit mask for flags in virtio-fs slave messages
+bitflags! {
+    #[derive(Default)]
+    /// Flags for virtio-fs slave messages.
+    pub struct VhostUserFSSlaveMsgFlags: u64 {
+        /// Empty permission.
+        const EMPTY = 0x0;
+        /// Read permission.
+        const MAP_R = 0x1;
+        /// Write permission.
+        const MAP_W = 0x2;
+    }
+}
+
+/// Max entries in one virtio-fs slave request.
+const VHOST_USER_FS_SLAVE_ENTRIES: usize = 8;
+
+/// Slave request message to update the MMIO window.
+#[repr(packed)]
+#[derive(Default)]
+pub struct VhostUserFSSlaveMsg {
+    /// TODO:
+    pub fd_offset: [u64; VHOST_USER_FS_SLAVE_ENTRIES],
+    /// TODO:
+    pub cache_offset: [u64; VHOST_USER_FS_SLAVE_ENTRIES],
+    /// Size of region to map.
+    pub len: [u64; VHOST_USER_FS_SLAVE_ENTRIES],
+    /// Flags for the mmap operation
+    pub flags: [VhostUserFSSlaveMsgFlags; VHOST_USER_FS_SLAVE_ENTRIES],
+}
+
+impl VhostUserMsgValidator for VhostUserFSSlaveMsg {
+    fn is_valid(&self) -> bool {
+        for i in 0..VHOST_USER_FS_SLAVE_ENTRIES {
+            if ({ self.flags[i] }.bits() & !VhostUserFSSlaveMsgFlags::all().bits()) != 0
+                || self.fd_offset[i].checked_add(self.len[i]).is_none()
+                || self.cache_offset[i].checked_add(self.len[i]).is_none()
+            {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::mem;
+
+    #[test]
+    fn check_request_code() {
+        let code = MasterReq::NOOP;
+        assert!(!code.is_valid());
+        let code = MasterReq::MAX_CMD;
+        assert!(!code.is_valid());
+        let code = MasterReq::GET_FEATURES;
+        assert!(code.is_valid());
+    }
+
+    #[test]
+    fn msg_header_ops() {
+        let mut hdr = VhostUserMsgHeader::new(MasterReq::GET_FEATURES, 0, 0x100);
+        assert_eq!(hdr.get_code(), MasterReq::GET_FEATURES);
+        hdr.set_code(MasterReq::SET_FEATURES);
+        assert_eq!(hdr.get_code(), MasterReq::SET_FEATURES);
+
+        assert_eq!(hdr.get_version(), 0x1);
+
+        assert_eq!(hdr.is_reply(), false);
+        hdr.set_reply(true);
+        assert_eq!(hdr.is_reply(), true);
+        hdr.set_reply(false);
+
+        assert_eq!(hdr.is_need_reply(), false);
+        hdr.set_need_reply(true);
+        assert_eq!(hdr.is_need_reply(), true);
+        hdr.set_need_reply(false);
+
+        assert_eq!(hdr.get_size(), 0x100);
+        hdr.set_size(0x200);
+        assert_eq!(hdr.get_size(), 0x200);
+
+        assert_eq!(hdr.is_need_reply(), false);
+        assert_eq!(hdr.is_reply(), false);
+        assert_eq!(hdr.get_version(), 0x1);
+
+        // Check message length
+        assert!(hdr.is_valid());
+        hdr.set_size(0x2000);
+        assert!(!hdr.is_valid());
+        hdr.set_size(0x100);
+        assert_eq!(hdr.get_size(), 0x100);
+        assert!(hdr.is_valid());
+        hdr.set_size((MAX_MSG_SIZE - mem::size_of::<VhostUserMsgHeader<MasterReq>>()) as u32);
+        assert!(hdr.is_valid());
+        hdr.set_size(0x0);
+        assert!(hdr.is_valid());
+
+        // Check version
+        hdr.set_version(0x0);
+        assert!(!hdr.is_valid());
+        hdr.set_version(0x2);
+        assert!(!hdr.is_valid());
+        hdr.set_version(0x1);
+        assert!(hdr.is_valid());
+    }
+
+    #[test]
+    fn check_user_memory() {
+        let mut msg = VhostUserMemory::new(1);
+        assert!(msg.is_valid());
+        msg.num_regions = MAX_ATTACHED_FD_ENTRIES as u32;
+        assert!(msg.is_valid());
+
+        msg.num_regions += 1;
+        assert!(!msg.is_valid());
+        msg.num_regions = 0xFFFFFFFF;
+        assert!(!msg.is_valid());
+        msg.num_regions = MAX_ATTACHED_FD_ENTRIES as u32;
+        msg.padding1 = 1;
+        assert!(!msg.is_valid());
+    }
+
+    #[test]
+    fn check_user_memory_region() {
+        let mut msg = VhostUserMemoryRegion {
+            guest_phys_addr: 0,
+            memory_size: 0x1000,
+            user_addr: 0,
+            mmap_offset: 0,
+        };
+        assert!(msg.is_valid());
+        msg.guest_phys_addr = 0xFFFFFFFFFFFFEFFF;
+        assert!(msg.is_valid());
+        msg.guest_phys_addr = 0xFFFFFFFFFFFFF000;
+        assert!(!msg.is_valid());
+        msg.guest_phys_addr = 0xFFFFFFFFFFFF0000;
+        msg.memory_size = 0;
+        assert!(!msg.is_valid());
+    }
+
+    #[test]
+    fn check_user_vring_addr() {
+        let mut msg =
+            VhostUserVringAddr::new(0, VhostUserVringAddrFlags::all(), 0x0, 0x0, 0x0, 0x0);
+        assert!(msg.is_valid());
+
+        msg.descriptor = 1;
+        assert!(!msg.is_valid());
+        msg.descriptor = 0;
+
+        msg.available = 1;
+        assert!(!msg.is_valid());
+        msg.available = 0;
+
+        msg.used = 1;
+        assert!(!msg.is_valid());
+        msg.used = 0;
+
+        msg.flags |= 0x80000000;
+        assert!(!msg.is_valid());
+        msg.flags &= !0x80000000;
+    }
+
+    #[test]
+    fn check_user_config_msg() {
+        let mut msg = VhostUserConfig::new(
+            VHOST_USER_CONFIG_OFFSET,
+            VHOST_USER_CONFIG_SIZE - VHOST_USER_CONFIG_OFFSET,
+            VhostUserConfigFlags::EMPTY,
+        );
+
+        assert!(msg.is_valid());
+        msg.size = 0;
+        assert!(!msg.is_valid());
+        msg.size = 1;
+        assert!(msg.is_valid());
+        msg.offset = 0;
+        assert!(!msg.is_valid());
+        msg.offset = VHOST_USER_CONFIG_SIZE;
+        assert!(!msg.is_valid());
+        msg.offset = VHOST_USER_CONFIG_SIZE - 1;
+        assert!(msg.is_valid());
+        msg.size = 2;
+        assert!(!msg.is_valid());
+        msg.size = 1;
+        msg.flags |= VhostUserConfigFlags::WRITABLE.bits();
+        assert!(msg.is_valid());
+        msg.flags |= 0x4;
+        assert!(!msg.is_valid());
+    }
+}
diff --git a/src/vhost_user/mod.rs b/src/vhost_user/mod.rs
new file mode 100644
index 0000000..a9d3537
--- /dev/null
+++ b/src/vhost_user/mod.rs
@@ -0,0 +1,125 @@
+// Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! The protocol for vhost-user is based on the existing implementation of vhost for the Linux
+//! Kernel. The protocol defines two sides of the communication, master and slave. Master is
+//! the application that shares its virtqueues. Slave is the consumer of the virtqueues.
+//!
+//! The communication channel between the master and the slave includes two sub channels. One is
+//! used to send requests from the master to the slave and optional replies from the slave to the
+//! master. This sub channel is created on master startup by connecting to the slave service
+//! endpoint. The other is used to send requests from the slave to the master and optional replies
+//! from the master to the slave. This sub channel is created by the master issuing a
+//! VHOST_USER_SET_SLAVE_REQ_FD request to the slave with an auxiliary file descriptor.
+//!
+//! Unix domain socket is used as the underlying communication channel because the master needs to
+//! send file descriptors to the slave.
+//!
+//! Most messages that can be sent via the Unix domain socket implementing vhost-user have an
+//! equivalent ioctl to the kernel implementation.
+
+use libc;
+use std::io::Error as IOError;
+
+pub mod message;
+
+/// Errors for vhost-user operations
+#[derive(Debug)]
+pub enum Error {
+    /// Invalid parameters.
+    InvalidParam,
+    /// Unsupported operations due to that the protocol feature hasn't been negotiated.
+    InvalidOperation,
+    /// Invalid message format, flag or content.
+    InvalidMessage,
+    /// Only part of a message have been sent or received successfully
+    PartialMessage,
+    /// Message is too large
+    OversizedMsg,
+    /// Fd array in question is too big or too small
+    IncorrectFds,
+    /// Can't connect to peer.
+    SocketConnect(std::io::Error),
+    /// Generic socket errors.
+    SocketError(std::io::Error),
+    /// The socket is broken or has been closed.
+    SocketBroken(std::io::Error),
+    /// Should retry the socket operation again.
+    SocketRetry(std::io::Error),
+}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Error::InvalidParam => write!(f, "invalid parameters"),
+            Error::InvalidOperation => write!(f, "invalid operation"),
+            Error::InvalidMessage => write!(f, "invalid message"),
+            Error::PartialMessage => write!(f, "partial message"),
+            Error::OversizedMsg => write!(f, "oversized message"),
+            Error::IncorrectFds => write!(f, "wrong number of attached fds"),
+            Error::SocketError(e) => write!(f, "socket error: {}", e),
+            Error::SocketConnect(e) => write!(f, "can't connect to peer: {}", e),
+            Error::SocketBroken(e) => write!(f, "socket is broken: {}", e),
+            Error::SocketRetry(e) => write!(f, "temporary socket error: {}", e),
+        }
+    }
+}
+
+impl Error {
+    /// Determine whether to rebuild the underline communication channel.
+    pub fn should_reconnect(&self) -> bool {
+        match *self {
+            // Should reconnect because it may be caused by temporary network errors.
+            Error::PartialMessage => true,
+            // Should reconnect because the underline socket is broken.
+            Error::SocketBroken(_) => true,
+            // Should just retry the IO operation instead of rebuilding the underline connection.
+            Error::SocketRetry(_) => false,
+            Error::InvalidParam | Error::InvalidOperation => false,
+            Error::InvalidMessage | Error::IncorrectFds | Error::OversizedMsg => false,
+            Error::SocketError(_) | Error::SocketConnect(_) => false,
+        }
+    }
+}
+
+impl std::convert::From<vmm_sys_util::errno::Error> for Error {
+    /// Convert raw socket errors into meaningful vhost-user errors.
+    ///
+    /// The vmm_sys_util::errno::Error is a simple wrapper over the raw errno, which doesn't means
+    /// much to the vhost-user connection manager. So convert it into meaningful errors to simplify
+    /// the connection manager logic.
+    ///
+    /// # Return:
+    /// * - Error::SocketRetry: temporary error caused by signals or short of resources.
+    /// * - Error::SocketBroken: the underline socket is broken.
+    /// * - Error::SocketError: other socket related errors.
+    #[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux
+    fn from(err: vmm_sys_util::errno::Error) -> Self {
+        match err.errno() {
+            // The socket is marked nonblocking and the requested operation would block.
+            libc::EAGAIN => Error::SocketRetry(IOError::from_raw_os_error(libc::EAGAIN)),
+            // The socket is marked nonblocking and the requested operation would block.
+            libc::EWOULDBLOCK => Error::SocketRetry(IOError::from_raw_os_error(libc::EWOULDBLOCK)),
+            // A signal occurred before any data was transmitted
+            libc::EINTR => Error::SocketRetry(IOError::from_raw_os_error(libc::EINTR)),
+            // The  output  queue  for  a network interface was full.  This generally indicates
+            // that the interface has stopped sending, but may be caused by transient congestion.
+            libc::ENOBUFS => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOBUFS)),
+            // No memory available.
+            libc::ENOMEM => Error::SocketRetry(IOError::from_raw_os_error(libc::ENOMEM)),
+            // Connection reset by peer.
+            libc::ECONNRESET => Error::SocketBroken(IOError::from_raw_os_error(libc::ECONNRESET)),
+            // The local end has been shut down on a connection oriented socket. In this  case the
+            // process will also receive a SIGPIPE unless MSG_NOSIGNAL is set.
+            libc::EPIPE => Error::SocketBroken(IOError::from_raw_os_error(libc::EPIPE)),
+            // Write permission is denied on the destination socket file, or search permission is
+            // denied for one of the directories the path prefix.
+            libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)),
+            // Catch all other errors
+            e => Error::SocketError(IOError::from_raw_os_error(e)),
+        }
+    }
+}
+
+/// Result of vhost-user operations
+pub type Result<T> = std::result::Result<T, Error>;
author	Liu Jiang <gerry@linux.alibaba.com>	2019-04-26 15:44:31 +0800
committer	Andreea Florescu <andreea.florescu15@gmail.com>	2020-09-04 17:59:53 +0300
commit	560a28589c49b69450a7c6ceef3a817155af050c (patch)
tree	7d27fd51963599de51f0b292a1725331ce294682
parent	4f0ca8c8608959d8380e01257913e2a66a88118c (diff)
download	vmm_vhost-560a28589c49b69450a7c6ceef3a817155af050c.tar.gz