virtio/vhost: fixes

some bugfixes for virtio/vhost Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJXq0VGAAoJECgfDbjSjVRpC1sIAIJcgNiC4dj742doGGNKlIqF q81YTQSfdEgXIiONXAtVXaJmymsLTZqwOxb16Yxx/GwnH3butPZE85NGdAUR6WlU YJ5+PnTfI9Podxtulx/cy8etWoWgiD6e3AdZ7aNzB6PuTlWvECwQNZOzfwsz7fiP CZrpA1zGFL/CSLH64r4Uyjm+3LjP4LsQerd/oFOmr7ZlxrGIEE676D0ihyu0xcKO Feb8chXjA0Js/czyCXnW18pQG6E5nbJLuNTyCdc0HU22ybniI3B44jTovwqv2uGT tXZBIblcOf9DDNNToXGpMF1XKwJwUuIA3je1SgEm8Fnn0Wt02ZY9CULD1ZaQ/7w= =Qo6W -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging virtio/vhost: fixes some bugfixes for virtio/vhost Signed-off-by: Michael S. Tsirkin <mst@redhat.com> # gpg: Signature made Wed 10 Aug 2016 16:16:22 BST # gpg: using RSA key 0x281F0DB8D28D5469 # gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" # gpg: aka "Michael S. Tsirkin <mst@redhat.com>" # Primary key fingerprint: 0270 606B 6F3C DF3D 0B17 0970 C350 3912 AFBE 8E67 # Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA 8A0D 281F 0DB8 D28D 5469 * remotes/mst/tags/for_upstream: vhost-user: Attempt to fix a race with set_mem_table. vhost-user: Introduce a new protocol feature REPLY_ACK. vhost: check for vhost_ops before using. Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2016-08-10 17:14:35 +01:00 · 2016-08-10 17:14:35 +01:00 · d08306dc42
parent 4b3e5c06a1 28ed5ef163
commit d08306dc42
3 changed files with 115 additions and 50 deletions
--- a/docs/specs/vhost-user.txt
+++ b/docs/specs/vhost-user.txt
@ -37,6 +37,8 @@ consists of 3 header fields and a payload:
 * Flags: 32-bit bit field:
   - Lower 2 bits are the version (currently 0x01)
   - Bit 2 is the reply flag - needs to be sent on each reply from the slave
+   - Bit 3 is the need_reply flag - see VHOST_USER_PROTOCOL_F_REPLY_ACK for
+     details.
 * Size - 32-bit size of the payload


@ -126,6 +128,8 @@ the ones that do:
 * VHOST_GET_VRING_BASE
 * VHOST_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD)

+[ Also see the section on REPLY_ACK protocol extension. ]
+
 There are several messages that the master sends with file descriptors passed
 in the ancillary data:

@ -254,6 +258,7 @@ Protocol features
 #define VHOST_USER_PROTOCOL_F_MQ             0
 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD      1
 #define VHOST_USER_PROTOCOL_F_RARP           2
+#define VHOST_USER_PROTOCOL_F_REPLY_ACK      3

 Message types
 -------------
@ -464,3 +469,24 @@ Message types
      is present in VHOST_USER_GET_PROTOCOL_FEATURES.
      The first 6 bytes of the payload contain the mac address of the guest to
      allow the vhost user backend to construct and broadcast the fake RARP.
+
+VHOST_USER_PROTOCOL_F_REPLY_ACK:
+-------------------------------
+The original vhost-user specification only demands replies for certain
+commands. This differs from the vhost protocol implementation where commands
+are sent over an ioctl() call and block until the client has completed.
+
+With this protocol extension negotiated, the sender (QEMU) can set the
+"need_reply" [Bit 3] flag to any command. This indicates that
+the client MUST respond with a Payload VhostUserMsg indicating success or
+failure. The payload should be set to zero on success or non-zero on failure,
+unless the message already has an explicit reply body.
+
+The response payload gives QEMU a deterministic indication of the result
+of the command. Today, QEMU is expected to terminate the main vhost-user
+loop upon receiving such errors. In future, qemu could be taught to be more
+resilient for selective requests.
+
+For the message types that already solicit a reply from the client, the
+presence of VHOST_USER_PROTOCOL_F_REPLY_ACK or need_reply bit being set brings
+no behavioural change. (See the 'Communication' section for details.)
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@ -428,7 +428,7 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)

    nc->vring_enable = enable;

-    if (vhost_ops->vhost_set_vring_enable) {
+    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
    }

--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@ -31,6 +31,7 @@ enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
+    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,

    VHOST_USER_PROTOCOL_F_MAX
 };
@ -84,6 +85,7 @@ typedef struct VhostUserMsg {

 #define VHOST_USER_VERSION_MASK     (0x3)
 #define VHOST_USER_REPLY_MASK       (0x1<<2)
+#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
@ -158,6 +160,25 @@ fail:
    return -1;
 }

+static int process_message_reply(struct vhost_dev *dev,
+                                 VhostUserRequest request)
+{
+    VhostUserMsg msg;
+
+    if (vhost_user_read(dev, &msg) < 0) {
+        return -1;
+    }
+
+    if (msg.request != request) {
+        error_report("Received unexpected msg type."
+                     "Expected %d received %d",
+                     request, msg.request);
+        return -1;
+    }
+
+    return msg.payload.u64 ? -1 : 0;
+}
+
 static bool vhost_user_one_time_request(VhostUserRequest request)
 {
    switch (request) {
@ -242,55 +263,6 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
    return 0;
 }

-static int vhost_user_set_mem_table(struct vhost_dev *dev,
-                                    struct vhost_memory *mem)
-{
-    int fds[VHOST_MEMORY_MAX_NREGIONS];
-    int i, fd;
-    size_t fd_num = 0;
-    VhostUserMsg msg = {
-        .request = VHOST_USER_SET_MEM_TABLE,
-        .flags = VHOST_USER_VERSION,
-    };
-
-    for (i = 0; i < dev->mem->nregions; ++i) {
-        struct vhost_memory_region *reg = dev->mem->regions + i;
-        ram_addr_t offset;
-        MemoryRegion *mr;
-
-        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
-        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
-                                     &offset);
-        fd = memory_region_get_fd(mr);
-        if (fd > 0) {
-            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
-            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
-            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
-            msg.payload.memory.regions[fd_num].mmap_offset = offset;
-            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
-            fds[fd_num++] = fd;
-        }
-    }
-
-    msg.payload.memory.nregions = fd_num;
-
-    if (!fd_num) {
-        error_report("Failed initializing vhost-user memory map, "
-                     "consider using -object memory-backend-file share=on");
-        return -1;
-    }
-
-    msg.size = sizeof(msg.payload.memory.nregions);
-    msg.size += sizeof(msg.payload.memory.padding);
-    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
-
-    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
-        return -1;
-    }
-
-    return 0;
-}
-
 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
 {
@ -505,6 +477,73 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
 }

+static int vhost_user_set_mem_table(struct vhost_dev *dev,
+                                    struct vhost_memory *mem)
+{
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    int i, fd;
+    size_t fd_num = 0;
+    uint64_t features;
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    VhostUserMsg msg = {
+        .request = VHOST_USER_SET_MEM_TABLE,
+        .flags = VHOST_USER_VERSION,
+    };
+
+    if (reply_supported) {
+        msg.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    for (i = 0; i < dev->mem->nregions; ++i) {
+        struct vhost_memory_region *reg = dev->mem->regions + i;
+        ram_addr_t offset;
+        MemoryRegion *mr;
+
+        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
+        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
+                                     &offset);
+        fd = memory_region_get_fd(mr);
+        if (fd > 0) {
+            msg.payload.memory.regions[fd_num].userspace_addr
+                                             = reg->userspace_addr;
+            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
+            msg.payload.memory.regions[fd_num].guest_phys_addr
+                                             = reg->guest_phys_addr;
+            msg.payload.memory.regions[fd_num].mmap_offset = offset;
+            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
+            fds[fd_num++] = fd;
+        }
+    }
+
+    msg.payload.memory.nregions = fd_num;
+
+    if (!fd_num) {
+        error_report("Failed initializing vhost-user memory map, "
+                     "consider using -object memory-backend-file share=on");
+        return -1;
+    }
+
+    msg.size = sizeof(msg.payload.memory.nregions);
+    msg.size += sizeof(msg.payload.memory.padding);
+    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
+
+    vhost_user_write(dev, &msg, fds, fd_num);
+
+    if (reply_supported) {
+        return process_message_reply(dev, msg.request);
+    } else {
+        /* Note: It is (yet) unknown when the client application has finished
+         * remapping the GPA.
+         * Attempt to prevent a race by sending a command that requires a reply.
+         */
+        vhost_user_get_features(dev, &features);
+    }
+
+    return 0;
+}
+
 static int vhost_user_set_owner(struct vhost_dev *dev)
 {
    VhostUserMsg msg = {