From df67aa3e61e2c83459da7d815962d9706f1528fc Mon Sep 17 00:00:00 2001
From: Leonardo Bras
Date: Mon, 25 Jul 2022 22:02:35 -0300
Subject: [PATCH 1/5] migration: add remaining params->has_* = true in
 migration_instance_init()

Some of the params->has_* = true assignments are missing in
migration_instance_init(); this causes migrate_params_check() to skip
some tests, allowing some unsupported scenarios.

Fix this by adding all of the missing params->has_* = true assignments
in migration_instance_init().

Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters")
Fixes: 1d58872a91 ("migration: do not wait for free thread")
Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter")
Signed-off-by: Leonardo Bras
Message-Id: <20220726010235.342927-1-leobras@redhat.com>
Reviewed-by: Peter Xu
Signed-off-by: Dr. David Alan Gilbert
---
 migration/migration.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index e03f698a3c..82fbe0cf55 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -4451,6 +4451,7 @@ static void migration_instance_init(Object *obj)
     /* Set has_* up only for parameter checks */
     params->has_compress_level = true;
     params->has_compress_threads = true;
+    params->has_compress_wait_thread = true;
     params->has_decompress_threads = true;
     params->has_throttle_trigger_threshold = true;
     params->has_cpu_throttle_initial = true;
@@ -4471,6 +4472,9 @@ static void migration_instance_init(Object *obj)
     params->has_announce_max = true;
     params->has_announce_rounds = true;
     params->has_announce_step = true;
+    params->has_tls_creds = true;
+    params->has_tls_hostname = true;
+    params->has_tls_authz = true;

     qemu_sem_init(&ms->postcopy_pause_sem, 0);
     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
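For context: migrate_params_check() validates a parameter only when its has_*
flag is set, so a flag that is never initialized to true means the
corresponding sanity check is silently skipped. A minimal sketch of that guard
pattern (simplified for illustration; not the exact QEMU code):

    static bool migrate_params_check(MigrationParameters *params, Error **errp)
    {
        /* Each validation is gated on the corresponding has_* flag. */
        if (params->has_compress_level && params->compress_level > 9) {
            error_setg(errp, "compress_level must be between 0 and 9");
            return false;   /* reject the unsupported setting */
        }
        /* ... one guarded check per parameter ... */
        return true;
    }
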
From 777f53c75983dd10756f5dbfc8af50fe11da81c1 Mon Sep 17 00:00:00 2001
From: Thomas Huth
Date: Tue, 2 Aug 2022 08:19:49 +0200
Subject: [PATCH 2/5] Revert "migration: Simplify unqueue_page()"

This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3.

The simplification of unqueue_page() introduced a bug that sometimes
breaks migration on s390x hosts. The problem is not fully understood
yet, but since we are already in the freeze for QEMU 7.1 and we need
something working there, let's revert this patch for the upcoming
release. The optimization can be redone properly later if necessary.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2099934
Signed-off-by: Thomas Huth
Message-Id: <20220802061949.331576-1-thuth@redhat.com>
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Dr. David Alan Gilbert
---
 migration/ram.c        | 37 ++++++++++++++++++++++++++-----------
 migration/trace-events |  3 ++-
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index b94669ba5d..dc1de9ddbc 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1612,7 +1612,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 {
     struct RAMSrcPageRequest *entry;
     RAMBlock *block = NULL;
-    size_t page_size;

     if (!postcopy_has_request(rs)) {
         return NULL;
@@ -1629,13 +1628,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
     entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
     block = entry->rb;
     *offset = entry->offset;
-    page_size = qemu_ram_pagesize(block);
-    /* Each page request should only be multiple page size of the ramblock */
-    assert((entry->len % page_size) == 0);

-    if (entry->len > page_size) {
-        entry->len -= page_size;
-        entry->offset += page_size;
+    if (entry->len > TARGET_PAGE_SIZE) {
+        entry->len -= TARGET_PAGE_SIZE;
+        entry->offset += TARGET_PAGE_SIZE;
     } else {
         memory_region_unref(block->mr);
         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1643,9 +1639,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
         migration_consume_urgent_request();
     }

-    trace_unqueue_page(block->idstr, *offset,
-                       test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
-
     return block;
 }

@@ -2069,8 +2062,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
 {
     RAMBlock *block;
     ram_addr_t offset;
+    bool dirty;

-    block = unqueue_page(rs, &offset);
+    do {
+        block = unqueue_page(rs, &offset);
+        /*
+         * We're sending this page, and since it's postcopy nothing else
+         * will dirty it, and we must make sure it doesn't get sent again
+         * even if this queue request was received after the background
+         * search already sent it.
+         */
+        if (block) {
+            unsigned long page;
+
+            page = offset >> TARGET_PAGE_BITS;
+            dirty = test_bit(page, block->bmap);
+            if (!dirty) {
+                trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
+                                                page);
+            } else {
+                trace_get_queued_page(block->idstr, (uint64_t)offset, page);
+            }
+        }
+
+    } while (block && !dirty);

     if (block) {
         /* See comment above postcopy_preempted_contains() */
diff --git a/migration/trace-events b/migration/trace-events
index a34afe7b85..57003edcbd 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
 qemu_file_fclose(void) ""

 # ram.c
+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI
 ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
 ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
 ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
 postcopy_preempt_triggered(char *str, unsigned long page) "during sending ramblock %s offset 0x%lx"
 postcopy_preempt_restored(char *str, unsigned long page) "ramblock %s offset 0x%lx"
 postcopy_preempt_hit(char *str, uint64_t offset) "ramblock %s offset 0x%"PRIx64
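For context: the revert also restores unqueue_page() to stepping through each
queued request in fixed TARGET_PAGE_SIZE increments rather than the RAMBlock's
own page size, and get_queued_page() loops over it until a still-dirty page is
found, as the restored do/while above shows. A self-contained toy model of the
per-call request consumption (hypothetical types and page size, for
illustration only):

    /* Toy model: one call consumes one target page of a queued request. */
    typedef struct Req {
        unsigned long offset;   /* next page to send */
        unsigned long len;      /* bytes remaining in the request */
    } Req;

    #define PAGE_SIZE 4096UL

    static unsigned long consume_one_page(Req *r)
    {
        unsigned long off = r->offset;
        if (r->len > PAGE_SIZE) {       /* more pages left: advance in place */
            r->len -= PAGE_SIZE;
            r->offset += PAGE_SIZE;
        } else {                        /* last page: request fully consumed */
            r->len = 0;
        }
        return off;
    }
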
From ead34f64f97d11626774a57863ca285a7ddc0d18 Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Thu, 21 Jul 2022 12:52:06 +0100
Subject: [PATCH 3/5] migration: Assert that migrate_multifd_compression()
 returns an in-range value

Coverity complains that when we use the return value from
migrate_multifd_compression() as an array index:

  multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];

we might overrun the array (which is declared to have size
MULTIFD_COMPRESSION__MAX). This is because the function return type is
MultiFDCompression, which is an autogenerated enum. The code generator
includes the "one greater than the maximum possible value"
MULTIFD_COMPRESSION__MAX in the enum, even though this is not actually
a valid value for the enum, and this makes Coverity think that
migrate_multifd_compression() could return that __MAX value and index
off the end of the array.

Suppress the Coverity error by asserting that the value we're going to
return is within range.

Resolves: Coverity CID 1487239, 1487254
Signed-off-by: Peter Maydell
Message-Id: <20220721115207.729615-2-peter.maydell@linaro.org>
Reviewed-by: Dr. David Alan Gilbert
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/migration.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/migration/migration.c b/migration/migration.c
index 82fbe0cf55..bb8bbddfe4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2617,6 +2617,7 @@ MultiFDCompression migrate_multifd_compression(void)

     s = migrate_get_current();

+    assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX);
     return s->parameters.multifd_compression;
 }
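For context: the QAPI code generator appends a FOO__MAX sentinel to every
generated enum; it counts the valid values and is itself never a valid value.
Roughly what the generated header looks like (abridged; the exact member list
may differ), together with the pattern the fix relies on at the lookup site:

    typedef enum MultiFDCompression {
        MULTIFD_COMPRESSION_NONE = 0,
        MULTIFD_COMPRESSION_ZLIB,
        MULTIFD_COMPRESSION_ZSTD,
        MULTIFD_COMPRESSION__MAX,   /* sentinel: number of valid values */
    } MultiFDCompression;

    /* Asserting "< __MAX" before indexing keeps the lookup provably in
     * bounds, which is what silences the Coverity overrun report:
     */
    assert(comp < MULTIFD_COMPRESSION__MAX);
    multifd_recv_state->ops = multifd_ops[comp];
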
From 4bcb7de072fd8ab9de40b24aac00c7b6659f7f7d Mon Sep 17 00:00:00 2001
From: Peter Maydell
Date: Thu, 21 Jul 2022 12:52:07 +0100
Subject: [PATCH 4/5] migration: Define BLK_MIG_BLOCK_SIZE as unsigned long
 long

When we use BLK_MIG_BLOCK_SIZE in expressions like
block_mig_state.submitted * BLK_MIG_BLOCK_SIZE, this multiplication is
done as 32 bits, because both operands are 32 bits. Coverity complains
about possible overflows because we then accumulate that into a 64 bit
variable.

Define BLK_MIG_BLOCK_SIZE as unsigned long long using the ULL suffix.
The only two current uses of it with this problem are both in
block_save_pending(), so we could just cast to uint64_t there, but
using the ULL suffix is simpler and ensures that we don't accidentally
introduce new variants of the same issue in future.

Resolves: Coverity CID 1487136, 1487175
Signed-off-by: Peter Maydell
Message-Id: <20220721115207.729615-3-peter.maydell@linaro.org>
Reviewed-by: Dr. David Alan Gilbert
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/block.c b/migration/block.c
index 9e5aae5898..3577c815a9 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -28,7 +28,7 @@
 #include "sysemu/block-backend.h"
 #include "trace.h"

-#define BLK_MIG_BLOCK_SIZE (1 << 20)
+#define BLK_MIG_BLOCK_SIZE (1ULL << 20)
 #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS)

 #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
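For context: the overflow comes from C's usual arithmetic conversions. With a
plain (1 << 20) both operands of the multiplication are 32-bit ints, so the
product wraps before it is ever widened to 64 bits; the ULL suffix forces the
multiplication itself into 64 bits. A standalone demonstration (hypothetical
values; compiles as-is):

    #include <inttypes.h>
    #include <stdio.h>

    #define BLOCK_SIZE_32 (1 << 20)     /* int: multiplication stays 32-bit */
    #define BLOCK_SIZE_64 (1ULL << 20)  /* unsigned long long: 64-bit math  */

    int main(void)
    {
        uint32_t submitted = 5000;      /* 5000 * 1 MiB exceeds 32 bits */
        uint64_t bad  = submitted * BLOCK_SIZE_32;  /* wraps mod 2^32 first */
        uint64_t good = submitted * BLOCK_SIZE_64;  /* widened, then multiplied */
        /* prints bad=947912704 good=5242880000 */
        printf("bad=%" PRIu64 " good=%" PRIu64 "\n", bad, good);
        return 0;
    }
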
From a21ba54dd5ca38cd05da9035fc65374d7af54f13 Mon Sep 17 00:00:00 2001
From: Vivek Goyal
Date: Fri, 29 Jul 2022 09:17:06 -0400
Subject: [PATCH 5/5] virtiofsd: Disable killpriv_v2 by default

We are having a bunch of issues with killpriv_v2 enabled by default.

First of all, it relies on clearing suid/sgid bits as needed by
dropping the capability CAP_FSETID. This does not work for remote
filesystems like NFS (and possibly others).

Secondly, we are noticing other issues related to clearing of SGID
which lead to failures in xfstests generic/355 and generic/193.

Thirdly, there are other issues w.r.t. caching of metadata (suid/sgid
bits) in the fuse client with killpriv_v2 enabled. The guest can cache
that data for some time even if it has been cleared on the server.

The second and third issues are fixable; it just might take a little
while to get them fixed in the kernel. The first one will probably not
see any movement for a long time.

Given these issues, killpriv_v2 does not seem to be a good candidate
for enabling by default. We have already disabled it by default in the
Rust version of virtiofsd.

Hence this patch disables killpriv_v2 by default. Users can still
choose to enable it by passing the option "-o killpriv_v2".

Signed-off-by: Vivek Goyal
Message-Id:
Reviewed-by: Dr. David Alan Gilbert
Signed-off-by: Dr. David Alan Gilbert
---
 tools/virtiofsd/passthrough_ll.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 7a73dfcce9..371a7bead6 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -767,19 +767,10 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
         fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
         conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
         lo->killpriv_v2 = 1;
-    } else if (lo->user_killpriv_v2 == -1 &&
-               conn->capable & FUSE_CAP_HANDLE_KILLPRIV_V2) {
-        /*
-         * User did not specify a value for killpriv_v2. By default enable it
-         * if connection offers this capability
-         */
-        fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
-        conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
-        lo->killpriv_v2 = 1;
     } else {
         /*
-         * Either user specified to disable killpriv_v2, or connection does
-         * not offer this capability. Disable killpriv_v2 in both the cases
+         * Either user specified to disable killpriv_v2, or did not
+         * specify anything. Disable killpriv_v2 in both the cases.
          */
         fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
         conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
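For context: FUSE capability negotiation is a bitmask handshake. The kernel
advertises what it supports in conn->capable, and the server opts in by
setting bits in conn->want. After this patch the tri-state option only opts in
on an explicit request; a reduced sketch of the resulting logic (simplified
from the hunk above, not the complete lo_init()):

    /* lo->user_killpriv_v2: 1 = "-o killpriv_v2" given on the command line,
     * 0 = explicitly disabled, -1 = option not specified (now means off).
     */
    if (lo->user_killpriv_v2 == 1 &&
        (conn->capable & FUSE_CAP_HANDLE_KILLPRIV_V2)) {
        conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;   /* explicit opt-in only */
        lo->killpriv_v2 = 1;
    } else {
        conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;  /* default is now off */
        lo->killpriv_v2 = 0;
    }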