From 69583490856713f693291b32fc74b6d0f5992b72 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 14 Mar 2017 17:09:22 +0800
Subject: [PATCH 1/8] file-posix: clean up max_segments buffer termination

The following pattern is unsafe:

  char buf[32];
  ret = read(fd, buf, sizeof(buf));
  ...
  buf[ret] = 0;

If read(2) returns 32 then a byte beyond the end of the buffer is
zeroed.

In practice this buffer overflow does not occur because the sysfs
max_segments file only contains an unsigned short + '\n'.  The string is
always shorter than 32 bytes.

Regardless, avoid this pattern because static analysis tools might
complain and it could lead to real buffer overflows if copy-pasted
elsewhere in the codebase.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index c4c06637ef..ac6bd9fae8 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -686,7 +686,7 @@ static int hdev_get_max_segments(const struct stat *st)
         goto out;
     }
     do {
-        ret = read(fd, buf, sizeof(buf));
+        ret = read(fd, buf, sizeof(buf) - 1);
     } while (ret == -1 && errno == EINTR);
     if (ret < 0) {
         ret = -errno;

From 37a9051cc7d30672216af5f6620af1da122f66b3 Mon Sep 17 00:00:00 2001
From: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Date: Tue, 14 Mar 2017 19:46:52 +0800
Subject: [PATCH 2/8] replication: clarify permissions

Even if hidden_disk, secondary_disk are backing files, they all need
write permissions in replication scenario. Otherwise we will encouter
below exceptions on secondary side during adding nbd server:

{'execute': 'nbd-server-add', 'arguments': {'device': 'colo-disk', 'writable': true } }
{"error": {"class": "GenericError", "desc": "Conflicts with use by hidden-qcow2-driver as 'backing', which does not allow 'write' on sec-qcow2-driver-for-nbd"}}

CC: Zhang Hailiang <zhang.zhanghailiang@huawei.com>
CC: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
CC: Wen Congyang <wencongyang2@huawei.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/replication.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/block/replication.c b/block/replication.c
index 22f170fd33..bf3c395eb4 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -155,6 +155,18 @@ static void replication_close(BlockDriverState *bs)
     replication_remove(s->rs);
 }
 
+static void replication_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                   const BdrvChildRole *role,
+                                   uint64_t perm, uint64_t shared,
+                                   uint64_t *nperm, uint64_t *nshared)
+{
+    *nperm = *nshared = BLK_PERM_CONSISTENT_READ \
+                        | BLK_PERM_WRITE \
+                        | BLK_PERM_WRITE_UNCHANGED;
+
+    return;
+}
+
 static int64_t replication_getlength(BlockDriverState *bs)
 {
     return bdrv_getlength(bs->file->bs);
@@ -660,7 +672,7 @@ BlockDriver bdrv_replication = {
 
     .bdrv_open                  = replication_open,
     .bdrv_close                 = replication_close,
-    .bdrv_child_perm            = bdrv_filter_default_perms,
+    .bdrv_child_perm            = replication_child_perm,
 
     .bdrv_getlength             = replication_getlength,
     .bdrv_co_readv              = replication_co_readv,

From fed414df9dc9abef040adfbd8c5956fb610edaa2 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Wed, 15 Mar 2017 00:12:05 +0800
Subject: [PATCH 3/8] file-posix: Don't leak fd in hdev_get_max_segments

This fixes a leaked fd introduced in commit 9103f1ce.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index ac6bd9fae8..53febd3767 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -703,6 +703,9 @@ static int hdev_get_max_segments(const struct stat *st)
     }
 
 out:
+    if (fd != -1) {
+        close(fd);
+    }
     g_free(sysfspath);
     return ret;
 #else

From c1cef67251d3e6ae275c0898ccf4cbcfe85d4e0b Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Tue, 14 Mar 2017 10:30:50 +0800
Subject: [PATCH 4/8] block: Always call bdrv_child_check_perm first

bdrv_child_set_perm alone is not very usable because the caller must
call bdrv_child_check_perm first. This is already encapsulated
conveniently in bdrv_child_try_set_perm, so remove the other prototypes
from the header and fix the one wrong caller, block/mirror.c.

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                   | 13 +++++++++----
 block/mirror.c            |  6 ++++--
 include/block/block_int.h |  4 ----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index cb5737073f..a77e8a064e 100644
--- a/block.c
+++ b/block.c
@@ -1393,6 +1393,11 @@ static int bdrv_fill_options(QDict **options, const char *filename,
     return 0;
 }
 
+static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                                 GSList *ignore_children, Error **errp);
+static void bdrv_child_abort_perm_update(BdrvChild *c);
+static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
+
 /*
  * Check whether permissions on this node can be changed in a way that
  * @cumulative_perms and @cumulative_shared_perms are the new cumulative
@@ -1615,8 +1620,8 @@ static int bdrv_check_update_perm(BlockDriverState *bs, uint64_t new_used_perm,
 
 /* Needs to be followed by a call to either bdrv_child_set_perm() or
  * bdrv_child_abort_perm_update(). */
-int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
-                          GSList *ignore_children, Error **errp)
+static int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
+                                 GSList *ignore_children, Error **errp)
 {
     int ret;
 
@@ -1627,7 +1632,7 @@ int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
     return ret;
 }
 
-void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
+static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
 {
     uint64_t cumulative_perms, cumulative_shared_perms;
 
@@ -1639,7 +1644,7 @@ void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared)
     bdrv_set_perm(c->bs, cumulative_perms, cumulative_shared_perms);
 }
 
-void bdrv_child_abort_perm_update(BdrvChild *c)
+static void bdrv_child_abort_perm_update(BdrvChild *c)
 {
     bdrv_abort_perm_update(c->bs);
 }
diff --git a/block/mirror.c b/block/mirror.c
index 4f3a5cb310..ca4baa510a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -574,7 +574,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
      * valid. Also give up permissions on mirror_top_bs->backing, which might
      * block the removal. */
     block_job_remove_all_bdrv(job);
-    bdrv_child_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL);
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
     bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
 
     /* We just changed the BDS the job BB refers to (with either or both of the
@@ -1245,7 +1246,8 @@ fail:
         block_job_unref(&s->common);
     }
 
-    bdrv_child_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL);
+    bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
+                            &error_abort);
     bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort);
 }
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6c699ac9c3..59400bd848 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -889,10 +889,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   void *opaque, Error **errp);
 void bdrv_root_unref_child(BdrvChild *child);
 
-int bdrv_child_check_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
-                          GSList *ignore_children, Error **errp);
-void bdrv_child_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared);
-void bdrv_child_abort_perm_update(BdrvChild *c);
 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
                             Error **errp);
 

From 184dd9c49b522fe729cfe21d119f6db241268a50 Mon Sep 17 00:00:00 2001
From: John Snow <jsnow@redhat.com>
Date: Wed, 15 Mar 2017 17:28:11 -0400
Subject: [PATCH 5/8] blockdev: fix bitmap clear undo

Only undo the action if we actually prepared the action.

Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index f1f49bd3ca..c5b2c2c209 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2047,7 +2047,9 @@ static void block_dirty_bitmap_clear_abort(BlkActionState *common)
     BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
                                              common, common);
 
-    bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+    if (state->backup) {
+        bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+    }
 }
 
 static void block_dirty_bitmap_clear_commit(BlkActionState *common)

From 8cd1a3e470212685bdd48ac03fdf9721dafc100b Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 17 Mar 2017 09:56:30 +0800
Subject: [PATCH 6/8] block: Propagate error in bdrv_open_backing_file

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block.c b/block.c
index a77e8a064e..e53808425e 100644
--- a/block.c
+++ b/block.c
@@ -2030,6 +2030,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     bdrv_set_backing_hd(bs, backing_hd, &local_err);
     bdrv_unref(backing_hd);
     if (local_err) {
+        error_propagate(errp, local_err);
         ret = -EINVAL;
         goto free_exit;
     }

From b7a745dc33a18377bb4a8dfe54d1df01ea60bf66 Mon Sep 17 00:00:00 2001
From: Peter Lieven <pl@kamp.de>
Date: Thu, 16 Mar 2017 17:02:49 +0100
Subject: [PATCH 7/8] thread-pool: add missing qemu_bh_cancel in completion
 function

commit 3c80ca15 fixed a deadlock scenarion with nested aio_poll invocations.

However, the rescheduling of the completion BH introcuded unnecessary spinning
in the main-loop. On very fast file backends this can even lead to the
"WARNING: I/O thread spun for 1000 iterations" message popping up.

Callgrind reports about 3-4% less instructions with this patch running
qemu-img bench on a ramdisk based VMDK file.

Fixes: 3c80ca158c96ff902a30883a8933e755988948b1
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Lieven <pl@kamp.de>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/thread-pool.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/util/thread-pool.c b/util/thread-pool.c
index ce6cd30193..610646d131 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -188,6 +188,13 @@ restart:
             aio_context_release(pool->ctx);
             elem->common.cb(elem->common.opaque, elem->ret);
             aio_context_acquire(pool->ctx);
+
+            /* We can safely cancel the completion_bh here regardless of someone
+             * else having scheduled it meanwhile because we reenter the
+             * completion function anyway (goto restart).
+             */
+            qemu_bh_cancel(pool->completion_bh);
+
             qemu_aio_unref(elem);
             goto restart;
         } else {

From c2b6428d388b3f03261be7b0be770919e4e3e5ec Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 14 Mar 2017 12:11:57 +0100
Subject: [PATCH 8/8] block: quiesce AioContext when detaching from it

While it is true that bdrv_set_aio_context only works on a single
BlockDriverState subtree (see commit message for 53ec73e, "block: Use
bdrv_drain to replace uncessary bdrv_drain_all", 2015-07-07), it works
at the AioContext level rather than the BlockDriverState level.

Therefore, it is also necessary to trigger pending bottom halves too,
even if no requests are pending.

For NBD this ensures that the aio_co_schedule of a previous call to
nbd_attach_aio_context is completed before detaching from the old
AioContext; it fixes qemu-iotest 094.  Another similar bug happens
when the VM is stopped and the virtio-blk dataplane irqfd is torn down.
In this case it's possible that guest I/O gets stuck if notify_guest_bh
was scheduled but doesn't run.

Calling aio_poll from another AioContext is safe if non-blocking; races
such as the one mentioned in the commit message for c9d1a56 ("block:
only call aio_poll on the current thread's AioContext", 2016-10-28)
are a concern for blocking calls.

I considered other options, including:

- moving the bs->wakeup mechanism to AioContext, and letting the caller
check.  This might work for virtio which has a clear place to wakeup
(notify_place_bh) and check the condition (virtio_blk_data_plane_stop).
For aio_co_schedule I couldn't find a clear place to check the condition.

- adding a dummy oneshot bottom half and waiting for it to trigger.
This has the complication that bottom half list is LIFO for historical
reasons.  There were performance issues caused by bottom half ordering
in the past, so I decided against it for 2.9.

Fixes: 99723548561978da8ef44cf804fb7912698f5d88
Reported-by: Max Reitz <mreitz@redhat.com>
Reported-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Tested-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20170314111157.14464-2-pbonzini@redhat.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block.c b/block.c
index e53808425e..6e906ec53c 100644
--- a/block.c
+++ b/block.c
@@ -4350,8 +4350,15 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
 
 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
 {
+    AioContext *ctx;
+
     bdrv_drain(bs); /* ensure there are no in-flight requests */
 
+    ctx = bdrv_get_aio_context(bs);
+    while (aio_poll(ctx, false)) {
+        /* wait for all bottom halves to execute */
+    }
+
     bdrv_detach_aio_context(bs);
 
     /* This function executes in the old AioContext so acquire the new one in