From 209c07dbec78b62d8cf688ae64099694c42c6067 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Tue, 30 Jan 2018 14:25:03 +0800
Subject: [PATCH 01/55] iotests: Fix CID for VMDK afl image

This reverts commit 76bf133c4 which updated the reference output, and
fixed the reference image, because the code path we want to exercise is
actually the invalid image size.

The descriptor block in the image, which includes the CID to verify, has been
invalid since the reference image was added. Since commit 9877860e7bd we report
this error earlier than the "file too large", so 059.out mismatches.

The binary change is generated along the operations of:

  $ bunzip2 afl9.vmdk.bz2
  $ qemu-img create -f vmdk fix.vmdk 1G
  $ dd if=afl9.vmdk of=fix.vmdk bs=512 count=1 conv=notrunc
  $ mv fix.vmdk afl9.vmdk
  $ bzip2 afl9.vmdk

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/059.out                     |   2 +-
 tests/qemu-iotests/sample_images/afl9.vmdk.bz2 | Bin 178 -> 618 bytes
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
index 1ac5d56233..f6dce7947c 100644
--- a/tests/qemu-iotests/059.out
+++ b/tests/qemu-iotests/059.out
@@ -2358,5 +2358,5 @@ Offset          Length          Mapped to       File
 0x140000000     0x10000         0x50000         TEST_DIR/t-s003.vmdk
 
 === Testing afl image with a very large capacity ===
-qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': Could not open 'TEST_DIR/afl9.IMGFMT': Invalid argument
+qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
 *** done
diff --git a/tests/qemu-iotests/sample_images/afl9.vmdk.bz2 b/tests/qemu-iotests/sample_images/afl9.vmdk.bz2
index 03615d36a12425cf4240bab86f4cfe648db14572..9fcd0af45a815431acf4689e0845ecf2d333cd58 100644
GIT binary patch
literal 618
zcmV-w0+szjT4*^jL0KkKSvgW7ssIN3|NsBH-Q9UpfAhclU70`s-*NE~5QvC~h=_=Y
zh>D2n*q*=vygR634445h35k;?00h9835kMW00004$iPepVE{Bqk)uhJ^wfGLr=)3s
zhM5CR88jLh7)B;cA*K)*6GmuECPU3o4NWG5O#pg>Ak#xY8Z^<M8Z>CrMt}oD38Ns$
z02n}M0LdjZ&}cLPqd+nPKmn$j0iXe(02%-d27nnJriN-uE+X&cz@Bj4BBfd|yV!NB
zwqkL}nW3AI5x^jp=t%^F1pxqp)v#n#)j$zcm1xqv(!$2d*5%vF{5RPWnOV8-^tE<(
zU~%&}Y0uNu*9Wt=yS^8PkC&gPueZO%IG;aD{l#sG`<Af;l1Pnwpi9I75FkQ`LLhd8
z6(9f*2s+N5=%bwp80ddrD6>m4Ho*fsHXdM<jtl*zKvRiTx7Ugy1|Nl<Ns!z;1dvhy
z=`SDHh~{u|1ZodC(_lzezQ)I*Kv2z|PZ@!SJjlVzwGdx2iu#W}dI{t+T&dDWT^LPy
zg3NouEM=V~7GvZQS1CXy676F6mJXWGgW!KTr+E$OspGYCjWmuwa^<Bc>_(-i7fPIW
zA+~n9iy_f)g8B2RILhd%F)dZ5f?7pFLw)@;Ncl<JE}gvMrfh{elT#3gLjY6r8xY4O
z)UO#pv=WYptukn<DuoMH2ip%k?V^k!rjQirK^RC<Brw>3Bz9<|!xm0F{45K+gg8#n
z4FNAJ!<X|3Vq+lyV4=xZ;>AN0<K=%c4A2ruB!4rGvWm!KFrvd4PyfZ-kxmpO4pfM$
EfLnqQYXATM

literal 178
zcmV;j08RfwT4*^jL0KkKS>A08g#Z9x|HJ$H)ZJi0004xF0SE*D03g5s00IDLSQelF
ziVX^$pfWNUJrmRhn2k52pQ;Rs0EQC;(S%|!m`2~BZ@b++;etskRJUVl!Kt)wu7?VN
zl;%JdqX2?TgsNVJP?87M*MvL1qQnBkCES&?0@MeaN-bL4;bDzxmMm|da4fuh!=#fu
g@i9R@5z!av{9tA<GGr!3hi~HUNT&)C8_l7xpl%OKQ2+n{


From c150eb92926ceb0cc6112c10e31e8af2f2d336cd Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 9 Feb 2018 13:29:13 +0800
Subject: [PATCH 02/55] qemu-img.texi: Clean up parameter list

Split options out of the "@table @var" section and create a "@table
@option", then use whitespaces and blank lines consistently.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-img.texi | 66 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 27 deletions(-)

diff --git a/qemu-img.texi b/qemu-img.texi
index fdcf120f36..60a0e080c6 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -33,38 +33,14 @@ The following commands are supported:
 
 Command parameters:
 @table @var
+
 @item filename
- is a disk image filename
-
-@item --object @var{objectdef}
-
-is a QEMU user creatable object definition. See the @code{qemu(1)} manual
-page for a description of the object properties. The most common object
-type is a @code{secret}, which is used to supply passwords and/or encryption
-keys.
-
-@item --image-opts
-
-Indicates that the source @var{filename} parameter is to be interpreted as a
-full option string, not a plain filename. This parameter is mutually
-exclusive with the @var{-f} parameter.
-
-@item --target-image-opts
-
-Indicates that the @var{output_filename} parameter(s) are to be interpreted as
-a full option string, not a plain filename. This parameter is mutually
-exclusive with the @var{-O} parameters. It is currently required to also use
-the @var{-n} parameter to skip image creation. This restriction may be relaxed
-in a future release.
+is a disk image filename
 
 @item fmt
 is the disk image format. It is guessed automatically in most cases. See below
 for a description of the supported disk formats.
 
-@item --backing-chain
-will enumerate information about backing files in a disk image chain. Refer
-below for further description.
-
 @item size
 is the disk image size in bytes. Optional suffixes @code{k} or @code{K}
 (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M)
@@ -74,42 +50,78 @@ and T (terabyte, 1024G) are supported.  @code{b} is ignored.
 is the destination disk image filename
 
 @item output_fmt
- is the destination format
+is the destination format
+
 @item options
 is a comma separated list of format specific options in a
 name=value format. Use @code{-o ?} for an overview of the options supported
 by the used format or see the format descriptions below for details.
+
 @item snapshot_param
 is param used for internal snapshot, format is
 'snapshot.id=[ID],snapshot.name=[NAME]' or '[ID_OR_NAME]'
+
 @item snapshot_id_or_name
 is deprecated, use snapshot_param instead
 
+@end table
+
+@table @option
+
+@item --object @var{objectdef}
+is a QEMU user creatable object definition. See the @code{qemu(1)} manual
+page for a description of the object properties. The most common object
+type is a @code{secret}, which is used to supply passwords and/or encryption
+keys.
+
+@item --image-opts
+Indicates that the source @var{filename} parameter is to be interpreted as a
+full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-f} parameter.
+
+@item --target-image-opts
+Indicates that the @var{output_filename} parameter(s) are to be interpreted as
+a full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-O} parameters. It is currently required to also use
+the @var{-n} parameter to skip image creation. This restriction may be relaxed
+in a future release.
+
+@item --backing-chain
+will enumerate information about backing files in a disk image chain. Refer
+below for further description.
+
 @item -c
 indicates that target image must be compressed (qcow format only)
+
 @item -h
 with or without a command shows help and lists the supported formats
+
 @item -p
 display progress bar (compare, convert and rebase commands only).
 If the @var{-p} option is not used for a command that supports it, the
 progress is reported when the process receives a @code{SIGUSR1} or
 @code{SIGINFO} signal.
+
 @item -q
 Quiet mode - do not print any output (except errors). There's no progress bar
 in case both @var{-q} and @var{-p} options are used.
+
 @item -S @var{size}
 indicates the consecutive number of bytes that must contain only zeros
 for qemu-img to create a sparse image during conversion. This value is rounded
 down to the nearest 512 bytes. You may use the common size suffixes like
 @code{k} for kilobytes.
+
 @item -t @var{cache}
 specifies the cache mode that should be used with the (destination) file. See
 the documentation of the emulator's @code{-drive cache=...} option for allowed
 values.
+
 @item -T @var{src_cache}
 specifies the cache mode that should be used with the source file(s). See
 the documentation of the emulator's @code{-drive cache=...} option for allowed
 values.
+
 @end table
 
 Parameters to snapshot subcommand:

From a7e326df1c116e99e6e65ca02c6eadb5bd5aef45 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 9 Feb 2018 13:29:14 +0800
Subject: [PATCH 03/55] qemu-img: Document --force-share / -U

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-img.texi | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/qemu-img.texi b/qemu-img.texi
index 60a0e080c6..8a26400adb 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -86,6 +86,14 @@ exclusive with the @var{-O} parameters. It is currently required to also use
 the @var{-n} parameter to skip image creation. This restriction may be relaxed
 in a future release.
 
+@item --force-share (-U)
+If specified, @code{qemu-img} will open the image in shared mode, allowing
+other QEMU processes to open it in write mode. For example, this can be used to
+get the image information (with 'info' subcommand) when the image is used by a
+running guest.  Note that this could produce inconsistent results because of
+concurrent metadata changes, etc. This option is only allowed when opening
+images in read-only mode.
+
 @item --backing-chain
 will enumerate information about backing files in a disk image chain. Refer
 below for further description.

From c1a4b6f9246bf2841b14b11d4c5a029429f0659a Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Fri, 9 Feb 2018 13:29:15 +0800
Subject: [PATCH 04/55] docs: Document share-rw property more thoroughly

Suggested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 docs/qemu-block-drivers.texi | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
index cd74767ed3..f1793692bb 100644
--- a/docs/qemu-block-drivers.texi
+++ b/docs/qemu-block-drivers.texi
@@ -845,6 +845,16 @@ QEMU transparently handles lock handover during shared storage migration.  For
 shared virtual disk images between multiple VMs, the "share-rw" device option
 should be used.
 
+By default, the guest has exclusive write access to its disk image. If the
+guest can safely share the disk image with other writers the @code{-device
+...,share-rw=on} parameter can be used.  This is only safe if the guest is
+running software, such as a cluster file system, that coordinates disk accesses
+to avoid corruption.
+
+Note that share-rw=on only declares the guest's ability to share the disk.
+Some QEMU features, such as image file formats, require exclusive write access
+to the disk image and this is unaffected by the share-rw=on option.
+
 Alternatively, locking can be fully disabled by "locking=off" block device
 option. In the command line, the option is usually in the form of
 "file.locking=off" as the protocol driver is normally placed as a "file" child

From de7269d293bc3a7fba98a0f12781eccb4ea4be1e Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Fri, 9 Feb 2018 16:42:22 +0200
Subject: [PATCH 05/55] qcow2: Use g_try_realloc() in
 qcow2_expand_zero_clusters()

g_realloc() aborts the program if it fails to allocate the required
amount of memory. We want to detect that scenario and return an error
instead, so let's use g_try_realloc().

Signed-off-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 3a979bcd82..f077cd3ac5 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -2070,7 +2070,15 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
         int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size *
                                       sizeof(uint64_t), BDRV_SECTOR_SIZE);
 
-        l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+        uint64_t *new_l1_table =
+            g_try_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+
+        if (!new_l1_table) {
+            ret = -ENOMEM;
+            goto fail;
+        }
+
+        l1_table = new_l1_table;
 
         ret = bdrv_read(bs->file,
                         s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,

From cb2af9176fcb6b0b71bbac8c83498c4caa37ab05 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 7 Feb 2018 17:29:20 +0100
Subject: [PATCH 06/55] block: early check for blockers on drive-mirror

Even if an op blocker is present for BLOCK_OP_TYPE_MIRROR_SOURCE,
it is checked a bit late and the result is that the target is
created even if drive-mirror subsequently fails.  Add an early
check to avoid this.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 blockdev.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index bdbdeae7e4..7423c5317b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3569,6 +3569,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
         return;
     }
 
+    /* Early check to avoid creating target */
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
+        return;
+    }
+
     aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(aio_context);
 

From 2782bb75e9ff6319386159577f544deb0f50f536 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Mon, 12 Feb 2018 13:47:18 +0100
Subject: [PATCH 07/55] iotests: Use virtio-blk in 155

Only a few select machine types support floppy drives and there is
actually nothing preventing us from using virtio here, so let's do it.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/155 | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155
index fc9fa975be..42dae04c83 100755
--- a/tests/qemu-iotests/155
+++ b/tests/qemu-iotests/155
@@ -64,7 +64,7 @@ class BaseClass(iotests.QMPTestCase):
                     'file': {'driver': 'file',
                              'filename': source_img}}
         self.vm.add_blockdev(self.qmp_to_opts(blockdev))
-        self.vm.add_device('floppy,id=qdev0,drive=source')
+        self.vm.add_device('virtio-blk,id=qdev0,drive=source')
         self.vm.launch()
 
         self.assertIntactSourceBackingChain()
@@ -173,21 +173,24 @@ class MirrorBaseClass(BaseClass):
     def testFull(self):
         self.runMirror('full')
 
-        node = self.findBlockNode('target', 'qdev0')
+        node = self.findBlockNode('target',
+                                  '/machine/peripheral/qdev0/virtio-backend')
         self.assertCorrectBackingImage(node, None)
         self.assertIntactSourceBackingChain()
 
     def testTop(self):
         self.runMirror('top')
 
-        node = self.findBlockNode('target', 'qdev0')
+        node = self.findBlockNode('target',
+                                  '/machine/peripheral/qdev0/virtio-backend')
         self.assertCorrectBackingImage(node, back2_img)
         self.assertIntactSourceBackingChain()
 
     def testNone(self):
         self.runMirror('none')
 
-        node = self.findBlockNode('target', 'qdev0')
+        node = self.findBlockNode('target',
+                                  '/machine/peripheral/qdev0/virtio-backend')
         self.assertCorrectBackingImage(node, source_img)
         self.assertIntactSourceBackingChain()
 
@@ -239,7 +242,8 @@ class TestCommit(BaseClass):
 
         self.vm.event_wait('BLOCK_JOB_COMPLETED')
 
-        node = self.findBlockNode(None, 'qdev0')
+        node = self.findBlockNode(None,
+                                  '/machine/peripheral/qdev0/virtio-backend')
         self.assert_qmp(node, 'image' + '/backing-image' * 0 + '/filename',
                         back1_img)
         self.assert_qmp(node, 'image' + '/backing-image' * 1 + '/filename',

From 0e448a05444ddc602d2857a17f31042631a75946 Mon Sep 17 00:00:00 2001
From: "Daniel P. Berrange" <berrange@redhat.com>
Date: Mon, 12 Feb 2018 18:48:49 +0000
Subject: [PATCH 08/55] qemu-io: fix EOF Ctrl-D handling in qemu-io readline
 code

qemu-io puts the TTY into non-canonical mode, which means no EOF processing is
done and thus getchar() will never return the EOF constant. Instead we have to
query the TTY attributes to determine the configured EOF character (usually
Ctrl-D / 0x4), and then explicitly check for that value. This fixes the
regression that prevented Ctrl-D from triggering an exit of qemu-io that has
existed since readline was first added in

  commit 0cf17e181798063c3824c8200ba46f25f54faa1a
  Author: Stefan Hajnoczi <stefanha@redhat.com>
  Date:   Thu Nov 14 11:54:17 2013 +0100

    qemu-io: use readline.c

It also ensures that a newline is printed when exiting, to complete the
line output by the "qemu-io> " prompt.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-io.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/qemu-io.c b/qemu-io.c
index f554ab614b..2c00ea068e 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -11,6 +11,9 @@
 #include "qemu/osdep.h"
 #include <getopt.h>
 #include <libgen.h>
+#ifndef _WIN32
+#include <termios.h>
+#endif
 
 #include "qapi/error.h"
 #include "qemu-io.h"
@@ -42,6 +45,26 @@ static bool imageOpts;
 
 static ReadLineState *readline_state;
 
+static int ttyEOF;
+
+static int get_eof_char(void)
+{
+#ifdef _WIN32
+    return 0x4; /* Ctrl-D */
+#else
+    struct termios tty;
+    if (tcgetattr(STDIN_FILENO, &tty) != 0) {
+        if (errno == ENOTTY) {
+            return 0x0; /* just expect read() == 0 */
+        } else {
+            return 0x4; /* Ctrl-D */
+        }
+    }
+
+    return tty.c_cc[VEOF];
+#endif
+}
+
 static int close_f(BlockBackend *blk, int argc, char **argv)
 {
     blk_unref(qemuio_blk);
@@ -323,7 +346,8 @@ static char *fetchline_readline(void)
     readline_start(readline_state, get_prompt(), 0, readline_func, &line);
     while (!line) {
         int ch = getchar();
-        if (ch == EOF) {
+        if (ttyEOF != 0x0 && ch == ttyEOF) {
+            printf("\n");
             break;
         }
         readline_handle_byte(readline_state, ch);
@@ -593,6 +617,7 @@ int main(int argc, char **argv)
     qemuio_add_command(&close_cmd);
 
     if (isatty(STDIN_FILENO)) {
+        ttyEOF = get_eof_char();
         readline_state = readline_init(readline_printf_func,
                                        readline_flush_func,
                                        NULL,

From 0375003df4153dce7f3ece9285b27427f00f9e01 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:50 +0100
Subject: [PATCH 09/55] gluster: Move glfs_close() to create's clean-up

glfs_close() is a classical clean-up operation, as can be seen by the
fact that it is executed even if the truncation before it failed.
Also, moving it to clean-up makes it more clear that if it fails, we do
not want it to overwrite the current ret value if that signifies an
error already.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/gluster.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index d8decc41ad..7fab2dfa12 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -970,7 +970,7 @@ static int qemu_gluster_create(const char *filename,
 {
     BlockdevOptionsGluster *gconf;
     struct glfs *glfs;
-    struct glfs_fd *fd;
+    struct glfs_fd *fd = NULL;
     int ret = 0;
     PreallocMode prealloc;
     int64_t total_size = 0;
@@ -1054,10 +1054,12 @@ static int qemu_gluster_create(const char *filename,
         break;
     }
 
-    if (glfs_close(fd) != 0) {
-        ret = -errno;
-    }
 out:
+    if (fd) {
+        if (glfs_close(fd) != 0 && ret == 0) {
+            ret = -errno;
+        }
+    }
     qapi_free_BlockdevOptionsGluster(gconf);
     glfs_clear_preopened(glfs);
     return ret;

From 36e87909f8f324a7c442444e5cc02cc5efcdf360 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:51 +0100
Subject: [PATCH 10/55] gluster: Pull truncation from qemu_gluster_create

Pull out the truncation code from the qemu_cluster_create() function so
we can later reuse it in qemu_gluster_truncate().

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/gluster.c | 74 ++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 7fab2dfa12..8178541416 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -965,6 +965,45 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
 }
 #endif
 
+static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
+                                    PreallocMode prealloc, Error **errp)
+{
+    switch (prealloc) {
+#ifdef CONFIG_GLUSTERFS_FALLOCATE
+    case PREALLOC_MODE_FALLOC:
+        if (glfs_fallocate(fd, 0, 0, offset)) {
+            error_setg_errno(errp, errno, "Could not preallocate data");
+            return -errno;
+        }
+        break;
+#endif /* CONFIG_GLUSTERFS_FALLOCATE */
+#ifdef CONFIG_GLUSTERFS_ZEROFILL
+    case PREALLOC_MODE_FULL:
+        if (glfs_ftruncate(fd, offset)) {
+            error_setg_errno(errp, errno, "Could not resize file");
+            return -errno;
+        }
+        if (glfs_zerofill(fd, 0, offset)) {
+            error_setg_errno(errp, errno, "Could not zerofill the new area");
+            return -errno;
+        }
+        break;
+#endif /* CONFIG_GLUSTERFS_ZEROFILL */
+    case PREALLOC_MODE_OFF:
+        if (glfs_ftruncate(fd, offset)) {
+            error_setg_errno(errp, errno, "Could not resize file");
+            return -errno;
+        }
+        break;
+    default:
+        error_setg(errp, "Unsupported preallocation mode: %s",
+                   PreallocMode_str(prealloc));
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static int qemu_gluster_create(const char *filename,
                                QemuOpts *opts, Error **errp)
 {
@@ -1019,40 +1058,7 @@ static int qemu_gluster_create(const char *filename,
         goto out;
     }
 
-    switch (prealloc) {
-#ifdef CONFIG_GLUSTERFS_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        if (glfs_fallocate(fd, 0, 0, total_size)) {
-            error_setg(errp, "Could not preallocate data for the new file");
-            ret = -errno;
-        }
-        break;
-#endif /* CONFIG_GLUSTERFS_FALLOCATE */
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    case PREALLOC_MODE_FULL:
-        if (!glfs_ftruncate(fd, total_size)) {
-            if (glfs_zerofill(fd, 0, total_size)) {
-                error_setg(errp, "Could not zerofill the new file");
-                ret = -errno;
-            }
-        } else {
-            error_setg(errp, "Could not resize file");
-            ret = -errno;
-        }
-        break;
-#endif /* CONFIG_GLUSTERFS_ZEROFILL */
-    case PREALLOC_MODE_OFF:
-        if (glfs_ftruncate(fd, total_size) != 0) {
-            ret = -errno;
-            error_setg(errp, "Could not resize file");
-        }
-        break;
-    default:
-        ret = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_str(prealloc));
-        break;
-    }
+    ret = qemu_gluster_do_truncate(fd, total_size, prealloc, errp);
 
 out:
     if (fd) {

From f3a33f795cc7a3372d2372480d34f6b95a3b09b6 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:52 +0100
Subject: [PATCH 11/55] gluster: Query current size in do_truncate()

Instead of expecting the current size to be 0, query it and allocate
only the area [current_size, offset) if preallocation is requested.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/gluster.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 8178541416..806b894bc8 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -968,10 +968,27 @@ static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
 static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
                                     PreallocMode prealloc, Error **errp)
 {
+    int64_t current_length;
+
+    current_length = glfs_lseek(fd, 0, SEEK_END);
+    if (current_length < 0) {
+        error_setg_errno(errp, errno, "Failed to determine current size");
+        return -errno;
+    }
+
+    if (current_length > offset && prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Cannot use preallocation for shrinking files");
+        return -ENOTSUP;
+    }
+
+    if (current_length == offset) {
+        return 0;
+    }
+
     switch (prealloc) {
 #ifdef CONFIG_GLUSTERFS_FALLOCATE
     case PREALLOC_MODE_FALLOC:
-        if (glfs_fallocate(fd, 0, 0, offset)) {
+        if (glfs_fallocate(fd, 0, current_length, offset - current_length)) {
             error_setg_errno(errp, errno, "Could not preallocate data");
             return -errno;
         }
@@ -983,7 +1000,7 @@ static int qemu_gluster_do_truncate(struct glfs_fd *fd, int64_t offset,
             error_setg_errno(errp, errno, "Could not resize file");
             return -errno;
         }
-        if (glfs_zerofill(fd, 0, offset)) {
+        if (glfs_zerofill(fd, current_length, offset - current_length)) {
             error_setg_errno(errp, errno, "Could not zerofill the new area");
             return -errno;
         }

From c3132aff174ea35dd78bc8828fd79c28f9bc9a65 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:53 +0100
Subject: [PATCH 12/55] gluster: Add preallocated truncation

By using qemu_do_cluster_truncate() in qemu_cluster_truncate(), we now
automatically have preallocated truncation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/gluster.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 806b894bc8..3f17b7819d 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -1122,23 +1122,8 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
 static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset,
                                  PreallocMode prealloc, Error **errp)
 {
-    int ret;
     BDRVGlusterState *s = bs->opaque;
-
-    if (prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Unsupported preallocation mode '%s'",
-                   PreallocMode_str(prealloc));
-        return -ENOTSUP;
-    }
-
-    ret = glfs_ftruncate(s->fd, offset);
-    if (ret < 0) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Failed to truncate file");
-        return ret;
-    }
-
-    return 0;
+    return qemu_gluster_do_truncate(s->fd, offset, prealloc, errp);
 }
 
 static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,

From 8b9ad56e9cbfd852af53090d18bb32e94b539253 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:54 +0100
Subject: [PATCH 13/55] sheepdog: Make sd_prealloc() take a BDS

We want to use this function in sd_truncate() later on, so taking a
filename is not exactly ideal.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/sheepdog.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index af125a2c8d..cc1d37b3da 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1826,10 +1826,10 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
     return 0;
 }
 
-static int sd_prealloc(const char *filename, Error **errp)
+static int sd_prealloc(BlockDriverState *bs, Error **errp)
 {
     BlockBackend *blk = NULL;
-    BDRVSheepdogState *base = NULL;
+    BDRVSheepdogState *base = bs->opaque;
     unsigned long buf_size;
     uint32_t idx, max_idx;
     uint32_t object_size;
@@ -1837,10 +1837,11 @@ static int sd_prealloc(const char *filename, Error **errp)
     void *buf = NULL;
     int ret;
 
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
-    if (blk == NULL) {
-        ret = -EIO;
+    blk = blk_new(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                  BLK_PERM_ALL);
+
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
         goto out_with_err_set;
     }
 
@@ -1852,7 +1853,6 @@ static int sd_prealloc(const char *filename, Error **errp)
         goto out;
     }
 
-    base = blk_bs(blk)->opaque;
     object_size = (UINT32_C(1) << base->inode.block_size_shift);
     buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
     buf = g_malloc0(buf_size);
@@ -2108,7 +2108,20 @@ static int sd_create(const char *filename, QemuOpts *opts,
     }
 
     if (prealloc) {
-        ret = sd_prealloc(filename, errp);
+        BlockDriverState *bs;
+        QDict *opts;
+
+        opts = qdict_new();
+        qdict_put_str(opts, "driver", "sheepdog");
+        bs = bdrv_open(filename, NULL, opts, BDRV_O_PROTOCOL | BDRV_O_RDWR,
+                       errp);
+        if (!bs) {
+            goto out;
+        }
+
+        ret = sd_prealloc(bs, errp);
+
+        bdrv_unref(bs);
     }
 out:
     g_free(backing_file);

From 1a62baf62bdc30ec4856ac569cefb9d831808253 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:55 +0100
Subject: [PATCH 14/55] sheepdog: Pass old and new size to sd_prealloc()

sd_prealloc() will now preallocate the area [old_size, new_size).  As
before, it rounds to buf_size and may thus overshoot and preallocate
areas that were not requested to be preallocated.  For image creation,
this is no change in behavior.  For truncation, this is in accordance
with the documentation for preallocated truncation.

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/sheepdog.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index cc1d37b3da..d300fb69c0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1826,14 +1826,14 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
     return 0;
 }
 
-static int sd_prealloc(BlockDriverState *bs, Error **errp)
+static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
+                       Error **errp)
 {
     BlockBackend *blk = NULL;
     BDRVSheepdogState *base = bs->opaque;
     unsigned long buf_size;
     uint32_t idx, max_idx;
     uint32_t object_size;
-    int64_t vdi_size;
     void *buf = NULL;
     int ret;
 
@@ -1847,19 +1847,13 @@ static int sd_prealloc(BlockDriverState *bs, Error **errp)
 
     blk_set_allow_write_beyond_eof(blk, true);
 
-    vdi_size = blk_getlength(blk);
-    if (vdi_size < 0) {
-        ret = vdi_size;
-        goto out;
-    }
-
     object_size = (UINT32_C(1) << base->inode.block_size_shift);
     buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
     buf = g_malloc0(buf_size);
 
-    max_idx = DIV_ROUND_UP(vdi_size, buf_size);
+    max_idx = DIV_ROUND_UP(new_size, buf_size);
 
-    for (idx = 0; idx < max_idx; idx++) {
+    for (idx = old_size / buf_size; idx < max_idx; idx++) {
         /*
          * The created image can be a cloned image, so we need to read
          * a data from the source image.
@@ -2119,7 +2113,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
             goto out;
         }
 
-        ret = sd_prealloc(bs, errp);
+        ret = sd_prealloc(bs, 0, s->inode.vdi_size, errp);
 
         bdrv_unref(bs);
     }

From 74f1eabf9cbdd168f0aceae97a0f6645d1ce7ebd Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Tue, 13 Feb 2018 14:03:56 +0100
Subject: [PATCH 15/55] sheepdog: Allow fully preallocated truncation

Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/sheepdog.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index d300fb69c0..ac02b10fe0 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -2180,15 +2180,16 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
     int ret, fd;
     unsigned int datalen;
     uint64_t max_vdi_size;
+    int64_t old_size = s->inode.vdi_size;
 
-    if (prealloc != PREALLOC_MODE_OFF) {
+    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) {
         error_setg(errp, "Unsupported preallocation mode '%s'",
                    PreallocMode_str(prealloc));
         return -ENOTSUP;
     }
 
     max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-    if (offset < s->inode.vdi_size) {
+    if (offset < old_size) {
         error_setg(errp, "shrinking is not supported");
         return -EINVAL;
     } else if (offset > max_vdi_size) {
@@ -2211,9 +2212,17 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
 
     if (ret < 0) {
         error_setg_errno(errp, -ret, "failed to update an inode");
+        return ret;
     }
 
-    return ret;
+    if (prealloc == PREALLOC_MODE_FULL) {
+        ret = sd_prealloc(bs, old_size, offset, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
 }
 
 /*

From 3e99da5e76a4e834e457cbef774ed5f83589c5e7 Mon Sep 17 00:00:00 2001
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Date: Fri, 2 Feb 2018 19:07:52 +0300
Subject: [PATCH 16/55] block: maintain persistent disabled bitmaps

To maintain load/store disabled bitmap there is new approach:

 - deprecate @autoload flag of block-dirty-bitmap-add, make it ignored
 - store enabled bitmaps as "auto" to qcow2
 - store disabled bitmaps without "auto" flag to qcow2
 - on qcow2 open load "auto" bitmaps as enabled and others
   as disabled (except in_use bitmaps)

Also, adjust iotests 165 and 176 appropriately.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Message-id: 20180202160752.143796-1-vsementsov@virtuozzo.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/dirty-bitmap.c         | 18 ------------------
 block/qcow2-bitmap.c         | 12 +++++++-----
 block/qcow2.c                |  2 +-
 block/qcow2.h                |  2 +-
 blockdev.c                   | 10 ++--------
 include/block/dirty-bitmap.h |  1 -
 qapi/block-core.json         |  6 +++---
 qemu-doc.texi                |  7 +++++++
 tests/qemu-iotests/165       |  2 +-
 tests/qemu-iotests/176       |  2 +-
 10 files changed, 23 insertions(+), 39 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7879d13ddb..909f0517f8 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -52,8 +52,6 @@ struct BdrvDirtyBitmap {
                                    Such operations must fail and both the image
                                    and this bitmap must remain unchanged while
                                    this flag is set. */
-    bool autoload;              /* For persistent bitmaps: bitmap must be
-                                   autoloaded on image opening */
     bool persistent;            /* bitmap must be saved to owner disk image */
     QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
@@ -104,7 +102,6 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
     g_free(bitmap->name);
     bitmap->name = NULL;
     bitmap->persistent = false;
-    bitmap->autoload = false;
 }
 
 /* Called with BQL taken.  */
@@ -261,8 +258,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
     bitmap->successor = NULL;
     successor->persistent = bitmap->persistent;
     bitmap->persistent = false;
-    successor->autoload = bitmap->autoload;
-    bitmap->autoload = false;
     bdrv_release_dirty_bitmap(bs, bitmap);
 
     return successor;
@@ -666,19 +661,6 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
     return false;
 }
 
-/* Called with BQL taken. */
-void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
-{
-    qemu_mutex_lock(bitmap->mutex);
-    bitmap->autoload = autoload;
-    qemu_mutex_unlock(bitmap->mutex);
-}
-
-bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->autoload;
-}
-
 /* Called with BQL taken. */
 void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool persistent)
 {
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index efa10c6663..4f6fd863ea 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -933,14 +933,14 @@ static void set_readonly_helper(gpointer bitmap, gpointer value)
     bdrv_dirty_bitmap_set_readonly(bitmap, (bool)value);
 }
 
-/* qcow2_load_autoloading_dirty_bitmaps()
+/* qcow2_load_dirty_bitmaps()
  * Return value is a hint for caller: true means that the Qcow2 header was
  * updated. (false doesn't mean that the header should be updated by the
  * caller, it just means that updating was not needed or the image cannot be
  * written to).
  * On failure the function returns false.
  */
-bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp)
+bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2BitmapList *bm_list;
@@ -960,14 +960,16 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp)
     }
 
     QSIMPLEQ_FOREACH(bm, bm_list, entry) {
-        if ((bm->flags & BME_FLAG_AUTO) && !(bm->flags & BME_FLAG_IN_USE)) {
+        if (!(bm->flags & BME_FLAG_IN_USE)) {
             BdrvDirtyBitmap *bitmap = load_bitmap(bs, bm, errp);
             if (bitmap == NULL) {
                 goto fail;
             }
 
+            if (!(bm->flags & BME_FLAG_AUTO)) {
+                bdrv_disable_dirty_bitmap(bitmap);
+            }
             bdrv_dirty_bitmap_set_persistance(bitmap, true);
-            bdrv_dirty_bitmap_set_autoload(bitmap, true);
             bm->flags |= BME_FLAG_IN_USE;
             created_dirty_bitmaps =
                     g_slist_append(created_dirty_bitmaps, bitmap);
@@ -1369,7 +1371,7 @@ void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp)
             bm->table.size = 0;
             QSIMPLEQ_INSERT_TAIL(&drop_tables, tb, entry);
         }
-        bm->flags = bdrv_dirty_bitmap_get_autoload(bitmap) ? BME_FLAG_AUTO : 0;
+        bm->flags = bdrv_dirty_bitmap_enabled(bitmap) ? BME_FLAG_AUTO : 0;
         bm->granularity_bits = ctz32(bdrv_dirty_bitmap_granularity(bitmap));
         bm->dirty_bitmap = bitmap;
     }
diff --git a/block/qcow2.c b/block/qcow2.c
index 801e29fc56..80774aacf6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1460,7 +1460,7 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
         s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
     }
 
-    if (qcow2_load_autoloading_dirty_bitmaps(bs, &local_err)) {
+    if (qcow2_load_dirty_bitmaps(bs, &local_err)) {
         update_header = false;
     }
     if (local_err != NULL) {
diff --git a/block/qcow2.h b/block/qcow2.h
index 46c8cf44ec..016e87c81a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -663,7 +663,7 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);
 int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
                                   void **refcount_table,
                                   int64_t *refcount_table_size);
-bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+bool qcow2_load_dirty_bitmaps(BlockDriverState *bs, Error **errp);
 int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
 int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
diff --git a/blockdev.c b/blockdev.c
index 7423c5317b..3fb1ca803c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2825,14 +2825,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
     if (!has_persistent) {
         persistent = false;
     }
-    if (!has_autoload) {
-        autoload = false;
-    }
 
-    if (has_autoload && !persistent) {
-        error_setg(errp, "Autoload flag must be used only for persistent "
-                         "bitmaps");
-        return;
+    if (has_autoload) {
+        warn_report("Autoload option is deprecated and its value is ignored");
     }
 
     if (persistent &&
@@ -2847,7 +2842,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
     }
 
     bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
-    bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
 }
 
 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3da8486ab1..e3f4bbf51d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -66,7 +66,6 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
 void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value);
-void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload);
 void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap,
                                        bool persistent);
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8046c2da23..2c107823fe 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1593,9 +1593,9 @@
 #              Qcow2 disks support persistent bitmaps. Default is false for
 #              block-dirty-bitmap-add. (Since: 2.10)
 #
-# @autoload: the bitmap will be automatically loaded when the image it is stored
-#            in is opened. This flag may only be specified for persistent
-#            bitmaps. Default is false for block-dirty-bitmap-add. (Since: 2.10)
+# @autoload: ignored and deprecated since 2.12.
+#            Currently, all dirty tracking bitmaps are loaded from Qcow2 on
+#            open.
 #
 # Since: 2.4
 ##
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 769968aba4..137f5814a8 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -2757,6 +2757,13 @@ used and it will be removed with no replacement.
 The ``convert -s snapshot_id_or_name'' argument is obsoleted
 by the ``convert -l snapshot_param'' argument instead.
 
+@section QEMU Machine Protocol (QMP) commands
+
+@subsection block-dirty-bitmap-add "autoload" parameter (since 2.12.0)
+
+"autoload" parameter is now ignored. All bitmaps are automatically loaded
+from qcow2 images.
+
 @section System emulator human monitor commands
 
 @subsection host_net_add (since 2.10.0)
diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165
index a3932db3de..2936929627 100755
--- a/tests/qemu-iotests/165
+++ b/tests/qemu-iotests/165
@@ -64,7 +64,7 @@ class TestPersistentDirtyBitmap(iotests.QMPTestCase):
 
     def qmpAddBitmap(self):
         self.vm.qmp('block-dirty-bitmap-add', node='drive0',
-                    name='bitmap0', persistent=True, autoload=True)
+                    name='bitmap0', persistent=True)
 
     def test_persistent(self):
         self.vm = self.mkVm()
diff --git a/tests/qemu-iotests/176 b/tests/qemu-iotests/176
index d38b3aeb91..32baa116dd 100755
--- a/tests/qemu-iotests/176
+++ b/tests/qemu-iotests/176
@@ -95,7 +95,7 @@ case $reason in
      "file": { "driver": "file", "filename": "$TEST_IMG" } } }
 { "execute": "block-dirty-bitmap-add",
   "arguments": { "node": "drive0", "name": "bitmap0",
-     "persistent": true, "autoload": true } }
+     "persistent": true } }
 { "execute": "quit" }
 EOF
 	;;

From d4c373b854f813a6c3b9f763c98723053aa80ac5 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:01 +0200
Subject: [PATCH 17/55] qcow2: Fix documentation of get_cluster_table()

This function has not been returning the offset of the L2 table since
commit 3948d1d4876065160583e79533bf604481063833

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: b498733b6706a859a03678d74ecbd26aeba129aa.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f077cd3ac5..e5ab102c29 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -647,8 +647,7 @@ fail:
  * for a given disk offset, load (and allocate if needed)
  * the l2 table.
  *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
+ * the cluster index in the l2 table is given to the caller.
  *
  * Returns 0 on success, -errno in failure case
  */

From 03019d73142f78c22b87b7162f3c4a390d7d839e Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:02 +0200
Subject: [PATCH 18/55] qcow2: Add table size field to Qcow2Cache

The table size in the qcow2 cache is currently equal to the cluster
size. This doesn't allow us to use the cache memory efficiently,
particularly with large cluster sizes, so we need to be able to have
smaller cache tables that are independent from the cluster size. This
patch adds a new field to Qcow2Cache that we can use instead of the
cluster size.

The current table size is still being initialized to the cluster size,
so there are no semantic changes yet, but this patch will allow us to
prepare the rest of the code and simplify a few function calls.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 67a1bf9e55f417005c567bead95a018dc34bc687.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index c48ffebd8f..38c03770b4 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -39,6 +39,7 @@ struct Qcow2Cache {
     Qcow2CachedTable       *entries;
     struct Qcow2Cache      *depends;
     int                     size;
+    int                     table_size;
     bool                    depends_on_flush;
     void                   *table_array;
     uint64_t                lru_counter;
@@ -48,17 +49,15 @@ struct Qcow2Cache {
 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
                     Qcow2Cache *c, int table)
 {
-    BDRVQcow2State *s = bs->opaque;
-    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+    return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
 
 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
                   Qcow2Cache *c, void *table)
 {
-    BDRVQcow2State *s = bs->opaque;
     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
-    int idx = table_offset / s->cluster_size;
-    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
+    int idx = table_offset / c->table_size;
+    assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
     return idx;
 }
 
@@ -79,10 +78,9 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
-    BDRVQcow2State *s = bs->opaque;
     void *t = qcow2_cache_get_table_addr(bs, c, i);
     int align = getpagesize();
-    size_t mem_size = (size_t) s->cluster_size * num_tables;
+    size_t mem_size = (size_t) c->table_size * num_tables;
     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
     if (mem_size > offset && length > 0) {
@@ -132,9 +130,10 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 
     c = g_new0(Qcow2Cache, 1);
     c->size = num_tables;
+    c->table_size = s->cluster_size;
     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
     c->table_array = qemu_try_blockalign(bs->file->bs,
-                                         (size_t) num_tables * s->cluster_size);
+                                         (size_t) num_tables * c->table_size);
 
     if (!c->entries || !c->table_array) {
         qemu_vfree(c->table_array);
@@ -203,13 +202,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 
     if (c == s->refcount_block_cache) {
         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
-                c->entries[i].offset, s->cluster_size);
+                c->entries[i].offset, c->table_size);
     } else if (c == s->l2_table_cache) {
         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-                c->entries[i].offset, s->cluster_size);
+                c->entries[i].offset, c->table_size);
     } else {
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                c->entries[i].offset, s->cluster_size);
+                c->entries[i].offset, c->table_size);
     }
 
     if (ret < 0) {
@@ -223,7 +222,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
     }
 
     ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+                      qcow2_cache_get_table_addr(bs, c, i), c->table_size);
     if (ret < 0) {
         return ret;
     }
@@ -331,7 +330,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
                           offset, read_from_disk);
 
-    if (offset_into_cluster(s, offset)) {
+    if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
         qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
                                 "cache: Offset %#" PRIx64 " is unaligned",
                                 qcow2_cache_get_name(s, c), offset);
@@ -339,7 +338,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     }
 
     /* Check if the table is already cached */
-    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
+    i = lookup_index = (offset / c->table_size * 4) % c->size;
     do {
         const Qcow2CachedTable *t = &c->entries[i];
         if (t->offset == offset) {
@@ -380,7 +379,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 
         ret = bdrv_pread(bs->file, offset,
                          qcow2_cache_get_table_addr(bs, c, i),
-                         s->cluster_size);
+                         c->table_size);
         if (ret < 0) {
             return ret;
         }

From 9869b27bb5c1c08f175804d287efc7b3c6a94e33 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:03 +0200
Subject: [PATCH 19/55] qcow2: Remove BDS parameter from
 qcow2_cache_get_table_addr()

This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: e1f943a9e89e1deb876f45de1bb22419ccdb6ad3.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 38c03770b4..a481ef499a 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -46,8 +46,7 @@ struct Qcow2Cache {
     uint64_t                cache_clean_lru_counter;
 };
 
-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
-                    Qcow2Cache *c, int table)
+static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
 {
     return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
@@ -78,7 +77,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
-    void *t = qcow2_cache_get_table_addr(bs, c, i);
+    void *t = qcow2_cache_get_table_addr(c, i);
     int align = getpagesize();
     size_t mem_size = (size_t) c->table_size * num_tables;
     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
@@ -222,7 +221,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
     }
 
     ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(bs, c, i), c->table_size);
+                      qcow2_cache_get_table_addr(c, i), c->table_size);
     if (ret < 0) {
         return ret;
     }
@@ -378,7 +377,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
         }
 
         ret = bdrv_pread(bs->file, offset,
-                         qcow2_cache_get_table_addr(bs, c, i),
+                         qcow2_cache_get_table_addr(c, i),
                          c->table_size);
         if (ret < 0) {
             return ret;
@@ -390,7 +389,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
     /* And return the right table */
 found:
     c->entries[i].ref++;
-    *table = qcow2_cache_get_table_addr(bs, c, i);
+    *table = qcow2_cache_get_table_addr(c, i);
 
     trace_qcow2_cache_get_done(qemu_coroutine_self(),
                                c == s->l2_table_cache, i);
@@ -439,7 +438,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
 
     for (i = 0; i < c->size; i++) {
         if (c->entries[i].offset == offset) {
-            return qcow2_cache_get_table_addr(bs, c, i);
+            return qcow2_cache_get_table_addr(c, i);
         }
     }
     return NULL;

From b3b8b6d9c4c7885e39bebe02abf60855c233dc5c Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:04 +0200
Subject: [PATCH 20/55] qcow2: Remove BDS parameter from
 qcow2_cache_get_table_idx()

This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: da3575d47c9a181a2cfd4715e53dd84a2c651017.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index a481ef499a..3749d55595 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -51,8 +51,7 @@ static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
     return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
 
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
-                  Qcow2Cache *c, void *table)
+static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
 {
     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
     int idx = table_offset / c->table_size;
@@ -411,7 +410,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
 
 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, *table);
+    int i = qcow2_cache_get_table_idx(c, *table);
 
     c->entries[i].ref--;
     *table = NULL;
@@ -426,7 +425,7 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
      void *table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, table);
+    int i = qcow2_cache_get_table_idx(c, table);
     assert(c->entries[i].offset != 0);
     c->entries[i].dirty = true;
 }
@@ -446,7 +445,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
 
 void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
 {
-    int i = qcow2_cache_get_table_idx(bs, c, table);
+    int i = qcow2_cache_get_table_idx(c, table);
 
     assert(c->entries[i].ref == 0);
 

From ebe988f318e1dc89d83572da4ba545619514ad1e Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:05 +0200
Subject: [PATCH 21/55] qcow2: Remove BDS parameter from
 qcow2_cache_table_release()

This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 7c1b262344375d52544525f85bbbf0548d5ba575.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 3749d55595..5ff2cbf5c5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -71,8 +71,7 @@ static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
     }
 }
 
-static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
-                                      int i, int num_tables)
+static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
@@ -114,7 +113,7 @@ void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
         }
 
         if (to_clean > 0) {
-            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+            qcow2_cache_table_release(c, i - to_clean, to_clean);
         }
     }
 
@@ -306,7 +305,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
         c->entries[i].lru_counter = 0;
     }
 
-    qcow2_cache_table_release(bs, c, 0, c->size);
+    qcow2_cache_table_release(c, 0, c->size);
 
     c->lru_counter = 0;
 
@@ -453,5 +452,5 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
     c->entries[i].lru_counter = 0;
     c->entries[i].dirty = false;
 
-    qcow2_cache_table_release(bs, c, i, 1);
+    qcow2_cache_table_release(c, i, 1);
 }

From 2d135ee92d656f824504b603054435297552bd8b Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:06 +0200
Subject: [PATCH 22/55] qcow2: Remove BDS parameter from
 qcow2_cache_entry_mark_dirty()

This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 5c40516a91782b083c1428b7b6a41bb9e2679bfb.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c    |  3 +--
 block/qcow2-cluster.c  | 12 ++++++------
 block/qcow2-refcount.c | 14 ++++++--------
 block/qcow2.h          |  3 +--
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 5ff2cbf5c5..07603e6b15 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -421,8 +421,7 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
     assert(c->entries[i].ref >= 0);
 }
 
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table)
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
 {
     int i = qcow2_cache_get_table_idx(c, table);
     assert(c->entries[i].offset != 0);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e5ab102c29..08af5c5995 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -324,7 +324,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
     trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
         goto fail;
@@ -765,7 +765,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     /* compressed clusters never have the copied flag */
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
     qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
@@ -937,7 +937,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     if (ret < 0) {
         goto err;
     }
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 
     assert(l2_index + m->nb_clusters <= s->l2_size);
     for (i = 0; i < m->nb_clusters; i++) {
@@ -1678,7 +1678,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         }
 
         /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
         if (!full_discard && s->qcow_version >= 3) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
         } else {
@@ -1774,7 +1774,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
             continue;
         }
 
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
         if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
             qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1983,7 +1983,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
 
         if (is_active_l1) {
             if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
                 qcow2_cache_depends_on_flush(s->l2_table_cache);
             }
             qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 92701ab7af..5434e7d4c8 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -421,7 +421,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 
     /* Now the new refcount block needs to be written to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
+    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
     if (ret < 0) {
         goto fail;
@@ -623,7 +623,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                 goto fail;
             }
             memset(refblock_data, 0, s->cluster_size);
-            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                          refblock_data);
 
             new_table[i] = block_offset;
@@ -656,7 +656,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                 s->set_refcount(refblock_data, j, 1);
             }
 
-            qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+            qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
                                          refblock_data);
         }
 
@@ -845,8 +845,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         }
         old_table_index = table_index;
 
-        qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
-                                     refcount_block);
+        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
 
         /* we can update the count and save it */
         block_index = cluster_index & (s->refcount_block_size - 1);
@@ -1316,8 +1315,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                             s->refcount_block_cache);
                     }
                     l2_table[j] = cpu_to_be64(entry);
-                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
-                                                 l2_table);
+                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
                 }
             }
 
@@ -3180,7 +3178,7 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
     }
     s->set_refcount(refblock, block_index, 0);
 
-    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, refblock);
+    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);
 
     qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 016e87c81a..73e0e0133d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -639,8 +639,7 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
 
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table);
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,

From 2013c3d44d75020292905828e02dd44520a26b7b Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:07 +0200
Subject: [PATCH 23/55] qcow2: Remove BDS parameter from qcow2_cache_put()

This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 6f98155489054a457563da77cdad1a66ebb3e896.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c    |  2 +-
 block/qcow2-cluster.c  | 28 ++++++++++++++--------------
 block/qcow2-refcount.c | 30 +++++++++++++++---------------
 block/qcow2.h          |  2 +-
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 07603e6b15..3b55f39afb 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -407,7 +407,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
     return qcow2_cache_do_get(bs, c, offset, table, false);
 }
 
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+void qcow2_cache_put(Qcow2Cache *c, void **table)
 {
     int i = qcow2_cache_get_table_idx(c, *table);
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 08af5c5995..ab5715c97e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -317,7 +317,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
         memcpy(l2_table, old_table, s->cluster_size);
 
-        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
     }
 
     /* write the l2 table to the file */
@@ -345,7 +345,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 fail:
     trace_qcow2_l2_allocate_done(bs, l1_index, ret);
     if (l2_table != NULL) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+        qcow2_cache_put(s->l2_table_cache, (void **) table);
     }
     s->l1_table[l1_index] = old_l2_offset;
     if (l2_offset > 0) {
@@ -619,7 +619,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         abort();
     }
 
-    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     bytes_available = (int64_t)c * s->cluster_size;
 
@@ -637,7 +637,7 @@ out:
     return type;
 
 fail:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **)&l2_table);
     return ret;
 }
 
@@ -744,13 +744,13 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
      * allocated. */
     cluster_offset = be64_to_cpu(l2_table[l2_index]);
     if (cluster_offset & L2E_OFFSET_MASK) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
         return 0;
     }
 
     cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
     if (cluster_offset < 0) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
         return 0;
     }
 
@@ -767,7 +767,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     return cluster_offset;
 }
@@ -956,7 +956,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      }
 
 
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     /*
      * If this was a COW, we need to decrease the refcount of the old cluster.
@@ -1174,7 +1174,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
 
     /* Cleanup */
 out:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     /* Only return a host offset if we actually made progress. Otherwise we
      * would make requirements for handle_alloc() that it can't fulfill */
@@ -1333,7 +1333,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         keep_old_clusters = true;
     }
 
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     if (!alloc_cluster_offset) {
         /* Allocate, if necessary at a given offset in the image file */
@@ -1689,7 +1689,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
     }
 
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     return nb_clusters;
 }
@@ -1783,7 +1783,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
         }
     }
 
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
     return nb_clusters;
 }
@@ -1986,7 +1986,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
                 qcow2_cache_depends_on_flush(s->l2_table_cache);
             }
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
         } else {
             if (l2_dirty) {
                 ret = qcow2_pre_write_overlap_check(bs,
@@ -2017,7 +2017,7 @@ fail:
         if (!is_active_l1) {
             qemu_vfree(l2_table);
         } else {
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
         }
     }
     return ret;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 5434e7d4c8..65af05dd23 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -277,7 +277,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
     block_index = cluster_index & (s->refcount_block_size - 1);
     *refcount = s->get_refcount(refcount_block, block_index);
 
-    qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+    qcow2_cache_put(s->refcount_block_cache, &refcount_block);
 
     return 0;
 }
@@ -449,7 +449,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
         return -EAGAIN;
     }
 
-    qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+    qcow2_cache_put(s->refcount_block_cache, refcount_block);
 
     /*
      * If we come here, we need to grow the refcount table. Again, a new
@@ -501,7 +501,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 
 fail:
     if (*refcount_block != NULL) {
-        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
+        qcow2_cache_put(s->refcount_block_cache, refcount_block);
     }
     return ret;
 }
@@ -660,7 +660,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t start_offset,
                                          refblock_data);
         }
 
-        qcow2_cache_put(bs, s->refcount_block_cache, &refblock_data);
+        qcow2_cache_put(s->refcount_block_cache, &refblock_data);
     }
 
     assert(block_offset == table_offset);
@@ -836,7 +836,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         /* Load the refcount block and allocate it if needed */
         if (table_index != old_table_index) {
             if (refcount_block) {
-                qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
             }
             ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
             if (ret < 0) {
@@ -874,7 +874,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
             table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
                                                 offset);
             if (table != NULL) {
-                qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+                qcow2_cache_put(s->refcount_block_cache, &refcount_block);
                 qcow2_cache_discard(bs, s->refcount_block_cache, table);
             }
 
@@ -897,7 +897,7 @@ fail:
 
     /* Write last changed block to disk */
     if (refcount_block) {
-        qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
+        qcow2_cache_put(s->refcount_block_cache, &refcount_block);
     }
 
     /*
@@ -1319,7 +1319,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 }
             }
 
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
             if (addend != 0) {
                 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
@@ -1347,7 +1347,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     ret = bdrv_flush(bs);
 fail:
     if (l2_table) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
     }
 
     s->cache_discards = false;
@@ -2847,7 +2847,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                                     new_reftable_size, new_refblock,
                                     new_refblock_empty, allocated, errp);
                     if (ret < 0) {
-                        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+                        qcow2_cache_put(s->refcount_block_cache, &refblock);
                         return ret;
                     }
 
@@ -2860,7 +2860,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                 if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
                     uint64_t offset;
 
-                    qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+                    qcow2_cache_put(s->refcount_block_cache, &refblock);
 
                     offset = ((reftable_index << s->refcount_block_bits)
                               + refblock_index) << s->cluster_bits;
@@ -2881,7 +2881,7 @@ static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
                 new_refblock_empty = new_refblock_empty && refcount == 0;
             }
 
-            qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+            qcow2_cache_put(s->refcount_block_cache, &refblock);
         } else {
             /* No refblock means every refcount is 0 */
             for (refblock_index = 0; refblock_index < s->refcount_block_size;
@@ -3173,14 +3173,14 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
                                 offset_to_reftable_index(s, discard_block_offs),
                                 discard_block_offs,
                                 s->get_refcount(refblock, block_index));
-        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+        qcow2_cache_put(s->refcount_block_cache, &refblock);
         return -EINVAL;
     }
     s->set_refcount(refblock, block_index, 0);
 
     qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refblock);
 
-    qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+    qcow2_cache_put(s->refcount_block_cache, &refblock);
 
     if (cluster_index < s->free_cluster_index) {
         s->free_cluster_index = cluster_index;
@@ -3233,7 +3233,7 @@ int qcow2_shrink_reftable(BlockDriverState *bs)
         } else {
             unused_block = buffer_is_zero(refblock, s->cluster_size);
         }
-        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+        qcow2_cache_put(s->refcount_block_cache, &refblock);
 
         reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
     }
diff --git a/block/qcow2.h b/block/qcow2.h
index 73e0e0133d..606dcdd47f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -653,7 +653,7 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
     void **table);
 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
     void **table);
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+void qcow2_cache_put(Qcow2Cache *c, void **table);
 void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
                                   uint64_t offset);
 void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);

From e64d4072d1f32128a03264df0539ed60395a17d0 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:08 +0200
Subject: [PATCH 24/55] qcow2: Remove BDS parameter from qcow2_cache_destroy()

This function was never using the BlockDriverState parameter so it can
be safely removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 49c74fe8b3aead9056e61a85b145ce787d06262b.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c |  2 +-
 block/qcow2.c       | 16 ++++++++--------
 block/qcow2.h       |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 3b55f39afb..6f17a28635 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -142,7 +142,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
     return c;
 }
 
-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_destroy(Qcow2Cache *c)
 {
     int i;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 80774aacf6..38e7fdc4b0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1044,10 +1044,10 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
     int i;
 
     if (s->l2_table_cache) {
-        qcow2_cache_destroy(bs, s->l2_table_cache);
+        qcow2_cache_destroy(s->l2_table_cache);
     }
     if (s->refcount_block_cache) {
-        qcow2_cache_destroy(bs, s->refcount_block_cache);
+        qcow2_cache_destroy(s->refcount_block_cache);
     }
     s->l2_table_cache = r->l2_table_cache;
     s->refcount_block_cache = r->refcount_block_cache;
@@ -1073,10 +1073,10 @@ static void qcow2_update_options_abort(BlockDriverState *bs,
                                        Qcow2ReopenState *r)
 {
     if (r->l2_table_cache) {
-        qcow2_cache_destroy(bs, r->l2_table_cache);
+        qcow2_cache_destroy(r->l2_table_cache);
     }
     if (r->refcount_block_cache) {
-        qcow2_cache_destroy(bs, r->refcount_block_cache);
+        qcow2_cache_destroy(r->refcount_block_cache);
     }
     qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
 }
@@ -1514,10 +1514,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
     s->l1_table = NULL;
     cache_clean_timer_del(bs);
     if (s->l2_table_cache) {
-        qcow2_cache_destroy(bs, s->l2_table_cache);
+        qcow2_cache_destroy(s->l2_table_cache);
     }
     if (s->refcount_block_cache) {
-        qcow2_cache_destroy(bs, s->refcount_block_cache);
+        qcow2_cache_destroy(s->refcount_block_cache);
     }
     qcrypto_block_free(s->crypto);
     qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
@@ -2065,8 +2065,8 @@ static void qcow2_close(BlockDriverState *bs)
     }
 
     cache_clean_timer_del(bs);
-    qcow2_cache_destroy(bs, s->l2_table_cache);
-    qcow2_cache_destroy(bs, s->refcount_block_cache);
+    qcow2_cache_destroy(s->l2_table_cache);
+    qcow2_cache_destroy(s->refcount_block_cache);
 
     qcrypto_block_free(s->crypto);
     s->crypto = NULL;
diff --git a/block/qcow2.h b/block/qcow2.h
index 606dcdd47f..9936cd0700 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -637,7 +637,7 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 
 /* qcow2-cache.c functions */
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+int qcow2_cache_destroy(Qcow2Cache *c);
 
 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);

From b2f68bffab1a0ec5fb5c913b22d8a75b24d7945d Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:09 +0200
Subject: [PATCH 25/55] qcow2: Remove BDS parameter from
 qcow2_cache_clean_unused()

This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_table_release(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: b74f17591af52f201de0ea3a3b2dd0a81932334d.1517840876.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c | 2 +-
 block/qcow2.c       | 4 ++--
 block/qcow2.h       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 6f17a28635..03b3e03c6c 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -93,7 +93,7 @@ static inline bool can_clean_entry(Qcow2Cache *c, int i)
         t->lru_counter <= c->cache_clean_lru_counter;
 }
 
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+void qcow2_cache_clean_unused(Qcow2Cache *c)
 {
     int i = 0;
     while (i < c->size) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 38e7fdc4b0..461cc7ab1c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -706,8 +706,8 @@ static void cache_clean_timer_cb(void *opaque)
 {
     BlockDriverState *bs = opaque;
     BDRVQcow2State *s = bs->opaque;
-    qcow2_cache_clean_unused(bs, s->l2_table_cache);
-    qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+    qcow2_cache_clean_unused(s->l2_table_cache);
+    qcow2_cache_clean_unused(s->refcount_block_cache);
     timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
               (int64_t) s->cache_clean_interval * 1000);
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index 9936cd0700..867f2e8828 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -646,7 +646,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
     Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
 
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
+void qcow2_cache_clean_unused(Qcow2Cache *c);
 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
 
 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,

From 77aadd7bedbd4455fb824d6426d11043fe921d85 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:10 +0200
Subject: [PATCH 26/55] qcow2: Remove BDS parameter from qcow2_cache_discard()

This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx() and qcow2_cache_table_release(). This
is no longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 9724f7e38e763ad3be32627c6b7fe8df9edb1476.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c    | 2 +-
 block/qcow2-refcount.c | 6 +++---
 block/qcow2.h          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 03b3e03c6c..8d0b65e671 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -441,7 +441,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
     return NULL;
 }
 
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
+void qcow2_cache_discard(Qcow2Cache *c, void *table)
 {
     int i = qcow2_cache_get_table_idx(c, table);
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 65af05dd23..361b39d5cc 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -875,12 +875,12 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                                                 offset);
             if (table != NULL) {
                 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
-                qcow2_cache_discard(bs, s->refcount_block_cache, table);
+                qcow2_cache_discard(s->refcount_block_cache, table);
             }
 
             table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
             if (table != NULL) {
-                qcow2_cache_discard(bs, s->l2_table_cache, table);
+                qcow2_cache_discard(s->l2_table_cache, table);
             }
 
             if (s->discard_passthrough[type]) {
@@ -3190,7 +3190,7 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
                                            discard_block_offs);
     if (refblock) {
         /* discard refblock from the cache if refblock is cached */
-        qcow2_cache_discard(bs, s->refcount_block_cache, refblock);
+        qcow2_cache_discard(s->refcount_block_cache, refblock);
     }
     update_refcount_discard(bs, discard_block_offs, s->cluster_size);
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 867f2e8828..7296784353 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -656,7 +656,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
 void qcow2_cache_put(Qcow2Cache *c, void **table);
 void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
                                   uint64_t offset);
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);
+void qcow2_cache_discard(Qcow2Cache *c, void *table);
 
 /* qcow2-bitmap.c functions */
 int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,

From 6e6fa7605e8a7c08d873dba9e1688f93aac20fc5 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:11 +0200
Subject: [PATCH 27/55] qcow2: Remove BDS parameter from
 qcow2_cache_is_table_offset()

This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_addr(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: eb0ed90affcf302e5a954bafb5931b5215483d3a.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c    | 3 +--
 block/qcow2-refcount.c | 6 +++---
 block/qcow2.h          | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 8d0b65e671..b1aa42477e 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -428,8 +428,7 @@ void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
     c->entries[i].dirty = true;
 }
 
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
-                                  uint64_t offset)
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
 {
     int i;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 361b39d5cc..9df380d52c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -871,14 +871,14 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         if (refcount == 0) {
             void *table;
 
-            table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+            table = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                                 offset);
             if (table != NULL) {
                 qcow2_cache_put(s->refcount_block_cache, &refcount_block);
                 qcow2_cache_discard(s->refcount_block_cache, table);
             }
 
-            table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
+            table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
             if (table != NULL) {
                 qcow2_cache_discard(s->l2_table_cache, table);
             }
@@ -3186,7 +3186,7 @@ static int qcow2_discard_refcount_block(BlockDriverState *bs,
         s->free_cluster_index = cluster_index;
     }
 
-    refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+    refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
                                            discard_block_offs);
     if (refblock) {
         /* discard refblock from the cache if refblock is cached */
diff --git a/block/qcow2.h b/block/qcow2.h
index 7296784353..edc5d8d57d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -654,8 +654,7 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
     void **table);
 void qcow2_cache_put(Qcow2Cache *c, void **table);
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
-                                  uint64_t offset);
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
 void qcow2_cache_discard(Qcow2Cache *c, void *table);
 
 /* qcow2-bitmap.c functions */

From 05b5b6ee54cf69e7053c73286aac354be754235e Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:12 +0200
Subject: [PATCH 28/55] qcow2: Add offset_to_l1_index()

Similar to offset_to_l2_index(), this function returns the index in
the L1 table for a given guest offset. This is only used in a couple
of places and it's not a particularly complex calculation, but it
makes the code a bit more readable.

Although in the qcow2_get_cluster_offset() case the old code was
taking advantage of the l1_bits variable, we're going to get rid of
the other uses of l1_bits in a later patch anyway, so it doesn't make
sense to keep it just for this.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: a5f626fed526b7459a0425fad06d823d18df8522.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 4 ++--
 block/qcow2.h         | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ab5715c97e..ce7591dc3d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -540,7 +540,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
     /* seek to the l2 offset in the l1 table */
 
-    l1_index = offset >> l1_bits;
+    l1_index = offset_to_l1_index(s, offset);
     if (l1_index >= s->l1_size) {
         type = QCOW2_CLUSTER_UNALLOCATED;
         goto out;
@@ -663,7 +663,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
 
     /* seek to the l2 offset in the l1 table */
 
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
+    l1_index = offset_to_l1_index(s, offset);
     if (l1_index >= s->l1_size) {
         ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
         if (ret < 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index edc5d8d57d..d9ba57c030 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -463,6 +463,11 @@ static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
     return (size + (1ULL << shift) - 1) >> shift;
 }
 
+static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset)
+{
+    return offset >> (s->l2_bits + s->cluster_bits);
+}
+
 static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
 {
     return (offset >> s->cluster_bits) & (s->l2_size - 1);

From 3c2e511a24b51c5bfc70d365b54ad8c612d3b07c Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:13 +0200
Subject: [PATCH 29/55] qcow2: Add l2_slice_size field to BDRVQcow2State

The BDRVQcow2State structure contains an l2_size field, which stores
the number of 64-bit entries in an L2 table.

For efficiency reasons we want to be able to load slices instead of
full L2 tables, so we need to know how many entries an L2 slice can
hold.

An L2 slice is the portion of an L2 table that is loaded by the qcow2
cache. At the moment that cache can only load complete tables,
therefore an L2 slice has the same size as an L2 table (one cluster)
and l2_size == l2_slice_size.

Later we'll allow smaller slices, but until then we have to use this
new l2_slice_size field to make the rest of the code ready for that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: adb048595f9fb5dfb110c802a8b3c3be3b937f37.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.c | 3 +++
 block/qcow2.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 461cc7ab1c..ba8d71c72d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -807,6 +807,7 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
 typedef struct Qcow2ReopenState {
     Qcow2Cache *l2_table_cache;
     Qcow2Cache *refcount_block_cache;
+    int l2_slice_size; /* Number of entries in a slice of the L2 table */
     bool use_lazy_refcounts;
     int overlap_check;
     bool discard_passthrough[QCOW2_DISCARD_MAX];
@@ -888,6 +889,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
         }
     }
 
+    r->l2_slice_size = s->cluster_size / sizeof(uint64_t);
     r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
     r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
     if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
@@ -1051,6 +1053,7 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
     }
     s->l2_table_cache = r->l2_table_cache;
     s->refcount_block_cache = r->refcount_block_cache;
+    s->l2_slice_size = r->l2_slice_size;
 
     s->overlap_check = r->overlap_check;
     s->use_lazy_refcounts = r->use_lazy_refcounts;
diff --git a/block/qcow2.h b/block/qcow2.h
index d9ba57c030..45fb7f4299 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -251,6 +251,7 @@ typedef struct BDRVQcow2State {
     int cluster_bits;
     int cluster_size;
     int cluster_sectors;
+    int l2_slice_size;
     int l2_bits;
     int l2_size;
     int l1_size;

From 8f8181757771f252106efe2d6e833aabf0b64d01 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:14 +0200
Subject: [PATCH 30/55] qcow2: Add offset_to_l2_slice_index()

Similar to offset_to_l2_index(), this function takes a guest offset
and returns the index in the L2 slice that contains its L2 entry.

An L2 slice has currently the same size as an L2 table (one cluster),
so both functions return the same value for now.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: a1c45c5c5a76146dd1712d8d1e7b409ad539c718.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index 45fb7f4299..d956a6cdd2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -474,6 +474,11 @@ static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
     return (offset >> s->cluster_bits) & (s->l2_size - 1);
 }
 
+static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset)
+{
+    return (offset >> s->cluster_bits) & (s->l2_slice_size - 1);
+}
+
 static inline int64_t align_offset(int64_t offset, int n)
 {
     offset = (offset + n - 1) & ~(n - 1);

From e2b5713eb9a8545e00eaeb5350069ef40c6cc49a Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:15 +0200
Subject: [PATCH 31/55] qcow2: Update l2_load() to support L2 slices

Each entry in the qcow2 L2 cache stores a full L2 table (which uses a
complete cluster in the qcow2 image). A cluster is usually too large
to be used efficiently as the size for a cache entry, so we want to
decouple both values by allowing smaller cache entries. Therefore the
qcow2 L2 cache will no longer return full L2 tables but slices
instead.

This patch updates l2_load() so it can handle L2 slices correctly.
Apart from the offset of the L2 table (which we already had) we also
need the guest offset in order to calculate which one of the slices
we need.

An L2 slice has currently the same size as an L2 table (one cluster),
so for now this function will load exactly the same data as before.

This patch also removes a stale comment about the return value being
a pointer to the L2 table. This function returns an error code since
55c17e9821c474d5fcdebdc82ed2fc096777d611.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: b830aa1fc5b6f8e3cb331d006853fe22facca847.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ce7591dc3d..542080aed0 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -195,20 +195,26 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 /*
  * l2_load
  *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
+ * @bs: The BlockDriverState
+ * @offset: A guest offset, used to calculate what slice of the L2
+ *          table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
  *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
+ * Loads a L2 slice into memory (L2 slices are the parts of L2 tables
+ * that are loaded by the qcow2 cache). If the slice is in the cache,
+ * the cache is used; otherwise the L2 slice is loaded from the image
+ * file.
  */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
-    uint64_t **l2_table)
+static int l2_load(BlockDriverState *bs, uint64_t offset,
+                   uint64_t l2_offset, uint64_t **l2_slice)
 {
     BDRVQcow2State *s = bs->opaque;
+    int start_of_slice = sizeof(uint64_t) *
+        (offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset));
 
-    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                           (void **)l2_table);
+    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice,
+                           (void **)l2_slice);
 }
 
 /*
@@ -561,7 +567,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
     /* load the l2 table in memory */
 
-    ret = l2_load(bs, l2_offset, &l2_table);
+    ret = l2_load(bs, offset, l2_offset, &l2_table);
     if (ret < 0) {
         return ret;
     }
@@ -684,7 +690,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
 
     if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
         /* load the l2 table in memory */
-        ret = l2_load(bs, l2_offset, &l2_table);
+        ret = l2_load(bs, offset, l2_offset, &l2_table);
         if (ret < 0) {
             return ret;
         }

From 6580bb09ab154143461e9ddc956dee3d81018660 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:16 +0200
Subject: [PATCH 32/55] qcow2: Prepare l2_allocate() for adding L2 slice
 support

Adding support for L2 slices to l2_allocate() needs (among other
things) an extra loop that iterates over all slices of a new L2 table.

Putting all changes in one patch would make it hard to read because
all semantic changes would be mixed with pure indentation changes.

To make things easier this patch simply creates a new block and
changes the indentation of all lines of code inside it. Thus, all
modifications in this patch are cosmetic. There are no semantic
changes and no variables are renamed yet. The next patch will take
care of that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: d0d7dca8520db304524f52f49d8157595a707a35.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 55 +++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 542080aed0..3f7272441d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -299,38 +299,43 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     /* allocate a new entry in the l2 cache */
 
     trace_qcow2_l2_allocate_get_empty(bs, l1_index);
-    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    l2_table = *table;
-
-    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
-        /* if there was no old l2 table, clear the new table */
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-    } else {
-        uint64_t* old_table;
-
-        /* if there was an old l2 table, read it from the disk */
-        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-        ret = qcow2_cache_get(bs, s->l2_table_cache,
-            old_l2_offset & L1E_OFFSET_MASK,
-            (void**) &old_table);
+    {
+        ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
+                                    l2_offset,
+                                    (void **) table);
         if (ret < 0) {
             goto fail;
         }
 
-        memcpy(l2_table, old_table, s->cluster_size);
+        l2_table = *table;
 
-        qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+        if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+            /* if there was no old l2 table, clear the new table */
+            memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+        } else {
+            uint64_t *old_table;
+
+            /* if there was an old l2 table, read it from the disk */
+            BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+            ret = qcow2_cache_get(bs, s->l2_table_cache,
+                                  old_l2_offset & L1E_OFFSET_MASK,
+                                  (void **) &old_table);
+            if (ret < 0) {
+                goto fail;
+            }
+
+            memcpy(l2_table, old_table, s->cluster_size);
+
+            qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+        }
+
+        /* write the l2 table to the file */
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+
+        trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     }
 
-    /* write the l2 table to the file */
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
-    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
         goto fail;

From 3861946a5b423b71e677e79cc32c528174bdf9df Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:17 +0200
Subject: [PATCH 33/55] qcow2: Update l2_allocate() to support L2 slices

This patch updates l2_allocate() to support the qcow2 cache returning
L2 slices instead of full L2 tables.

The old code simply gets an L2 table from the cache and initializes it
with zeroes or with the contents of an existing table. With a cache
that returns slices instead of tables the idea remains the same, but
the code must now iterate over all the slices that are contained in an
L2 table.

Since now we're operating with slices the function can no longer
return the newly-allocated table, so it's up to the caller to retrieve
the appropriate L2 slice after calling l2_allocate() (note that with
this patch the caller is still loading full L2 tables, but we'll deal
with that in a separate patch).

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20fc0415bf0e011e29f6487ec86eb06a11f37445.1517840877.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 56 ++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 3f7272441d..4aa9ea7b29 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -263,11 +263,12 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
  *
  */
 
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+static int l2_allocate(BlockDriverState *bs, int l1_index)
 {
     BDRVQcow2State *s = bs->opaque;
     uint64_t old_l2_offset;
-    uint64_t *l2_table = NULL;
+    uint64_t *l2_slice = NULL;
+    unsigned slice, slice_size2, n_slices;
     int64_t l2_offset;
     int ret;
 
@@ -298,42 +299,45 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
     /* allocate a new entry in the l2 cache */
 
+    slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+    n_slices = s->cluster_size / slice_size2;
+
     trace_qcow2_l2_allocate_get_empty(bs, l1_index);
-    {
+    for (slice = 0; slice < n_slices; slice++) {
         ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
-                                    l2_offset,
-                                    (void **) table);
+                                    l2_offset + slice * slice_size2,
+                                    (void **) &l2_slice);
         if (ret < 0) {
             goto fail;
         }
 
-        l2_table = *table;
-
         if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
-            /* if there was no old l2 table, clear the new table */
-            memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+            /* if there was no old l2 table, clear the new slice */
+            memset(l2_slice, 0, slice_size2);
         } else {
-            uint64_t *old_table;
+            uint64_t *old_slice;
+            uint64_t old_l2_slice_offset =
+                (old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2;
 
-            /* if there was an old l2 table, read it from the disk */
+            /* if there was an old l2 table, read a slice from the disk */
             BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-            ret = qcow2_cache_get(bs, s->l2_table_cache,
-                                  old_l2_offset & L1E_OFFSET_MASK,
-                                  (void **) &old_table);
+            ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset,
+                                  (void **) &old_slice);
             if (ret < 0) {
                 goto fail;
             }
 
-            memcpy(l2_table, old_table, s->cluster_size);
+            memcpy(l2_slice, old_slice, slice_size2);
 
-            qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+            qcow2_cache_put(s->l2_table_cache, (void **) &old_slice);
         }
 
-        /* write the l2 table to the file */
+        /* write the l2 slice to the file */
         BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
         trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
     }
 
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
@@ -349,14 +353,13 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
         goto fail;
     }
 
-    *table = l2_table;
     trace_qcow2_l2_allocate_done(bs, l1_index, 0);
     return 0;
 
 fail:
     trace_qcow2_l2_allocate_done(bs, l1_index, ret);
-    if (l2_table != NULL) {
-        qcow2_cache_put(s->l2_table_cache, (void **) table);
+    if (l2_slice != NULL) {
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
     }
     s->l1_table[l1_index] = old_l2_offset;
     if (l2_offset > 0) {
@@ -701,7 +704,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
         }
     } else {
         /* First allocate a new L2 table (and do COW if needed) */
-        ret = l2_allocate(bs, l1_index, &l2_table);
+        ret = l2_allocate(bs, l1_index);
         if (ret < 0) {
             return ret;
         }
@@ -711,6 +714,15 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
             qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
                                 QCOW2_DISCARD_OTHER);
         }
+
+        /* Get the offset of the newly-allocated l2 table */
+        l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+        assert(offset_into_cluster(s, l2_offset) == 0);
+        /* Load the l2 table in memory */
+        ret = l2_load(bs, offset, l2_offset, &l2_table);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
     /* find the cluster offset for the given disk offset */

From 05f9ee4689b71e726c46e7477020ac4bf4814109 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:18 +0200
Subject: [PATCH 34/55] qcow2: Refactor get_cluster_table()

After the previous patch we're now always using l2_load() in
get_cluster_table() regardless of whether a new L2 table has to be
allocated or not.

This patch refactors that part of the code to use one single l2_load()
call.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: ce31758c4a1fadccea7a6ccb93951eb01d95fd4c.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4aa9ea7b29..044eccda72 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -694,15 +694,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
         return -EIO;
     }
 
-    /* seek the l2 table of the given l2 offset */
-
-    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
-        /* load the l2 table in memory */
-        ret = l2_load(bs, offset, l2_offset, &l2_table);
-        if (ret < 0) {
-            return ret;
-        }
-    } else {
+    if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
         /* First allocate a new L2 table (and do COW if needed) */
         ret = l2_allocate(bs, l1_index);
         if (ret < 0) {
@@ -718,11 +710,12 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
         /* Get the offset of the newly-allocated l2 table */
         l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
         assert(offset_into_cluster(s, l2_offset) == 0);
-        /* Load the l2 table in memory */
-        ret = l2_load(bs, offset, l2_offset, &l2_table);
-        if (ret < 0) {
-            return ret;
-        }
+    }
+
+    /* load the l2 table in memory */
+    ret = l2_load(bs, offset, l2_offset, &l2_table);
+    if (ret < 0) {
+        return ret;
     }
 
     /* find the cluster offset for the given disk offset */

From c03bfc5bba594a961ddc3c755fd4c0e45c41bae4 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:19 +0200
Subject: [PATCH 35/55] qcow2: Update get_cluster_table() to support L2 slices

This patch updates get_cluster_table() to return L2 slices instead of
full L2 tables.

The code itself needs almost no changes, it only needs to call
offset_to_l2_slice_index() instead of offset_to_l2_index(). This patch
also renames all the relevant variables and the documentation.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 64cf064c0021ba315d3f3032da0f95db1b615f33.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 044eccda72..cd1ba40f74 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -659,20 +659,20 @@ fail:
  * get_cluster_table
  *
  * for a given disk offset, load (and allocate if needed)
- * the l2 table.
+ * the appropriate slice of its l2 table.
  *
- * the cluster index in the l2 table is given to the caller.
+ * the cluster index in the l2 slice is given to the caller.
  *
  * Returns 0 on success, -errno in failure case
  */
 static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
-                             uint64_t **new_l2_table,
+                             uint64_t **new_l2_slice,
                              int *new_l2_index)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
     uint64_t l1_index, l2_offset;
-    uint64_t *l2_table = NULL;
+    uint64_t *l2_slice = NULL;
     int ret;
 
     /* seek to the l2 offset in the l1 table */
@@ -712,17 +712,17 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
         assert(offset_into_cluster(s, l2_offset) == 0);
     }
 
-    /* load the l2 table in memory */
-    ret = l2_load(bs, offset, l2_offset, &l2_table);
+    /* load the l2 slice in memory */
+    ret = l2_load(bs, offset, l2_offset, &l2_slice);
     if (ret < 0) {
         return ret;
     }
 
     /* find the cluster offset for the given disk offset */
 
-    l2_index = offset_to_l2_index(s, offset);
+    l2_index = offset_to_l2_slice_index(s, offset);
 
-    *new_l2_table = l2_table;
+    *new_l2_slice = l2_slice;
     *new_l2_index = l2_index;
 
     return 0;

From fd630039c0478919434cd2f9a4b9a72f61312624 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:20 +0200
Subject: [PATCH 36/55] qcow2: Update qcow2_get_cluster_offset() to support L2
 slices

qcow2_get_cluster_offset() checks how many contiguous bytes are
available at a given offset. The returned number of bytes is limited
by the amount that can be addressed without having to load more than
one L2 table.

Since we'll be loading L2 slices instead of full tables this patch
changes the limit accordingly using the size of the L2 slice for the
calculations instead of the full table size.

One consequence of this is that with small L2 slices operations such
as 'qemu-img map' will need to iterate in more steps because each
qcow2_get_cluster_offset() call will potentially return a smaller
number. However the code is already prepared for that so this doesn't
break semantics.

The l2_table variable is also renamed to l2_slice to reflect this, and
offset_to_l2_index() is replaced with offset_to_l2_slice_index().

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 6b602260acb33da56ed6af9611731cb7acd110eb.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cd1ba40f74..e0eea73297 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -529,8 +529,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
-    uint64_t l1_index, l2_offset, *l2_table;
-    int l1_bits, c;
+    uint64_t l1_index, l2_offset, *l2_slice;
+    int c;
     unsigned int offset_in_cluster;
     uint64_t bytes_available, bytes_needed, nb_clusters;
     QCow2ClusterType type;
@@ -539,12 +539,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     offset_in_cluster = offset_into_cluster(s, offset);
     bytes_needed = (uint64_t) *bytes + offset_in_cluster;
 
-    l1_bits = s->l2_bits + s->cluster_bits;
-
     /* compute how many bytes there are between the start of the cluster
-     * containing offset and the end of the l1 entry */
-    bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
-                    + offset_in_cluster;
+     * containing offset and the end of the l2 slice that contains
+     * the entry pointing to it */
+    bytes_available =
+        ((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
+        << s->cluster_bits;
 
     if (bytes_needed > bytes_available) {
         bytes_needed = bytes_available;
@@ -573,17 +573,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         return -EIO;
     }
 
-    /* load the l2 table in memory */
+    /* load the l2 slice in memory */
 
-    ret = l2_load(bs, offset, l2_offset, &l2_table);
+    ret = l2_load(bs, offset, l2_offset, &l2_slice);
     if (ret < 0) {
         return ret;
     }
 
     /* find the cluster offset for the given disk offset */
 
-    l2_index = offset_to_l2_index(s, offset);
-    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    l2_index = offset_to_l2_slice_index(s, offset);
+    *cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 
     nb_clusters = size_to_clusters(s, bytes_needed);
     /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
@@ -610,14 +610,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     case QCOW2_CLUSTER_UNALLOCATED:
         /* how many empty clusters ? */
         c = count_contiguous_clusters_unallocated(nb_clusters,
-                                                  &l2_table[l2_index], type);
+                                                  &l2_slice[l2_index], type);
         *cluster_offset = 0;
         break;
     case QCOW2_CLUSTER_ZERO_ALLOC:
     case QCOW2_CLUSTER_NORMAL:
         /* how many allocated clusters ? */
         c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_ZERO);
+                                      &l2_slice[l2_index], QCOW_OFLAG_ZERO);
         *cluster_offset &= L2E_OFFSET_MASK;
         if (offset_into_cluster(s, *cluster_offset)) {
             qcow2_signal_corruption(bs, true, -1, -1,
@@ -633,7 +633,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
         abort();
     }
 
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     bytes_available = (int64_t)c * s->cluster_size;
 
@@ -651,7 +651,7 @@ out:
     return type;
 
 fail:
-    qcow2_cache_put(s->l2_table_cache, (void **)&l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
     return ret;
 }
 

From a002c0b09d3c48484225b268d0d3df8615d7f56c Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:21 +0200
Subject: [PATCH 37/55] qcow2: Update qcow2_alloc_cluster_link_l2() to support
 L2 slices

There's a loop in this function that iterates over the L2 entries in a
table, so now we need to assert that it remains within the limits of
an L2 slice.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: f9846a1c2efc51938e877e2a25852d9ab14797ff.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e0eea73297..a01abb3b18 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -922,7 +922,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
 {
     BDRVQcow2State *s = bs->opaque;
     int i, j = 0, l2_index, ret;
-    uint64_t *old_cluster, *l2_table;
+    uint64_t *old_cluster, *l2_slice;
     uint64_t cluster_offset = m->alloc_offset;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
@@ -949,13 +949,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
                                    s->refcount_block_cache);
     }
 
-    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
     if (ret < 0) {
         goto err;
     }
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
 
-    assert(l2_index + m->nb_clusters <= s->l2_size);
+    assert(l2_index + m->nb_clusters <= s->l2_slice_size);
     for (i = 0; i < m->nb_clusters; i++) {
         /* if two concurrent writes happen to the same unallocated cluster
          * each write allocates separate cluster and writes data concurrently.
@@ -963,16 +963,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
          * cluster the second one has to do RMW (which is done above by
          * perform_cow()), update l2 table with its cluster pointer and free
          * old cluster. This is what this loop does */
-        if (l2_table[l2_index + i] != 0) {
-            old_cluster[j++] = l2_table[l2_index + i];
+        if (l2_slice[l2_index + i] != 0) {
+            old_cluster[j++] = l2_slice[l2_index + i];
         }
 
-        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+        l2_slice[l2_index + i] = cpu_to_be64((cluster_offset +
                     (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
      }
 
 
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     /*
      * If this was a COW, we need to decrease the refcount of the old cluster.

From cde917662a71bea9a42ae00c23235384df654804 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:22 +0200
Subject: [PATCH 38/55] qcow2: Update handle_copied() to support L2 slices

handle_copied() loads an L2 table and limits the number of checked
clusters to the amount that fits inside that table. Since we'll be
loading L2 slices instead of full tables we need to update that limit.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 541ac001a7d6b86bab2392554bee53c2b312148c.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index a01abb3b18..7699544358 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1119,7 +1119,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
     uint64_t cluster_offset;
-    uint64_t *l2_table;
+    uint64_t *l2_slice;
     uint64_t nb_clusters;
     unsigned int keep_clusters;
     int ret;
@@ -1131,23 +1131,23 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
                                 == offset_into_cluster(s, *host_offset));
 
     /*
-     * Calculate the number of clusters to look for. We stop at L2 table
+     * Calculate the number of clusters to look for. We stop at L2 slice
      * boundaries to keep things simple.
      */
     nb_clusters =
         size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
 
-    l2_index = offset_to_l2_index(s, guest_offset);
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    l2_index = offset_to_l2_slice_index(s, guest_offset);
+    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
     assert(nb_clusters <= INT_MAX);
 
     /* Find L2 entry for the first involved cluster */
-    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 
     /* Check how many clusters are already allocated and don't need COW */
     if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
@@ -1175,7 +1175,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
         /* We keep all QCOW_OFLAG_COPIED clusters */
         keep_clusters =
             count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index],
+                                      &l2_slice[l2_index],
                                       QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
         assert(keep_clusters <= nb_clusters);
 
@@ -1190,7 +1190,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
 
     /* Cleanup */
 out:
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     /* Only return a host offset if we actually made progress. Otherwise we
      * would make requirements for handle_alloc() that it can't fulfill */

From 6d99a344473b2c5e2906489e3587b952d9b84a83 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:23 +0200
Subject: [PATCH 39/55] qcow2: Update handle_alloc() to support L2 slices

handle_alloc() loads an L2 table and limits the number of checked
clusters to the amount that fits inside that table. Since we'll be
loading L2 slices instead of full tables we need to update that limit.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: b243299c7136f7014c5af51665431ddbf5e99afd.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 7699544358..dbd57cd35f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1274,7 +1274,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index;
-    uint64_t *l2_table;
+    uint64_t *l2_slice;
     uint64_t entry;
     uint64_t nb_clusters;
     int ret;
@@ -1287,29 +1287,29 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     assert(*bytes > 0);
 
     /*
-     * Calculate the number of clusters to look for. We stop at L2 table
+     * Calculate the number of clusters to look for. We stop at L2 slice
      * boundaries to keep things simple.
      */
     nb_clusters =
         size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
 
-    l2_index = offset_to_l2_index(s, guest_offset);
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    l2_index = offset_to_l2_slice_index(s, guest_offset);
+    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
     assert(nb_clusters <= INT_MAX);
 
     /* Find L2 entry for the first involved cluster */
-    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
-    entry = be64_to_cpu(l2_table[l2_index]);
+    entry = be64_to_cpu(l2_slice[l2_index]);
 
     /* For the moment, overwrite compressed clusters one by one */
     if (entry & QCOW_OFLAG_COMPRESSED) {
         nb_clusters = 1;
     } else {
-        nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+        nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index);
     }
 
     /* This function is only called when there were no non-COW clusters, so if
@@ -1338,7 +1338,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
          * nb_clusters already to a range of COW clusters */
         preallocated_nb_clusters =
             count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index], QCOW_OFLAG_COPIED);
+                                      &l2_slice[l2_index], QCOW_OFLAG_COPIED);
         assert(preallocated_nb_clusters > 0);
 
         nb_clusters = preallocated_nb_clusters;
@@ -1349,7 +1349,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
         keep_old_clusters = true;
     }
 
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     if (!alloc_cluster_offset) {
         /* Allocate, if necessary at a given offset in the image file */

From 21ab3add9c9564270a64c92fff448b3333b6d328 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:24 +0200
Subject: [PATCH 40/55] qcow2: Update discard_single_l2() to support L2 slices

discard_single_l2() limits the number of clusters to be discarded
to the amount that fits inside an L2 table. Since we'll be loading
L2 slices instead of full tables we need to update that limit. The
function is renamed to discard_in_l2_slice() for clarity.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 1cb44a5b68be5334cb01b97a3db3a3c5a43396e5.1517840877.git.berto@igalia.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index dbd57cd35f..62336960a5 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1631,32 +1631,32 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 
 /*
  * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
+ * all clusters in the same L2 slice) and returns the number of discarded
  * clusters.
  */
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-                             uint64_t nb_clusters, enum qcow2_discard_type type,
-                             bool full_discard)
+static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
+                               uint64_t nb_clusters,
+                               enum qcow2_discard_type type, bool full_discard)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table;
+    uint64_t *l2_slice;
     int l2_index;
     int ret;
     int i;
 
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
-    /* Limit nb_clusters to one L2 table */
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    /* Limit nb_clusters to one L2 slice */
+    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
     assert(nb_clusters <= INT_MAX);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t old_l2_entry;
 
-        old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+        old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
 
         /*
          * If full_discard is false, make sure that a discarded area reads back
@@ -1694,18 +1694,18 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         }
 
         /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
         if (!full_discard && s->qcow_version >= 3) {
-            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+            l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
         } else {
-            l2_table[l2_index + i] = cpu_to_be64(0);
+            l2_slice[l2_index + i] = cpu_to_be64(0);
         }
 
         /* Then decrease the refcount */
         qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
     }
 
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     return nb_clusters;
 }
@@ -1729,10 +1729,10 @@ int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
 
     s->cache_discards = true;
 
-    /* Each L2 table is handled by its own loop iteration */
+    /* Each L2 slice is handled by its own loop iteration */
     while (nb_clusters > 0) {
-        cleared = discard_single_l2(bs, offset, nb_clusters, type,
-                                    full_discard);
+        cleared = discard_in_l2_slice(bs, offset, nb_clusters, type,
+                                      full_discard);
         if (cleared < 0) {
             ret = cleared;
             goto fail;

From a9a9f8f0b6fae52296fe6cbcced9475aad485477 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:25 +0200
Subject: [PATCH 41/55] qcow2: Update zero_single_l2() to support L2 slices

zero_single_l2() limits the number of clusters to be zeroed to the
amount that fits inside an L2 table. Since we'll be loading L2 slices
instead of full tables we need to update that limit. The function is
renamed to zero_in_l2_slice() for clarity.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: ebc16e7e79fa6969d8975ef487d679794de4fbcc.1517840877.git.berto@igalia.com
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 62336960a5..f440105f6c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1752,33 +1752,33 @@ fail:
 
 /*
  * This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
+ * all clusters in the same L2 slice) and returns the number of zeroed
  * clusters.
  */
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
-                          uint64_t nb_clusters, int flags)
+static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
+                            uint64_t nb_clusters, int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table;
+    uint64_t *l2_slice;
     int l2_index;
     int ret;
     int i;
     bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);
 
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
-    /* Limit nb_clusters to one L2 table */
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    /* Limit nb_clusters to one L2 slice */
+    nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
     assert(nb_clusters <= INT_MAX);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t old_offset;
         QCow2ClusterType cluster_type;
 
-        old_offset = be64_to_cpu(l2_table[l2_index + i]);
+        old_offset = be64_to_cpu(l2_slice[l2_index + i]);
 
         /*
          * Minimize L2 changes if the cluster already reads back as
@@ -1790,16 +1790,16 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
             continue;
         }
 
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
         if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
-            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+            l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
             qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
         } else {
-            l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
+            l2_slice[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
         }
     }
 
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     return nb_clusters;
 }
@@ -1823,13 +1823,13 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
         return -ENOTSUP;
     }
 
-    /* Each L2 table is handled by its own loop iteration */
+    /* Each L2 slice is handled by its own loop iteration */
     nb_clusters = size_to_clusters(s, bytes);
 
     s->cache_discards = true;
 
     while (nb_clusters > 0) {
-        cleared = zero_single_l2(bs, offset, nb_clusters, flags);
+        cleared = zero_in_l2_slice(bs, offset, nb_clusters, flags);
         if (cleared < 0) {
             ret = cleared;
             goto fail;

From ca62dd5c2b1897a187c841371fef88263639ce0d Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:26 +0200
Subject: [PATCH 42/55] qcow2: Prepare qcow2_update_snapshot_refcount() for
 adding L2 slice support

Adding support for L2 slices to qcow2_update_snapshot_refcount() needs
(among other things) an extra loop that iterates over all slices of
each L2 table.

Putting all changes in one patch would make it hard to read because
all semantic changes would be mixed with pure indentation changes.

To make things easier this patch simply creates a new block and
changes the indentation of all lines of code inside it. Thus, all
modifications in this patch are cosmetic. There are no semantic
changes and no variables are renamed yet. The next patch will take
care of that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 8ffaa5e55bd51121f80e498f4045b64902a94293.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 142 +++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9df380d52c..dfa28301c4 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1236,91 +1236,97 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 goto fail;
             }
 
-            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                (void**) &l2_table);
-            if (ret < 0) {
-                goto fail;
-            }
+            {
+                ret = qcow2_cache_get(bs, s->l2_table_cache,
+                                      l2_offset,
+                                      (void **) &l2_table);
+                if (ret < 0) {
+                    goto fail;
+                }
 
-            for (j = 0; j < s->l2_size; j++) {
-                uint64_t cluster_index;
-                uint64_t offset;
+                for (j = 0; j < s->l2_size; j++) {
+                    uint64_t cluster_index;
+                    uint64_t offset;
 
-                entry = be64_to_cpu(l2_table[j]);
-                old_entry = entry;
-                entry &= ~QCOW_OFLAG_COPIED;
-                offset = entry & L2E_OFFSET_MASK;
+                    entry = be64_to_cpu(l2_table[j]);
+                    old_entry = entry;
+                    entry &= ~QCOW_OFLAG_COPIED;
+                    offset = entry & L2E_OFFSET_MASK;
 
-                switch (qcow2_get_cluster_type(entry)) {
-                case QCOW2_CLUSTER_COMPRESSED:
-                    nb_csectors = ((entry >> s->csize_shift) &
-                                   s->csize_mask) + 1;
-                    if (addend != 0) {
-                        ret = update_refcount(bs,
-                                (entry & s->cluster_offset_mask) & ~511,
+                    switch (qcow2_get_cluster_type(entry)) {
+                    case QCOW2_CLUSTER_COMPRESSED:
+                        nb_csectors = ((entry >> s->csize_shift) &
+                                       s->csize_mask) + 1;
+                        if (addend != 0) {
+                            ret = update_refcount(
+                                bs, (entry & s->cluster_offset_mask) & ~511,
                                 nb_csectors * 512, abs(addend), addend < 0,
                                 QCOW2_DISCARD_SNAPSHOT);
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+                        /* compressed clusters are never modified */
+                        refcount = 2;
+                        break;
+
+                    case QCOW2_CLUSTER_NORMAL:
+                    case QCOW2_CLUSTER_ZERO_ALLOC:
+                        if (offset_into_cluster(s, offset)) {
+                            qcow2_signal_corruption(
+                                bs, true, -1, -1, "Cluster "
+                                "allocation offset %#" PRIx64
+                                " unaligned (L2 offset: %#"
+                                PRIx64 ", L2 index: %#x)",
+                                offset, l2_offset, j);
+                            ret = -EIO;
+                            goto fail;
+                        }
+
+                        cluster_index = offset >> s->cluster_bits;
+                        assert(cluster_index);
+                        if (addend != 0) {
+                            ret = qcow2_update_cluster_refcount(
+                                bs, cluster_index, abs(addend), addend < 0,
+                                QCOW2_DISCARD_SNAPSHOT);
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+
+                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
                         if (ret < 0) {
                             goto fail;
                         }
-                    }
-                    /* compressed clusters are never modified */
-                    refcount = 2;
-                    break;
+                        break;
 
-                case QCOW2_CLUSTER_NORMAL:
-                case QCOW2_CLUSTER_ZERO_ALLOC:
-                    if (offset_into_cluster(s, offset)) {
-                        qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
-                                                "allocation offset %#" PRIx64
-                                                " unaligned (L2 offset: %#"
-                                                PRIx64 ", L2 index: %#x)",
-                                                offset, l2_offset, j);
-                        ret = -EIO;
-                        goto fail;
+                    case QCOW2_CLUSTER_ZERO_PLAIN:
+                    case QCOW2_CLUSTER_UNALLOCATED:
+                        refcount = 0;
+                        break;
+
+                    default:
+                        abort();
                     }
 
-                    cluster_index = offset >> s->cluster_bits;
-                    assert(cluster_index);
-                    if (addend != 0) {
-                        ret = qcow2_update_cluster_refcount(bs,
-                                    cluster_index, abs(addend), addend < 0,
-                                    QCOW2_DISCARD_SNAPSHOT);
-                        if (ret < 0) {
-                            goto fail;
+                    if (refcount == 1) {
+                        entry |= QCOW_OFLAG_COPIED;
+                    }
+                    if (entry != old_entry) {
+                        if (addend > 0) {
+                            qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                                       s->refcount_block_cache);
                         }
+                        l2_table[j] = cpu_to_be64(entry);
+                        qcow2_cache_entry_mark_dirty(s->l2_table_cache,
+                                                     l2_table);
                     }
-
-                    ret = qcow2_get_refcount(bs, cluster_index, &refcount);
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                    break;
-
-                case QCOW2_CLUSTER_ZERO_PLAIN:
-                case QCOW2_CLUSTER_UNALLOCATED:
-                    refcount = 0;
-                    break;
-
-                default:
-                    abort();
                 }
 
-                if (refcount == 1) {
-                    entry |= QCOW_OFLAG_COPIED;
-                }
-                if (entry != old_entry) {
-                    if (addend > 0) {
-                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
-                            s->refcount_block_cache);
-                    }
-                    l2_table[j] = cpu_to_be64(entry);
-                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-                }
+                qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+
             }
 
-            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
-
             if (addend != 0) {
                 ret = qcow2_update_cluster_refcount(bs, l2_offset >>
                                                         s->cluster_bits,

From 83ad165be74420bc5bbf0fe4f62b6f53b1fb6004 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:27 +0200
Subject: [PATCH 43/55] qcow2: Update qcow2_update_snapshot_refcount() to
 support L2 slices

qcow2_update_snapshot_refcount() increases the refcount of all
clusters of a given snapshot. In order to do that it needs to load all
its L2 tables and iterate over their entries. Since we'll be loading
L2 slices instead of full tables we need to add an extra loop that
iterates over all slices of each L2 table.

This function doesn't need any additional changes so apart from that
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 5f4db199b9637f4833b58487135124d70add8cf0.1517840877.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-refcount.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index dfa28301c4..d46b69d7f3 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1183,17 +1183,20 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     int64_t l1_table_offset, int l1_size, int addend)
 {
     BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
+    uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
     bool l1_allocated = false;
     int64_t old_entry, old_l2_offset;
+    unsigned slice, slice_size2, n_slices;
     int i, j, l1_modified = 0, nb_csectors;
     int ret;
 
     assert(addend >= -1 && addend <= 1);
 
-    l2_table = NULL;
+    l2_slice = NULL;
     l1_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
+    slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+    n_slices = s->cluster_size / slice_size2;
 
     s->cache_discards = true;
 
@@ -1236,19 +1239,19 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 goto fail;
             }
 
-            {
+            for (slice = 0; slice < n_slices; slice++) {
                 ret = qcow2_cache_get(bs, s->l2_table_cache,
-                                      l2_offset,
-                                      (void **) &l2_table);
+                                      l2_offset + slice * slice_size2,
+                                      (void **) &l2_slice);
                 if (ret < 0) {
                     goto fail;
                 }
 
-                for (j = 0; j < s->l2_size; j++) {
+                for (j = 0; j < s->l2_slice_size; j++) {
                     uint64_t cluster_index;
                     uint64_t offset;
 
-                    entry = be64_to_cpu(l2_table[j]);
+                    entry = be64_to_cpu(l2_slice[j]);
                     old_entry = entry;
                     entry &= ~QCOW_OFLAG_COPIED;
                     offset = entry & L2E_OFFSET_MASK;
@@ -1273,12 +1276,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                     case QCOW2_CLUSTER_NORMAL:
                     case QCOW2_CLUSTER_ZERO_ALLOC:
                         if (offset_into_cluster(s, offset)) {
+                            /* Here l2_index means table (not slice) index */
+                            int l2_index = slice * s->l2_slice_size + j;
                             qcow2_signal_corruption(
                                 bs, true, -1, -1, "Cluster "
                                 "allocation offset %#" PRIx64
                                 " unaligned (L2 offset: %#"
                                 PRIx64 ", L2 index: %#x)",
-                                offset, l2_offset, j);
+                                offset, l2_offset, l2_index);
                             ret = -EIO;
                             goto fail;
                         }
@@ -1317,14 +1322,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                             qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                                        s->refcount_block_cache);
                         }
-                        l2_table[j] = cpu_to_be64(entry);
+                        l2_slice[j] = cpu_to_be64(entry);
                         qcow2_cache_entry_mark_dirty(s->l2_table_cache,
-                                                     l2_table);
+                                                     l2_slice);
                     }
                 }
 
-                qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
-
+                qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
             }
 
             if (addend != 0) {
@@ -1352,8 +1356,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
     ret = bdrv_flush(bs);
 fail:
-    if (l2_table) {
-        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    if (l2_slice) {
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
     }
 
     s->cache_discards = false;

From 9b765486b7796bb775987dfbf5b99b9fdbc5e8ea Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:28 +0200
Subject: [PATCH 44/55] qcow2: Read refcount before L2 table in
 expand_zero_clusters_in_l1()

At the moment it doesn't really make a difference whether we call
qcow2_get_refcount() before of after reading the L2 table, but if we
want to support L2 slices we'll need to read the refcount first.

This patch simply changes the order of those two operations to prepare
for that. The patch with the actual semantic changes will be easier to
read because of this.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 947a91d934053a2dbfef979aeb9568f57ef57c5d.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f440105f6c..1041dcb6dd 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1898,6 +1898,12 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
             goto fail;
         }
 
+        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+                                 &l2_refcount);
+        if (ret < 0) {
+            goto fail;
+        }
+
         if (is_active_l1) {
             /* get active L2 tables from cache */
             ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
@@ -1911,12 +1917,6 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
             goto fail;
         }
 
-        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
-                                 &l2_refcount);
-        if (ret < 0) {
-            goto fail;
-        }
-
         for (j = 0; j < s->l2_size; j++) {
             uint64_t l2_entry = be64_to_cpu(l2_table[j]);
             int64_t offset = l2_entry & L2E_OFFSET_MASK;

From 226494ff69d3d0563d00c8dbab30e740191d4e93 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:29 +0200
Subject: [PATCH 45/55] qcow2: Prepare expand_zero_clusters_in_l1() for adding
 L2 slice support

Adding support for L2 slices to expand_zero_clusters_in_l1() needs
(among other things) an extra loop that iterates over all slices of
each L2 table.

Putting all changes in one patch would make it hard to read because
all semantic changes would be mixed with pure indentation changes.

To make things easier this patch simply creates a new block and
changes the indentation of all lines of code inside it. Thus, all
modifications in this patch are cosmetic. There are no semantic
changes and no variables are renamed yet. The next patch will take
care of that.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: c2ae9f31ed5b6e591477ad4654448badd1c89d73.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 201 ++++++++++++++++++++++--------------------
 1 file changed, 103 insertions(+), 98 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1041dcb6dd..ccb2944eda 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1904,118 +1904,123 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
             goto fail;
         }
 
-        if (is_active_l1) {
-            /* get active L2 tables from cache */
-            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                    (void **)&l2_table);
-        } else {
-            /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                            (void *)l2_table, s->cluster_sectors);
-        }
-        if (ret < 0) {
-            goto fail;
-        }
-
-        for (j = 0; j < s->l2_size; j++) {
-            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
-
-            if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
-                cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
-                continue;
+        {
+            if (is_active_l1) {
+                /* get active L2 tables from cache */
+                ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+                                      (void **)&l2_table);
+            } else {
+                /* load inactive L2 tables from disk */
+                ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+                                (void *)l2_table, s->cluster_sectors);
+            }
+            if (ret < 0) {
+                goto fail;
             }
 
-            if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
-                if (!bs->backing) {
-                    /* not backed; therefore we can simply deallocate the
-                     * cluster */
-                    l2_table[j] = 0;
-                    l2_dirty = true;
+            for (j = 0; j < s->l2_size; j++) {
+                uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+                int64_t offset = l2_entry & L2E_OFFSET_MASK;
+                QCow2ClusterType cluster_type =
+                    qcow2_get_cluster_type(l2_entry);
+
+                if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
+                    cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
                     continue;
                 }
 
-                offset = qcow2_alloc_clusters(bs, s->cluster_size);
-                if (offset < 0) {
-                    ret = offset;
-                    goto fail;
-                }
+                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                    if (!bs->backing) {
+                        /* not backed; therefore we can simply deallocate the
+                         * cluster */
+                        l2_table[j] = 0;
+                        l2_dirty = true;
+                        continue;
+                    }
 
-                if (l2_refcount > 1) {
-                    /* For shared L2 tables, set the refcount accordingly (it is
-                     * already 1 and needs to be l2_refcount) */
-                    ret = qcow2_update_cluster_refcount(bs,
-                            offset >> s->cluster_bits,
-                            refcount_diff(1, l2_refcount), false,
-                            QCOW2_DISCARD_OTHER);
-                    if (ret < 0) {
-                        qcow2_free_clusters(bs, offset, s->cluster_size,
-                                            QCOW2_DISCARD_OTHER);
+                    offset = qcow2_alloc_clusters(bs, s->cluster_size);
+                    if (offset < 0) {
+                        ret = offset;
                         goto fail;
                     }
+
+                    if (l2_refcount > 1) {
+                        /* For shared L2 tables, set the refcount accordingly
+                         * (it is already 1 and needs to be l2_refcount) */
+                        ret = qcow2_update_cluster_refcount(
+                            bs, offset >> s->cluster_bits,
+                            refcount_diff(1, l2_refcount), false,
+                            QCOW2_DISCARD_OTHER);
+                        if (ret < 0) {
+                            qcow2_free_clusters(bs, offset, s->cluster_size,
+                                                QCOW2_DISCARD_OTHER);
+                            goto fail;
+                        }
+                    }
                 }
+
+                if (offset_into_cluster(s, offset)) {
+                    qcow2_signal_corruption(
+                        bs, true, -1, -1,
+                        "Cluster allocation offset "
+                        "%#" PRIx64 " unaligned (L2 offset: %#"
+                        PRIx64 ", L2 index: %#x)", offset,
+                        l2_offset, j);
+                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                        qcow2_free_clusters(bs, offset, s->cluster_size,
+                                            QCOW2_DISCARD_ALWAYS);
+                    }
+                    ret = -EIO;
+                    goto fail;
+                }
+
+                ret = qcow2_pre_write_overlap_check(bs, 0, offset,
+                                                    s->cluster_size);
+                if (ret < 0) {
+                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                        qcow2_free_clusters(bs, offset, s->cluster_size,
+                                            QCOW2_DISCARD_ALWAYS);
+                    }
+                    goto fail;
+                }
+
+                ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
+                if (ret < 0) {
+                    if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
+                        qcow2_free_clusters(bs, offset, s->cluster_size,
+                                            QCOW2_DISCARD_ALWAYS);
+                    }
+                    goto fail;
+                }
+
+                if (l2_refcount == 1) {
+                    l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+                } else {
+                    l2_table[j] = cpu_to_be64(offset);
+                }
+                l2_dirty = true;
             }
 
-            if (offset_into_cluster(s, offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1,
-                                        "Cluster allocation offset "
-                                        "%#" PRIx64 " unaligned (L2 offset: %#"
-                                        PRIx64 ", L2 index: %#x)", offset,
-                                        l2_offset, j);
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
+            if (is_active_l1) {
+                if (l2_dirty) {
+                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                    qcow2_cache_depends_on_flush(s->l2_table_cache);
                 }
-                ret = -EIO;
-                goto fail;
-            }
-
-            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
-            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                goto fail;
-            }
-
-            ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
-            if (ret < 0) {
-                if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                goto fail;
-            }
-
-            if (l2_refcount == 1) {
-                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+                qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
             } else {
-                l2_table[j] = cpu_to_be64(offset);
-            }
-            l2_dirty = true;
-        }
+                if (l2_dirty) {
+                    ret = qcow2_pre_write_overlap_check(
+                        bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
+                        l2_offset, s->cluster_size);
+                    if (ret < 0) {
+                        goto fail;
+                    }
 
-        if (is_active_l1) {
-            if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-                qcow2_cache_depends_on_flush(s->l2_table_cache);
-            }
-            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
-        } else {
-            if (l2_dirty) {
-                ret = qcow2_pre_write_overlap_check(bs,
-                        QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
-                        s->cluster_size);
-                if (ret < 0) {
-                    goto fail;
-                }
-
-                ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                                 (void *)l2_table, s->cluster_sectors);
-                if (ret < 0) {
-                    goto fail;
+                    ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
+                                     (void *)l2_table, s->cluster_sectors);
+                    if (ret < 0) {
+                        goto fail;
+                    }
                 }
             }
         }

From 415184f52031cd16ff7ea3702e0ef9f2f7edec9a Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:30 +0200
Subject: [PATCH 46/55] qcow2: Update expand_zero_clusters_in_l1() to support
 L2 slices

expand_zero_clusters_in_l1() expands zero clusters as a necessary step
to downgrade qcow2 images to a version that doesn't support metadata
zero clusters. This function takes an L1 table (which may or may not
be active) and iterates over all its L2 tables looking for zero
clusters.

Since we'll be loading L2 slices instead of full tables we need to add
an extra loop that iterates over all slices of each L2 table, and we
should also use the slice size when allocating the buffer used when
the L1 table is not active.

This function doesn't need any additional changes so apart from that
this patch simply updates the variable name from l2_table to l2_slice.

Finally, and since we have to touch the bdrv_read() / bdrv_write()
calls anyway, this patch takes the opportunity to replace them with
the byte-based bdrv_pread() / bdrv_pwrite().

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 43590976f730501688096cff103f2923b72b0f32.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 51 ++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ccb2944eda..8fbaba008b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1863,22 +1863,25 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
 {
     BDRVQcow2State *s = bs->opaque;
     bool is_active_l1 = (l1_table == s->l1_table);
-    uint64_t *l2_table = NULL;
+    uint64_t *l2_slice = NULL;
+    unsigned slice, slice_size2, n_slices;
     int ret;
     int i, j;
 
+    slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+    n_slices = s->cluster_size / slice_size2;
+
     if (!is_active_l1) {
         /* inactive L2 tables require a buffer to be stored in when loading
          * them from disk */
-        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
-        if (l2_table == NULL) {
+        l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
+        if (l2_slice == NULL) {
             return -ENOMEM;
         }
     }
 
     for (i = 0; i < l1_size; i++) {
         uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
-        bool l2_dirty = false;
         uint64_t l2_refcount;
 
         if (!l2_offset) {
@@ -1904,22 +1907,23 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
             goto fail;
         }
 
-        {
+        for (slice = 0; slice < n_slices; slice++) {
+            uint64_t slice_offset = l2_offset + slice * slice_size2;
+            bool l2_dirty = false;
             if (is_active_l1) {
                 /* get active L2 tables from cache */
-                ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                                      (void **)&l2_table);
+                ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
+                                      (void **)&l2_slice);
             } else {
                 /* load inactive L2 tables from disk */
-                ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                                (void *)l2_table, s->cluster_sectors);
+                ret = bdrv_pread(bs->file, slice_offset, l2_slice, slice_size2);
             }
             if (ret < 0) {
                 goto fail;
             }
 
-            for (j = 0; j < s->l2_size; j++) {
-                uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+            for (j = 0; j < s->l2_slice_size; j++) {
+                uint64_t l2_entry = be64_to_cpu(l2_slice[j]);
                 int64_t offset = l2_entry & L2E_OFFSET_MASK;
                 QCow2ClusterType cluster_type =
                     qcow2_get_cluster_type(l2_entry);
@@ -1933,7 +1937,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     if (!bs->backing) {
                         /* not backed; therefore we can simply deallocate the
                          * cluster */
-                        l2_table[j] = 0;
+                        l2_slice[j] = 0;
                         l2_dirty = true;
                         continue;
                     }
@@ -1960,12 +1964,13 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 }
 
                 if (offset_into_cluster(s, offset)) {
+                    int l2_index = slice * s->l2_slice_size + j;
                     qcow2_signal_corruption(
                         bs, true, -1, -1,
                         "Cluster allocation offset "
                         "%#" PRIx64 " unaligned (L2 offset: %#"
                         PRIx64 ", L2 index: %#x)", offset,
-                        l2_offset, j);
+                        l2_offset, l2_index);
                     if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                         qcow2_free_clusters(bs, offset, s->cluster_size,
                                             QCOW2_DISCARD_ALWAYS);
@@ -1994,30 +1999,30 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 }
 
                 if (l2_refcount == 1) {
-                    l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+                    l2_slice[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
                 } else {
-                    l2_table[j] = cpu_to_be64(offset);
+                    l2_slice[j] = cpu_to_be64(offset);
                 }
                 l2_dirty = true;
             }
 
             if (is_active_l1) {
                 if (l2_dirty) {
-                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
                     qcow2_cache_depends_on_flush(s->l2_table_cache);
                 }
-                qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+                qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
             } else {
                 if (l2_dirty) {
                     ret = qcow2_pre_write_overlap_check(
                         bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
-                        l2_offset, s->cluster_size);
+                        slice_offset, slice_size2);
                     if (ret < 0) {
                         goto fail;
                     }
 
-                    ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                                     (void *)l2_table, s->cluster_sectors);
+                    ret = bdrv_pwrite(bs->file, slice_offset,
+                                      l2_slice, slice_size2);
                     if (ret < 0) {
                         goto fail;
                     }
@@ -2034,11 +2039,11 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     ret = 0;
 
 fail:
-    if (l2_table) {
+    if (l2_slice) {
         if (!is_active_l1) {
-            qemu_vfree(l2_table);
+            qemu_vfree(l2_slice);
         } else {
-            qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+            qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
         }
     }
     return ret;

From 13bec229d848d22fceb3f0f1bbf01e1f7bcac7a1 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:31 +0200
Subject: [PATCH 47/55] qcow2: Update qcow2_truncate() to support L2 slices

The qcow2_truncate() code is mostly independent from whether
we're using L2 slices or full L2 tables, but in full and
falloc preallocation modes new L2 tables are allocated using
qcow2_alloc_cluster_link_l2().  Therefore the code needs to be
modified to ensure that all nb_clusters that are processed in each
call can be allocated with just one L2 slice.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Message-id: 1fd7d272b5e7b66254a090b74cf2bed1cc334c0e.1517840877.git.berto@igalia.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index ba8d71c72d..953254a004 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3262,9 +3262,9 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset,
         host_offset = allocation_start;
         guest_offset = old_length;
         while (nb_new_data_clusters) {
-            int64_t guest_cluster = guest_offset >> s->cluster_bits;
-            int64_t nb_clusters = MIN(nb_new_data_clusters,
-                                      s->l2_size - guest_cluster % s->l2_size);
+            int64_t nb_clusters = MIN(
+                nb_new_data_clusters,
+                s->l2_slice_size - offset_to_l2_slice_index(s, guest_offset));
             QCowL2Meta allocation = {
                 .offset       = guest_offset,
                 .alloc_offset = host_offset,

From e4e7254829d12a831acf2935ea8388e27c06bc42 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:32 +0200
Subject: [PATCH 48/55] qcow2: Rename l2_table in
 qcow2_alloc_compressed_cluster_offset()

This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 0c5d4b9bf163aa3b49ec19cc512a50d83563f2ad.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8fbaba008b..24055e19a1 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -747,26 +747,26 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index, ret;
-    uint64_t *l2_table;
+    uint64_t *l2_slice;
     int64_t cluster_offset;
     int nb_csectors;
 
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+    ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return 0;
     }
 
     /* Compression can't overwrite anything. Fail if the cluster was already
      * allocated. */
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    cluster_offset = be64_to_cpu(l2_slice[l2_index]);
     if (cluster_offset & L2E_OFFSET_MASK) {
-        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
         return 0;
     }
 
     cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
     if (cluster_offset < 0) {
-        qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+        qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
         return 0;
     }
 
@@ -781,9 +781,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     /* compressed clusters never have the copied flag */
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-    l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+    l2_slice[l2_index] = cpu_to_be64(cluster_offset);
+    qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     return cluster_offset;
 }

From 13f893c47d8fe0dbd70c40570b3208fbc01579c4 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:33 +0200
Subject: [PATCH 49/55] qcow2: Rename l2_table in count_contiguous_clusters()

This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices intead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 812b0c3505bb1687e51285dccf1a94f0cecb1f74.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 24055e19a1..54ae210b43 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -370,19 +370,19 @@ fail:
 }
 
 /*
- * Checks how many clusters in a given L2 table are contiguous in the image
+ * Checks how many clusters in a given L2 slice are contiguous in the image
  * file. As soon as one of the flags in the bitmask stop_flags changes compared
  * to the first cluster, the search is stopped and the cluster is not counted
  * as contiguous. (This allows it, for example, to stop at the first compressed
  * cluster which may require a different handling)
  */
 static int count_contiguous_clusters(int nb_clusters, int cluster_size,
-        uint64_t *l2_table, uint64_t stop_flags)
+        uint64_t *l2_slice, uint64_t stop_flags)
 {
     int i;
     QCow2ClusterType first_cluster_type;
     uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
-    uint64_t first_entry = be64_to_cpu(l2_table[0]);
+    uint64_t first_entry = be64_to_cpu(l2_slice[0]);
     uint64_t offset = first_entry & mask;
 
     if (!offset) {
@@ -395,7 +395,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
            first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
 
     for (i = 0; i < nb_clusters; i++) {
-        uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+        uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask;
         if (offset + (uint64_t) i * cluster_size != l2_entry) {
             break;
         }

From c26f10ba898d07d5ac36f56a7629691cc0003076 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:34 +0200
Subject: [PATCH 50/55] qcow2: Rename l2_table in
 count_contiguous_clusters_unallocated()

This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 78bcc54bc632574dd0b900a77a00a1b6ffc359e6.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 54ae210b43..074a4aaf1e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -406,10 +406,10 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
 
 /*
  * Checks how many consecutive unallocated clusters in a given L2
- * table have the same cluster type.
+ * slice have the same cluster type.
  */
 static int count_contiguous_clusters_unallocated(int nb_clusters,
-                                                 uint64_t *l2_table,
+                                                 uint64_t *l2_slice,
                                                  QCow2ClusterType wanted_type)
 {
     int i;
@@ -417,7 +417,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
     assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
            wanted_type == QCOW2_CLUSTER_UNALLOCATED);
     for (i = 0; i < nb_clusters; i++) {
-        uint64_t entry = be64_to_cpu(l2_table[i]);
+        uint64_t entry = be64_to_cpu(l2_slice[i]);
         QCow2ClusterType type = qcow2_get_cluster_type(entry);
 
         if (type != wanted_type) {

From dd32c881080ca089c0f1914e0886771e59893cb2 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:35 +0200
Subject: [PATCH 51/55] qcow2: Rename l2_table in count_cow_clusters()

This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices intead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 6107001fc79e6739242f1de7d191375e4f130aac.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cluster.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 074a4aaf1e..e406b0f3b9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -999,12 +999,12 @@ err:
  * which must copy from the backing file)
  */
 static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
-    uint64_t *l2_table, int l2_index)
+    uint64_t *l2_slice, int l2_index)
 {
     int i;
 
     for (i = 0; i < nb_clusters; i++) {
-        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+        uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
         QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
 
         switch(cluster_type) {

From 1221fe6f636754ab5f2c1c87caa77633e9123622 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:36 +0200
Subject: [PATCH 52/55] qcow2: Allow configuring the L2 slice size

Now that the code is ready to handle L2 slices we can finally add an
option to allow configuring their size.

An L2 slice is the portion of an L2 table that is read by the qcow2
cache. Until now the cache was always reading full L2 tables, and
since the L2 table size is equal to the cluster size this was not very
efficient with large clusters. Here's a more detailed explanation of
why it makes sense to have smaller cache entries in order to load L2
data:

   https://lists.gnu.org/archive/html/qemu-block/2017-09/msg00635.html

This patch introduces a new command-line option to the qcow2 driver
named l2-cache-entry-size (cf. l2-cache-size). The cache entry size
has the same restrictions as the cluster size: it must be a power of
two and it has the same range of allowed values, with the additional
requirement that it must not be larger than the cluster size.

The L2 cache entry size (L2 slice size) remains equal to the cluster
size for now by default, so this feature must be explicitly enabled.
Although my tests show that 4KB slices consistently improve
performance and give the best results, let's wait and make more tests
with different cluster sizes before deciding on an optimal default.

Now that the cache entry size is not necessarily equal to the cluster
size we need to reflect that in the MIN_L2_CACHE_SIZE documentation.
That minimum value is a requirement of the COW algorithm: we need to
read two L2 slices (and not two L2 tables) in order to do COW, see
l2_allocate() for the actual code.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: c73e5611ff4a9ec5d20de68a6c289553a13d2354.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/qcow2-cache.c  | 10 ++++++++--
 block/qcow2.c        | 34 +++++++++++++++++++++++++++-------
 block/qcow2.h        |  6 ++++--
 qapi/block-core.json |  6 ++++++
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index b1aa42477e..d9dafa31e5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -120,14 +120,20 @@ void qcow2_cache_clean_unused(Qcow2Cache *c)
     c->cache_clean_lru_counter = c->lru_counter;
 }
 
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
+                               unsigned table_size)
 {
     BDRVQcow2State *s = bs->opaque;
     Qcow2Cache *c;
 
+    assert(num_tables > 0);
+    assert(is_power_of_2(table_size));
+    assert(table_size >= (1 << MIN_CLUSTER_BITS));
+    assert(table_size <= s->cluster_size);
+
     c = g_new0(Qcow2Cache, 1);
     c->size = num_tables;
-    c->table_size = s->cluster_size;
+    c->table_size = table_size;
     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
     c->table_array = qemu_try_blockalign(bs->file->bs,
                                          (size_t) num_tables * c->table_size);
diff --git a/block/qcow2.c b/block/qcow2.c
index 953254a004..57a517e2bd 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -675,6 +675,11 @@ static QemuOptsList qcow2_runtime_opts = {
             .type = QEMU_OPT_SIZE,
             .help = "Maximum L2 table cache size",
         },
+        {
+            .name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Size of each entry in the L2 cache",
+        },
         {
             .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
             .type = QEMU_OPT_SIZE,
@@ -747,6 +752,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs,
 
 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                              uint64_t *l2_cache_size,
+                             uint64_t *l2_cache_entry_size,
                              uint64_t *refcount_cache_size, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
@@ -762,6 +768,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
     *refcount_cache_size = qemu_opt_get_size(opts,
                                              QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
 
+    *l2_cache_entry_size = qemu_opt_get_size(
+        opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
+
     if (combined_cache_size_set) {
         if (l2_cache_size_set && refcount_cache_size_set) {
             error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
@@ -802,6 +811,15 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
                                  / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
         }
     }
+
+    if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
+        *l2_cache_entry_size > s->cluster_size ||
+        !is_power_of_2(*l2_cache_entry_size)) {
+        error_setg(errp, "L2 cache entry size must be a power of two "
+                   "between %d and the cluster size (%d)",
+                   1 << MIN_CLUSTER_BITS, s->cluster_size);
+        return;
+    }
 }
 
 typedef struct Qcow2ReopenState {
@@ -824,7 +842,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     QemuOpts *opts = NULL;
     const char *opt_overlap_check, *opt_overlap_check_template;
     int overlap_check_template = 0;
-    uint64_t l2_cache_size, refcount_cache_size;
+    uint64_t l2_cache_size, l2_cache_entry_size, refcount_cache_size;
     int i;
     const char *encryptfmt;
     QDict *encryptopts = NULL;
@@ -843,15 +861,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
     }
 
     /* get L2 table/refcount block cache size from command line options */
-    read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
-                     &local_err);
+    read_cache_sizes(bs, opts, &l2_cache_size, &l2_cache_entry_size,
+                     &refcount_cache_size, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         ret = -EINVAL;
         goto fail;
     }
 
-    l2_cache_size /= s->cluster_size;
+    l2_cache_size /= l2_cache_entry_size;
     if (l2_cache_size < MIN_L2_CACHE_SIZE) {
         l2_cache_size = MIN_L2_CACHE_SIZE;
     }
@@ -889,9 +907,11 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
         }
     }
 
-    r->l2_slice_size = s->cluster_size / sizeof(uint64_t);
-    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
-    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
+    r->l2_slice_size = l2_cache_entry_size / sizeof(uint64_t);
+    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size,
+                                           l2_cache_entry_size);
+    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size,
+                                                 s->cluster_size);
     if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
         error_setg(errp, "Could not allocate metadata caches");
         ret = -ENOMEM;
diff --git a/block/qcow2.h b/block/qcow2.h
index d956a6cdd2..883802241f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -68,7 +68,7 @@
 #define MAX_CLUSTER_BITS 21
 
 /* Must be at least 2 to cover COW */
-#define MIN_L2_CACHE_SIZE 2 /* clusters */
+#define MIN_L2_CACHE_SIZE 2 /* cache entries */
 
 /* Must be at least 4 to cover all cases of refcount table growth */
 #define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
@@ -100,6 +100,7 @@
 #define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
 #define QCOW2_OPT_CACHE_SIZE "cache-size"
 #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
+#define QCOW2_OPT_L2_CACHE_ENTRY_SIZE "l2-cache-entry-size"
 #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
 #define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
 
@@ -647,7 +648,8 @@ void qcow2_free_snapshots(BlockDriverState *bs);
 int qcow2_read_snapshots(BlockDriverState *bs);
 
 /* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
+                               unsigned table_size);
 int qcow2_cache_destroy(Qcow2Cache *c);
 
 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2c107823fe..5c5921bfb7 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2521,6 +2521,11 @@
 # @l2-cache-size:         the maximum size of the L2 table cache in
 #                         bytes (since 2.2)
 #
+# @l2-cache-entry-size:   the size of each entry in the L2 cache in
+#                         bytes. It must be a power of two between 512
+#                         and the cluster size. The default value is
+#                         the cluster size (since 2.12)
+#
 # @refcount-cache-size:   the maximum size of the refcount block cache
 #                         in bytes (since 2.2)
 #
@@ -2542,6 +2547,7 @@
             '*overlap-check': 'Qcow2OverlapChecks',
             '*cache-size': 'int',
             '*l2-cache-size': 'int',
+            '*l2-cache-entry-size': 'int',
             '*refcount-cache-size': 'int',
             '*cache-clean-interval': 'int',
             '*encrypt': 'BlockdevQcow2Encryption' } }

From 4450c39625d6ef4404f41bd1fda3793afd5ce5c0 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:37 +0200
Subject: [PATCH 53/55] iotests: Test valid values of l2-cache-entry-size

The l2-cache-entry-size setting can only contain values that are
powers of two between 512 and the cluster size.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: bd3547b670b8d0af11480c760991a22bcae5b48c.1517840877.git.berto@igalia.com
[mreitz: Changed non-power-of-two test value from 300 to 4242]
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/103     | 17 +++++++++++++++++
 tests/qemu-iotests/103.out |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103
index d0cfab8844..2841318492 100755
--- a/tests/qemu-iotests/103
+++ b/tests/qemu-iotests/103
@@ -66,6 +66,14 @@ $QEMU_IO -c "open -o cache-size=1M,refcount-cache-size=2M $TEST_IMG" 2>&1 \
 $QEMU_IO -c "open -o cache-size=0,l2-cache-size=0,refcount-cache-size=0 $TEST_IMG" \
     2>&1 | _filter_testdir | _filter_imgfmt
 
+# Invalid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=256 $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=4242 $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=128k $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+
 echo
 echo '=== Testing valid option combinations ==='
 echo
@@ -94,6 +102,15 @@ $QEMU_IO -c "open -o l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \
          -c 'read -P 42 0 64k' \
     | _filter_qemu_io
 
+# Valid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=512 $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=16k $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=64k $TEST_IMG" \
+    2>&1 | _filter_testdir | _filter_imgfmt
+
+
 echo
 echo '=== Testing minimal L2 cache and COW ==='
 echo
diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out
index b7aaadf89a..bd45d3875a 100644
--- a/tests/qemu-iotests/103.out
+++ b/tests/qemu-iotests/103.out
@@ -9,6 +9,9 @@ can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cach
 can't open device TEST_DIR/t.IMGFMT: l2-cache-size may not exceed cache-size
 can't open device TEST_DIR/t.IMGFMT: refcount-cache-size may not exceed cache-size
 can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and refcount-cache-size may not be set the same time
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of two between 512 and the cluster size (65536)
 
 === Testing valid option combinations ===
 

From 2ecec91110e605d3b2d3d15d6fb9beaf6c96ada6 Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:38 +0200
Subject: [PATCH 54/55] iotests: Test downgrading an image using a small L2
 slice size

expand_zero_clusters_in_l1() is used when downgrading qcow2 images
from v3 to v2 (compat=0.10). This is one of the functions that needed
more changes to support L2 slices, so this patch extends iotest 061 to
test downgrading a qcow2 image using a smaller slice size.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 3e5662dce5e4926c8fabbad4c0b9142b2a506dd4.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/061     | 16 ++++++++++
 tests/qemu-iotests/061.out | 61 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index f5678b10c9..911b6f2894 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -53,6 +53,22 @@ $PYTHON qcow2.py "$TEST_IMG" dump-header
 $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
+echo
+echo "=== Testing version downgrade with zero expansion and 4K cache entries ==="
+echo
+IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
+$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IMG amend -o "compat=0.10" --image-opts \
+          driver=qcow2,file.filename=$TEST_IMG,l2-cache-entry-size=4096
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+_check_test_img
+
 echo
 echo "=== Testing dirty version downgrade ==="
 echo
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index 942485de99..e857ef9a7d 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -52,6 +52,67 @@ read 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
 
+=== Testing version downgrade with zero expansion and 4K cache entries ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+128 KiB (0x20000) bytes     allocated at offset 0 bytes (0x0)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 128 KiB (0x20000)
+128 KiB (0x20000) bytes     allocated at offset 32 MiB (0x2000000)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 32.125 MiB (0x2020000)
+magic                     0x514649fb
+version                   3
+backing_file_offset       0x0
+backing_file_size         0x0
+cluster_bits              16
+size                      67108864
+crypt_method              0
+l1_size                   1
+l1_table_offset           0x30000
+refcount_table_offset     0x10000
+refcount_table_clusters   1
+nb_snapshots              0
+snapshot_offset           0x0
+incompatible_features     0x0
+compatible_features       0x1
+autoclear_features        0x0
+refcount_order            4
+header_length             104
+
+Header extension:
+magic                     0x6803f857
+length                    144
+data                      <binary>
+
+magic                     0x514649fb
+version                   2
+backing_file_offset       0x0
+backing_file_size         0x0
+cluster_bits              16
+size                      67108864
+crypt_method              0
+l1_size                   1
+l1_table_offset           0x30000
+refcount_table_offset     0x10000
+refcount_table_clusters   1
+nb_snapshots              0
+snapshot_offset           0x0
+incompatible_features     0x0
+compatible_features       0x0
+autoclear_features        0x0
+refcount_order            4
+header_length             72
+
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+64 MiB (0x4000000) bytes not allocated at offset 0 bytes (0x0)
+No errors were found on the image.
+
 === Testing dirty version downgrade ===
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864

From 03b1b6f0220d5f50471a1f702bedcd789fee032d Mon Sep 17 00:00:00 2001
From: Alberto Garcia <berto@igalia.com>
Date: Mon, 5 Feb 2018 16:33:39 +0200
Subject: [PATCH 55/55] iotests: Add l2-cache-entry-size to iotest 137

This test tries reopening a qcow2 image with valid and invalid
options. This patch adds l2-cache-entry-size to the set.

Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-id: 3d3b7d2dbfc020deaef60fb58739b0801eb9517c.1517840877.git.berto@igalia.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 tests/qemu-iotests/137     | 5 +++++
 tests/qemu-iotests/137.out | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137
index 5a01250005..87965625d8 100755
--- a/tests/qemu-iotests/137
+++ b/tests/qemu-iotests/137
@@ -83,6 +83,9 @@ $QEMU_IO \
     -c "reopen -o overlap-check.inactive-l2=off" \
     -c "reopen -o cache-size=1M" \
     -c "reopen -o l2-cache-size=512k" \
+    -c "reopen -o l2-cache-entry-size=512" \
+    -c "reopen -o l2-cache-entry-size=4k" \
+    -c "reopen -o l2-cache-entry-size=64k" \
     -c "reopen -o refcount-cache-size=128k" \
     -c "reopen -o cache-clean-interval=5" \
     -c "reopen -o cache-clean-interval=0" \
@@ -107,6 +110,8 @@ $QEMU_IO \
     -c "reopen -o cache-size=1M,l2-cache-size=2M" \
     -c "reopen -o cache-size=1M,refcount-cache-size=2M" \
     -c "reopen -o l2-cache-size=256T" \
+    -c "reopen -o l2-cache-entry-size=33k" \
+    -c "reopen -o l2-cache-entry-size=128k" \
     -c "reopen -o refcount-cache-size=256T" \
     -c "reopen -o overlap-check=constant,overlap-check.template=all" \
     -c "reopen -o overlap-check=blubb" \
diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out
index 05efd74d17..e28e1eadba 100644
--- a/tests/qemu-iotests/137.out
+++ b/tests/qemu-iotests/137.out
@@ -20,6 +20,8 @@ cache-size, l2-cache-size and refcount-cache-size may not be set the same time
 l2-cache-size may not exceed cache-size
 refcount-cache-size may not exceed cache-size
 L2 cache size too big
+L2 cache entry size must be a power of two between 512 and the cluster size (65536)
+L2 cache entry size must be a power of two between 512 and the cluster size (65536)
 L2 cache size too big
 Conflicting values for qcow2 options 'overlap-check' ('constant') and 'overlap-check.template' ('all')
 Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of the following: none, constant, cached, all