From 88481329c0a43373f994f0c8ed19e888a8c86830 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 20 Jun 2018 17:48:35 +0300 Subject: [PATCH 01/12] qemu-img: allow compressed not-in-order writes No reason to forbid them, and they are needed to improve performance with compress-threads in further patches. Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- qemu-img.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index e1a506f7f6..7651d8172c 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -2141,11 +2141,6 @@ static int img_convert(int argc, char **argv) goto fail_getopt; } - if (!s.wr_in_order && s.compressed) { - error_report("Out of order write and compress are mutually exclusive"); - goto fail_getopt; - } - if (tgt_image_opts && !skip_create) { error_report("--target-image-opts requires use of -n flag"); goto fail_getopt; From 2714f13d69adf73638842729ccfb3bdd6d5ee98f Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 20 Jun 2018 17:48:36 +0300 Subject: [PATCH 02/12] qcow2: refactor data compression Make a separate function for compression to be parallelized later. - use .avail_out field instead of .next_out to calculate size of compressed data. 
It looks more natural and it allows dest to be kept as a void pointer - set avail_out to be at least one byte less than input, to make sure inefficient compression is detected earlier Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2.c | 78 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 2f9e58e0c4..9cee653d96 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -23,11 +23,14 @@ */ #include "qemu/osdep.h" + +#define ZLIB_CONST +#include <zlib.h> + #include "block/block_int.h" #include "block/qdict.h" #include "sysemu/block-backend.h" #include "qemu/module.h" -#include <zlib.h> #include "qcow2.h" #include "qemu/error-report.h" #include "qapi/error.h" @@ -3650,6 +3653,48 @@ fail: return ret; } +/* + * qcow2_compress() + * + * @dest - destination buffer, at least of @size-1 bytes + * @src - source buffer, @size bytes + * + * Returns: compressed size on success + * -1 if compression is inefficient + * -2 on any other error + */ +static ssize_t qcow2_compress(void *dest, const void *src, size_t size) +{ + ssize_t ret; + z_stream strm; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -12, 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + return -2; + } + + /* strm.next_in is not const in old zlib versions, such as those used on + * OpenBSD/NetBSD, so cast the const away */ + strm.avail_in = size; + strm.next_in = (void *) src; + strm.avail_out = size - 1; + strm.next_out = dest; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) { + ret = size - 1 - strm.avail_out; + } else { + ret = (ret == Z_OK ? 
-1 : -2); + } + + deflateEnd(&strm); + + return ret; +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -3659,8 +3704,8 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, BDRVQcow2State *s = bs->opaque; QEMUIOVector hd_qiov; struct iovec iov; - z_stream strm; - int ret, out_len; + int ret; + size_t out_len; uint8_t *buf, *out_buf; int64_t cluster_offset; @@ -3694,32 +3739,11 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { + out_len = qcow2_compress(out_buf, buf, s->cluster_size); + if (out_len == -2) { ret = -EINVAL; goto fail; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - deflateEnd(&strm); - ret = -EINVAL; - goto fail; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + } else if (out_len == -1) { /* could not compress: write normal cluster */ ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { From ceb029cd6feccf9f7607833b71dd609d149421a1 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 20 Jun 2018 17:48:37 +0300 Subject: [PATCH 03/12] qcow2: add compress threads Do data compression in separate threads. This significantly improves performance for qemu-img convert with -W (allow async writes) and -c (compressed) options. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++- block/qcow2.h | 3 +++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 9cee653d96..33b61b7480 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -44,6 +44,7 @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" +#include "block/thread-pool.h" /* Differences with QCOW: @@ -1544,6 +1545,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, qcow2_check_refcounts(bs, &result, 0); } #endif + + qemu_co_queue_init(&s->compress_wait_queue); + return ret; fail: @@ -3695,6 +3699,62 @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size) return ret; } +#define MAX_COMPRESS_THREADS 4 + +typedef struct Qcow2CompressData { + void *dest; + const void *src; + size_t size; + ssize_t ret; +} Qcow2CompressData; + +static int qcow2_compress_pool_func(void *opaque) +{ + Qcow2CompressData *data = opaque; + + data->ret = qcow2_compress(data->dest, data->src, data->size); + + return 0; +} + +static void qcow2_compress_complete(void *opaque, int ret) +{ + qemu_coroutine_enter(opaque); +} + +/* See qcow2_compress definition for parameters description */ +static ssize_t qcow2_co_compress(BlockDriverState *bs, + void *dest, const void *src, size_t size) +{ + BDRVQcow2State *s = bs->opaque; + BlockAIOCB *acb; + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + Qcow2CompressData arg = { + .dest = dest, + .src = src, + .size = size, + }; + + while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { + qemu_co_queue_wait(&s->compress_wait_queue, NULL); + } + + s->nb_compress_threads++; + acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, + qcow2_compress_complete, + qemu_coroutine_self()); + + if (!acb) { + s->nb_compress_threads--; + return -EINVAL; + } + qemu_coroutine_yield(); + 
s->nb_compress_threads--; + qemu_co_queue_next(&s->compress_wait_queue); + + return arg.ret; +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -3739,7 +3799,7 @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - out_len = qcow2_compress(out_buf, buf, s->cluster_size); + out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); if (out_len == -2) { ret = -EINVAL; goto fail; diff --git a/block/qcow2.h b/block/qcow2.h index 1c9c0d3631..d6aca687d6 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -326,6 +326,9 @@ typedef struct BDRVQcow2State { * override) */ char *image_backing_file; char *image_backing_format; + + CoQueue compress_wait_queue; + int nb_compress_threads; } BDRVQcow2State; typedef struct Qcow2COWRegion { From 7ae9f3f61b2b99e2f348d3dc4a4ef2c6af0ae9bc Mon Sep 17 00:00:00 2001 From: Ari Sundholm Date: Tue, 3 Jul 2018 17:48:47 +0300 Subject: [PATCH 04/12] block: Move two block permission constants to the relevant enum This allows using the two constants outside of block.c, which will happen in a subsequent patch. 
Signed-off-by: Ari Sundholm Signed-off-by: Kevin Wolf --- block.c | 6 ------ include/block/block.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 70a46fdd84..961ec97d26 100644 --- a/block.c +++ b/block.c @@ -1948,12 +1948,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } -#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ - | BLK_PERM_WRITE \ - | BLK_PERM_WRITE_UNCHANGED \ - | BLK_PERM_RESIZE) -#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) - void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, diff --git a/include/block/block.h b/include/block/block.h index e5c7759a0c..bc76b1e59f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -225,6 +225,13 @@ enum { BLK_PERM_GRAPH_MOD = 0x10, BLK_PERM_ALL = 0x1f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, }; char *bdrv_perm_names(uint64_t perm); From bfcc224e3cf04ee3fef0eb69984607b5764d9892 Mon Sep 17 00:00:00 2001 From: Aapo Vienamo Date: Tue, 3 Jul 2018 17:48:48 +0300 Subject: [PATCH 05/12] block: Add blklogwrites Implements a block device write logging system, similar to Linux kernel device mapper dm-log-writes. The write operations that are performed on a block device are logged to a file or another block device. The write log format is identical to the dm-log-writes format. Currently, log markers are not supported. This functionality can be used for crash consistency and fs consistency testing. By implementing it in qemu, tests utilizing write logs can be used to test non-Linux drivers and older kernels. The driver accepts an optional parameter to set the sector size used for logging. 
This makes the driver require all requests to be aligned to this sector size and also makes offsets and sizes of writes in the log metadata to be expressed in terms of this value (the log format has a granularity of one sector for offsets and sizes). This allows accurate logging of writes to guest block devices that have unusual sector sizes. The implementation is based on the blkverify and blkdebug block drivers. Signed-off-by: Aapo Vienamo Signed-off-by: Ari Sundholm Signed-off-by: Kevin Wolf --- MAINTAINERS | 6 + block/Makefile.objs | 1 + block/blklogwrites.c | 414 +++++++++++++++++++++++++++++++++++++++++++ qapi/block-core.json | 33 +++- 4 files changed, 448 insertions(+), 6 deletions(-) create mode 100644 block/blklogwrites.c diff --git a/MAINTAINERS b/MAINTAINERS index 6630d691d1..4431a80860 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2052,6 +2052,12 @@ S: Supported F: block/quorum.c L: qemu-block@nongnu.org +blklogwrites +M: Ari Sundholm +L: qemu-block@nongnu.org +S: Supported +F: block/blklogwrites.c + blkverify M: Stefan Hajnoczi L: qemu-block@nongnu.org diff --git a/block/Makefile.objs b/block/Makefile.objs index 899bfb5e2c..c8337bf186 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -5,6 +5,7 @@ block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o +block-obj-y += blklogwrites.o block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += file-posix.o diff --git a/block/blklogwrites.c b/block/blklogwrites.c new file mode 100644 index 0000000000..47093fadd6 --- /dev/null +++ b/block/blklogwrites.c @@ -0,0 +1,414 @@ +/* + * Write logging blk driver based on blkverify and blkdebug. 
+ * + * Copyright (c) 2017 Tuomas Tynkkynen + * Copyright (c) 2018 Aapo Vienamo + * Copyright (c) 2018 Ari Sundholm + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/option.h" + +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ + +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) + +#define WRITE_LOG_VERSION 1ULL +#define WRITE_LOG_MAGIC 0x6a736677736872ULL + +/* All fields are little-endian. */ +struct log_write_super { + uint64_t magic; + uint64_t version; + uint64_t nr_entries; + uint32_t sectorsize; +} QEMU_PACKED; + +struct log_write_entry { + uint64_t sector; + uint64_t nr_sectors; + uint64_t flags; + uint64_t data_len; +} QEMU_PACKED; + +/* End of disk format structures. 
*/ + +typedef struct { + BdrvChild *log_file; + uint32_t sectorsize; + uint32_t sectorbits; + uint64_t cur_log_sector; + uint64_t nr_entries; +} BDRVBlkLogWritesState; + +static QemuOptsList runtime_opts = { + .name = "blklogwrites", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "log-sector-size", + .type = QEMU_OPT_SIZE, + .help = "Log sector size", + }, + { /* end of list */ } + }, +}; + +static inline uint32_t blk_log_writes_log2(uint32_t value) +{ + assert(value > 0); + return 31 - clz32(value); +} + +static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + int64_t log_sector_size; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the file */ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + + if (log_sector_size < 0 || log_sector_size > (1ull << 23) || + !is_power_of_2(log_sector_size)) + { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size); + goto fail; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + s->cur_log_sector = 1; + s->nr_entries = 0; + + /* Open the log file */ + s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + ret = 0; +fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + qemu_opts_del(opts); + return ret; +} + +static void 
blk_log_writes_close(BlockDriverState *bs) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +} + +static int64_t blk_log_writes_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static void blk_log_writes_refresh_filename(BlockDriverState *bs, + QDict *options) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + /* bs->file->bs has already been refreshed */ + bdrv_refresh_filename(s->log_file->bs); + + if (bs->file->bs->full_open_options + && s->log_file->bs->full_open_options) + { + QDict *opts = qdict_new(); + qdict_put_str(opts, "driver", "blklogwrites"); + + qobject_ref(bs->file->bs->full_open_options); + qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options)); + qobject_ref(s->log_file->bs->full_open_options); + qdict_put_obj(opts, "log", + QOBJECT(s->log_file->bs->full_open_options)); + qdict_put_int(opts, "log-sector-size", s->sectorsize); + + bs->full_open_options = opts; + } +} + +static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *ro_q, + uint64_t perm, uint64_t shrd, + uint64_t *nperm, uint64_t *nshrd) +{ + if (!c) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + if (!strcmp(c->name, "log")) { + bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } else { + bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } +} + +static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + bs->bl.request_alignment = s->sectorsize; +} + +static int coroutine_fn +blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +typedef struct BlkLogWritesFileReq { + BlockDriverState *bs; + uint64_t 
offset; + uint64_t bytes; + int file_flags; + QEMUIOVector *qiov; + int (*func)(struct BlkLogWritesFileReq *r); + int file_ret; +} BlkLogWritesFileReq; + +typedef struct { + BlockDriverState *bs; + QEMUIOVector *qiov; + struct log_write_entry entry; + uint64_t zero_size; + int log_ret; +} BlkLogWritesLogReq; + +static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) +{ + BDRVBlkLogWritesState *s = lr->bs->opaque; + uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits; + + s->nr_entries++; + s->cur_log_sector += + ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size, + lr->qiov, 0); + + /* Logging for the "write zeroes" operation */ + if (lr->log_ret == 0 && lr->zero_size) { + cur_log_offset = s->cur_log_sector << s->sectorbits; + s->cur_log_sector += + ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset, + lr->zero_size, 0); + } + + /* Update super block on flush */ + if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) { + struct log_write_super super = { + .magic = cpu_to_le64(WRITE_LOG_MAGIC), + .version = cpu_to_le64(WRITE_LOG_VERSION), + .nr_entries = cpu_to_le64(s->nr_entries), + .sectorsize = cpu_to_le32(s->sectorsize), + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(super)); + QEMUIOVector qiov; + + qemu_iovec_init(&qiov, 2); + qemu_iovec_add(&qiov, &super, sizeof(super)); + qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super)); + + lr->log_ret = + bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0); + if (lr->log_ret == 0) { + lr->log_ret = bdrv_co_flush(s->log_file->bs); + } + qemu_iovec_destroy(&qiov); + g_free(zeroes); + } +} + +static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr) +{ + fr->file_ret = fr->func(fr); +} + +static int coroutine_fn +blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + 
QEMUIOVector *qiov, int flags, + int (*file_func)(BlkLogWritesFileReq *r), + uint64_t entry_flags, bool is_zero_write) +{ + QEMUIOVector log_qiov; + size_t niov = qiov ? qiov->niov : 0; + BDRVBlkLogWritesState *s = bs->opaque; + BlkLogWritesFileReq fr = { + .bs = bs, + .offset = offset, + .bytes = bytes, + .file_flags = flags, + .qiov = qiov, + .func = file_func, + }; + BlkLogWritesLogReq lr = { + .bs = bs, + .qiov = &log_qiov, + .entry = { + .sector = cpu_to_le64(offset >> s->sectorbits), + .nr_sectors = cpu_to_le64(bytes >> s->sectorbits), + .flags = cpu_to_le64(entry_flags), + .data_len = 0, + }, + .zero_size = is_zero_write ? bytes : 0, + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry)); + + assert((1 << s->sectorbits) == s->sectorsize); + assert(bs->bl.request_alignment == s->sectorsize); + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + + qemu_iovec_init(&log_qiov, niov + 2); + qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry)); + qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry)); + if (qiov) { + qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size); + } + + blk_log_writes_co_do_file(&fr); + blk_log_writes_co_do_log(&lr); + + qemu_iovec_destroy(&log_qiov); + g_free(zeroes); + + if (lr.log_ret < 0) { + return lr.log_ret; + } + + return fr.file_ret; +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes, + fr->qiov, fr->file_flags); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes, + fr->file_flags); +} + +static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) +{ + return bdrv_co_flush(fr->bs->file->bs); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) +{ + return 
bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes); +} + +static int coroutine_fn +blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, + blk_log_writes_co_do_file_pwritev, 0, false); +} + +static int coroutine_fn +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, + blk_log_writes_co_do_file_pwrite_zeroes, 0, + true); +} + +static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) +{ + return blk_log_writes_co_log(bs, 0, 0, NULL, 0, + blk_log_writes_co_do_file_flush, + LOG_FLUSH_FLAG, false); +} + +static int coroutine_fn +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +{ + return blk_log_writes_co_log(bs, offset, count, NULL, 0, + blk_log_writes_co_do_file_pdiscard, + LOG_DISCARD_FLAG, false); +} + +static BlockDriver bdrv_blk_log_writes = { + .format_name = "blklogwrites", + .instance_size = sizeof(BDRVBlkLogWritesState), + + .bdrv_open = blk_log_writes_open, + .bdrv_close = blk_log_writes_close, + .bdrv_getlength = blk_log_writes_getlength, + .bdrv_refresh_filename = blk_log_writes_refresh_filename, + .bdrv_child_perm = blk_log_writes_child_perm, + .bdrv_refresh_limits = blk_log_writes_refresh_limits, + + .bdrv_co_preadv = blk_log_writes_co_preadv, + .bdrv_co_pwritev = blk_log_writes_co_pwritev, + .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, + .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, + .bdrv_co_pdiscard = blk_log_writes_co_pdiscard, + .bdrv_co_block_status = bdrv_co_block_status_from_file, + + .is_filter = true, +}; + +static void bdrv_blk_log_writes_init(void) +{ + bdrv_register(&bdrv_blk_log_writes); +} + +block_init(bdrv_blk_log_writes_init); diff --git a/qapi/block-core.json b/qapi/block-core.json index 90e554ed0f..a9eab8cab8 
100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2533,16 +2533,17 @@ # @throttle: Since 2.11 # @nvme: Since 2.12 # @copy-on-read: Since 3.0 +# @blklogwrites: Since 3.0 # # Since: 2.9 ## { 'enum': 'BlockdevDriver', - 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read', - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', - 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed', - 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', - 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } + 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop', + 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster', + 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', + 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', + 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', + 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } ## # @BlockdevOptionsFile: @@ -3044,6 +3045,25 @@ '*inject-error': ['BlkdebugInjectErrorOptions'], '*set-state': ['BlkdebugSetStateOptions'] } } +## +# @BlockdevOptionsBlklogwrites: +# +# Driver specific block device options for blklogwrites. 
+# +# @file: block device +# +# @log: block device used to log writes to @file +# +# @log-sector-size: sector size used in logging writes to @file, determines +# granularity of offsets and sizes of writes (default: 512) +# +# Since: 3.0 +## +{ 'struct': 'BlockdevOptionsBlklogwrites', + 'data': { 'file': 'BlockdevRef', + 'log': 'BlockdevRef', + '*log-sector-size': 'uint32' } } + ## # @BlockdevOptionsBlkverify: # @@ -3563,6 +3583,7 @@ 'discriminator': 'driver', 'data': { 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blklogwrites':'BlockdevOptionsBlklogwrites', 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', From 824808dd77821ceba05357cb1ee4069a6a95bebd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 4 Jul 2018 13:28:29 +0200 Subject: [PATCH 06/12] block: Don't silently truncate node names If the user passes a too long node name string, we silently truncate it to fit into BlockDriverState.node_name, i.e. to 31 characters. Apart from surprising the user when the node has a different name than requested, this also bypasses the check for duplicate names, so that the same name can be assigned to multiple nodes. Fix this by just making too long node names an error. 
Reported-by: Peter Krempa Signed-off-by: Kevin Wolf --- block.c | 6 ++++++ tests/qemu-iotests/051 | 15 +++++++++++++++ tests/qemu-iotests/051.out | 23 +++++++++++++++++++++++ tests/qemu-iotests/051.pc.out | 23 +++++++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/block.c b/block.c index 961ec97d26..ac8b3a3511 100644 --- a/block.c +++ b/block.c @@ -1156,6 +1156,12 @@ static void bdrv_assign_node_name(BlockDriverState *bs, goto out; } + /* Make sure that the node name isn't truncated */ + if (strlen(node_name) >= sizeof(bs->node_name)) { + error_setg(errp, "Node name too long"); + goto out; + } + /* copy node name into the bs and insert it into the graph list */ pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index f617e25e24..ee9c820d0f 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -99,6 +99,21 @@ run_qemu -drive file="$TEST_IMG",driver=foo run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2 +echo +echo === Node names === +echo + +# Maximum length: 31 characters +run_qemu -drive file="$TEST_IMG",node-name=x123456789012345678901234567890 +run_qemu -drive file="$TEST_IMG",node-name=x1234567890123456789012345678901 + +# First character must be alphabetic +# Following characters alphanumeric or -._ +run_qemu -drive file="$TEST_IMG",node-name=All-Types.of_all0wed_chars +run_qemu -drive file="$TEST_IMG",node-name=123foo +run_qemu -drive file="$TEST_IMG",node-name=_foo +run_qemu -drive file="$TEST_IMG",node-name=foo#12 + echo echo === Device without drive === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index dd9846d1ce..b7273505c7 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -47,6 +47,29 @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive 
file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index b01f9a90d7..e9257fe318 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -47,6 +47,29 @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive 
file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd From 0b68589d17ef2ea78b3cf016525db13cee1d99e7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 4 Jul 2018 14:55:06 +0200 Subject: [PATCH 07/12] block/crypto: Fix memory leak in create error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: Coverity CID 1393782 Signed-off-by: Kevin Wolf Reviewed-by: Daniel P. Berrangé --- block/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/crypto.c b/block/crypto.c index 994172a3de..146d81c90a 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -551,7 +551,7 @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, /* Create protocol layer */ ret = bdrv_create_file(filename, opts, errp); if (ret < 0) { - return ret; + goto fail; } bs = bdrv_open(filename, NULL, NULL, From 2dacaf7c82c2771d507e5e59efcea78d933baca9 Mon Sep 17 00:00:00 2001 From: Ari Sundholm Date: Wed, 4 Jul 2018 17:59:34 +0300 Subject: [PATCH 08/12] block/blklogwrites: Change log_sector_size from int64_t to uint64_t This was a simple oversight when working on intermediate versions of the original patch which introduced blklogwrites. 
Signed-off-by: Ari Sundholm Signed-off-by: Kevin Wolf --- block/blklogwrites.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 47093fadd6..272e11a021 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -79,7 +79,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; int ret; - int64_t log_sector_size; + uint64_t log_sector_size; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -101,11 +101,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, log_sector_size = qemu_opt_get_size(opts, "log-sector-size", BDRV_SECTOR_SIZE); - if (log_sector_size < 0 || log_sector_size > (1ull << 23) || - !is_power_of_2(log_sector_size)) - { + if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) { ret = -EINVAL; - error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size); + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); goto fail; } From 0878b3c113145d4e01d65aadd4efaf097a3fda4b Mon Sep 17 00:00:00 2001 From: Ari Sundholm Date: Wed, 4 Jul 2018 17:59:35 +0300 Subject: [PATCH 09/12] block/blklogwrites: Add an option for appending to an old log Suggested by Kevin Wolf. May be useful when testing multiple batches of writes or doing long-term testing involving restarts of the VM. 
Signed-off-by: Ari Sundholm Signed-off-by: Kevin Wolf --- block/blklogwrites.c | 147 ++++++++++++++++++++++++++++++++++++++----- qapi/block-core.json | 3 +- 2 files changed, 135 insertions(+), 15 deletions(-) diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 272e11a021..56154e7325 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -24,6 +24,10 @@ #define LOG_FUA_FLAG (1 << 1) #define LOG_DISCARD_FLAG (1 << 2) #define LOG_MARK_FLAG (1 << 3) +#define LOG_FLAG_MASK (LOG_FLUSH_FLAG \ + | LOG_FUA_FLAG \ + | LOG_DISCARD_FLAG \ + | LOG_MARK_FLAG) #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL @@ -57,6 +61,11 @@ static QemuOptsList runtime_opts = { .name = "blklogwrites", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { + { + .name = "log-append", + .type = QEMU_OPT_BOOL, + .help = "Append to an existing log", + }, { .name = "log-sector-size", .type = QEMU_OPT_SIZE, @@ -72,6 +81,53 @@ static inline uint32_t blk_log_writes_log2(uint32_t value) return 31 - clz32(value); } +static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size) +{ + return sector_size < (1ull << 24) && is_power_of_2(sector_size); +} + +static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log, + uint32_t sector_size, + uint64_t nr_entries, + Error **errp) +{ + uint64_t cur_sector = 1; + uint64_t cur_idx = 0; + uint32_t sector_bits = blk_log_writes_log2(sector_size); + struct log_write_entry cur_entry; + + while (cur_idx < nr_entries) { + int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry, + sizeof(cur_entry)); + if (read_ret < 0) { + error_setg_errno(errp, -read_ret, + "Failed to read log entry %"PRIu64, cur_idx); + return (uint64_t)-1ull; + } + + if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) { + error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64, + le64_to_cpu(cur_entry.flags), cur_idx); + return (uint64_t)-1ull; + } + + /* Account for the sector of the entry itself */ + 
++cur_sector; + + /* + * Account for the data of the write. + * For discards, this data is not present. + */ + if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) { + cur_sector += le64_to_cpu(cur_entry.nr_sectors); + } + + ++cur_idx; + } + + return cur_sector; +} + static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -80,6 +136,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; int ret; uint64_t log_sector_size; + bool log_append; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -98,20 +155,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - log_sector_size = qemu_opt_get_size(opts, "log-sector-size", - BDRV_SECTOR_SIZE); - - if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) { - ret = -EINVAL; - error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); - goto fail; - } - - s->sectorsize = log_sector_size; - s->sectorbits = blk_log_writes_log2(log_sector_size); - s->cur_log_sector = 1; - s->nr_entries = 0; - /* Open the log file */ s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false, &local_err); @@ -121,7 +164,83 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } + log_append = qemu_opt_get_bool(opts, "log-append", false); + + if (log_append) { + struct log_write_super log_sb = { 0, 0, 0, 0 }; + + if (qemu_opt_find(opts, "log-sector-size")) { + ret = -EINVAL; + error_setg(errp, "log-append and log-sector-size are mutually " + "exclusive"); + goto fail_log; + } + + /* Read log superblock or fake one for an empty log */ + if (!bdrv_getlength(s->log_file->bs)) { + log_sb.magic = cpu_to_le64(WRITE_LOG_MAGIC); + log_sb.version = cpu_to_le64(WRITE_LOG_VERSION); + log_sb.nr_entries = cpu_to_le64(0); + log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE); 
+ } else { + ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read log superblock"); + goto fail_log; + } + } + + if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock magic"); + goto fail_log; + } + + if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) { + ret = -EINVAL; + error_setg(errp, "Unsupported log version %"PRIu64, + le64_to_cpu(log_sb.version)); + goto fail_log; + } + + log_sector_size = le32_to_cpu(log_sb.sectorsize); + s->cur_log_sector = 1; + s->nr_entries = 0; + + if (blk_log_writes_sector_size_valid(log_sector_size)) { + s->cur_log_sector = + blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size, + le64_to_cpu(log_sb.nr_entries), &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail_log; + } + + s->nr_entries = le64_to_cpu(log_sb.nr_entries); + } + } else { + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + s->cur_log_sector = 1; + s->nr_entries = 0; + } + + if (!blk_log_writes_sector_size_valid(log_sector_size)) { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); + goto fail_log; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + ret = 0; +fail_log: + if (ret < 0) { + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; + } fail: if (ret < 0) { bdrv_unref_child(bs, bs->file); diff --git a/qapi/block-core.json b/qapi/block-core.json index a9eab8cab8..d1753a2ae7 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3062,7 +3062,8 @@ { 'struct': 'BlockdevOptionsBlklogwrites', 'data': { 'file': 'BlockdevRef', 'log': 'BlockdevRef', - '*log-sector-size': 'uint32' } } + '*log-sector-size': 'uint32', + '*log-append': 'bool' } } ## # @BlockdevOptionsBlkverify: From 1dce698ea85bb18f62e4c540d4db628bacfba6ba Mon Sep 17 00:00:00 2001 From: Ari 
Sundholm Date: Wed, 4 Jul 2018 17:59:36 +0300 Subject: [PATCH 10/12] block/blklogwrites: Add an option for the update interval of the log superblock This is a way to ensure that the log superblock is periodically updated. Before, this was only done on flush requests, which may not be enough if the VM exits abnormally, omitting the final flush. The default interval is 4096 write requests. Signed-off-by: Ari Sundholm Signed-off-by: Kevin Wolf --- block/blklogwrites.c | 20 ++++++++++++++++++-- qapi/block-core.json | 6 +++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 56154e7325..63bf6b34a9 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -55,6 +55,7 @@ typedef struct { uint32_t sectorbits; uint64_t cur_log_sector; uint64_t nr_entries; + uint64_t update_interval; } BDRVBlkLogWritesState; static QemuOptsList runtime_opts = { @@ -71,6 +72,11 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Log sector size", }, + { + .name = "log-super-update-interval", + .type = QEMU_OPT_NUMBER, + .help = "Log superblock update interval (# of write requests)", + }, { /* end of list */ } }, }; @@ -234,6 +240,14 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, s->sectorsize = log_sector_size; s->sectorbits = blk_log_writes_log2(log_sector_size); + s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval", + 4096); + if (!s->update_interval) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock update interval %"PRIu64, + s->update_interval); + goto fail_log; + } ret = 0; fail_log: @@ -360,8 +374,10 @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) lr->zero_size, 0); } - /* Update super block on flush */ - if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) { + /* Update super block on flush or every update interval */ + if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG) + || 
(s->nr_entries % s->update_interval == 0))) + { struct log_write_super super = { .magic = cpu_to_le64(WRITE_LOG_MAGIC), .version = cpu_to_le64(WRITE_LOG_VERSION), diff --git a/qapi/block-core.json b/qapi/block-core.json index d1753a2ae7..38b31250f9 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3057,13 +3057,17 @@ # @log-sector-size: sector size used in logging writes to @file, determines # granularity of offsets and sizes of writes (default: 512) # +# @log-super-update-interval: interval of write requests after which the log +# super block is updated to disk (default: 4096) +# # Since: 3.0 ## { 'struct': 'BlockdevOptionsBlklogwrites', 'data': { 'file': 'BlockdevRef', 'log': 'BlockdevRef', '*log-sector-size': 'uint32', - '*log-append': 'bool' } } + '*log-append': 'bool', + '*log-super-update-interval': 'uint64' } } ## # @BlockdevOptionsBlkverify: From d815efcaf01b1698e2fdf0f3e125201025c53191 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 4 Jul 2018 16:47:50 +0200 Subject: [PATCH 11/12] file-posix: Fix creation locking raw_apply_lock_bytes() takes a bit mask of "permissions that are NOT shared". Also, make the "perm" and "shared" variables uint64_t, because I do not particularly like using ~ on signed integers (and other permission masks are usually uint64_t, too). 
Reported-by: Kevin Wolf Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/file-posix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 829ee538d8..b57c58e80f 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2112,7 +2112,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; int fd; - int perm, shared; + uint64_t perm, shared; int result = 0; /* Validate options and set default values */ @@ -2148,7 +2148,7 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; /* Step one: Take locks */ - result = raw_apply_lock_bytes(fd, perm, shared, false, errp); + result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp); if (result < 0) { goto out_close; } From 7c20c808a5cbf5d244735bc78fc3138c739c1946 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 4 Jul 2018 16:47:51 +0200 Subject: [PATCH 12/12] file-posix: Unlock FD after creation Closing the FD does not necessarily mean that it is unlocked. Fix this by relinquishing all permission locks before qemu_close(). 
Reported-by: Kevin Wolf Signed-off-by: Max Reitz Signed-off-by: Kevin Wolf --- block/file-posix.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index b57c58e80f..98987b80f1 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2111,6 +2111,7 @@ static int coroutine_fn raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; + Error *local_err = NULL; int fd; uint64_t perm, shared; int result = 0; @@ -2156,13 +2157,13 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Step two: Check that nobody else has taken conflicting locks */ result = raw_check_lock_bytes(fd, perm, shared, errp); if (result < 0) { - goto out_close; + goto out_unlock; } /* Clear the file by truncating it to 0 */ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); if (result < 0) { - goto out_close; + goto out_unlock; } if (file_opts->nocow) { @@ -2185,7 +2186,17 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) result = raw_regular_truncate(NULL, fd, file_opts->size, file_opts->preallocation, errp); if (result < 0) { - goto out_close; + goto out_unlock; + } + +out_unlock: + raw_apply_lock_bytes(fd, 0, 0, true, &local_err); + if (local_err) { + /* The above call should not fail, and if it does, that does + * not mean the whole creation operation has failed. So + * report it to the user for their convenience, but do not report + * it to the caller. */ + error_report_err(local_err); } out_close: