From 9fe3781f09f94f3ce76e52899bcdeb0d5164dbb1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 4 Dec 2012 16:12:18 +0100 Subject: [PATCH 01/43] tests: use aio_poll() instead of aio_flush() in test-aio.c There has been confusion between various aio wait and flush functions. It's time to get rid of qemu_aio_flush() but in the aio test cases we really do want this low-level functionality. Therefore declare a local wait_for_aio() helper for the test cases. Drop the aio_flush() test case. Signed-off-by: Stefan Hajnoczi Acked-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/test-aio.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/tests/test-aio.c b/tests/test-aio.c index f53c908707..a8a4f0c6a5 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -15,6 +15,14 @@ AioContext *ctx; +/* Wait until there are no more BHs or AIO requests */ +static void wait_for_aio(void) +{ + while (aio_poll(ctx, true)) { + /* Do nothing */ + } +} + /* Simple callbacks for testing. */ typedef struct { @@ -78,14 +86,6 @@ static void test_notify(void) g_assert(!aio_poll(ctx, false)); } -static void test_flush(void) -{ - g_assert(!aio_poll(ctx, false)); - aio_notify(ctx); - aio_flush(ctx); - g_assert(!aio_poll(ctx, false)); -} - static void test_bh_schedule(void) { BHTestData data = { .n = 0 }; @@ -116,7 +116,7 @@ static void test_bh_schedule10(void) g_assert(aio_poll(ctx, true)); g_assert_cmpint(data.n, ==, 2); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 10); g_assert(!aio_poll(ctx, false)); @@ -164,7 +164,7 @@ static void test_bh_delete_from_cb(void) qemu_bh_schedule(data1.bh); g_assert_cmpint(data1.n, ==, 0); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data1.n, ==, data1.max); g_assert(data1.bh == NULL); @@ -200,7 +200,7 @@ static void test_bh_delete_from_cb_many(void) g_assert_cmpint(data4.n, ==, 1); g_assert(data1.bh == NULL); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data1.n, ==, data1.max); g_assert_cmpint(data2.n, ==, data2.max); g_assert_cmpint(data3.n, ==, data3.max); @@ -219,7 +219,7 @@ static void test_bh_flush(void) qemu_bh_schedule(data.bh); g_assert_cmpint(data.n, ==, 0); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 1); g_assert(!aio_poll(ctx, false)); @@ -281,7 +281,7 @@ static void test_flush_event_notifier(void) g_assert_cmpint(data.active, ==, 9); g_assert(aio_poll(ctx, false)); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 10); g_assert_cmpint(data.active, ==, 0); g_assert(!aio_poll(ctx, false)); @@ -325,7 +325,7 @@ static void test_wait_event_notifier_noflush(void) g_assert_cmpint(data.n, ==, 2); event_notifier_set(&dummy.e); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 2); g_assert_cmpint(dummy.n, ==, 1); g_assert_cmpint(dummy.active, ==, 0); @@ -346,7 +346,7 @@ static void test_wait_event_notifier_noflush(void) * - sometimes both the AioContext and the glib main loop wake * themselves up. Hence, some "g_assert(!aio_poll(ctx, false));" * are replaced by "while (g_main_context_iteration(NULL, false));". - * - there is no exact replacement for aio_flush's blocking wait. + * - there is no exact replacement for a blocking wait. * "while (g_main_context_iteration(NULL, true)" seems to work, * but it is not documented _why_ it works. For these tests a * non-blocking loop like "while (g_main_context_iteration(NULL, false)" @@ -637,7 +637,6 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); g_test_add_func("/aio/notify", test_notify); - g_test_add_func("/aio/flush", test_flush); g_test_add_func("/aio/bh/schedule", test_bh_schedule); g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); g_test_add_func("/aio/bh/cancel", test_bh_cancel); From 8a805c222caa0e20bf11d2267f726d0bb5917d94 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 4 Dec 2012 16:12:19 +0100 Subject: [PATCH 02/43] tests: avoid qemu_aio_flush() in test-thread-pool.c We need to eliminate calls to qemu_aio_flush() since the function is being removed. Most callers will use bdrv_drain_all() instead but test-thread-pool.c is lower level. Since the test uses the global AioContext we can loop on qemu_aio_wait() to wait for aio and bh activity to complete. Signed-off-by: Stefan Hajnoczi Acked-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/test-thread-pool.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index fea0445fb4..ea8e676b0c 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -47,11 +47,19 @@ static void qemu_aio_wait_nonblocking(void) qemu_aio_wait(); } +/* Wait until all aio and bh activity has finished */ +static void qemu_aio_wait_all(void) +{ + while (qemu_aio_wait()) { + /* Do nothing */ + } +} + static void test_submit(void) { WorkerTestData data = { .n = 0 }; thread_pool_submit(worker_cb, &data); - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(data.n, ==, 1); } @@ -63,7 +71,7 @@ static void test_submit_aio(void) /* The callbacks are not called until after the first wait. */ active = 1; g_assert_cmpint(data.ret, ==, -EINPROGRESS); - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(active, ==, 0); g_assert_cmpint(data.n, ==, 1); g_assert_cmpint(data.ret, ==, 0); @@ -84,7 +92,7 @@ static void co_test_cb(void *opaque) data->ret = 0; active--; - /* The test continues in test_submit_co, after qemu_aio_flush... */ + /* The test continues in test_submit_co, after qemu_aio_wait_all... */ } static void test_submit_co(void) @@ -99,9 +107,9 @@ static void test_submit_co(void) g_assert_cmpint(active, ==, 1); g_assert_cmpint(data.ret, ==, -EINPROGRESS); - /* qemu_aio_flush will execute the rest of the coroutine. */ + /* qemu_aio_wait_all will execute the rest of the coroutine. */ - qemu_aio_flush(); + qemu_aio_wait_all(); /* Back here after the coroutine has finished. */ @@ -184,7 +192,7 @@ static void test_cancel(void) } /* Finish execution and execute any remaining callbacks. */ - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(active, ==, 0); for (i = 0; i < 100; i++) { if (data[i].n == 3) { From d318aea9325c99b15c87a7c14865386c2fde0d2c Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 13 Nov 2012 16:35:08 +0100 Subject: [PATCH 03/43] block: Improve bdrv_aio_co_cancel_em Instead of waiting for all requests to complete, wait just for the specific request that should be cancelled. Signed-off-by: Kevin Wolf --- block.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index c05875fe39..c7a1a3c85c 100644 --- a/block.c +++ b/block.c @@ -3778,12 +3778,20 @@ typedef struct BlockDriverAIOCBCoroutine { BlockDriverAIOCB common; BlockRequest req; bool is_write; + bool *done; QEMUBH* bh; } BlockDriverAIOCBCoroutine; static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { - qemu_aio_flush(); + BlockDriverAIOCBCoroutine *acb = + container_of(blockacb, BlockDriverAIOCBCoroutine, common); + bool done = false; + + acb->done = &done; + while (!done) { + qemu_aio_wait(); + } } static const AIOCBInfo bdrv_em_co_aiocb_info = { @@ -3796,6 +3804,11 @@ static void bdrv_co_em_bh(void *opaque) BlockDriverAIOCBCoroutine *acb = opaque; acb->common.cb(acb->common.opaque, acb->req.error); + + if (acb->done) { + *acb->done = true; + } + qemu_bh_delete(acb->bh); qemu_aio_release(acb); } @@ -3834,6 +3847,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, acb->req.nb_sectors = nb_sectors; acb->req.qiov = qiov; acb->is_write = is_write; + acb->done = NULL; co = qemu_coroutine_create(bdrv_co_do_rw); qemu_coroutine_enter(co, acb); @@ -3860,6 +3874,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverAIOCBCoroutine *acb; acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); + acb->done = NULL; + co = qemu_coroutine_create(bdrv_aio_flush_co_entry); qemu_coroutine_enter(co, acb); @@ -3888,6 +3904,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->req.sector = sector_num; acb->req.nb_sectors = nb_sectors; + acb->done = NULL; co = qemu_coroutine_create(bdrv_aio_discard_co_entry); qemu_coroutine_enter(co, acb); From c57b6656c3168bccca7f78b3f740e9149893b3da Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 13 Nov 2012 16:35:13 +0100 Subject: [PATCH 04/43] aio: Get rid of qemu_aio_flush() There are no remaining users, and new users should probably be using bdrv_drain_all() in the first place. Signed-off-by: Kevin Wolf --- async.c | 5 ----- block/commit.c | 2 +- block/mirror.c | 2 +- block/stream.c | 2 +- main-loop.c | 5 ----- qemu-aio.h | 9 ++------- 6 files changed, 5 insertions(+), 20 deletions(-) diff --git a/async.c b/async.c index 3f0e8f367c..41ae0c1195 100644 --- a/async.c +++ b/async.c @@ -215,8 +215,3 @@ void aio_context_unref(AioContext *ctx) { g_source_unref(&ctx->source); } - -void aio_flush(AioContext *ctx) -{ - while (aio_poll(ctx, true)); -} diff --git a/block/commit.c b/block/commit.c index fae79582d4..e2bb1e241b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/mirror.c b/block/mirror.c index d6618a4b34..b1f5d4fa22 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque) } /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/stream.c b/block/stream.c index 0c0fc7a13b..0dcd286035 100644 --- a/block/stream.c +++ b/block/stream.c @@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/main-loop.c b/main-loop.c index c87624e621..7dba6f6e35 100644 --- a/main-loop.c +++ b/main-loop.c @@ -432,11 +432,6 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) return aio_bh_new(qemu_aio_context, cb, opaque); } -void qemu_aio_flush(void) -{ - aio_flush(qemu_aio_context); -} - bool qemu_aio_wait(void) { return aio_poll(qemu_aio_context, true); diff --git a/qemu-aio.h b/qemu-aio.h index 3889fe97a4..31884a8f16 100644 --- a/qemu-aio.h +++ b/qemu-aio.h @@ -162,10 +162,6 @@ void qemu_bh_cancel(QEMUBH *bh); */ void qemu_bh_delete(QEMUBH *bh); -/* Flush any pending AIO operation. This function will block until all - * outstanding AIO operations have been completed or cancelled. */ -void aio_flush(AioContext *ctx); - /* Return whether there are any pending callbacks from the GSource * attached to the AioContext. * @@ -196,7 +192,7 @@ typedef int (AioFlushHandler)(void *opaque); /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will - * be invoked when using either qemu_aio_wait() or qemu_aio_flush(). + * be invoked when using qemu_aio_wait(). * * Code that invokes AIO completion functions should rely on this function * instead of qemu_set_fd_handler[2]. @@ -211,7 +207,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Register an event notifier and associated callbacks. Behaves very similarly * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks - * will be invoked when using either qemu_aio_wait() or qemu_aio_flush(). + * will be invoked when using qemu_aio_wait(). * * Code that invokes AIO completion functions should rely on this function * instead of event_notifier_set_handler. @@ -228,7 +224,6 @@ GSource *aio_get_g_source(AioContext *ctx); /* Functions to operate on the main QEMU AioContext. */ -void qemu_aio_flush(void); bool qemu_aio_wait(void); void qemu_aio_set_event_notifier(EventNotifier *notifier, EventNotifierHandler *io_read, From 7b272452398135e4f8e48341239705d03c82dae3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Nov 2012 17:05:39 +0100 Subject: [PATCH 05/43] block: Factor out bdrv_open_flags Signed-off-by: Kevin Wolf --- block.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/block.c b/block.c index c7a1a3c85c..c4f5566b5d 100644 --- a/block.c +++ b/block.c @@ -634,6 +634,26 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) bs->copy_on_read--; } +static int bdrv_open_flags(BlockDriverState *bs, int flags) +{ + int open_flags = flags | BDRV_O_CACHE_WB; + + /* + * Clear flags that are internal to the block layer before opening the + * image. + */ + open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + + /* + * Snapshots should be writable. + */ + if (bs->is_temporary) { + open_flags |= BDRV_O_RDWR; + } + + return open_flags; +} + /* * Common part for opening disk images and files */ @@ -665,20 +685,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, bs->opaque = g_malloc0(drv->instance_size); bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); - open_flags = flags | BDRV_O_CACHE_WB; - - /* - * Clear flags that are internal to the block layer before opening the - * image. - */ - open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); - - /* - * Snapshots should be writable. - */ - if (bs->is_temporary) { - open_flags |= BDRV_O_RDWR; - } + open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); From f500a6d3c2b9ef0bb06d0080d91d8ed3c1d68f58 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Nov 2012 17:35:27 +0100 Subject: [PATCH 06/43] block: Avoid second open for format probing This fixes problems that are caused by the additional open/close cycle of the existing format probing, for example related to qemu-nbd without -t option or file descriptor passing. Signed-off-by: Kevin Wolf --- block.c | 74 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/block.c b/block.c index c4f5566b5d..2ec3afebfe 100644 --- a/block.c +++ b/block.c @@ -518,22 +518,16 @@ BlockDriver *bdrv_find_protocol(const char *filename) return NULL; } -static int find_image_format(const char *filename, BlockDriver **pdrv) +static int find_image_format(BlockDriverState *bs, const char *filename, + BlockDriver **pdrv) { - int ret, score, score_max; + int score, score_max; BlockDriver *drv1, *drv; uint8_t buf[2048]; - BlockDriverState *bs; - - ret = bdrv_file_open(&bs, filename, 0); - if (ret < 0) { - *pdrv = NULL; - return ret; - } + int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (bs->sg || !bdrv_is_inserted(bs)) { - bdrv_delete(bs); drv = bdrv_find_format("raw"); if (!drv) { ret = -ENOENT; @@ -543,7 +537,6 @@ static int find_image_format(const char *filename, BlockDriver **pdrv) } ret = bdrv_pread(bs, 0, buf, sizeof(buf)); - bdrv_delete(bs); if (ret < 0) { *pdrv = NULL; return ret; @@ -657,7 +650,8 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) /* * Common part for opening disk images and files */ -static int bdrv_open_common(BlockDriverState *bs, const char *filename, +static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, + const char *filename, int flags, BlockDriver *drv) { int ret, open_flags; @@ -691,12 +685,16 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, /* Open the image, either directly or using a protocol */ if (drv->bdrv_file_open) { - ret = drv->bdrv_file_open(bs, filename, open_flags); - } else { - ret = bdrv_file_open(&bs->file, filename, open_flags); - if (ret >= 0) { - ret = drv->bdrv_open(bs, open_flags); + if (file != NULL) { + bdrv_swap(file, bs); + ret = 0; + } else { + ret = drv->bdrv_file_open(bs, filename, open_flags); } + } else { + assert(file != NULL); + bs->file = file; + ret = drv->bdrv_open(bs, open_flags); } if (ret < 0) { @@ -716,10 +714,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, return 0; free_and_fail: - if (bs->file) { - bdrv_delete(bs->file); - bs->file = NULL; - } + bs->file = NULL; g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; @@ -741,7 +736,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags) } bs = bdrv_new(""); - ret = bdrv_open_common(bs, filename, flags, drv); + ret = bdrv_open_common(bs, NULL, filename, flags, drv); if (ret < 0) { bdrv_delete(bs); return ret; @@ -796,6 +791,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, int ret; /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char tmp_filename[PATH_MAX + 1]; + BlockDriverState *file = NULL; if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; @@ -855,25 +851,36 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, bs->is_temporary = 1; } - /* Find the right image format driver */ - if (!drv) { - ret = find_image_format(filename, &drv); - } - - if (!drv) { - goto unlink_and_fail; - } - + /* Open image file without format layer */ if (flags & BDRV_O_RDWR) { flags |= BDRV_O_ALLOW_RDWR; } + ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags)); + if (ret < 0) { + return ret; + } + + /* Find the right image format driver */ + if (!drv) { + ret = find_image_format(file, filename, &drv); + } + + if (!drv) { + goto unlink_and_fail; + } + /* Open the image */ - ret = bdrv_open_common(bs, filename, flags, drv); + ret = bdrv_open_common(bs, file, filename, flags, drv); if (ret < 0) { goto unlink_and_fail; } + if (bs->file != file) { + bdrv_delete(file); + file = NULL; + } + /* If there is a backing file, use it */ if ((flags & BDRV_O_NO_BACKING) == 0) { ret = bdrv_open_backing_file(bs); @@ -895,6 +902,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, return 0; unlink_and_fail: + if (file != NULL) { + bdrv_delete(file); + } if (bs->is_temporary) { unlink(filename); } From d567e62f98d5789ff4d273b924a0474931c71e8b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 15 Nov 2012 13:12:14 +1100 Subject: [PATCH 07/43] virtio-blk: Remove duplicate property definition For the virtio-blk device (via virtio-pci) the property "config-wce" is defined in two places. First, it's defined from the DEFINE_VIRTIO_BLK_FEATURES macro, second it's defined directly in virtio-pci, just two lines above the call to that macro. The direct definition in virtio-pci.c is broken, since it operates on the 'config_wce' field of VirtIOBlkConf, which is never used anywhere else. Therefore, this patch removes both the extra property definition and the redundant field it works on. Cc: Kevin Wolf Cc: Anthony Liguori Cc: Paul 'Rusty' Russell Signed-off-by: David Gibson Signed-off-by: Stefan Hajnoczi --- hw/virtio-blk.h | 1 - hw/virtio-pci.c | 1 - 2 files changed, 2 deletions(-) diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h index f0740d01af..651a000b9f 100644 --- a/hw/virtio-blk.h +++ b/hw/virtio-blk.h @@ -104,7 +104,6 @@ struct VirtIOBlkConf BlockConf conf; char *serial; uint32_t scsi; - uint32_t config_wce; }; #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \ diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 71f4fb5dc6..7684ac9a70 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -895,7 +895,6 @@ static Property virtio_blk_properties[] = { #ifdef __linux__ DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true), #endif - DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true), DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features), From 1fe1fa510aa3d4eb1fb4246d4951ef48e4c949c2 Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Fri, 2 Nov 2012 09:54:24 -0600 Subject: [PATCH 08/43] block: vpc initialize the uuid footer field Initialize the uuid field in the footer with a generated uuid. Signed-off-by: Charles Arnold Reviewed-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- block/vpc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/vpc.c b/block/vpc.c index b6bf52f140..f14c6ae196 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -26,6 +26,9 @@ #include "block_int.h" #include "module.h" #include "migration.h" +#if defined(CONFIG_UUID) +#include +#endif /**************************************************************/ @@ -739,7 +742,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) footer->type = be32_to_cpu(disk_type); - /* TODO uuid is missing */ +#if defined(CONFIG_UUID) + uuid_generate(footer->uuid); +#endif footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); From 258d2edbcd4bb5d267c96163333820332e1c14fa Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Tue, 30 Oct 2012 20:59:32 -0600 Subject: [PATCH 09/43] block: vpc support for ~2 TB disks The VHD specification allows for up to a 2 TB disk size. The current implementation in qemu emulates EIDE and ATA-2 hardware which only allows for up to 127 GB. This disk size limitation can be overridden by allowing up to 255 heads instead of the normal 4 bit limitation of 16. Doing so allows disk images to be created of up to nearly 2 TB. This change does not violate the VHD format specification nor does it change how smaller disks (ie, <=127GB) are defined. [Charles Arnold also writes: "In analyzing a 160 GB VHD fixed disk image created on Windows 2008 R2, it appears that MS is also ignoring the CHS values in the footer geometry field in whatever driver they use for accessing the image. The CHS values are set at 65535,16,255 which obviously doesn't represent an image size of 160 GB." -- Stefan] Signed-off-by: Charles Arnold Reviewed-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- block/vpc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index f14c6ae196..566e9a3b37 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -201,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - if (bs->total_sectors >= 65535 * 16 * 255) { + /* Allow a maximum disk size of approximately 2 TB */ + if (bs->total_sectors >= 65535LL * 255 * 255) { err = -EFBIG; goto fail; } @@ -527,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, * Note that the geometry doesn't always exactly match total_sectors but * may round it down. * - * Returns 0 on success, -EFBIG if the size is larger than 127 GB + * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override + * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) + * and instead allow up to 255 heads. */ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, uint8_t* heads, uint8_t* secs_per_cyl) { uint32_t cyls_times_heads; - if (total_sectors > 65535 * 16 * 255) + /* Allow a maximum disk size of approximately 2 TB */ + if (total_sectors > 65535LL * 255 * 255) { return -EFBIG; + } if (total_sectors > 65535 * 16 * 63) { *secs_per_cyl = 255; - *heads = 16; + if (total_sectors > 65535 * 16 * 255) { + *heads = 255; + } else { + *heads = 16; + } cyls_times_heads = total_sectors / *secs_per_cyl; } else { *secs_per_cyl = 17; From c208e8c2d88eea2bbafc2850d8856525637e495d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 2 Nov 2012 16:14:20 +0100 Subject: [PATCH 10/43] raw-posix: inline paio_ioctl into hdev_aio_ioctl clang now warns about an unused function: CC block/raw-posix.o block/raw-posix.c:707:26: warning: unused function paio_ioctl [-Wunused-function] static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, ^ 1 warning generated. because the only use of paio_ioctl() is inside a #if defined(__linux__) guard and it is static now. Reported-by: Peter Maydell Signed-off-by: Paolo Bonzini Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 550c81f22b..abfedbea73 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } -static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); - - acb->bs = bs; - acb->aio_type = QEMU_AIO_IOCTL; - acb->aio_fildes = fd; - acb->aio_offset = 0; - acb->aio_ioctl_buf = buf; - acb->aio_ioctl_cmd = req; - - return thread_pool_submit_aio(aio_worker, acb, cb, opaque); -} - static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) @@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; + RawPosixAIOData *acb; if (fd_open(bs) < 0) return NULL; - return paio_ioctl(bs, s->fd, req, buf, cb, opaque); + + acb = g_slice_new(RawPosixAIOData); + acb->bs = bs; + acb->aio_type = QEMU_AIO_IOCTL; + acb->aio_fildes = s->fd; + acb->aio_offset = 0; + acb->aio_ioctl_buf = buf; + acb->aio_ioctl_cmd = req; + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) From 2d0d2837dcf786da415cf4165d37f4ddd684ff57 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 20 Nov 2012 15:30:34 +0100 Subject: [PATCH 11/43] Support default block interfaces per QEMUMachine There are QEMUMachines that have neither IF_IDE nor IF_SCSI as a default/standard interface to their block devices / drives. Therefore, this patch introduces a new field default_block_type per QEMUMachine struct. The prior use_scsi field becomes thereby obsolete and is replaced through .default_block_type = IF_SCSI. This patch also changes the default for s390x to IF_VIRTIO and removes an early hack that converts IF_IDE drives. Other parties have already claimed interest (e.g. IF_SD for exynos) To create a sane default, for machines that dont specify a default_block_type, this patch makes IF_IDE = 0 and IF_NONE = 1. I checked all users of IF_NONE (blockdev.c and ww/device-hotplug.c) as well as IF_IDE and it seems that it is ok to change the defines - in other words, I found no obvious (to me) assumption in the code regarding IF_NONE==0. IF_NONE is only set if there is an explicit if=none. Without if=* the interface becomes IF_DEFAULT. I would suggest to have some additional care, e.g. by letting this patch sit some days in the block tree. Based on an initial patch from Einar Lueck Signed-off-by: Christian Borntraeger CC: Igor Mitsyanko CC: Markus Armbruster CC: Kevin Wolf Reviewed-by: Alexander Graf Acked-by: Igor Mitsyanko Reviewed-by: Markus Armbruster Signed-off-by: Stefan Hajnoczi --- blockdev.c | 4 ++-- blockdev.h | 9 +++++++-- hw/boards.h | 3 ++- hw/device-hotplug.c | 2 +- hw/highbank.c | 2 +- hw/leon3.c | 1 - hw/mips_jazz.c | 4 ++-- hw/pc_sysfw.c | 2 +- hw/puv3.c | 1 - hw/realview.c | 6 +++--- hw/s390-virtio.c | 17 ++--------------- hw/spapr.c | 2 +- hw/sun4m.c | 24 ++++++++++++------------ hw/versatilepb.c | 4 ++-- hw/vexpress.c | 4 ++-- hw/xilinx_zynq.c | 2 +- vl.c | 21 ++++++++++++--------- 17 files changed, 51 insertions(+), 57 deletions(-) diff --git a/blockdev.c b/blockdev.c index e73fd6e388..4a4d99f223 100644 --- a/blockdev.c +++ b/blockdev.c @@ -275,7 +275,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits) return true; } -DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) +DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type) { const char *buf; const char *file = NULL; @@ -325,7 +325,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } } else { - type = default_to_scsi ? IF_SCSI : IF_IDE; + type = block_default_type; } max_devs = if_max_devs[type]; diff --git a/blockdev.h b/blockdev.h index 5f27b643be..d73d552a98 100644 --- a/blockdev.h +++ b/blockdev.h @@ -19,8 +19,13 @@ void blockdev_auto_del(BlockDriverState *bs); typedef enum { IF_DEFAULT = -1, /* for use with drive_add() only */ + /* + * IF_IDE must be zero, because we want QEMUMachine member + * block_default_type to default-initialize to IF_IDE + */ + IF_IDE = 0, IF_NONE, - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, + IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, IF_COUNT } BlockInterfaceType; @@ -51,7 +56,7 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs); QemuOpts *drive_def(const char *optstr); QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, const char *optstr); -DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi); +DriveInfo *drive_init(QemuOpts *arg, BlockInterfaceType block_default_type); /* device-hotplug */ diff --git a/hw/boards.h b/hw/boards.h index 813d0e5109..c66fa16a9d 100644 --- a/hw/boards.h +++ b/hw/boards.h @@ -3,6 +3,7 @@ #ifndef HW_BOARDS_H #define HW_BOARDS_H +#include "blockdev.h" #include "qdev.h" typedef struct QEMUMachineInitArgs { @@ -24,7 +25,7 @@ typedef struct QEMUMachine { const char *desc; QEMUMachineInitFunc *init; QEMUMachineResetFunc *reset; - int use_scsi; + BlockInterfaceType block_default_type; int max_cpus; unsigned int no_serial:1, no_parallel:1, diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c index 6d9c080381..839b9ea1d4 100644 --- a/hw/device-hotplug.c +++ b/hw/device-hotplug.c @@ -39,7 +39,7 @@ DriveInfo *add_init_drive(const char *optstr) if (!opts) return NULL; - dinfo = drive_init(opts, current_machine->use_scsi); + dinfo = drive_init(opts, current_machine->block_default_type); if (!dinfo) { qemu_opts_del(opts); return NULL; diff --git a/hw/highbank.c b/hw/highbank.c index afbb005422..e8e5bf0826 100644 --- a/hw/highbank.c +++ b/hw/highbank.c @@ -326,7 +326,7 @@ static QEMUMachine highbank_machine = { .name = "highbank", .desc = "Calxeda Highbank (ECX-1000)", .init = highbank_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/leon3.c b/hw/leon3.c index 774273828f..ef83dffd85 100644 --- a/hw/leon3.c +++ b/hw/leon3.c @@ -212,7 +212,6 @@ static QEMUMachine leon3_generic_machine = { .name = "leon3_generic", .desc = "Leon-3 generic", .init = leon3_generic_hw_init, - .use_scsi = 0, }; static void leon3_machine_init(void) diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c index 0847427241..ea1416ae2f 100644 --- a/hw/mips_jazz.c +++ b/hw/mips_jazz.c @@ -324,14 +324,14 @@ static QEMUMachine mips_magnum_machine = { .name = "magnum", .desc = "MIPS Magnum", .init = mips_magnum_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine mips_pica61_machine = { .name = "pica61", .desc = "Acer Pica 61", .init = mips_pica61_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void mips_jazz_machine_init(void) diff --git a/hw/pc_sysfw.c b/hw/pc_sysfw.c index 40bced2322..d7ea3a5595 100644 --- a/hw/pc_sysfw.c +++ b/hw/pc_sysfw.c @@ -98,7 +98,7 @@ static void pc_fw_add_pflash_drv(void) return; } - if (!drive_init(opts, machine->use_scsi)) { + if (!drive_init(opts, machine->block_default_type)) { qemu_opts_del(opts); } } diff --git a/hw/puv3.c b/hw/puv3.c index 764799cff4..3d7734936b 100644 --- a/hw/puv3.c +++ b/hw/puv3.c @@ -122,7 +122,6 @@ static QEMUMachine puv3_machine = { .desc = "PKUnity Version-3 based on UniCore32", .init = puv3_init, .is_default = 1, - .use_scsi = 0, }; static void puv3_machine_init(void) diff --git a/hw/realview.c b/hw/realview.c index e789c159a9..8ea4ad7423 100644 --- a/hw/realview.c +++ b/hw/realview.c @@ -364,14 +364,14 @@ static QEMUMachine realview_eb_machine = { .name = "realview-eb", .desc = "ARM RealView Emulation Baseboard (ARM926EJ-S)", .init = realview_eb_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine realview_eb_mpcore_machine = { .name = "realview-eb-mpcore", .desc = "ARM RealView Emulation Baseboard (ARM11MPCore)", .init = realview_eb_mpcore_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -385,7 +385,7 @@ static QEMUMachine realview_pbx_a9_machine = { .name = "realview-pbx-a9", .desc = "ARM RealView Platform Baseboard Explore for Cortex-A9", .init = realview_pbx_a9_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c index ca1bb09816..7aca0c4aad 100644 --- a/hw/s390-virtio.c +++ b/hw/s390-virtio.c @@ -314,21 +314,6 @@ static void s390_init(QEMUMachineInitArgs *args) qdev_set_nic_properties(dev, nd); qdev_init_nofail(dev); } - - /* Create VirtIO disk drives */ - for(i = 0; i < MAX_BLK_DEVS; i++) { - DriveInfo *dinfo; - DeviceState *dev; - - dinfo = drive_get(IF_IDE, 0, i); - if (!dinfo) { - continue; - } - - dev = qdev_create((BusState *)s390_bus, "virtio-blk-s390"); - qdev_prop_set_drive_nofail(dev, "drive", dinfo->bdrv); - qdev_init_nofail(dev); - } } static QEMUMachine s390_machine = { @@ -336,6 +321,7 @@ static QEMUMachine s390_machine = { .alias = "s390", .desc = "VirtIO based S390 machine", .init = s390_init, + .block_default_type = IF_VIRTIO, .no_cdrom = 1, .no_floppy = 1, .no_serial = 1, @@ -352,3 +338,4 @@ static void s390_machine_init(void) } machine_init(s390_machine_init); + diff --git a/hw/spapr.c b/hw/spapr.c index ad3f0ea7fc..d955f02a3b 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -924,9 +924,9 @@ static QEMUMachine spapr_machine = { .desc = "pSeries Logical Partition (PAPR compliant)", .init = ppc_spapr_init, .reset = ppc_spapr_reset, + .block_default_type = IF_SCSI, .max_cpus = MAX_CPUS, .no_parallel = 1, - .use_scsi = 1, }; static void spapr_machine_init(void) diff --git a/hw/sun4m.c b/hw/sun4m.c index 1a786762aa..52cf82b681 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -1426,7 +1426,7 @@ static QEMUMachine ss5_machine = { .name = "SS-5", .desc = "Sun4m platform, SPARCstation 5", .init = ss5_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .is_default = 1, }; @@ -1434,7 +1434,7 @@ static QEMUMachine ss10_machine = { .name = "SS-10", .desc = "Sun4m platform, SPARCstation 10", .init = ss10_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1442,7 +1442,7 @@ static QEMUMachine ss600mp_machine = { .name = "SS-600MP", .desc = "Sun4m platform, SPARCserver 600MP", .init = ss600mp_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1450,7 +1450,7 @@ static QEMUMachine ss20_machine = { .name = "SS-20", .desc = "Sun4m platform, SPARCstation 20", .init = ss20_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1458,35 +1458,35 @@ static QEMUMachine voyager_machine = { .name = "Voyager", .desc = "Sun4m platform, SPARCstation Voyager", .init = vger_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine ss_lx_machine = { .name = "LX", .desc = "Sun4m platform, SPARCstation LX", .init = ss_lx_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine ss4_machine = { .name = "SS-4", .desc = "Sun4m platform, SPARCstation 4", .init = ss4_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine scls_machine = { .name = "SPARCClassic", .desc = "Sun4m platform, SPARCClassic", .init = scls_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine sbook_machine = { .name = "SPARCbook", .desc = "Sun4m platform, SPARCbook", .init = sbook_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static const struct sun4d_hwdef sun4d_hwdefs[] = { @@ -1709,7 +1709,7 @@ static QEMUMachine ss1000_machine = { .name = "SS-1000", .desc = "Sun4d platform, SPARCserver 1000", .init = ss1000_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 8, }; @@ -1717,7 +1717,7 @@ static QEMUMachine ss2000_machine = { .name = "SS-2000", .desc = "Sun4d platform, SPARCcenter 2000", .init = ss2000_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 20, }; @@ -1896,7 +1896,7 @@ static QEMUMachine ss2_machine = { .name = "SS-2", .desc = "Sun4c platform, SPARCstation 2", .init = ss2_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void sun4m_register_types(void) diff --git a/hw/versatilepb.c b/hw/versatilepb.c index 25e652b1aa..4892c1d27a 100644 --- a/hw/versatilepb.c +++ b/hw/versatilepb.c @@ -358,14 +358,14 @@ static QEMUMachine versatilepb_machine = { .name = "versatilepb", .desc = "ARM Versatile/PB (ARM926EJ-S)", .init = vpb_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine versatileab_machine = { .name = "versatileab", .desc = "ARM Versatile/AB (ARM926EJ-S)", .init = vab_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void versatile_machine_init(void) diff --git a/hw/vexpress.c b/hw/vexpress.c index d93f057bff..e89694c86e 100644 --- a/hw/vexpress.c +++ b/hw/vexpress.c @@ -477,7 +477,7 @@ static QEMUMachine vexpress_a9_machine = { .name = "vexpress-a9", .desc = "ARM Versatile Express for Cortex-A9", .init = vexpress_a9_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -485,7 +485,7 @@ static QEMUMachine vexpress_a15_machine = { .name = "vexpress-a15", .desc = "ARM Versatile Express for Cortex-A15", .init = vexpress_a15_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c index 1f12a3d1ad..a4a71c2497 100644 --- a/hw/xilinx_zynq.c +++ b/hw/xilinx_zynq.c @@ -200,7 +200,7 @@ static QEMUMachine zynq_machine = { .name = "xilinx-zynq-a9", .desc = "Xilinx Zynq Platform Baseboard for Cortex-A9", .init = zynq_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 1, .no_sdcard = 1 }; diff --git a/vl.c b/vl.c index a3ab3841a7..ee10d21c78 100644 --- a/vl.c +++ b/vl.c @@ -886,9 +886,9 @@ static int cleanup_add_fd(QemuOpts *opts, void *opaque) static int drive_init_func(QemuOpts *opts, void *opaque) { - int *use_scsi = opaque; + BlockInterfaceType *block_default_type = opaque; - return drive_init(opts, *use_scsi) == NULL; + return drive_init(opts, *block_default_type) == NULL; } static int drive_enable_snapshot(QemuOpts *opts, void *opaque) @@ -899,14 +899,15 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque) return 0; } -static void default_drive(int enable, int snapshot, int use_scsi, +static void default_drive(int enable, int snapshot, + BlockInterfaceType block_default_type, BlockInterfaceType type, int index, const char *optstr) { QemuOpts *opts; if (type == IF_DEFAULT) { - type = use_scsi ? IF_SCSI : IF_IDE; + type = block_default_type; } if (!enable || drive_get_by_index(type, index)) { @@ -917,7 +918,7 @@ static void default_drive(int enable, int snapshot, int use_scsi, if (snapshot) { drive_enable_snapshot(opts, NULL); } - if (!drive_init(opts, use_scsi)) { + if (!drive_init(opts, type)) { exit(1); } } @@ -3770,14 +3771,16 @@ int main(int argc, char **argv, char **envp) /* open the virtual block devices */ if (snapshot) qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0); - if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0) + if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, + &machine->block_default_type, 1) != 0) { exit(1); + } - default_drive(default_cdrom, snapshot, machine->use_scsi, + default_drive(default_cdrom, snapshot, machine->block_default_type, IF_DEFAULT, 2, CDROM_OPTS); - default_drive(default_floppy, snapshot, machine->use_scsi, + default_drive(default_floppy, snapshot, machine->block_default_type, IF_FLOPPY, 0, FD_OPTS); - default_drive(default_sdcard, snapshot, machine->use_scsi, + default_drive(default_sdcard, snapshot, machine->block_default_type, IF_SD, 0, SD_OPTS); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); From 3c42ea66888f149d72d600bab63624b2d849e4bf Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 22 Nov 2012 21:02:55 +0100 Subject: [PATCH 12/43] block: simplify default_drive Markus Armbruster pointed out that there is only one caller to default_drive with IF_DEFAULT as a type. Lets get rid of the block_default_type parameter and adopt the caller to do the right thing (asking the machine struct). Signed-off-by: Christian Borntraeger Reviewed-by: Markus Armbruster Signed-off-by: Stefan Hajnoczi --- vl.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/vl.c b/vl.c index ee10d21c78..6b3827cf06 100644 --- a/vl.c +++ b/vl.c @@ -899,17 +899,11 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque) return 0; } -static void default_drive(int enable, int snapshot, - BlockInterfaceType block_default_type, - BlockInterfaceType type, int index, - const char *optstr) +static void default_drive(int enable, int snapshot, BlockInterfaceType type, + int index, const char *optstr) { QemuOpts *opts; - if (type == IF_DEFAULT) { - type = block_default_type; - } - if (!enable || drive_get_by_index(type, index)) { return; } @@ -3776,12 +3770,10 @@ int main(int argc, char **argv, char **envp) exit(1); } - default_drive(default_cdrom, snapshot, machine->block_default_type, - IF_DEFAULT, 2, CDROM_OPTS); - default_drive(default_floppy, snapshot, machine->block_default_type, - IF_FLOPPY, 0, FD_OPTS); - default_drive(default_sdcard, snapshot, machine->block_default_type, - IF_SD, 0, SD_OPTS); + default_drive(default_cdrom, snapshot, machine->block_default_type, 2, + CDROM_OPTS); + default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); + default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); From 71c79813d83b5b45ba934cf995436063da458f66 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:04 -0200 Subject: [PATCH 13/43] block: bdrv_img_create(): add Error ** argument This commit adds an Error ** argument to bdrv_img_create() and set it appropriately on error. Callers of bdrv_img_create() pass NULL for the new argument and still rely on bdrv_img_create()'s return value. Next commits will change callers to use the Error object instead. Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- block.c | 22 +++++++++++++++++++++- block.h | 2 +- blockdev.c | 6 +++--- qemu-img.c | 2 +- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 2ec3afebfe..066bd66db0 100644 --- a/block.c +++ b/block.c @@ -4444,7 +4444,7 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) int bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags) + char *options, uint64_t img_size, int flags, Error **errp) { QEMUOptionParameter *param = NULL, *create_options = NULL; QEMUOptionParameter *backing_fmt, *backing_file, *size; @@ -4457,6 +4457,7 @@ int bdrv_img_create(const char *filename, const char *fmt, drv = bdrv_find_format(fmt); if (!drv) { error_report("Unknown file format '%s'", fmt); + error_setg(errp, "Unknown file format '%s'", fmt); ret = -EINVAL; goto out; } @@ -4464,6 +4465,7 @@ int bdrv_img_create(const char *filename, const char *fmt, proto_drv = bdrv_find_protocol(filename); if (!proto_drv) { error_report("Unknown protocol '%s'", filename); + error_setg(errp, "Unknown protocol '%s'", filename); ret = -EINVAL; goto out; } @@ -4483,6 +4485,7 @@ int bdrv_img_create(const char *filename, const char *fmt, param = parse_option_parameters(options, create_options, param); if (param == NULL) { error_report("Invalid options for file format '%s'.", fmt); + error_setg(errp, "Invalid options for file format '%s'.", fmt); ret = -EINVAL; goto out; } @@ -4493,6 +4496,8 @@ int bdrv_img_create(const char *filename, const char *fmt, base_filename)) { error_report("Backing file not supported for file format '%s'", fmt); + error_setg(errp, "Backing file not supported for file format '%s'", + fmt); ret = -EINVAL; goto out; } @@ -4502,6 +4507,8 @@ int bdrv_img_create(const char *filename, const char *fmt, if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { error_report("Backing file format not supported for file " "format '%s'", fmt); + error_setg(errp, "Backing file format not supported for file " + "format '%s'", fmt); ret = -EINVAL; goto out; } @@ -4512,6 +4519,8 @@ int bdrv_img_create(const char *filename, const char *fmt, if (!strcmp(filename, backing_file->value.s)) { error_report("Error: Trying to create an image with the " "same filename as the backing file"); + error_setg(errp, "Error: Trying to create an image with the " + "same filename as the backing file"); ret = -EINVAL; goto out; } @@ -4523,6 +4532,8 @@ int bdrv_img_create(const char *filename, const char *fmt, if (!backing_drv) { error_report("Unknown backing file format '%s'", backing_fmt->value.s); + error_setg(errp, "Unknown backing file format '%s'", + backing_fmt->value.s); ret = -EINVAL; goto out; } @@ -4546,6 +4557,8 @@ int bdrv_img_create(const char *filename, const char *fmt, ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv); if (ret < 0) { error_report("Could not open '%s'", backing_file->value.s); + error_setg_errno(errp, -ret, "Could not open '%s'", + backing_file->value.s); goto out; } bdrv_get_geometry(bs, &size); @@ -4555,6 +4568,7 @@ int bdrv_img_create(const char *filename, const char *fmt, set_option_parameter(param, BLOCK_OPT_SIZE, buf); } else { error_report("Image creation needs a size parameter"); + error_setg(errp, "Image creation needs a size parameter"); ret = -EINVAL; goto out; } @@ -4570,12 +4584,18 @@ int bdrv_img_create(const char *filename, const char *fmt, if (ret == -ENOTSUP) { error_report("Formatting or formatting option not supported for " "file format '%s'", fmt); + error_setg(errp,"Formatting or formatting option not supported for " + "file format '%s'", fmt); } else if (ret == -EFBIG) { error_report("The image size is too large for file format '%s'", fmt); + error_setg(errp, "The image size is too large for file format '%s'", + fmt); } else { error_report("%s: error while creating %s: %s", filename, fmt, strerror(-ret)); + error_setg(errp, "%s: error while creating %s: %s", filename, fmt, + strerror(-ret)); } } diff --git a/block.h b/block.h index 722c620590..ff54d15c86 100644 --- a/block.h +++ b/block.h @@ -345,7 +345,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags); + char *options, uint64_t img_size, int flags, Error **errp); void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); diff --git a/blockdev.c b/blockdev.c index 4a4d99f223..500f09116a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -789,7 +789,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) ret = bdrv_img_create(new_image_file, format, states->old_bs->filename, states->old_bs->drv->format_name, - NULL, -1, flags); + NULL, -1, flags, NULL); if (ret) { error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); goto delete_and_fail; @@ -1264,7 +1264,7 @@ void qmp_drive_mirror(const char *device, const char *target, bdrv_get_geometry(bs, &size); size *= 512; ret = bdrv_img_create(target, format, - NULL, NULL, NULL, size, flags); + NULL, NULL, NULL, size, flags, NULL); } else { switch (mode) { case NEW_IMAGE_MODE_EXISTING: @@ -1275,7 +1275,7 @@ void qmp_drive_mirror(const char *device, const char *target, ret = bdrv_img_create(target, format, source->filename, source->drv->format_name, - NULL, -1, flags); + NULL, -1, flags, NULL); break; default: abort(); diff --git a/qemu-img.c b/qemu-img.c index e29e01b729..3896689060 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -362,7 +362,7 @@ static int img_create(int argc, char **argv) } ret = bdrv_img_create(filename, fmt, base_filename, base_fmt, - options, img_size, BDRV_O_FLAGS); + options, img_size, BDRV_O_FLAGS, NULL); out: if (ret) { return 1; From 9b37525a7dbc4f5eef0023fc92716259a3d94612 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:05 -0200 Subject: [PATCH 14/43] qemu-img: img_create(): pass Error object to bdrv_img_create() Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- qemu-img.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 3896689060..595b6f5136 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -301,6 +301,7 @@ static int img_create(int argc, char **argv) const char *filename; const char *base_filename = NULL; char *options = NULL; + Error *local_err = NULL; for(;;) { c = getopt(argc, argv, "F:b:f:he6o:"); @@ -361,8 +362,14 @@ static int img_create(int argc, char **argv) goto out; } - ret = bdrv_img_create(filename, fmt, base_filename, base_fmt, - options, img_size, BDRV_O_FLAGS, NULL); + bdrv_img_create(filename, fmt, base_filename, base_fmt, + options, img_size, BDRV_O_FLAGS, &local_err); + if (error_is_set(&local_err)) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); + ret = -1; + } + out: if (ret) { return 1; From a930091189cedcc0023dd38f705e2a46e530f4a4 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:06 -0200 Subject: [PATCH 15/43] qemu-img: img_create(): drop unneeded goto and ret variable Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- qemu-img.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 595b6f5136..c4dae88e84 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -294,7 +294,7 @@ static int add_old_style_options(const char *fmt, QEMUOptionParameter *list, static int img_create(int argc, char **argv) { - int c, ret = 0; + int c; uint64_t img_size = -1; const char *fmt = "raw"; const char *base_fmt = NULL; @@ -351,15 +351,13 @@ static int img_create(int argc, char **argv) error_report("Invalid image size specified! You may use k, M, G or " "T suffixes for "); error_report("kilobytes, megabytes, gigabytes and terabytes."); - ret = -1; - goto out; + return 1; } img_size = (uint64_t)sval; } if (options && is_help_option(options)) { - ret = print_block_option_help(filename, fmt); - goto out; + return print_block_option_help(filename, fmt); } bdrv_img_create(filename, fmt, base_filename, base_fmt, @@ -367,13 +365,9 @@ static int img_create(int argc, char **argv) if (error_is_set(&local_err)) { error_report("%s", error_get_pretty(local_err)); error_free(local_err); - ret = -1; - } - -out: - if (ret) { return 1; } + return 0; } From 43e17041156ddecac8a7500648e71287ba270c0a Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:07 -0200 Subject: [PATCH 16/43] qmp: qmp_transaction(): pass Error object to bdrv_img_create() Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- blockdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/blockdev.c b/blockdev.c index 500f09116a..6fb336294c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -707,6 +707,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) int ret = 0; BlockdevActionList *dev_entry = dev_list; BlkTransactionStates *states, *next; + Error *local_err = NULL; QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); @@ -786,12 +787,12 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* create new image w/backing file */ if (mode != NEW_IMAGE_MODE_EXISTING) { - ret = bdrv_img_create(new_image_file, format, - states->old_bs->filename, - states->old_bs->drv->format_name, - NULL, -1, flags, NULL); - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); goto delete_and_fail; } } From cf8f2426c55245f437a91f2fdabbed4ea24e7786 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:08 -0200 Subject: [PATCH 17/43] qmp: qmp_drive_mirror(): pass Error object to bdrv_img_create() Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- blockdev.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/blockdev.c b/blockdev.c index 6fb336294c..463f4c2094 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1264,8 +1264,8 @@ void qmp_drive_mirror(const char *device, const char *target, assert(format && drv); bdrv_get_geometry(bs, &size); size *= 512; - ret = bdrv_img_create(target, format, - NULL, NULL, NULL, size, flags, NULL); + bdrv_img_create(target, format, + NULL, NULL, NULL, size, flags, &local_err); } else { switch (mode) { case NEW_IMAGE_MODE_EXISTING: @@ -1273,18 +1273,18 @@ void qmp_drive_mirror(const char *device, const char *target, break; case NEW_IMAGE_MODE_ABSOLUTE_PATHS: /* create new image with backing file */ - ret = bdrv_img_create(target, format, - source->filename, - source->drv->format_name, - NULL, -1, flags, NULL); + bdrv_img_create(target, format, + source->filename, + source->drv->format_name, + NULL, -1, flags, &local_err); break; default: abort(); } } - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, target); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); return; } From d92ada2202a0730e396304908ff7b870168387d2 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 30 Nov 2012 10:52:09 -0200 Subject: [PATCH 18/43] block: bdrv_img_create(): drop unused error handling code Signed-off-by: Luiz Capitulino Signed-off-by: Kevin Wolf --- block.c | 40 +++++----------------------------------- block.h | 6 +++--- 2 files changed, 8 insertions(+), 38 deletions(-) diff --git a/block.c b/block.c index 066bd66db0..b3faf3a463 100644 --- a/block.c +++ b/block.c @@ -4442,9 +4442,9 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; } -int bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, Error **errp) +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, Error **errp) { QEMUOptionParameter *param = NULL, *create_options = NULL; QEMUOptionParameter *backing_fmt, *backing_file, *size; @@ -4456,18 +4456,14 @@ int bdrv_img_create(const char *filename, const char *fmt, /* Find driver and parse its options */ drv = bdrv_find_format(fmt); if (!drv) { - error_report("Unknown file format '%s'", fmt); error_setg(errp, "Unknown file format '%s'", fmt); - ret = -EINVAL; - goto out; + return; } proto_drv = bdrv_find_protocol(filename); if (!proto_drv) { - error_report("Unknown protocol '%s'", filename); error_setg(errp, "Unknown protocol '%s'", filename); - ret = -EINVAL; - goto out; + return; } create_options = append_option_parameters(create_options, @@ -4484,9 +4480,7 @@ int bdrv_img_create(const char *filename, const char *fmt, if (options) { param = parse_option_parameters(options, create_options, param); if (param == NULL) { - error_report("Invalid options for file format '%s'.", fmt); error_setg(errp, "Invalid options for file format '%s'.", fmt); - ret = -EINVAL; goto out; } } @@ -4494,22 +4488,16 @@ int bdrv_img_create(const char *filename, const char *fmt, if (base_filename) { if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, base_filename)) { - error_report("Backing file not supported for file format '%s'", - fmt); error_setg(errp, "Backing file not supported for file format '%s'", fmt); - ret = -EINVAL; goto out; } } if (base_fmt) { if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { - error_report("Backing file format not supported for file " - "format '%s'", fmt); error_setg(errp, "Backing file format not supported for file " "format '%s'", fmt); - ret = -EINVAL; goto out; } } @@ -4517,11 +4505,8 @@ int bdrv_img_create(const char *filename, const char *fmt, backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); if (backing_file && backing_file->value.s) { if (!strcmp(filename, backing_file->value.s)) { - error_report("Error: Trying to create an image with the " - "same filename as the backing file"); error_setg(errp, "Error: Trying to create an image with the " "same filename as the backing file"); - ret = -EINVAL; goto out; } } @@ -4530,11 +4515,8 @@ int bdrv_img_create(const char *filename, const char *fmt, if (backing_fmt && backing_fmt->value.s) { backing_drv = bdrv_find_format(backing_fmt->value.s); if (!backing_drv) { - error_report("Unknown backing file format '%s'", - backing_fmt->value.s); error_setg(errp, "Unknown backing file format '%s'", backing_fmt->value.s); - ret = -EINVAL; goto out; } } @@ -4556,7 +4538,6 @@ int bdrv_img_create(const char *filename, const char *fmt, ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv); if (ret < 0) { - error_report("Could not open '%s'", backing_file->value.s); error_setg_errno(errp, -ret, "Could not open '%s'", backing_file->value.s); goto out; @@ -4567,9 +4548,7 @@ int bdrv_img_create(const char *filename, const char *fmt, snprintf(buf, sizeof(buf), "%" PRId64, size); set_option_parameter(param, BLOCK_OPT_SIZE, buf); } else { - error_report("Image creation needs a size parameter"); error_setg(errp, "Image creation needs a size parameter"); - ret = -EINVAL; goto out; } } @@ -4579,21 +4558,14 @@ int bdrv_img_create(const char *filename, const char *fmt, puts(""); ret = bdrv_create(drv, filename, param); - if (ret < 0) { if (ret == -ENOTSUP) { - error_report("Formatting or formatting option not supported for " - "file format '%s'", fmt); error_setg(errp,"Formatting or formatting option not supported for " "file format '%s'", fmt); } else if (ret == -EFBIG) { - error_report("The image size is too large for file format '%s'", - fmt); error_setg(errp, "The image size is too large for file format '%s'", fmt); } else { - error_report("%s: error while creating %s: %s", filename, fmt, - strerror(-ret)); error_setg(errp, "%s: error while creating %s: %s", filename, fmt, strerror(-ret)); } @@ -4606,6 +4578,4 @@ out: if (bs) { bdrv_delete(bs); } - - return ret; } diff --git a/block.h b/block.h index ff54d15c86..24bea09530 100644 --- a/block.h +++ b/block.h @@ -343,9 +343,9 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size); -int bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags, Error **errp); +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, Error **errp); void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); From 23e956bfe6af6f71046772478ed08d4e5c9c62d4 Mon Sep 17 00:00:00 2001 From: Corey Bryant Date: Wed, 14 Nov 2012 17:53:16 -0500 Subject: [PATCH 19/43] tests: Add tests for fdsets Signed-off-by: Corey Bryant Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/045 | 129 ++++++++++++++++++++++++++++++++++ tests/qemu-iotests/045.out | 5 ++ tests/qemu-iotests/group | 1 + tests/qemu-iotests/iotests.py | 12 ++++ 4 files changed, 147 insertions(+) create mode 100755 tests/qemu-iotests/045 create mode 100644 tests/qemu-iotests/045.out diff --git a/tests/qemu-iotests/045 b/tests/qemu-iotests/045 new file mode 100755 index 0000000000..2b6f1af27a --- /dev/null +++ b/tests/qemu-iotests/045 @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# +# Tests for fdsets. +# +# Copyright (C) 2012 IBM Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import os +import iotests +from iotests import qemu_img + +image0 = os.path.join(iotests.test_dir, 'image0') +image1 = os.path.join(iotests.test_dir, 'image1') +image2 = os.path.join(iotests.test_dir, 'image2') +image3 = os.path.join(iotests.test_dir, 'image3') +image4 = os.path.join(iotests.test_dir, 'image4') + +class TestFdSets(iotests.QMPTestCase): + + def setUp(self): + self.vm = iotests.VM() + qemu_img('create', '-f', iotests.imgfmt, image0, '128K') + qemu_img('create', '-f', iotests.imgfmt, image1, '128K') + qemu_img('create', '-f', iotests.imgfmt, image2, '128K') + qemu_img('create', '-f', iotests.imgfmt, image3, '128K') + qemu_img('create', '-f', iotests.imgfmt, image4, '128K') + self.file0 = open(image0, 'r') + self.file1 = open(image1, 'w+') + self.file2 = open(image2, 'r') + self.file3 = open(image3, 'r') + self.file4 = open(image4, 'r') + self.vm.add_fd(self.file0.fileno(), 1, 'image0:r') + self.vm.add_fd(self.file1.fileno(), 1, 'image1:w+') + self.vm.add_fd(self.file2.fileno(), 0, 'image2:r') + self.vm.add_fd(self.file3.fileno(), 2, 'image3:r') + self.vm.add_fd(self.file4.fileno(), 2, 'image4:r') + self.vm.add_drive("/dev/fdset/1") + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + self.file0.close() + self.file1.close() + self.file2.close() + self.file3.close() + self.file4.close() + os.remove(image0) + os.remove(image1) + os.remove(image2) + os.remove(image3) + os.remove(image4) + + def test_query_fdset(self): + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 2) + self.assert_qmp(result, 'return[1]/fdset-id', 1) + self.assert_qmp(result, 'return[2]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image3:r') + self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image4:r') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fdset(self): + result = self.vm.qmp('remove-fd', fdset_id=2) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 1) + self.assert_qmp(result, 'return[1]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fd(self): + result = self.vm.qmp('query-fdsets') + fd_image3 = result['return'][0]['fds'][0]['fd'] + result = self.vm.qmp('remove-fd', fdset_id=2, fd=fd_image3) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 2) + self.assert_qmp(result, 'return[1]/fdset-id', 1) + self.assert_qmp(result, 'return[2]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image4:r') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fd_invalid_fdset(self): + result = self.vm.qmp('query-fdsets') + fd_image3 = result['return'][0]['fds'][0]['fd'] + result = self.vm.qmp('remove-fd', fdset_id=3, fd=fd_image3) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'File descriptor named \'fdset-id:3, fd:%d\' not found' % fd_image3) + self.vm.shutdown() + + def test_remove_fd_invalid_fd(self): + result = self.vm.qmp('query-fdsets') + result = self.vm.qmp('remove-fd', fdset_id=2, fd=999) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'File descriptor named \'fdset-id:2, fd:999\' not found') + self.vm.shutdown() + + def test_add_fd_invalid_fd(self): + result = self.vm.qmp('add-fd', fdset_id=2) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'No file descriptor supplied via SCM_RIGHTS') + self.vm.shutdown() + +if __name__ == '__main__': + iotests.main(supported_fmts=['raw']) diff --git a/tests/qemu-iotests/045.out b/tests/qemu-iotests/045.out new file mode 100644 index 0000000000..3f8a935a08 --- /dev/null +++ b/tests/qemu-iotests/045.out @@ -0,0 +1,5 @@ +...... +---------------------------------------------------------------------- +Ran 6 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a4a9044f24..5b39785461 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -51,3 +51,4 @@ 042 rw auto quick 043 rw auto backing 044 rw auto +045 rw auto diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 0be5c7e13f..569ca3d804 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -79,6 +79,18 @@ class VM(object): self._num_drives += 1 return self + def add_fd(self, fd, fdset, opaque, opts=''): + '''Pass a file descriptor to the VM''' + options = ['fd=%d' % fd, + 'set=%d' % fdset, + 'opaque=%s' % opaque] + if opts: + options.append(opts) + + self._args.append('-add-fd') + self._args.append(','.join(options)) + return self + def launch(self): '''Launch the VM and establish a QMP connection''' devnull = open('/dev/null', 'rb') From 791bfa35ee00ca10b13bedfb048ffda385b151c7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Dec 2012 16:35:12 +0100 Subject: [PATCH 20/43] qemu-io: Implement write -c for compressed clusters This makes it easier to create images with both compressed and uncompressed clusters for testing. Signed-off-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- qemu-io.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/qemu-io.c b/qemu-io.c index 92cdb2ab9c..b4b0898741 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -265,6 +265,18 @@ static int do_co_write_zeroes(int64_t offset, int count, int *total) } } +static int do_write_compressed(char *buf, int64_t offset, int count, int *total) +{ + int ret; + + ret = bdrv_write_compressed(bs, offset >> 9, (uint8_t *)buf, count >> 9); + if (ret < 0) { + return ret; + } + *total = count; + return 1; +} + static int do_load_vmstate(char *buf, int64_t offset, int count, int *total) { *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count); @@ -687,6 +699,7 @@ static void write_help(void) " Writes into a segment of the currently open file, using a buffer\n" " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" +" -c, -- write compressed data with bdrv_write_compressed\n" " -p, -- use bdrv_pwrite to write the file\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" @@ -703,7 +716,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, - .args = "[-bCpqz] [-P pattern ] off len", + .args = "[-bcCpqz] [-P pattern ] off len", .oneline = "writes a number of bytes at a specified offset", .help = write_help, }; @@ -712,6 +725,7 @@ static int write_f(int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0; + int cflag = 0; int c, cnt; char *buf = NULL; int64_t offset; @@ -720,11 +734,14 @@ static int write_f(int argc, char **argv) int total = 0; int pattern = 0xcd; - while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) { + while ((c = getopt(argc, argv, "bcCpP:qz")) != EOF) { switch (c) { case 'b': bflag = 1; break; + case 'c': + cflag = 1; + break; case 'C': Cflag = 1; break; @@ -801,6 +818,8 @@ static int write_f(int argc, char **argv) cnt = do_save_vmstate(buf, offset, count, &total); } else if (zflag) { cnt = do_co_write_zeroes(offset, count, &total); + } else if (cflag) { + cnt = do_write_compressed(buf, offset, count, &total); } else { cnt = do_write(buf, offset, count, &total); } From 473c7f0255920bcaf37411990a3725898772817f Mon Sep 17 00:00:00 2001 From: Stefan Priebe Date: Fri, 30 Nov 2012 09:55:46 +0100 Subject: [PATCH 21/43] rbd: Fix race between aio completition and aio cancel This one fixes a race which qemu had also in iscsi block driver between cancellation and io completition. qemu_rbd_aio_cancel was not synchronously waiting for the end of the command. To archieve this it introduces a new status flag which uses -EINPROGRESS. Signed-off-by: Stefan Priebe Reviewed-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/rbd.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index f3becc7a8b..737bab16cc 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -77,6 +77,7 @@ typedef struct RBDAIOCB { int error; struct BDRVRBDState *s; int cancelled; + int status; } RBDAIOCB; typedef struct RADOSCB { @@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) RBDAIOCB *acb = rcb->acb; int64_t r; - if (acb->cancelled) { - qemu_vfree(acb->bounce); - qemu_aio_release(acb); - goto done; - } - r = rcb->ret; if (acb->cmd == RBD_AIO_WRITE || @@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) /* Note that acb->bh can be NULL in case where the aio was cancelled */ acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); qemu_bh_schedule(acb->bh); -done: g_free(rcb); } @@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) { RBDAIOCB *acb = (RBDAIOCB *) blockacb; acb->cancelled = 1; + + while (acb->status == -EINPROGRESS) { + qemu_aio_wait(); + } + + qemu_aio_release(acb); } static const AIOCBInfo rbd_aiocb_info = { @@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); qemu_bh_delete(acb->bh); acb->bh = NULL; + acb->status = 0; - qemu_aio_release(acb); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } static int rbd_aio_discard_wrapper(rbd_image_t image, @@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->s = s; acb->cancelled = 0; acb->bh = NULL; + acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); From fbcad04d6bfdff937536eb23088a01a280a1a3af Mon Sep 17 00:00:00 2001 From: Fabien Chouteau Date: Mon, 10 Dec 2012 12:56:22 +0100 Subject: [PATCH 22/43] Fix error code checking for SetFilePointer() call An error has occurred if the return value is invalid_set_file_pointer and getlasterror doesn't return no_error. Signed-off-by: Fabien Chouteau Acked-by: Stefan Hajnoczi --- block/raw-win32.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/block/raw-win32.c b/block/raw-win32.c index 0c05c58c5a..ce207a3109 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset) { BDRVRawState *s = bs->opaque; LONG low, high; + DWORD dwPtrLow; low = offset; high = offset >> 32; - if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) - return -EIO; - if (!SetEndOfFile(s->hfile)) + + /* + * An error has occurred if the return value is INVALID_SET_FILE_POINTER + * and GetLastError doesn't return NO_ERROR. + */ + dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN); + if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + fprintf(stderr, "SetFilePointer error: %d\n", GetLastError()); return -EIO; + } + if (SetEndOfFile(s->hfile) == 0) { + fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError()); + return -EIO; + } return 0; } From c474ced8fe6684265fbb6a3183eb0cbea561409f Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:18 +0800 Subject: [PATCH 23/43] qemu-option: opt_set(): split it up into more functions The new functions are opts_accepts_any() and find_desc_by_name(), which are also going to be used by qemu_opts_validate() (see next commit). This also makes opt_set() slightly more readable. Signed-off-by: Luiz Capitulino Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- qemu-option.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/qemu-option.c b/qemu-option.c index 27891e74e7..375daaa4ad 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -602,26 +602,36 @@ static void qemu_opt_del(QemuOpt *opt) g_free(opt); } -static void opt_set(QemuOpts *opts, const char *name, const char *value, - bool prepend, Error **errp) +static bool opts_accepts_any(const QemuOpts *opts) +{ + return opts->list->desc[0].name == NULL; +} + +static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, + const char *name) { - QemuOpt *opt; - const QemuOptDesc *desc = opts->list->desc; - Error *local_err = NULL; int i; for (i = 0; desc[i].name != NULL; i++) { if (strcmp(desc[i].name, name) == 0) { - break; + return &desc[i]; } } - if (desc[i].name == NULL) { - if (i == 0) { - /* empty list -> allow any */; - } else { - error_set(errp, QERR_INVALID_PARAMETER, name); - return; - } + + return NULL; +} + +static void opt_set(QemuOpts *opts, const char *name, const char *value, + bool prepend, Error **errp) +{ + QemuOpt *opt; + const QemuOptDesc *desc; + Error *local_err = NULL; + + desc = find_desc_by_name(opts->list->desc, name); + if (!desc && !opts_accepts_any(opts)) { + error_set(errp, QERR_INVALID_PARAMETER, name); + return; } opt = g_malloc0(sizeof(*opt)); @@ -632,9 +642,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value, } else { QTAILQ_INSERT_TAIL(&opts->head, opt, next); } - if (desc[i].name != NULL) { - opt->desc = desc+i; - } + opt->desc = desc; if (value) { opt->str = g_strdup(value); } From db97ceba1e17db59188e91b66e61bf84a6a71081 Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:19 +0800 Subject: [PATCH 24/43] qemu-option: qemu_opts_validate(): fix duplicated code Use opts_accepts_any() and find_desc_by_name(). Signed-off-by: Luiz Capitulino Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- qemu-option.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/qemu-option.c b/qemu-option.c index 375daaa4ad..74321bbc13 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -1076,23 +1076,15 @@ void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp) QemuOpt *opt; Error *local_err = NULL; - assert(opts->list->desc[0].name == NULL); + assert(opts_accepts_any(opts)); QTAILQ_FOREACH(opt, &opts->head, next) { - int i; - - for (i = 0; desc[i].name != NULL; i++) { - if (strcmp(desc[i].name, opt->name) == 0) { - break; - } - } - if (desc[i].name == NULL) { + opt->desc = find_desc_by_name(desc, opt->name); + if (!opt->desc) { error_set(errp, QERR_INVALID_PARAMETER, opt->name); return; } - opt->desc = &desc[i]; - qemu_opt_parse(opt, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); From ad718d01ba0af531d10b0a8685cf5047edfd1891 Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:20 +0800 Subject: [PATCH 25/43] qemu-option: qemu_opt_set_bool(): fix code duplication It will set opt->str in qemu_opt_set_bool, without opt->str, there will be some potential bugs. These are uses of opt->str, and what happens when it isn't set: * qemu_opt_get(): returns NULL, which means "not set". Bug can bite when value isn't the default value. * qemu_opt_parse(): passes NULL to parse_option_bool(), which treats it like "on". Wrong if the value is actually false. Bug can bite when qemu_opts_validate() runs after qemu_opt_set_bool(). * qemu_opt_del(): passes NULL to g_free(), which is just fine. * qemu_opt_foreach(): passes NULL to the callback, which is unlikely to be prepared for it. * qemu_opts_print(): prints NULL, which crashes on some systems. * qemu_opts_to_qdict(): passes NULL to qstring_from_str(), which crashes. It also makes qemu_opt_set_bool more readable by using find_desc_by_name and opts_accepts_any. It is based on Luiz's patch and uses Markus's comments. Discussions can be found at: http://lists.nongnu.org/archive/html/qemu-devel/2012-07/msg02716.html Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- qemu-option.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/qemu-option.c b/qemu-option.c index 74321bbc13..e0131ce7b1 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -677,30 +677,21 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val) { QemuOpt *opt; const QemuOptDesc *desc = opts->list->desc; - int i; - - for (i = 0; desc[i].name != NULL; i++) { - if (strcmp(desc[i].name, name) == 0) { - break; - } - } - if (desc[i].name == NULL) { - if (i == 0) { - /* empty list -> allow any */; - } else { - qerror_report(QERR_INVALID_PARAMETER, name); - return -1; - } - } opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + qerror_report(QERR_INVALID_PARAMETER, name); + g_free(opt); + return -1; + } + opt->name = g_strdup(name); opt->opts = opts; - QTAILQ_INSERT_TAIL(&opts->head, opt, next); - if (desc[i].name != NULL) { - opt->desc = desc+i; - } opt->value.boolean = !!val; + opt->str = g_strdup(val ? "on" : "off"); + QTAILQ_INSERT_TAIL(&opts->head, opt, next); + return 0; } From dd39244978627e41a66b98d20eceddb1d7d25def Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:21 +0800 Subject: [PATCH 26/43] introduce qemu_opts_create_nofail function While id is NULL, qemu_opts_create can not fail, so ignore errors is fine. Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- qemu-option.c | 9 +++++++++ qemu-option.h | 1 + 2 files changed, 10 insertions(+) diff --git a/qemu-option.c b/qemu-option.c index e0131ce7b1..1303188dbd 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -780,6 +780,15 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, return opts; } +QemuOpts *qemu_opts_create_nofail(QemuOptsList *list) +{ + QemuOpts *opts; + Error *errp = NULL; + opts = qemu_opts_create(list, NULL, 0, &errp); + assert_no_error(errp); + return opts; +} + void qemu_opts_reset(QemuOptsList *list) { QemuOpts *opts, *next_opts; diff --git a/qemu-option.h b/qemu-option.h index ca729862d5..b0f8d1ecd6 100644 --- a/qemu-option.h +++ b/qemu-option.h @@ -133,6 +133,7 @@ int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id); QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, int fail_if_exists, Error **errp); +QemuOpts *qemu_opts_create_nofail(QemuOptsList *list); void qemu_opts_reset(QemuOptsList *list); void qemu_opts_loc_restore(QemuOpts *opts); int qemu_opts_set(QemuOptsList *list, const char *id, From e478b448d7c36046462733ffaeaea0961575790a Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:22 +0800 Subject: [PATCH 27/43] use qemu_opts_create_nofail We will use qemu_opts_create_nofail function, it can make code more readable. Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- blockdev.c | 2 +- hw/watchdog.c | 2 +- qemu-config.c | 4 ++-- qemu-img.c | 2 +- qemu-sockets.c | 16 ++++++++-------- vl.c | 12 +++++------- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/blockdev.c b/blockdev.c index 463f4c2094..9a05e57009 100644 --- a/blockdev.c +++ b/blockdev.c @@ -568,7 +568,7 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type) break; case IF_VIRTIO: /* add virtio block device */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); if (arch_type == QEMU_ARCH_S390X) { qemu_opt_set(opts, "driver", "virtio-blk-s390"); } else { diff --git a/hw/watchdog.c b/hw/watchdog.c index b52acedd98..5c82c17d09 100644 --- a/hw/watchdog.c +++ b/hw/watchdog.c @@ -66,7 +66,7 @@ int select_watchdog(const char *p) QLIST_FOREACH(model, &watchdog_list, entry) { if (strcasecmp(model->wdt_name, p) == 0) { /* add the device */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(opts, "driver", p); return 0; } diff --git a/qemu-config.c b/qemu-config.c index aa78fb9ea7..54db9813e8 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -756,7 +756,7 @@ int qemu_global_option(const char *str) return -1; } - opts = qemu_opts_create(&qemu_global_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&qemu_global_opts); qemu_opt_set(opts, "driver", driver); qemu_opt_set(opts, "property", property); qemu_opt_set(opts, "value", str+offset+1); @@ -843,7 +843,7 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname) error_free(local_err); goto out; } - opts = qemu_opts_create(list, NULL, 0, NULL); + opts = qemu_opts_create_nofail(list); continue; } if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2) { diff --git a/qemu-img.c b/qemu-img.c index c4dae88e84..c989a52564 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1934,7 +1934,7 @@ static int img_resize(int argc, char **argv) } /* Parse size */ - param = qemu_opts_create(&resize_options, NULL, 0, NULL); + param = qemu_opts_create_nofail(&resize_options); if (qemu_opt_set(param, BLOCK_OPT_SIZE, size)) { /* Error message already printed when size parsing fails */ ret = -1; diff --git a/qemu-sockets.c b/qemu-sockets.c index d314cf1d1b..c52a40a411 100644 --- a/qemu-sockets.c +++ b/qemu-sockets.c @@ -579,7 +579,7 @@ int inet_listen(const char *str, char *ostr, int olen, addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_listen_opts(opts, port_offset, errp); @@ -618,7 +618,7 @@ int inet_connect(const char *str, Error **errp) addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_connect_opts(opts, errp, NULL, NULL); @@ -652,7 +652,7 @@ int inet_nonblocking_connect(const char *str, addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_connect_opts(opts, errp, callback, opaque); @@ -795,7 +795,7 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) char *path, *optstr; int sock, len; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); optstr = strchr(str, ','); if (optstr) { @@ -823,7 +823,7 @@ int unix_connect(const char *path, Error **errp) QemuOpts *opts; int sock; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); qemu_opt_set(opts, "path", path); sock = unix_connect_opts(opts, errp, NULL, NULL); qemu_opts_del(opts); @@ -840,7 +840,7 @@ int unix_nonblocking_connect(const char *path, g_assert(callback != NULL); - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); qemu_opt_set(opts, "path", path); sock = unix_connect_opts(opts, errp, callback, opaque); qemu_opts_del(opts); @@ -891,7 +891,7 @@ int socket_connect(SocketAddress *addr, Error **errp, QemuOpts *opts; int fd; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); switch (addr->kind) { case SOCKET_ADDRESS_KIND_INET: inet_addr_to_opts(opts, addr->inet); @@ -922,7 +922,7 @@ int socket_listen(SocketAddress *addr, Error **errp) QemuOpts *opts; int fd; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); switch (addr->kind) { case SOCKET_ADDRESS_KIND_INET: inet_addr_to_opts(opts, addr->inet); diff --git a/vl.c b/vl.c index 6b3827cf06..3ebf01f8f1 100644 --- a/vl.c +++ b/vl.c @@ -1996,7 +1996,7 @@ static int balloon_parse(const char *arg) return -1; } else { /* create empty opts */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); } qemu_opt_set(opts, "driver", "virtio-balloon"); return 0; @@ -2246,14 +2246,14 @@ static int virtcon_parse(const char *devname) exit(1); } - bus_opts = qemu_opts_create(device, NULL, 0, NULL); + bus_opts = qemu_opts_create_nofail(device); if (arch_type == QEMU_ARCH_S390X) { qemu_opt_set(bus_opts, "driver", "virtio-serial-s390"); } else { qemu_opt_set(bus_opts, "driver", "virtio-serial-pci"); } - dev_opts = qemu_opts_create(device, NULL, 0, NULL); + dev_opts = qemu_opts_create_nofail(device); qemu_opt_set(dev_opts, "driver", "virtconsole"); snprintf(label, sizeof(label), "virtcon%d", index); @@ -3105,8 +3105,7 @@ int main(int argc, char **argv, char **envp) qemu_opt_set_bool(fsdev, "readonly", qemu_opt_get_bool(opts, "readonly", 0)); - device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, - NULL); + device = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(device, "driver", "virtio-9p-pci"); qemu_opt_set(device, "fsdev", qemu_opt_get(opts, "mount_tag")); @@ -3126,8 +3125,7 @@ int main(int argc, char **argv, char **envp) } qemu_opt_set(fsdev, "fsdriver", "synth"); - device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, - NULL); + device = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(device, "driver", "virtio-9p-pci"); qemu_opt_set(device, "fsdev", "v_synth"); qemu_opt_set(device, "mount_tag", "v_synth"); From b83c18e225cf82a21535561270b6dfd86b1c9031 Mon Sep 17 00:00:00 2001 From: Dong Xu Wang Date: Thu, 6 Dec 2012 14:47:23 +0800 Subject: [PATCH 28/43] create new function: qemu_opt_set_number Signed-off-by: Dong Xu Wang Signed-off-by: Kevin Wolf --- qemu-option.c | 22 ++++++++++++++++++++++ qemu-option.h | 1 + 2 files changed, 23 insertions(+) diff --git a/qemu-option.c b/qemu-option.c index 1303188dbd..94557cfde7 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -695,6 +695,28 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val) return 0; } +int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val) +{ + QemuOpt *opt; + const QemuOptDesc *desc = opts->list->desc; + + opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + qerror_report(QERR_INVALID_PARAMETER, name); + g_free(opt); + return -1; + } + + opt->name = g_strdup(name); + opt->opts = opts; + opt->value.uint = val; + opt->str = g_strdup_printf("%" PRId64, val); + QTAILQ_INSERT_TAIL(&opts->head, opt, next); + + return 0; +} + int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, int abort_on_failure) { diff --git a/qemu-option.h b/qemu-option.h index b0f8d1ecd6..002dd07ee5 100644 --- a/qemu-option.h +++ b/qemu-option.h @@ -126,6 +126,7 @@ int qemu_opt_set(QemuOpts *opts, const char *name, const char *value); void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value, Error **errp); int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val); +int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val); typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque); int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, int abort_on_failure); From 312a2ba0eb8ab19646517aeaa785475d3fbcfd51 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:32:55 +0100 Subject: [PATCH 29/43] blkdebug: Allow usage without config file As soon as new rules can be set during runtime, as introduced by the next patch, blkdebug makes sense even without a config file. Signed-off-by: Kevin Wolf --- block/blkdebug.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/block/blkdebug.c b/block/blkdebug.c index d61ece86a9..c9041ec3d2 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -240,6 +240,11 @@ static int read_config(BDRVBlkdebugState *s, const char *filename) int ret; struct add_rule_data d; + /* Allow usage without config file */ + if (!*filename) { + return 0; + } + f = fopen(filename, "r"); if (f == NULL) { return -errno; From 9e35542b0fc3871caac15ccd57548b99df2c94b7 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:32:56 +0100 Subject: [PATCH 30/43] blkdebug: Factor out remove_rule() The cleanup work to remove a rule depends on the type of the rule. It's easy for the existing rules as there is no data that must be cleaned up and is specific to a type yet, but the next patch will change this. Signed-off-by: Kevin Wolf --- block/blkdebug.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index c9041ec3d2..859792b647 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -234,6 +234,18 @@ static int add_rule(QemuOpts *opts, void *opaque) return 0; } +static void remove_rule(BlkdebugRule *rule) +{ + switch (rule->action) { + case ACTION_INJECT_ERROR: + case ACTION_SET_STATE: + break; + } + + QLIST_REMOVE(rule, next); + g_free(rule); +} + static int read_config(BDRVBlkdebugState *s, const char *filename) { FILE *f; @@ -402,8 +414,7 @@ static void blkdebug_close(BlockDriverState *bs) for (i = 0; i < BLKDBG_EVENT_MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { - QLIST_REMOVE(rule, next); - g_free(rule); + remove_rule(rule); } } } From 3c90c65d7adab49a41952ee14e1d65f81355e408 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:32:57 +0100 Subject: [PATCH 31/43] blkdebug: Implement suspend/resume of AIO requests This allows more systematic AIO testing. The patch adds three new operations to blkdebug: * Setting a "breakpoint" on a blkdebug event. The next request that triggers this breakpoint is suspended and is tagged with a name. The breakpoint is removed after a request has triggered it. * A suspended request (identified by it's tag) can be resumed * It's possible to check whether a suspended request with a given tag exists. This can be used for waiting for an event. Ideally, we would instead tag requests right when they are created and set breakpoints for individual requests. However, at this point the block layer doesn't allow this easily, and breakpoints that trigger for any request already allow a lot of useful testing. Signed-off-by: Kevin Wolf --- block/blkdebug.c | 108 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 3 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 859792b647..294e983306 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -29,8 +29,10 @@ typedef struct BDRVBlkdebugState { int state; int new_state; + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; + QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB { int ret; } BlkdebugAIOCB; +typedef struct BlkdebugSuspendedReq { + Coroutine *co; + char *tag; + QLIST_ENTRY(BlkdebugSuspendedReq) next; +} BlkdebugSuspendedReq; + static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb); static const AIOCBInfo blkdebug_aiocb_info = { @@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = { enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, + ACTION_SUSPEND, }; typedef struct BlkdebugRule { @@ -65,6 +74,9 @@ typedef struct BlkdebugRule { struct { int new_state; } set_state; + struct { + char *tag; + } suspend; } options; QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; @@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque) rule->options.set_state.new_state = qemu_opt_get_number(opts, "new_state", 0); break; + + case ACTION_SUSPEND: + rule->options.suspend.tag = + g_strdup(qemu_opt_get(opts, "tag")); + break; }; /* Add the rule */ @@ -240,6 +257,9 @@ static void remove_rule(BlkdebugRule *rule) case ACTION_INJECT_ERROR: case ACTION_SET_STATE: break; + case ACTION_SUSPEND: + g_free(rule->options.suspend.tag); + break; } QLIST_REMOVE(rule, next); @@ -406,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs, return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque); } + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -419,6 +440,27 @@ static void blkdebug_close(BlockDriverState *bs) } } +static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq r; + + r = (BlkdebugSuspendedReq) { + .co = qemu_coroutine_self(), + .tag = g_strdup(rule->options.suspend.tag), + }; + + remove_rule(rule); + QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + + printf("blkdebug: Suspended request '%s'\n", r.tag); + qemu_coroutine_yield(); + printf("blkdebug: Resuming request '%s'\n", r.tag); + + QLIST_REMOVE(&r, next); + g_free(r.tag); +} + static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, bool injected) { @@ -442,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, case ACTION_SET_STATE: s->new_state = rule->options.set_state.new_state; break; + + case ACTION_SUSPEND: + suspend_request(bs, rule); + break; } return injected; } @@ -449,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; - struct BlkdebugRule *rule; + struct BlkdebugRule *rule, *next; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; s->new_state = s->state; - QLIST_FOREACH(rule, &s->rules[event], next) { + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { injected = process_rule(bs, rule, injected); } s->state = s->new_state; } +static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule; + BlkDebugEvent blkdebug_event; + + if (get_event_by_name(event, &blkdebug_event) < 0) { + return -ENOENT; + } + + + rule = g_malloc(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = blkdebug_event, + .action = ACTION_SUSPEND, + .state = 0, + .options.suspend.tag = g_strdup(tag), + }; + + QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + + return 0; +} + +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + return 0; + } + } + return -ENOENT; +} + + +static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + return true; + } + } + return false; +} + static int64_t blkdebug_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); @@ -480,7 +579,10 @@ static BlockDriver bdrv_blkdebug = { .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, - .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_resume = blkdebug_debug_resume, + .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; static void bdrv_blkdebug_init(void) From 41c695c749b84d40e53e64faadedc0392aaea07e Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:32:58 +0100 Subject: [PATCH 32/43] qemu-io: Add AIO debugging commands This makes the blkdebug suspend/resume functionality available in qemu-io. Use it like this: $ ./qemu-io blkdebug::/tmp/test.qcow2 qemu-io> break write_aio req_a qemu-io> aio_write 0 4k qemu-io> blkdebug: Suspended request 'req_a' qemu-io> resume req_a blkdebug: Resuming request 'req_a' qemu-io> wrote 4096/4096 bytes at offset 0 4 KiB, 1 ops; 0:00:30.71 (133.359788 bytes/sec and 0.0326 ops/sec) Signed-off-by: Kevin Wolf --- block.c | 39 ++++++++++++++++++++++++++++++++ block.h | 5 +++++ block_int.h | 6 +++++ qemu-io.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 114 insertions(+) diff --git a/block.c b/block.c index b3faf3a463..0668c4be17 100644 --- a/block.c +++ b/block.c @@ -3045,7 +3045,46 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) } drv->bdrv_debug_event(bs, event); +} +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { + return bs->drv->bdrv_debug_breakpoint(bs, event, tag); + } + + return -ENOTSUP; +} + +int bdrv_debug_resume(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_resume) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_resume) { + return bs->drv->bdrv_debug_resume(bs, tag); + } + + return -ENOTSUP; +} + +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { + return bs->drv->bdrv_debug_is_suspended(bs, tag); + } + + return false; } /**************************************************************/ diff --git a/block.h b/block.h index 24bea09530..893448a5fc 100644 --- a/block.h +++ b/block.h @@ -431,4 +431,9 @@ typedef enum { #define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt) void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + #endif diff --git a/block_int.h b/block_int.h index 9deedb811a..bf3f79b3db 100644 --- a/block_int.h +++ b/block_int.h @@ -190,6 +190,12 @@ struct BlockDriver { void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + /* * Returns 1 if newly created images are guaranteed to contain only * zeros, 0 otherwise. diff --git a/qemu-io.c b/qemu-io.c index b4b0898741..1637773302 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -1671,6 +1671,67 @@ static const cmdinfo_t map_cmd = { .oneline = "prints the allocated areas of a file", }; +static int break_f(int argc, char **argv) +{ + int ret; + + ret = bdrv_debug_breakpoint(bs, argv[1], argv[2]); + if (ret < 0) { + printf("Could not set breakpoint: %s\n", strerror(-ret)); + } + + return 0; +} + +static const cmdinfo_t break_cmd = { + .name = "break", + .argmin = 2, + .argmax = 2, + .cfunc = break_f, + .args = "event tag", + .oneline = "sets a breakpoint on event and tags the stopped " + "request as tag", +}; + +static int resume_f(int argc, char **argv) +{ + int ret; + + ret = bdrv_debug_resume(bs, argv[1]); + if (ret < 0) { + printf("Could not resume request: %s\n", strerror(-ret)); + } + + return 0; +} + +static const cmdinfo_t resume_cmd = { + .name = "resume", + .argmin = 1, + .argmax = 1, + .cfunc = resume_f, + .args = "tag", + .oneline = "resumes the request tagged as tag", +}; + +static int wait_break_f(int argc, char **argv) +{ + while (!bdrv_debug_is_suspended(bs, argv[1])) { + qemu_aio_wait(); + } + + return 0; +} + +static const cmdinfo_t wait_break_cmd = { + .name = "wait_break", + .argmin = 1, + .argmax = 1, + .cfunc = wait_break_f, + .args = "tag", + .oneline = "waits for the suspension of a request", +}; + static int abort_f(int argc, char **argv) { abort(); @@ -1934,6 +1995,9 @@ int main(int argc, char **argv) add_command(&discard_cmd); add_command(&alloc_cmd); add_command(&map_cmd); + add_command(&break_cmd); + add_command(&resume_cmd); + add_command(&wait_break_cmd); add_command(&abort_cmd); add_args_command(init_args_command); From 67a7a0ebe5ef0f337d5f7e7e618b08c562a55da0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:32:59 +0100 Subject: [PATCH 33/43] qcow2: Move BLKDBG_EVENT out of the lock We want to use these events to suspend requests for testing concurrent AIO requests. Suspending requests while they are holding the CoMutex is rather boring for this purpose. Signed-off-by: Kevin Wolf --- block/qcow2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index c1ff31f482..0a08ec74c7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -835,8 +835,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, cur_nr_sectors * 512); } - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, From 91d4093dce58e343e2336324794daa93517b86c2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 6 Dec 2012 14:33:00 +0100 Subject: [PATCH 34/43] qemu-iotests: Test concurrent cluster allocations This adds some first tests for qcow2's dependency handling when two parallel write requests access the same cluster. Signed-off-by: Kevin Wolf --- tests/qemu-iotests/046 | 215 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/046.out | 163 ++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 379 insertions(+) create mode 100755 tests/qemu-iotests/046 create mode 100644 tests/qemu-iotests/046.out diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046 new file mode 100755 index 0000000000..e0176f42df --- /dev/null +++ b/tests/qemu-iotests/046 @@ -0,0 +1,215 @@ +#!/bin/bash +# +# Test concurrent cluster allocations +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +CLUSTER_SIZE=64k +size=128M + +echo +echo "== creating backing file for COW tests ==" + +_make_test_img $size + +function backing_io() +{ + local offset=$1 + local sectors=$2 + local op=$3 + local pattern=0 + local cur_sec=0 + + for i in $(seq 0 $((sectors - 1))); do + cur_sec=$((offset / 65536 + i)) + pattern=$(( ( (cur_sec % 128) + (cur_sec / 128)) % 128 )) + + echo "$op -P $pattern $((cur_sec * 64))k 64k" + done +} + +backing_io 0 16 write | $QEMU_IO $TEST_IMG | _filter_qemu_io + +mv $TEST_IMG $TEST_IMG.base + +_make_test_img -b $TEST_IMG.base 6G + +echo +echo "== Some concurrent requests touching the same cluster ==" + +function overlay_io() +{ +# Allocate middle of cluster 1, then write to somewhere before and after it +cat < wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 196608 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 327680 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 393216 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 458752 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 524288 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 589824 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 655360 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 720896 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 786432 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 851968 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 917504 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 983040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' + +== Some concurrent requests touching the same cluster == +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 32768/32768 bytes at offset XXX +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 4096/4096 bytes at offset XXX +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 32768/32768 bytes at offset XXX +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 4096/4096 bytes at offset XXX +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> +== Verify image content == +qemu-io> read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 65536 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 73728 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 81920 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 98304 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 106496 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 114688 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 122880 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 131072 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 163840 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 65536/65536 bytes at offset 172032 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 237568 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 262144 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 294912 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 303104 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 65536/65536 bytes at offset 311296 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 376832 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 393216 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 417792 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 425984 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 434176 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 491520 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 516096 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 524288 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 548864 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 557056 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 565248 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 622592 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 647168 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 655360 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 679936 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 688128 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 696320 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 753664 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 5b39785461..a0307de06b 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -52,3 +52,4 @@ 043 rw auto backing 044 rw auto 045 rw auto +046 rw auto aio From a7f3d65b65b8c86a5ff0c0abcfefb45e2ec6fe4c Mon Sep 17 00:00:00 2001 From: Pavel Hrdina Date: Tue, 11 Dec 2012 08:55:48 +0100 Subject: [PATCH 35/43] atapi: reset cdrom tray statuses on ide_reset Tray statuses should be also reseted. Some guests may lock the tray and after reset before any kernel is loaded the tray should be unlocked. Also if you reset the real computer the tray is closed. We should do the same in qemu. Signed-off-by: Pavel Hrdina Signed-off-by: Kevin Wolf --- hw/ide/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/ide/core.c b/hw/ide/core.c index c4f93d0e47..1235612d95 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1869,6 +1869,8 @@ static void ide_reset(IDEState *s) s->io_buffer_index = 0; s->cd_sector_size = 0; s->atapi_dma = 0; + s->tray_locked = 0; + s->tray_open = 0; /* ATA DMA state */ s->io_buffer_size = 0; s->req_nb_sectors = 0; From 1d3afd649bc77aa14bc2741e2da6475822d41c5f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:42 +0100 Subject: [PATCH 36/43] qcow2: Round QCowL2Meta.offset down to cluster boundary The offset within the cluster is already present as n_start and this is what the code uses. QCowL2Meta.offset is only needed at a cluster granularity. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 4 ++-- block/qcow2.h | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211c57..d17a37c2fa 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -631,7 +631,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; + start_sect = m->offset >> 9; if (m->n_start) { cow = true; qemu_co_mutex_unlock(&s->lock); @@ -966,7 +966,7 @@ again: .cluster_offset = keep_clusters == 0 ? alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset, + .offset = alloc_offset & ~(s->cluster_size - 1), .n_start = keep_clusters == 0 ? n_start : 0, .nb_clusters = nb_clusters, .nb_available = MIN(requested_sectors, avail_sectors), diff --git a/block/qcow2.h b/block/qcow2.h index b4eb65470e..2a406a7e83 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -199,12 +199,34 @@ struct QCowAIOCB; /* XXX This could be private for qcow2-cluster.c */ typedef struct QCowL2Meta { + /** Guest offset of the first newly allocated cluster */ uint64_t offset; + + /** Host offset of the first cluster of the request */ uint64_t cluster_offset; + + /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; + + /** + * Number of sectors between the start of the first allocated cluster and + * the area that the guest actually writes to. + */ int n_start; + + /** + * Number of sectors from the start of the first allocated cluster to + * the end of the (possibly shortened) request + */ int nb_available; + + /** Number of newly allocated clusters */ int nb_clusters; + + /** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ CoQueue dependent_requests; QLIST_ENTRY(QCowL2Meta) next_in_flight; From 593fb83cacf3818a5623f31a6c04c24d87519ad0 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:43 +0100 Subject: [PATCH 37/43] qcow2: Introduce Qcow2COWRegion This makes it easier to address the areas for which a COW must be performed. As a nice side effect, the COW code in qcow2_alloc_cluster_link_l2 becomes really trivial. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 83 +++++++++++++++++++++++++++---------------- block/qcow2.h | 29 +++++++++++---- 2 files changed, 76 insertions(+), 36 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index d17a37c2fa..94b7f136fa 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -615,13 +615,41 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) +{ + BDRVQcowState *s = bs->opaque; + int ret; + + if (r->nb_sectors == 0) { + return 0; + } + + qemu_co_mutex_unlock(&s->lock); + ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, + r->offset / BDRV_SECTOR_SIZE, + r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + qemu_co_mutex_lock(&s->lock); + + if (ret < 0) { + return ret; + } + + /* + * Before we update the L2 table to actually point to the new cluster, we + * need to be sure that the refcounts have been increased and COW was + * handled. + */ + qcow2_cache_depends_on_flush(s->l2_table_cache); + + return 0; +} + int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, *l2_table; + uint64_t *old_cluster, *l2_table; uint64_t cluster_offset = m->alloc_offset; - bool cow = false; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); @@ -631,36 +659,17 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); /* copy content of unmodified sectors */ - start_sect = m->offset >> 9; - if (m->n_start) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + ret = perform_cow(bs, m, &m->cow_start); + if (ret < 0) { + goto err; } - if (m->nb_available & (s->cluster_sectors - 1)) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available, - align_offset(m->nb_available, s->cluster_sectors)); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + ret = perform_cow(bs, m, &m->cow_end); + if (ret < 0) { + goto err; } - /* - * Update L2 table. - * - * Before we update the L2 table to actually point to the new cluster, we - * need to be sure that the refcounts have been increased and COW was - * handled. - */ - if (cow) { - qcow2_cache_depends_on_flush(s->l2_table_cache); - } + /* Update L2 table. */ if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, @@ -957,19 +966,33 @@ again: * * avail_sectors: Number of sectors from the start of the first * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the aread that the write + * request actually writes to (excluding COW at the end) */ int requested_sectors = n_end - keep_clusters * s->cluster_sectors; int avail_sectors = nb_clusters << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = keep_clusters == 0 ? n_start : 0; + int nb_sectors = MIN(requested_sectors, avail_sectors); *m = (QCowL2Meta) { .cluster_offset = keep_clusters == 0 ? alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, .offset = alloc_offset & ~(s->cluster_size - 1), - .n_start = keep_clusters == 0 ? n_start : 0, .nb_clusters = nb_clusters, - .nb_available = MIN(requested_sectors, avail_sectors), + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, }; qemu_co_queue_init(&m->dependent_requests); QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); diff --git a/block/qcow2.h b/block/qcow2.h index 2a406a7e83..1106b33206 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -196,6 +196,17 @@ typedef struct QCowCreateState { struct QCowAIOCB; +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + uint64_t offset; + + /** Number of sectors to copy */ + int nb_sectors; +} Qcow2COWRegion; + /* XXX This could be private for qcow2-cluster.c */ typedef struct QCowL2Meta { @@ -208,12 +219,6 @@ typedef struct QCowL2Meta /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; - /** - * Number of sectors between the start of the first allocated cluster and - * the area that the guest actually writes to. - */ - int n_start; - /** * Number of sectors from the start of the first allocated cluster to * the end of the (possibly shortened) request @@ -229,6 +234,18 @@ typedef struct QCowL2Meta */ CoQueue dependent_requests; + /** + * The COW Region between the start of the first allocated cluster and the + * area the guest actually writes to. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region between the area the guest actually writes to and the + * end of the last allocated cluster. + */ + Qcow2COWRegion cow_end; + QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; From cf5c1a231ee99ac21fe8258faf50bb1f65884343 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:44 +0100 Subject: [PATCH 38/43] qcow2: Allocate l2meta dynamically As soon as delayed COW is introduced, the l2meta struct is needed even after completion of the request, so it can't live on the stack. Signed-off-by: Kevin Wolf --- block/qcow2.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 0a08ec74c7..71f870d829 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -774,15 +774,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; - QCowL2Meta l2meta = { - .nb_clusters = 0, - }; + QCowL2Meta *l2meta; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); - qemu_co_queue_init(&l2meta.dependent_requests); - qemu_iovec_init(&hd_qiov, qiov->niov); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -791,6 +787,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + l2meta = g_malloc0(sizeof(*l2meta)); + qemu_co_queue_init(&l2meta->dependent_requests); + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; @@ -800,17 +799,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, &l2meta); + index_in_cluster, n_end, &cur_nr_sectors, l2meta); if (ret < 0) { goto fail; } - if (l2meta.nb_clusters > 0 && + if (l2meta->nb_clusters > 0 && (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) { qcow2_mark_dirty(bs); } - cluster_offset = l2meta.cluster_offset; + cluster_offset = l2meta->cluster_offset; assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); @@ -847,12 +846,14 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - ret = qcow2_alloc_cluster_link_l2(bs, &l2meta); + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); if (ret < 0) { goto fail; } - run_dependent_requests(s, &l2meta); + run_dependent_requests(s, l2meta); + g_free(l2meta); + l2meta = NULL; remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -862,7 +863,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: - run_dependent_requests(s, &l2meta); + if (l2meta != NULL) { + run_dependent_requests(s, l2meta); + g_free(l2meta); + } qemu_co_mutex_unlock(&s->lock); From 060bee8943c27d4d53f65570fafaa2559fcd87c3 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:45 +0100 Subject: [PATCH 39/43] qcow2: Drop l2meta.cluster_offset There's no real reason to have an l2meta for normal requests that don't allocate anything. Before we can get rid of it, we must return the host cluster offset in a different way. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 10 ++++++---- block/qcow2.c | 14 +++++++------- block/qcow2.h | 5 +---- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 94b7f136fa..c4752ee812 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -856,7 +856,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m) { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; @@ -929,7 +929,6 @@ again: /* If there is something left to allocate, do that now */ *m = (QCowL2Meta) { - .cluster_offset = cluster_offset, .nb_clusters = 0, }; qemu_co_queue_init(&m->dependent_requests); @@ -977,9 +976,11 @@ again: int alloc_n_start = keep_clusters == 0 ? n_start : 0; int nb_sectors = MIN(requested_sectors, avail_sectors); + if (keep_clusters == 0) { + cluster_offset = alloc_cluster_offset; + } + *m = (QCowL2Meta) { - .cluster_offset = keep_clusters == 0 ? - alloc_cluster_offset : cluster_offset, .alloc_offset = alloc_cluster_offset, .offset = alloc_offset & ~(s->cluster_size - 1), .nb_clusters = nb_clusters, @@ -1007,6 +1008,7 @@ again: assert(sectors > n_start); *num = sectors - n_start; + *host_offset = cluster_offset; return 0; diff --git a/block/qcow2.c b/block/qcow2.c index 71f870d829..66ca12f94e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -799,7 +799,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, l2meta); + index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, l2meta); if (ret < 0) { goto fail; } @@ -809,7 +809,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, qcow2_mark_dirty(bs); } - cluster_offset = l2meta->cluster_offset; assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); @@ -1132,6 +1131,7 @@ static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; uint64_t offset; + uint64_t host_offset = 0; int num; int ret; QCowL2Meta meta; @@ -1139,18 +1139,18 @@ static int preallocate(BlockDriverState *bs) nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; qemu_co_queue_init(&meta.dependent_requests); - meta.cluster_offset = 0; while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); - ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); + ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, + &host_offset, &meta); if (ret < 0) { return ret; } ret = qcow2_alloc_cluster_link_l2(bs, &meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); + qcow2_free_any_clusters(bs, meta.alloc_offset, meta.nb_clusters); return ret; } @@ -1169,10 +1169,10 @@ static int preallocate(BlockDriverState *bs) * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. */ - if (meta.cluster_offset != 0) { + if (host_offset != 0) { uint8_t buf[512]; memset(buf, 0, 512); - ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1); + ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); if (ret < 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index 1106b33206..24f100125c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -213,9 +213,6 @@ typedef struct QCowL2Meta /** Guest offset of the first newly allocated cluster */ uint64_t offset; - /** Host offset of the first cluster of the request */ - uint64_t cluster_offset; - /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; @@ -336,7 +333,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m); + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); From f50f88b9fea09fef12cc293126cf45dcf0ef600b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:46 +0100 Subject: [PATCH 40/43] qcow2: Allocate l2meta only for cluster allocations Even for writes to already allocated clusters, an l2meta is allocated, though it stays effectively unused. After this patch, only allocating requests still have one. Each l2meta now describes an in-flight request that writes to clusters that are not yet hooked up in the L2 table. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 23 +++++++++-------------- block/qcow2.c | 32 +++++++++++++++++--------------- block/qcow2.h | 7 +++++-- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c4752ee812..c2b59e7d6a 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -652,9 +652,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) uint64_t cluster_offset = m->alloc_offset; trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); - - if (m->nb_clusters == 0) - return 0; + assert(m->nb_clusters > 0); old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); @@ -856,7 +854,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; @@ -928,11 +926,6 @@ again: } /* If there is something left to allocate, do that now */ - *m = (QCowL2Meta) { - .nb_clusters = 0, - }; - qemu_co_queue_init(&m->dependent_requests); - if (nb_clusters > 0) { uint64_t alloc_offset; uint64_t alloc_cluster_offset; @@ -980,7 +973,9 @@ again: cluster_offset = alloc_cluster_offset; } - *m = (QCowL2Meta) { + *m = g_malloc0(sizeof(**m)); + + **m = (QCowL2Meta) { .alloc_offset = alloc_cluster_offset, .offset = alloc_offset & ~(s->cluster_size - 1), .nb_clusters = nb_clusters, @@ -995,8 +990,8 @@ again: .nb_sectors = avail_sectors - nb_sectors, }, }; - qemu_co_queue_init(&m->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); } } @@ -1013,8 +1008,8 @@ again: return 0; fail: - if (m->nb_clusters > 0) { - QLIST_REMOVE(m, next_in_flight); + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); } return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index 66ca12f94e..08d92cc6e3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -787,8 +787,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { - l2meta = g_malloc0(sizeof(*l2meta)); - qemu_co_queue_init(&l2meta->dependent_requests); + l2meta = NULL; trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); @@ -799,7 +798,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, l2meta); + index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta); if (ret < 0) { goto fail; } @@ -845,14 +844,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - ret = qcow2_alloc_cluster_link_l2(bs, l2meta); - if (ret < 0) { - goto fail; - } + if (l2meta != NULL) { + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); + if (ret < 0) { + goto fail; + } - run_dependent_requests(s, l2meta); - g_free(l2meta); - l2meta = NULL; + run_dependent_requests(s, l2meta); + g_free(l2meta); + l2meta = NULL; + } remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -1134,11 +1135,10 @@ static int preallocate(BlockDriverState *bs) uint64_t host_offset = 0; int num; int ret; - QCowL2Meta meta; + QCowL2Meta *meta; nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; - qemu_co_queue_init(&meta.dependent_requests); while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); @@ -1148,15 +1148,17 @@ static int preallocate(BlockDriverState *bs) return ret; } - ret = qcow2_alloc_cluster_link_l2(bs, &meta); + ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.alloc_offset, meta.nb_clusters); + qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters); return ret; } /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ - run_dependent_requests(bs->opaque, &meta); + if (meta != NULL) { + run_dependent_requests(bs->opaque, meta); + } /* TODO Preallocate data if requested */ diff --git a/block/qcow2.h b/block/qcow2.h index 24f100125c..6dc79b5af4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -207,7 +207,10 @@ typedef struct Qcow2COWRegion { int nb_sectors; } Qcow2COWRegion; -/* XXX This could be private for qcow2-cluster.c */ +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that are not referenced in their L2 table yet. + */ typedef struct QCowL2Meta { /** Guest offset of the first newly allocated cluster */ @@ -333,7 +336,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta *m); + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); From 280d373579558f73a8b70e329d9a6206933d3809 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:47 +0100 Subject: [PATCH 41/43] qcow2: Enable dirty flag in qcow2_alloc_cluster_link_l2 This is closer to where the dirty flag is really needed, and it avoids having checks for special cases related to cluster allocation directly in the writev loop. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 5 ++++- block/qcow2.c | 7 +------ block/qcow2.h | 2 ++ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c2b59e7d6a..7a038aca40 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -668,11 +668,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) } /* Update L2 table. */ - + if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) { + qcow2_mark_dirty(bs); + } if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; diff --git a/block/qcow2.c b/block/qcow2.c index 08d92cc6e3..dbcc0fff97 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs, * updated successfully. Therefore it is not required to check the return * value of this function. */ -static int qcow2_mark_dirty(BlockDriverState *bs) +int qcow2_mark_dirty(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; uint64_t val; @@ -803,11 +803,6 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - if (l2meta->nb_clusters > 0 && - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) { - qcow2_mark_dirty(bs); - } - assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); diff --git a/block/qcow2.h b/block/qcow2.h index 6dc79b5af4..a60fcb429a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -303,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); + +int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ From 4e95314e2bb7baa64f2a9026df5e2649081b7060 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:48 +0100 Subject: [PATCH 42/43] qcow2: Execute run_dependent_requests() without lock There's no reason for run_dependent_requests() to hold s->lock, and a later patch will require that in fact the lock is not held. Also, before this patch, run_dependent_requests() not only does what its name suggests, but also removes the l2meta from the list of in-flight requests. When changing this, it becomes an one-liner, so just inline it completely. Signed-off-by: Kevin Wolf --- block/qcow2.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index dbcc0fff97..8520bda21a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -745,21 +745,6 @@ fail: return ret; } -static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) -{ - /* Take the request off the list of running requests */ - if (m->nb_clusters != 0) { - QLIST_REMOVE(m, next_in_flight); - } - - /* Restart all dependent requests */ - if (!qemu_co_queue_empty(&m->dependent_requests)) { - qemu_co_mutex_unlock(&s->lock); - qemu_co_queue_restart_all(&m->dependent_requests); - qemu_co_mutex_lock(&s->lock); - } -} - static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, @@ -845,7 +830,15 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - run_dependent_requests(s, l2meta); + /* Take the request off the list of running requests */ + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + + qemu_co_mutex_unlock(&s->lock); + qemu_co_queue_restart_all(&l2meta->dependent_requests); + qemu_co_mutex_lock(&s->lock); + g_free(l2meta); l2meta = NULL; } @@ -858,13 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: + qemu_co_mutex_unlock(&s->lock); + if (l2meta != NULL) { - run_dependent_requests(s, l2meta); + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + qemu_co_queue_restart_all(&l2meta->dependent_requests); g_free(l2meta); } - qemu_co_mutex_unlock(&s->lock); - qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -1152,7 +1148,7 @@ static int preallocate(BlockDriverState *bs) /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ if (meta != NULL) { - run_dependent_requests(bs->opaque, meta); + QLIST_REMOVE(meta, next_in_flight); } /* TODO Preallocate data if requested */ From 226c3c26b9800b7c6a8d3100e1faad6d2b97b0f5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 7 Dec 2012 18:08:49 +0100 Subject: [PATCH 43/43] qcow2: Factor out handle_dependencies() Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 70 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 7a038aca40..468ef1be56 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -753,38 +753,16 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + unsigned int *nb_clusters) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; - trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - - /* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { uint64_t start = guest_offset >> s->cluster_bits; @@ -817,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, abort(); } + return 0; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + int ret; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + + ret = handle_dependencies(bs, guest_offset, nb_clusters); + if (ret < 0) { + return ret; + } + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -828,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { - int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret < 0) { return ret; }