From 89dbe18089127cf90993359096b659ea6f819848 Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Tue, 25 Oct 2016 18:33:57 +0530 Subject: [PATCH 01/29] block/ssh: Add ssh_has_filename_options_conflict() We have 5 options plus ("server") option which is added in the next patch that conflict with specifying a SSH filename. We need to iterate over all the options to check whether its key has an "server." prefix. This iteration will help us adding the new option "server" easily. Signed-off-by: Ashijeet Acharya Reviewed-by: Max Reitz Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/ssh.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/block/ssh.c b/block/ssh.c index 5ce12b633a..75cb7bc02d 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -254,15 +254,30 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return -EINVAL; } +static bool ssh_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "port") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "host_key_check")) + { + error_setg(errp, "Option '%s' cannot be used with a file name", + qe->key); + return true; + } + } + + return false; +} + static void ssh_parse_filename(const char *filename, QDict *options, Error **errp) { - if (qdict_haskey(options, "user") || - qdict_haskey(options, "host") || - qdict_haskey(options, "port") || - qdict_haskey(options, "path") || - qdict_haskey(options, "host_key_check")) { - error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option"); + if (ssh_has_filename_options_conflict(options, errp)) { return; } From 89cadc9dc0f7c6a48c243bf7d9ad983a9b667426 Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Tue, 25 Oct 2016 18:33:58 +0530 Subject: [PATCH 02/29] util/qemu-sockets: Make inet_connect_saddr() public Make inet_connect_saddr() in util/qemu-sockets.c public in order to be able to use it with InetSocketAddress sockets outside of util/qemu-sockets.c independently. Signed-off-by: Ashijeet Acharya Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- include/qemu/sockets.h | 2 ++ util/qemu-sockets.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 9eb24707df..5589e6842b 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -34,6 +34,8 @@ typedef void NonBlockingConnectHandler(int fd, Error *err, void *opaque); InetSocketAddress *inet_parse(const char *str, Error **errp); int inet_connect(const char *str, Error **errp); +int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque); NetworkAddressFamily inet_netfamily(int family); diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 4cef549d90..31f7fc69c1 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -412,8 +412,8 @@ static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr, * function succeeds, callback will be called when the connection * completes, with the file descriptor on success, or -1 on error. */ -static int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) { Error *local_err = NULL; struct addrinfo *res, *e; From 0da5b8ef5d81c3fab0ba560c7c2140da27309023 Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Tue, 25 Oct 2016 18:33:59 +0530 Subject: [PATCH 03/29] block/ssh: Add InetSocketAddress and accept it Add InetSocketAddress compatibility to SSH driver. Add a new option "server" to the SSH block driver which then accepts a InetSocketAddress. "host" and "port" are supported as legacy options and are mapped to their InetSocketAddress representation. Signed-off-by: Ashijeet Acharya Signed-off-by: Kevin Wolf --- block/ssh.c | 98 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 16 deletions(-) diff --git a/block/ssh.c b/block/ssh.c index 75cb7bc02d..20fa9c222b 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -30,10 +30,14 @@ #include "block/block_int.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "qemu/cutils.h" #include "qemu/sockets.h" #include "qemu/uri.h" +#include "qapi-visit.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" /* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in * this block driver code. @@ -74,8 +78,9 @@ typedef struct BDRVSSHState { */ LIBSSH2_SFTP_ATTRIBUTES attrs; + InetSocketAddress *inet; + /* Used to warn if 'flush' is not supported. */ - char *hostport; bool unsafe_flush_warning; } BDRVSSHState; @@ -89,7 +94,6 @@ static void ssh_state_init(BDRVSSHState *s) static void ssh_state_free(BDRVSSHState *s) { - g_free(s->hostport); if (s->sftp_handle) { libssh2_sftp_close(s->sftp_handle); } @@ -263,7 +267,8 @@ static bool ssh_has_filename_options_conflict(QDict *options, Error **errp) !strcmp(qe->key, "port") || !strcmp(qe->key, "path") || !strcmp(qe->key, "user") || - !strcmp(qe->key, "host_key_check")) + !strcmp(qe->key, "host_key_check") || + strstart(qe->key, "server.", NULL)) { error_setg(errp, "Option '%s' cannot be used with a file name", qe->key); @@ -555,14 +560,69 @@ static QemuOptsList ssh_runtime_opts = { }, }; +static bool ssh_process_legacy_socket_options(QDict *output_opts, + QemuOpts *legacy_opts, + Error **errp) +{ + const char *host = qemu_opt_get(legacy_opts, "host"); + const char *port = qemu_opt_get(legacy_opts, "port"); + + if (!host && port) { + error_setg(errp, "port may not be used without host"); + return false; + } + + if (host) { + qdict_put(output_opts, "server.host", qstring_from_str(host)); + qdict_put(output_opts, "server.port", + qstring_from_str(port ?: stringify(22))); + } + + return true; +} + +static InetSocketAddress *ssh_config(BDRVSSHState *s, QDict *options, + Error **errp) +{ + InetSocketAddress *inet = NULL; + QDict *addr = NULL; + QObject *crumpled_addr = NULL; + Visitor *iv = NULL; + Error *local_error = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "SSH server address missing"); + goto out; + } + + crumpled_addr = qdict_crumple(addr, errp); + if (!crumpled_addr) { + goto out; + } + + iv = qobject_input_visitor_new(crumpled_addr, true); + visit_type_InetSocketAddress(iv, NULL, &inet, &local_error); + if (local_error) { + error_propagate(errp, local_error); + goto out; + } + +out: + QDECREF(addr); + qobject_decref(crumpled_addr); + visit_free(iv); + return inet; +} + static int connect_to_ssh(BDRVSSHState *s, QDict *options, int ssh_flags, int creat_mode, Error **errp) { int r, ret; QemuOpts *opts = NULL; Error *local_err = NULL; - const char *host, *user, *path, *host_key_check; - int port; + const char *user, *path, *host_key_check; + long port = 0; opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -572,15 +632,11 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, goto err; } - host = qemu_opt_get(opts, "host"); - if (!host) { + if (!ssh_process_legacy_socket_options(options, opts, errp)) { ret = -EINVAL; - error_setg(errp, "No hostname was specified"); goto err; } - port = qemu_opt_get_number(opts, "port", 22); - path = qemu_opt_get(opts, "path"); if (!path) { ret = -EINVAL; @@ -603,12 +659,21 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, host_key_check = "yes"; } - /* Construct the host:port name for inet_connect. */ - g_free(s->hostport); - s->hostport = g_strdup_printf("%s:%d", host, port); + /* Pop the config into our state object, Exit if invalid */ + s->inet = ssh_config(s, options, errp); + if (!s->inet) { + ret = -EINVAL; + goto err; + } + + if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) { + error_setg(errp, "Use only numeric port value"); + ret = -EINVAL; + goto err; + } /* Open the socket and connect. */ - s->sock = inet_connect(s->hostport, errp); + s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL); if (s->sock < 0) { ret = -EIO; goto err; @@ -634,7 +699,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, } /* Check the remote host's key against known_hosts. */ - ret = check_host_key(s, host, port, host_key_check, errp); + ret = check_host_key(s, s->inet->host, port, host_key_check, + errp); if (ret < 0) { goto err; } @@ -1055,7 +1121,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) { if (!s->unsafe_flush_warning) { error_report("warning: ssh server %s does not support fsync", - s->hostport); + s->inet->host); if (what) { error_report("to support fsync, you need %s", what); } From 1059f1bb42928e925f7dacb85da157a6e99c20ea Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Tue, 25 Oct 2016 18:34:00 +0530 Subject: [PATCH 04/29] block/ssh: Use InetSocketAddress options Drop the use of legacy options in favour of the InetSocketAddress options. Signed-off-by: Ashijeet Acharya Reviewed-by: Max Reitz Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/ssh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/ssh.c b/block/ssh.c index 20fa9c222b..ca071c569c 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -197,6 +197,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) { URI *uri = NULL; QueryParams *qp; + char *port_str; int i; uri = uri_parse(filename); @@ -229,11 +230,11 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) qdict_put(options, "user", qstring_from_str(uri->user)); } - qdict_put(options, "host", qstring_from_str(uri->server)); + qdict_put(options, "server.host", qstring_from_str(uri->server)); - if (uri->port) { - qdict_put(options, "port", qint_from_int(uri->port)); - } + port_str = g_strdup_printf("%d", uri->port ?: 22); + qdict_put(options, "server.port", qstring_from_str(port_str)); + g_free(port_str); qdict_put(options, "path", qstring_from_str(uri->path)); From ad0e90a682280030af81ece502715f64232706db Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Tue, 25 Oct 2016 18:34:01 +0530 Subject: [PATCH 05/29] qapi: allow blockdev-add for ssh Introduce new object 'BlockdevOptionsSsh' in qapi/block-core.json to support blockdev-add for SSH network protocol driver. Use only 'struct InetSocketAddress' since SSH only supports connection over TCP. Signed-off-by: Ashijeet Acharya Reviewed-by: Kevin Wolf [ kwolf: Removed host_key_check option, we want to expose this later in a structured way rather than as a string that must be parsed ] Signed-off-by: Kevin Wolf --- qapi/block-core.json | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index cd1fa7ba07..0f2b7032fc 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1703,7 +1703,7 @@ # # @host_device, @host_cdrom: Since 2.1 # @gluster: Since 2.7 -# @nbd: Since 2.8 +# @nbd, @ssh: Since 2.8 # # Since: 2.0 ## @@ -1712,7 +1712,8 @@ 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'luks', 'nbd', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', - 'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } + 'replication', 'ssh', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', + 'vvfat' ] } ## # @BlockdevOptionsFile @@ -1949,6 +1950,25 @@ '*vport': 'int', '*segment': 'str' } } +## +# @BlockdevOptionsSsh +# +# @server: host address +# +# @path: path to the image on the host +# +# @user: #optional user as which to connect, defaults to current +# local user name +# +# TODO: Expose the host_key_check option in QMP +# +# Since 2.8 +## +{ 'struct': 'BlockdevOptionsSsh', + 'data': { 'server': 'InetSocketAddress', + 'path': 'str', + '*user': 'str' } } + ## # @BlkdebugEvent @@ -2296,7 +2316,7 @@ # TODO rbd: Wait for structured options 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options -# TODO ssh: Should take InetSocketAddress for 'host'? + 'ssh': 'BlockdevOptionsSsh', 'tftp': 'BlockdevOptionsCurl', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', From c0778f6693213d0d6bfa7bee8045374a798db750 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:02 +0300 Subject: [PATCH 06/29] block: Add bdrv_drain_all_{begin,end}() bdrv_drain_all() doesn't allow the caller to do anything after all pending requests have been completed but before block jobs are resumed. This patch splits bdrv_drain_all() into _begin() and _end() for that purpose. It also adds aio_{disable,enable}_external() calls to disable external clients in the meantime. An important restriction of this split is that no new block jobs or BlockDriverStates can be created between the bdrv_drain_all_begin() and bdrv_drain_all_end() calls. This is not a concern now because we'll only be using this in bdrv_reopen_multiple(), but it must be dealt with if we ever have other uses cases in the future. Signed-off-by: Alberto Garcia Cc: Paolo Bonzini Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/io.c | 27 ++++++++++++++++++++++++--- include/block/block.h | 2 ++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/block/io.c b/block/io.c index be0d862ca6..37749b680a 100644 --- a/block/io.c +++ b/block/io.c @@ -273,8 +273,14 @@ void bdrv_drain(BlockDriverState *bs) * * This function does not flush data to disk, use bdrv_flush_all() for that * after calling this function. + * + * This pauses all block jobs and disables external clients. It must + * be paired with bdrv_drain_all_end(). + * + * NOTE: no new block jobs or BlockDriverStates can be created between + * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls. */ -void bdrv_drain_all(void) +void bdrv_drain_all_begin(void) { /* Always run first iteration so any pending completion BHs run */ bool waited = true; @@ -297,6 +303,7 @@ void bdrv_drain_all(void) aio_context_acquire(aio_context); bdrv_parent_drained_begin(bs); bdrv_io_unplugged_begin(bs); + aio_disable_external(aio_context); aio_context_release(aio_context); if (!g_slist_find(aio_ctxs, aio_context)) { @@ -326,17 +333,25 @@ void bdrv_drain_all(void) } } + g_slist_free(aio_ctxs); +} + +void bdrv_drain_all_end(void) +{ + BlockDriverState *bs; + BdrvNextIterator it; + BlockJob *job = NULL; + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + aio_enable_external(aio_context); bdrv_io_unplugged_end(bs); bdrv_parent_drained_end(bs); aio_context_release(aio_context); } - g_slist_free(aio_ctxs); - job = NULL; while ((job = block_job_next(job))) { AioContext *aio_context = blk_get_aio_context(job->blk); @@ -346,6 +361,12 @@ void bdrv_drain_all(void) } } +void bdrv_drain_all(void) +{ + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +} + /** * Remove an active request from the tracked requests list * diff --git a/include/block/block.h b/include/block/block.h index b7dc7d54ae..b81a3e35ce 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -332,6 +332,8 @@ int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); void coroutine_fn bdrv_co_drain(BlockDriverState *bs); +void bdrv_drain_all_begin(void); +void bdrv_drain_all_end(void); void bdrv_drain_all(void); #define BDRV_POLL_WHILE(bs, cond) ({ \ From 40840e419be31e6a32e6ea24511c74b389d5e0e4 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:03 +0300 Subject: [PATCH 07/29] block: Pause all jobs during bdrv_reopen_multiple() When a BlockDriverState is about to be reopened it can trigger certain operations that need to write to disk. During this process a different block job can be woken up. If that block job completes and also needs to call bdrv_reopen() it can happen that it needs to do it on the same BlockDriverState that is still in the process of being reopened. This can have fatal consequences, like in this example: 1) Block job A starts and sleeps after a while. 2) Block job B starts and tries to reopen node1 (a qcow2 file). 3) Reopening node1 means flushing and replacing its qcow2 cache. 4) While the qcow2 cache is being flushed, job A wakes up. 5) Job A completes and reopens node1, replacing its cache. 6) Job B resumes, but the cache that was being flushed no longer exists. This patch splits the bdrv_drain_all() call to keep all block jobs paused during bdrv_reopen_multiple(), so that step 4 can never happen and the operation is safe. Note that this scenario can only happen if both bdrv_reopen() calls are made by block jobs on the same backing chain. Otherwise there's no chance that the same BlockDriverState appears in both reopen queues. Signed-off-by: Alberto Garcia Reviewed-by: Paolo Bonzini Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index a17baab1d0..2c87186ecf 100644 --- a/block.c +++ b/block.c @@ -2091,7 +2091,7 @@ int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **er assert(bs_queue != NULL); aio_context_release(ctx); - bdrv_drain_all(); + bdrv_drain_all_begin(); aio_context_acquire(ctx); QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { @@ -2122,6 +2122,9 @@ cleanup: g_free(bs_entry); } g_free(bs_queue); + + bdrv_drain_all_end(); + return ret; } From 23d402d42beb975b97303beb3d9d4f60f4a1a883 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:04 +0300 Subject: [PATCH 08/29] block: Add block_job_add_bdrv() When a block job is created on a certain BlockDriverState, operations are blocked there while the job exists. However, some block jobs may involve additional BDSs, which must be blocked separately when the job is created and unblocked manually afterwards. This patch adds block_job_add_bdrv(), that simplifies this process by keeping a list of BDSs that are involved in the specified block job. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockjob.c | 17 +++++++++++++++-- include/block/blockjob.h | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/blockjob.c b/blockjob.c index 7c88b30074..422851fde5 100644 --- a/blockjob.c +++ b/blockjob.c @@ -113,6 +113,13 @@ static void block_job_detach_aio_context(void *opaque) block_job_unref(job); } +void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +{ + job->nodes = g_slist_prepend(job->nodes, bs); + bdrv_ref(bs); + bdrv_op_block_all(bs, job->blocker); +} + void *block_job_create(const char *job_id, const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp) @@ -150,7 +157,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - bdrv_op_block_all(bs, job->blocker); + block_job_add_bdrv(job, bs); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -189,9 +196,15 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { + GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - bdrv_op_unblock_all(bs, job->blocker); + for (l = job->nodes; l; l = l->next) { + bs = l->data; + bdrv_op_unblock_all(bs, job->blocker); + bdrv_unref(bs); + } + g_slist_free(job->nodes); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 2bb39f4d29..4dfb16b43f 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -188,6 +188,9 @@ struct BlockJob { /** Block other operations when block job is running */ Error *blocker; + /** BlockDriverStates that are involved in this block job */ + GSList *nodes; + /** The opaque value that is passed to the completion function. */ void *opaque; @@ -252,6 +255,17 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, BlockDriverState *bs, int64_t speed, BlockCompletionFunc *cb, void *opaque, Error **errp); +/** + * block_job_add_bdrv: + * @job: A block job + * @bs: A BlockDriverState that is involved in @job + * + * Add @bs to the list of BlockDriverState that are involved in + * @job. This means that all operations will be blocked on @bs while + * @job exists. + */ +void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); + /** * block_job_sleep_ns: * @job: The job that calls the function. From cee3c6b5cab50a20cdb061ec107ec17bd2e54643 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:05 +0300 Subject: [PATCH 09/29] block: Use block_job_add_bdrv() in mirror_start_job() Use block_job_add_bdrv() instead of blocking all operations in mirror_start_job() and unblocking them in mirror_exit(). Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 3a0788ede3..04765ad2ff 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -530,7 +530,6 @@ static void mirror_exit(BlockJob *job, void *opaque) aio_context_release(replace_aio_context); } g_free(s->replaces); - bdrv_op_unblock_all(target_bs, s->common.blocker); blk_unref(s->target); s->target = NULL; block_job_completed(&s->common, data->ret); @@ -997,7 +996,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - bdrv_op_block_all(target, s->common.blocker); + block_job_add_bdrv(&s->common, target); s->common.co = qemu_coroutine_create(mirror_run, s); trace_mirror_start(bs, s, s->common.co, opaque); From b7340d002e1bcda6b7c07659a7e9d35842651476 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:06 +0300 Subject: [PATCH 10/29] block: Use block_job_add_bdrv() in backup_start() Use block_job_add_bdrv() instead of blocking all operations in backup_start() and unblocking them in backup_run(). Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/backup.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/backup.c b/block/backup.c index 81d4042ae8..44c7ff3d16 100644 --- a/block/backup.c +++ b/block/backup.c @@ -446,7 +446,6 @@ static void coroutine_fn backup_run(void *opaque) BackupBlockJob *job = opaque; BackupCompleteData *data; BlockDriverState *bs = blk_bs(job->common.blk); - BlockBackend *target = job->target; int64_t start, end; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; @@ -533,8 +532,6 @@ static void coroutine_fn backup_run(void *opaque) qemu_co_rwlock_unlock(&job->flush_rwlock); g_free(job->done_bitmap); - bdrv_op_unblock_all(blk_bs(target), job->common.blocker); - data = g_malloc(sizeof(*data)); data->ret = ret; block_job_defer_to_main_loop(&job->common, backup_complete, data); @@ -648,7 +645,7 @@ void backup_start(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - bdrv_op_block_all(target, job->common.blocker); + block_job_add_bdrv(&job->common, target); job->common.len = len; job->common.co = qemu_coroutine_create(backup_run, job); block_job_txn_add_job(txn, &job->common); From 058223a6e3b73cdf7f56b8d0a55032f99c5f11ac Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:07 +0300 Subject: [PATCH 11/29] block: Check blockers in all nodes involved in a block-commit job qmp_block_commit() checks for op blockers in the active and destination (base) images. However all nodes between top_bs and base are also involved, and they are removed from the chain afterwards. In addition to that, if top_bs is not the active layer then top_bs's overlay also needs to be checked because it's involved in the job (its backing image string needs to be updated to point to 'base'). This patch checks that none of those nodes are blocked. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index d11a74f837..de5b5f589c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3001,6 +3001,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, Error **errp) { BlockDriverState *bs; + BlockDriverState *iter; BlockDriverState *base_bs, *top_bs; AioContext *aio_context; Error *local_err = NULL; @@ -3067,8 +3068,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, assert(bdrv_get_aio_context(base_bs) == aio_context); - if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { - goto out; + for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } } /* Do not allow attempts to commit an image into itself */ @@ -3086,6 +3089,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed, on_error, block_job_cb, bs, &local_err, false); } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, has_backing_file ? backing_file : NULL, &local_err); From 3e4c5122cbb881cd271314ef0782c4b6c57ab03d Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:08 +0300 Subject: [PATCH 12/29] block: Block all nodes involved in the block-commit operation After a successful block-commit operation all nodes between top and base are removed from the backing chain, and top's overlay needs to be updated to point to base. Because of that we should prevent other block jobs from messing with them. This patch blocks all operations in these nodes in commit_start(). Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/commit.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/commit.c b/block/commit.c index 499eccaeee..a5e17f610f 100644 --- a/block/commit.c +++ b/block/commit.c @@ -216,6 +216,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, BlockReopenQueue *reopen_queue = NULL; int orig_overlay_flags; int orig_base_flags; + BlockDriverState *iter; BlockDriverState *overlay_bs; Error *local_err = NULL; @@ -260,6 +261,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, } + /* Block all nodes between top and base, because they will + * disappear from the chain after this operation. */ + assert(bdrv_chain_contains(top, base)); + for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + /* overlay_bs must be blocked because it needs to be modified to + * update the backing image string, but if it's the root node then + * don't block it again */ + if (bs != overlay_bs) { + block_job_add_bdrv(&s->common, overlay_bs); + } + s->base = blk_new(); blk_insert_bs(s->base, base); From f3ede4b05d1407d48314f0edada5e402865e641e Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:09 +0300 Subject: [PATCH 13/29] block: Block all intermediate nodes in commit_active_start() When block-commit is launched without the top parameter, it uses internally a mirror block job. In that case all intermediate nodes between the active and base nodes must be blocked as well. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block/mirror.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index 04765ad2ff..7e99f3a880 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -997,6 +997,14 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, } block_job_add_bdrv(&s->common, target); + /* In commit_active_start() all intermediate nodes disappear, so + * any jobs in them must be blocked */ + if (bdrv_chain_contains(bs, target)) { + BlockDriverState *iter; + for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + } s->common.co = qemu_coroutine_create(mirror_run, s); trace_mirror_start(bs, s, s->common.co, opaque); From 61b49e48b379db45f0ea91c93a61c873695549a9 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:10 +0300 Subject: [PATCH 14/29] block: Support streaming to an intermediate layer This makes sure that the image we are streaming into is open in read-write mode during the operation. Operation blockers are also set in all intermediate nodes, since they will be removed from the chain afterwards. Finally, this also unblocks the stream operation in backing files. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- block.c | 4 +++- block/stream.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 2c87186ecf..c19c6c6d42 100644 --- a/block.c +++ b/block.c @@ -1428,9 +1428,11 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) backing_hd->drv ? backing_hd->drv->format_name : ""); bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit due to check in bdrv_commit */ + /* Otherwise we won't be able to commit or stream */ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, bs->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + bs->backing_blocker); /* * We do backup in 3 ways: * 1. drive backup diff --git a/block/stream.c b/block/stream.c index 31874817c2..b8ab89a105 100644 --- a/block/stream.c +++ b/block/stream.c @@ -37,6 +37,7 @@ typedef struct StreamBlockJob { BlockDriverState *base; BlockdevOnError on_error; char *backing_file_str; + int bs_flags; } StreamBlockJob; static int coroutine_fn stream_populate(BlockBackend *blk, @@ -81,6 +82,11 @@ static void stream_complete(BlockJob *job, void *opaque) bdrv_set_backing_hd(bs, base); } + /* Reopen the image back in read-only mode if necessary */ + if (s->bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } + g_free(s->backing_file_str); block_job_completed(&s->common, data->ret); g_free(data); @@ -220,6 +226,8 @@ void stream_start(const char *job_id, BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + BlockDriverState *iter; + int orig_bs_flags; s = block_job_create(job_id, &stream_job_driver, bs, speed, cb, opaque, errp); @@ -227,8 +235,24 @@ void stream_start(const char *job_id, BlockDriverState *bs, return; } + /* Make sure that the image is opened in read-write mode */ + orig_bs_flags = bdrv_get_flags(bs); + if (!(orig_bs_flags & BDRV_O_RDWR)) { + if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { + block_job_unref(&s->common); + return; + } + } + + /* Block all intermediate nodes between bs and base, because they + * will disappear from the chain after this operation */ + for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + s->base = base; s->backing_file_str = g_strdup(backing_file_str); + s->bs_flags = orig_bs_flags; s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run, s); From 554b614765090f47d97a20ca6981e17e96515ec1 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:11 +0300 Subject: [PATCH 15/29] block: Add QMP support for streaming to an intermediate layer This patch makes the 'device' parameter of the 'block-stream' command accept a node name that is not a root node. The presence of this feature can't be directly tested with introspection; soon we'll introduce a 'base-node' parameter whose presence can be checked for this purpose. In addition to that, operation blockers will be checked in all intermediate nodes between the top and the base node. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- blockdev.c | 15 +++++++++------ qapi/block-core.json | 13 ++++++++++--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/blockdev.c b/blockdev.c index de5b5f589c..b5d4d6919f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2937,7 +2937,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, bool has_on_error, BlockdevOnError on_error, Error **errp) { - BlockDriverState *bs; + BlockDriverState *bs, *iter; BlockDriverState *base_bs = NULL; AioContext *aio_context; Error *local_err = NULL; @@ -2947,7 +2947,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, on_error = BLOCKDEV_ON_ERROR_REPORT; } - bs = qmp_get_root_bs(device, errp); + bs = bdrv_lookup_bs(device, device, errp); if (!bs) { return; } @@ -2955,10 +2955,6 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { - goto out; - } - if (has_base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { @@ -2969,6 +2965,13 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, base_name = base; } + /* Check for op blockers in the whole chain between bs and base */ + for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { + goto out; + } + } + /* if we are streaming the entire chain, the result will have no backing * file, and specifying one is therefore an error */ if (base_bs == NULL && has_backing_file) { diff --git a/qapi/block-core.json b/qapi/block-core.json index 0f2b7032fc..d7a029cfd5 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1464,6 +1464,13 @@ # with query-block-jobs. The operation can be stopped before it has completed # using the block-job-cancel command. # +# The node that receives the data is called the top image, can be located in +# any part of the chain (but always above the base image; see below) and can be +# specified using its device or node name. Earlier qemu versions only allowed +# 'device' to name the top level node; presence of the 'base-node' parameter +# during introspection can be used as a witness of the enhanced semantics +# of 'device'. +# # If a base file is specified then sectors are not copied from that base file and # its backing chain. When streaming completes the image file will have the base # file as its backing file. This can be used to stream a subset of the backing @@ -1475,12 +1482,12 @@ # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the device name or node-name of a root node +# @device: the device or node name of the top image # # @base: #optional the common backing file name # -# @backing-file: #optional The backing file string to write into the active -# layer. This filename is not validated. +# @backing-file: #optional The backing file string to write into the top +# image. This filename is not validated. # # If a pathname string is such that it cannot be # resolved by QEMU, that means that subsequent QMP or From 1029641bef0899ea95bbdf651c77e7856cdcbe37 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:12 +0300 Subject: [PATCH 16/29] docs: Document how to stream to an intermediate layer Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- docs/live-block-ops.txt | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt index a257087401..2211d14428 100644 --- a/docs/live-block-ops.txt +++ b/docs/live-block-ops.txt @@ -4,15 +4,20 @@ LIVE BLOCK OPERATIONS High level description of live block operations. Note these are not supported for use with the raw format at the moment. +Note also that this document is incomplete and it currently only +covers the 'stream' operation. Other operations supported by QEMU such +as 'commit', 'mirror' and 'backup' are not described here yet. Please +refer to the qapi/block-core.json file for an overview of those. + Snapshot live merge =================== Given a snapshot chain, described in this document in the following format: -[A] -> [B] -> [C] -> [D] +[A] <- [B] <- [C] <- [D] <- [E] -Where the rightmost object ([D] in the example) described is the current +Where the rightmost object ([E] in the example) described is the current image which the guest OS has write access to. To the left of it is its base image, and so on accordingly until the leftmost image, which has no base. @@ -21,11 +26,14 @@ The snapshot live merge operation transforms such a chain into a smaller one with fewer elements, such as this transformation relative to the first example: -[A] -> [D] +[A] <- [E] -Currently only forward merge with target being the active image is -supported, that is, data copy is performed in the right direction with -destination being the rightmost image. +Data is copied in the right direction with destination being the +rightmost image, but any other intermediate image can be specified +instead. In this example data is copied from [C] into [D], so [D] can +be backed by [B]: + +[A] <- [B] <- [D] <- [E] The operation is implemented in QEMU through image streaming facilities. @@ -35,14 +43,20 @@ streaming operation completes it raises a QMP event. 'block_stream' copies data from the backing file(s) into the active image. When finished, it adjusts the backing file pointer. -The 'base' parameter specifies an image which data need not be streamed from. -This image will be used as the backing file for the active image when the -operation is finished. +The 'base' parameter specifies an image which data need not be +streamed from. This image will be used as the backing file for the +destination image when the operation is finished. -In the example above, the command would be: +In the first example above, the command would be: -(qemu) block_stream virtio0 A +(qemu) block_stream virtio0 file-A.img +In order to specify a destination image different from the active +(rightmost) one we can use its node name instead. + +In the second example above, the command would be: + +(qemu) block_stream node-D file-B.img Live block copy =============== From 7b8a9e5ab45a4532ce993f4273cfe0120abdea56 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:13 +0300 Subject: [PATCH 17/29] qemu-iotests: Test streaming to an intermediate layer This adds test_stream_intermediate(), similar to test_stream() but streams to the intermediate image instead. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 21 ++++++++++++++++++++- tests/qemu-iotests/030.out | 4 ++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 107049b50f..8b0212cc81 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -36,7 +36,7 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) qemu_io('-f', 'raw', '-c', 'write -P 0x1 0 512', backing_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x1 524288 512', mid_img) - self.vm = iotests.VM().add_drive("blkdebug::" + test_img) + self.vm = iotests.VM().add_drive("blkdebug::" + test_img, "backing.node-name=mid") self.vm.launch() def tearDown(self): @@ -60,6 +60,25 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img), 'image file map does not match backing file after streaming') + def test_stream_intermediate(self): + self.assert_no_active_block_jobs() + + self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + 'image file map matches backing file before streaming') + + result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream-mid') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + self.assertEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + 'image file map does not match backing file after streaming') + def test_stream_pause(self): self.assert_no_active_block_jobs() diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 6323079e08..96961ed0b5 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -.............. +............... ---------------------------------------------------------------------- -Ran 14 tests +Ran 15 tests OK From c1a34322d86ccc16e09909bc2f96587e2c3ca82b Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:14 +0300 Subject: [PATCH 18/29] qemu-iotests: Test block-stream operations in parallel This test case checks that it's possible to launch several stream operations in parallel in the same snapshot chain, each one involving a different set of nodes. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 80 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 +- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 8b0212cc81..ba9a5418e9 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -148,6 +148,86 @@ class TestSingleDrive(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') +class TestParallelOps(iotests.QMPTestCase): + num_ops = 4 # Number of parallel block-stream operations + num_imgs = num_ops * 2 + 1 + image_len = num_ops * 1024 * 1024 + imgs = [] + + def setUp(self): + opts = [] + self.imgs = [] + + # Initialize file names and command-line options + for i in range(self.num_imgs): + img_depth = self.num_imgs - i - 1 + opts.append("backing." * img_depth + "node-name=node%d" % i) + self.imgs.append(os.path.join(iotests.test_dir, 'img-%d.img' % i)) + + # Create all images + iotests.create_image(self.imgs[0], self.image_len) + for i in range(1, self.num_imgs): + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=%s' % self.imgs[i-1], self.imgs[i]) + + # Put data into the images we are copying data from + for i in range(self.num_imgs / 2): + img_index = i * 2 + 1 + # Alternate between 512k and 1M. + # This way jobs will not finish in the same order they were created + num_kb = 512 + 512 * (i % 2) + qemu_io('-f', iotests.imgfmt, + '-c', 'write -P %d %d %d' % (i, i*1024*1024, num_kb * 1024), + self.imgs[img_index]) + + # Attach the drive to the VM + self.vm = iotests.VM() + self.vm.add_drive(self.imgs[-1], ','.join(opts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for img in self.imgs: + os.remove(img) + + # Test that it's possible to run several block-stream operations + # in parallel in the same snapshot chain + def test_stream_parallel(self): + self.assert_no_active_block_jobs() + + # Check that the maps don't match before the streaming operations + for i in range(2, self.num_imgs, 2): + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + 'image file map matches backing file before streaming') + + # Create all streaming jobs + pending_jobs = [] + for i in range(2, self.num_imgs, 2): + node_name = 'node%d' % i + job_id = 'stream-%s' % node_name + pending_jobs.append(job_id) + result = self.vm.qmp('block-stream', device=node_name, job_id=job_id, base=self.imgs[i-2], speed=512*1024) + self.assert_qmp(result, 'return', {}) + + # Wait for all jobs to be finished. + while len(pending_jobs) > 0: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + job_id = self.dictpath(event, 'data/device') + self.assertTrue(job_id in pending_jobs) + self.assert_qmp_absent(event, 'data/error') + pending_jobs.remove(job_id) + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + # Check that all maps match now + for i in range(2, self.num_imgs, 2): + self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + 'image file map does not match backing file after streaming') + class TestSmallerBackingFile(iotests.QMPTestCase): backing_len = 1 * 1024 * 1024 # MB image_len = 2 * backing_len diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 96961ed0b5..b6f257674e 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -............... +................ ---------------------------------------------------------------------- -Ran 15 tests +Ran 16 tests OK From eb290b78ffec18975b7a31175de770e862749185 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:15 +0300 Subject: [PATCH 19/29] qemu-iotests: Test overlapping stream and commit operations These test cases check that it's not possible to perform two block-stream or block-commit operations if there are nodes involved in both. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 89 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 +- 2 files changed, 91 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index ba9a5418e9..3533495137 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -228,6 +228,95 @@ class TestParallelOps(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), 'image file map does not match backing file after streaming') + # Test that it's not possible to perform two block-stream + # operations if there are nodes involved in both. + def test_overlapping_1(self): + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4', base=self.imgs[1], speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node5', job_id='stream-node5', base=self.imgs[2]) + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3', base=self.imgs[2]) + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4-v2') + self.assert_qmp(result, 'error/class', 'GenericError') + + # block-commit should also fail if it touches nodes used by the stream job + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[4], job_id='commit-node4') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[1], top=self.imgs[3], job_id='commit-node1') + self.assert_qmp(result, 'error/class', 'GenericError') + + # This fails because it needs to modify the backing string in node2, which is blocked + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[0], top=self.imgs[1], job_id='commit-node0') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.wait_until_completed(drive='stream-node4') + self.assert_no_active_block_jobs() + + # Similar to test_overlapping_1, but with block-commit + # blocking the other jobs + def test_overlapping_2(self): + self.assertLessEqual(9, self.num_imgs) + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[5], base=self.imgs[3], job_id='commit-node3', speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node6', base=self.imgs[2], job_id='stream-node6') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], job_id='stream-node4') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node6', base=self.imgs[4], job_id='stream-node6-v2') + self.assert_qmp(result, 'error/class', 'GenericError') + + # This fails because block-commit needs to block node6, the overlay of the 'top' image + result = self.vm.qmp('block-stream', device='node7', base=self.imgs[5], job_id='stream-node6-v3') + self.assert_qmp(result, 'error/class', 'GenericError') + + # This fails because block-commit currently blocks the active layer even if it's not used + result = self.vm.qmp('block-stream', device='drive0', base=self.imgs[5], job_id='stream-drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.wait_until_completed(drive='commit-node3') + + # Similar to test_overlapping_2, but here block-commit doesn't use the 'top' parameter. + # Internally this uses a mirror block job, hence the separate test case. + def test_overlapping_3(self): + self.assertLessEqual(8, self.num_imgs) + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3], job_id='commit-drive0', speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node5', base=self.imgs[3], job_id='stream-node6') + self.assert_qmp(result, 'error/class', 'GenericError') + + event = self.vm.get_qmp_event(wait=True) + self.assertEqual(event['event'], 'BLOCK_JOB_READY') + self.assert_qmp(event, 'data/device', 'commit-drive0') + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp_absent(event, 'data/error') + + result = self.vm.qmp('block-job-complete', device='commit-drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='commit-drive0') + self.assert_no_active_block_jobs() + class TestSmallerBackingFile(iotests.QMPTestCase): backing_len = 1 * 1024 * 1024 # MB image_len = 2 * backing_len diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index b6f257674e..4176bb9402 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -................ +................... ---------------------------------------------------------------------- -Ran 16 tests +Ran 19 tests OK From 704d59f13d5b0e02674f18fa32f8c44ca18abea3 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:16 +0300 Subject: [PATCH 20/29] qemu-iotests: Test block-stream and block-commit in parallel As with test_stream_parallel(), we allow mixing block-stream and block-commit operations in the same backing chain as long as there's no overlap among the involved nodes. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 30 ++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 3533495137..83aeaa694d 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -315,6 +315,36 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_qmp(result, 'return', {}) self.wait_until_completed(drive='commit-drive0') + + # Test a block-stream and a block-commit job in parallel + def test_stream_commit(self): + self.assertLessEqual(8, self.num_imgs) + self.assert_no_active_block_jobs() + + # Stream from node0 into node2 + result = self.vm.qmp('block-stream', device='node2', job_id='node2') + self.assert_qmp(result, 'return', {}) + + # Commit from the active layer into node3 + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3]) + self.assert_qmp(result, 'return', {}) + + # Wait for all jobs to be finished. + pending_jobs = ['node2', 'drive0'] + while len(pending_jobs) > 0: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + node_name = self.dictpath(event, 'data/device') + self.assertTrue(node_name in pending_jobs) + self.assert_qmp_absent(event, 'data/error') + pending_jobs.remove(node_name) + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp_absent(event, 'data/error') + self.assertTrue('drive0' in pending_jobs) + self.vm.qmp('block-job-complete', device='drive0') + self.assert_no_active_block_jobs() class TestSmallerBackingFile(iotests.QMPTestCase): diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 4176bb9402..3a89159833 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -................... +.................... ---------------------------------------------------------------------- -Ran 19 tests +Ran 20 tests OK From b0f904950c84c8de24ed0be75d5d48e2f7ff1e73 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:17 +0300 Subject: [PATCH 21/29] qemu-iotests: Add iotests.supports_quorum() There's many tests that need Quorum support in order to run. At the moment each test implements its own check to see if Quorum is enabled. This patch centralizes all those checks in a new function called iotests.supports_quorum(). Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 27 ++++++++++++--------------- tests/qemu-iotests/139 | 3 ++- tests/qemu-iotests/iotests.py | 5 ++++- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 30e628f0f7..bc6cf782fe 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -760,9 +760,6 @@ class TestRepairQuorum(iotests.QMPTestCase): image_len = 1 * 1024 * 1024 # MB IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ] - def has_quorum(self): - return 'quorum' in iotests.qemu_img_pipe('--help') - def setUp(self): self.vm = iotests.VM() @@ -783,7 +780,7 @@ class TestRepairQuorum(iotests.QMPTestCase): #assemble the quorum block device from the individual files args = { "driver": "quorum", "node-name": "quorum0", "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } - if self.has_quorum(): + if iotests.supports_quorum(): result = self.vm.qmp("blockdev-add", **args) self.assert_qmp(result, 'return', {}) @@ -798,7 +795,7 @@ class TestRepairQuorum(iotests.QMPTestCase): pass def test_complete(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() @@ -817,7 +814,7 @@ class TestRepairQuorum(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_cancel(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() @@ -834,7 +831,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.vm.shutdown() def test_cancel_after_ready(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() @@ -853,7 +850,7 @@ class TestRepairQuorum(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_pause(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() @@ -883,7 +880,7 @@ class TestRepairQuorum(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_medium_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return if iotests.qemu_default_machine != 'pc': @@ -897,7 +894,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_image_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', @@ -907,7 +904,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_device_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('drive-mirror', job_id='job0', @@ -918,7 +915,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_wrong_sync_mode(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0', @@ -928,7 +925,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_no_node_name(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', @@ -937,7 +934,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_nonexistent_replaces(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', @@ -946,7 +943,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_after_a_quorum_snapshot(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1', diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139 index 6a0f6ca569..6d98e4f879 100644 --- a/tests/qemu-iotests/139 +++ b/tests/qemu-iotests/139 @@ -336,8 +336,9 @@ class TestBlockdevDel(iotests.QMPTestCase): self.checkBlockDriverState('node1', False) def testQuorum(self): - if not 'quorum' in iotests.qemu_img_pipe('--help'): + if not iotests.supports_quorum(): return + self.addQuorum('quorum0', 'node0', 'node1') # We cannot remove the children of a Quorum device self.delBlockDriverState('node0', expect_error = True) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 1f30cfcc75..bec8eb4b8d 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -348,9 +348,12 @@ def verify_platform(supported_oses=['linux']): if True not in [sys.platform.startswith(x) for x in supported_oses]: notrun('not suitable for this OS: %s' % sys.platform) +def supports_quorum(): + return 'quorum' in qemu_img_pipe('--help') + def verify_quorum(): '''Skip test suite if quorum support is not available''' - if 'quorum' not in qemu_img_pipe('--help'): + if not supports_quorum(): notrun('quorum support missing') def main(supported_fmts=[], supported_oses=['linux']): From 48361afba902ab90a9f667a94f863bafd1417d2e Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:18 +0300 Subject: [PATCH 22/29] qemu-iotests: Test streaming to a Quorum child Quorum children are special in the sense that they're not directly attached to a block backend but they're not used as backing images either. However the intermediate block streaming code supports streaming to them. This is a test case for that scenario. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 56 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 +-- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 83aeaa694d..783a1b80c2 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -347,6 +347,62 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_no_active_block_jobs() +class TestQuorum(iotests.QMPTestCase): + num_children = 3 + children = [] + backing = [] + + def setUp(self): + opts = ['driver=quorum', 'vote-threshold=2'] + + # Initialize file names and command-line options + for i in range(self.num_children): + child_img = os.path.join(iotests.test_dir, 'img-%d.img' % i) + backing_img = os.path.join(iotests.test_dir, 'backing-%d.img' % i) + self.children.append(child_img) + self.backing.append(backing_img) + qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M') + qemu_io('-f', iotests.imgfmt, + '-c', 'write -P 0x55 0 1024', backing_img) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=%s' % backing_img, child_img) + opts.append("children.%d.file.filename=%s" % (i, child_img)) + opts.append("children.%d.node-name=node%d" % (i, i)) + + # Attach the drive to the VM + self.vm = iotests.VM() + self.vm.add_drive(path = None, opts = ','.join(opts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for img in self.children: + os.remove(img) + for img in self.backing: + os.remove(img) + + def test_stream_quorum(self): + if not iotests.supports_quorum(): + return + + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + 'image file map matches backing file before streaming') + + self.assert_no_active_block_jobs() + + result = self.vm.qmp('block-stream', device='node0', job_id='stream-node0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream-node0') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + 'image file map does not match backing file after streaming') + class TestSmallerBackingFile(iotests.QMPTestCase): backing_len = 1 * 1024 * 1024 # MB image_len = 2 * backing_len diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 3a89159833..c6a10f870d 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -.................... +..................... ---------------------------------------------------------------------- -Ran 20 tests +Ran 21 tests OK From 312fe09cc8af86c25a0c17162539589121e7d9a9 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:19 +0300 Subject: [PATCH 23/29] block: Add 'base-node' parameter to the 'block-stream' command The way to specify the node from which to copy data in the block-stream operation is by using the 'base' parameter. This parameter however takes a file name, not a node name. Since we want to be able to perform this operation using only node names, this patch adds a new 'base-node' parameter. Signed-off-by: Alberto Garcia Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf --- blockdev.c | 21 +++++++++++++++++++++ docs/qmp-commands.txt | 7 +++++-- hmp.c | 2 +- qapi/block-core.json | 8 ++++++-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/blockdev.c b/blockdev.c index b5d4d6919f..ded13268f7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2932,6 +2932,7 @@ static void block_job_cb(void *opaque, int ret) void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, bool has_base, const char *base, + bool has_base_node, const char *base_node, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, bool has_on_error, BlockdevOnError on_error, @@ -2955,6 +2956,12 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + if (has_base && has_base_node) { + error_setg(errp, "'base' and 'base-node' cannot be specified " + "at the same time"); + goto out; + } + if (has_base) { base_bs = bdrv_find_backing_image(bs, base); if (base_bs == NULL) { @@ -2965,6 +2972,20 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, base_name = base; } + if (has_base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (!base_bs) { + goto out; + } + if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) { + error_setg(errp, "Node '%s' is not a backing image of '%s'", + base_node, device); + goto out; + } + assert(bdrv_get_aio_context(base_bs) == aio_context); + base_name = base_bs->filename; + } + /* Check for op blockers in the whole chain between bs and base */ for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) { if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { diff --git a/docs/qmp-commands.txt b/docs/qmp-commands.txt index a4732a570c..6afa87298d 100644 --- a/docs/qmp-commands.txt +++ b/docs/qmp-commands.txt @@ -750,8 +750,11 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) - "device": The device name or node-name of a root node (json-string) -- "base": The file name of the backing image above which copying starts - (json-string, optional) +- "base": The file name of the backing image above which copying starts. + It cannot be set if 'base-node' is also set (json-string, optional) +- "base-node": the node name of the backing image above which copying starts. + It cannot be set if 'base' is also set. + (json-string, optional) (Since 2.8) - "backing-file": The backing file string to write into the active layer. This filename is not validated. diff --git a/hmp.c b/hmp.c index 00af4230bf..b5e3f54534 100644 --- a/hmp.c +++ b/hmp.c @@ -1571,7 +1571,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) int64_t speed = qdict_get_try_int(qdict, "speed", 0); qmp_block_stream(false, NULL, device, base != NULL, base, false, NULL, - qdict_haskey(qdict, "speed"), speed, + false, NULL, qdict_haskey(qdict, "speed"), speed, true, BLOCKDEV_ON_ERROR_REPORT, &error); hmp_handle_error(mon, &error); diff --git a/qapi/block-core.json b/qapi/block-core.json index d7a029cfd5..3592a9d7ad 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1484,7 +1484,11 @@ # # @device: the device or node name of the top image # -# @base: #optional the common backing file name +# @base: #optional the common backing file name. +# It cannot be set if @base-node is also set. +# +# @base-node: #optional the node name of the backing file. +# It cannot be set if @base is also set. (Since 2.8) # # @backing-file: #optional The backing file string to write into the top # image. This filename is not validated. @@ -1511,7 +1515,7 @@ ## { 'command': 'block-stream', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', - '*backing-file': 'str', '*speed': 'int', + '*base-node': 'str', '*backing-file': 'str', '*speed': 'int', '*on-error': 'BlockdevOnError' } } ## From 7eb13c9daac7e02bcd4dc18ad4297a93ae34d235 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Fri, 28 Oct 2016 10:08:20 +0300 Subject: [PATCH 24/29] qemu-iotests: Test the 'base-node' parameter of 'block-stream' The block-stream command has traditionally used the 'base' parameter to indicate the image to copy the data from. This test checks that the 'base-node' parameter can also be used for the same purpose. Signed-off-by: Alberto Garcia Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 37 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 ++-- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 783a1b80c2..54db54a1ea 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -347,6 +347,43 @@ class TestParallelOps(iotests.QMPTestCase): self.assert_no_active_block_jobs() + # Test the base_node parameter + def test_stream_base_node_name(self): + self.assert_no_active_block_jobs() + + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + 'image file map matches backing file before streaming') + + # Error: the base node does not exist + result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: the base node is not a backing file of the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: the base node is the same as the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node4', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: cannot specify 'base' and 'base-node' at the same time + result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], base_node='node2', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Success: the base node is a backing file of the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='stream') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + 'image file map matches backing file after streaming') + class TestQuorum(iotests.QMPTestCase): num_children = 3 children = [] diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index c6a10f870d..84bfd63fba 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -..................... +...................... ---------------------------------------------------------------------- -Ran 21 tests +Ran 22 tests OK From 2fdc70452a59484c85359dc8d5a0650245288781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Golembiovsk=C3=BD?= Date: Mon, 31 Oct 2016 11:27:40 +0100 Subject: [PATCH 25/29] raw_bsd: add offset and size options MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added two new options 'offset' and 'size'. This makes it possible to use only part of the file as a device. This can be used e.g. to limit the access only to single partition in a disk image or use a disk inside a tar archive (like OVA). When 'size' is specified we do our best to honour it. Signed-off-by: Tomáš Golembiovský Signed-off-by: Kevin Wolf --- block/raw_bsd.c | 217 ++++++++++++++++++++++++++++++++++++++++++- qapi/block-core.json | 16 +++- 2 files changed, 229 insertions(+), 4 deletions(-) diff --git a/block/raw_bsd.c b/block/raw_bsd.c index fc16ec1f74..7c9bebb507 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -31,6 +31,30 @@ #include "qapi/error.h" #include "qemu/option.h" +typedef struct BDRVRawState { + uint64_t offset; + uint64_t size; + bool has_size; +} BDRVRawState; + +static QemuOptsList raw_runtime_opts = { + .name = "raw", + .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), + .desc = { + { + .name = "offset", + .type = QEMU_OPT_SIZE, + .help = "offset in the disk where the image starts", + }, + { + .name = "size", + .type = QEMU_OPT_SIZE, + .help = "virtual disk size", + }, + { /* end of list */ } + }, +}; + static QemuOptsList raw_create_opts = { .name = "raw-create-opts", .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), @@ -44,16 +68,108 @@ static QemuOptsList raw_create_opts = { } }; +static int raw_read_options(QDict *options, BlockDriverState *bs, + BDRVRawState *s, Error **errp) +{ + Error *local_err = NULL; + QemuOpts *opts = NULL; + int64_t real_size = 0; + int ret; + + real_size = bdrv_getlength(bs->file->bs); + if (real_size < 0) { + error_setg_errno(errp, -real_size, "Could not get image size"); + return real_size; + } + + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto end; + } + + s->offset = qemu_opt_get_size(opts, "offset", 0); + if (qemu_opt_find(opts, "size") != NULL) { + s->size = qemu_opt_get_size(opts, "size", 0); + s->has_size = true; + } else { + s->has_size = false; + s->size = real_size - s->offset; + } + + /* Check size and offset */ + if (real_size < s->offset || (real_size - s->offset) < s->size) { + error_setg(errp, "The sum of offset (%" PRIu64 ") and size " + "(%" PRIu64 ") has to be smaller or equal to the " + " actual size of the containing file (%" PRId64 ")", + s->offset, s->size, real_size); + ret = -EINVAL; + goto end; + } + + /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding + * up and leaking out of the specified area. */ + if (!QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Specified size is not multiple of %llu", + BDRV_SECTOR_SIZE); + ret = -EINVAL; + goto end; + } + + ret = 0; + +end: + + qemu_opts_del(opts); + + return ret; +} + static int raw_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { - return 0; + assert(reopen_state != NULL); + assert(reopen_state->bs != NULL); + + reopen_state->opaque = g_new0(BDRVRawState, 1); + + return raw_read_options( + reopen_state->options, + reopen_state->bs, + reopen_state->opaque, + errp); +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawState *new_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + memcpy(s, new_s, sizeof(BDRVRawState)); + + g_free(state->opaque); + state->opaque = NULL; +} + +static void raw_reopen_abort(BDRVReopenState *state) +{ + g_free(state->opaque); + state->opaque = NULL; } static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVRawState *s = bs->opaque; + + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } @@ -62,11 +178,23 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVRawState *s = bs->opaque; void *buf = NULL; BlockDriver *drv; QEMUIOVector local_qiov; int ret; + if (s->has_size && (offset > s->size || bytes > (s->size - offset))) { + /* There's not enough space for the data. Don't write anything and just + * fail to prevent leaking out of the size specified in options. */ + return -ENOSPC; + } + + if (offset > UINT64_MAX - s->offset) { + ret = -EINVAL; + goto fail; + } + if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { /* Handling partial writes would be a pain - so we just * require that guests have 512-byte request alignment if @@ -101,6 +229,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, qiov = &local_qiov; } + offset += s->offset; + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -117,8 +247,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int nb_sectors, int *pnum, BlockDriverState **file) { + BDRVRawState *s = bs->opaque; *pnum = nb_sectors; *file = bs->file->bs; + sector_num += s->offset / BDRV_SECTOR_SIZE; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | (sector_num << BDRV_SECTOR_BITS); } @@ -127,18 +259,49 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags) { + BDRVRawState *s = bs->opaque; + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { + BDRVRawState *s = bs->opaque; + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; return bdrv_co_pdiscard(bs->file->bs, offset, count); } static int64_t raw_getlength(BlockDriverState *bs) { - return bdrv_getlength(bs->file->bs); + int64_t len; + BDRVRawState *s = bs->opaque; + + /* Update size. It should not change unless the file was externally + * modified. */ + len = bdrv_getlength(bs->file->bs); + if (len < 0) { + return len; + } + + if (len < s->offset) { + s->size = 0; + } else { + if (s->has_size) { + /* Try to honour the size */ + s->size = MIN(s->size, len - s->offset); + } else { + s->size = len - s->offset; + } + } + + return s->size; } static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -158,6 +321,18 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) static int raw_truncate(BlockDriverState *bs, int64_t offset) { + BDRVRawState *s = bs->opaque; + + if (s->has_size) { + return -ENOTSUP; + } + + if (INT64_MAX - offset < s->offset) { + return -EINVAL; + } + + s->size = offset; + offset += s->offset; return bdrv_truncate(bs->file->bs, offset); } @@ -178,6 +353,10 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked) static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } return bdrv_co_ioctl(bs->file->bs, req, buf); } @@ -194,6 +373,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { + BDRVRawState *s = bs->opaque; + int ret; + bs->sg = bs->file->bs->sg; bs->supported_write_flags = BDRV_REQ_FUA & bs->file->bs->supported_write_flags; @@ -211,6 +393,16 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, bs->file->bs->filename); } + ret = raw_read_options(options, bs, s, errp); + if (ret < 0) { + return ret; + } + + if (bs->sg && (s->offset || s->has_size)) { + error_setg(errp, "Cannot use offset/size with SCSI generic devices"); + return -EINVAL; + } + return 0; } @@ -228,18 +420,37 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { - return bdrv_probe_blocksizes(bs->file->bs, bsz); + BDRVRawState *s = bs->opaque; + int ret; + + ret = bdrv_probe_blocksizes(bs->file->bs, bsz); + if (ret < 0) { + return ret; + } + + if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { + return -ENOTSUP; + } + + return 0; } static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } return bdrv_probe_geometry(bs->file->bs, geo); } BlockDriver bdrv_raw = { .format_name = "raw", + .instance_size = sizeof(BDRVRawState), .bdrv_probe = &raw_probe, .bdrv_reopen_prepare = &raw_reopen_prepare, + .bdrv_reopen_commit = &raw_reopen_commit, + .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, .bdrv_create = &raw_create, diff --git a/qapi/block-core.json b/qapi/block-core.json index 3592a9d7ad..df3b9878f8 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2269,6 +2269,20 @@ '*export': 'str', '*tls-creds': 'str' } } +## +# @BlockdevOptionsRaw +# +# Driver specific block device options for the raw driver. +# +# @offset: #optional position where the block device starts +# @size: #optional the assumed size of the device +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsRaw', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { '*offset': 'int', '*size': 'int' } } + ## # @BlockdevOptions # @@ -2323,7 +2337,7 @@ 'qcow': 'BlockdevOptionsGenericCOWFormat', 'qed': 'BlockdevOptionsGenericCOWFormat', 'quorum': 'BlockdevOptionsQuorum', - 'raw': 'BlockdevOptionsGenericFormat', + 'raw': 'BlockdevOptionsRaw', # TODO rbd: Wait for structured options 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options From ccc47808fd1cf3984860c60e81a8496f27dbb8a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Golembiovsk=C3=BD?= Date: Mon, 31 Oct 2016 11:27:41 +0100 Subject: [PATCH 26/29] qemu-iotests: test 'offset' and 'size' options in raw driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tomáš Golembiovský Signed-off-by: Kevin Wolf --- tests/qemu-iotests/171 | 212 +++++++++++++++++++++++++ tests/qemu-iotests/171.out | 313 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 526 insertions(+) create mode 100755 tests/qemu-iotests/171 create mode 100644 tests/qemu-iotests/171.out diff --git a/tests/qemu-iotests/171 b/tests/qemu-iotests/171 new file mode 100755 index 0000000000..257be10a0e --- /dev/null +++ b/tests/qemu-iotests/171 @@ -0,0 +1,212 @@ +#!/bin/bash +# +# Test 'offset' and 'size' options of the raw driver. Make sure we can't +# (or can) read and write outside of the image size. +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=tgolembi@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + + +# Create JSON with options +img_json() { + echo -n 'json:{"driver":"raw", ' + echo -n "\"offset\":\"$img_offset\", " + if [ "$img_size" -ne -1 ] ; then + echo -n "\"size\":\"$img_size\", " + fi + echo -n '"file": {' + echo -n '"driver":"file", ' + echo -n "\"filename\":\"$TEST_IMG\" " + echo -n "} }" +} + +do_general_test() { + if [ "$img_size" -ge 0 ] ; then + test_size=$img_size + else + test_size=$((size-img_offset)) + fi + + echo + echo "write to image" + $QEMU_IO -c "write -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io + + echo + echo "read the image" + $QEMU_IO -c "read -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io + + echo + echo "check that offset is respected" + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "write before image boundary" + $QEMU_IO -c "write $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "write across image boundary" + $QEMU_IO -c "write $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "write at image boundary" + $QEMU_IO -c "write $test_size 1" "$(img_json)" | _filter_qemu_io + + echo + echo "write after image boundary" + $QEMU_IO -c "write $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "writev before/after image boundary" + $QEMU_IO -c "writev $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io + + echo + echo "read before image boundary" + $QEMU_IO -c "read $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "read across image boundary" + $QEMU_IO -c "read $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "read at image boundary" + $QEMU_IO -c "read $test_size 1" "$(img_json)" | _filter_qemu_io + + echo + echo "read after image boundary" + $QEMU_IO -c "read $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "readv before/after image boundary" + $QEMU_IO -c "readv $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io + + echo + echo "fill image with pattern" + $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io + + echo + echo "write zeroes and check" + $QEMU_IO -c "write -z 0 512" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "write zeroes across image boundary" + $QEMU_IO -c "write -z $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "write zeroes at image boundary and check" + $QEMU_IO -c "write -z $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io + + echo + echo "fill image with pattern" + $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io + + echo + echo "discard and check" + $QEMU_IO -c "discard 0 512" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "discard across image boundary" + $QEMU_IO -c "discard $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "discard at image boundary and check" + $QEMU_IO -c "discard $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io +} + +echo +echo "== test 'offset' option ==" +size=4096 +img_offset=512 +img_size=-1 +_make_test_img $size +do_general_test +_cleanup_test_img + +echo +echo "== test 'offset' and 'size' options ==" +size=4096 +img_offset=512 +img_size=2048 +_make_test_img $size +do_general_test +_cleanup_test_img + +echo +echo "== test misaligned 'offset' ==" +size=4096 +img_offset=10 +img_size=2048 +_make_test_img $size +do_general_test +_cleanup_test_img + +echo +echo "== test reopen ==" +size=4096 +img_offset=512 +img_size=512 +_make_test_img $size +( +$QEMU_IO "$(img_json)" < Date: Thu, 27 Oct 2016 15:23:33 -0500 Subject: [PATCH 27/29] block: Mention replication in BlockdevDriver enum docs Missed in commit 82ac554. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- qapi/block-core.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index df3b9878f8..7299da33be 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1714,7 +1714,7 @@ # # @host_device, @host_cdrom: Since 2.1 # @gluster: Since 2.7 -# @nbd, @ssh: Since 2.8 +# @nbd, @replication, @ssh: Since 2.8 # # Since: 2.0 ## From 94d6a7a76e9df9919629428f6c598e2b97d9426c Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Mon, 31 Oct 2016 20:35:49 +0530 Subject: [PATCH 28/29] block/nfs: Introduce runtime_opts in NFS Make NFS block driver use various fine grained runtime_opts. Set .bdrv_parse_filename() to nfs_parse_filename() and introduce two new functions nfs_parse_filename() and nfs_parse_uri() to help parsing the URI. Add a new option "server" which then accepts a new struct NFSServer. Signed-off-by: Ashijeet Acharya [ kwolf: Fixed client->path ] Signed-off-by: Kevin Wolf --- block/nfs.c | 462 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 359 insertions(+), 103 deletions(-) diff --git a/block/nfs.c b/block/nfs.c index 88c60a9118..55c4e0b073 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -35,8 +35,15 @@ #include "qemu/uri.h" #include "qemu/cutils.h" #include "sysemu/sysemu.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qstring.h" +#include "qapi-visit.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include + #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) #define QEMU_NFS_MAX_DEBUG_LEVEL 2 @@ -49,6 +56,9 @@ typedef struct NFSClient { AioContext *aio_context; blkcnt_t st_blocks; bool cache_used; + NFSServer *server; + char *path; + int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug; } NFSClient; typedef struct NFSRPC { @@ -61,6 +71,122 @@ typedef struct NFSRPC { NFSClient *client; } NFSRPC; +static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) +{ + URI *uri = NULL; + QueryParams *qp = NULL; + int ret = -EINVAL, i; + + uri = uri_parse(filename); + if (!uri) { + error_setg(errp, "Invalid URI specified"); + goto out; + } + if (strcmp(uri->scheme, "nfs") != 0) { + error_setg(errp, "URI scheme must be 'nfs'"); + goto out; + } + + if (!uri->server) { + error_setg(errp, "missing hostname in URI"); + goto out; + } + + if (!uri->path) { + error_setg(errp, "missing file path in URI"); + goto out; + } + + qp = query_params_parse(uri->query); + if (!qp) { + error_setg(errp, "could not parse query parameters"); + goto out; + } + + qdict_put(options, "server.host", qstring_from_str(uri->server)); + qdict_put(options, "server.type", qstring_from_str("inet")); + qdict_put(options, "path", qstring_from_str(uri->path)); + + for (i = 0; i < qp->n; i++) { + if (!qp->p[i].value) { + error_setg(errp, "Value for NFS parameter expected: %s", + qp->p[i].name); + goto out; + } + if (parse_uint_full(qp->p[i].value, NULL, 0)) { + error_setg(errp, "Illegal value for NFS parameter: %s", + qp->p[i].name); + goto out; + } + if (!strcmp(qp->p[i].name, "uid")) { + qdict_put(options, "user", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "gid")) { + qdict_put(options, "group", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) { + qdict_put(options, "tcp-syn-count", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "readahead")) { + qdict_put(options, "readahead-size", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + qdict_put(options, "page-cache-size", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "debug")) { + qdict_put(options, "debug-level", + qstring_from_str(qp->p[i].value)); + } else { + error_setg(errp, "Unknown NFS parameter name: %s", + qp->p[i].name); + goto out; + } + } + ret = 0; +out: + if (qp) { + query_params_free(qp); + } + if (uri) { + uri_free(uri); + } + return ret; +} + +static bool nfs_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "group") || + !strcmp(qe->key, "tcp-syn-count") || + !strcmp(qe->key, "readahead-size") || + !strcmp(qe->key, "page-cache-size") || + !strcmp(qe->key, "debug-level") || + strstart(qe->key, "server.", NULL)) + { + error_setg(errp, "Option %s cannot be used with a filename", + qe->key); + return true; + } + } + + return false; +} + +static void nfs_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + if (nfs_has_filename_options_conflict(options, errp)) { + return; + } + + nfs_parse_uri(filename, options, errp); +} + static void nfs_process_read(void *arg); static void nfs_process_write(void *arg); @@ -221,15 +347,44 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs) return task.ret; } -/* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "nfs", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = "path", .type = QEMU_OPT_STRING, - .help = "URL to the NFS file", + .help = "Path of the image on the host", + }, + { + .name = "uid", + .type = QEMU_OPT_NUMBER, + .help = "UID value to use when talking to the server", + }, + { + .name = "gid", + .type = QEMU_OPT_NUMBER, + .help = "GID value to use when talking to the server", + }, + { + .name = "tcp-syncnt", + .type = QEMU_OPT_NUMBER, + .help = "Number of SYNs to send during the session establish", + }, + { + .name = "readahead", + .type = QEMU_OPT_NUMBER, + .help = "Set the readahead size in bytes", + }, + { + .name = "pagecache", + .type = QEMU_OPT_NUMBER, + .help = "Set the pagecache size in bytes", + }, + { + .name = "debug", + .type = QEMU_OPT_NUMBER, + .help = "Set the NFS debug level (max 2)", }, { /* end of list */ } }, @@ -272,25 +427,65 @@ static void nfs_file_close(BlockDriverState *bs) nfs_client_close(client); } -static int64_t nfs_client_open(NFSClient *client, const char *filename, +static NFSServer *nfs_config(QDict *options, Error **errp) +{ + NFSServer *server = NULL; + QDict *addr = NULL; + QObject *crumpled_addr = NULL; + Visitor *iv = NULL; + Error *local_error = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "NFS server address missing"); + goto out; + } + + crumpled_addr = qdict_crumple(addr, errp); + if (!crumpled_addr) { + goto out; + } + + iv = qobject_input_visitor_new(crumpled_addr, true); + visit_type_NFSServer(iv, NULL, &server, &local_error); + if (local_error) { + error_propagate(errp, local_error); + goto out; + } + +out: + QDECREF(addr); + qobject_decref(crumpled_addr); + visit_free(iv); + return server; +} + + +static int64_t nfs_client_open(NFSClient *client, QDict *options, int flags, Error **errp, int open_flags) { - int ret = -EINVAL, i; + int ret = -EINVAL; + QemuOpts *opts = NULL; + Error *local_err = NULL; struct stat st; - URI *uri; - QueryParams *qp = NULL; char *file = NULL, *strp = NULL; - uri = uri_parse(filename); - if (!uri) { - error_setg(errp, "Invalid URL specified"); + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; goto fail; } - if (!uri->server) { - error_setg(errp, "Invalid URL specified"); + + client->path = g_strdup(qemu_opt_get(opts, "path")); + if (!client->path) { + ret = -EINVAL; + error_setg(errp, "No path was specified"); goto fail; } - strp = strrchr(uri->path, '/'); + + strp = strrchr(client->path, '/'); if (strp == NULL) { error_setg(errp, "Invalid URL specified"); goto fail; @@ -298,85 +493,89 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, file = g_strdup(strp); *strp = 0; + /* Pop the config into our state object, Exit if invalid */ + client->server = nfs_config(options, errp); + if (!client->server) { + ret = -EINVAL; + goto fail; + } + client->context = nfs_init_context(); if (client->context == NULL) { error_setg(errp, "Failed to init NFS context"); goto fail; } - qp = query_params_parse(uri->query); - for (i = 0; i < qp->n; i++) { - unsigned long long val; - if (!qp->p[i].value) { - error_setg(errp, "Value for NFS parameter expected: %s", - qp->p[i].name); - goto fail; - } - if (parse_uint_full(qp->p[i].value, &val, 0)) { - error_setg(errp, "Illegal value for NFS parameter: %s", - qp->p[i].name); - goto fail; - } - if (!strcmp(qp->p[i].name, "uid")) { - nfs_set_uid(client->context, val); - } else if (!strcmp(qp->p[i].name, "gid")) { - nfs_set_gid(client->context, val); - } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) { - nfs_set_tcp_syncnt(client->context, val); -#ifdef LIBNFS_FEATURE_READAHEAD - } else if (!strcmp(qp->p[i].name, "readahead")) { - if (open_flags & BDRV_O_NOCACHE) { - error_setg(errp, "Cannot enable NFS readahead " - "if cache.direct = on"); - goto fail; - } - if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { - error_report("NFS Warning: Truncating NFS readahead" - " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); - val = QEMU_NFS_MAX_READAHEAD_SIZE; - } - nfs_set_readahead(client->context, val); -#ifdef LIBNFS_FEATURE_PAGECACHE - nfs_set_pagecache_ttl(client->context, 0); -#endif - client->cache_used = true; -#endif -#ifdef LIBNFS_FEATURE_PAGECACHE - nfs_set_pagecache_ttl(client->context, 0); - } else if (!strcmp(qp->p[i].name, "pagecache")) { - if (open_flags & BDRV_O_NOCACHE) { - error_setg(errp, "Cannot enable NFS pagecache " - "if cache.direct = on"); - goto fail; - } - if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) { - error_report("NFS Warning: Truncating NFS pagecache" - " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); - val = QEMU_NFS_MAX_PAGECACHE_SIZE; - } - nfs_set_pagecache(client->context, val); - nfs_set_pagecache_ttl(client->context, 0); - client->cache_used = true; -#endif -#ifdef LIBNFS_FEATURE_DEBUG - } else if (!strcmp(qp->p[i].name, "debug")) { - /* limit the maximum debug level to avoid potential flooding - * of our log files. */ - if (val > QEMU_NFS_MAX_DEBUG_LEVEL) { - error_report("NFS Warning: Limiting NFS debug level" - " to %d", QEMU_NFS_MAX_DEBUG_LEVEL); - val = QEMU_NFS_MAX_DEBUG_LEVEL; - } - nfs_set_debug(client->context, val); -#endif - } else { - error_setg(errp, "Unknown NFS parameter name: %s", - qp->p[i].name); - goto fail; - } + if (qemu_opt_get(opts, "uid")) { + client->uid = qemu_opt_get_number(opts, "uid", 0); + nfs_set_uid(client->context, client->uid); } - ret = nfs_mount(client->context, uri->server, uri->path); + if (qemu_opt_get(opts, "gid")) { + client->gid = qemu_opt_get_number(opts, "gid", 0); + nfs_set_gid(client->context, client->gid); + } + + if (qemu_opt_get(opts, "tcp-syncnt")) { + client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0); + nfs_set_tcp_syncnt(client->context, client->tcp_syncnt); + } + +#ifdef LIBNFS_FEATURE_READAHEAD + if (qemu_opt_get(opts, "readahead")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); + goto fail; + } + client->readahead = qemu_opt_get_number(opts, "readahead", 0); + if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) { + error_report("NFS Warning: Truncating NFS readahead " + "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); + client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE; + } + nfs_set_readahead(client->context, client->readahead); +#ifdef LIBNFS_FEATURE_PAGECACHE + nfs_set_pagecache_ttl(client->context, 0); +#endif + client->cache_used = true; + } +#endif + +#ifdef LIBNFS_FEATURE_PAGECACHE + if (qemu_opt_get(opts, "pagecache")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + client->pagecache = qemu_opt_get_number(opts, "pagecache", 0); + if (client->pagecache > QEMU_NFS_MAX_PAGECACHE_SIZE) { + error_report("NFS Warning: Truncating NFS pagecache " + "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); + client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, client->pagecache); + nfs_set_pagecache_ttl(client->context, 0); + client->cache_used = true; + } +#endif + +#ifdef LIBNFS_FEATURE_DEBUG + if (qemu_opt_get(opts, "debug")) { + client->debug = qemu_opt_get_number(opts, "debug", 0); + /* limit the maximum debug level to avoid potential flooding + * of our log files. */ + if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) { + error_report("NFS Warning: Limiting NFS debug level " + "to %d", QEMU_NFS_MAX_DEBUG_LEVEL); + client->debug = QEMU_NFS_MAX_DEBUG_LEVEL; + } + nfs_set_debug(client->context, client->debug); + } +#endif + + ret = nfs_mount(client->context, client->server->host, client->path); if (ret < 0) { error_setg(errp, "Failed to mount nfs share: %s", nfs_get_error(client->context)); @@ -409,14 +608,13 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE); client->st_blocks = st.st_blocks; client->has_zero_init = S_ISREG(st.st_mode); + *strp = '/'; goto out; + fail: nfs_client_close(client); out: - if (qp) { - query_params_free(qp); - } - uri_free(uri); + qemu_opts_del(opts); g_free(file); return ret; } @@ -425,28 +623,17 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { NFSClient *client = bs->opaque; int64_t ret; - QemuOpts *opts; - Error *local_err = NULL; client->aio_context = bdrv_get_aio_context(bs); - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto out; - } - ret = nfs_client_open(client, qemu_opt_get(opts, "filename"), + ret = nfs_client_open(client, options, (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY, errp, bs->open_flags); if (ret < 0) { - goto out; + return ret; } bs->total_sectors = ret; ret = 0; -out: - qemu_opts_del(opts); return ret; } @@ -468,6 +655,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) int ret = 0; int64_t total_size = 0; NFSClient *client = g_new0(NFSClient, 1); + QDict *options = NULL; client->aio_context = qemu_get_aio_context(); @@ -475,7 +663,13 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); - ret = nfs_client_open(client, url, O_CREAT, errp, 0); + options = qdict_new(); + ret = nfs_parse_uri(url, options, errp); + if (ret < 0) { + goto out; + } + + ret = nfs_client_open(client, options, O_CREAT, errp, 0); if (ret < 0) { goto out; } @@ -572,6 +766,67 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return 0; } +static void nfs_refresh_filename(BlockDriverState *bs, QDict *options) +{ + NFSClient *client = bs->opaque; + QDict *opts = qdict_new(); + QObject *server_qdict; + Visitor *ov; + + qdict_put(opts, "driver", qstring_from_str("nfs")); + + if (client->uid && !client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64, client->server->host, client->path, + client->uid); + } else if (!client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?gid=%" PRId64, client->server->host, client->path, + client->gid); + } else if (client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64, + client->server->host, client->path, client->uid, client->gid); + } else { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s", client->server->host, client->path); + } + + ov = qobject_output_visitor_new(&server_qdict); + visit_type_NFSServer(ov, NULL, &client->server, &error_abort); + visit_complete(ov, &server_qdict); + assert(qobject_type(server_qdict) == QTYPE_QDICT); + + qdict_put_obj(opts, "server", server_qdict); + qdict_put(opts, "path", qstring_from_str(client->path)); + + if (client->uid) { + qdict_put(opts, "uid", qint_from_int(client->uid)); + } + if (client->gid) { + qdict_put(opts, "gid", qint_from_int(client->gid)); + } + if (client->tcp_syncnt) { + qdict_put(opts, "tcp-syncnt", + qint_from_int(client->tcp_syncnt)); + } + if (client->readahead) { + qdict_put(opts, "readahead", + qint_from_int(client->readahead)); + } + if (client->pagecache) { + qdict_put(opts, "pagecache", + qint_from_int(client->pagecache)); + } + if (client->debug) { + qdict_put(opts, "debug", qint_from_int(client->debug)); + } + + visit_free(ov); + qdict_flatten(opts); + bs->full_open_options = opts; +} + #ifdef LIBNFS_FEATURE_PAGECACHE static void nfs_invalidate_cache(BlockDriverState *bs, Error **errp) @@ -586,7 +841,7 @@ static BlockDriver bdrv_nfs = { .protocol_name = "nfs", .instance_size = sizeof(NFSClient), - .bdrv_needs_filename = true, + .bdrv_parse_filename = nfs_parse_filename, .create_opts = &nfs_create_opts, .bdrv_has_zero_init = nfs_has_zero_init, @@ -604,6 +859,7 @@ static BlockDriver bdrv_nfs = { .bdrv_detach_aio_context = nfs_detach_aio_context, .bdrv_attach_aio_context = nfs_attach_aio_context, + .bdrv_refresh_filename = nfs_refresh_filename, #ifdef LIBNFS_FEATURE_PAGECACHE .bdrv_invalidate_cache = nfs_invalidate_cache, From aa2623d817e7ecb62fd917e475ccc0d42dd1a413 Mon Sep 17 00:00:00 2001 From: Ashijeet Acharya Date: Mon, 31 Oct 2016 20:35:50 +0530 Subject: [PATCH 29/29] qapi: allow blockdev-add for NFS Introduce new object 'BlockdevOptionsNFS' in qapi/block-core.json to support blockdev-add for NFS network protocol driver. Also make a new struct NFSServer to support tcp connection. Signed-off-by: Ashijeet Acharya Signed-off-by: Kevin Wolf --- qapi/block-core.json | 74 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 3 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 7299da33be..5af040b740 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1714,14 +1714,14 @@ # # @host_device, @host_cdrom: Since 2.1 # @gluster: Since 2.7 -# @nbd, @replication, @ssh: Since 2.8 +# @nbd, @nfs, @replication, @ssh: Since 2.8 # # Since: 2.0 ## { 'enum': 'BlockdevDriver', 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'luks', 'nbd', 'null-aio', + 'host_device', 'http', 'https', 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'replication', 'ssh', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } @@ -2239,6 +2239,74 @@ 'data': { 'mode': 'ReplicationMode', '*top-id': 'str' } } +## +# @NFSTransport +# +# An enumeration of NFS transport types +# +# @inet: TCP transport +# +# Since 2.8 +## +{ 'enum': 'NFSTransport', + 'data': [ 'inet' ] } + +## +# @NFSServer +# +# Captures the address of the socket +# +# @type: transport type used for NFS (only TCP supported) +# +# @host: host address for NFS server +# +# Since 2.8 +## +{ 'struct': 'NFSServer', + 'data': { 'type': 'NFSTransport', + 'host': 'str' } } + +## +# @BlockdevOptionsNfs +# +# Driver specific block device option for NFS +# +# @server: host address +# +# @path: path of the image on the host +# +# @user: #optional UID value to use when talking to the +# server (defaults to 65534 on Windows and getuid() +# on unix) +# +# @group: #optional GID value to use when talking to the +# server (defaults to 65534 on Windows and getgid() +# in unix) +# +# @tcp-syn-count: #optional number of SYNs during the session +# establishment (defaults to libnfs default) +# +# @readahead-size: #optional set the readahead size in bytes (defaults +# to libnfs default) +# +# @page-cache-size: #optional set the pagecache size in bytes (defaults +# to libnfs default) +# +# @debug-level: #optional set the NFS debug level (max 2) (defaults +# to libnfs default) +# +# Since 2.8 +## +{ 'struct': 'BlockdevOptionsNfs', + 'data': { 'server': 'NFSServer', + 'path': 'str', + '*user': 'int', + '*group': 'int', + '*tcp-syn-count': 'int', + '*readahead-size': 'int', + '*page-cache-size': 'int', + '*debug-level': 'int' } } + ## # @BlockdevOptionsCurl # @@ -2329,7 +2397,7 @@ # TODO iscsi: Wait for structured options 'luks': 'BlockdevOptionsLUKS', 'nbd': 'BlockdevOptionsNbd', -# TODO nfs: Wait for structured options + 'nfs': 'BlockdevOptionsNfs', 'null-aio': 'BlockdevOptionsNull', 'null-co': 'BlockdevOptionsNull', 'parallels': 'BlockdevOptionsGenericFormat',