From a410a7f1af725cee94c7c847ddc85be043d88439 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 28 Feb 2017 22:33:40 +0300 Subject: [PATCH 01/12] backup: allow target without .bdrv_get_info Currently backup to nbd target is broken, as nbd doesn't have .bdrv_get_info realization. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/backup.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/block/backup.c b/block/backup.c index d1ab617c7e..d3d20dbcb0 100644 --- a/block/backup.c +++ b/block/backup.c @@ -24,6 +24,7 @@ #include "qemu/cutils.h" #include "sysemu/block-backend.h" #include "qemu/bitmap.h" +#include "qemu/error-report.h" #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) #define SLICE_TIME 100000000ULL /* ns */ @@ -648,7 +649,16 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, * backup cluster size is smaller than the target cluster size. Even for * targets with a backing file, try to avoid COW if possible. */ ret = bdrv_get_info(target, &bdi); - if (ret < 0 && !target->backing) { + if (ret == -ENOTSUP && !target->backing) { + /* Cluster size is not defined */ + error_report("WARNING: The target block device doesn't provide " + "information about the block size and it doesn't have a " + "backing file. The default block size of %u bytes is " + "used. If the actual block size of the target exceeds " + "this default, the backup may be unusable", + BACKUP_CLUSTER_SIZE_DEFAULT); + job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT; + } else if (ret < 0 && !target->backing) { error_setg_errno(errp, -ret, "Couldn't determine the cluster size of the target image, " "which has no backing file"); From 9103f1ceb46614b150bcbc3c9a4fbc72b47fedcc Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 8 Mar 2017 20:08:14 +0800 Subject: [PATCH 02/12] file-posix: Consider max_segments for BlockLimits.max_transfer BlockLimits.max_transfer can be too high without this fix, guest will encounter I/O error or even get paused with werror=stop or rerror=stop. The cause is explained below. Linux has a separate limit, /sys/block/.../queue/max_segments, which in the worst case can be more restrictive than the BLKSECTGET which we already consider (note that they are two different things). So, the failure scenario before this patch is: 1) host device has max_sectors_kb = 4096 and max_segments = 64; 2) guest learns max_sectors_kb limit from QEMU, but doesn't know max_segments; 3) guest issues e.g. a 512KB request thinking it's okay, but actually it's not, because it will be passed through to host device as an SG_IO req that has niov > 64; 4) host kernel doesn't like the segmenting of the request, and returns -EINVAL; This patch checks the max_segments sysfs entry for the host device and calculates a "conservative" bytes limit using the page size, which is then merged into the existing max_transfer limit. Guest will discover this from the usual virtual block device interfaces. (In the case of scsi-generic, it will be done in the INQUIRY reply interception in device model.) The other possibility is to actually propagate it as a separate limit, but it's not better. On the one hand, there is a big complication: the limit is per-LUN in QEMU PoV (because we can attach LUNs from different host HBAs to the same virtio-scsi bus), but the channel to communicate it in a per-LUN manner is missing down the stack; on the other hand, two limits versus one doesn't change much about the valid size of I/O (because guest has no control over host segmenting). Also, the idea to fall back to bounce buffering in QEMU, upon -EINVAL, was explored. Unfortunately there is no neat way to ensure the bounce buffer is less segmented (in terms of DMA addr) than the guest buffer. Practically, this bug is not very common. It is only reported on a Emulex (lpfc), so it's okay to get it fixed in the easier way. Reviewed-by: Paolo Bonzini Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/file-posix.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/block/file-posix.c b/block/file-posix.c index 4de1abd023..c4c06637ef 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -668,6 +668,48 @@ static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd) #endif } +static int hdev_get_max_segments(const struct stat *st) +{ +#ifdef CONFIG_LINUX + char buf[32]; + const char *end; + char *sysfspath; + int ret; + int fd = -1; + long max_segments; + + sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments", + major(st->st_rdev), minor(st->st_rdev)); + fd = open(sysfspath, O_RDONLY); + if (fd == -1) { + ret = -errno; + goto out; + } + do { + ret = read(fd, buf, sizeof(buf)); + } while (ret == -1 && errno == EINTR); + if (ret < 0) { + ret = -errno; + goto out; + } else if (ret == 0) { + ret = -EIO; + goto out; + } + buf[ret] = 0; + /* The file is ended with '\n', pass 'end' to accept that. */ + ret = qemu_strtol(buf, &end, 10, &max_segments); + if (ret == 0 && end && *end == '\n') { + ret = max_segments; + } + +out: + g_free(sysfspath); + return ret; +#else + return -ENOTSUP; +#endif +} + static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { BDRVRawState *s = bs->opaque; @@ -679,6 +721,11 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) { bs->bl.max_transfer = pow2floor(ret); } + ret = hdev_get_max_segments(&st); + if (ret > 0) { + bs->bl.max_transfer = MIN(bs->bl.max_transfer, + ret * getpagesize()); + } } } From e32ccbc6e9ff95330e916fd72c541894d440fcd5 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 8 Mar 2017 14:02:16 -0600 Subject: [PATCH 03/12] block: Drop unmaintained 'archipelago' driver The driver has failed to build since commit da34e65, in qemu 2.6, due to a missing include of qapi/error.h for error_setg(). Since no one has complained in three releases, it is easier to remove the dead code than to keep it around, especially since it is not being built by default and therefore prone to bitrot. Signed-off-by: Eric Blake Reviewed-by: Max Reitz Reviewed-by: Fam Zheng Signed-off-by: Kevin Wolf --- MAINTAINERS | 8 - block/Makefile.objs | 2 - block/archipelago.c | 1079 ------------------------------ configure | 43 -- qapi/block-core.json | 33 +- tests/qemu-iotests/025 | 2 +- tests/qemu-iotests/common | 6 - tests/qemu-iotests/common.filter | 4 +- tests/qemu-iotests/common.rc | 8 - 9 files changed, 4 insertions(+), 1181 deletions(-) delete mode 100644 block/archipelago.c diff --git a/MAINTAINERS b/MAINTAINERS index e3edd04982..bf1aafb1c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1674,14 +1674,6 @@ S: Supported F: block/ssh.c T: git git://github.com/codyprime/qemu-kvm-jtc.git block -ARCHIPELAGO -M: Chrysostomos Nanakos -M: Jeff Cody -L: qemu-block@nongnu.org -S: Maintained -F: block/archipelago.c -T: git git://github.com/codyprime/qemu-kvm-jtc.git block - CURL M: Jeff Cody L: qemu-block@nongnu.org diff --git a/block/Makefile.objs b/block/Makefile.objs index c6bd14e883..de96f8ee80 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -19,7 +19,6 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o -block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o @@ -41,7 +40,6 @@ gluster.o-cflags := $(GLUSTERFS_CFLAGS) gluster.o-libs := $(GLUSTERFS_LIBS) ssh.o-cflags := $(LIBSSH2_CFLAGS) ssh.o-libs := $(LIBSSH2_LIBS) -archipelago.o-libs := $(ARCHIPELAGO_LIBS) block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o dmg-bz2.o-libs := $(BZIP2_LIBS) qcow.o-libs := -lz diff --git a/block/archipelago.c b/block/archipelago.c deleted file mode 100644 index 2449cfc702..0000000000 --- a/block/archipelago.c +++ /dev/null @@ -1,1079 +0,0 @@ -/* - * QEMU Block driver for Archipelago - * - * Copyright (C) 2014 Chrysostomos Nanakos - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -/* - * VM Image on Archipelago volume is specified like this: - * - * file.driver=archipelago,file.volume= - * [,file.mport=[,file.vport=] - * [,file.segment=]] - * - * or - * - * file=archipelago:[/mport=[:vport=][: - * segment=]] - * - * 'archipelago' is the protocol. - * - * 'mport' is the port number on which mapperd is listening. This is optional - * and if not specified, QEMU will make Archipelago to use the default port. - * - * 'vport' is the port number on which vlmcd is listening. This is optional - * and if not specified, QEMU will make Archipelago to use the default port. - * - * 'segment' is the name of the shared memory segment Archipelago stack - * is using. This is optional and if not specified, QEMU will make Archipelago - * to use the default value, 'archipelago'. - * - * Examples: - * - * file.driver=archipelago,file.volume=my_vm_volume - * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123 - * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, - * file.vport=1234 - * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, - * file.vport=1234,file.segment=my_segment - * - * or - * - * file=archipelago:my_vm_volume - * file=archipelago:my_vm_volume/mport=123 - * file=archipelago:my_vm_volume/mport=123:vport=1234 - * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment - * - */ - -#include "qemu/osdep.h" -#include "qemu/cutils.h" -#include "block/block_int.h" -#include "qemu/error-report.h" -#include "qemu/thread.h" -#include "qapi/qmp/qint.h" -#include "qapi/qmp/qstring.h" -#include "qapi/qmp/qjson.h" -#include "qemu/atomic.h" - -#include -#include - -#define MAX_REQUEST_SIZE 524288 - -#define ARCHIPELAGO_OPT_VOLUME "volume" -#define ARCHIPELAGO_OPT_SEGMENT "segment" -#define ARCHIPELAGO_OPT_MPORT "mport" -#define ARCHIPELAGO_OPT_VPORT "vport" -#define ARCHIPELAGO_DFL_MPORT 1001 -#define ARCHIPELAGO_DFL_VPORT 501 - -#define archipelagolog(fmt, ...) \ - do { \ - fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \ - } while (0) - -typedef enum { - ARCHIP_OP_READ, - ARCHIP_OP_WRITE, - ARCHIP_OP_FLUSH, - ARCHIP_OP_VOLINFO, - ARCHIP_OP_TRUNCATE, -} ARCHIPCmd; - -typedef struct ArchipelagoAIOCB { - BlockAIOCB common; - struct BDRVArchipelagoState *s; - QEMUIOVector *qiov; - ARCHIPCmd cmd; - int status; - int64_t size; - int64_t ret; -} ArchipelagoAIOCB; - -typedef struct BDRVArchipelagoState { - ArchipelagoAIOCB *event_acb; - char *volname; - char *segment_name; - uint64_t size; - /* Archipelago specific */ - struct xseg *xseg; - struct xseg_port *port; - xport srcport; - xport sport; - xport mportno; - xport vportno; - QemuMutex archip_mutex; - QemuCond archip_cond; - bool is_signaled; - /* Request handler specific */ - QemuThread request_th; - QemuCond request_cond; - QemuMutex request_mutex; - bool th_is_signaled; - bool stopping; -} BDRVArchipelagoState; - -typedef struct ArchipelagoSegmentedRequest { - size_t count; - size_t total; - int ref; - int failed; -} ArchipelagoSegmentedRequest; - -typedef struct AIORequestData { - const char *volname; - off_t offset; - size_t size; - uint64_t bufidx; - int ret; - int op; - ArchipelagoAIOCB *aio_cb; - ArchipelagoSegmentedRequest *segreq; -} AIORequestData; - -static void qemu_archipelago_complete_aio(void *opaque); - -static void init_local_signal(struct xseg *xseg, xport sport, xport srcport) -{ - if (xseg && (sport != srcport)) { - xseg_init_local_signal(xseg, srcport); - sport = srcport; - } -} - -static void archipelago_finish_aiocb(AIORequestData *reqdata) -{ - if (reqdata->aio_cb->ret != reqdata->segreq->total) { - reqdata->aio_cb->ret = -EIO; - } else if (reqdata->aio_cb->ret == reqdata->segreq->total) { - reqdata->aio_cb->ret = 0; - } - aio_bh_schedule_oneshot( - bdrv_get_aio_context(reqdata->aio_cb->common.bs), - qemu_archipelago_complete_aio, reqdata - ); -} - -static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port, - struct xseg_request *expected_req) -{ - struct xseg_request *req; - xseg_prepare_wait(xseg, srcport); - void *psd = xseg_get_signal_desc(xseg, port); - while (1) { - req = xseg_receive(xseg, srcport, X_NONBLOCK); - if (req) { - if (req != expected_req) { - archipelagolog("Unknown received request\n"); - xseg_put_request(xseg, req, srcport); - } else if (!(req->state & XS_SERVED)) { - return -1; - } else { - break; - } - } - xseg_wait_signal(xseg, psd, 100000UL); - } - xseg_cancel_wait(xseg, srcport); - return 0; -} - -static void xseg_request_handler(void *state) -{ - BDRVArchipelagoState *s = (BDRVArchipelagoState *) state; - void *psd = xseg_get_signal_desc(s->xseg, s->port); - qemu_mutex_lock(&s->request_mutex); - - while (!s->stopping) { - struct xseg_request *req; - void *data; - xseg_prepare_wait(s->xseg, s->srcport); - req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK); - if (req) { - AIORequestData *reqdata; - ArchipelagoSegmentedRequest *segreq; - xseg_get_req_data(s->xseg, req, (void **)&reqdata); - - switch (reqdata->op) { - case ARCHIP_OP_READ: - data = xseg_get_data(s->xseg, req); - segreq = reqdata->segreq; - segreq->count += req->serviced; - - qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx, - data, - req->serviced); - - xseg_put_request(s->xseg, req, s->srcport); - - if (atomic_fetch_dec(&segreq->ref) == 1) { - if (!segreq->failed) { - reqdata->aio_cb->ret = segreq->count; - archipelago_finish_aiocb(reqdata); - g_free(segreq); - } else { - g_free(segreq); - g_free(reqdata); - } - } else { - g_free(reqdata); - } - break; - case ARCHIP_OP_WRITE: - case ARCHIP_OP_FLUSH: - segreq = reqdata->segreq; - segreq->count += req->serviced; - xseg_put_request(s->xseg, req, s->srcport); - - if (atomic_fetch_dec(&segreq->ref) == 1) { - if (!segreq->failed) { - reqdata->aio_cb->ret = segreq->count; - archipelago_finish_aiocb(reqdata); - g_free(segreq); - } else { - g_free(segreq); - g_free(reqdata); - } - } else { - g_free(reqdata); - } - break; - case ARCHIP_OP_VOLINFO: - case ARCHIP_OP_TRUNCATE: - s->is_signaled = true; - qemu_cond_signal(&s->archip_cond); - break; - } - } else { - xseg_wait_signal(s->xseg, psd, 100000UL); - } - xseg_cancel_wait(s->xseg, s->srcport); - } - - s->th_is_signaled = true; - qemu_cond_signal(&s->request_cond); - qemu_mutex_unlock(&s->request_mutex); - qemu_thread_exit(NULL); -} - -static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s) -{ - if (xseg_initialize()) { - archipelagolog("Cannot initialize XSEG\n"); - goto err_exit; - } - - s->xseg = xseg_join("posix", s->segment_name, - "posixfd", NULL); - if (!s->xseg) { - archipelagolog("Cannot join XSEG shared memory segment\n"); - goto err_exit; - } - s->port = xseg_bind_dynport(s->xseg); - s->srcport = s->port->portno; - init_local_signal(s->xseg, s->sport, s->srcport); - return 0; - -err_exit: - return -1; -} - -static int qemu_archipelago_init(BDRVArchipelagoState *s) -{ - int ret; - - ret = qemu_archipelago_xseg_init(s); - if (ret < 0) { - error_report("Cannot initialize XSEG. Aborting..."); - goto err_exit; - } - - qemu_cond_init(&s->archip_cond); - qemu_mutex_init(&s->archip_mutex); - qemu_cond_init(&s->request_cond); - qemu_mutex_init(&s->request_mutex); - s->th_is_signaled = false; - qemu_thread_create(&s->request_th, "xseg_io_th", - (void *) xseg_request_handler, - (void *) s, QEMU_THREAD_JOINABLE); - -err_exit: - return ret; -} - -static void qemu_archipelago_complete_aio(void *opaque) -{ - AIORequestData *reqdata = (AIORequestData *) opaque; - ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb; - - aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); - aio_cb->status = 0; - - qemu_aio_unref(aio_cb); - g_free(reqdata); -} - -static void xseg_find_port(char *pstr, const char *needle, xport *aport) -{ - const char *a; - char *endptr = NULL; - unsigned long port; - if (strstart(pstr, needle, &a)) { - if (strlen(a) > 0) { - port = strtoul(a, &endptr, 10); - if (strlen(endptr)) { - *aport = -2; - return; - } - *aport = (xport) port; - } - } -} - -static void xseg_find_segment(char *pstr, const char *needle, - char **segment_name) -{ - const char *a; - if (strstart(pstr, needle, &a)) { - if (strlen(a) > 0) { - *segment_name = g_strdup(a); - } - } -} - -static void parse_filename_opts(const char *filename, Error **errp, - char **volume, char **segment_name, - xport *mport, xport *vport) -{ - const char *start; - char *tokens[4], *ds; - int idx; - xport lmport = NoPort, lvport = NoPort; - - strstart(filename, "archipelago:", &start); - - ds = g_strdup(start); - tokens[0] = strtok(ds, "/"); - tokens[1] = strtok(NULL, ":"); - tokens[2] = strtok(NULL, ":"); - tokens[3] = strtok(NULL, "\0"); - - if (!strlen(tokens[0])) { - error_setg(errp, "volume name must be specified first"); - g_free(ds); - return; - } - - for (idx = 1; idx < 4; idx++) { - if (tokens[idx] != NULL) { - if (strstart(tokens[idx], "mport=", NULL)) { - xseg_find_port(tokens[idx], "mport=", &lmport); - } - if (strstart(tokens[idx], "vport=", NULL)) { - xseg_find_port(tokens[idx], "vport=", &lvport); - } - if (strstart(tokens[idx], "segment=", NULL)) { - xseg_find_segment(tokens[idx], "segment=", segment_name); - } - } - } - - if ((lmport == -2) || (lvport == -2)) { - error_setg(errp, "mport and/or vport must be set"); - g_free(ds); - return; - } - *volume = g_strdup(tokens[0]); - *mport = lmport; - *vport = lvport; - g_free(ds); -} - -static void archipelago_parse_filename(const char *filename, QDict *options, - Error **errp) -{ - const char *start; - char *volume = NULL, *segment_name = NULL; - xport mport = NoPort, vport = NoPort; - - if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME) - || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT) - || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT) - || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) { - error_setg(errp, "volume/mport/vport/segment and a file name may not" - " be specified at the same time"); - return; - } - - if (!strstart(filename, "archipelago:", &start)) { - error_setg(errp, "File name must start with 'archipelago:'"); - return; - } - - if (!strlen(start) || strstart(start, "/", NULL)) { - error_setg(errp, "volume name must be specified"); - return; - } - - parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport); - - if (volume) { - qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume)); - g_free(volume); - } - if (segment_name) { - qdict_put(options, ARCHIPELAGO_OPT_SEGMENT, - qstring_from_str(segment_name)); - g_free(segment_name); - } - if (mport != NoPort) { - qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport)); - } - if (vport != NoPort) { - qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport)); - } -} - -static QemuOptsList archipelago_runtime_opts = { - .name = "archipelago", - .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head), - .desc = { - { - .name = ARCHIPELAGO_OPT_VOLUME, - .type = QEMU_OPT_STRING, - .help = "Name of the volume image", - }, - { - .name = ARCHIPELAGO_OPT_SEGMENT, - .type = QEMU_OPT_STRING, - .help = "Name of the Archipelago shared memory segment", - }, - { - .name = ARCHIPELAGO_OPT_MPORT, - .type = QEMU_OPT_NUMBER, - .help = "Archipelago mapperd port number" - }, - { - .name = ARCHIPELAGO_OPT_VPORT, - .type = QEMU_OPT_NUMBER, - .help = "Archipelago vlmcd port number" - - }, - { /* end of list */ } - }, -}; - -static int qemu_archipelago_open(BlockDriverState *bs, - QDict *options, - int bdrv_flags, - Error **errp) -{ - int ret = 0; - const char *volume, *segment_name; - QemuOpts *opts; - Error *local_err = NULL; - BDRVArchipelagoState *s = bs->opaque; - - opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto err_exit; - } - - s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT, - ARCHIPELAGO_DFL_MPORT); - s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT, - ARCHIPELAGO_DFL_VPORT); - - segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT); - if (segment_name == NULL) { - s->segment_name = g_strdup("archipelago"); - } else { - s->segment_name = g_strdup(segment_name); - } - - volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME); - if (volume == NULL) { - error_setg(errp, "archipelago block driver requires the 'volume'" - " option"); - ret = -EINVAL; - goto err_exit; - } - s->volname = g_strdup(volume); - - /* Initialize XSEG, join shared memory segment */ - ret = qemu_archipelago_init(s); - if (ret < 0) { - error_setg(errp, "cannot initialize XSEG and join shared " - "memory segment"); - goto err_exit; - } - - qemu_opts_del(opts); - return 0; - -err_exit: - g_free(s->volname); - g_free(s->segment_name); - qemu_opts_del(opts); - return ret; -} - -static void qemu_archipelago_close(BlockDriverState *bs) -{ - int r, targetlen; - char *target; - struct xseg_request *req; - BDRVArchipelagoState *s = bs->opaque; - - s->stopping = true; - - qemu_mutex_lock(&s->request_mutex); - while (!s->th_is_signaled) { - qemu_cond_wait(&s->request_cond, - &s->request_mutex); - } - qemu_mutex_unlock(&s->request_mutex); - qemu_thread_join(&s->request_th); - qemu_cond_destroy(&s->request_cond); - qemu_mutex_destroy(&s->request_mutex); - - qemu_cond_destroy(&s->archip_cond); - qemu_mutex_destroy(&s->archip_mutex); - - targetlen = strlen(s->volname); - req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); - if (!req) { - archipelagolog("Cannot get XSEG request\n"); - goto err_exit; - } - r = xseg_prep_request(s->xseg, req, targetlen, 0); - if (r < 0) { - xseg_put_request(s->xseg, req, s->srcport); - archipelagolog("Cannot prepare XSEG close request\n"); - goto err_exit; - } - - target = xseg_get_target(s->xseg, req); - memcpy(target, s->volname, targetlen); - req->size = req->datalen; - req->offset = 0; - req->op = X_CLOSE; - - xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); - if (p == NoPort) { - xseg_put_request(s->xseg, req, s->srcport); - archipelagolog("Cannot submit XSEG close request\n"); - goto err_exit; - } - - xseg_signal(s->xseg, p); - wait_reply(s->xseg, s->srcport, s->port, req); - - xseg_put_request(s->xseg, req, s->srcport); - -err_exit: - g_free(s->volname); - g_free(s->segment_name); - xseg_quit_local_signal(s->xseg, s->srcport); - xseg_leave_dynport(s->xseg, s->port); - xseg_leave(s->xseg); -} - -static int qemu_archipelago_create_volume(Error **errp, const char *volname, - char *segment_name, - uint64_t size, xport mportno, - xport vportno) -{ - int ret, targetlen; - struct xseg *xseg = NULL; - struct xseg_request *req; - struct xseg_request_clone *xclone; - struct xseg_port *port; - xport srcport = NoPort, sport = NoPort; - char *target; - - /* Try default values if none has been set */ - if (mportno == (xport) -1) { - mportno = ARCHIPELAGO_DFL_MPORT; - } - - if (vportno == (xport) -1) { - vportno = ARCHIPELAGO_DFL_VPORT; - } - - if (xseg_initialize()) { - error_setg(errp, "Cannot initialize XSEG"); - return -1; - } - - xseg = xseg_join("posix", segment_name, - "posixfd", NULL); - - if (!xseg) { - error_setg(errp, "Cannot join XSEG shared memory segment"); - return -1; - } - - port = xseg_bind_dynport(xseg); - srcport = port->portno; - init_local_signal(xseg, sport, srcport); - - req = xseg_get_request(xseg, srcport, mportno, X_ALLOC); - if (!req) { - error_setg(errp, "Cannot get XSEG request"); - return -1; - } - - targetlen = strlen(volname); - ret = xseg_prep_request(xseg, req, targetlen, - sizeof(struct xseg_request_clone)); - if (ret < 0) { - error_setg(errp, "Cannot prepare XSEG request"); - goto err_exit; - } - - target = xseg_get_target(xseg, req); - if (!target) { - error_setg(errp, "Cannot get XSEG target."); - goto err_exit; - } - memcpy(target, volname, targetlen); - xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req); - memset(xclone->target, 0 , XSEG_MAX_TARGETLEN); - xclone->targetlen = 0; - xclone->size = size; - req->offset = 0; - req->size = req->datalen; - req->op = X_CLONE; - - xport p = xseg_submit(xseg, req, srcport, X_ALLOC); - if (p == NoPort) { - error_setg(errp, "Could not submit XSEG request"); - goto err_exit; - } - xseg_signal(xseg, p); - - ret = wait_reply(xseg, srcport, port, req); - if (ret < 0) { - error_setg(errp, "wait_reply() error."); - } - - xseg_put_request(xseg, req, srcport); - xseg_quit_local_signal(xseg, srcport); - xseg_leave_dynport(xseg, port); - xseg_leave(xseg); - return ret; - -err_exit: - xseg_put_request(xseg, req, srcport); - xseg_quit_local_signal(xseg, srcport); - xseg_leave_dynport(xseg, port); - xseg_leave(xseg); - return -1; -} - -static int qemu_archipelago_create(const char *filename, - QemuOpts *options, - Error **errp) -{ - int ret = 0; - uint64_t total_size = 0; - char *volname = NULL, *segment_name = NULL; - const char *start; - xport mport = NoPort, vport = NoPort; - - if (!strstart(filename, "archipelago:", &start)) { - error_setg(errp, "File name must start with 'archipelago:'"); - return -1; - } - - if (!strlen(start) || strstart(start, "/", NULL)) { - error_setg(errp, "volume name must be specified"); - return -1; - } - - parse_filename_opts(filename, errp, &volname, &segment_name, &mport, - &vport); - total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0), - BDRV_SECTOR_SIZE); - - if (segment_name == NULL) { - segment_name = g_strdup("archipelago"); - } - - /* Create an Archipelago volume */ - ret = qemu_archipelago_create_volume(errp, volname, segment_name, - total_size, mport, - vport); - - g_free(volname); - g_free(segment_name); - return ret; -} - -static const AIOCBInfo archipelago_aiocb_info = { - .aiocb_size = sizeof(ArchipelagoAIOCB), -}; - -static int archipelago_submit_request(BDRVArchipelagoState *s, - uint64_t bufidx, - size_t count, - off_t offset, - ArchipelagoAIOCB *aio_cb, - ArchipelagoSegmentedRequest *segreq, - int op) -{ - int ret, targetlen; - char *target; - void *data = NULL; - struct xseg_request *req; - AIORequestData *reqdata = g_new(AIORequestData, 1); - - targetlen = strlen(s->volname); - req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); - if (!req) { - archipelagolog("Cannot get XSEG request\n"); - goto err_exit2; - } - ret = xseg_prep_request(s->xseg, req, targetlen, count); - if (ret < 0) { - archipelagolog("Cannot prepare XSEG request\n"); - goto err_exit; - } - target = xseg_get_target(s->xseg, req); - if (!target) { - archipelagolog("Cannot get XSEG target\n"); - goto err_exit; - } - memcpy(target, s->volname, targetlen); - req->size = count; - req->offset = offset; - - switch (op) { - case ARCHIP_OP_READ: - req->op = X_READ; - break; - case ARCHIP_OP_WRITE: - req->op = X_WRITE; - break; - case ARCHIP_OP_FLUSH: - req->op = X_FLUSH; - break; - } - reqdata->volname = s->volname; - reqdata->offset = offset; - reqdata->size = count; - reqdata->bufidx = bufidx; - reqdata->aio_cb = aio_cb; - reqdata->segreq = segreq; - reqdata->op = op; - - xseg_set_req_data(s->xseg, req, reqdata); - if (op == ARCHIP_OP_WRITE) { - data = xseg_get_data(s->xseg, req); - if (!data) { - archipelagolog("Cannot get XSEG data\n"); - goto err_exit; - } - qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count); - } - - xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); - if (p == NoPort) { - archipelagolog("Could not submit XSEG request\n"); - goto err_exit; - } - xseg_signal(s->xseg, p); - return 0; - -err_exit: - g_free(reqdata); - xseg_put_request(s->xseg, req, s->srcport); - return -EIO; -err_exit2: - g_free(reqdata); - return -EIO; -} - -static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s, - size_t count, - off_t offset, - ArchipelagoAIOCB *aio_cb, - int op) -{ - int ret, segments_nr; - size_t pos = 0; - ArchipelagoSegmentedRequest *segreq; - - segreq = g_new0(ArchipelagoSegmentedRequest, 1); - - if (op == ARCHIP_OP_FLUSH) { - segments_nr = 1; - } else { - segments_nr = (int)(count / MAX_REQUEST_SIZE) + \ - ((count % MAX_REQUEST_SIZE) ? 1 : 0); - } - segreq->total = count; - atomic_mb_set(&segreq->ref, segments_nr); - - while (segments_nr > 1) { - ret = archipelago_submit_request(s, pos, - MAX_REQUEST_SIZE, - offset + pos, - aio_cb, segreq, op); - - if (ret < 0) { - goto err_exit; - } - count -= MAX_REQUEST_SIZE; - pos += MAX_REQUEST_SIZE; - segments_nr--; - } - ret = archipelago_submit_request(s, pos, count, offset + pos, - aio_cb, segreq, op); - - if (ret < 0) { - goto err_exit; - } - return 0; - -err_exit: - segreq->failed = 1; - if (atomic_fetch_sub(&segreq->ref, segments_nr) == segments_nr) { - g_free(segreq); - } - return ret; -} - -static BlockAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque, - int op) -{ - ArchipelagoAIOCB *aio_cb; - BDRVArchipelagoState *s = bs->opaque; - int64_t size, off; - int ret; - - aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque); - aio_cb->cmd = op; - aio_cb->qiov = qiov; - - aio_cb->ret = 0; - aio_cb->s = s; - aio_cb->status = -EINPROGRESS; - - off = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; - aio_cb->size = size; - - ret = archipelago_aio_segmented_rw(s, size, off, - aio_cb, op); - if (ret < 0) { - goto err_exit; - } - return &aio_cb->common; - -err_exit: - error_report("qemu_archipelago_aio_rw(): I/O Error"); - qemu_aio_unref(aio_cb); - return NULL; -} - -static BlockAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, - opaque, ARCHIP_OP_READ); -} - -static BlockAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, - opaque, ARCHIP_OP_WRITE); -} - -static int64_t archipelago_volume_info(BDRVArchipelagoState *s) -{ - uint64_t size; - int ret, targetlen; - struct xseg_request *req; - struct xseg_reply_info *xinfo; - AIORequestData *reqdata = g_new(AIORequestData, 1); - - const char *volname = s->volname; - targetlen = strlen(volname); - req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); - if (!req) { - archipelagolog("Cannot get XSEG request\n"); - goto err_exit2; - } - ret = xseg_prep_request(s->xseg, req, targetlen, - sizeof(struct xseg_reply_info)); - if (ret < 0) { - archipelagolog("Cannot prepare XSEG request\n"); - goto err_exit; - } - char *target = xseg_get_target(s->xseg, req); - if (!target) { - archipelagolog("Cannot get XSEG target\n"); - goto err_exit; - } - memcpy(target, volname, targetlen); - req->size = req->datalen; - req->offset = 0; - req->op = X_INFO; - - reqdata->op = ARCHIP_OP_VOLINFO; - reqdata->volname = volname; - xseg_set_req_data(s->xseg, req, reqdata); - - xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); - if (p == NoPort) { - archipelagolog("Cannot submit XSEG request\n"); - goto err_exit; - } - xseg_signal(s->xseg, p); - qemu_mutex_lock(&s->archip_mutex); - while (!s->is_signaled) { - qemu_cond_wait(&s->archip_cond, &s->archip_mutex); - } - s->is_signaled = false; - qemu_mutex_unlock(&s->archip_mutex); - - xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req); - size = xinfo->size; - xseg_put_request(s->xseg, req, s->srcport); - g_free(reqdata); - s->size = size; - return size; - -err_exit: - xseg_put_request(s->xseg, req, s->srcport); -err_exit2: - g_free(reqdata); - return -EIO; -} - -static int64_t qemu_archipelago_getlength(BlockDriverState *bs) -{ - BDRVArchipelagoState *s = bs->opaque; - - return archipelago_volume_info(s); -} - -static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset) -{ - int ret, targetlen; - struct xseg_request *req; - BDRVArchipelagoState *s = bs->opaque; - AIORequestData *reqdata = g_new(AIORequestData, 1); - - const char *volname = s->volname; - targetlen = strlen(volname); - req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); - if (!req) { - archipelagolog("Cannot get XSEG request\n"); - goto err_exit2; - } - - ret = xseg_prep_request(s->xseg, req, targetlen, 0); - if (ret < 0) { - archipelagolog("Cannot prepare XSEG request\n"); - goto err_exit; - } - char *target = xseg_get_target(s->xseg, req); - if (!target) { - archipelagolog("Cannot get XSEG target\n"); - goto err_exit; - } - memcpy(target, volname, targetlen); - req->offset = offset; - req->op = X_TRUNCATE; - - reqdata->op = ARCHIP_OP_TRUNCATE; - reqdata->volname = volname; - - xseg_set_req_data(s->xseg, req, reqdata); - - xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); - if (p == NoPort) { - archipelagolog("Cannot submit XSEG request\n"); - goto err_exit; - } - - xseg_signal(s->xseg, p); - qemu_mutex_lock(&s->archip_mutex); - while (!s->is_signaled) { - qemu_cond_wait(&s->archip_cond, &s->archip_mutex); - } - s->is_signaled = false; - qemu_mutex_unlock(&s->archip_mutex); - xseg_put_request(s->xseg, req, s->srcport); - g_free(reqdata); - return 0; - -err_exit: - xseg_put_request(s->xseg, req, s->srcport); -err_exit2: - g_free(reqdata); - return -EIO; -} - -static QemuOptsList qemu_archipelago_create_opts = { - .name = "archipelago-create-opts", - .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head), - .desc = { - { - .name = BLOCK_OPT_SIZE, - .type = QEMU_OPT_SIZE, - .help = "Virtual disk size" - }, - { /* end of list */ } - } -}; - -static BlockAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) -{ - return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque, - ARCHIP_OP_FLUSH); -} - -static BlockDriver bdrv_archipelago = { - .format_name = "archipelago", - .protocol_name = "archipelago", - .instance_size = sizeof(BDRVArchipelagoState), - .bdrv_parse_filename = archipelago_parse_filename, - .bdrv_file_open = qemu_archipelago_open, - .bdrv_close = qemu_archipelago_close, - .bdrv_create = qemu_archipelago_create, - .bdrv_getlength = qemu_archipelago_getlength, - .bdrv_truncate = qemu_archipelago_truncate, - .bdrv_aio_readv = qemu_archipelago_aio_readv, - .bdrv_aio_writev = qemu_archipelago_aio_writev, - .bdrv_aio_flush = qemu_archipelago_aio_flush, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .create_opts = &qemu_archipelago_create_opts, -}; - -static void bdrv_archipelago_init(void) -{ - bdrv_register(&bdrv_archipelago); -} - -block_init(bdrv_archipelago_init); diff --git a/configure b/configure index 6c21975f02..b05c37685a 100755 --- a/configure +++ b/configure @@ -301,7 +301,6 @@ glusterfs="" glusterfs_xlator_opt="no" glusterfs_discard="no" glusterfs_zerofill="no" -archipelago="no" gtk="" gtkabi="" gtk_gl="no" @@ -1101,10 +1100,6 @@ for opt do ;; --enable-glusterfs) glusterfs="yes" ;; - --disable-archipelago) archipelago="no" - ;; - --enable-archipelago) archipelago="yes" - ;; --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; @@ -1396,7 +1391,6 @@ disabled with --disable-FEATURE, default is enabled if available: seccomp seccomp support coroutine-pool coroutine freelist (better performance) glusterfs GlusterFS backend - archipelago Archipelago backend tpm TPM support libssh2 ssh block device support numa libnuma support @@ -3466,37 +3460,6 @@ EOF fi fi -########################################## -# archipelago probe -if test "$archipelago" != "no" ; then - cat > $TMPC < -#include -#include -int main(void) { - xseg_initialize(); - return 0; -} -EOF - archipelago_libs=-lxseg - if compile_prog "" "$archipelago_libs"; then - archipelago="yes" - libs_tools="$archipelago_libs $libs_tools" - libs_softmmu="$archipelago_libs $libs_softmmu" - - echo "WARNING: Please check the licenses of QEMU and libxseg carefully." - echo "GPLv3 versions of libxseg may not be compatible with QEMU's" - echo "license and therefore prevent redistribution." - echo - echo "To disable Archipelago, use --disable-archipelago" - else - if test "$archipelago" = "yes" ; then - feature_not_found "Archipelago backend support" "Install libxseg devel" - fi - archipelago="no" - fi -fi - ########################################## # glusterfs probe @@ -5099,7 +5062,6 @@ echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" echo "debug stack usage $debug_stack_usage" echo "GlusterFS support $glusterfs" -echo "Archipelago support $archipelago" echo "gcov $gcov_tool" echo "gcov enabled $gcov" echo "TPM support $tpm" @@ -5640,11 +5602,6 @@ if test "$glusterfs_zerofill" = "yes" ; then echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak fi -if test "$archipelago" = "yes" ; then - echo "CONFIG_ARCHIPELAGO=m" >> $config_host_mak - echo "ARCHIPELAGO_LIBS=$archipelago_libs" >> $config_host_mak -fi - if test "$libssh2" = "yes" ; then echo "CONFIG_LIBSSH2=m" >> $config_host_mak echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak diff --git a/qapi/block-core.json b/qapi/block-core.json index 9bb7f4a17b..786b39e911 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -251,6 +251,7 @@ # 2.5: 'host_floppy' dropped # 2.6: 'luks' added # 2.8: 'replication' added, 'tftp' dropped +# 2.9: 'archipelago' dropped # # @backing_file: #optional the name of the backing file (for copy-on-write) # @@ -2129,7 +2130,7 @@ # Since: 2.0 ## { 'enum': 'BlockdevDriver', - 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', + 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', @@ -2342,35 +2343,6 @@ '*cache-clean-interval': 'int' } } -## -# @BlockdevOptionsArchipelago: -# -# Driver specific block device options for Archipelago. -# -# @volume: Name of the Archipelago volume image -# -# @mport: #optional The port number on which mapperd is -# listening. This is optional -# and if not specified, QEMU will make Archipelago -# use the default port (1001). -# -# @vport: #optional The port number on which vlmcd is -# listening. This is optional -# and if not specified, QEMU will make Archipelago -# use the default port (501). -# -# @segment: #optional The name of the shared memory segment -# Archipelago stack is using. This is optional -# and if not specified, QEMU will make Archipelago -# use the default value, 'archipelago'. -# Since: 2.2 -## -{ 'struct': 'BlockdevOptionsArchipelago', - 'data': { 'volume': 'str', - '*mport': 'int', - '*vport': 'int', - '*segment': 'str' } } - ## # @BlockdevOptionsSsh: # @@ -2884,7 +2856,6 @@ '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', 'data': { - 'archipelago':'BlockdevOptionsArchipelago', 'blkdebug': 'BlockdevOptionsBlkdebug', 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', diff --git a/tests/qemu-iotests/025 b/tests/qemu-iotests/025 index c41370f3b2..f5e672e6b3 100755 --- a/tests/qemu-iotests/025 +++ b/tests/qemu-iotests/025 @@ -39,7 +39,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.pattern _supported_fmt raw qcow2 qed -_supported_proto file sheepdog rbd nfs archipelago +_supported_proto file sheepdog rbd nfs _supported_os Linux echo "=== Creating image" diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index b6274bee0a..4d5650d7c8 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -156,7 +156,6 @@ check options -nbd test nbd -ssh test ssh -nfs test nfs - -archipelago test archipelago -luks test luks -xdiff graphical mode diff -nocache use O_DIRECT on backing file @@ -271,11 +270,6 @@ testlist options xpand=false ;; - -archipelago) - IMGPROTO=archipelago - xpand=false - ;; - -nocache) CACHEMODE="none" CACHEMODE_IS_DEFAULT=false diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 4befd865f4..104001358b 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -112,7 +112,6 @@ _filter_img_create() -e "s# block_size=[0-9]\\+##g" \ -e "s# block_state_zero=\\(on\\|off\\)##g" \ -e "s# log_size=[0-9]\\+##g" \ - -e "s/archipelago:a/TEST_DIR\//g" \ -e "s# refcount_bits=[0-9]\\+##g" \ -e "s# key-secret=[a-zA-Z0-9]\\+##g" } @@ -136,8 +135,7 @@ _filter_img_info() -e "/lazy_refcounts: \\(on\\|off\\)/d" \ -e "/block_size: [0-9]\\+/d" \ -e "/block_state_zero: \\(on\\|off\\)/d" \ - -e "/log_size: [0-9]\\+/d" \ - -e "s/archipelago:a/TEST_DIR\//g" + -e "/log_size: [0-9]\\+/d" } # filter out offsets and file names from qemu-img map diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 08065dceae..7d4781d4ad 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -70,8 +70,6 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then elif [ "$IMGPROTO" = "nfs" ]; then TEST_DIR="$DRIVER,file.driver=nfs,file.filename=nfs://127.0.0.1/$TEST_DIR" TEST_IMG=$TEST_DIR/t.$IMGFMT - elif [ "$IMGPROTO" = "archipelago" ]; then - TEST_IMG="$DRIVER,file.driver=archipelago,file.volume=:at.$IMGFMT" else TEST_IMG="$DRIVER,file.driver=$IMGPROTO,file.filename=$TEST_DIR/t.$IMGFMT" fi @@ -87,8 +85,6 @@ else elif [ "$IMGPROTO" = "nfs" ]; then TEST_DIR="nfs://127.0.0.1/$TEST_DIR" TEST_IMG=$TEST_DIR/t.$IMGFMT - elif [ "$IMGPROTO" = "archipelago" ]; then - TEST_IMG="archipelago:at.$IMGFMT" else TEST_IMG=$IMGPROTO:$TEST_DIR/t.$IMGFMT fi @@ -215,10 +211,6 @@ _cleanup_test_img() rbd --no-progress rm "$TEST_DIR/t.$IMGFMT" > /dev/null ;; - archipelago) - vlmc remove "at.$IMGFMT" > /dev/null - ;; - sheepdog) collie vdi delete "$TEST_DIR/t.$IMGFMT" ;; From 666a9543fa957169b6e25805aac88ca72c08f0b5 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 8 Mar 2017 15:34:27 -0600 Subject: [PATCH 04/12] backup: React to bdrv_is_allocated() errors If bdrv_is_allocated() fails, we should immediately do the backup error action, rather than attempting backup_do_cow() (although that will likely fail too). Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/backup.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/block/backup.c b/block/backup.c index d3d20dbcb0..a4fb2884f9 100644 --- a/block/backup.c +++ b/block/backup.c @@ -468,13 +468,14 @@ static void coroutine_fn backup_run(void *opaque) /* Both FULL and TOP SYNC_MODE's require copying.. */ for (; start < end; start++) { bool error_is_read; + int alloced = 0; + if (yield_and_check(job)) { break; } if (job->sync_mode == MIRROR_SYNC_MODE_TOP) { int i, n; - int alloced = 0; /* Check to see if these blocks are already in the * backing file. */ @@ -492,7 +493,7 @@ static void coroutine_fn backup_run(void *opaque) sectors_per_cluster - i, &n); i += n; - if (alloced == 1 || n == 0) { + if (alloced || n == 0) { break; } } @@ -504,8 +505,13 @@ static void coroutine_fn backup_run(void *opaque) } } /* FULL sync mode we copy the whole drive. */ - ret = backup_do_cow(job, start * sectors_per_cluster, - sectors_per_cluster, &error_is_read, false); + if (alloced < 0) { + ret = alloced; + } else { + ret = backup_do_cow(job, start * sectors_per_cluster, + sectors_per_cluster, &error_is_read, + false); + } if (ret < 0) { /* Depending on error action, fail now or retry cluster */ BlockErrorAction action = From 6f712ee08036f4e8066cdddcfed288bf01914e55 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 8 Mar 2017 15:34:28 -0600 Subject: [PATCH 05/12] vvfat: React to bdrv_is_allocated() errors If bdrv_is_allocated() fails, we should react to that failure. For 2 of the 3 callers, reporting the error was easy. But in cluster_was_modified() and its lone caller get_cluster_count_for_direntry(), it's rather invasive to update the logic to pass the error back; so there, I went with merely documenting the issue by changing the return type to bool (in all likelihood, treating the cluster as modified will then trigger a read which will also fail, and eventually get to an error - but given the appalling number of abort() calls in this code, I'm not making it any worse). Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- block/vvfat.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index aa61c329e7..af5153d27d 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -1394,7 +1394,13 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num, return -1; if (s->qcow) { int n; - if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) { + int ret; + ret = bdrv_is_allocated(s->qcow->bs, sector_num, + nb_sectors - i, &n); + if (ret < 0) { + return ret; + } + if (ret) { DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n)); if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) { @@ -1668,7 +1674,8 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s, } } -static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num) +static inline bool cluster_was_modified(BDRVVVFATState *s, + uint32_t cluster_num) { int was_modified = 0; int i, dummy; @@ -1683,7 +1690,13 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num) 1, &dummy); } - return was_modified; + /* + * Note that this treats failures to learn allocation status the + * same as if an allocation has occurred. It's as safe as + * anything else, given that a failure to learn allocation status + * will probably result in more failures. + */ + return !!was_modified; } static const char* get_basename(const char* path) @@ -1833,6 +1846,9 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s, int res; res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy); + if (res < 0) { + return -1; + } if (!res) { res = vvfat_read(s->bs, offset, s->cluster_buffer, 1); if (res) { From 7d66b1fbd2112a0c1433ad7106d74c7c39e57f5f Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Wed, 8 Mar 2017 15:34:29 -0600 Subject: [PATCH 06/12] migration: Document handling of bdrv_is_allocated() errors Migration is the only code left in the tree that does not react to bdrv_is_allocated() failures. But as there is no useful way to react to the failure, and we are merely skipping unallocated sectors on success, just document that our choice of handling is intended. Signed-off-by: Eric Blake Signed-off-by: Kevin Wolf --- migration/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/block.c b/migration/block.c index 1941bc2402..6741228200 100644 --- a/migration/block.c +++ b/migration/block.c @@ -276,6 +276,8 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) if (bmds->shared_base) { qemu_mutex_lock_iothread(); aio_context_acquire(blk_get_aio_context(bb)); + /* Skip unallocated sectors; intentionally treats failure as + * an allocated sector */ while (cur_sector < total_sectors && !bdrv_is_allocated(blk_bs(bb), cur_sector, MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { From 466787fbca9b25b47365b3d2c09d308df67a61db Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Mar 2017 14:44:07 +0100 Subject: [PATCH 07/12] block: Remove check_new_perm from bdrv_replace_child() All callers pass false now, so the parameter can go away again. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/block.c b/block.c index b404ef251a..dd38e7139f 100644 --- a/block.c +++ b/block.c @@ -1756,8 +1756,18 @@ static void bdrv_replace_child_noperm(BdrvChild *child, } } -static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, - bool check_new_perm) +/* + * Updates @child to change its reference to point to @new_bs, including + * checking and applying the necessary permisson updates both to the old node + * and to @new_bs. + * + * NULL is passed as @new_bs for removing the reference before freeing @child. + * + * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this + * function uses bdrv_set_perm() to update the permissions according to the new + * reference that @new_bs gets. + */ +static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) { BlockDriverState *old_bs = child->bs; uint64_t perm, shared_perm; @@ -1775,9 +1785,6 @@ static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs, if (new_bs) { bdrv_get_cumulative_perm(new_bs, &perm, &shared_perm); - if (check_new_perm) { - bdrv_check_perm(new_bs, perm, shared_perm, NULL, &error_abort); - } bdrv_set_perm(new_bs, perm, shared_perm); } } @@ -1808,7 +1815,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, }; /* This performs the matching bdrv_set_perm() for the above check. */ - bdrv_replace_child(child, child_bs, false); + bdrv_replace_child(child, child_bs); return child; } @@ -1845,7 +1852,7 @@ static void bdrv_detach_child(BdrvChild *child) child->next.le_prev = NULL; } - bdrv_replace_child(child, NULL, false); + bdrv_replace_child(child, NULL); g_free(child->name); g_free(child); From b64aa441955b9aa72a05456650303015ac2bea28 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Mar 2017 15:02:49 +0100 Subject: [PATCH 08/12] block: Request block status from *file for BDRV_BLOCK_RAW This fixes bdrv_co_get_block_status() for the bdrv_mirror_top block driver, which must fall through to bs->backing instead of bs->file. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/io.c b/block/io.c index 8f38d46de0..2709a7007f 100644 --- a/block/io.c +++ b/block/io.c @@ -1760,7 +1760,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); - ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, + ret = bdrv_get_block_status(*file, ret >> BDRV_SECTOR_BITS, *pnum, pnum, file); goto out; } From 91965658664cc5da412049de0dd309d50ac09917 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 8 Mar 2017 15:07:12 +0100 Subject: [PATCH 09/12] commit: Implement bdrv_commit_top.bdrv_co_get_block_status In some cases, bdrv_co_get_block_status() is called recursively for the whole backing chain. The automatically inserted bdrv_commit_top filter driver must not stop the recursion, so implement a callback that simply forwards the request to bs->backing. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/commit.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/block/commit.c b/block/commit.c index 9c4198837f..932d1e6046 100644 --- a/block/commit.c +++ b/block/commit.c @@ -232,6 +232,17 @@ static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs, return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags); } +static int64_t coroutine_fn bdrv_commit_top_get_block_status( + BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file) +{ + *pnum = nb_sectors; + *file = bs->backing->bs; + return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | + (sector_num << BDRV_SECTOR_BITS); +} + + static void bdrv_commit_top_close(BlockDriverState *bs) { } @@ -248,10 +259,11 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c, /* Dummy node that provides consistent read to its users without requiring it * from its backing file and that allows writes on the backing file chain. */ static BlockDriver bdrv_commit_top = { - .format_name = "commit_top", - .bdrv_co_preadv = bdrv_commit_top_preadv, - .bdrv_close = bdrv_commit_top_close, - .bdrv_child_perm = bdrv_commit_top_child_perm, + .format_name = "commit_top", + .bdrv_co_preadv = bdrv_commit_top_preadv, + .bdrv_co_get_block_status = bdrv_commit_top_get_block_status, + .bdrv_close = bdrv_commit_top_close, + .bdrv_child_perm = bdrv_commit_top_child_perm, }; void commit_start(const char *job_id, BlockDriverState *bs, From 9e7e940c3dc9d9643065dc0f64eb51efca954e54 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Mar 2017 11:45:39 +0100 Subject: [PATCH 10/12] block: Refresh filename after changing backing file In bdrv_open_inherit(), the filename is refreshed after opening the backing file, but we neglected to do the same when the backing file changes later. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block.c b/block.c index dd38e7139f..cb5737073f 100644 --- a/block.c +++ b/block.c @@ -1938,6 +1938,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, bdrv_unref(backing_hd); } + bdrv_refresh_filename(bs); + out: bdrv_refresh_limits(bs, NULL); } From fd4a6493bb7c5f9bea0e7d9de09ccc0d573e3789 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Mar 2017 11:49:16 +0100 Subject: [PATCH 11/12] mirror: Implement .bdrv_refresh_filename We want query-block to return the right filename, even if a mirror job put a bdrv_mirror_top on top of the actual image format driver. Let bdrv_mirror_top.bdrv_refresh_filename get the filename from its backing file. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/mirror.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index a5d30ee575..4f3a5cb310 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "trace.h" #include "block/blockjob_int.h" #include "block/block_int.h" @@ -1060,6 +1061,13 @@ static int coroutine_fn bdrv_mirror_top_pdiscard(BlockDriverState *bs, return bdrv_co_pdiscard(bs->backing->bs, offset, count); } +static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts) +{ + bdrv_refresh_filename(bs->backing->bs); + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->backing->bs->filename); +} + static void bdrv_mirror_top_close(BlockDriverState *bs) { } @@ -1088,6 +1096,7 @@ static BlockDriver bdrv_mirror_top = { .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, .bdrv_co_flush = bdrv_mirror_top_flush, .bdrv_co_get_block_status = bdrv_mirror_top_get_block_status, + .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_close = bdrv_mirror_top_close, .bdrv_child_perm = bdrv_mirror_top_child_perm, }; From dcbf37ce41a52698550f8f8b2f14b5e6fee22d2d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 9 Mar 2017 11:49:16 +0100 Subject: [PATCH 12/12] commit: Implement .bdrv_refresh_filename We want query-block to return the right filename, even if a commit job put a bdrv_commit_top on top of the actual image format driver. Let bdrv_commit_top.bdrv_refresh_filename get the filename from its backing file. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/commit.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/commit.c b/block/commit.c index 932d1e6046..28324820a4 100644 --- a/block/commit.c +++ b/block/commit.c @@ -13,6 +13,7 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "trace.h" #include "block/block_int.h" #include "block/blockjob_int.h" @@ -242,6 +243,12 @@ static int64_t coroutine_fn bdrv_commit_top_get_block_status( (sector_num << BDRV_SECTOR_BITS); } +static void bdrv_commit_top_refresh_filename(BlockDriverState *bs, QDict *opts) +{ + bdrv_refresh_filename(bs->backing->bs); + pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), + bs->backing->bs->filename); +} static void bdrv_commit_top_close(BlockDriverState *bs) { @@ -262,6 +269,7 @@ static BlockDriver bdrv_commit_top = { .format_name = "commit_top", .bdrv_co_preadv = bdrv_commit_top_preadv, .bdrv_co_get_block_status = bdrv_commit_top_get_block_status, + .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, .bdrv_close = bdrv_commit_top_close, .bdrv_child_perm = bdrv_commit_top_child_perm, };