From 8b9d74e0eebb2106b767d66355d38086be72ad2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jun 2015 12:01:38 +0200 Subject: [PATCH 01/11] nvme: implement the Flush command Implement a real flush instead of faking it. This is especially important as Qemu assume Write back cashing by default and thus requires a working cache flush operation for data integrity. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 19 ++++++++++++++++--- hw/block/nvme.h | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c6a6a0e49a..dc9caf07fe 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -207,11 +207,23 @@ static void nvme_rw_cb(void *opaque, int ret) } else { req->status = NVME_INTERNAL_DEV_ERROR; } - - qemu_sglist_destroy(&req->qsg); + if (req->has_sg) { + qemu_sglist_destroy(&req->qsg); + } nvme_enqueue_req_completion(cq, req); } +static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + req->has_sg = false; + block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0, + BLOCK_ACCT_FLUSH); + req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req); + + return NVME_NO_COMPLETE; +} + static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, NvmeRequest *req) { @@ -235,6 +247,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, } assert((nlb << data_shift) == req->qsg.size); + req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); req->aiocb = is_write ? @@ -256,7 +269,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) ns = &n->namespaces[nsid - 1]; switch (cmd->opcode) { case NVME_CMD_FLUSH: - return NVME_SUCCESS; + return nvme_flush(n, ns, cmd, req); case NVME_CMD_WRITE: case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index b6ccb655a6..bf3a3ccac8 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -638,6 +638,7 @@ typedef struct NvmeRequest { struct NvmeSQueue *sq; BlockAIOCB *aiocb; uint16_t status; + bool has_sg; NvmeCqe cqe; BlockAcctCookie acct; QEMUSGList qsg; From 30349fd038ffb26528fad21abe1e264031364449 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Jun 2015 12:01:39 +0200 Subject: [PATCH 02/11] nvme: properly report volatile write caches Implement support in Identify and Get/Set Features to properly report and allow to change the Volatile Write Cache status reported by the virtual NVMe device. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index dc9caf07fe..40d4880326 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -487,26 +487,32 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t result; switch (dw10) { - case NVME_NUMBER_OF_QUEUES: - req->cqe.result = - cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); - break; case NVME_VOLATILE_WRITE_CACHE: - req->cqe.result = cpu_to_le32(1); + result = blk_enable_write_cache(n->conf.blk); + break; + case NVME_NUMBER_OF_QUEUES: + result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); break; default: return NVME_INVALID_FIELD | NVME_DNR; } + + req->cqe.result = result; return NVME_SUCCESS; } static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); switch (dw10) { + case NVME_VOLATILE_WRITE_CACHE: + blk_set_enable_write_cache(n->conf.blk, dw11 & 1); + break; case NVME_NUMBER_OF_QUEUES: req->cqe.result = cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16)); @@ -831,6 +837,9 @@ static int nvme_init(PCIDevice *pci_dev) id->psd[0].mp = cpu_to_le16(0x9c4); id->psd[0].enlat = cpu_to_le32(0x10); id->psd[0].exlat = cpu_to_le32(0x4); + if (blk_enable_write_cache(n->conf.blk)) { + id->vwc = 1; + } n->bar.cap = 0; NVME_CAP_SET_MQES(n->bar.cap, 0x7ff); From df5817926790f6e84d1936eab523556f96fa577a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Jun 2015 11:53:47 +0200 Subject: [PATCH 03/11] block: Move bdrv_attach_child() calls up the call chain Let the callers of bdrv_open_inherit() call bdrv_attach_child(). It needs to be called in all cases where bdrv_open_inherit() succeeds (i.e. returns 0) and a child_role is given. bdrv_attach_child() is moved upwards to avoid a forward declaration. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/block.c b/block.c index 5e80336e9f..0398bffc03 100644 --- a/block.c +++ b/block.c @@ -1102,6 +1102,19 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, return 0; } +static void bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) +{ + BdrvChild *child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = child_bs, + .role = child_role, + }; + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); +} + void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { @@ -1202,6 +1215,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); goto free_exit; } + + bdrv_attach_child(bs, backing_hd, &child_backing); bdrv_set_backing_hd(bs, backing_hd); free_exit: @@ -1237,6 +1252,7 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, assert(pbs); assert(*pbs == NULL); + assert(child_role != NULL); bdref_key_dot = g_strdup_printf("%s.", bdref_key); qdict_extract_subqdict(options, &image_options, bdref_key_dot); @@ -1257,6 +1273,11 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, parent, child_role, NULL, errp); + if (ret < 0) { + goto done; + } + + bdrv_attach_child(parent, *pbs, child_role); done: qdict_del(options, bdref_key); @@ -1328,19 +1349,6 @@ out: return ret; } -static void bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const BdrvChildRole *child_role) -{ - BdrvChild *child = g_new(BdrvChild, 1); - *child = (BdrvChild) { - .bs = child_bs, - .role = child_role, - }; - - QLIST_INSERT_HEAD(&parent_bs->children, child, next); -} - /* * Opens a disk image (raw, qcow2, vmdk, ...) * @@ -1393,9 +1401,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, return -ENODEV; } bdrv_ref(bs); - if (child_role) { - bdrv_attach_child(parent, bs, child_role); - } *pbs = bs; return 0; } @@ -1540,10 +1545,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, goto close_and_fail; } - if (child_role) { - bdrv_attach_child(parent, bs, child_role); - } - QDECREF(options); *pbs = bs; return 0; From b4b059f628173dd1d722ee8a9c592a80aec1fc2f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Jun 2015 13:24:19 +0200 Subject: [PATCH 04/11] block: Introduce bdrv_open_child() It is the same as bdrv_open_image(), except that it doesn't only return success or failure, but the newly created BdrvChild object for the new child node. As the BdrvChild object already contains a BlockDriverState pointer (and this is supposed to become the only pointer so that bdrv_append() and friends can just change a single pointer in BdrvChild), the pbs parameter is removed for bdrv_open_child(). Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 71 ++++++++++++++++++++++++++++----------- include/block/block.h | 6 ++++ include/block/block_int.h | 4 +-- 3 files changed, 60 insertions(+), 21 deletions(-) diff --git a/block.c b/block.c index 0398bffc03..029feeb7f9 100644 --- a/block.c +++ b/block.c @@ -1102,9 +1102,9 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, return 0; } -static void bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const BdrvChildRole *child_role) +static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) { BdrvChild *child = g_new(BdrvChild, 1); *child = (BdrvChild) { @@ -1113,6 +1113,8 @@ static void bdrv_attach_child(BlockDriverState *parent_bs, }; QLIST_INSERT_HEAD(&parent_bs->children, child, next); + + return child; } void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) @@ -1229,7 +1231,7 @@ free_exit: * device's options. * * If allow_none is true, no image will be opened if filename is false and no - * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * BlockdevRef is given. NULL will be returned, but errp remains unset. * * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict @@ -1237,21 +1239,20 @@ free_exit: * BlockdevRef. * * The BlockdevRef will be removed from the options QDict. - * - * To conform with the behavior of bdrv_open(), *pbs has to be NULL. */ -int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState* parent, const BdrvChildRole *child_role, - bool allow_none, Error **errp) +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp) { + BdrvChild *c = NULL; + BlockDriverState *bs; QDict *image_options; int ret; char *bdref_key_dot; const char *reference; - assert(pbs); - assert(*pbs == NULL); assert(child_role != NULL); bdref_key_dot = g_strdup_printf("%s.", bdref_key); @@ -1260,28 +1261,60 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, reference = qdict_get_try_str(options, bdref_key); if (!filename && !reference && !qdict_size(image_options)) { - if (allow_none) { - ret = 0; - } else { + if (!allow_none) { error_setg(errp, "A block device must be specified for \"%s\"", bdref_key); - ret = -EINVAL; } QDECREF(image_options); goto done; } - ret = bdrv_open_inherit(pbs, filename, reference, image_options, 0, + bs = NULL; + ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, parent, child_role, NULL, errp); if (ret < 0) { goto done; } - bdrv_attach_child(parent, *pbs, child_role); + c = bdrv_attach_child(parent, bs, child_role); done: qdict_del(options, bdref_key); - return ret; + return c; +} + +/* + * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of + * a BdrvChild object. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * + * To conform with the behavior of bdrv_open(), *pbs has to be NULL. + */ +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, + bool allow_none, Error **errp) +{ + Error *local_err = NULL; + BdrvChild *c; + + assert(pbs); + assert(*pbs == NULL); + + c = bdrv_open_child(filename, options, bdref_key, parent, child_role, + allow_none, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + if (c != NULL) { + *pbs = c->bs; + } + + return 0; } int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) diff --git a/include/block/block.h b/include/block/block.h index 06e4137008..50487726f5 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -12,6 +12,7 @@ /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; +typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; typedef struct BlockDriverInfo { @@ -208,6 +209,11 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, QDict *options, const char *bdref_key, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8996baf2f0..ec244b5d06 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -335,11 +335,11 @@ struct BdrvChildRole { extern const BdrvChildRole child_file; extern const BdrvChildRole child_format; -typedef struct BdrvChild { +struct BdrvChild { BlockDriverState *bs; const BdrvChildRole *role; QLIST_ENTRY(BdrvChild) next; -} BdrvChild; +}; /* * Note: the function bdrv_append() copies and swaps contents of From 33a604075c51e5528eed970eeaeefe609ea2337d Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 15 Jun 2015 13:51:04 +0200 Subject: [PATCH 05/11] block: Introduce bdrv_unref_child() This is the counterpart for bdrv_open_child(). It decreases the reference count of the child BDS and removes it from the list of children of the given parent BDS. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 23 +++++++++++++++++++++-- include/block/block.h | 1 + 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 029feeb7f9..b723cf2858 100644 --- a/block.c +++ b/block.c @@ -1117,6 +1117,24 @@ static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, return child; } +static void bdrv_detach_child(BdrvChild *child) +{ + QLIST_REMOVE(child, next); + g_free(child); +} + +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + BlockDriverState *child_bs = child->bs; + + if (child->bs->inherits_from == parent) { + child->bs->inherits_from = NULL; + } + + bdrv_detach_child(child); + bdrv_unref(child_bs); +} + void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { @@ -1884,11 +1902,12 @@ void bdrv_close(BlockDriverState *bs) BdrvChild *child, *next; QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + /* TODO Remove bdrv_unref() from drivers' close function and use + * bdrv_unref_child() here */ if (child->bs->inherits_from == bs) { child->bs->inherits_from = NULL; } - QLIST_REMOVE(child, next); - g_free(child); + bdrv_detach_child(child); } if (bs->backing_hd) { diff --git a/include/block/block.h b/include/block/block.h index 50487726f5..37916f7208 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -513,6 +513,7 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs); void bdrv_ref(BlockDriverState *bs); void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); From 9a7dedbc43c7c400663d2876a8ccb6d942a1429a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 16 Jun 2015 10:58:20 +0200 Subject: [PATCH 06/11] block: Reorder cleanups in bdrv_close() Block drivers may still want to access their child nodes in their .bdrv_close handler. If they unref and/or detach a child by themselves, this should not result in a double free. There is additional code for backing files, which are just a special case of child nodes. The same applies for them. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index b723cf2858..d5c9f032aa 100644 --- a/block.c +++ b/block.c @@ -1901,6 +1901,14 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { BdrvChild *child, *next; + bs->drv->bdrv_close(bs); + + if (bs->backing_hd) { + BlockDriverState *backing_hd = bs->backing_hd; + bdrv_set_backing_hd(bs, NULL); + bdrv_unref(backing_hd); + } + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { /* TODO Remove bdrv_unref() from drivers' close function and use * bdrv_unref_child() here */ @@ -1910,12 +1918,6 @@ void bdrv_close(BlockDriverState *bs) bdrv_detach_child(child); } - if (bs->backing_hd) { - BlockDriverState *backing_hd = bs->backing_hd; - bdrv_set_backing_hd(bs, NULL); - bdrv_unref(backing_hd); - } - bs->drv->bdrv_close(bs); g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; From 80a1e130917e0745625129553c943743eb663727 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 17 Jun 2015 15:52:09 +0200 Subject: [PATCH 07/11] block: Fix backing file child when modifying graph This patch moves bdrv_attach_child() from the individual places that add a backing file to a BDS to bdrv_set_backing_hd(), which is called by all of them. It also adds bdrv_detach_child() there. For normal operation (starting with one backing file chain and not changing it until the topmost image is closed) and live snapshots, this constitutes no change in behaviour. For all other cases, this is a fix for the bug that the old backing file was still referenced as a child, and the new one wasn't referenced. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 5 +++-- include/block/block_int.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index d5c9f032aa..d088ee02ff 100644 --- a/block.c +++ b/block.c @@ -1141,6 +1141,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) if (bs->backing_hd) { assert(bs->backing_blocker); bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); + bdrv_detach_child(bs->backing_child); } else if (backing_hd) { error_setg(&bs->backing_blocker, "node is used as backing hd of '%s'", @@ -1151,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) if (!backing_hd) { error_free(bs->backing_blocker); bs->backing_blocker = NULL; + bs->backing_child = NULL; goto out; } + bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing); bs->open_flags &= ~BDRV_O_NO_BACKING; pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1236,7 +1239,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) goto free_exit; } - bdrv_attach_child(bs, backing_hd, &child_backing); bdrv_set_backing_hd(bs, backing_hd); free_exit: @@ -2171,7 +2173,6 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) /* The contents of 'tmp' will become bs_top, as we are * swapping bs_new and bs_top contents. */ bdrv_set_backing_hd(bs_top, bs_new); - bdrv_attach_child(bs_top, bs_new, &child_backing); } static void bdrv_delete(BlockDriverState *bs) diff --git a/include/block/block_int.h b/include/block/block_int.h index ec244b5d06..14ad4c334b 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -379,6 +379,7 @@ struct BlockDriverState { char exact_filename[PATH_MAX]; BlockDriverState *backing_hd; + BdrvChild *backing_child; BlockDriverState *file; NotifierList close_notifiers; From 3dbf00e058e450173c6f892bb572df871eb4ea58 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 10 Jun 2015 20:28:43 -0700 Subject: [PATCH 08/11] rbd: remove unused constants and fields RBDAIOCB.status was only used for cancel, which was removed in 7691e24dbebb46658e89b3f950fda6ec78bbb823. RBDAIOCB.sector_num was never used. RADOSCB.done and rcbid were never used. RBD_FD* are obsolete since the pipe was removed in e04fb07fd1676e9facd7f3f878c1bbe03bccd26b. Signed-off-by: Josh Durgin Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/rbd.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index fbe87e035b..50b5f6be68 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -74,25 +74,18 @@ typedef struct RBDAIOCB { QEMUIOVector *qiov; char *bounce; RBDAIOCmd cmd; - int64_t sector_num; int error; struct BDRVRBDState *s; - int status; } RBDAIOCB; typedef struct RADOSCB { - int rcbid; RBDAIOCB *acb; struct BDRVRBDState *s; - int done; int64_t size; char *buf; int64_t ret; } RADOSCB; -#define RBD_FD_READ 0 -#define RBD_FD_WRITE 1 - typedef struct BDRVRBDState { rados_t cluster; rados_ioctx_t io_ctx; @@ -405,7 +398,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) } qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); - acb->status = 0; qemu_aio_unref(acb); } @@ -621,7 +613,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, acb->error = 0; acb->s = s; acb->bh = NULL; - acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); @@ -633,7 +624,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, size = nb_sectors * BDRV_SECTOR_SIZE; rcb = g_new(RADOSCB, 1); - rcb->done = 0; rcb->acb = acb; rcb->buf = buf; rcb->s = acb->s; From 5a8ac6d9d70e1a078d04ad75a5c055b00a041d70 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 10 Jun 2015 20:28:44 -0700 Subject: [PATCH 09/11] MAINTAINERS: update email address The old one still works for now, but will not work indefinitely. Signed-off-by: Josh Durgin Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 411da3cf57..978b7174f3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1169,7 +1169,7 @@ S: Supported F: block/vmdk.c RBD -M: Josh Durgin +M: Josh Durgin M: Jeff Cody L: qemu-block@nongnu.org S: Supported From 99a3c89d5d538dc6c360e35dffb797cfe06e9cda Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 10 Jun 2015 20:28:45 -0700 Subject: [PATCH 10/11] rbd: make qemu's cache setting override any ceph setting To be safe, when cache=none is used ceph settings should not be able to override it to turn on caching. This was previously possible with rbd_cache=true in the rbd device configuration or a ceph configuration file. Similarly, rbd settings could have turned off caching when qemu requested it, although this would just be a performance problem. Fix this by changing rbd's cache setting to match qemu after all other ceph settings have been applied. Signed-off-by: Josh Durgin Reviewed-by: Jeff Cody Signed-off-by: Kevin Wolf --- block/rbd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 50b5f6be68..00d027d247 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -460,6 +460,18 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, s->snap = g_strdup(snap_buf); } + if (strstr(conf, "conf=") == NULL) { + /* try default location, but ignore failure */ + rados_conf_read_file(s->cluster, NULL); + } + + if (conf[0] != '\0') { + r = qemu_rbd_set_conf(s->cluster, conf, errp); + if (r < 0) { + goto failed_shutdown; + } + } + /* * Fallback to more conservative semantics if setting cache * options fails. Ignore errors from setting rbd_cache because the @@ -473,18 +485,6 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, rados_conf_set(s->cluster, "rbd_cache", "true"); } - if (strstr(conf, "conf=") == NULL) { - /* try default location, but ignore failure */ - rados_conf_read_file(s->cluster, NULL); - } - - if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, errp); - if (r < 0) { - goto failed_shutdown; - } - } - r = rados_connect(s->cluster); if (r < 0) { error_setg(errp, "error connecting"); From e34d8f297d51b7ffa5dce72df1e45fa94cff989c Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Wed, 10 Jun 2015 20:28:46 -0700 Subject: [PATCH 11/11] rbd: fix ceph settings precedence Apply the ceph settings from a config file before any ceph settings from the command line. Since the ceph config file location may be specified on the command line, parse it once to read the config file, and do a second pass to apply the rest of the command line ceph options. Signed-off-by: Josh Durgin Signed-off-by: Kevin Wolf --- block/rbd.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 00d027d247..a60a19d58d 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -228,7 +228,9 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname) return NULL; } -static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp) +static int qemu_rbd_set_conf(rados_t cluster, const char *conf, + bool only_read_conf_file, + Error **errp) { char *p, *buf; char name[RBD_MAX_CONF_NAME_SIZE]; @@ -260,14 +262,18 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp) qemu_rbd_unescape(value); if (strcmp(name, "conf") == 0) { - ret = rados_conf_read_file(cluster, value); - if (ret < 0) { - error_setg(errp, "error reading conf file %s", value); - break; + /* read the conf file alone, so it doesn't override more + specific settings for a particular device */ + if (only_read_conf_file) { + ret = rados_conf_read_file(cluster, value); + if (ret < 0) { + error_setg(errp, "error reading conf file %s", value); + break; + } } } else if (strcmp(name, "id") == 0) { /* ignore, this is parsed by qemu_rbd_parse_clientname() */ - } else { + } else if (!only_read_conf_file) { ret = rados_conf_set(cluster, name, value); if (ret < 0) { error_setg(errp, "invalid conf option %s", name); @@ -330,10 +336,15 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) if (strstr(conf, "conf=") == NULL) { /* try default location, but ignore failure */ rados_conf_read_file(cluster, NULL); + } else if (conf[0] != '\0' && + qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) { + rados_shutdown(cluster); + error_propagate(errp, local_err); + return -EIO; } if (conf[0] != '\0' && - qemu_rbd_set_conf(cluster, conf, &local_err) < 0) { + qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) { rados_shutdown(cluster); error_propagate(errp, local_err); return -EIO; @@ -463,10 +474,15 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, if (strstr(conf, "conf=") == NULL) { /* try default location, but ignore failure */ rados_conf_read_file(s->cluster, NULL); + } else if (conf[0] != '\0') { + r = qemu_rbd_set_conf(s->cluster, conf, true, errp); + if (r < 0) { + goto failed_shutdown; + } } if (conf[0] != '\0') { - r = qemu_rbd_set_conf(s->cluster, conf, errp); + r = qemu_rbd_set_conf(s->cluster, conf, false, errp); if (r < 0) { goto failed_shutdown; }