mirror of https://github.com/xemu-project/xemu.git
Block layer patches
Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging

Block layer patches

- Add vduse-blk export
- Dirty bitmaps: Fix and improve bitmap merge
- gluster: correctly set max_pdiscard
- rbd: report a better error when namespace does not exist
- aio_wait_kick: add missing memory barrier
- Code cleanups

# gpg: Signature made Fri 24 Jun 2022 08:07:17 AM PDT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]

* tag 'for-upstream' of git://repo.or.cz/qemu/kevin:
  vduse-blk: Add name option
  vduse-blk: Add serial option
  nbd: Drop dead code spotted by Coverity
  aio_wait_kick: add missing memory barrier
  block/gluster: correctly set max_pdiscard
  block/rbd: report a better error when namespace does not exist
  qsd: document vduse-blk exports
  libvduse: Add support for reconnecting
  vduse-blk: Add vduse-blk resize support
  vduse-blk: Implement vduse-blk export
  libvduse: Add VDUSE (vDPA Device in Userspace) library
  linux-headers: Add vduse.h
  block/export: Abstract out the logic of virtio-blk I/O process
  block/export: Fix incorrect length passed to vu_queue_push()
  block: Support passing NULL ops to blk_set_dev_ops()
  block: simplify handling of try to merge different sized bitmaps
  block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap
  block: block_dirty_bitmap_merge(): fix error path
  block: get rid of blk->guest_block_size
  block: drop unused bdrv_co_drain() API

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Commit: 40d5224907
MAINTAINERS

@@ -3580,6 +3580,8 @@ M: Coiby Xu <Coiby.Xu@gmail.com>
 S: Maintained
 F: block/export/vhost-user-blk-server.c
 F: block/export/vhost-user-blk-server.h
+F: block/export/virtio-blk-handler.c
+F: block/export/virtio-blk-handler.h
 F: include/qemu/vhost-user-server.h
 F: tests/qtest/libqos/vhost-user-blk.c
 F: tests/qtest/libqos/vhost-user-blk.h
@@ -3592,6 +3594,13 @@ L: qemu-block@nongnu.org
 S: Supported
 F: block/export/fuse.c
 
+VDUSE library and block device exports
+M: Xie Yongji <xieyongji@bytedance.com>
+S: Maintained
+F: subprojects/libvduse/
+F: block/export/vduse-blk.c
+F: block/export/vduse-blk.h
+
 Replication
 M: Wen Congyang <wencongyang2@huawei.com>
 M: Xie Changlong <xiechanglong.d@gmail.com>
block/backup.c

@@ -228,15 +228,13 @@ out:
 
 static void backup_init_bcs_bitmap(BackupBlockJob *job)
 {
-    bool ret;
     uint64_t estimate;
     BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
 
     if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
         bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
-        ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
-                                               NULL, true);
-        assert(ret);
+        bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL,
+                                         true);
     } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
         /*
          * We can't hog the coroutine to initialize this thoroughly.
block/block-backend.c

@@ -56,9 +56,6 @@ struct BlockBackend {
     const BlockDevOps *dev_ops;
     void *dev_opaque;
 
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
     /* If the BDS tree is removed, some of its options are stored here (which
      * can be used to restore those options in the new BDS on insert) */
     BlockBackendRootState root_state;
@@ -998,7 +995,6 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
     blk->dev = NULL;
     blk->dev_ops = NULL;
     blk->dev_opaque = NULL;
-    blk->guest_block_size = 512;
     blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
     blk_unref(blk);
 }
@@ -1062,7 +1058,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
     blk->dev_opaque = opaque;
 
     /* Are we currently quiesced? Should we enforce this right now? */
-    if (blk->quiesce_counter && ops->drained_begin) {
+    if (blk->quiesce_counter && ops && ops->drained_begin) {
         ops->drained_begin(opaque);
     }
 }
@@ -2100,12 +2096,6 @@ int blk_get_max_iov(BlockBackend *blk)
     return blk->root->bs->bl.max_iov;
 }
 
-void blk_set_guest_block_size(BlockBackend *blk, int align)
-{
-    IO_CODE();
-    blk->guest_block_size = align;
-}
-
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
     IO_CODE();
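One practical consequence of the added NULL check is that callers can now unregister their callbacks instead of leaving stale pointers behind. A minimal sketch (assuming an existing BlockBackend *blk that previously registered ops):

    /* Sketch only: detach device callbacks from a BlockBackend.
     * With the change above, passing NULL ops is safe because
     * ops->drained_begin is no longer dereferenced unconditionally. */
    blk_set_dev_ops(blk, NULL, NULL);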
block/dirty-bitmap.c

@@ -309,10 +309,7 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent,
         return NULL;
     }
 
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
+    hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap);
 
     parent->disabled = successor->disabled;
     parent->busy = false;
@@ -912,13 +909,15 @@ bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,
         goto out;
     }
 
-    if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) {
-        error_setg(errp, "Bitmaps are incompatible and can't be merged");
+    if (bdrv_dirty_bitmap_size(src) != bdrv_dirty_bitmap_size(dest)) {
+        error_setg(errp, "Bitmaps are of different sizes (destination size is %"
+                   PRId64 ", source size is %" PRId64 ") and can't be merged",
+                   bdrv_dirty_bitmap_size(dest), bdrv_dirty_bitmap_size(src));
         goto out;
     }
 
-    ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
-    assert(ret);
+    bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
+    ret = true;
 
 out:
     bdrv_dirty_bitmaps_unlock(dest->bs);
@@ -932,17 +931,16 @@ out:
 /**
  * bdrv_dirty_bitmap_merge_internal: merge src into dest.
  * Does NOT check bitmap permissions; not suitable for use as public API.
+ * @dest, @src and @backup (if not NULL) must have same size.
  *
  * @backup: If provided, make a copy of dest here prior to merge.
  * @lock: If true, lock and unlock bitmaps on the way in/out.
- * returns true if the merge succeeded; false if unattempted.
  */
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
                                       const BdrvDirtyBitmap *src,
                                       HBitmap **backup,
                                       bool lock)
 {
-    bool ret;
     IO_CODE();
 
     assert(!bdrv_dirty_bitmap_readonly(dest));
@@ -959,9 +957,9 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
     if (backup) {
         *backup = dest->bitmap;
         dest->bitmap = hbitmap_alloc(dest->size, hbitmap_granularity(*backup));
-        ret = hbitmap_merge(*backup, src->bitmap, dest->bitmap);
+        hbitmap_merge(*backup, src->bitmap, dest->bitmap);
     } else {
-        ret = hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap);
+        hbitmap_merge(dest->bitmap, src->bitmap, dest->bitmap);
     }
 
     if (lock) {
@@ -970,6 +968,4 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
             bdrv_dirty_bitmaps_unlock(src->bs);
         }
     }
-
-    return ret;
 }
block/export/export.c

@@ -26,6 +26,9 @@
 #ifdef CONFIG_VHOST_USER_BLK_SERVER
 #include "vhost-user-blk-server.h"
 #endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+#include "vduse-blk.h"
+#endif
 
 static const BlockExportDriver *blk_exp_drivers[] = {
     &blk_exp_nbd,
@@ -35,6 +38,9 @@ static const BlockExportDriver *blk_exp_drivers[] = {
 #ifdef CONFIG_FUSE
     &blk_exp_fuse,
 #endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+    &blk_exp_vduse_blk,
+#endif
 };
 
 /* Only accessed from the main thread */
block/export/meson.build

@@ -1,7 +1,12 @@
 blockdev_ss.add(files('export.c'))
 
 if have_vhost_user_blk_server
-  blockdev_ss.add(files('vhost-user-blk-server.c'))
+  blockdev_ss.add(files('vhost-user-blk-server.c', 'virtio-blk-handler.c'))
 endif
 
 blockdev_ss.add(when: fuse, if_true: files('fuse.c'))
+
+if have_vduse_blk_export
+  blockdev_ss.add(files('vduse-blk.c', 'virtio-blk-handler.c'))
+  blockdev_ss.add(libvduse)
+endif
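For reference, compilation of these files is gated on have_vduse_blk_export. On a Linux host the feature can be enabled explicitly at configure time; the invocation below is an illustration only, and the exact option name should be checked against the tree being built:

    ./configure --enable-vduse-blk-export
    make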
@ -0,0 +1,374 @@
|
|||
/*
|
||||
* Export QEMU block device via VDUSE
|
||||
*
|
||||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author:
|
||||
* Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qapi/error.h"
|
||||
#include "block/export.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "util/block-helpers.h"
|
||||
#include "subprojects/libvduse/libvduse.h"
|
||||
#include "virtio-blk-handler.h"
|
||||
|
||||
#include "standard-headers/linux/virtio_blk.h"
|
||||
|
||||
#define VDUSE_DEFAULT_NUM_QUEUE 1
|
||||
#define VDUSE_DEFAULT_QUEUE_SIZE 256
|
||||
|
||||
typedef struct VduseBlkExport {
|
||||
BlockExport export;
|
||||
VirtioBlkHandler handler;
|
||||
VduseDev *dev;
|
||||
uint16_t num_queues;
|
||||
char *recon_file;
|
||||
unsigned int inflight;
|
||||
} VduseBlkExport;
|
||||
|
||||
typedef struct VduseBlkReq {
|
||||
VduseVirtqElement elem;
|
||||
VduseVirtq *vq;
|
||||
} VduseBlkReq;
|
||||
|
||||
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
|
||||
{
|
||||
vblk_exp->inflight++;
|
||||
}
|
||||
|
||||
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
|
||||
{
|
||||
if (--vblk_exp->inflight == 0) {
|
||||
aio_wait_kick();
|
||||
}
|
||||
}
|
||||
|
||||
static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
|
||||
{
|
||||
vduse_queue_push(req->vq, &req->elem, in_len);
|
||||
vduse_queue_notify(req->vq);
|
||||
|
||||
free(req);
|
||||
}
|
||||
|
||||
static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
|
||||
{
|
||||
VduseBlkReq *req = opaque;
|
||||
VduseVirtq *vq = req->vq;
|
||||
VduseDev *dev = vduse_queue_get_dev(vq);
|
||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||
VirtioBlkHandler *handler = &vblk_exp->handler;
|
||||
VduseVirtqElement *elem = &req->elem;
|
||||
struct iovec *in_iov = elem->in_sg;
|
||||
struct iovec *out_iov = elem->out_sg;
|
||||
unsigned in_num = elem->in_num;
|
||||
unsigned out_num = elem->out_num;
|
||||
int in_len;
|
||||
|
||||
in_len = virtio_blk_process_req(handler, in_iov,
|
||||
out_iov, in_num, out_num);
|
||||
if (in_len < 0) {
|
||||
free(req);
|
||||
return;
|
||||
}
|
||||
|
||||
vduse_blk_req_complete(req, in_len);
|
||||
vduse_blk_inflight_dec(vblk_exp);
|
||||
}
|
||||
|
||||
static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||
|
||||
while (1) {
|
||||
VduseBlkReq *req;
|
||||
|
||||
req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
|
||||
if (!req) {
|
||||
break;
|
||||
}
|
||||
req->vq = vq;
|
||||
|
||||
Coroutine *co =
|
||||
qemu_coroutine_create(vduse_blk_virtio_process_req, req);
|
||||
|
||||
vduse_blk_inflight_inc(vblk_exp);
|
||||
qemu_coroutine_enter(co);
|
||||
}
|
||||
}
|
||||
|
||||
static void on_vduse_vq_kick(void *opaque)
|
||||
{
|
||||
VduseVirtq *vq = opaque;
|
||||
VduseDev *dev = vduse_queue_get_dev(vq);
|
||||
int fd = vduse_queue_get_fd(vq);
|
||||
eventfd_t kick_data;
|
||||
|
||||
if (eventfd_read(fd, &kick_data) == -1) {
|
||||
error_report("failed to read data from eventfd");
|
||||
return;
|
||||
}
|
||||
|
||||
vduse_blk_vq_handler(dev, vq);
|
||||
}
|
||||
|
||||
static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
|
||||
true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
|
||||
/* Make sure we don't miss any kick after reconnecting */
|
||||
eventfd_write(vduse_queue_get_fd(vq), 1);
|
||||
}
|
||||
|
||||
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
|
||||
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
|
||||
true, NULL, NULL, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
static const VduseOps vduse_blk_ops = {
|
||||
.enable_queue = vduse_blk_enable_queue,
|
||||
.disable_queue = vduse_blk_disable_queue,
|
||||
};
|
||||
|
||||
static void on_vduse_dev_kick(void *opaque)
|
||||
{
|
||||
VduseDev *dev = opaque;
|
||||
|
||||
vduse_dev_handler(dev);
|
||||
}
|
||||
|
||||
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
||||
true, on_vduse_dev_kick, NULL, NULL, NULL,
|
||||
vblk_exp->dev);
|
||||
|
||||
for (i = 0; i < vblk_exp->num_queues; i++) {
|
||||
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
||||
int fd = vduse_queue_get_fd(vq);
|
||||
|
||||
if (fd < 0) {
|
||||
continue;
|
||||
}
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
|
||||
on_vduse_vq_kick, NULL, NULL, NULL, vq);
|
||||
}
|
||||
}
|
||||
|
||||
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < vblk_exp->num_queues; i++) {
|
||||
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
|
||||
int fd = vduse_queue_get_fd(vq);
|
||||
|
||||
if (fd < 0) {
|
||||
continue;
|
||||
}
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, fd,
|
||||
true, NULL, NULL, NULL, NULL, NULL);
|
||||
}
|
||||
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
|
||||
true, NULL, NULL, NULL, NULL, NULL);
|
||||
|
||||
AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
|
||||
}
|
||||
|
||||
|
||||
static void blk_aio_attached(AioContext *ctx, void *opaque)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = opaque;
|
||||
|
||||
vblk_exp->export.ctx = ctx;
|
||||
vduse_blk_attach_ctx(vblk_exp, ctx);
|
||||
}
|
||||
|
||||
static void blk_aio_detach(void *opaque)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = opaque;
|
||||
|
||||
vduse_blk_detach_ctx(vblk_exp);
|
||||
vblk_exp->export.ctx = NULL;
|
||||
}
|
||||
|
||||
static void vduse_blk_resize(void *opaque)
|
||||
{
|
||||
BlockExport *exp = opaque;
|
||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||
struct virtio_blk_config config;
|
||||
|
||||
config.capacity =
|
||||
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
|
||||
vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
|
||||
offsetof(struct virtio_blk_config, capacity),
|
||||
(char *)&config.capacity);
|
||||
}
|
||||
|
||||
static const BlockDevOps vduse_block_ops = {
|
||||
.resize_cb = vduse_blk_resize,
|
||||
};
|
||||
|
||||
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
||||
Error **errp)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||
BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
|
||||
uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
|
||||
uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
|
||||
uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
|
||||
Error *local_err = NULL;
|
||||
struct virtio_blk_config config = { 0 };
|
||||
uint64_t features;
|
||||
int i, ret;
|
||||
|
||||
if (vblk_opts->has_num_queues) {
|
||||
num_queues = vblk_opts->num_queues;
|
||||
if (num_queues == 0) {
|
||||
error_setg(errp, "num-queues must be greater than 0");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (vblk_opts->has_queue_size) {
|
||||
queue_size = vblk_opts->queue_size;
|
||||
if (queue_size <= 2 || !is_power_of_2(queue_size) ||
|
||||
queue_size > VIRTQUEUE_MAX_SIZE) {
|
||||
error_setg(errp, "queue-size is invalid");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (vblk_opts->has_logical_block_size) {
|
||||
logical_block_size = vblk_opts->logical_block_size;
|
||||
check_block_size(exp->id, "logical-block-size", logical_block_size,
|
||||
&local_err);
|
||||
if (local_err) {
|
||||
error_propagate(errp, local_err);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
vblk_exp->num_queues = num_queues;
|
||||
vblk_exp->handler.blk = exp->blk;
|
||||
vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ?
|
||||
vblk_opts->serial : "");
|
||||
vblk_exp->handler.logical_block_size = logical_block_size;
|
||||
vblk_exp->handler.writable = opts->writable;
|
||||
|
||||
config.capacity =
|
||||
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
|
||||
config.seg_max = cpu_to_le32(queue_size - 2);
|
||||
config.min_io_size = cpu_to_le16(1);
|
||||
config.opt_io_size = cpu_to_le32(1);
|
||||
config.num_queues = cpu_to_le16(num_queues);
|
||||
config.blk_size = cpu_to_le32(logical_block_size);
|
||||
config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
|
||||
config.max_discard_seg = cpu_to_le32(1);
|
||||
config.discard_sector_alignment =
|
||||
cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
|
||||
config.max_write_zeroes_sectors =
|
||||
cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
|
||||
config.max_write_zeroes_seg = cpu_to_le32(1);
|
||||
|
||||
features = vduse_get_virtio_features() |
|
||||
(1ULL << VIRTIO_BLK_F_SEG_MAX) |
|
||||
(1ULL << VIRTIO_BLK_F_TOPOLOGY) |
|
||||
(1ULL << VIRTIO_BLK_F_BLK_SIZE) |
|
||||
(1ULL << VIRTIO_BLK_F_FLUSH) |
|
||||
(1ULL << VIRTIO_BLK_F_DISCARD) |
|
||||
(1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
|
||||
|
||||
if (num_queues > 1) {
|
||||
features |= 1ULL << VIRTIO_BLK_F_MQ;
|
||||
}
|
||||
if (!opts->writable) {
|
||||
features |= 1ULL << VIRTIO_BLK_F_RO;
|
||||
}
|
||||
|
||||
vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
|
||||
features, num_queues,
|
||||
sizeof(struct virtio_blk_config),
|
||||
(char *)&config, &vduse_blk_ops,
|
||||
vblk_exp);
|
||||
if (!vblk_exp->dev) {
|
||||
error_setg(errp, "failed to create vduse device");
|
||||
ret = -ENOMEM;
|
||||
goto err_dev;
|
||||
}
|
||||
|
||||
vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
|
||||
g_get_tmp_dir(), vblk_opts->name);
|
||||
if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
|
||||
error_setg(errp, "failed to set reconnect log file");
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_queues; i++) {
|
||||
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
|
||||
}
|
||||
|
||||
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
|
||||
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
|
||||
|
||||
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
||||
vblk_exp);
|
||||
|
||||
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
|
||||
|
||||
return 0;
|
||||
err:
|
||||
vduse_dev_destroy(vblk_exp->dev);
|
||||
g_free(vblk_exp->recon_file);
|
||||
err_dev:
|
||||
g_free(vblk_exp->handler.serial);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vduse_blk_exp_delete(BlockExport *exp)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||
int ret;
|
||||
|
||||
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
||||
vblk_exp);
|
||||
blk_set_dev_ops(exp->blk, NULL, NULL);
|
||||
ret = vduse_dev_destroy(vblk_exp->dev);
|
||||
if (ret != -EBUSY) {
|
||||
unlink(vblk_exp->recon_file);
|
||||
}
|
||||
g_free(vblk_exp->recon_file);
|
||||
g_free(vblk_exp->handler.serial);
|
||||
}
|
||||
|
||||
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
|
||||
{
|
||||
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
|
||||
|
||||
aio_context_acquire(vblk_exp->export.ctx);
|
||||
vduse_blk_detach_ctx(vblk_exp);
|
||||
aio_context_release(vblk_exp->export.ctx);
|
||||
}
|
||||
|
||||
const BlockExportDriver blk_exp_vduse_blk = {
|
||||
.type = BLOCK_EXPORT_TYPE_VDUSE_BLK,
|
||||
.instance_size = sizeof(VduseBlkExport),
|
||||
.create = vduse_blk_exp_create,
|
||||
.delete = vduse_blk_exp_delete,
|
||||
.request_shutdown = vduse_blk_exp_request_shutdown,
|
||||
};
|
|
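Besides the qemu-storage-daemon command line, the same export can be created at runtime through QMP. A sketch of such a call (export id, node name and VDUSE device name are placeholders), based on the options handled in vduse_blk_exp_create() above:

    { "execute": "block-export-add",
      "arguments": { "type": "vduse-blk", "id": "exp0", "node-name": "disk0",
                     "name": "vduse-blk0", "writable": true,
                     "num-queues": 1, "queue-size": 256 } }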
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Export QEMU block device via VDUSE
|
||||
*
|
||||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author:
|
||||
* Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef VDUSE_BLK_H
|
||||
#define VDUSE_BLK_H
|
||||
|
||||
#include "block/export.h"
|
||||
|
||||
extern const BlockExportDriver blk_exp_vduse_blk;
|
||||
|
||||
#endif /* VDUSE_BLK_H */
|
|
@ -17,31 +17,15 @@
|
|||
#include "vhost-user-blk-server.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "sysemu/block-backend.h"
|
||||
#include "util/block-helpers.h"
|
||||
|
||||
/*
|
||||
* Sector units are 512 bytes regardless of the
|
||||
* virtio_blk_config->blk_size value.
|
||||
*/
|
||||
#define VIRTIO_BLK_SECTOR_BITS 9
|
||||
#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
|
||||
#include "virtio-blk-handler.h"
|
||||
|
||||
enum {
|
||||
VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
|
||||
VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
|
||||
VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
|
||||
};
|
||||
struct virtio_blk_inhdr {
|
||||
unsigned char status;
|
||||
};
|
||||
|
||||
typedef struct VuBlkReq {
|
||||
VuVirtqElement elem;
|
||||
int64_t sector_num;
|
||||
size_t size;
|
||||
struct virtio_blk_inhdr *in;
|
||||
struct virtio_blk_outhdr out;
|
||||
VuServer *server;
|
||||
struct VuVirtq *vq;
|
||||
} VuBlkReq;
|
||||
|
@ -50,248 +34,44 @@ typedef struct VuBlkReq {
|
|||
typedef struct {
|
||||
BlockExport export;
|
||||
VuServer vu_server;
|
||||
uint32_t blk_size;
|
||||
VirtioBlkHandler handler;
|
||||
QIOChannelSocket *sioc;
|
||||
struct virtio_blk_config blkcfg;
|
||||
bool writable;
|
||||
} VuBlkExport;
|
||||
|
||||
static void vu_blk_req_complete(VuBlkReq *req)
|
||||
static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
|
||||
{
|
||||
VuDev *vu_dev = &req->server->vu_dev;
|
||||
|
||||
/* IO size with 1 extra status byte */
|
||||
vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
|
||||
vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
|
||||
vu_queue_notify(vu_dev, req->vq);
|
||||
|
||||
free(req);
|
||||
}
|
||||
|
||||
static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
|
||||
size_t size)
|
||||
{
|
||||
uint64_t nb_sectors;
|
||||
uint64_t total_sectors;
|
||||
|
||||
if (size % VIRTIO_BLK_SECTOR_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
|
||||
if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
||||
return false;
|
||||
}
|
||||
if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
|
||||
return false;
|
||||
}
|
||||
blk_get_geometry(vexp->export.blk, &total_sectors);
|
||||
if (sector > total_sectors || nb_sectors > total_sectors - sector) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int coroutine_fn
|
||||
vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
|
||||
uint32_t iovcnt, uint32_t type)
|
||||
{
|
||||
BlockBackend *blk = vexp->export.blk;
|
||||
struct virtio_blk_discard_write_zeroes desc;
|
||||
ssize_t size;
|
||||
uint64_t sector;
|
||||
uint32_t num_sectors;
|
||||
uint32_t max_sectors;
|
||||
uint32_t flags;
|
||||
int bytes;
|
||||
|
||||
/* Only one desc is currently supported */
|
||||
if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
|
||||
if (unlikely(size != sizeof(desc))) {
|
||||
error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
sector = le64_to_cpu(desc.sector);
|
||||
num_sectors = le32_to_cpu(desc.num_sectors);
|
||||
flags = le32_to_cpu(desc.flags);
|
||||
max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
|
||||
VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS :
|
||||
VHOST_USER_BLK_MAX_DISCARD_SECTORS;
|
||||
|
||||
/* This check ensures that 'bytes' fits in an int */
|
||||
if (unlikely(num_sectors > max_sectors)) {
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) {
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
/*
|
||||
* The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
|
||||
* and write zeroes commands if any unknown flag is set.
|
||||
*/
|
||||
if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
|
||||
int blk_flags = 0;
|
||||
|
||||
if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
|
||||
blk_flags |= BDRV_REQ_MAY_UNMAP;
|
||||
}
|
||||
|
||||
if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
|
||||
bytes, blk_flags) == 0) {
|
||||
return VIRTIO_BLK_S_OK;
|
||||
}
|
||||
} else if (type == VIRTIO_BLK_T_DISCARD) {
|
||||
/*
|
||||
* The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
|
||||
* discard commands if the unmap flag is set.
|
||||
*/
|
||||
if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
|
||||
bytes) == 0) {
|
||||
return VIRTIO_BLK_S_OK;
|
||||
}
|
||||
}
|
||||
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
/* Called with server refcount increased, must decrease before returning */
|
||||
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
|
||||
{
|
||||
VuBlkReq *req = opaque;
|
||||
VuServer *server = req->server;
|
||||
VuVirtqElement *elem = &req->elem;
|
||||
uint32_t type;
|
||||
|
||||
VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
|
||||
BlockBackend *blk = vexp->export.blk;
|
||||
|
||||
VirtioBlkHandler *handler = &vexp->handler;
|
||||
struct iovec *in_iov = elem->in_sg;
|
||||
struct iovec *out_iov = elem->out_sg;
|
||||
unsigned in_num = elem->in_num;
|
||||
unsigned out_num = elem->out_num;
|
||||
int in_len;
|
||||
|
||||
/* refer to hw/block/virtio_blk.c */
|
||||
if (elem->out_num < 1 || elem->in_num < 1) {
|
||||
error_report("virtio-blk request missing headers");
|
||||
goto err;
|
||||
in_len = virtio_blk_process_req(handler, in_iov, out_iov,
|
||||
in_num, out_num);
|
||||
if (in_len < 0) {
|
||||
free(req);
|
||||
vhost_user_server_unref(server);
|
||||
return;
|
||||
}
|
||||
|
||||
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
|
||||
sizeof(req->out)) != sizeof(req->out))) {
|
||||
error_report("virtio-blk request outhdr too short");
|
||||
goto err;
|
||||
}
|
||||
|
||||
iov_discard_front(&out_iov, &out_num, sizeof(req->out));
|
||||
|
||||
if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
|
||||
error_report("virtio-blk request inhdr too short");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* We always touch the last byte, so just see how big in_iov is. */
|
||||
req->in = (void *)in_iov[in_num - 1].iov_base
|
||||
+ in_iov[in_num - 1].iov_len
|
||||
- sizeof(struct virtio_blk_inhdr);
|
||||
iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
|
||||
|
||||
type = le32_to_cpu(req->out.type);
|
||||
switch (type & ~VIRTIO_BLK_T_BARRIER) {
|
||||
case VIRTIO_BLK_T_IN:
|
||||
case VIRTIO_BLK_T_OUT: {
|
||||
QEMUIOVector qiov;
|
||||
int64_t offset;
|
||||
ssize_t ret = 0;
|
||||
bool is_write = type & VIRTIO_BLK_T_OUT;
|
||||
req->sector_num = le64_to_cpu(req->out.sector);
|
||||
|
||||
if (is_write && !vexp->writable) {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_write) {
|
||||
qemu_iovec_init_external(&qiov, out_iov, out_num);
|
||||
} else {
|
||||
qemu_iovec_init_external(&qiov, in_iov, in_num);
|
||||
}
|
||||
|
||||
if (unlikely(!vu_blk_sect_range_ok(vexp,
|
||||
req->sector_num,
|
||||
qiov.size))) {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
if (is_write) {
|
||||
ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
|
||||
} else {
|
||||
ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
} else {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_FLUSH:
|
||||
if (blk_co_flush(blk) == 0) {
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
} else {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
break;
|
||||
case VIRTIO_BLK_T_GET_ID: {
|
||||
size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
|
||||
VIRTIO_BLK_ID_BYTES);
|
||||
snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
|
||||
req->in->status = VIRTIO_BLK_S_OK;
|
||||
req->size = elem->in_sg[0].iov_len;
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_DISCARD:
|
||||
case VIRTIO_BLK_T_WRITE_ZEROES: {
|
||||
if (!vexp->writable) {
|
||||
req->in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num,
|
||||
type);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
req->in->status = VIRTIO_BLK_S_UNSUPP;
|
||||
break;
|
||||
}
|
||||
|
||||
vu_blk_req_complete(req);
|
||||
vhost_user_server_unref(server);
|
||||
return;
|
||||
|
||||
err:
|
||||
free(req);
|
||||
vu_blk_req_complete(req, in_len);
|
||||
vhost_user_server_unref(server);
|
||||
}
|
||||
|
||||
|
@ -348,7 +128,7 @@ static uint64_t vu_blk_get_features(VuDev *dev)
|
|||
1ull << VIRTIO_RING_F_EVENT_IDX |
|
||||
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
|
||||
|
||||
if (!vexp->writable) {
|
||||
if (!vexp->handler.writable) {
|
||||
features |= 1ull << VIRTIO_BLK_F_RO;
|
||||
}
|
||||
|
||||
|
@ -455,12 +235,12 @@ vu_blk_initialize_config(BlockDriverState *bs,
|
|||
config->opt_io_size = cpu_to_le32(1);
|
||||
config->num_queues = cpu_to_le16(num_queues);
|
||||
config->max_discard_sectors =
|
||||
cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS);
|
||||
cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
|
||||
config->max_discard_seg = cpu_to_le32(1);
|
||||
config->discard_sector_alignment =
|
||||
cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
|
||||
config->max_write_zeroes_sectors
|
||||
= cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS);
|
||||
= cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
|
||||
config->max_write_zeroes_seg = cpu_to_le32(1);
|
||||
}
|
||||
|
||||
|
@ -480,7 +260,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
|||
uint64_t logical_block_size;
|
||||
uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
|
||||
|
||||
vexp->writable = opts->writable;
|
||||
vexp->blkcfg.wce = 0;
|
||||
|
||||
if (vu_opts->has_logical_block_size) {
|
||||
|
@ -494,8 +273,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
|||
error_propagate(errp, local_err);
|
||||
return -EINVAL;
|
||||
}
|
||||
vexp->blk_size = logical_block_size;
|
||||
blk_set_guest_block_size(exp->blk, logical_block_size);
|
||||
|
||||
if (vu_opts->has_num_queues) {
|
||||
num_queues = vu_opts->num_queues;
|
||||
|
@ -504,6 +281,10 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
|||
error_setg(errp, "num-queues must be greater than 0");
|
||||
return -EINVAL;
|
||||
}
|
||||
vexp->handler.blk = exp->blk;
|
||||
vexp->handler.serial = g_strdup("vhost_user_blk");
|
||||
vexp->handler.logical_block_size = logical_block_size;
|
||||
vexp->handler.writable = opts->writable;
|
||||
|
||||
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
|
||||
logical_block_size, num_queues);
|
||||
|
@ -515,6 +296,7 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
|
|||
num_queues, &vu_blk_iface, errp)) {
|
||||
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
|
||||
blk_aio_detach, vexp);
|
||||
g_free(vexp->handler.serial);
|
||||
return -EADDRNOTAVAIL;
|
||||
}
|
||||
|
||||
|
@ -527,6 +309,7 @@ static void vu_blk_exp_delete(BlockExport *exp)
|
|||
|
||||
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
|
||||
vexp);
|
||||
g_free(vexp->handler.serial);
|
||||
}
|
||||
|
||||
const BlockExportDriver blk_exp_vhost_user_blk = {
|
||||
|
|
|
@ -0,0 +1,240 @@
|
|||
/*
|
||||
* Handler for virtio-blk I/O
|
||||
*
|
||||
* Copyright (c) 2020 Red Hat, Inc.
|
||||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author:
|
||||
* Coiby Xu <coiby.xu@gmail.com>
|
||||
* Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "virtio-blk-handler.h"
|
||||
|
||||
#include "standard-headers/linux/virtio_blk.h"
|
||||
|
||||
struct virtio_blk_inhdr {
|
||||
unsigned char status;
|
||||
};
|
||||
|
||||
static bool virtio_blk_sect_range_ok(BlockBackend *blk, uint32_t block_size,
|
||||
uint64_t sector, size_t size)
|
||||
{
|
||||
uint64_t nb_sectors;
|
||||
uint64_t total_sectors;
|
||||
|
||||
if (size % VIRTIO_BLK_SECTOR_SIZE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
|
||||
if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
|
||||
return false;
|
||||
}
|
||||
if ((sector << VIRTIO_BLK_SECTOR_BITS) % block_size) {
|
||||
return false;
|
||||
}
|
||||
blk_get_geometry(blk, &total_sectors);
|
||||
if (sector > total_sectors || nb_sectors > total_sectors - sector) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int coroutine_fn
|
||||
virtio_blk_discard_write_zeroes(VirtioBlkHandler *handler, struct iovec *iov,
|
||||
uint32_t iovcnt, uint32_t type)
|
||||
{
|
||||
BlockBackend *blk = handler->blk;
|
||||
struct virtio_blk_discard_write_zeroes desc;
|
||||
ssize_t size;
|
||||
uint64_t sector;
|
||||
uint32_t num_sectors;
|
||||
uint32_t max_sectors;
|
||||
uint32_t flags;
|
||||
int bytes;
|
||||
|
||||
/* Only one desc is currently supported */
|
||||
if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
|
||||
if (unlikely(size != sizeof(desc))) {
|
||||
error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
sector = le64_to_cpu(desc.sector);
|
||||
num_sectors = le32_to_cpu(desc.num_sectors);
|
||||
flags = le32_to_cpu(desc.flags);
|
||||
max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
|
||||
VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS :
|
||||
VIRTIO_BLK_MAX_DISCARD_SECTORS;
|
||||
|
||||
/* This check ensures that 'bytes' fits in an int */
|
||||
if (unlikely(num_sectors > max_sectors)) {
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
if (unlikely(!virtio_blk_sect_range_ok(blk, handler->logical_block_size,
|
||||
sector, bytes))) {
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
/*
|
||||
* The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
|
||||
* and write zeroes commands if any unknown flag is set.
|
||||
*/
|
||||
if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
|
||||
int blk_flags = 0;
|
||||
|
||||
if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
|
||||
blk_flags |= BDRV_REQ_MAY_UNMAP;
|
||||
}
|
||||
|
||||
if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
|
||||
bytes, blk_flags) == 0) {
|
||||
return VIRTIO_BLK_S_OK;
|
||||
}
|
||||
} else if (type == VIRTIO_BLK_T_DISCARD) {
|
||||
/*
|
||||
* The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
|
||||
* discard commands if the unmap flag is set.
|
||||
*/
|
||||
if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
|
||||
return VIRTIO_BLK_S_UNSUPP;
|
||||
}
|
||||
|
||||
if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
|
||||
bytes) == 0) {
|
||||
return VIRTIO_BLK_S_OK;
|
||||
}
|
||||
}
|
||||
|
||||
return VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
|
||||
int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
|
||||
struct iovec *in_iov,
|
||||
struct iovec *out_iov,
|
||||
unsigned int in_num,
|
||||
unsigned int out_num)
|
||||
{
|
||||
BlockBackend *blk = handler->blk;
|
||||
struct virtio_blk_inhdr *in;
|
||||
struct virtio_blk_outhdr out;
|
||||
uint32_t type;
|
||||
int in_len;
|
||||
|
||||
if (out_num < 1 || in_num < 1) {
|
||||
error_report("virtio-blk request missing headers");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (unlikely(iov_to_buf(out_iov, out_num, 0, &out,
|
||||
sizeof(out)) != sizeof(out))) {
|
||||
error_report("virtio-blk request outhdr too short");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
iov_discard_front(&out_iov, &out_num, sizeof(out));
|
||||
|
||||
if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
|
||||
error_report("virtio-blk request inhdr too short");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* We always touch the last byte, so just see how big in_iov is. */
|
||||
in_len = iov_size(in_iov, in_num);
|
||||
in = (void *)in_iov[in_num - 1].iov_base
|
||||
+ in_iov[in_num - 1].iov_len
|
||||
- sizeof(struct virtio_blk_inhdr);
|
||||
iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
|
||||
|
||||
type = le32_to_cpu(out.type);
|
||||
switch (type & ~VIRTIO_BLK_T_BARRIER) {
|
||||
case VIRTIO_BLK_T_IN:
|
||||
case VIRTIO_BLK_T_OUT: {
|
||||
QEMUIOVector qiov;
|
||||
int64_t offset;
|
||||
ssize_t ret = 0;
|
||||
bool is_write = type & VIRTIO_BLK_T_OUT;
|
||||
int64_t sector_num = le64_to_cpu(out.sector);
|
||||
|
||||
if (is_write && !handler->writable) {
|
||||
in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_write) {
|
||||
qemu_iovec_init_external(&qiov, out_iov, out_num);
|
||||
} else {
|
||||
qemu_iovec_init_external(&qiov, in_iov, in_num);
|
||||
}
|
||||
|
||||
if (unlikely(!virtio_blk_sect_range_ok(blk,
|
||||
handler->logical_block_size,
|
||||
sector_num, qiov.size))) {
|
||||
in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
offset = sector_num << VIRTIO_BLK_SECTOR_BITS;
|
||||
|
||||
if (is_write) {
|
||||
ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
|
||||
} else {
|
||||
ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
|
||||
}
|
||||
if (ret >= 0) {
|
||||
in->status = VIRTIO_BLK_S_OK;
|
||||
} else {
|
||||
in->status = VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_FLUSH:
|
||||
if (blk_co_flush(blk) == 0) {
|
||||
in->status = VIRTIO_BLK_S_OK;
|
||||
} else {
|
||||
in->status = VIRTIO_BLK_S_IOERR;
|
||||
}
|
||||
break;
|
||||
case VIRTIO_BLK_T_GET_ID: {
|
||||
size_t size = MIN(strlen(handler->serial) + 1,
|
||||
MIN(iov_size(in_iov, in_num),
|
||||
VIRTIO_BLK_ID_BYTES));
|
||||
iov_from_buf(in_iov, in_num, 0, handler->serial, size);
|
||||
in->status = VIRTIO_BLK_S_OK;
|
||||
break;
|
||||
}
|
||||
case VIRTIO_BLK_T_DISCARD:
|
||||
case VIRTIO_BLK_T_WRITE_ZEROES:
|
||||
if (!handler->writable) {
|
||||
in->status = VIRTIO_BLK_S_IOERR;
|
||||
break;
|
||||
}
|
||||
in->status = virtio_blk_discard_write_zeroes(handler, out_iov,
|
||||
out_num, type);
|
||||
break;
|
||||
default:
|
||||
in->status = VIRTIO_BLK_S_UNSUPP;
|
||||
break;
|
||||
}
|
||||
|
||||
return in_len;
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Handler for virtio-blk I/O
|
||||
*
|
||||
* Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Author:
|
||||
* Xie Yongji <xieyongji@bytedance.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef VIRTIO_BLK_HANDLER_H
|
||||
#define VIRTIO_BLK_HANDLER_H
|
||||
|
||||
#include "sysemu/block-backend.h"
|
||||
|
||||
#define VIRTIO_BLK_SECTOR_BITS 9
|
||||
#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS)
|
||||
|
||||
#define VIRTIO_BLK_MAX_DISCARD_SECTORS 32768
|
||||
#define VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS 32768
|
||||
|
||||
typedef struct {
|
||||
BlockBackend *blk;
|
||||
char *serial;
|
||||
uint32_t logical_block_size;
|
||||
bool writable;
|
||||
} VirtioBlkHandler;
|
||||
|
||||
int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
|
||||
struct iovec *in_iov,
|
||||
struct iovec *out_iov,
|
||||
unsigned int in_num,
|
||||
unsigned int out_num);
|
||||
|
||||
#endif /* VIRTIO_BLK_HANDLER_H */
|
|
block/gluster.c

@@ -891,7 +891,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-    bs->bl.max_pdiscard = SIZE_MAX;
+    bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
 }
 
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
block/io.c (15 lines changed)

@@ -588,21 +588,6 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
     BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
 }
 
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree,
- * and suspend block driver's internal I/O until next request arrives.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
-{
-    IO_OR_GS_CODE();
-    assert(qemu_in_coroutine());
-    bdrv_drained_begin(bs);
-    bdrv_drained_end(bs);
-}
-
 void bdrv_drain(BlockDriverState *bs)
 {
     IO_OR_GS_CODE();
block/monitor/bitmap-qmp-cmds.c

@@ -261,8 +261,9 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
                                           HBitmap **backup, Error **errp)
 {
     BlockDriverState *bs;
-    BdrvDirtyBitmap *dst, *src, *anon;
+    BdrvDirtyBitmap *dst, *src;
     BlockDirtyBitmapOrStrList *lst;
+    HBitmap *local_backup = NULL;
 
     GLOBAL_STATE_CODE();
 
@@ -271,12 +272,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
         return NULL;
     }
 
-    anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst),
-                                    NULL, errp);
-    if (!anon) {
-        return NULL;
-    }
-
     for (lst = bms; lst; lst = lst->next) {
         switch (lst->value->type) {
             const char *name, *node;
@@ -285,8 +280,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
             src = bdrv_find_dirty_bitmap(bs, name);
             if (!src) {
                 error_setg(errp, "Dirty bitmap '%s' not found", name);
-                dst = NULL;
-                goto out;
+                goto fail;
             }
             break;
         case QTYPE_QDICT:
@@ -294,26 +288,36 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
             name = lst->value->u.external.name;
             src = block_dirty_bitmap_lookup(node, name, NULL, errp);
             if (!src) {
-                dst = NULL;
-                goto out;
+                goto fail;
             }
             break;
         default:
             abort();
         }
 
-        if (!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) {
-            dst = NULL;
-            goto out;
+        /* We do backup only for first merge operation */
+        if (!bdrv_merge_dirty_bitmap(dst, src,
+                                     local_backup ? NULL : &local_backup,
+                                     errp))
+        {
+            goto fail;
         }
     }
 
-    /* Merge into dst; dst is unchanged on failure. */
-    bdrv_merge_dirty_bitmap(dst, anon, backup, errp);
+    if (backup) {
+        *backup = local_backup;
+    } else {
+        hbitmap_free(local_backup);
+    }
 
- out:
-    bdrv_release_dirty_bitmap(anon);
     return dst;
+
+fail:
+    if (local_backup) {
+        bdrv_restore_dirty_bitmap(dst, local_backup);
+    }
+
+    return NULL;
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
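This rework backs the QMP block-dirty-bitmap-merge command. As a usage illustration (node and bitmap names are made up), merging two local bitmaps into a target looks like:

    { "execute": "block-dirty-bitmap-merge",
      "arguments": { "node": "drive0", "target": "bitmap0",
                     "bitmaps": [ "bitmap1", "bitmap2" ] } }

With the new code, dst is modified in place and restored from local_backup if any merge in the list fails, so the command still appears all-or-nothing to the caller.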
block/nbd.c

@@ -521,12 +521,8 @@ static int coroutine_fn nbd_co_send_request(BlockDriverState *bs,
     if (qiov) {
         qio_channel_set_cork(s->ioc, true);
         rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0) {
-            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-                                       NULL) < 0) {
-                rc = -EIO;
-            }
-        } else if (rc >= 0) {
+        if (rc >= 0 && qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
+                                              NULL) < 0) {
             rc = -EIO;
         }
         qio_channel_set_cork(s->ioc, false);
block/rbd.c (24 lines changed)

@@ -831,6 +831,26 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
         error_setg_errno(errp, -r, "error opening pool %s", opts->pool);
         goto failed_shutdown;
     }
+
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+    if (opts->has_q_namespace && strlen(opts->q_namespace) > 0) {
+        bool exists;
+
+        r = rbd_namespace_exists(*io_ctx, opts->q_namespace, &exists);
+        if (r < 0) {
+            error_setg_errno(errp, -r, "error checking namespace");
+            goto failed_ioctx_destroy;
+        }
+
+        if (!exists) {
+            error_setg(errp, "namespace '%s' does not exist",
+                       opts->q_namespace);
+            r = -ENOENT;
+            goto failed_ioctx_destroy;
+        }
+    }
+#endif
+
     /*
      * Set the namespace after opening the io context on the pool,
      * if nspace == NULL or if nspace == "", it is just as we did nothing
@@ -840,6 +860,10 @@ static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
     r = 0;
     goto out;
 
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+failed_ioctx_destroy:
+    rados_ioctx_destroy(*io_ctx);
+#endif
 failed_shutdown:
     rados_shutdown(*cluster);
 out:
docs/tools/qemu-storage-daemon.rst

@@ -77,6 +77,7 @@ Standard options:
   --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
   --export [type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
   --export [type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
+  --export [type=]vduse-blk,id=<id>,node-name=<node-name>,name=<vduse-name>[,writable=on|off][,num-queues=<num-queues>][,queue-size=<queue-size>][,logical-block-size=<block-size>][,serial=<serial-number>]
 
   is a block export definition. ``node-name`` is the block node that should be
   exported. ``writable`` determines whether or not the export allows write
@@ -110,6 +111,27 @@ Standard options:
   ``allow-other`` to auto (the default) will try enabling this option, and on
   error fall back to disabling it.
 
+  The ``vduse-blk`` export type takes a ``name`` (must be unique across the host)
+  to create the VDUSE device.
+  ``num-queues`` sets the number of virtqueues (the default is 1).
+  ``queue-size`` sets the virtqueue descriptor table size (the default is 256).
+
+  The instantiated VDUSE device must then be added to the vDPA bus using the
+  vdpa(8) command from the iproute2 project::
+
+    # vdpa dev add name <id> mgmtdev vduse
+
+  The device can be removed from the vDPA bus later as follows::
+
+    # vdpa dev del <id>
+
+  For more information about attaching vDPA devices to the host with
+  virtio_vdpa.ko or attaching them to guests with vhost_vdpa.ko, see
+  https://vdpa-dev.gitlab.io/.
+
+  For more information about VDUSE, see
+  https://docs.kernel.org/userspace-api/vduse.html.
+
 .. option:: --monitor MONITORDEF
 
   is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for
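Putting the documented options together, a minimal end-to-end flow could look like the following sketch (image path, ids and the VDUSE device name are placeholders; the vdpa commands follow the iproute2 usage documented above):

    # Export a qcow2 image as a VDUSE block device named vduse-blk0
    qemu-storage-daemon \
        --blockdev driver=qcow2,node-name=disk0,file.driver=file,file.filename=disk.qcow2 \
        --export type=vduse-blk,id=exp0,node-name=disk0,name=vduse-blk0,writable=on

    # Attach the device to the vDPA bus, and detach it again when done
    vdpa dev add name vduse-blk0 mgmtdev vduse
    vdpa dev del vduse-blk0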
hw/block/virtio-blk.c

@@ -1228,7 +1228,6 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
 
     s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
     blk_set_dev_ops(s->blk, &virtio_block_ops, s);
-    blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
     blk_iostatus_enable(s->blk);
 
hw/block/xen-block.c

@@ -243,7 +243,6 @@ static void xen_block_realize(XenDevice *xendev, Error **errp)
     }
 
     blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
-    blk_set_guest_block_size(blk, conf->logical_block_size);
 
     if (conf->discard_granularity == -1) {
         conf->discard_granularity = conf->physical_block_size;
hw/ide/core.c

@@ -2548,7 +2548,6 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind,
     s->smart_selftest_count = 0;
     if (kind == IDE_CD) {
         blk_set_dev_ops(blk, &ide_cd_block_ops, s);
-        blk_set_guest_block_size(blk, 2048);
     } else {
         if (!blk_is_inserted(s->blk)) {
             error_setg(errp, "Device needs media, but drive is empty");
hw/scsi/scsi-disk.c

@@ -2419,7 +2419,6 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
     } else {
         blk_set_dev_ops(s->qdev.conf.blk, &scsi_disk_block_ops, s);
     }
-    blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize);
 
     blk_iostatus_enable(s->qdev.conf.blk);
 
hw/scsi/scsi-generic.c

@@ -321,7 +321,6 @@ static void scsi_read_complete(void * opaque, int ret)
         s->blocksize = ldl_be_p(&r->buf[8]);
         s->max_lba = ldq_be_p(&r->buf[0]);
     }
-    blk_set_guest_block_size(s->conf.blk, s->blocksize);
 
     /*
      * Patch MODE SENSE device specific parameters if the BDS is opened
include/block/aio-wait.h

@@ -81,6 +81,8 @@ extern AioWait global_aio_wait;
     AioContext *ctx_ = (ctx);                                      \
     /* Increment wait_->num_waiters before evaluating cond. */     \
     qatomic_inc(&wait_->num_waiters);                              \
+    /* Paired with smp_mb in aio_wait_kick(). */                   \
+    smp_mb();                                                      \
     if (ctx_ && in_aio_context_home_thread(ctx_)) {                \
         while ((cond)) {                                           \
             aio_poll(ctx_, true);                                  \
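For context, the smp_mb() added to AIO_WAIT_WHILE() pairs with a barrier on the kicker side. A simplified sketch of the counterpart in util/aio-wait.c (not a verbatim copy) illustrates the pairing:

    void aio_wait_kick(void)
    {
        /*
         * Paired with the smp_mb() in AIO_WAIT_WHILE(): the waiter increments
         * num_waiters, issues smp_mb(), then re-reads the condition; the
         * kicker updates the condition, issues smp_mb(), then reads
         * num_waiters.
         */
        smp_mb();

        if (qatomic_read(&global_aio_wait.num_waiters)) {
            aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
        }
    }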
@ -270,7 +270,6 @@ void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
|
|||
cond); })
|
||||
|
||||
void bdrv_drain(BlockDriverState *bs);
|
||||
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
|
||||
|
||||
int generated_co_wrapper
|
||||
bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
|
||||
|
|
|
@@ -102,7 +102,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
                                       const BdrvDirtyBitmap *src,
                                       HBitmap **backup, bool lock);
 
include/qemu/hbitmap.h

@@ -76,20 +76,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size);
  *
  * Store result of merging @a and @b into @result.
  * @result is allowed to be equal to @a or @b.
- *
- * Return true if the merge was successful,
- *        false if it was not attempted.
+ * All bitmaps must have same size.
  */
-bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
-
-/**
- * hbitmap_can_merge:
- *
- * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and
- * necessary for hbitmap_merge will not fail.
- *
- */
-bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
+void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
 
 /**
  * hbitmap_empty:
include/sysemu/block-backend-io.h

@@ -72,7 +72,6 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
 void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
 
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
@@ -0,0 +1,306 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _VDUSE_H_
#define _VDUSE_H_

#include <linux/types.h>

#define VDUSE_BASE 0x81

/* The ioctls for control device (/dev/vduse/control) */

#define VDUSE_API_VERSION 0

/*
 * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
 * This is used for future extension.
 */
#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64)

/* Set the version of VDUSE API that userspace supported. */
#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64)

/**
 * struct vduse_dev_config - basic configuration of a VDUSE device
 * @name: VDUSE device name, needs to be NUL terminated
 * @vendor_id: virtio vendor id
 * @device_id: virtio device id
 * @features: virtio features
 * @vq_num: the number of virtqueues
 * @vq_align: the allocation alignment of virtqueue's metadata
 * @reserved: for future use, needs to be initialized to zero
 * @config_size: the size of the configuration space
 * @config: the buffer of the configuration space
 *
 * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
 */
struct vduse_dev_config {
#define VDUSE_NAME_MAX 256
        char name[VDUSE_NAME_MAX];
        __u32 vendor_id;
        __u32 device_id;
        __u64 features;
        __u32 vq_num;
        __u32 vq_align;
        __u32 reserved[13];
        __u32 config_size;
        __u8 config[];
};

/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)

/*
 * Destroy a VDUSE device. Make sure there are no more references
 * to the char device (/dev/vduse/$NAME).
 */
#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])

/* The ioctls for VDUSE device (/dev/vduse/$NAME) */

/**
 * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
 * @offset: the mmap offset on returned file descriptor
 * @start: start of the IOVA region
 * @last: last of the IOVA region
 * @perm: access permission of the IOVA region
 *
 * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
 */
struct vduse_iotlb_entry {
        __u64 offset;
        __u64 start;
        __u64 last;
#define VDUSE_ACCESS_RO 0x1
#define VDUSE_ACCESS_WO 0x2
#define VDUSE_ACCESS_RW 0x3
        __u8 perm;
};

/*
 * Find the first IOVA region that overlaps with the range [start, last]
 * and return the corresponding file descriptor. Return -EINVAL means the
 * IOVA region doesn't exist. Caller should set start and last fields.
 */
#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)

/*
 * Get the negotiated virtio features. It's a subset of the features in
 * struct vduse_dev_config which can be accepted by virtio driver. It's
 * only valid after FEATURES_OK status bit is set.
 */
#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)

/**
 * struct vduse_config_data - data used to update configuration space
 * @offset: the offset from the beginning of configuration space
 * @length: the length to write to configuration space
 * @buffer: the buffer used to write from
 *
 * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
 * configuration space.
 */
struct vduse_config_data {
        __u32 offset;
        __u32 length;
        __u8 buffer[];
};

/* Set device configuration space */
#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)

/*
 * Inject a config interrupt. It's usually used to notify virtio driver
 * that device configuration space has changed.
 */
#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)

/**
 * struct vduse_vq_config - basic configuration of a virtqueue
 * @index: virtqueue index
 * @max_size: the max size of virtqueue
 * @reserved: for future use, needs to be initialized to zero
 *
 * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
 */
struct vduse_vq_config {
        __u32 index;
        __u16 max_size;
        __u16 reserved[13];
};

/*
 * Setup the specified virtqueue. Make sure all virtqueues have been
 * configured before the device is attached to vDPA bus.
 */
#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)

/**
 * struct vduse_vq_state_split - split virtqueue state
 * @avail_index: available index
 */
struct vduse_vq_state_split {
        __u16 avail_index;
};

/**
 * struct vduse_vq_state_packed - packed virtqueue state
 * @last_avail_counter: last driver ring wrap counter observed by device
 * @last_avail_idx: device available index
 * @last_used_counter: device ring wrap counter
 * @last_used_idx: used index
 */
struct vduse_vq_state_packed {
        __u16 last_avail_counter;
        __u16 last_avail_idx;
        __u16 last_used_counter;
        __u16 last_used_idx;
};

/**
 * struct vduse_vq_info - information of a virtqueue
 * @index: virtqueue index
 * @num: the size of virtqueue
 * @desc_addr: address of desc area
 * @driver_addr: address of driver area
 * @device_addr: address of device area
 * @split: split virtqueue state
 * @packed: packed virtqueue state
 * @ready: ready status of virtqueue
 *
 * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
 */
struct vduse_vq_info {
        __u32 index;
        __u32 num;
        __u64 desc_addr;
        __u64 driver_addr;
        __u64 device_addr;
        union {
                struct vduse_vq_state_split split;
                struct vduse_vq_state_packed packed;
        };
        __u8 ready;
};

/* Get the specified virtqueue's information. Caller should set index field. */
#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)

/**
 * struct vduse_vq_eventfd - eventfd configuration for a virtqueue
 * @index: virtqueue index
 * @fd: eventfd, -1 means de-assigning the eventfd
 *
 * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
 */
struct vduse_vq_eventfd {
        __u32 index;
#define VDUSE_EVENTFD_DEASSIGN -1
        int fd;
};

/*
 * Setup kick eventfd for specified virtqueue. The kick eventfd is used
 * by VDUSE kernel module to notify userspace to consume the avail vring.
 */
#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)

/*
 * Inject an interrupt for specific virtqueue. It's used to notify virtio driver
 * to consume the used vring.
 */
#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)

/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */

/**
 * enum vduse_req_type - request type
 * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
 * @VDUSE_SET_STATUS: set the device status
 * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
 *                      specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
 */
enum vduse_req_type {
        VDUSE_GET_VQ_STATE,
        VDUSE_SET_STATUS,
        VDUSE_UPDATE_IOTLB,
};

/**
 * struct vduse_vq_state - virtqueue state
 * @index: virtqueue index
 * @split: split virtqueue state
 * @packed: packed virtqueue state
 */
struct vduse_vq_state {
        __u32 index;
        union {
                struct vduse_vq_state_split split;
                struct vduse_vq_state_packed packed;
        };
};

/**
 * struct vduse_dev_status - device status
 * @status: device status
 */
struct vduse_dev_status {
        __u8 status;
};

/**
 * struct vduse_iova_range - IOVA range [start, last]
 * @start: start of the IOVA range
 * @last: last of the IOVA range
 */
struct vduse_iova_range {
        __u64 start;
        __u64 last;
};

/**
 * struct vduse_dev_request - control request
 * @type: request type
 * @request_id: request id
 * @reserved: for future use
 * @vq_state: virtqueue state, only index field is available
 * @s: device status
 * @iova: IOVA range for updating
 * @padding: padding
 *
 * Structure used by read(2) on /dev/vduse/$NAME.
 */
struct vduse_dev_request {
        __u32 type;
        __u32 request_id;
        __u32 reserved[4];
        union {
                struct vduse_vq_state vq_state;
                struct vduse_dev_status s;
                struct vduse_iova_range iova;
                __u32 padding[32];
        };
};

/**
 * struct vduse_dev_response - response to control request
 * @request_id: corresponding request id
 * @result: the result of request
 * @reserved: for future use, needs to be initialized to zero
 * @vq_state: virtqueue state
 * @padding: padding
 *
 * Structure used by write(2) on /dev/vduse/$NAME.
 */
struct vduse_dev_response {
        __u32 request_id;
#define VDUSE_REQ_RESULT_OK 0x00
#define VDUSE_REQ_RESULT_FAILED 0x01
        __u32 result;
        __u32 reserved[4];
        union {
                struct vduse_vq_state vq_state;
                __u32 padding[32];
        };
};

#endif /* _VDUSE_H_ */

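The imported header only declares the control-plane ABI; a userspace device emulation is expected to drive it roughly as follows. This is a hedged sketch, not the QEMU implementation: error handling is minimal, and the device id 2 (virtio-blk per the virtio spec), single queue, and feature/config values are illustrative placeholders.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/vduse.h>

static int create_vduse_dev(const char *name, uint64_t features,
                            const void *config, uint32_t config_size)
{
    uint64_t api = VDUSE_API_VERSION;
    struct vduse_dev_config *dev_config;
    struct vduse_vq_config vq_config = { .index = 0, .max_size = 256 };
    char path[VDUSE_NAME_MAX + 16];
    int control_fd, dev_fd;

    control_fd = open("/dev/vduse/control", O_RDWR);
    if (control_fd < 0 || ioctl(control_fd, VDUSE_SET_API_VERSION, &api) < 0) {
        return -1;
    }

    dev_config = calloc(1, sizeof(*dev_config) + config_size);
    strncpy(dev_config->name, name, VDUSE_NAME_MAX - 1);
    dev_config->device_id = 2;              /* virtio-blk (illustrative) */
    dev_config->features = features;
    dev_config->vq_num = 1;
    dev_config->vq_align = 4096;
    dev_config->config_size = config_size;
    memcpy(dev_config->config, config, config_size);

    if (ioctl(control_fd, VDUSE_CREATE_DEV, dev_config) < 0) {
        free(dev_config);
        close(control_fd);
        return -1;
    }
    free(dev_config);

    /* The device now appears as /dev/vduse/$NAME; per-device ioctls go
     * through that fd, and every virtqueue must be configured before the
     * device is attached to the vDPA bus. */
    snprintf(path, sizeof(path), "/dev/vduse/%s", name);
    dev_fd = open(path, O_RDWR);
    if (dev_fd >= 0) {
        ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_config);
    }
    close(control_fd);
    return dev_fd;
}

After that, the server reads struct vduse_dev_request messages (VDUSE_SET_STATUS, VDUSE_UPDATE_IOTLB, ...) from the same fd with read(2) and answers them with struct vduse_dev_response via write(2), which is exactly what the libvduse library below wraps.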
meson.build
@@ -1541,6 +1541,26 @@ if get_option('fuse_lseek').allowed()
  endif
endif

have_libvduse = (targetos == 'linux')
if get_option('libvduse').enabled()
  if targetos != 'linux'
    error('libvduse requires linux')
  endif
elif get_option('libvduse').disabled()
  have_libvduse = false
endif

have_vduse_blk_export = (have_libvduse and targetos == 'linux')
if get_option('vduse_blk_export').enabled()
  if targetos != 'linux'
    error('vduse_blk_export requires linux')
  elif not have_libvduse
    error('vduse_blk_export requires libvduse support')
  endif
elif get_option('vduse_blk_export').disabled()
  have_vduse_blk_export = false
endif

# libbpf
libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
if libbpf.found() and not cc.links('''

@@ -1783,6 +1803,7 @@ config_host_data.set('CONFIG_VHOST_CRYPTO', have_vhost_user_crypto)
config_host_data.set('CONFIG_VHOST_VDPA', have_vhost_vdpa)
config_host_data.set('CONFIG_VMNET', vmnet.found())
config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER', have_vhost_user_blk_server)
config_host_data.set('CONFIG_VDUSE_BLK_EXPORT', have_vduse_blk_export)
config_host_data.set('CONFIG_PNG', png.found())
config_host_data.set('CONFIG_VNC', vnc.found())
config_host_data.set('CONFIG_VNC_JPEG', jpeg.found())

@@ -1882,6 +1903,12 @@ config_host_data.set('HAVE_GETIFADDRS', cc.has_function('getifaddrs'))
config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util))
config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include <stdlib.h>'))
if rbd.found()
  config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS',
                       cc.has_function('rbd_namespace_exists',
                                       dependencies: rbd,
                                       prefix: '#include <rbd/librbd.h>'))
endif
if rdma.found()
  config_host_data.set('HAVE_IBV_ADVISE_MR',
                       cc.has_function('ibv_advise_mr',

@@ -2986,6 +3013,12 @@ if targetos == 'linux' and have_vhost_user
  vhost_user = libvhost_user.get_variable('vhost_user_dep')
endif

libvduse = not_found
if have_libvduse
  libvduse_proj = subproject('libvduse')
  libvduse = libvduse_proj.get_variable('libvduse_dep')
endif

# NOTE: the trace/ subdirectory needs the qapi_trace_events variable
# that is filled in by qapi/.
subdir('qapi')

@@ -3842,6 +3875,7 @@ if have_block
  summary_info += {'qed support': get_option('qed').allowed()}
  summary_info += {'parallels support': get_option('parallels').allowed()}
  summary_info += {'FUSE exports': fuse}
  summary_info += {'VDUSE block exports': have_vduse_blk_export}
endif
summary(summary_info, bool_yn: true, section: 'Block layer support')

@@ -257,6 +257,10 @@ option('virtfs', type: 'feature', value: 'auto',
       description: 'virtio-9p support')
option('virtiofsd', type: 'feature', value: 'auto',
       description: 'build virtiofs daemon (virtiofsd)')
option('libvduse', type: 'feature', value: 'auto',
       description: 'build VDUSE Library')
option('vduse_blk_export', type: 'feature', value: 'auto',
       description: 'VDUSE block export support')

option('capstone', type: 'feature', value: 'auto',
       description: 'Whether and how to find the capstone library')

@@ -178,6 +178,27 @@
            '*allow-other': 'FuseExportAllowOther' },
  'if': 'CONFIG_FUSE' }

##
# @BlockExportOptionsVduseBlk:
#
# A vduse-blk block export.
#
# @name: the name of VDUSE device (must be unique across the host).
# @num-queues: the number of virtqueues. Defaults to 1.
# @queue-size: the size of virtqueue. Defaults to 256.
# @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE]
#                      and must be power of 2. Defaults to 512 bytes.
# @serial: the serial number of virtio block device. Defaults to empty string.
#
# Since: 7.1
##
{ 'struct': 'BlockExportOptionsVduseBlk',
  'data': { 'name': 'str',
            '*num-queues': 'uint16',
            '*queue-size': 'uint16',
            '*logical-block-size': 'size',
            '*serial': 'str' } }

##
# @NbdServerAddOptions:
#

@@ -284,6 +305,7 @@
# @nbd: NBD export
# @vhost-user-blk: vhost-user-blk export (since 5.2)
# @fuse: FUSE export (since: 6.0)
# @vduse-blk: vduse-blk export (since 7.1)
#
# Since: 4.2
##

@@ -291,7 +313,8 @@
  'data': [ 'nbd',
            { 'name': 'vhost-user-blk',
              'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
            { 'name': 'fuse', 'if': 'CONFIG_FUSE' } ] }
            { 'name': 'fuse', 'if': 'CONFIG_FUSE' },
            { 'name': 'vduse-blk', 'if': 'CONFIG_VDUSE_BLK_EXPORT' } ] }

##
# @BlockExportOptions:

@@ -335,7 +358,9 @@
      'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk',
                          'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
      'fuse': { 'type': 'BlockExportOptionsFuse',
                'if': 'CONFIG_FUSE' }
                'if': 'CONFIG_FUSE' },
      'vduse-blk': { 'type': 'BlockExportOptionsVduseBlk',
                     'if': 'CONFIG_VDUSE_BLK_EXPORT' }
   } }

##

@@ -110,6 +110,7 @@ meson_options_help() {
  printf "%s\n" '  libssh          ssh block device support'
  printf "%s\n" '  libudev         Use libudev to enumerate host devices'
  printf "%s\n" '  libusb          libusb support for USB passthrough'
  printf "%s\n" '  libvduse        build VDUSE Library'
  printf "%s\n" '  linux-aio       Linux AIO support'
  printf "%s\n" '  linux-io-uring  Linux io_uring support'
  printf "%s\n" '  live-block-migration'

@@ -161,6 +162,8 @@ meson_options_help() {
  printf "%s\n" '  vhost-user      vhost-user backend support'
  printf "%s\n" '  vhost-user-blk-server'
  printf "%s\n" '                  build vhost-user-blk server'
  printf "%s\n" '  vduse-blk-export'
  printf "%s\n" '                  VDUSE block export support'
  printf "%s\n" '  vhost-vdpa      vhost-vdpa kernel backend support'
  printf "%s\n" '  virglrenderer   virgl rendering support'
  printf "%s\n" '  virtfs          virtio-9p support'

@@ -307,6 +310,8 @@ _meson_option_parse() {
    --disable-libudev) printf "%s" -Dlibudev=disabled ;;
    --enable-libusb) printf "%s" -Dlibusb=enabled ;;
    --disable-libusb) printf "%s" -Dlibusb=disabled ;;
    --enable-libvduse) printf "%s" -Dlibvduse=enabled ;;
    --disable-libvduse) printf "%s" -Dlibvduse=disabled ;;
    --enable-linux-aio) printf "%s" -Dlinux_aio=enabled ;;
    --disable-linux-aio) printf "%s" -Dlinux_aio=disabled ;;
    --enable-linux-io-uring) printf "%s" -Dlinux_io_uring=enabled ;;

@@ -429,6 +434,8 @@ _meson_option_parse() {
    --disable-vhost-user) printf "%s" -Dvhost_user=disabled ;;
    --enable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=enabled ;;
    --disable-vhost-user-blk-server) printf "%s" -Dvhost_user_blk_server=disabled ;;
    --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
    --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
    --enable-vhost-vdpa) printf "%s" -Dvhost_vdpa=enabled ;;
    --disable-vhost-vdpa) printf "%s" -Dvhost_vdpa=disabled ;;
    --enable-virglrenderer) printf "%s" -Dvirglrenderer=enabled ;;

@@ -161,7 +161,7 @@ done
rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux"
for header in kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
              psci.h psp-sev.h userfaultfd.h mman.h; do
              psci.h psp-sev.h userfaultfd.h mman.h vduse.h; do
    cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
done

@@ -121,6 +121,16 @@ static void help(void)
"                         vhost-user-blk device over file descriptor\n"
"\n"
#endif /* CONFIG_VHOST_USER_BLK_SERVER */
#ifdef CONFIG_VDUSE_BLK_EXPORT
"  --export [type=]vduse-blk,id=<id>,node-name=<node-name>\n"
"           ,name=<vduse-name>[,writable=on|off]\n"
"           [,num-queues=<num-queues>][,queue-size=<queue-size>]\n"
"           [,logical-block-size=<logical-block-size>]\n"
"           [,serial=<serial-number>]\n"
"                         export the specified block node as a\n"
"                         vduse-blk device\n"
"\n"
#endif /* CONFIG_VDUSE_BLK_EXPORT */
"  --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n"
"                         configure a QMP monitor\n"
"\n"

@@ -0,0 +1 @@
../../../include/qemu/atomic.h

@@ -0,0 +1 @@
../../../include/qemu/compiler.h

File diff suppressed because it is too large.

@@ -0,0 +1,247 @@
/*
 * VDUSE (vDPA Device in Userspace) library
 *
 * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author:
 *   Xie Yongji <xieyongji@bytedance.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later. See the COPYING file in the top-level directory.
 */

#ifndef LIBVDUSE_H
#define LIBVDUSE_H

#include <stdint.h>
#include <sys/uio.h>

#define VIRTQUEUE_MAX_SIZE 1024

/* VDUSE device structure */
typedef struct VduseDev VduseDev;

/* Virtqueue structure */
typedef struct VduseVirtq VduseVirtq;

/* Some operation of VDUSE backend */
typedef struct VduseOps {
    /* Called when virtqueue can be processed */
    void (*enable_queue)(VduseDev *dev, VduseVirtq *vq);
    /* Called when virtqueue processing should be stopped */
    void (*disable_queue)(VduseDev *dev, VduseVirtq *vq);
} VduseOps;

/* Describing elements of the I/O buffer */
typedef struct VduseVirtqElement {
    /* Descriptor table index */
    unsigned int index;
    /* Number of physically-contiguous device-readable descriptors */
    unsigned int out_num;
    /* Number of physically-contiguous device-writable descriptors */
    unsigned int in_num;
    /* Array to store physically-contiguous device-writable descriptors */
    struct iovec *in_sg;
    /* Array to store physically-contiguous device-readable descriptors */
    struct iovec *out_sg;
} VduseVirtqElement;


/**
 * vduse_get_virtio_features:
 *
 * Get supported virtio features
 *
 * Returns: supported feature bits
 */
uint64_t vduse_get_virtio_features(void);

/**
 * vduse_queue_get_dev:
 * @vq: specified virtqueue
 *
 * Get corresponding VDUSE device from the virtqueue.
 *
 * Returns: a pointer to VDUSE device on success, NULL on failure.
 */
VduseDev *vduse_queue_get_dev(VduseVirtq *vq);

/**
 * vduse_queue_get_fd:
 * @vq: specified virtqueue
 *
 * Get the kick fd for the virtqueue.
 *
 * Returns: file descriptor on success, -1 on failure.
 */
int vduse_queue_get_fd(VduseVirtq *vq);

/**
 * vduse_queue_pop:
 * @vq: specified virtqueue
 * @sz: the size of struct to return (must be >= VduseVirtqElement)
 *
 * Pop an element from virtqueue available ring.
 *
 * Returns: a pointer to a structure containing VduseVirtqElement on success,
 * NULL on failure.
 */
void *vduse_queue_pop(VduseVirtq *vq, size_t sz);

/**
 * vduse_queue_push:
 * @vq: specified virtqueue
 * @elem: pointer to VduseVirtqElement returned by vduse_queue_pop()
 * @len: length in bytes to write
 *
 * Push an element to virtqueue used ring.
 */
void vduse_queue_push(VduseVirtq *vq, const VduseVirtqElement *elem,
                      unsigned int len);
/**
 * vduse_queue_notify:
 * @vq: specified virtqueue
 *
 * Request to notify the queue.
 */
void vduse_queue_notify(VduseVirtq *vq);

/**
 * vduse_dev_get_priv:
 * @dev: VDUSE device
 *
 * Get the private pointer passed to vduse_dev_create().
 *
 * Returns: private pointer on success, NULL on failure.
 */
void *vduse_dev_get_priv(VduseDev *dev);

/**
 * vduse_dev_get_queue:
 * @dev: VDUSE device
 * @index: virtqueue index
 *
 * Get the specified virtqueue.
 *
 * Returns: a pointer to the virtqueue on success, NULL on failure.
 */
VduseVirtq *vduse_dev_get_queue(VduseDev *dev, int index);

/**
 * vduse_dev_get_fd:
 * @dev: VDUSE device
 *
 * Get the control message fd for the VDUSE device.
 *
 * Returns: file descriptor on success, -1 on failure.
 */
int vduse_dev_get_fd(VduseDev *dev);

/**
 * vduse_dev_handler:
 * @dev: VDUSE device
 *
 * Used to process the control message.
 *
 * Returns: file descriptor on success, -errno on failure.
 */
int vduse_dev_handler(VduseDev *dev);

/**
 * vduse_dev_update_config:
 * @dev: VDUSE device
 * @size: the size to write to configuration space
 * @offset: the offset from the beginning of configuration space
 * @buffer: the buffer used to write from
 *
 * Update device configuration space and inject a config interrupt.
 *
 * Returns: 0 on success, -errno on failure.
 */
int vduse_dev_update_config(VduseDev *dev, uint32_t size,
                            uint32_t offset, char *buffer);

/**
 * vduse_dev_setup_queue:
 * @dev: VDUSE device
 * @index: virtqueue index
 * @max_size: the max size of virtqueue
 *
 * Setup the specified virtqueue.
 *
 * Returns: 0 on success, -errno on failure.
 */
int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size);

/**
 * vduse_set_reconnect_log_file:
 * @dev: VDUSE device
 * @file: filename of reconnect log
 *
 * Specify the file to store log for reconnecting. It should
 * be called before vduse_dev_setup_queue().
 *
 * Returns: 0 on success, -errno on failure.
 */
int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename);

/**
 * vduse_dev_create_by_fd:
 * @fd: passed file descriptor
 * @num_queues: the number of virtqueues
 * @ops: the operation of VDUSE backend
 * @priv: private pointer
 *
 * Create VDUSE device from a passed file descriptor.
 *
 * Returns: pointer to VDUSE device on success, NULL on failure.
 */
VduseDev *vduse_dev_create_by_fd(int fd, uint16_t num_queues,
                                 const VduseOps *ops, void *priv);

/**
 * vduse_dev_create_by_name:
 * @name: VDUSE device name
 * @num_queues: the number of virtqueues
 * @ops: the operation of VDUSE backend
 * @priv: private pointer
 *
 * Create VDUSE device on /dev/vduse/$NAME.
 *
 * Returns: pointer to VDUSE device on success, NULL on failure.
 */
VduseDev *vduse_dev_create_by_name(const char *name, uint16_t num_queues,
                                   const VduseOps *ops, void *priv);

/**
 * vduse_dev_create:
 * @name: VDUSE device name
 * @device_id: virtio device id
 * @vendor_id: virtio vendor id
 * @features: virtio features
 * @num_queues: the number of virtqueues
 * @config_size: the size of the configuration space
 * @config: the buffer of the configuration space
 * @ops: the operation of VDUSE backend
 * @priv: private pointer
 *
 * Create VDUSE device.
 *
 * Returns: pointer to VDUSE device on success, NULL on failure.
 */
VduseDev *vduse_dev_create(const char *name, uint32_t device_id,
                           uint32_t vendor_id, uint64_t features,
                           uint16_t num_queues, uint32_t config_size,
                           char *config, const VduseOps *ops, void *priv);

/**
 * vduse_dev_destroy:
 * @dev: VDUSE device
 *
 * Destroy the VDUSE device.
 *
 * Returns: 0 on success, -errno on failure.
 */
int vduse_dev_destroy(VduseDev *dev);

#endif

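The header above is the whole public surface of the new library. A hedged usage sketch of how a backend might wire it together (this is not the vduse-blk export code; the virtio-blk device id 2, the single queue, and the 64-byte config space are illustrative assumptions):

#include <poll.h>
#include <stdlib.h>
#include "libvduse.h"

static void my_enable_queue(VduseDev *dev, VduseVirtq *vq)
{
    VduseVirtqElement *elem;
    (void)dev;

    /* Drain whatever is already available; a real backend would also
     * keep polling the kick fd returned by vduse_queue_get_fd(). */
    while ((elem = vduse_queue_pop(vq, sizeof(*elem)))) {
        /* Process elem->out_sg / elem->in_sg here, then complete it. */
        vduse_queue_push(vq, elem, 0);
        vduse_queue_notify(vq);
        free(elem);
    }
}

static void my_disable_queue(VduseDev *dev, VduseVirtq *vq)
{
    /* Stop watching this queue's kick fd. */
    (void)dev;
    (void)vq;
}

static const VduseOps ops = {
    .enable_queue = my_enable_queue,
    .disable_queue = my_disable_queue,
};

int main(void)
{
    char config[64] = { 0 };                 /* device config space */
    VduseDev *dev = vduse_dev_create("demo-dev", 2 /* virtio-blk */, 0,
                                     vduse_get_virtio_features(), 1,
                                     sizeof(config), config, &ops, NULL);
    if (!dev) {
        return 1;
    }
    vduse_dev_setup_queue(dev, 0, 256);

    /* Dispatch control messages (set status, IOTLB updates, ...) from
     * the VDUSE char device until the fd fails. */
    for (;;) {
        struct pollfd pfd = { .fd = vduse_dev_get_fd(dev), .events = POLLIN };
        if (poll(&pfd, 1, -1) <= 0 || vduse_dev_handler(dev) < 0) {
            break;
        }
    }

    vduse_dev_destroy(dev);
    return 0;
}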
@@ -0,0 +1 @@
../../../linux-headers/linux/

@@ -0,0 +1,10 @@
project('libvduse', 'c',
        license: 'GPL-2.0-or-later',
        default_options: ['c_std=gnu99'])

libvduse = static_library('vduse',
                          files('libvduse.c'),
                          c_args: '-D_GNU_SOURCE')

libvduse_dep = declare_dependency(link_with: libvduse,
                                  include_directories: include_directories('.'))

@@ -0,0 +1 @@
../../../include/standard-headers/linux/

@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)

void aio_wait_kick(void)
{
    /* The barrier (or an atomic op) is in the caller. */
    /*
     * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
     * write(condition);
     * aio_wait_kick() {
     *     smp_mb();
     *     read(num_waiters);
     * }
     *
     * And in AIO_WAIT_WHILE:
     * write(num_waiters);
     * smp_mb();
     * read(condition);
     */
    smp_mb();

    if (qatomic_read(&global_aio_wait.num_waiters)) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
    }

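The new comment spells out the ordering that the added smp_mb() completes. For context, a minimal sketch of the two sides of that handshake, assuming a hypothetical "job done" flag that is not a QEMU structure; the waiter side is the existing AIO_WAIT_WHILE() macro, shown here with a NULL AioContext as callers that hold no AioContext lock use it:

#include "qemu/osdep.h"
#include "block/aio-wait.h"

/* Hypothetical completion flag, for illustration only. */
static bool job_done;

/* Completion side (any thread): publish the condition, then kick.
 * aio_wait_kick() now issues smp_mb() before reading num_waiters, so
 * either this thread sees a registered waiter and schedules the dummy
 * BH, or the waiter's re-read of the condition sees job_done == true. */
static void finish_job(void)
{
    qatomic_set(&job_done, true);
    aio_wait_kick();
}

/* Waiting side (main loop thread): AIO_WAIT_WHILE() increments
 * num_waiters, executes the paired smp_mb(), and only then evaluates
 * the condition again, closing the race the comment describes. */
static void wait_for_job(void)
{
    AIO_WAIT_WHILE(NULL, !qatomic_read(&job_done));
}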
@@ -873,11 +873,6 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size)
    }
}

bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b)
{
    return (a->orig_size == b->orig_size);
}

/**
 * hbitmap_sparse_merge: performs dst = dst | src
 * works with differing granularities.

@@ -901,28 +896,24 @@ static void hbitmap_sparse_merge(HBitmap *dst, const HBitmap *src)
 * Given HBitmaps A and B, let R := A (BITOR) B.
 * Bitmaps A and B will not be modified,
 *     except when bitmap R is an alias of A or B.
 *
 * @return true if the merge was successful,
 *         false if it was not attempted.
 * Bitmaps must have same size.
 */
bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
{
    int i;
    uint64_t j;

    if (!hbitmap_can_merge(a, b) || !hbitmap_can_merge(a, result)) {
        return false;
    }
    assert(hbitmap_can_merge(b, result));
    assert(a->orig_size == result->orig_size);
    assert(b->orig_size == result->orig_size);

    if ((!hbitmap_count(a) && result == b) ||
        (!hbitmap_count(b) && result == a)) {
        return true;
        return;
    }

    if (!hbitmap_count(a) && !hbitmap_count(b)) {
        hbitmap_reset_all(result);
        return true;
        return;
    }

    if (a->granularity != b->granularity) {

@@ -935,7 +926,7 @@ bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)
        if (b != result) {
            hbitmap_sparse_merge(result, b);
        }
        return true;
        return;
    }

    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.

@@ -951,8 +942,6 @@ bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result)

    /* Recompute the dirty count */
    result->count = hb_count_between(result, 0, result->size - 1);

    return true;
}

char *hbitmap_sha256(const HBitmap *bitmap, Error **errp)
