From d38cc6fd1cafc3f834bb529f79bfc23089e9e54f Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Wed, 19 Oct 2022 22:28:02 +0200 Subject: [PATCH 1/2] hw/nvme: reenable cqe batching Commit 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates") had the unintended effect of disabling batching of CQEs. This patch changes the sq/cq timers to bottom halfs and instead of calling nvme_post_cqes() immediately (causing an interrupt per cqe), we defer the call. | iops -----------------+------ baseline | 138k +cqe batching | 233k Fixes: 2e53b0b45024 ("hw/nvme: Use ioeventfd to handle doorbell updates") Reviewed-by: Keith Busch Reviewed-by: Jinhao Fan Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 26 +++++++++++--------------- hw/nvme/nvme.h | 4 ++-- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 87aeba0564..73c870a429 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1401,13 +1401,7 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) QTAILQ_REMOVE(&req->sq->out_req_list, req, entry); QTAILQ_INSERT_TAIL(&cq->req_list, req, entry); - if (req->sq->ioeventfd_enabled) { - /* Post CQE directly since we are in main loop thread */ - nvme_post_cqes(cq); - } else { - /* Schedule the timer to post CQE later since we are in vcpu thread */ - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); - } + qemu_bh_schedule(cq->bh); } static void nvme_process_aers(void *opaque) @@ -4252,7 +4246,7 @@ static void nvme_cq_notifier(EventNotifier *e) nvme_irq_deassert(n, cq); } - nvme_post_cqes(cq); + qemu_bh_schedule(cq->bh); } static int nvme_init_cq_ioeventfd(NvmeCQueue *cq) @@ -4307,7 +4301,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n) uint16_t offset = sq->sqid << 3; n->sq[sq->sqid] = NULL; - timer_free(sq->timer); + qemu_bh_delete(sq->bh); if (sq->ioeventfd_enabled) { memory_region_del_eventfd(&n->iomem, 0x1000 + offset, 4, false, 0, &sq->notifier); @@ -4381,7 +4375,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, sq->io_req[i].sq = sq; QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); } - sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq); + + sq->bh = qemu_bh_new(nvme_process_sq, sq); if (n->dbbuf_enabled) { sq->db_addr = n->dbbuf_dbs + (sqid << 3); @@ -4698,7 +4693,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n) uint16_t offset = (cq->cqid << 3) + (1 << 2); n->cq[cq->cqid] = NULL; - timer_free(cq->timer); + qemu_bh_delete(cq->bh); if (cq->ioeventfd_enabled) { memory_region_del_eventfd(&n->iomem, 0x1000 + offset, 4, false, 0, &cq->notifier); @@ -4771,7 +4766,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, } } n->cq[cqid] = cq; - cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); + cq->bh = qemu_bh_new(nvme_post_cqes, cq); } static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) @@ -6913,9 +6908,9 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) if (start_sqs) { NvmeSQueue *sq; QTAILQ_FOREACH(sq, &cq->sq_list, entry) { - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + qemu_bh_schedule(sq->bh); } - timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + qemu_bh_schedule(cq->bh); } if (cq->tail == cq->head) { @@ -6984,7 +6979,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail, sizeof(sq->tail)); } - timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + + qemu_bh_schedule(sq->bh); } } diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 79f5c281c2..7adf042ec3 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -375,7 +375,7 @@ typedef struct NvmeSQueue { uint64_t dma_addr; uint64_t db_addr; uint64_t ei_addr; - QEMUTimer *timer; + QEMUBH *bh; EventNotifier notifier; bool ioeventfd_enabled; NvmeRequest *io_req; @@ -396,7 +396,7 @@ typedef struct NvmeCQueue { uint64_t dma_addr; uint64_t db_addr; uint64_t ei_addr; - QEMUTimer *timer; + QEMUBH *bh; EventNotifier notifier; bool ioeventfd_enabled; QTAILQ_HEAD(, NvmeSQueue) sq_list; From 632cb6cf07122b330d8ef419ec2f4aab561a9fba Mon Sep 17 00:00:00 2001 From: Francis Pravin Antony Michael Raj Date: Wed, 2 Nov 2022 09:06:00 +0100 Subject: [PATCH 2/2] hw/nvme: Abort copy command when format is one while pif As per the NVMe Command Set specification Section 3.2.2, if i) The namespace is formatted to use 16b Guard Protection Information (i.e., pif = 0) and ii) The Descriptor Format is not cleared to 0h Then the copy command should be aborted with the status code of Invalid Namespace or Format Fixes: 44219b6029fc ("hw/nvme: 64-bit pi support") Signed-off-by: Francis Pravin Antony Michael Raj Signed-off-by: Jonathan Derrick Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 73c870a429..9a9857ccf8 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -3034,7 +3034,8 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req) goto invalid; } - if (ns->pif && format != 0x1) { + if ((ns->pif == 0x0 && format != 0x0) || + (ns->pif != 0x0 && format != 0x1)) { status = NVME_INVALID_FORMAT | NVME_DNR; goto invalid; }