hw/nvme: reenable cqe batching

Commit 2e53b0b450 ("hw/nvme: Use ioeventfd to handle doorbell
updates") had the unintended effect of disabling batching of CQEs.

This patch changes the sq/cq timers to bottom halves; instead of
calling nvme_post_cqes() immediately (causing an interrupt per cqe), we
defer the call.

                   | iops
  -----------------+------
    baseline       | 138k
    +cqe batching  | 233k

Fixes: 2e53b0b450 ("hw/nvme: Use ioeventfd to handle doorbell updates")
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Author: Klaus Jensen <k.jensen@samsung.com>
Date:   2022-10-19 22:28:02 +02:00
Commit: d38cc6fd1c (parent: a11f65ec1b)

2 changed files with 13 additions and 17 deletions
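
Two properties of QEMU bottom halves do the heavy lifting here: a BH always
runs in the main loop thread (which is why the removed main-loop-vs-vcpu-thread
special casing in nvme_enqueue_req_completion() is no longer needed), and
scheduling an already-scheduled BH is a no-op, so completions that pile up
before it runs are all drained by a single nvme_post_cqes() invocation. A
minimal sketch of that pattern, assuming only QEMU's public BH API (the FakeCQ
type and helper names are illustrative, not the driver's):

/* Illustrative sketch -- not driver code. */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"

typedef struct FakeCQ {
    QEMUBH *bh;
    unsigned pending;              /* CQEs queued but not yet posted */
} FakeCQ;

static void fake_post_cqes(void *opaque)
{
    FakeCQ *cq = opaque;

    /* runs once in the main loop, draining everything queued since the
     * BH was first scheduled; the device then asserts the IRQ once */
    cq->pending = 0;
}

static void fake_enqueue_completion(FakeCQ *cq)
{
    cq->pending++;
    qemu_bh_schedule(cq->bh);      /* no-op if already scheduled: batching */
}

static void fake_cq_init(FakeCQ *cq)
{
    cq->bh = qemu_bh_new(fake_post_cqes, cq);
}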

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c

@@ -1401,13 +1401,7 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
     QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
     QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
 
-    if (req->sq->ioeventfd_enabled) {
-        /* Post CQE directly since we are in main loop thread */
-        nvme_post_cqes(cq);
-    } else {
-        /* Schedule the timer to post CQE later since we are in vcpu thread */
-        timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
-    }
+    qemu_bh_schedule(cq->bh);
 }
 
 static void nvme_process_aers(void *opaque)
@@ -4252,7 +4246,7 @@ static void nvme_cq_notifier(EventNotifier *e)
         nvme_irq_deassert(n, cq);
     }
 
-    nvme_post_cqes(cq);
+    qemu_bh_schedule(cq->bh);
 }
 
 static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
@@ -4307,7 +4301,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
     uint16_t offset = sq->sqid << 3;
 
     n->sq[sq->sqid] = NULL;
-    timer_free(sq->timer);
+    qemu_bh_delete(sq->bh);
     if (sq->ioeventfd_enabled) {
         memory_region_del_eventfd(&n->iomem,
                                   0x1000 + offset, 4, false, 0, &sq->notifier);
@@ -4381,7 +4375,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
         sq->io_req[i].sq = sq;
         QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
     }
-    sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
+
+    sq->bh = qemu_bh_new(nvme_process_sq, sq);
 
     if (n->dbbuf_enabled) {
         sq->db_addr = n->dbbuf_dbs + (sqid << 3);
@@ -4698,7 +4693,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     uint16_t offset = (cq->cqid << 3) + (1 << 2);
 
     n->cq[cq->cqid] = NULL;
-    timer_free(cq->timer);
+    qemu_bh_delete(cq->bh);
     if (cq->ioeventfd_enabled) {
         memory_region_del_eventfd(&n->iomem,
                                   0x1000 + offset, 4, false, 0, &cq->notifier);
@@ -4771,7 +4766,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
         }
     }
     n->cq[cqid] = cq;
-    cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+    cq->bh = qemu_bh_new(nvme_post_cqes, cq);
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -6913,9 +6908,9 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             if (start_sqs) {
                 NvmeSQueue *sq;
                 QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
-                    timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+                    qemu_bh_schedule(sq->bh);
                 }
-                timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+                qemu_bh_schedule(cq->bh);
             }
 
             if (cq->tail == cq->head) {
@@ -6984,7 +6979,8 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
             pci_dma_write(&n->parent_obj, sq->db_addr, &sq->tail,
                           sizeof(sq->tail));
         }
-        timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+
+        qemu_bh_schedule(sq->bh);
     }
 }
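
The batching itself comes from nvme_post_cqes(): it walks everything on
cq->req_list in one pass and raises the interrupt once at the end, so the old
per-CQE interrupts were purely an artifact of invoking it (or arming the
500 ns timer) once per completion. A simplified paraphrase of its shape, with
DMA and error handling elided (not the verbatim source):

static void nvme_post_cqes(void *opaque)
{
    NvmeCQueue *cq = opaque;
    NvmeRequest *req, *next;

    /* one invocation drains every completion queued so far */
    QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
        /* copy req's CQE into guest memory, advance cq->tail,
         * and recycle req onto its submission queue's free list */
    }

    if (cq->tail != cq->head) {
        nvme_irq_assert(cq->ctrl, cq);   /* a single interrupt per batch */
    }
}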

diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h

@@ -375,7 +375,7 @@ typedef struct NvmeSQueue {
     uint64_t    dma_addr;
     uint64_t    db_addr;
     uint64_t    ei_addr;
-    QEMUTimer   *timer;
+    QEMUBH      *bh;
     EventNotifier notifier;
     bool        ioeventfd_enabled;
     NvmeRequest *io_req;
@@ -396,7 +396,7 @@ typedef struct NvmeCQueue {
     uint64_t    dma_addr;
     uint64_t    db_addr;
     uint64_t    ei_addr;
-    QEMUTimer   *timer;
+    QEMUBH      *bh;
     EventNotifier notifier;
     bool        ioeventfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
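
Taken together with the ctrl.c hunks, the new field follows the standard
QEMUBH lifecycle; a condensed map of the calls this diff wires up (all names
from the hunks above):

sq->bh = qemu_bh_new(nvme_process_sq, sq);   /* nvme_init_sq() */
cq->bh = qemu_bh_new(nvme_post_cqes, cq);    /* nvme_init_cq() */

qemu_bh_schedule(sq->bh);   /* doorbell writes in nvme_process_db() */
qemu_bh_schedule(cq->bh);   /* nvme_enqueue_req_completion(), nvme_cq_notifier() */

qemu_bh_delete(sq->bh);     /* nvme_free_sq() */
qemu_bh_delete(cq->bh);     /* nvme_free_cq() */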