From 1e64facc015e16d8e4efa239feaeda9e4e9aeb04 Mon Sep 17 00:00:00 2001 From: Dmitry Tikhov Date: Tue, 12 Apr 2022 11:59:09 +0300 Subject: [PATCH 01/11] hw/nvme: fix narrowing conversion Since nlbas is of type int, it does not work with large namespace size values, e.g., 9 TB size of file backing namespace and 8 byte metadata with 4096 bytes lbasz gives negative nlbas value, which is later promoted to negative int64_t type value and results in negative ns->moff which breaks namespace Signed-off-by: Dmitry Tikhov Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ns.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 324f53ea0c..af6504fad2 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -29,7 +29,8 @@ void nvme_ns_init_format(NvmeNamespace *ns) { NvmeIdNs *id_ns = &ns->id_ns; BlockDriverInfo bdi; - int npdg, nlbas, ret; + int npdg, ret; + int64_t nlbas; ns->lbaf = id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; ns->lbasz = 1 << ns->lbaf.ds; @@ -42,7 +43,7 @@ void nvme_ns_init_format(NvmeNamespace *ns) id_ns->ncap = id_ns->nsze; id_ns->nuse = id_ns->ncap; - ns->moff = (int64_t)nlbas << ns->lbaf.ds; + ns->moff = nlbas << ns->lbaf.ds; npdg = ns->blkconf.discard_granularity / ns->lbasz; From 51c453266309166c2737623211c0afc12884cccd Mon Sep 17 00:00:00 2001 From: Dmitry Tikhov Date: Fri, 15 Apr 2022 23:48:32 +0300 Subject: [PATCH 02/11] hw/nvme: add missing return statement Since there is no return after nvme_dsm_cb invocation, metadata associated with non-zero block range is currently zeroed. Also this behaviour leads to segfault since we schedule iocb->bh two times. First when entering nvme_dsm_cb with iocb->idx == iocb->nr and second because of missing return on call stack unwinding by calling blk_aio_pwrite_zeroes and subsequent nvme_dsm_cb callback. Fixes: d7d1474fd85d ("hw/nvme: reimplement dsm to allow cancellation") Signed-off-by: Dmitry Tikhov Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 03760ddeae..74540a03d5 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2372,6 +2372,7 @@ static void nvme_dsm_md_cb(void *opaque, int ret) } nvme_dsm_cb(iocb, 0); + return; } iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_moff(ns, slba), From 2e8f952ae7de23b4847937dbbf51f7a1ab10a2af Mon Sep 17 00:00:00 2001 From: Dmitry Tikhov Date: Thu, 21 Apr 2022 13:51:58 +0300 Subject: [PATCH 03/11] hw/nvme: fix copy cmd for pi enabled namespaces Current implementation have problem in the read part of copy command. Because there is no metadata mangling before nvme_dif_check invocation, reftag error could be thrown for blocks of namespace that have not been previously written to. Signed-off-by: Dmitry Tikhov Reviewed-by: Klaus Jensen Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 74540a03d5..08574c4dcb 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -2787,6 +2787,10 @@ static void nvme_copy_in_completed_cb(void *opaque, int ret) size_t mlen = nvme_m2b(ns, nlb); uint8_t *mbounce = iocb->bounce + nvme_l2b(ns, nlb); + status = nvme_dif_mangle_mdata(ns, mbounce, mlen, slba); + if (status) { + goto invalid; + } status = nvme_dif_check(ns, iocb->bounce, len, mbounce, mlen, prinfor, slba, apptag, appmask, &reftag); if (status) { From 9235a72a5df0fae1ede89f02717b597ef91cf6ad Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 6 May 2022 00:21:47 +0200 Subject: [PATCH 04/11] hw/nvme: fix smart aen Pass the right constant to nvme_smart_event(). The NVME_AER* values hold the bit position in the SMART byte, not the shifted value that we expect it to be in nvme_smart_event(). Fixes: c62720f137df ("hw/block/nvme: trigger async event during injecting smart warning") Acked-by: zhenwei pi Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 08574c4dcb..a2f6069f7f 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -5325,7 +5325,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req) if ((n->temperature >= n->features.temp_thresh_hi) || (n->temperature <= n->features.temp_thresh_low)) { - nvme_smart_event(n, NVME_AER_INFO_SMART_TEMP_THRESH); + nvme_smart_event(n, NVME_SMART_TEMPERATURE); } break; From a859eb9f8f64e116671048a43a07d87bc6527a55 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 29 Apr 2022 10:33:32 +0200 Subject: [PATCH 05/11] hw/nvme: enforce common serial per subsystem The Identify Controller Serial Number (SN) is the serial number for the NVM subsystem and must be the same across all controller in the NVM subsystem. Enforce this. Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/nvme.h | 1 + hw/nvme/subsys.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h index 6773819325..e41771604f 100644 --- a/hw/nvme/nvme.h +++ b/hw/nvme/nvme.h @@ -48,6 +48,7 @@ typedef struct NvmeSubsystem { DeviceState parent_obj; NvmeBus bus; uint8_t subnqn[256]; + char *serial; NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index fb58d63950..691a90d209 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -27,6 +27,13 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp) return -1; } + if (!subsys->serial) { + subsys->serial = g_strdup(n->params.serial); + } else if (strcmp(subsys->serial, n->params.serial)) { + error_setg(errp, "invalid controller serial"); + return -1; + } + subsys->ctrls[cntlid] = n; for (nsid = 1; nsid < ARRAY_SIZE(subsys->namespaces); nsid++) { From 36d83272d5e45dff13e988ee0a59f11c58b442ba Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 29 Apr 2022 10:33:33 +0200 Subject: [PATCH 06/11] hw/nvme: do not auto-generate eui64 We cannot provide auto-generated unique or persistent namespace identifiers (EUI64, NGUID, UUID) easily. Since 6.1, namespaces have been assigned a generated EUI64 of the form "52:54:00:". This is will be unique within a QEMU instance, but not globally. Revert that this is assigned automatically and immediately deprecate the compatibility parameter. Users can opt-in to this with the `eui64-default=on` device parameter or set it explicitly with `eui64=UINT64`. Cc: libvir-list@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Klaus Jensen --- docs/about/deprecated.rst | 7 +++++++ hw/core/machine.c | 1 + hw/nvme/ns.c | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index e19bcba242..47a8628b56 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -296,6 +296,13 @@ contains native support for this feature and thus use of the option ROM approach is obsolete. The native SeaBIOS support can be activated by using ``-machine graphics=off``. +``-device nvme-ns,eui64-default=on|off`` (since 7.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In QEMU versions 6.1, 6.2 and 7.0, the ``nvme-ns`` generates an EUI-64 +identifer that is not globally unique. If an EUI-64 identifer is required, the +user must set it explicitly using the ``nvme-ns`` device parameter ``eui64``. + Block device options '''''''''''''''''''' diff --git a/hw/core/machine.c b/hw/core/machine.c index bb0dc8f6a9..c53548d0b1 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_0[] = { { "arm-gicv3-common", "force-8-bit-prio", "on" }, + { "nvme-ns", "eui64-default", "on"}, }; const size_t hw_compat_7_0_len = G_N_ELEMENTS(hw_compat_7_0); diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index af6504fad2..06a04131f1 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -641,7 +641,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_SIZE("zoned.zrwas", NvmeNamespace, params.zrwas, 0), DEFINE_PROP_SIZE("zoned.zrwafg", NvmeNamespace, params.zrwafg, -1), DEFINE_PROP_BOOL("eui64-default", NvmeNamespace, params.eui64_default, - true), + false), DEFINE_PROP_END_OF_LIST(), }; From bd9f371c6f6eeb8e907dfc770876ad8ef4ff85fc Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 29 Apr 2022 10:33:34 +0200 Subject: [PATCH 07/11] hw/nvme: do not auto-generate uuid Do not default to generate an UUID for namespaces if it is not explicitly specified. This is a technically a breaking change in behavior. However, since the UUID changes on every VM launch, it is not spec compliant and is of little use since the UUID cannot be used reliably anyway and the behavior prior to this patch must be considered buggy. Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Klaus Jensen --- hw/nvme/ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c index 06a04131f1..1b9c9d1156 100644 --- a/hw/nvme/ns.c +++ b/hw/nvme/ns.c @@ -614,7 +614,7 @@ static Property nvme_ns_props[] = { DEFINE_PROP_BOOL("detached", NvmeNamespace, params.detached, false), DEFINE_PROP_BOOL("shared", NvmeNamespace, params.shared, true), DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0), - DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid), + DEFINE_PROP_UUID_NODEFAULT("uuid", NvmeNamespace, params.uuid), DEFINE_PROP_UINT64("eui64", NvmeNamespace, params.eui64, 0), DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0), DEFINE_PROP_UINT8("mset", NvmeNamespace, params.mset, 0), From 9f2e1acf83c332752f52c39dad390c94ec2ba9f5 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 29 Apr 2022 10:33:35 +0200 Subject: [PATCH 08/11] hw/nvme: do not report null uuid Do not report the "null uuid" (all zeros) in the namespace identification descriptors. Reported-by: Luis Chamberlain Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index a2f6069f7f..909e357a7e 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4955,16 +4955,13 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } - /* - * If the EUI-64 field is 0 and the NGUID field is 0, the namespace must - * provide a valid Namespace UUID in the Namespace Identification Descriptor - * data structure. QEMU does not yet support setting NGUID. - */ - uuid.hdr.nidt = NVME_NIDT_UUID; - uuid.hdr.nidl = NVME_NIDL_UUID; - memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); - memcpy(pos, &uuid, sizeof(uuid)); - pos += sizeof(uuid); + if (!qemu_uuid_is_null(&ns->params.uuid)) { + uuid.hdr.nidt = NVME_NIDT_UUID; + uuid.hdr.nidl = NVME_NIDL_UUID; + memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID); + memcpy(pos, &uuid, sizeof(uuid)); + pos += sizeof(uuid); + } if (ns->params.eui64) { eui64.hdr.nidt = NVME_NIDT_EUI64; From fbba243bc700a4e479331e20544c7f6a41ae87b3 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 29 Apr 2022 10:33:36 +0200 Subject: [PATCH 09/11] hw/nvme: bump firmware revision The Linux kernel quirks the QEMU NVMe controller pretty heavily because of the namespace identifier mess. Since this is now fixed, bump the firmware revision number to allow the quirk to be disabled for this revision. As of now, bump the firmware revision number to be equal to the QEMU release version number. Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen --- hw/nvme/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 909e357a7e..1e6e0fcad9 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -6713,7 +6713,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); - strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); + strpadcpy((char *)id->fr, sizeof(id->fr), QEMU_VERSION, ' '); strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); id->cntlid = cpu_to_le16(n->cntlid); From 8b1e59a6873662a01379cf052384e5dedefe7447 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 19 Apr 2022 13:24:23 +0200 Subject: [PATCH 10/11] hw/nvme: deprecate the use-intel-id compatibility parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since version 5.2 commit 6eb7a071292a ("hw/block/nvme: change controller pci id"), the emulated NVMe controller has defaulted to a non-Intel PCI identifier. Deprecate the compatibility parameter so we can get rid of it once and for all. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Klaus Jensen --- docs/about/deprecated.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 47a8628b56..aa2e320207 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -303,6 +303,14 @@ In QEMU versions 6.1, 6.2 and 7.0, the ``nvme-ns`` generates an EUI-64 identifer that is not globally unique. If an EUI-64 identifer is required, the user must set it explicitly using the ``nvme-ns`` device parameter ``eui64``. +``-device nvme,use-intel-id=on|off`` (since 7.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``nvme`` device originally used a PCI Vendor/Device Identifier combination +from Intel that was not properly allocated. Since version 5.2, the controller +has used a properly allocated identifier. Deprecate the ``use-intel-id`` +machine compatibility parameter. + Block device options '''''''''''''''''''' From d7fe639cabf778903f6cab23ff58c905c71375ec Mon Sep 17 00:00:00 2001 From: Dmitry Tikhov Date: Wed, 20 Apr 2022 11:20:44 +0300 Subject: [PATCH 11/11] hw/nvme: add new command abort case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVMe command set specification for end-to-end data protection formatted namespace states: o If the Reference Tag Check bit of the PRCHK field is set to ‘1’ and the namespace is formatted for Type 3 protection, then the controller: ▪ should not compare the protection Information Reference Tag field to the computed reference tag; and ▪ may ignore the ILBRT and EILBRT fields. If a command is aborted as a result of the Reference Tag Check bit of the PRCHK field being set to ‘1’, then that command should be aborted with a status code of Invalid Protection Information, but may be aborted with a status code of Invalid Field in Command. Currently qemu compares reftag in the nvme_dif_prchk function whenever Reference Tag Check bit is set in the command. For type 3 namespaces however, caller of nvme_dif_prchk - nvme_dif_check does not increment reftag for each subsequent logical block. That way commands incorporating more than one logical block for type 3 formatted namespaces with reftag check bit set, always fail with End-to-end Reference Tag Check Error. Comply with spec by handling case of set Reference Tag Check bit in the type 3 formatted namespace. Fixes: 146f720c5563 ("hw/block/nvme: end-to-end data protection") Signed-off-by: Dmitry Tikhov Signed-off-by: Klaus Jensen --- hw/nvme/dif.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/nvme/dif.c b/hw/nvme/dif.c index 62d885f83e..63c44c86ab 100644 --- a/hw/nvme/dif.c +++ b/hw/nvme/dif.c @@ -26,6 +26,11 @@ uint16_t nvme_check_prinfo(NvmeNamespace *ns, uint8_t prinfo, uint64_t slba, return NVME_INVALID_PROT_INFO | NVME_DNR; } + if ((NVME_ID_NS_DPS_TYPE(ns->id_ns.dps) == NVME_ID_NS_DPS_TYPE_3) && + (prinfo & NVME_PRINFO_PRCHK_REF)) { + return NVME_INVALID_PROT_INFO; + } + return NVME_SUCCESS; }