From 0e6bff0d43bf04c6e7a16c2775879816ca056b3d Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:36 +0800 Subject: [PATCH 01/62] vdpa: Use iovec for vhost_vdpa_net_cvq_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches in this series will no longer perform an immediate poll and check of the device's used buffers for each CVQ state load command. Consequently, there will be multiple pending buffers in the shadow VirtQueue, making it a must for every control command to have its own buffer. To achieve this, this patch refactor vhost_vdpa_net_cvq_add() to accept `struct iovec`, which eliminates the coupling of control commands to `s->cvq_cmd_out_buffer` and `s->status`, allowing them to use their own buffer. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <8a328f146fb043f34edb75ba6d043d2d6de88f99.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 939c984d5b..618758596a 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -618,22 +618,14 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) vhost_vdpa_net_client_stop(nc); } -static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, - size_t in_len) +static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, + const struct iovec *out_sg, size_t out_num, + const struct iovec *in_sg, size_t in_num) { - /* Buffers for the device */ - const struct iovec out = { - .iov_base = s->cvq_cmd_out_buffer, - .iov_len = out_len, - }; - const struct iovec in = { - .iov_base = s->status, - .iov_len = sizeof(virtio_net_ctrl_ack), - }; VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); int r; - r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); + r = vhost_svq_add(svq, out_sg, out_num, in_sg, in_num, NULL); if (unlikely(r != 0)) { if (unlikely(r == -ENOSPC)) { qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", @@ -659,6 +651,15 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, .cmd = cmd, }; size_t data_size = iov_size(data_sg, data_num); + /* Buffers for the device */ + const struct iovec out = { + .iov_base = s->cvq_cmd_out_buffer, + .iov_len = sizeof(ctrl) + data_size, + }; + const struct iovec in = { + .iov_base = s->status, + .iov_len = sizeof(*s->status), + }; assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); @@ -669,8 +670,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, iov_to_buf(data_sg, data_num, 0, s->cvq_cmd_out_buffer + sizeof(ctrl), data_size); - return vhost_vdpa_net_cvq_add(s, data_size + sizeof(ctrl), - sizeof(virtio_net_ctrl_ack)); + return vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1); } static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) @@ -1248,10 +1248,15 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, .iov_base = s->cvq_cmd_out_buffer, }; /* in buffer used for device model */ - const struct iovec in = { + const struct iovec model_in = { .iov_base = &status, .iov_len = sizeof(status), }; + /* in buffer used for vdpa device */ + const struct iovec vdpa_in = { + .iov_base = s->status, + .iov_len = sizeof(*s->status), + }; ssize_t dev_written = -EINVAL; out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, @@ -1285,7 +1290,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, goto out; } } else { - dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); + dev_written = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1); if (unlikely(dev_written < 0)) { goto out; } @@ -1301,7 +1306,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, } status = VIRTIO_NET_ERR; - virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1); + virtio_net_handle_ctrl_iov(svq->vdev, &model_in, 1, &out, 1); if (status != VIRTIO_NET_OK) { error_report("Bad CVQ processing in model"); } From 327dedb8df91f57ef917ab5b5db519146ee6f08b Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:37 +0800 Subject: [PATCH 02/62] vdpa: Avoid using vhost_vdpa_net_load_*() outside vhost_vdpa_net_load() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches in this series will refactor vhost_vdpa_net_load_cmd() to iterate through the control commands shadow buffers, allowing QEMU to send CVQ state load commands in parallel at device startup. Considering that QEMU always forwards the CVQ command serialized outside of vhost_vdpa_net_load(), it is more elegant to send the CVQ commands directly without invoking vhost_vdpa_net_load_*() helpers. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <254f0618efde7af7229ba4fdada667bb9d318991.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 618758596a..86b8d31244 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -1114,12 +1114,14 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { */ static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s, VirtQueueElement *elem, - struct iovec *out) + struct iovec *out, + const struct iovec *in) { struct virtio_net_ctrl_mac mac_data, *mac_ptr; struct virtio_net_ctrl_hdr *hdr_ptr; uint32_t cursor; ssize_t r; + uint8_t on = 1; /* parse the non-multicast MAC address entries from CVQ command */ cursor = sizeof(*hdr_ptr); @@ -1167,7 +1169,13 @@ static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s, * filter table to the vdpa device, it should send the * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode */ - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 1); + hdr_ptr = out->iov_base; + out->iov_len = sizeof(*hdr_ptr) + sizeof(on); + + hdr_ptr->class = VIRTIO_NET_CTRL_RX; + hdr_ptr->cmd = VIRTIO_NET_CTRL_RX_PROMISC; + iov_from_buf(out, 1, sizeof(*hdr_ptr), &on, sizeof(on)); + r = vhost_vdpa_net_cvq_add(s, out, 1, in, 1); if (unlikely(r < 0)) { return r; } @@ -1285,7 +1293,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, * the CVQ command directly. */ dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem, - &out); + &out, &vdpa_in); if (unlikely(dev_written < 0)) { goto out; } From 24e59cfe0cb53416b06c2c117bc22ff22dc54df3 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:38 +0800 Subject: [PATCH 03/62] vdpa: Check device ack in vhost_vdpa_net_load_rx_mode() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Considering that vhost_vdpa_net_load_rx_mode() is only called within vhost_vdpa_net_load_rx() now, this patch refactors vhost_vdpa_net_load_rx_mode() to include a check for the device's ack, simplifying the code and improving its maintainability. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <68811d52f96ae12d68f0d67d996ac1642a623943.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 76 ++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 86b8d31244..36a4e57c0d 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -827,14 +827,24 @@ static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, .iov_base = &on, .iov_len = sizeof(on), }; - return vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX, - cmd, &data, 1); + ssize_t dev_written; + + dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX, + cmd, &data, 1); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + + return 0; } static int vhost_vdpa_net_load_rx(VhostVDPAState *s, const VirtIONet *n) { - ssize_t dev_written; + ssize_t r; if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) { return 0; @@ -859,13 +869,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (!n->mac_table.uni_overflow && !n->promisc) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_PROMISC, 0); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 0); + if (unlikely(r < 0)) { + return r; } } @@ -887,13 +893,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->mac_table.multi_overflow || n->allmulti) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_ALLMULTI, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_ALLMULTI, 1); + if (unlikely(r < 0)) { + return r; } } @@ -912,13 +914,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->alluni) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_ALLUNI, 1); - if (dev_written < 0) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_ALLUNI, 1); + if (r < 0) { + return r; } } @@ -933,13 +931,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nomulti) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_NOMULTI, 1); - if (dev_written < 0) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOMULTI, 1); + if (r < 0) { + return r; } } @@ -954,13 +948,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nouni) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_NOUNI, 1); - if (dev_written < 0) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOUNI, 1); + if (r < 0) { + return r; } } @@ -975,13 +965,9 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nobcast) { - dev_written = vhost_vdpa_net_load_rx_mode(s, - VIRTIO_NET_CTRL_RX_NOBCAST, 1); - if (dev_written < 0) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOBCAST, 1); + if (r < 0) { + return r; } } From a864a3219d7e63569583d204c12bff2a0f90463e Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:39 +0800 Subject: [PATCH 04/62] vdpa: Move vhost_svq_poll() to the caller of vhost_vdpa_net_cvq_add() This patch moves vhost_svq_poll() to the caller of vhost_vdpa_net_cvq_add() and introduces a helper funtion. By making this change, next patches in this series is able to refactor vhost_vdpa_net_load_x() only to delay the polling and checking process until either the SVQ is full or control commands shadow buffers are full. Signed-off-by: Hawkins Jiawei Message-Id: <196cadb55175a75275660c6634a538289f027ae3.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 53 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 36a4e57c0d..ea73e3c410 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -631,15 +631,21 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", __func__); } - return r; } - /* - * We can poll here since we've had BQL from the time we sent the - * descriptor. Also, we need to take the answer before SVQ pulls by itself, - * when BQL is released - */ - return vhost_svq_poll(svq, 1); + return r; +} + +/* + * Convenience wrapper to poll SVQ for multiple control commands. + * + * Caller should hold the BQL when invoking this function, and should take + * the answer before SVQ pulls by itself when BQL is released. + */ +static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight) +{ + VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); + return vhost_svq_poll(svq, cmds_in_flight); } static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, @@ -660,6 +666,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, .iov_base = s->status, .iov_len = sizeof(*s->status), }; + ssize_t r; assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); @@ -670,7 +677,16 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, iov_to_buf(data_sg, data_num, 0, s->cvq_cmd_out_buffer + sizeof(ctrl), data_size); - return vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1); + r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1); + if (unlikely(r < 0)) { + return r; + } + + /* + * We can poll here since we've had BQL from the time + * we sent the descriptor. + */ + return vhost_vdpa_net_svq_poll(s, 1); } static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) @@ -1165,6 +1181,15 @@ static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s, if (unlikely(r < 0)) { return r; } + + /* + * We can poll here since we've had BQL from the time + * we sent the descriptor. + */ + r = vhost_vdpa_net_svq_poll(s, 1); + if (unlikely(r < sizeof(*s->status))) { + return r; + } if (*s->status != VIRTIO_NET_OK) { return sizeof(*s->status); } @@ -1284,10 +1309,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, goto out; } } else { - dev_written = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1); - if (unlikely(dev_written < 0)) { + ssize_t r; + r = vhost_vdpa_net_cvq_add(s, &out, 1, &vdpa_in, 1); + if (unlikely(r < 0)) { + dev_written = r; goto out; } + + /* + * We can poll here since we've had BQL from the time + * we sent the descriptor. + */ + dev_written = vhost_vdpa_net_svq_poll(s, 1); } if (unlikely(dev_written < sizeof(status))) { From 1d7e2a8fd4996fdb20d74fce41fe897311f3b06a Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:40 +0800 Subject: [PATCH 05/62] vdpa: Introduce cursors to vhost_vdpa_net_loadx() This patch introduces two new arugments, `out_cursor` and `in_cursor`, to vhost_vdpa_net_loadx(). Addtionally, it includes a helper function vhost_vdpa_net_load_cursor_reset() for resetting these cursors. Furthermore, this patch refactors vhost_vdpa_net_load_cmd() so that vhost_vdpa_net_load_cmd() prepares buffers for the device using the cursors arguments, instead of directly accesses `s->cvq_cmd_out_buffer` and `s->status` fields. By making these change, next patches in this series can refactor vhost_vdpa_net_load_cmd() directly to iterate through the control commands shadow buffers, allowing QEMU to send CVQ state load commands in parallel at device startup. Signed-off-by: Hawkins Jiawei Message-Id: <1c6516e233a14cc222f0884e148e4e1adceda78d.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 111 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index ea73e3c410..ef4d242811 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -648,7 +648,22 @@ static ssize_t vhost_vdpa_net_svq_poll(VhostVDPAState *s, size_t cmds_in_flight) return vhost_svq_poll(svq, cmds_in_flight); } -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, +static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s, + struct iovec *out_cursor, + struct iovec *in_cursor) +{ + /* reset the cursor of the output buffer for the device */ + out_cursor->iov_base = s->cvq_cmd_out_buffer; + out_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len(); + + /* reset the cursor of the in buffer for the device */ + in_cursor->iov_base = s->status; + in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len(); +} + +static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, + struct iovec *out_cursor, + struct iovec *in_cursor, uint8_t class, uint8_t cmd, const struct iovec *data_sg, size_t data_num) { @@ -657,25 +672,21 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, .cmd = cmd, }; size_t data_size = iov_size(data_sg, data_num); - /* Buffers for the device */ - const struct iovec out = { - .iov_base = s->cvq_cmd_out_buffer, - .iov_len = sizeof(ctrl) + data_size, - }; - const struct iovec in = { - .iov_base = s->status, - .iov_len = sizeof(*s->status), - }; + struct iovec out, in; ssize_t r; assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); /* pack the CVQ command header */ - memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl)); - + iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl)); /* pack the CVQ command command-specific-data */ iov_to_buf(data_sg, data_num, 0, - s->cvq_cmd_out_buffer + sizeof(ctrl), data_size); + out_cursor->iov_base + sizeof(ctrl), data_size); + + /* extract the required buffer from the cursor for output */ + iov_copy(&out, 1, out_cursor, 1, 0, sizeof(ctrl) + data_size); + /* extract the required buffer from the cursor for input */ + iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status)); r = vhost_vdpa_net_cvq_add(s, &out, 1, &in, 1); if (unlikely(r < 0)) { @@ -689,14 +700,17 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, return vhost_vdpa_net_svq_poll(s, 1); } -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor) { if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) { const struct iovec data = { .iov_base = (void *)n->mac, .iov_len = sizeof(n->mac), }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, &data, 1); if (unlikely(dev_written < 0)) { @@ -748,7 +762,7 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) .iov_len = mul_macs_size, }, }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_TABLE_SET, data, ARRAY_SIZE(data)); @@ -763,7 +777,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) } static int vhost_vdpa_net_load_mq(VhostVDPAState *s, - const VirtIONet *n) + const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor) { struct virtio_net_ctrl_mq mq; ssize_t dev_written; @@ -777,7 +793,8 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, .iov_base = &mq, .iov_len = sizeof(mq), }; - dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ, + dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &data, 1); if (unlikely(dev_written < 0)) { @@ -791,7 +808,9 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, } static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, - const VirtIONet *n) + const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor) { uint64_t offloads; ssize_t dev_written; @@ -822,7 +841,8 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, .iov_base = &offloads, .iov_len = sizeof(offloads), }; - dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS, + dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_GUEST_OFFLOADS, VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &data, 1); if (unlikely(dev_written < 0)) { @@ -836,6 +856,8 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, } static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, + struct iovec *out_cursor, + struct iovec *in_cursor, uint8_t cmd, uint8_t on) { @@ -845,7 +867,8 @@ static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, }; ssize_t dev_written; - dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX, + dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX, cmd, &data, 1); if (unlikely(dev_written < 0)) { return dev_written; @@ -858,7 +881,9 @@ static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, } static int vhost_vdpa_net_load_rx(VhostVDPAState *s, - const VirtIONet *n) + const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor) { ssize_t r; @@ -885,7 +910,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (!n->mac_table.uni_overflow && !n->promisc) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 0); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_PROMISC, 0); if (unlikely(r < 0)) { return r; } @@ -909,7 +935,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->mac_table.multi_overflow || n->allmulti) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_ALLMULTI, 1); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_ALLMULTI, 1); if (unlikely(r < 0)) { return r; } @@ -930,7 +957,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->alluni) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_ALLUNI, 1); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_ALLUNI, 1); if (r < 0) { return r; } @@ -947,7 +975,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nomulti) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOMULTI, 1); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_NOMULTI, 1); if (r < 0) { return r; } @@ -964,7 +993,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nouni) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOUNI, 1); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_NOUNI, 1); if (r < 0) { return r; } @@ -981,7 +1011,8 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, * configuration only at live migration. */ if (n->nobcast) { - r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_NOBCAST, 1); + r = vhost_vdpa_net_load_rx_mode(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX_NOBCAST, 1); if (r < 0) { return r; } @@ -992,13 +1023,16 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor, uint16_t vid) { const struct iovec data = { .iov_base = &vid, .iov_len = sizeof(vid), }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_VLAN, + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, &data, 1); if (unlikely(dev_written < 0)) { @@ -1012,7 +1046,9 @@ static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, } static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, - const VirtIONet *n) + const VirtIONet *n, + struct iovec *out_cursor, + struct iovec *in_cursor) { int r; @@ -1023,7 +1059,8 @@ static int vhost_vdpa_net_load_vlan(VhostVDPAState *s, for (int i = 0; i < MAX_VLAN >> 5; i++) { for (int j = 0; n->vlans[i] && j <= 0x1f; j++) { if (n->vlans[i] & (1U << j)) { - r = vhost_vdpa_net_load_single_vlan(s, n, (i << 5) + j); + r = vhost_vdpa_net_load_single_vlan(s, n, out_cursor, + in_cursor, (i << 5) + j); if (unlikely(r != 0)) { return r; } @@ -1040,6 +1077,7 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) struct vhost_vdpa *v = &s->vhost_vdpa; const VirtIONet *n; int r; + struct iovec out_cursor, in_cursor; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -1047,23 +1085,24 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) if (v->shadow_vqs_enabled) { n = VIRTIO_NET(v->dev->vdev); - r = vhost_vdpa_net_load_mac(s, n); + vhost_vdpa_net_load_cursor_reset(s, &out_cursor, &in_cursor); + r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor); if (unlikely(r < 0)) { return r; } - r = vhost_vdpa_net_load_mq(s, n); + r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor); if (unlikely(r)) { return r; } - r = vhost_vdpa_net_load_offloads(s, n); + r = vhost_vdpa_net_load_offloads(s, n, &out_cursor, &in_cursor); if (unlikely(r)) { return r; } - r = vhost_vdpa_net_load_rx(s, n); + r = vhost_vdpa_net_load_rx(s, n, &out_cursor, &in_cursor); if (unlikely(r)) { return r; } - r = vhost_vdpa_net_load_vlan(s, n); + r = vhost_vdpa_net_load_vlan(s, n, &out_cursor, &in_cursor); if (unlikely(r)) { return r; } From 99d6a32469debf1a48921125879b614d15acfb7a Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:41 +0800 Subject: [PATCH 06/62] vhost: Expose vhost_svq_available_slots() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches in this series will delay the polling and checking of buffers until either the SVQ is full or control commands shadow buffers are full, no longer perform an immediate poll and check of the device's used buffers for each CVQ state load command. To achieve this, this patch exposes vhost_svq_available_slots(), allowing QEMU to know whether the SVQ is full. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <25938079f0bd8185fd664c64e205e629f7a966be.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-shadow-virtqueue.c | 2 +- hw/virtio/vhost-shadow-virtqueue.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index e731b1d2ea..fc5f408f77 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -66,7 +66,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) * * @svq: The svq */ -static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) +uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) { return svq->num_free; } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 5bce67837b..19c842a15b 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -114,6 +114,7 @@ typedef struct VhostShadowVirtqueue { bool vhost_svq_valid_features(uint64_t features, Error **errp); +uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq); void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const VirtQueueElement *elem, uint32_t len); int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, From acec5f685c7ad6bd3c9bb9a57d4e509160480376 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 13 Oct 2023 16:09:42 +0800 Subject: [PATCH 07/62] vdpa: Send cvq state load commands in parallel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables sending CVQ state load commands in parallel at device startup by following steps: * Refactor vhost_vdpa_net_load_cmd() to iterate through the control commands shadow buffers. This allows different CVQ state load commands to use their own unique buffers. * Delay the polling and checking of buffers until either the SVQ is full or control commands shadow buffers are full. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578 Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <9350f32278e39f7bce297b8f2d82dac27c6f8c9a.1697165821.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 165 +++++++++++++++++++++++++++++------------------ 1 file changed, 102 insertions(+), 63 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index ef4d242811..4b7c3b81b8 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -661,6 +661,31 @@ static void vhost_vdpa_net_load_cursor_reset(VhostVDPAState *s, in_cursor->iov_len = vhost_vdpa_net_cvq_cmd_page_len(); } +/* + * Poll SVQ for multiple pending control commands and check the device's ack. + * + * Caller should hold the BQL when invoking this function. + * + * @s: The VhostVDPAState + * @len: The length of the pending status shadow buffer + */ +static ssize_t vhost_vdpa_net_svq_flush(VhostVDPAState *s, size_t len) +{ + /* device uses a one-byte length ack for each control command */ + ssize_t dev_written = vhost_vdpa_net_svq_poll(s, len); + if (unlikely(dev_written != len)) { + return -EIO; + } + + /* check the device's ack */ + for (int i = 0; i < len; ++i) { + if (s->status[i] != VIRTIO_NET_OK) { + return -EIO; + } + } + return 0; +} + static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, struct iovec *out_cursor, struct iovec *in_cursor, uint8_t class, @@ -671,11 +696,31 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, .class = class, .cmd = cmd, }; - size_t data_size = iov_size(data_sg, data_num); + size_t data_size = iov_size(data_sg, data_num), cmd_size; struct iovec out, in; ssize_t r; + unsigned dummy_cursor_iov_cnt; + VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); + cmd_size = sizeof(ctrl) + data_size; + if (vhost_svq_available_slots(svq) < 2 || + iov_size(out_cursor, 1) < cmd_size) { + /* + * It is time to flush all pending control commands if SVQ is full + * or control commands shadow buffers are full. + * + * We can poll here since we've had BQL from the time + * we sent the descriptor. + */ + r = vhost_vdpa_net_svq_flush(s, in_cursor->iov_base - + (void *)s->status); + if (unlikely(r < 0)) { + return r; + } + + vhost_vdpa_net_load_cursor_reset(s, out_cursor, in_cursor); + } /* pack the CVQ command header */ iov_from_buf(out_cursor, 1, 0, &ctrl, sizeof(ctrl)); @@ -684,7 +729,7 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, out_cursor->iov_base + sizeof(ctrl), data_size); /* extract the required buffer from the cursor for output */ - iov_copy(&out, 1, out_cursor, 1, 0, sizeof(ctrl) + data_size); + iov_copy(&out, 1, out_cursor, 1, 0, cmd_size); /* extract the required buffer from the cursor for input */ iov_copy(&in, 1, in_cursor, 1, 0, sizeof(*s->status)); @@ -693,11 +738,13 @@ static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, return r; } - /* - * We can poll here since we've had BQL from the time - * we sent the descriptor. - */ - return vhost_vdpa_net_svq_poll(s, 1); + /* iterate the cursors */ + dummy_cursor_iov_cnt = 1; + iov_discard_front(&out_cursor, &dummy_cursor_iov_cnt, cmd_size); + dummy_cursor_iov_cnt = 1; + iov_discard_front(&in_cursor, &dummy_cursor_iov_cnt, sizeof(*s->status)); + + return 0; } static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, @@ -709,15 +756,12 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, .iov_base = (void *)n->mac, .iov_len = sizeof(n->mac), }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_MAC, - VIRTIO_NET_CTRL_MAC_ADDR_SET, - &data, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_ADDR_SET, + &data, 1); + if (unlikely(r < 0)) { + return r; } } @@ -762,15 +806,12 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, .iov_len = mul_macs_size, }, }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_MAC, - VIRTIO_NET_CTRL_MAC_TABLE_SET, - data, ARRAY_SIZE(data)); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_TABLE_SET, + data, ARRAY_SIZE(data)); + if (unlikely(r < 0)) { + return r; } return 0; @@ -782,7 +823,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, struct iovec *in_cursor) { struct virtio_net_ctrl_mq mq; - ssize_t dev_written; + ssize_t r; if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) { return 0; @@ -793,15 +834,12 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, .iov_base = &mq, .iov_len = sizeof(mq), }; - dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_MQ, - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, - &data, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, + &data, 1); + if (unlikely(r < 0)) { + return r; } return 0; @@ -813,7 +851,7 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, struct iovec *in_cursor) { uint64_t offloads; - ssize_t dev_written; + ssize_t r; if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { @@ -841,15 +879,12 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, .iov_base = &offloads, .iov_len = sizeof(offloads), }; - dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_GUEST_OFFLOADS, - VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, - &data, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_GUEST_OFFLOADS, + VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, + &data, 1); + if (unlikely(r < 0)) { + return r; } return 0; @@ -865,16 +900,12 @@ static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, .iov_base = &on, .iov_len = sizeof(on), }; - ssize_t dev_written; + ssize_t r; - dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_RX, - cmd, &data, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (*s->status != VIRTIO_NET_OK) { - return -EIO; + r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_RX, cmd, &data, 1); + if (unlikely(r < 0)) { + return r; } return 0; @@ -1031,15 +1062,12 @@ static int vhost_vdpa_net_load_single_vlan(VhostVDPAState *s, .iov_base = &vid, .iov_len = sizeof(vid), }; - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, - VIRTIO_NET_CTRL_VLAN, - VIRTIO_NET_CTRL_VLAN_ADD, - &data, 1); - if (unlikely(dev_written < 0)) { - return dev_written; - } - if (unlikely(*s->status != VIRTIO_NET_OK)) { - return -EIO; + ssize_t r = vhost_vdpa_net_load_cmd(s, out_cursor, in_cursor, + VIRTIO_NET_CTRL_VLAN, + VIRTIO_NET_CTRL_VLAN_ADD, + &data, 1); + if (unlikely(r < 0)) { + return r; } return 0; @@ -1106,6 +1134,17 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) if (unlikely(r)) { return r; } + + /* + * We need to poll and check all pending device's used buffers. + * + * We can poll here since we've had BQL from the time + * we sent the descriptor. + */ + r = vhost_vdpa_net_svq_flush(s, in_cursor.iov_base - (void *)s->status); + if (unlikely(r)) { + return r; + } } for (int i = 0; i < v->dev->vq_index; ++i) { From 1428831981edc53fe819f76d07306f6b041fe55c Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:15 +0200 Subject: [PATCH 08/62] vhost-user: strip superfluous whitespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Stefano Garzarella Reviewed-by: Philippe Mathieu-Daudé Tested-by: Albert Esteve Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-2-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 68eb1f0c99..3e33a2e9e0 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -388,7 +388,7 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, * operations such as configuring device memory mappings or issuing device * resets, which affect the whole device instead of individual VQs, * vhost-user messages should only be sent once. - * + * * Devices with multiple vhost_devs are given an associated dev->vq_index * so per_device requests are only sent if vq_index is 0. */ From ed0b3ebbae4d3afd9f742613ed64444e4d04ae0a Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:16 +0200 Subject: [PATCH 09/62] vhost-user: tighten "reply_supported" scope in "set_vring_addr" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the vhost_user_set_vring_addr() function, we calculate "reply_supported" unconditionally, even though we'll only need it if "wait_for_reply" is also true. Restrict the scope of "reply_supported" to the minimum. This is purely refactoring -- no observable change. Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-3-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 3e33a2e9e0..6c7b4cc75c 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1321,17 +1321,18 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.addr), }; - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - /* * wait for a reply if logging is enabled to make sure * backend is actually logging changes */ bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); - if (reply_supported && wait_for_reply) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } } ret = vhost_user_write(dev, &msg, NULL, 0); From 54ae36822f6f14603ed117b09fd1b59d6ebfb963 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:17 +0200 Subject: [PATCH 10/62] vhost-user: factor out "vhost_user_write_sync" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tails of the "vhost_user_set_vring_addr" and "vhost_user_set_u64" functions are now byte-for-byte identical. Factor the common tail out to a new function called "vhost_user_write_sync". This is purely refactoring -- no observable change. Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-4-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 66 ++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 6c7b4cc75c..95dbf9880c 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1310,10 +1310,35 @@ static int enforce_reply(struct vhost_dev *dev, return vhost_user_get_features(dev, &dummy); } +/* Note: "msg->hdr.flags" may be modified. */ +static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, + bool wait_for_reply) +{ + int ret; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + } + + ret = vhost_user_write(dev, msg, NULL, 0); + if (ret < 0) { + return ret; + } + + if (wait_for_reply) { + return enforce_reply(dev, msg); + } + + return 0; +} + static int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { - int ret; VhostUserMsg msg = { .hdr.request = VHOST_USER_SET_VRING_ADDR, .hdr.flags = VHOST_USER_VERSION, @@ -1327,24 +1352,7 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, */ bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); - if (wait_for_reply) { - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; - } - } - - ret = vhost_user_write(dev, &msg, NULL, 0); - if (ret < 0) { - return ret; - } - - if (wait_for_reply) { - return enforce_reply(dev, &msg); - } - - return 0; + return vhost_user_write_sync(dev, &msg, wait_for_reply); } static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, @@ -1356,26 +1364,8 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, .payload.u64 = u64, .hdr.size = sizeof(msg.payload.u64), }; - int ret; - if (wait_for_reply) { - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - if (reply_supported) { - msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; - } - } - - ret = vhost_user_write(dev, &msg, NULL, 0); - if (ret < 0) { - return ret; - } - - if (wait_for_reply) { - return enforce_reply(dev, &msg); - } - - return 0; + return vhost_user_write_sync(dev, &msg, wait_for_reply); } static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) From 99ad9ec89d45dbc532471537a4468b9a5ea386e3 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:18 +0200 Subject: [PATCH 11/62] vhost-user: flatten "enforce_reply" into "vhost_user_write_sync" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point, only "vhost_user_write_sync" calls "enforce_reply"; embed the latter into the former. This is purely refactoring -- no observable change. Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-5-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 95dbf9880c..23e9039922 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1292,24 +1292,6 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) return 0; } -static int enforce_reply(struct vhost_dev *dev, - const VhostUserMsg *msg) -{ - uint64_t dummy; - - if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - return process_message_reply(dev, msg); - } - - /* - * We need to wait for a reply but the backend does not - * support replies for the command we just sent. - * Send VHOST_USER_GET_FEATURES which makes all backends - * send a reply. - */ - return vhost_user_get_features(dev, &dummy); -} - /* Note: "msg->hdr.flags" may be modified. */ static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, bool wait_for_reply) @@ -1330,7 +1312,19 @@ static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, } if (wait_for_reply) { - return enforce_reply(dev, msg); + uint64_t dummy; + + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(dev, msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. + */ + return vhost_user_get_features(dev, &dummy); } return 0; From df3b2abc32c319352e6b350990919d3387520669 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:19 +0200 Subject: [PATCH 12/62] vhost-user: hoist "write_sync", "get_features", "get_u64" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to avoid a forward-declaration for "vhost_user_write_sync" in a subsequent patch, hoist "vhost_user_write_sync" -> "vhost_user_get_features" -> "vhost_user_get_u64" just above "vhost_set_vring". This is purely code movement -- no observable change. Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-6-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 170 ++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 23e9039922..3c14947589 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1073,6 +1073,91 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev, return vhost_user_write(dev, &msg, NULL, 0); } +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +{ + int ret; + VhostUserMsg msg = { + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, + }; + + if (vhost_user_per_device_request(request) && dev->vq_index != 0) { + return 0; + } + + ret = vhost_user_write(dev, &msg, NULL, 0); + if (ret < 0) { + return ret; + } + + ret = vhost_user_read(dev, &msg); + if (ret < 0) { + return ret; + } + + if (msg.hdr.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.hdr.request); + return -EPROTO; + } + + if (msg.hdr.size != sizeof(msg.payload.u64)) { + error_report("Received bad msg size."); + return -EPROTO; + } + + *u64 = msg.payload.u64; + + return 0; +} + +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +{ + if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; +} + +/* Note: "msg->hdr.flags" may be modified. */ +static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, + bool wait_for_reply) +{ + int ret; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + if (reply_supported) { + msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + } + + ret = vhost_user_write(dev, msg, NULL, 0); + if (ret < 0) { + return ret; + } + + if (wait_for_reply) { + uint64_t dummy; + + if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(dev, msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. + */ + return vhost_user_get_features(dev, &dummy); + } + + return 0; +} + static int vhost_set_vring(struct vhost_dev *dev, unsigned long int request, struct vhost_vring_state *ring) @@ -1245,91 +1330,6 @@ static int vhost_user_set_vring_err(struct vhost_dev *dev, return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_ERR, file); } -static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) -{ - int ret; - VhostUserMsg msg = { - .hdr.request = request, - .hdr.flags = VHOST_USER_VERSION, - }; - - if (vhost_user_per_device_request(request) && dev->vq_index != 0) { - return 0; - } - - ret = vhost_user_write(dev, &msg, NULL, 0); - if (ret < 0) { - return ret; - } - - ret = vhost_user_read(dev, &msg); - if (ret < 0) { - return ret; - } - - if (msg.hdr.request != request) { - error_report("Received unexpected msg type. Expected %d received %d", - request, msg.hdr.request); - return -EPROTO; - } - - if (msg.hdr.size != sizeof(msg.payload.u64)) { - error_report("Received bad msg size."); - return -EPROTO; - } - - *u64 = msg.payload.u64; - - return 0; -} - -static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) -{ - if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) { - return -EPROTO; - } - - return 0; -} - -/* Note: "msg->hdr.flags" may be modified. */ -static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, - bool wait_for_reply) -{ - int ret; - - if (wait_for_reply) { - bool reply_supported = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_REPLY_ACK); - if (reply_supported) { - msg->hdr.flags |= VHOST_USER_NEED_REPLY_MASK; - } - } - - ret = vhost_user_write(dev, msg, NULL, 0); - if (ret < 0) { - return ret; - } - - if (wait_for_reply) { - uint64_t dummy; - - if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) { - return process_message_reply(dev, msg); - } - - /* - * We need to wait for a reply but the backend does not - * support replies for the command we just sent. - * Send VHOST_USER_GET_FEATURES which makes all backends - * send a reply. - */ - return vhost_user_get_features(dev, &dummy); - } - - return 0; -} - static int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { From 75b6b6da21a225b7853515d9e14775f8a5d9af3a Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:20 +0200 Subject: [PATCH 13/62] vhost-user: allow "vhost_set_vring" to wait for a reply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "vhost_set_vring" function already centralizes the common parts of "vhost_user_set_vring_num", "vhost_user_set_vring_base" and "vhost_user_set_vring_enable". We'll want to allow some of those callers to wait for a reply. Therefore, rebase "vhost_set_vring" from just "vhost_user_write" to "vhost_user_write_sync", exposing the "wait_for_reply" parameter. This is purely refactoring -- there is no observable change. That's because: - all three callers pass in "false" for "wait_for_reply", which disables all logic in "vhost_user_write_sync" except the call to "vhost_user_write"; - the fds=NULL and fd_num=0 arguments of the original "vhost_user_write" call inside "vhost_set_vring" are hard-coded within "vhost_user_write_sync". Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-7-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 3c14947589..7e452849fa 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1160,7 +1160,8 @@ static int vhost_user_write_sync(struct vhost_dev *dev, VhostUserMsg *msg, static int vhost_set_vring(struct vhost_dev *dev, unsigned long int request, - struct vhost_vring_state *ring) + struct vhost_vring_state *ring, + bool wait_for_reply) { VhostUserMsg msg = { .hdr.request = request, @@ -1169,13 +1170,13 @@ static int vhost_set_vring(struct vhost_dev *dev, .hdr.size = sizeof(msg.payload.state), }; - return vhost_user_write(dev, &msg, NULL, 0); + return vhost_user_write_sync(dev, &msg, wait_for_reply); } static int vhost_user_set_vring_num(struct vhost_dev *dev, struct vhost_vring_state *ring) { - return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring, false); } static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) @@ -1206,7 +1207,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, static int vhost_user_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { - return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring, false); } static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) @@ -1224,7 +1225,7 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) .num = enable, }; - ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, false); if (ret < 0) { /* * Restoring the previous state is likely infeasible, as well as From d7dc0682f5cb549ea9afdc418f95412e83cc5e8c Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 2 Oct 2023 22:32:21 +0200 Subject: [PATCH 14/62] vhost-user: call VHOST_USER_SET_VRING_ENABLE synchronously MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (1) The virtio-1.2 specification writes: > 3 General Initialization And Device Operation > 3.1 Device Initialization > 3.1.1 Driver Requirements: Device Initialization > > [...] > > 7. Perform device-specific setup, including discovery of virtqueues for > the device, optional per-bus setup, reading and possibly writing the > device’s virtio configuration space, and population of virtqueues. > > 8. Set the DRIVER_OK status bit. At this point the device is “live”. and > 4 Virtio Transport Options > 4.1 Virtio Over PCI Bus > 4.1.4 Virtio Structure PCI Capabilities > 4.1.4.3 Common configuration structure layout > 4.1.4.3.2 Driver Requirements: Common configuration structure layout > > [...] > > The driver MUST configure the other virtqueue fields before enabling the > virtqueue with queue_enable. > > [...] (The same statements are present in virtio-1.0 identically, at .) These together mean that the following sub-sequence of steps is valid for a virtio-1.0 guest driver: (1.1) set "queue_enable" for the needed queues as the final part of device initialization step (7), (1.2) set DRIVER_OK in step (8), (1.3) immediately start sending virtio requests to the device. (2) When vhost-user is enabled, and the VHOST_USER_F_PROTOCOL_FEATURES special virtio feature is negotiated, then virtio rings start in disabled state, according to . In this case, explicit VHOST_USER_SET_VRING_ENABLE messages are needed for enabling vrings. Therefore setting "queue_enable" from the guest (1.1) -- which is technically "buffered" on the QEMU side until the guest sets DRIVER_OK (1.2) -- is a *control plane* operation, which -- after (1.2) -- travels from the guest through QEMU to the vhost-user backend, using a unix domain socket. Whereas sending a virtio request (1.3) is a *data plane* operation, which evades QEMU -- it travels from guest to the vhost-user backend via eventfd. This means that operations ((1.1) + (1.2)) and (1.3) travel through different channels, and their relative order can be reversed, as perceived by the vhost-user backend. That's exactly what happens when OVMF's virtiofs driver (VirtioFsDxe) runs against the Rust-language virtiofsd version 1.7.2. (Which uses version 0.10.1 of the vhost-user-backend crate, and version 0.8.1 of the vhost crate.) Namely, when VirtioFsDxe binds a virtiofs device, it goes through the device initialization steps (i.e., control plane operations), and immediately sends a FUSE_INIT request too (i.e., performs a data plane operation). In the Rust-language virtiofsd, this creates a race between two components that run *concurrently*, i.e., in different threads or processes: - Control plane, handling vhost-user protocol messages: The "VhostUserSlaveReqHandlerMut::set_vring_enable" method [crates/vhost-user-backend/src/handler.rs] handles VHOST_USER_SET_VRING_ENABLE messages, and updates each vring's "enabled" flag according to the message processed. - Data plane, handling virtio / FUSE requests: The "VringEpollHandler::handle_event" method [crates/vhost-user-backend/src/event_loop.rs] handles the incoming virtio / FUSE request, consuming the virtio kick at the same time. If the vring's "enabled" flag is set, the virtio / FUSE request is processed genuinely. If the vring's "enabled" flag is clear, then the virtio / FUSE request is discarded. Note that OVMF enables the queue *first*, and sends FUSE_INIT *second*. However, if the data plane processor in virtiofsd wins the race, then it sees the FUSE_INIT *before* the control plane processor took notice of VHOST_USER_SET_VRING_ENABLE and green-lit the queue for the data plane processor. Therefore the latter drops FUSE_INIT on the floor, and goes back to waiting for further virtio / FUSE requests with epoll_wait. Meanwhile OVMF is stuck waiting for the FUSET_INIT response -- a deadlock. The deadlock is not deterministic. OVMF hangs infrequently during first boot. However, OVMF hangs almost certainly during reboots from the UEFI shell. The race can be "reliably masked" by inserting a very small delay -- a single debug message -- at the top of "VringEpollHandler::handle_event", i.e., just before the data plane processor checks the "enabled" field of the vring. That delay suffices for the control plane processor to act upon VHOST_USER_SET_VRING_ENABLE. We can deterministically prevent the race in QEMU, by blocking OVMF inside step (1.2) -- i.e., in the write to the device status register that "unleashes" queue enablement -- until VHOST_USER_SET_VRING_ENABLE actually *completes*. That way OVMF's VCPU cannot advance to the FUSE_INIT submission before virtiofsd's control plane processor takes notice of the queue being enabled. Wait for VHOST_USER_SET_VRING_ENABLE completion by: - setting the NEED_REPLY flag on VHOST_USER_SET_VRING_ENABLE, and waiting for the reply, if the VHOST_USER_PROTOCOL_F_REPLY_ACK vhost-user feature has been negotiated, or - performing a separate VHOST_USER_GET_FEATURES *exchange*, which requires a backend response regardless of VHOST_USER_PROTOCOL_F_REPLY_ACK. Cc: "Michael S. Tsirkin" (supporter:vhost) Cc: Eugenio Perez Martin Cc: German Maglione Cc: Liu Jiang Cc: Sergio Lopez Pascual Cc: Stefano Garzarella Signed-off-by: Laszlo Ersek Reviewed-by: Stefano Garzarella Tested-by: Albert Esteve [lersek@redhat.com: work Eugenio's explanation into the commit message, about QEMU containing step (1.1) until step (1.2)] Reviewed-by: Eugenio Pérez Message-Id: <20231002203221.17241-8-lersek@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 7e452849fa..427ee0ebfb 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1225,7 +1225,21 @@ static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) .num = enable, }; - ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, false); + /* + * SET_VRING_ENABLE travels from guest to QEMU to vhost-user backend / + * control plane thread via unix domain socket. Virtio requests travel + * from guest to vhost-user backend / data plane thread via eventfd. + * Even if the guest enables the ring first, and pushes its first virtio + * request second (conforming to the virtio spec), the data plane thread + * in the backend may see the virtio request before the control plane + * thread sees the queue enablement. This causes (in fact, requires) the + * data plane thread to discard the virtio request (it arrived on a + * seemingly disabled queue). To prevent this out-of-order delivery, + * don't let the guest proceed to pushing the virtio request until the + * backend control plane acknowledges enabling the queue -- IOW, pass + * wait_for_reply=true below. + */ + ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state, true); if (ret < 0) { /* * Restoring the previous state is likely infeasible, as well as From b15c18c51910ec5ef8c506f641f487507e42c79b Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 9 Oct 2023 12:43:21 +0200 Subject: [PATCH 15/62] memory: initialize 'fv' in MemoryRegionCache to make Coverity happy Coverity scan reports multiple false-positive "defects" for the following series of actions in virtio.c: MemoryRegionCache indirect_desc_cache; address_space_cache_init_empty(&indirect_desc_cache); address_space_cache_destroy(&indirect_desc_cache); For some reason it's unable to recognize the dependency between 'mrs.mr' and 'fv' and insists that '!mrs.mr' check in address_space_cache_destroy may take a 'false' branch, even though it is explicitly initialized to NULL in the address_space_cache_init_empty(): *** CID 1522371: Memory - illegal accesses (UNINIT) /qemu/hw/virtio/virtio.c: 1627 in virtqueue_split_pop() 1621 } 1622 1623 vq->inuse++; 1624 1625 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); 1626 done: >>> CID 1522371: Memory - illegal accesses (UNINIT) >>> Using uninitialized value "indirect_desc_cache.fv" when >>> calling "address_space_cache_destroy". 1627 address_space_cache_destroy(&indirect_desc_cache); 1628 1629 return elem; 1630 1631 err_undo_map: 1632 virtqueue_undo_map_desc(out_num, in_num, iov); ** CID 1522370: Memory - illegal accesses (UNINIT) Instead of trying to silence these false positive reports in 4 different places, initializing 'fv' as well, as this doesn't result in any noticeable performance impact. Signed-off-by: Ilya Maximets Message-Id: <20231009104322.3085887-1-i.maximets@ovn.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: David Hildenbrand Reviewed-by: Stefan Hajnoczi --- include/exec/memory.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 653a32ea10..9087d02769 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2793,6 +2793,8 @@ int64_t address_space_cache_init(MemoryRegionCache *cache, static inline void address_space_cache_init_empty(MemoryRegionCache *cache) { cache->mrs.mr = NULL; + /* There is no real need to initialize fv, but it makes Coverity happy. */ + cache->fv = NULL; } /** From 22d2464f7ee6242114733e62541eed14cde15977 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 3 Oct 2023 21:45:30 -0400 Subject: [PATCH 16/62] vhost-user: do not send RESET_OWNER on device reset The VHOST_USER_RESET_OWNER message is deprecated in the spec: This is no longer used. Used to be sent to request disabling all rings, but some back-ends interpreted it to also discard connection state (this interpretation would lead to bugs). It is recommended that back-ends either ignore this message, or use it to disable all rings. The only caller of vhost_user_reset_device() is vhost_user_scsi_reset(). It checks that F_RESET_DEVICE was negotiated before calling it: static void vhost_user_scsi_reset(VirtIODevice *vdev) { VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev); struct vhost_dev *dev = &vsc->dev; /* * Historically, reset was not implemented so only reset devices * that are expecting it. */ if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { return; } if (dev->vhost_ops->vhost_reset_device) { dev->vhost_ops->vhost_reset_device(dev); } } Therefore VHOST_USER_RESET_OWNER is actually never sent by vhost_user_reset_device(). Remove the dead code. This effectively moves the vhost-user protocol specific code from vhost-user-scsi.c into vhost-user.c where it belongs. Signed-off-by: Stefan Hajnoczi Message-Id: <20231004014532.1228637-2-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Raphael Norwitz --- hw/scsi/vhost-user-scsi.c | 9 --------- hw/virtio/vhost-user.c | 13 +++++++++---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index df6b66cc1a..78aef4765f 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -67,15 +67,6 @@ static void vhost_user_scsi_reset(VirtIODevice *vdev) VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev); struct vhost_dev *dev = &vsc->dev; - /* - * Historically, reset was not implemented so only reset devices - * that are expecting it. - */ - if (!virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { - return; - } - if (dev->vhost_ops->vhost_reset_device) { dev->vhost_ops->vhost_reset_device(dev); } diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 427ee0ebfb..f9414f03de 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -1482,12 +1482,17 @@ static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { .hdr.flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_RESET_DEVICE, }; - msg.hdr.request = virtio_has_feature(dev->protocol_features, - VHOST_USER_PROTOCOL_F_RESET_DEVICE) - ? VHOST_USER_RESET_DEVICE - : VHOST_USER_RESET_OWNER; + /* + * Historically, reset was not implemented so only reset devices + * that are expecting it. + */ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RESET_DEVICE)) { + return -ENOSYS; + } return vhost_user_write(dev, &msg, NULL, 0); } From e6383293eb01928692047e617665a742cca87e23 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 3 Oct 2023 21:45:31 -0400 Subject: [PATCH 17/62] vhost-backend: remove vhost_kernel_reset_device() vhost_kernel_reset_device() invokes RESET_OWNER, which disassociates the owner process from the device. The device is left non-operational since SET_OWNER is only called once during startup in vhost_dev_init(). vhost_kernel_reset_device() is never called so this latent bug never appears. Get rid of vhost_kernel_reset_device() for now. If someone needs it in the future they'll need to implement it correctly. Signed-off-by: Stefan Hajnoczi Message-Id: <20231004014532.1228637-3-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Raphael Norwitz Reviewed-by: Hanna Czenczek --- hw/virtio/vhost-backend.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 8e581575c9..17f3fc6a08 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -197,11 +197,6 @@ static int vhost_kernel_set_owner(struct vhost_dev *dev) return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL); } -static int vhost_kernel_reset_device(struct vhost_dev *dev) -{ - return vhost_kernel_call(dev, VHOST_RESET_OWNER, NULL); -} - static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) { assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); @@ -322,7 +317,6 @@ const VhostOps kernel_ops = { .vhost_get_features = vhost_kernel_get_features, .vhost_set_backend_cap = vhost_kernel_set_backend_cap, .vhost_set_owner = vhost_kernel_set_owner, - .vhost_reset_device = vhost_kernel_reset_device, .vhost_get_vq_index = vhost_kernel_get_vq_index, .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, .vhost_vsock_set_running = vhost_kernel_vsock_set_running, From c0c4f147291f37765a5275aa24c3e1195468903b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 3 Oct 2023 21:45:32 -0400 Subject: [PATCH 18/62] virtio: call ->vhost_reset_device() during reset vhost-user-scsi has a VirtioDeviceClass->reset() function that calls ->vhost_reset_device(). The other vhost devices don't notify the vhost device upon reset. Stateful vhost devices may need to handle device reset in order to free resources or prevent stale device state from interfering after reset. Call ->vhost_device_reset() from virtio_reset() so that that vhost devices are notified of device reset. This patch affects behavior as follows: - vhost-kernel: No change in behavior since ->vhost_reset_device() is not implemented. - vhost-user: back-ends that negotiate VHOST_USER_PROTOCOL_F_RESET_DEVICE now receive a VHOST_USER_DEVICE_RESET message upon device reset. Otherwise there is no change in behavior. DPDK, SPDK, libvhost-user, and the vhost-user-backend crate do not negotiate VHOST_USER_PROTOCOL_F_RESET_DEVICE automatically. - vhost-vdpa: an extra SET_STATUS 0 call is made during device reset. Signed-off-by: Stefan Hajnoczi Message-Id: <20231004014532.1228637-4-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Raphael Norwitz Reviewed-by: Hanna Czenczek --- hw/scsi/vhost-user-scsi.c | 11 ----------- hw/virtio/vhost.c | 9 +++++++++ hw/virtio/virtio.c | 4 ++++ include/hw/virtio/vhost.h | 10 ++++++++++ meson.build | 1 + 5 files changed, 24 insertions(+), 11 deletions(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 78aef4765f..b7c6100f3e 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -62,16 +62,6 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) } } -static void vhost_user_scsi_reset(VirtIODevice *vdev) -{ - VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev); - struct vhost_dev *dev = &vsc->dev; - - if (dev->vhost_ops->vhost_reset_device) { - dev->vhost_ops->vhost_reset_device(dev); - } -} - static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) { } @@ -191,7 +181,6 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data) vdc->get_features = vhost_scsi_common_get_features; vdc->set_config = vhost_scsi_common_set_config; vdc->set_status = vhost_user_scsi_set_status; - vdc->reset = vhost_user_scsi_reset; fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9f37206ba0..92a6933f66 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -2154,3 +2154,12 @@ int vhost_net_set_backend(struct vhost_dev *hdev, return -ENOSYS; } + +int vhost_reset_device(struct vhost_dev *hdev) +{ + if (hdev->vhost_ops->vhost_reset_device) { + return hdev->vhost_ops->vhost_reset_device(hdev); + } + + return -ENOSYS; +} diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6facd64fbc..fb24bc927b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2136,6 +2136,10 @@ void virtio_reset(void *opaque) vdev->device_endian = virtio_default_endian(); } + if (vdev->vhost_started) { + vhost_reset_device(k->get_vhost(vdev)); + } + if (k->reset) { k->reset(vdev); } diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index c7e5467693..00e0a669b8 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -339,4 +339,14 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, struct vhost_inflight *inflight); bool vhost_dev_has_iommu(struct vhost_dev *dev); + +#ifdef CONFIG_VHOST +int vhost_reset_device(struct vhost_dev *hdev); +#else +static inline int vhost_reset_device(struct vhost_dev *hdev) +{ + return -ENOSYS; +} +#endif /* CONFIG_VHOST */ + #endif diff --git a/meson.build b/meson.build index e0d1f84b26..cbee764817 100644 --- a/meson.build +++ b/meson.build @@ -2134,6 +2134,7 @@ config_host_data.set('CONFIG_TPM', have_tpm) config_host_data.set('CONFIG_TSAN', get_option('tsan')) config_host_data.set('CONFIG_USB_LIBUSB', libusb.found()) config_host_data.set('CONFIG_VDE', vde.found()) +config_host_data.set('CONFIG_VHOST', have_vhost) config_host_data.set('CONFIG_VHOST_NET', have_vhost_net) config_host_data.set('CONFIG_VHOST_NET_USER', have_vhost_net_user) config_host_data.set('CONFIG_VHOST_NET_VDPA', have_vhost_net_vdpa) From bd7a6d88ec900a9a2c5b3ef09f941558908d3cda Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 4 Oct 2023 11:23:55 +0200 Subject: [PATCH 19/62] hw/i386/acpi-build: Remove build-time assertion on PIIX/ICH9 reset registers being identical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6103451aeb74 ("hw/i386: Build-time assertion on pc/q35 reset register being identical.") introduced a build-time check where the addresses of the reset registers are expected to be equal. Back then rev3 of the FADT was used which required the reset register to be populated and there was common code. In commit 3a3fcc75f92a ("pc: acpi: force FADT rev1 for 440fx based machine types") the FADT was downgraded to rev1 for PIIX where the reset register isn't available. Thus, there is no need for the assertion any longer, so remove it. Signed-off-by: Bernhard Beschow Reviewed-by: Ani Sinha Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20231004092355.12929-1-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 3f2b27cf75..b0e1f074f1 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -56,7 +56,6 @@ /* Supported chipsets: */ #include "hw/southbridge/ich9.h" -#include "hw/southbridge/piix.h" #include "hw/acpi/pcihp.h" #include "hw/i386/fw_cfg.h" #include "hw/i386/pc.h" @@ -242,10 +241,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->pcihp_io_len = object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); - /* The above need not be conditional on machine type because the reset port - * happens to be the same on PIIX (pc) and ICH9 (q35). */ - QEMU_BUILD_BUG_ON(ICH9_RST_CNT_IOPORT != PIIX_RCR_IOPORT); - /* Fill in optional s3/s4 related properties */ o = object_property_get_qobject(obj, ACPI_PM_PROP_S3_DISABLED, NULL); if (o) { From 74d7ea50627a60257a92e77402780897075654fd Mon Sep 17 00:00:00 2001 From: Damien Zammit Date: Sun, 26 Feb 2023 01:58:10 +0000 Subject: [PATCH 20/62] timer/i8254: Fix one shot PIT mode Currently, the one-shot (mode 1) PIT expires far too quickly, due to the output being set under the wrong logic. This change fixes the one-shot PIT mode to behave similarly to mode 0. TESTED: using the one-shot PIT mode to calibrate a local apic timer. Signed-off-by: Damien Zammit Message-Id: <20230226015755.52624-1-damien@zamaudio.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/timer/i8254_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c index e4093e2904..b25da448c8 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c @@ -52,10 +52,8 @@ int pit_get_out(PITChannelState *s, int64_t current_time) switch (s->mode) { default: case 0: - out = (d >= s->count); - break; case 1: - out = (d < s->count); + out = (d >= s->count); break; case 2: if ((d % s->count) == 0 && d != 0) { From 9b50fd02900c11e6e50c7913e5772031749b3e8d Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino Date: Fri, 6 Oct 2023 17:39:01 -0300 Subject: [PATCH 21/62] hw/display: fix memleak from virtio_add_resource When the given uuid is already present in the hash table, virtio_add_resource() does not add the passed VirtioSharedObject. In this case, free it in the callers to avoid leaking memory. This fixed the following `make check` error, when built with --enable-sanitizers: 4/166 qemu:unit / test-virtio-dmabuf ERROR 1.51s exit status 1 ==7716==ERROR: LeakSanitizer: detected memory leaks Direct leak of 320 byte(s) in 20 object(s) allocated from: #0 0x7f6fc16e3808 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cc:144 #1 0x7f6fc1503e98 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x57e98) #2 0x564d63cafb6b in test_add_invalid_resource ../tests/unit/test-virtio-dmabuf.c:100 #3 0x7f6fc152659d (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x7a59d) SUMMARY: AddressSanitizer: 320 byte(s) leaked in 20 allocation(s). The changes at virtio_add_resource() itself are not strictly necessary for the memleak fix, but they make it more obvious that, on an error return, the passed object is not added to the hash. Signed-off-by: Matheus Tavares Bernardino Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Albert Esteve Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
--- hw/display/virtio-dmabuf.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/display/virtio-dmabuf.c b/hw/display/virtio-dmabuf.c index 4a8e430f3d..3dba4577ca 100644 --- a/hw/display/virtio-dmabuf.c +++ b/hw/display/virtio-dmabuf.c @@ -29,7 +29,7 @@ static int uuid_equal_func(const void *lhv, const void *rhv) static bool virtio_add_resource(QemuUUID *uuid, VirtioSharedObject *value) { - bool result = false; + bool result = true; g_mutex_lock(&lock); if (resource_uuids == NULL) { @@ -39,7 +39,9 @@ static bool virtio_add_resource(QemuUUID *uuid, VirtioSharedObject *value) g_free); } if (g_hash_table_lookup(resource_uuids, uuid) == NULL) { - result = g_hash_table_insert(resource_uuids, uuid, value); + g_hash_table_insert(resource_uuids, uuid, value); + } else { + result = false; } g_mutex_unlock(&lock); @@ -57,6 +59,9 @@ bool virtio_add_dmabuf(QemuUUID *uuid, int udmabuf_fd) vso->type = TYPE_DMABUF; vso->value = GINT_TO_POINTER(udmabuf_fd); result = virtio_add_resource(uuid, vso); + if (!result) { + g_free(vso); + } return result; } @@ -72,6 +77,9 @@ bool virtio_add_vhost_device(QemuUUID *uuid, struct vhost_dev *dev) vso->type = TYPE_VHOST_DEV; vso->value = dev; result = virtio_add_resource(uuid, vso); + if (!result) { + g_free(vso); + } return result; } From 9c91051119f8c493a5802c4f5347516679e55552 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:09 +0200 Subject: [PATCH 22/62] hw/i386/pc: Merge two if statements into one By being the only entity assigning a non-NULL value to "rtc_irq", the first if statement determines whether the second if statement is executed. So merge the two statements into one. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index bb3854d1d0..7e6c4dc526 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1199,7 +1199,6 @@ void pc_basic_device_init(struct PCMachineState *pcms, DeviceState *hpet = NULL; int pit_isa_irq = 0; qemu_irq pit_alt_irq = NULL; - qemu_irq rtc_irq = NULL; ISADevice *pit = NULL; MemoryRegion *ioport80_io = g_new(MemoryRegion, 1); MemoryRegion *ioportF0_io = g_new(MemoryRegion, 1); @@ -1219,6 +1218,8 @@ void pc_basic_device_init(struct PCMachineState *pcms, */ if (pcms->hpet_enabled && (!kvm_irqchip_in_kernel() || kvm_has_pit_state2())) { + qemu_irq rtc_irq; + hpet = qdev_try_new(TYPE_HPET); if (!hpet) { error_report("couldn't create HPET device"); @@ -1243,9 +1244,6 @@ void pc_basic_device_init(struct PCMachineState *pcms, pit_isa_irq = -1; pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); - } - - if (rtc_irq) { qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq); } else { uint32_t irq = object_property_get_uint(OBJECT(rtc_state), @@ -1253,6 +1251,7 @@ void pc_basic_device_init(struct PCMachineState *pcms, &error_fatal); isa_connect_gpio_out(rtc_state, 0, irq); } + object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state), "date"); From fe9a7350c2900c9609e7a8ce1e042e3458a245e2 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:10 +0200 Subject: [PATCH 23/62] hw/i386/pc_piix: Allow for setting properties before realizing PIIX3 south bridge The next patches will need to take advantage of it. Signed-off-by: Bernhard Beschow Reviewed-by: Peter Maydell Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-3-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index e36a3262b2..6d2f5509e6 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -264,7 +264,8 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; - pci_dev = pci_create_simple_multifunction(pci_bus, -1, TYPE_PIIX3_DEVICE); + pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); + pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); if (xen_enabled()) { pci_device_set_intx_routing_notifier( From 8b6cf5128ee510729895e00a669fa7ce7457c949 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:11 +0200 Subject: [PATCH 24/62] hw/i386/pc_piix: Assign PIIX3's ISA interrupts before its realize() Unlike its PIIX4 counterpart, TYPE_PIIX3_DEVICE doesn't instantiate a PIC itself. Instead, it relies on the board to do so. This means that the board needs to wire the ISA IRQs to the PIIX3 device model. As long as the board assigns the ISA IRQs after PIIX3's realize(), internal devices can't be wired in pci_piix3_realize() since the qemu_irqs are still NULL. Fix that by assigning the ISA interrupts before realize(). This will allow for embedding child devices into the host device as already done for PIIX4. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6d2f5509e6..a003923788 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -265,6 +265,8 @@ static void pc_init1(MachineState *machine, PCIDevice *pci_dev; pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); + piix3 = PIIX3_PCI_DEVICE(pci_dev); + piix3->pic = x86ms->gsi; pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); if (xen_enabled()) { @@ -281,8 +283,6 @@ static void pc_init1(MachineState *machine, XEN_IOAPIC_NUM_PIRQS); } - piix3 = PIIX3_PCI_DEVICE(pci_dev); - piix3->pic = x86ms->gsi; piix3_devfn = piix3->dev.devfn; isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0")); rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), From 32f29b26ff150dba6d1c455b826e44447b9ead45 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:12 +0200 Subject: [PATCH 25/62] hw/isa/piix3: Resolve redundant PIIX_NUM_PIC_IRQS PIIX_NUM_PIC_IRQS is assumed to be the same as ISA_NUM_IRQS, otherwise inconsistencies can occur. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-5-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix3.c | 8 ++++---- include/hw/southbridge/piix.h | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 117024e450..7240c91440 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -48,7 +48,7 @@ static void piix3_set_irq_level_internal(PIIX3State *piix3, int pirq, int level) uint64_t mask; pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq]; - if (pic_irq >= PIIX_NUM_PIC_IRQS) { + if (pic_irq >= ISA_NUM_IRQS) { return; } @@ -62,7 +62,7 @@ static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level) int pic_irq; pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq]; - if (pic_irq >= PIIX_NUM_PIC_IRQS) { + if (pic_irq >= ISA_NUM_IRQS) { return; } @@ -83,7 +83,7 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) int irq = piix3->dev.config[PIIX_PIRQCA + pin]; PCIINTxRoute route; - if (irq < PIIX_NUM_PIC_IRQS) { + if (irq < ISA_NUM_IRQS) { route.mode = PCI_INTX_ENABLED; route.irq = irq; } else { @@ -115,7 +115,7 @@ static void piix3_write_config(PCIDevice *dev, pci_bus_fire_intx_routing_notifier(pci_get_bus(&piix3->dev)); piix3_update_irq_levels(piix3); - for (pic_irq = 0; pic_irq < PIIX_NUM_PIC_IRQS; pic_irq++) { + for (pic_irq = 0; pic_irq < ISA_NUM_IRQS; pic_irq++) { piix3_set_irq_pic(piix3, pic_irq); } } diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 278171752f..2317bb7974 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -27,7 +27,6 @@ */ #define PIIX_RCR_IOPORT 0xcf9 -#define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ #define PIIX_NUM_PIRQS 4ULL /* PIRQ[A-D] */ struct PIIXState { @@ -39,10 +38,10 @@ struct PIIXState { * So one PIC level is tracked by PIIX_NUM_PIRQS bits. * * PIRQ is mapped to PIC pins, we track it by - * PIIX_NUM_PIRQS * PIIX_NUM_PIC_IRQS = 64 bits with + * PIIX_NUM_PIRQS * ISA_NUM_IRQS = 64 bits with * pic_irq * PIIX_NUM_PIRQS + pirq */ -#if PIIX_NUM_PIC_IRQS * PIIX_NUM_PIRQS > 64 +#if ISA_NUM_IRQS * PIIX_NUM_PIRQS > 64 #error "unable to encode pic state in 64bit in pic_levels." #endif uint64_t pic_levels; From 001cb25f3fb84768db4c1fb0ffd77779e0676745 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:13 +0200 Subject: [PATCH 26/62] hw/i386/pc_piix: Wire PIIX3's ISA interrupts by new "isa-irqs" property Avoid assigning the private member of struct PIIX3State from outside which goes against best QOM practices. Instead, implement best QOM practice by adding an "isa-irqs" array property to TYPE_PIIX3_DEVICE and assign it in board code, i.e. from outside. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 7 ++++++- hw/isa/piix3.c | 2 ++ include/hw/southbridge/piix.h | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a003923788..4dc7298c15 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -263,10 +263,15 @@ static void pc_init1(MachineState *machine, if (pcmc->pci_enabled) { PIIX3State *piix3; PCIDevice *pci_dev; + DeviceState *dev; + size_t i; pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); piix3 = PIIX3_PCI_DEVICE(pci_dev); - piix3->pic = x86ms->gsi; + dev = DEVICE(pci_dev); + for (i = 0; i < ISA_NUM_IRQS; i++) { + qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]); + } pci_realize_and_unref(pci_dev, pci_bus, &error_fatal); if (xen_enabled()) { diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 7240c91440..c17547a2c0 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -312,6 +312,8 @@ static void pci_piix3_init(Object *obj) { PIIX3State *d = PIIX3_PCI_DEVICE(obj); + qdev_init_gpio_out_named(DEVICE(obj), d->pic, "isa-irqs", ISA_NUM_IRQS); + object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); } diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 2317bb7974..bb898c6c88 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -46,7 +46,7 @@ struct PIIXState { #endif uint64_t pic_levels; - qemu_irq *pic; + qemu_irq pic[ISA_NUM_IRQS]; /* This member isn't used. Just for save/load compatibility */ int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; From b9a8b8d29f438b6a695b12bd2d3e0567cbae5dc7 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:14 +0200 Subject: [PATCH 27/62] hw/i386/pc_piix: Remove redundant "piix3" variable The variable is never used by its declared type. Eliminate it. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-7-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4dc7298c15..cd6c00c0b3 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -261,13 +261,11 @@ static void pc_init1(MachineState *machine, gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); if (pcmc->pci_enabled) { - PIIX3State *piix3; PCIDevice *pci_dev; DeviceState *dev; size_t i; pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); - piix3 = PIIX3_PCI_DEVICE(pci_dev); dev = DEVICE(pci_dev); for (i = 0; i < ISA_NUM_IRQS; i++) { qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]); @@ -288,8 +286,8 @@ static void pc_init1(MachineState *machine, XEN_IOAPIC_NUM_PIRQS); } - piix3_devfn = piix3->dev.devfn; - isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0")); + piix3_devfn = pci_dev->devfn; + isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0")); rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), "rtc")); } else { From 40f70623875b4ae46e04f57cfa7c80b6af917e1b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:15 +0200 Subject: [PATCH 28/62] hw/isa/piix3: Rename "pic" attribute to "isa_irqs_in" TYPE_PIIX3_DEVICE doesn't instantiate a PIC since it relies on the board to do so. The "pic" attribute, however, suggests that there is one. Rename the attribute to reflect that it represents ISA interrupt lines. Use the same naming convention as in the VIA south bridges as well as in TYPE_I82378. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-8-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/piix3.c | 5 +++-- include/hw/southbridge/piix.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index c17547a2c0..616f5418fa 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -36,7 +36,7 @@ static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) { - qemu_set_irq(piix3->pic[pic_irq], + qemu_set_irq(piix3->isa_irqs_in[pic_irq], !!(piix3->pic_levels & (((1ULL << PIIX_NUM_PIRQS) - 1) << (pic_irq * PIIX_NUM_PIRQS)))); @@ -312,7 +312,8 @@ static void pci_piix3_init(Object *obj) { PIIX3State *d = PIIX3_PCI_DEVICE(obj); - qdev_init_gpio_out_named(DEVICE(obj), d->pic, "isa-irqs", ISA_NUM_IRQS); + qdev_init_gpio_out_named(DEVICE(obj), d->isa_irqs_in, "isa-irqs", + ISA_NUM_IRQS); object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); } diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index bb898c6c88..b07ff6bb26 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -46,7 +46,7 @@ struct PIIXState { #endif uint64_t pic_levels; - qemu_irq pic[ISA_NUM_IRQS]; + qemu_irq isa_irqs_in[ISA_NUM_IRQS]; /* This member isn't used. Just for save/load compatibility */ int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; From 295385127e83a923e166f8b4fe1e543ee4540018 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:16 +0200 Subject: [PATCH 29/62] hw/i386/pc_q35: Wire ICH9 LPC function's interrupts before its realize() When the board assigns the ISA IRQs after the device's realize(), internal devices such as the RTC can't be wired in ich9_lpc_realize() since the qemu_irqs are still NULL. Fix that by assigning the ISA interrupts before realize(). This change is necessary for PIIX consolidation because PIIX4 wires the RTC interrupts in its realize() method, so PIIX3 needs to do so as well. Since the PC and Q35 boards share RTC code, and since PIIX3 needs the change, ICH9 needs to be adapted as well. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-9-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 14 +++++++------- hw/isa/lpc_ich9.c | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index a7386f2ca2..597943ff1b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -242,11 +242,18 @@ static void pc_q35_init(MachineState *machine) host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0")); pcms->bus = host_bus; + /* irq lines */ + gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); + /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), TYPE_ICH9_LPC_DEVICE); qdev_prop_set_bit(DEVICE(lpc), "smm-enabled", x86_machine_is_smm_enabled(x86ms)); + lpc_dev = DEVICE(lpc); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]); + } pci_realize_and_unref(lpc, host_bus, &error_fatal); rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(lpc), "rtc")); @@ -273,13 +280,6 @@ static void pc_q35_init(MachineState *machine) "true", true); } - /* irq lines */ - gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); - - lpc_dev = DEVICE(lpc); - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]); - } isa_bus = ISA_BUS(qdev_get_child_bus(lpc_dev, "isa.0")); if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 3f59980aa0..3fcefc5a8a 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -675,6 +675,9 @@ static void ich9_lpc_initfn(Object *obj) object_initialize_child(obj, "rtc", &lpc->rtc, TYPE_MC146818_RTC); + qdev_init_gpio_out_named(DEVICE(lpc), lpc->gsi, ICH9_GPIO_GSI, + IOAPIC_NUM_PINS); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_SCI_INT, &lpc->sci_gsi, OBJ_PROP_FLAG_READ); object_property_add_uint8_ptr(OBJECT(lpc), ACPI_PM_PROP_ACPI_ENABLE_CMD, @@ -691,7 +694,6 @@ static void ich9_lpc_initfn(Object *obj) static void ich9_lpc_realize(PCIDevice *d, Error **errp) { ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); - DeviceState *dev = DEVICE(d); PCIBus *pci_bus = pci_get_bus(d); ISABus *isa_bus; @@ -734,8 +736,6 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) ICH9_RST_CNT_IOPORT, &lpc->rst_cnt_mem, 1); - qdev_init_gpio_out_named(dev, lpc->gsi, ICH9_GPIO_GSI, IOAPIC_NUM_PINS); - isa_bus_register_input_irqs(isa_bus, lpc->gsi); i8257_dma_init(isa_bus, 0); From 64127940aeb674cb5d9d8d0ea4ca20591bf2b010 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:17 +0200 Subject: [PATCH 30/62] hw/isa/piix3: Wire PIC IRQs to ISA bus in host device Thie PIIX3 south bridge implements both the PIC and the ISA bus, so wiring the interrupts there makes the device model more self-contained. Furthermore, this allows the ISA interrupts to be wired to internal child devices in pci_piix3_realize() which will be performed in subsequent patches. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-10-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 +- hw/isa/piix3.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index cd6c00c0b3..5988656279 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -293,6 +293,7 @@ static void pc_init1(MachineState *machine, } else { isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); + isa_bus_register_input_irqs(isa_bus, x86ms->gsi); rtc_state = isa_new(TYPE_MC146818_RTC); qdev_prop_set_int32(DEVICE(rtc_state), "base_year", 2000); @@ -301,7 +302,6 @@ static void pc_init1(MachineState *machine, i8257_dma_init(isa_bus, 0); pcms->hpet_enabled = false; } - isa_bus_register_input_irqs(isa_bus, x86ms->gsi); if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { pc_i8259_create(isa_bus, gsi_state->i8259_irq); diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 616f5418fa..3e7c42fa68 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -278,6 +278,8 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) memory_region_add_subregion_overlap(pci_address_space_io(dev), PIIX_RCR_IOPORT, &d->rcr_mem, 1); + isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in); + i8257_dma_init(isa_bus, 0); /* RTC */ From 56b1f50e3c101bfe5f52bac73de0e88438de11bd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:18 +0200 Subject: [PATCH 31/62] hw/i386/pc: Wire RTC ISA IRQs in south bridges Makes the south bridges a bit more self-contained and aligns PIIX3 more with PIIX4. The latter is needed for consolidating the PIIX south bridges. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-11-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 7 ++----- hw/isa/lpc_ich9.c | 3 +++ hw/isa/piix3.c | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7e6c4dc526..355e1b7cf6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1244,12 +1244,9 @@ void pc_basic_device_init(struct PCMachineState *pcms, pit_isa_irq = -1; pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); + + /* overwrite connection created by south bridge */ qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq); - } else { - uint32_t irq = object_property_get_uint(OBJECT(rtc_state), - "irq", - &error_fatal); - isa_connect_gpio_out(rtc_state, 0, irq); } object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state), diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 3fcefc5a8a..23eba64f22 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -696,6 +696,7 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) ICH9LPCState *lpc = ICH9_LPC_DEVICE(d); PCIBus *pci_bus = pci_get_bus(d); ISABus *isa_bus; + uint32_t irq; if ((lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)) && !(lpc->smi_host_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT))) { @@ -745,6 +746,8 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) if (!qdev_realize(DEVICE(&lpc->rtc), BUS(isa_bus), errp)) { return; } + irq = object_property_get_uint(OBJECT(&lpc->rtc), "irq", &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(&lpc->rtc), 0, irq); pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 3e7c42fa68..11d72ca2bb 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -266,6 +266,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) { PIIX3State *d = PIIX3_PCI_DEVICE(dev); ISABus *isa_bus; + uint32_t irq; isa_bus = isa_bus_new(DEVICE(d), pci_address_space(dev), pci_address_space_io(dev), errp); @@ -287,6 +288,8 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) if (!qdev_realize(DEVICE(&d->rtc), BUS(isa_bus), errp)) { return; } + irq = object_property_get_uint(OBJECT(&d->rtc), "irq", &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(&d->rtc), 0, irq); } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) From e47e5a5b79ffd6b3ca72ea383e3c756b68402735 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:19 +0200 Subject: [PATCH 32/62] hw/isa/piix3: Create IDE controller in host device The IDE controller is an integral part of PIIX3 (function 1). So create it as part of the south bridge. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-12-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/i386/Kconfig | 1 - hw/i386/pc_piix.c | 13 ++++++------- hw/isa/Kconfig | 1 + hw/isa/piix3.c | 9 +++++++++ include/hw/southbridge/piix.h | 2 ++ 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 9051083c1e..ade817f1b6 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -73,7 +73,6 @@ config I440FX select PC_ACPI select PCI_I440FX select PIIX3 - select IDE_PIIX select DIMM select SMBIOS select FW_CFG_DMA diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 5988656279..c98a997482 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -43,7 +43,6 @@ #include "net/net.h" #include "hw/ide/isa.h" #include "hw/ide/pci.h" -#include "hw/ide/piix.h" #include "hw/irq.h" #include "sysemu/kvm.h" #include "hw/i386/kvm/clock.h" @@ -290,6 +289,10 @@ static void pc_init1(MachineState *machine, isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0")); rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), "rtc")); + dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide")); + pci_ide_create_devs(PCI_DEVICE(dev)); + idebus[0] = qdev_get_child_bus(dev, "ide.0"); + idebus[1] = qdev_get_child_bus(dev, "ide.1"); } else { isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); @@ -301,6 +304,8 @@ static void pc_init1(MachineState *machine, i8257_dma_init(isa_bus, 0); pcms->hpet_enabled = false; + idebus[0] = NULL; + idebus[1] = NULL; } if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) { @@ -329,12 +334,6 @@ static void pc_init1(MachineState *machine, pc_nic_init(pcmc, isa_bus, pci_bus); if (pcmc->pci_enabled) { - PCIDevice *dev; - - dev = pci_create_simple(pci_bus, piix3_devfn + 1, TYPE_PIIX3_IDE); - pci_ide_create_devs(dev); - idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0"); - idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1"); pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); } #ifdef CONFIG_IDE_ISA diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index c10cbc5fc1..28345edbb3 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -34,6 +34,7 @@ config PC87312 config PIIX3 bool select I8257 + select IDE_PIIX select ISA_BUS select MC146818RTC diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 11d72ca2bb..3f1dabade0 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -29,6 +29,7 @@ #include "hw/southbridge/piix.h" #include "hw/irq.h" #include "hw/qdev-properties.h" +#include "hw/ide/piix.h" #include "hw/isa/isa.h" #include "sysemu/runstate.h" #include "migration/vmstate.h" @@ -265,6 +266,7 @@ static const MemoryRegionOps rcr_ops = { static void pci_piix3_realize(PCIDevice *dev, Error **errp) { PIIX3State *d = PIIX3_PCI_DEVICE(dev); + PCIBus *pci_bus = pci_get_bus(dev); ISABus *isa_bus; uint32_t irq; @@ -290,6 +292,12 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) } irq = object_property_get_uint(OBJECT(&d->rtc), "irq", &error_fatal); isa_connect_gpio_out(ISA_DEVICE(&d->rtc), 0, irq); + + /* IDE */ + qdev_prop_set_int32(DEVICE(&d->ide), "addr", dev->devfn + 1); + if (!qdev_realize(DEVICE(&d->ide), BUS(pci_bus), errp)) { + return; + } } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -321,6 +329,7 @@ static void pci_piix3_init(Object *obj) ISA_NUM_IRQS); object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); + object_initialize_child(obj, "ide", &d->ide, TYPE_PIIX3_IDE); } static void pci_piix3_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index b07ff6bb26..1daeff397c 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -13,6 +13,7 @@ #define HW_SOUTHBRIDGE_PIIX_H #include "hw/pci/pci_device.h" +#include "hw/ide/pci.h" #include "hw/rtc/mc146818rtc.h" /* PIRQRC[A:D]: PIRQx Route Control Registers */ @@ -52,6 +53,7 @@ struct PIIXState { int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; MC146818RtcState rtc; + PCIIDEState ide; /* Reset Control Register contents */ uint8_t rcr; From 6fe4464c05f45e726fcfa4a7927f4ed27444a0ca Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:20 +0200 Subject: [PATCH 33/62] hw/isa/piix3: Create USB controller in host device The USB controller is an integral part of PIIX3 (function 2). So create it as part of the south bridge. Note that the USB function is optional in QEMU. This is why it gets object_initialize_child()'ed in realize rather than in instance_init. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-13-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 7 ++----- hw/isa/Kconfig | 1 + hw/isa/piix3.c | 16 ++++++++++++++++ include/hw/southbridge/piix.h | 4 ++++ 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index c98a997482..8dcd6851d0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -51,7 +51,6 @@ #include "exec/memory.h" #include "hw/acpi/acpi.h" #include "hw/acpi/piix4.h" -#include "hw/usb/hcd-uhci.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -265,6 +264,8 @@ static void pc_init1(MachineState *machine, size_t i; pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); + object_property_set_bool(OBJECT(pci_dev), "has-usb", + machine_usb(machine), &error_abort); dev = DEVICE(pci_dev); for (i = 0; i < ISA_NUM_IRQS; i++) { qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]); @@ -359,10 +360,6 @@ static void pc_init1(MachineState *machine, } #endif - if (pcmc->pci_enabled && machine_usb(machine)) { - pci_create_simple(pci_bus, piix3_devfn + 2, TYPE_PIIX3_USB_UHCI); - } - if (pcmc->pci_enabled && x86_machine_is_acpi_enabled(X86_MACHINE(pcms))) { PCIDevice *piix4_pm; diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 28345edbb3..1076df69ca 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -37,6 +37,7 @@ config PIIX3 select IDE_PIIX select ISA_BUS select MC146818RTC + select USB_UHCI config PIIX4 bool diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 3f1dabade0..aebc0da23b 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -298,6 +298,16 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) if (!qdev_realize(DEVICE(&d->ide), BUS(pci_bus), errp)) { return; } + + /* USB */ + if (d->has_usb) { + object_initialize_child(OBJECT(dev), "uhci", &d->uhci, + TYPE_PIIX3_USB_UHCI); + qdev_prop_set_int32(DEVICE(&d->uhci), "addr", dev->devfn + 2); + if (!qdev_realize(DEVICE(&d->uhci), BUS(pci_bus), errp)) { + return; + } + } } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -332,6 +342,11 @@ static void pci_piix3_init(Object *obj) object_initialize_child(obj, "ide", &d->ide, TYPE_PIIX3_IDE); } +static Property pci_piix3_props[] = { + DEFINE_PROP_BOOL("has-usb", PIIX3State, has_usb, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void pci_piix3_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -352,6 +367,7 @@ static void pci_piix3_class_init(ObjectClass *klass, void *data) * pc_piix.c's pc_init1() */ dc->user_creatable = false; + device_class_set_props(dc, pci_piix3_props); adevc->build_dev_aml = build_pci_isa_aml; } diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 1daeff397c..5cd866f1f2 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -15,6 +15,7 @@ #include "hw/pci/pci_device.h" #include "hw/ide/pci.h" #include "hw/rtc/mc146818rtc.h" +#include "hw/usb/hcd-uhci.h" /* PIRQRC[A:D]: PIRQx Route Control Registers */ #define PIIX_PIRQCA 0x60 @@ -54,12 +55,15 @@ struct PIIXState { MC146818RtcState rtc; PCIIDEState ide; + UHCIState uhci; /* Reset Control Register contents */ uint8_t rcr; /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */ MemoryRegion rcr_mem; + + bool has_usb; }; typedef struct PIIXState PIIX3State; From 0a15cf0801815a359af211361fba309a2cc5c1e8 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:21 +0200 Subject: [PATCH 34/62] hw/isa/piix3: Create power management controller in host device The power management controller is an integral part of PIIX3 (function 3). So create it as part of the south bridge. Note that the ACPI function is optional in QEMU. This is why it gets object_initialize_child()'ed in realize rather than in instance_init. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-14-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 24 +++++++++++------------- hw/isa/Kconfig | 1 + hw/isa/piix3.c | 15 +++++++++++++++ include/hw/southbridge/piix.h | 6 ++++++ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8dcd6851d0..70cffcfe4f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -50,7 +50,6 @@ #include "hw/i2c/smbus_eeprom.h" #include "exec/memory.h" #include "hw/acpi/acpi.h" -#include "hw/acpi/piix4.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -115,7 +114,7 @@ static void pc_init1(MachineState *machine, MemoryRegion *system_io = get_system_io(); PCIBus *pci_bus = NULL; ISABus *isa_bus; - int piix3_devfn = -1; + Object *piix4_pm = NULL; qemu_irq smi_irq; GSIState *gsi_state; BusState *idebus[MAX_IDE_BUS]; @@ -266,6 +265,13 @@ static void pc_init1(MachineState *machine, pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); object_property_set_bool(OBJECT(pci_dev), "has-usb", machine_usb(machine), &error_abort); + object_property_set_bool(OBJECT(pci_dev), "has-acpi", + x86_machine_is_acpi_enabled(x86ms), + &error_abort); + qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100); + object_property_set_bool(OBJECT(pci_dev), "smm-enabled", + x86_machine_is_smm_enabled(x86ms), + &error_abort); dev = DEVICE(pci_dev); for (i = 0; i < ISA_NUM_IRQS; i++) { qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]); @@ -286,10 +292,10 @@ static void pc_init1(MachineState *machine, XEN_IOAPIC_NUM_PIRQS); } - piix3_devfn = pci_dev->devfn; isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0")); rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), "rtc")); + piix4_pm = object_resolve_path_component(OBJECT(pci_dev), "pm"); dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide")); pci_ide_create_devs(PCI_DEVICE(dev)); idebus[0] = qdev_get_child_bus(dev, "ide.0"); @@ -360,17 +366,9 @@ static void pc_init1(MachineState *machine, } #endif - if (pcmc->pci_enabled && x86_machine_is_acpi_enabled(X86_MACHINE(pcms))) { - PCIDevice *piix4_pm; - + if (piix4_pm) { smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); - piix4_pm = pci_new(piix3_devfn + 3, TYPE_PIIX4_PM); - qdev_prop_set_uint32(DEVICE(piix4_pm), "smb_io_base", 0xb100); - qdev_prop_set_bit(DEVICE(piix4_pm), "smm-enabled", - x86_machine_is_smm_enabled(x86ms)); - pci_realize_and_unref(piix4_pm, pci_bus, &error_fatal); - qdev_connect_gpio_out(DEVICE(piix4_pm), 0, x86ms->gsi[9]); qdev_connect_gpio_out_named(DEVICE(piix4_pm), "smi-irq", 0, smi_irq); pcms->smbus = I2C_BUS(qdev_get_child_bus(DEVICE(piix4_pm), "i2c")); /* TODO: Populate SPD eeprom data. */ @@ -382,7 +380,7 @@ static void pc_init1(MachineState *machine, object_property_allow_set_link, OBJ_PROP_LINK_STRONG); object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, - OBJECT(piix4_pm), &error_abort); + piix4_pm, &error_abort); } if (machine->nvdimms_state->is_enabled) { diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 1076df69ca..17ddb25afc 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -33,6 +33,7 @@ config PC87312 config PIIX3 bool + select ACPI_PIIX4 select I8257 select IDE_PIIX select ISA_BUS diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index aebc0da23b..5b867df299 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -308,6 +308,18 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) return; } } + + /* Power Management */ + if (d->has_acpi) { + object_initialize_child(OBJECT(d), "pm", &d->pm, TYPE_PIIX4_PM); + qdev_prop_set_int32(DEVICE(&d->pm), "addr", dev->devfn + 3); + qdev_prop_set_uint32(DEVICE(&d->pm), "smb_io_base", d->smb_io_base); + qdev_prop_set_bit(DEVICE(&d->pm), "smm-enabled", d->smm_enabled); + if (!qdev_realize(DEVICE(&d->pm), BUS(pci_bus), errp)) { + return; + } + qdev_connect_gpio_out(DEVICE(&d->pm), 0, d->isa_irqs_in[9]); + } } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -343,7 +355,10 @@ static void pci_piix3_init(Object *obj) } static Property pci_piix3_props[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX3State, smb_io_base, 0), + DEFINE_PROP_BOOL("has-acpi", PIIX3State, has_acpi, true), DEFINE_PROP_BOOL("has-usb", PIIX3State, has_usb, true), + DEFINE_PROP_BOOL("smm-enabled", PIIX3State, smm_enabled, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 5cd866f1f2..c56ce49fd3 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -13,6 +13,7 @@ #define HW_SOUTHBRIDGE_PIIX_H #include "hw/pci/pci_device.h" +#include "hw/acpi/piix4.h" #include "hw/ide/pci.h" #include "hw/rtc/mc146818rtc.h" #include "hw/usb/hcd-uhci.h" @@ -56,6 +57,9 @@ struct PIIXState { MC146818RtcState rtc; PCIIDEState ide; UHCIState uhci; + PIIX4PMState pm; + + uint32_t smb_io_base; /* Reset Control Register contents */ uint8_t rcr; @@ -63,7 +67,9 @@ struct PIIXState { /* IO memory region for Reset Control Register (PIIX_RCR_IOPORT) */ MemoryRegion rcr_mem; + bool has_acpi; bool has_usb; + bool smm_enabled; }; typedef struct PIIXState PIIX3State; From 9769cfc3e419a704ff1d7feb4621da660903499a Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:22 +0200 Subject: [PATCH 35/62] hw/isa/piix3: Drop the "3" from PIIX base class name TYPE_PIIX3_PCI_DEVICE was the former base class of the Xen and non-Xen variants of the PIIX3 ISA device models. It will become the base class for the PIIX3 and PIIX4 device models, so drop the "3" from the type names. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-15-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix3.c | 56 +++++++++++++++++------------------ include/hw/southbridge/piix.h | 6 ++-- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 5b867df299..c7e59249b6 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -35,7 +35,7 @@ #include "migration/vmstate.h" #include "hw/acpi/acpi_aml_interface.h" -static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) +static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq) { qemu_set_irq(piix3->isa_irqs_in[pic_irq], !!(piix3->pic_levels & @@ -43,7 +43,7 @@ static void piix3_set_irq_pic(PIIX3State *piix3, int pic_irq) (pic_irq * PIIX_NUM_PIRQS)))); } -static void piix3_set_irq_level_internal(PIIX3State *piix3, int pirq, int level) +static void piix3_set_irq_level_internal(PIIXState *piix3, int pirq, int level) { int pic_irq; uint64_t mask; @@ -58,7 +58,7 @@ static void piix3_set_irq_level_internal(PIIX3State *piix3, int pirq, int level) piix3->pic_levels |= mask * !!level; } -static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level) +static void piix3_set_irq_level(PIIXState *piix3, int pirq, int level) { int pic_irq; @@ -74,13 +74,13 @@ static void piix3_set_irq_level(PIIX3State *piix3, int pirq, int level) static void piix3_set_irq(void *opaque, int pirq, int level) { - PIIX3State *piix3 = opaque; + PIIXState *piix3 = opaque; piix3_set_irq_level(piix3, pirq, level); } static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) { - PIIX3State *piix3 = opaque; + PIIXState *piix3 = opaque; int irq = piix3->dev.config[PIIX_PIRQCA + pin]; PCIINTxRoute route; @@ -95,7 +95,7 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) } /* irq routing is changed. so rebuild bitmap */ -static void piix3_update_irq_levels(PIIX3State *piix3) +static void piix3_update_irq_levels(PIIXState *piix3) { PCIBus *bus = pci_get_bus(&piix3->dev); int pirq; @@ -111,7 +111,7 @@ static void piix3_write_config(PCIDevice *dev, { pci_default_write_config(dev, address, val, len); if (ranges_overlap(address, len, PIIX_PIRQCA, 4)) { - PIIX3State *piix3 = PIIX3_PCI_DEVICE(dev); + PIIXState *piix3 = PIIX_PCI_DEVICE(dev); int pic_irq; pci_bus_fire_intx_routing_notifier(pci_get_bus(&piix3->dev)); @@ -124,7 +124,7 @@ static void piix3_write_config(PCIDevice *dev, static void piix3_reset(DeviceState *dev) { - PIIX3State *d = PIIX3_PCI_DEVICE(dev); + PIIXState *d = PIIX_PCI_DEVICE(dev); uint8_t *pci_conf = d->dev.config; pci_conf[0x04] = 0x07; /* master, memory and I/O */ @@ -165,7 +165,7 @@ static void piix3_reset(DeviceState *dev) static int piix3_post_load(void *opaque, int version_id) { - PIIX3State *piix3 = opaque; + PIIXState *piix3 = opaque; int pirq; /* @@ -188,7 +188,7 @@ static int piix3_post_load(void *opaque, int version_id) static int piix3_pre_save(void *opaque) { int i; - PIIX3State *piix3 = opaque; + PIIXState *piix3 = opaque; for (i = 0; i < ARRAY_SIZE(piix3->pci_irq_levels_vmstate); i++) { piix3->pci_irq_levels_vmstate[i] = @@ -200,7 +200,7 @@ static int piix3_pre_save(void *opaque) static bool piix3_rcr_needed(void *opaque) { - PIIX3State *piix3 = opaque; + PIIXState *piix3 = opaque; return (piix3->rcr != 0); } @@ -211,7 +211,7 @@ static const VMStateDescription vmstate_piix3_rcr = { .minimum_version_id = 1, .needed = piix3_rcr_needed, .fields = (VMStateField[]) { - VMSTATE_UINT8(rcr, PIIX3State), + VMSTATE_UINT8(rcr, PIIXState), VMSTATE_END_OF_LIST() } }; @@ -223,8 +223,8 @@ static const VMStateDescription vmstate_piix3 = { .post_load = piix3_post_load, .pre_save = piix3_pre_save, .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, PIIX3State), - VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIX3State, + VMSTATE_PCI_DEVICE(dev, PIIXState), + VMSTATE_INT32_ARRAY_V(pci_irq_levels_vmstate, PIIXState, PIIX_NUM_PIRQS, 3), VMSTATE_END_OF_LIST() }, @@ -237,7 +237,7 @@ static const VMStateDescription vmstate_piix3 = { static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) { - PIIX3State *d = opaque; + PIIXState *d = opaque; if (val & 4) { qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); @@ -248,7 +248,7 @@ static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned len) { - PIIX3State *d = opaque; + PIIXState *d = opaque; return d->rcr; } @@ -265,7 +265,7 @@ static const MemoryRegionOps rcr_ops = { static void pci_piix3_realize(PCIDevice *dev, Error **errp) { - PIIX3State *d = PIIX3_PCI_DEVICE(dev); + PIIXState *d = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); ISABus *isa_bus; uint32_t irq; @@ -345,7 +345,7 @@ static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) static void pci_piix3_init(Object *obj) { - PIIX3State *d = PIIX3_PCI_DEVICE(obj); + PIIXState *d = PIIX_PCI_DEVICE(obj); qdev_init_gpio_out_named(DEVICE(obj), d->isa_irqs_in, "isa-irqs", ISA_NUM_IRQS); @@ -355,10 +355,10 @@ static void pci_piix3_init(Object *obj) } static Property pci_piix3_props[] = { - DEFINE_PROP_UINT32("smb_io_base", PIIX3State, smb_io_base, 0), - DEFINE_PROP_BOOL("has-acpi", PIIX3State, has_acpi, true), - DEFINE_PROP_BOOL("has-usb", PIIX3State, has_usb, true), - DEFINE_PROP_BOOL("smm-enabled", PIIX3State, smm_enabled, false), + DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0), + DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true), + DEFINE_PROP_BOOL("has-usb", PIIXState, has_usb, true), + DEFINE_PROP_BOOL("smm-enabled", PIIXState, smm_enabled, false), DEFINE_PROP_END_OF_LIST(), }; @@ -386,10 +386,10 @@ static void pci_piix3_class_init(ObjectClass *klass, void *data) adevc->build_dev_aml = build_pci_isa_aml; } -static const TypeInfo piix3_pci_type_info = { - .name = TYPE_PIIX3_PCI_DEVICE, +static const TypeInfo piix_pci_type_info = { + .name = TYPE_PIIX_PCI_DEVICE, .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PIIX3State), + .instance_size = sizeof(PIIXState), .instance_init = pci_piix3_init, .abstract = true, .class_init = pci_piix3_class_init, @@ -403,7 +403,7 @@ static const TypeInfo piix3_pci_type_info = { static void piix3_realize(PCIDevice *dev, Error **errp) { ERRP_GUARD(); - PIIX3State *piix3 = PIIX3_PCI_DEVICE(dev); + PIIXState *piix3 = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); pci_piix3_realize(dev, errp); @@ -424,13 +424,13 @@ static void piix3_class_init(ObjectClass *klass, void *data) static const TypeInfo piix3_info = { .name = TYPE_PIIX3_DEVICE, - .parent = TYPE_PIIX3_PCI_DEVICE, + .parent = TYPE_PIIX_PCI_DEVICE, .class_init = piix3_class_init, }; static void piix3_register_types(void) { - type_register_static(&piix3_pci_type_info); + type_register_static(&piix_pci_type_info); type_register_static(&piix3_info); } diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index c56ce49fd3..0b257e1582 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -71,11 +71,9 @@ struct PIIXState { bool has_usb; bool smm_enabled; }; -typedef struct PIIXState PIIX3State; -#define TYPE_PIIX3_PCI_DEVICE "pci-piix3" -DECLARE_INSTANCE_CHECKER(PIIX3State, PIIX3_PCI_DEVICE, - TYPE_PIIX3_PCI_DEVICE) +#define TYPE_PIIX_PCI_DEVICE "pci-piix" +OBJECT_DECLARE_SIMPLE_TYPE(PIIXState, PIIX_PCI_DEVICE) #define TYPE_PIIX3_DEVICE "PIIX3" #define TYPE_PIIX4_PCI_DEVICE "piix4-isa" From 06f6efefe06c7958471cbf84c56ccc87624577d8 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:23 +0200 Subject: [PATCH 36/62] hw/isa/piix4: Remove unused inbound ISA interrupt lines The Malta board, which is the only user of PIIX4, doesn't connect to the exported interrupt lines. PIIX3 doesn't expose such interrupt lines either, so remove them for PIIX4 for simplicity and consistency. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-16-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix4.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index e0b149f8eb..3c3c7a094c 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -148,12 +148,6 @@ static void piix4_request_i8259_irq(void *opaque, int irq, int level) qemu_set_irq(s->cpu_intr, level); } -static void piix4_set_i8259_irq(void *opaque, int irq, int level) -{ - PIIX4State *s = opaque; - qemu_set_irq(s->isa[irq], level); -} - static void piix4_rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned int len) { @@ -197,8 +191,6 @@ static void piix4_realize(PCIDevice *dev, Error **errp) return; } - qdev_init_gpio_in_named(DEVICE(dev), piix4_set_i8259_irq, - "isa", ISA_NUM_IRQS); qdev_init_gpio_out_named(DEVICE(dev), &s->cpu_intr, "intr", 1); From de710ac40867a409d5e3fbd83940ce9e1f6922d7 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:24 +0200 Subject: [PATCH 37/62] hw/isa/piix4: Rename "isa" attribute to "isa_irqs_in" Rename the "isa" attribute to align it with PIIX3 for consolidation. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-17-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/piix4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index 3c3c7a094c..9c8b6c98ab 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -45,7 +45,7 @@ struct PIIX4State { PCIDevice dev; qemu_irq cpu_intr; - qemu_irq *isa; + qemu_irq *isa_irqs_in; MC146818RtcState rtc; PCIIDEState ide; @@ -75,7 +75,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int level) pic_level |= pci_bus_get_irq_level(bus, i); } } - qemu_set_irq(s->isa[pic_irq], pic_level); + qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level); } } @@ -201,10 +201,10 @@ static void piix4_realize(PCIDevice *dev, Error **errp) /* initialize i8259 pic */ i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1); - s->isa = i8259_init(isa_bus, *i8259_out_irq); + s->isa_irqs_in = i8259_init(isa_bus, *i8259_out_irq); /* initialize ISA irqs */ - isa_bus_register_input_irqs(isa_bus, s->isa); + isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); /* initialize pit */ i8254_pit_init(isa_bus, 0x40, 0, NULL); @@ -236,7 +236,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp) if (!qdev_realize(DEVICE(&s->pm), BUS(pci_bus), errp)) { return; } - qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa[9]); + qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa_irqs_in[9]); pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); } From 80ec6f5b574e5368007d8076fd32316765bf4ffb Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:25 +0200 Subject: [PATCH 38/62] hw/isa/piix4: Rename reset control operations to match PIIX3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both implementations are the same and will be shared upon merging. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-18-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix4.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index 9c8b6c98ab..eb456622c5 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -148,8 +148,8 @@ static void piix4_request_i8259_irq(void *opaque, int irq, int level) qemu_set_irq(s->cpu_intr, level); } -static void piix4_rcr_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int len) +static void rcr_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int len) { PIIX4State *s = opaque; @@ -161,16 +161,16 @@ static void piix4_rcr_write(void *opaque, hwaddr addr, uint64_t val, s->rcr = val & 2; /* keep System Reset type only */ } -static uint64_t piix4_rcr_read(void *opaque, hwaddr addr, unsigned int len) +static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned int len) { PIIX4State *s = opaque; return s->rcr; } -static const MemoryRegionOps piix4_rcr_ops = { - .read = piix4_rcr_read, - .write = piix4_rcr_write, +static const MemoryRegionOps rcr_ops = { + .read = rcr_read, + .write = rcr_write, .endianness = DEVICE_LITTLE_ENDIAN, .impl = { .min_access_size = 1, @@ -194,7 +194,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp) qdev_init_gpio_out_named(DEVICE(dev), &s->cpu_intr, "intr", 1); - memory_region_init_io(&s->rcr_mem, OBJECT(dev), &piix4_rcr_ops, s, + memory_region_init_io(&s->rcr_mem, OBJECT(dev), &rcr_ops, s, "reset-control", 1); memory_region_add_subregion_overlap(pci_address_space_io(dev), PIIX_RCR_IOPORT, &s->rcr_mem, 1); From 74bdcfb4b2ba7958efe8e66e06757579d61ebbb3 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:26 +0200 Subject: [PATCH 39/62] hw/isa/piix4: Reuse struct PIIXState from PIIX3 PIIX4 has its own, private PIIX4State structure. PIIX3 has almost the same structure, provided in a public header. So reuse it and add a cpu_intr attribute to it which is only used by PIIX4. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-19-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/piix4.c | 26 +++++++++++--------------- include/hw/southbridge/piix.h | 1 + 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c index eb456622c5..71899aaa69 100644 --- a/hw/isa/piix4.c +++ b/hw/isa/piix4.c @@ -42,21 +42,9 @@ #include "sysemu/runstate.h" #include "qom/object.h" -struct PIIX4State { - PCIDevice dev; - qemu_irq cpu_intr; - qemu_irq *isa_irqs_in; +typedef struct PIIXState PIIX4State; - MC146818RtcState rtc; - PCIIDEState ide; - UHCIState uhci; - PIIX4PMState pm; - /* Reset Control Register */ - MemoryRegion rcr_mem; - uint8_t rcr; -}; - -OBJECT_DECLARE_SIMPLE_TYPE(PIIX4State, PIIX4_PCI_DEVICE) +DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE) static void piix4_set_irq(void *opaque, int irq_num, int level) { @@ -184,6 +172,8 @@ static void piix4_realize(PCIDevice *dev, Error **errp) PCIBus *pci_bus = pci_get_bus(dev); ISABus *isa_bus; qemu_irq *i8259_out_irq; + qemu_irq *i8259; + size_t i; isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev), pci_address_space_io(dev), errp); @@ -201,7 +191,13 @@ static void piix4_realize(PCIDevice *dev, Error **errp) /* initialize i8259 pic */ i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1); - s->isa_irqs_in = i8259_init(isa_bus, *i8259_out_irq); + i8259 = i8259_init(isa_bus, *i8259_out_irq); + + for (i = 0; i < ISA_NUM_IRQS; i++) { + s->isa_irqs_in[i] = i8259[i]; + } + + g_free(i8259); /* initialize ISA irqs */ isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 0b257e1582..dd5f7b31c0 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -49,6 +49,7 @@ struct PIIXState { #endif uint64_t pic_levels; + qemu_irq cpu_intr; qemu_irq isa_irqs_in[ISA_NUM_IRQS]; /* This member isn't used. Just for save/load compatibility */ From 1697189977032c5bce6e63036277ad4d8ea2f44b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:27 +0200 Subject: [PATCH 40/62] hw/isa/piix3: Merge hw/isa/piix4.c Now that the PIIX3 and PIIX4 device models are sufficiently prepared, their implementations can be merged into one file for further consolidation. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-20-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 6 +- hw/i386/Kconfig | 2 +- hw/isa/Kconfig | 11 +- hw/isa/meson.build | 3 +- hw/isa/{piix3.c => piix.c} | 190 +++++++++++++++++++++++- hw/isa/piix4.c | 290 ------------------------------------- hw/mips/Kconfig | 2 +- 7 files changed, 195 insertions(+), 309 deletions(-) rename hw/isa/{piix3.c => piix.c} (71%) delete mode 100644 hw/isa/piix4.c diff --git a/MAINTAINERS b/MAINTAINERS index 9bd4fe378d..d9fe5aa367 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1302,7 +1302,7 @@ Malta M: Philippe Mathieu-Daudé R: Aurelien Jarno S: Odd Fixes -F: hw/isa/piix4.c +F: hw/isa/piix.c F: hw/acpi/piix4.c F: hw/mips/malta.c F: hw/pci-host/gt64120.c @@ -1724,7 +1724,7 @@ F: hw/pci-host/pam.c F: include/hw/pci-host/i440fx.h F: include/hw/pci-host/q35.h F: include/hw/pci-host/pam.h -F: hw/isa/piix3.c +F: hw/isa/piix.c F: hw/isa/lpc_ich9.c F: hw/i2c/smbus_ich9.c F: hw/acpi/piix4.c @@ -2478,7 +2478,7 @@ PIIX4 South Bridge (i82371AB) M: Hervé Poussineau M: Philippe Mathieu-Daudé S: Maintained -F: hw/isa/piix4.c +F: hw/isa/piix.c F: include/hw/southbridge/piix.h Firmware configuration (fw_cfg) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index ade817f1b6..94772c726b 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -72,7 +72,7 @@ config I440FX select PC_PCI select PC_ACPI select PCI_I440FX - select PIIX3 + select PIIX select DIMM select SMBIOS select FW_CFG_DMA diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 17ddb25afc..040a18c070 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -31,16 +31,7 @@ config PC87312 select FDC_ISA select IDE_ISA -config PIIX3 - bool - select ACPI_PIIX4 - select I8257 - select IDE_PIIX - select ISA_BUS - select MC146818RTC - select USB_UHCI - -config PIIX4 +config PIIX bool # For historical reasons, SuperIO devices are created in the board # for PIIX4. diff --git a/hw/isa/meson.build b/hw/isa/meson.build index b855e81276..2ab99ce0c6 100644 --- a/hw/isa/meson.build +++ b/hw/isa/meson.build @@ -3,8 +3,7 @@ system_ss.add(when: 'CONFIG_I82378', if_true: files('i82378.c')) system_ss.add(when: 'CONFIG_ISA_BUS', if_true: files('isa-bus.c')) system_ss.add(when: 'CONFIG_ISA_SUPERIO', if_true: files('isa-superio.c')) system_ss.add(when: 'CONFIG_PC87312', if_true: files('pc87312.c')) -system_ss.add(when: 'CONFIG_PIIX3', if_true: files('piix3.c')) -system_ss.add(when: 'CONFIG_PIIX4', if_true: files('piix4.c')) +system_ss.add(when: 'CONFIG_PIIX', if_true: files('piix.c')) system_ss.add(when: 'CONFIG_SMC37C669', if_true: files('smc37c669-superio.c')) system_ss.add(when: 'CONFIG_VT82C686', if_true: files('vt82c686.c')) diff --git a/hw/isa/piix3.c b/hw/isa/piix.c similarity index 71% rename from hw/isa/piix3.c rename to hw/isa/piix.c index c7e59249b6..f6da334c6f 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix.c @@ -2,6 +2,7 @@ * QEMU PIIX PCI ISA Bridge Emulation * * Copyright (c) 2006 Fabrice Bellard + * Copyright (c) 2018 Hervé Poussineau * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -27,14 +28,20 @@ #include "qapi/error.h" #include "hw/dma/i8257.h" #include "hw/southbridge/piix.h" +#include "hw/timer/i8254.h" #include "hw/irq.h" #include "hw/qdev-properties.h" #include "hw/ide/piix.h" +#include "hw/intc/i8259.h" #include "hw/isa/isa.h" #include "sysemu/runstate.h" #include "migration/vmstate.h" #include "hw/acpi/acpi_aml_interface.h" +typedef struct PIIXState PIIX4State; + +DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE) + static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq) { qemu_set_irq(piix3->isa_irqs_in[pic_irq], @@ -78,6 +85,33 @@ static void piix3_set_irq(void *opaque, int pirq, int level) piix3_set_irq_level(piix3, pirq, level); } +static void piix4_set_irq(void *opaque, int irq_num, int level) +{ + int i, pic_irq, pic_level; + PIIX4State *s = opaque; + PCIBus *bus = pci_get_bus(&s->dev); + + /* now we change the pic irq level according to the piix irq mappings */ + /* XXX: optimize */ + pic_irq = s->dev.config[PIIX_PIRQCA + irq_num]; + if (pic_irq < ISA_NUM_IRQS) { + /* The pic level is the logical OR of all the PCI irqs mapped to it. */ + pic_level = 0; + for (i = 0; i < PIIX_NUM_PIRQS; i++) { + if (pic_irq == s->dev.config[PIIX_PIRQCA + i]) { + pic_level |= pci_bus_get_irq_level(bus, i); + } + } + qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level); + } +} + +static void piix4_request_i8259_irq(void *opaque, int irq, int level) +{ + PIIX4State *s = opaque; + qemu_set_irq(s->cpu_intr, level); +} + static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) { PIIXState *piix3 = opaque; @@ -122,9 +156,8 @@ static void piix3_write_config(PCIDevice *dev, } } -static void piix3_reset(DeviceState *dev) +static void piix_reset(PIIXState *d) { - PIIXState *d = PIIX_PCI_DEVICE(dev); uint8_t *pci_conf = d->dev.config; pci_conf[0x04] = 0x07; /* master, memory and I/O */ @@ -163,6 +196,13 @@ static void piix3_reset(DeviceState *dev) d->rcr = 0; } +static void piix3_reset(DeviceState *dev) +{ + PIIXState *d = PIIX_PCI_DEVICE(dev); + + piix_reset(d); +} + static int piix3_post_load(void *opaque, int version_id) { PIIXState *piix3 = opaque; @@ -185,6 +225,17 @@ static int piix3_post_load(void *opaque, int version_id) return 0; } +static int piix4_post_load(void *opaque, int version_id) +{ + PIIX4State *s = opaque; + + if (version_id == 2) { + s->rcr = 0; + } + + return 0; +} + static int piix3_pre_save(void *opaque) { int i; @@ -234,6 +285,17 @@ static const VMStateDescription vmstate_piix3 = { } }; +static const VMStateDescription vmstate_piix4 = { + .name = "PIIX4", + .version_id = 3, + .minimum_version_id = 2, + .post_load = piix4_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(dev, PIIX4State), + VMSTATE_UINT8_V(rcr, PIIX4State, 3), + VMSTATE_END_OF_LIST() + } +}; static void rcr_write(void *opaque, hwaddr addr, uint64_t val, unsigned len) { @@ -428,10 +490,134 @@ static const TypeInfo piix3_info = { .class_init = piix3_class_init, }; +static void piix4_realize(PCIDevice *dev, Error **errp) +{ + PIIX4State *s = PIIX4_PCI_DEVICE(dev); + PCIBus *pci_bus = pci_get_bus(dev); + ISABus *isa_bus; + qemu_irq *i8259_out_irq; + qemu_irq *i8259; + size_t i; + + isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev), + pci_address_space_io(dev), errp); + if (!isa_bus) { + return; + } + + qdev_init_gpio_out_named(DEVICE(dev), &s->cpu_intr, + "intr", 1); + + memory_region_init_io(&s->rcr_mem, OBJECT(dev), &rcr_ops, s, + "reset-control", 1); + memory_region_add_subregion_overlap(pci_address_space_io(dev), + PIIX_RCR_IOPORT, &s->rcr_mem, 1); + + /* initialize i8259 pic */ + i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1); + i8259 = i8259_init(isa_bus, *i8259_out_irq); + + for (i = 0; i < ISA_NUM_IRQS; i++) { + s->isa_irqs_in[i] = i8259[i]; + } + + g_free(i8259); + + /* initialize ISA irqs */ + isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); + + /* initialize pit */ + i8254_pit_init(isa_bus, 0x40, 0, NULL); + + /* DMA */ + i8257_dma_init(isa_bus, 0); + + /* RTC */ + qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000); + if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) { + return; + } + s->rtc.irq = isa_get_irq(ISA_DEVICE(&s->rtc), s->rtc.isairq); + + /* IDE */ + qdev_prop_set_int32(DEVICE(&s->ide), "addr", dev->devfn + 1); + if (!qdev_realize(DEVICE(&s->ide), BUS(pci_bus), errp)) { + return; + } + + /* USB */ + qdev_prop_set_int32(DEVICE(&s->uhci), "addr", dev->devfn + 2); + if (!qdev_realize(DEVICE(&s->uhci), BUS(pci_bus), errp)) { + return; + } + + /* ACPI controller */ + qdev_prop_set_int32(DEVICE(&s->pm), "addr", dev->devfn + 3); + if (!qdev_realize(DEVICE(&s->pm), BUS(pci_bus), errp)) { + return; + } + qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa_irqs_in[9]); + + pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); +} + +static void piix4_isa_reset(DeviceState *dev) +{ + PIIX4State *s = PIIX4_PCI_DEVICE(dev); + + piix_reset(s); +} + +static void piix4_init(Object *obj) +{ + PIIX4State *s = PIIX4_PCI_DEVICE(obj); + + object_initialize_child(obj, "rtc", &s->rtc, TYPE_MC146818_RTC); + object_initialize_child(obj, "ide", &s->ide, TYPE_PIIX4_IDE); + object_initialize_child(obj, "uhci", &s->uhci, TYPE_PIIX4_USB_UHCI); + + object_initialize_child(obj, "pm", &s->pm, TYPE_PIIX4_PM); + qdev_prop_set_uint32(DEVICE(&s->pm), "smb_io_base", 0x1100); + qdev_prop_set_bit(DEVICE(&s->pm), "smm-enabled", 0); +} + +static void piix4_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = piix4_realize; + k->vendor_id = PCI_VENDOR_ID_INTEL; + k->device_id = PCI_DEVICE_ID_INTEL_82371AB_0; + k->class_id = PCI_CLASS_BRIDGE_ISA; + dc->reset = piix4_isa_reset; + dc->desc = "ISA bridge"; + dc->vmsd = &vmstate_piix4; + /* + * Reason: part of PIIX4 southbridge, needs to be wired up, + * e.g. by mips_malta_init() + */ + dc->user_creatable = false; + dc->hotpluggable = false; +} + +static const TypeInfo piix4_info = { + .name = TYPE_PIIX4_PCI_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PIIX4State), + .instance_init = piix4_init, + .class_init = piix4_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + static void piix3_register_types(void) { type_register_static(&piix_pci_type_info); type_register_static(&piix3_info); + type_register_static(&piix4_info); } type_init(piix3_register_types) diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c deleted file mode 100644 index 71899aaa69..0000000000 --- a/hw/isa/piix4.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * QEMU PIIX4 PCI Bridge Emulation - * - * Copyright (c) 2006 Fabrice Bellard - * Copyright (c) 2018 Hervé Poussineau - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "hw/irq.h" -#include "hw/southbridge/piix.h" -#include "hw/pci/pci.h" -#include "hw/ide/piix.h" -#include "hw/isa/isa.h" -#include "hw/intc/i8259.h" -#include "hw/dma/i8257.h" -#include "hw/timer/i8254.h" -#include "hw/rtc/mc146818rtc.h" -#include "hw/ide/pci.h" -#include "hw/acpi/piix4.h" -#include "hw/usb/hcd-uhci.h" -#include "migration/vmstate.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" -#include "qom/object.h" - -typedef struct PIIXState PIIX4State; - -DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE) - -static void piix4_set_irq(void *opaque, int irq_num, int level) -{ - int i, pic_irq, pic_level; - PIIX4State *s = opaque; - PCIBus *bus = pci_get_bus(&s->dev); - - /* now we change the pic irq level according to the piix irq mappings */ - /* XXX: optimize */ - pic_irq = s->dev.config[PIIX_PIRQCA + irq_num]; - if (pic_irq < ISA_NUM_IRQS) { - /* The pic level is the logical OR of all the PCI irqs mapped to it. */ - pic_level = 0; - for (i = 0; i < PIIX_NUM_PIRQS; i++) { - if (pic_irq == s->dev.config[PIIX_PIRQCA + i]) { - pic_level |= pci_bus_get_irq_level(bus, i); - } - } - qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level); - } -} - -static void piix4_isa_reset(DeviceState *dev) -{ - PIIX4State *d = PIIX4_PCI_DEVICE(dev); - uint8_t *pci_conf = d->dev.config; - - pci_conf[0x04] = 0x07; // master, memory and I/O - pci_conf[0x05] = 0x00; - pci_conf[0x06] = 0x00; - pci_conf[0x07] = 0x02; // PCI_status_devsel_medium - pci_conf[0x4c] = 0x4d; - pci_conf[0x4e] = 0x03; - pci_conf[0x4f] = 0x00; - pci_conf[0x60] = 0x80; - pci_conf[0x61] = 0x80; - pci_conf[0x62] = 0x80; - pci_conf[0x63] = 0x80; - pci_conf[0x69] = 0x02; - pci_conf[0x70] = 0x80; - pci_conf[0x76] = 0x0c; - pci_conf[0x77] = 0x0c; - pci_conf[0x78] = 0x02; - pci_conf[0x79] = 0x00; - pci_conf[0x80] = 0x00; - pci_conf[0x82] = 0x00; - pci_conf[0xa0] = 0x08; - pci_conf[0xa2] = 0x00; - pci_conf[0xa3] = 0x00; - pci_conf[0xa4] = 0x00; - pci_conf[0xa5] = 0x00; - pci_conf[0xa6] = 0x00; - pci_conf[0xa7] = 0x00; - pci_conf[0xa8] = 0x0f; - pci_conf[0xaa] = 0x00; - pci_conf[0xab] = 0x00; - pci_conf[0xac] = 0x00; - pci_conf[0xae] = 0x00; - - d->rcr = 0; -} - -static int piix4_post_load(void *opaque, int version_id) -{ - PIIX4State *s = opaque; - - if (version_id == 2) { - s->rcr = 0; - } - - return 0; -} - -static const VMStateDescription vmstate_piix4 = { - .name = "PIIX4", - .version_id = 3, - .minimum_version_id = 2, - .post_load = piix4_post_load, - .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, PIIX4State), - VMSTATE_UINT8_V(rcr, PIIX4State, 3), - VMSTATE_END_OF_LIST() - } -}; - -static void piix4_request_i8259_irq(void *opaque, int irq, int level) -{ - PIIX4State *s = opaque; - qemu_set_irq(s->cpu_intr, level); -} - -static void rcr_write(void *opaque, hwaddr addr, uint64_t val, - unsigned int len) -{ - PIIX4State *s = opaque; - - if (val & 4) { - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - return; - } - - s->rcr = val & 2; /* keep System Reset type only */ -} - -static uint64_t rcr_read(void *opaque, hwaddr addr, unsigned int len) -{ - PIIX4State *s = opaque; - - return s->rcr; -} - -static const MemoryRegionOps rcr_ops = { - .read = rcr_read, - .write = rcr_write, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { - .min_access_size = 1, - .max_access_size = 1, - }, -}; - -static void piix4_realize(PCIDevice *dev, Error **errp) -{ - PIIX4State *s = PIIX4_PCI_DEVICE(dev); - PCIBus *pci_bus = pci_get_bus(dev); - ISABus *isa_bus; - qemu_irq *i8259_out_irq; - qemu_irq *i8259; - size_t i; - - isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev), - pci_address_space_io(dev), errp); - if (!isa_bus) { - return; - } - - qdev_init_gpio_out_named(DEVICE(dev), &s->cpu_intr, - "intr", 1); - - memory_region_init_io(&s->rcr_mem, OBJECT(dev), &rcr_ops, s, - "reset-control", 1); - memory_region_add_subregion_overlap(pci_address_space_io(dev), - PIIX_RCR_IOPORT, &s->rcr_mem, 1); - - /* initialize i8259 pic */ - i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1); - i8259 = i8259_init(isa_bus, *i8259_out_irq); - - for (i = 0; i < ISA_NUM_IRQS; i++) { - s->isa_irqs_in[i] = i8259[i]; - } - - g_free(i8259); - - /* initialize ISA irqs */ - isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); - - /* initialize pit */ - i8254_pit_init(isa_bus, 0x40, 0, NULL); - - /* DMA */ - i8257_dma_init(isa_bus, 0); - - /* RTC */ - qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000); - if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) { - return; - } - s->rtc.irq = isa_get_irq(ISA_DEVICE(&s->rtc), s->rtc.isairq); - - /* IDE */ - qdev_prop_set_int32(DEVICE(&s->ide), "addr", dev->devfn + 1); - if (!qdev_realize(DEVICE(&s->ide), BUS(pci_bus), errp)) { - return; - } - - /* USB */ - qdev_prop_set_int32(DEVICE(&s->uhci), "addr", dev->devfn + 2); - if (!qdev_realize(DEVICE(&s->uhci), BUS(pci_bus), errp)) { - return; - } - - /* ACPI controller */ - qdev_prop_set_int32(DEVICE(&s->pm), "addr", dev->devfn + 3); - if (!qdev_realize(DEVICE(&s->pm), BUS(pci_bus), errp)) { - return; - } - qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa_irqs_in[9]); - - pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); -} - -static void piix4_init(Object *obj) -{ - PIIX4State *s = PIIX4_PCI_DEVICE(obj); - - object_initialize_child(obj, "rtc", &s->rtc, TYPE_MC146818_RTC); - object_initialize_child(obj, "ide", &s->ide, TYPE_PIIX4_IDE); - object_initialize_child(obj, "uhci", &s->uhci, TYPE_PIIX4_USB_UHCI); - - object_initialize_child(obj, "pm", &s->pm, TYPE_PIIX4_PM); - qdev_prop_set_uint32(DEVICE(&s->pm), "smb_io_base", 0x1100); - qdev_prop_set_bit(DEVICE(&s->pm), "smm-enabled", 0); -} - -static void piix4_class_init(ObjectClass *klass, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - - k->realize = piix4_realize; - k->vendor_id = PCI_VENDOR_ID_INTEL; - k->device_id = PCI_DEVICE_ID_INTEL_82371AB_0; - k->class_id = PCI_CLASS_BRIDGE_ISA; - dc->reset = piix4_isa_reset; - dc->desc = "ISA bridge"; - dc->vmsd = &vmstate_piix4; - /* - * Reason: part of PIIX4 southbridge, needs to be wired up, - * e.g. by mips_malta_init() - */ - dc->user_creatable = false; - dc->hotpluggable = false; -} - -static const TypeInfo piix4_info = { - .name = TYPE_PIIX4_PCI_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PIIX4State), - .instance_init = piix4_init, - .class_init = piix4_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, -}; - -static void piix4_register_types(void) -{ - type_register_static(&piix4_info); -} - -type_init(piix4_register_types) diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig index da3a37e215..ac1eb06a51 100644 --- a/hw/mips/Kconfig +++ b/hw/mips/Kconfig @@ -2,7 +2,7 @@ config MALTA bool select GT64120 select ISA_SUPERIO - select PIIX4 + select PIIX config MIPSSIM bool From 2d7630f5c7dbe5ec1fc42082d135eb6e5f159ebd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:28 +0200 Subject: [PATCH 41/62] hw/isa/piix: Allow for optional PIC creation in PIIX3 In the PC machine, the PIC is created in board code to allow it to be virtualized with various virtualization techniques. So explicitly disable its creation in the PC machine via a property which defaults to enabled. Once the PIIX implementations are consolidated this default will keep Malta working without further ado. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-21-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 ++ hw/isa/piix.c | 21 +++++++++++++++++++-- include/hw/southbridge/piix.h | 1 + 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 70cffcfe4f..fa39afd891 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -268,6 +268,8 @@ static void pc_init1(MachineState *machine, object_property_set_bool(OBJECT(pci_dev), "has-acpi", x86_machine_is_acpi_enabled(x86ms), &error_abort); + object_property_set_bool(OBJECT(pci_dev), "has-pic", false, + &error_abort); qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100); object_property_set_bool(OBJECT(pci_dev), "smm-enabled", x86_machine_is_smm_enabled(x86ms), diff --git a/hw/isa/piix.c b/hw/isa/piix.c index f6da334c6f..d6d9ac6473 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -106,7 +106,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int level) } } -static void piix4_request_i8259_irq(void *opaque, int irq, int level) +static void piix_request_i8259_irq(void *opaque, int irq, int level) { PIIX4State *s = opaque; qemu_set_irq(s->cpu_intr, level); @@ -343,6 +343,22 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) memory_region_add_subregion_overlap(pci_address_space_io(dev), PIIX_RCR_IOPORT, &d->rcr_mem, 1); + /* PIC */ + if (d->has_pic) { + qemu_irq *i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, d, + 1); + qemu_irq *i8259 = i8259_init(isa_bus, *i8259_out_irq); + size_t i; + + for (i = 0; i < ISA_NUM_IRQS; i++) { + d->isa_irqs_in[i] = i8259[i]; + } + + g_free(i8259); + + qdev_init_gpio_out_named(DEVICE(dev), &d->cpu_intr, "intr", 1); + } + isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in); i8257_dma_init(isa_bus, 0); @@ -419,6 +435,7 @@ static void pci_piix3_init(Object *obj) static Property pci_piix3_props[] = { DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0), DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true), + DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true), DEFINE_PROP_BOOL("has-usb", PIIXState, has_usb, true), DEFINE_PROP_BOOL("smm-enabled", PIIXState, smm_enabled, false), DEFINE_PROP_END_OF_LIST(), @@ -514,7 +531,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp) PIIX_RCR_IOPORT, &s->rcr_mem, 1); /* initialize i8259 pic */ - i8259_out_irq = qemu_allocate_irqs(piix4_request_i8259_irq, s, 1); + i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, s, 1); i8259 = i8259_init(isa_bus, *i8259_out_irq); for (i = 0; i < ISA_NUM_IRQS; i++) { diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index dd5f7b31c0..08491693b4 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -69,6 +69,7 @@ struct PIIXState { MemoryRegion rcr_mem; bool has_acpi; + bool has_pic; bool has_usb; bool smm_enabled; }; From ac4330359bee7a8cf3dab7f8639190dd17f1f4d1 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:29 +0200 Subject: [PATCH 42/62] hw/isa/piix: Allow for optional PIT creation in PIIX3 In the PC machine, the PIT is created in board code to allow it to be virtualized with various virtualization techniques. So explicitly disable its creation in the PC machine via a property which defaults to enabled. Once the PIIX implementations are consolidated this default will keep Malta working without further ado. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-22-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 ++ hw/isa/piix.c | 6 ++++++ include/hw/southbridge/piix.h | 1 + 3 files changed, 9 insertions(+) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index fa39afd891..e38942a3c3 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -270,6 +270,8 @@ static void pc_init1(MachineState *machine, &error_abort); object_property_set_bool(OBJECT(pci_dev), "has-pic", false, &error_abort); + object_property_set_bool(OBJECT(pci_dev), "has-pit", false, + &error_abort); qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100); object_property_set_bool(OBJECT(pci_dev), "smm-enabled", x86_machine_is_smm_enabled(x86ms), diff --git a/hw/isa/piix.c b/hw/isa/piix.c index d6d9ac6473..270b8eb1f7 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -361,6 +361,11 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) isa_bus_register_input_irqs(isa_bus, d->isa_irqs_in); + /* PIT */ + if (d->has_pit) { + i8254_pit_init(isa_bus, 0x40, 0, NULL); + } + i8257_dma_init(isa_bus, 0); /* RTC */ @@ -436,6 +441,7 @@ static Property pci_piix3_props[] = { DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0), DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true), DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true), + DEFINE_PROP_BOOL("has-pit", PIIXState, has_pit, true), DEFINE_PROP_BOOL("has-usb", PIIXState, has_usb, true), DEFINE_PROP_BOOL("smm-enabled", PIIXState, smm_enabled, false), DEFINE_PROP_END_OF_LIST(), diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 08491693b4..86709ba2e4 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -70,6 +70,7 @@ struct PIIXState { bool has_acpi; bool has_pic; + bool has_pit; bool has_usb; bool smm_enabled; }; From f97479cad8447caa1d77c862a0890e6ff1a3acc6 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:30 +0200 Subject: [PATCH 43/62] hw/isa/piix: Harmonize names of reset control memory regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need for having different names here. Having the same name further allows code to be shared between PIIX3 and PIIX4. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-23-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index 270b8eb1f7..bd66fb7475 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -339,7 +339,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) } memory_region_init_io(&d->rcr_mem, OBJECT(dev), &rcr_ops, d, - "piix3-reset-control", 1); + "piix-reset-control", 1); memory_region_add_subregion_overlap(pci_address_space_io(dev), PIIX_RCR_IOPORT, &d->rcr_mem, 1); @@ -532,7 +532,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp) "intr", 1); memory_region_init_io(&s->rcr_mem, OBJECT(dev), &rcr_ops, s, - "reset-control", 1); + "piix-reset-control", 1); memory_region_add_subregion_overlap(pci_address_space_io(dev), PIIX_RCR_IOPORT, &s->rcr_mem, 1); From 7d6f26594bc1ea1f9e7d115051e63c3a71cf0b60 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:31 +0200 Subject: [PATCH 44/62] hw/isa/piix: Share PIIX3's base class with PIIX4 Having a common base class will allow for futher code sharing between PIIX3 and PIIX4. Moreover, it makes PIIX4 implement the acpi-dev-aml-interface. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-24-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 85 ++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 55 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index bd66fb7475..8f7d6c56a8 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -38,10 +38,6 @@ #include "migration/vmstate.h" #include "hw/acpi/acpi_aml_interface.h" -typedef struct PIIXState PIIX4State; - -DECLARE_INSTANCE_CHECKER(PIIX4State, PIIX4_PCI_DEVICE, TYPE_PIIX4_PCI_DEVICE) - static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq) { qemu_set_irq(piix3->isa_irqs_in[pic_irq], @@ -88,7 +84,7 @@ static void piix3_set_irq(void *opaque, int pirq, int level) static void piix4_set_irq(void *opaque, int irq_num, int level) { int i, pic_irq, pic_level; - PIIX4State *s = opaque; + PIIXState *s = opaque; PCIBus *bus = pci_get_bus(&s->dev); /* now we change the pic irq level according to the piix irq mappings */ @@ -108,7 +104,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int level) static void piix_request_i8259_irq(void *opaque, int irq, int level) { - PIIX4State *s = opaque; + PIIXState *s = opaque; qemu_set_irq(s->cpu_intr, level); } @@ -156,8 +152,9 @@ static void piix3_write_config(PCIDevice *dev, } } -static void piix_reset(PIIXState *d) +static void piix_reset(DeviceState *dev) { + PIIXState *d = PIIX_PCI_DEVICE(dev); uint8_t *pci_conf = d->dev.config; pci_conf[0x04] = 0x07; /* master, memory and I/O */ @@ -196,13 +193,6 @@ static void piix_reset(PIIXState *d) d->rcr = 0; } -static void piix3_reset(DeviceState *dev) -{ - PIIXState *d = PIIX_PCI_DEVICE(dev); - - piix_reset(d); -} - static int piix3_post_load(void *opaque, int version_id) { PIIXState *piix3 = opaque; @@ -227,7 +217,7 @@ static int piix3_post_load(void *opaque, int version_id) static int piix4_post_load(void *opaque, int version_id) { - PIIX4State *s = opaque; + PIIXState *s = opaque; if (version_id == 2) { s->rcr = 0; @@ -291,8 +281,8 @@ static const VMStateDescription vmstate_piix4 = { .minimum_version_id = 2, .post_load = piix4_post_load, .fields = (VMStateField[]) { - VMSTATE_PCI_DEVICE(dev, PIIX4State), - VMSTATE_UINT8_V(rcr, PIIX4State, 3), + VMSTATE_PCI_DEVICE(dev, PIIXState), + VMSTATE_UINT8_V(rcr, PIIXState, 3), VMSTATE_END_OF_LIST() } }; @@ -426,7 +416,7 @@ static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) qbus_build_aml(bus, scope); } -static void pci_piix3_init(Object *obj) +static void pci_piix_init(Object *obj) { PIIXState *d = PIIX_PCI_DEVICE(obj); @@ -434,7 +424,6 @@ static void pci_piix3_init(Object *obj) ISA_NUM_IRQS); object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); - object_initialize_child(obj, "ide", &d->ide, TYPE_PIIX3_IDE); } static Property pci_piix3_props[] = { @@ -447,27 +436,22 @@ static Property pci_piix3_props[] = { DEFINE_PROP_END_OF_LIST(), }; -static void pci_piix3_class_init(ObjectClass *klass, void *data) +static void pci_piix_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); - k->config_write = piix3_write_config; - dc->reset = piix3_reset; + dc->reset = piix_reset; dc->desc = "ISA bridge"; - dc->vmsd = &vmstate_piix3; dc->hotpluggable = false; k->vendor_id = PCI_VENDOR_ID_INTEL; - /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ - k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0; k->class_id = PCI_CLASS_BRIDGE_ISA; /* - * Reason: part of PIIX3 southbridge, needs to be wired up by + * Reason: part of PIIX southbridge, needs to be wired up by e.g. * pc_piix.c's pc_init1() */ dc->user_creatable = false; - device_class_set_props(dc, pci_piix3_props); adevc->build_dev_aml = build_pci_isa_aml; } @@ -475,9 +459,9 @@ static const TypeInfo piix_pci_type_info = { .name = TYPE_PIIX_PCI_DEVICE, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(PIIXState), - .instance_init = pci_piix3_init, + .instance_init = pci_piix_init, .abstract = true, - .class_init = pci_piix3_class_init, + .class_init = pci_piix_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { TYPE_ACPI_DEV_AML_IF }, @@ -500,22 +484,36 @@ static void piix3_realize(PCIDevice *dev, Error **errp) pci_bus_set_route_irq_fn(pci_bus, piix3_route_intx_pin_to_irq); } +static void piix3_init(Object *obj) +{ + PIIXState *d = PIIX_PCI_DEVICE(obj); + + object_initialize_child(obj, "ide", &d->ide, TYPE_PIIX3_IDE); +} + static void piix3_class_init(ObjectClass *klass, void *data) { + DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + k->config_write = piix3_write_config; k->realize = piix3_realize; + /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0; + dc->vmsd = &vmstate_piix3; + device_class_set_props(dc, pci_piix3_props); } static const TypeInfo piix3_info = { .name = TYPE_PIIX3_DEVICE, .parent = TYPE_PIIX_PCI_DEVICE, + .instance_init = piix3_init, .class_init = piix3_class_init, }; static void piix4_realize(PCIDevice *dev, Error **errp) { - PIIX4State *s = PIIX4_PCI_DEVICE(dev); + PIIXState *s = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); ISABus *isa_bus; qemu_irq *i8259_out_irq; @@ -584,18 +582,10 @@ static void piix4_realize(PCIDevice *dev, Error **errp) pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); } -static void piix4_isa_reset(DeviceState *dev) -{ - PIIX4State *s = PIIX4_PCI_DEVICE(dev); - - piix_reset(s); -} - static void piix4_init(Object *obj) { - PIIX4State *s = PIIX4_PCI_DEVICE(obj); + PIIXState *s = PIIX_PCI_DEVICE(obj); - object_initialize_child(obj, "rtc", &s->rtc, TYPE_MC146818_RTC); object_initialize_child(obj, "ide", &s->ide, TYPE_PIIX4_IDE); object_initialize_child(obj, "uhci", &s->uhci, TYPE_PIIX4_USB_UHCI); @@ -610,30 +600,15 @@ static void piix4_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); k->realize = piix4_realize; - k->vendor_id = PCI_VENDOR_ID_INTEL; k->device_id = PCI_DEVICE_ID_INTEL_82371AB_0; - k->class_id = PCI_CLASS_BRIDGE_ISA; - dc->reset = piix4_isa_reset; - dc->desc = "ISA bridge"; dc->vmsd = &vmstate_piix4; - /* - * Reason: part of PIIX4 southbridge, needs to be wired up, - * e.g. by mips_malta_init() - */ - dc->user_creatable = false; - dc->hotpluggable = false; } static const TypeInfo piix4_info = { .name = TYPE_PIIX4_PCI_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PIIX4State), + .parent = TYPE_PIIX_PCI_DEVICE, .instance_init = piix4_init, .class_init = piix4_class_init, - .interfaces = (InterfaceInfo[]) { - { INTERFACE_CONVENTIONAL_PCI_DEVICE }, - { }, - }, }; static void piix3_register_types(void) From 2922dbc28c74a4b3976cb4bc020980030ccfef67 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:32 +0200 Subject: [PATCH 45/62] hw/isa/piix: Reuse PIIX3 base class' realize method in PIIX4 Resolves duplicate code. Also makes PIIX4 respect the PIIX3 properties which get added, too. This allows for using PIIX4 in the PC machine. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-25-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 80 ++++++------------------------------------------- hw/mips/malta.c | 5 ++-- 2 files changed, 12 insertions(+), 73 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index 8f7d6c56a8..2ab799b95e 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -315,7 +315,8 @@ static const MemoryRegionOps rcr_ops = { }, }; -static void pci_piix3_realize(PCIDevice *dev, Error **errp) +static void pci_piix_realize(PCIDevice *dev, const char *uhci_type, + Error **errp) { PIIXState *d = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); @@ -374,8 +375,7 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) /* USB */ if (d->has_usb) { - object_initialize_child(OBJECT(dev), "uhci", &d->uhci, - TYPE_PIIX3_USB_UHCI); + object_initialize_child(OBJECT(dev), "uhci", &d->uhci, uhci_type); qdev_prop_set_int32(DEVICE(&d->uhci), "addr", dev->devfn + 2); if (!qdev_realize(DEVICE(&d->uhci), BUS(pci_bus), errp)) { return; @@ -426,7 +426,7 @@ static void pci_piix_init(Object *obj) object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); } -static Property pci_piix3_props[] = { +static Property pci_piix_props[] = { DEFINE_PROP_UINT32("smb_io_base", PIIXState, smb_io_base, 0), DEFINE_PROP_BOOL("has-acpi", PIIXState, has_acpi, true), DEFINE_PROP_BOOL("has-pic", PIIXState, has_pic, true), @@ -452,6 +452,7 @@ static void pci_piix_class_init(ObjectClass *klass, void *data) * pc_piix.c's pc_init1() */ dc->user_creatable = false; + device_class_set_props(dc, pci_piix_props); adevc->build_dev_aml = build_pci_isa_aml; } @@ -475,7 +476,7 @@ static void piix3_realize(PCIDevice *dev, Error **errp) PIIXState *piix3 = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); - pci_piix3_realize(dev, errp); + pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp); if (*errp) { return; } @@ -501,7 +502,6 @@ static void piix3_class_init(ObjectClass *klass, void *data) /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0; dc->vmsd = &vmstate_piix3; - device_class_set_props(dc, pci_piix3_props); } static const TypeInfo piix3_info = { @@ -513,72 +513,15 @@ static const TypeInfo piix3_info = { static void piix4_realize(PCIDevice *dev, Error **errp) { + ERRP_GUARD(); PIIXState *s = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); - ISABus *isa_bus; - qemu_irq *i8259_out_irq; - qemu_irq *i8259; - size_t i; - isa_bus = isa_bus_new(DEVICE(dev), pci_address_space(dev), - pci_address_space_io(dev), errp); - if (!isa_bus) { + pci_piix_realize(dev, TYPE_PIIX4_USB_UHCI, errp); + if (*errp) { return; } - qdev_init_gpio_out_named(DEVICE(dev), &s->cpu_intr, - "intr", 1); - - memory_region_init_io(&s->rcr_mem, OBJECT(dev), &rcr_ops, s, - "piix-reset-control", 1); - memory_region_add_subregion_overlap(pci_address_space_io(dev), - PIIX_RCR_IOPORT, &s->rcr_mem, 1); - - /* initialize i8259 pic */ - i8259_out_irq = qemu_allocate_irqs(piix_request_i8259_irq, s, 1); - i8259 = i8259_init(isa_bus, *i8259_out_irq); - - for (i = 0; i < ISA_NUM_IRQS; i++) { - s->isa_irqs_in[i] = i8259[i]; - } - - g_free(i8259); - - /* initialize ISA irqs */ - isa_bus_register_input_irqs(isa_bus, s->isa_irqs_in); - - /* initialize pit */ - i8254_pit_init(isa_bus, 0x40, 0, NULL); - - /* DMA */ - i8257_dma_init(isa_bus, 0); - - /* RTC */ - qdev_prop_set_int32(DEVICE(&s->rtc), "base_year", 2000); - if (!qdev_realize(DEVICE(&s->rtc), BUS(isa_bus), errp)) { - return; - } - s->rtc.irq = isa_get_irq(ISA_DEVICE(&s->rtc), s->rtc.isairq); - - /* IDE */ - qdev_prop_set_int32(DEVICE(&s->ide), "addr", dev->devfn + 1); - if (!qdev_realize(DEVICE(&s->ide), BUS(pci_bus), errp)) { - return; - } - - /* USB */ - qdev_prop_set_int32(DEVICE(&s->uhci), "addr", dev->devfn + 2); - if (!qdev_realize(DEVICE(&s->uhci), BUS(pci_bus), errp)) { - return; - } - - /* ACPI controller */ - qdev_prop_set_int32(DEVICE(&s->pm), "addr", dev->devfn + 3); - if (!qdev_realize(DEVICE(&s->pm), BUS(pci_bus), errp)) { - return; - } - qdev_connect_gpio_out(DEVICE(&s->pm), 0, s->isa_irqs_in[9]); - pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); } @@ -587,11 +530,6 @@ static void piix4_init(Object *obj) PIIXState *s = PIIX_PCI_DEVICE(obj); object_initialize_child(obj, "ide", &s->ide, TYPE_PIIX4_IDE); - object_initialize_child(obj, "uhci", &s->uhci, TYPE_PIIX4_USB_UHCI); - - object_initialize_child(obj, "pm", &s->pm, TYPE_PIIX4_PM); - qdev_prop_set_uint32(DEVICE(&s->pm), "smb_io_base", 0x1100); - qdev_prop_set_bit(DEVICE(&s->pm), "smm-enabled", 0); } static void piix4_class_init(ObjectClass *klass, void *data) diff --git a/hw/mips/malta.c b/hw/mips/malta.c index dac27fad9d..155f3c1cc8 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -1238,8 +1238,9 @@ void mips_malta_init(MachineState *machine) pci_bus_map_irqs(pci_bus, malta_pci_slot_get_pirq); /* Southbridge */ - piix4 = pci_create_simple_multifunction(pci_bus, PIIX4_PCI_DEVFN, - TYPE_PIIX4_PCI_DEVICE); + piix4 = pci_new_multifunction(PIIX4_PCI_DEVFN, TYPE_PIIX4_PCI_DEVICE); + qdev_prop_set_uint32(DEVICE(piix4), "smb_io_base", 0x1100); + pci_realize_and_unref(piix4, pci_bus, &error_fatal); isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix4), "isa.0")); dev = DEVICE(object_resolve_path_component(OBJECT(piix4), "ide")); From 2a62c47926420128bb23edd2ba7d4b339db86660 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:33 +0200 Subject: [PATCH 46/62] hw/isa/piix: Rename functions to be shared for PCI interrupt triggering PIIX4 will get the same optimizations which are already implemented for PIIX3. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-26-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index 2ab799b95e..449c1baaab 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -38,47 +38,47 @@ #include "migration/vmstate.h" #include "hw/acpi/acpi_aml_interface.h" -static void piix3_set_irq_pic(PIIXState *piix3, int pic_irq) +static void piix_set_irq_pic(PIIXState *s, int pic_irq) { - qemu_set_irq(piix3->isa_irqs_in[pic_irq], - !!(piix3->pic_levels & + qemu_set_irq(s->isa_irqs_in[pic_irq], + !!(s->pic_levels & (((1ULL << PIIX_NUM_PIRQS) - 1) << (pic_irq * PIIX_NUM_PIRQS)))); } -static void piix3_set_irq_level_internal(PIIXState *piix3, int pirq, int level) +static void piix_set_pci_irq_level_internal(PIIXState *s, int pirq, int level) { int pic_irq; uint64_t mask; - pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq]; + pic_irq = s->dev.config[PIIX_PIRQCA + pirq]; if (pic_irq >= ISA_NUM_IRQS) { return; } mask = 1ULL << ((pic_irq * PIIX_NUM_PIRQS) + pirq); - piix3->pic_levels &= ~mask; - piix3->pic_levels |= mask * !!level; + s->pic_levels &= ~mask; + s->pic_levels |= mask * !!level; } -static void piix3_set_irq_level(PIIXState *piix3, int pirq, int level) +static void piix_set_pci_irq_level(PIIXState *s, int pirq, int level) { int pic_irq; - pic_irq = piix3->dev.config[PIIX_PIRQCA + pirq]; + pic_irq = s->dev.config[PIIX_PIRQCA + pirq]; if (pic_irq >= ISA_NUM_IRQS) { return; } - piix3_set_irq_level_internal(piix3, pirq, level); + piix_set_pci_irq_level_internal(s, pirq, level); - piix3_set_irq_pic(piix3, pic_irq); + piix_set_irq_pic(s, pic_irq); } -static void piix3_set_irq(void *opaque, int pirq, int level) +static void piix_set_pci_irq(void *opaque, int pirq, int level) { - PIIXState *piix3 = opaque; - piix3_set_irq_level(piix3, pirq, level); + PIIXState *s = opaque; + piix_set_pci_irq_level(s, pirq, level); } static void piix4_set_irq(void *opaque, int irq_num, int level) @@ -108,10 +108,10 @@ static void piix_request_i8259_irq(void *opaque, int irq, int level) qemu_set_irq(s->cpu_intr, level); } -static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) +static PCIINTxRoute piix_route_intx_pin_to_irq(void *opaque, int pin) { - PIIXState *piix3 = opaque; - int irq = piix3->dev.config[PIIX_PIRQCA + pin]; + PCIDevice *pci_dev = opaque; + int irq = pci_dev->config[PIIX_PIRQCA + pin]; PCIINTxRoute route; if (irq < ISA_NUM_IRQS) { @@ -125,29 +125,29 @@ static PCIINTxRoute piix3_route_intx_pin_to_irq(void *opaque, int pin) } /* irq routing is changed. so rebuild bitmap */ -static void piix3_update_irq_levels(PIIXState *piix3) +static void piix_update_pci_irq_levels(PIIXState *s) { - PCIBus *bus = pci_get_bus(&piix3->dev); + PCIBus *bus = pci_get_bus(&s->dev); int pirq; - piix3->pic_levels = 0; + s->pic_levels = 0; for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) { - piix3_set_irq_level(piix3, pirq, pci_bus_get_irq_level(bus, pirq)); + piix_set_pci_irq_level(s, pirq, pci_bus_get_irq_level(bus, pirq)); } } -static void piix3_write_config(PCIDevice *dev, - uint32_t address, uint32_t val, int len) +static void piix_write_config(PCIDevice *dev, uint32_t address, uint32_t val, + int len) { pci_default_write_config(dev, address, val, len); if (ranges_overlap(address, len, PIIX_PIRQCA, 4)) { - PIIXState *piix3 = PIIX_PCI_DEVICE(dev); + PIIXState *s = PIIX_PCI_DEVICE(dev); int pic_irq; - pci_bus_fire_intx_routing_notifier(pci_get_bus(&piix3->dev)); - piix3_update_irq_levels(piix3); + pci_bus_fire_intx_routing_notifier(pci_get_bus(&s->dev)); + piix_update_pci_irq_levels(s); for (pic_irq = 0; pic_irq < ISA_NUM_IRQS; pic_irq++) { - piix3_set_irq_pic(piix3, pic_irq); + piix_set_irq_pic(s, pic_irq); } } } @@ -193,9 +193,9 @@ static void piix_reset(DeviceState *dev) d->rcr = 0; } -static int piix3_post_load(void *opaque, int version_id) +static int piix_post_load(void *opaque, int version_id) { - PIIXState *piix3 = opaque; + PIIXState *s = opaque; int pirq; /* @@ -207,10 +207,10 @@ static int piix3_post_load(void *opaque, int version_id) * Here, we update irq levels without raising the interrupt. * Interrupt state will be deserialized separately through the i8259. */ - piix3->pic_levels = 0; + s->pic_levels = 0; for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) { - piix3_set_irq_level_internal(piix3, pirq, - pci_bus_get_irq_level(pci_get_bus(&piix3->dev), pirq)); + piix_set_pci_irq_level_internal(s, pirq, + pci_bus_get_irq_level(pci_get_bus(&s->dev), pirq)); } return 0; } @@ -261,7 +261,7 @@ static const VMStateDescription vmstate_piix3 = { .name = "PIIX3", .version_id = 3, .minimum_version_id = 2, - .post_load = piix3_post_load, + .post_load = piix_post_load, .pre_save = piix3_pre_save, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, PIIXState), @@ -481,8 +481,8 @@ static void piix3_realize(PCIDevice *dev, Error **errp) return; } - pci_bus_irqs(pci_bus, piix3_set_irq, piix3, PIIX_NUM_PIRQS); - pci_bus_set_route_irq_fn(pci_bus, piix3_route_intx_pin_to_irq); + pci_bus_irqs(pci_bus, piix_set_pci_irq, piix3, PIIX_NUM_PIRQS); + pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq); } static void piix3_init(Object *obj) @@ -497,7 +497,7 @@ static void piix3_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->config_write = piix3_write_config; + k->config_write = piix_write_config; k->realize = piix3_realize; /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0; From 0c9fd5a309c8449c70c727f271baf554491d4ecd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:34 +0200 Subject: [PATCH 47/62] hw/isa/piix: Reuse PIIX3's PCI interrupt triggering in PIIX4 Speeds up PIIX4 which resolves an old TODO. Also makes PIIX4 compatible with Xen which relies on pci_bus_fire_intx_routing_notifier() to be fired. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-27-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index 449c1baaab..17677c2126 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -81,27 +81,6 @@ static void piix_set_pci_irq(void *opaque, int pirq, int level) piix_set_pci_irq_level(s, pirq, level); } -static void piix4_set_irq(void *opaque, int irq_num, int level) -{ - int i, pic_irq, pic_level; - PIIXState *s = opaque; - PCIBus *bus = pci_get_bus(&s->dev); - - /* now we change the pic irq level according to the piix irq mappings */ - /* XXX: optimize */ - pic_irq = s->dev.config[PIIX_PIRQCA + irq_num]; - if (pic_irq < ISA_NUM_IRQS) { - /* The pic level is the logical OR of all the PCI irqs mapped to it. */ - pic_level = 0; - for (i = 0; i < PIIX_NUM_PIRQS; i++) { - if (pic_irq == s->dev.config[PIIX_PIRQCA + i]) { - pic_level |= pci_bus_get_irq_level(bus, i); - } - } - qemu_set_irq(s->isa_irqs_in[pic_irq], pic_level); - } -} - static void piix_request_i8259_irq(void *opaque, int irq, int level) { PIIXState *s = opaque; @@ -223,7 +202,7 @@ static int piix4_post_load(void *opaque, int version_id) s->rcr = 0; } - return 0; + return piix_post_load(opaque, version_id); } static int piix3_pre_save(void *opaque) @@ -442,6 +421,7 @@ static void pci_piix_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); + k->config_write = piix_write_config; dc->reset = piix_reset; dc->desc = "ISA bridge"; dc->hotpluggable = false; @@ -497,7 +477,6 @@ static void piix3_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->config_write = piix_write_config; k->realize = piix3_realize; /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ k->device_id = PCI_DEVICE_ID_INTEL_82371SB_0; @@ -522,7 +501,7 @@ static void piix4_realize(PCIDevice *dev, Error **errp) return; } - pci_bus_irqs(pci_bus, piix4_set_irq, s, PIIX_NUM_PIRQS); + pci_bus_irqs(pci_bus, piix_set_pci_irq, s, PIIX_NUM_PIRQS); } static void piix4_init(Object *obj) From a203cc532a18fdb89942fa8c87d332a2a3470379 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:35 +0200 Subject: [PATCH 48/62] hw/isa/piix: Resolve duplicate code regarding PCI interrupt wiring Now that both PIIX3 and PIIX4 use piix_set_irq() to trigger PCI IRQs the wiring in the respective realize methods can be shared, too. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Message-Id: <20231007123843.127151-28-shentey@gmail.com> Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index 17677c2126..cba2098ca2 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -372,6 +372,8 @@ static void pci_piix_realize(PCIDevice *dev, const char *uhci_type, } qdev_connect_gpio_out(DEVICE(&d->pm), 0, d->isa_irqs_in[9]); } + + pci_bus_irqs(pci_bus, piix_set_pci_irq, d, PIIX_NUM_PIRQS); } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -453,7 +455,6 @@ static const TypeInfo piix_pci_type_info = { static void piix3_realize(PCIDevice *dev, Error **errp) { ERRP_GUARD(); - PIIXState *piix3 = PIIX_PCI_DEVICE(dev); PCIBus *pci_bus = pci_get_bus(dev); pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp); @@ -461,7 +462,6 @@ static void piix3_realize(PCIDevice *dev, Error **errp) return; } - pci_bus_irqs(pci_bus, piix_set_pci_irq, piix3, PIIX_NUM_PIRQS); pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq); } @@ -492,16 +492,7 @@ static const TypeInfo piix3_info = { static void piix4_realize(PCIDevice *dev, Error **errp) { - ERRP_GUARD(); - PIIXState *s = PIIX_PCI_DEVICE(dev); - PCIBus *pci_bus = pci_get_bus(dev); - pci_piix_realize(dev, TYPE_PIIX4_USB_UHCI, errp); - if (*errp) { - return; - } - - pci_bus_irqs(pci_bus, piix_set_pci_irq, s, PIIX_NUM_PIRQS); } static void piix4_init(Object *obj) From 12cecd45505c239dedc8bad2ded8eab8bc2f0391 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:36 +0200 Subject: [PATCH 49/62] hw/isa/piix: Implement multi-process QEMU support also for PIIX4 So far multi-process QEMU was only implemented for PIIX3. Move the support into the base class to achieve feature parity between both device models. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-29-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/piix.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/hw/isa/piix.c b/hw/isa/piix.c index cba2098ca2..04ebed5b52 100644 --- a/hw/isa/piix.c +++ b/hw/isa/piix.c @@ -374,6 +374,7 @@ static void pci_piix_realize(PCIDevice *dev, const char *uhci_type, } pci_bus_irqs(pci_bus, piix_set_pci_irq, d, PIIX_NUM_PIRQS); + pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq); } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -454,15 +455,7 @@ static const TypeInfo piix_pci_type_info = { static void piix3_realize(PCIDevice *dev, Error **errp) { - ERRP_GUARD(); - PCIBus *pci_bus = pci_get_bus(dev); - pci_piix_realize(dev, TYPE_PIIX3_USB_UHCI, errp); - if (*errp) { - return; - } - - pci_bus_set_route_irq_fn(pci_bus, piix_route_intx_pin_to_irq); } static void piix3_init(Object *obj) From aa0c9aec575f6dba4e6548ad9e5de1b1899d843e Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 7 Oct 2023 14:38:37 +0200 Subject: [PATCH 50/62] hw/i386/pc_piix: Make PIIX4 south bridge usable in PC machine QEMU's PIIX3 implementation actually models the real PIIX4, but with different PCI IDs. Usually, guests deal just fine with it. Still, in order to provide a more consistent illusion to guests, allow QEMU's PIIX4 implementation to be used in the PC machine. Signed-off-by: Bernhard Beschow Message-Id: <20231007123843.127151-30-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/target-i386-desc.rst.inc | 8 ++++ hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 61 +++++++++++++++++++++++++++- include/hw/i386/pc.h | 2 + 4 files changed, 71 insertions(+), 1 deletion(-) diff --git a/docs/system/target-i386-desc.rst.inc b/docs/system/target-i386-desc.rst.inc index 7d1fffacbe..5ebbcda9db 100644 --- a/docs/system/target-i386-desc.rst.inc +++ b/docs/system/target-i386-desc.rst.inc @@ -71,3 +71,11 @@ machine property, i.e. |qemu_system_x86| some.img \ -audiodev ,id= \ -machine pcspk-audiodev= + +Machine-specific options +~~~~~~~~~~~~~~~~~~~~~~~~ + +It supports the following machine-specific options: + +- ``x-south-bridge=PIIX3|piix4-isa`` (Experimental option to select a particular + south bridge. Default: ``PIIX3``) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 355e1b7cf6..6293f57a0c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1706,6 +1706,7 @@ static void pc_machine_initfn(Object *obj) #endif /* CONFIG_VMPORT */ pcms->max_ram_below_4g = 0; /* use default */ pcms->smbios_entry_point_type = pcmc->default_smbios_ep_type; + pcms->south_bridge = pcmc->default_south_bridge; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = pcmc->has_acpi_build; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index e38942a3c3..334d9a0299 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -262,7 +262,7 @@ static void pc_init1(MachineState *machine, DeviceState *dev; size_t i; - pci_dev = pci_new_multifunction(-1, TYPE_PIIX3_DEVICE); + pci_dev = pci_new_multifunction(-1, pcms->south_bridge); object_property_set_bool(OBJECT(pci_dev), "has-usb", machine_usb(machine), &error_abort); object_property_set_bool(OBJECT(pci_dev), "has-acpi", @@ -394,6 +394,56 @@ static void pc_init1(MachineState *machine, } } +typedef enum PCSouthBridgeOption { + PC_SOUTH_BRIDGE_OPTION_PIIX3, + PC_SOUTH_BRIDGE_OPTION_PIIX4, + PC_SOUTH_BRIDGE_OPTION_MAX, +} PCSouthBridgeOption; + +static const QEnumLookup PCSouthBridgeOption_lookup = { + .array = (const char *const[]) { + [PC_SOUTH_BRIDGE_OPTION_PIIX3] = TYPE_PIIX3_DEVICE, + [PC_SOUTH_BRIDGE_OPTION_PIIX4] = TYPE_PIIX4_PCI_DEVICE, + }, + .size = PC_SOUTH_BRIDGE_OPTION_MAX +}; + +#define NotifyVmexitOption_str(val) \ + qapi_enum_lookup(&NotifyVmexitOption_lookup, (val)) + +static int pc_get_south_bridge(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + int i; + + for (i = 0; i < PCSouthBridgeOption_lookup.size; i++) { + if (g_strcmp0(PCSouthBridgeOption_lookup.array[i], + pcms->south_bridge) == 0) { + return i; + } + } + + error_setg(errp, "Invalid south bridge value set"); + return 0; +} + +static void pc_set_south_bridge(Object *obj, int value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + if (value < 0) { + error_setg(errp, "Value can't be negative"); + return; + } + + if (value >= PCSouthBridgeOption_lookup.size) { + error_setg(errp, "Value too big"); + return; + } + + pcms->south_bridge = PCSouthBridgeOption_lookup.array[value]; +} + /* Looking for a pc_compat_2_4() function? It doesn't exist. * pc_compat_*() functions that run on machine-init time and * change global QEMU state are deprecated. Please don't create @@ -473,6 +523,8 @@ static void pc_xen_hvm_init(MachineState *machine) static void pc_i440fx_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + ObjectClass *oc = OBJECT_CLASS(m); + pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; pcmc->pci_root_uid = 0; pcmc->default_cpu_version = 1; @@ -484,6 +536,13 @@ static void pc_i440fx_machine_options(MachineClass *m) m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL); machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); + + object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption", + &PCSouthBridgeOption_lookup, + pc_get_south_bridge, + pc_set_south_bridge); + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); } static void pc_i440fx_8_2_machine_options(MachineClass *m) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index bec38cb92c..29a9724524 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -42,6 +42,7 @@ typedef struct PCMachineState { uint64_t max_ram_below_4g; OnOffAuto vmport; SmbiosEntryPointType smbios_entry_point_type; + const char *south_bridge; bool acpi_build_enabled; bool smbus_enabled; @@ -92,6 +93,7 @@ struct PCMachineClass { /* Device configuration: */ bool pci_enabled; bool kvmclock_enabled; + const char *default_south_bridge; /* Compat options: */ From f7bd1437ba877e6509fff2b5ffce82500bc79559 Mon Sep 17 00:00:00 2001 From: Li Feng Date: Mon, 9 Oct 2023 12:46:57 +0800 Subject: [PATCH 51/62] vhost-user-common: send get_inflight_fd once Currently the get_inflight_fd will be sent every time the device is started, and the backend will allocate shared memory to save the inflight state. If the backend finds that it receives the second get_inflight_fd, it will release the previous shared memory, which breaks inflight working logic. This patch is a preparation for the following patches. Signed-off-by: Li Feng Reviewed-by: Raphael Norwitz Message-Id: <20231009044735.941655-2-fengli@smartx.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/scsi/vhost-scsi-common.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c index a06f01af26..a61cd0e907 100644 --- a/hw/scsi/vhost-scsi-common.c +++ b/hw/scsi/vhost-scsi-common.c @@ -52,20 +52,28 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) vsc->dev.acked_features = vdev->guest_features; - assert(vsc->inflight == NULL); - vsc->inflight = g_new0(struct vhost_inflight, 1); - ret = vhost_dev_get_inflight(&vsc->dev, - vs->conf.virtqueue_size, - vsc->inflight); + ret = vhost_dev_prepare_inflight(&vsc->dev, vdev); if (ret < 0) { - error_report("Error get inflight: %d", -ret); + error_report("Error setting inflight format: %d", -ret); goto err_guest_notifiers; } - ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight); - if (ret < 0) { - error_report("Error set inflight: %d", -ret); - goto err_guest_notifiers; + if (vsc->inflight) { + if (!vsc->inflight->addr) { + ret = vhost_dev_get_inflight(&vsc->dev, + vs->conf.virtqueue_size, + vsc->inflight); + if (ret < 0) { + error_report("Error getting inflight: %d", -ret); + goto err_guest_notifiers; + } + } + + ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight); + if (ret < 0) { + error_report("Error setting inflight: %d", -ret); + goto err_guest_notifiers; + } } ret = vhost_dev_start(&vsc->dev, vdev, true); @@ -85,9 +93,6 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) return ret; err_guest_notifiers: - g_free(vsc->inflight); - vsc->inflight = NULL; - k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, false); err_host_notifiers: vhost_dev_disable_notifiers(&vsc->dev, vdev); @@ -111,12 +116,6 @@ void vhost_scsi_common_stop(VHostSCSICommon *vsc) } assert(ret >= 0); - if (vsc->inflight) { - vhost_dev_free_inflight(vsc->inflight); - g_free(vsc->inflight); - vsc->inflight = NULL; - } - vhost_dev_disable_notifiers(&vsc->dev, vdev); } From 4dfcc09f48c4f81a9a4e2300065edbe6b589a6ce Mon Sep 17 00:00:00 2001 From: Li Feng Date: Mon, 9 Oct 2023 12:46:58 +0800 Subject: [PATCH 52/62] vhost: move and rename the conn retry times Multiple devices need this macro, move it to a common header. Signed-off-by: Li Feng Reviewed-by: Raphael Norwitz Message-Id: <20231009044735.941655-3-fengli@smartx.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 4 +--- hw/virtio/vhost-user-gpio.c | 3 +-- include/hw/virtio/vhost.h | 2 ++ 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index eecf3f7a81..3c69fa47d5 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -32,8 +32,6 @@ #include "sysemu/sysemu.h" #include "sysemu/runstate.h" -#define REALIZE_CONNECTION_RETRIES 3 - static const int user_feature_bits[] = { VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_SEG_MAX, @@ -482,7 +480,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) s->inflight = g_new0(struct vhost_inflight, 1); s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues); - retries = REALIZE_CONNECTION_RETRIES; + retries = VU_REALIZE_CONN_RETRIES; assert(!*errp); do { if (*errp) { diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index 3d7fae3984..fc784e4213 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -15,7 +15,6 @@ #include "standard-headers/linux/virtio_ids.h" #include "trace.h" -#define REALIZE_CONNECTION_RETRIES 3 #define VHOST_NVQS 2 /* Features required from VirtIO */ @@ -365,7 +364,7 @@ static void vu_gpio_device_realize(DeviceState *dev, Error **errp) qemu_chr_fe_set_handlers(&gpio->chardev, NULL, NULL, vu_gpio_event, NULL, dev, NULL, true); - retries = REALIZE_CONNECTION_RETRIES; + retries = VU_REALIZE_CONN_RETRIES; g_assert(!*errp); do { if (*errp) { diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 00e0a669b8..5e8183f64a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -8,6 +8,8 @@ #define VHOST_F_DEVICE_IOTLB 63 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VU_REALIZE_CONN_RETRIES 3 + /* Generic structures common for any vhost based device. */ struct vhost_inflight { From 7962e432b4e40e4395a93aa121045c58f34195fb Mon Sep 17 00:00:00 2001 From: Li Feng Date: Mon, 9 Oct 2023 12:46:59 +0800 Subject: [PATCH 53/62] vhost-user-scsi: support reconnect to backend If the backend crashes and restarts, the device is broken. This patch adds reconnect for vhost-user-scsi. This patch also improves the error messages, and reports some silent errors. Tested with spdk backend. Signed-off-by: Li Feng Message-Id: <20231009044735.941655-4-fengli@smartx.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Manos Pitsidianakis --- hw/scsi/vhost-scsi-common.c | 16 +- hw/scsi/vhost-scsi.c | 6 +- hw/scsi/vhost-user-scsi.c | 201 +++++++++++++++++++++++--- include/hw/virtio/vhost-scsi-common.h | 2 +- include/hw/virtio/vhost-user-scsi.h | 6 + 5 files changed, 201 insertions(+), 30 deletions(-) diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c index a61cd0e907..4c8637045d 100644 --- a/hw/scsi/vhost-scsi-common.c +++ b/hw/scsi/vhost-scsi-common.c @@ -16,6 +16,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "hw/virtio/vhost.h" @@ -25,7 +26,7 @@ #include "hw/virtio/virtio-access.h" #include "hw/fw-path-provider.h" -int vhost_scsi_common_start(VHostSCSICommon *vsc) +int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp) { int ret, i; VirtIODevice *vdev = VIRTIO_DEVICE(vsc); @@ -35,18 +36,19 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) VirtIOSCSICommon *vs = (VirtIOSCSICommon *)vsc; if (!k->set_guest_notifiers) { - error_report("binding does not support guest notifiers"); + error_setg(errp, "binding does not support guest notifiers"); return -ENOSYS; } ret = vhost_dev_enable_notifiers(&vsc->dev, vdev); if (ret < 0) { + error_setg_errno(errp, -ret, "Error enabling host notifiers"); return ret; } ret = k->set_guest_notifiers(qbus->parent, vsc->dev.nvqs, true); if (ret < 0) { - error_report("Error binding guest notifier"); + error_setg_errno(errp, -ret, "Error binding guest notifier"); goto err_host_notifiers; } @@ -54,7 +56,7 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) ret = vhost_dev_prepare_inflight(&vsc->dev, vdev); if (ret < 0) { - error_report("Error setting inflight format: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight format"); goto err_guest_notifiers; } @@ -64,21 +66,21 @@ int vhost_scsi_common_start(VHostSCSICommon *vsc) vs->conf.virtqueue_size, vsc->inflight); if (ret < 0) { - error_report("Error getting inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error getting inflight"); goto err_guest_notifiers; } } ret = vhost_dev_set_inflight(&vsc->dev, vsc->inflight); if (ret < 0) { - error_report("Error setting inflight: %d", -ret); + error_setg_errno(errp, -ret, "Error setting inflight"); goto err_guest_notifiers; } } ret = vhost_dev_start(&vsc->dev, vdev, true); if (ret < 0) { - error_report("Error start vhost dev"); + error_setg_errno(errp, -ret, "Error starting vhost dev"); goto err_guest_notifiers; } diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 443f67daa4..95cadb93e7 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -75,6 +75,7 @@ static int vhost_scsi_start(VHostSCSI *s) int ret, abi_version; VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); const VhostOps *vhost_ops = vsc->dev.vhost_ops; + Error *local_err = NULL; ret = vhost_ops->vhost_scsi_get_abi_version(&vsc->dev, &abi_version); if (ret < 0) { @@ -88,14 +89,15 @@ static int vhost_scsi_start(VHostSCSI *s) return -ENOSYS; } - ret = vhost_scsi_common_start(vsc); + ret = vhost_scsi_common_start(vsc, &local_err); if (ret < 0) { + error_reportf_err(local_err, "Error starting vhost-scsi"); return ret; } ret = vhost_scsi_set_endpoint(s); if (ret < 0) { - error_report("Error setting vhost-scsi endpoint"); + error_reportf_err(local_err, "Error setting vhost-scsi endpoint"); vhost_scsi_common_stop(vsc); } diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index b7c6100f3e..24c250d3f8 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -39,26 +39,56 @@ static const int user_feature_bits[] = { VHOST_INVALID_FEATURE_BIT }; +static int vhost_user_scsi_start(VHostUserSCSI *s, Error **errp) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + int ret; + + ret = vhost_scsi_common_start(vsc, errp); + s->started_vu = !(ret < 0); + + return ret; +} + +static void vhost_user_scsi_stop(VHostUserSCSI *s) +{ + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + + if (!s->started_vu) { + return; + } + s->started_vu = false; + + vhost_scsi_common_stop(vsc); +} + static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCSI *s = (VHostUserSCSI *)vdev; + DeviceState *dev = DEVICE(vdev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + bool should_start = virtio_device_should_start(vdev, status); + Error *local_err = NULL; + int ret; - if (vhost_dev_is_started(&vsc->dev) == start) { + if (!s->connected) { return; } - if (start) { - int ret; + if (vhost_dev_is_started(&vsc->dev) == should_start) { + return; + } - ret = vhost_scsi_common_start(vsc); + if (should_start) { + ret = vhost_user_scsi_start(s, &local_err); if (ret < 0) { - error_report("unable to start vhost-user-scsi: %s", strerror(-ret)); - exit(1); + error_reportf_err(local_err, "unable to start vhost-user-scsi: %s", + strerror(-ret)); + qemu_chr_fe_disconnect(&vs->conf.chardev); } } else { - vhost_scsi_common_stop(vsc); + vhost_user_scsi_stop(s); } } @@ -66,14 +96,124 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) { } +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + int ret = 0; + + if (s->connected) { + return 0; + } + s->connected = true; + + vsc->dev.num_queues = vs->conf.num_queues; + vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; + vsc->dev.vqs = s->vhost_vqs; + vsc->dev.vq_index = 0; + vsc->dev.backend_features = 0; + + ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0, + errp); + if (ret < 0) { + return ret; + } + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + ret = vhost_user_scsi_start(s, errp); + } + + return ret; +} + +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event); + +static void vhost_user_scsi_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + + if (!s->connected) { + return; + } + s->connected = false; + + vhost_user_scsi_stop(s); + + vhost_dev_cleanup(&vsc->dev); + + /* Re-instate the event handler for new connections */ + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, + vhost_user_scsi_event, NULL, dev, NULL, true); +} + +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCSI *s = VHOST_USER_SCSI(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + Error *local_err = NULL; + + switch (event) { + case CHR_EVENT_OPENED: + if (vhost_user_scsi_connect(dev, &local_err) < 0) { + error_report_err(local_err); + qemu_chr_fe_disconnect(&vs->conf.chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + /* defer close until later to avoid circular close */ + vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, + vhost_user_scsi_disconnect); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp) +{ + DeviceState *dev = DEVICE(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + int ret; + + s->connected = false; + + ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp); + if (ret < 0) { + return ret; + } + + ret = vhost_user_scsi_connect(dev, errp); + if (ret < 0) { + qemu_chr_fe_disconnect(&vs->conf.chardev); + return ret; + } + assert(s->connected); + + return 0; +} + static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); VHostUserSCSI *s = VHOST_USER_SCSI(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - struct vhost_virtqueue *vqs = NULL; Error *err = NULL; int ret; + int retries = VU_REALIZE_CONN_RETRIES; if (!vs->conf.chardev.chr) { error_setg(errp, "vhost-user-scsi: missing chardev"); @@ -92,18 +232,28 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) goto free_virtio; } - vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues; - vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs); - vsc->dev.vq_index = 0; - vsc->dev.backend_features = 0; - vqs = vsc->dev.vqs; + vsc->inflight = g_new0(struct vhost_inflight, 1); + s->vhost_vqs = g_new0(struct vhost_virtqueue, + VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues); + + assert(!*errp); + do { + if (*errp) { + error_prepend(errp, "Reconnecting after error: "); + error_report_err(*errp); + *errp = NULL; + } + ret = vhost_user_scsi_realize_connect(s, errp); + } while (ret < 0 && retries--); - ret = vhost_dev_init(&vsc->dev, &s->vhost_user, - VHOST_BACKEND_TYPE_USER, 0, errp); if (ret < 0) { goto free_vhost; } + /* we're fully initialized, now we can operate, so add the handler */ + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, + vhost_user_scsi_event, NULL, (void *)dev, + NULL, true); /* Channel and lun both are 0 for bootable vhost-user-scsi disk */ vsc->channel = 0; vsc->lun = 0; @@ -112,8 +262,12 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) return; free_vhost: + g_free(s->vhost_vqs); + s->vhost_vqs = NULL; + g_free(vsc->inflight); + vsc->inflight = NULL; vhost_user_cleanup(&s->vhost_user); - g_free(vqs); + free_virtio: virtio_scsi_common_unrealize(dev); } @@ -123,16 +277,23 @@ static void vhost_user_scsi_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserSCSI *s = VHOST_USER_SCSI(dev); VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); - struct vhost_virtqueue *vqs = vsc->dev.vqs; + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); /* This will stop the vhost backend. */ vhost_user_scsi_set_status(vdev, 0); + qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, NULL, + NULL, false); vhost_dev_cleanup(&vsc->dev); - g_free(vqs); + g_free(s->vhost_vqs); + s->vhost_vqs = NULL; + + vhost_dev_free_inflight(vsc->inflight); + g_free(vsc->inflight); + vsc->inflight = NULL; - virtio_scsi_common_unrealize(dev); vhost_user_cleanup(&s->vhost_user); + virtio_scsi_common_unrealize(dev); } static Property vhost_user_scsi_properties[] = { diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h index 18f115527c..c5d2c09455 100644 --- a/include/hw/virtio/vhost-scsi-common.h +++ b/include/hw/virtio/vhost-scsi-common.h @@ -39,7 +39,7 @@ struct VHostSCSICommon { struct vhost_inflight *inflight; }; -int vhost_scsi_common_start(VHostSCSICommon *vsc); +int vhost_scsi_common_start(VHostSCSICommon *vsc, Error **errp); void vhost_scsi_common_stop(VHostSCSICommon *vsc); char *vhost_scsi_common_get_fw_dev_path(FWPathProvider *p, BusState *bus, DeviceState *dev); diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h index 521b08e559..78fe616ccb 100644 --- a/include/hw/virtio/vhost-user-scsi.h +++ b/include/hw/virtio/vhost-user-scsi.h @@ -28,7 +28,13 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI) struct VHostUserSCSI { VHostSCSICommon parent_obj; + + /* Properties */ + bool connected; + bool started_vu; + VhostUserState vhost_user; + struct vhost_virtqueue *vhost_vqs; }; #endif /* VHOST_USER_SCSI_H */ From a6a30a7ec01c743fe71ab7461655d89afe152fcf Mon Sep 17 00:00:00 2001 From: Li Feng Date: Mon, 9 Oct 2023 12:47:00 +0800 Subject: [PATCH 54/62] vhost-user-scsi: start vhost when guest kicks Let's keep the same behavior as vhost-user-blk. Some old guests kick virtqueue before setting VIRTIO_CONFIG_S_DRIVER_OK. Signed-off-by: Li Feng Reviewed-by: Raphael Norwitz Message-Id: <20231009044735.941655-5-fengli@smartx.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/scsi/vhost-user-scsi.c | 48 +++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 24c250d3f8..258fba5c69 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -92,8 +92,48 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) } } -static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +static void vhost_user_scsi_handle_output(VirtIODevice *vdev, VirtQueue *vq) { + VHostUserSCSI *s = (VHostUserSCSI *)vdev; + DeviceState *dev = DEVICE(vdev); + VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); + + Error *local_err = NULL; + int i, ret; + + if (!vdev->start_on_kick) { + return; + } + + if (!s->connected) { + return; + } + + if (vhost_dev_is_started(&vsc->dev)) { + return; + } + + /* + * Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start + * vhost here instead of waiting for .set_status(). + */ + ret = vhost_user_scsi_start(s, &local_err); + if (ret < 0) { + error_reportf_err(local_err, "vhost-user-scsi: vhost start failed: "); + qemu_chr_fe_disconnect(&vs->conf.chardev); + return; + } + + /* Kick right away to begin processing requests already in vring */ + for (i = 0; i < vsc->dev.nvqs; i++) { + VirtQueue *kick_vq = virtio_get_queue(vdev, i); + + if (!virtio_queue_get_desc_addr(vdev, i)) { + continue; + } + event_notifier_set(virtio_queue_get_host_notifier(kick_vq)); + } } static int vhost_user_scsi_connect(DeviceState *dev, Error **errp) @@ -220,9 +260,9 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp) return; } - virtio_scsi_common_realize(dev, vhost_dummy_handle_output, - vhost_dummy_handle_output, - vhost_dummy_handle_output, &err); + virtio_scsi_common_realize(dev, vhost_user_scsi_handle_output, + vhost_user_scsi_handle_output, + vhost_user_scsi_handle_output, &err); if (err != NULL) { error_propagate(errp, err); return; From f02a4b8e6431598612466f76aac64ab492849abf Mon Sep 17 00:00:00 2001 From: Li Feng Date: Mon, 9 Oct 2023 12:47:01 +0800 Subject: [PATCH 55/62] vhost-user: fix lost reconnect When the vhost-user is reconnecting to the backend, and if the vhost-user fails at the get_features in vhost_dev_init(), then the reconnect will fail and it will not be retriggered forever. The reason is: When the vhost-user fails at get_features, the vhost_dev_cleanup will be called immediately. vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'. The reconnect path is: vhost_user_blk_event vhost_user_async_close(.. vhost_user_blk_disconnect ..) qemu_chr_fe_set_handlers <----- clear the notifier callback schedule vhost_user_async_close_bh The vhost->vdev is null, so the vhost_user_blk_disconnect will not be called, then the event fd callback will not be reinstalled. All vhost-user devices have this issue, including vhost-user-blk/scsi. With this patch, if the vdev->vdev is null, the fd callback will still be reinstalled. Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling") Signed-off-by: Li Feng Reviewed-by: Raphael Norwitz Message-Id: <20231009044735.941655-6-fengli@smartx.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/block/vhost-user-blk.c | 2 +- hw/scsi/vhost-user-scsi.c | 3 ++- hw/virtio/vhost-user-gpio.c | 2 +- hw/virtio/vhost-user.c | 10 ++++++++-- include/hw/virtio/vhost-user.h | 3 ++- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 3c69fa47d5..95c758200d 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &s->chardev, &s->dev, - vhost_user_blk_disconnect); + vhost_user_blk_disconnect, vhost_user_blk_event); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 258fba5c69..4486500cac 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -212,7 +212,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev, - vhost_user_scsi_disconnect); + vhost_user_scsi_disconnect, + vhost_user_scsi_event); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index fc784e4213..aff2d7eff6 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -289,7 +289,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev, - vu_gpio_disconnect); + vu_gpio_disconnect, vu_gpio_event); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index f9414f03de..b8a7b5542d 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2756,6 +2756,7 @@ typedef struct { DeviceState *dev; CharBackend *cd; struct vhost_dev *vhost; + IOEventHandler *event_cb; } VhostAsyncCallback; static void vhost_user_async_close_bh(void *opaque) @@ -2770,7 +2771,10 @@ static void vhost_user_async_close_bh(void *opaque) */ if (vhost->vdev) { data->cb(data->dev); - } + } else if (data->event_cb) { + qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, + NULL, data->dev, NULL, true); + } g_free(data); } @@ -2782,7 +2786,8 @@ static void vhost_user_async_close_bh(void *opaque) */ void vhost_user_async_close(DeviceState *d, CharBackend *chardev, struct vhost_dev *vhost, - vu_async_close_fn cb) + vu_async_close_fn cb, + IOEventHandler *event_cb) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* @@ -2798,6 +2803,7 @@ void vhost_user_async_close(DeviceState *d, data->dev = d; data->cd = chardev; data->vhost = vhost; + data->event_cb = event_cb; /* Disable any further notifications on the chardev */ qemu_chr_fe_set_handlers(chardev, diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 9f9ddf878d..6b06ecb1bd 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -106,6 +106,7 @@ typedef void (*vu_async_close_fn)(DeviceState *cb); void vhost_user_async_close(DeviceState *d, CharBackend *chardev, struct vhost_dev *vhost, - vu_async_close_fn cb); + vu_async_close_fn cb, + IOEventHandler *event_cb); #endif From 4076bc86a3afd626338f2e3d018f6b884a972143 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 11 Oct 2023 16:23:35 +0530 Subject: [PATCH 56/62] hw/i386/cxl: ensure maxram is greater than ram size for calculating cxl range pc_get_device_memory_range() finds the device memory size by calculating the difference between maxram and ram sizes. This calculation makes sense only when maxram is greater than the ram size. Make sure we check for that before calling pc_get_device_memory_range(). Signed-off-by: Ani Sinha Message-Id: <20231011105335.42296-1-anisinha@redhat.com> Acked-by: Jonathan Cameron Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 6293f57a0c..dbaefa7617 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -781,10 +781,12 @@ static void pc_get_device_memory_range(PCMachineState *pcms, static uint64_t pc_get_cxl_range_start(PCMachineState *pcms) { PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + MachineState *ms = MACHINE(pcms); hwaddr cxl_base; ram_addr_t size; - if (pcmc->has_reserved_memory) { + if (pcmc->has_reserved_memory && + (ms->ram_size < ms->maxram_size)) { pc_get_device_memory_range(pcms, &cxl_base, &size); cxl_base += size; } else { From 57a6beacd88b67af7ca9dbaf29800442a0e1bfed Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 12 Oct 2023 13:56:21 +0100 Subject: [PATCH 57/62] tests/acpi: Allow update of DSDT.cxl Addition of QTG in following patch requires an update to the test data. Signed-off-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Message-Id: <20231012125623.21101-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..9ce0f596cc 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT.cxl", From 6cdd46f66ff91a9c13c5dc4d018ae53d2f28d74a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 12 Oct 2023 13:56:22 +0100 Subject: [PATCH 58/62] hw/cxl: Add QTG _DSM support for ACPI0017 device Add a simple _DSM call support for the ACPI0017 device to return fake QTG ID values of 0 and 1 in all cases. This for _DSM plumbing testing from the OS. Following edited for readability Device (CXLM) { Name (_HID, "ACPI0017") // _HID: Hardware ID ... Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method { If ((Arg0 == ToUUID ("f365f9a6-a7de-4071-a66a-b40c0b4f8e52"))) { If ((Arg2 == Zero)) { Return (Buffer (One) { 0x01 }) } If ((Arg2 == One)) { Return (Package (0x02) { One, Package (0x02) { Zero, One } }) } } } Signed-off-by: Dave Jiang Signed-off-by: Jonathan Cameron Message-Id: <20231012125623.21101-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cxl.c | 69 +++++++++++++++++++++++++++++++++++++++++++ hw/i386/acpi-build.c | 1 + include/hw/acpi/cxl.h | 1 + 3 files changed, 71 insertions(+) diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c index 92b46bc932..9cd7905ea2 100644 --- a/hw/acpi/cxl.c +++ b/hw/acpi/cxl.c @@ -30,6 +30,75 @@ #include "qapi/error.h" #include "qemu/uuid.h" +void build_cxl_dsm_method(Aml *dev) +{ + Aml *method, *ifctx, *ifctx2; + + method = aml_method("_DSM", 4, AML_SERIALIZED); + { + Aml *function, *uuid; + + uuid = aml_arg(0); + function = aml_arg(2); + /* CXL spec v3.0 9.17.3.1 _DSM Function for Retrieving QTG ID */ + ifctx = aml_if(aml_equal( + uuid, aml_touuid("F365F9A6-A7DE-4071-A66A-B40C0B4F8E52"))); + + /* Function 0, standard DSM query function */ + ifctx2 = aml_if(aml_equal(function, aml_int(0))); + { + uint8_t byte_list[1] = { 0x01 }; /* function 1 only */ + + aml_append(ifctx2, + aml_return(aml_buffer(sizeof(byte_list), byte_list))); + } + aml_append(ifctx, ifctx2); + + /* + * Function 1 + * Creating a package with static values. The max supported QTG ID will + * be 1 and recommended QTG IDs are 0 and then 1. + * The values here are statically created to simplify emulation. Values + * from a real BIOS would be determined by the performance of all the + * present CXL memory and then assigned. + */ + ifctx2 = aml_if(aml_equal(function, aml_int(1))); + { + Aml *pak, *pak1; + + /* + * Return: A package containing two elements - a WORD that returns + * the maximum throttling group that the platform supports, and a + * package containing the QTG ID(s) that the platform recommends. + * Package { + * Max Supported QTG ID + * Package {QTG Recommendations} + * } + * + * While the SPEC specified WORD that hints at the value being + * 16bit, the ACPI dump of BIOS DSDT table showed that the values + * are integers with no specific size specification. aml_int() will + * be used for the values. + */ + pak1 = aml_package(2); + /* Set QTG ID of 0 */ + aml_append(pak1, aml_int(0)); + /* Set QTG ID of 1 */ + aml_append(pak1, aml_int(1)); + + pak = aml_package(2); + /* Set Max QTG 1 */ + aml_append(pak, aml_int(1)); + aml_append(pak, pak1); + + aml_append(ifctx2, aml_return(pak)); + } + aml_append(ifctx, ifctx2); + } + aml_append(method, ifctx); + aml_append(dev, method); +} + static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl) { PXBDev *pxb = PXB_DEV(cxl); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index b0e1f074f1..80db183b78 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1417,6 +1417,7 @@ static void build_acpi0017(Aml *table) method = aml_method("_STA", 0, AML_NOTSERIALIZED); aml_append(method, aml_return(aml_int(0x01))); aml_append(dev, method); + build_cxl_dsm_method(dev); aml_append(scope, dev); aml_append(table, scope); diff --git a/include/hw/acpi/cxl.h b/include/hw/acpi/cxl.h index acf4418886..8f22c71530 100644 --- a/include/hw/acpi/cxl.h +++ b/include/hw/acpi/cxl.h @@ -25,5 +25,6 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data, BIOSLinker *linker, const char *oem_id, const char *oem_table_id, CXLState *cxl_state); void build_cxl_osc_method(Aml *dev); +void build_cxl_dsm_method(Aml *dev); #endif From e0c72452559ea8dd723c5f13c64318cd1d1fef4c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 12 Oct 2023 13:56:23 +0100 Subject: [PATCH 59/62] tests/acpi: Update DSDT.cxl with QTG DSM Description of change in previous patch. Signed-off-by: Jonathan Cameron Reviewed-by: Fan Ni Reviewed-by: Dave Jiang Message-Id: <20231012125623.21101-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/DSDT.cxl | Bin 9655 -> 9713 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/DSDT.cxl b/tests/data/acpi/q35/DSDT.cxl index ee16a861c2de7b7caaf11d91c50fcdf308815233..145301c52af9a17242bb306c210f8a7e0f01b827 100644 GIT binary patch delta 118 zcmV-+0Ez#%OYutzL{mgm@g)EN0r#;Aks)191z%G_Ut5Ak1w&X&O$c8|Nkk1nLr_UD zFflg(6bN5aR6ziw0Tef1L{m)+pfHnY5fcg!ruk*_-luUurfRed3r~(xpb3*|0HhHE Y0Rf;6lWGB^5(okT5(EMOv%Vqu1A45k9^g@gANoypGNRo(2Yn<_sbn Q@xdXE3`;iuQ2faZ03i7h)c^nh diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 9ce0f596cc..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,2 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/q35/DSDT.cxl", From d4eb50380807f5e881cbb66630f210e3e6eb9750 Mon Sep 17 00:00:00 2001 From: Hanna Czenczek Date: Mon, 16 Oct 2023 10:32:01 +0200 Subject: [PATCH 60/62] vhost-user: Fix protocol feature bit conflict The VHOST_USER_PROTOCOL_F_XEN_MMAP feature bit was defined in f21e95ee97d, which has been part of qemu's 8.1.0 release. However, it seems it was never added to qemu's code, but it is well possible that it is already used by different front-ends outside of qemu (i.e., Xen). VHOST_USER_PROTOCOL_F_SHARED_OBJECT in contrast was added to qemu's code in 16094766627, but never defined in the vhost-user specification. As a consequence, both bits were defined to be 17, which cannot work. Regardless of whether actual code or the specification should take precedence, F_XEN_MMAP is already part of a qemu release, while F_SHARED_OBJECT is not. Therefore, bump the latter to take number 18 instead of 17, and add this to the specification. Take the opportunity to add at least a little note on the VhostUserShared structure to the specification. This structure is referenced by the new commands introduced in 16094766627, but was not defined. Fixes: 160947666276c5b7f6bca4d746bcac2966635d79 ("vhost-user: add shared_object msg") Signed-off-by: Hanna Czenczek Message-Id: <20231016083201.23736-1-hreitz@redhat.com> Reviewed-by: Emmanouil Pitsidianakis Reviewed-by: Stefano Garzarella Reviewed-by: Viresh Kumar Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user.rst | 11 +++++++++++ include/hw/virtio/vhost-user.h | 3 ++- subprojects/libvhost-user/libvhost-user.h | 3 ++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 415bb47a19..768fb5c28c 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -275,6 +275,16 @@ Inflight description :queue size: a 16-bit size of virtqueues +VhostUserShared +^^^^^^^^^^^^^^^ + ++------+ +| UUID | ++------+ + +:UUID: 16 bytes UUID, whose first three components (a 32-bit value, then + two 16-bit values) are stored in big endian. + C structure ----------- @@ -885,6 +895,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS 15 #define VHOST_USER_PROTOCOL_F_STATUS 16 #define VHOST_USER_PROTOCOL_F_XEN_MMAP 17 + #define VHOST_USER_PROTOCOL_F_SHARED_OBJECT 18 Front-end message types ----------------------- diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h index 6b06ecb1bd..20b69d8e85 100644 --- a/include/hw/virtio/vhost-user.h +++ b/include/hw/virtio/vhost-user.h @@ -29,7 +29,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, VHOST_USER_PROTOCOL_F_STATUS = 16, - VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, + /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */ + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18, VHOST_USER_PROTOCOL_F_MAX }; diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index b36a42a7ca..c2352904f0 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -65,7 +65,8 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, /* Feature 16 is reserved for VHOST_USER_PROTOCOL_F_STATUS. */ - VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 17, + /* Feature 17 reserved for VHOST_USER_PROTOCOL_F_XEN_MMAP. */ + VHOST_USER_PROTOCOL_F_SHARED_OBJECT = 18, VHOST_USER_PROTOCOL_F_MAX }; From 5e7abc29c56f7823f9a122898685bdfcfac2e522 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 17 Oct 2023 17:26:25 +0200 Subject: [PATCH 61/62] MAINTAINERS: Add include/hw/intc/i8259.h to the PC chip section i8259.c is already listed here, so the corresponding header should be mentioned in this section, too. Signed-off-by: Thomas Huth Message-Id: <20231017152625.229022-1-thuth@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d9fe5aa367..2f435102ec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1764,6 +1764,7 @@ F: include/hw/dma/i8257.h F: include/hw/i2c/pm_smbus.h F: include/hw/input/i8042.h F: include/hw/intc/ioapic* +F: include/hw/intc/i8259.h F: include/hw/isa/i8259_internal.h F: include/hw/isa/superio.h F: include/hw/timer/hpet.h From c7016bf700cfbee52d2797bc4c592a39b17c4de7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 23 Aug 2023 13:23:25 +0100 Subject: [PATCH 62/62] intel-iommu: Report interrupt remapping faults, fix return value A generic X86IOMMUClass->int_remap function should not return VT-d specific values; fix it to return 0 if the interrupt was successfully translated or -EINVAL if not. The VTD_FR_IR_xxx values are supposed to be used to actually raise faults through the fault reporting mechanism, so do that instead for the case where the IRQ is actually being injected. There is more work to be done here, as pretranslations for the KVM IRQ routing table can't fault; an untranslatable IRQ should be handled in userspace and the fault raised only when the IRQ actually happens (if indeed the IRTE is still not valid at that time). But we can work on that later; we can at least raise faults for the direct case. Signed-off-by: David Woodhouse Message-Id: <31bbfc9041690449d3ac891f4431ec82174ee1b4.camel@infradead.org> Acked-by: Peter Xu Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 150 ++++++++++++++++++++++----------- hw/i386/intel_iommu_internal.h | 1 + 2 files changed, 103 insertions(+), 48 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 2c832ab68b..30a108a42b 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -469,21 +469,12 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) /* Must not update F field now, should be done later */ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, - uint16_t source_id, hwaddr addr, - VTDFaultReason fault, bool is_write, - bool is_pasid, uint32_t pasid) + uint64_t hi, uint64_t lo) { - uint64_t hi = 0, lo; hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); assert(index < DMAR_FRCD_REG_NR); - lo = VTD_FRCD_FI(addr); - hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) | - VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid); - if (!is_write) { - hi |= VTD_FRCD_T; - } vtd_set_quad_raw(s, frcd_reg_addr, lo); vtd_set_quad_raw(s, frcd_reg_addr + 8, hi); @@ -509,17 +500,11 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) } /* Log and report an DMAR (address translation) fault to software */ -static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr, VTDFaultReason fault, - bool is_write, bool is_pasid, - uint32_t pasid) +static void vtd_report_frcd_fault(IntelIOMMUState *s, uint64_t source_id, + uint64_t hi, uint64_t lo) { uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); - assert(fault < VTD_FR_MAX); - - trace_vtd_dmar_fault(source_id, fault, addr, is_write); - if (fsts_reg & VTD_FSTS_PFO) { error_report_once("New fault is not recorded due to " "Primary Fault Overflow"); @@ -539,8 +524,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, return; } - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, - is_write, is_pasid, pasid); + vtd_record_frcd(s, s->next_frcd_reg, hi, lo); if (fsts_reg & VTD_FSTS_PPF) { error_report_once("There are pending faults already, " @@ -565,6 +549,40 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, } } +/* Log and report an DMAR (address translation) fault to software */ +static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, + hwaddr addr, VTDFaultReason fault, + bool is_write, bool is_pasid, + uint32_t pasid) +{ + uint64_t hi, lo; + + assert(fault < VTD_FR_MAX); + + trace_vtd_dmar_fault(source_id, fault, addr, is_write); + + lo = VTD_FRCD_FI(addr); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) | + VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid); + if (!is_write) { + hi |= VTD_FRCD_T; + } + + vtd_report_frcd_fault(s, source_id, hi, lo); +} + + +static void vtd_report_ir_fault(IntelIOMMUState *s, uint64_t source_id, + VTDFaultReason fault, uint16_t index) +{ + uint64_t hi, lo; + + lo = VTD_FRCD_IR_IDX(index); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); + + vtd_report_frcd_fault(s, source_id, hi, lo); +} + /* Handle Invalidation Queue Errors of queued invalidation interface error * conditions. */ @@ -3305,8 +3323,9 @@ static Property vtd_properties[] = { }; /* Read IRTE entry with specific index */ -static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, - VTD_IR_TableEntry *entry, uint16_t sid) +static bool vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, + VTD_IR_TableEntry *entry, uint16_t sid, + bool do_fault) { static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \ {0xffff, 0xfffb, 0xfff9, 0xfff8}; @@ -3317,7 +3336,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, if (index >= iommu->intr_size) { error_report_once("%s: index too large: ind=0x%x", __func__, index); - return -VTD_FR_IR_INDEX_OVER; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_INDEX_OVER, index); + } + return false; } addr = iommu->intr_root + index * sizeof(*entry); @@ -3325,7 +3347,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, entry, sizeof(*entry), MEMTXATTRS_UNSPECIFIED)) { error_report_once("%s: read failed: ind=0x%x addr=0x%" PRIx64, __func__, index, addr); - return -VTD_FR_IR_ROOT_INVAL; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ROOT_INVAL, index); + } + return false; } entry->data[0] = le64_to_cpu(entry->data[0]); @@ -3333,11 +3358,24 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, trace_vtd_ir_irte_get(index, entry->data[1], entry->data[0]); + /* + * The remaining potential fault conditions are "qualified" by the + * Fault Processing Disable bit in the IRTE. Even "not present". + * So just clear the do_fault flag if PFD is set, which will + * prevent faults being raised. + */ + if (entry->irte.fault_disable) { + do_fault = false; + } + if (!entry->irte.present) { error_report_once("%s: detected non-present IRTE " "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")", __func__, index, entry->data[1], entry->data[0]); - return -VTD_FR_IR_ENTRY_P; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_ENTRY_P, index); + } + return false; } if (entry->irte.__reserved_0 || entry->irte.__reserved_1 || @@ -3345,7 +3383,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, error_report_once("%s: detected non-zero reserved IRTE " "(index=%u, high=0x%" PRIx64 ", low=0x%" PRIx64 ")", __func__, index, entry->data[1], entry->data[0]); - return -VTD_FR_IR_IRTE_RSVD; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_IRTE_RSVD, index); + } + return false; } if (sid != X86_IOMMU_SID_INVALID) { @@ -3361,7 +3402,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, error_report_once("%s: invalid IRTE SID " "(index=%u, sid=%u, source_id=%u)", __func__, index, sid, source_id); - return -VTD_FR_IR_SID_ERR; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index); + } + return false; } break; @@ -3373,7 +3417,10 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, error_report_once("%s: invalid SVT_BUS " "(index=%u, bus=%u, min=%u, max=%u)", __func__, index, bus, bus_min, bus_max); - return -VTD_FR_IR_SID_ERR; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index); + } + return false; } break; @@ -3382,23 +3429,24 @@ static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index, "(index=%u, type=%d)", __func__, index, entry->irte.sid_vtype); /* Take this as verification failure. */ - return -VTD_FR_IR_SID_ERR; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_SID_ERR, index); + } + return false; } } - return 0; + return true; } /* Fetch IRQ information of specific IR index */ -static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, - X86IOMMUIrq *irq, uint16_t sid) +static bool vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, + X86IOMMUIrq *irq, uint16_t sid, bool do_fault) { VTD_IR_TableEntry irte = {}; - int ret = 0; - ret = vtd_irte_get(iommu, index, &irte, sid); - if (ret) { - return ret; + if (!vtd_irte_get(iommu, index, &irte, sid, do_fault)) { + return false; } irq->trigger_mode = irte.irte.trigger_mode; @@ -3417,16 +3465,15 @@ static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector, irq->delivery_mode, irq->dest, irq->dest_mode); - return 0; + return true; } /* Interrupt remapping for MSI/MSI-X entry */ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, MSIMessage *origin, MSIMessage *translated, - uint16_t sid) + uint16_t sid, bool do_fault) { - int ret = 0; VTD_IR_MSIAddress addr; uint16_t index; X86IOMMUIrq irq = {}; @@ -3443,14 +3490,20 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, if (origin->address & VTD_MSI_ADDR_HI_MASK) { error_report_once("%s: MSI address high 32 bits non-zero detected: " "address=0x%" PRIx64, __func__, origin->address); - return -VTD_FR_IR_REQ_RSVD; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0); + } + return -EINVAL; } addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; if (addr.addr.__head != 0xfee) { error_report_once("%s: MSI address low 32 bit invalid: 0x%" PRIx32, __func__, addr.data); - return -VTD_FR_IR_REQ_RSVD; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0); + } + return -EINVAL; } /* This is compatible mode. */ @@ -3469,9 +3522,8 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE; } - ret = vtd_remap_irq_get(iommu, index, &irq, sid); - if (ret) { - return ret; + if (!vtd_remap_irq_get(iommu, index, &irq, sid, do_fault)) { + return -EINVAL; } if (addr.addr.sub_valid) { @@ -3481,7 +3533,10 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, "(sid=%u, address=0x%" PRIx64 ", data=0x%" PRIx32 ")", __func__, sid, origin->address, origin->data); - return -VTD_FR_IR_REQ_RSVD; + if (do_fault) { + vtd_report_ir_fault(iommu, sid, VTD_FR_IR_REQ_RSVD, 0); + } + return -EINVAL; } } else { uint8_t vector = origin->data & 0xff; @@ -3521,7 +3576,7 @@ static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src, MSIMessage *dst, uint16_t sid) { return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu), - src, dst, sid); + src, dst, sid, false); } static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr, @@ -3547,9 +3602,8 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, sid = attrs.requester_id; } - ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid); + ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid, true); if (ret) { - /* TODO: report error */ /* Drop this interrupt */ return MEMTX_ERROR; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e1450c5cfe..f8cf99bddf 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -268,6 +268,7 @@ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) #define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) #define VTD_FRCD_PP(val) (((val) & 0x1) << 31) +#define VTD_FRCD_IR_IDX(val) (((val) & 0xffffULL) << 48) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason {