From 4271f4038372f174dbafffacca1a748d058a03ba Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Mon, 5 Jun 2023 16:21:25 +0200
Subject: [PATCH 01/15] virtio-net: correctly report maximum tx_queue_size
 value

Maximum value for tx_queue_size depends on the backend type.
1024 for vDPA/vhost-user, 256 for all the others.

The value is returned by virtio_net_max_tx_queue_size() to set the
parameter:

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024).

So the parameter is silently ignored and ethtool reports a different
value than the one provided by the user.

   ... -netdev tap,... -device virtio-net,tx_queue_size=1024

    # ethtool -g enp0s2
    Ring parameters for enp0s2:
    Pre-set maximums:
    RX:		256
    RX Mini:	n/a
    RX Jumbo:	n/a
    TX:		256
    Current hardware settings:
    RX:		256
    RX Mini:	n/a
    RX Jumbo:	n/a
    TX:		256

   ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048

    Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024

With this patch the correct maximum value is checked and displayed.

For vDPA/vhost-user:

    Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024

For all the others:

    Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256

Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user")
Cc: mst@redhat.com
Cc: qemu-stable@nongnu.org
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/virtio-net.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index aa421a908a..04783f5b94 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3630,12 +3630,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
     }
 
     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
-        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
+        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
         !is_power_of_2(n->net_conf.tx_queue_size)) {
         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                    "must be a power of 2 between %d and %d",
                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
-                   VIRTQUEUE_MAX_SIZE);
+                   virtio_net_max_tx_queue_size(n));
         virtio_cleanup(vdev);
         return;
     }

From 140eae9c8f760e9260356fe9b56b802a02f0a9d2 Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:13 +0800
Subject: [PATCH 02/15] hw/net: e1000: Remove the logic of padding short frames
 in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

This actually reverts commit 78aeb23eded2d0b765bf9145c71f80025b568acd.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/e1000.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index aae5f0bdc0..093c2d4531 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -888,7 +888,6 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
     uint16_t vlan_special = 0;
     uint8_t vlan_status = 0;
     uint8_t min_buf[ETH_ZLEN];
-    struct iovec min_iov;
     uint8_t *filter_buf = iov->iov_base;
     size_t size = iov_size(iov, iovcnt);
     size_t iov_ofs = 0;
@@ -905,15 +904,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
         return 0;
     }
 
-    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        iov_to_buf(iov, iovcnt, 0, min_buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
-        min_iov.iov_base = filter_buf = min_buf;
-        min_iov.iov_len = size = sizeof(min_buf);
-        iovcnt = 1;
-        iov = &min_iov;
-    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
+    if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
         /* This is very unlikely, but may happen. */
         iov_to_buf(iov, iovcnt, 0, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
         filter_buf = min_buf;

From c445f200ad241b443aa7a61a5381b26f56a18f0e Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:14 +0800
Subject: [PATCH 03/15] hw/net: vmxnet3: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

This actually reverts commit 40a87c6c9b11ef9c14e0301f76abf0eb2582f08e.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/vmxnet3.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 18b9edfdb2..5dfacb1098 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -40,7 +40,6 @@
 
 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
 #define VMXNET3_MSIX_BAR_SIZE 0x2000
-#define MIN_BUF_SIZE 60
 
 /* Compatibility flags for migration */
 #define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
@@ -1977,7 +1976,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     VMXNET3State *s = qemu_get_nic_opaque(nc);
     size_t bytes_indicated;
-    uint8_t min_buf[MIN_BUF_SIZE];
 
     if (!vmxnet3_can_receive(nc)) {
         VMW_PKPRN("Cannot receive now");
@@ -1990,14 +1988,6 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
         size -= sizeof(struct virtio_net_hdr);
     }
 
-    /* Pad to minimum Ethernet frame length */
-    if (size < sizeof(min_buf)) {
-        memcpy(min_buf, buf, size);
-        memset(&min_buf[size], 0, sizeof(min_buf) - size);
-        buf = min_buf;
-        size = sizeof(min_buf);
-    }
-
     net_rx_pkt_set_packet_type(s->rx_pkt,
         get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
 

From c58da33f2f8410b6f22cd1d33377dadf3a4d8867 Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:15 +0800
Subject: [PATCH 04/15] hw/net: i82596: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/i82596.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/hw/net/i82596.c b/hw/net/i82596.c
index ec21e2699a..ab26f8bea1 100644
--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@@ -72,10 +72,6 @@ enum commands {
 #define I596_EOF        0x8000
 #define SIZE_MASK       0x3fff
 
-#define ETHER_TYPE_LEN 2
-#define VLAN_TCI_LEN 2
-#define VLAN_HLEN (ETHER_TYPE_LEN + VLAN_TCI_LEN)
-
 /* various flags in the chip config registers */
 #define I596_PREFETCH   (s->config[0] & 0x80)
 #define I596_PROMISC    (s->config[8] & 0x01)
@@ -488,8 +484,6 @@ bool i82596_can_receive(NetClientState *nc)
     return true;
 }
 
-#define MIN_BUF_SIZE 60
-
 ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
 {
     I82596State *s = qemu_get_nic_opaque(nc);
@@ -500,7 +494,6 @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
     size_t bufsz = sz; /* length of data in buf */
     uint32_t crc;
     uint8_t *crc_ptr;
-    uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN];
     static const uint8_t broadcast_macaddr[6] = {
                 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -583,17 +576,6 @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
         }
     }
 
-    /* if too small buffer, then expand it */
-    if (len < MIN_BUF_SIZE + VLAN_HLEN) {
-        memcpy(buf1, buf, len);
-        memset(buf1 + len, 0, MIN_BUF_SIZE + VLAN_HLEN - len);
-        buf = buf1;
-        if (len < MIN_BUF_SIZE) {
-            len = MIN_BUF_SIZE;
-        }
-        bufsz = len;
-    }
-
     /* Calculate the ethernet checksum (4 bytes) */
     len += 4;
     crc = cpu_to_be32(crc32(~0, buf, sz));

From 05db4476c5d25e437d807175de9f862bf5bf732c Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:16 +0800
Subject: [PATCH 05/15] hw/net: ne2000: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/ne2000.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index 3f31d04efb..d79c884d50 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -167,15 +167,12 @@ static int ne2000_buffer_full(NE2000State *s)
     return 0;
 }
 
-#define MIN_BUF_SIZE 60
-
 ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     NE2000State *s = qemu_get_nic_opaque(nc);
     size_t size = size_;
     uint8_t *p;
     unsigned int total_len, next, avail, len, index, mcast_idx;
-    uint8_t buf1[60];
     static const uint8_t broadcast_macaddr[6] =
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -213,15 +210,6 @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
         }
     }
 
-
-    /* if too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     index = s->curpag << 8;
     if (index >= NE2000_PMEM_END) {
         index = s->start;

From 6d0d261dbfa6122e9b3bdcab7d934ca49f069c21 Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:17 +0800
Subject: [PATCH 06/15] hw/net: pcnet: Remove the logic of padding short frames
 in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/pcnet.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
index d456094575..02828ae716 100644
--- a/hw/net/pcnet.c
+++ b/hw/net/pcnet.c
@@ -987,7 +987,6 @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
 {
     PCNetState *s = qemu_get_nic_opaque(nc);
     int is_padr = 0, is_bcast = 0, is_ladr = 0;
-    uint8_t buf1[60];
     int remaining;
     int crc_err = 0;
     size_t size = size_;
@@ -1000,14 +999,6 @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
     printf("pcnet_receive size=%zu\n", size);
 #endif
 
-    /* if too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     if (CSR_PROM(s)
         || (is_padr=padr_match(s, buf, size))
         || (is_bcast=padr_bcast(s, buf, size))

From 63b901bfd30a0975bc326ba8527880fabac2e66d Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:18 +0800
Subject: [PATCH 07/15] hw/net: rtl8139: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/rtl8139.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 5f1a4d359b..b4df75b2c9 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -826,7 +826,6 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
 
     uint32_t packet_header = 0;
 
-    uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN];
     static const uint8_t broadcast_macaddr[6] =
         { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -938,17 +937,6 @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
         }
     }
 
-    /* if too small buffer, then expand it
-     * Include some tailroom in case a vlan tag is later removed. */
-    if (size < MIN_BUF_SIZE + VLAN_HLEN) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE + VLAN_HLEN - size);
-        buf = buf1;
-        if (size < MIN_BUF_SIZE) {
-            size = MIN_BUF_SIZE;
-        }
-    }
-
     if (rtl8139_cp_receiver_enabled(s))
     {
         if (!rtl8139_cp_rx_valid(s)) {

From aee87b43fe2206acb8f5e334b42790df33a1cbad Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:19 +0800
Subject: [PATCH 08/15] hw/net: sungem: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/sungem.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/hw/net/sungem.c b/hw/net/sungem.c
index eb01520790..103376c133 100644
--- a/hw/net/sungem.c
+++ b/hw/net/sungem.c
@@ -550,7 +550,6 @@ static ssize_t sungem_receive(NetClientState *nc, const uint8_t *buf,
     PCIDevice *d = PCI_DEVICE(s);
     uint32_t mac_crc, done, kick, max_fsize;
     uint32_t fcs_size, ints, rxdma_cfg, rxmac_cfg, csum, coff;
-    uint8_t smallbuf[60];
     struct gem_rxd desc;
     uint64_t dbase, baddr;
     unsigned int rx_cond;
@@ -584,19 +583,6 @@ static ssize_t sungem_receive(NetClientState *nc, const uint8_t *buf,
         return size;
     }
 
-    /* We don't drop too small frames since we get them in qemu, we pad
-     * them instead. We should probably use the min frame size register
-     * but I don't want to use a variable size staging buffer and I
-     * know both MacOS and Linux use the default 64 anyway. We use 60
-     * here to account for the non-existent FCS.
-     */
-    if (size < 60) {
-        memcpy(smallbuf, buf, size);
-        memset(&smallbuf[size], 0, 60 - size);
-        buf = smallbuf;
-        size = 60;
-    }
-
     /* Get MAC crc */
     mac_crc = net_crc32_le(buf, ETH_ALEN);
 

From 0fe0efc9cd594eb0ce36cc7722f0ce1d038df8d7 Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:20 +0800
Subject: [PATCH 09/15] hw/net: sunhme: Remove the logic of padding short
 frames in the receive path

Now that we have implemented unified short frames padding in the
QEMU networking codes, remove the same logic in the NIC codes.

Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/sunhme.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c
index 1f3d8011ae..391d26fb82 100644
--- a/hw/net/sunhme.c
+++ b/hw/net/sunhme.c
@@ -714,8 +714,6 @@ static inline void sunhme_set_rx_ring_nr(SunHMEState *s, int i)
     s->erxregs[HME_ERXI_RING >> 2] = ring;
 }
 
-#define MIN_BUF_SIZE 60
-
 static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
                               size_t size)
 {
@@ -724,7 +722,6 @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
     dma_addr_t rb, addr;
     uint32_t intstatus, status, buffer, buffersize, sum;
     uint16_t csum;
-    uint8_t buf1[60];
     int nr, cr, len, rxoffset, csum_offset;
 
     trace_sunhme_rx_incoming(size);
@@ -775,14 +772,6 @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
 
     trace_sunhme_rx_filter_accept();
 
-    /* If too small buffer, then expand it */
-    if (size < MIN_BUF_SIZE) {
-        memcpy(buf1, buf, size);
-        memset(buf1 + size, 0, MIN_BUF_SIZE - size);
-        buf = buf1;
-        size = MIN_BUF_SIZE;
-    }
-
     rb = s->erxregs[HME_ERXI_RING >> 2] & HME_ERXI_RING_ADDR;
     nr = sunhme_get_rx_ring_count(s);
     cr = sunhme_get_rx_ring_nr(s);

From 71e11da1facdf08c91b3763bd9a72c2121f2bbb6 Mon Sep 17 00:00:00 2001
From: Bin Meng <bmeng@tinylab.org>
Date: Sun, 25 Jun 2023 09:53:21 +0800
Subject: [PATCH 10/15] hw/net: ftgmac100: Drop the small packet check in the
 receive path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we have implemented unified short frames padding in the
QEMU networking codes, the small packet check logic in the receive
path is no longer needed.

Suggested-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Bin Meng <bmeng@tinylab.org>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/ftgmac100.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index d3bf14be53..702b001be2 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -968,14 +968,6 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const uint8_t *buf,
         return -1;
     }
 
-    /* TODO : Pad to minimum Ethernet frame length */
-    /* handle small packets.  */
-    if (size < 10) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: dropped frame of %zd bytes\n",
-                      __func__, size);
-        return size;
-    }
-
     if (!ftgmac100_filter(s, buf, size)) {
         return size;
     }

From 006c3fa74c3edb978ff46d2851699e9a95609da5 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 9 Jun 2023 09:27:46 +0200
Subject: [PATCH 11/15] net: socket: prepare to cleanup net_init_socket()

Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram()
when the socket type is already known.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/socket.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index ba6e5b0b00..24dcaa55bc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer,
             break;
         }
     }
-    s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp);
+    s = net_socket_fd_init_stream(peer, model, name, fd, connected);
     if (!s) {
         return -1;
     }
@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer,
         return -1;
     }
 
-    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
+    s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp);
     if (!s) {
         return -1;
     }
@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer,
     }
     qemu_socket_set_nonblock(fd);
 
-    s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp);
+    s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp);
     if (!s) {
         return -1;
     }

From 23455ae341656ca867ee4a171826b9d280d6acb5 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 9 Jun 2023 09:27:47 +0200
Subject: [PATCH 12/15] net: socket: move fd type checking to its own function

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/socket.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 24dcaa55bc..6b1f0fec3a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
     return s;
 }
 
+static int net_socket_fd_check(int fd, Error **errp)
+{
+    int so_type, optlen = sizeof(so_type);
+
+    if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type,
+        (socklen_t *)&optlen) < 0) {
+        error_setg(errp, "can't get socket option SO_TYPE");
+        return -1;
+    }
+    if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) {
+        error_setg(errp, "socket type=%d for fd=%d must be either"
+                   " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
+        return -1;
+    }
+    return so_type;
+}
+
 static NetSocketState *net_socket_fd_init(NetClientState *peer,
                                           const char *model, const char *name,
                                           int fd, int is_connected,
                                           const char *mc, Error **errp)
 {
-    int so_type = -1, optlen=sizeof(so_type);
+    int so_type;
 
-    if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type,
-        (socklen_t *)&optlen)< 0) {
-        error_setg(errp, "can't get socket option SO_TYPE");
+    so_type = net_socket_fd_check(fd, errp);
+    if (so_type < 0) {
         close(fd);
         return NULL;
     }
@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer,
                                         mc, errp);
     case SOCK_STREAM:
         return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
-    default:
-        error_setg(errp, "socket type=%d for fd=%d must be either"
-                   " SOCK_DGRAM or SOCK_STREAM", so_type, fd);
-        close(fd);
     }
     return NULL;
 }

From b6aeee02980e193f744f74c48fd900940feb2799 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 9 Jun 2023 09:27:48 +0200
Subject: [PATCH 13/15] net: socket: remove net_init_socket()

Move the file descriptor type checking before doing anything with it.
If it's not usable, don't close it as it could be in use by another
part of QEMU, only fail and report an error.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/socket.c | 43 +++++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 6b1f0fec3a..8e3702e1f3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp)
     return so_type;
 }
 
-static NetSocketState *net_socket_fd_init(NetClientState *peer,
-                                          const char *model, const char *name,
-                                          int fd, int is_connected,
-                                          const char *mc, Error **errp)
-{
-    int so_type;
-
-    so_type = net_socket_fd_check(fd, errp);
-    if (so_type < 0) {
-        close(fd);
-        return NULL;
-    }
-    switch(so_type) {
-    case SOCK_DGRAM:
-        return net_socket_fd_init_dgram(peer, model, name, fd, is_connected,
-                                        mc, errp);
-    case SOCK_STREAM:
-        return net_socket_fd_init_stream(peer, model, name, fd, is_connected);
-    }
-    return NULL;
-}
-
 static void net_socket_accept(void *opaque)
 {
     NetSocketState *s = opaque;
@@ -728,21 +706,34 @@ int net_init_socket(const Netdev *netdev, const char *name,
     }
 
     if (sock->fd) {
-        int fd, ret;
+        int fd, ret, so_type;
 
         fd = monitor_fd_param(monitor_cur(), sock->fd, errp);
         if (fd == -1) {
             return -1;
         }
+        so_type = net_socket_fd_check(fd, errp);
+        if (so_type < 0) {
+            return -1;
+        }
         ret = qemu_socket_try_set_nonblock(fd);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d",
                              name, fd);
             return -1;
         }
-        if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast,
-                                errp)) {
-            return -1;
+        switch (so_type) {
+        case SOCK_DGRAM:
+            if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1,
+                                          sock->mcast, errp)) {
+                return -1;
+            }
+            break;
+        case SOCK_STREAM:
+            if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) {
+                return -1;
+            }
+            break;
         }
         return 0;
     }

From e414270000e9f7fe2a56d314ab85259aeaf1bd91 Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Date: Fri, 2 Jun 2023 16:25:16 +0900
Subject: [PATCH 14/15] e1000e: Add ICR clearing by corresponding IMS bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The datasheet does not say what happens when interrupt was asserted
(ICR.INT_ASSERT=1) and auto mask is *not* active.
However, section of 13.3.27 the PCIe* GbE Controllers Open Source
Software Developer’s Manual, which were written for older devices,
namely 631xESB/632xESB, 82563EB/82564EB, 82571EB/82572EI &
82573E/82573V/82573L, does say:
> If IMS = 0b, then the ICR register is always clear-on-read. If IMS is
> not 0b, but some ICR bit is set where the corresponding IMS bit is not
> set, then a read does not clear the ICR register. For example, if
> IMS = 10101010b and ICR = 01010101b, then a read to the ICR register
> does not clear it. If IMS = 10101010b and ICR = 0101011b, then a read
> to the ICR register clears it entirely (ICR.INT_ASSERTED = 1b).

Linux does no longer activate auto mask since commit
0a8047ac68e50e4ccbadcfc6b6b070805b976885 and the real hardware clears
ICR even in such a case so we also should do so.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1707441
Signed-off-by: Andrew Melnychenko <andrew@daynix.com>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/e1000e_core.c | 38 ++++++++++++++++++++++++++++++++------
 hw/net/trace-events  |  1 +
 2 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 9f185d099c..f8aeafa16b 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -2604,12 +2604,38 @@ e1000e_mac_icr_read(E1000ECore *core, int index)
         e1000e_lower_interrupts(core, ICR, 0xffffffff);
     }
 
-    if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
-        (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
-        trace_e1000e_irq_icr_clear_iame();
-        e1000e_lower_interrupts(core, ICR, 0xffffffff);
-        trace_e1000e_irq_icr_process_iame();
-        e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
+    if (core->mac[ICR] & E1000_ICR_ASSERTED) {
+        if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME) {
+            trace_e1000e_irq_icr_clear_iame();
+            e1000e_lower_interrupts(core, ICR, 0xffffffff);
+            trace_e1000e_irq_icr_process_iame();
+            e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
+        }
+
+        /*
+         * The datasheet does not say what happens when interrupt was asserted
+         * (ICR.INT_ASSERT=1) and auto mask is *not* active.
+         * However, section of 13.3.27 the PCIe* GbE Controllers Open Source
+         * Software Developer’s Manual, which were written for older devices,
+         * namely 631xESB/632xESB, 82563EB/82564EB, 82571EB/82572EI &
+         * 82573E/82573V/82573L, does say:
+         * > If IMS = 0b, then the ICR register is always clear-on-read. If IMS
+         * > is not 0b, but some ICR bit is set where the corresponding IMS bit
+         * > is not set, then a read does not clear the ICR register. For
+         * > example, if IMS = 10101010b and ICR = 01010101b, then a read to the
+         * > ICR register does not clear it. If IMS = 10101010b and
+         * > ICR = 0101011b, then a read to the ICR register clears it entirely
+         * > (ICR.INT_ASSERTED = 1b).
+         *
+         * Linux does no longer activate auto mask since commit
+         * 0a8047ac68e50e4ccbadcfc6b6b070805b976885 and the real hardware
+         * clears ICR even in such a case so we also should do so.
+         */
+        if (core->mac[ICR] & core->mac[IMS]) {
+            trace_e1000e_irq_icr_clear_icr_bit_ims(core->mac[ICR],
+                                                   core->mac[IMS]);
+            e1000e_lower_interrupts(core, ICR, 0xffffffff);
+        }
     }
 
     return ret;
diff --git a/hw/net/trace-events b/hw/net/trace-events
index e4a98b2c7d..3eeacc530c 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -217,6 +217,7 @@ e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x"
 e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int"
 e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
 e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
+e1000e_irq_icr_clear_icr_bit_ims(uint32_t icr, uint32_t ims) "Clearing ICR on read due corresponding IMS bit: 0x%x & 0x%x"
 e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
 e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X"
 e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"

From da9f7f7769e8e65f6423095e978f9a375e33515c Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Date: Mon, 29 May 2023 11:37:04 +0900
Subject: [PATCH 15/15] igb: Remove obsolete workaround for Windows

I confirmed it works with Windows even without this workaround. It is
likely to be a mistake so remove it.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/igb_core.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index d00b1caa6a..8b6b75c522 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -2678,12 +2678,7 @@ static uint32_t igb_get_status(IGBCore *core, int index)
         res |= E1000_STATUS_IOV_MODE;
     }
 
-    /*
-     * Windows driver 12.18.9.23 resets if E1000_STATUS_GIO_MASTER_ENABLE is
-     * left set after E1000_CTRL_LRST is set.
-     */
-    if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE) &&
-        !(core->mac[CTRL] & E1000_CTRL_LRST)) {
+    if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) {
         res |= E1000_STATUS_GIO_MASTER_ENABLE;
     }