From f3e5a17593b972a9a6079ccf7677b4389d74d5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 14 Jan 2022 13:08:57 +0800 Subject: [PATCH 1/8] hw/net/vmxnet3: Log guest-triggerable errors using LOG_GUEST_ERROR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Interrupt Cause" register (VMXNET3_REG_ICR) is read-only. Write accesses are ignored. Log them with as LOG_GUEST_ERROR instead of aborting: [R +0.239743] writeq 0xe0002031 0x46291a5a55460800 ERROR:hw/net/vmxnet3.c:1819:vmxnet3_io_bar1_write: code should not be reached Thread 1 "qemu-system-i38" received signal SIGABRT, Aborted. (gdb) bt #3 0x74c397d3 in __GI_abort () at abort.c:79 #4 0x76d3cd4c in g_assertion_message (domain=, file=, line=, func=, message=) at ../glib/gtestutils.c:3223 #5 0x76d9d45f in g_assertion_message_expr (domain=0x0, file=0x59fc2e53 "hw/net/vmxnet3.c", line=1819, func=0x59fc11e0 <__func__.vmxnet3_io_bar1_write> "vmxnet3_io_bar1_write", expr=) at ../glib/gtestutils.c:3249 #6 0x57e80a3a in vmxnet3_io_bar1_write (opaque=0x62814100, addr=56, val=70, size=4) at hw/net/vmxnet3.c:1819 #7 0x58c2d894 in memory_region_write_accessor (mr=0x62816b90, addr=56, value=0x7fff9450, size=4, shift=0, mask=4294967295, attrs=...) at softmmu/memory.c:492 #8 0x58c2d1d2 in access_with_adjusted_size (addr=56, value=0x7fff9450, size=1, access_size_min=4, access_size_max=4, access_fn= 0x58c2d290 , mr=0x62816b90, attrs=...) at softmmu/memory.c:554 #9 0x58c2bae7 in memory_region_dispatch_write (mr=0x62816b90, addr=56, data=70, op=MO_8, attrs=...) at softmmu/memory.c:1504 #10 0x58bfd034 in flatview_write_continue (fv=0x606000181700, addr=0xe0002038, attrs=..., ptr=0x7fffb9e0, len=1, addr1=56, l=1, mr=0x62816b90) at softmmu/physmem.c:2782 #11 0x58beba00 in flatview_write (fv=0x606000181700, addr=0xe0002031, attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2822 #12 0x58beb589 in address_space_write (as=0x608000015f20, addr=0xe0002031, attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2914 Reported-by: Dike Reported-by: Duhao <504224090@qq.com> BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2032932 Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/vmxnet3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index f65af4e9ef..0b7acf7f89 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1816,7 +1816,9 @@ vmxnet3_io_bar1_write(void *opaque, case VMXNET3_REG_ICR: VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d", val, size); - g_assert_not_reached(); + qemu_log_mask(LOG_GUEST_ERROR, + "%s: write to read-only register VMXNET3_REG_ICR\n", + TYPE_VMXNET3); break; /* Event Cause Register */ From 41bcea7b2c99b8aec613beb62a6cdeb371a09449 Mon Sep 17 00:00:00 2001 From: Peter Foley Date: Fri, 14 Jan 2022 13:08:58 +0800 Subject: [PATCH 2/8] net/tap: Set return code on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match the other error handling in this function. Fixes: e7b347d0bf6 ("net: detect errors from probing vnet hdr flag for TAP devices") Reviewed-by: Patrick Venture Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Peter Foley Signed-off-by: Jason Wang --- net/tap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tap.c b/net/tap.c index f716be3e3f..c5cbeaa7a2 100644 --- a/net/tap.c +++ b/net/tap.c @@ -900,6 +900,7 @@ int net_init_tap(const Netdev *netdev, const char *name, if (i == 0) { vnet_hdr = tap_probe_vnet_hdr(fd, errp); if (vnet_hdr < 0) { + ret = -1; goto free_fail; } } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) { From e29919c93d19118610d64de9deb9c223024c0bc6 Mon Sep 17 00:00:00 2001 From: Peter Foley Date: Fri, 14 Jan 2022 13:08:59 +0800 Subject: [PATCH 3/8] net: Fix uninitialized data usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit e.g. 1109 15:16:20.151506 Uninitialized bytes in ioctl_common_pre at offset 0 inside [0x7ffc516af9b8, 4) 1109 15:16:20.151659 ==588974==WARNING: MemorySanitizer: use-of-uninitialized-value 1109 15:16:20.312923 #0 0x5639b88acb21 in tap_probe_vnet_hdr_len third_party/qemu/net/tap-linux.c:183:9 1109 15:16:20.312952 #1 0x5639b88afd66 in net_tap_fd_init third_party/qemu/net/tap.c:409:9 1109 15:16:20.312954 #2 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19 1109 15:16:20.312956 #3 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13 1109 15:16:20.312957 #4 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9 1109 15:16:20.312958 #5 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15 1109 15:16:20.312960 #6 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11 1109 15:16:20.312961 #7 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14 1109 15:16:20.312962 #8 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9 1109 15:16:20.312964 #9 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5 1109 15:16:20.312965 #10 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5 1109 15:16:20.312967 #11 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5 1109 15:16:20.312968 #12 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2) 1109 15:16:20.312969 #13 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120 1109 15:16:20.312970 1109 15:16:20.312975 Uninitialized value was stored to memory at 1109 15:16:20.313393 #0 0x5639b88acbee in tap_probe_vnet_hdr_len third_party/qemu/net/tap-linux.c 1109 15:16:20.313396 #1 0x5639b88afd66 in net_tap_fd_init third_party/qemu/net/tap.c:409:9 1109 15:16:20.313398 #2 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19 1109 15:16:20.313399 #3 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13 1109 15:16:20.313400 #4 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9 1109 15:16:20.313401 #5 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15 1109 15:16:20.313403 #6 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11 1109 15:16:20.313404 #7 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14 1109 15:16:20.313405 #8 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9 1109 15:16:20.313407 #9 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5 1109 15:16:20.313408 #10 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5 1109 15:16:20.313409 #11 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5 1109 15:16:20.313410 #12 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2) 1109 15:16:20.313412 #13 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120 1109 15:16:20.313413 1109 15:16:20.313417 Uninitialized value was stored to memory at 1109 15:16:20.313791 #0 0x5639b88affbd in net_tap_fd_init third_party/qemu/net/tap.c:400:26 1109 15:16:20.313826 #1 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19 1109 15:16:20.313829 #2 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13 1109 15:16:20.313831 #3 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9 1109 15:16:20.313836 #4 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15 1109 15:16:20.313838 #5 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11 1109 15:16:20.313839 #6 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14 1109 15:16:20.313841 #7 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9 1109 15:16:20.313843 #8 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5 1109 15:16:20.313844 #9 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5 1109 15:16:20.313845 #10 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5 1109 15:16:20.313846 #11 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2) 1109 15:16:20.313847 #12 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120 1109 15:16:20.313849 1109 15:16:20.313851 Uninitialized value was created by an allocation of 'ifr' in the stack frame of function 'tap_probe_vnet_hdr' 1109 15:16:20.313855 #0 0x5639b88ac680 in tap_probe_vnet_hdr third_party/qemu/net/tap-linux.c:151 1109 15:16:20.313856 1109 15:16:20.313878 SUMMARY: MemorySanitizer: use-of-uninitialized-value third_party/qemu/net/tap-linux.c:183:9 in tap_probe_vnet_hdr_len Fixes: dc69004c7d8 ("net: move tap_probe_vnet_hdr() to tap-linux.c") Reviewed-by: Hao Wu Reviewed-by: Patrick Venture Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Peter Foley Signed-off-by: Jason Wang --- net/tap-linux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tap-linux.c b/net/tap-linux.c index 9584769740..5e70b93037 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -150,6 +150,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp) int tap_probe_vnet_hdr(int fd, Error **errp) { struct ifreq ifr; + memset(&ifr, 0, sizeof(ifr)); if (ioctl(fd, TUNGETIFF, &ifr) != 0) { /* TUNGETIFF is available since kernel v2.6.27 */ From a5f038e2c5e262ded63869a9e4bdf4951821e480 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Fri, 14 Jan 2022 13:09:00 +0800 Subject: [PATCH 4/8] net/colo-compare.c: Optimize compare order for performance COLO-compare use the glib function g_queue_find_custom to dump another VM's networking packet to compare. But this function always start find from the queue->head(here is the newest packet), It will reduce the success rate of comparison. So this patch reversed the order of the queues for performance. Signed-off-by: Zhang Chen Reported-by: leirao Signed-off-by: Jason Wang --- net/colo-compare.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index b966e7e514..216de5a12b 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -197,7 +197,7 @@ static void colo_compare_inconsistency_notify(CompareState *s) /* Use restricted to colo_insert_packet() */ static gint seq_sorter(Packet *a, Packet *b, gpointer data) { - return a->tcp_seq - b->tcp_seq; + return b->tcp_seq - a->tcp_seq; } static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) @@ -421,13 +421,13 @@ pri: if (g_queue_is_empty(&conn->primary_list)) { return; } - ppkt = g_queue_pop_head(&conn->primary_list); + ppkt = g_queue_pop_tail(&conn->primary_list); sec: if (g_queue_is_empty(&conn->secondary_list)) { - g_queue_push_head(&conn->primary_list, ppkt); + g_queue_push_tail(&conn->primary_list, ppkt); return; } - spkt = g_queue_pop_head(&conn->secondary_list); + spkt = g_queue_pop_tail(&conn->secondary_list); if (ppkt->tcp_seq == ppkt->seq_end) { colo_release_primary_pkt(s, ppkt); @@ -458,7 +458,7 @@ sec: } } if (!ppkt) { - g_queue_push_head(&conn->secondary_list, spkt); + g_queue_push_tail(&conn->secondary_list, spkt); goto pri; } } @@ -477,7 +477,7 @@ sec: if (mark == COLO_COMPARE_FREE_PRIMARY) { conn->compare_seq = ppkt->seq_end; colo_release_primary_pkt(s, ppkt); - g_queue_push_head(&conn->secondary_list, spkt); + g_queue_push_tail(&conn->secondary_list, spkt); goto pri; } else if (mark == COLO_COMPARE_FREE_SECONDARY) { conn->compare_seq = spkt->seq_end; @@ -490,8 +490,8 @@ sec: goto pri; } } else { - g_queue_push_head(&conn->primary_list, ppkt); - g_queue_push_head(&conn->secondary_list, spkt); + g_queue_push_tail(&conn->primary_list, ppkt); + g_queue_push_tail(&conn->secondary_list, spkt); #ifdef DEBUG_COLO_PACKETS qemu_hexdump(stderr, "colo-compare ppkt", ppkt->data, ppkt->size); @@ -673,7 +673,7 @@ static void colo_compare_packet(CompareState *s, Connection *conn, while (!g_queue_is_empty(&conn->primary_list) && !g_queue_is_empty(&conn->secondary_list)) { - pkt = g_queue_pop_head(&conn->primary_list); + pkt = g_queue_pop_tail(&conn->primary_list); result = g_queue_find_custom(&conn->secondary_list, pkt, (GCompareFunc)HandlePacket); @@ -689,7 +689,7 @@ static void colo_compare_packet(CompareState *s, Connection *conn, * timeout, it will trigger a checkpoint request. */ trace_colo_compare_main("packet different"); - g_queue_push_head(&conn->primary_list, pkt); + g_queue_push_tail(&conn->primary_list, pkt); colo_compare_inconsistency_notify(s); break; @@ -819,7 +819,7 @@ static int compare_chr_send(CompareState *s, entry->buf = g_malloc(size); memcpy(entry->buf, buf, size); } - g_queue_push_head(&sendco->send_list, entry); + g_queue_push_tail(&sendco->send_list, entry); if (sendco->done) { sendco->co = qemu_coroutine_create(_compare_chr_send, sendco); @@ -1347,7 +1347,7 @@ static void colo_flush_packets(void *opaque, void *user_data) Packet *pkt = NULL; while (!g_queue_is_empty(&conn->primary_list)) { - pkt = g_queue_pop_head(&conn->primary_list); + pkt = g_queue_pop_tail(&conn->primary_list); compare_chr_send(s, pkt->data, pkt->size, @@ -1357,7 +1357,7 @@ static void colo_flush_packets(void *opaque, void *user_data) packet_destroy_partial(pkt, NULL); } while (!g_queue_is_empty(&conn->secondary_list)) { - pkt = g_queue_pop_head(&conn->secondary_list); + pkt = g_queue_pop_tail(&conn->secondary_list); packet_destroy(pkt, NULL); } } From 09313cdb44b2ccec218bc85f39073954f91ee9ea Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Fri, 14 Jan 2022 13:09:01 +0800 Subject: [PATCH 5/8] net/colo-compare.c: Update the default value comments Make the comments consistent with the REGULAR_PACKET_CHECK_MS. Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index 216de5a12b..62554b5b3c 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -1267,7 +1267,7 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) } if (!s->expired_scan_cycle) { - /* Set default value to 3000 MS */ + /* Set default value to 1000 MS */ s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS; } From 611382968069f54914e3cfff30f2a3b92c6219cd Mon Sep 17 00:00:00 2001 From: Rao Lei Date: Fri, 14 Jan 2022 13:09:02 +0800 Subject: [PATCH 6/8] net/filter: Optimize filter_send to coroutine This patch is to improve the logic of QEMU main thread sleep code in qemu_chr_write_buffer() where it can be blocked and can't run other coroutines during COLO IO stress test. Our approach is to put filter_send() in a coroutine. In this way, filter_send() will call qemu_coroutine_yield() in qemu_co_sleep_ns(), so that it can be scheduled out and QEMU main thread has opportunity to run other tasks. Signed-off-by: Lei Rao Signed-off-by: Zhang Chen Reviewed-by: Li Zhijian Reviewed-by: Zhang Chen Signed-off-by: Jason Wang --- net/filter-mirror.c | 66 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/net/filter-mirror.c b/net/filter-mirror.c index f20240cc9f..34a63b5dbb 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -20,6 +20,7 @@ #include "chardev/char-fe.h" #include "qemu/iov.h" #include "qemu/sockets.h" +#include "block/aio-wait.h" #define TYPE_FILTER_MIRROR "filter-mirror" typedef struct MirrorState MirrorState; @@ -42,20 +43,21 @@ struct MirrorState { bool vnet_hdr; }; -static int filter_send(MirrorState *s, - const struct iovec *iov, - int iovcnt) +typedef struct FilterSendCo { + MirrorState *s; + char *buf; + ssize_t size; + bool done; + int ret; +} FilterSendCo; + +static int _filter_send(MirrorState *s, + char *buf, + ssize_t size) { NetFilterState *nf = NETFILTER(s); int ret = 0; - ssize_t size = 0; uint32_t len = 0; - char *buf; - - size = iov_size(iov, iovcnt); - if (!size) { - return 0; - } len = htonl(size); ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); @@ -80,10 +82,7 @@ static int filter_send(MirrorState *s, } } - buf = g_malloc(size); - iov_to_buf(iov, iovcnt, 0, buf, size); ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); - g_free(buf); if (ret != size) { goto err; } @@ -94,6 +93,47 @@ err: return ret < 0 ? ret : -EIO; } +static void coroutine_fn filter_send_co(void *opaque) +{ + FilterSendCo *data = opaque; + + data->ret = _filter_send(data->s, data->buf, data->size); + data->done = true; + g_free(data->buf); + aio_wait_kick(); +} + +static int filter_send(MirrorState *s, + const struct iovec *iov, + int iovcnt) +{ + ssize_t size = iov_size(iov, iovcnt); + char *buf = NULL; + + if (!size) { + return 0; + } + + buf = g_malloc(size); + iov_to_buf(iov, iovcnt, 0, buf, size); + + FilterSendCo data = { + .s = s, + .size = size, + .buf = buf, + .ret = 0, + }; + + Coroutine *co = qemu_coroutine_create(filter_send_co, &data); + qemu_coroutine_enter(co); + + while (!data.done) { + aio_poll(qemu_get_aio_context(), true); + } + + return data.ret; +} + static void redirector_to_filter(NetFilterState *nf, const uint8_t *buf, int len) From 870374214e4cc122f086f55732f1b17ec320132e Mon Sep 17 00:00:00 2001 From: Nick Hudson Date: Sat, 12 Feb 2022 09:44:17 +0000 Subject: [PATCH 7/8] hw/net: e1000e: Clear ICR on read when using non MSI-X interrupts In section 7.4.3 of the 82574 datasheet it states that "In systems that do not support MSI-X, reading the ICR register clears it's bits..." Some OSes rely on this. Signed-off-by: Nick Hudson Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 5 +++++ hw/net/trace-events | 1 + 2 files changed, 6 insertions(+) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index 8ae6fb7e14..2c51089a82 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -2607,6 +2607,11 @@ e1000e_mac_icr_read(E1000ECore *core, int index) core->mac[ICR] = 0; } + if (!msix_enabled(core->owner)) { + trace_e1000e_irq_icr_clear_nonmsix_icr_read(); + core->mac[ICR] = 0; + } + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { trace_e1000e_irq_icr_clear_iame(); diff --git a/hw/net/trace-events b/hw/net/trace-events index 643338f610..4c0ec3fda1 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -221,6 +221,7 @@ e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x" e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME" e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x" e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x" +e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int" e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" From 9d6267b240c114d1a3cd314a08fd6e1339d34b83 Mon Sep 17 00:00:00 2001 From: Thomas Jansen Date: Sat, 12 Feb 2022 19:56:41 +0100 Subject: [PATCH 8/8] net/eth: Don't consider ESP to be an IPv6 option header The IPv6 option headers all have in common that they start with some common fields, in particular the type of the next header followed by the extention header length. This is used to traverse the list of the options. The ESP header does not follow that format, which can break the IPv6 option header traversal code in eth_parse_ipv6_hdr(). The effect of that is that network interfaces such as vmxnet3 that use the following call chain eth_is_ip6_extension_header_type eth_parse_ipv6_hdr net_tx_pkt_parse_headers net_tx_pkt_parse vmxnet3_process_tx_queue to send packets from the VM out to the host will drop packets of the following structure: Ethernet-Header(IPv6-Header(ESP(encrypted data))) Note that not all types of network interfaces use the net_tx_pkt_parse function though, leading to inconsistent behavior regarding sending those packets. The e1000 network interface for example does not suffer from this limitation. By not considering ESP to be an IPv6 header we can allow sending those packets out to the host on all types of network interfaces. Fixes: 75020a702151 ("Common definitions for VMWARE devices") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/149 Buglink: https://bugs.launchpad.net/qemu/+bug/1758091 Signed-off-by: Thomas Jansen Signed-off-by: Jason Wang --- net/eth.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/eth.c b/net/eth.c index fe876d1a55..f074b2f9f3 100644 --- a/net/eth.c +++ b/net/eth.c @@ -389,7 +389,6 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type) case IP6_HOP_BY_HOP: case IP6_ROUTING: case IP6_FRAGMENT: - case IP6_ESP: case IP6_AUTHENTICATION: case IP6_DESTINATON: case IP6_MOBILITY: