From fdec16e3c2a614e2861f3086b05d444b5d8c3406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Mon, 11 Mar 2019 18:25:05 +0100 Subject: [PATCH 1/4] net/socket: learn to talk with a unix dgram socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -net socket has a fd argument, and may be passed pre-opened sockets. TCP sockets use framing. UDP sockets have datagram boundaries. When given a unix dgram socket, it will be able to read from it, but will attempt to send on the dgram_dst, which is unset. The other end will not receive the data. Let's teach -net socket to recognize a UNIX DGRAM socket, and use the regular send() command (without dgram_dst). This makes running slirp out-of-process possible that way (python pseudo-code): a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM) subprocess.Popen('qemu -net socket,fd=%d -net user' % a.fileno(), shell=True) subprocess.Popen('qemu ... -net nic -net socket,fd=%d' % b.fileno(), shell=True) Signed-off-by: Marc-André Lureau Signed-off-by: Jason Wang --- net/socket.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index 90ef3517be..c92354049b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -119,9 +119,13 @@ static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, ssize_t ret; do { - ret = qemu_sendto(s->fd, buf, size, 0, - (struct sockaddr *)&s->dgram_dst, - sizeof(s->dgram_dst)); + if (s->dgram_dst.sin_family != AF_UNIX) { + ret = qemu_sendto(s->fd, buf, size, 0, + (struct sockaddr *)&s->dgram_dst, + sizeof(s->dgram_dst)); + } else { + ret = send(s->fd, buf, size, 0); + } } while (ret == -1 && errno == EINTR); if (ret == -1 && errno == EAGAIN) { @@ -336,6 +340,15 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, int newfd; NetClientState *nc; NetSocketState *s; + SocketAddress *sa; + SocketAddressType sa_type; + + sa = socket_local_address(fd, errp); + if (!sa) { + return NULL; + } + sa_type = sa->type; + qapi_free_SocketAddress(sa); /* fd passed: multicast: "learn" dgram_dst address from bound address and save it * Because this may be "shared" socket from a "master" process, datagrams would be recv() @@ -379,8 +392,12 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, "socket: fd=%d (cloned mcast=%s:%d)", fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); } else { + if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) { + s->dgram_dst.sin_family = AF_UNIX; + } + snprintf(nc->info_str, sizeof(nc->info_str), - "socket: fd=%d", fd); + "socket: fd=%d %s", fd, SocketAddressType_str(sa_type)); } return s; From 157628d067072118b15569661f0adf9141c34383 Mon Sep 17 00:00:00 2001 From: yuchenlin Date: Wed, 13 Mar 2019 06:56:49 +0000 Subject: [PATCH 2/4] e1000: Delay flush queue when receive RCTL Due to too early RCT0 interrput, win10x32 may hang on booting. This problem can be reproduced by doing power cycle on win10x32 guest. In our environment, we have 10 win10x32 and stress power cycle. The problem will happen about 20 rounds. Below shows some log with comment: The normal case: 22831@1551928392.984687:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 22831@1551928392.985655:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 22831@1551928392.985801:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 22831@1551928393.056710:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: ICR read: 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 22831@1551928393.077548:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: ICR read: 0 e1000: set_ics 2, ICR 0, IMR 0 e1000: set_ics 2, ICR 2, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 22831@1551928393.102974:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 22831@1551928393.103267:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle RX now e1000: set_ics 0, ICR 2, IMR 9d <- unmask interrupt e1000: RCTL: 255, mac_reg[RCTL] = 0x48002 e1000: set_ics 80, ICR 2, IMR 9d <- interrupt and work! ... The bad case: 27744@1551930483.117766:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 27744@1551930483.118398:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 27744@1551930483.198063:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: ICR read: 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 27744@1551930483.218675:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: set_ics 0, ICR 0, IMR 0 e1000: ICR read: 0 e1000: set_ics 2, ICR 0, IMR 0 e1000: set_ics 2, ICR 2, IMR 0 e1000: RCTL: 0, mac_reg[RCTL] = 0x0 27744@1551930483.241768:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 27744@1551930483.241979:e1000x_rx_disabled Received packet dropped because receive is disabled RCTL = 0 e1000: RCTL: 255, mac_reg[RCTL] = 0x40002 <- win10x32 says it can handle RX now e1000: set_ics 80, ICR 2, IMR 0 <- flush queue (caused by setting RCTL) e1000: set_ics 0, ICR 82, IMR 9d <- unmask interrupt and because 0x82&0x9d != 0 generate interrupt, hang on here... To workaround this problem, simply delay flush queue. Also stop receiving when timer is going to run. Tested on CentOS, Win7SP1x64 and Win10x32. Signed-off-by: yuchenlin Reviewed-by: Dmitry Fleytman Signed-off-by: Jason Wang --- hw/net/e1000.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 5e144cb4e4..9b39bccfb2 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -120,6 +120,8 @@ typedef struct E1000State_st { bool mit_irq_level; /* Tracks interrupt pin level. */ uint32_t mit_ide; /* Tracks E1000_TXD_CMD_IDE bit. */ + QEMUTimer *flush_queue_timer; + /* Compatibility flags for migration to/from qemu 1.3.0 and older */ #define E1000_FLAG_AUTONEG_BIT 0 #define E1000_FLAG_MIT_BIT 1 @@ -366,6 +368,7 @@ static void e1000_reset(void *opaque) timer_del(d->autoneg_timer); timer_del(d->mit_timer); + timer_del(d->flush_queue_timer); d->mit_timer_on = 0; d->mit_irq_level = 0; d->mit_ide = 0; @@ -391,6 +394,14 @@ set_ctrl(E1000State *s, int index, uint32_t val) s->mac_reg[CTRL] = val & ~E1000_CTRL_RST; } +static void +e1000_flush_queue_timer(void *opaque) +{ + E1000State *s = opaque; + + qemu_flush_queued_packets(qemu_get_queue(s->nic)); +} + static void set_rx_control(E1000State *s, int index, uint32_t val) { @@ -399,7 +410,8 @@ set_rx_control(E1000State *s, int index, uint32_t val) s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], s->mac_reg[RCTL]); - qemu_flush_queued_packets(qemu_get_queue(s->nic)); + timer_mod(s->flush_queue_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 1000); } static void @@ -837,7 +849,7 @@ e1000_can_receive(NetClientState *nc) E1000State *s = qemu_get_nic_opaque(nc); return e1000x_rx_ready(&s->parent_obj, s->mac_reg) && - e1000_has_rxbufs(s, 1); + e1000_has_rxbufs(s, 1) && !timer_pending(s->flush_queue_timer); } static uint64_t rx_desc_base(E1000State *s) @@ -881,6 +893,10 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) return -1; } + if (timer_pending(s->flush_queue_timer)) { + return 0; + } + /* Pad to minimum Ethernet frame length */ if (size < sizeof(min_buf)) { iov_to_buf(iov, iovcnt, 0, min_buf, size); @@ -1637,6 +1653,8 @@ pci_e1000_uninit(PCIDevice *dev) timer_free(d->autoneg_timer); timer_del(d->mit_timer); timer_free(d->mit_timer); + timer_del(d->flush_queue_timer); + timer_free(d->flush_queue_timer); qemu_del_nic(d->nic); } @@ -1700,6 +1718,8 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) d->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, e1000_autoneg_timer, d); d->mit_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000_mit_timer, d); + d->flush_queue_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + e1000_flush_queue_timer, d); } static void qdev_e1000_reset(DeviceState *dev) From c6bf50ff72c2eefbb041c71a0944426df685a9df Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Fri, 15 Mar 2019 14:12:58 +0800 Subject: [PATCH 3/4] MAINTAINERS: Update the latest email address Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c2ad5062f6..56139ac8ab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2179,7 +2179,7 @@ F: include/migration/failover.h F: docs/COLO-FT.txt COLO Proxy -M: Zhang Chen +M: Zhang Chen M: Li Zhijian S: Supported F: docs/colo-proxy.txt From ab79237a15e8f8c23310291b672d83374cf17935 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Wed, 21 Nov 2018 03:21:59 -0800 Subject: [PATCH 4/4] net: tap: use qemu_set_nonblock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fcntl will change the flags directly, use qemu_set_nonblock() instead. Reviewed-by: Daniel P. Berrangé Acked-by: Michael S. Tsirkin Signed-off-by: Li Qiang Signed-off-by: Jason Wang --- net/tap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/tap.c b/net/tap.c index cc8525f154..e8aadd8d4b 100644 --- a/net/tap.c +++ b/net/tap.c @@ -592,7 +592,7 @@ int net_init_bridge(const Netdev *netdev, const char *name, return -1; } - fcntl(fd, F_SETFL, O_NONBLOCK); + qemu_set_nonblock(fd); vnet_hdr = tap_probe_vnet_hdr(fd); s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr); @@ -707,7 +707,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } return; } - fcntl(vhostfd, F_SETFL, O_NONBLOCK); + qemu_set_nonblock(vhostfd); } options.opaque = (void *)(uintptr_t)vhostfd; @@ -791,7 +791,7 @@ int net_init_tap(const Netdev *netdev, const char *name, return -1; } - fcntl(fd, F_SETFL, O_NONBLOCK); + qemu_set_nonblock(fd); vnet_hdr = tap_probe_vnet_hdr(fd); @@ -839,7 +839,7 @@ int net_init_tap(const Netdev *netdev, const char *name, goto free_fail; } - fcntl(fd, F_SETFL, O_NONBLOCK); + qemu_set_nonblock(fd); if (i == 0) { vnet_hdr = tap_probe_vnet_hdr(fd); @@ -887,7 +887,7 @@ free_fail: return -1; } - fcntl(fd, F_SETFL, O_NONBLOCK); + qemu_set_nonblock(fd); vnet_hdr = tap_probe_vnet_hdr(fd); net_init_tap_one(tap, peer, "bridge", name, ifname,