From d6b732e953673e4e1f4b36868c206a08213b6708 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:45 +0800 Subject: [PATCH 01/14] net: Add vnet_hdr_len arguments in NetClientState Add vnet_hdr_len arguments in NetClientState that make other module get real vnet_hdr_len easily. Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- include/net/net.h | 1 + net/net.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/net.h b/include/net/net.h index 99b28d5b38..9a92c704ea 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -100,6 +100,7 @@ struct NetClientState { unsigned int queue_index; unsigned rxfilter_notify_enabled:1; int vring_enable; + int vnet_hdr_len; QTAILQ_HEAD(NetFilterHead, NetFilterState) filters; }; diff --git a/net/net.c b/net/net.c index 6235aabed8..b62ce105a2 100644 --- a/net/net.c +++ b/net/net.c @@ -492,6 +492,7 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) return; } + nc->vnet_hdr_len = len; nc->info->set_vnet_hdr_len(nc, len); } From 3cde5ea211ead04997e2ecd7f8bdc831f84700e6 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:46 +0800 Subject: [PATCH 02/14] net/net.c: Add vnet_hdr support in SocketReadState We add a flag to decide whether net_fill_rstate() need read the vnet_hdr_len or not. 
Signed-off-by: Zhang Chen Suggested-by: Jason Wang Signed-off-by: Jason Wang --- include/net/net.h | 9 +++++++-- net/colo-compare.c | 4 ++-- net/filter-mirror.c | 2 +- net/net.c | 36 ++++++++++++++++++++++++++++++++---- net/socket.c | 8 ++++---- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/include/net/net.h b/include/net/net.h index 9a92c704ea..1c55a93588 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -112,9 +112,13 @@ typedef struct NICState { } NICState; struct SocketReadState { - int state; /* 0 = getting length, 1 = getting data */ + /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */ + int state; + /* This flag decide whether to read the vnet_hdr_len field */ + bool vnet_hdr; uint32_t index; uint32_t packet_len; + uint32_t vnet_hdr_len; uint8_t buf[NET_BUFSIZE]; SocketReadStateFinalize *finalize; }; @@ -177,7 +181,8 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, void print_net_client(Monitor *mon, NetClientState *nc); void hmp_info_network(Monitor *mon, const QDict *qdict); void net_socket_rs_init(SocketReadState *rs, - SocketReadStateFinalize *finalize); + SocketReadStateFinalize *finalize, + bool vnet_hdr); /* NIC info */ diff --git a/net/colo-compare.c b/net/colo-compare.c index abfc23ce80..ea9bccca46 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -743,8 +743,8 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) return; } - net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize); - net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize); + net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false); + net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false); g_queue_init(&s->conn_list); diff --git a/net/filter-mirror.c b/net/filter-mirror.c index 6043549e5f..1b6211b2a2 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -229,7 +229,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) } } - net_socket_rs_init(&s->rs, 
redirector_rs_finalize); + net_socket_rs_init(&s->rs, redirector_rs_finalize, false); if (s->indev) { chr = qemu_chr_find(s->indev); diff --git a/net/net.c b/net/net.c index b62ce105a2..0e28099554 100644 --- a/net/net.c +++ b/net/net.c @@ -1616,11 +1616,14 @@ QemuOptsList qemu_net_opts = { }; void net_socket_rs_init(SocketReadState *rs, - SocketReadStateFinalize *finalize) + SocketReadStateFinalize *finalize, + bool vnet_hdr) { rs->state = 0; + rs->vnet_hdr = vnet_hdr; rs->index = 0; rs->packet_len = 0; + rs->vnet_hdr_len = 0; memset(rs->buf, 0, sizeof(rs->buf)); rs->finalize = finalize; } @@ -1635,8 +1638,12 @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) unsigned int l; while (size > 0) { - /* reassemble a packet from the network */ - switch (rs->state) { /* 0 = getting length, 1 = getting data */ + /* Reassemble a packet from the network. + * 0 = getting length. + * 1 = getting vnet header length. + * 2 = getting data. + */ + switch (rs->state) { case 0: l = 4 - rs->index; if (l > size) { @@ -1650,10 +1657,31 @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) /* got length */ rs->packet_len = ntohl(*(uint32_t *)rs->buf); rs->index = 0; - rs->state = 1; + if (rs->vnet_hdr) { + rs->state = 1; + } else { + rs->state = 2; + rs->vnet_hdr_len = 0; + } } break; case 1: + l = 4 - rs->index; + if (l > size) { + l = size; + } + memcpy(rs->buf + rs->index, buf, l); + buf += l; + size -= l; + rs->index += l; + if (rs->index == 4) { + /* got vnet header length */ + rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf); + rs->index = 0; + rs->state = 2; + } + break; + case 2: l = rs->packet_len - rs->index; if (l > size) { l = size; diff --git a/net/socket.c b/net/socket.c index dcae1ae2c0..f85ef7d61b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -174,7 +174,7 @@ static void net_socket_send(void *opaque) closesocket(s->fd); s->fd = -1; - net_socket_rs_init(&s->rs, net_socket_rs_finalize); + net_socket_rs_init(&s->rs, 
net_socket_rs_finalize, false); s->nc.link_down = true; memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); @@ -366,7 +366,7 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, s->fd = fd; s->listen_fd = -1; s->send_fn = net_socket_send_dgram; - net_socket_rs_init(&s->rs, net_socket_rs_finalize); + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); net_socket_read_poll(s, true); /* mcast: save bound address as dst */ @@ -417,7 +417,7 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, s->fd = fd; s->listen_fd = -1; - net_socket_rs_init(&s->rs, net_socket_rs_finalize); + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); /* Disable Nagle algorithm on TCP sockets to reduce latency */ socket_set_nodelay(fd); @@ -522,7 +522,7 @@ static int net_socket_listen_init(NetClientState *peer, s->fd = -1; s->listen_fd = fd; s->nc.link_down = true; - net_socket_rs_init(&s->rs, net_socket_rs_finalize); + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); return 0; From dc3c5ac64582fa7061c3b631c090632a05ddb718 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:47 +0800 Subject: [PATCH 03/14] net/filter-mirror.c: Introduce parameter for filter_send() This patch change the filter_send() parameter from CharBackend to MirrorState, we can get more information like vnet_hdr(We use it to support packet with vnet_header). 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/filter-mirror.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/filter-mirror.c b/net/filter-mirror.c index 1b6211b2a2..e8e5b60f25 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -43,7 +43,7 @@ typedef struct MirrorState { SocketReadState rs; } MirrorState; -static int filter_send(CharBackend *chr_out, +static int filter_send(MirrorState *s, const struct iovec *iov, int iovcnt) { @@ -58,14 +58,14 @@ static int filter_send(CharBackend *chr_out, } len = htonl(size); - ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len)); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); if (ret != sizeof(len)) { goto err; } buf = g_malloc(size); iov_to_buf(iov, iovcnt, 0, buf, size); - ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); g_free(buf); if (ret != size) { goto err; @@ -141,7 +141,7 @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf, MirrorState *s = FILTER_MIRROR(nf); int ret; - ret = filter_send(&s->chr_out, iov, iovcnt); + ret = filter_send(s, iov, iovcnt); if (ret) { error_report("filter mirror send failed(%s)", strerror(-ret)); } @@ -164,7 +164,7 @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf, int ret; if (qemu_chr_fe_backend_connected(&s->chr_out)) { - ret = filter_send(&s->chr_out, iov, iovcnt); + ret = filter_send(s, iov, iovcnt); if (ret) { error_report("filter redirector send failed(%s)", strerror(-ret)); } From e2521f0e038e912bc8d3747813c3e6dffd9a2eaf Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:48 +0800 Subject: [PATCH 04/14] net/filter-mirror.c: Make filter mirror support vnet support. We add the vnet_hdr_support option for filter-mirror, default is disabled. If you use virtio-net-pci or other driver needs vnet_hdr, please enable it. 
You can use it for example: -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support If it has vnet_hdr_support flag, we will change the sending packet format from struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}. make other module(like colo-compare) know how to parse net packet correctly. Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++- qemu-options.hx | 5 ++--- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/net/filter-mirror.c b/net/filter-mirror.c index e8e5b60f25..32ecdb3050 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -41,12 +41,14 @@ typedef struct MirrorState { CharBackend chr_in; CharBackend chr_out; SocketReadState rs; + bool vnet_hdr; } MirrorState; static int filter_send(MirrorState *s, const struct iovec *iov, int iovcnt) { + NetFilterState *nf = NETFILTER(s); int ret = 0; ssize_t size = 0; uint32_t len = 0; @@ -63,6 +65,23 @@ static int filter_send(MirrorState *s, goto err; } + if (s->vnet_hdr) { + /* + * If vnet_hdr = on, we send vnet header len to make other + * module(like colo-compare) know how to parse net + * packet correctly. 
+ */ + ssize_t vnet_hdr_len; + + vnet_hdr_len = nf->netdev->vnet_hdr_len; + + len = htonl(vnet_hdr_len); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); + if (ret != sizeof(len)) { + goto err; + } + } + buf = g_malloc(size); iov_to_buf(iov, iovcnt, 0, buf, size); ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); @@ -229,7 +248,7 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) } } - net_socket_rs_init(&s->rs, redirector_rs_finalize, false); + net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr); if (s->indev) { chr = qemu_chr_find(s->indev); @@ -318,6 +337,20 @@ static void filter_mirror_set_outdev(Object *obj, } } +static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp) +{ + MirrorState *s = FILTER_MIRROR(obj); + + return s->vnet_hdr; +} + +static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp) +{ + MirrorState *s = FILTER_MIRROR(obj); + + s->vnet_hdr = value; +} + static char *filter_redirector_get_outdev(Object *obj, Error **errp) { MirrorState *s = FILTER_REDIRECTOR(obj); @@ -337,8 +370,15 @@ static void filter_redirector_set_outdev(Object *obj, static void filter_mirror_init(Object *obj) { + MirrorState *s = FILTER_MIRROR(obj); + object_property_add_str(obj, "outdev", filter_mirror_get_outdev, filter_mirror_set_outdev, NULL); + + s->vnet_hdr = false; + object_property_add_bool(obj, "vnet_hdr_support", + filter_mirror_get_vnet_hdr, + filter_mirror_set_vnet_hdr, NULL); } static void filter_redirector_init(Object *obj) diff --git a/qemu-options.hx b/qemu-options.hx index 2cc70b9cfc..9eee712a00 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4249,10 +4249,9 @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter. @option{tx}: the filter is attached to the transmit queue of the netdev, where it will receive packets sent by the netdev. 
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}] +@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support] -filter-mirror on netdev @var{netdevid},mirror net packet to chardev -@var{chardevid} +filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len. @item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid}, outdev=@var{chardevid}[,queue=@var{all|rx|tx}] From 00d5c2406bceb2eb5b77b47948311bf3ef8653a7 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:49 +0800 Subject: [PATCH 05/14] net/filter-mirror.c: Add new option to enable vnet support for filter-redirector We add the vnet_hdr_support option for filter-redirector, default is disabled. If you use virtio-net-pci net driver or other driver needs vnet_hdr, please enable it. Because colo-compare or other modules needs the vnet_hdr_len to parse packet, we add this new option send the len to others. 
You can use it for example: -object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/filter-mirror.c | 23 +++++++++++++++++++++++ qemu-options.hx | 6 +++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/net/filter-mirror.c b/net/filter-mirror.c index 32ecdb3050..90e2c92337 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -368,6 +368,22 @@ static void filter_redirector_set_outdev(Object *obj, s->outdev = g_strdup(value); } +static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + return s->vnet_hdr; +} + +static void filter_redirector_set_vnet_hdr(Object *obj, + bool value, + Error **errp) +{ + MirrorState *s = FILTER_REDIRECTOR(obj); + + s->vnet_hdr = value; +} + static void filter_mirror_init(Object *obj) { MirrorState *s = FILTER_MIRROR(obj); @@ -383,10 +399,17 @@ static void filter_mirror_init(Object *obj) static void filter_redirector_init(Object *obj) { + MirrorState *s = FILTER_REDIRECTOR(obj); + object_property_add_str(obj, "indev", filter_redirector_get_indev, filter_redirector_set_indev, NULL); object_property_add_str(obj, "outdev", filter_redirector_get_outdev, filter_redirector_set_outdev, NULL); + + s->vnet_hdr = false; + object_property_add_bool(obj, "vnet_hdr_support", + filter_redirector_get_vnet_hdr, + filter_redirector_set_vnet_hdr, NULL); } static void filter_mirror_fini(Object *obj) diff --git a/qemu-options.hx b/qemu-options.hx index 9eee712a00..b86c09efa1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4253,11 +4253,11 @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter. filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len. 
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid}, -outdev=@var{chardevid}[,queue=@var{all|rx|tx}] +@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support] filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev -@var{chardevid},and redirect indev's packet to filter. +@var{chardevid},and redirect indev's packet to filter. If it has the vnet_hdr_support flag, +filter-redirector will redirect packet with vnet_hdr_len. Create a filter-redirector we need to differ outdev id from indev id, id can not be the same. we can just use indev or outdev, but at least one of indev or outdev need to be specified. From ada1a33f9a690b95d32115b38b88a33cb66c83bd Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:50 +0800 Subject: [PATCH 06/14] net/colo.c: Make vnet_hdr_len as packet property We can use this property to flush and send packets with vnet_hdr_len. 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 8 ++++++-- net/colo.c | 3 ++- net/colo.h | 4 +++- net/filter-rewriter.c | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index ea9bccca46..9c2b1ead19 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -121,9 +121,13 @@ static int packet_enqueue(CompareState *s, int mode) Connection *conn; if (mode == PRIMARY_IN) { - pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len); + pkt = packet_new(s->pri_rs.buf, + s->pri_rs.packet_len, + s->pri_rs.vnet_hdr_len); } else { - pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len); + pkt = packet_new(s->sec_rs.buf, + s->sec_rs.packet_len, + s->sec_rs.vnet_hdr_len); } if (parse_packet_early(pkt)) { diff --git a/net/colo.c b/net/colo.c index 8cc166bc22..180eaed49a 100644 --- a/net/colo.c +++ b/net/colo.c @@ -153,13 +153,14 @@ void connection_destroy(void *opaque) g_slice_free(Connection, conn); } -Packet *packet_new(const void *data, int size) +Packet *packet_new(const void *data, int size, int vnet_hdr_len) { Packet *pkt = g_slice_new(Packet); pkt->data = g_memdup(data, size); pkt->size = size; pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); + pkt->vnet_hdr_len = vnet_hdr_len; return pkt; } diff --git a/net/colo.h b/net/colo.h index 7c524f3a1c..caedb0dca7 100644 --- a/net/colo.h +++ b/net/colo.h @@ -43,6 +43,8 @@ typedef struct Packet { int size; /* Time of packet creation, in wall clock ms */ int64_t creation_ms; + /* Get vnet_hdr_len from filter */ + uint32_t vnet_hdr_len; } Packet; typedef struct ConnectionKey { @@ -82,7 +84,7 @@ Connection *connection_get(GHashTable *connection_track_table, ConnectionKey *key, GQueue *conn_list); void connection_hashtable_reset(GHashTable *connection_track_table); -Packet *packet_new(const void *data, int size); +Packet *packet_new(const void *data, int size, int vnet_hdr_len); void packet_destroy(void *opaque, void *user_data); #endif /* 
QEMU_COLO_PROXY_H */ diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c index afa06e8919..63256c72a0 100644 --- a/net/filter-rewriter.c +++ b/net/filter-rewriter.c @@ -158,7 +158,7 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, char *buf = g_malloc0(size); iov_to_buf(iov, iovcnt, 0, buf, size); - pkt = packet_new(buf, size); + pkt = packet_new(buf, size, 0); g_free(buf); /* From 3037e7a5b7670e9b99dd61e3fe4b9e41ce8c1143 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:51 +0800 Subject: [PATCH 07/14] net/colo-compare.c: Introduce parameter for compare_chr_send() This patch change the compare_chr_send() parameter from CharBackend to CompareState, we can get more information like vnet_hdr(We use it to support packet with vnet_header). Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index 9c2b1ead19..c5d01da0d4 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -97,7 +97,7 @@ enum { SECONDARY_IN, }; -static int compare_chr_send(CharBackend *out, +static int compare_chr_send(CompareState *s, const uint8_t *buf, uint32_t size); @@ -483,7 +483,7 @@ static void colo_compare_connection(void *opaque, void *user_data) } if (result) { - ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size); + ret = compare_chr_send(s, pkt->data, pkt->size); if (ret < 0) { error_report("colo_send_primary_packet failed"); } @@ -504,7 +504,7 @@ static void colo_compare_connection(void *opaque, void *user_data) } } -static int compare_chr_send(CharBackend *out, +static int compare_chr_send(CompareState *s, const uint8_t *buf, uint32_t size) { @@ -515,12 +515,12 @@ static int compare_chr_send(CharBackend *out, return 0; } - ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len)); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); if (ret != sizeof(len)) { goto err; } 
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); if (ret != size) { goto err; } @@ -665,7 +665,7 @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs) if (packet_enqueue(s, PRIMARY_IN)) { trace_colo_compare_main("primary: unsupported packet in"); - compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len); + compare_chr_send(s, pri_rs->buf, pri_rs->packet_len); } else { /* compare connection */ g_queue_foreach(&s->conn_list, colo_compare_connection, s); @@ -774,7 +774,7 @@ static void colo_flush_packets(void *opaque, void *user_data) while (!g_queue_is_empty(&conn->primary_list)) { pkt = g_queue_pop_head(&conn->primary_list); - compare_chr_send(&s->chr_out, pkt->data, pkt->size); + compare_chr_send(s, pkt->data, pkt->size); packet_destroy(pkt, NULL); } while (!g_queue_is_empty(&conn->secondary_list)) { From aa3a7032f73f603818ea3e781b19ce8eab0d33f1 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:52 +0800 Subject: [PATCH 08/14] net/colo-compare.c: Make colo-compare support vnet_hdr_len We add the vnet_hdr_support option for colo-compare, default is disabled. If you use virtio-net-pci or other driver needs vnet_hdr, please enable it. You can use it for example: -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support COLO-compare can get vnet header length from filter, Add vnet_hdr_len to struct packet and output packet with the vnet_hdr_len. 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 60 ++++++++++++++++++++++++++++++++++++++++------ qemu-options.hx | 4 ++-- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index c5d01da0d4..95911a260f 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -73,6 +73,7 @@ typedef struct CompareState { CharBackend chr_out; SocketReadState pri_rs; SocketReadState sec_rs; + bool vnet_hdr; /* connection list: the connections belonged to this NIC could be found * in this list. @@ -99,7 +100,8 @@ enum { static int compare_chr_send(CompareState *s, const uint8_t *buf, - uint32_t size); + uint32_t size, + uint32_t vnet_hdr_len); static gint seq_sorter(Packet *a, Packet *b, gpointer data) { @@ -483,7 +485,10 @@ static void colo_compare_connection(void *opaque, void *user_data) } if (result) { - ret = compare_chr_send(s, pkt->data, pkt->size); + ret = compare_chr_send(s, + pkt->data, + pkt->size, + pkt->vnet_hdr_len); if (ret < 0) { error_report("colo_send_primary_packet failed"); } @@ -506,7 +511,8 @@ static void colo_compare_connection(void *opaque, void *user_data) static int compare_chr_send(CompareState *s, const uint8_t *buf, - uint32_t size) + uint32_t size, + uint32_t vnet_hdr_len) { int ret = 0; uint32_t len = htonl(size); @@ -520,6 +526,18 @@ static int compare_chr_send(CompareState *s, goto err; } + if (s->vnet_hdr) { + /* + * We send vnet header len make other module(like filter-redirector) + * know how to parse net packet correctly. 
+ */ + len = htonl(vnet_hdr_len); + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); + if (ret != sizeof(len)) { + goto err; + } + } + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); if (ret != size) { goto err; @@ -659,13 +677,32 @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp) s->outdev = g_strdup(value); } +static bool compare_get_vnet_hdr(Object *obj, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + return s->vnet_hdr; +} + +static void compare_set_vnet_hdr(Object *obj, + bool value, + Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + s->vnet_hdr = value; +} + static void compare_pri_rs_finalize(SocketReadState *pri_rs) { CompareState *s = container_of(pri_rs, CompareState, pri_rs); if (packet_enqueue(s, PRIMARY_IN)) { trace_colo_compare_main("primary: unsupported packet in"); - compare_chr_send(s, pri_rs->buf, pri_rs->packet_len); + compare_chr_send(s, + pri_rs->buf, + pri_rs->packet_len, + pri_rs->vnet_hdr_len); } else { /* compare connection */ g_queue_foreach(&s->conn_list, colo_compare_connection, s); @@ -747,8 +784,8 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) return; } - net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false); - net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false); + net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr); + net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr); g_queue_init(&s->conn_list); @@ -774,7 +811,10 @@ static void colo_flush_packets(void *opaque, void *user_data) while (!g_queue_is_empty(&conn->primary_list)) { pkt = g_queue_pop_head(&conn->primary_list); - compare_chr_send(s, pkt->data, pkt->size); + compare_chr_send(s, + pkt->data, + pkt->size, + pkt->vnet_hdr_len); packet_destroy(pkt, NULL); } while (!g_queue_is_empty(&conn->secondary_list)) { @@ -792,6 +832,8 @@ static void colo_compare_class_init(ObjectClass *oc, void *data) static void 
colo_compare_init(Object *obj) { + CompareState *s = COLO_COMPARE(obj); + object_property_add_str(obj, "primary_in", compare_get_pri_indev, compare_set_pri_indev, NULL); @@ -801,6 +843,10 @@ static void colo_compare_init(Object *obj) object_property_add_str(obj, "outdev", compare_get_outdev, compare_set_outdev, NULL); + + s->vnet_hdr = false; + object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, + compare_set_vnet_hdr, NULL); } static void colo_compare_finalize(Object *obj) diff --git a/qemu-options.hx b/qemu-options.hx index b86c09efa1..91a25ee3b9 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4282,13 +4282,13 @@ Dump the network traffic on netdev @var{dev} to the file specified by The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. -@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid}, -outdev=@var{chardevid} +@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support] Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with secondary packet. If the packets are same, we will output primary packet to outdev@var{chardevid}, else we will notify colo-frame do checkpoint and send primary packet to outdev@var{chardevid}. +if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len. we must use it with the help of filter-mirror and filter-redirector. From 5cc444d367078e3582b8e7e0136c53d3632c544b Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:53 +0800 Subject: [PATCH 09/14] net/colo.c: Add vnet packet parse feature in colo-proxy Make colo-compare and filter-rewriter can parse vnet packet. 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/colo.c b/net/colo.c index 180eaed49a..28ce7c8ae0 100644 --- a/net/colo.c +++ b/net/colo.c @@ -43,11 +43,11 @@ int parse_packet_early(Packet *pkt) { int network_length; static const uint8_t vlan[] = {0x81, 0x00}; - uint8_t *data = pkt->data; + uint8_t *data = pkt->data + pkt->vnet_hdr_len; uint16_t l3_proto; ssize_t l2hdr_len = eth_get_l2_hdr_length(data); - if (pkt->size < ETH_HLEN) { + if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { trace_colo_proxy_main("pkt->size < ETH_HLEN"); return 1; } @@ -73,7 +73,7 @@ int parse_packet_early(Packet *pkt) } network_length = pkt->ip->ip_hl * 4; - if (pkt->size < l2hdr_len + network_length) { + if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) { trace_colo_proxy_main("pkt->size < network_header + network_length"); return 1; } From d63b366a26b46bd8109a64bf4a2ecc5e16242a8b Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:54 +0800 Subject: [PATCH 10/14] net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare COLO-Proxy just focuses on the packet payload, so we skip the vnet header. 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index 95911a260f..ca67c68615 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -201,8 +201,11 @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset) sec_ip_src, sec_ip_dst); } + offset = ppkt->vnet_hdr_len + offset; + if (ppkt->size == spkt->size) { - return memcmp(ppkt->data + offset, spkt->data + offset, + return memcmp(ppkt->data + offset, + spkt->data + offset, spkt->size - offset); } else { trace_colo_compare_main("Net packet size are not the same"); @@ -261,8 +264,9 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) */ if (ptcp->th_off > 5) { ptrdiff_t tcp_offset; + tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data - + (ptcp->th_off * 4); + + (ptcp->th_off * 4) - ppkt->vnet_hdr_len; res = colo_packet_compare_common(ppkt, spkt, tcp_offset); } else if (ptcp->th_sum == stcp->th_sum) { res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN); From 4b39bdced59708cc9475cc555b8a232da4ea01af Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:55 +0800 Subject: [PATCH 11/14] net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len We add the vnet_hdr_support option for filter-rewriter, default is disabled. If you use virtio-net-pci or other driver needs vnet_hdr, please enable it. You can use it for example: -object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support We get the vnet_hdr_len from NetClientState that make us parse net packet correctly. 
Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++- qemu-options.hx | 4 ++-- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c index 63256c72a0..55a6cf56fd 100644 --- a/net/filter-rewriter.c +++ b/net/filter-rewriter.c @@ -17,6 +17,7 @@ #include "qemu-common.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" #include "qapi-visit.h" #include "qom/object.h" #include "qemu/main-loop.h" @@ -33,6 +34,7 @@ typedef struct RewriterState { NetQueue *incoming_queue; /* hashtable to save connection */ GHashTable *connection_track_table; + bool vnet_hdr; } RewriterState; static void filter_rewriter_flush(NetFilterState *nf) @@ -155,10 +157,16 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, ConnectionKey key; Packet *pkt; ssize_t size = iov_size(iov, iovcnt); + ssize_t vnet_hdr_len = 0; char *buf = g_malloc0(size); iov_to_buf(iov, iovcnt, 0, buf, size); - pkt = packet_new(buf, size, 0); + + if (s->vnet_hdr) { + vnet_hdr_len = nf->netdev->vnet_hdr_len; + } + + pkt = packet_new(buf, size, vnet_hdr_len); g_free(buf); /* @@ -237,6 +245,32 @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp) s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); } +static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp) +{ + RewriterState *s = FILTER_COLO_REWRITER(obj); + + return s->vnet_hdr; +} + +static void filter_rewriter_set_vnet_hdr(Object *obj, + bool value, + Error **errp) +{ + RewriterState *s = FILTER_COLO_REWRITER(obj); + + s->vnet_hdr = value; +} + +static void filter_rewriter_init(Object *obj) +{ + RewriterState *s = FILTER_COLO_REWRITER(obj); + + s->vnet_hdr = false; + object_property_add_bool(obj, "vnet_hdr_support", + filter_rewriter_get_vnet_hdr, + filter_rewriter_set_vnet_hdr, NULL); +} + static void colo_rewriter_class_init(ObjectClass *oc, void *data) { 
NetFilterClass *nfc = NETFILTER_CLASS(oc); @@ -250,6 +284,7 @@ static const TypeInfo colo_rewriter_info = { .name = TYPE_FILTER_REWRITER, .parent = TYPE_NETFILTER, .class_init = colo_rewriter_class_init, + .instance_init = filter_rewriter_init, .instance_size = sizeof(RewriterState), }; diff --git a/qemu-options.hx b/qemu-options.hx index 91a25ee3b9..e0e04c8517 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4262,12 +4262,12 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not be the same. we can just use indev or outdev, but at least one of indev or outdev need to be specified. -@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}] +@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx}[,vnet_hdr_support] Filter-rewriter is a part of COLO project.It will rewrite tcp packet to secondary from primary to keep secondary tcp connection,and rewrite tcp packet to primary from secondary make tcp packet can be handled by -client. +client. If it has the vnet_hdr_support flag, we can parse packet with vnet header. 
usage: colo secondary: From 2484ff06249af64896885564b73af4f0750adfa2 Mon Sep 17 00:00:00 2001 From: Zhang Chen Date: Tue, 4 Jul 2017 14:53:56 +0800 Subject: [PATCH 12/14] docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt index c4941de198..f6a624fb8a 100644 --- a/docs/colo-proxy.txt +++ b/docs/colo-proxy.txt @@ -182,6 +182,32 @@ Secondary(ip:3.3.3.8): -chardev socket,id=red1,host=3.3.3.3,port=9004 -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 +-object filter-rewriter,id=f3,netdev=hn0,queue=all + +If you want to use virtio-net-pci or other driver with vnet_header: + +Primary(ip:3.3.3.3): +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown +-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait +-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait +-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait +-chardev socket,id=compare0-0,host=3.3.3.3,port=9001 +-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait +-chardev socket,id=compare_out0,host=3.3.3.3,port=9005 +-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support +-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support +-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support + +Secondary(ip:3.3.3.8): +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown +-device e1000,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=red0,host=3.3.3.3,port=9003 +-chardev 
socket,id=red1,host=3.3.3.3,port=9004 +-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support +-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support +-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support Note: a.COLO-proxy must work with COLO-frame and Block-replication. From 5f997fd17b9be6a662fa7b5cc1305076cd6c8894 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Thu, 13 Jul 2017 09:44:38 +0200 Subject: [PATCH 13/14] virtio-net: Prefer is_power_of_2() We have a function that checks whether a given number is a power of two. We should prefer it instead of expanding the check on our own. Signed-off-by: Michal Privoznik Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 5630a9ec44..657d099c54 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1942,7 +1942,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) */ if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || - (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) { + !is_power_of_2(n->net_conf.rx_queue_size)) { error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " "must be a power of 2 between %d and %d.", n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, From 189ae6bb5ce1f5a322f8691d00fe942ba43dd601 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 14 Jul 2017 20:08:18 +0800 Subject: [PATCH 14/14] virtio-net: fix offload ctrl endian The spec says offloads should be le64, so use virtio_ldq_p() to guarantee the correct endianness. 
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration") Cc: qemu-stable@nongnu.org Cc: Dmitry Fleytman Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 657d099c54..148071a396 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -758,6 +758,8 @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { uint64_t supported_offloads; + offloads = virtio_ldq_p(vdev, &offloads); + if (!n->has_vnet_hdr) { return VIRTIO_NET_ERR; }