mirror of https://github.com/xemu-project/xemu.git
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1 iQEbBAABAgAGBQJX6kSEAAoJEO8Ells5jWIRKAkH9iMMzN9USOroQIWmiyMf5S7F mlsSeSccv+U5gA6wCJooA0dwMnAFnxJ3rTcV6BEL0jE0cVHanR61eDfpeOC0lKXw NUWc91Bf4Epg0cTk9fV6yv6xZOcuN/twukrQIEZjfldpbP0ba+WoBx3x0sdYen+M Xjaix011CUEx5VmVMx8g/LbnM8s1WO+CjEjIpWAas+1M68P+elne5nOaTaj+FyzV E9BkUkcXd5ByzikYRykgS/OJGRd7S+BBSFluISekwGjTcppRccAwZsGkgYXRrF3U 1g1LOT2xuz777uP7hBqZQRyZIAaOiLY89WUFuCL1BBLbbkAnT799J/e/n6sRSg== =2gpR -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into staging # gpg: Signature made Tue 27 Sep 2016 11:05:56 BST # gpg: using RSA key 0xEF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" # gpg: WARNING: This key is not certified with sufficiently trusted signatures! # gpg: It is not certain that the signature belongs to the owner. # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * remotes/jasowang/tags/net-pull-request: (27 commits) imx_fec: fix error in qemu_send_packet argument mcf_fec: fix error in qemu_send_packet argument net: mcf: limit buffer descriptor count e1000e: Fix EIAC register implementation e1000e: Fix spurious RX TCP ACK interrupts e1000e: Fix OTHER interrupts processing for MSI-X e1000e: Fix PBACLR implementation e1000e: Fix CTRL_EXT.EIAME behavior e1000e: Flush receive queues on link up e1000e: Flush all receive queues on receive enable net: limit allocation in nc_sendv_compat tap: Allow specifying a bridge e1000: fix buliding complaint docs: Add documentation for COLO-proxy MAINTAINERS: add maintainer for COLO-proxy filter-rewriter: rewrite tcp packet to keep secondary connection filter-rewriter: track connection and parse packet filter-rewriter: introduce filter-rewriter initialization colo-compare: add TCP, UDP, ICMP packet comparison colo-compare: introduce packet comparison thread ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
333ec4ca6a
|
@ -1364,6 +1364,15 @@ F: util/uuid.c
|
|||
F: include/qemu/uuid.h
|
||||
F: tests/test-uuid.c
|
||||
|
||||
COLO Proxy
|
||||
M: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||
M: Li Zhijian <lizhijian@cn.fujitsu.com>
|
||||
S: Supported
|
||||
F: docs/colo-proxy.txt
|
||||
F: net/colo*
|
||||
F: net/filter-rewriter.c
|
||||
F: net/filter-mirror.c
|
||||
|
||||
Usermode Emulation
|
||||
------------------
|
||||
Overall
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
COLO-proxy
|
||||
----------
|
||||
Copyright (c) 2016 Intel Corporation
|
||||
Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||
Copyright (c) 2016 Fujitsu, Corp.
|
||||
|
||||
This work is licensed under the terms of the GNU GPL, version 2 or later.
|
||||
See the COPYING file in the top-level directory.
|
||||
|
||||
This document gives an overview of COLO proxy's design.
|
||||
|
||||
== Background ==
|
||||
COLO-proxy is a part of COLO project. It is used
|
||||
to compare the network package to help COLO decide
|
||||
whether to do checkpoint. With COLO-proxy's help,
|
||||
COLO greatly improves the performance.
|
||||
|
||||
The filter-redirector, filter-mirror, colo-compare
|
||||
and filter-rewriter compose the COLO-proxy.
|
||||
|
||||
== Architecture ==
|
||||
|
||||
COLO-Proxy is based on qemu netfilter and it's a plugin for qemu netfilter
|
||||
(except colo-compare). It keep Secondary VM connect normally to
|
||||
client and compare packets sent by PVM with sent by SVM.
|
||||
If the packet difference, notify COLO-frame to do checkpoint and send
|
||||
all primary packet has queued. Otherwise just send the queued primary
|
||||
packet and drop the queued secondary packet.
|
||||
|
||||
Below is a COLO proxy ascii figure:
|
||||
|
||||
Primary qemu Secondary qemu
|
||||
+--------------------------------------------------------------+ +----------------------------------------------------------------+
|
||||
| +----------------------------------------------------------+ | | +-----------------------------------------------------------+ |
|
||||
| | | | | | | |
|
||||
| | guest | | | | guest | |
|
||||
| | | | | | | |
|
||||
| +-------^--------------------------+-----------------------+ | | +---------------------+--------+----------------------------+ |
|
||||
| | | | | ^ | |
|
||||
| | | | | | | |
|
||||
| | +------------------------------------------------------+ | | | |
|
||||
|netfilter| | | | | | netfilter | | |
|
||||
| +----------+ +----------------------------+ | | | +-----------------------------------------------------------+ |
|
||||
| | | | | | out | | | | | | filter excute order | |
|
||||
| | | | +-----------------------------+ | | | | | | +-------------------> | |
|
||||
| | | | | | | | | | | | | | TCP | |
|
||||
| | +-----+--+-+ +-----v----+ +-----v----+ |pri +----+----+sec| | | | +------------+ +---+----+---v+rewriter++ +------------+ | |
|
||||
| | | | | | | | |in | |in | | | | | | | | | | | | |
|
||||
| | | filter | | filter | | filter +------> colo <------+ +--------> filter +--> adjust | adjust +--> filter | | |
|
||||
| | | mirror | |redirector| |redirector| | | compare | | | | | | redirector | | ack | seq | | redirector | | |
|
||||
| | | | | | | | | | | | | | | | | | | | | | | |
|
||||
| | +----^-----+ +----+-----+ +----------+ | +---------+ | | | | +------------+ +--------+--------------+ +---+--------+ | |
|
||||
| | | tx | rx rx | | | | | tx all | rx | |
|
||||
| | | | | | | | +-----------------------------------------------------------+ |
|
||||
| | | +--------------+ | | | | | |
|
||||
| | | filter excute order | | | | | | |
|
||||
| | | +----------------> | | | +--------------------------------------------------------+ |
|
||||
| +-----------------------------------------+ | | |
|
||||
| | | | | |
|
||||
+--------------------------------------------------------------+ +----------------------------------------------------------------+
|
||||
|guest receive | guest send
|
||||
| |
|
||||
+--------+----------------------------v------------------------+
|
||||
| | NOTE: filter direction is rx/tx/all
|
||||
| tap | rx:receive packets sent to the netdev
|
||||
| | tx:receive packets sent by the netdev
|
||||
+--------------------------------------------------------------+
|
||||
|
||||
1.Guest receive packet route:
|
||||
|
||||
Primary:
|
||||
|
||||
Tap --> Mirror Client Filter
|
||||
Mirror client will send packet to guest,at the
|
||||
same time, copy and forward packet to secondary
|
||||
mirror server.
|
||||
|
||||
Secondary:
|
||||
|
||||
Mirror Server Filter --> TCP Rewriter
|
||||
If receive packet is TCP packet,we will adjust ack
|
||||
and update TCP checksum, then send to secondary
|
||||
guest. Otherwise directly send to guest.
|
||||
|
||||
2.Guest send packet route:
|
||||
|
||||
Primary:
|
||||
|
||||
Guest --> Redirect Server Filter
|
||||
Redirect server filter receive primary guest packet
|
||||
but do nothing, just pass to next filter.
|
||||
|
||||
Redirect Server Filter --> COLO-Compare
|
||||
COLO-compare receive primary guest packet then
|
||||
waiting scondary redirect packet to compare it.
|
||||
If packet same,send queued primary packet and clear
|
||||
queued secondary packet, Otherwise send primary packet
|
||||
and do checkpoint.
|
||||
|
||||
COLO-Compare --> Another Redirector Filter
|
||||
The redirector get packet from colo-compare by use
|
||||
chardev socket.
|
||||
|
||||
Redirector Filter --> Tap
|
||||
Send the packet.
|
||||
|
||||
Secondary:
|
||||
|
||||
Guest --> TCP Rewriter Filter
|
||||
If the packet is TCP packet,we will adjust seq
|
||||
and update TCP checksum. Then send it to
|
||||
redirect client filter. Otherwise directly send to
|
||||
redirect client filter.
|
||||
|
||||
Redirect Client Filter --> Redirect Server Filter
|
||||
Forward packet to primary.
|
||||
|
||||
== Components introduction ==
|
||||
|
||||
Filter-mirror is a netfilter plugin.
|
||||
It gives qemu the ability to mirror
|
||||
packets to a chardev.
|
||||
|
||||
Filter-redirector is a netfilter plugin.
|
||||
It gives qemu the ability to redirect net packet.
|
||||
Redirector can redirect filter's net packet to outdev,
|
||||
and redirect indev's packet to filter.
|
||||
|
||||
filter
|
||||
+
|
||||
redirector |
|
||||
+--------------+
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
indev +---------+ +----------> outdev
|
||||
| | |
|
||||
| | |
|
||||
| | |
|
||||
+--------------+
|
||||
|
|
||||
v
|
||||
filter
|
||||
|
||||
COLO-compare, we do packet comparing job.
|
||||
Packets coming from the primary char indev will be sent to outdev.
|
||||
Packets coming from the secondary char dev will be dropped after comparing.
|
||||
COLO-comapre need two input chardev and one output chardev:
|
||||
primary_in=chardev1-id (source: primary send packet)
|
||||
secondary_in=chardev2-id (source: secondary send packet)
|
||||
outdev=chardev3-id
|
||||
|
||||
Filter-rewriter will rewrite some of secondary packet to make
|
||||
secondary guest's tcp connection established successfully.
|
||||
In this module we will rewrite tcp packet's ack to the secondary
|
||||
from primary,and rewrite tcp packet's seq to the primary from
|
||||
secondary.
|
||||
|
||||
== Usage ==
|
||||
|
||||
Here, we use demo ip and port discribe more clearly.
|
||||
Primary(ip:3.3.3.3):
|
||||
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
|
||||
-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
|
||||
-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
|
||||
-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
|
||||
-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
|
||||
-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
|
||||
-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
|
||||
-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
|
||||
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
|
||||
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
|
||||
-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
|
||||
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0
|
||||
|
||||
Secondary(ip:3.3.3.8):
|
||||
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
|
||||
-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
|
||||
-chardev socket,id=red0,host=3.3.3.3,port=9003
|
||||
-chardev socket,id=red1,host=3.3.3.3,port=9004
|
||||
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||
|
||||
Note:
|
||||
a.COLO-proxy must work with COLO-frame and Block-replication.
|
||||
b.Primary COLO must be started firstly, because COLO-proxy needs
|
||||
chardev socket server running before secondary started.
|
||||
c.Filter-rewriter only rewrite tcp packet.
|
|
@ -400,7 +400,7 @@ static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address,
|
|||
|
||||
if (range_covers_byte(address, len, PCI_COMMAND) &&
|
||||
(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
|
||||
qemu_flush_queued_packets(qemu_get_queue(s->nic));
|
||||
e1000e_start_recv(&s->core);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -953,7 +953,7 @@ e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
|
|||
core->rx_desc_buf_size;
|
||||
}
|
||||
|
||||
static inline void
|
||||
void
|
||||
e1000e_start_recv(E1000ECore *core)
|
||||
{
|
||||
int i;
|
||||
|
@ -1710,7 +1710,8 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
|
|||
}
|
||||
|
||||
/* Perform ACK receive detection */
|
||||
if (e1000e_is_tcp_ack(core, core->rx_pkt)) {
|
||||
if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) &&
|
||||
(e1000e_is_tcp_ack(core, core->rx_pkt))) {
|
||||
n |= E1000_ICS_ACK;
|
||||
}
|
||||
|
||||
|
@ -1807,6 +1808,7 @@ e1000e_core_set_link_status(E1000ECore *core)
|
|||
core->autoneg_timer);
|
||||
} else {
|
||||
e1000x_update_regs_on_link_up(core->mac, core->phy[0]);
|
||||
e1000e_start_recv(core);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2007,19 +2009,23 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
|
|||
}
|
||||
|
||||
if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) {
|
||||
trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause);
|
||||
e1000e_clear_ims_bits(core, core->mac[IAM] & cause);
|
||||
trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause);
|
||||
core->mac[IAM] &= ~cause;
|
||||
}
|
||||
|
||||
trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]);
|
||||
|
||||
if (core->mac[EIAC] & E1000_ICR_OTHER) {
|
||||
effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) |
|
||||
E1000_ICR_OTHER_CAUSES;
|
||||
} else {
|
||||
effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK;
|
||||
effective_eiac = core->mac[EIAC] & cause;
|
||||
|
||||
if (effective_eiac == E1000_ICR_OTHER) {
|
||||
effective_eiac |= E1000_ICR_OTHER_CAUSES;
|
||||
}
|
||||
|
||||
core->mac[ICR] &= ~effective_eiac;
|
||||
|
||||
if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
|
||||
core->mac[IMS] &= ~effective_eiac;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2130,7 +2136,7 @@ e1000e_update_interrupt_state(E1000ECore *core)
|
|||
|
||||
/* Set ICR[OTHER] for MSI-X */
|
||||
if (is_msix) {
|
||||
if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) {
|
||||
if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) {
|
||||
core->mac[ICR] |= E1000_ICR_OTHER;
|
||||
trace_e1000e_irq_add_msi_other(core->mac[ICR]);
|
||||
}
|
||||
|
@ -2168,7 +2174,7 @@ e1000e_update_interrupt_state(E1000ECore *core)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
static void
|
||||
e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
|
||||
{
|
||||
trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
|
||||
|
@ -2187,6 +2193,8 @@ e1000e_autoneg_timer(void *opaque)
|
|||
E1000ECore *core = opaque;
|
||||
if (!qemu_get_queue(core->owner_nic)->link_down) {
|
||||
e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]);
|
||||
e1000e_start_recv(core);
|
||||
|
||||
e1000e_update_flowctl_status(core);
|
||||
/* signal link status change to the guest */
|
||||
e1000e_set_interrupt_cause(core, E1000_ICR_LSC);
|
||||
|
@ -2344,7 +2352,7 @@ e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val)
|
|||
|
||||
core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK;
|
||||
|
||||
if (msix_enabled(core->owner)) {
|
||||
if (!msix_enabled(core->owner)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -144,3 +144,6 @@ e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size);
|
|||
|
||||
ssize_t
|
||||
e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt);
|
||||
|
||||
void
|
||||
e1000e_start_recv(E1000ECore *core);
|
||||
|
|
|
@ -429,7 +429,7 @@ static void imx_fec_do_tx(IMXFECState *s)
|
|||
frame_size += len;
|
||||
if (bd.flags & ENET_BD_L) {
|
||||
/* Last buffer in frame. */
|
||||
qemu_send_packet(qemu_get_queue(s->nic), frame, len);
|
||||
qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size);
|
||||
ptr = frame;
|
||||
frame_size = 0;
|
||||
s->regs[ENET_EIR] |= ENET_INT_TXF;
|
||||
|
|
|
@ -23,6 +23,7 @@ do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0)
|
|||
#define DPRINTF(fmt, ...) do {} while(0)
|
||||
#endif
|
||||
|
||||
#define FEC_MAX_DESC 1024
|
||||
#define FEC_MAX_FRAME_SIZE 2032
|
||||
|
||||
typedef struct {
|
||||
|
@ -149,7 +150,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
|||
uint32_t addr;
|
||||
mcf_fec_bd bd;
|
||||
int frame_size;
|
||||
int len;
|
||||
int len, descnt = 0;
|
||||
uint8_t frame[FEC_MAX_FRAME_SIZE];
|
||||
uint8_t *ptr;
|
||||
|
||||
|
@ -157,7 +158,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
|||
ptr = frame;
|
||||
frame_size = 0;
|
||||
addr = s->tx_descriptor;
|
||||
while (1) {
|
||||
while (descnt++ < FEC_MAX_DESC) {
|
||||
mcf_fec_read_bd(&bd, addr);
|
||||
DPRINTF("tx_bd %x flags %04x len %d data %08x\n",
|
||||
addr, bd.flags, bd.length, bd.data);
|
||||
|
@ -176,7 +177,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s)
|
|||
if (bd.flags & FEC_BD_L) {
|
||||
/* Last buffer in frame. */
|
||||
DPRINTF("Sending packet\n");
|
||||
qemu_send_packet(qemu_get_queue(s->nic), frame, len);
|
||||
qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size);
|
||||
ptr = frame;
|
||||
frame_size = 0;
|
||||
s->eir |= FEC_INT_TXF;
|
||||
|
|
|
@ -223,7 +223,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x"
|
|||
e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
|
||||
e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
|
||||
e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
|
||||
e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
|
||||
e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
|
||||
e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X"
|
||||
e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"
|
||||
e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts"
|
||||
|
|
|
@ -31,6 +31,11 @@
|
|||
#define MAC_TABLE_ENTRIES 64
|
||||
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
|
||||
|
||||
/* previously fixed value */
|
||||
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
|
||||
/* for now, only allow larger queues; with virtio-1, guest can downsize */
|
||||
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
|
||||
|
||||
/*
|
||||
* Calculate the number of bytes up to and including the given 'field' of
|
||||
* 'container'.
|
||||
|
@ -1412,7 +1417,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index)
|
|||
{
|
||||
VirtIODevice *vdev = VIRTIO_DEVICE(n);
|
||||
|
||||
n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
|
||||
n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
|
||||
virtio_net_handle_rx);
|
||||
if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
|
||||
n->vqs[index].tx_vq =
|
||||
virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
|
||||
|
@ -1720,6 +1726,22 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
|
|||
virtio_net_set_config_size(n, n->host_features);
|
||||
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
|
||||
|
||||
/*
|
||||
* We set a lower limit on RX queue size to what it always was.
|
||||
* Guests that want a smaller ring can always resize it without
|
||||
* help from us (using virtio 1 and up).
|
||||
*/
|
||||
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
|
||||
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
|
||||
(n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
|
||||
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
|
||||
"must be a power of 2 between %d and %d.",
|
||||
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
|
||||
VIRTQUEUE_MAX_SIZE);
|
||||
virtio_cleanup(vdev);
|
||||
return;
|
||||
}
|
||||
|
||||
n->max_queues = MAX(n->nic_conf.peers.queues, 1);
|
||||
if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
|
||||
error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
|
||||
|
@ -1880,6 +1902,8 @@ static Property virtio_net_properties[] = {
|
|||
TX_TIMER_INTERVAL),
|
||||
DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
|
||||
DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
|
||||
DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
|
||||
VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
|
||||
DEFINE_PROP_END_OF_LIST(),
|
||||
};
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ typedef struct virtio_net_conf
|
|||
uint32_t txtimer;
|
||||
int32_t txburst;
|
||||
char *tx;
|
||||
uint16_t rx_queue_size;
|
||||
} virtio_net_conf;
|
||||
|
||||
/* Maximum packet size we can receive from tap device: header + 64k */
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/* jhash.h: Jenkins hash support.
|
||||
*
|
||||
* Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
|
||||
*
|
||||
* http://burtleburtle.net/bob/hash/
|
||||
*
|
||||
* These are the credits from Bob's sources:
|
||||
*
|
||||
* lookup3.c, by Bob Jenkins, May 2006, Public Domain.
|
||||
*
|
||||
* These are functions for producing 32-bit hashes for hash table lookup.
|
||||
* hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
|
||||
* are externally useful functions. Routines to test the hash are included
|
||||
* if SELF_TEST is defined. You can use this free for any purpose. It's in
|
||||
* the public domain. It has no warranty.
|
||||
*
|
||||
* Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
|
||||
*
|
||||
* I've modified Bob's hash to be useful in the Linux kernel, and
|
||||
* any bugs present are my fault.
|
||||
* Jozsef
|
||||
*/
|
||||
|
||||
#ifndef QEMU_JHASH_H__
|
||||
#define QEMU_JHASH_H__
|
||||
|
||||
#include "qemu/bitops.h"
|
||||
|
||||
/*
|
||||
* hashtable relation copy from linux kernel jhash
|
||||
*/
|
||||
|
||||
/* __jhash_mix -- mix 3 32-bit values reversibly. */
|
||||
#define __jhash_mix(a, b, c) \
|
||||
{ \
|
||||
a -= c; a ^= rol32(c, 4); c += b; \
|
||||
b -= a; b ^= rol32(a, 6); a += c; \
|
||||
c -= b; c ^= rol32(b, 8); b += a; \
|
||||
a -= c; a ^= rol32(c, 16); c += b; \
|
||||
b -= a; b ^= rol32(a, 19); a += c; \
|
||||
c -= b; c ^= rol32(b, 4); b += a; \
|
||||
}
|
||||
|
||||
/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
|
||||
#define __jhash_final(a, b, c) \
|
||||
{ \
|
||||
c ^= b; c -= rol32(b, 14); \
|
||||
a ^= c; a -= rol32(c, 11); \
|
||||
b ^= a; b -= rol32(a, 25); \
|
||||
c ^= b; c -= rol32(b, 16); \
|
||||
a ^= c; a -= rol32(c, 4); \
|
||||
b ^= a; b -= rol32(a, 14); \
|
||||
c ^= b; c -= rol32(b, 24); \
|
||||
}
|
||||
|
||||
/* An arbitrary initial parameter */
|
||||
#define JHASH_INITVAL 0xdeadbeef
|
||||
|
||||
#endif /* QEMU_JHASH_H__ */
|
|
@ -65,7 +65,8 @@ struct CharDriverState {
|
|||
int (*chr_sync_read)(struct CharDriverState *s,
|
||||
const uint8_t *buf, int len);
|
||||
GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond);
|
||||
void (*chr_update_read_handler)(struct CharDriverState *s);
|
||||
void (*chr_update_read_handler)(struct CharDriverState *s,
|
||||
GMainContext *context);
|
||||
int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg);
|
||||
int (*get_msgfds)(struct CharDriverState *s, int* fds, int num);
|
||||
int (*set_msgfds)(struct CharDriverState *s, int *fds, int num);
|
||||
|
@ -422,6 +423,14 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
|||
IOEventHandler *fd_event,
|
||||
void *opaque);
|
||||
|
||||
/* This API can make handler run in the context what you pass to. */
|
||||
void qemu_chr_add_handlers_full(CharDriverState *s,
|
||||
IOCanReadHandler *fd_can_read,
|
||||
IOReadHandler *fd_read,
|
||||
IOEventHandler *fd_event,
|
||||
void *opaque,
|
||||
GMainContext *context);
|
||||
|
||||
void qemu_chr_be_generic_open(CharDriverState *s);
|
||||
void qemu_chr_accept_input(CharDriverState *s);
|
||||
int qemu_chr_add_client(CharDriverState *s, int fd);
|
||||
|
|
|
@ -16,3 +16,6 @@ common-obj-$(CONFIG_NETMAP) += netmap.o
|
|||
common-obj-y += filter.o
|
||||
common-obj-y += filter-buffer.o
|
||||
common-obj-y += filter-mirror.o
|
||||
common-obj-y += colo-compare.o
|
||||
common-obj-y += colo.o
|
||||
common-obj-y += filter-rewriter.o
|
||||
|
|
|
@ -0,0 +1,781 @@
|
|||
/*
|
||||
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||
*
|
||||
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||
* Copyright (c) 2016 FUJITSU LIMITED
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "trace.h"
|
||||
#include "qemu-common.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qapi/error.h"
|
||||
#include "net/net.h"
|
||||
#include "net/eth.h"
|
||||
#include "qom/object_interfaces.h"
|
||||
#include "qemu/iov.h"
|
||||
#include "qom/object.h"
|
||||
#include "qemu/typedefs.h"
|
||||
#include "net/queue.h"
|
||||
#include "sysemu/char.h"
|
||||
#include "qemu/sockets.h"
|
||||
#include "qapi-visit.h"
|
||||
#include "net/colo.h"
|
||||
|
||||
#define TYPE_COLO_COMPARE "colo-compare"
|
||||
#define COLO_COMPARE(obj) \
|
||||
OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE)
|
||||
|
||||
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
|
||||
#define MAX_QUEUE_SIZE 1024
|
||||
|
||||
/* TODO: Should be configurable */
|
||||
#define REGULAR_PACKET_CHECK_MS 3000
|
||||
|
||||
/*
|
||||
+ CompareState ++
|
||||
| |
|
||||
+---------------+ +---------------+ +---------------+
|
||||
|conn list +--->conn +--------->conn |
|
||||
+---------------+ +---------------+ +---------------+
|
||||
| | | | | |
|
||||
+---------------+ +---v----+ +---v----+ +---v----+ +---v----+
|
||||
|primary | |secondary |primary | |secondary
|
||||
|packet | |packet + |packet | |packet +
|
||||
+--------+ +--------+ +--------+ +--------+
|
||||
| | | |
|
||||
+---v----+ +---v----+ +---v----+ +---v----+
|
||||
|primary | |secondary |primary | |secondary
|
||||
|packet | |packet + |packet | |packet +
|
||||
+--------+ +--------+ +--------+ +--------+
|
||||
| | | |
|
||||
+---v----+ +---v----+ +---v----+ +---v----+
|
||||
|primary | |secondary |primary | |secondary
|
||||
|packet | |packet + |packet | |packet +
|
||||
+--------+ +--------+ +--------+ +--------+
|
||||
*/
|
||||
typedef struct CompareState {
|
||||
Object parent;
|
||||
|
||||
char *pri_indev;
|
||||
char *sec_indev;
|
||||
char *outdev;
|
||||
CharDriverState *chr_pri_in;
|
||||
CharDriverState *chr_sec_in;
|
||||
CharDriverState *chr_out;
|
||||
SocketReadState pri_rs;
|
||||
SocketReadState sec_rs;
|
||||
|
||||
/* connection list: the connections belonged to this NIC could be found
|
||||
* in this list.
|
||||
* element type: Connection
|
||||
*/
|
||||
GQueue conn_list;
|
||||
/* hashtable to save connection */
|
||||
GHashTable *connection_track_table;
|
||||
/* compare thread, a thread for each NIC */
|
||||
QemuThread thread;
|
||||
/* Timer used on the primary to find packets that are never matched */
|
||||
QEMUTimer *timer;
|
||||
QemuMutex timer_check_lock;
|
||||
} CompareState;
|
||||
|
||||
typedef struct CompareClass {
|
||||
ObjectClass parent_class;
|
||||
} CompareClass;
|
||||
|
||||
typedef struct CompareChardevProps {
|
||||
bool is_socket;
|
||||
} CompareChardevProps;
|
||||
|
||||
enum {
|
||||
PRIMARY_IN = 0,
|
||||
SECONDARY_IN,
|
||||
};
|
||||
|
||||
static int compare_chr_send(CharDriverState *out,
|
||||
const uint8_t *buf,
|
||||
uint32_t size);
|
||||
|
||||
/*
|
||||
* Return 0 on success, if return -1 means the pkt
|
||||
* is unsupported(arp and ipv6) and will be sent later
|
||||
*/
|
||||
static int packet_enqueue(CompareState *s, int mode)
|
||||
{
|
||||
ConnectionKey key;
|
||||
Packet *pkt = NULL;
|
||||
Connection *conn;
|
||||
|
||||
if (mode == PRIMARY_IN) {
|
||||
pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
|
||||
} else {
|
||||
pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
|
||||
}
|
||||
|
||||
if (parse_packet_early(pkt)) {
|
||||
packet_destroy(pkt, NULL);
|
||||
pkt = NULL;
|
||||
return -1;
|
||||
}
|
||||
fill_connection_key(pkt, &key);
|
||||
|
||||
conn = connection_get(s->connection_track_table,
|
||||
&key,
|
||||
&s->conn_list);
|
||||
|
||||
if (!conn->processing) {
|
||||
g_queue_push_tail(&s->conn_list, conn);
|
||||
conn->processing = true;
|
||||
}
|
||||
|
||||
if (mode == PRIMARY_IN) {
|
||||
if (g_queue_get_length(&conn->primary_list) <=
|
||||
MAX_QUEUE_SIZE) {
|
||||
g_queue_push_tail(&conn->primary_list, pkt);
|
||||
} else {
|
||||
error_report("colo compare primary queue size too big,"
|
||||
"drop packet");
|
||||
}
|
||||
} else {
|
||||
if (g_queue_get_length(&conn->secondary_list) <=
|
||||
MAX_QUEUE_SIZE) {
|
||||
g_queue_push_tail(&conn->secondary_list, pkt);
|
||||
} else {
|
||||
error_report("colo compare secondary queue size too big,"
|
||||
"drop packet");
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The IP packets sent by primary and secondary
|
||||
* will be compared in here
|
||||
* TODO support ip fragment, Out-Of-Order
|
||||
* return: 0 means packet same
|
||||
* > 0 || < 0 means packet different
|
||||
*/
|
||||
static int colo_packet_compare(Packet *ppkt, Packet *spkt)
|
||||
{
|
||||
trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
|
||||
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
|
||||
inet_ntoa(spkt->ip->ip_src),
|
||||
inet_ntoa(spkt->ip->ip_dst));
|
||||
|
||||
if (ppkt->size == spkt->size) {
|
||||
return memcmp(ppkt->data, spkt->data, spkt->size);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the compare thread on the primary
|
||||
* for compare tcp packet
|
||||
* compare_tcp copied from Dr. David Alan Gilbert's branch
|
||||
*/
|
||||
static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
|
||||
{
|
||||
struct tcphdr *ptcp, *stcp;
|
||||
int res;
|
||||
char *sdebug, *ddebug;
|
||||
|
||||
trace_colo_compare_main("compare tcp");
|
||||
if (ppkt->size != spkt->size) {
|
||||
if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
|
||||
trace_colo_compare_main("pkt size not same");
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
ptcp = (struct tcphdr *)ppkt->transport_header;
|
||||
stcp = (struct tcphdr *)spkt->transport_header;
|
||||
|
||||
/*
|
||||
* The 'identification' field in the IP header is *very* random
|
||||
* it almost never matches. Fudge this by ignoring differences in
|
||||
* unfragmented packets; they'll normally sort themselves out if different
|
||||
* anyway, and it should recover at the TCP level.
|
||||
* An alternative would be to get both the primary and secondary to rewrite
|
||||
* somehow; but that would need some sync traffic to sync the state
|
||||
*/
|
||||
if (ntohs(ppkt->ip->ip_off) & IP_DF) {
|
||||
spkt->ip->ip_id = ppkt->ip->ip_id;
|
||||
/* and the sum will be different if the IDs were different */
|
||||
spkt->ip->ip_sum = ppkt->ip->ip_sum;
|
||||
}
|
||||
|
||||
res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN,
|
||||
(spkt->size - ETH_HLEN));
|
||||
|
||||
if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
|
||||
sdebug = strdup(inet_ntoa(ppkt->ip->ip_src));
|
||||
ddebug = strdup(inet_ntoa(ppkt->ip->ip_dst));
|
||||
fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=%u/%u"
|
||||
" s: seq/ack=%u/%u res=%d flags=%x/%x\n",
|
||||
__func__, sdebug, ddebug,
|
||||
(unsigned int)ntohl(ptcp->th_seq),
|
||||
(unsigned int)ntohl(ptcp->th_ack),
|
||||
(unsigned int)ntohl(stcp->th_seq),
|
||||
(unsigned int)ntohl(stcp->th_ack),
|
||||
res, ptcp->th_flags, stcp->th_flags);
|
||||
|
||||
fprintf(stderr, "Primary len = %d\n", ppkt->size);
|
||||
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
|
||||
fprintf(stderr, "Secondary len = %d\n", spkt->size);
|
||||
qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
|
||||
|
||||
g_free(sdebug);
|
||||
g_free(ddebug);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the compare thread on the primary
|
||||
* for compare udp packet
|
||||
*/
|
||||
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
|
||||
{
|
||||
int ret;
|
||||
|
||||
trace_colo_compare_main("compare udp");
|
||||
ret = colo_packet_compare(ppkt, spkt);
|
||||
|
||||
if (ret) {
|
||||
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
|
||||
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size);
|
||||
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
|
||||
qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the compare thread on the primary
|
||||
* for compare icmp packet
|
||||
*/
|
||||
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
|
||||
{
|
||||
int network_length;
|
||||
|
||||
trace_colo_compare_main("compare icmp");
|
||||
network_length = ppkt->ip->ip_hl * 4;
|
||||
if (ppkt->size != spkt->size ||
|
||||
ppkt->size < network_length + ETH_HLEN) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (colo_packet_compare(ppkt, spkt)) {
|
||||
trace_colo_compare_icmp_miscompare("primary pkt size",
|
||||
ppkt->size);
|
||||
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare",
|
||||
ppkt->size);
|
||||
trace_colo_compare_icmp_miscompare("Secondary pkt size",
|
||||
spkt->size);
|
||||
qemu_hexdump((char *)spkt->data, stderr, "colo-compare",
|
||||
spkt->size);
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the compare thread on the primary
|
||||
* for compare other packet
|
||||
*/
|
||||
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
|
||||
{
|
||||
trace_colo_compare_main("compare other");
|
||||
trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
|
||||
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
|
||||
inet_ntoa(spkt->ip->ip_src),
|
||||
inet_ntoa(spkt->ip->ip_dst));
|
||||
return colo_packet_compare(ppkt, spkt);
|
||||
}
|
||||
|
||||
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
|
||||
{
|
||||
int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||
|
||||
if ((now - pkt->creation_ms) > (*check_time)) {
|
||||
trace_colo_old_packet_check_found(pkt->creation_ms);
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void colo_old_packet_check_one_conn(void *opaque,
|
||||
void *user_data)
|
||||
{
|
||||
Connection *conn = opaque;
|
||||
GList *result = NULL;
|
||||
int64_t check_time = REGULAR_PACKET_CHECK_MS;
|
||||
|
||||
result = g_queue_find_custom(&conn->primary_list,
|
||||
&check_time,
|
||||
(GCompareFunc)colo_old_packet_check_one);
|
||||
|
||||
if (result) {
|
||||
/* do checkpoint will flush old packet */
|
||||
/* TODO: colo_notify_checkpoint();*/
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for old packets that the secondary hasn't matched,
|
||||
* if we have some then we have to checkpoint to wake
|
||||
* the secondary up.
|
||||
*/
|
||||
static void colo_old_packet_check(void *opaque)
|
||||
{
|
||||
CompareState *s = opaque;
|
||||
|
||||
g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the compare thread on the primary
|
||||
* for compare connection
|
||||
*/
|
||||
static void colo_compare_connection(void *opaque, void *user_data)
|
||||
{
|
||||
CompareState *s = user_data;
|
||||
Connection *conn = opaque;
|
||||
Packet *pkt = NULL;
|
||||
GList *result = NULL;
|
||||
int ret;
|
||||
|
||||
while (!g_queue_is_empty(&conn->primary_list) &&
|
||||
!g_queue_is_empty(&conn->secondary_list)) {
|
||||
qemu_mutex_lock(&s->timer_check_lock);
|
||||
pkt = g_queue_pop_tail(&conn->primary_list);
|
||||
qemu_mutex_unlock(&s->timer_check_lock);
|
||||
switch (conn->ip_proto) {
|
||||
case IPPROTO_TCP:
|
||||
result = g_queue_find_custom(&conn->secondary_list,
|
||||
pkt, (GCompareFunc)colo_packet_compare_tcp);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
result = g_queue_find_custom(&conn->secondary_list,
|
||||
pkt, (GCompareFunc)colo_packet_compare_udp);
|
||||
break;
|
||||
case IPPROTO_ICMP:
|
||||
result = g_queue_find_custom(&conn->secondary_list,
|
||||
pkt, (GCompareFunc)colo_packet_compare_icmp);
|
||||
break;
|
||||
default:
|
||||
result = g_queue_find_custom(&conn->secondary_list,
|
||||
pkt, (GCompareFunc)colo_packet_compare_other);
|
||||
break;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
ret = compare_chr_send(s->chr_out, pkt->data, pkt->size);
|
||||
if (ret < 0) {
|
||||
error_report("colo_send_primary_packet failed");
|
||||
}
|
||||
trace_colo_compare_main("packet same and release packet");
|
||||
g_queue_remove(&conn->secondary_list, result->data);
|
||||
packet_destroy(pkt, NULL);
|
||||
} else {
|
||||
/*
|
||||
* If one packet arrive late, the secondary_list or
|
||||
* primary_list will be empty, so we can't compare it
|
||||
* until next comparison.
|
||||
*/
|
||||
trace_colo_compare_main("packet different");
|
||||
qemu_mutex_lock(&s->timer_check_lock);
|
||||
g_queue_push_tail(&conn->primary_list, pkt);
|
||||
qemu_mutex_unlock(&s->timer_check_lock);
|
||||
/* TODO: colo_notify_checkpoint();*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int compare_chr_send(CharDriverState *out,
|
||||
const uint8_t *buf,
|
||||
uint32_t size)
|
||||
{
|
||||
int ret = 0;
|
||||
uint32_t len = htonl(size);
|
||||
|
||||
if (!size) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
|
||||
if (ret != sizeof(len)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
|
||||
if (ret != size) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
return ret < 0 ? ret : -EIO;
|
||||
}
|
||||
|
||||
static int compare_chr_can_read(void *opaque)
|
||||
{
|
||||
return COMPARE_READ_LEN_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the main thread on the primary for packets
|
||||
* arriving over the socket from the primary.
|
||||
*/
|
||||
static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(opaque);
|
||||
int ret;
|
||||
|
||||
ret = net_fill_rstate(&s->pri_rs, buf, size);
|
||||
if (ret == -1) {
|
||||
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
|
||||
error_report("colo-compare primary_in error");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the main thread on the primary for packets
|
||||
* arriving over the socket from the secondary.
|
||||
*/
|
||||
static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(opaque);
|
||||
int ret;
|
||||
|
||||
ret = net_fill_rstate(&s->sec_rs, buf, size);
|
||||
if (ret == -1) {
|
||||
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
|
||||
error_report("colo-compare secondary_in error");
|
||||
}
|
||||
}
|
||||
|
||||
static void *colo_compare_thread(void *opaque)
|
||||
{
|
||||
GMainContext *worker_context;
|
||||
GMainLoop *compare_loop;
|
||||
CompareState *s = opaque;
|
||||
|
||||
worker_context = g_main_context_new();
|
||||
|
||||
qemu_chr_add_handlers_full(s->chr_pri_in, compare_chr_can_read,
|
||||
compare_pri_chr_in, NULL, s, worker_context);
|
||||
qemu_chr_add_handlers_full(s->chr_sec_in, compare_chr_can_read,
|
||||
compare_sec_chr_in, NULL, s, worker_context);
|
||||
|
||||
compare_loop = g_main_loop_new(worker_context, FALSE);
|
||||
|
||||
g_main_loop_run(compare_loop);
|
||||
|
||||
g_main_loop_unref(compare_loop);
|
||||
g_main_context_unref(worker_context);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static char *compare_get_pri_indev(Object *obj, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
return g_strdup(s->pri_indev);
|
||||
}
|
||||
|
||||
static void compare_set_pri_indev(Object *obj, const char *value, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
g_free(s->pri_indev);
|
||||
s->pri_indev = g_strdup(value);
|
||||
}
|
||||
|
||||
static char *compare_get_sec_indev(Object *obj, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
return g_strdup(s->sec_indev);
|
||||
}
|
||||
|
||||
static void compare_set_sec_indev(Object *obj, const char *value, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
g_free(s->sec_indev);
|
||||
s->sec_indev = g_strdup(value);
|
||||
}
|
||||
|
||||
static char *compare_get_outdev(Object *obj, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
return g_strdup(s->outdev);
|
||||
}
|
||||
|
||||
static void compare_set_outdev(Object *obj, const char *value, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
g_free(s->outdev);
|
||||
s->outdev = g_strdup(value);
|
||||
}
|
||||
|
||||
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
|
||||
{
|
||||
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
|
||||
|
||||
if (packet_enqueue(s, PRIMARY_IN)) {
|
||||
trace_colo_compare_main("primary: unsupported packet in");
|
||||
compare_chr_send(s->chr_out, pri_rs->buf, pri_rs->packet_len);
|
||||
} else {
|
||||
/* compare connection */
|
||||
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||
}
|
||||
}
|
||||
|
||||
static void compare_sec_rs_finalize(SocketReadState *sec_rs)
|
||||
{
|
||||
CompareState *s = container_of(sec_rs, CompareState, sec_rs);
|
||||
|
||||
if (packet_enqueue(s, SECONDARY_IN)) {
|
||||
trace_colo_compare_main("secondary: unsupported packet in");
|
||||
} else {
|
||||
/* compare connection */
|
||||
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||
}
|
||||
}
|
||||
|
||||
static int compare_chardev_opts(void *opaque,
|
||||
const char *name, const char *value,
|
||||
Error **errp)
|
||||
{
|
||||
CompareChardevProps *props = opaque;
|
||||
|
||||
if (strcmp(name, "backend") == 0 &&
|
||||
strcmp(value, "socket") == 0) {
|
||||
props->is_socket = true;
|
||||
return 0;
|
||||
} else if (strcmp(name, "host") == 0 ||
|
||||
(strcmp(name, "port") == 0) ||
|
||||
(strcmp(name, "server") == 0) ||
|
||||
(strcmp(name, "wait") == 0) ||
|
||||
(strcmp(name, "path") == 0)) {
|
||||
return 0;
|
||||
} else {
|
||||
error_setg(errp,
|
||||
"COLO-compare does not support a chardev with option %s=%s",
|
||||
name, value);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 0 is success.
|
||||
* Return 1 is failed.
|
||||
*/
|
||||
static int find_and_check_chardev(CharDriverState **chr,
|
||||
char *chr_name,
|
||||
Error **errp)
|
||||
{
|
||||
CompareChardevProps props;
|
||||
|
||||
*chr = qemu_chr_find(chr_name);
|
||||
if (*chr == NULL) {
|
||||
error_setg(errp, "Device '%s' not found",
|
||||
chr_name);
|
||||
return 1;
|
||||
}
|
||||
|
||||
memset(&props, 0, sizeof(props));
|
||||
if (qemu_opt_foreach((*chr)->opts, compare_chardev_opts, &props, errp)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!props.is_socket) {
|
||||
error_setg(errp, "chardev \"%s\" is not a tcp socket",
|
||||
chr_name);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check old packet regularly so it can watch for any packets
|
||||
* that the secondary hasn't produced equivalents of.
|
||||
*/
|
||||
static void check_old_packet_regular(void *opaque)
|
||||
{
|
||||
CompareState *s = opaque;
|
||||
|
||||
timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
||||
REGULAR_PACKET_CHECK_MS);
|
||||
/* if have old packet we will notify checkpoint */
|
||||
/*
|
||||
* TODO: Make timer handler run in compare thread
|
||||
* like qemu_chr_add_handlers_full.
|
||||
*/
|
||||
qemu_mutex_lock(&s->timer_check_lock);
|
||||
colo_old_packet_check(s);
|
||||
qemu_mutex_unlock(&s->timer_check_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from the main thread on the primary
|
||||
* to setup colo-compare.
|
||||
*/
|
||||
static void colo_compare_complete(UserCreatable *uc, Error **errp)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(uc);
|
||||
char thread_name[64];
|
||||
static int compare_id;
|
||||
|
||||
if (!s->pri_indev || !s->sec_indev || !s->outdev) {
|
||||
error_setg(errp, "colo compare needs 'primary_in' ,"
|
||||
"'secondary_in','outdev' property set");
|
||||
return;
|
||||
} else if (!strcmp(s->pri_indev, s->outdev) ||
|
||||
!strcmp(s->sec_indev, s->outdev) ||
|
||||
!strcmp(s->pri_indev, s->sec_indev)) {
|
||||
error_setg(errp, "'indev' and 'outdev' could not be same "
|
||||
"for compare module");
|
||||
return;
|
||||
}
|
||||
|
||||
if (find_and_check_chardev(&s->chr_pri_in, s->pri_indev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (find_and_check_chardev(&s->chr_sec_in, s->sec_indev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (find_and_check_chardev(&s->chr_out, s->outdev, errp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
qemu_chr_fe_claim_no_fail(s->chr_pri_in);
|
||||
|
||||
qemu_chr_fe_claim_no_fail(s->chr_sec_in);
|
||||
|
||||
qemu_chr_fe_claim_no_fail(s->chr_out);
|
||||
|
||||
net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
|
||||
net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
|
||||
|
||||
g_queue_init(&s->conn_list);
|
||||
qemu_mutex_init(&s->timer_check_lock);
|
||||
|
||||
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
|
||||
connection_key_equal,
|
||||
g_free,
|
||||
connection_destroy);
|
||||
|
||||
sprintf(thread_name, "colo-compare %d", compare_id);
|
||||
qemu_thread_create(&s->thread, thread_name,
|
||||
colo_compare_thread, s,
|
||||
QEMU_THREAD_JOINABLE);
|
||||
compare_id++;
|
||||
|
||||
/* A regular timer to kick any packets that the secondary doesn't match */
|
||||
s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest runs */
|
||||
check_old_packet_regular, s);
|
||||
timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
|
||||
REGULAR_PACKET_CHECK_MS);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void colo_compare_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||
|
||||
ucc->complete = colo_compare_complete;
|
||||
}
|
||||
|
||||
static void colo_compare_init(Object *obj)
|
||||
{
|
||||
object_property_add_str(obj, "primary_in",
|
||||
compare_get_pri_indev, compare_set_pri_indev,
|
||||
NULL);
|
||||
object_property_add_str(obj, "secondary_in",
|
||||
compare_get_sec_indev, compare_set_sec_indev,
|
||||
NULL);
|
||||
object_property_add_str(obj, "outdev",
|
||||
compare_get_outdev, compare_set_outdev,
|
||||
NULL);
|
||||
}
|
||||
|
||||
static void colo_compare_finalize(Object *obj)
|
||||
{
|
||||
CompareState *s = COLO_COMPARE(obj);
|
||||
|
||||
if (s->chr_pri_in) {
|
||||
qemu_chr_add_handlers(s->chr_pri_in, NULL, NULL, NULL, NULL);
|
||||
qemu_chr_fe_release(s->chr_pri_in);
|
||||
}
|
||||
if (s->chr_sec_in) {
|
||||
qemu_chr_add_handlers(s->chr_sec_in, NULL, NULL, NULL, NULL);
|
||||
qemu_chr_fe_release(s->chr_sec_in);
|
||||
}
|
||||
if (s->chr_out) {
|
||||
qemu_chr_fe_release(s->chr_out);
|
||||
}
|
||||
|
||||
g_queue_free(&s->conn_list);
|
||||
|
||||
if (qemu_thread_is_self(&s->thread)) {
|
||||
/* compare connection */
|
||||
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
|
||||
qemu_thread_join(&s->thread);
|
||||
}
|
||||
|
||||
if (s->timer) {
|
||||
timer_del(s->timer);
|
||||
}
|
||||
|
||||
qemu_mutex_destroy(&s->timer_check_lock);
|
||||
|
||||
g_free(s->pri_indev);
|
||||
g_free(s->sec_indev);
|
||||
g_free(s->outdev);
|
||||
}
|
||||
|
||||
static const TypeInfo colo_compare_info = {
|
||||
.name = TYPE_COLO_COMPARE,
|
||||
.parent = TYPE_OBJECT,
|
||||
.instance_size = sizeof(CompareState),
|
||||
.instance_init = colo_compare_init,
|
||||
.instance_finalize = colo_compare_finalize,
|
||||
.class_size = sizeof(CompareClass),
|
||||
.class_init = colo_compare_class_init,
|
||||
.interfaces = (InterfaceInfo[]) {
|
||||
{ TYPE_USER_CREATABLE },
|
||||
{ }
|
||||
}
|
||||
};
|
||||
|
||||
static void register_types(void)
|
||||
{
|
||||
type_register_static(&colo_compare_info);
|
||||
}
|
||||
|
||||
type_init(register_types);
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||
*
|
||||
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||
* Copyright (c) 2016 FUJITSU LIMITED
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "trace.h"
|
||||
#include "net/colo.h"
|
||||
|
||||
uint32_t connection_key_hash(const void *opaque)
|
||||
{
|
||||
const ConnectionKey *key = opaque;
|
||||
uint32_t a, b, c;
|
||||
|
||||
/* Jenkins hash */
|
||||
a = b = c = JHASH_INITVAL + sizeof(*key);
|
||||
a += key->src.s_addr;
|
||||
b += key->dst.s_addr;
|
||||
c += (key->src_port | key->dst_port << 16);
|
||||
__jhash_mix(a, b, c);
|
||||
|
||||
a += key->ip_proto;
|
||||
__jhash_final(a, b, c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
int connection_key_equal(const void *key1, const void *key2)
|
||||
{
|
||||
return memcmp(key1, key2, sizeof(ConnectionKey)) == 0;
|
||||
}
|
||||
|
||||
int parse_packet_early(Packet *pkt)
|
||||
{
|
||||
int network_length;
|
||||
static const uint8_t vlan[] = {0x81, 0x00};
|
||||
uint8_t *data = pkt->data;
|
||||
uint16_t l3_proto;
|
||||
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
|
||||
|
||||
if (pkt->size < ETH_HLEN) {
|
||||
trace_colo_proxy_main("pkt->size < ETH_HLEN");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: support vlan.
|
||||
*/
|
||||
if (!memcmp(&data[12], vlan, sizeof(vlan))) {
|
||||
trace_colo_proxy_main("COLO-proxy don't support vlan");
|
||||
return 1;
|
||||
}
|
||||
|
||||
pkt->network_header = data + l2hdr_len;
|
||||
|
||||
const struct iovec l2vec = {
|
||||
.iov_base = (void *) data,
|
||||
.iov_len = l2hdr_len
|
||||
};
|
||||
l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len);
|
||||
|
||||
if (l3_proto != ETH_P_IP) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
network_length = pkt->ip->ip_hl * 4;
|
||||
if (pkt->size < l2hdr_len + network_length) {
|
||||
trace_colo_proxy_main("pkt->size < network_header + network_length");
|
||||
return 1;
|
||||
}
|
||||
pkt->transport_header = pkt->network_header + network_length;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void fill_connection_key(Packet *pkt, ConnectionKey *key)
|
||||
{
|
||||
uint32_t tmp_ports;
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
key->ip_proto = pkt->ip->ip_p;
|
||||
|
||||
switch (key->ip_proto) {
|
||||
case IPPROTO_TCP:
|
||||
case IPPROTO_UDP:
|
||||
case IPPROTO_DCCP:
|
||||
case IPPROTO_ESP:
|
||||
case IPPROTO_SCTP:
|
||||
case IPPROTO_UDPLITE:
|
||||
tmp_ports = *(uint32_t *)(pkt->transport_header);
|
||||
key->src = pkt->ip->ip_src;
|
||||
key->dst = pkt->ip->ip_dst;
|
||||
key->src_port = ntohs(tmp_ports & 0xffff);
|
||||
key->dst_port = ntohs(tmp_ports >> 16);
|
||||
break;
|
||||
case IPPROTO_AH:
|
||||
tmp_ports = *(uint32_t *)(pkt->transport_header + 4);
|
||||
key->src = pkt->ip->ip_src;
|
||||
key->dst = pkt->ip->ip_dst;
|
||||
key->src_port = ntohs(tmp_ports & 0xffff);
|
||||
key->dst_port = ntohs(tmp_ports >> 16);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void reverse_connection_key(ConnectionKey *key)
|
||||
{
|
||||
struct in_addr tmp_ip;
|
||||
uint16_t tmp_port;
|
||||
|
||||
tmp_ip = key->src;
|
||||
key->src = key->dst;
|
||||
key->dst = tmp_ip;
|
||||
|
||||
tmp_port = key->src_port;
|
||||
key->src_port = key->dst_port;
|
||||
key->dst_port = tmp_port;
|
||||
}
|
||||
|
||||
Connection *connection_new(ConnectionKey *key)
|
||||
{
|
||||
Connection *conn = g_slice_new(Connection);
|
||||
|
||||
conn->ip_proto = key->ip_proto;
|
||||
conn->processing = false;
|
||||
conn->offset = 0;
|
||||
conn->syn_flag = 0;
|
||||
g_queue_init(&conn->primary_list);
|
||||
g_queue_init(&conn->secondary_list);
|
||||
|
||||
return conn;
|
||||
}
|
||||
|
||||
void connection_destroy(void *opaque)
|
||||
{
|
||||
Connection *conn = opaque;
|
||||
|
||||
g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
|
||||
g_queue_free(&conn->primary_list);
|
||||
g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
|
||||
g_queue_free(&conn->secondary_list);
|
||||
g_slice_free(Connection, conn);
|
||||
}
|
||||
|
||||
Packet *packet_new(const void *data, int size)
|
||||
{
|
||||
Packet *pkt = g_slice_new(Packet);
|
||||
|
||||
pkt->data = g_memdup(data, size);
|
||||
pkt->size = size;
|
||||
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
|
||||
|
||||
return pkt;
|
||||
}
|
||||
|
||||
void packet_destroy(void *opaque, void *user_data)
|
||||
{
|
||||
Packet *pkt = opaque;
|
||||
|
||||
g_free(pkt->data);
|
||||
g_slice_free(Packet, pkt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear hashtable, stop this hash growing really huge
|
||||
*/
|
||||
void connection_hashtable_reset(GHashTable *connection_track_table)
|
||||
{
|
||||
g_hash_table_remove_all(connection_track_table);
|
||||
}
|
||||
|
||||
/* if not found, create a new connection and add to hash table */
|
||||
Connection *connection_get(GHashTable *connection_track_table,
|
||||
ConnectionKey *key,
|
||||
GQueue *conn_list)
|
||||
{
|
||||
Connection *conn = g_hash_table_lookup(connection_track_table, key);
|
||||
|
||||
if (conn == NULL) {
|
||||
ConnectionKey *new_key = g_memdup(key, sizeof(*key));
|
||||
|
||||
conn = connection_new(key);
|
||||
|
||||
if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) {
|
||||
trace_colo_proxy_main("colo proxy connection hashtable full,"
|
||||
" clear it");
|
||||
connection_hashtable_reset(connection_track_table);
|
||||
/*
|
||||
* clear the conn_list
|
||||
*/
|
||||
while (!g_queue_is_empty(conn_list)) {
|
||||
connection_destroy(g_queue_pop_head(conn_list));
|
||||
}
|
||||
}
|
||||
|
||||
g_hash_table_insert(connection_track_table, new_key, conn);
|
||||
}
|
||||
|
||||
return conn;
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
|
||||
* (a.k.a. Fault Tolerance or Continuous Replication)
|
||||
*
|
||||
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||
* Copyright (c) 2016 FUJITSU LIMITED
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef QEMU_COLO_PROXY_H
|
||||
#define QEMU_COLO_PROXY_H
|
||||
|
||||
#include "slirp/slirp.h"
|
||||
#include "qemu/jhash.h"
|
||||
#include "qemu/timer.h"
|
||||
|
||||
#define HASHTABLE_MAX_SIZE 16384
|
||||
|
||||
#ifndef IPPROTO_DCCP
|
||||
#define IPPROTO_DCCP 33
|
||||
#endif
|
||||
|
||||
#ifndef IPPROTO_SCTP
|
||||
#define IPPROTO_SCTP 132
|
||||
#endif
|
||||
|
||||
#ifndef IPPROTO_UDPLITE
|
||||
#define IPPROTO_UDPLITE 136
|
||||
#endif
|
||||
|
||||
typedef struct Packet {
|
||||
void *data;
|
||||
union {
|
||||
uint8_t *network_header;
|
||||
struct ip *ip;
|
||||
};
|
||||
uint8_t *transport_header;
|
||||
int size;
|
||||
/* Time of packet creation, in wall clock ms */
|
||||
int64_t creation_ms;
|
||||
} Packet;
|
||||
|
||||
typedef struct ConnectionKey {
|
||||
/* (src, dst) must be grouped, in the same way than in IP header */
|
||||
struct in_addr src;
|
||||
struct in_addr dst;
|
||||
uint16_t src_port;
|
||||
uint16_t dst_port;
|
||||
uint8_t ip_proto;
|
||||
} QEMU_PACKED ConnectionKey;
|
||||
|
||||
typedef struct Connection {
|
||||
/* connection primary send queue: element type: Packet */
|
||||
GQueue primary_list;
|
||||
/* connection secondary send queue: element type: Packet */
|
||||
GQueue secondary_list;
|
||||
/* flag to enqueue unprocessed_connections */
|
||||
bool processing;
|
||||
uint8_t ip_proto;
|
||||
/* offset = secondary_seq - primary_seq */
|
||||
tcp_seq offset;
|
||||
/*
|
||||
* we use this flag update offset func
|
||||
* run once in independent tcp connection
|
||||
*/
|
||||
int syn_flag;
|
||||
} Connection;
|
||||
|
||||
uint32_t connection_key_hash(const void *opaque);
|
||||
int connection_key_equal(const void *opaque1, const void *opaque2);
|
||||
int parse_packet_early(Packet *pkt);
|
||||
void fill_connection_key(Packet *pkt, ConnectionKey *key);
|
||||
void reverse_connection_key(ConnectionKey *key);
|
||||
Connection *connection_new(ConnectionKey *key);
|
||||
void connection_destroy(void *opaque);
|
||||
Connection *connection_get(GHashTable *connection_track_table,
|
||||
ConnectionKey *key,
|
||||
GQueue *conn_list);
|
||||
void connection_hashtable_reset(GHashTable *connection_track_table);
|
||||
Packet *packet_new(const void *data, int size);
|
||||
void packet_destroy(void *opaque, void *user_data);
|
||||
|
||||
#endif /* QEMU_COLO_PROXY_H */
|
|
@ -0,0 +1,263 @@
|
|||
/*
|
||||
* Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
|
||||
* Copyright (c) 2016 FUJITSU LIMITED
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Author: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
|
||||
*
|
||||
* This work is licensed under the terms of the GNU GPL, version 2 or
|
||||
* later. See the COPYING file in the top-level directory.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "trace.h"
|
||||
#include "net/colo.h"
|
||||
#include "net/filter.h"
|
||||
#include "net/net.h"
|
||||
#include "qemu-common.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qapi-visit.h"
|
||||
#include "qom/object.h"
|
||||
#include "qemu/main-loop.h"
|
||||
#include "qemu/iov.h"
|
||||
#include "net/checksum.h"
|
||||
|
||||
#define FILTER_COLO_REWRITER(obj) \
|
||||
OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER)
|
||||
|
||||
#define TYPE_FILTER_REWRITER "filter-rewriter"
|
||||
|
||||
typedef struct RewriterState {
|
||||
NetFilterState parent_obj;
|
||||
NetQueue *incoming_queue;
|
||||
/* hashtable to save connection */
|
||||
GHashTable *connection_track_table;
|
||||
} RewriterState;
|
||||
|
||||
static void filter_rewriter_flush(NetFilterState *nf)
|
||||
{
|
||||
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||
|
||||
if (!qemu_net_queue_flush(s->incoming_queue)) {
|
||||
/* Unable to empty the queue, purge remaining packets */
|
||||
qemu_net_queue_purge(s->incoming_queue, nf->netdev);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 1 on success, if return 0 means the pkt
|
||||
* is not TCP packet
|
||||
*/
|
||||
static int is_tcp_packet(Packet *pkt)
|
||||
{
|
||||
if (!parse_packet_early(pkt) &&
|
||||
pkt->ip->ip_p == IPPROTO_TCP) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle tcp packet from primary guest */
|
||||
static int handle_primary_tcp_pkt(NetFilterState *nf,
|
||||
Connection *conn,
|
||||
Packet *pkt)
|
||||
{
|
||||
struct tcphdr *tcp_pkt;
|
||||
|
||||
tcp_pkt = (struct tcphdr *)pkt->transport_header;
|
||||
if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
||||
char *sdebug, *ddebug;
|
||||
sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
|
||||
ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
|
||||
trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
|
||||
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
||||
tcp_pkt->th_flags);
|
||||
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||
g_free(sdebug);
|
||||
g_free(ddebug);
|
||||
}
|
||||
|
||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) {
|
||||
/*
|
||||
* we use this flag update offset func
|
||||
* run once in independent tcp connection
|
||||
*/
|
||||
conn->syn_flag = 1;
|
||||
}
|
||||
|
||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) {
|
||||
if (conn->syn_flag) {
|
||||
/*
|
||||
* offset = secondary_seq - primary seq
|
||||
* ack packet sent by guest from primary node,
|
||||
* so we use th_ack - 1 get primary_seq
|
||||
*/
|
||||
conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
|
||||
conn->syn_flag = 0;
|
||||
}
|
||||
/* handle packets to the secondary from the primary */
|
||||
tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
|
||||
|
||||
net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* handle tcp packet from secondary guest */
|
||||
static int handle_secondary_tcp_pkt(NetFilterState *nf,
|
||||
Connection *conn,
|
||||
Packet *pkt)
|
||||
{
|
||||
struct tcphdr *tcp_pkt;
|
||||
|
||||
tcp_pkt = (struct tcphdr *)pkt->transport_header;
|
||||
|
||||
if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
|
||||
char *sdebug, *ddebug;
|
||||
sdebug = strdup(inet_ntoa(pkt->ip->ip_src));
|
||||
ddebug = strdup(inet_ntoa(pkt->ip->ip_dst));
|
||||
trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
|
||||
ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
|
||||
tcp_pkt->th_flags);
|
||||
trace_colo_filter_rewriter_conn_offset(conn->offset);
|
||||
g_free(sdebug);
|
||||
g_free(ddebug);
|
||||
}
|
||||
|
||||
if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) {
|
||||
/*
|
||||
* save offset = secondary_seq and then
|
||||
* in handle_primary_tcp_pkt make offset
|
||||
* = secondary_seq - primary_seq
|
||||
*/
|
||||
conn->offset = ntohl(tcp_pkt->th_seq);
|
||||
}
|
||||
|
||||
if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
|
||||
/* handle packets to the primary from the secondary*/
|
||||
tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
|
||||
|
||||
net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
|
||||
NetClientState *sender,
|
||||
unsigned flags,
|
||||
const struct iovec *iov,
|
||||
int iovcnt,
|
||||
NetPacketSent *sent_cb)
|
||||
{
|
||||
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||
Connection *conn;
|
||||
ConnectionKey key;
|
||||
Packet *pkt;
|
||||
ssize_t size = iov_size(iov, iovcnt);
|
||||
char *buf = g_malloc0(size);
|
||||
|
||||
iov_to_buf(iov, iovcnt, 0, buf, size);
|
||||
pkt = packet_new(buf, size);
|
||||
|
||||
/*
|
||||
* if we get tcp packet
|
||||
* we will rewrite it to make secondary guest's
|
||||
* connection established successfully
|
||||
*/
|
||||
if (pkt && is_tcp_packet(pkt)) {
|
||||
|
||||
fill_connection_key(pkt, &key);
|
||||
|
||||
if (sender == nf->netdev) {
|
||||
/*
|
||||
* We need make tcp TX and RX packet
|
||||
* into one connection.
|
||||
*/
|
||||
reverse_connection_key(&key);
|
||||
}
|
||||
conn = connection_get(s->connection_track_table,
|
||||
&key,
|
||||
NULL);
|
||||
|
||||
if (sender == nf->netdev) {
|
||||
/* NET_FILTER_DIRECTION_TX */
|
||||
if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
|
||||
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||
packet_destroy(pkt, NULL);
|
||||
pkt = NULL;
|
||||
/*
|
||||
* We block the packet here,after rewrite pkt
|
||||
* and will send it
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
/* NET_FILTER_DIRECTION_RX */
|
||||
if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
|
||||
qemu_net_queue_send(s->incoming_queue, sender, 0,
|
||||
(const uint8_t *)pkt->data, pkt->size, NULL);
|
||||
packet_destroy(pkt, NULL);
|
||||
pkt = NULL;
|
||||
/*
|
||||
* We block the packet here,after rewrite pkt
|
||||
* and will send it
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
packet_destroy(pkt, NULL);
|
||||
pkt = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void colo_rewriter_cleanup(NetFilterState *nf)
|
||||
{
|
||||
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||
|
||||
/* flush packets */
|
||||
if (s->incoming_queue) {
|
||||
filter_rewriter_flush(nf);
|
||||
g_free(s->incoming_queue);
|
||||
}
|
||||
}
|
||||
|
||||
static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
|
||||
{
|
||||
RewriterState *s = FILTER_COLO_REWRITER(nf);
|
||||
|
||||
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
|
||||
connection_key_equal,
|
||||
g_free,
|
||||
connection_destroy);
|
||||
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
|
||||
}
|
||||
|
||||
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
|
||||
{
|
||||
NetFilterClass *nfc = NETFILTER_CLASS(oc);
|
||||
|
||||
nfc->setup = colo_rewriter_setup;
|
||||
nfc->cleanup = colo_rewriter_cleanup;
|
||||
nfc->receive_iov = colo_rewriter_receive_iov;
|
||||
}
|
||||
|
||||
static const TypeInfo colo_rewriter_info = {
|
||||
.name = TYPE_FILTER_REWRITER,
|
||||
.parent = TYPE_NETFILTER,
|
||||
.class_init = colo_rewriter_class_init,
|
||||
.instance_size = sizeof(RewriterState),
|
||||
};
|
||||
|
||||
static void register_types(void)
|
||||
{
|
||||
type_register_static(&colo_rewriter_info);
|
||||
}
|
||||
|
||||
type_init(register_types);
|
|
@ -690,9 +690,13 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
|
|||
buffer = iov[0].iov_base;
|
||||
offset = iov[0].iov_len;
|
||||
} else {
|
||||
buf = g_new(uint8_t, NET_BUFSIZE);
|
||||
offset = iov_size(iov, iovcnt);
|
||||
if (offset > NET_BUFSIZE) {
|
||||
return -1;
|
||||
}
|
||||
buf = g_malloc(offset);
|
||||
buffer = buf;
|
||||
offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE);
|
||||
offset = iov_to_buf(iov, iovcnt, 0, buf, offset);
|
||||
}
|
||||
|
||||
if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
|
||||
|
@ -1179,6 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict)
|
|||
|
||||
qemu_del_net_client(nc->peer);
|
||||
qemu_del_net_client(nc);
|
||||
qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device));
|
||||
}
|
||||
|
||||
void netdev_add(QemuOpts *opts, Error **errp)
|
||||
|
|
|
@ -857,7 +857,9 @@ free_fail:
|
|||
return -1;
|
||||
}
|
||||
|
||||
fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
|
||||
fd = net_bridge_run_helper(tap->helper,
|
||||
tap->has_br ?
|
||||
tap->br : DEFAULT_BRIDGE_INTERFACE,
|
||||
errp);
|
||||
if (fd == -1) {
|
||||
return -1;
|
||||
|
|
|
@ -2636,6 +2636,8 @@
|
|||
#
|
||||
# @downscript: #optional script to shut down the interface
|
||||
#
|
||||
# @br: #optional bridge name (since 2.8)
|
||||
#
|
||||
# @helper: #optional command to execute to configure bridge
|
||||
#
|
||||
# @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes.
|
||||
|
@ -2665,6 +2667,7 @@
|
|||
'*fds': 'str',
|
||||
'*script': 'str',
|
||||
'*downscript': 'str',
|
||||
'*br': 'str',
|
||||
'*helper': 'str',
|
||||
'*sndbuf': 'size',
|
||||
'*vnet_hdr': 'bool',
|
||||
|
|
77
qemu-char.c
77
qemu-char.c
|
@ -449,11 +449,12 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...)
|
|||
|
||||
static void remove_fd_in_watch(CharDriverState *chr);
|
||||
|
||||
void qemu_chr_add_handlers(CharDriverState *s,
|
||||
IOCanReadHandler *fd_can_read,
|
||||
IOReadHandler *fd_read,
|
||||
IOEventHandler *fd_event,
|
||||
void *opaque)
|
||||
void qemu_chr_add_handlers_full(CharDriverState *s,
|
||||
IOCanReadHandler *fd_can_read,
|
||||
IOReadHandler *fd_read,
|
||||
IOEventHandler *fd_event,
|
||||
void *opaque,
|
||||
GMainContext *context)
|
||||
{
|
||||
int fe_open;
|
||||
|
||||
|
@ -467,8 +468,9 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
|||
s->chr_read = fd_read;
|
||||
s->chr_event = fd_event;
|
||||
s->handler_opaque = opaque;
|
||||
if (fe_open && s->chr_update_read_handler)
|
||||
s->chr_update_read_handler(s);
|
||||
if (fe_open && s->chr_update_read_handler) {
|
||||
s->chr_update_read_handler(s, context);
|
||||
}
|
||||
|
||||
if (!s->explicit_fe_open) {
|
||||
qemu_chr_fe_set_open(s, fe_open);
|
||||
|
@ -481,6 +483,16 @@ void qemu_chr_add_handlers(CharDriverState *s,
|
|||
}
|
||||
}
|
||||
|
||||
void qemu_chr_add_handlers(CharDriverState *s,
|
||||
IOCanReadHandler *fd_can_read,
|
||||
IOReadHandler *fd_read,
|
||||
IOEventHandler *fd_event,
|
||||
void *opaque)
|
||||
{
|
||||
qemu_chr_add_handlers_full(s, fd_can_read, fd_read,
|
||||
fd_event, opaque, NULL);
|
||||
}
|
||||
|
||||
static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
|
||||
{
|
||||
return len;
|
||||
|
@ -722,7 +734,8 @@ static void mux_chr_event(void *opaque, int event)
|
|||
mux_chr_send_event(d, i, event);
|
||||
}
|
||||
|
||||
static void mux_chr_update_read_handler(CharDriverState *chr)
|
||||
static void mux_chr_update_read_handler(CharDriverState *chr,
|
||||
GMainContext *context)
|
||||
{
|
||||
MuxDriver *d = chr->opaque;
|
||||
|
||||
|
@ -736,8 +749,10 @@ static void mux_chr_update_read_handler(CharDriverState *chr)
|
|||
d->chr_event[d->mux_cnt] = chr->chr_event;
|
||||
/* Fix up the real driver with mux routines */
|
||||
if (d->mux_cnt == 0) {
|
||||
qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read,
|
||||
mux_chr_event, chr);
|
||||
qemu_chr_add_handlers_full(d->drv, mux_chr_can_read,
|
||||
mux_chr_read,
|
||||
mux_chr_event,
|
||||
chr, context);
|
||||
}
|
||||
if (d->focus != -1) {
|
||||
mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT);
|
||||
|
@ -853,6 +868,7 @@ typedef struct IOWatchPoll
|
|||
IOCanReadHandler *fd_can_read;
|
||||
GSourceFunc fd_read;
|
||||
void *opaque;
|
||||
GMainContext *context;
|
||||
} IOWatchPoll;
|
||||
|
||||
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
||||
|
@ -860,7 +876,8 @@ static IOWatchPoll *io_watch_poll_from_source(GSource *source)
|
|||
return container_of(source, IOWatchPoll, parent);
|
||||
}
|
||||
|
||||
static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
|
||||
static gboolean io_watch_poll_prepare(GSource *source,
|
||||
gint *timeout_)
|
||||
{
|
||||
IOWatchPoll *iwp = io_watch_poll_from_source(source);
|
||||
bool now_active = iwp->fd_can_read(iwp->opaque) > 0;
|
||||
|
@ -873,7 +890,7 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_)
|
|||
iwp->src = qio_channel_create_watch(
|
||||
iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
|
||||
g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
|
||||
g_source_attach(iwp->src, NULL);
|
||||
g_source_attach(iwp->src, iwp->context);
|
||||
} else {
|
||||
g_source_destroy(iwp->src);
|
||||
g_source_unref(iwp->src);
|
||||
|
@ -920,19 +937,22 @@ static GSourceFuncs io_watch_poll_funcs = {
|
|||
static guint io_add_watch_poll(QIOChannel *ioc,
|
||||
IOCanReadHandler *fd_can_read,
|
||||
QIOChannelFunc fd_read,
|
||||
gpointer user_data)
|
||||
gpointer user_data,
|
||||
GMainContext *context)
|
||||
{
|
||||
IOWatchPoll *iwp;
|
||||
int tag;
|
||||
|
||||
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll));
|
||||
iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs,
|
||||
sizeof(IOWatchPoll));
|
||||
iwp->fd_can_read = fd_can_read;
|
||||
iwp->opaque = user_data;
|
||||
iwp->ioc = ioc;
|
||||
iwp->fd_read = (GSourceFunc) fd_read;
|
||||
iwp->src = NULL;
|
||||
iwp->context = context;
|
||||
|
||||
tag = g_source_attach(&iwp->parent, NULL);
|
||||
tag = g_source_attach(&iwp->parent, context);
|
||||
g_source_unref(&iwp->parent);
|
||||
return tag;
|
||||
}
|
||||
|
@ -1064,7 +1084,8 @@ static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond)
|
|||
return qio_channel_create_watch(s->ioc_out, cond);
|
||||
}
|
||||
|
||||
static void fd_chr_update_read_handler(CharDriverState *chr)
|
||||
static void fd_chr_update_read_handler(CharDriverState *chr,
|
||||
GMainContext *context)
|
||||
{
|
||||
FDCharDriver *s = chr->opaque;
|
||||
|
||||
|
@ -1072,7 +1093,8 @@ static void fd_chr_update_read_handler(CharDriverState *chr)
|
|||
if (s->ioc_in) {
|
||||
chr->fd_in_tag = io_add_watch_poll(s->ioc_in,
|
||||
fd_chr_read_poll,
|
||||
fd_chr_read, chr);
|
||||
fd_chr_read, chr,
|
||||
context);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1319,7 +1341,8 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr)
|
|||
}
|
||||
}
|
||||
|
||||
static void pty_chr_update_read_handler(CharDriverState *chr)
|
||||
static void pty_chr_update_read_handler(CharDriverState *chr,
|
||||
GMainContext *context)
|
||||
{
|
||||
qemu_mutex_lock(&chr->chr_write_lock);
|
||||
pty_chr_update_read_handler_locked(chr);
|
||||
|
@ -1423,7 +1446,8 @@ static void pty_chr_state(CharDriverState *chr, int connected)
|
|||
if (!chr->fd_in_tag) {
|
||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||
pty_chr_read_poll,
|
||||
pty_chr_read, chr);
|
||||
pty_chr_read,
|
||||
chr, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2565,7 +2589,8 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
static void udp_chr_update_read_handler(CharDriverState *chr)
|
||||
static void udp_chr_update_read_handler(CharDriverState *chr,
|
||||
GMainContext *context)
|
||||
{
|
||||
NetCharDriver *s = chr->opaque;
|
||||
|
||||
|
@ -2573,7 +2598,8 @@ static void udp_chr_update_read_handler(CharDriverState *chr)
|
|||
if (s->ioc) {
|
||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||
udp_chr_read_poll,
|
||||
udp_chr_read, chr);
|
||||
udp_chr_read, chr,
|
||||
context);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2976,12 +3002,14 @@ static void tcp_chr_connect(void *opaque)
|
|||
if (s->ioc) {
|
||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||
tcp_chr_read_poll,
|
||||
tcp_chr_read, chr);
|
||||
tcp_chr_read,
|
||||
chr, NULL);
|
||||
}
|
||||
qemu_chr_be_generic_open(chr);
|
||||
}
|
||||
|
||||
static void tcp_chr_update_read_handler(CharDriverState *chr)
|
||||
static void tcp_chr_update_read_handler(CharDriverState *chr,
|
||||
GMainContext *context)
|
||||
{
|
||||
TCPCharDriver *s = chr->opaque;
|
||||
|
||||
|
@ -2993,7 +3021,8 @@ static void tcp_chr_update_read_handler(CharDriverState *chr)
|
|||
if (s->ioc) {
|
||||
chr->fd_in_tag = io_add_watch_poll(s->ioc,
|
||||
tcp_chr_read_poll,
|
||||
tcp_chr_read, chr);
|
||||
tcp_chr_read, chr,
|
||||
context);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1598,10 +1598,11 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
|
|||
" configure a host TAP network backend with ID 'str'\n"
|
||||
#else
|
||||
"-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
|
||||
" [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
|
||||
" [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
|
||||
" [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
|
||||
" [,poll-us=n]\n"
|
||||
" configure a host TAP network backend with ID 'str'\n"
|
||||
" connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n"
|
||||
" use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n"
|
||||
" to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
|
||||
" to deconfigure it\n"
|
||||
|
@ -1888,8 +1889,8 @@ processed and applied to -net user. Mixing them with the new configuration
|
|||
syntax gives undefined results. Their use for new applications is discouraged
|
||||
as they will be removed from future versions.
|
||||
|
||||
@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
|
||||
@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
|
||||
@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}]
|
||||
@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}]
|
||||
Connect the host TAP network interface @var{name} to VLAN @var{n}.
|
||||
|
||||
Use the network script @var{file} to configure it and the network script
|
||||
|
@ -1900,8 +1901,9 @@ automatically provides one. The default network configure script is
|
|||
to disable script execution.
|
||||
|
||||
If running QEMU as an unprivileged user, use the network helper
|
||||
@var{helper} to configure the TAP interface. The default network
|
||||
helper executable is @file{/path/to/qemu-bridge-helper}.
|
||||
@var{helper} to configure the TAP interface and attach it to the bridge.
|
||||
The default network helper executable is @file{/path/to/qemu-bridge-helper}
|
||||
and the default bridge device is @file{br0}.
|
||||
|
||||
@option{fd}=@var{h} can be used to specify the handle of an already
|
||||
opened host TAP interface.
|
||||
|
@ -3887,6 +3889,19 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
|
|||
be the same. we can just use indev or outdev, but at least one of indev or outdev
|
||||
need to be specified.
|
||||
|
||||
@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}]
|
||||
|
||||
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
|
||||
secondary from primary to keep secondary tcp connection,and rewrite
|
||||
tcp packet to primary from secondary make tcp packet can be handled by
|
||||
client.
|
||||
|
||||
usage:
|
||||
colo secondary:
|
||||
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||
-object filter-rewriter,id=rew0,netdev=hn0,queue=all
|
||||
|
||||
@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}]
|
||||
|
||||
Dump the network traffic on netdev @var{dev} to the file specified by
|
||||
|
@ -3894,6 +3909,45 @@ Dump the network traffic on netdev @var{dev} to the file specified by
|
|||
The file format is libpcap, so it can be analyzed with tools such as tcpdump
|
||||
or Wireshark.
|
||||
|
||||
@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
|
||||
outdev=@var{chardevid}
|
||||
|
||||
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
|
||||
secondary packet. If the packets are same, we will output primary
|
||||
packet to outdev@var{chardevid}, else we will notify colo-frame
|
||||
do checkpoint and send primary packet to outdev@var{chardevid}.
|
||||
|
||||
we must use it with the help of filter-mirror and filter-redirector.
|
||||
|
||||
@example
|
||||
|
||||
primary:
|
||||
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
|
||||
-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
|
||||
-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
|
||||
-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
|
||||
-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
|
||||
-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
|
||||
-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
|
||||
-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
|
||||
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0
|
||||
-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out
|
||||
-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0
|
||||
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0
|
||||
|
||||
secondary:
|
||||
-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
|
||||
-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
|
||||
-chardev socket,id=red0,host=3.3.3.3,port=9003
|
||||
-chardev socket,id=red1,host=3.3.3.3,port=9004
|
||||
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
|
||||
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
|
||||
|
||||
@end example
|
||||
|
||||
If you want to know the detail of above command line, you can read
|
||||
the colo-compare git log.
|
||||
|
||||
@item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
||||
@item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}]
|
||||
|
||||
|
|
16
trace-events
16
trace-events
|
@ -139,6 +139,22 @@ memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t v
|
|||
memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
||||
memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u"
|
||||
|
||||
# net/colo.c
|
||||
colo_proxy_main(const char *chr) ": %s"
|
||||
|
||||
# net/colo-compare.c
|
||||
colo_compare_main(const char *chr) ": %s"
|
||||
colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d"
|
||||
colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
|
||||
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
|
||||
colo_old_packet_check_found(int64_t old_time) "%" PRId64
|
||||
colo_compare_miscompare(void) ""
|
||||
|
||||
# net/filter-rewriter.c
|
||||
colo_filter_rewriter_debug(void) ""
|
||||
colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u flags=%x\n"
|
||||
colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n"
|
||||
|
||||
### Guest events, keep at bottom
|
||||
|
||||
# @vaddr: Access' virtual address.
|
||||
|
|
4
vl.c
4
vl.c
|
@ -2845,7 +2845,9 @@ static bool object_create_initial(const char *type)
|
|||
if (g_str_equal(type, "filter-buffer") ||
|
||||
g_str_equal(type, "filter-dump") ||
|
||||
g_str_equal(type, "filter-mirror") ||
|
||||
g_str_equal(type, "filter-redirector")) {
|
||||
g_str_equal(type, "filter-redirector") ||
|
||||
g_str_equal(type, "colo-compare") ||
|
||||
g_str_equal(type, "filter-rewriter")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue