ivshmem: Propagate errors through ivshmem_recv_setup()

This kills off the funny state described in the previous commit.

Simplify ivshmem_io_read() accordingly, and update documentation.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <1458066895-20632-27-git-send-email-armbru@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Markus Armbruster 2016-03-15 19:34:41 +01:00
parent 3a55fc0f24
commit 1309cf448a
3 changed files with 95 additions and 63 deletions
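
The patch converts ivshmem's setup path to QEMU's usual error-propagation idiom: callees take an Error **errp parameter, fill it with error_setg()/error_setg_errno(), and callers either hand the error up with error_propagate() or report it with error_report_err(). The sketch below is not part of the patch; it is a minimal, self-contained illustration of that calling convention, using a stand-in Error type rather than QEMU's real qapi/error.h.

/* Stand-in for QEMU's Error API, only to illustrate the errp pattern
 * used by this commit; not the real qapi/error.h implementation. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct Error { char *msg; } Error;

static void error_setg(Error **errp, const char *msg)
{
    if (errp) {
        Error *err = malloc(sizeof(*err));
        err->msg = strdup(msg);
        *errp = err;                   /* hand the error to the caller */
    }
}

static void error_propagate(Error **dst_errp, Error *local_err)
{
    if (dst_errp) {
        *dst_errp = local_err;         /* pass it further up the stack */
    } else {
        free(local_err->msg);          /* caller does not care: drop it */
        free(local_err);
    }
}

static void error_report_err(Error *err)
{
    fprintf(stderr, "%s\n", err->msg); /* top level: report and free */
    free(err->msg);
    free(err);
}

/* Callee: sets errp instead of printing and returning -1. */
static void do_step(Error **errp)
{
    error_setg(errp, "step failed");
}

/* Mid-level caller: checks a local err, then propagates to its caller. */
static void do_setup(Error **errp)
{
    Error *err = NULL;

    do_step(&err);
    if (err) {
        error_propagate(errp, err);
        return;
    }
}

int main(void)
{
    Error *err = NULL;

    do_setup(&err);
    if (err) {
        error_report_err(err);
        return 1;
    }
    return 0;
}

In the patch itself, ivshmem_recv_setup() plays the do_setup() role and propagates into pci_ivshmem_realize(), while the asynchronous ivshmem_read() path has no caller to propagate to and falls back to error_report_err().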


@@ -62,11 +62,11 @@ There are two ways to use this device:
 likely want to write a kernel driver to handle interrupts. Requires
 the device to be configured for interrupts, obviously.
 
-If the device is configured for interrupts, BAR2 is initially invalid.
-It becomes safely accessible only after the ivshmem server provided
-the shared memory. Guest software should wait for the IVPosition
-register (described below) to become non-negative before accessing
-BAR2.
+Before QEMU 2.6.0, BAR2 can initially be invalid if the device is
+configured for interrupts. It becomes safely accessible only after
+the ivshmem server provided the shared memory. Guest software should
+wait for the IVPosition register (described below) to become
+non-negative before accessing BAR2.
 
 The device is not capable to tell guest software whether it is
 configured for interrupts.
@@ -82,7 +82,7 @@ BAR 0 contains the following registers:
     4     4   read/write        0  Interrupt Status
                                    bit 0: peer interrupt
                                    bit 1..31: reserved
-    8     4   read-only    0 or -1  IVPosition
+    8     4   read-only    0 or ID  IVPosition
    12     4   write-only      N/A  Doorbell
                                    bit 0..15: vector
                                    bit 16..31: peer ID
@@ -100,12 +100,14 @@ when an interrupt request from a peer is received. Reading the
 register clears it.
 
 IVPosition Register: if the device is not configured for interrupts,
-this is zero. Else, it's -1 for a short while after reset, then
-changes to the device's ID (between 0 and 65535).
+this is zero. Else, it is the device's ID (between 0 and 65535).
+
+Before QEMU 2.6.0, the register may read -1 for a short while after
+reset.
 
 There is no good way for software to find out whether the device is
 configured for interrupts. A positive IVPosition means interrupts,
-but zero could be either. The initial -1 cannot be reliably observed.
+but zero could be either.
 
 Doorbell Register: writing this register requests to interrupt a peer.
 The written value's high 16 bits are the ID of the peer to interrupt,
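
As a reader's aid for the register description above, here is a hypothetical guest-side sketch of the documented semantics: poll IVPosition (offset 8) until it is non-negative (it may briefly read -1 on pre-2.6.0 devices, and BAR2 is not safe before then), then ring the Doorbell (offset 12) with the peer ID in the high 16 bits and the vector in the low 16 bits. It is illustrative only and not part of the patch; how BAR0 gets mapped into the guest (UIO, a kernel driver, etc.) is out of scope, and the helper names are made up.

#include <stdint.h>

#define IVSHMEM_IVPOSITION  8   /* read-only: peer ID, or -1 before QEMU 2.6.0 */
#define IVSHMEM_DOORBELL   12   /* write-only: (peer ID << 16) | vector */

static inline uint32_t mmio_read32(volatile uint32_t *bar0, unsigned off)
{
    return bar0[off / 4];
}

static inline void mmio_write32(volatile uint32_t *bar0, unsigned off,
                                uint32_t val)
{
    bar0[off / 4] = val;
}

/* Wait until the device reports a valid ID, then interrupt a peer. */
int32_t ivshmem_wait_and_kick(volatile uint32_t *bar0,
                              uint16_t peer, uint16_t vector)
{
    int32_t id;

    /* BAR2 is not safe to touch until IVPosition turns non-negative. */
    do {
        id = (int32_t)mmio_read32(bar0, IVSHMEM_IVPOSITION);
    } while (id < 0);

    mmio_write32(bar0, IVSHMEM_DOORBELL, ((uint32_t)peer << 16) | vector);
    return id;
}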


@@ -234,12 +234,7 @@ static uint64_t ivshmem_io_read(void *opaque, hwaddr addr,
         break;
 
     case IVPOSITION:
-        /* return my VM ID if the memory is mapped */
-        if (memory_region_is_mapped(&s->ivshmem)) {
-            ret = s->vm_id;
-        } else {
-            ret = -1;
-        }
+        ret = s->vm_id;
         break;
 
     default:
@@ -511,7 +506,8 @@ static bool fifo_update_and_get_i64(IVShmemState *s,
     return false;
 }
 
-static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector)
+static void ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector,
+                                     Error **errp)
 {
     PCIDevice *pdev = PCI_DEVICE(s);
     MSIMessage msg = msix_get_message(pdev, vector);
@@ -522,22 +518,21 @@ static int ivshmem_add_kvm_msi_virq(IVShmemState *s, int vector)
     ret = kvm_irqchip_add_msi_route(kvm_state, msg, pdev);
     if (ret < 0) {
-        error_report("ivshmem: kvm_irqchip_add_msi_route failed");
-        return -1;
+        error_setg(errp, "kvm_irqchip_add_msi_route failed");
+        return;
     }
 
     s->msi_vectors[vector].virq = ret;
     s->msi_vectors[vector].pdev = pdev;
-
-    return 0;
 }
 
-static void setup_interrupt(IVShmemState *s, int vector)
+static void setup_interrupt(IVShmemState *s, int vector, Error **errp)
 {
     EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
     bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
         ivshmem_has_feature(s, IVSHMEM_MSI);
     PCIDevice *pdev = PCI_DEVICE(s);
+    Error *err = NULL;
 
     IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);
@@ -546,13 +541,16 @@ static void setup_interrupt(IVShmemState *s, int vector)
         watch_vector_notifier(s, n, vector);
     } else if (msix_enabled(pdev)) {
         IVSHMEM_DPRINTF("with irqfd\n");
-        if (ivshmem_add_kvm_msi_virq(s, vector) < 0) {
+        ivshmem_add_kvm_msi_virq(s, vector, &err);
+        if (err) {
+            error_propagate(errp, err);
             return;
         }
 
         if (!msix_is_masked(pdev, vector)) {
             kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                                s->msi_vectors[vector].virq);
+            /* TODO handle error */
         }
     } else {
         /* it will be delayed until msix is enabled, in write_config */
@@ -560,19 +558,19 @@ static void setup_interrupt(IVShmemState *s, int vector)
     }
 }
 
-static void process_msg_shmem(IVShmemState *s, int fd)
+static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
 {
     Error *err = NULL;
     void *ptr;
 
     if (memory_region_is_mapped(&s->ivshmem)) {
-        error_report("shm already initialized");
+        error_setg(errp, "server sent unexpected shared memory message");
         close(fd);
         return;
     }
 
     if (check_shm_size(s, fd, &err) == -1) {
-        error_report_err(err);
+        error_propagate(errp, err);
         close(fd);
         return;
     }
@@ -580,7 +578,7 @@ static void process_msg_shmem(IVShmemState *s, int fd)
     /* mmap the region and map into the BAR2 */
     ptr = mmap(0, s->ivshmem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
     if (ptr == MAP_FAILED) {
-        error_report("Failed to mmap shared memory %s", strerror(errno));
+        error_setg_errno(errp, errno, "Failed to mmap shared memory");
         close(fd);
         return;
     }
@@ -591,17 +589,19 @@ static void process_msg_shmem(IVShmemState *s, int fd)
     memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 }
 
-static void process_msg_disconnect(IVShmemState *s, uint16_t posn)
+static void process_msg_disconnect(IVShmemState *s, uint16_t posn,
+                                   Error **errp)
 {
     IVSHMEM_DPRINTF("posn %d has gone away\n", posn);
     if (posn >= s->nb_peers || posn == s->vm_id) {
-        error_report("invalid peer %d", posn);
+        error_setg(errp, "invalid peer %d", posn);
         return;
     }
     close_peer_eventfds(s, posn);
 }
 
-static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd)
+static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
+                                Error **errp)
 {
     Peer *peer = &s->peers[posn];
     int vector;
@@ -611,8 +611,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd)
      * descriptor for vector N-1. Count messages to find the vector.
      */
     if (peer->nb_eventfds >= s->vectors) {
-        error_report("Too many eventfd received, device has %d vectors",
-                     s->vectors);
+        error_setg(errp, "Too many eventfd received, device has %d vectors",
+                   s->vectors);
         close(fd);
         return;
     }
@@ -623,7 +623,8 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd)
     fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */
 
     if (posn == s->vm_id) {
-        setup_interrupt(s, vector);
+        setup_interrupt(s, vector, errp);
+        /* TODO do we need to handle the error? */
     }
 
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
@@ -631,18 +632,18 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd)
     }
 }
 
-static void process_msg(IVShmemState *s, int64_t msg, int fd)
+static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
 {
     IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
 
     if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
-        error_report("server sent invalid message %" PRId64, msg);
+        error_setg(errp, "server sent invalid message %" PRId64, msg);
         close(fd);
         return;
     }
 
     if (msg == -1) {
-        process_msg_shmem(s, fd);
+        process_msg_shmem(s, fd, errp);
         return;
     }
@@ -651,17 +652,18 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd)
     }
 
     if (fd >= 0) {
-        process_msg_connect(s, msg, fd);
+        process_msg_connect(s, msg, fd, errp);
     } else if (s->vm_id == -1) {
         s->vm_id = msg;
     } else {
-        process_msg_disconnect(s, msg);
+        process_msg_disconnect(s, msg, errp);
    }
 }
 
 static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
 {
     IVShmemState *s = opaque;
+    Error *err = NULL;
     int fd;
     int64_t msg;
@@ -672,10 +674,13 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size)
     fd = qemu_chr_fe_get_msgfd(s->server_chr);
 
     IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd);
-    process_msg(s, msg, fd);
+    process_msg(s, msg, fd, &err);
+    if (err) {
+        error_report_err(err);
+    }
 }
 
-static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd)
+static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp)
 {
     int64_t msg;
     int n, ret;
@@ -685,7 +690,7 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd)
         ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n,
                                    sizeof(msg) - n);
         if (ret < 0 && ret != -EINTR) {
-            /* TODO error handling */
+            error_setg_errno(errp, -ret, "read from server failed");
             return INT64_MIN;
         }
         n += ret;
@@ -695,15 +700,24 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd)
     return msg;
 }
 
-static void ivshmem_recv_setup(IVShmemState *s)
+static void ivshmem_recv_setup(IVShmemState *s, Error **errp)
 {
+    Error *err = NULL;
     int64_t msg;
     int fd;
 
-    msg = ivshmem_recv_msg(s, &fd);
-    if (fd != -1 || msg != IVSHMEM_PROTOCOL_VERSION) {
-        fprintf(stderr, "incompatible version, you are connecting to a ivshmem-"
-                "server using a different protocol please check your setup\n");
+    msg = ivshmem_recv_msg(s, &fd, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+    if (msg != IVSHMEM_PROTOCOL_VERSION) {
+        error_setg(errp, "server sent version %" PRId64 ", expecting %d",
+                   msg, IVSHMEM_PROTOCOL_VERSION);
+        return;
+    }
+    if (fd != -1) {
+        error_setg(errp, "server sent invalid version message");
         return;
     }
@@ -711,9 +725,25 @@ static void ivshmem_recv_setup(IVShmemState *s)
      * Receive more messages until we got shared memory.
      */
     do {
-        msg = ivshmem_recv_msg(s, &fd);
-        process_msg(s, msg, fd);
+        msg = ivshmem_recv_msg(s, &fd, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        process_msg(s, msg, fd, &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
     } while (msg != -1);
+
+    /*
+     * This function must either map the shared memory or fail. The
+     * loop above ensures that: it terminates normally only after it
+     * successfully processed the server's shared memory message.
+     * Assert that actually mapped the shared memory:
+     */
+    assert(memory_region_is_mapped(&s->ivshmem));
 }
 
 /* Select the MSI-X vectors used by device.
@@ -763,7 +793,13 @@ static void ivshmem_enable_irqfd(IVShmemState *s)
     int i;
 
     for (i = 0; i < s->peers[s->vm_id].nb_eventfds; i++) {
-        ivshmem_add_kvm_msi_virq(s, i);
+        Error *err = NULL;
+
+        ivshmem_add_kvm_msi_virq(s, i, &err);
+        if (err) {
+            error_report_err(err);
+            /* TODO do we need to handle the error? */
+        }
     }
 
     if (msix_set_vector_notifiers(pdev,
@@ -809,7 +845,7 @@ static void ivshmem_write_config(PCIDevice *pdev, uint32_t address,
     pci_default_write_config(pdev, address, val, len);
     is_enabled = msix_enabled(pdev);
 
-    if (kvm_msi_via_irqfd_enabled() && s->vm_id != -1) {
+    if (kvm_msi_via_irqfd_enabled()) {
         if (!was_enabled && is_enabled) {
             ivshmem_enable_irqfd(s);
         } else if (was_enabled && !is_enabled) {
@@ -928,15 +964,16 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
      * Receive setup messages from server synchronously.
      * Older versions did it asynchronously, but that creates a
      * number of entertaining race conditions.
-     * TODO Propagate errors! Without that, we still have races
-     * on errors.
      */
-    ivshmem_recv_setup(s);
-    if (memory_region_is_mapped(&s->ivshmem)) {
-        qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
-                              ivshmem_read, NULL, s);
+    ivshmem_recv_setup(s, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
     }
 
+    qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive,
+                          ivshmem_read, NULL, s);
+
     if (ivshmem_setup_interrupts(s) < 0) {
         error_setg(errp, "failed to initialize interrupts");
         return;


@@ -1289,14 +1289,7 @@ qemu-system-i386 -device ivshmem,size=@var{shm-size},vectors=@var{vectors},chard
 When using the server, the guest will be assigned a VM ID (>=0) that allows guests
 using the same server to communicate via interrupts. Guests can read their
-VM ID from a device register (see example code). Since receiving the shared
-memory region from the server is asynchronous, there is a (small) chance the
-guest may boot before the shared memory is attached. To allow an application
-to ensure shared memory is attached, the VM ID register will return -1 (an
-invalid VM ID) until the memory is attached. Once the shared memory is
-attached, the VM ID will return the guest's valid VM ID. With these semantics,
-the guest application can check to ensure the shared memory is attached to the
-guest before proceeding.
+VM ID from a device register (see ivshmem-spec.txt).
 
 The @option{role} argument can be set to either master or peer and will affect
 how the shared memory is migrated. With @option{role=master}, the guest will