From 00d09fdbbae5f7864ce754913efc84c12fdf9f1a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 1 Feb 2018 11:07:08 +0000 Subject: [PATCH 1/3] vl: pause vcpus before stopping iothreads Commit dce8921b2baaf95974af8176406881872067adfa ("iothread: Stop threads before main() quits") introduced iothread_stop_all() to avoid the following virtio-scsi assertion failure: assert(blk_get_aio_context(d->conf.blk) == s->ctx); Back then the assertion failed because when bdrv_close_all() made d->conf.blk NULL, blk_get_aio_context() returned the global AioContext instead of s->ctx. The same assertion can still fail today when vcpus submit new I/O requests after iothread_stop_all() has moved the BDS to the global AioContext. This patch hardens the iothread_stop_all() approach by pausing vcpus before calling iothread_stop_all(). Note that the assertion failure is a race condition. It is not possible to reproduce it reliably. Signed-off-by: Stefan Hajnoczi Message-id: 20180201110708.8080-1-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi --- vl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/vl.c b/vl.c index 21878496ec..7a5554bc41 100644 --- a/vl.c +++ b/vl.c @@ -4767,10 +4767,18 @@ int main(int argc, char **argv, char **envp) main_loop(); replay_disable_events(); - iothread_stop_all(); + /* The ordering of the following is delicate. Stop vcpus to prevent new + * I/O requests being queued by the guest. Then stop IOThreads (this + * includes a drain operation and completes all request processing). At + * this point emulated devices are still associated with their IOThreads + * (if any) but no longer have any work to do. Only then can we close + * block devices safely because we know there is no more I/O coming. + */ pause_all_vcpus(); + iothread_stop_all(); bdrv_close_all(); + res_free(); /* vhost-user must be cleaned up before chardevs. */ From b7728f32216f0a7a87bee2f4e009e68b00fd4fb5 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Wed, 7 Feb 2018 08:17:58 +0100 Subject: [PATCH 2/3] ratelimit: don't align wait time with slices It is possible for rate limited writes to keep overshooting a slice's quota by a tiny amount causing the slice-aligned waiting period to effectively halve the rate. Signed-off-by: Wolfgang Bumiller Reviewed-by: Alberto Garcia Message-id: 20180207071758.6818-1-w.bumiller@proxmox.com Signed-off-by: Stefan Hajnoczi --- include/qemu/ratelimit.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h index 8dece483f5..1b38291823 100644 --- a/include/qemu/ratelimit.h +++ b/include/qemu/ratelimit.h @@ -36,7 +36,7 @@ typedef struct { static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) { int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - uint64_t delay_slices; + double delay_slices; assert(limit->slice_quota && limit->slice_ns); @@ -55,12 +55,11 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n) return 0; } - /* Quota exceeded. Calculate the next time slice we may start - * sending data again. */ - delay_slices = (limit->dispatched + limit->slice_quota - 1) / - limit->slice_quota; + /* Quota exceeded. Wait based on the excess amount and then start a new + * slice. */ + delay_slices = (double)limit->dispatched / limit->slice_quota; limit->slice_end_time = limit->slice_start_time + - delay_slices * limit->slice_ns; + (uint64_t)(delay_slices * limit->slice_ns); return limit->slice_end_time - now; } From d2f668b74907cbd96d9df0774971768ed06de2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Thu, 8 Feb 2018 17:24:47 +0100 Subject: [PATCH 3/3] misc: fix spelling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/pupulate/populate Signed-off-by: Marc-André Lureau Reviewed-by: Peter Maydell Message-id: 20180208162447.10851-1-marcandre.lureau@redhat.com Signed-off-by: Stefan Hajnoczi --- util/qemu-coroutine-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c index 78fb79acf8..5a80c10690 100644 --- a/util/qemu-coroutine-lock.c +++ b/util/qemu-coroutine-lock.c @@ -89,7 +89,7 @@ void qemu_co_queue_run_restart(Coroutine *co) * invalid memory. Therefore, use a temporary queue and do not touch * the "co" coroutine as soon as you enter another one. * - * In its turn resumed "co" can pupulate "co_queue_wakeup" queue with + * In its turn resumed "co" can populate "co_queue_wakeup" queue with * new coroutines to be woken up. The caller, who has resumed "co", * will be responsible for traversing the same queue, which may cause * a different wakeup order but not any missing wakeups.