From d88610111b40bca19925ece0fa81710d425725a8 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 20 Jun 2018 15:32:17 +0800
Subject: [PATCH 1/5] chardev: comment details for CLOSED event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It was previously unclear what the CLOSED event means.  Meanwhile we
add a TODO to fix up the CLOSED event in the future when the in/out
ports are different for a chardev.

CC: Paolo Bonzini <pbonzini@redhat.com>
CC: "Marc-André Lureau" <marcandre.lureau@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20180620073223.31964-2-peterx@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 include/chardev/char.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/chardev/char.h b/include/chardev/char.h
index 04de45795e..6f0576e214 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -22,7 +22,16 @@ typedef enum {
     CHR_EVENT_OPENED, /* new connection established */
     CHR_EVENT_MUX_IN, /* mux-focus was set to this terminal */
     CHR_EVENT_MUX_OUT, /* mux-focus will move on */
-    CHR_EVENT_CLOSED /* connection closed */
+    CHR_EVENT_CLOSED /* connection closed.  NOTE: currently this event
+                      * is only bound to the read port of the chardev.
+                      * Normally the read port and write port of a
+                      * chardev should be the same, but it can be
+                      * different, e.g., for fd chardevs, when the two
+                      * fds are different.  So when we received the
+                      * CLOSED event it's still possible that the out
+                      * port is still open.  TODO: we should only send
+                      * the CLOSED event when both ports are closed.
+                      */
 } QEMUChrEvent;
 
 #define CHR_READ_BUF_LEN 4096

From 40687eb741a974c47326ef3cf7f6b25cc0680552 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 20 Jun 2018 15:32:18 +0800
Subject: [PATCH 2/5] monitor: rename *_pop_one to *_pop_any

The old names are confusing since both of the old functions are popping
an item from multiple queues rather than a single queue.  In that
sense, *_pop_any() suits better than *_pop_one().

While at it, touch up the function monitor_qmp_response_pop_any() a bit
to let the callers pass in a QMPResponse struct instead of returning a
struct.  Change the return value to boolean to mark whether we have
popped a valid response instead.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20180620073223.31964-3-peterx@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 monitor.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/monitor.c b/monitor.c
index 7b473aad1f..2881345758 100644
--- a/monitor.c
+++ b/monitor.c
@@ -542,10 +542,10 @@ struct QMPResponse {
 typedef struct QMPResponse QMPResponse;
 
 /*
- * Return one QMPResponse.  The response is only valid if
- * response.data is not NULL.
+ * Pop a QMPResponse from any monitor's response queue into @response.
+ * Return false if all the queues are empty; else true.
  */
-static QMPResponse monitor_qmp_response_pop_one(void)
+static bool monitor_qmp_response_pop_any(QMPResponse *response)
 {
     Monitor *mon;
     QObject *data = NULL;
@@ -556,22 +556,20 @@ static QMPResponse monitor_qmp_response_pop_one(void)
         data = g_queue_pop_head(mon->qmp.qmp_responses);
         qemu_mutex_unlock(&mon->qmp.qmp_queue_lock);
         if (data) {
+            response->mon = mon;
+            response->data = data;
             break;
         }
     }
     qemu_mutex_unlock(&monitor_lock);
-    return (QMPResponse) { .mon = mon, .data = data };
+    return data != NULL;
 }
 
 static void monitor_qmp_bh_responder(void *opaque)
 {
     QMPResponse response;
 
-    while (true) {
-        response = monitor_qmp_response_pop_one();
-        if (!response.data) {
-            break;
-        }
+    while (monitor_qmp_response_pop_any(&response)) {
         monitor_json_emitter_raw(response.mon, response.data);
         qobject_unref(response.data);
     }
@@ -4199,7 +4197,7 @@ static void monitor_qmp_dispatch_one(QMPRequest *req_obj)
  * when we process one request on a specific monitor, we put that
  * monitor to the end of mon_list queue.
  */
-static QMPRequest *monitor_qmp_requests_pop_one(void)
+static QMPRequest *monitor_qmp_requests_pop_any(void)
 {
     QMPRequest *req_obj = NULL;
     Monitor *mon;
@@ -4231,7 +4229,7 @@ static QMPRequest *monitor_qmp_requests_pop_one(void)
 
 static void monitor_qmp_bh_dispatcher(void *data)
 {
-    QMPRequest *req_obj = monitor_qmp_requests_pop_one();
+    QMPRequest *req_obj = monitor_qmp_requests_pop_any();
 
     if (req_obj) {
         trace_monitor_qmp_cmd_in_band(qobject_get_try_str(req_obj->id) ?: "");

From c73a843b4a822dc0bd5df5ef38dbd681dd96ad25 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 20 Jun 2018 15:32:19 +0800
Subject: [PATCH 3/5] monitor: flush qmp responses when CLOSED

Previously we cleaned up the queues when we got a CLOSED event.  This
was done to make sure we won't send leftover replies/events of an old
client to a new client, which makes perfect sense.  However, this will
also drop the replies/events even if the output port of the previous
chardev backend is still open, which can cause the last replies/events
to go missing.  Now this patch does an extra operation to flush the
response queue before cleaning up.

In most cases, a QMP session will be based on a bidirectional channel (a
TCP port, for example, we read/write to the same socket handle), so in
port and out port of the backend chardev are fundamentally the same
port. In these cases, it does not really matter much on whether we'll
flush the response queue since flushing will fail anyway.  However there
can be cases where in & out ports of the QMP monitor's backend chardev
are separated.  Here is an example:

  cat $QMP_COMMANDS | qemu -qmp stdio ... | filter_commands

In this case, the backend is fd-typed, and it is connected to stdio
where in port is stdin and out port is stdout.  Now if we drop all the
events on the response queue then the filter_commands process might miss
some events that it might expect.  The thing is that, when stdin closes,
stdout might still be there alive!

In practice, I encountered a missing SHUTDOWN event when running
iotest 087 with Out-Of-Band enabled.  Here is one of the ways that this
can happen (after "quit" command is executed and QEMU quits the main
loop):

1. [main thread] QEMU queues a SHUTDOWN event into response queue.

2. "cat" terminates (to distinguish it from the animal, I quote it).

3. [monitor iothread] QEMU's monitor iothread reads EOF from stdin.

4. [monitor iothread] QEMU's monitor iothread calls the CLOSED event
   hook for the monitor, which will destroy the response queue of the
   monitor, then the SHUTDOWN event is dropped.

5. [main thread] QEMU's main thread cleans up the monitors in
   monitor_cleanup().  When trying to flush pending responses, it sees
   nothing.  SHUTDOWN is lost forever.

Note that before the monitor iothread was introduced, step [4]/[5] could
never happen since the main loop was the only place to detect the EOF
event of stdin and run the CLOSED event hooks.  Now things can happen in
parallel in the iothread.

Without this patch, iotest 087 will have ~10% chance to miss the
SHUTDOWN event and fail when Out-Of-Band is enabled:

  --- /home/peterx/git/qemu/tests/qemu-iotests/087.out
  +++ /home/peterx/git/qemu/bin/tests/qemu-iotests/087.out.bad
  @@ -8,7 +8,6 @@
  {"return": {}}
  {"error": {"class": "GenericError", "desc": "'node-name' must be
  specified for the root node"}}
  {"return": {}}
  -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}

  === Duplicate ID ===
  @@ -53,7 +52,6 @@
  {"return": {}}
  {"return": {}}
  {"return": {}}

  -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}

This patch fixes the problem.

Fixes: 6d2d563f8c ("qmp: cleanup qmp queues properly", 2018-03-27)
Suggested-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20180620073223.31964-4-peterx@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
[Commit message and a comment touched up]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 monitor.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/monitor.c b/monitor.c
index 2881345758..567668a0e7 100644
--- a/monitor.c
+++ b/monitor.c
@@ -541,6 +541,27 @@ struct QMPResponse {
 };
 typedef struct QMPResponse QMPResponse;
 
+static QObject *monitor_qmp_response_pop_one(Monitor *mon)
+{
+    QObject *data;
+
+    qemu_mutex_lock(&mon->qmp.qmp_queue_lock);
+    data = g_queue_pop_head(mon->qmp.qmp_responses);
+    qemu_mutex_unlock(&mon->qmp.qmp_queue_lock);
+
+    return data;
+}
+
+static void monitor_qmp_response_flush(Monitor *mon)
+{
+    QObject *data;
+
+    while ((data = monitor_qmp_response_pop_one(mon))) {
+        monitor_json_emitter_raw(mon, data);
+        qobject_unref(data);
+    }
+}
+
 /*
  * Pop a QMPResponse from any monitor's response queue into @response.
  * Return false if all the queues are empty; else true.
@@ -552,9 +573,7 @@ static bool monitor_qmp_response_pop_any(QMPResponse *response)
 
     qemu_mutex_lock(&monitor_lock);
     QTAILQ_FOREACH(mon, &mon_list, entry) {
-        qemu_mutex_lock(&mon->qmp.qmp_queue_lock);
-        data = g_queue_pop_head(mon->qmp.qmp_responses);
-        qemu_mutex_unlock(&mon->qmp.qmp_queue_lock);
+        data = monitor_qmp_response_pop_one(mon);
         if (data) {
             response->mon = mon;
             response->data = data;
@@ -4456,6 +4475,13 @@ static void monitor_qmp_event(void *opaque, int event)
         mon_refcount++;
         break;
     case CHR_EVENT_CLOSED:
+        /*
+         * Note: this is only useful when the output of the chardev
+         * backend is still open.  For example, when the backend is
+         * stdio, it's possible that stdout is still open when stdin
+         * is closed.
+         */
+        monitor_qmp_response_flush(mon);
         monitor_qmp_cleanup_queues(mon);
         json_message_parser_destroy(&mon->qmp.parser);
         json_message_parser_init(&mon->qmp.parser, handle_qmp_command);

From cbc4ae2d1a9f4a9fd6c2f5000d3845f6d28cccc1 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 20 Jun 2018 15:32:20 +0800
Subject: [PATCH 4/5] tests: iotests: drop some stderr line

In my Out-Of-Band test, "check -qcow2 060" fails with this:

  --- /home/peterx/git/qemu/tests/qemu-iotests/060.out
  +++ /home/peterx/git/qemu/bin/tests/qemu-iotests/060.out.bad
  @@ -427,8 +427,8 @@
  QMP_VERSION
  {"return": {}}
  qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1
  index: 0); further non-fatal corruption events will be suppressed
  -{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a0
  0 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}}
  read failed: Input/output error
  +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a0
  0 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}}
  {"return": ""}
  {"return": {}}
  {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP},
  "event": "SHUTDOWN", "data": {"guest": false}}

The order of the event and the in/out error line is swapped.  I didn't
dig up the reason, but AFAIU what we want to verify is the event rather
than stderr.  Let's drop the stderr line directly for this test.

Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20180620073223.31964-5-peterx@redhat.com>
[Commit message touched up]
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 tests/qemu-iotests/060     | 10 +++++++++-
 tests/qemu-iotests/060.out |  1 -
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
index 7bdf609f3f..74ad371885 100755
--- a/tests/qemu-iotests/060
+++ b/tests/qemu-iotests/060
@@ -33,6 +33,14 @@ _cleanup()
 }
 trap "_cleanup; exit \$status" 0 1 2 3 15
 
+# Sometimes the error line might be dumped before/after an event
+# randomly.  Mask it out for specific test that may trigger this
+# uncertainty for current test for now.
+_filter_io_error()
+{
+    sed '/Input\/output error/d'
+}
+
 # get standard environment, filters and checks
 . ./common.rc
 . ./common.filter
@@ -464,7 +472,7 @@ echo "{'execute': 'qmp_capabilities'}
                         }}" \
             -incoming exec:'cat /dev/null' \
             2>&1 \
-    | _filter_qmp | _filter_qemu_io
+    | _filter_qmp | _filter_qemu_io | _filter_io_error
 
 echo
 # Image should not have been marked corrupt
diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
index bff023d889..d67c6234a4 100644
--- a/tests/qemu-iotests/060.out
+++ b/tests/qemu-iotests/060.out
@@ -428,7 +428,6 @@ QMP_VERSION
 {"return": {}}
 qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1 index: 0); further non-fatal corruption events will be suppressed
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}}
-read failed: Input/output error
 {"return": ""}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false}}

From 4bfa7974d90a0cbad29c0a27334d02cbd37bb23d Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 20 Jun 2018 15:32:21 +0800
Subject: [PATCH 5/5] docs: mention shared state protect for OOB

Out-Of-Band handlers need to protect shared state if there is any.
Mention it in the document.  Meanwhile, touch up some other places too,
either with better English, or reordering of bullets.

Suggested-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20180620073223.31964-6-peterx@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
---
 docs/devel/qapi-code-gen.txt | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt
index 88a70e4d45..94a7e8f4d0 100644
--- a/docs/devel/qapi-code-gen.txt
+++ b/docs/devel/qapi-code-gen.txt
@@ -666,22 +666,27 @@ command:
 
 - They are executed in order,
 - They run only in main thread of QEMU,
-- They have the BQL taken during execution.
+- They run with the BQL held.
 
 When a command is executed with OOB, the following changes occur:
 
 - They can be completed before a pending in-band command,
 - They run in a dedicated monitor thread,
-- They do not take the BQL during execution.
+- They run with the BQL not held.
 
 OOB command handlers must satisfy the following conditions:
 
-- It executes extremely fast,
-- It does not take any lock, or, it can take very small locks if all
-  critical regions also follow the rules for OOB command handler code,
+- It terminates quickly,
 - It does not invoke system calls that may block,
 - It does not access guest RAM that may block when userfaultfd is
-  enabled for postcopy live migration.
+  enabled for postcopy live migration,
+- It takes only "fast" locks, i.e. all critical sections protected by
+  any lock it takes also satisfy the conditions for OOB command
+  handler code.
+
+The restrictions on locking limit access to shared state.  Such access
+requires synchronization, but OOB commands can't take the BQL or any
+other "slow" lock.
 
 If in doubt, do not implement OOB execution support.