migration/colo.c: Relaunch failover even if there was an error

If vmstate_loading is true, secondary_vm_do_failover will set failover
status to FAILOVER_STATUS_RELAUNCH and return success without initiating
failover. However, if an error occurs during the vmstate_loading
section, failover is never relaunched. Instead, we then wait on
colo_incoming_sem for a failover that was never initiated.

Fix this by relaunching failover even if there was an error. Also,
to make this work properly, set vmstate_loading to false when
returning during the vmstate_loading section.

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
Message-Id: <f60b0a8e2fadaaec792e04819dfc46951842d6ba.1589193382.git.lukasstraub2@web.de>
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Lukas Straub 2020-05-11 13:10:55 +02:00 committed by Dr. David Alan Gilbert
parent 24fa16f8cc
commit 92c932de6c
1 changed file with 12 additions and 5 deletions

View File

@ -752,6 +752,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
ret = qemu_load_device_state(fb); ret = qemu_load_device_state(fb);
if (ret < 0) { if (ret < 0) {
error_setg(errp, "COLO: load device state failed"); error_setg(errp, "COLO: load device state failed");
vmstate_loading = false;
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
return; return;
} }
@ -760,6 +761,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
replication_get_error_all(&local_err); replication_get_error_all(&local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
vmstate_loading = false;
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
return; return;
} }
@ -768,6 +770,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
replication_do_checkpoint_all(&local_err); replication_do_checkpoint_all(&local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
vmstate_loading = false;
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
return; return;
} }
@ -779,6 +782,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
vmstate_loading = false;
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
return; return;
} }
@ -789,9 +793,6 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
qemu_mutex_unlock_iothread(); qemu_mutex_unlock_iothread();
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
failover_set_state(FAILOVER_STATUS_RELAUNCH,
FAILOVER_STATUS_NONE);
failover_request_active(NULL);
return; return;
} }
@ -890,6 +891,14 @@ void *colo_process_incoming_thread(void *opaque)
error_report_err(local_err); error_report_err(local_err);
break; break;
} }
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
failover_set_state(FAILOVER_STATUS_RELAUNCH,
FAILOVER_STATUS_NONE);
failover_request_active(NULL);
break;
}
if (failover_get_state() != FAILOVER_STATUS_NONE) { if (failover_get_state() != FAILOVER_STATUS_NONE) {
error_report("failover request"); error_report("failover request");
break; break;
@ -897,8 +906,6 @@ void *colo_process_incoming_thread(void *opaque)
} }
out: out:
vmstate_loading = false;
/* /*
* There are only two reasons we can get here, some error happened * There are only two reasons we can get here, some error happened
* or the user triggered failover. * or the user triggered failover.