From 75e5b70e6b5dcc4f2219992d7cffa462aa406af0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 28 Nov 2017 11:51:27 +0100 Subject: [PATCH 01/41] memfd: fix configure test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent glibc added memfd_create in sys/mman.h. This conflicts with the definition in util/memfd.c: /builddir/build/BUILD/qemu-2.11.0-rc1/util/memfd.c:40:12: error: static declaration of memfd_create follows non-static declaration Fix the configure test, and remove the sys/memfd.h inclusion since the file actually does not exist---it is a typo in the memfd_create(2) man page. Cc: Marc-André Lureau Signed-off-by: Paolo Bonzini --- configure | 2 +- util/memfd.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/configure b/configure index 9c8aa5a98b..99ccc1725a 100755 --- a/configure +++ b/configure @@ -3923,7 +3923,7 @@ fi # check if memfd is supported memfd=no cat > $TMPC << EOF -#include +#include int main(void) { diff --git a/util/memfd.c b/util/memfd.c index 4571d1aba8..412e94a405 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -31,9 +31,7 @@ #include "qemu/memfd.h" -#ifdef CONFIG_MEMFD -#include -#elif defined CONFIG_LINUX +#if defined CONFIG_LINUX && !defined CONFIG_MEMFD #include #include From 68a9398261ca38979bbc2b7c89ed5bb044ccc9e6 Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Tue, 28 Nov 2017 12:46:56 +0800 Subject: [PATCH 02/41] qemu-thread: fix races on threads that exit very quickly If we create a thread with QEMU_THREAD_DETACHED mode, QEMU may get a segfault with low probability. The backtrace is: #0 0x00007f46c60291d7 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 #1 0x00007f46c602a8c8 in __GI_abort () at abort.c:90 #2 0x00000000008543c9 in PAT_abort () #3 0x000000000085140d in patchIllInsHandler () #4 #5 pthread_detach (th=139933037614848) at pthread_detach.c:50 #6 0x0000000000829759 in qemu_thread_create (thread=thread@entry=0x7ffdaa8205e0, name=name@entry=0x94d94a "io-task-worker", start_routine=start_routine@entry=0x7eb9a0 , arg=arg@entry=0x3f5cf70, mode=mode@entry=1) at util/qemu_thread_posix.c:512 #7 0x00000000007ebc96 in qio_task_run_in_thread (task=0x31db2c0, worker=worker@entry=0x7e7e40 , opaque=0xcd23380, destroy=0x7f1180 ) at io/task.c:141 #8 0x00000000007e7f33 in qio_channel_socket_connect_async (ioc=ioc@entry=0x626c0b0, addr=, callback=callback@entry=0x55e080 , opaque=opaque@entry=0x42862c0, destroy=destroy@entry=0x0) at io/channel_socket.c:194 #9 0x000000000055bdd1 in socket_reconnect_timeout (opaque=0x42862c0) at qemu_char.c:4744 #10 0x00007f46c72483b3 in g_timeout_dispatch () from /usr/lib64/libglib-2.0.so.0 #11 0x00007f46c724799a in g_main_context_dispatch () from /usr/lib64/libglib-2.0.so.0 #12 0x000000000076c646 in glib_pollfds_poll () at main_loop.c:228 #13 0x000000000076c6eb in os_host_main_loop_wait (timeout=348000000) at main_loop.c:273 #14 0x000000000076c815 in main_loop_wait (nonblocking=nonblocking@entry=0) at main_loop.c:521 #15 0x000000000056a511 in main_loop () at vl.c:2076 #16 0x0000000000420705 in main (argc=, argv=, envp=) at vl.c:4940 The cause of this problem is a glibc bug; for more information, see https://sourceware.org/bugzilla/show_bug.cgi?id=19951. The solution for this bug is to use pthread_attr_setdetachstate. There is a similar issue with pthread_setname_np, which is moved from creating thread to created thread. Signed-off-by: linzhecheng Message-Id: <20171128044656.10592-1-linzhecheng@huawei.com> Reviewed-by: Fam Zheng [Simplify the code by removing qemu_thread_set_name, and free the arguments before invoking the start routine. - Paolo] Signed-off-by: Paolo Bonzini --- util/qemu-thread-posix.c | 61 ++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 7306475899..959a57079f 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -479,15 +479,29 @@ static void __attribute__((constructor)) qemu_thread_atexit_init(void) } -/* Attempt to set the threads name; note that this is for debug, so - * we're not going to fail if we can't set it. - */ -static void qemu_thread_set_name(QemuThread *thread, const char *name) -{ #ifdef CONFIG_PTHREAD_SETNAME_NP - pthread_setname_np(thread->thread, name); -#endif +typedef struct { + void *(*start_routine)(void *); + void *arg; + char *name; +} QemuThreadArgs; + +static void *qemu_thread_start(void *args) +{ + QemuThreadArgs *qemu_thread_args = args; + void *(*start_routine)(void *) = qemu_thread_args->start_routine; + void *arg = qemu_thread_args->arg; + + /* Attempt to set the threads name; note that this is for debug, so + * we're not going to fail if we can't set it. + */ + pthread_setname_np(pthread_self(), qemu_thread_args->name); + g_free(qemu_thread_args->name); + g_free(qemu_thread_args); + return start_routine(arg); } +#endif + void qemu_thread_create(QemuThread *thread, const char *name, void *(*start_routine)(void*), @@ -502,23 +516,34 @@ void qemu_thread_create(QemuThread *thread, const char *name, error_exit(err, __func__); } + if (mode == QEMU_THREAD_DETACHED) { + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + } + /* Leave signal handling to the iothread. */ sigfillset(&set); pthread_sigmask(SIG_SETMASK, &set, &oldset); - err = pthread_create(&thread->thread, &attr, start_routine, arg); + +#ifdef CONFIG_PTHREAD_SETNAME_NP + if (name_threads) { + QemuThreadArgs *qemu_thread_args; + qemu_thread_args = g_new0(QemuThreadArgs, 1); + qemu_thread_args->name = g_strdup(name); + qemu_thread_args->start_routine = start_routine; + qemu_thread_args->arg = arg; + + err = pthread_create(&thread->thread, &attr, + qemu_thread_start, qemu_thread_args); + } else +#endif + { + err = pthread_create(&thread->thread, &attr, + start_routine, arg); + } + if (err) error_exit(err, __func__); - if (name_threads) { - qemu_thread_set_name(thread, name); - } - - if (mode == QEMU_THREAD_DETACHED) { - err = pthread_detach(thread->thread); - if (err) { - error_exit(err, __func__); - } - } pthread_sigmask(SIG_SETMASK, &oldset, NULL); pthread_attr_destroy(&attr); From a4a9b6eaf35dbe4bf0e069854945bf5e45fc7eab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 1 Dec 2017 18:40:06 +0100 Subject: [PATCH 03/41] qemu-pr-helper: miscellaneous fixes 1) Return a generic sense if TEST UNIT READY does not provide one; 2) Fix two mistakes in copying from the spec. Cc: qemu-stable@nongnu.org Reported-by: Dr. David Alan Gilbert Signed-off-by: Paolo Bonzini --- include/scsi/utils.h | 6 +++++- scsi/qemu-pr-helper.c | 30 ++++++++++++++++++++++++++---- scsi/utils.c | 10 ++++++++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/include/scsi/utils.h b/include/scsi/utils.h index 00a4bdb080..eb07e474ee 100644 --- a/include/scsi/utils.h +++ b/include/scsi/utils.h @@ -76,7 +76,11 @@ extern const struct SCSISense sense_code_LUN_FAILURE; extern const struct SCSISense sense_code_LUN_COMM_FAILURE; /* Command aborted, Overlapped Commands Attempted */ extern const struct SCSISense sense_code_OVERLAPPED_COMMANDS; -/* LUN not ready, Capacity data has changed */ +/* Medium error, Unrecovered read error */ +extern const struct SCSISense sense_code_READ_ERROR; +/* LUN not ready, Cause not reportable */ +extern const struct SCSISense sense_code_NOT_READY; +/* Unit attention, Capacity data has changed */ extern const struct SCSISense sense_code_CAPACITY_CHANGED; /* Unit attention, SCSI bus reset */ extern const struct SCSISense sense_code_SCSI_BUS_RESET; diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c index dd9785143b..9fe615c73c 100644 --- a/scsi/qemu-pr-helper.c +++ b/scsi/qemu-pr-helper.c @@ -314,6 +314,22 @@ static int is_mpath(int fd) return !strncmp(tgt->target_type, "multipath", DM_MAX_TYPE_NAME); } +static SCSISense mpath_generic_sense(int r) +{ + switch (r) { + case MPATH_PR_SENSE_NOT_READY: + return SENSE_CODE(NOT_READY); + case MPATH_PR_SENSE_MEDIUM_ERROR: + return SENSE_CODE(READ_ERROR); + case MPATH_PR_SENSE_HARDWARE_ERROR: + return SENSE_CODE(TARGET_FAILURE); + case MPATH_PR_SENSE_ABORTED_COMMAND: + return SENSE_CODE(IO_ERROR); + default: + abort(); + } +} + static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense) { switch (r) { @@ -329,7 +345,13 @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense) */ uint8_t cdb[6] = { TEST_UNIT_READY }; int sz = 0; - return do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE); + int r = do_sgio(fd, cdb, sense, NULL, &sz, SG_DXFER_NONE); + + if (r != GOOD) { + return r; + } + scsi_build_sense(sense, mpath_generic_sense(r)); + return CHECK_CONDITION; } case MPATH_PR_SENSE_UNIT_ATTENTION: @@ -449,7 +471,7 @@ static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, memset(¶mp, 0, sizeof(paramp)); memcpy(¶mp.key, ¶m[0], 8); memcpy(¶mp.sa_key, ¶m[8], 8); - paramp.sa_flags = param[10]; + paramp.sa_flags = param[20]; if (sz > PR_OUT_FIXED_PARAM_SIZE) { size_t transportid_len; int i, j; @@ -478,8 +500,8 @@ static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense, j += offsetof(struct transportid, n_port_name[8]); i += 24; break; - case 3: - case 0x43: + case 5: + case 0x45: /* iSCSI transport. */ len = lduw_be_p(¶m[i + 2]); if (len > 252 || (len & 3) || i + len + 4 > transportid_len) { diff --git a/scsi/utils.c b/scsi/utils.c index 5684951b12..e4182a9b09 100644 --- a/scsi/utils.c +++ b/scsi/utils.c @@ -211,6 +211,16 @@ const struct SCSISense sense_code_LUN_COMM_FAILURE = { .key = ABORTED_COMMAND, .asc = 0x08, .ascq = 0x00 }; +/* Medium Error, Unrecovered read error */ +const struct SCSISense sense_code_READ_ERROR = { + .key = MEDIUM_ERROR, .asc = 0x11, .ascq = 0x00 +}; + +/* Not ready, Cause not reportable */ +const struct SCSISense sense_code_NOT_READY = { + .key = NOT_READY, .asc = 0x04, .ascq = 0x00 +}; + /* Unit attention, Capacity data has changed */ const struct SCSISense sense_code_CAPACITY_CHANGED = { .key = UNIT_ATTENTION, .asc = 0x2a, .ascq = 0x09 From 2ba60ec1751844fe75d32dd80ebb3d72480b4b17 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 24 Nov 2017 17:44:22 +0100 Subject: [PATCH 04/41] contrib: add systemd unit files This lets distros standardize on how QEMU should install systemd services for qemu-ga and qemu-pr-helper. The qemu-ga unit file comes from Fedora, but I checked that Debian is using the same path for the virtio-serisal port. I would like to include this in 2.11, so that the qemu-pr-helper socket can be standardized across distros. Note however that the files are not installed. We can add a configure option in 2.12 perhaps, but it's too late now; documenting the files in the release notes should do. Suggested-by: Daniel P. Berrange Signed-off-by: Paolo Bonzini Message-Id: <20171124164422.3960-1-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- contrib/systemd/qemu-guest-agent.service | 11 +++++++++++ contrib/systemd/qemu-pr-helper.service | 15 +++++++++++++++ contrib/systemd/qemu-pr-helper.socket | 9 +++++++++ 3 files changed, 35 insertions(+) create mode 100644 contrib/systemd/qemu-guest-agent.service create mode 100644 contrib/systemd/qemu-pr-helper.service create mode 100644 contrib/systemd/qemu-pr-helper.socket diff --git a/contrib/systemd/qemu-guest-agent.service b/contrib/systemd/qemu-guest-agent.service new file mode 100644 index 0000000000..51cd7b37ff --- /dev/null +++ b/contrib/systemd/qemu-guest-agent.service @@ -0,0 +1,11 @@ +[Unit] +Description=QEMU Guest Agent +BindTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device + +[Service] +ExecStart=-/usr/bin/qemu-ga +Restart=always +RestartSec=0 + +[Install] diff --git a/contrib/systemd/qemu-pr-helper.service b/contrib/systemd/qemu-pr-helper.service new file mode 100644 index 0000000000..a1d27b0221 --- /dev/null +++ b/contrib/systemd/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/contrib/systemd/qemu-pr-helper.socket b/contrib/systemd/qemu-pr-helper.socket new file mode 100644 index 0000000000..9d7c3e5e2c --- /dev/null +++ b/contrib/systemd/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target From 07488549f884a658689370b9ef878dc50eced83e Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 5 Dec 2017 15:19:28 +0800 Subject: [PATCH 05/41] scsi-block: Add share-rw option Scsi-block doesn't use the DEFINE_BLOCK_PROPERTIES() macro so it didn't gain the share-rw back when it was added to all other storage devices. This option is meaningful here, and need to be used when attaching a shared storage to guest. Signed-off-by: Fam Zheng Message-Id: <20171205071928.30242-1-famz@redhat.com> Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 870d9ae85a..e58833a087 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -3004,6 +3004,7 @@ static const TypeInfo scsi_cd_info = { static Property scsi_block_properties[] = { DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), \ DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk), + DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false), DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0), DEFINE_PROP_END_OF_LIST(), }; From c2380365d1d6c8c9f920651a2a429c75d977a589 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 5 Dec 2017 15:22:20 +0800 Subject: [PATCH 06/41] MAITAINERS: List Fam Zheng as reviewer for SCSI patches Just so that I notice those patches more easily. Signed-off-by: Fam Zheng Message-Id: <20171205072220.885-1-famz@redhat.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8859a50c36..73a5555735 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1011,6 +1011,7 @@ T: git git://github.com/jasowang/qemu.git net SCSI M: Paolo Bonzini +R: Fam Zheng S: Supported F: include/hw/scsi/* F: hw/scsi/* @@ -1271,6 +1272,7 @@ T: git git://github.com/stefanha/qemu.git block Block SCSI subsystem M: Paolo Bonzini +R: Fam Zheng L: qemu-block@nongnu.org S: Supported F: include/scsi/* From aff9e6e46a343e1404498be4edd03db1112f0950 Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Wed, 22 Nov 2017 15:27:56 +0800 Subject: [PATCH 07/41] x86/cpu: Enable new SSE/AVX/AVX512 cpu features Intel IceLake cpu has added new cpu features,AVX512_VBMI2/GFNI/ VAES/VPCLMULQDQ/AVX512_VNNI/AVX512_BITALG. Those new cpu features need expose to guest VM. The bit definition: CPUID.(EAX=7,ECX=0):ECX[bit 06] AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 08] GFNI CPUID.(EAX=7,ECX=0):ECX[bit 09] VAES CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG The release document ref below link: https://software.intel.com/sites/default/files/managed/c5/15/\ architecture-instruction-set-extensions-programming-reference.pdf Signed-off-by: Yang Zhong Message-Id: <1511335676-20797-1-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 6 +++--- target/i386/cpu.h | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 82603e3130..325b52e325 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -437,9 +437,9 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_7_0_ECX] = { .feat_names = { NULL, "avx512vbmi", "umip", "pku", - "ospke", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, "avx512-vpopcntdq", NULL, + "ospke", NULL, "avx512vbmi2", NULL, + "gfni", "vaes", "vpclmulqdq", "avx512vnni", + "avx512bitalg", NULL, "avx512-vpopcntdq", NULL, "la57", NULL, NULL, NULL, NULL, NULL, "rdpid", NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b086b1528b..cdbf8b0cd7 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -635,6 +635,12 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_ECX_UMIP (1U << 2) #define CPUID_7_0_ECX_PKU (1U << 3) #define CPUID_7_0_ECX_OSPKE (1U << 4) +#define CPUID_7_0_ECX_VBMI2 (1U << 6) /* Additional VBMI Instrs */ +#define CPUID_7_0_ECX_GFNI (1U << 8) +#define CPUID_7_0_ECX_VAES (1U << 9) +#define CPUID_7_0_ECX_VPCLMULQDQ (1U << 10) +#define CPUID_7_0_ECX_AVX512VNNI (1U << 11) +#define CPUID_7_0_ECX_AVX512BITALG (1U << 12) #define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14) /* POPCNT for vectors of DW/QW */ #define CPUID_7_0_ECX_LA57 (1U << 16) #define CPUID_7_0_ECX_RDPID (1U << 22) From da1cc323b8aee60d4816ce7521177b14ec3008b4 Mon Sep 17 00:00:00 2001 From: Evgeny Yakovlev Date: Wed, 22 Nov 2017 21:14:16 +0300 Subject: [PATCH 08/41] hyperv: set partition-wide MSRs only on first vcpu Hyper-V has a notion of partition-wide MSRs. Those MSRs are read and written as usual on each VCPU, however the hypervisor maintains a single global value for all VCPUs. Thus writing such an MSR from any single VCPU affects the global value that is read by all other VCPUs. This leads to an issue during VCPU hotplug: the zero-initialzied values of those MSRs get synced into KVM and override the global values as has already been set by the guest. This change makes the partition-wide MSRs only be synchronized on the first vcpu. Signed-off-by: Evgeny Yakovlev Signed-off-by: Roman Kagan Message-Id: <20171122181418.14180-2-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 5 ++++- target/i386/kvm.c | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index cdbf8b0cd7..17f1bb7ecb 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1097,10 +1097,13 @@ typedef struct CPUX86State { uint64_t async_pf_en_msr; uint64_t pv_eoi_en_msr; + /* Partition-wide HV MSRs, will be updated only on the first vcpu */ uint64_t msr_hv_hypercall; uint64_t msr_hv_guest_os_id; - uint64_t msr_hv_vapic; uint64_t msr_hv_tsc; + + /* Per-VCPU HV MSRs */ + uint64_t msr_hv_vapic; uint64_t msr_hv_crash_params[HV_CRASH_PARAMS]; uint64_t msr_hv_runtime; uint64_t msr_hv_synic_control; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index d4b2ce2e94..89fa65e243 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -1678,19 +1678,26 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, env->msr_global_ctrl); } - if (has_msr_hv_hypercall) { - kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, - env->msr_hv_guest_os_id); - kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, - env->msr_hv_hypercall); + /* + * Hyper-V partition-wide MSRs: to avoid clearing them on cpu hot-add, + * only sync them to KVM on the first cpu + */ + if (current_cpu == first_cpu) { + if (has_msr_hv_hypercall) { + kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, + env->msr_hv_guest_os_id); + kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, + env->msr_hv_hypercall); + } + if (cpu->hyperv_time) { + kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, + env->msr_hv_tsc); + } } if (cpu->hyperv_vapic) { kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, env->msr_hv_vapic); } - if (cpu->hyperv_time) { - kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); - } if (has_msr_hv_crash) { int j; From 689141dde2957894ae99315bb4e42e6ecd980248 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Wed, 22 Nov 2017 21:14:17 +0300 Subject: [PATCH 09/41] hyperv: ensure SINTx msrs are reset properly Initially SINTx msrs should be in "masked" state. To ensure that happens on *every* reset, move setting their values to kvm_arch_vcpu_reset. Signed-off-by: Roman Kagan Message-Id: <20171122181418.14180-3-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 89fa65e243..5d93391d79 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -662,8 +662,6 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; } if (cpu->hyperv_synic) { - int sint; - if (!has_msr_hv_synic || kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); @@ -672,9 +670,6 @@ static int hyperv_handle_properties(CPUState *cs) env->features[FEAT_HYPERV_EAX] |= HV_SYNIC_AVAILABLE; env->msr_hv_synic_version = HV_SYNIC_VERSION; - for (sint = 0; sint < ARRAY_SIZE(env->msr_hv_synic_sint); sint++) { - env->msr_hv_synic_sint[sint] = HV_SINT_MASKED; - } } if (cpu->hyperv_stimer) { if (!has_msr_hv_stimer) { @@ -1053,6 +1048,13 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) } else { env->mp_state = KVM_MP_STATE_RUNNABLE; } + + if (cpu->hyperv_synic) { + int i; + for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { + env->msr_hv_synic_sint[i] = HV_SINT_MASKED; + } + } } void kvm_arch_do_init_vcpu(X86CPU *cpu) From 09df29b665a91ba78b2187ce3b1967526ce121f6 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Wed, 22 Nov 2017 21:14:18 +0300 Subject: [PATCH 10/41] hyperv: make SynIC version msr constant The value of HV_X64_MSR_SVERSION is initialized once at vcpu init, and is reset to zero on vcpu reset, which is wrong. It is supposed to be a constant, so drop the field from X86CPU, set the msr with the constant value, and don't bother getting it. Signed-off-by: Roman Kagan Message-Id: <20171122181418.14180-4-rkagan@virtuozzo.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 1 - target/i386/kvm.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 17f1bb7ecb..d605cc6ccb 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1107,7 +1107,6 @@ typedef struct CPUX86State { uint64_t msr_hv_crash_params[HV_CRASH_PARAMS]; uint64_t msr_hv_runtime; uint64_t msr_hv_synic_control; - uint64_t msr_hv_synic_version; uint64_t msr_hv_synic_evt_page; uint64_t msr_hv_synic_msg_page; uint64_t msr_hv_synic_sint[HV_SINT_COUNT]; diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 5d93391d79..351b64f77c 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -669,7 +669,6 @@ static int hyperv_handle_properties(CPUState *cs) } env->features[FEAT_HYPERV_EAX] |= HV_SYNIC_AVAILABLE; - env->msr_hv_synic_version = HV_SYNIC_VERSION; } if (cpu->hyperv_stimer) { if (!has_msr_hv_stimer) { @@ -1715,10 +1714,10 @@ static int kvm_put_msrs(X86CPU *cpu, int level) if (cpu->hyperv_synic) { int j; + kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION); + kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, env->msr_hv_synic_control); - kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, - env->msr_hv_synic_version); kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, env->msr_hv_synic_evt_page); kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, @@ -2082,7 +2081,6 @@ static int kvm_get_msrs(X86CPU *cpu) uint32_t msr; kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, 0); kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0); kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0); for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) { @@ -2286,9 +2284,6 @@ static int kvm_get_msrs(X86CPU *cpu) case HV_X64_MSR_SCONTROL: env->msr_hv_synic_control = msrs[i].data; break; - case HV_X64_MSR_SVERSION: - env->msr_hv_synic_version = msrs[i].data; - break; case HV_X64_MSR_SIEFP: env->msr_hv_synic_evt_page = msrs[i].data; break; From ebd05fea9be1dfd043aaa763fb6d2cd971346a58 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 Nov 2017 20:12:15 +0100 Subject: [PATCH 11/41] cpus: make pause_all_cpus() play with SMP on single threaded TCG pause_all_cpus() is sometimes called from a VCPU thread (e.g. s390x during special reset). It cannot deal with multiple VCPUs per Thread (single threaded TCG) yet. Booting an s390x guest with -smp 2 and single threaded TCG from disk currently fails. The DIAG 308 will issue a pause_all_cpus() and wait forever for the CPUs to actually stop. But it is waiting for itself. So let's stop all VCPUs belonging to the current thread. Factor out stopping of a VCPU. Signed-off-by: David Hildenbrand Message-Id: <20171129191215.11323-1-david@redhat.com> Signed-off-by: Paolo Bonzini --- cpus.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/cpus.c b/cpus.c index 114c29b6a0..3740c4db62 100644 --- a/cpus.c +++ b/cpus.c @@ -1057,13 +1057,22 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) { } +static void qemu_cpu_stop(CPUState *cpu, bool exit) +{ + g_assert(qemu_cpu_is_self(cpu)); + cpu->stop = false; + cpu->stopped = true; + if (exit) { + cpu_exit(cpu); + } + qemu_cond_broadcast(&qemu_pause_cond); +} + static void qemu_wait_io_event_common(CPUState *cpu) { atomic_mb_set(&cpu->thread_kicked, false); if (cpu->stop) { - cpu->stop = false; - cpu->stopped = true; - qemu_cond_broadcast(&qemu_pause_cond); + qemu_cpu_stop(cpu, false); } process_queued_cpu_work(cpu); } @@ -1610,12 +1619,12 @@ void pause_all_vcpus(void) qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); CPU_FOREACH(cpu) { - cpu->stop = true; - qemu_cpu_kick(cpu); - } - - if (qemu_in_vcpu_thread()) { - cpu_stop_current(); + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { + cpu->stop = true; + qemu_cpu_kick(cpu); + } } while (!all_vcpus_paused()) { @@ -1799,10 +1808,7 @@ void qemu_init_vcpu(CPUState *cpu) void cpu_stop_current(void) { if (current_cpu) { - current_cpu->stop = false; - current_cpu->stopped = true; - cpu_exit(current_cpu); - qemu_cond_broadcast(&qemu_pause_cond); + qemu_cpu_stop(current_cpu, true); } } From d84be02d69a23dea249f351324d497f613994129 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 Nov 2017 20:13:19 +0100 Subject: [PATCH 12/41] cpu-exec: fix missed CPU kick during interrupt injection The conditional memory barrier not only looks strange but actually is wrong. On s390x, I can reproduce interrupts via cpu_interrupt() not leading to a proper kick out of emulation every now and then. cpu_interrupt() is especially used for inter CPU communication via SIGP (esp. external calls and emergency interrupts). With this patch, I was not able to reproduce. (esp. no stalls or hangs in the guest). My setup is s390x MTTCG with 16 VCPUs on 8 CPU host, running make -j16. Signed-off-by: David Hildenbrand Message-Id: <20171129191319.11483-1-david@redhat.com> Signed-off-by: Paolo Bonzini --- accel/tcg/cpu-exec.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 9b544d88c8..4452cd9856 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -525,19 +525,13 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, TranslationBlock **last_tb) { CPUClass *cc = CPU_GET_CLASS(cpu); - int32_t insns_left; /* Clear the interrupt flag now since we're processing * cpu->interrupt_request and cpu->exit_request. + * Ensure zeroing happens before reading cpu->exit_request or + * cpu->interrupt_request (see also smp_wmb in cpu_exit()) */ - insns_left = atomic_read(&cpu->icount_decr.u32); - atomic_set(&cpu->icount_decr.u16.high, 0); - if (unlikely(insns_left < 0)) { - /* Ensure the zeroing of icount_decr comes before the next read - * of cpu->exit_request or cpu->interrupt_request. - */ - smp_mb(); - } + atomic_mb_set(&cpu->icount_decr.u16.high, 0); if (unlikely(atomic_read(&cpu->interrupt_request))) { int interrupt_request; From a4926d99129a1d8072fc4681cd4efdb214f65ed4 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 13 Nov 2017 07:48:45 +0100 Subject: [PATCH 13/41] target/i386: Fix compiler warnings These gcc warnings are fixed: target/i386/translate.c:4461:12: warning: variable 'prefixes' might be clobbered by 'longjmp' or 'vfork' [-Wclobbered] target/i386/translate.c:4466:9: warning: variable 'rex_w' might be clobbered by 'longjmp' or 'vfork' [-Wclobbered] target/i386/translate.c:4466:16: warning: variable 'rex_r' might be clobbered by 'longjmp' or 'vfork' [-Wclobbered] Tested with x86_64-w64-mingw32-gcc from Debian stretch. Signed-off-by: Stefan Weil Message-Id: <20171113064845.29142-1-sw@weilnetz.de> Signed-off-by: Paolo Bonzini --- target/i386/translate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/target/i386/translate.c b/target/i386/translate.c index 088a9d9766..f410938244 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -4467,10 +4467,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) target_ulong pc_start = s->base.pc_next; s->pc_start = s->pc = pc_start; - prefixes = 0; s->override = -1; - rex_w = -1; - rex_r = 0; #ifdef TARGET_X86_64 s->rex_x = 0; s->rex_b = 0; @@ -4484,6 +4481,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) return s->pc; } + prefixes = 0; + rex_w = -1; + rex_r = 0; + next_byte: b = x86_ldub_code(env, s); /* Collect prefixes. */ From 1ef7c96ee2133753f4aa48617ddbef10d5a88fc9 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 11 Dec 2017 01:19:50 +0100 Subject: [PATCH 14/41] baum: Truncate braille device size to 84x1 Baum device bigger than 84 do not actually exist, but the user's own Braille device might be wider than 84 columns. Some guest drivers would be upset by such sizes, so clamp the device size. Signed-off-by: Samuel Thibault Message-Id: <20171211001950.27843-1-samuel.thibault@ens-lyon.org> Signed-off-by: Paolo Bonzini --- chardev/baum.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/chardev/baum.c b/chardev/baum.c index 67fd783a59..78b0c87625 100644 --- a/chardev/baum.c +++ b/chardev/baum.c @@ -1,7 +1,7 @@ /* * QEMU Baum Braille Device * - * Copyright (c) 2008, 2010-2011, 2016 Samuel Thibault + * Copyright (c) 2008, 2010-2011, 2016-2017 Samuel Thibault * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -239,6 +239,12 @@ static int baum_deferred_init(BaumChardev *baum) brlapi_perror("baum: brlapi__getDisplaySize"); return 0; } + if (baum->y > 1) { + baum->y = 1; + } + if (baum->x > 84) { + baum->x = 84; + } con = qemu_console_lookup_by_index(0); if (con && qemu_console_is_graphic(con)) { From 62473511ecbabdf737ba9053845e3551099b04bc Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 12 Dec 2017 11:12:19 +0000 Subject: [PATCH 15/41] sockets: remove obsolete code that updated listen address When listening on unix/tcp sockets there was optional code that would update the original SocketAddress struct with the info about the actual address that was listened on. Since the conversion of everything to QIOChannelSocket, no remaining caller made use of this feature. It has been replaced with the ability to query the listen address after the fact using the function qio_channel_socket_get_local_address. This is a better model when the input address can result in listening on multiple distinct sockets. Signed-off-by: Daniel P. Berrange Reviewed-by: Peter Xu Message-Id: <20171212111219.32601-1-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- include/qemu/sockets.h | 2 +- qga/channel-posix.c | 2 +- util/qemu-sockets.c | 31 +++++-------------------------- 3 files changed, 7 insertions(+), 28 deletions(-) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 4f7311b52a..8889bcb1ec 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -35,7 +35,7 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp); NetworkAddressFamily inet_netfamily(int family); -int unix_listen(const char *path, char *ostr, int olen, Error **errp); +int unix_listen(const char *path, Error **errp); int unix_connect(const char *path, Error **errp); SocketAddress *socket_parse(const char *str, Error **errp); diff --git a/qga/channel-posix.c b/qga/channel-posix.c index 3f34465159..b812bf4d51 100644 --- a/qga/channel-posix.c +++ b/qga/channel-posix.c @@ -190,7 +190,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, if (fd < 0) { Error *local_err = NULL; - fd = unix_listen(path, NULL, strlen(path), &local_err); + fd = unix_listen(path, &local_err); if (local_err != NULL) { g_critical("%s", error_get_pretty(local_err)); error_free(local_err); diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index af4f01211a..d6a1e1759e 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -198,7 +198,6 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) static int inet_listen_saddr(InetSocketAddress *saddr, int port_offset, - bool update_addr, Error **errp) { struct addrinfo ai,*res,*e; @@ -326,15 +325,6 @@ listen_failed: return -1; listen_ok: - if (update_addr) { - g_free(saddr->host); - saddr->host = g_strdup(uaddr); - g_free(saddr->port); - saddr->port = g_strdup_printf("%d", - inet_getport(e) - port_offset); - saddr->has_ipv6 = saddr->ipv6 = e->ai_family == PF_INET6; - saddr->has_ipv4 = saddr->ipv4 = e->ai_family != PF_INET6; - } freeaddrinfo(res); return slisten; } @@ -790,7 +780,6 @@ static int vsock_parse(VsockSocketAddress *addr, const char *str, #ifndef _WIN32 static int unix_listen_saddr(UnixSocketAddress *saddr, - bool update_addr, Error **errp) { struct sockaddr_un un; @@ -855,12 +844,7 @@ static int unix_listen_saddr(UnixSocketAddress *saddr, goto err; } - if (update_addr && pathbuf) { - g_free(saddr->path); - saddr->path = pathbuf; - } else { - g_free(pathbuf); - } + g_free(pathbuf); return sock; err: @@ -920,7 +904,6 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp) #else static int unix_listen_saddr(UnixSocketAddress *saddr, - bool update_addr, Error **errp) { error_setg(errp, "unix sockets are not available on windows"); @@ -937,7 +920,7 @@ static int unix_connect_saddr(UnixSocketAddress *saddr, Error **errp) #endif /* compatibility wrapper */ -int unix_listen(const char *str, char *ostr, int olen, Error **errp) +int unix_listen(const char *str, Error **errp) { char *path, *optstr; int sock, len; @@ -957,11 +940,7 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) saddr->path = g_strdup(str); } - sock = unix_listen_saddr(saddr, true, errp); - - if (sock != -1 && ostr) { - snprintf(ostr, olen, "%s%s", saddr->path, optstr ? optstr : ""); - } + sock = unix_listen_saddr(saddr, errp); qapi_free_UnixSocketAddress(saddr); return sock; @@ -1052,11 +1031,11 @@ int socket_listen(SocketAddress *addr, Error **errp) switch (addr->type) { case SOCKET_ADDRESS_TYPE_INET: - fd = inet_listen_saddr(&addr->u.inet, 0, false, errp); + fd = inet_listen_saddr(&addr->u.inet, 0, errp); break; case SOCKET_ADDRESS_TYPE_UNIX: - fd = unix_listen_saddr(&addr->u.q_unix, false, errp); + fd = unix_listen_saddr(&addr->u.q_unix, errp); break; case SOCKET_ADDRESS_TYPE_FD: From cfcca361d77142f25fb1128755084cf91faa4db7 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Wed, 13 Dec 2017 11:19:19 +0000 Subject: [PATCH 16/41] target/i386: Fix handling of VEX prefixes In commit e3af7c788b73a6495eb9d94992ef11f6ad6f3c56 we replaced direct calls to to cpu_ld*_code() with calls to the x86_ld*_code() wrappers which incorporate an advance of s->pc. Unfortunately we didn't notice that in one place the old code was deliberately not incrementing s->pc: @@ -4501,7 +4528,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) static const int pp_prefix[4] = { 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ }; - int vex3, vex2 = cpu_ldub_code(env, s->pc); + int vex3, vex2 = x86_ldub_code(env, s); if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, This meant we were mishandling this set of instructions. Remove the manual advance of s->pc for the "is VEX" case (which is now done by x86_ldub_code()) and instead rewind PC in the case where we decide that this isn't really VEX. Signed-off-by: Peter Maydell Cc: qemu-stable@nongnu.org Reported-by: Alexandro Sanchez Bach Message-Id: <1513163959-17545-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- target/i386/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/translate.c b/target/i386/translate.c index f410938244..23d7eec964 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -4548,9 +4548,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, otherwise the instruction is LES or LDS. */ + s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ break; } - s->pc++; /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */ if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ From 92b540dac9fc3a572c7342edd0b073000f5a6abf Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 30 Nov 2017 09:53:02 +0100 Subject: [PATCH 17/41] tests/boot-serial-test: Make sure that we check the timeout regularly If the guest continuesly writes characters to the UART, we never leave the inner while loop and thus never check whether we've reached the timeout value. So if we fail to find the expected string in the UART output, the test just hangs and never finishs. Use a counter to regularly break out of the while loop to check the timeout. Signed-off-by: Thomas Huth Message-Id: <1512031988-32490-2-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- tests/boot-serial-test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index c935d69824..fa4183de29 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -43,12 +43,13 @@ static testdef_t tests[] = { static void check_guest_output(const testdef_t *test, int fd) { bool output_ok = false; - int i, nbr, pos = 0; + int i, nbr, pos = 0, ccnt; char ch; /* Poll serial output... Wait at most 60 seconds */ for (i = 0; i < 6000; ++i) { - while ((nbr = read(fd, &ch, 1)) == 1) { + ccnt = 0; + while ((nbr = read(fd, &ch, 1)) == 1 && ccnt++ < 512) { if (ch == test->expect[pos]) { pos += 1; if (test->expect[pos] == '\0') { From e12c08d3b67c4f4e5a16ee815188fc13632530ce Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 30 Nov 2017 09:53:03 +0100 Subject: [PATCH 18/41] tests/boot-serial-test: Add code to allow to specify our own kernel or bios QEMU only ships with some few firmware images, i.e. we can currently run the boot-serial test only on a very limited set of machines. But writing some characters to the default UART of a machine can often be done with some few lines of assembly, so we add the possibility to the boot-serial tester to use its own mini-kernels or mini-firmwares. We write such images then into a file that we can load with the "-kernel" or "-bios" parameter when we launch QEMU. Signed-off-by: Thomas Huth Message-Id: <1512031988-32490-3-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- tests/boot-serial-test.c | 54 +++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c index fa4183de29..d99726919e 100644 --- a/tests/boot-serial-test.c +++ b/tests/boot-serial-test.c @@ -7,9 +7,10 @@ * or later. See the COPYING file in the top-level directory. * * This test is used to check that the serial output of the firmware - * (that we provide for some machines) contains an expected string. - * Thus we check that the firmware still boots at least to a certain - * point and so we know that the machine is not completely broken. + * (that we provide for some machines) or some small mini-kernels that + * we provide here contains an expected string. Thus we check that the + * firmware/kernel still boots at least to a certain point and so we + * know that the machine is not completely broken. */ #include "qemu/osdep.h" @@ -20,6 +21,9 @@ typedef struct testdef { const char *machine; /* Name of the machine */ const char *extra; /* Additional parameters */ const char *expect; /* Expected string in the serial output */ + size_t codesize; /* Size of the kernel or bios data */ + const uint8_t *kernel; /* Set in case we use our own mini kernel */ + const uint8_t *bios; /* Set in case we use our own mini bios */ } testdef_t; static testdef_t tests[] = { @@ -72,26 +76,52 @@ done: static void test_machine(const void *data) { const testdef_t *test = data; - char tmpname[] = "/tmp/qtest-boot-serial-XXXXXX"; - int fd; + char serialtmp[] = "/tmp/qtest-boot-serial-sXXXXXX"; + char codetmp[] = "/tmp/qtest-boot-serial-cXXXXXX"; + const char *codeparam = ""; + const uint8_t *code = NULL; + int ser_fd; - fd = mkstemp(tmpname); - g_assert(fd != -1); + ser_fd = mkstemp(serialtmp); + g_assert(ser_fd != -1); + + if (test->kernel) { + code = test->kernel; + codeparam = "-kernel"; + } else if (test->bios) { + code = test->bios; + codeparam = "-bios"; + } + + if (code) { + ssize_t wlen; + int code_fd; + + code_fd = mkstemp(codetmp); + g_assert(code_fd != -1); + wlen = write(code_fd, code, test->codesize); + g_assert(wlen == test->codesize); + close(code_fd); + } /* * Make sure that this test uses tcg if available: It is used as a * fast-enough smoketest for that. */ - global_qtest = qtest_startf("-M %s,accel=tcg:kvm " + global_qtest = qtest_startf("%s %s -M %s,accel=tcg:kvm " "-chardev file,id=serial0,path=%s " "-no-shutdown -serial chardev:serial0 %s", - test->machine, tmpname, test->extra); - unlink(tmpname); + codeparam, code ? codetmp : "", + test->machine, serialtmp, test->extra); + unlink(serialtmp); + if (code) { + unlink(codetmp); + } - check_guest_output(test, fd); + check_guest_output(test, ser_fd); qtest_quit(global_qtest); - close(fd); + close(ser_fd); } int main(int argc, char *argv[]) From 7ce32f3005afef1feb6f6cd3a7e4f36a3ff300ab Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 30 Nov 2017 09:53:06 +0100 Subject: [PATCH 19/41] hw/moxie/moxiesim: Add support for loading a BIOS on moxiesim The moxiesim machine already defines a memory region for a firmware, but does not provide the possibility to load an image via "-bios" yet. This will be needed for the boot-serial tester, so let's add support for "-bios" here now. Signed-off-by: Thomas Huth Message-Id: <1512031988-32490-6-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- hw/moxie/moxiesim.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/moxie/moxiesim.c b/hw/moxie/moxiesim.c index 3c3ba9d8c5..6c200becab 100644 --- a/hw/moxie/moxiesim.c +++ b/hw/moxie/moxiesim.c @@ -25,6 +25,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -40,6 +41,8 @@ #include "elf.h" #define PHYS_MEM_BASE 0x80000000 +#define FIRMWARE_BASE 0x1000 +#define FIRMWARE_SIZE (128 * 0x1000) typedef struct { uint64_t ram_size; @@ -122,8 +125,8 @@ static void moxiesim_init(MachineState *machine) memory_region_init_ram(ram, NULL, "moxiesim.ram", ram_size, &error_fatal); memory_region_add_subregion(address_space_mem, ram_base, ram); - memory_region_init_ram(rom, NULL, "moxie.rom", 128 * 0x1000, &error_fatal); - memory_region_add_subregion(get_system_memory(), 0x1000, rom); + memory_region_init_ram(rom, NULL, "moxie.rom", FIRMWARE_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), FIRMWARE_BASE, rom); if (kernel_filename) { loader_params.ram_size = ram_size; @@ -132,6 +135,11 @@ static void moxiesim_init(MachineState *machine) loader_params.initrd_filename = initrd_filename; load_kernel(cpu, &loader_params); } + if (bios_name) { + if (load_image_targphys(bios_name, FIRMWARE_BASE, FIRMWARE_SIZE) < 0) { + error_report("Failed to load firmware '%s'", bios_name); + } + } /* A single 16450 sits at offset 0x3f8. */ if (serial_hds[0]) { From 80ceb07a83375e3a0091591f96bd47bce2f640ce Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 23 Nov 2017 17:23:32 +0800 Subject: [PATCH 20/41] cpu: refactor cpu_address_space_init() Normally we create an address space for that CPU and pass that address space into the function. Let's just do it inside to unify address space creations. It'll simplify my next patch to rename those address spaces. Signed-off-by: Peter Xu Message-Id: <20171123092333.16085-3-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- cpus.c | 5 +---- exec.c | 7 ++++++- include/exec/exec-all.h | 6 ++++-- target/arm/cpu.c | 13 +++---------- target/i386/cpu.c | 10 ++-------- 5 files changed, 16 insertions(+), 25 deletions(-) diff --git a/cpus.c b/cpus.c index 3740c4db62..83700c1716 100644 --- a/cpus.c +++ b/cpus.c @@ -1787,11 +1787,8 @@ void qemu_init_vcpu(CPUState *cpu) /* If the target cpu hasn't set up any address spaces itself, * give it the default one. */ - AddressSpace *as = g_new0(AddressSpace, 1); - - address_space_init(as, cpu->memory, "cpu-memory"); cpu->num_ases = 1; - cpu_address_space_init(cpu, as, 0); + cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory); } if (kvm_enabled()) { diff --git a/exec.c b/exec.c index 3e7c57e914..3ab515e47c 100644 --- a/exec.c +++ b/exec.c @@ -705,9 +705,14 @@ CPUState *qemu_get_cpu(int index) } #if !defined(CONFIG_USER_ONLY) -void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx) +void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr) { CPUAddressSpace *newas; + AddressSpace *as = g_new0(AddressSpace, 1); + + assert(mr); + address_space_init(as, mr, prefix); /* Target code should have set num_ases before calling us */ assert(asidx < cpu->num_ases); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 0f51c92adb..b37f7d8d92 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -74,8 +74,9 @@ void cpu_reloading_memory_map(void); /** * cpu_address_space_init: * @cpu: CPU to add this address space to - * @as: address space to add * @asidx: integer index of this address space + * @prefix: prefix to be used as name of address space + * @mr: the root memory region of address space * * Add the specified address space to the CPU's cpu_ases list. * The address space added with @asidx 0 is the one used for the @@ -89,7 +90,8 @@ void cpu_reloading_memory_map(void); * * Note that with KVM only one address space is supported. */ -void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx); +void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); #endif #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 7f7a3d1e32..cc1856c32b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -705,9 +705,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) CPUARMState *env = &cpu->env; int pagebits; Error *local_err = NULL; -#ifndef CONFIG_USER_ONLY - AddressSpace *as; -#endif cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { @@ -912,21 +909,17 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) { - as = g_new0(AddressSpace, 1); - cs->num_ases = 2; if (!cpu->secure_memory) { cpu->secure_memory = cs->memory; } - address_space_init(as, cpu->secure_memory, "cpu-secure-memory"); - cpu_address_space_init(cs, as, ARMASIdx_S); + cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory", + cpu->secure_memory); } else { cs->num_ases = 1; } - as = g_new0(AddressSpace, 1); - address_space_init(as, cs->memory, "cpu-memory"); - cpu_address_space_init(cs, as, ARMASIdx_NS); + cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory); #endif qemu_init_vcpu(cs); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 325b52e325..b069eafcc6 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3736,11 +3736,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY if (tcg_enabled()) { - AddressSpace *as_normal = g_new0(AddressSpace, 1); - AddressSpace *as_smm = g_new(AddressSpace, 1); - - address_space_init(as_normal, cs->memory, "cpu-memory"); - cpu->cpu_as_mem = g_new(MemoryRegion, 1); cpu->cpu_as_root = g_new(MemoryRegion, 1); @@ -3755,11 +3750,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) get_system_memory(), 0, ~0ull); memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->cpu_as_mem, 0); memory_region_set_enabled(cpu->cpu_as_mem, true); - address_space_init(as_smm, cpu->cpu_as_root, "CPU"); cs->num_ases = 2; - cpu_address_space_init(cs, as_normal, 0); - cpu_address_space_init(cs, as_smm, 1); + cpu_address_space_init(cs, 0, "cpu-memory", cs->memory); + cpu_address_space_init(cs, 1, "cpu-smm", cpu->cpu_as_root); /* ... SMRAM with higher priority, linked from /machine/smram. */ cpu->machine_done.notify = x86_cpu_machine_done; From 87a621d857be1b2b3dd1d0847ca311a863dbcb53 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 23 Nov 2017 17:23:33 +0800 Subject: [PATCH 21/41] cpu: suffix cpu address spaces with cpu index Renaming cpu address space names so that they won't be the same when there are more than one. Signed-off-by: Peter Xu Message-Id: <20171123092333.16085-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- exec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/exec.c b/exec.c index 3ab515e47c..6b5828ee6e 100644 --- a/exec.c +++ b/exec.c @@ -710,9 +710,12 @@ void cpu_address_space_init(CPUState *cpu, int asidx, { CPUAddressSpace *newas; AddressSpace *as = g_new0(AddressSpace, 1); + char *as_name; assert(mr); - address_space_init(as, mr, prefix); + as_name = g_strdup_printf("%s-%d", prefix, cpu->cpu_index); + address_space_init(as, mr, as_name); + g_free(as_name); /* Target code should have set num_ases before calling us */ assert(asidx < cpu->num_ases); From aef172ffdc2f9c41d9cc043a55f1259e7c07e587 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Fri, 8 Dec 2017 12:51:07 +0100 Subject: [PATCH 22/41] block/iscsi: dont leave allocmap in an invalid state on UNMAP failure we forgot to set the allocmap to invalid if an UNMAP call fails. Cc: qemu-stable@nongnu.org Signed-off-by: Peter Lieven Message-Id: <1512733868-9009-2-git-send-email-pl@kamp.de> Reviewed-by: Eric Blake Signed-off-by: Paolo Bonzini --- block/iscsi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 4683f3b244..c532ec79d1 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -2,7 +2,7 @@ * QEMU Block driver for iSCSI images * * Copyright (c) 2010-2011 Ronnie Sahlberg - * Copyright (c) 2012-2016 Peter Lieven + * Copyright (c) 2012-2017 Peter Lieven * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -1128,6 +1128,9 @@ retry: goto retry; } + iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS); + if (iTask.status == SCSI_STATUS_CHECK_CONDITION) { /* the target might fail with a check condition if it is not happy with the alignment of the UNMAP request @@ -1140,9 +1143,6 @@ retry: goto out_unlock; } - iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, - bytes >> BDRV_SECTOR_BITS); - out_unlock: qemu_mutex_unlock(&iscsilun->mutex); return r; From e38bc23454ef763deb4405ebdee6a1081aa00bc8 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Fri, 8 Dec 2017 12:51:08 +0100 Subject: [PATCH 23/41] block/iscsi: only report an iSCSI Failure if we don't handle it gracefully we currently report an "iSCSI Failure" in iscsi_co_generic_cb if the task hasn't completed with SCSI_STATUS_GOOD. However, we expect a failure in some cases and handle it gracefully. This is the case for misaligned UNMAPs and WRITESAME10/16 calls without UNMAP. In this case a failure in the logs can be quite misleading. While we are at it improve the logging to reveal which operation failed at what LBA. Signed-off-by: Peter Lieven Message-Id: <1512733868-9009-3-git-send-email-pl@kamp.de> Signed-off-by: Paolo Bonzini --- block/iscsi.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index c532ec79d1..5c0a9e55b6 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -104,6 +104,7 @@ typedef struct IscsiTask { IscsiLun *iscsilun; QEMUTimer retry_timer; int err_code; + char *err_str; } IscsiTask; typedef struct IscsiAIOCB { @@ -265,7 +266,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, } } iTask->err_code = iscsi_translate_sense(&task->sense); - error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); + iTask->err_str = g_strdup(iscsi_get_error(iscsi)); } out: @@ -629,6 +630,8 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors); + error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba, + iTask.err_str); r = iTask.err_code; goto out_unlock; } @@ -637,6 +640,7 @@ retry: out_unlock: qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); return r; } @@ -651,10 +655,9 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, struct scsi_get_lba_status *lbas = NULL; struct scsi_lba_status_descriptor *lbasd = NULL; struct IscsiTask iTask; + uint64_t lba; int64_t ret; - iscsi_co_init_iscsitask(iscsilun, &iTask); - if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { ret = -EINVAL; goto out; @@ -670,11 +673,13 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, goto out; } + lba = sector_qemu2lun(sector_num, iscsilun); + + iscsi_co_init_iscsitask(iscsilun, &iTask); qemu_mutex_lock(&iscsilun->mutex); retry: if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun, - sector_qemu2lun(sector_num, iscsilun), - 8 + 16, iscsi_co_generic_cb, + lba, 8 + 16, iscsi_co_generic_cb, &iTask) == NULL) { ret = -ENOMEM; goto out_unlock; @@ -701,6 +706,8 @@ retry: * because the device is busy or the cmd is not * supported) we pretend all blocks are allocated * for backwards compatibility */ + error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); goto out_unlock; } @@ -738,6 +745,7 @@ retry: } out_unlock: qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); out: if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); @@ -756,6 +764,7 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, struct IscsiTask iTask; uint64_t lba; uint32_t num_sectors; + int r = 0; if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -853,19 +862,23 @@ retry: iTask.complete = 0; goto retry; } - qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { - return iTask.err_code; + error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); + r = iTask.err_code; } - return 0; + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; } static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; + int r = 0; iscsi_co_init_iscsitask(iscsilun, &iTask); qemu_mutex_lock(&iscsilun->mutex); @@ -892,13 +905,15 @@ retry: iTask.complete = 0; goto retry; } - qemu_mutex_unlock(&iscsilun->mutex); if (iTask.status != SCSI_STATUS_GOOD) { - return iTask.err_code; + error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str); + r = iTask.err_code; } - return 0; + qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); + return r; } #ifdef __linux__ @@ -1139,12 +1154,15 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { + error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s", + list.lba, iTask.err_str); r = iTask.err_code; goto out_unlock; } out_unlock: qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); return r; } @@ -1241,6 +1259,8 @@ retry: if (iTask.status != SCSI_STATUS_GOOD) { iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS, bytes >> BDRV_SECTOR_BITS); + error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s", + lba, iTask.err_str); r = iTask.err_code; goto out_unlock; } @@ -1255,6 +1275,7 @@ retry: out_unlock: qemu_mutex_unlock(&iscsilun->mutex); + g_free(iTask.err_str); return r; } From 8af36743c26372789b1c92606dd181b2a6d2ad53 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Wed, 13 Dec 2017 17:52:28 +0000 Subject: [PATCH 24/41] exec: Don't reuse unassigned_mem_ops for io_mem_rom We set up the io_mem_rom special memory region using the unassigned_mem_ops structure; this is then used when a guest tries to write to ROM. This is incorrect, because the behaviour of unassigned memory may be different from that of ROM for writes. In particular, on some architectures writing to unassigned memory generates a guest exception, whereas writing to ROM is generally ignored. Use a special readonly_mem_ops for this purpose instead, so writes to ROM are ignored for all guest CPUs. Signed-off-by: Peter Maydell Message-Id: <1513187549-2435-2-git-send-email-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- exec.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/exec.c b/exec.c index 6b5828ee6e..4722e521d4 100644 --- a/exec.c +++ b/exec.c @@ -2725,6 +2725,37 @@ static uint16_t dummy_section(PhysPageMap *map, FlatView *fv, MemoryRegion *mr) return phys_section_add(map, §ion); } +static void readonly_mem_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* Ignore any write to ROM. */ +} + +static bool readonly_mem_accepts(void *opaque, hwaddr addr, + unsigned size, bool is_write) +{ + return is_write; +} + +/* This will only be used for writes, because reads are special cased + * to directly access the underlying host ram. + */ +static const MemoryRegionOps readonly_mem_ops = { + .write = readonly_mem_write, + .valid.accepts = readonly_mem_accepts, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, +}; + MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs) { int asidx = cpu_asidx_from_attrs(cpu, attrs); @@ -2737,7 +2768,8 @@ MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs) static void io_mem_init(void) { - memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); + memory_region_init_io(&io_mem_rom, NULL, &readonly_mem_ops, + NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); From 5fb3d632884cbc113d9d13b1c8b0c5f2a8c7bc0d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Wed, 13 Dec 2017 17:52:29 +0000 Subject: [PATCH 25/41] hw/mips/boston: Remove workaround for writes to ROM aborting Now that the memory system correctly handles writes to ROM for guest CPUs that may generate exceptions for decode errors, we can remove the workaround from the boston board. Signed-off-by: Peter Maydell Message-Id: <1513187549-2435-3-git-send-email-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- hw/mips/boston.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 1cb4b6aca2..fb23161b33 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -248,16 +248,6 @@ static const MemoryRegionOps boston_platreg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static void boston_flash_write(void *opaque, hwaddr addr, - uint64_t val, unsigned size) -{ -} - -static const MemoryRegionOps boston_flash_ops = { - .write = boston_flash_write, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - static const TypeInfo boston_device = { .name = TYPE_MIPS_BOSTON, .parent = TYPE_SYS_BUS_DEVICE, @@ -481,8 +471,8 @@ static void boston_mach_init(MachineState *machine) sysbus_mmio_map_overlap(SYS_BUS_DEVICE(s->cps), 0, 0, 1); flash = g_new(MemoryRegion, 1); - memory_region_init_rom_device_nomigrate(flash, NULL, &boston_flash_ops, s, - "boston.flash", 128 * M_BYTE, &err); + memory_region_init_rom_nomigrate(flash, NULL, + "boston.flash", 128 * M_BYTE, &err); memory_region_add_subregion_overlap(sys_mem, 0x18000000, flash, 0); ddr = g_new(MemoryRegion, 1); From 7299e1a411de99761a4260e44b4f1cf2e4e126ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 15 Dec 2017 00:43:55 -0300 Subject: [PATCH 26/41] hw/i386/vmport: replace fprintf() by trace events or LOG_UNIMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20171215034356.4449-2-f4bug@amsat.org> [Replace unknown command tracepoint with LOG_UNIMP, add generic tracepoint for vmport commands. - Paolo] Signed-off-by: Paolo Bonzini --- hw/i386/trace-events | 4 ++++ hw/i386/vmport.c | 14 +++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/i386/trace-events b/hw/i386/trace-events index d43b4b6cd3..22d44648af 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -113,3 +113,7 @@ amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: translation level 0x%"PR amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64 amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 + +# hw/i386/vmport.c +vmport_register(unsigned char command, void *func, void *opaque) "command: 0x%02x func: %p opaque: %p" +vmport_command(unsigned char command) "command: 0x%02x" diff --git a/hw/i386/vmport.c b/hw/i386/vmport.c index eb880c6def..9b8c68806e 100644 --- a/hw/i386/vmport.c +++ b/hw/i386/vmport.c @@ -27,8 +27,7 @@ #include "hw/i386/pc.h" #include "sysemu/hw_accel.h" #include "hw/qdev.h" - -/* #define VMPORT_DEBUG */ +#include "trace.h" #define VMPORT_CMD_GETVERSION 0x0a #define VMPORT_CMD_GETRAMSIZE 0x14 @@ -54,6 +53,7 @@ void vmport_register(unsigned char command, VMPortReadFunc *func, void *opaque) return; } + trace_vmport_register(command, func, opaque); port_state->func[command] = func; port_state->opaque[command] = opaque; } @@ -76,13 +76,9 @@ static uint64_t vmport_ioport_read(void *opaque, hwaddr addr, } command = env->regs[R_ECX]; - if (command >= VMPORT_ENTRIES) { - return eax; - } - if (!s->func[command]) { -#ifdef VMPORT_DEBUG - fprintf(stderr, "vmport: unknown command %x\n", command); -#endif + trace_vmport_command(command); + if (command >= VMPORT_ENTRIES || !s->func[command]) { + qemu_log_mask(LOG_UNIMP, "vmport: unknown command %x\n", command); return eax; } From f68d98b21fa74155dc7c1fd212474379ac3c7531 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Nov 2017 13:27:41 +0100 Subject: [PATCH 27/41] scsi: provide general-purpose functions to manage sense data Extract the common parts of scsi_sense_buf_to_errno, scsi_convert_sense and scsi_target_send_command's REQUEST SENSE handling into two new functions scsi_parse_sense_buf and scsi_build_sense_buf. Fix a bug in scsi_target_send_command along the way; the length was written in buf[10] rather than buf[7]. Reported-by: Dr. David Alan Gilbert Reviewed-by: Dr. David Alan Gilbert Fixes: b07fbce634 ("scsi-bus: correct responses for INQUIRY and REQUEST SENSE") Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-bus.c | 16 +---- include/scsi/utils.h | 3 + scsi/utils.c | 139 +++++++++++++++++++++---------------------- 3 files changed, 72 insertions(+), 86 deletions(-) diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index 977f7bce1f..965becf31f 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -540,20 +540,8 @@ static int32_t scsi_target_send_command(SCSIRequest *req, uint8_t *buf) if (req->lun != 0) { const struct SCSISense sense = SENSE_CODE(LUN_NOT_SUPPORTED); - if (fixed_sense) { - r->buf[0] = 0x70; - r->buf[2] = sense.key; - r->buf[10] = 10; - r->buf[12] = sense.asc; - r->buf[13] = sense.ascq; - r->len = MIN(req->cmd.xfer, SCSI_SENSE_LEN); - } else { - r->buf[0] = 0x72; - r->buf[1] = sense.key; - r->buf[2] = sense.asc; - r->buf[3] = sense.ascq; - r->len = 8; - } + r->len = scsi_build_sense_buf(r->buf, req->cmd.xfer, + sense, fixed_sense); } else { r->len = scsi_device_get_sense(r->req.dev, r->buf, MIN(req->cmd.xfer, r->buf_len), diff --git a/include/scsi/utils.h b/include/scsi/utils.h index eb07e474ee..4b705f5e0f 100644 --- a/include/scsi/utils.h +++ b/include/scsi/utils.h @@ -31,6 +31,9 @@ typedef struct SCSISense { } SCSISense; int scsi_build_sense(uint8_t *buf, SCSISense sense); +SCSISense scsi_parse_sense_buf(const uint8_t *in_buf, int in_len); +int scsi_build_sense_buf(uint8_t *buf, size_t max_size, SCSISense sense, + bool fixed_sense); /* * Predefined sense codes diff --git a/scsi/utils.c b/scsi/utils.c index e4182a9b09..61bc1a8e2b 100644 --- a/scsi/utils.c +++ b/scsi/utils.c @@ -96,15 +96,60 @@ int scsi_cdb_length(uint8_t *buf) return cdb_len; } +SCSISense scsi_parse_sense_buf(const uint8_t *in_buf, int in_len) +{ + bool fixed_in; + SCSISense sense; + + assert(in_len > 0); + fixed_in = (in_buf[0] & 2) == 0; + if (fixed_in) { + if (in_len < 14) { + return SENSE_CODE(IO_ERROR); + } + sense.key = in_buf[2]; + sense.asc = in_buf[12]; + sense.ascq = in_buf[13]; + } else { + if (in_len < 4) { + return SENSE_CODE(IO_ERROR); + } + sense.key = in_buf[1]; + sense.asc = in_buf[2]; + sense.ascq = in_buf[3]; + } + + return sense; +} + +int scsi_build_sense_buf(uint8_t *out_buf, size_t size, SCSISense sense, + bool fixed_sense) +{ + int len; + uint8_t buf[SCSI_SENSE_LEN] = { 0 }; + + if (fixed_sense) { + buf[0] = 0x70; + buf[2] = sense.key; + buf[7] = 10; + buf[12] = sense.asc; + buf[13] = sense.ascq; + len = 18; + } else { + buf[0] = 0x72; + buf[1] = sense.key; + buf[2] = sense.asc; + buf[3] = sense.ascq; + len = 8; + } + len = MIN(len, size); + memcpy(out_buf, buf, len); + return len; +} + int scsi_build_sense(uint8_t *buf, SCSISense sense) { - memset(buf, 0, 18); - buf[0] = 0x70; - buf[2] = sense.key; - buf[7] = 10; - buf[12] = sense.asc; - buf[13] = sense.ascq; - return 18; + return scsi_build_sense_buf(buf, SCSI_SENSE_LEN, sense, true); } /* @@ -274,52 +319,21 @@ const struct SCSISense sense_code_SPACE_ALLOC_FAILED = { int scsi_convert_sense(uint8_t *in_buf, int in_len, uint8_t *buf, int len, bool fixed) { - bool fixed_in; SCSISense sense; - if (!fixed && len < 8) { - return 0; + bool fixed_in; + + fixed_in = (in_buf[0] & 2) == 0; + if (in_len && fixed == fixed_in) { + memcpy(buf, in_buf, MIN(len, in_len)); + return MIN(len, in_len); } if (in_len == 0) { - sense.key = NO_SENSE; - sense.asc = 0; - sense.ascq = 0; + sense = SENSE_CODE(NO_SENSE); } else { - fixed_in = (in_buf[0] & 2) == 0; - - if (fixed == fixed_in) { - memcpy(buf, in_buf, MIN(len, in_len)); - return MIN(len, in_len); - } - - if (fixed_in) { - sense.key = in_buf[2]; - sense.asc = in_buf[12]; - sense.ascq = in_buf[13]; - } else { - sense.key = in_buf[1]; - sense.asc = in_buf[2]; - sense.ascq = in_buf[3]; - } - } - - memset(buf, 0, len); - if (fixed) { - /* Return fixed format sense buffer */ - buf[0] = 0x70; - buf[2] = sense.key; - buf[7] = 10; - buf[12] = sense.asc; - buf[13] = sense.ascq; - return MIN(len, SCSI_SENSE_LEN); - } else { - /* Return descriptor format sense buffer */ - buf[0] = 0x72; - buf[1] = sense.key; - buf[2] = sense.asc; - buf[3] = sense.ascq; - return 8; + sense = scsi_parse_sense_buf(in_buf, in_len); } + return scsi_build_sense_buf(buf, len, sense, fixed); } int scsi_sense_to_errno(int key, int asc, int ascq) @@ -366,34 +380,15 @@ int scsi_sense_to_errno(int key, int asc, int ascq) } } -int scsi_sense_buf_to_errno(const uint8_t *sense, size_t sense_size) +int scsi_sense_buf_to_errno(const uint8_t *in_buf, size_t in_len) { - int key, asc, ascq; - if (sense_size < 1) { + SCSISense sense; + if (in_len < 1) { return EIO; } - switch (sense[0]) { - case 0x70: /* Fixed format sense data. */ - if (sense_size < 14) { - return EIO; - } - key = sense[2] & 0xF; - asc = sense[12]; - ascq = sense[13]; - break; - case 0x72: /* Descriptor format sense data. */ - if (sense_size < 4) { - return EIO; - } - key = sense[1] & 0xF; - asc = sense[2]; - ascq = sense[3]; - break; - default: - return EIO; - break; - } - return scsi_sense_to_errno(key, asc, ascq); + + sense = scsi_parse_sense_buf(in_buf, in_len); + return scsi_sense_to_errno(sense.key, sense.asc, sense.ascq); } const char *scsi_command_name(uint8_t cmd) From 9661e208f8cc94f66176db6f069a0f8adef9478d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Nov 2017 13:45:59 +0100 Subject: [PATCH 28/41] scsi: replace hex constants with #defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sense keys have nice #defines in scsi/constants.h, use them. Reported-by: Dr. David Alan Gilbert Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- scsi/utils.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scsi/utils.c b/scsi/utils.c index 61bc1a8e2b..ddae650a99 100644 --- a/scsi/utils.c +++ b/scsi/utils.c @@ -339,16 +339,16 @@ int scsi_convert_sense(uint8_t *in_buf, int in_len, int scsi_sense_to_errno(int key, int asc, int ascq) { switch (key) { - case 0x00: /* NO SENSE */ - case 0x01: /* RECOVERED ERROR */ - case 0x06: /* UNIT ATTENTION */ + case NO_SENSE: + case RECOVERED_ERROR: + case UNIT_ATTENTION: /* These sense keys are not errors */ return 0; - case 0x0b: /* COMMAND ABORTED */ + case ABORTED_COMMAND: /* COMMAND ABORTED */ return ECANCELED; - case 0x02: /* NOT READY */ - case 0x05: /* ILLEGAL REQUEST */ - case 0x07: /* DATA PROTECTION */ + case NOT_READY: + case ILLEGAL_REQUEST: + case DATA_PROTECT: /* Parse ASCQ */ break; default: From ed57c757961d4518717502f908373331cb48f261 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 18 Dec 2017 18:44:25 +0100 Subject: [PATCH 29/41] Remove legacy -no-kvm-pit option It's only printing a warning since QEMU v1.3.0, so nobody should use this anymore today. Let's get rid of this now. Signed-off-by: Thomas Huth Message-Id: <1513619065-31722-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-doc.texi | 5 ----- qemu-options.hx | 3 --- vl.c | 4 ---- 3 files changed, 12 deletions(-) diff --git a/qemu-doc.texi b/qemu-doc.texi index 69e2953dc6..90bea7331d 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -2612,11 +2612,6 @@ synonym for setting ``-global kvm-pit.lost_tick_policy=discard''. The ``-no-kvm-irqchip'' argument is now a synonym for setting ``-machine kernel_irqchip=off''. -@subsection -no-kvm-pit (since 1.3.0) - -The ``-no-kvm-pit'' argument is ignored. It is no longer -possible to disable the KVM PIT directly. - @subsection -no-kvm (since 1.3.0) The ``-no-kvm'' argument is now a synonym for setting diff --git a/qemu-options.hx b/qemu-options.hx index b1e5781908..94647e21e3 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3927,9 +3927,6 @@ HXCOMM Deprecated by kvm-pit driver properties DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection, "", QEMU_ARCH_I386) -HXCOMM Deprecated (ignored) -DEF("no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit, "", QEMU_ARCH_I386) - HXCOMM Deprecated by -machine kernel_irqchip=on|off property DEF("no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip, "", QEMU_ARCH_I386) diff --git a/vl.c b/vl.c index e9012bb009..d3a5c5d021 100644 --- a/vl.c +++ b/vl.c @@ -3817,10 +3817,6 @@ int main(int argc, char **argv, char **envp) olist = qemu_find_opts("machine"); qemu_opts_parse_noisily(olist, "accel=tcg", false); break; - case QEMU_OPTION_no_kvm_pit: { - warn_report("ignoring deprecated option"); - break; - } case QEMU_OPTION_no_kvm_pit_reinjection: { static GlobalProperty kvm_pit_lost_tick_policy = { .driver = "kvm-pit", From 0880a873007b51c06ab008366cbd5e510be15bad Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 10 Dec 2017 14:38:15 +0800 Subject: [PATCH 30/41] i8259: convert DPRINTFs into trace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One thing to mention is that in pic_set_irq() I need to uncomment a few lines in the macros to make sure IRQ value calculation is correct. Signed-off-by: Peter Xu Message-Id: <20171210063819.14892-2-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/intc/i8259.c | 26 +++++++++++--------------- hw/intc/trace-events | 7 +++++++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index fe9ecd6bd4..f12e0b27f1 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -30,17 +30,11 @@ #include "qemu/log.h" #include "hw/isa/i8259_internal.h" #include "hw/intc/intc.h" +#include "trace.h" /* debug PIC */ //#define DEBUG_PIC -#ifdef DEBUG_PIC -#define DPRINTF(fmt, ...) \ - do { printf("pic: " fmt , ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) -#endif - //#define DEBUG_IRQ_LATENCY //#define DEBUG_IRQ_COUNT @@ -122,8 +116,7 @@ static void pic_update_irq(PICCommonState *s) irq = pic_get_irq(s); if (irq >= 0) { - DPRINTF("pic%d: imr=%x irr=%x padd=%d\n", - s->master ? 0 : 1, s->imr, s->irr, s->priority_add); + trace_pic_update_irq(s->master, s->imr, s->irr, s->priority_add); qemu_irq_raise(s->int_out[0]); } else { qemu_irq_lower(s->int_out[0]); @@ -140,9 +133,11 @@ static void pic_set_irq(void *opaque, int irq, int level) defined(DEBUG_IRQ_LATENCY) int irq_index = s->master ? irq : irq + 8; #endif + + trace_pic_set_irq(s->master, irq, level); + #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) if (level != irq_level[irq_index]) { - DPRINTF("pic_set_irq: irq=%d level=%d\n", irq_index, level); irq_level[irq_index] = level; #ifdef DEBUG_IRQ_COUNT if (level == 1) { @@ -223,18 +218,18 @@ int pic_read_irq(DeviceState *d) intno = s->irq_base + irq; } -#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_LATENCY) if (irq == 2) { irq = irq2 + 8; } -#endif + #ifdef DEBUG_IRQ_LATENCY printf("IRQ%d latency=%0.3fus\n", irq, (double)(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - irq_time[irq]) * 1000000.0 / NANOSECONDS_PER_SECOND); #endif - DPRINTF("pic_interrupt: irq=%d\n", irq); + + trace_pic_interrupt(irq, intno); return intno; } @@ -289,7 +284,8 @@ static void pic_ioport_write(void *opaque, hwaddr addr64, uint32_t val = val64; int priority, cmd, irq; - DPRINTF("write: addr=0x%02x val=0x%02x\n", addr, val); + trace_pic_ioport_write(s->master, addr, val); + if (addr == 0) { if (val & 0x10) { pic_init_reset(s); @@ -402,7 +398,7 @@ static uint64_t pic_ioport_read(void *opaque, hwaddr addr, ret = s->imr; } } - DPRINTF("read: addr=0x%02" HWADDR_PRIx " val=0x%02x\n", addr, ret); + trace_pic_ioport_read(s->master, addr, ret); return ret; } diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 7077aaaee6..be769186fc 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -1,5 +1,12 @@ # See docs/devel/tracing.txt for syntax documentation. +# hw/intc/i8259.c +pic_update_irq(bool master, uint8_t imr, uint8_t irr, uint8_t padd) "master %d imr %"PRIu8" irr %"PRIu8" padd %"PRIu8 +pic_set_irq(bool master, int irq, int level) "master %d irq %d level %d" +pic_interrupt(int irq, int intno) "irq %d intno %d" +pic_ioport_write(bool master, uint64_t addr, uint64_t val) "master %d addr 0x%"PRIx64" val 0x%"PRIx64 +pic_ioport_read(bool master, uint64_t addr, int val) "master %d addr 0x%"PRIx64" val 0x%x" + # hw/intc/apic_common.c cpu_set_apic_base(uint64_t val) "0x%016"PRIx64 cpu_get_apic_base(uint64_t val) "0x%016"PRIx64 From f260f7361ca6caf7bb672195c50db99eff26b856 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 10 Dec 2017 14:38:16 +0800 Subject: [PATCH 31/41] i8259: use DEBUG_IRQ_COUNT always MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not really scary to even enable it forever. After all it's i8259, and it's even not the kernel one. Then we can remove quite a few of lines to make it cleaner. And "info irq" will always work for it. Signed-off-by: Peter Xu Message-Id: <20171210063819.14892-3-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/intc/i8259.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index f12e0b27f1..20c9d0a58b 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -36,7 +36,6 @@ //#define DEBUG_PIC //#define DEBUG_IRQ_LATENCY -//#define DEBUG_IRQ_COUNT #define TYPE_I8259 "isa-i8259" #define PIC_CLASS(class) OBJECT_CLASS_CHECK(PICClass, (class), TYPE_I8259) @@ -52,12 +51,8 @@ typedef struct PICClass { DeviceRealize parent_realize; } PICClass; -#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) static int irq_level[16]; -#endif -#ifdef DEBUG_IRQ_COUNT static uint64_t irq_count[16]; -#endif #ifdef DEBUG_IRQ_LATENCY static int64_t irq_time[16]; #endif @@ -128,24 +123,17 @@ static void pic_set_irq(void *opaque, int irq, int level) { PICCommonState *s = opaque; int mask = 1 << irq; - -#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) || \ - defined(DEBUG_IRQ_LATENCY) int irq_index = s->master ? irq : irq + 8; -#endif trace_pic_set_irq(s->master, irq, level); -#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) if (level != irq_level[irq_index]) { irq_level[irq_index] = level; -#ifdef DEBUG_IRQ_COUNT if (level == 1) { irq_count[irq_index]++; } -#endif } -#endif + #ifdef DEBUG_IRQ_LATENCY if (level) { irq_time[irq_index] = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); @@ -253,12 +241,8 @@ static bool pic_get_statistics(InterruptStatsProvider *obj, PICCommonState *s = PIC_COMMON(obj); if (s->master) { -#ifdef DEBUG_IRQ_COUNT *irq_counts = irq_count; *nb_irqs = ARRAY_SIZE(irq_count); -#else - return false; -#endif } else { *irq_counts = NULL; *nb_irqs = 0; From 1b23190aba72a974c9a08496bf6d45e14b60087a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 10 Dec 2017 14:38:17 +0800 Subject: [PATCH 32/41] i8259: generalize statistics into common code It was only for userspace i8259. Move it to general code so that kvm-i8259 can also use it in the future. Signed-off-by: Peter Xu Message-Id: <20171210063819.14892-4-peterx@redhat.com> Signed-off-by: Paolo Bonzini --- hw/intc/i8259.c | 37 +---------------------------- hw/intc/i8259_common.c | 41 +++++++++++++++++++++++++++++++++ include/hw/isa/i8259_internal.h | 7 ++++-- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index 20c9d0a58b..d9b9666aff 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -25,11 +25,9 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include "hw/isa/isa.h" -#include "monitor/monitor.h" #include "qemu/timer.h" #include "qemu/log.h" #include "hw/isa/i8259_internal.h" -#include "hw/intc/intc.h" #include "trace.h" /* debug PIC */ @@ -51,8 +49,6 @@ typedef struct PICClass { DeviceRealize parent_realize; } PICClass; -static int irq_level[16]; -static uint64_t irq_count[16]; #ifdef DEBUG_IRQ_LATENCY static int64_t irq_time[16]; #endif @@ -126,13 +122,7 @@ static void pic_set_irq(void *opaque, int irq, int level) int irq_index = s->master ? irq : irq + 8; trace_pic_set_irq(s->master, irq, level); - - if (level != irq_level[irq_index]) { - irq_level[irq_index] = level; - if (level == 1) { - irq_count[irq_index]++; - } - } + pic_stat_update_irq(irq_index, level); #ifdef DEBUG_IRQ_LATENCY if (level) { @@ -235,31 +225,6 @@ static void pic_reset(DeviceState *dev) pic_init_reset(s); } -static bool pic_get_statistics(InterruptStatsProvider *obj, - uint64_t **irq_counts, unsigned int *nb_irqs) -{ - PICCommonState *s = PIC_COMMON(obj); - - if (s->master) { - *irq_counts = irq_count; - *nb_irqs = ARRAY_SIZE(irq_count); - } else { - *irq_counts = NULL; - *nb_irqs = 0; - } - return true; -} - -static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) -{ - PICCommonState *s = PIC_COMMON(obj); - monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " - "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", - s->master ? 0 : 1, s->irr, s->imr, s->isr, s->priority_add, - s->irq_base, s->read_reg_select, s->elcr, - s->special_fully_nested_mode); -} - static void pic_ioport_write(void *opaque, hwaddr addr64, uint64_t val64, unsigned size) { diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index 18427b459a..a3caddeefb 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -25,6 +25,10 @@ #include "qemu/osdep.h" #include "hw/i386/pc.h" #include "hw/isa/i8259_internal.h" +#include "monitor/monitor.h" + +static int irq_level[16]; +static uint64_t irq_count[16]; void pic_reset_common(PICCommonState *s) { @@ -98,6 +102,43 @@ ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master) return isadev; } +void pic_stat_update_irq(int irq, int level) +{ + if (level != irq_level[irq]) { + irq_level[irq] = level; + if (level == 1) { + irq_count[irq]++; + } + } +} + +bool pic_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs) +{ + PICCommonState *s = PIC_COMMON(obj); + + if (s->master) { + *irq_counts = irq_count; + *nb_irqs = ARRAY_SIZE(irq_count); + } else { + *irq_counts = NULL; + *nb_irqs = 0; + } + + return true; +} + +void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + PICCommonState *s = PIC_COMMON(obj); + + monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " + "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", + s->master ? 0 : 1, s->irr, s->imr, s->isr, s->priority_add, + s->irq_base, s->read_reg_select, s->elcr, + s->special_fully_nested_mode); +} + static const VMStateDescription vmstate_pic_common = { .name = "i8259", .version_id = 1, diff --git a/include/hw/isa/i8259_internal.h b/include/hw/isa/i8259_internal.h index 6954b6ec5f..f742c2a726 100644 --- a/include/hw/isa/i8259_internal.h +++ b/include/hw/isa/i8259_internal.h @@ -28,6 +28,7 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include "hw/isa/isa.h" +#include "hw/intc/intc.h" typedef struct PICCommonState PICCommonState; @@ -76,8 +77,10 @@ struct PICCommonState { }; void pic_reset_common(PICCommonState *s); - ISADevice *i8259_init_chip(const char *name, ISABus *bus, bool master); - +void pic_stat_update_irq(int irq, int level); +bool pic_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs); +void pic_print_info(InterruptStatsProvider *obj, Monitor *mon); #endif /* QEMU_I8259_INTERNAL_H */ From e267d16496e3998f271767a80ff9b0cc43be8a35 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 10 Dec 2017 14:38:18 +0800 Subject: [PATCH 33/41] kvm-i8259: support "info pic" and "info irq" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's leverage the i8259 common code for kvm-i8259 too. I think it's still possible that stats can lost when i8259 is in kernel and meanwhile when irqfd is used, e.g., by vfio or vhost devices. However that should be rare IMHO since they should be using MSIs mostly if they really want performance (that's why people use vhost and device assignment), and no old INTx should be used. As long as the INTx users are emulated in QEMU the stats will be correct. For "info pic", it should be always accurate since we fetch kvm regs before dump. More importantly, it's just too simple to do this now - it's only 10+ LOC to gain this feature. Signed-off-by: Peter Xu Message-Id: <20171210063819.14892-5-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/i386/kvm/i8259.c | 8 ++++++++ hw/intc/i8259_common.c | 1 + 2 files changed, 9 insertions(+) diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index 11d1b726b6..57abe091b0 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -111,6 +111,7 @@ static void kvm_pic_set_irq(void *opaque, int irq, int level) { int delivered; + pic_stat_update_irq(irq, level); delivered = kvm_set_irq(kvm_state, irq, level); apic_report_irq_delivered(delivered); } @@ -139,12 +140,15 @@ static void kvm_i8259_class_init(ObjectClass *klass, void *data) KVMPICClass *kpc = KVM_PIC_CLASS(klass); PICCommonClass *k = PIC_COMMON_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = kvm_pic_reset; kpc->parent_realize = dc->realize; dc->realize = kvm_pic_realize; k->pre_save = kvm_pic_get; k->post_load = kvm_pic_put; + ic->get_statistics = pic_get_statistics; + ic->print_info = pic_print_info; } static const TypeInfo kvm_i8259_info = { @@ -153,6 +157,10 @@ static const TypeInfo kvm_i8259_info = { .instance_size = sizeof(PICCommonState), .class_init = kvm_i8259_class_init, .class_size = sizeof(KVMPICClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void kvm_pic_register_types(void) diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index a3caddeefb..7efd2e8012 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -132,6 +132,7 @@ void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) { PICCommonState *s = PIC_COMMON(obj); + pic_dispatch_pre_save(s); monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", s->master ? 0 : 1, s->irr, s->imr, s->isr, s->priority_add, From b8c7723440324a7822acdb0b6f35c7ccb791862a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Sun, 10 Dec 2017 14:38:19 +0800 Subject: [PATCH 34/41] i8259: move TYPE_INTERRUPT_STATS_PROVIDER upper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now both classes (i8259, i8259-kvm) support this. Move this upper to the common class code. Signed-off-by: Peter Xu Message-Id: <20171210063819.14892-6-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/i386/kvm/i8259.c | 7 ------- hw/intc/i8259.c | 7 ------- hw/intc/i8259_common.c | 7 +++++++ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index 57abe091b0..b91e98074e 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -140,15 +140,12 @@ static void kvm_i8259_class_init(ObjectClass *klass, void *data) KVMPICClass *kpc = KVM_PIC_CLASS(klass); PICCommonClass *k = PIC_COMMON_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = kvm_pic_reset; kpc->parent_realize = dc->realize; dc->realize = kvm_pic_realize; k->pre_save = kvm_pic_get; k->post_load = kvm_pic_put; - ic->get_statistics = pic_get_statistics; - ic->print_info = pic_print_info; } static const TypeInfo kvm_i8259_info = { @@ -157,10 +154,6 @@ static const TypeInfo kvm_i8259_info = { .instance_size = sizeof(PICCommonState), .class_init = kvm_i8259_class_init, .class_size = sizeof(KVMPICClass), - .interfaces = (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, }; static void kvm_pic_register_types(void) diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index d9b9666aff..1602255a87 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -442,13 +442,10 @@ static void i8259_class_init(ObjectClass *klass, void *data) { PICClass *k = PIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); - InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); k->parent_realize = dc->realize; dc->realize = pic_realize; dc->reset = pic_reset; - ic->get_statistics = pic_get_statistics; - ic->print_info = pic_print_info; } static const TypeInfo i8259_info = { @@ -457,10 +454,6 @@ static const TypeInfo i8259_info = { .parent = TYPE_PIC_COMMON, .class_init = i8259_class_init, .class_size = sizeof(PICClass), - .interfaces = (InterfaceInfo[]) { - { TYPE_INTERRUPT_STATS_PROVIDER }, - { } - }, }; static void pic_register_types(void) diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index 7efd2e8012..c75c880157 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -178,6 +178,7 @@ static Property pic_properties_common[] = { static void pic_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->vmsd = &vmstate_pic_common; dc->props = pic_properties_common; @@ -189,6 +190,8 @@ static void pic_common_class_init(ObjectClass *klass, void *data) * code. */ dc->user_creatable = false; + ic->get_statistics = pic_get_statistics; + ic->print_info = pic_print_info; } static const TypeInfo pic_common_type = { @@ -198,6 +201,10 @@ static const TypeInfo pic_common_type = { .class_size = sizeof(PICCommonClass), .class_init = pic_common_class_init, .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void pic_common_register_types(void) From 6b012d2311e5ba0a952c2dcfe4327a73353c9fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 15 Dec 2017 19:18:10 +0100 Subject: [PATCH 35/41] checkpatch: volatile with a comment or sig_atomic_t is okay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This assumes that the comment gives some justification; "volatile sig_atomic_t" is also self-explanatory and usually correct. Discussed in: '[Qemu-devel] [PATCH] dump-guest-memory.py: fix "You can't do that without a process to debug"' Suggested-by: Fam Zheng Signed-off-by: Marc-André Lureau Message-Id: <20171215181810.4122-1-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- scripts/checkpatch.pl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 34df753571..3dc27d9656 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2475,8 +2475,11 @@ sub process { # no volatiles please my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b}; - if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) { - ERROR("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr); + if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/ && + $line !~ /sig_atomic_t/ && + !ctx_has_comment($first_line, $linenr)) { + my $msg = "Use of volatile is usually wrong, please add a comment\n" . $herecurr; + ERROR($msg); } # warn about #if 0 From 5a22ab71623c0fb709d49df353bdf2ec7c445c4c Mon Sep 17 00:00:00 2001 From: Yang Zhong Date: Wed, 20 Dec 2017 21:16:46 +0800 Subject: [PATCH 36/41] rcu: reduce more than 7MB heap memory by malloc_trim() Since there are some issues in memory alloc/free machenism in glibc for little chunk memory, if Qemu frequently alloc/free little chunk memory, the glibc doesn't alloc little chunk memory from free list of glibc and still allocate from OS, which make the heap size bigger and bigger. This patch introduce malloc_trim(), which will free heap memory when there is no rcu call during rcu thread loop. malloc_trim() can be enabled/disabled by --enable-malloc-trim/ --disable-malloc-trim in the Qemu configure command. The default malloc_trim() is enabled for libc. Below are test results from smaps file. (1)without patch 55f0783e1000-55f07992a000 rw-p 00000000 00:00 0 [heap] Size: 21796 kB Rss: 14260 kB Pss: 14260 kB (2)with patch 55cc5fadf000-55cc61008000 rw-p 00000000 00:00 0 [heap] Size: 21668 kB Rss: 6940 kB Pss: 6940 kB Signed-off-by: Yang Zhong Message-Id: <1513775806-19779-1-git-send-email-yang.zhong@intel.com> Signed-off-by: Paolo Bonzini --- configure | 35 +++++++++++++++++++++++++++++++++++ util/rcu.c | 6 ++++++ 2 files changed, 41 insertions(+) diff --git a/configure b/configure index 99ccc1725a..100309c33f 100755 --- a/configure +++ b/configure @@ -426,6 +426,7 @@ vxhs="" supported_cpu="no" supported_os="no" bogus_os="no" +malloc_trim="" # parse CC options first for opt do @@ -1047,6 +1048,10 @@ for opt do ;; --enable-tcg) tcg="yes" ;; + --disable-malloc-trim) malloc_trim="no" + ;; + --enable-malloc-trim) malloc_trim="yes" + ;; --disable-spice) spice="no" ;; --enable-spice) spice="yes" @@ -1466,6 +1471,7 @@ Advanced options (experts only): Default:trace- --disable-slirp disable SLIRP userspace network connectivity --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI) + --enable-malloc-trim enable libc malloc_trim() for memory optimization --oss-lib path to OSS library --cpu=CPU Build for host CPU [$cpu] --with-coroutine=BACKEND coroutine backend. Supported options: @@ -3860,6 +3866,30 @@ if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then exit 1 fi +# Even if malloc_trim() is available, these non-libc memory allocators +# do not support it. +if test "$tcmalloc" = "yes" || test "$jemalloc" = "yes" ; then + if test "$malloc_trim" = "yes" ; then + echo "Disabling malloc_trim with non-libc memory allocator" + fi + malloc_trim="no" +fi + +####################################### +# malloc_trim + +if test "$malloc_trim" != "no" ; then + cat > $TMPC << EOF +#include +int main(void) { malloc_trim(0); return 0; } +EOF + if compile_prog "" "" ; then + malloc_trim="yes" + else + malloc_trim="no" + fi +fi + ########################################## # tcmalloc probe @@ -5505,6 +5535,7 @@ if test "$tcg" = "yes" ; then echo "TCG debug enabled $debug_tcg" echo "TCG interpreter $tcg_interpreter" fi +echo "malloc trim support $malloc_trim" echo "RDMA support $rdma" echo "fdt support $fdt" echo "preadv support $preadv" @@ -6015,6 +6046,10 @@ if test "$opengl" = "yes" ; then fi fi +if test "$malloc_trim" = "yes" ; then + echo "CONFIG_MALLOC_TRIM=y" >> $config_host_mak +fi + if test "$avx2_opt" = "yes" ; then echo "CONFIG_AVX2_OPT=y" >> $config_host_mak fi diff --git a/util/rcu.c b/util/rcu.c index ca5a63e36a..f4d09c8304 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -32,6 +32,9 @@ #include "qemu/atomic.h" #include "qemu/thread.h" #include "qemu/main-loop.h" +#if defined(CONFIG_MALLOC_TRIM) +#include +#endif /* * Global grace period counter. Bit 0 is always one in rcu_gp_ctr. @@ -246,6 +249,9 @@ static void *call_rcu_thread(void *opaque) qemu_event_reset(&rcu_call_ready_event); n = atomic_read(&rcu_call_count); if (n == 0) { +#if defined(CONFIG_MALLOC_TRIM) + malloc_trim(4 * 1024 * 1024); +#endif qemu_event_wait(&rcu_call_ready_event); } } From d09c4a47874f30820b08c39ad39bcca9b8cde084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 3 Nov 2017 16:28:23 +0100 Subject: [PATCH 37/41] chardev: fix backend events regression with mux chardev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kirill noticied that on recent versions on QEMU he was not able to trigger SysRq to invoke debug capabilites of Linux Kernel. He tracked it down to qemu_chr_be_event() ignoring CHR_EVENT_BREAK due s->be being NULL. The bug was introduced in 2.8, commit a4afa548fc6d ("char: move front end handlers in CharBackend"). Since the commit, the qemu_chr_be_event() failed to deliver CHR_EVENT_BREAK due to qemu_chr_fe_init() does not set s->be in case of mux. Let's fix this by teaching mux to send an event to the frontend with the focus. Reported-by: Kirill A. Shutemov Signed-off-by: Marc-André Lureau Fixes: a4afa548fc6d ("char: move front end handlers in CharBackend") Message-Id: <20171103152824.21948-2-marcandre.lureau@redhat.com> Tested-by: Kirill A. Shutemov Signed-off-by: Paolo Bonzini --- chardev/char-mux.c | 10 ++++++++++ chardev/char.c | 18 ++++++++++++------ include/chardev/char.h | 1 + 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/chardev/char-mux.c b/chardev/char-mux.c index 4cda5e7458..567bf965cd 100644 --- a/chardev/char-mux.c +++ b/chardev/char-mux.c @@ -123,6 +123,15 @@ static void mux_chr_send_event(MuxChardev *d, int mux_nr, int event) } } +static void mux_chr_be_event(Chardev *chr, int event) +{ + MuxChardev *d = MUX_CHARDEV(chr); + + if (d->focus != -1) { + mux_chr_send_event(d, d->focus, event); + } +} + static int mux_proc_byte(Chardev *chr, MuxChardev *d, int ch) { if (d->term_got_escape) { @@ -346,6 +355,7 @@ static void char_mux_class_init(ObjectClass *oc, void *data) cc->chr_write = mux_chr_write; cc->chr_accept_input = mux_chr_accept_input; cc->chr_add_watch = mux_chr_add_watch; + cc->chr_be_event = mux_chr_be_event; } static const TypeInfo char_mux_type_info = { diff --git a/chardev/char.c b/chardev/char.c index 2ae4f465ec..8c3765ee99 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -43,10 +43,19 @@ static Object *get_chardevs_root(void) return container_get(object_get_root(), "/chardevs"); } -void qemu_chr_be_event(Chardev *s, int event) +static void chr_be_event(Chardev *s, int event) { CharBackend *be = s->be; + if (!be || !be->chr_event) { + return; + } + + be->chr_event(be->opaque, event); +} + +void qemu_chr_be_event(Chardev *s, int event) +{ /* Keep track if the char device is open */ switch (event) { case CHR_EVENT_OPENED: @@ -57,11 +66,7 @@ void qemu_chr_be_event(Chardev *s, int event) break; } - if (!be || !be->chr_event) { - return; - } - - be->chr_event(be->opaque, event); + CHARDEV_GET_CLASS(s)->chr_be_event(s, event); } /* Not reporting errors from writing to logfile, as logs are @@ -244,6 +249,7 @@ static void char_class_init(ObjectClass *oc, void *data) ChardevClass *cc = CHARDEV_CLASS(oc); cc->chr_write = null_chr_write; + cc->chr_be_event = chr_be_event; } static void char_finalize(Object *obj) diff --git a/include/chardev/char.h b/include/chardev/char.h index 43aabccef5..778d610295 100644 --- a/include/chardev/char.h +++ b/include/chardev/char.h @@ -248,6 +248,7 @@ typedef struct ChardevClass { void (*chr_accept_input)(Chardev *chr); void (*chr_set_echo)(Chardev *chr, bool echo); void (*chr_set_fe_open)(Chardev *chr, int fe_open); + void (*chr_be_event)(Chardev *s, int event); } ChardevClass; Chardev *qemu_chardev_new(const char *id, const char *typename, From d45f80ba82f281a35168011125cf6664cd217c22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 3 Nov 2017 16:28:24 +0100 Subject: [PATCH 38/41] test: add some chardev mux event tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check the expected behaviour of qemu_chr_be_event() on a mux chardev. For some reason, sending the event on the base chardev broadcast to all frontends, while sending it on the mux chardev itself should trigger the event on the currently focused chardev frontend. Signed-off-by: Marc-André Lureau Message-Id: <20171103152824.21948-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- tests/test-char.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test-char.c b/tests/test-char.c index 7ac25ff73f..911e3f6e8d 100644 --- a/tests/test-char.c +++ b/tests/test-char.c @@ -5,6 +5,7 @@ #include "qemu/config-file.h" #include "qemu/sockets.h" #include "chardev/char-fe.h" +#include "chardev/char-mux.h" #include "sysemu/sysemu.h" #include "qapi/error.h" #include "qom/qom-qobject.h" @@ -164,6 +165,7 @@ static void char_mux_test(void) FeHandler h1 = { 0, }, h2 = { 0, }; CharBackend chr_be1, chr_be2; + muxes_realized = true; /* done after machine init */ opts = qemu_opts_create(qemu_find_opts("chardev"), "mux-label", 1, &error_abort); qemu_opt_set(opts, "backend", "ringbuf", &error_abort); @@ -201,8 +203,23 @@ static void char_mux_test(void) g_assert_cmpstr(h2.read_buf, ==, "hello"); h2.read_count = 0; + g_assert_cmpint(h1.last_event, !=, 42); /* should be MUX_OUT or OPENED */ + g_assert_cmpint(h2.last_event, !=, 42); /* should be MUX_IN or OPENED */ + /* sending event on the base broadcast to all fe, historical reasons? */ + qemu_chr_be_event(base, 42); + g_assert_cmpint(h1.last_event, ==, 42); + g_assert_cmpint(h2.last_event, ==, 42); + qemu_chr_be_event(chr, -1); + g_assert_cmpint(h1.last_event, ==, 42); + g_assert_cmpint(h2.last_event, ==, -1); + /* switch focus */ qemu_chr_be_write(base, (void *)"\1c", 2); + g_assert_cmpint(h1.last_event, ==, CHR_EVENT_MUX_IN); + g_assert_cmpint(h2.last_event, ==, CHR_EVENT_MUX_OUT); + qemu_chr_be_event(chr, -1); + g_assert_cmpint(h1.last_event, ==, -1); + g_assert_cmpint(h2.last_event, ==, CHR_EVENT_MUX_OUT); qemu_chr_be_write(base, (void *)"hello", 6); g_assert_cmpint(h2.read_count, ==, 0); From 862172f45c38fb6c446cc8db8c9a3f5a9be1dee7 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 18 Dec 2017 10:16:42 +0000 Subject: [PATCH 39/41] blockdev: convert internal NBD server to QIONetListener Instead of creating a QIOChannelSocket directly for the NBD server socket, use a QIONetListener. This provides the ability to listen on multiple sockets at the same time, so enables full support for IPv4/IPv6 dual stack. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-Id: <20171218101643.20360-2-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- blockdev-nbd.c | 50 ++++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 28f551a7b0..9e3c22109c 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -18,10 +18,10 @@ #include "qmp-commands.h" #include "block/nbd.h" #include "io/channel-socket.h" +#include "io/net-listener.h" typedef struct NBDServerData { - QIOChannelSocket *listen_ioc; - int watch; + QIONetListener *listener; QCryptoTLSCreds *tlscreds; } NBDServerData; @@ -32,27 +32,13 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) nbd_client_put(client); } -static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, - gpointer opaque) +static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) { - QIOChannelSocket *cioc; - - if (!nbd_server) { - return FALSE; - } - - cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), - NULL); - if (!cioc) { - return TRUE; - } - qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); nbd_client_new(NULL, cioc, nbd_server->tlscreds, NULL, nbd_blockdev_client_closed); - object_unref(OBJECT(cioc)); - return TRUE; } @@ -62,10 +48,8 @@ static void nbd_server_free(NBDServerData *server) return; } - if (server->watch != -1) { - g_source_remove(server->watch); - } - object_unref(OBJECT(server->listen_ioc)); + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); if (server->tlscreds) { object_unref(OBJECT(server->tlscreds)); } @@ -112,12 +96,12 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, } nbd_server = g_new0(NBDServerData, 1); - nbd_server->watch = -1; - nbd_server->listen_ioc = qio_channel_socket_new(); - qio_channel_set_name(QIO_CHANNEL(nbd_server->listen_ioc), - "nbd-listener"); - if (qio_channel_socket_listen_sync( - nbd_server->listen_ioc, addr, errp) < 0) { + nbd_server->listener = qio_net_listener_new(); + + qio_net_listener_set_name(nbd_server->listener, + "nbd-listener"); + + if (qio_net_listener_open_sync(nbd_server->listener, addr, errp) < 0) { goto error; } @@ -134,12 +118,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, } } - nbd_server->watch = qio_channel_add_watch( - QIO_CHANNEL(nbd_server->listen_ioc), - G_IO_IN, - nbd_accept, - NULL, - NULL); + qio_net_listener_set_client_func(nbd_server->listener, + nbd_accept, + NULL, + NULL); return; From e4849c1d7cd4472ff4e747a6bcc1994a6e370307 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 18 Dec 2017 10:16:43 +0000 Subject: [PATCH 40/41] blockdev: convert qemu-nbd server to QIONetListener Instead of creating a QIOChannelSocket directly for the NBD server socket, use a QIONetListener. This provides the ability to listen on multiple sockets at the same time, so enables full support for IPv4/IPv6 dual stack. This also means we can honour multiple FDs received during socket activation. Reviewed-by: Eric Blake Signed-off-by: Daniel P. Berrange Message-Id: <20171218101643.20360-3-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-nbd.c | 61 +++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index d75ca51482..3723493be1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -37,6 +37,7 @@ #include "qapi/qmp/qstring.h" #include "qom/object_interfaces.h" #include "io/channel-socket.h" +#include "io/net-listener.h" #include "crypto/init.h" #include "trace/control.h" #include "qemu-version.h" @@ -62,8 +63,7 @@ static int persistent = 0; static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; static int shared = 1; static int nb_fds; -static QIOChannelSocket *server_ioc; -static int server_watch = -1; +static QIONetListener *server; static QCryptoTLSCreds *tlscreds; static void usage(const char *name) @@ -344,44 +344,25 @@ static void nbd_client_closed(NBDClient *client, bool negotiated) nbd_client_put(client); } -static gboolean nbd_accept(QIOChannel *ioc, GIOCondition cond, gpointer opaque) +static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) { - QIOChannelSocket *cioc; - - cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), - NULL); - if (!cioc) { - return TRUE; - } - if (state >= TERMINATE) { - object_unref(OBJECT(cioc)); - return TRUE; + return; } nb_fds++; nbd_update_server_watch(); nbd_client_new(newproto ? NULL : exp, cioc, tlscreds, NULL, nbd_client_closed); - object_unref(OBJECT(cioc)); - - return TRUE; } static void nbd_update_server_watch(void) { if (nbd_can_accept()) { - if (server_watch == -1) { - server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc), - G_IO_IN, - nbd_accept, - NULL, NULL); - } + qio_net_listener_set_client_func(server, nbd_accept, NULL, NULL); } else { - if (server_watch != -1) { - g_source_remove(server_watch); - server_watch = -1; - } + qio_net_listener_set_client_func(server, NULL, NULL, NULL); } } @@ -915,23 +896,29 @@ int main(int argc, char **argv) snprintf(sockpath, 128, SOCKET_PATH, basename(device)); } + server = qio_net_listener_new(); if (socket_activation == 0) { - server_ioc = qio_channel_socket_new(); saddr = nbd_build_socket_address(sockpath, bindto, port); - if (qio_channel_socket_listen_sync(server_ioc, saddr, &local_err) < 0) { - object_unref(OBJECT(server_ioc)); + if (qio_net_listener_open_sync(server, saddr, &local_err) < 0) { + object_unref(OBJECT(server)); error_report_err(local_err); - return 1; + exit(EXIT_FAILURE); } } else { + size_t i; /* See comment in check_socket_activation above. */ - assert(socket_activation == 1); - server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD, - &local_err); - if (server_ioc == NULL) { - error_report("Failed to use socket activation: %s", - error_get_pretty(local_err)); - exit(EXIT_FAILURE); + for (i = 0; i < socket_activation; i++) { + QIOChannelSocket *sioc; + sioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD + i, + &local_err); + if (sioc == NULL) { + object_unref(OBJECT(server)); + error_report("Failed to use socket activation: %s", + error_get_pretty(local_err)); + exit(EXIT_FAILURE); + } + qio_net_listener_add(server, sioc); + object_unref(OBJECT(sioc)); } } From 194b7f0d448361dd58d2f7f189147cf075988255 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 18 Dec 2017 13:54:17 +0000 Subject: [PATCH 41/41] chardev: convert the socket server to QIONetListener Instead of creating a QIOChannelSocket directly for the chardev server socket, use a QIONetListener. This provides the ability to listen on multiple sockets at the same time, so enables full support for IPv4/IPv6 dual stack. Signed-off-by: Daniel P. Berrange Message-Id: <20171218135417.28301-2-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- chardev/char-socket.c | 73 ++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/chardev/char-socket.c b/chardev/char-socket.c index 53eda8ef00..630a7f2995 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -25,6 +25,7 @@ #include "chardev/char.h" #include "io/channel-socket.h" #include "io/channel-tls.h" +#include "io/net-listener.h" #include "qemu/error-report.h" #include "qapi/error.h" #include "qapi/clone-visitor.h" @@ -40,8 +41,7 @@ typedef struct { Chardev parent; QIOChannel *ioc; /* Client I/O channel */ QIOChannelSocket *sioc; /* Client master channel */ - QIOChannelSocket *listen_ioc; - guint listen_tag; + QIONetListener *listener; QCryptoTLSCreds *tls_creds; int connected; int max_size; @@ -93,9 +93,9 @@ static void check_report_connect_error(Chardev *chr, qemu_chr_socket_restart_timer(chr); } -static gboolean tcp_chr_accept(QIOChannel *chan, - GIOCondition cond, - void *opaque); +static void tcp_chr_accept(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); static int tcp_chr_read_poll(void *opaque); static void tcp_chr_disconnect(Chardev *chr); @@ -401,9 +401,9 @@ static void tcp_chr_disconnect(Chardev *chr) tcp_chr_free_connection(chr); - if (s->listen_ioc && s->listen_tag == 0) { - s->listen_tag = qio_channel_add_watch( - QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); + if (s->listener) { + qio_net_listener_set_client_func(s->listener, tcp_chr_accept, + chr, NULL); } update_disconnected_filename(s); if (emit_close) { @@ -702,9 +702,8 @@ static int tcp_chr_new_client(Chardev *chr, QIOChannelSocket *sioc) if (s->do_nodelay) { qio_channel_set_delay(s->ioc, false); } - if (s->listen_tag) { - g_source_remove(s->listen_tag); - s->listen_tag = 0; + if (s->listener) { + qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); } if (s->tls_creds) { @@ -736,24 +735,14 @@ static int tcp_chr_add_client(Chardev *chr, int fd) return ret; } -static gboolean tcp_chr_accept(QIOChannel *channel, - GIOCondition cond, - void *opaque) +static void tcp_chr_accept(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque) { Chardev *chr = CHARDEV(opaque); - QIOChannelSocket *sioc; - sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel), - NULL); - if (!sioc) { - return TRUE; - } - - tcp_chr_new_client(chr, sioc); - - object_unref(OBJECT(sioc)); - - return TRUE; + tcp_chr_set_client_ioc_name(chr, cioc); + tcp_chr_new_client(chr, cioc); } static int tcp_chr_wait_connected(Chardev *chr, Error **errp) @@ -767,9 +756,10 @@ static int tcp_chr_wait_connected(Chardev *chr, Error **errp) if (s->is_listen) { info_report("QEMU waiting for connection on: %s", chr->filename); - qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), true, NULL); - tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); - qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); + sioc = qio_net_listener_wait_client(s->listener); + tcp_chr_set_client_ioc_name(chr, sioc); + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); } else { sioc = qio_channel_socket_new(); tcp_chr_set_client_ioc_name(chr, sioc); @@ -797,12 +787,9 @@ static void char_socket_finalize(Object *obj) s->reconnect_timer = 0; } qapi_free_SocketAddress(s->addr); - if (s->listen_tag) { - g_source_remove(s->listen_tag); - s->listen_tag = 0; - } - if (s->listen_ioc) { - object_unref(OBJECT(s->listen_ioc)); + if (s->listener) { + qio_net_listener_set_client_func(s->listener, NULL, NULL, NULL); + object_unref(OBJECT(s->listener)); } if (s->tls_creds) { object_unref(OBJECT(s->tls_creds)); @@ -935,29 +922,29 @@ static void qmp_chardev_open_socket(Chardev *chr, } else { if (s->is_listen) { char *name; - sioc = qio_channel_socket_new(); + s->listener = qio_net_listener_new(); name = g_strdup_printf("chardev-tcp-listener-%s", chr->label); - qio_channel_set_name(QIO_CHANNEL(sioc), name); + qio_net_listener_set_name(s->listener, name); g_free(name); - if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + if (qio_net_listener_open_sync(s->listener, s->addr, errp) < 0) { + object_unref(OBJECT(s->listener)); + s->listener = NULL; goto error; } qapi_free_SocketAddress(s->addr); - s->addr = socket_local_address(sioc->fd, errp); + s->addr = socket_local_address(s->listener->sioc[0]->fd, errp); update_disconnected_filename(s); - s->listen_ioc = sioc; if (is_waitconnect && qemu_chr_wait_connected(chr, errp) < 0) { return; } if (!s->ioc) { - s->listen_tag = qio_channel_add_watch( - QIO_CHANNEL(s->listen_ioc), G_IO_IN, - tcp_chr_accept, chr, NULL); + qio_net_listener_set_client_func(s->listener, tcp_chr_accept, + chr, NULL); } } else if (qemu_chr_wait_connected(chr, errp) < 0) { goto error;