From 024949caf32805f4cc3e7d363a80084b47aac1f6 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 4 Dec 2017 14:22:11 +0000 Subject: [PATCH 01/13] linux-user: Fix locking order in fork_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our locking order is that the tb lock should be taken inside the mmap_lock, but fork_start() grabs locks the other way around. This means that if a heavily multithreaded guest process (such as Java) calls fork() it can deadlock, with the thread that called fork() stuck in fork_start() with the tb lock and waiting for the mmap lock, but some other thread in tb_find() with the mmap lock and waiting for the tb lock. The cpu_list_lock() should also always be taken last, not first. Fix this by making fork_start() grab the locks in the right order. The order in which we drop locks doesn't matter, so we leave fork_end() the way it is. Signed-off-by: Peter Maydell Cc: qemu-stable@nongnu.org Reviewed-by: Paolo Bonzini Reviewed-by: Alex Bennée Message-Id: <1512397331-15238-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 450eb3ce65..e8406917e3 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -127,9 +127,9 @@ int cpu_get_pic_interrupt(CPUX86State *env) /* Make sure everything is in a consistent state for calling fork(). */ void fork_start(void) { - cpu_list_lock(); - qemu_mutex_lock(&tb_ctx.tb_lock); mmap_fork_start(); + qemu_mutex_lock(&tb_ctx.tb_lock); + cpu_list_lock(); } void fork_end(int child) From 06065c451f10c7ef62cfb575a87f323a70ae1c9e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 7 Dec 2017 12:41:21 +0000 Subject: [PATCH 02/13] linux-user: wrap fork() in a start/end exclusive section When we do a fork() in usermode emulation, we need to be in a start/end exclusive section, so that we can ensure that no other thread is in an RCU section. Otherwise you can get this deadlock: - fork thread: has mmap_lock, waits for rcu_sync_lock (because rcu_init_lock() is registered as a pthread_atfork() hook) - RCU thread: has rcu_sync_lock, waits for rcu_read_(un)lock - another CPU thread: in RCU critical section, waits for mmap_lock This can show up if you have a heavily multithreaded guest program that does a fork(). Signed-off-by: Peter Maydell Reported-by: Stuart Monteith Message-Id: <1512650481-1723-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/linux-user/main.c b/linux-user/main.c index e8406917e3..2140465709 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -127,6 +127,7 @@ int cpu_get_pic_interrupt(CPUX86State *env) /* Make sure everything is in a consistent state for calling fork(). */ void fork_start(void) { + start_exclusive(); mmap_fork_start(); qemu_mutex_lock(&tb_ctx.tb_lock); cpu_list_lock(); @@ -147,9 +148,13 @@ void fork_end(int child) qemu_mutex_init(&tb_ctx.tb_lock); qemu_init_cpu_list(); gdbserver_fork(thread_cpu); + /* qemu_init_cpu_list() takes care of reinitializing the + * exclusive state, so we don't need to end_exclusive() here. + */ } else { qemu_mutex_unlock(&tb_ctx.tb_lock); cpu_list_unlock(); + end_exclusive(); } } From 7174970a94df10ee84143edc7c94a2472d654604 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Dec 2017 13:52:55 +0000 Subject: [PATCH 03/13] linux-user: Fix length calculations in host_to_target_cmsg() The handling of length calculations in host_to_target_cmsg() was rather confused: * when checking for whether the target cmsg header fit in the remaining buffer, we were using the host struct size, not the target size * we were setting tgt_len to "target payload + header length" but then using it as if it were the target payload length alone * in various message type cases we weren't handling the possibility that host or target buffers were truncated Fix these problems. The second one in particular is liable to result in us overrunning the guest provided buffer, since we will try to convert more data than is actually present. Fixes: https://bugs.launchpad.net/qemu/+bug/1701808 Reported-by: Bruno Haible Signed-off-by: Peter Maydell Message-Id: <1513345976-22958-2-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 11c9116c4a..a1b9772a85 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1782,7 +1782,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, * to the guest via the CTRUNC bit), unlike truncation * in target_to_host_cmsg, which is a QEMU bug. */ - if (msg_controllen < sizeof(struct cmsghdr)) { + if (msg_controllen < sizeof(struct target_cmsghdr)) { target_msgh->msg_flags |= tswap32(MSG_CTRUNC); break; } @@ -1794,8 +1794,6 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, } target_cmsg->cmsg_type = tswap32(cmsg->cmsg_type); - tgt_len = TARGET_CMSG_LEN(len); - /* Payload types which need a different size of payload on * the target must adjust tgt_len here. */ @@ -1809,12 +1807,13 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, break; } default: + tgt_len = len; break; } - if (msg_controllen < tgt_len) { + if (msg_controllen < TARGET_CMSG_LEN(tgt_len)) { target_msgh->msg_flags |= tswap32(MSG_CTRUNC); - tgt_len = msg_controllen; + tgt_len = msg_controllen - sizeof(struct target_cmsghdr); } /* We must now copy-and-convert len bytes of payload @@ -1875,6 +1874,10 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, uint32_t *v = (uint32_t *)data; uint32_t *t_int = (uint32_t *)target_data; + if (len != sizeof(uint32_t) || + tgt_len != sizeof(uint32_t)) { + goto unimplemented; + } __put_user(*v, t_int); break; } @@ -1888,6 +1891,10 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, struct errhdr_t *target_errh = (struct errhdr_t *)target_data; + if (len != sizeof(struct errhdr_t) || + tgt_len != sizeof(struct errhdr_t)) { + goto unimplemented; + } __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); @@ -1911,6 +1918,10 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, uint32_t *v = (uint32_t *)data; uint32_t *t_int = (uint32_t *)target_data; + if (len != sizeof(uint32_t) || + tgt_len != sizeof(uint32_t)) { + goto unimplemented; + } __put_user(*v, t_int); break; } @@ -1924,6 +1935,10 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, struct errhdr6_t *target_errh = (struct errhdr6_t *)target_data; + if (len != sizeof(struct errhdr6_t) || + tgt_len != sizeof(struct errhdr6_t)) { + goto unimplemented; + } __put_user(errh->ee.ee_errno, &target_errh->ee.ee_errno); __put_user(errh->ee.ee_origin, &target_errh->ee.ee_origin); __put_user(errh->ee.ee_type, &target_errh->ee.ee_type); @@ -1950,8 +1965,8 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, } } - target_cmsg->cmsg_len = tswapal(tgt_len); - tgt_space = TARGET_CMSG_SPACE(len); + target_cmsg->cmsg_len = tswapal(TARGET_CMSG_LEN(tgt_len)); + tgt_space = TARGET_CMSG_SPACE(tgt_len); if (msg_controllen < tgt_space) { tgt_space = msg_controllen; } From ad762b990fa9da53e203b934583838d4dd371e20 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Dec 2017 13:52:56 +0000 Subject: [PATCH 04/13] linux-user: Don't use CMSG_ALIGN(sizeof struct cmsghdr) The Linux struct cmsghdr is already guaranteed to be sufficiently aligned that CMSG_ALIGN(sizeof struct cmsghdr) is always equal to sizeof struct cmsghdr. Stop doing the unnecessary alignment arithmetic for host and target cmsghdr. This follows kernel commit 1ff8cebf49ed9e9ca2 and brings our TARGET_CMSG_* macros back into line with the kernel ones, as well as making them easier to understand. Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Message-Id: <1513345976-22958-3-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 4 ++-- linux-user/syscall_defs.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a1b9772a85..39553c81b6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -1692,7 +1692,7 @@ static inline abi_long target_to_host_cmsg(struct msghdr *msgh, void *target_data = TARGET_CMSG_DATA(target_cmsg); int len = tswapal(target_cmsg->cmsg_len) - - TARGET_CMSG_ALIGN(sizeof (struct target_cmsghdr)); + - sizeof(struct target_cmsghdr); space += CMSG_SPACE(len); if (space > msgh->msg_controllen) { @@ -1773,7 +1773,7 @@ static inline abi_long host_to_target_cmsg(struct target_msghdr *target_msgh, void *data = CMSG_DATA(cmsg); void *target_data = TARGET_CMSG_DATA(target_cmsg); - int len = cmsg->cmsg_len - CMSG_ALIGN(sizeof (struct cmsghdr)); + int len = cmsg->cmsg_len - sizeof(struct cmsghdr); int tgt_len, tgt_space; /* We never copy a half-header but may copy half-data; diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index bec3680b94..a35c52a60a 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -303,9 +303,9 @@ struct target_cmsghdr { __target_cmsg_nxthdr(mhdr, cmsg, cmsg_start) #define TARGET_CMSG_ALIGN(len) (((len) + sizeof (abi_long) - 1) \ & (size_t) ~(sizeof (abi_long) - 1)) -#define TARGET_CMSG_SPACE(len) (TARGET_CMSG_ALIGN (len) \ - + TARGET_CMSG_ALIGN (sizeof (struct target_cmsghdr))) -#define TARGET_CMSG_LEN(len) (TARGET_CMSG_ALIGN (sizeof (struct target_cmsghdr)) + (len)) +#define TARGET_CMSG_SPACE(len) (sizeof(struct target_cmsghdr) + \ + TARGET_CMSG_ALIGN(len)) +#define TARGET_CMSG_LEN(len) (sizeof(struct target_cmsghdr) + (len)) static __inline__ struct target_cmsghdr * __target_cmsg_nxthdr(struct target_msghdr *__mhdr, From 10fa993aae539fa8d0da1da17169e05f1dfb4f95 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 15 Dec 2017 15:18:00 +0000 Subject: [PATCH 05/13] linux-user: Translate flags argument to dup3 syscall The third argument to dup3() is a flags word which may be O_CLOEXEC. We weren't translating this flag from target to host value, which meant that if the target used a different value from the host (eg sparc guest and x86 host) the dup3() call would fail EINVAL. Do the correct translation. Fixes: https://bugs.launchpad.net/qemu/+bug/1704658 Reported-by: Bruno Haible Signed-off-by: Peter Maydell Reviewed-by: Laurent Vivier Message-Id: <1513351080-25917-1-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 39553c81b6..41ded90ee6 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8490,11 +8490,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #if defined(CONFIG_DUP3) && defined(TARGET_NR_dup3) case TARGET_NR_dup3: - ret = get_errno(dup3(arg1, arg2, arg3)); + { + int host_flags; + + if ((arg3 & ~TARGET_O_CLOEXEC) != 0) { + return -EINVAL; + } + host_flags = target_to_host_bitmask(arg3, fcntl_flags_tbl); + ret = get_errno(dup3(arg1, arg2, host_flags)); if (ret >= 0) { fd_trans_dup(arg1, arg2); } break; + } #endif #ifdef TARGET_NR_getppid /* not on alpha */ case TARGET_NR_getppid: From 95e6d4305affbbf5258c6a533be9bfd24599da90 Mon Sep 17 00:00:00 2001 From: Maximilian Riemensberger Date: Sun, 7 Jan 2018 01:01:44 +0000 Subject: [PATCH 06/13] linux-user/mmap.c: Avoid choosing NULL as start address mmap() is required by the linux kernel ABI and POSIX to return a non-NULL address when the implementation chooses a start address for the mapping. The current implementation of mmap_find_vma_reserved() can return NULL as start address of a mapping which leads to subsequent crashes inside the guests glibc, e.g. output of qemu-arm-static --strace executing a test binary stx_test: 1879 mmap2(NULL,8388608,PROT_READ|PROT_WRITE,MAP_PRIVATE|MAP_ANONYMOUS|0x20000,-1,0) = 0x00000000 1879 write(2,0xf6fd39d0,79) stx_test: allocatestack.c:514: allocate_stack: Assertion `mem != NULL' failed. This patch fixes mmap_find_vma_reserved() by skipping NULL as start address while searching for a suitable mapping start address. CC: Riku Voipio CC: Laurent Vivier CC: Peter Maydell Signed-off-by: Maximilian Riemensberger Reviewed-by: Laurent Vivier Reviewed-by: Richard Henderson Message-Id: <1515286904-86418-1-git-send-email-riemensberger@cadami.net> Signed-off-by: Laurent Vivier --- linux-user/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 4888f53139..0fbfd6dff2 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -234,7 +234,7 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size) if (prot) { end_addr = addr; } - if (addr + size == end_addr) { + if (addr && addr + size == end_addr) { break; } addr -= qemu_host_page_size; From 2e0a8713bd6804ce49044915ba9cd5bd7148de77 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 9 Jan 2018 21:16:43 +0100 Subject: [PATCH 07/13] linux-user: Fix sched_get/setaffinity conversion sched_get/setaffinity linux-user syscalls were missing conversions for little/big endian, which is hairy since longs may not be the same size either. For simplicity, this just introduces loops to convert bit by bit like is done for select. Signed-off-by: Samuel Thibault Reviewed-by: Laurent Vivier Message-Id: <20180109201643.1479-1-samuel.thibault@ens-lyon.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 81 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 41ded90ee6..143e4a959d 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -7731,6 +7731,73 @@ static TargetFdTrans target_inotify_trans = { }; #endif +static int target_to_host_cpu_mask(unsigned long *host_mask, + size_t host_size, + abi_ulong target_addr, + size_t target_size) +{ + unsigned target_bits = sizeof(abi_ulong) * 8; + unsigned host_bits = sizeof(*host_mask) * 8; + abi_ulong *target_mask; + unsigned i, j; + + assert(host_size >= target_size); + + target_mask = lock_user(VERIFY_READ, target_addr, target_size, 1); + if (!target_mask) { + return -TARGET_EFAULT; + } + memset(host_mask, 0, host_size); + + for (i = 0 ; i < target_size / sizeof(abi_ulong); i++) { + unsigned bit = i * target_bits; + abi_ulong val; + + __get_user(val, &target_mask[i]); + for (j = 0; j < target_bits; j++, bit++) { + if (val & (1UL << j)) { + host_mask[bit / host_bits] |= 1UL << (bit % host_bits); + } + } + } + + unlock_user(target_mask, target_addr, 0); + return 0; +} + +static int host_to_target_cpu_mask(const unsigned long *host_mask, + size_t host_size, + abi_ulong target_addr, + size_t target_size) +{ + unsigned target_bits = sizeof(abi_ulong) * 8; + unsigned host_bits = sizeof(*host_mask) * 8; + abi_ulong *target_mask; + unsigned i, j; + + assert(host_size >= target_size); + + target_mask = lock_user(VERIFY_WRITE, target_addr, target_size, 0); + if (!target_mask) { + return -TARGET_EFAULT; + } + + for (i = 0 ; i < target_size / sizeof(abi_ulong); i++) { + unsigned bit = i * target_bits; + abi_ulong val = 0; + + for (j = 0; j < target_bits; j++, bit++) { + if (host_mask[bit / host_bits] & (1UL << (bit % host_bits))) { + val |= 1UL << j; + } + } + __put_user(val, &target_mask[i]); + } + + unlock_user(target_mask, target_addr, target_size); + return 0; +} + /* do_syscall() should always have a single exit point at the end so that actions, such as logging of syscall results, can be performed. All errnos that do_syscall() returns must be -TARGET_. */ @@ -10376,6 +10443,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, mask_size = (arg2 + (sizeof(*mask) - 1)) & ~(sizeof(*mask) - 1); mask = alloca(mask_size); + memset(mask, 0, mask_size); ret = get_errno(sys_sched_getaffinity(arg1, mask_size, mask)); if (!is_error(ret)) { @@ -10395,9 +10463,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = arg2; } - if (copy_to_user(arg3, mask, ret)) { - goto efault; - } + ret = host_to_target_cpu_mask(mask, mask_size, arg3, arg2); } } break; @@ -10415,13 +10481,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; } mask_size = (arg2 + (sizeof(*mask) - 1)) & ~(sizeof(*mask) - 1); - mask = alloca(mask_size); - if (!lock_user_struct(VERIFY_READ, p, arg3, 1)) { - goto efault; + + ret = target_to_host_cpu_mask(mask, mask_size, arg3, arg2); + if (ret) { + break; } - memcpy(mask, p, arg2); - unlock_user_struct(p, arg2, 0); ret = get_errno(sys_sched_setaffinity(arg1, mask_size, mask)); } From 444cd5c3aeb2acffce302e354ad22d1bb37bbfae Mon Sep 17 00:00:00 2001 From: Marco A L Barbosa Date: Thu, 11 Jan 2018 16:37:14 -0200 Subject: [PATCH 08/13] linux-user: Add AT_SECURE auxval Signed-off-by: Marco A L Barbosa Reviewed-by: Peter Maydell Reviewed-by: Laurent Vivier Message-Id: <20180111183714.22834-2-malbarbo@gmail.com> Signed-off-by: Laurent Vivier --- linux-user/elfload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 20f3d8c2c3..32a47674e6 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1354,7 +1354,7 @@ struct exec ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1)) #define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) -#define DLINFO_ITEMS 14 +#define DLINFO_ITEMS 15 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) { @@ -1786,6 +1786,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_HWCAP, (abi_ulong) ELF_HWCAP); NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes); + NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE)); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, (abi_ulong) ELF_HWCAP2); From b827c3ed30e4777ab9bd91cb240252a4815b3322 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 12 Jan 2018 09:14:35 +0100 Subject: [PATCH 09/13] linux-user: Add getcpu() support Signed-off-by: Samuel Thibault Reviewed-by: Laurent Vivier Message-Id: <20180112081435.21299-1-samuel.thibault@ens-lyon.org> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 143e4a959d..104408c050 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -296,6 +296,8 @@ _syscall3(int, sys_sched_getaffinity, pid_t, pid, unsigned int, len, #define __NR_sys_sched_setaffinity __NR_sched_setaffinity _syscall3(int, sys_sched_setaffinity, pid_t, pid, unsigned int, len, unsigned long *, user_mask_ptr); +#define __NR_sys_getcpu __NR_getcpu +_syscall3(int, sys_getcpu, unsigned *, cpu, unsigned *, node, void *, tcache); _syscall4(int, reboot, int, magic1, int, magic2, unsigned int, cmd, void *, arg); _syscall2(int, capget, struct __user_cap_header_struct *, header, @@ -10491,6 +10493,23 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(sys_sched_setaffinity(arg1, mask_size, mask)); } break; + case TARGET_NR_getcpu: + { + unsigned cpu, node; + ret = get_errno(sys_getcpu(arg1 ? &cpu : NULL, + arg2 ? &node : NULL, + NULL)); + if (is_error(ret)) { + goto fail; + } + if (arg1 && put_user_u32(cpu, arg1)) { + goto efault; + } + if (arg2 && put_user_u32(node, arg2)) { + goto efault; + } + } + break; case TARGET_NR_sched_setparam: { struct sched_param *target_schp; From bfdec7f80e639cb858ed7df77da763cca85e1e61 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 3 Nov 2017 20:38:02 +0100 Subject: [PATCH 10/13] linux-user: remove nmi.c and fw-path-provider.c linux-user binaries don't need firmware and NMI, so don't add them in this case, move QDEV firmware functions to qdev-fw.c Signed-off-by: Laurent Vivier Acked-by: Paolo Bonzini Message-Id: <20171103193802.11876-1-laurent@vivier.eu> --- hw/core/Makefile.objs | 5 ++- hw/core/qdev-fw.c | 96 +++++++++++++++++++++++++++++++++++++++++++ hw/core/qdev.c | 77 ---------------------------------- 3 files changed, 99 insertions(+), 79 deletions(-) create mode 100644 hw/core/qdev-fw.c diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs index f8d7a4aaed..1240728c87 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs @@ -1,11 +1,12 @@ # core qdev-related obj files, also used by *-user: common-obj-y += qdev.o qdev-properties.o common-obj-y += bus.o reset.o -common-obj-y += fw-path-provider.o +common-obj-$(CONFIG_SOFTMMU) += qdev-fw.o +common-obj-$(CONFIG_SOFTMMU) += fw-path-provider.o # irq.o needed for qdev GPIO handling: common-obj-y += irq.o common-obj-y += hotplug.o -common-obj-y += nmi.o +common-obj-$(CONFIG_SOFTMMU) += nmi.o common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o common-obj-$(CONFIG_XILINX_AXI) += stream.o diff --git a/hw/core/qdev-fw.c b/hw/core/qdev-fw.c new file mode 100644 index 0000000000..aa35e9d0ac --- /dev/null +++ b/hw/core/qdev-fw.c @@ -0,0 +1,96 @@ +/* + * qdev fw helpers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/qdev.h" +#include "hw/fw-path-provider.h" + +const char *qdev_fw_name(DeviceState *dev) +{ + DeviceClass *dc = DEVICE_GET_CLASS(dev); + + if (dc->fw_name) { + return dc->fw_name; + } + + return object_get_typename(OBJECT(dev)); +} + +static char *bus_get_fw_dev_path(BusState *bus, DeviceState *dev) +{ + BusClass *bc = BUS_GET_CLASS(bus); + + if (bc->get_fw_dev_path) { + return bc->get_fw_dev_path(dev); + } + + return NULL; +} + +static char *qdev_get_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) +{ + Object *obj = OBJECT(dev); + char *d = NULL; + + while (!d && obj->parent) { + obj = obj->parent; + d = fw_path_provider_try_get_dev_path(obj, bus, dev); + } + return d; +} + +char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) +{ + Object *obj = OBJECT(dev); + + return fw_path_provider_try_get_dev_path(obj, bus, dev); +} + +static int qdev_get_fw_dev_path_helper(DeviceState *dev, char *p, int size) +{ + int l = 0; + + if (dev && dev->parent_bus) { + char *d; + l = qdev_get_fw_dev_path_helper(dev->parent_bus->parent, p, size); + d = qdev_get_fw_dev_path_from_handler(dev->parent_bus, dev); + if (!d) { + d = bus_get_fw_dev_path(dev->parent_bus, dev); + } + if (d) { + l += snprintf(p + l, size - l, "%s", d); + g_free(d); + } else { + return l; + } + } + l += snprintf(p + l , size - l, "/"); + + return l; +} + +char *qdev_get_fw_dev_path(DeviceState *dev) +{ + char path[128]; + int l; + + l = qdev_get_fw_dev_path_helper(dev, path, 128); + + path[l - 1] = '\0'; + + return g_strdup(path); +} diff --git a/hw/core/qdev.c b/hw/core/qdev.c index f739753e3a..2456035d1a 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -27,7 +27,6 @@ #include "qemu/osdep.h" #include "hw/qdev.h" -#include "hw/fw-path-provider.h" #include "sysemu/sysemu.h" #include "qapi/qmp/qerror.h" #include "qapi/visitor.h" @@ -48,17 +47,6 @@ const VMStateDescription *qdev_get_vmsd(DeviceState *dev) return dc->vmsd; } -const char *qdev_fw_name(DeviceState *dev) -{ - DeviceClass *dc = DEVICE_GET_CLASS(dev); - - if (dc->fw_name) { - return dc->fw_name; - } - - return object_get_typename(OBJECT(dev)); -} - static void bus_remove_child(BusState *bus, DeviceState *child) { BusChild *kid; @@ -631,71 +619,6 @@ DeviceState *qdev_find_recursive(BusState *bus, const char *id) return NULL; } -static char *bus_get_fw_dev_path(BusState *bus, DeviceState *dev) -{ - BusClass *bc = BUS_GET_CLASS(bus); - - if (bc->get_fw_dev_path) { - return bc->get_fw_dev_path(dev); - } - - return NULL; -} - -static char *qdev_get_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) -{ - Object *obj = OBJECT(dev); - char *d = NULL; - - while (!d && obj->parent) { - obj = obj->parent; - d = fw_path_provider_try_get_dev_path(obj, bus, dev); - } - return d; -} - -char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev) -{ - Object *obj = OBJECT(dev); - - return fw_path_provider_try_get_dev_path(obj, bus, dev); -} - -static int qdev_get_fw_dev_path_helper(DeviceState *dev, char *p, int size) -{ - int l = 0; - - if (dev && dev->parent_bus) { - char *d; - l = qdev_get_fw_dev_path_helper(dev->parent_bus->parent, p, size); - d = qdev_get_fw_dev_path_from_handler(dev->parent_bus, dev); - if (!d) { - d = bus_get_fw_dev_path(dev->parent_bus, dev); - } - if (d) { - l += snprintf(p + l, size - l, "%s", d); - g_free(d); - } else { - return l; - } - } - l += snprintf(p + l , size - l, "/"); - - return l; -} - -char* qdev_get_fw_dev_path(DeviceState *dev) -{ - char path[128]; - int l; - - l = qdev_get_fw_dev_path_helper(dev, path, 128); - - path[l-1] = '\0'; - - return g_strdup(path); -} - char *qdev_get_dev_path(DeviceState *dev) { BusClass *bc; From a78b1299f1bbb9608e3e3a36a7f16cf700a2789d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 28 Nov 2017 14:35:24 +0000 Subject: [PATCH 11/13] linux-user: Propagate siginfo_t through to handle_cpu_signal() Currently all the architecture/OS specific cpu_signal_handler() functions call handle_cpu_signal() without passing it the siginfo_t. We're going to want that so we can look at the si_code to determine whether this is a SEGV_ACCERR access violation or some other kind of fault, so change the functions to pass through the pointer to the siginfo_t rather than just the si_addr value. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-Id: <1511879725-9576-2-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- accel/tcg/user-exec.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index f42285ea1c..e8f26ff0cb 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -57,12 +57,13 @@ static void cpu_exit_tb_from_sighandler(CPUState *cpu, sigset_t *old_set) the effective address of the memory exception. 'is_write' is 1 if a write caused the exception and otherwise 0'. 'old_set' is the signal set which should be restored */ -static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, +static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, int is_write, sigset_t *old_set) { CPUState *cpu = current_cpu; CPUClass *cc; int ret; + unsigned long address = (unsigned long)info->si_addr; /* We must handle PC addresses from two different sources: * a call return address and a signal frame address. @@ -215,9 +216,8 @@ int cpu_signal_handler(int host_signum, void *pinfo, #endif pc = EIP_sig(uc); trapno = TRAP_sig(uc); - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - trapno == 0xe ? - (ERROR_sig(uc) >> 1) & 1 : 0, + return handle_cpu_signal(pc, info, + trapno == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, &MASK_sig(uc)); } @@ -261,9 +261,8 @@ int cpu_signal_handler(int host_signum, void *pinfo, #endif pc = PC_sig(uc); - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - TRAP_sig(uc) == 0xe ? - (ERROR_sig(uc) >> 1) & 1 : 0, + return handle_cpu_signal(pc, info, + TRAP_sig(uc) == 0xe ? (ERROR_sig(uc) >> 1) & 1 : 0, &MASK_sig(uc)); } @@ -341,8 +340,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, is_write = 1; } #endif - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #elif defined(__alpha__) @@ -372,8 +370,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, is_write = 1; } - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #elif defined(__sparc__) @@ -432,8 +429,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, break; } } - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, sigmask); + return handle_cpu_signal(pc, info, is_write, sigmask); } #elif defined(__arm__) @@ -466,9 +462,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, * later processor; on v5 we will always report this as a read). */ is_write = extract32(uc->uc_mcontext.error_code, 11, 1); - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, - &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #elif defined(__aarch64__) @@ -495,8 +489,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) /* Ignore bits 23 & 24, controlling indexing. */ || (insn & 0x3a400000) == 0x28000000); /* C3.3.7,14-16 */ - return handle_cpu_signal(pc, (uintptr_t)info->si_addr, - is_write, &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #elif defined(__ia64) @@ -529,9 +522,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) default: break; } - return handle_cpu_signal(ip, (unsigned long)info->si_addr, - is_write, - (sigset_t *)&uc->uc_sigmask); + return handle_cpu_signal(ip, info, is_write, (sigset_t *)&uc->uc_sigmask); } #elif defined(__s390__) @@ -583,8 +574,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, } break; } - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #elif defined(__mips__) @@ -599,8 +589,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, /* XXX: compute is_write */ is_write = 0; - return handle_cpu_signal(pc, (unsigned long)info->si_addr, - is_write, &uc->uc_sigmask); + return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask); } #else From 9c4bbee9e3b83544257e82566342c29e15a88637 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 28 Nov 2017 14:35:25 +0000 Subject: [PATCH 12/13] page_unprotect(): handle calls to pages that are PAGE_WRITE If multiple guest threads in user-mode emulation write to a page which QEMU has marked read-only because of cached TCG translations, the threads can race in page_unprotect: * threads A & B both try to do a write to a page with code in it at the same time (ie which we've made non-writeable, so SEGV) * they race into the signal handler with this faulting address * thread A happens to get to page_unprotect() first and takes the mmap lock, so thread B sits waiting for it to be done * A then finds the page, marks it PAGE_WRITE and mprotect()s it writable * A can then continue OK (returns from signal handler to retry the memory access) * ...but when B gets the mmap lock it finds that the page is already PAGE_WRITE, and so it exits page_unprotect() via the "not due to protected translation" code path, and wrongly delivers the signal to the guest rather than just retrying the access In particular, this meant that trying to run 'javac' in user-mode emulation would fail with a spurious guest SIGSEGV. Handle this by making page_unprotect() assume that a call for a page which is already PAGE_WRITE is due to a race of this sort and return a "fault handled" indication. Since this would cause an infinite loop if we ever called page_unprotect() for some other kind of fault than "write failed due to bad access permissions", tighten the condition in handle_cpu_signal() to check the signal number and si_code, and add a comment so that if somebody does ever find themselves debugging an infinite loop of faults they have some clue about why. (The trick for identifying the correct setting for current_tb_invalidated for thread B (needed to handle the precise-SMC case) is due to Richard Henderson. Paolo Bonzini suggested just relying on si_code rather than trying anything more complicated.) Signed-off-by: Peter Maydell Message-Id: <1511879725-9576-3-git-send-email-peter.maydell@linaro.org> Signed-off-by: Laurent Vivier --- accel/tcg/translate-all.c | 50 ++++++++++++++++++++++++--------------- accel/tcg/user-exec.c | 13 +++++++++- 2 files changed, 43 insertions(+), 20 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 7736257085..67795cd78c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -2181,29 +2181,41 @@ int page_unprotect(target_ulong address, uintptr_t pc) /* if the page was really writable, then we change its protection back to writable */ - if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) { - host_start = address & qemu_host_page_mask; - host_end = host_start + qemu_host_page_size; - - prot = 0; + if (p->flags & PAGE_WRITE_ORG) { current_tb_invalidated = false; - for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) { - p = page_find(addr >> TARGET_PAGE_BITS); - p->flags |= PAGE_WRITE; - prot |= p->flags; - - /* and since the content will be modified, we must invalidate - the corresponding translated code. */ - current_tb_invalidated |= tb_invalidate_phys_page(addr, pc); -#ifdef CONFIG_USER_ONLY - if (DEBUG_TB_CHECK_GATE) { - tb_invalidate_check(addr); + if (p->flags & PAGE_WRITE) { + /* If the page is actually marked WRITE then assume this is because + * this thread raced with another one which got here first and + * set the page to PAGE_WRITE and did the TB invalidate for us. + */ +#ifdef TARGET_HAS_PRECISE_SMC + TranslationBlock *current_tb = tb_find_pc(pc); + if (current_tb) { + current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID; } #endif - } - mprotect((void *)g2h(host_start), qemu_host_page_size, - prot & PAGE_BITS); + } else { + host_start = address & qemu_host_page_mask; + host_end = host_start + qemu_host_page_size; + prot = 0; + for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) { + p = page_find(addr >> TARGET_PAGE_BITS); + p->flags |= PAGE_WRITE; + prot |= p->flags; + + /* and since the content will be modified, we must invalidate + the corresponding translated code. */ + current_tb_invalidated |= tb_invalidate_phys_page(addr, pc); +#ifdef CONFIG_USER_ONLY + if (DEBUG_TB_CHECK_GATE) { + tb_invalidate_check(addr); + } +#endif + } + mprotect((void *)g2h(host_start), qemu_host_page_size, + prot & PAGE_BITS); + } mmap_unlock(); /* If current TB was invalidated return to main loop */ return current_tb_invalidated ? 2 : 1; diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c index e8f26ff0cb..c973752562 100644 --- a/accel/tcg/user-exec.c +++ b/accel/tcg/user-exec.c @@ -104,7 +104,18 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info, pc, address, is_write, *(unsigned long *)old_set); #endif /* XXX: locking issue */ - if (is_write && h2g_valid(address)) { + /* Note that it is important that we don't call page_unprotect() unless + * this is really a "write to nonwriteable page" fault, because + * page_unprotect() assumes that if it is called for an access to + * a page that's writeable this means we had two threads racing and + * another thread got there first and already made the page writeable; + * so we will retry the access. If we were to call page_unprotect() + * for some other kind of fault that should really be passed to the + * guest, we'd end up in an infinite loop of retrying the faulting + * access. + */ + if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && + h2g_valid(address)) { switch (page_unprotect(h2g(address), pc)) { case 0: /* Fault not caused by a page marked unwritable to protect From 95d0307cc10ca3df879c1be519f1ad650efb20a8 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 23 Jan 2018 11:53:31 +0100 Subject: [PATCH 13/13] linux-user: implement renameat2 This is needed for new architectures like RISC-V which do not provide any other rename-like syscall. Signed-off-by: Andreas Schwab Reviewed-by: Laurent Vivier Message-Id: Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 104408c050..74378947f0 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -600,6 +600,24 @@ static int sys_utimensat(int dirfd, const char *pathname, #endif #endif /* TARGET_NR_utimensat */ +#ifdef TARGET_NR_renameat2 +#if defined(__NR_renameat2) +#define __NR_sys_renameat2 __NR_renameat2 +_syscall5(int, sys_renameat2, int, oldfd, const char *, old, int, newfd, + const char *, new, unsigned int, flags) +#else +static int sys_renameat2(int oldfd, const char *old, + int newfd, const char *new, int flags) +{ + if (flags == 0) { + return renameat(oldfd, old, newfd, new); + } + errno = ENOSYS; + return -1; +} +#endif +#endif /* TARGET_NR_renameat2 */ + #ifdef CONFIG_INOTIFY #include @@ -8426,6 +8444,22 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } break; #endif +#if defined(TARGET_NR_renameat2) + case TARGET_NR_renameat2: + { + void *p2; + p = lock_user_string(arg2); + p2 = lock_user_string(arg4); + if (!p || !p2) { + ret = -TARGET_EFAULT; + } else { + ret = get_errno(sys_renameat2(arg1, p, arg3, p2, arg5)); + } + unlock_user(p2, arg4, 0); + unlock_user(p, arg2, 0); + } + break; +#endif #ifdef TARGET_NR_mkdir case TARGET_NR_mkdir: if (!(p = lock_user_string(arg1)))