From 5b0c2742c839376b7e03c4654914aaec6a8a7b09 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 1 Aug 2024 22:23:22 +0200 Subject: [PATCH 1/6] linux-user/elfload: Fix pr_pid values in core files Analyzing qemu-produced core dumps of multi-threaded apps runs into: (gdb) info threads [...] 21 Thread 0x3ff83cc0740 (LWP 9295) warning: Couldn't find general-purpose registers in core file. in ?? () The reason is that all pr_pid values are the same, because the same TaskState is used for all CPUs when generating NT_PRSTATUS notes. Fix by using TaskStates associated with individual CPUs. Cc: qemu-stable@nongnu.org Fixes: 243c47066253 ("linux-user/elfload: Write corefile elf header in one block") Signed-off-by: Ilya Leoshkevich Reviewed-by: Richard Henderson Message-ID: <20240801202340.21845-1-iii@linux.ibm.com> Signed-off-by: Richard Henderson --- linux-user/elfload.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 0d4dc1f6d1..b27dd01734 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -4102,8 +4102,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset) bswap_phdr(phdr, 1); } -static void fill_prstatus_note(void *data, const TaskState *ts, - CPUState *cpu, int signr) +static void fill_prstatus_note(void *data, CPUState *cpu, int signr) { /* * Because note memory is only aligned to 4, and target_elf_prstatus @@ -4113,7 +4112,7 @@ static void fill_prstatus_note(void *data, const TaskState *ts, struct target_elf_prstatus prstatus = { .pr_info.si_signo = signr, .pr_cursig = signr, - .pr_pid = ts->ts_tid, + .pr_pid = get_task_state(cpu)->ts_tid, .pr_ppid = getppid(), .pr_pgrp = getpgrp(), .pr_sid = getsid(0), @@ -4428,8 +4427,7 @@ static int elf_core_dump(int signr, const CPUArchState *env) CPU_FOREACH(cpu_iter) { dptr = fill_note(&hptr, NT_PRSTATUS, "CORE", sizeof(struct target_elf_prstatus)); - fill_prstatus_note(dptr, ts, cpu_iter, - cpu_iter == 
cpu ? signr : 0); + fill_prstatus_note(dptr, cpu_iter, cpu_iter == cpu ? signr : 0); } if (dump_write(fd, header, data_offset) < 0) { From 4ec5ebea078ed3a16d6c7e612ff9c2e04ea73931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 2 Aug 2024 16:54:17 +0200 Subject: [PATCH 2/6] qemu/osdep: Move close_all_open_fd() to oslib-posix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move close_all_open_fd() to oslib-posix, rename it qemu_close_all_open_fd() and export it. Signed-off-by: Clément Léger Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240802145423.3232974-2-cleger@rivosinc.com> Signed-off-by: Richard Henderson --- include/qemu/osdep.h | 7 +++++++ system/async-teardown.c | 37 +------------------------------------ util/oslib-posix.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 720ed21a7e..de77c5c254 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -757,6 +757,13 @@ static inline void qemu_reset_optind(void) int qemu_fdatasync(int fd); +/** + * qemu_close_all_open_fd: + * + * Close all open file descriptors + */ +void qemu_close_all_open_fd(void); + /** * Sync changes made to the memory mapped file back to the backing * storage. For POSIX compliant systems this will fallback diff --git a/system/async-teardown.c b/system/async-teardown.c index 396963c091..edf49e1007 100644 --- a/system/async-teardown.c +++ b/system/async-teardown.c @@ -26,40 +26,6 @@ static pid_t the_ppid; -/* - * Close all open file descriptors. - */ -static void close_all_open_fd(void) -{ - struct dirent *de; - int fd, dfd; - DIR *dir; - -#ifdef CONFIG_CLOSE_RANGE - int r = close_range(0, ~0U, 0); - if (!r) { - /* Success, no need to try other ways.
*/ - return; - } -#endif - - dir = opendir("/proc/self/fd"); - if (!dir) { - /* If /proc is not mounted, there is nothing that can be done. */ - return; - } - /* Avoid closing the directory. */ - dfd = dirfd(dir); - - for (de = readdir(dir); de; de = readdir(dir)) { - fd = atoi(de->d_name); - if (fd != dfd) { - close(fd); - } - } - closedir(dir); -} - static void hup_handler(int signal) { /* Check every second if this process has been reparented. */ @@ -85,9 +51,8 @@ static int async_teardown_fn(void *arg) /* * Close all file descriptors that might have been inherited from the * main qemu process when doing clone, needed to make libvirt happy. - * Not using close_range for increased compatibility with older kernels. */ - close_all_open_fd(); + qemu_close_all_open_fd(); /* Set up a handler for SIGHUP and unblock SIGHUP. */ sigaction(SIGHUP, &sa, NULL); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index b090fe0eed..1e867efa47 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -807,3 +807,37 @@ int qemu_msync(void *addr, size_t length, int fd) return msync(addr, length, MS_SYNC); } + +/* + * Close all open file descriptors. + */ +void qemu_close_all_open_fd(void) +{ + struct dirent *de; + int fd, dfd; + DIR *dir; + +#ifdef CONFIG_CLOSE_RANGE + int r = close_range(0, ~0U, 0); + if (!r) { + /* Success, no need to try other ways. */ + return; + } +#endif + + dir = opendir("/proc/self/fd"); + if (!dir) { + /* If /proc is not mounted, there is nothing that can be done. */ + return; + } + /* Avoid closing the directory. 
*/ + dfd = dirfd(dir); + + for (de = readdir(dir); de; de = readdir(dir)) { + fd = atoi(de->d_name); + if (fd != dfd) { + close(fd); + } + } + closedir(dir); +} From ffa28f9cf503e22bfe621b062d29cbdb9a0aa786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 2 Aug 2024 16:54:18 +0200 Subject: [PATCH 3/6] qemu/osdep: Split qemu_close_all_open_fd() and add fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make it cleaner, split qemu_close_all_open_fd() logic into multiple subfunctions (close with close_range(), with /proc/self/fd and fallback). Signed-off-by: Clément Léger Reviewed-by: Richard Henderson Message-ID: <20240802145423.3232974-3-cleger@rivosinc.com> Signed-off-by: Richard Henderson --- util/oslib-posix.c | 50 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 1e867efa47..9b79fc7cff 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -808,27 +808,16 @@ int qemu_msync(void *addr, size_t length, int fd) return msync(addr, length, MS_SYNC); } -/* - * Close all open file descriptors. - */ -void qemu_close_all_open_fd(void) +static bool qemu_close_all_open_fd_proc(void) { struct dirent *de; int fd, dfd; DIR *dir; -#ifdef CONFIG_CLOSE_RANGE - int r = close_range(0, ~0U, 0); - if (!r) { - /* Success, no need to try other ways. */ - return; - } -#endif - dir = opendir("/proc/self/fd"); if (!dir) { /* If /proc is not mounted, there is nothing that can be done. */ - return; + return false; } /* Avoid closing the directory. */ dfd = dirfd(dir); @@ -840,4 +829,39 @@ void qemu_close_all_open_fd(void) } } closedir(dir); + + return true; +} + +static bool qemu_close_all_open_fd_close_range(void) +{ +#ifdef CONFIG_CLOSE_RANGE + int r = close_range(0, ~0U, 0); + if (!r) { + /* Success, no need to try other ways. 
*/ + return true; + } +#endif + return false; +} + +static void qemu_close_all_open_fd_fallback(void) +{ + int open_max = sysconf(_SC_OPEN_MAX), i; + + /* Fallback */ + for (i = 0; i < open_max; i++) { + close(i); + } +} + +/* + * Close all open file descriptors. + */ +void qemu_close_all_open_fd(void) +{ + if (!qemu_close_all_open_fd_close_range() && + !qemu_close_all_open_fd_proc()) { + qemu_close_all_open_fd_fallback(); + } } From a9b5d6e536c2c728a059b36ad434322dd9329c89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 2 Aug 2024 16:54:19 +0200 Subject: [PATCH 4/6] net/tap: Factorize fd closing after forking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same code is used twice to actually close all open file descriptors after forking. Factorize it in a single place. Signed-off-by: Clément Léger Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20240802145423.3232974-4-cleger@rivosinc.com> Signed-off-by: Richard Henderson --- net/tap.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/net/tap.c b/net/tap.c index 51f7aec39d..7b2d5d5703 100644 --- a/net/tap.c +++ b/net/tap.c @@ -385,6 +385,17 @@ static TAPState *net_tap_fd_init(NetClientState *peer, return s; } +static void close_all_fds_after_fork(int excluded_fd) +{ + int open_max = sysconf(_SC_OPEN_MAX), i; + + for (i = 3; i < open_max; i++) { + if (i != excluded_fd) { + close(i); + } + } +} + static void launch_script(const char *setup_script, const char *ifname, int fd, Error **errp) { @@ -400,13 +411,7 @@ static void launch_script(const char *setup_script, const char *ifname, return; } if (pid == 0) { - int open_max = sysconf(_SC_OPEN_MAX), i; - - for (i = 3; i < open_max; i++) { - if (i != fd) { - close(i); - } - } + close_all_fds_after_fork(fd); parg = args; *parg++ = (char *)setup_script; *parg++ = (char *)ifname; @@ -490,17 +495,11 @@ static int 
net_bridge_run_helper(const char *helper, const char *bridge, return -1; } if (pid == 0) { - int open_max = sysconf(_SC_OPEN_MAX), i; char *fd_buf = NULL; char *br_buf = NULL; char *helper_cmd = NULL; - for (i = 3; i < open_max; i++) { - if (i != sv[1]) { - close(i); - } - } - + close_all_fds_after_fork(sv[1]); fd_buf = g_strdup_printf("%s%d", "--fd=", sv[1]); if (strrchr(helper, ' ') || strrchr(helper, '\t')) { From 7532ca570a449bafe990a00f21ae41bff7709845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 2 Aug 2024 16:54:20 +0200 Subject: [PATCH 5/6] qemu/osdep: Add excluded fd parameter to qemu_close_all_open_fd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order for this function to be usable by tap.c code, add a list of file descriptors that should not be closed. Signed-off-by: Clément Léger Message-ID: <20240802145423.3232974-5-cleger@rivosinc.com> [rth: Use max_fd in qemu_close_all_open_fd_close_range] Signed-off-by: Richard Henderson --- include/qemu/osdep.h | 8 +++- system/async-teardown.c | 2 +- util/oslib-posix.c | 98 ++++++++++++++++++++++++++++++++++------- 3 files changed, 89 insertions(+), 19 deletions(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index de77c5c254..4cc4c32b14 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -760,9 +760,13 @@ int qemu_fdatasync(int fd); /** * qemu_close_all_open_fd: * - * Close all open file descriptors + * Close all open file descriptors except the ones supplied in the @skip array + * + * @skip: ordered array of distinct file descriptors that should not be closed + * if any, or NULL. + * @nskip: number of entries in the @skip array or 0 if @skip is NULL. 
*/ -void qemu_close_all_open_fd(void); +void qemu_close_all_open_fd(const int *skip, unsigned int nskip); /** * Sync changes made to the memory mapped file back to the backing diff --git a/system/async-teardown.c b/system/async-teardown.c index edf49e1007..9148ee8d04 100644 --- a/system/async-teardown.c +++ b/system/async-teardown.c @@ -52,7 +52,7 @@ static int async_teardown_fn(void *arg) * Close all file descriptors that might have been inherited from the * main qemu process when doing clone, needed to make libvirt happy. */ - qemu_close_all_open_fd(); + qemu_close_all_open_fd(NULL, 0); /* Set up a handler for SIGHUP and unblock SIGHUP. */ sigaction(SIGHUP, &sa, NULL); diff --git a/util/oslib-posix.c b/util/oslib-posix.c index 9b79fc7cff..11b35e48fb 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -808,11 +808,12 @@ int qemu_msync(void *addr, size_t length, int fd) return msync(addr, length, MS_SYNC); } -static bool qemu_close_all_open_fd_proc(void) +static bool qemu_close_all_open_fd_proc(const int *skip, unsigned int nskip) { struct dirent *de; int fd, dfd; DIR *dir; + unsigned int skip_start = 0, skip_end = nskip; dir = opendir("/proc/self/fd"); if (!dir) { @@ -823,8 +824,33 @@ static bool qemu_close_all_open_fd_proc(void) dfd = dirfd(dir); for (de = readdir(dir); de; de = readdir(dir)) { + bool close_fd = true; + + if (de->d_name[0] == '.') { + continue; + } fd = atoi(de->d_name); - if (fd != dfd) { + if (fd == dfd) { + continue; + } + + for (unsigned int i = skip_start; i < skip_end; i++) { + if (fd < skip[i]) { + /* We are below the next skipped fd, break */ + break; + } else if (fd == skip[i]) { + close_fd = false; + /* Restrict the range as we found fds matching start/end */ + if (i == skip_start) { + skip_start++; + } else if (i == skip_end - 1) { + skip_end--; + } + break; + } + } + + if (close_fd) { close(fd); } } @@ -833,24 +859,60 @@ static bool qemu_close_all_open_fd_proc(void) return true; } -static bool
qemu_close_all_open_fd_close_range(void) +static bool qemu_close_all_open_fd_close_range(const int *skip, + unsigned int nskip, + int open_max) { #ifdef CONFIG_CLOSE_RANGE - int r = close_range(0, ~0U, 0); - if (!r) { - /* Success, no need to try other ways. */ - return true; - } -#endif + int max_fd = open_max - 1; + int first = 0, last; + unsigned int cur_skip = 0; + int ret; + + do { + /* Find the start boundary of the range to close */ + while (cur_skip < nskip && first == skip[cur_skip]) { + cur_skip++; + first++; + } + + /* Find the upper boundary of the range to close */ + last = max_fd; + if (cur_skip < nskip) { + last = skip[cur_skip] - 1; + last = MIN(last, max_fd); + } + + /* With the adjustments to the range, we might be done. */ + if (first > last) { + break; + } + + ret = close_range(first, last, 0); + if (ret < 0) { + return false; + } + + first = last + 1; + } while (last < max_fd); + + return true; +#else return false; +#endif } -static void qemu_close_all_open_fd_fallback(void) +static void qemu_close_all_open_fd_fallback(const int *skip, unsigned int nskip, + int open_max) { - int open_max = sysconf(_SC_OPEN_MAX), i; + unsigned int cur_skip = 0; /* Fallback */ - for (i = 0; i < open_max; i++) { + for (int i = 0; i < open_max; i++) { + if (cur_skip < nskip && i == skip[cur_skip]) { + cur_skip++; + continue; + } close(i); } } @@ -858,10 +920,14 @@ static void qemu_close_all_open_fd_fallback(void) /* * Close all open file descriptors. 
*/ -void qemu_close_all_open_fd(void) +void qemu_close_all_open_fd(const int *skip, unsigned int nskip) { - if (!qemu_close_all_open_fd_close_range() && - !qemu_close_all_open_fd_proc()) { - qemu_close_all_open_fd_fallback(); + int open_max = sysconf(_SC_OPEN_MAX); + + assert(skip != NULL || nskip == 0); + + if (!qemu_close_all_open_fd_close_range(skip, nskip, open_max) && + !qemu_close_all_open_fd_proc(skip, nskip)) { + qemu_close_all_open_fd_fallback(skip, nskip, open_max); } } From 9996a35c6433c0e019a1c05791299db5e63a5db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Fri, 2 Aug 2024 16:54:21 +0200 Subject: [PATCH 6/6] net/tap: Use qemu_close_all_open_fd() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a slow implementation to close all open fd after forking, use qemu_close_all_open_fd(). Signed-off-by: Clément Léger Reviewed-by: Richard Henderson Message-ID: <20240802145423.3232974-6-cleger@rivosinc.com> Signed-off-by: Richard Henderson --- net/tap.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/tap.c b/net/tap.c index 7b2d5d5703..3f90022c0b 100644 --- a/net/tap.c +++ b/net/tap.c @@ -387,13 +387,20 @@ static TAPState *net_tap_fd_init(NetClientState *peer, static void close_all_fds_after_fork(int excluded_fd) { - int open_max = sysconf(_SC_OPEN_MAX), i; + const int skip_fd[] = {STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO, + excluded_fd}; + unsigned int nskip = ARRAY_SIZE(skip_fd); - for (i = 3; i < open_max; i++) { - if (i != excluded_fd) { - close(i); - } + /* + * skip_fd must be an ordered array of distinct fds, exclude + * excluded_fd if already included in the [STDIN_FILENO - STDERR_FILENO] + * range + */ + if (excluded_fd <= STDERR_FILENO) { + nskip--; } + + qemu_close_all_open_fd(skip_fd, nskip); } static void launch_script(const char *setup_script, const char *ifname,