From cc5ab87200257199eba91aba9baf141ae0e91d0c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 2 Nov 2020 10:30:09 -0500 Subject: [PATCH 01/12] migration: Unify reset of last_rb on destination node when recover When postcopy recover happens, we need to reset last_rb after each return of postcopy_pause_fault_thread() because that means we just got the postcopy migration continued. Unify this reset to the place right before we want to kick the fault thread again, when we get the command MIG_CMD_POSTCOPY_RESUME from source. This is actually more than that - because the main thread on destination will now be able to call migrate_send_rp_req_pages_pending() too, so the fault thread is not the only user of last_rb now. Move the reset earlier will allow the first call to migrate_send_rp_req_pages_pending() to use the reset value even if called from the main thread. (NOTE: this is not a real fix to 0c26781c09 mentioned below, however it is just a mark that when picking up 0c26781c09 we'd better have this one too; the real fix will come later) Fixes: 0c26781c09 ("migration: Sync requested pages after postcopy recovery") Tested-by: Christian Schoenebeck Signed-off-by: Peter Xu Reviewed-by: Dr. David Alan Gilbert Message-Id: <20201102153010.11979-2-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- migration/postcopy-ram.c | 2 -- migration/savevm.c | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index d3bb3a744b..d99842eb1b 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -903,7 +903,6 @@ static void *postcopy_ram_fault_thread(void *opaque) * the channel is rebuilt. */ if (postcopy_pause_fault_thread(mis)) { - mis->last_rb = NULL; /* Continue to read the userfaultfd */ } else { error_report("%s: paused but don't allow to continue", @@ -985,7 +984,6 @@ retry: /* May be network failure, try to wait for recovery */ if (ret == -EIO && postcopy_pause_fault_thread(mis)) { /* We got reconnected somehow, try to continue */ - mis->last_rb = NULL; goto retry; } else { /* This is a unavoidable fault */ diff --git a/migration/savevm.c b/migration/savevm.c index 21ccba9fb3..e8834991ec 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2061,6 +2061,12 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) return 0; } + /* + * Reset the last_rb before we resend any page req to source again, since + * the source should have it reset already. + */ + mis->last_rb = NULL; + /* * This means source VM is ready to resume the postcopy migration. * It's time to switch state and release the fault thread to From 5e773431134a6c3f65c0088d578384577c4a3f2c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 2 Nov 2020 10:30:10 -0500 Subject: [PATCH 02/12] migration: Postpone the kick of the fault thread after recover The new migrate_send_rp_req_pages_pending() call should greatly improve destination responsiveness because it will resync faulted address after postcopy recovery. However it is also the 1st place to initiate the page request from the main thread. One thing is overlooked on that migrate_send_rp_message_req_pages() is not designed to be thread-safe. So if we wake the fault thread before syncing all the faulted pages in the main thread, it means they can race. Postpone the wake up operation after the sync of faulted addresses. Fixes: 0c26781c09 ("migration: Sync requested pages after postcopy recovery") Tested-by: Christian Schoenebeck Signed-off-by: Peter Xu Message-Id: <20201102153010.11979-3-peterx@redhat.com> Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- migration/savevm.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/migration/savevm.c b/migration/savevm.c index e8834991ec..5f937a2762 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2069,12 +2069,9 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) /* * This means source VM is ready to resume the postcopy migration. - * It's time to switch state and release the fault thread to - * continue service page faults. */ migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVER, MIGRATION_STATUS_POSTCOPY_ACTIVE); - qemu_sem_post(&mis->postcopy_pause_sem_fault); trace_loadvm_postcopy_handle_resume(); @@ -2095,6 +2092,14 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) */ migrate_send_rp_req_pages_pending(mis); + /* + * It's time to switch state and release the fault thread to continue + * service page faults. Note that this should be explicitly after the + * above call to migrate_send_rp_req_pages_pending(). In short: + * migrate_send_rp_message_req_pages() is not thread safe, yet. + */ + qemu_sem_post(&mis->postcopy_pause_sem_fault); + return 0; } From dcaac9f124833e62e720d0733aa53453f438b7d7 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 2 Nov 2020 15:07:50 +0000 Subject: [PATCH 03/12] virtiofsd: Seccomp: Add 'send' for syslog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc, and some other archs, it looks like syslog ends up using 'send' rather than 'sendto'. Reference: https://github.com/kata-containers/kata-containers/issues/1050 Reported-by: amulmek1@in.ibm.com Signed-off-by: Dr. David Alan Gilbert Message-Id: <20201102150750.34565-1-dgilbert@redhat.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_seccomp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index eb9af8265f..672fb72a31 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -118,6 +118,7 @@ static const int syscall_whitelist[] = { /* Syscalls used when --syslog is enabled */ static const int syscall_whitelist_syslog[] = { + SCMP_SYS(send), SCMP_SYS(sendto), }; From 2693026042d2f6f5749babb8827b8098ed5d7eb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 2 Nov 2020 10:23:39 +0100 Subject: [PATCH 04/12] tools/virtiofsd: Check vu_init() return value (CID 1435958) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 6f5fd837889, vu_init() can fail if malloc() returns NULL. This fixes the following Coverity warning: CID 1435958 (#1 of 1): Unchecked return value (CHECKED_RETURN) Fixes: 6f5fd837889 ("libvhost-user: support many virtqueues") Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20201102092339.2034297-1-philmd@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_virtio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c index 324936948d..83ba07c6cd 100644 --- a/tools/virtiofsd/fuse_virtio.c +++ b/tools/virtiofsd/fuse_virtio.c @@ -1013,8 +1013,11 @@ int virtio_session_mount(struct fuse_session *se) se->vu_socketfd = data_sock; se->virtio_dev->se = se; pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL); - vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL, - fv_set_watch, fv_remove_watch, &fv_iface); + if (!vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL, + fv_set_watch, fv_remove_watch, &fv_iface)) { + fuse_log(FUSE_LOG_ERR, "%s: vu_init failed\n", __func__); + return -1; + } return 0; } From 0429eaf518be1d4742356056e6c886b7f9bc9712 Mon Sep 17 00:00:00 2001 From: Jiachen Zhang Date: Tue, 27 Oct 2020 16:15:58 +0800 Subject: [PATCH 05/12] virtiofsd: Fix the help message of posix lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 88fc107956a5812649e5918e0c092d3f78bb28ad disabled remote posix locks by default. But the --help message still says it is enabled by default. So fix it to output no_posix_lock. Signed-off-by: Jiachen Zhang Message-Id: <20201027081558.29904-1-zhangjiachen.jaycee@bytedance.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index 2e181a49b5..ae1b22e4a6 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -161,7 +161,7 @@ void fuse_cmdline_help(void) " allowed (default: 10)\n" " -o posix_lock|no_posix_lock\n" " enable/disable remote posix lock\n" - " default: posix_lock\n" + " default: no_posix_lock\n" " -o readdirplus|no_readdirplus\n" " enable/disable readirplus\n" " default: readdirplus except with " From 9c6ac0436356132da97c4bbb7f6c90b062438176 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:53 +0100 Subject: [PATCH 06/12] virtiofsd: Check FUSE_SUBMOUNTS FUSE_SUBMOUNTS is a pure indicator by the kernel to signal that it supports submounts. It does not check its state in the init reply, so there is nothing for fuse_lowlevel.c to do but to check its existence and copy it into fuse_conn_info.capable. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-2-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_common.h | 7 +++++++ tools/virtiofsd/fuse_lowlevel.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/tools/virtiofsd/fuse_common.h b/tools/virtiofsd/fuse_common.h index 686c42c0a5..5aee5193eb 100644 --- a/tools/virtiofsd/fuse_common.h +++ b/tools/virtiofsd/fuse_common.h @@ -352,6 +352,13 @@ struct fuse_file_info { */ #define FUSE_CAP_NO_OPENDIR_SUPPORT (1 << 24) +/** + * Indicates that the kernel supports the FUSE_ATTR_SUBMOUNT flag. + * + * Setting (or unsetting) this flag in the `want` field has *no effect*. + */ +#define FUSE_CAP_SUBMOUNTS (1 << 27) + /** * Ioctl flags * diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 4d1ba2925d..370222339b 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -1988,6 +1988,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, bufsize = max_bufsize; } } + if (arg->flags & FUSE_SUBMOUNTS) { + se->conn.capable |= FUSE_CAP_SUBMOUNTS; + } #ifdef HAVE_SPLICE #ifdef HAVE_VMSPLICE se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE; From 93e79851abb78fe36882232b84edc795d1d942fd Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:54 +0100 Subject: [PATCH 07/12] virtiofsd: Add attr_flags to fuse_entry_param fuse_entry_param is converted to fuse_attr on the line (by fill_entry()), so it should have a member that mirrors fuse_attr.flags. fill_entry() should then copy this fuse_entry_param.attr_flags to fuse_attr.flags. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-3-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/fuse_lowlevel.c | 2 ++ tools/virtiofsd/fuse_lowlevel.h | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c index 370222339b..c70fb16a9a 100644 --- a/tools/virtiofsd/fuse_lowlevel.c +++ b/tools/virtiofsd/fuse_lowlevel.c @@ -341,6 +341,8 @@ static void fill_entry(struct fuse_entry_out *arg, .attr_valid_nsec = calc_timeout_nsec(e->attr_timeout), }; convert_stat(&e->attr, &arg->attr); + + arg->attr.flags = e->attr_flags; } /* diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h index 562fd5241e..9c06240f9e 100644 --- a/tools/virtiofsd/fuse_lowlevel.h +++ b/tools/virtiofsd/fuse_lowlevel.h @@ -102,6 +102,11 @@ struct fuse_entry_param { * large value. */ double entry_timeout; + + /** + * Flags for fuse_attr.flags that do not fit into attr. + */ + uint32_t attr_flags; }; /** From 84e319a575d04e9d53c24d801572a3ead538f9e4 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:55 +0100 Subject: [PATCH 08/12] meson.build: Check for statx() Check whether the glibc provides statx() and if so, define CONFIG_STATX. Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-4-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- meson.build | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/meson.build b/meson.build index 47e32e1fcb..39ac5cf6d8 100644 --- a/meson.build +++ b/meson.build @@ -736,6 +736,21 @@ if not has_malloc_trim and get_option('malloc_trim').enabled() endif endif +# Check whether the glibc provides statx() + +statx_test = ''' + #ifndef _GNU_SOURCE + #define _GNU_SOURCE + #endif + #include + int main(void) { + struct statx statxbuf; + statx(0, "", 0, STATX_BASIC_STATS, &statxbuf); + return 0; + }''' + +has_statx = cc.links(statx_test) + ################# # config-host.h # ################# @@ -768,6 +783,7 @@ config_host_data.set('CONFIG_XKBCOMMON', xkbcommon.found()) config_host_data.set('CONFIG_KEYUTILS', keyutils.found()) config_host_data.set('CONFIG_GETTID', has_gettid) config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) +config_host_data.set('CONFIG_STATX', has_statx) config_host_data.set('QEMU_VERSION', '"@0@"'.format(meson.project_version())) config_host_data.set('QEMU_VERSION_MAJOR', meson.project_version().split('.')[0]) config_host_data.set('QEMU_VERSION_MINOR', meson.project_version().split('.')[1]) From d672fce6baa0a02dfa64d717fb072c06e301d695 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:56 +0100 Subject: [PATCH 09/12] virtiofsd: Add mount ID to the lo_inode key Using st_dev is not sufficient to uniquely identify a mount: You can mount the same device twice, but those are still separate trees, and e.g. by mounting something else inside one of them, they may differ. Using statx(), we can get a mount ID that uniquely identifies a mount. If that is available, add it to the lo_inode key. Most of this patch is taken from Miklos's mail here: https://marc.info/?l=fuse-devel&m=160062521827983 (virtiofsd-use-mount-id.patch attachment) Suggested-by: Miklos Szeredi Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-5-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/passthrough_ll.c | 95 ++++++++++++++++++++++++--- tools/virtiofsd/passthrough_seccomp.c | 1 + 2 files changed, 86 insertions(+), 10 deletions(-) diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index a0beb986f3..34d107975f 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -94,6 +94,7 @@ struct lo_map { struct lo_key { ino_t ino; dev_t dev; + uint64_t mnt_id; }; struct lo_inode { @@ -166,6 +167,7 @@ struct lo_data { int readdirplus_set; int readdirplus_clear; int allow_direct_io; + bool use_statx; struct lo_inode root; GHashTable *inodes; /* protected by lo->mutex */ struct lo_map ino_map; /* protected by lo->mutex */ @@ -219,7 +221,8 @@ static struct { /* That we loaded cap-ng in the current thread from the saved */ static __thread bool cap_loaded = 0; -static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st); +static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, + uint64_t mnt_id); static int is_dot_or_dotdot(const char *name) { @@ -741,12 +744,14 @@ out_err: fuse_reply_err(req, saverr); } -static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) +static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, + uint64_t mnt_id) { struct lo_inode *p; struct lo_key key = { .ino = st->st_ino, .dev = st->st_dev, + .mnt_id = mnt_id, }; pthread_mutex_lock(&lo->mutex); @@ -774,6 +779,60 @@ static void posix_locks_value_destroy(gpointer data) free(plock); } +static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, + struct stat *statbuf, int flags, uint64_t *mnt_id) +{ + int res; + +#if defined(CONFIG_STATX) && defined(STATX_MNT_ID) + if (lo->use_statx) { + struct statx statxbuf; + + res = statx(dirfd, pathname, flags, STATX_BASIC_STATS | STATX_MNT_ID, + &statxbuf); + if (!res) { + memset(statbuf, 0, sizeof(*statbuf)); + statbuf->st_dev = makedev(statxbuf.stx_dev_major, + statxbuf.stx_dev_minor); + statbuf->st_ino = statxbuf.stx_ino; + statbuf->st_mode = statxbuf.stx_mode; + statbuf->st_nlink = statxbuf.stx_nlink; + statbuf->st_uid = statxbuf.stx_uid; + statbuf->st_gid = statxbuf.stx_gid; + statbuf->st_rdev = makedev(statxbuf.stx_rdev_major, + statxbuf.stx_rdev_minor); + statbuf->st_size = statxbuf.stx_size; + statbuf->st_blksize = statxbuf.stx_blksize; + statbuf->st_blocks = statxbuf.stx_blocks; + statbuf->st_atim.tv_sec = statxbuf.stx_atime.tv_sec; + statbuf->st_atim.tv_nsec = statxbuf.stx_atime.tv_nsec; + statbuf->st_mtim.tv_sec = statxbuf.stx_mtime.tv_sec; + statbuf->st_mtim.tv_nsec = statxbuf.stx_mtime.tv_nsec; + statbuf->st_ctim.tv_sec = statxbuf.stx_ctime.tv_sec; + statbuf->st_ctim.tv_nsec = statxbuf.stx_ctime.tv_nsec; + + if (statxbuf.stx_mask & STATX_MNT_ID) { + *mnt_id = statxbuf.stx_mnt_id; + } else { + *mnt_id = 0; + } + return 0; + } else if (errno != ENOSYS) { + return -1; + } + lo->use_statx = false; + /* fallback */ + } +#endif + res = fstatat(dirfd, pathname, statbuf, flags); + if (res == -1) { + return -1; + } + *mnt_id = 0; + + return 0; +} + /* * Increments nlookup and caller must release refcount using * lo_inode_put(&parent). @@ -784,6 +843,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, int newfd; int res; int saverr; + uint64_t mnt_id; struct lo_data *lo = lo_data(req); struct lo_inode *inode = NULL; struct lo_inode *dir = lo_inode(req, parent); @@ -811,12 +871,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, goto out_err; } - res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_statx(lo, newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + &mnt_id); if (res == -1) { goto out_err; } - inode = lo_find(lo, &e->attr); + inode = lo_find(lo, &e->attr, mnt_id); if (inode) { close(newfd); } else { @@ -838,6 +899,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, inode->fd = newfd; inode->key.ino = e->attr.st_ino; inode->key.dev = e->attr.st_dev; + inode->key.mnt_id = mnt_id; pthread_mutex_init(&inode->plock_mutex, NULL); inode->posix_locks = g_hash_table_new_full( g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); @@ -1090,15 +1152,23 @@ static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, const char *name) { int res; + uint64_t mnt_id; struct stat attr; + struct lo_data *lo = lo_data(req); + struct lo_inode *dir = lo_inode(req, parent); - res = fstatat(lo_fd(req, parent), name, &attr, - AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (!dir) { + return NULL; + } + + res = do_statx(lo, dir->fd, name, &attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, &mnt_id); + lo_inode_put(lo, &dir); if (res == -1) { return NULL; } - return lo_find(lo_data(req), &attr); + return lo_find(lo, &attr, mnt_id); } static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) @@ -3266,6 +3336,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) { int fd, res; struct stat stat; + uint64_t mnt_id; fd = open("/", O_PATH); if (fd == -1) { @@ -3273,7 +3344,8 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) exit(1); } - res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + res = do_statx(lo, fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, + &mnt_id); if (res == -1) { fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); exit(1); @@ -3283,6 +3355,7 @@ static void setup_root(struct lo_data *lo, struct lo_inode *root) root->fd = fd; root->key.ino = stat.st_ino; root->key.dev = stat.st_dev; + root->key.mnt_id = mnt_id; root->nlookup = 2; g_atomic_int_set(&root->refcount, 2); } @@ -3291,7 +3364,7 @@ static guint lo_key_hash(gconstpointer key) { const struct lo_key *lkey = key; - return (guint)lkey->ino + (guint)lkey->dev; + return (guint)lkey->ino + (guint)lkey->dev + (guint)lkey->mnt_id; } static gboolean lo_key_equal(gconstpointer a, gconstpointer b) @@ -3299,7 +3372,7 @@ static gboolean lo_key_equal(gconstpointer a, gconstpointer b) const struct lo_key *la = a; const struct lo_key *lb = b; - return la->ino == lb->ino && la->dev == lb->dev; + return la->ino == lb->ino && la->dev == lb->dev && la->mnt_id == lb->mnt_id; } static void fuse_lo_data_cleanup(struct lo_data *lo) @@ -3445,6 +3518,8 @@ int main(int argc, char *argv[]) exit(1); } + lo.use_statx = true; + se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); if (se == NULL) { goto err_out1; diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c index 672fb72a31..11623f56f2 100644 --- a/tools/virtiofsd/passthrough_seccomp.c +++ b/tools/virtiofsd/passthrough_seccomp.c @@ -76,6 +76,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(mremap), SCMP_SYS(munmap), SCMP_SYS(newfstatat), + SCMP_SYS(statx), SCMP_SYS(open), SCMP_SYS(openat), SCMP_SYS(ppoll), From 9d82f6a3e68c2986633ad11687da2193e9acb0cb Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:57 +0100 Subject: [PATCH 10/12] virtiofsd: Announce sub-mount points Whenever we encounter a directory with an st_dev or mount ID that differs from that of its parent, we set the FUSE_ATTR_SUBMOUNT flag so the guest can create a submount for it. We only need to do so in lo_do_lookup(). The following functions return a fuse_attr object: - lo_create(), though fuse_reply_create(): Calls lo_do_lookup(). - lo_lookup(), though fuse_reply_entry(): Calls lo_do_lookup(). - lo_mknod_symlink(), through fuse_reply_entry(): Calls lo_do_lookup(). - lo_link(), through fuse_reply_entry(): Creating a link cannot create a submount, so there is no need to check for it. - lo_getattr(), through fuse_reply_attr(): Announcing submounts when the node is first detected (at lookup) is sufficient. We do not need to return the submount attribute later. - lo_do_readdir(), through fuse_add_direntry_plus(): Calls lo_do_lookup(). Make announcing submounts optional, so submounts are only announced to the guest with the announce_submounts option. Some users may prefer the current behavior, so that the guest learns nothing about the host mount structure. (announce_submounts is force-disabled when the guest does not present the FUSE_SUBMOUNTS capability, or when there is no statx().) Signed-off-by: Max Reitz Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-6-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tools/virtiofsd/helper.c | 1 + tools/virtiofsd/passthrough_ll.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c index ae1b22e4a6..75ac48dec2 100644 --- a/tools/virtiofsd/helper.c +++ b/tools/virtiofsd/helper.c @@ -190,6 +190,7 @@ void fuse_cmdline_help(void) " retain/discard O_DIRECT flags passed down\n" " to virtiofsd from guest applications.\n" " default: no_allow_direct_io\n" + " -o announce_submounts Announce sub-mount points to the guest\n" ); } diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c index 34d107975f..ec1008bceb 100644 --- a/tools/virtiofsd/passthrough_ll.c +++ b/tools/virtiofsd/passthrough_ll.c @@ -40,6 +40,7 @@ #include "fuse_virtio.h" #include "fuse_log.h" #include "fuse_lowlevel.h" +#include "standard-headers/linux/fuse.h" #include #include #include @@ -167,6 +168,7 @@ struct lo_data { int readdirplus_set; int readdirplus_clear; int allow_direct_io; + int announce_submounts; bool use_statx; struct lo_inode root; GHashTable *inodes; /* protected by lo->mutex */ @@ -207,6 +209,7 @@ static const struct fuse_opt lo_opts[] = { { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, + { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 }, FUSE_OPT_END }; static bool use_syslog = false; @@ -601,6 +604,20 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn) fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); conn->want &= ~FUSE_CAP_READDIRPLUS; } + + if (!(conn->capable & FUSE_CAP_SUBMOUNTS) && lo->announce_submounts) { + fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, client " + "does not support it\n"); + lo->announce_submounts = false; + } + +#ifndef CONFIG_STATX + if (lo->announce_submounts) { + fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, there " + "is no statx()\n"); + lo->announce_submounts = false; + } +#endif } static void lo_getattr(fuse_req_t req, fuse_ino_t ino, @@ -877,6 +894,11 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, goto out_err; } + if (S_ISDIR(e->attr.st_mode) && lo->announce_submounts && + (e->attr.st_dev != dir->key.dev || mnt_id != dir->key.mnt_id)) { + e->attr_flags |= FUSE_ATTR_SUBMOUNT; + } + inode = lo_find(lo, &e->attr, mnt_id); if (inode) { close(newfd); From 0a7798037acb65cf4e9d64372690a47018e60d02 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:58 +0100 Subject: [PATCH 11/12] tests/acceptance/boot_linux: Accept SSH pubkey MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let download_cloudinit() take an optional pubkey, which subclasses of BootLinux can pass through setUp(). Signed-off-by: Max Reitz Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Willian Rampazzo Reviewed-by: Stefan Hajnoczi Message-Id: <20201102161859.156603-7-mreitz@redhat.com> Signed-off-by: Dr. David Alan Gilbert --- tests/acceptance/boot_linux.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/acceptance/boot_linux.py b/tests/acceptance/boot_linux.py index c743e231f4..1da4a53d6a 100644 --- a/tests/acceptance/boot_linux.py +++ b/tests/acceptance/boot_linux.py @@ -57,7 +57,7 @@ class BootLinuxBase(Test): self.cancel('Failed to download/prepare boot image') return boot.path - def download_cloudinit(self): + def download_cloudinit(self, ssh_pubkey=None): self.log.info('Preparing cloudinit image') try: cloudinit_iso = os.path.join(self.workdir, 'cloudinit.iso') @@ -67,7 +67,8 @@ class BootLinuxBase(Test): password='password', # QEMU's hard coded usermode router address phone_home_host='10.0.2.2', - phone_home_port=self.phone_home_port) + phone_home_port=self.phone_home_port, + authorized_key=ssh_pubkey) except Exception: self.cancel('Failed to prepared cloudinit image') return cloudinit_iso @@ -80,19 +81,19 @@ class BootLinux(BootLinuxBase): timeout = 900 chksum = None - def setUp(self): + def setUp(self, ssh_pubkey=None): super(BootLinux, self).setUp() self.vm.add_args('-smp', '2') self.vm.add_args('-m', '1024') self.prepare_boot() - self.prepare_cloudinit() + self.prepare_cloudinit(ssh_pubkey) def prepare_boot(self): path = self.download_boot() self.vm.add_args('-drive', 'file=%s' % path) - def prepare_cloudinit(self): - cloudinit_iso = self.download_cloudinit() + def prepare_cloudinit(self, ssh_pubkey=None): + cloudinit_iso = self.download_cloudinit(ssh_pubkey) self.vm.add_args('-drive', 'file=%s,format=raw' % cloudinit_iso) def launch_and_wait(self): From af1bb3fe7f146fafdaadb479975ca2b53b49df40 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Mon, 2 Nov 2020 17:18:59 +0100 Subject: [PATCH 12/12] tests/acceptance: Add virtiofs_submounts.py This test invokes several shell scripts to create a random directory tree full of submounts, and then check in the VM whether every submount has its own ID and the structure looks as expected. (Note that the test scripts must be non-executable, so Avocado will not try to execute them as if they were tests on their own, too.) Because at this commit's date it is unlikely that the Linux kernel on the image provided by boot_linux.py supports submounts in virtio-fs, the test will be cancelled if no custom Linux binary is provided through the vmlinuz parameter. (The on-image kernel can be used by providing an empty string via vmlinuz=.) So, invoking the test can be done as follows: $ avocado run \ tests/acceptance/virtiofs_submounts.py \ -p vmlinuz=/path/to/linux/build/arch/x86/boot/bzImage This test requires root privileges (through passwordless sudo -n), because at this point, virtiofsd requires them. (If you have a timestamp_timeout period for sudoers (e.g. the default of 5 min), you can provide this by executing something like "sudo true" before invoking Avocado.) Signed-off-by: Max Reitz Message-Id: <20201102161859.156603-8-mreitz@redhat.com> Tested-by: Eduardo Habkost Signed-off-by: Dr. David Alan Gilbert --- tests/acceptance/virtiofs_submounts.py | 321 ++++++++++++++++++ .../virtiofs_submounts.py.data/cleanup.sh | 46 +++ .../guest-cleanup.sh | 30 ++ .../virtiofs_submounts.py.data/guest.sh | 138 ++++++++ .../virtiofs_submounts.py.data/host.sh | 127 +++++++ 5 files changed, 662 insertions(+) create mode 100644 tests/acceptance/virtiofs_submounts.py create mode 100644 tests/acceptance/virtiofs_submounts.py.data/cleanup.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/guest.sh create mode 100644 tests/acceptance/virtiofs_submounts.py.data/host.sh diff --git a/tests/acceptance/virtiofs_submounts.py b/tests/acceptance/virtiofs_submounts.py new file mode 100644 index 0000000000..361e5990b6 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py @@ -0,0 +1,321 @@ +import logging +import re +import os +import subprocess +import time + +from avocado import skipUnless +from avocado_qemu import Test, BUILD_DIR +from avocado_qemu import wait_for_console_pattern +from avocado.utils import ssh + +from qemu.accel import kvm_available + +from boot_linux import BootLinux + + +def run_cmd(args): + subp = subprocess.Popen(args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True) + stdout, stderr = subp.communicate() + ret = subp.returncode + + return (stdout, stderr, ret) + +def has_cmd(name, args=None): + """ + This function is for use in a @avocado.skipUnless decorator, e.g.: + + @skipUnless(*has_cmd('sudo -n', ('sudo', '-n', 'true'))) + def test_something_that_needs_sudo(self): + ... + """ + + if args is None: + args = ('which', name) + + try: + _, stderr, exitcode = run_cmd(args) + except Exception as e: + exitcode = -1 + stderr = str(e) + + if exitcode != 0: + cmd_line = ' '.join(args) + err = f'{name} required, but "{cmd_line}" failed: {stderr.strip()}' + return (False, err) + else: + return (True, '') + +def has_cmds(*cmds): + """ + This function is for use in a @avocado.skipUnless decorator and + allows checking for the availability of multiple commands, e.g.: + + @skipUnless(*has_cmds(('cmd1', ('cmd1', '--some-parameter')), + 'cmd2', 'cmd3')) + def test_something_that_needs_cmd1_and_cmd2(self): + ... + """ + + for cmd in cmds: + if isinstance(cmd, str): + cmd = (cmd,) + + ok, errstr = has_cmd(*cmd) + if not ok: + return (False, errstr) + + return (True, '') + + +class VirtiofsSubmountsTest(BootLinux): + """ + :avocado: tags=arch:x86_64 + """ + + def get_portfwd(self): + port = None + + res = self.vm.command('human-monitor-command', + command_line='info usernet') + for line in res.split('\r\n'): + match = \ + re.search(r'TCP.HOST_FORWARD.*127\.0\.0\.1\s*(\d+)\s+10\.', + line) + if match is not None: + port = match[1] + break + + self.assertIsNotNone(port) + self.log.debug('sshd listening on port: ' + port) + return port + + def ssh_connect(self, username, keyfile): + self.ssh_logger = logging.getLogger('ssh') + port = self.get_portfwd() + self.ssh_session = ssh.Session('127.0.0.1', port=int(port), + user=username, key=keyfile) + for i in range(10): + try: + self.ssh_session.connect() + return + except: + time.sleep(4) + pass + self.fail('sshd timeout') + + def ssh_command(self, command): + self.ssh_logger.info(command) + result = self.ssh_session.cmd(command) + stdout_lines = [line.rstrip() for line + in result.stdout_text.splitlines()] + for line in stdout_lines: + self.ssh_logger.info(line) + stderr_lines = [line.rstrip() for line + in result.stderr_text.splitlines()] + for line in stderr_lines: + self.ssh_logger.warning(line) + + self.assertEqual(result.exit_status, 0, + f'Guest command failed: {command}') + return stdout_lines, stderr_lines + + def run(self, args, ignore_error=False): + stdout, stderr, ret = run_cmd(args) + + if ret != 0: + cmdline = ' '.join(args) + if not ignore_error: + self.fail(f'{cmdline}: Returned {ret}: {stderr}') + else: + self.log.warn(f'{cmdline}: Returned {ret}: {stderr}') + + return (stdout, stderr, ret) + + def set_up_shared_dir(self): + atwd = os.getenv('AVOCADO_TEST_WORKDIR') + self.shared_dir = os.path.join(atwd, 'virtiofs-shared') + + os.mkdir(self.shared_dir) + + self.run(('cp', self.get_data('guest.sh'), + os.path.join(self.shared_dir, 'check.sh'))) + + self.run(('cp', self.get_data('guest-cleanup.sh'), + os.path.join(self.shared_dir, 'cleanup.sh'))) + + def set_up_virtiofs(self): + attmp = os.getenv('AVOCADO_TESTS_COMMON_TMPDIR') + self.vfsdsock = os.path.join(attmp, 'vfsdsock') + + self.run(('sudo', '-n', 'rm', '-f', self.vfsdsock), ignore_error=True) + + self.virtiofsd = \ + subprocess.Popen(('sudo', '-n', + 'tools/virtiofsd/virtiofsd', + f'--socket-path={self.vfsdsock}', + '-o', f'source={self.shared_dir}', + '-o', 'cache=always', + '-o', 'xattr', + '-o', 'announce_submounts', + '-f'), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + universal_newlines=True) + + while not os.path.exists(self.vfsdsock): + if self.virtiofsd.poll() is not None: + self.fail('virtiofsd exited prematurely: ' + + self.virtiofsd.communicate()[1]) + time.sleep(0.1) + + self.run(('sudo', '-n', 'chmod', 'go+rw', self.vfsdsock)) + + self.vm.add_args('-chardev', + f'socket,id=vfsdsock,path={self.vfsdsock}', + '-device', + 'vhost-user-fs-pci,queue-size=1024,chardev=vfsdsock' \ + ',tag=host', + '-object', + 'memory-backend-file,id=mem,size=1G,' \ + 'mem-path=/dev/shm,share=on', + '-numa', + 'node,memdev=mem') + + def launch_vm(self): + self.launch_and_wait() + self.ssh_connect('root', self.ssh_key) + + def set_up_nested_mounts(self): + scratch_dir = os.path.join(self.shared_dir, 'scratch') + try: + os.mkdir(scratch_dir) + except FileExistsError: + pass + + args = ['bash', self.get_data('host.sh'), scratch_dir] + if self.seed: + args += [self.seed] + + out, _, _ = self.run(args) + seed = re.search(r'^Seed: \d+', out) + self.log.info(seed[0]) + + def mount_in_guest(self): + self.ssh_command('mkdir -p /mnt/host') + self.ssh_command('mount -t virtiofs host /mnt/host') + + def check_in_guest(self): + self.ssh_command('bash /mnt/host/check.sh /mnt/host/scratch/share') + + def live_cleanup(self): + self.ssh_command('bash /mnt/host/cleanup.sh /mnt/host/scratch') + + # It would be nice if the above was sufficient to make virtiofsd clear + # all references to the mounted directories (so they can be unmounted + # on the host), but unfortunately it is not. To do so, we have to + # resort to a remount. + self.ssh_command('mount -o remount /mnt/host') + + scratch_dir = os.path.join(self.shared_dir, 'scratch') + self.run(('bash', self.get_data('cleanup.sh'), scratch_dir)) + + @skipUnless(*has_cmds(('sudo -n', ('sudo', '-n', 'true')), + 'ssh-keygen', 'bash', 'losetup', 'mkfs.xfs', 'mount')) + def setUp(self): + vmlinuz = self.params.get('vmlinuz') + if vmlinuz is None: + self.cancel('vmlinuz parameter not set; you must point it to a ' + 'Linux kernel binary to test (to run this test with ' \ + 'the on-image kernel, set it to an empty string)') + + self.seed = self.params.get('seed') + + atwd = os.getenv('AVOCADO_TEST_WORKDIR') + self.ssh_key = os.path.join(atwd, 'id_ed25519') + + self.run(('ssh-keygen', '-t', 'ed25519', '-f', self.ssh_key)) + + pubkey = open(self.ssh_key + '.pub').read() + + super(VirtiofsSubmountsTest, self).setUp(pubkey) + + if len(vmlinuz) > 0: + self.vm.add_args('-kernel', vmlinuz, + '-append', 'console=ttyS0 root=/dev/sda1') + + # Allow us to connect to SSH + self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22', + '-device', 'e1000,netdev=vnet') + + if not kvm_available(self.arch, self.qemu_bin): + self.cancel(KVM_NOT_AVAILABLE) + self.vm.add_args('-accel', 'kvm') + + def tearDown(self): + try: + self.vm.shutdown() + except: + pass + + scratch_dir = os.path.join(self.shared_dir, 'scratch') + self.run(('bash', self.get_data('cleanup.sh'), scratch_dir), + ignore_error=True) + + def test_pre_virtiofsd_set_up(self): + self.set_up_shared_dir() + + self.set_up_nested_mounts() + + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + def test_pre_launch_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + + self.set_up_nested_mounts() + + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + def test_post_launch_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + self.launch_vm() + + self.set_up_nested_mounts() + + self.mount_in_guest() + self.check_in_guest() + + def test_post_mount_set_up(self): + self.set_up_shared_dir() + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + + self.set_up_nested_mounts() + + self.check_in_guest() + + def test_two_runs(self): + self.set_up_shared_dir() + + self.set_up_nested_mounts() + + self.set_up_virtiofs() + self.launch_vm() + self.mount_in_guest() + self.check_in_guest() + + self.live_cleanup() + self.set_up_nested_mounts() + + self.check_in_guest() diff --git a/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh b/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh new file mode 100644 index 0000000000..2a6579a0fe --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/cleanup.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'Scratch dir not given' >&2 + exit 1 +fi + +cd "$scratch_dir/share" || exit 1 +mps=(mnt*) +mp_i=0 +for mp in "${mps[@]}"; do + mp_i=$((mp_i + 1)) + printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}" + + sudo umount -R "$mp" + rm -rf "$mp" +done +echo + +rm some-file +cd .. +rmdir share + +imgs=(fs*.img) +img_i=0 +for img in "${imgs[@]}"; do + img_i=$((img_i + 1)) + printf "Detaching and deleting %i/%i...\r" "$img_i" "${#imgs[@]}" + + dev=$(losetup -j "$img" | sed -e 's/:.*//') + sudo losetup -d "$dev" + rm -f "$img" +done +echo + +echo 'Done.' diff --git a/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh b/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh new file mode 100644 index 0000000000..729cb2d1a5 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/guest-cleanup.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'Scratch dir not given' >&2 + exit 1 +fi + +cd "$scratch_dir/share" || exit 1 + +mps=(mnt*) +mp_i=0 +for mp in "${mps[@]}"; do + mp_i=$((mp_i + 1)) + printf "Unmounting %i/%i...\r" "$mp_i" "${#mps[@]}" + + sudo umount -R "$mp" +done +echo + +echo 'Done.' diff --git a/tests/acceptance/virtiofs_submounts.py.data/guest.sh b/tests/acceptance/virtiofs_submounts.py.data/guest.sh new file mode 100644 index 0000000000..59ba40fde1 --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/guest.sh @@ -0,0 +1,138 @@ +#!/bin/bash + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 " + echo '(The shared directory is the "share" directory in the scratch' \ + 'directory)' +} + +shared_dir=$1 +if [ -z "$shared_dir" ]; then + print_usage "$0" 'Shared dir not given' >&2 + exit 1 +fi + +cd "$shared_dir" + +# FIXME: This should not be necessary, but it is. In order for all +# submounts to be proper mount points, we need to visit them. +# (Before we visit them, they will not be auto-mounted, and so just +# appear as normal directories, with the catch that their st_ino will +# be the st_ino of the filesystem they host, while the st_dev will +# still be the st_dev of the parent.) +# `find` does not work, because it will refuse to touch the mount +# points as long as they are not mounted; their st_dev being shared +# with the parent and st_ino just being the root node's inode ID +# will practically ensure that this node exists elsewhere on the +# filesystem, and `find` is required to recognize loops and not to +# follow them. +# Thus, we have to manually visit all nodes first. + +mnt_i=0 + +function recursively_visit() +{ + pushd "$1" >/dev/null + for entry in *; do + if [[ "$entry" == mnt* ]]; then + mnt_i=$((mnt_i + 1)) + printf "Triggering auto-mount $mnt_i...\r" + fi + + if [ -d "$entry" ]; then + recursively_visit "$entry" + fi + done + popd >/dev/null +} + +recursively_visit . +echo + + +if [ -n "$(find -name not-mounted)" ]; then + echo "Error: not-mounted files visible on mount points:" >&2 + find -name not-mounted >&2 + exit 1 +fi + +if [ ! -f some-file -o "$(cat some-file)" != 'root' ]; then + echo "Error: Bad file in the share root" >&2 + exit 1 +fi + +shopt -s nullglob + +function check_submounts() +{ + local base_path=$1 + + for mp in mnt*; do + printf "Checking submount %i...\r" "$((${#devs[@]} + 1))" + + mp_i=$(echo "$mp" | sed -e 's/mnt//') + dev=$(stat -c '%D' "$mp") + + if [ -n "${devs[mp_i]}" ]; then + echo "Error: $mp encountered twice" >&2 + exit 1 + fi + devs[mp_i]=$dev + + pushd "$mp" >/dev/null + path="$base_path$mp" + while true; do + expected_content="$(printf '%s\n%s\n' "$mp_i" "$path")" + if [ ! -f some-file ]; then + echo "Error: $PWD/some-file does not exist" >&2 + exit 1 + fi + + if [ "$(cat some-file)" != "$expected_content" ]; then + echo "Error: Bad content in $PWD/some-file:" >&2 + echo '--- found ---' + cat some-file + echo '--- expected ---' + echo "$expected_content" + exit 1 + fi + if [ "$(stat -c '%D' some-file)" != "$dev" ]; then + echo "Error: $PWD/some-file has the wrong device ID" >&2 + exit 1 + fi + + if [ -d sub ]; then + if [ "$(stat -c '%D' sub)" != "$dev" ]; then + echo "Error: $PWD/some-file has the wrong device ID" >&2 + exit 1 + fi + cd sub + path="$path/sub" + else + if [ -n "$(echo mnt*)" ]; then + check_submounts "$path/" + fi + break + fi + done + popd >/dev/null + done +} + +root_dev=$(stat -c '%D' some-file) +devs=() +check_submounts '' +echo + +reused_devs=$(echo "$root_dev ${devs[@]}" | tr ' ' '\n' | sort | uniq -d) +if [ -n "$reused_devs" ]; then + echo "Error: Reused device IDs: $reused_devs" >&2 + exit 1 +fi + +echo "Test passed for ${#devs[@]} submounts." diff --git a/tests/acceptance/virtiofs_submounts.py.data/host.sh b/tests/acceptance/virtiofs_submounts.py.data/host.sh new file mode 100644 index 0000000000..d8a9afebdb --- /dev/null +++ b/tests/acceptance/virtiofs_submounts.py.data/host.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +mount_count=128 + +function print_usage() +{ + if [ -n "$2" ]; then + echo "Error: $2" + echo + fi + echo "Usage: $1 [seed]" + echo "(If no seed is given, it will be randomly generated.)" +} + +scratch_dir=$1 +if [ -z "$scratch_dir" ]; then + print_usage "$0" 'No scratch dir given' >&2 + exit 1 +fi + +if [ ! -d "$scratch_dir" ]; then + print_usage "$0" "$scratch_dir is not a directory" >&2 + exit 1 +fi + +seed=$2 +if [ -z "$seed" ]; then + seed=$RANDOM +fi +RANDOM=$seed + +echo "Seed: $seed" + +set -e +shopt -s nullglob + +cd "$scratch_dir" +if [ -d share ]; then + echo 'Error: This directory seems to be in use already' >&2 + exit 1 +fi + +for ((i = 0; i < $mount_count; i++)); do + printf "Setting up fs %i/%i...\r" "$((i + 1))" "$mount_count" + + rm -f fs$i.img + truncate -s 512M fs$i.img + mkfs.xfs -q fs$i.img + devs[i]=$(sudo losetup -f --show fs$i.img) +done +echo + +top_level_mounts=$((RANDOM % mount_count + 1)) + +mkdir -p share +echo 'root' > share/some-file + +for ((i = 0; i < $top_level_mounts; i++)); do + printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count" + + mkdir -p share/mnt$i + touch share/mnt$i/not-mounted + sudo mount "${devs[i]}" share/mnt$i + sudo chown "$(id -u):$(id -g)" share/mnt$i + + pushd share/mnt$i >/dev/null + path=mnt$i + nesting=$((RANDOM % 4)) + for ((j = 0; j < $nesting; j++)); do + cat > some-file < some-file </dev/null +done + +for ((; i < $mount_count; i++)); do + printf "Mounting fs %i/%i...\r" "$((i + 1))" "$mount_count" + + mp_i=$((i % top_level_mounts)) + + pushd share/mnt$mp_i >/dev/null + path=mnt$mp_i + while true; do + sub_mp="$(echo mnt*)" + if cd sub 2>/dev/null; then + path="$path/sub" + elif [ -n "$sub_mp" ] && cd "$sub_mp" 2>/dev/null; then + path="$path/$sub_mp" + else + break + fi + done + mkdir mnt$i + touch mnt$i/not-mounted + sudo mount "${devs[i]}" mnt$i + sudo chown "$(id -u):$(id -g)" mnt$i + + cd mnt$i + path="$path/mnt$i" + nesting=$((RANDOM % 4)) + for ((j = 0; j < $nesting; j++)); do + cat > some-file < some-file </dev/null +done +echo + +echo 'Done.'