From 24f68139247fd5a265874c743c46f293bd3432fa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 7 Apr 2024 23:26:18 -1000 Subject: [PATCH 01/28] target/i386: Add tcg/access.[ch] Provide a method to amortize page lookup across large blocks. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/access.c | 169 ++++++++++++++++++++++++++++++++++++ target/i386/tcg/access.h | 40 +++++++++ target/i386/tcg/meson.build | 1 + 3 files changed, 210 insertions(+) create mode 100644 target/i386/tcg/access.c create mode 100644 target/i386/tcg/access.h diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c new file mode 100644 index 0000000000..56a1181ea5 --- /dev/null +++ b/target/i386/tcg/access.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "access.h" + + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra) +{ + int size1, size2; + void *haddr1, *haddr2; + + assert(size > 0 && size <= TARGET_PAGE_SIZE); + + size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)), + size2 = size - size1; + + memset(ret, 0, sizeof(*ret)); + ret->vaddr = vaddr; + ret->size = size; + ret->size1 = size1; + ret->mmu_idx = mmu_idx; + ret->env = env; + ret->ra = ra; + + haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra); + ret->haddr1 = haddr1; + + if (unlikely(size2)) { + haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra); + if (haddr2 == haddr1 + size1) { + ret->size1 = size; + } else { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + ret->haddr2 = haddr2; +#endif + } + } +} + +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra) +{ + int mmu_idx = cpu_mmu_index(env_cpu(env), false); + access_prepare_mmu(ret, 
env, vaddr, size, type, mmu_idx, ra); +} + +static void *access_ptr(X86Access *ac, vaddr addr, unsigned len) +{ + vaddr offset = addr - ac->vaddr; + + assert(addr >= ac->vaddr); + +#ifdef CONFIG_USER_ONLY + assert(offset + len <= ac->size1); + return ac->haddr1 + offset; +#else + if (likely(offset + len <= ac->size1)) { + return ac->haddr1 ? ac->haddr1 + offset : NULL; + } + assert(offset + len <= ac->size); + /* + * The access is not entirely within the first page. Use ac->haddr2 + * only if it is entirely within the second page; an access spanning + * both pages, or a page without a host pointer, uses the slow path. + */ + if (likely(offset >= ac->size1)) { + return ac->haddr2 ? ac->haddr2 + (offset - ac->size1) : NULL; + } + return NULL; +#endif +} + +#ifdef CONFIG_USER_ONLY +# define test_ptr(p) true +#else +# define test_ptr(p) likely(p) +#endif + +uint8_t access_ldb(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + return ldub_p(p); + } + return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint16_t access_ldw(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + return lduw_le_p(p); + } + return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint32_t access_ldl(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + return ldl_le_p(p); + } + return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint64_t access_ldq(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + return ldq_le_p(p); + } + return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +void access_stb(X86Access *ac, vaddr addr, uint8_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + stb_p(p, val); + } else { + cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stw(X86Access *ac, vaddr addr, uint16_t val) +{ + void *p
= access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + stw_le_p(p, val); + } else { + cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stl(X86Access *ac, vaddr addr, uint32_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + stl_le_p(p, val); + } else { + cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stq(X86Access *ac, vaddr addr, uint64_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + stq_le_p(p, val); + } else { + cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h new file mode 100644 index 0000000000..d70808a3a3 --- /dev/null +++ b/target/i386/tcg/access.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#ifndef X86_TCG_ACCESS_H +#define X86_TCG_ACCESS_H + +/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */ +typedef struct X86Access { + target_ulong vaddr; + void *haddr1; + void *haddr2; + uint16_t size; + uint16_t size1; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the rest + * to do the access here instead of passing it around in the helpers. 
+ */ + int mmu_idx; + CPUX86State *env; + uintptr_t ra; +} X86Access; + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra); +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra); + +uint8_t access_ldb(X86Access *ac, vaddr addr); +uint16_t access_ldw(X86Access *ac, vaddr addr); +uint32_t access_ldl(X86Access *ac, vaddr addr); +uint64_t access_ldq(X86Access *ac, vaddr addr); + +void access_stb(X86Access *ac, vaddr addr, uint8_t val); +void access_stw(X86Access *ac, vaddr addr, uint16_t val); +void access_stl(X86Access *ac, vaddr addr, uint32_t val); +void access_stq(X86Access *ac, vaddr addr, uint64_t val); + +#endif diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build index f9110e890c..1105b35d92 100644 --- a/target/i386/tcg/meson.build +++ b/target/i386/tcg/meson.build @@ -1,4 +1,5 @@ i386_ss.add(when: 'CONFIG_TCG', if_true: files( + 'access.c', 'bpt_helper.c', 'cc_helper.c', 'excp_helper.c', From d3e8b648ab7f94f0e9235ade9977954e426991a0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 7 Apr 2024 23:50:46 -1000 Subject: [PATCH 02/28] target/i386: Convert do_fldt, do_fstt to X86Access Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index ece22a3553..1662643a8f 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -27,6 +27,7 @@ #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" #include "helper-tcg.h" +#include "access.h" /* float macros */ #define FT0 (env->ft0) @@ -84,23 +85,22 @@ static inline void fpop(CPUX86State *env) env->fpstt = (env->fpstt + 1) & 7; } -static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) +static floatx80 do_fldt(X86Access 
*ac, target_ulong ptr) { CPU_LDoubleU temp; - temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); - temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); + temp.l.lower = access_ldq(ac, ptr); + temp.l.upper = access_ldw(ac, ptr + 8); return temp.d; } -static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, - uintptr_t retaddr) +static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f) { CPU_LDoubleU temp; temp.d = f; - cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); - cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); + access_stq(ac, ptr, temp.l.lower); + access_stw(ac, ptr + 8, temp.l.upper); } /* x87 FPU helpers */ @@ -382,16 +382,22 @@ int64_t helper_fisttll_ST0(CPUX86State *env) void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) { int new_fpstt; + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); + env->fpregs[new_fpstt].d = do_fldt(&ac, ptr); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) { - do_fstt(env, ST0, ptr, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); + do_fstt(&ac, ptr, ST0); } void helper_fpush(CPUX86State *env) @@ -2460,15 +2466,18 @@ void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, uintptr_t retaddr) { + X86Access ac; floatx80 tmp; int i; do_fstenv(env, ptr, data32, retaddr); ptr += (target_ulong)14 << data32; + access_prepare(&ac, env, ptr, 80, MMU_DATA_STORE, retaddr); + for (i = 0; i < 8; i++) { tmp = ST(i); - do_fstt(env, tmp, ptr, retaddr); + do_fstt(&ac, ptr, tmp); ptr += 10; } @@ -2483,14 +2492,17 @@ void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, uintptr_t retaddr) { + X86Access ac; 
floatx80 tmp; int i; do_fldenv(env, ptr, data32, retaddr); ptr += (target_ulong)14 << data32; + access_prepare(&ac, env, ptr, 80, MMU_DATA_LOAD, retaddr); + for (i = 0; i < 8; i++) { - tmp = do_fldt(env, ptr, retaddr); + tmp = do_fldt(&ac, ptr); ST(i) = tmp; ptr += 10; } @@ -2507,6 +2519,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) { int fpus, fptag, i; target_ulong addr; + X86Access ac; fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; fptag = 0; @@ -2525,9 +2538,11 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); + access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_STORE, ra); + for (i = 0; i < 8; i++) { floatx80 tmp = ST(i); - do_fstt(env, tmp, addr, ra); + do_fstt(&ac, addr, tmp); addr += 16; } } @@ -2700,6 +2715,7 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) { int i, fpuc, fpus, fptag; target_ulong addr; + X86Access ac; fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); @@ -2712,8 +2728,10 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) } addr = ptr + XO(legacy.fpregs); + access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_LOAD, ra); + for (i = 0; i < 8; i++) { - floatx80 tmp = do_fldt(env, addr, ra); + floatx80 tmp = do_fldt(&ac, addr); ST(i) = tmp; addr += 16; } From 4526f58a2727a37c829c21c3f095226601099552 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 7 Apr 2024 23:58:17 -1000 Subject: [PATCH 03/28] target/i386: Convert helper_{fbld,fbst}_ST0 to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 1662643a8f..6237cd8383 100644 --- 
a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -773,18 +773,21 @@ void helper_fninit(CPUX86State *env) void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) { + X86Access ac; floatx80 tmp; uint64_t val; unsigned int v; int i; + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); + val = 0; for (i = 8; i >= 0; i--) { - v = cpu_ldub_data_ra(env, ptr + i, GETPC()); + v = access_ldb(&ac, ptr + i); val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); } tmp = int64_to_floatx80(val, &env->fp_status); - if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { + if (access_ldb(&ac, ptr + 9) & 0x80) { tmp = floatx80_chs(tmp); } fpush(env); @@ -798,7 +801,9 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) target_ulong mem_ref, mem_end; int64_t val; CPU_LDoubleU temp; + X86Access ac; + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); @@ -806,20 +811,20 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { set_float_exception_flags(float_flag_invalid, &env->fp_status); while (mem_ref < ptr + 7) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } - cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + access_stb(&ac, mem_ref++, 0xc0); + access_stb(&ac, mem_ref++, 0xff); + access_stb(&ac, mem_ref++, 0xff); merge_exception_flags(env, old_flags); return; } mem_end = mem_ref + 9; if (SIGND(temp)) { - cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); + access_stb(&ac, mem_end, 0x80); val = -val; } else { - cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); + access_stb(&ac, mem_end, 0x00); } while (mem_ref < mem_end) { if (val == 0) { @@ -828,10 +833,10 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) v = val % 100; val = val / 100; v = ((v / 10) << 4) | (v % 10); - 
cpu_stb_data_ra(env, mem_ref++, v, GETPC()); + access_stb(&ac, mem_ref++, v); } while (mem_ref < mem_end) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } merge_exception_flags(env, old_flags); } From bc13c2dd01288d26bfb38bdc958ad58b58661ac0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 00:14:48 -1000 Subject: [PATCH 04/28] target/i386: Convert do_fldenv to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 6237cd8383..5ad6e04639 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2442,20 +2442,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) #endif } -static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) { int i, fpus, fptag; + CPUX86State *env = ac->env; + + cpu_set_fpuc(env, access_ldw(ac, ptr)); + fpus = access_ldw(ac, ptr + (2 << data32)); + fptag = access_ldw(ac, ptr + (4 << data32)); - if (data32) { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); - } else { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); - } cpu_set_fpus(env, fpus); for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag & 3) == 3); @@ -2465,7 +2460,10 @@ static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fldenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC()); + do_fldenv(&ac, ptr, data32); } static
void do_fsave(CPUX86State *env, target_ulong ptr, int data32, @@ -2499,12 +2497,12 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, { X86Access ac; floatx80 tmp; - int i; + int i, envsize = 14 << data32; - do_fldenv(env, ptr, data32, retaddr); - ptr += (target_ulong)14 << data32; + access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_LOAD, retaddr); - access_prepare(&ac, env, ptr, 80, MMU_DATA_LOAD, retaddr); + do_fldenv(&ac, ptr, data32); + ptr += envsize; for (i = 0; i < 8; i++) { tmp = do_fldt(&ac, ptr); From 505e2ef744b3ebd0a28a94ed9b00f99595b0cf6a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 00:23:22 -1000 Subject: [PATCH 05/28] target/i386: Convert do_fstenv to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 45 +++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 5ad6e04639..01e9a1fbbf 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2373,9 +2373,9 @@ void helper_fxam_ST0(CPUX86State *env) } } -static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fstenv(X86Access *ac, target_ulong ptr, int data32) { + CPUX86State *env = ac->env; int fpus, fptag, exp, i; uint64_t mant; CPU_LDoubleU tmp; @@ -2402,28 +2402,31 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, } if (data32) { /* 32 bit */ - cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); - cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); - cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ - cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ - cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ - cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ + access_stl(ac, ptr, env->fpuc); + access_stl(ac, ptr 
+ 4, fpus); + access_stl(ac, ptr + 8, fptag); + access_stl(ac, ptr + 12, env->fpip); /* fpip */ + access_stl(ac, ptr + 16, env->fpcs); /* fpcs */ + access_stl(ac, ptr + 20, env->fpdp); /* fpoo */ + access_stl(ac, ptr + 24, env->fpds); /* fpos */ } else { /* 16 bit */ - cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); - cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); - cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); - cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); - cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); - cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); + access_stw(ac, ptr, env->fpuc); + access_stw(ac, ptr + 2, fpus); + access_stw(ac, ptr + 4, fptag); + access_stw(ac, ptr + 6, env->fpip); + access_stw(ac, ptr + 8, env->fpcs); + access_stw(ac, ptr + 10, env->fpdp); + access_stw(ac, ptr + 12, env->fpds); } } void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fstenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); + do_fstenv(&ac, ptr, data32); } static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) @@ -2471,12 +2474,12 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, { X86Access ac; floatx80 tmp; - int i; + int i, envsize = 14 << data32; - do_fstenv(env, ptr, data32, retaddr); + access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_STORE, retaddr); - ptr += (target_ulong)14 << data32; - access_prepare(&ac, env, ptr, 80, MMU_DATA_STORE, retaddr); + do_fstenv(&ac, ptr, data32); + ptr += envsize; for (i = 0; i < 8; i++) { tmp = ST(i); From 94f60f8f1c07de1449f798141cf13ba93f07d875 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 00:35:06 -1000 Subject: [PATCH 06/28] target/i386: Convert do_fsave, do_frstor to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 60 ++++++++++++++++++++---------------- 1 file 
changed, 33 insertions(+), 27 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 01e9a1fbbf..df12eac71e 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2469,21 +2469,16 @@ void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) do_fldenv(&ac, ptr, data32); } -static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fsave(X86Access *ac, target_ulong ptr, int data32) { - X86Access ac; - floatx80 tmp; - int i, envsize = 14 << data32; + CPUX86State *env = ac->env; - access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_STORE, retaddr); + do_fstenv(ac, ptr, data32); + ptr += 14 << data32; - do_fstenv(&ac, ptr, data32); - ptr += envsize; - - for (i = 0; i < 8; i++) { - tmp = ST(i); - do_fstt(&ac, ptr, tmp); + for (int i = 0; i < 8; i++) { + floatx80 tmp = ST(i); + do_fstt(ac, ptr, tmp); ptr += 10; } @@ -2492,23 +2487,22 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) { - do_fsave(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); + do_fsave(&ac, ptr, data32); } -static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_frstor(X86Access *ac, target_ulong ptr, int data32) { - X86Access ac; - floatx80 tmp; - int i, envsize = 14 << data32; + CPUX86State *env = ac->env; - access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_LOAD, retaddr); + do_fldenv(ac, ptr, data32); + ptr += 14 << data32; - do_fldenv(&ac, ptr, data32); - ptr += envsize; - - for (i = 0; i < 8; i++) { - tmp = do_fldt(&ac, ptr); + for (int i = 0; i < 8; i++) { + floatx80 tmp = do_fldt(ac, ptr); ST(i) = tmp; ptr += 10; } @@ -2516,7 +2510,11 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, void helper_frstor(CPUX86State *env, 
target_ulong ptr, int data32) { - do_frstor(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); + do_frstor(&ac, ptr, data32); } #define XO(X) offsetof(X86XSaveArea, X) @@ -2972,12 +2970,20 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) #if defined(CONFIG_USER_ONLY) void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) { - do_fsave(env, ptr, data32, 0); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0); + do_fsave(&ac, ptr, data32); } void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) { - do_frstor(env, ptr, data32, 0); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0); + do_frstor(&ac, ptr, data32); } void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) From b7e6d3ad30b53121ff184e93d33dcf61e354c9bb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 07:58:42 -1000 Subject: [PATCH 07/28] target/i386: Convert do_xsave_{fpu,mxcr,sse} to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 52 +++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index df12eac71e..8fbe6e00ce 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2519,11 +2519,11 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) #define XO(X) offsetof(X86XSaveArea, X) -static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int fpus, fptag, i; target_ulong addr; - X86Access ac; fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; fptag = 0; @@ -2531,35 +2531,37 @@ static void do_xsave_fpu(CPUX86State *env, 
target_ulong ptr, uintptr_t ra) fptag |= (env->fptags[i] << i); } - cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); + access_stw(ac, ptr + XO(legacy.fcw), env->fpuc); + access_stw(ac, ptr + XO(legacy.fsw), fpus); + access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff); /* In 32-bit mode this is eip, sel, dp, sel. In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */ + access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); - access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_STORE, ra); for (i = 0; i < 8; i++) { floatx80 tmp = ST(i); - do_fstt(&ac, addr, tmp); + do_fstt(ac, addr, tmp); addr += 16; } } -static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + update_mxcsr_from_sse_status(env); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); + access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr); + access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff); } -static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2571,8 +2573,8 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); - cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); + access_stq(ac, addr, 
env->xmm_regs[i].ZMM_Q(0)); + access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1)); addr += 16; } } @@ -2619,20 +2621,24 @@ static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + X86Access ac; + /* The operand must be 16 byte aligned */ if (ptr & 0xf) { raise_exception_ra(env, EXCP0D_GPF, ra); } - do_xsave_fpu(env, ptr, ra); + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_STORE, ra); + do_xsave_fpu(&ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(&ac, ptr); /* Fast FXSAVE leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(&ac, ptr); } } } @@ -2660,6 +2666,7 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uint64_t inuse, uint64_t opt, uintptr_t ra) { uint64_t old_bv, new_bv; + X86Access ac; /* The OS must have enabled XSAVE. */ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { @@ -2675,15 +2682,18 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, rfbm &= env->xcr0; opt &= rfbm; + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_STORE, ra); + if (opt & XSTATE_FP_MASK) { - do_xsave_fpu(env, ptr, ra); + do_xsave_fpu(&ac, ptr); } if (rfbm & XSTATE_SSE_MASK) { /* Note that saving MXCSR is not suppressed by XSAVEOPT. 
*/ - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(&ac, ptr); } if (opt & XSTATE_SSE_MASK) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(&ac, ptr); } if (opt & XSTATE_YMM_MASK) { do_xsave_ymmh(env, ptr + XO(avx_state), ra); From e41d2eaf17f1bcd0b5c085b5c9b6151b592ee620 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 08:33:29 -1000 Subject: [PATCH 08/28] target/i386: Convert do_xrstor_{fpu,mxcr,sse} to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 8fbe6e00ce..f21cdb45ea 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2725,39 +2725,41 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); } -static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, fpuc, fpus, fptag; target_ulong addr; - X86Access ac; - fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); - fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); - fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); + fpuc = access_ldw(ac, ptr + XO(legacy.fcw)); + fpus = access_ldw(ac, ptr + XO(legacy.fsw)); + fptag = access_ldw(ac, ptr + XO(legacy.ftw)); cpu_set_fpuc(env, fpuc); cpu_set_fpus(env, fpus); + fptag ^= 0xff; for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag >> i) & 1); } addr = ptr + XO(legacy.fpregs); - access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_LOAD, ra); for (i = 0; i < 8; i++) { - floatx80 tmp = do_fldt(&ac, addr); + floatx80 tmp = do_fldt(ac, addr); ST(i) = tmp; addr += 16; } } -static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr) { - 
cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); + CPUX86State *env = ac->env; + cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr))); } -static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2769,8 +2771,8 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); - env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); + env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr); + env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8); addr += 16; } } @@ -2850,20 +2852,24 @@ static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + X86Access ac; + /* The operand must be 16 byte aligned */ if (ptr & 0xf) { raise_exception_ra(env, EXCP0D_GPF, ra); } - do_xrstor_fpu(env, ptr, ra); + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_LOAD, ra); + do_xrstor_fpu(&ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(&ac, ptr); /* Fast FXRSTOR leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(&ac, ptr); } } } @@ -2876,6 +2882,7 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) { uint64_t xstate_bv, xcomp_bv, reserve0; + X86Access ac; rfbm &= env->xcr0; @@ -2914,9 +2921,12 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr raise_exception_ra(env, EXCP0D_GPF, ra); } + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + 
MMU_DATA_LOAD, ra); + if (rfbm & XSTATE_FP_MASK) { if (xstate_bv & XSTATE_FP_MASK) { - do_xrstor_fpu(env, ptr, ra); + do_xrstor_fpu(&ac, ptr); } else { do_fninit(env); memset(env->fpregs, 0, sizeof(env->fpregs)); @@ -2925,9 +2935,9 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_SSE_MASK) { /* Note that the standard form of XRSTOR loads MXCSR from memory whether or not the XSTATE_BV bit is set. */ - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(&ac, ptr); if (xstate_bv & XSTATE_SSE_MASK) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(&ac, ptr); } else { do_clear_sse(env); } From 6d030aab29f8713776aa2fec31bc94bb98a96e55 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 08:44:37 -1000 Subject: [PATCH 09/28] target/i386: Convert do_fxsave, do_fxrstor to X86Access Move the alignment fault from do_* to helper_*, as it need not apply to usage from within user-only signal handling. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 84 ++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index f21cdb45ea..4dcb0b92ff 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2619,8 +2619,25 @@ static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) cpu_stq_data_ra(env, ptr, env->pkru, ra); } -static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxsave(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + + do_xsave_fpu(ac, ptr); + if (env->cr[4] & CR4_OSFXSR_MASK) { + do_xsave_mxcsr(ac, ptr); + /* Fast FXSAVE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + do_xsave_sse(ac, ptr); + } + } +} + +void helper_fxsave(CPUX86State *env, target_ulong ptr) +{ + uintptr_t ra =
GETPC(); X86Access ac; /* The operand must be 16 byte aligned */ @@ -2630,22 +2647,7 @@ static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), MMU_DATA_STORE, ra); - do_xsave_fpu(&ac, ptr); - - if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xsave_mxcsr(&ac, ptr); - /* Fast FXSAVE leaves out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - do_xsave_sse(&ac, ptr); - } - } -} - -void helper_fxsave(CPUX86State *env, target_ulong ptr) -{ - do_fxsave(env, ptr, GETPC()); + do_fxsave(&ac, ptr); } static uint64_t get_xinuse(CPUX86State *env) @@ -2850,8 +2852,25 @@ static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) env->pkru = cpu_ldq_data_ra(env, ptr, ra); } -static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxrstor(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + + do_xrstor_fpu(ac, ptr); + if (env->cr[4] & CR4_OSFXSR_MASK) { + do_xrstor_mxcsr(ac, ptr); + /* Fast FXRSTOR leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + do_xrstor_sse(ac, ptr); + } + } +} + +void helper_fxrstor(CPUX86State *env, target_ulong ptr) +{ + uintptr_t ra = GETPC(); X86Access ac; /* The operand must be 16 byte aligned */ @@ -2861,22 +2880,7 @@ static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), MMU_DATA_LOAD, ra); - do_xrstor_fpu(&ac, ptr); - - if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xrstor_mxcsr(&ac, ptr); - /* Fast FXRSTOR leaves out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - do_xrstor_sse(&ac, ptr); - } - } -} - -void helper_fxrstor(CPUX86State *env, target_ulong ptr) -{ - do_fxrstor(env, ptr, GETPC()); + 
do_fxrstor(&ac, ptr); } static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) @@ -3008,12 +3012,20 @@ void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) { - do_fxsave(env, ptr, 0); + X86Access ac; + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_STORE, 0); + do_fxsave(&ac, ptr); } void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) { - do_fxrstor(env, ptr, 0); + X86Access ac; + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_LOAD, 0); + do_fxrstor(&ac, ptr); } void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) From 6b1b736bae9b89882ed293d0256f2a0de1d03f9c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 09:01:58 -1000 Subject: [PATCH 10/28] target/i386: Convert do_xsave_* to X86Access The body of do_xsave is now fully converted. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 47 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 4dcb0b92ff..356397a4ab 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2579,8 +2579,9 @@ static void do_xsave_sse(X86Access *ac, target_ulong ptr) } } -static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2590,33 +2591,36 @@ static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); - cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); + access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2)); + access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3)); } } -static void 
do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); - cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); + access_stq(ac, addr, env->bnd_regs[i].lb); + access_stq(ac, addr + 8, env->bnd_regs[i].ub); } } -static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), - env->bndcs_regs.cfgu, ra); - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), - env->bndcs_regs.sts, ra); + CPUX86State *env = ac->env; + + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), + env->bndcs_regs.cfgu); + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), + env->bndcs_regs.sts); } -static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_pkru(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr, env->pkru, ra); + access_stq(ac, ptr, ac->env->pkru); } static void do_fxsave(X86Access *ac, target_ulong ptr) @@ -2669,6 +2673,7 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, { uint64_t old_bv, new_bv; X86Access ac; + unsigned size; /* The OS must have enabled XSAVE. 
*/ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { @@ -2684,8 +2689,8 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, rfbm &= env->xcr0; opt &= rfbm; - access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), - MMU_DATA_STORE, ra); + size = xsave_area_size(opt, false); + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); if (opt & XSTATE_FP_MASK) { do_xsave_fpu(&ac, ptr); @@ -2698,22 +2703,22 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, do_xsave_sse(&ac, ptr); } if (opt & XSTATE_YMM_MASK) { - do_xsave_ymmh(env, ptr + XO(avx_state), ra); + do_xsave_ymmh(&ac, ptr + XO(avx_state)); } if (opt & XSTATE_BNDREGS_MASK) { - do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); + do_xsave_bndregs(&ac, ptr + XO(bndreg_state)); } if (opt & XSTATE_BNDCSR_MASK) { - do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xsave_bndcsr(&ac, ptr + XO(bndcsr_state)); } if (opt & XSTATE_PKRU_MASK) { - do_xsave_pkru(env, ptr + XO(pkru_state), ra); + do_xsave_pkru(&ac, ptr + XO(pkru_state)); } /* Update the XSTATE_BV field. */ - old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + old_bv = access_ldq(&ac, ptr + XO(header.xstate_bv)); new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); + access_stq(&ac, ptr + XO(header.xstate_bv), new_bv); } void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) From 58955a96d9ce59ada80af88e4ba7c8ecfb79c87f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 09:14:47 -1000 Subject: [PATCH 11/28] target/i386: Convert do_xrstor_* to X86Access The body of do_xrstor is now fully converted. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 51 ++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 356397a4ab..7796688514 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2800,8 +2800,9 @@ static void do_clear_sse(CPUX86State *env) } } -static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2811,8 +2812,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); - env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); + env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr); + env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8); } } @@ -2832,29 +2833,32 @@ static void do_clear_ymmh(CPUX86State *env) } } -static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); - env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); + env->bnd_regs[i].lb = access_ldq(ac, addr); + env->bnd_regs[i].ub = access_ldq(ac, addr + 8); } } -static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + /* FIXME: Extend highest implemented bit of linear address. 
*/ env->bndcs_regs.cfgu - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu)); env->bndcs_regs.sts - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts)); } -static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_pkru(X86Access *ac, target_ulong ptr) { - env->pkru = cpu_ldq_data_ra(env, ptr, ra); + ac->env->pkru = access_ldq(ac, ptr); } static void do_fxrstor(X86Access *ac, target_ulong ptr) @@ -2892,6 +2896,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr { uint64_t xstate_bv, xcomp_bv, reserve0; X86Access ac; + unsigned size, size_ext; rfbm &= env->xcr0; @@ -2905,7 +2910,10 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr raise_exception_ra(env, EXCP0D_GPF, ra); } - xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); + + xstate_bv = access_ldq(&ac, ptr + XO(header.xstate_bv)); if ((int64_t)xstate_bv < 0) { /* FIXME: Compact form. */ @@ -2924,14 +2932,17 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr describes only XCOMP_BV, but the description of the standard form of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which includes the next 64-bit field. 
*/ - xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); - reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); + xcomp_bv = access_ldq(&ac, ptr + XO(header.xcomp_bv)); + reserve0 = access_ldq(&ac, ptr + XO(header.reserve0)); if (xcomp_bv || reserve0) { raise_exception_ra(env, EXCP0D_GPF, ra); } - access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), - MMU_DATA_LOAD, ra); + size_ext = xsave_area_size(rfbm & xstate_bv, false); + if (size < size_ext) { + /* TODO: See if existing page probe has covered extra size. */ + access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); + } if (rfbm & XSTATE_FP_MASK) { if (xstate_bv & XSTATE_FP_MASK) { @@ -2953,14 +2964,14 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr } if (rfbm & XSTATE_YMM_MASK) { if (xstate_bv & XSTATE_YMM_MASK) { - do_xrstor_ymmh(env, ptr + XO(avx_state), ra); + do_xrstor_ymmh(&ac, ptr + XO(avx_state)); } else { do_clear_ymmh(env); } } if (rfbm & XSTATE_BNDREGS_MASK) { if (xstate_bv & XSTATE_BNDREGS_MASK) { - do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); + do_xrstor_bndregs(&ac, ptr + XO(bndreg_state)); env->hflags |= HF_MPX_IU_MASK; } else { memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); @@ -2969,7 +2980,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr } if (rfbm & XSTATE_BNDCSR_MASK) { if (xstate_bv & XSTATE_BNDCSR_MASK) { - do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xrstor_bndcsr(&ac, ptr + XO(bndcsr_state)); } else { memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); } @@ -2978,7 +2989,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_PKRU_MASK) { uint64_t old_pkru = env->pkru; if (xstate_bv & XSTATE_PKRU_MASK) { - do_xrstor_pkru(env, ptr + XO(pkru_state), ra); + do_xrstor_pkru(&ac, ptr + XO(pkru_state)); } else { env->pkru = 0; } From a8f68831c6dfd1903555e4402addd5138f78db97 Mon Sep 17 00:00:00 2001 From: Richard Henderson 
Date: Mon, 8 Apr 2024 09:45:13 -1000 Subject: [PATCH 12/28] target/i386: Split out do_xsave_chk This path is not required by user-only, and can in fact be shared between xsave and xrstor. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 51 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 7796688514..6a319dadf2 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2675,16 +2675,6 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, X86Access ac; unsigned size; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - /* Never save anything not enabled by XCR0. */ rfbm &= env->xcr0; opt &= rfbm; @@ -2721,15 +2711,35 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, access_stq(&ac, ptr + XO(header.xstate_bv), new_bv); } +static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. 
*/ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } +} + void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); + uintptr_t ra = GETPC(); + + do_xsave_chk(env, ptr, ra); + do_xsave(env, ptr, rfbm, get_xinuse(env), -1, ra); } void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - uint64_t inuse = get_xinuse(env); - do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); + uintptr_t ra = GETPC(); + uint64_t inuse; + + do_xsave_chk(env, ptr, ra); + inuse = get_xinuse(env); + do_xsave(env, ptr, rfbm, inuse, inuse, ra); } static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) @@ -2900,16 +2910,6 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr rfbm &= env->xcr0; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); @@ -3004,7 +3004,10 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xrstor(env, ptr, rfbm, GETPC()); + uintptr_t ra = GETPC(); + + do_xsave_chk(env, ptr, ra); + do_xrstor(env, ptr, rfbm, ra); } #if defined(CONFIG_USER_ONLY) From a2d64d61c1fa1826344fef02e5cc7e331d307e0f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 09:48:26 -1000 Subject: [PATCH 13/28] target/i386: Add rfbm argument to cpu_x86_{xsave,xrstor} For now, continue to pass all 1's from signal.c.
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 4 ++-- target/i386/cpu.h | 4 ++-- target/i386/tcg/fpu_helper.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 990048f42a..824375d42a 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -268,7 +268,7 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs /* Zero the header, XSAVE *adds* features to an existing save state. */ memset(fxsave->xfeatures, 0, 64); - cpu_x86_xsave(env, fxsave_addr); + cpu_x86_xsave(env, fxsave_addr, -1); __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); __put_user(extended_size, &fxsave->sw_reserved.extended_size); __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); @@ -569,7 +569,7 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs return 1; } if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) { - cpu_x86_xrstor(env, fxsave_addr); + cpu_x86_xrstor(env, fxsave_addr, -1); return 0; } } diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c500a69a69..91170a088a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2262,8 +2262,8 @@ void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); -void cpu_x86_xsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr); +void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm); +void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm); /* cpu.c */ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 6a319dadf2..a09d6aaf07 100644 --- 
a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3047,14 +3047,14 @@ void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) do_fxrstor(&ac, ptr); } -void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); + do_xsave(env, ptr, rfbm, get_xinuse(env), -1, 0); } -void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) +void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xrstor(env, ptr, -1, 0); + do_xrstor(env, ptr, rfbm, 0); } #endif From 6dba8b471cb2b40140b672a9ae9b965a7a132409 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 09:55:21 -1000 Subject: [PATCH 14/28] target/i386: Add {hw,sw}_reserved to X86LegacyXSaveArea This completes the 512 byte structure, allowing the union to be removed. Assert that the structure layout is as expected. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/cpu.h | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 91170a088a..fdd318963a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1425,23 +1425,34 @@ typedef struct { */ #define UNASSIGNED_APIC_ID 0xFFFFFFFF -typedef union X86LegacyXSaveArea { - struct { - uint16_t fcw; - uint16_t fsw; - uint8_t ftw; - uint8_t reserved; - uint16_t fpop; - uint64_t fpip; - uint64_t fpdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - FPReg fpregs[8]; - uint8_t xmm_regs[16][16]; +typedef struct X86LegacyXSaveArea { + uint16_t fcw; + uint16_t fsw; + uint8_t ftw; + uint8_t reserved; + uint16_t fpop; + union { + struct { + uint64_t fpip; + uint64_t fpdp; + }; + struct { + uint32_t fip; + uint32_t fcs; + uint32_t foo; + uint32_t fos; + }; }; - uint8_t data[512]; + uint32_t mxcsr; + uint32_t mxcsr_mask; + FPReg fpregs[8]; + uint8_t xmm_regs[16][16]; + uint32_t hw_reserved[12]; + uint32_t 
sw_reserved[12]; } X86LegacyXSaveArea; +QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512); + typedef struct X86XSaveHeader { uint64_t xstate_bv; uint64_t xcomp_bv; From 077c43eb0d30a257ee33f1b48ea5b29eafcf4eb5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 10:22:32 -1000 Subject: [PATCH 15/28] linux-user/i386: Drop xfeatures_size from sigcontext arithmetic This is subtracting sizeof(target_fpstate_fxsave) in TARGET_FXSAVE_SIZE, then adding it again via &fxsave->xfeatures. Perform the same computation using xstate_size alone. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 824375d42a..89048ed069 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -253,7 +253,6 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs __put_user(0, &fxsave->sw_reserved.magic1); } else { uint32_t xstate_size = xsave_area_size(env->xcr0, false); - uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; /* * extended_size is the offset from fpstate_addr to right after the end @@ -273,7 +272,8 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs __put_user(extended_size, &fxsave->sw_reserved.extended_size); __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); __put_user(xstate_size, &fxsave->sw_reserved.xstate_size); - __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]); + __put_user(TARGET_FP_XSTATE_MAGIC2, + (uint32_t *)((void *)fxsave + xstate_size)); } } @@ -559,7 +559,6 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size); uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size); - uint32_t xfeatures_size = xstate_size - 
TARGET_FXSAVE_SIZE; /* Linux checks MAGIC2 using xstate_size, not extended_size. */ if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 && @@ -568,7 +567,7 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { return 1; } - if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) { + if (tswapl(*(uint32_t *)((void *)fxsave + xstate_size)) == TARGET_FP_XSTATE_MAGIC2) { cpu_x86_xrstor(env, fxsave_addr, -1); return 0; } From fcc9b64d0767563adb03c933a208e7ba5250c6f0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 10:25:47 -1000 Subject: [PATCH 16/28] linux-user/i386: Remove xfeatures from target_fpstate_fxsave This is easily computed by advancing past the structure. At the same time, replace the magic number "64". Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 89048ed069..f8064691c4 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -67,7 +67,6 @@ struct target_fpstate_fxsave { uint32_t xmm_space[64]; uint32_t hw_reserved[12]; struct target_fpx_sw_bytes sw_reserved; - uint8_t xfeatures[]; }; #define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave) QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512); @@ -266,7 +265,7 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs assert(!(fxsave_addr & 0x3f)); /* Zero the header, XSAVE *adds* features to an existing save state. 
*/ - memset(fxsave->xfeatures, 0, 64); + memset(fxsave + 1, 0, sizeof(X86XSaveHeader)); cpu_x86_xsave(env, fxsave_addr, -1); __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); __put_user(extended_size, &fxsave->sw_reserved.extended_size); From 3b6e9491e3e8a51e7eb8d25316b75ca4a58b22ff Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 10:33:54 -1000 Subject: [PATCH 17/28] linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea Use the structure definition from target/i386/cpu.h. The only minor quirk is re-casting the sw_reserved area to the OS specific struct target_fpx_sw_bytes. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 71 +++++++++++++++------------------------- 1 file changed, 26 insertions(+), 45 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index f8064691c4..5b1c570bff 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -34,16 +34,6 @@ struct target_fpreg { uint16_t exponent; }; -struct target_fpxreg { - uint16_t significand[4]; - uint16_t exponent; - uint16_t padding[3]; -}; - -struct target_xmmreg { - uint32_t element[4]; -}; - struct target_fpx_sw_bytes { uint32_t magic1; uint32_t extended_size; @@ -53,25 +43,6 @@ struct target_fpx_sw_bytes { }; QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4); -struct target_fpstate_fxsave { - /* FXSAVE format */ - uint16_t cw; - uint16_t sw; - uint16_t twd; - uint16_t fop; - uint64_t rip; - uint64_t rdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - uint32_t st_space[32]; - uint32_t xmm_space[64]; - uint32_t hw_reserved[12]; - struct target_fpx_sw_bytes sw_reserved; -}; -#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave) -QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512); -QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464); - struct target_fpstate_32 { /* Regular FPU environment */ uint32_t cw; @@ -84,7 +55,7 @@ struct 
target_fpstate_32 { struct target_fpreg st[8]; uint16_t status; uint16_t magic; /* 0xffff = regular FPU data only */ - struct target_fpstate_fxsave fxsave; + X86LegacyXSaveArea fxsave; }; /* @@ -97,7 +68,7 @@ QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15); # define target_fpstate target_fpstate_32 # define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave) #else -# define target_fpstate target_fpstate_fxsave +# define target_fpstate X86LegacyXSaveArea # define TARGET_FPSTATE_FXSAVE_OFFSET 0 #endif @@ -241,15 +212,17 @@ struct rt_sigframe { * Set up a signal frame. */ -static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, +static void xsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, abi_ulong fxsave_addr) { + struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved; + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { /* fxsave_addr must be 16 byte aligned for fxsave */ assert(!(fxsave_addr & 0xf)); cpu_x86_fxsave(env, fxsave_addr); - __put_user(0, &fxsave->sw_reserved.magic1); + __put_user(0, &sw->magic1); } else { uint32_t xstate_size = xsave_area_size(env->xcr0, false); @@ -267,10 +240,10 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs /* Zero the header, XSAVE *adds* features to an existing save state. 
*/ memset(fxsave + 1, 0, sizeof(X86XSaveHeader)); cpu_x86_xsave(env, fxsave_addr, -1); - __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); - __put_user(extended_size, &fxsave->sw_reserved.extended_size); - __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); - __put_user(xstate_size, &fxsave->sw_reserved.xstate_size); + __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); + __put_user(extended_size, &sw->extended_size); + __put_user(env->xcr0, &sw->xfeatures); + __put_user(xstate_size, &sw->xstate_size); __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *)((void *)fxsave + xstate_size)); } @@ -384,9 +357,9 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset } if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul; + return (esp - (fxsave_offset + sizeof(X86LegacyXSaveArea))) & -8ul; } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset; + return ((esp - sizeof(X86LegacyXSaveArea)) & -16ul) - fxsave_offset; } else { size_t xstate_size = xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE; @@ -552,21 +525,29 @@ give_sigsegv: force_sigsegv(sig); } -static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, +static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, abi_ulong fxsave_addr) { + struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved; + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { - uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size); - uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size); + uint32_t magic1 = tswapl(sw->magic1); + uint32_t extended_size = tswapl(sw->extended_size); + uint32_t xstate_size = tswapl(sw->xstate_size); + uint32_t minimum_size = (TARGET_FPSTATE_FXSAVE_OFFSET + + TARGET_FP_XSTATE_MAGIC2_SIZE + + xstate_size); + uint32_t magic2; /* Linux checks MAGIC2 using xstate_size, not 
extended_size. */ - if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 && - extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) { + if (magic1 == TARGET_FP_XSTATE_MAGIC1 + && extended_size >= minimum_size) { if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { return 1; } - if (tswapl(*(uint32_t *)((void *)fxsave + xstate_size)) == TARGET_FP_XSTATE_MAGIC2) { + magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size)); + if (magic2 == TARGET_FP_XSTATE_MAGIC2) { cpu_x86_xrstor(env, fxsave_addr, -1); return 0; } From 5cc77ebe9b9e07fcd06011dc23162069ef8c5eff Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 10:53:55 -1000 Subject: [PATCH 18/28] linux-user/i386: Split out struct target_fregs_state Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 43 +++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 5b1c570bff..3271ebd333 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -34,6 +34,23 @@ struct target_fpreg { uint16_t exponent; }; +/* Legacy x87 fpu state format for FSAVE/FRESTOR. */ +struct target_fregs_state { + uint32_t cwd; + uint32_t swd; + uint32_t twd; + uint32_t fip; + uint32_t fcs; + uint32_t foo; + uint32_t fos; + struct target_fpreg st[8]; + + /* Software status information [not touched by FSAVE]. 
*/ + uint16_t status; + uint16_t magic; /* 0xffff: FPU data only, 0x0000: FXSR FPU data */ +}; +QEMU_BUILD_BUG_ON(sizeof(struct target_fregs_state) != 32 + 80); + struct target_fpx_sw_bytes { uint32_t magic1; uint32_t extended_size; @@ -44,29 +61,19 @@ struct target_fpx_sw_bytes { QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4); struct target_fpstate_32 { - /* Regular FPU environment */ - uint32_t cw; - uint32_t sw; - uint32_t tag; - uint32_t ipoff; - uint32_t cssel; - uint32_t dataoff; - uint32_t datasel; - struct target_fpreg st[8]; - uint16_t status; - uint16_t magic; /* 0xffff = regular FPU data only */ - X86LegacyXSaveArea fxsave; + struct target_fregs_state fpstate; + X86LegacyXSaveArea fxstate; }; /* * For simplicity, setup_frame aligns struct target_fpstate_32 to * 16 bytes, so ensure that the FXSAVE area is also aligned. */ -QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15); +QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxstate) & 15); #ifndef TARGET_X86_64 # define target_fpstate target_fpstate_32 -# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave) +# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxstate) #else # define target_fpstate X86LegacyXSaveArea # define TARGET_FPSTATE_FXSAVE_OFFSET 0 @@ -279,15 +286,15 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss); cpu_x86_fsave(env, fpstate_addr, 1); - fpstate->status = fpstate->sw; + fpstate->fpstate.status = fpstate->fpstate.swd; if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { magic = 0xffff; } else { - xsave_sigcontext(env, &fpstate->fxsave, + xsave_sigcontext(env, &fpstate->fxstate, fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); magic = 0; } - __put_user(magic, &fpstate->magic); + __put_user(magic, &fpstate->fpstate.magic); #else __put_user(env->regs[R_EDI], &sc->rdi); __put_user(env->regs[R_ESI], &sc->rsi); @@ -623,7 +630,7 @@ 
restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) cpu_x86_frstor(env, fpstate_addr, 1); err = 0; } else { - err = xrstor_sigcontext(env, &fpstate->fxsave, + err = xrstor_sigcontext(env, &fpstate->fxstate, fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); } #else From bae0455ce3ded1df80dee4e844194568063ad093 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 11:51:53 -1000 Subject: [PATCH 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery Since v2.6.19, the kernel has supported -mregparm=3. Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 3271ebd333..6763b4bda8 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -405,8 +405,6 @@ void setup_frame(int sig, struct target_sigaction *ka, if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - __put_user(sig, &frame->sig); - setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0], frame_addr + offsetof(struct sigframe, fpstate)); @@ -428,6 +426,13 @@ void setup_frame(int sig, struct target_sigaction *ka, env->regs[R_ESP] = frame_addr; env->eip = ka->_sa_handler; + /* Store argument for both -mregparm=3 and standard. */ + env->regs[R_EAX] = sig; + __put_user(sig, &frame->sig); + /* The kernel clears EDX and ECX even though there is only one arg. 
*/ + env->regs[R_EDX] = 0; + env->regs[R_ECX] = 0; + cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); cpu_x86_load_seg(env, R_SS, __USER_DS); @@ -449,9 +454,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) { abi_ulong frame_addr; -#ifndef TARGET_X86_64 - abi_ulong addr; -#endif struct rt_sigframe *frame; int i; @@ -461,14 +463,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) goto give_sigsegv; - /* These fields are only in rt_sigframe on 32 bit */ -#ifndef TARGET_X86_64 - __put_user(sig, &frame->sig); - addr = frame_addr + offsetof(struct rt_sigframe, info); - __put_user(addr, &frame->pinfo); - addr = frame_addr + offsetof(struct rt_sigframe, uc); - __put_user(addr, &frame->puc); -#endif if (ka->sa_flags & TARGET_SA_SIGINFO) { frame->info = *info; } @@ -508,9 +502,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, env->eip = ka->_sa_handler; #ifndef TARGET_X86_64 + /* Store arguments for both -mregparm=3 and standard. */ env->regs[R_EAX] = sig; + __put_user(sig, &frame->sig); env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, info); + __put_user(env->regs[R_EDX], &frame->pinfo); env->regs[R_ECX] = frame_addr + offsetof(struct rt_sigframe, uc); + __put_user(env->regs[R_ECX], &frame->puc); #else env->regs[R_EAX] = 0; env->regs[R_EDI] = sig; From c536f9b77ccb771fc480ec8d3c1cefac243eac73 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 13:07:54 -1000 Subject: [PATCH 20/28] linux-user/i386: Return boolean success from restore_sigcontext Invert the sense of the return value and use bool. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 51 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 6763b4bda8..9e6d883ea1 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -564,12 +564,12 @@ static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, return 0; } -static int -restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) +static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { - int err = 1; abi_ulong fpstate_addr; unsigned int tmpflags; + struct target_fpstate *fpstate; + bool ok; #ifndef TARGET_X86_64 cpu_x86_load_seg(env, R_GS, tswap16(sc->gs)); @@ -617,29 +617,27 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) // regs->orig_eax = -1; /* disable syscall checks */ fpstate_addr = tswapl(sc->fpstate); - if (fpstate_addr != 0) { - struct target_fpstate *fpstate; - if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, - sizeof(struct target_fpstate))) { - return err; - } -#ifndef TARGET_X86_64 - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - cpu_x86_frstor(env, fpstate_addr, 1); - err = 0; - } else { - err = xrstor_sigcontext(env, &fpstate->fxstate, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); - } -#else - err = xrstor_sigcontext(env, fpstate, fpstate_addr); -#endif - unlock_user_struct(fpstate, fpstate_addr, 0); - } else { - err = 0; + if (fpstate_addr == 0) { + return true; } + if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, + sizeof(struct target_fpstate))) { + return false; + } +#ifndef TARGET_X86_64 + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + cpu_x86_frstor(env, fpstate_addr, 1); + ok = true; + } else { + ok = !xrstor_sigcontext(env, &fpstate->fxstate, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + } +#else + ok = !xrstor_sigcontext(env, fpstate, fpstate_addr); +#endif + 
unlock_user_struct(fpstate, fpstate_addr, 0); - return err; + return ok; } /* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ @@ -665,8 +663,9 @@ long do_sigreturn(CPUX86State *env) set_sigmask(&set); /* restore registers */ - if (restore_sigcontext(env, &frame->sc)) + if (!restore_sigcontext(env, &frame->sc)) { goto badframe; + } unlock_user_struct(frame, frame_addr, 0); return -QEMU_ESIGRETURN; @@ -690,7 +689,7 @@ long do_rt_sigreturn(CPUX86State *env) target_to_host_sigset(&set, &frame->uc.tuc_sigmask); set_sigmask(&set); - if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) { + if (!restore_sigcontext(env, &frame->uc.tuc_mcontext)) { goto badframe; } From 9e9b7d4c15b1cbefc608487a38e10c8f708fb187 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 13:15:03 -1000 Subject: [PATCH 21/28] linux-user/i386: Return boolean success from xrstor_sigcontext Invert the sense of the return value and use bool. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 9e6d883ea1..03031ef9e5 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -530,8 +530,8 @@ give_sigsegv: force_sigsegv(sig); } -static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, - abi_ulong fxsave_addr) +static bool xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, + abi_ulong fxsave_addr) { struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved; @@ -549,19 +549,19 @@ static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, && extended_size >= minimum_size) { if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { - return 1; + return false; } magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size)); if (magic2 == TARGET_FP_XSTATE_MAGIC2) { cpu_x86_xrstor(env, 
fxsave_addr, -1); - return 0; + return true; } } /* fall through to fxrstor */ } cpu_x86_fxrstor(env, fxsave_addr); - return 0; + return true; } static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) @@ -629,11 +629,11 @@ static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) cpu_x86_frstor(env, fpstate_addr, 1); ok = true; } else { - ok = !xrstor_sigcontext(env, &fpstate->fxstate, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + ok = xrstor_sigcontext(env, &fpstate->fxstate, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); } #else - ok = !xrstor_sigcontext(env, fpstate, fpstate_addr); + ok = xrstor_sigcontext(env, fpstate, fpstate_addr); #endif unlock_user_struct(fpstate, fpstate_addr, 0); From a7365e984d27b961f381cf3be46682e4da5ab6f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 14:30:30 -1000 Subject: [PATCH 22/28] linux-user/i386: Fix allocation and alignment of fp state For modern cpus, the kernel uses xsave to store all extra cpu state across the signal handler. For xsave/xrstor to work, the pointer must be 64 byte aligned. Moreover, the regular part of the signal frame must be 16 byte aligned. Attempt to mirror the kernel code as much as possible. Use enum FPStateKind instead of use_xsave() and use_fxsr(). 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1648 Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 558 +++++++++++++++++++------------ tests/tcg/x86_64/Makefile.target | 1 + tests/tcg/x86_64/test-1648.c | 33 ++ 3 files changed, 377 insertions(+), 215 deletions(-) create mode 100644 tests/tcg/x86_64/test-1648.c diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 03031ef9e5..47e6c0ff0d 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -65,20 +65,6 @@ struct target_fpstate_32 { X86LegacyXSaveArea fxstate; }; -/* - * For simplicity, setup_frame aligns struct target_fpstate_32 to - * 16 bytes, so ensure that the FXSAVE area is also aligned. - */ -QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxstate) & 15); - -#ifndef TARGET_X86_64 -# define target_fpstate target_fpstate_32 -# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxstate) -#else -# define target_fpstate X86LegacyXSaveArea -# define TARGET_FPSTATE_FXSAVE_OFFSET 0 -#endif - struct target_sigcontext_32 { uint16_t gs, __gsh; uint16_t fs, __fsh; @@ -161,24 +147,16 @@ struct sigframe { int sig; struct target_sigcontext sc; /* - * The actual fpstate is placed after retcode[] below, to make - * room for the variable-sized xsave data. The older unused fpstate - * has to be kept to avoid changing the offset of extramask[], which + * The actual fpstate is placed after retcode[] below, to make room + * for the variable-sized xsave data. The older unused fpstate has + * to be kept to avoid changing the offset of extramask[], which * is part of the ABI. */ - struct target_fpstate fpstate_unused; + struct target_fpstate_32 fpstate_unused; abi_ulong extramask[TARGET_NSIG_WORDS-1]; char retcode[8]; - - /* - * This field will be 16-byte aligned in memory. Applying QEMU_ALIGNED - * to it ensures that the base of the frame has an appropriate alignment - * too. 
- */ - struct target_fpstate fpstate QEMU_ALIGNED(8); + /* fp state follows here */ }; -#define TARGET_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) struct rt_sigframe { abi_ulong pretcode; @@ -188,10 +166,8 @@ struct rt_sigframe { struct target_siginfo info; struct target_ucontext uc; char retcode[8]; - struct target_fpstate fpstate QEMU_ALIGNED(8); + /* fp state follows here */ }; -#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) /* * Verify that vdso-asmoffset.h constants match. @@ -209,66 +185,178 @@ struct rt_sigframe { abi_ulong pretcode; struct target_ucontext uc; struct target_siginfo info; - struct target_fpstate fpstate QEMU_ALIGNED(16); + /* fp state follows here */ }; -#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ - offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) #endif +typedef enum { +#ifndef TARGET_X86_64 + FPSTATE_FSAVE, +#endif + FPSTATE_FXSAVE, + FPSTATE_XSAVE +} FPStateKind; + +static FPStateKind get_fpstate_kind(CPUX86State *env) +{ + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + return FPSTATE_XSAVE; + } +#ifdef TARGET_X86_64 + return FPSTATE_FXSAVE; +#else + if (env->features[FEAT_1_EDX] & CPUID_FXSR) { + return FPSTATE_FXSAVE; + } + return FPSTATE_FSAVE; +#endif +} + +static unsigned get_fpstate_size(CPUX86State *env, FPStateKind fpkind) +{ + /* + * Kernel: + * fpu__alloc_mathframe + * xstate_sigframe_size(current->thread.fpu.fpstate); + * size = fpstate->user_size + * use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size + * where fpstate->user_size is computed at init in + * fpu__init_system_xstate_size_legacy and + * fpu__init_system_xstate. + * + * Here we have no place to pre-compute, so inline it all. 
+ */ + switch (fpkind) { + case FPSTATE_XSAVE: + return (xsave_area_size(env->xcr0, false) + + TARGET_FP_XSTATE_MAGIC2_SIZE); + case FPSTATE_FXSAVE: + return sizeof(X86LegacyXSaveArea); +#ifndef TARGET_X86_64 + case FPSTATE_FSAVE: + return sizeof(struct target_fregs_state); +#endif + } + g_assert_not_reached(); +} + +static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env, + unsigned frame_size, FPStateKind fpkind, + abi_ptr *fpstate, abi_ptr *fxstate, abi_ptr *fpend) +{ + abi_ptr sp; + unsigned math_size; + + /* Default to using normal stack */ + sp = get_sp_from_cpustate(env); +#ifdef TARGET_X86_64 + sp -= 128; /* this is the redzone */ +#endif + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa_flags & TARGET_SA_ONSTACK) { + sp = target_sigsp(sp, ka); + } else { +#ifndef TARGET_X86_64 + /* This is the legacy signal stack switching. */ + if ((env->segs[R_SS].selector & 0xffff) != __USER_DS + && !(ka->sa_flags & TARGET_SA_RESTORER) + && ka->sa_restorer) { + sp = ka->sa_restorer; + } +#endif + } + + math_size = get_fpstate_size(env, fpkind); + sp = ROUND_DOWN(sp - math_size, 64); + *fpend = sp + math_size; + *fxstate = sp; +#ifndef TARGET_X86_64 + if (fpkind != FPSTATE_FSAVE) { + sp -= sizeof(struct target_fregs_state); + } +#endif + *fpstate = sp; + + sp -= frame_size; + /* + * Align the stack pointer according to the ABI, i.e. so that on + * function entry ((sp + sizeof(return_addr)) & 15) == 0. + */ + sp += sizeof(target_ulong); + sp = ROUND_DOWN(sp, 16); + sp -= sizeof(target_ulong); + + return sp; +} + /* * Set up a signal frame. 
*/ -static void xsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, - abi_ulong fxsave_addr) +static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr) { - struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved; + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; - if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - /* fxsave_addr must be 16 byte aligned for fxsave */ - assert(!(fxsave_addr & 0xf)); - - cpu_x86_fxsave(env, fxsave_addr); - __put_user(0, &sw->magic1); - } else { - uint32_t xstate_size = xsave_area_size(env->xcr0, false); - - /* - * extended_size is the offset from fpstate_addr to right after the end - * of the extended save states. On 32-bit that includes the legacy - * FSAVE area. - */ - uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET - + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE; - - /* fxsave_addr must be 64 byte aligned for xsave */ - assert(!(fxsave_addr & 0x3f)); - - /* Zero the header, XSAVE *adds* features to an existing save state. 
*/ - memset(fxsave + 1, 0, sizeof(X86XSaveHeader)); - cpu_x86_xsave(env, fxsave_addr, -1); - __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); - __put_user(extended_size, &sw->extended_size); - __put_user(env->xcr0, &sw->xfeatures); - __put_user(xstate_size, &sw->xstate_size); - __put_user(TARGET_FP_XSTATE_MAGIC2, - (uint32_t *)((void *)fxsave + xstate_size)); - } + /* fxstate_addr must be 16 byte aligned for fxsave */ + assert(!(fxstate_addr & 0xf)); + cpu_x86_fxsave(env, fxstate_addr); + __put_user(0, &sw->magic1); } -static void setup_sigcontext(struct target_sigcontext *sc, - struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask, - abi_ulong fpstate_addr) +static void xsave_sigcontext(CPUX86State *env, + X86LegacyXSaveArea *fxstate, + abi_ptr fpstate_addr, + abi_ptr xstate_addr, + abi_ptr fpend_addr) +{ + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; + /* + * extended_size is the offset from fpstate_addr to right after + * the end of the extended save states. On 32-bit that includes + * the legacy FSAVE area. + */ + uint32_t extended_size = fpend_addr - fpstate_addr; + /* Recover xstate_size by removing magic2. */ + uint32_t xstate_size = (fpend_addr - xstate_addr + - TARGET_FP_XSTATE_MAGIC2_SIZE); + /* magic2 goes just after xstate. */ + uint32_t *magic2 = (void *)fxstate + xstate_size; + + /* xstate_addr must be 64 byte aligned for xsave */ + assert(!(xstate_addr & 0x3f)); + + /* Zero the header, XSAVE *adds* features to an existing save state. 
*/ + memset(fxstate + 1, 0, sizeof(X86XSaveHeader)); + cpu_x86_xsave(env, xstate_addr, -1); + + __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); + __put_user(extended_size, &sw->extended_size); + __put_user(env->xcr0, &sw->xfeatures); + __put_user(xstate_size, &sw->xstate_size); + __put_user(TARGET_FP_XSTATE_MAGIC2, magic2); +} + +static void setup_sigcontext(CPUX86State *env, + struct target_sigcontext *sc, + abi_ulong mask, FPStateKind fpkind, + struct target_fregs_state *fpstate, + abi_ptr fpstate_addr, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr, + abi_ptr fpend_addr) { CPUState *cs = env_cpu(env); + #ifndef TARGET_X86_64 uint16_t magic; /* already locked in setup_frame() */ - __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs); - __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs); - __put_user(env->segs[R_ES].selector, (unsigned int *)&sc->es); - __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds); + __put_user(env->segs[R_GS].selector, (uint32_t *)&sc->gs); + __put_user(env->segs[R_FS].selector, (uint32_t *)&sc->fs); + __put_user(env->segs[R_ES].selector, (uint32_t *)&sc->es); + __put_user(env->segs[R_DS].selector, (uint32_t *)&sc->ds); __put_user(env->regs[R_EDI], &sc->edi); __put_user(env->regs[R_ESI], &sc->esi); __put_user(env->regs[R_EBP], &sc->ebp); @@ -280,21 +368,15 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user(cs->exception_index, &sc->trapno); __put_user(env->error_code, &sc->err); __put_user(env->eip, &sc->eip); - __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs); + __put_user(env->segs[R_CS].selector, (uint32_t *)&sc->cs); __put_user(env->eflags, &sc->eflags); __put_user(env->regs[R_ESP], &sc->esp_at_signal); - __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss); + __put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss); cpu_x86_fsave(env, fpstate_addr, 1); - fpstate->fpstate.status = fpstate->fpstate.swd; - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { 
- magic = 0xffff; - } else { - xsave_sigcontext(env, &fpstate->fxstate, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); - magic = 0; - } - __put_user(magic, &fpstate->fpstate.magic); + fpstate->status = fpstate->swd; + magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff); + __put_user(magic, &fpstate->magic); #else __put_user(env->regs[R_EDI], &sc->rdi); __put_user(env->regs[R_ESI], &sc->rsi); @@ -323,57 +405,25 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user((uint16_t)0, &sc->gs); __put_user((uint16_t)0, &sc->fs); __put_user(env->segs[R_SS].selector, &sc->ss); - - xsave_sigcontext(env, fpstate, fpstate_addr); #endif - __put_user(fpstate_addr, &sc->fpstate); + switch (fpkind) { + case FPSTATE_XSAVE: + xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr); + break; + case FPSTATE_FXSAVE: + fxsave_sigcontext(env, fxstate, fxstate_addr); + break; + default: + break; + } + __put_user(fpstate_addr, &sc->fpstate); /* non-iBCS2 extensions.. */ __put_user(mask, &sc->oldmask); __put_user(env->cr[2], &sc->cr2); } -/* - * Determine which stack to use.. - */ - -static inline abi_ulong -get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset) -{ - unsigned long esp; - - /* Default to using normal stack */ - esp = get_sp_from_cpustate(env); -#ifdef TARGET_X86_64 - esp -= 128; /* this is the redzone */ -#endif - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa_flags & TARGET_SA_ONSTACK) { - esp = target_sigsp(esp, ka); - } else { -#ifndef TARGET_X86_64 - /* This is the legacy signal stack switching. 
*/ - if ((env->segs[R_SS].selector & 0xffff) != __USER_DS && - !(ka->sa_flags & TARGET_SA_RESTORER) && - ka->sa_restorer) { - esp = (unsigned long) ka->sa_restorer; - } -#endif - } - - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - return (esp - (fxsave_offset + sizeof(X86LegacyXSaveArea))) & -8ul; - } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { - return ((esp - sizeof(X86LegacyXSaveArea)) & -16ul) - fxsave_offset; - } else { - size_t xstate_size = - xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE; - return ((esp - xstate_size) & -64ul) - fxsave_offset; - } -} - #ifndef TARGET_X86_64 static void install_sigtramp(void *tramp) { @@ -395,20 +445,36 @@ static void install_rt_sigtramp(void *tramp) void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr; + abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr; struct sigframe *frame; - int i; + struct target_fregs_state *fpstate; + X86LegacyXSaveArea *fxstate; + unsigned total_size; + FPStateKind fpkind; - frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET); + fpkind = get_fpstate_kind(env); + frame_addr = get_sigframe(ka, env, sizeof(struct sigframe), fpkind, + &fpstate_addr, &fxstate_addr, &fpend_addr); trace_user_setup_frame(env, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) - goto give_sigsegv; + total_size = fpend_addr - frame_addr; + frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0); + if (!frame) { + force_sigsegv(sig); + return; + } - setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0], - frame_addr + offsetof(struct sigframe, fpstate)); + fxstate = (void *)frame + (fxstate_addr - frame_addr); +#ifdef TARGET_X86_64 + fpstate = NULL; +#else + fpstate = (void *)frame + (fpstate_addr - frame_addr); +#endif - for (i = 1; i < TARGET_NSIG_WORDS; i++) { + setup_sigcontext(env, &frame->sc, set->sig[0], fpkind, + fpstate, fpstate_addr, fxstate, fxstate_addr, 
fpend_addr); + + for (int i = 1; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->extramask[i - 1]); } @@ -421,6 +487,7 @@ void setup_frame(int sig, struct target_sigaction *ka, install_sigtramp(frame->retcode); __put_user(default_sigreturn, &frame->pretcode); } + unlock_user(frame, frame_addr, total_size); /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; @@ -438,13 +505,6 @@ void setup_frame(int sig, struct target_sigaction *ka, cpu_x86_load_seg(env, R_SS, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); env->eflags &= ~TF_MASK; - - unlock_user_struct(frame, frame_addr, 1); - - return; - -give_sigsegv: - force_sigsegv(sig); } #endif @@ -453,37 +513,51 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, target_sigset_t *set, CPUX86State *env) { - abi_ulong frame_addr; + abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr; struct rt_sigframe *frame; - int i; + X86LegacyXSaveArea *fxstate; + struct target_fregs_state *fpstate; + unsigned total_size; + FPStateKind fpkind; - frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET); + fpkind = get_fpstate_kind(env); + frame_addr = get_sigframe(ka, env, sizeof(struct rt_sigframe), fpkind, + &fpstate_addr, &fxstate_addr, &fpend_addr); trace_user_setup_rt_frame(env, frame_addr); - if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) + total_size = fpend_addr - frame_addr; + frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0); + if (!frame) { goto give_sigsegv; + } if (ka->sa_flags & TARGET_SA_SIGINFO) { frame->info = *info; } /* Create the ucontext. 
*/ - if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { - __put_user(1, &frame->uc.tuc_flags); - } else { - __put_user(0, &frame->uc.tuc_flags); - } + __put_user(fpkind == FPSTATE_XSAVE, &frame->uc.tuc_flags); __put_user(0, &frame->uc.tuc_link); target_save_altstack(&frame->uc.tuc_stack, env); - setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env, - set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate)); - for (i = 0; i < TARGET_NSIG_WORDS; i++) { + fxstate = (void *)frame + (fxstate_addr - frame_addr); +#ifdef TARGET_X86_64 + fpstate = NULL; +#else + fpstate = (void *)frame + (fpstate_addr - frame_addr); +#endif + + setup_sigcontext(env, &frame->uc.tuc_mcontext, set->sig[0], fpkind, + fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr); + + for (int i = 0; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ + /* + * Set up to return from userspace. If provided, use a stub + * already in userspace. + */ if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { @@ -515,60 +589,113 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, env->regs[R_ESI] = frame_addr + offsetof(struct rt_sigframe, info); env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, uc); #endif + unlock_user(frame, frame_addr, total_size); cpu_x86_load_seg(env, R_DS, __USER_DS); cpu_x86_load_seg(env, R_ES, __USER_DS); cpu_x86_load_seg(env, R_CS, __USER_CS); cpu_x86_load_seg(env, R_SS, __USER_DS); env->eflags &= ~TF_MASK; - - unlock_user_struct(frame, frame_addr, 1); - return; give_sigsegv: force_sigsegv(sig); } -static bool xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave, - abi_ulong fxsave_addr) +/* + * Restore a signal frame. 
+ */ + +static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr) { - struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved; + struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; + uint32_t magic1, magic2; + uint32_t extended_size, xstate_size, min_size, max_size; - if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { - uint32_t magic1 = tswapl(sw->magic1); - uint32_t extended_size = tswapl(sw->extended_size); - uint32_t xstate_size = tswapl(sw->xstate_size); - uint32_t minimum_size = (TARGET_FPSTATE_FXSAVE_OFFSET - + TARGET_FP_XSTATE_MAGIC2_SIZE - + xstate_size); - uint32_t magic2; + switch (fpkind) { + case FPSTATE_XSAVE: + magic1 = tswap32(sw->magic1); + extended_size = tswap32(sw->extended_size); + xstate_size = tswap32(sw->xstate_size); + min_size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + max_size = xsave_area_size(env->xcr0, false); - /* Linux checks MAGIC2 using xstate_size, not extended_size. */ - if (magic1 == TARGET_FP_XSTATE_MAGIC1 - && extended_size >= minimum_size) { - if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, - extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { - return false; - } - magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size)); - if (magic2 == TARGET_FP_XSTATE_MAGIC2) { - cpu_x86_xrstor(env, fxsave_addr, -1); - return true; - } + /* Check for the first magic field and other error scenarios. */ + if (magic1 != TARGET_FP_XSTATE_MAGIC1 || + xstate_size < min_size || + xstate_size > max_size || + xstate_size > extended_size) { + break; } - /* fall through to fxrstor */ + if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr, + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) { + return false; + } + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout without copying the extended state information + * in the memory layout.
+ */ + if (get_user_u32(magic2, fxstate_addr + xstate_size)) { + return false; + } + if (magic2 != TARGET_FP_XSTATE_MAGIC2) { + break; + } + cpu_x86_xrstor(env, fxstate_addr, -1); + return true; + + default: + break; } - cpu_x86_fxrstor(env, fxsave_addr); + cpu_x86_fxrstor(env, fxstate_addr); return true; } +#ifndef TARGET_X86_64 +static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind, + struct target_fregs_state *fpstate, + abi_ptr fpstate_addr, + X86LegacyXSaveArea *fxstate, + abi_ptr fxstate_addr) +{ + switch (fpkind) { + case FPSTATE_XSAVE: + if (!xrstor_sigcontext(env, fpkind, fxstate, fxstate_addr)) { + return false; + } + break; + case FPSTATE_FXSAVE: + cpu_x86_fxrstor(env, fxstate_addr); + break; + case FPSTATE_FSAVE: + break; + default: + g_assert_not_reached(); + } + + /* + * Copy the legacy state because the FP portion of the FX frame has + * to be ignored for histerical raisins. The kernel folds the two + * states together and then performs a single load; here we perform + * the merge within ENV by loading XSTATE/FXSTATE first, then + * overriding with the FSTATE afterward. 
+ */ + cpu_x86_frstor(env, fpstate_addr, 1); + return true; +} +#endif + static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { - abi_ulong fpstate_addr; - unsigned int tmpflags; - struct target_fpstate *fpstate; + abi_ptr fpstate_addr; + unsigned tmpflags, math_size; + FPStateKind fpkind; + void *fpstate; bool ok; #ifndef TARGET_X86_64 @@ -614,29 +741,33 @@ static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) tmpflags = tswapl(sc->eflags); env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5); - // regs->orig_eax = -1; /* disable syscall checks */ fpstate_addr = tswapl(sc->fpstate); if (fpstate_addr == 0) { return true; } - if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, - sizeof(struct target_fpstate))) { + + fpkind = get_fpstate_kind(env); + math_size = get_fpstate_size(env, fpkind); +#ifndef TARGET_X86_64 + if (fpkind != FPSTATE_FSAVE) { + math_size += sizeof(struct target_fregs_state); + } +#endif + fpstate = lock_user(VERIFY_READ, fpstate_addr, math_size, 1); + if (!fpstate) { return false; } -#ifndef TARGET_X86_64 - if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { - cpu_x86_frstor(env, fpstate_addr, 1); - ok = true; - } else { - ok = xrstor_sigcontext(env, &fpstate->fxstate, - fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); - } -#else - ok = xrstor_sigcontext(env, fpstate, fpstate_addr); -#endif - unlock_user_struct(fpstate, fpstate_addr, 0); +#ifdef TARGET_X86_64 + ok = xrstor_sigcontext(env, fpkind, fpstate, fpstate_addr); +#else + ok = frstor_sigcontext(env, fpkind, fpstate, fpstate_addr, + fpstate + sizeof(struct target_fregs_state), + fpstate_addr + sizeof(struct target_fregs_state)); +#endif + + unlock_user(fpstate, fpstate_addr, 0); return ok; } @@ -648,30 +779,27 @@ long do_sigreturn(CPUX86State *env) abi_ulong frame_addr = env->regs[R_ESP] - 8; target_sigset_t target_set; sigset_t set; - int i; trace_user_do_sigreturn(env, frame_addr); - if (!lock_user_struct(VERIFY_READ, frame, 
frame_addr, 1)) - goto badframe; - /* set blocked signals */ - __get_user(target_set.sig[0], &frame->sc.oldmask); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { - __get_user(target_set.sig[i], &frame->extramask[i - 1]); + if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) { + force_sig(TARGET_SIGSEGV); + return -QEMU_ESIGRETURN; } + /* Set blocked signals. */ + __get_user(target_set.sig[0], &frame->sc.oldmask); + for (int i = 1; i < TARGET_NSIG_WORDS; i++) { + __get_user(target_set.sig[i], &frame->extramask[i - 1]); + } target_to_host_sigset_internal(&set, &target_set); set_sigmask(&set); - /* restore registers */ + /* Restore registers */ if (!restore_sigcontext(env, &frame->sc)) { - goto badframe; + force_sig(TARGET_SIGSEGV); } - unlock_user_struct(frame, frame_addr, 0); - return -QEMU_ESIGRETURN; -badframe: unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV); return -QEMU_ESIGRETURN; } #endif diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target index e64aab1b81..5fedf22117 100644 --- a/tests/tcg/x86_64/Makefile.target +++ b/tests/tcg/x86_64/Makefile.target @@ -13,6 +13,7 @@ X86_64_TESTS += vsyscall X86_64_TESTS += noexec X86_64_TESTS += cmpxchg X86_64_TESTS += adox +X86_64_TESTS += test-1648 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 else TESTS=$(MULTIARCH_TESTS) diff --git a/tests/tcg/x86_64/test-1648.c b/tests/tcg/x86_64/test-1648.c new file mode 100644 index 0000000000..fd0644a8ce --- /dev/null +++ b/tests/tcg/x86_64/test-1648.c @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* See https://gitlab.com/qemu-project/qemu/-/issues/1648 */ + +#include <signal.h> + +__attribute__((noinline)) +void bar(void) +{ + /* Success! Continue through sigreturn. */ +} + +/* + * Because of the change of ABI between foo and bar, the compiler is + * required to save XMM6-XMM15. The compiler will use MOVAPS or MOVDQA, + * which will trap if the stack frame is not 16 byte aligned.
+ */ +__attribute__((noinline, ms_abi)) +void foo(void) +{ + bar(); +} + +void sighandler(int num) +{ + foo(); +} + +int main(void) +{ + signal(SIGUSR1, sighandler); + raise(SIGUSR1); + return 0; +} From 7973eb943e670ea66a19e04868e01803c7594246 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 14:51:54 -1000 Subject: [PATCH 23/28] linux-user/i386: Honor xfeatures in xrstor_sigcontext Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 47e6c0ff0d..e716ec8989 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -613,6 +613,7 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; uint32_t magic1, magic2; uint32_t extended_size, xstate_size, min_size, max_size; + uint64_t xfeatures; switch (fpkind) { case FPSTATE_XSAVE: @@ -629,10 +630,25 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, xstate_size > extended_size) { break; } + + /* + * Restore the features indicated in the frame, masked by + * those currently enabled. Re-check the frame size. + * ??? It is not clear where the kernel does this, but it + * is not in check_xstate_in_sigframe, and so (probably) + * does not fall back to fxrstor. + */ + xfeatures = tswap64(sw->xfeatures) & env->xcr0; + min_size = xsave_area_size(xfeatures, false); + if (xstate_size < min_size) { + return false; + } + if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr, xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) { return false; } + /* * Check for the presence of second magic word at the end of memory * layout. 
This detects the case where the user just copied the legacy @@ -645,7 +661,8 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, if (magic2 != TARGET_FP_XSTATE_MAGIC2) { break; } - cpu_x86_xrstor(env, fxstate_addr, -1); + + cpu_x86_xrstor(env, fxstate_addr, xfeatures); return true; default: From c6e6d1508ac309e39fec827c3719c86c10cd8975 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 15:05:28 -1000 Subject: [PATCH 24/28] target/i386: Convert do_xsave to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 2 +- target/i386/tcg/fpu_helper.c | 72 +++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 31 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index e716ec8989..ab760db5ea 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -329,7 +329,7 @@ static void xsave_sigcontext(CPUX86State *env, /* Zero the header, XSAVE *adds* features to an existing save state. */ memset(fxstate + 1, 0, sizeof(X86XSaveHeader)); - cpu_x86_xsave(env, xstate_addr, -1); + cpu_x86_xsave(env, xstate_addr, env->xcr0); __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); __put_user(extended_size, &sw->extended_size); diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index a09d6aaf07..f5748b72b8 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2668,47 +2668,38 @@ static uint64_t get_xinuse(CPUX86State *env) return inuse; } -static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, - uint64_t inuse, uint64_t opt, uintptr_t ra) +static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt) { uint64_t old_bv, new_bv; - X86Access ac; - unsigned size; - - /* Never save anything not enabled by XCR0. 
*/ - rfbm &= env->xcr0; - opt &= rfbm; - - size = xsave_area_size(opt, false); - access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); if (opt & XSTATE_FP_MASK) { - do_xsave_fpu(&ac, ptr); + do_xsave_fpu(ac, ptr); } if (rfbm & XSTATE_SSE_MASK) { /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ - do_xsave_mxcsr(&ac, ptr); + do_xsave_mxcsr(ac, ptr); } if (opt & XSTATE_SSE_MASK) { - do_xsave_sse(&ac, ptr); + do_xsave_sse(ac, ptr); } if (opt & XSTATE_YMM_MASK) { - do_xsave_ymmh(&ac, ptr + XO(avx_state)); + do_xsave_ymmh(ac, ptr + XO(avx_state)); } if (opt & XSTATE_BNDREGS_MASK) { - do_xsave_bndregs(&ac, ptr + XO(bndreg_state)); + do_xsave_bndregs(ac, ptr + XO(bndreg_state)); } if (opt & XSTATE_BNDCSR_MASK) { - do_xsave_bndcsr(&ac, ptr + XO(bndcsr_state)); + do_xsave_bndcsr(ac, ptr + XO(bndcsr_state)); } if (opt & XSTATE_PKRU_MASK) { - do_xsave_pkru(&ac, ptr + XO(pkru_state)); + do_xsave_pkru(ac, ptr + XO(pkru_state)); } /* Update the XSTATE_BV field. */ - old_bv = access_ldq(&ac, ptr + XO(header.xstate_bv)); + old_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - access_stq(&ac, ptr + XO(header.xstate_bv), new_bv); + access_stq(ac, ptr + XO(header.xstate_bv), new_bv); } static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) @@ -2724,22 +2715,32 @@ static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) } } -void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt, uintptr_t ra) { - uintptr_t ra = GETPC(); + X86Access ac; + unsigned size; do_xsave_chk(env, ptr, ra); - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, ra); + + /* Never save anything not enabled by XCR0. 
*/ + rfbm &= env->xcr0; + opt &= rfbm; + size = xsave_area_size(opt, false); + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); + do_xsave_access(&ac, ptr, rfbm, inuse, opt); +} + +void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC()); } void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - uintptr_t ra = GETPC(); - uint64_t inuse; - - do_xsave_chk(env, ptr, ra); - inuse = get_xinuse(env); - do_xsave(env, ptr, rfbm, inuse, inuse, ra); + uint64_t inuse = get_xinuse(env); + do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); } static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) @@ -3049,7 +3050,18 @@ void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, 0); + X86Access ac; + unsigned size; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. 
+ */ + assert((rfbm & ~env->xcr0) == 0); + size = xsave_area_size(rfbm, false); + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0); + do_xsave_access(&ac, ptr, rfbm, get_xinuse(env), rfbm); } void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) From d5dc3a927ae7e64bc998d9aa29020426b4e97f8a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 16:07:05 -1000 Subject: [PATCH 25/28] target/i386: Convert do_xrstor to X86Access Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target/i386/tcg/fpu_helper.c | 106 +++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 42 deletions(-) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index f5748b72b8..1ac61c5d7d 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -2903,51 +2903,38 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) do_fxrstor(&ac, ptr); } -static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) +static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv, + target_ulong ptr) { uint64_t xstate_bv, xcomp_bv, reserve0; - X86Access ac; - unsigned size, size_ext; - rfbm &= env->xcr0; + xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); + xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv)); + reserve0 = access_ldq(ac, ptr + XO(header.reserve0)); + *pxsbv = xstate_bv; - size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); - access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); - - xstate_bv = access_ldq(&ac, ptr + XO(header.xstate_bv)); - - if ((int64_t)xstate_bv < 0) { - /* FIXME: Compact form. */ - raise_exception_ra(env, EXCP0D_GPF, ra); + /* + * XCOMP_BV bit 63 indicates compact form, which we do not support, + * and thus must raise #GP. That leaves us in standard form. + * In standard form, bytes 23:8 must be zero -- which is both + * XCOMP_BV and the following 64-bit field. 
+ */ + if (xcomp_bv || reserve0) { + return false; } - /* Standard form. */ - /* The XSTATE_BV field must not set bits not present in XCR0. */ - if (xstate_bv & ~env->xcr0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } + return (xstate_bv & ~ac->env->xcr0) == 0; +} - /* The XCOMP_BV field must be zero. Note that, as of the April 2016 - revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) - describes only XCOMP_BV, but the description of the standard form - of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which - includes the next 64-bit field. */ - xcomp_bv = access_ldq(&ac, ptr + XO(header.xcomp_bv)); - reserve0 = access_ldq(&ac, ptr + XO(header.reserve0)); - if (xcomp_bv || reserve0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - size_ext = xsave_area_size(rfbm & xstate_bv, false); - if (size < size_ext) { - /* TODO: See if existing page probe has covered extra size. */ - access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); - } +static void do_xrstor(X86Access *ac, target_ulong ptr, + uint64_t rfbm, uint64_t xstate_bv) +{ + CPUX86State *env = ac->env; if (rfbm & XSTATE_FP_MASK) { if (xstate_bv & XSTATE_FP_MASK) { - do_xrstor_fpu(&ac, ptr); + do_xrstor_fpu(ac, ptr); } else { do_fninit(env); memset(env->fpregs, 0, sizeof(env->fpregs)); @@ -2956,23 +2943,23 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_SSE_MASK) { /* Note that the standard form of XRSTOR loads MXCSR from memory whether or not the XSTATE_BV bit is set. 
*/ - do_xrstor_mxcsr(&ac, ptr); + do_xrstor_mxcsr(ac, ptr); if (xstate_bv & XSTATE_SSE_MASK) { - do_xrstor_sse(&ac, ptr); + do_xrstor_sse(ac, ptr); } else { do_clear_sse(env); } } if (rfbm & XSTATE_YMM_MASK) { if (xstate_bv & XSTATE_YMM_MASK) { - do_xrstor_ymmh(&ac, ptr + XO(avx_state)); + do_xrstor_ymmh(ac, ptr + XO(avx_state)); } else { do_clear_ymmh(env); } } if (rfbm & XSTATE_BNDREGS_MASK) { if (xstate_bv & XSTATE_BNDREGS_MASK) { - do_xrstor_bndregs(&ac, ptr + XO(bndreg_state)); + do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); env->hflags |= HF_MPX_IU_MASK; } else { memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); @@ -2981,7 +2968,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr } if (rfbm & XSTATE_BNDCSR_MASK) { if (xstate_bv & XSTATE_BNDCSR_MASK) { - do_xrstor_bndcsr(&ac, ptr + XO(bndcsr_state)); + do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); } else { memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); } @@ -2990,7 +2977,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_PKRU_MASK) { uint64_t old_pkru = env->pkru; if (xstate_bv & XSTATE_PKRU_MASK) { - do_xrstor_pkru(&ac, ptr + XO(pkru_state)); + do_xrstor_pkru(ac, ptr + XO(pkru_state)); } else { env->pkru = 0; } @@ -3006,9 +2993,27 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { uintptr_t ra = GETPC(); + X86Access ac; + uint64_t xstate_bv; + unsigned size, size_ext; do_xsave_chk(env, ptr, ra); - do_xrstor(env, ptr, rfbm, ra); + + /* Begin with just the minimum size to validate the header. 
*/ + size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); + if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + rfbm &= env->xcr0; + size_ext = xsave_area_size(rfbm & xstate_bv, false); + if (size < size_ext) { + /* TODO: See if existing page probe has covered extra size. */ + access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); + } + + do_xrstor(&ac, ptr, rfbm, xstate_bv); } #if defined(CONFIG_USER_ONLY) @@ -3066,7 +3071,24 @@ void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xrstor(env, ptr, rfbm, 0); + X86Access ac; + uint64_t xstate_bv; + unsigned size; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. + */ + assert((rfbm & ~env->xcr0) == 0); + size = xsave_area_size(rfbm, false); + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0); + + if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { + /* TODO: Report failure to caller. */ + xstate_bv &= env->xcr0; + } + + do_xrstor(&ac, ptr, rfbm, xstate_bv); } #endif From 76d8d0f85caf629b4df314e656d20ad6565bab9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 17:31:05 -1000 Subject: [PATCH 26/28] target/i386: Pass host pointer and size to cpu_x86_{fsave,frstor} We have already validated the memory region in the course of validating the signal frame. No need to do it again within the helper function. 
Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 4 ++-- target/i386/cpu.h | 10 ++++++---- target/i386/tcg/fpu_helper.c | 26 ++++++++++++++++---------- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index ab760db5ea..dfbb811b56 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -373,7 +373,7 @@ static void setup_sigcontext(CPUX86State *env, __put_user(env->regs[R_ESP], &sc->esp_at_signal); __put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss); - cpu_x86_fsave(env, fpstate_addr, 1); + cpu_x86_fsave(env, fpstate, sizeof(*fpstate)); fpstate->status = fpstate->swd; magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff); __put_user(magic, &fpstate->magic); @@ -702,7 +702,7 @@ static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind, * the merge within ENV by loading XSTATE/FXSTATE first, then * overriding with the FSTATE afterward. */ - cpu_x86_frstor(env, fpstate_addr, 1); + cpu_x86_frstor(env, fpstate, sizeof(*fpstate)); return true; } #endif diff --git a/target/i386/cpu.h b/target/i386/cpu.h index fdd318963a..f6020e0b6b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2266,11 +2266,13 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, /* used for debug or cpu save/restore */ /* cpu-exec.c */ -/* the following helpers are only usable in user mode simulation as - they can trigger unexpected exceptions */ +/* + * The following helpers are only usable in user mode simulation. + * The host pointers should come from lock_user(). 
+ */ void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector); -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); +void cpu_x86_fsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_frstor(CPUX86State *s, void *host, size_t len); void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm); diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 1ac61c5d7d..05db16a152 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3017,22 +3017,28 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) } #if defined(CONFIG_USER_ONLY) -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) { - int size = (14 << data32) + 80; - X86Access ac; + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; - access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0); - do_fsave(&ac, ptr, data32); + assert(ac.size <= len); + do_fsave(&ac, 0, true); } -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) { - int size = (14 << data32) + 80; - X86Access ac; + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; - access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0); - do_frstor(&ac, ptr, data32); + assert(ac.size <= len); + do_frstor(&ac, 0, true); } void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) From 9c2fb9e1d589fbda266d8db611b9d3a38ab96a3c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 17:57:11 -1000 Subject: [PATCH 27/28] target/i386: Pass host pointer and size to cpu_x86_{fxsave,fxrstor} We have already validated the memory region in the course of validating the signal 
frame. No need to do it again within the helper function. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 13 +++++-------- target/i386/cpu.h | 4 ++-- target/i386/tcg/fpu_helper.c | 26 ++++++++++++++++---------- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index dfbb811b56..2e2972002b 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -294,14 +294,11 @@ static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env, * Set up a signal frame. */ -static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate, - abi_ptr fxstate_addr) +static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate) { struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved; - /* fxstate_addr must be 16 byte aligned for fxsave */ - assert(!(fxstate_addr & 0xf)); - cpu_x86_fxsave(env, fxstate_addr); + cpu_x86_fxsave(env, fxstate, sizeof(*fxstate)); __put_user(0, &sw->magic1); } @@ -412,7 +409,7 @@ static void setup_sigcontext(CPUX86State *env, xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr); break; case FPSTATE_FXSAVE: - fxsave_sigcontext(env, fxstate, fxstate_addr); + fxsave_sigcontext(env, fxstate); break; default: break; @@ -669,7 +666,7 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, break; } - cpu_x86_fxrstor(env, fxstate_addr); + cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate)); return true; } @@ -687,7 +684,7 @@ static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind, } break; case FPSTATE_FXSAVE: - cpu_x86_fxrstor(env, fxstate_addr); + cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate)); break; case FPSTATE_FSAVE: break; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index f6020e0b6b..257cd5a617 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2273,8 +2273,8 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, void 
cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector); void cpu_x86_fsave(CPUX86State *s, void *host, size_t len); void cpu_x86_frstor(CPUX86State *s, void *host, size_t len); -void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); +void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len); void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm); void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm); diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 05db16a152..0e5368951f 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3041,22 +3041,28 @@ void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) do_frstor(&ac, 0, true); } -void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) { - X86Access ac; + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; - access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), - MMU_DATA_STORE, 0); - do_fxsave(&ac, ptr); + assert(ac.size <= len); + do_fxsave(&ac, 0); } -void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) { - X86Access ac; + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; - access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), - MMU_DATA_LOAD, 0); - do_fxrstor(&ac, ptr); + assert(ac.size <= len); + do_fxrstor(&ac, 0); } void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) From 701890bdd09b289fd9cb852e714e91373088b0f3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Apr 2024 18:04:19 -1000 Subject: [PATCH 28/28] target/i386: Pass host pointer and size to cpu_x86_{xsave,xrstor} We have already validated the memory region in the course of validating the 
signal frame. No need to do it again within the helper function. In addition, return failure when the header contains invalid xstate_bv. The kernel handles this via exception handling within XSTATE_OP within xrstor_from_user_sigframe. Reviewed-by: Paolo Bonzini Signed-off-by: Richard Henderson --- linux-user/i386/signal.c | 20 ++++++++++++-------- target/i386/cpu.h | 4 ++-- target/i386/tcg/fpu_helper.c | 36 +++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 2e2972002b..cb90711834 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -326,7 +326,7 @@ static void xsave_sigcontext(CPUX86State *env, /* Zero the header, XSAVE *adds* features to an existing save state. */ memset(fxstate + 1, 0, sizeof(X86XSaveHeader)); - cpu_x86_xsave(env, xstate_addr, env->xcr0); + cpu_x86_xsave(env, fxstate, fpend_addr - xstate_addr, env->xcr0); __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1); __put_user(extended_size, &sw->extended_size); @@ -611,6 +611,8 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, uint32_t magic1, magic2; uint32_t extended_size, xstate_size, min_size, max_size; uint64_t xfeatures; + void *xstate; + bool ok; switch (fpkind) { case FPSTATE_XSAVE: @@ -641,8 +643,10 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, return false; } - if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr, - xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) { + /* Re-lock the entire xstate area, with the extensions and magic. */ + xstate = lock_user(VERIFY_READ, fxstate_addr, + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE, 1); + if (!xstate) { return false; } @@ -652,15 +656,15 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind, * fpstate layout with out copying the extended state information * in the memory layout. 
*/ - if (get_user_u32(magic2, fxstate_addr + xstate_size)) { - return false; - } + magic2 = tswap32(*(uint32_t *)(xstate + xstate_size)); if (magic2 != TARGET_FP_XSTATE_MAGIC2) { + unlock_user(xstate, fxstate_addr, 0); break; } - cpu_x86_xrstor(env, fxstate_addr, xfeatures); - return true; + ok = cpu_x86_xrstor(env, xstate, xstate_size, xfeatures); + unlock_user(xstate, fxstate_addr, 0); + return ok; default: break; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 257cd5a617..c64ef0c1a2 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2275,8 +2275,8 @@ void cpu_x86_fsave(CPUX86State *s, void *host, size_t len); void cpu_x86_frstor(CPUX86State *s, void *host, size_t len); void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len); void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len); -void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm); -void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm); +void cpu_x86_xsave(CPUX86State *s, void *host, size_t len, uint64_t rbfm); +bool cpu_x86_xrstor(CPUX86State *s, void *host, size_t len, uint64_t rbfm); /* cpu.c */ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 0e5368951f..c17eaaa22b 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3065,42 +3065,44 @@ void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) do_fxrstor(&ac, 0); } -void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - X86Access ac; - unsigned size; + X86Access ac = { + .haddr1 = host, + .env = env, + }; /* * Since this is only called from user-level signal handling, * we should have done the job correctly there. 
*/ assert((rfbm & ~env->xcr0) == 0); - size = xsave_area_size(rfbm, false); - - access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0); - do_xsave_access(&ac, ptr, rfbm, get_xinuse(env), rfbm); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); + do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); } -void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - X86Access ac; + X86Access ac = { + .haddr1 = host, + .env = env, + }; uint64_t xstate_bv; - unsigned size; /* * Since this is only called from user-level signal handling, * we should have done the job correctly there. */ assert((rfbm & ~env->xcr0) == 0); - size = xsave_area_size(rfbm, false); - access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); - if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { - /* TODO: Report failure to caller. */ - xstate_bv &= env->xcr0; + if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { + return false; } - - do_xrstor(&ac, ptr, rfbm, xstate_bv); + do_xrstor(&ac, 0, rfbm, xstate_bv); + return true; } #endif