From 84e945aad2d0cd950996a73705b4467e30ddbfa2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 Feb 2024 10:56:56 +0100 Subject: [PATCH 01/10] vl, pc: turn -no-fd-bootchk into a machine property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a fd-bootchk property to PC machine types, so that -no-fd-bootchk returns an error if the machine does not support booting from floppies and checking for boot signatures therein. Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 39 ++++++++++++++++++++++++++++++++++----- include/hw/i386/pc.h | 2 +- qemu-options.hx | 2 +- system/globals.c | 1 - system/vl.c | 2 +- 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 880e95de26..f5ff970acf 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -399,8 +399,8 @@ static int boot_device2nibble(char boot_device) return 0; } -static void set_boot_dev(MC146818RtcState *s, const char *boot_device, - Error **errp) +static void set_boot_dev(PCMachineState *pcms, MC146818RtcState *s, + const char *boot_device, Error **errp) { #define PC_MAX_BOOT_DEVICES 3 int nbds, bds[3] = { 0, }; @@ -420,12 +420,14 @@ static void set_boot_dev(MC146818RtcState *s, const char *boot_device, } } mc146818rtc_set_cmos_data(s, 0x3d, (bds[1] << 4) | bds[0]); - mc146818rtc_set_cmos_data(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 
0x0 : 0x1)); + mc146818rtc_set_cmos_data(s, 0x38, (bds[2] << 4) | !pcms->fd_bootchk); } static void pc_boot_set(void *opaque, const char *boot_device, Error **errp) { - set_boot_dev(opaque, boot_device, errp); + PCMachineState *pcms = PC_MACHINE(current_machine); + + set_boot_dev(pcms, opaque, boot_device, errp); } static void pc_cmos_init_floppy(MC146818RtcState *rtc_state, ISADevice *floppy) @@ -611,7 +613,15 @@ void pc_cmos_init(PCMachineState *pcms, mc146818rtc_set_cmos_data(s, 0x5c, val >> 8); mc146818rtc_set_cmos_data(s, 0x5d, val >> 16); - set_boot_dev(s, MACHINE(pcms)->boot_config.order, &error_fatal); + object_property_add_link(OBJECT(pcms), "rtc_state", + TYPE_ISA_DEVICE, + (Object **)&x86ms->rtc, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + object_property_set_link(OBJECT(pcms), "rtc_state", OBJECT(s), + &error_abort); + + set_boot_dev(pcms, s, MACHINE(pcms)->boot_config.order, &error_fatal); val = 0; val |= 0x02; /* FPU is there */ @@ -1535,6 +1545,20 @@ static void pc_machine_set_vmport(Object *obj, Visitor *v, const char *name, visit_type_OnOffAuto(v, name, &pcms->vmport, errp); } +static bool pc_machine_get_fd_bootchk(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->fd_bootchk; +} + +static void pc_machine_set_fd_bootchk(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->fd_bootchk = value; +} + static bool pc_machine_get_smbus(Object *obj, Error **errp) { PCMachineState *pcms = PC_MACHINE(obj); @@ -1723,6 +1747,7 @@ static void pc_machine_initfn(Object *obj) #ifdef CONFIG_HPET pcms->hpet_enabled = true; #endif + pcms->fd_bootchk = true; pcms->default_bus_bypass_iommu = false; pcms->pcspk = isa_new(TYPE_PC_SPEAKER); @@ -1869,6 +1894,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) NULL, NULL); object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP, "SMBIOS Entry Point type [32, 64]"); + + object_class_property_add_bool(oc, 
"fd-bootchk", + pc_machine_get_fd_bootchk, + pc_machine_set_fd_bootchk); } static const TypeInfo pc_machine_info = { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 4bb1899602..5065590281 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -50,6 +50,7 @@ typedef struct PCMachineState { bool hpet_enabled; bool i8042_enabled; bool default_bus_bypass_iommu; + bool fd_bootchk; uint64_t max_fw_size; /* ACPI Memory hotplug IO base address */ @@ -146,7 +147,6 @@ OBJECT_DECLARE_TYPE(PCMachineState, PCMachineClass, PC_MACHINE) GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled); /* pc.c */ -extern int fd_bootchk; void pc_acpi_smi_interrupt(void *opaque, int irq, int level); diff --git a/qemu-options.hx b/qemu-options.hx index 9be1e5817c..1136642c21 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2650,7 +2650,7 @@ DEF("no-fd-bootchk", 0, QEMU_OPTION_no_fd_bootchk, SRST ``-no-fd-bootchk`` Disable boot signature checking for floppy disks in BIOS. May be - needed to boot from old floppy disks. + needed to boot from old floppy disks. Synonym of ``-M fd-bootchk=off``. 
ERST DEF("acpitable", HAS_ARG, QEMU_OPTION_acpitable, diff --git a/system/globals.c b/system/globals.c index b6d4e72530..5d0046ba10 100644 --- a/system/globals.c +++ b/system/globals.c @@ -41,7 +41,6 @@ int vga_interface_type = VGA_NONE; bool vga_interface_created; Chardev *parallel_hds[MAX_PARALLEL_PORTS]; int win2k_install_hack; -int fd_bootchk = 1; int graphic_rotate; QEMUOptionRom option_rom[MAX_OPTION_ROMS]; int nb_option_roms; diff --git a/system/vl.c b/system/vl.c index b8469d9965..98bf0c386b 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2927,7 +2927,7 @@ void qemu_init(int argc, char **argv) optarg, FD_OPTS); break; case QEMU_OPTION_no_fd_bootchk: - fd_bootchk = 0; + qdict_put_str(machine_opts_dict, "fd-bootchk", "off"); break; case QEMU_OPTION_netdev: default_net = 0; From 68fb78d7d5723066ec2cacee7d25d67a4143b42f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Dec 2023 09:27:36 +0100 Subject: [PATCH 02/10] target/i386: mask high bits of CR3 in 32-bit mode CR3 bits 63:32 are ignored in 32-bit mode (either legacy 2-level paging or PAE paging). Do this in mmu_translate() to remove the last place where get_physical_address() meaningfully drops the high bits of the address. 
Cc: qemu-stable@nongnu.org Suggested-by: Richard Henderson Fixes: 4a1e9d4d11c ("target/i386: Use atomic operations for pte updates", 2022-10-18) Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/excp_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 5b86f439ad..11126c860d 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -238,7 +238,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 3 */ - pte_addr = ((in->cr3 & ~0x1f) + ((addr >> 27) & 0x18)) & a20_mask; + pte_addr = ((in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18)) & a20_mask; if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -306,7 +306,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 2 */ - pte_addr = ((in->cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) & a20_mask; + pte_addr = ((in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc)) & a20_mask; if (!ptw_translate(&pte_trans, pte_addr)) { return false; } From d09c79010ffd880dc69e7a21e3cfdef90b928fb8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Dec 2023 17:47:38 +0100 Subject: [PATCH 03/10] target/i386: check validity of VMCB addresses MSR_VM_HSAVE_PA bits 0-11 are reserved, as are the bits above the maximum physical address width of the processor. Setting them to 1 causes a #GP (see "15.30.4 VM_HSAVE_PA MSR" in the AMD manual). The same is true of VMCB addresses passed to VMRUN/VMLOAD/VMSAVE, even though the manual is not clear on that. 
Cc: qemu-stable@nongnu.org Fixes: 4a1e9d4d11c ("target/i386: Use atomic operations for pte updates", 2022-10-18) Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/misc_helper.c | 3 +++ target/i386/tcg/sysemu/svm_helper.c | 27 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 7de0a6e866..edb7c3d894 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -212,6 +212,9 @@ void helper_wrmsr(CPUX86State *env) tlb_flush(cs); break; case MSR_VM_HSAVE_PA: + if (val & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + goto error; + } env->vm_hsave = val; break; #ifdef TARGET_X86_64 diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c index 32ff0dbb13..5d6de2294f 100644 --- a/target/i386/tcg/sysemu/svm_helper.c +++ b/target/i386/tcg/sysemu/svm_helper.c @@ -164,14 +164,19 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) uint64_t new_cr3; uint64_t new_cr4; - cpu_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0, GETPC()); - if (aflag == 2) { addr = env->regs[R_EAX]; } else { addr = (uint32_t)env->regs[R_EAX]; } + /* Exceptions are checked before the intercept. */ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + cpu_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0, GETPC()); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! " TARGET_FMT_lx "\n", addr); env->vm_vmcb = addr; @@ -463,14 +468,19 @@ void helper_vmload(CPUX86State *env, int aflag) int mmu_idx = MMU_PHYS_IDX; target_ulong addr; - cpu_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0, GETPC()); - if (aflag == 2) { addr = env->regs[R_EAX]; } else { addr = (uint32_t)env->regs[R_EAX]; } + /* Exceptions are checked before the intercept. 
*/ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + cpu_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0, GETPC()); + if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMLOAD, GETPC())) { mmu_idx = MMU_NESTED_IDX; } @@ -519,14 +529,19 @@ void helper_vmsave(CPUX86State *env, int aflag) int mmu_idx = MMU_PHYS_IDX; target_ulong addr; - cpu_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0, GETPC()); - if (aflag == 2) { addr = env->regs[R_EAX]; } else { addr = (uint32_t)env->regs[R_EAX]; } + /* Exceptions are checked before the intercept. */ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + cpu_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0, GETPC()); + if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMSAVE, GETPC())) { mmu_idx = MMU_NESTED_IDX; } From 5f97afe2543f09160a8d123ab6e2e8c6d98fa9ce Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Jan 2024 15:36:51 +0100 Subject: [PATCH 04/10] target/i386: introduce function to query MMU indices Remove knowledge of specific MMU indexes (other than MMU_NESTED_IDX and MMU_PHYS_IDX) from mmu_translate(). This will make it possible to split 32-bit and 64-bit MMU indexes. Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 10 ++++++++++ target/i386/tcg/sysemu/excp_helper.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index dfe43b8204..8c271ca62e 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2305,6 +2305,16 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define MMU_NESTED_IDX 3 #define MMU_PHYS_IDX 4 +static inline bool is_mmu_index_smap(int mmu_index) +{ + return mmu_index == MMU_KSMAP_IDX; +} + +static inline bool is_mmu_index_user(int mmu_index) +{ + return mmu_index == MMU_USER_IDX; +} + static inline int cpu_mmu_index_kernel(CPUX86State *env) { return !(env->hflags & HF_SMAP_MASK) ? 
MMU_KNOSMAP_IDX : diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 11126c860d..a0d5ce3930 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -137,7 +137,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, const int32_t a20_mask = x86_get_a20_mask(env); const target_ulong addr = in->addr; const int pg_mode = in->pg_mode; - const bool is_user = (in->mmu_idx == MMU_USER_IDX); + const bool is_user = is_mmu_index_user(in->mmu_idx); const MMUAccessType access_type = in->access_type; uint64_t ptep, pte, rsvd_mask; PTETranslate pte_trans = { @@ -363,7 +363,7 @@ do_check_protect_pse36: } int prot = 0; - if (in->mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) { + if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) { prot |= PAGE_READ; if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) { prot |= PAGE_WRITE; From 90f641531c782c873a05895f411c05fbbbef3c49 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Jan 2024 15:40:18 +0100 Subject: [PATCH 05/10] target/i386: use separate MMU indexes for 32-bit accesses Accesses from a 32-bit environment (32-bit code segment for instruction accesses, EFER.LMA==0 for processor accesses) have to mask away the upper 32 bits of the address. While a bit wasteful, the easiest way to do so is to use separate MMU indexes. These days, QEMU anyway is compiled with a fixed value for NB_MMU_MODES. Split MMU_USER_IDX, MMU_KSMAP_IDX and MMU_KNOSMAP_IDX in two. 
Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 11 +++++---- target/i386/cpu.h | 34 ++++++++++++++++++++-------- target/i386/tcg/sysemu/excp_helper.c | 3 ++- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7f90823676..647371198c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7732,13 +7732,16 @@ static bool x86_cpu_has_work(CPUState *cs) return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0; } -static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) +static int x86_cpu_mmu_index(CPUState *env, bool ifetch) { CPUX86State *env = cpu_env(cs); + int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 1 : 0; + int mmu_index_base = + (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER64_IDX : + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; - return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX : - (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK)) - ? 
MMU_KNOSMAP_IDX : MMU_KSMAP_IDX; + return mmu_index_base + mmu_index_32; } static void x86_disas_set_info(CPUState *cs, disassemble_info *info) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8c271ca62e..ee4ad37202 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2299,27 +2299,41 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define cpu_list x86_cpu_list /* MMU modes definitions */ -#define MMU_KSMAP_IDX 0 -#define MMU_USER_IDX 1 -#define MMU_KNOSMAP_IDX 2 -#define MMU_NESTED_IDX 3 -#define MMU_PHYS_IDX 4 +#define MMU_KSMAP64_IDX 0 +#define MMU_KSMAP32_IDX 1 +#define MMU_USER64_IDX 2 +#define MMU_USER32_IDX 3 +#define MMU_KNOSMAP64_IDX 4 +#define MMU_KNOSMAP32_IDX 5 +#define MMU_PHYS_IDX 6 +#define MMU_NESTED_IDX 7 + +#ifdef CONFIG_USER_ONLY +#ifdef TARGET_X86_64 +#define MMU_USER_IDX MMU_USER64_IDX +#else +#define MMU_USER_IDX MMU_USER32_IDX +#endif +#endif static inline bool is_mmu_index_smap(int mmu_index) { - return mmu_index == MMU_KSMAP_IDX; + return (mmu_index & ~1) == MMU_KSMAP64_IDX; } static inline bool is_mmu_index_user(int mmu_index) { - return mmu_index == MMU_USER_IDX; + return (mmu_index & ~1) == MMU_USER64_IDX; } static inline int cpu_mmu_index_kernel(CPUX86State *env) { - return !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP_IDX : - ((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK)) - ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX; + int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 1 : 0; + int mmu_index_base = + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + ((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK)) ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; } #define CC_DST (env->cc_dst) diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index a0d5ce3930..b2c525e1a9 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -545,7 +545,8 @@ static bool get_physical_address(CPUX86State *env, vaddr addr, if (likely(use_stage2)) { in.cr3 = env->nested_cr3; in.pg_mode = env->nested_pg_mode; - in.mmu_idx = MMU_USER_IDX; + in.mmu_idx = + env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX; in.ptw_idx = MMU_PHYS_IDX; if (!mmu_translate(env, &in, out, err)) { From b1661801c184119a10ad6cbc3b80330fc22e7b2c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Dec 2023 18:01:52 +0100 Subject: [PATCH 06/10] target/i386: Fix physical address truncation The address translation logic in get_physical_address() will currently truncate physical addresses to 32 bits unless long mode is enabled. This is incorrect when using physical address extensions (PAE) outside of long mode, with the result that a 32-bit operating system using PAE to access memory above 4G will experience undefined behaviour. The truncation code was originally introduced in commit 33dfdb5 ("x86: only allow real mode to access 32bit without LMA"), where it applied only to translations performed while paging is disabled (and so cannot affect guests using PAE). Commit 9828198 ("target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX") rearranged the code such that the truncation also applied to the use of MMU_PHYS_IDX and MMU_NESTED_IDX. Commit 4a1e9d4 ("target/i386: Use atomic operations for pte updates") brought this truncation into scope for page table entry accesses, and is the first commit for which a Windows 10 32-bit guest will reliably fail to boot if memory above 4G is present. The truncation code however is not completely redundant. 
Even though the maximum address size for any executed instruction is 32 bits, helpers for operations such as BOUND, FSAVE or XSAVE may ask get_physical_address() to translate an address outside of the 32-bit range, if invoked with an argument that is close to the 4G boundary. Likewise for processor accesses, for example TSS or IDT accesses, when EFER.LMA==0. So, move the address truncation in get_physical_address() so that it applies to 32-bit MMU indexes, but not to MMU_PHYS_IDX and MMU_NESTED_IDX. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2040 Fixes: 4a1e9d4d11c ("target/i386: Use atomic operations for pte updates", 2022-10-18) Cc: qemu-stable@nongnu.org Co-developed-by: Michael Brown Signed-off-by: Michael Brown Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 6 ++++++ target/i386/tcg/sysemu/excp_helper.c | 12 +++++------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 647371198c..ba6d7b80a7 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7732,7 +7732,7 @@ static bool x86_cpu_has_work(CPUState *cs) return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0; } -static int x86_cpu_mmu_index(CPUState *env, bool ifetch) +static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) { CPUX86State *env = cpu_env(cs); int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 1 : 0; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ee4ad37202..952174bb6f 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2326,6 +2326,12 @@ static inline bool is_mmu_index_user(int mmu_index) return (mmu_index & ~1) == MMU_USER64_IDX; } +static inline bool is_mmu_index_32(int mmu_index) +{ + assert(mmu_index < MMU_PHYS_IDX); + return mmu_index & 1; +} + static inline int cpu_mmu_index_kernel(CPUX86State *env) { int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 
1 : 0; diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index b2c525e1a9..8bcdd2906d 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -558,6 +558,10 @@ static bool get_physical_address(CPUX86State *env, vaddr addr, break; default: + if (is_mmu_index_32(mmu_idx)) { + addr = (uint32_t)addr; + } + if (likely(env->cr[0] & CR0_PG_MASK)) { in.cr3 = env->cr[3]; in.mmu_idx = mmu_idx; @@ -581,14 +585,8 @@ static bool get_physical_address(CPUX86State *env, vaddr addr, break; } - /* Translation disabled. */ + /* No translation needed. */ out->paddr = addr & x86_get_a20_mask(env); -#ifdef TARGET_X86_64 - if (!(env->hflags & HF_LMA_MASK)) { - /* Without long mode we can only address 32bits in real mode */ - out->paddr = (uint32_t)out->paddr; - } -#endif out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; out->page_size = TARGET_PAGE_SIZE; return true; From a28fe7dc1939333c81b895cdced81c69eb7c5ad0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Dec 2023 09:52:27 +0100 Subject: [PATCH 07/10] target/i386: remove unnecessary/wrong application of the A20 mask If ptw_translate() does a MMU_PHYS_IDX access, the A20 mask is already applied in get_physical_address(), which is called via probe_access_full() and x86_cpu_tlb_fill(). If ptw_translate() on the other hand does a MMU_NESTED_IDX access, the A20 mask must not be applied to the address that is looked up in the nested page tables; it must be applied only to the addresses that hold the NPT entries (which is achieved via MMU_PHYS_IDX, per the previous paragraph). Therefore, we can remove A20 masking from the computation of the page table entry's address, and let get_physical_address() or mmu_translate() apply it when they know they are returning a host-physical address. 
Cc: qemu-stable@nongnu.org Fixes: 4a1e9d4d11c ("target/i386: Use atomic operations for pte updates", 2022-10-18) Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/excp_helper.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 8bcdd2906d..2ddc08b4bb 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -164,8 +164,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 5 */ - pte_addr = ((in->cr3 & ~0xfff) + - (((addr >> 48) & 0x1ff) << 3)) & a20_mask; + pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -189,8 +188,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 4 */ - pte_addr = ((pte & PG_ADDRESS_MASK) + - (((addr >> 39) & 0x1ff) << 3)) & a20_mask; + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -210,8 +208,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 3 */ - pte_addr = ((pte & PG_ADDRESS_MASK) + - (((addr >> 30) & 0x1ff) << 3)) & a20_mask; + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -238,7 +235,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 3 */ - pte_addr = ((in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18)) & a20_mask; + pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -260,8 +257,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 2 */ - pte_addr = ((pte & PG_ADDRESS_MASK) + - (((addr >> 21) & 0x1ff) << 3)) & a20_mask; + pte_addr = (pte 
& PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -287,8 +283,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 1 */ - pte_addr = ((pte & PG_ADDRESS_MASK) + - (((addr >> 12) & 0x1ff) << 3)) & a20_mask; + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -306,7 +301,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 2 */ - pte_addr = ((in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc)) & a20_mask; + pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } @@ -335,7 +330,7 @@ static bool mmu_translate(CPUX86State *env, const TranslateParams *in, /* * Page table level 1 */ - pte_addr = ((pte & ~0xfffu) + ((addr >> 10) & 0xffc)) & a20_mask; + pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc); if (!ptw_translate(&pte_trans, pte_addr)) { return false; } From b5a9de3259f4c791bde2faff086dd5737625e41e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Dec 2023 09:48:35 +0100 Subject: [PATCH 08/10] target/i386: leave the A20 bit set in the final NPT walk The A20 mask is only applied to the final memory access. Nested page tables are always walked with the raw guest-physical address. Unlike the previous patch, in this one the masking must be kept, but it was done too early. 
Cc: qemu-stable@nongnu.org Fixes: 4a1e9d4d11c ("target/i386: Use atomic operations for pte updates", 2022-10-18) Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/excp_helper.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 2ddc08b4bb..8f7011d966 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -134,7 +134,6 @@ static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set) static bool mmu_translate(CPUX86State *env, const TranslateParams *in, TranslateResult *out, TranslateFault *err) { - const int32_t a20_mask = x86_get_a20_mask(env); const target_ulong addr = in->addr; const int pg_mode = in->pg_mode; const bool is_user = is_mmu_index_user(in->mmu_idx); @@ -417,10 +416,13 @@ do_check_protect_pse36: } } - /* align to page_size */ - paddr = (pte & a20_mask & PG_ADDRESS_MASK & ~(page_size - 1)) - | (addr & (page_size - 1)); + /* merge offset within page */ + paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1)); + /* + * Note that NPT is walked (for both paging structures and final guest + * addresses) using the address with the A20 bit set. + */ if (in->ptw_idx == MMU_NESTED_IDX) { CPUTLBEntryFull *full; int flags, nested_page_size; @@ -459,7 +461,7 @@ do_check_protect_pse36: } } - out->paddr = paddr; + out->paddr = paddr & x86_get_a20_mask(env); out->prot = prot; out->page_size = page_size; return true; From dcaff46101c1f3abd97bbc5ba2f6c904def4e3b3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 Feb 2024 17:05:20 +0100 Subject: [PATCH 09/10] ide: collapse parameters to ide_init_drive MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All calls to ide_init_drive come from ide_dev_initfn. Just pass down the IDEDevice (IDEState is kinda obsolete and should be merged into IDEDevice). 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/ide/core.c | 40 ++++++++++++++++++---------------------- hw/ide/ide-dev.c | 5 +---- hw/ide/ide-internal.h | 6 +----- 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 130c4d8865..501df37faa 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2589,24 +2589,20 @@ static const BlockDevOps ide_hd_block_ops = { .resize_cb = ide_resize_cb, }; -int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, - const char *version, const char *serial, const char *model, - uint64_t wwn, - uint32_t cylinders, uint32_t heads, uint32_t secs, - int chs_trans, Error **errp) +int ide_init_drive(IDEState *s, IDEDevice *dev, IDEDriveKind kind, Error **errp) { uint64_t nb_sectors; - s->blk = blk; + s->blk = dev->conf.blk; s->drive_kind = kind; - blk_get_geometry(blk, &nb_sectors); - s->cylinders = cylinders; - s->heads = s->drive_heads = heads; - s->sectors = s->drive_sectors = secs; - s->chs_trans = chs_trans; + blk_get_geometry(s->blk, &nb_sectors); + s->cylinders = dev->conf.cyls; + s->heads = s->drive_heads = dev->conf.heads; + s->sectors = s->drive_sectors = dev->conf.secs; + s->chs_trans = dev->chs_trans; s->nb_sectors = nb_sectors; - s->wwn = wwn; + s->wwn = dev->wwn; /* The SMART values should be preserved across power cycles but they aren't. 
*/ s->smart_enabled = 1; @@ -2614,26 +2610,26 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, s->smart_errors = 0; s->smart_selftest_count = 0; if (kind == IDE_CD) { - blk_set_dev_ops(blk, &ide_cd_block_ops, s); + blk_set_dev_ops(s->blk, &ide_cd_block_ops, s); } else { if (!blk_is_inserted(s->blk)) { error_setg(errp, "Device needs media, but drive is empty"); return -1; } - if (!blk_is_writable(blk)) { + if (!blk_is_writable(s->blk)) { error_setg(errp, "Can't use a read-only drive"); return -1; } - blk_set_dev_ops(blk, &ide_hd_block_ops, s); + blk_set_dev_ops(s->blk, &ide_hd_block_ops, s); } - if (serial) { - pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), serial); + if (dev->serial) { + pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), dev->serial); } else { snprintf(s->drive_serial_str, sizeof(s->drive_serial_str), "QM%05d", s->drive_serial); } - if (model) { - pstrcpy(s->drive_model_str, sizeof(s->drive_model_str), model); + if (dev->model) { + pstrcpy(s->drive_model_str, sizeof(s->drive_model_str), dev->model); } else { switch (kind) { case IDE_CD: @@ -2648,14 +2644,14 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, } } - if (version) { - pstrcpy(s->version, sizeof(s->version), version); + if (dev->version) { + pstrcpy(s->version, sizeof(s->version), dev->version); } else { pstrcpy(s->version, sizeof(s->version), qemu_hw_version()); } ide_reset(s); - blk_iostatus_enable(blk); + blk_iostatus_enable(s->blk); return 0; } diff --git a/hw/ide/ide-dev.c b/hw/ide/ide-dev.c index 799bd4b6ec..722c4e78ca 100644 --- a/hw/ide/ide-dev.c +++ b/hw/ide/ide-dev.c @@ -118,10 +118,7 @@ void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp) return; } - if (ide_init_drive(s, dev->conf.blk, kind, - dev->version, dev->serial, dev->model, dev->wwn, - dev->conf.cyls, dev->conf.heads, dev->conf.secs, - dev->chs_trans, errp) < 0) { + if (ide_init_drive(s, dev, kind, errp) < 0) { return; } diff --git 
a/hw/ide/ide-internal.h b/hw/ide/ide-internal.h index 20dde37f45..0d64805da2 100644 --- a/hw/ide/ide-internal.h +++ b/hw/ide/ide-internal.h @@ -416,11 +416,7 @@ uint32_t ide_data_readw(void *opaque, uint32_t addr); void ide_data_writel(void *opaque, uint32_t addr, uint32_t val); uint32_t ide_data_readl(void *opaque, uint32_t addr); -int ide_init_drive(IDEState *s, BlockBackend *blk, IDEDriveKind kind, - const char *version, const char *serial, const char *model, - uint64_t wwn, - uint32_t cylinders, uint32_t heads, uint32_t secs, - int chs_trans, Error **errp); +int ide_init_drive(IDEState *s, IDEDevice *dev, IDEDriveKind kind, Error **errp); void ide_exit(IDEState *s); void ide_bus_init_output_irq(IDEBus *bus, qemu_irq irq_out); int ide_init_ioport(IDEBus *bus, ISADevice *isa, int iobase, int iobase2); From d13f40357b307e49a7983bd52d4ec35003b7ddc0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 20 Feb 2024 17:09:30 +0100 Subject: [PATCH 10/10] ide, vl: turn -win2k-hack into a property on IDE devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- hw/ide/core.c | 3 ++- hw/ide/ide-dev.c | 1 + include/hw/ide/ide-dev.h | 2 ++ include/sysemu/sysemu.h | 1 - qemu-options.hx | 3 ++- system/globals.c | 1 - system/vl.c | 2 +- 7 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/ide/core.c b/hw/ide/core.c index 501df37faa..e8cb2dac92 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1059,7 +1059,7 @@ static void ide_sector_write_cb(void *opaque, int ret) ide_sector_write); } - if (win2k_install_hack && ((++s->irq_count % 16) == 0)) { + if (s->win2k_install_hack && ((++s->irq_count % 16) == 0)) { /* It seems there is a bug in the Windows 2000 installer HDD IDE driver which fills the disk with empty logs when the IDE write IRQ comes too early. 
This hack tries to correct @@ -2597,6 +2597,7 @@ int ide_init_drive(IDEState *s, IDEDevice *dev, IDEDriveKind kind, Error **errp) s->drive_kind = kind; blk_get_geometry(s->blk, &nb_sectors); + s->win2k_install_hack = dev->win2k_install_hack; s->cylinders = dev->conf.cyls; s->heads = s->drive_heads = dev->conf.heads; s->sectors = s->drive_sectors = dev->conf.secs; diff --git a/hw/ide/ide-dev.c b/hw/ide/ide-dev.c index 722c4e78ca..03f7967798 100644 --- a/hw/ide/ide-dev.c +++ b/hw/ide/ide-dev.c @@ -31,6 +31,7 @@ static Property ide_props[] = { DEFINE_PROP_UINT32("unit", IDEDevice, unit, -1), + DEFINE_PROP_BOOL("win2k-install-hack", IDEDevice, win2k_install_hack, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/ide/ide-dev.h b/include/hw/ide/ide-dev.h index 708cc0fda3..9a0d71db4e 100644 --- a/include/hw/ide/ide-dev.h +++ b/include/hw/ide/ide-dev.h @@ -65,6 +65,7 @@ struct IDEState { int drive_serial; char drive_serial_str[21]; char drive_model_str[41]; + bool win2k_install_hack; uint64_t wwn; /* ide regs */ uint8_t feature; @@ -163,6 +164,7 @@ struct IDEDevice { * 0xffff - reserved */ uint16_t rotation_rate; + bool win2k_install_hack; }; typedef struct IDEDrive { diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 73a37949c2..eb1dc1e4ed 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -41,7 +41,6 @@ extern int graphic_height; extern int graphic_depth; extern int display_opengl; extern const char *keyboard_layout; -extern int win2k_install_hack; extern int graphic_rotate; extern int old_param; extern uint8_t *boot_splash_filedata; diff --git a/qemu-options.hx b/qemu-options.hx index 1136642c21..9a47385c15 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2641,7 +2641,8 @@ SRST ``-win2k-hack`` Use it when installing Windows 2000 to avoid a disk full bug. After Windows 2000 is installed, you no longer need this option (this - option slows down the IDE transfers). + option slows down the IDE transfers). 
Synonym of ``-global + ide-device.win2k-install-hack=on``. ERST DEF("no-fd-bootchk", 0, QEMU_OPTION_no_fd_bootchk, diff --git a/system/globals.c b/system/globals.c index 5d0046ba10..e353584201 100644 --- a/system/globals.c +++ b/system/globals.c @@ -40,7 +40,6 @@ int autostart = 1; int vga_interface_type = VGA_NONE; bool vga_interface_created; Chardev *parallel_hds[MAX_PARALLEL_PORTS]; -int win2k_install_hack; int graphic_rotate; QEMUOptionRom option_rom[MAX_OPTION_ROMS]; int nb_option_roms; diff --git a/system/vl.c b/system/vl.c index 98bf0c386b..e480afd7a0 100644 --- a/system/vl.c +++ b/system/vl.c @@ -3265,7 +3265,7 @@ void qemu_init(int argc, char **argv) pid_file = optarg; break; case QEMU_OPTION_win2k_hack: - win2k_install_hack = 1; + object_register_sugar_prop("ide-device", "win2k-install-hack", "true", true); break; case QEMU_OPTION_acpitable: opts = qemu_opts_parse_noisily(qemu_find_opts("acpi"),