From 12f6b8280fa3a89db853bef8373ddc949dbfde6b Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 24 Apr 2024 23:49:13 +0800 Subject: [PATCH] i386/cpu: Fix i/d-cache topology to core level for Intel CPU For i-cache and d-cache, current QEMU hardcodes the maximum IDs for CPUs sharing cache (CPUID.04H.00H:EAX[bits 25:14] and CPUID.04H.01H:EAX[bits 25:14]) to 0, and this means i-cache and d-cache are shared in the SMT level. This is correct if there's single thread per core, but is wrong for the hyper threading case (one core contains multiple threads) since the i-cache and d-cache are shared in the core level other than SMT level. For AMD CPU, commit 8f4202fb1080 ("i386: Populate AMD Processor Cache Information for cpuid 0x8000001D") has already introduced i/d cache topology as core level by default. Therefore, in order to be compatible with both multi-threaded and single-threaded situations, we should set i-cache and d-cache be shared at the core level by default. This fix changes the default i/d cache topology from per-thread to per-core. Potentially, this change in L1 cache topology may affect the performance of the VM if the user does not specifically specify the topology or bind the vCPU. However, the way to achieve optimal performance should be to create a reasonable topology and set the appropriate vCPU affinity without relying on QEMU's default topology structure. Fixes: 7e3482f82480 ("i386: Helpers to encode cache information consistently") Suggested-by: Robert Hoo Signed-off-by: Zhao Liu Reviewed-by: Xiaoyao Li Tested-by: Babu Moger Tested-by: Yongwei Ma Acked-by: Michael S. Tsirkin Message-ID: <20240424154929.1487382-6-zhao1.liu@intel.com> [Add compat property. - Paolo] Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 1 + target/i386/cpu.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4a2d6f5a97..6126bfdd2a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -79,6 +79,7 @@ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, GlobalProperty pc_compat_9_0[] = { + { TYPE_X86_CPU, "x-l1-cache-per-thread", "false" }, { TYPE_X86_CPU, "guest-phys-bits", "0" }, { "sev-guest", "legacy-vm-type", "true" }, { TYPE_X86_CPU, "legacy-multi-node", "on" }, diff --git a/target/i386/cpu.c b/target/i386/cpu.c index de1ad7270c..3c66242f6d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6258,15 +6258,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = *ebx = *ecx = *edx = 0; } else { *eax = 0; + int apic_ids_sharing_l1 = cpu->l1_cache_per_core ? cs->nr_threads : 1; switch (count) { case 0: /* L1 dcache info */ encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - 1, cs->nr_cores, + apic_ids_sharing_l1, cs->nr_cores, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - 1, cs->nr_cores, + apic_ids_sharing_l1, cs->nr_cores, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ @@ -8105,6 +8106,7 @@ static Property x86_cpu_properties[] = { false), DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), + DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), DEFINE_PROP_END_OF_LIST() };