diff --git a/3rdparty/cpuinfo/include/cpuinfo.h b/3rdparty/cpuinfo/include/cpuinfo.h
index 6eb4b8c38e..5f93819e8b 100644
--- a/3rdparty/cpuinfo/include/cpuinfo.h
+++ b/3rdparty/cpuinfo/include/cpuinfo.h
@@ -522,6 +522,8 @@ enum cpuinfo_uarch {
 	cpuinfo_uarch_falkor = 0x00400103,
 	/** Qualcomm Saphira. */
 	cpuinfo_uarch_saphira = 0x00400104,
+	/** Qualcomm Oryon. */
+	cpuinfo_uarch_oryon = 0x00400105,
 
 	/** Nvidia Denver. */
 	cpuinfo_uarch_denver = 0x00500100,
@@ -821,6 +823,7 @@ struct cpuinfo_x86_isa {
 	bool avx512_4vnniw;
 	bool avx512_4fmaps;
 	bool avx10_1;
+	bool avx10_2;
 	bool amx_bf16;
 	bool amx_tile;
 	bool amx_int8;
@@ -1444,6 +1447,14 @@ static inline bool cpuinfo_has_x86_avx10_1(void) {
 #endif
 }
 
+static inline bool cpuinfo_has_x86_avx10_2(void) {
+#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
+	return cpuinfo_isa.avx10_2;
+#else
+	return false;
+#endif
+}
+
 static inline bool cpuinfo_has_x86_hle(void) {
 #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
 	return cpuinfo_isa.hle;
diff --git a/3rdparty/cpuinfo/src/arm/api.h b/3rdparty/cpuinfo/src/arm/api.h
index ac735e3e78..32a271c4ce 100644
--- a/3rdparty/cpuinfo/src/arm/api.h
+++ b/3rdparty/cpuinfo/src/arm/api.h
@@ -1,5 +1,11 @@
 #pragma once
 
+#ifdef _MSC_VER
+#define RESTRICT_STATIC /* nothing for MSVC */
+#else
+#define RESTRICT_STATIC restrict static
+#endif
+
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -82,11 +88,11 @@ struct cpuinfo_arm_chipset {
 
 #ifndef __cplusplus
 CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string(
-	const struct cpuinfo_arm_chipset chipset[restrict static 1],
-	char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]);
+	const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
+	char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]);
 
 CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset(
-	struct cpuinfo_arm_chipset chipset[restrict static 1],
+	struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
 	uint32_t cores,
 	uint32_t max_cpu_freq_max);
 
@@ -95,23 +101,23 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch(
 #if CPUINFO_ARCH_ARM
 	bool has_vfpv4,
 #endif
-	enum cpuinfo_vendor vendor[restrict static 1],
-	enum cpuinfo_uarch uarch[restrict static 1]);
+	enum cpuinfo_vendor vendor[RESTRICT_STATIC 1],
+	enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]);
 
 CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
 	enum cpuinfo_uarch uarch,
 	uint32_t cluster_cores,
 	uint32_t midr,
-	const struct cpuinfo_arm_chipset chipset[restrict static 1],
+	const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1],
 	uint32_t cluster_id,
 	uint32_t arch_version,
-	struct cpuinfo_cache l1i[restrict static 1],
-	struct cpuinfo_cache l1d[restrict static 1],
-	struct cpuinfo_cache l2[restrict static 1],
-	struct cpuinfo_cache l3[restrict static 1]);
+	struct cpuinfo_cache l1i[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l1d[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l2[RESTRICT_STATIC 1],
+	struct cpuinfo_cache l3[RESTRICT_STATIC 1]);
 
 CPUINFO_INTERNAL uint32_t
-cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[restrict static 1]);
+cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[RESTRICT_STATIC 1]);
 #else /* defined(__cplusplus) */
 CPUINFO_INTERNAL void cpuinfo_arm_decode_cache(
 	enum cpuinfo_uarch uarch,
diff --git a/3rdparty/cpuinfo/src/arm/mach/init.c b/3rdparty/cpuinfo/src/arm/mach/init.c
index 47b1b18bc4..c4e6521b3c 100644
--- a/3rdparty/cpuinfo/src/arm/mach/init.c
+++ b/3rdparty/cpuinfo/src/arm/mach/init.c
@@ -101,7 +101,6 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index,
 	return cpuinfo_uarch_unknown;
 }
 
-/* Small bodge until cpuinfo merges PR #246 */
 static int read_package_name_from_brand_string(char* package_name) {
 	size_t size;
 	if (sysctlbyname("machdep.cpu.brand_string", NULL, &size, NULL, 0) != 0) {
diff --git a/3rdparty/cpuinfo/src/arm/uarch.c b/3rdparty/cpuinfo/src/arm/uarch.c
index 68531e4d1d..9679f5004e 100644
--- a/3rdparty/cpuinfo/src/arm/uarch.c
+++ b/3rdparty/cpuinfo/src/arm/uarch.c
@@ -9,8 +9,8 @@ void cpuinfo_arm_decode_vendor_uarch(
 #if CPUINFO_ARCH_ARM
 	bool has_vfpv4,
 #endif /* CPUINFO_ARCH_ARM */
-	enum cpuinfo_vendor vendor[restrict static 1],
-	enum cpuinfo_uarch uarch[restrict static 1]) {
+	enum cpuinfo_vendor vendor[RESTRICT_STATIC 1],
+	enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]) {
 	switch (midr_get_implementer(midr)) {
 		case 'A':
 			*vendor = cpuinfo_vendor_arm;
@@ -332,6 +332,9 @@ void cpuinfo_arm_decode_vendor_uarch(
 					*uarch = cpuinfo_uarch_cortex_a55;
 					break;
 #if CPUINFO_ARCH_ARM64
+				case 0x001:
+					*uarch = cpuinfo_uarch_oryon;
+					break;
 				case 0xC00:
 					*uarch = cpuinfo_uarch_falkor;
 					break;
diff --git a/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c b/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c
index a644b1d019..32c9b54683 100644
--- a/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c
+++ b/3rdparty/cpuinfo/src/arm/windows/init-by-logical-sys-info.c
@@ -750,11 +750,15 @@ void store_core_info_per_processor(
 	if (cores) {
 		processors[processor_global_index].core = cores + core_id;
 		cores[core_id].core_id = core_id;
-		get_core_uarch_for_efficiency(
-			chip_info->chip_name,
-			core_info->Processor.EfficiencyClass,
-			&(cores[core_id].uarch),
-			&(cores[core_id].frequency));
+
+		if (chip_info == NULL) {
+			cpuinfo_log_error("chip info is NULL for core %d", core_id);
+			return;
+		}
+
+		/* Every core reports the uarch and frequency stored in slot 0 of chip_info. */
+		cores[core_id].uarch = chip_info->uarchs[0].uarch;
+		cores[core_id].frequency = chip_info->uarchs[0].frequency;
 
 		/* We don't have cluster information, so we handle it as
 		 * fixed 1 to (cluster / cores).
diff --git a/3rdparty/cpuinfo/src/arm/windows/init.c b/3rdparty/cpuinfo/src/arm/windows/init.c
index faa30ef567..ffbe554d16 100644
--- a/3rdparty/cpuinfo/src/arm/windows/init.c
+++ b/3rdparty/cpuinfo/src/arm/windows/init.c
@@ -7,6 +7,9 @@
 #include <cpuinfo/internal-api.h>
 #include <cpuinfo/log.h>
 
+#include <arm/api.h>
+#include <arm/midr.h>
+
 #include "windows-arm-init.h"
 
 struct cpuinfo_arm_isa cpuinfo_isa;
@@ -14,62 +17,7 @@ struct cpuinfo_arm_isa cpuinfo_isa;
 static void set_cpuinfo_isa_fields(void);
 static struct woa_chip_info* get_system_info_from_registry(void);
 
-static struct woa_chip_info woa_chip_unknown = {
-	L"Unknown",
-	woa_chip_name_unknown,
-	{{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}};
-
-/* Please add new SoC/chip info here! */
-static struct woa_chip_info woa_chips[woa_chip_name_last] = {
-	/* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
-	[woa_chip_name_microsoft_sq_1] =
-		{L"Microsoft SQ1",
-		 woa_chip_name_microsoft_sq_1,
-		 {{
-			cpuinfo_vendor_arm,
-			cpuinfo_uarch_cortex_a55,
-			1800000000,
-		  },
-		  {
-			cpuinfo_vendor_arm,
-			cpuinfo_uarch_cortex_a76,
-			3000000000,
-		  }}},
-	/* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
-	[woa_chip_name_microsoft_sq_2] =
-		{L"Microsoft SQ2",
-		 woa_chip_name_microsoft_sq_2,
-		 {{
-			cpuinfo_vendor_arm,
-			cpuinfo_uarch_cortex_a55,
-			2420000000,
-		  },
-		  {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_a76, 3150000000}}},
-	/* Snapdragon (TM) 8cx Gen 3 @ 3.0 GHz */
-	[woa_chip_name_microsoft_sq_3] =
-		{L"Snapdragon (TM) 8cx Gen 3",
-		 woa_chip_name_microsoft_sq_3,
-		 {{
-			cpuinfo_vendor_arm,
-			cpuinfo_uarch_cortex_a78,
-			2420000000,
-		  },
-		  {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}},
-	/* Microsoft Windows Dev Kit 2023 */
-	[woa_chip_name_microsoft_sq_3_devkit] =
-		{L"Snapdragon Compute Platform",
-		 woa_chip_name_microsoft_sq_3_devkit,
-		 {{
-			cpuinfo_vendor_arm,
-			cpuinfo_uarch_cortex_a78,
-			2420000000,
-		  },
-		  {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}},
-	/* Ampere Altra */
-	[woa_chip_name_ampere_altra] = {
-		L"Ampere(R) Altra(R) Processor",
-		woa_chip_name_ampere_altra,
-		{{cpuinfo_vendor_arm, cpuinfo_uarch_neoverse_n1, 3000000000}}}};
+static struct woa_chip_info woa_chip_unknown = {L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}};
 
 BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
 	struct woa_chip_info* chip_info = NULL;
@@ -87,23 +35,6 @@ BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PV
 	return true;
 }
 
-bool get_core_uarch_for_efficiency(
-	enum woa_chip_name chip,
-	BYTE EfficiencyClass,
-	enum cpuinfo_uarch* uarch,
-	uint64_t* frequency) {
-	/* For currently supported WoA chips, the Efficiency class selects
-	 * the pre-defined little and big core.
-	 * Any further supported SoC's logic should be implemented here.
-	 */
-	if (uarch && frequency && chip < woa_chip_name_last && EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES) {
-		*uarch = woa_chips[chip].uarchs[EfficiencyClass].uarch;
-		*frequency = woa_chips[chip].uarchs[EfficiencyClass].frequency;
-		return true;
-	}
-	return false;
-}
-
 /* Static helper functions */
 
 static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) {
@@ -149,40 +80,132 @@ static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) {
 	return text_buffer;
 }
 
+/* Reads a REG_QWORD value under HKEY_LOCAL_MACHINE; returns 0 on failure. */
+static uint64_t read_registry_qword(LPCWSTR subkey, LPCWSTR value) {
+	DWORD key_type = 0;
+	DWORD data_size = sizeof(uint64_t);
+	const DWORD flags = RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */
+	uint64_t qword_value = 0;
+	LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, &qword_value, &data_size);
+	if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) {
+		cpuinfo_log_error("Registry QWORD read error");
+		return 0;
+	}
+	return qword_value;
+}
+
+/* Reads a REG_DWORD value under HKEY_LOCAL_MACHINE, widened to 64 bits; returns 0 on failure. */
+static uint64_t read_registry_dword(LPCWSTR subkey, LPCWSTR value) {
+	DWORD key_type = 0;
+	DWORD data_size = sizeof(DWORD);
+	DWORD dword_value = 0;
+	LSTATUS result =
+		RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_DWORD, &key_type, &dword_value, &data_size);
+	if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) {
+		cpuinfo_log_error("Registry DWORD read error");
+		return 0;
+	}
+	return (uint64_t)dword_value;
+}
+
+/* Copies at most n wide characters of src into a new process-heap buffer; returns NULL if allocation fails. */
+static wchar_t* wcsndup(const wchar_t* src, size_t n) {
+	size_t len = wcsnlen(src, n);
+	wchar_t* dup = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (len + 1) * sizeof(wchar_t));
+	if (dup) {
+		wcsncpy_s(dup, len + 1, src, len);
+		dup[len] = L'\0';
+	}
+	return dup;
+}
+
+/* Decodes vendor and uarch from a MIDR value and pairs them with the given frequency. */
+static struct core_info_by_chip_name get_core_info_from_midr(uint32_t midr, uint64_t frequency) {
+	struct core_info_by_chip_name info;
+	enum cpuinfo_vendor vendor;
+	enum cpuinfo_uarch uarch;
+
+#if CPUINFO_ARCH_ARM
+	bool has_vfpv4 = false;
+	cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch);
+#else
+	cpuinfo_arm_decode_vendor_uarch(midr, &vendor, &uarch);
+#endif
+
+	info.vendor = vendor;
+	info.uarch = uarch;
+	info.frequency = frequency;
+	return info;
+}
+
+/*
+ * Builds a heap-allocated woa_chip_info from the CPU 0 registry entries
+ * (processor name, MIDR and clock in MHz). Returns NULL when a registry read
+ * or a heap allocation fails; the temporary name buffer is freed on every path.
+ */
 static struct woa_chip_info* get_system_info_from_registry(void) {
 	wchar_t* text_buffer = NULL;
 	LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
 	LPCWSTR chip_name_value = L"ProcessorNameString";
+	LPCWSTR chip_midr_value = L"CP 4000";
+	LPCWSTR chip_mhz_value = L"~MHz";
 	struct woa_chip_info* chip_info = NULL;
 
 	HANDLE heap = GetProcessHeap();
 
-	/* Read processor model name from registry and find in the hard-coded
-	 * list. */
+	/* Read processor model name from registry. */
 	text_buffer = read_registry(cpu0_subkey, chip_name_value);
 	if (text_buffer == NULL) {
-		cpuinfo_log_error("Registry read error");
+		cpuinfo_log_error("Registry read error for processor name");
 		return NULL;
 	}
-	for (uint32_t i = 0; i < (uint32_t)woa_chip_name_last; i++) {
-		size_t compare_length = wcsnlen(woa_chips[i].chip_name_string, CPUINFO_PACKAGE_NAME_MAX);
-		int compare_result = wcsncmp(text_buffer, woa_chips[i].chip_name_string, compare_length);
-		if (compare_result == 0) {
-			chip_info = woa_chips + i;
-			break;
-		}
+
+	/*
+	 * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register
+	 * Regedit for MIDR:
+	 * HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000
+	 */
+	uint64_t midr_qword = (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value);
+	if (midr_qword == 0) {
+		cpuinfo_log_error("Registry read error for MIDR value");
+		HeapFree(heap, 0, text_buffer);
+		return NULL;
 	}
+	// MIDR is only 32 bits, so we need to cast it to uint32_t
+	uint32_t midr_value = (uint32_t)midr_qword;
+
+	/* Read the frequency from the registry
+	 * The value is in MHz, so we need to convert it to Hz */
+	uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value);
+	if (frequency_mhz == 0) {
+		cpuinfo_log_error("Registry read error for frequency value");
+		HeapFree(heap, 0, text_buffer);
+		return NULL;
+	}
+	// Convert MHz to Hz
+	uint64_t frequency_hz = frequency_mhz * 1000000;
+
+	// Allocate chip_info before using it.
+	chip_info = HeapAlloc(heap, HEAP_ZERO_MEMORY, sizeof(struct woa_chip_info));
 	if (chip_info == NULL) {
-		/* No match was found, so print a warning and assign the unknown
-		 * case. */
-		cpuinfo_log_error(
-			"Unknown chip model name '%ls'.\nPlease add new Windows on Arm SoC/chip support to arm/windows/init.c!",
-			text_buffer);
-	} else {
-		cpuinfo_log_debug("detected chip model name: %s", chip_info->chip_name_string);
+		cpuinfo_log_error("Heap allocation error for chip_info");
+		HeapFree(heap, 0, text_buffer);
+		return NULL;
 	}
 
+	// set chip_info fields
+	chip_info->chip_name_string = wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1);
+	chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz);
+
 	HeapFree(heap, 0, text_buffer);
+	if (chip_info->chip_name_string == NULL) {
+		cpuinfo_log_error("Heap allocation error for chip name");
+		HeapFree(heap, 0, chip_info);
+		return NULL;
+	}
+
+	cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string);
+
 	return chip_info;
 }
 
diff --git a/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h b/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h
index b054a29822..0448243d08 100644
--- a/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h
+++ b/3rdparty/cpuinfo/src/arm/windows/windows-arm-init.h
@@ -3,17 +3,6 @@
 /* Efficiency class = 0 means little core, while 1 means big core for now. */
 #define MAX_WOA_VALID_EFFICIENCY_CLASSES 2
 
-/* List of known and supported Windows on Arm SoCs/chips. */
-enum woa_chip_name {
-	woa_chip_name_microsoft_sq_1 = 0,
-	woa_chip_name_microsoft_sq_2 = 1,
-	woa_chip_name_microsoft_sq_3 = 2,
-	woa_chip_name_microsoft_sq_3_devkit = 3,
-	woa_chip_name_ampere_altra = 4,
-	woa_chip_name_unknown = 5,
-	woa_chip_name_last = woa_chip_name_unknown
-};
-
 /* Topology information hard-coded by SoC/chip name */
 struct core_info_by_chip_name {
 	enum cpuinfo_vendor vendor;
@@ -26,14 +15,7 @@ struct core_info_by_chip_name {
  */
 struct woa_chip_info {
 	wchar_t* chip_name_string;
-	enum woa_chip_name chip_name;
 	struct core_info_by_chip_name uarchs[MAX_WOA_VALID_EFFICIENCY_CLASSES];
 };
 
-bool get_core_uarch_for_efficiency(
-	enum woa_chip_name chip,
-	BYTE EfficiencyClass,
-	enum cpuinfo_uarch* uarch,
-	uint64_t* frequency);
-
 bool cpu_info_init_by_logical_sys_info(const struct woa_chip_info* chip_info, enum cpuinfo_vendor vendor);
diff --git a/3rdparty/cpuinfo/src/x86/isa.c b/3rdparty/cpuinfo/src/x86/isa.c
index 47a6afa320..222bd231dd 100644
--- a/3rdparty/cpuinfo/src/x86/isa.c
+++ b/3rdparty/cpuinfo/src/x86/isa.c
@@ -46,6 +46,8 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
 		(max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs){0, 0, 0, 0};
 	const struct cpuid_regs structured_feature_info1 =
 		(max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs){0, 0, 0, 0};
+	const struct cpuid_regs structured_feature_info2 =
+		(max_base_index >= 0x24) ? cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0};
 
 	const uint32_t processor_capacity_info_index = UINT32_C(0x80000008);
 	const struct cpuid_regs processor_capacity_info = (max_extended_index >= processor_capacity_info_index)
@@ -430,10 +432,17 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
 	isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000));
 
 	/*
-	 * AVX 10.1 instructions:
+	 * AVX 10.1 instructions: avx 10 isa supported.
+	 * - Intel: edx[bit 19] in structured feature info (ecx = 1).
 	 */
 	isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000));
 
+	/*
+	 * AVX 10.2 instructions: avx 10 version information.
+	 * - Intel: ebx[bits 0-7] in leaf 0x24 (eax = 0x24, ecx = 0); requires AVX 10.1.
+	 */
+	isa.avx10_2 = ((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1;
+
 	/*
 	 * AVX512PF instructions:
 	 * - Intel: ebx[bit 26] in structured feature info (ecx = 0).
diff --git a/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c b/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c
index 7df72aba50..8f038b0702 100644
--- a/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c
+++ b/3rdparty/cpuinfo/src/x86/linux/cpuinfo.c
@@ -83,8 +83,9 @@ struct proc_cpuinfo_parser_state {
 static bool parse_line(
 	const char* line_start,
 	const char* line_end,
-	struct proc_cpuinfo_parser_state state[restrict static 1],
+	void* context,
 	uint64_t line_number) {
+	struct proc_cpuinfo_parser_state* restrict state = context;
 	/* Empty line. Skip. */
 	if (line_start == line_end) {
 		return true;
@@ -215,5 +216,5 @@ bool cpuinfo_x86_linux_parse_proc_cpuinfo(
 		.processors = processors,
 	};
 	return cpuinfo_linux_parse_multiline_file(
-		"/proc/cpuinfo", BUFFER_SIZE, (cpuinfo_line_callback)parse_line, &state);
+		"/proc/cpuinfo", BUFFER_SIZE, parse_line, &state);
 }