Merge pull request #9233 from altimumdelta/CPU_Fix-Comment-ZEN+2-PDEP-PEXT

CPUDetect: Indicate slow PDEP/PEXT only for Zen1/+/2 (Family 23)
This commit is contained in:
Tilka 2020-11-08 18:42:32 +00:00 committed by GitHub
commit b4110aec52
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 6 deletions

View File

@@ -42,8 +42,7 @@ struct CPUInfo
bool bAVX2 = false; bool bAVX2 = false;
bool bBMI1 = false; bool bBMI1 = false;
bool bBMI2 = false; bool bBMI2 = false;
// PDEP and PEXT are ridiculously slow on AMD Zen, so we have this flag to avoid using them there // PDEP and PEXT are ridiculously slow on AMD Zen1, Zen1+ and Zen2 (Family 23)
// Zen 2 is also affected by this issue
bool bFastBMI2 = false; bool bFastBMI2 = false;
bool bFMA = false; bool bFMA = false;
bool bFMA4 = false; bool bFMA4 = false;
@@ -57,7 +56,7 @@ struct CPUInfo
bool bLAHFSAHF64 = false; bool bLAHFSAHF64 = false;
bool bLongMode = false; bool bLongMode = false;
bool bAtom = false; bool bAtom = false;
bool bZen = false; bool bZen1p2 = false;
// ARMv8 specific // ARMv8 specific
bool bFP = false; bool bFP = false;

View File

@@ -118,9 +118,9 @@ void CPUInfo::Detect()
(model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 || (model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D)) model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
bAtom = true; bAtom = true;
// Detect AMD Zen (all models) // Detect AMD Zen1, Zen1+ and Zen2
if (family == 23) if (family == 23)
bZen = true; bZen1p2 = true;
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF; logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
ht = (cpu_id[3] >> 28) & 1; ht = (cpu_id[3] >> 28) & 1;
@@ -175,7 +175,7 @@ void CPUInfo::Detect()
} }
bFlushToZero = bSSE; bFlushToZero = bSSE;
bFastBMI2 = bBMI2 && !bZen; bFastBMI2 = bBMI2 && !bZen1p2;
if (max_ex_fn >= 0x80000004) if (max_ex_fn >= 0x80000004)
{ {