Merge pull request #8586 from Techjar/d2s-no-pext
Avoid using PDEP and PEXT on AMD Zen
This commit is contained in:
commit
f36c735856
|
@ -42,6 +42,9 @@ struct CPUInfo
|
||||||
bool bAVX2 = false;
|
bool bAVX2 = false;
|
||||||
bool bBMI1 = false;
|
bool bBMI1 = false;
|
||||||
bool bBMI2 = false;
|
bool bBMI2 = false;
|
||||||
|
// PDEP and PEXT are extremely slow on AMD Zen, so this flag is only set when BMI2 is available and the CPU is not a Zen part; check it instead of bBMI2 before using those two instructions
|
||||||
|
// Zen 2 is also affected by this issue
|
||||||
|
bool bFastBMI2 = false;
|
||||||
bool bFMA = false;
|
bool bFMA = false;
|
||||||
bool bFMA4 = false;
|
bool bFMA4 = false;
|
||||||
bool bAES = false;
|
bool bAES = false;
|
||||||
|
@ -54,6 +57,7 @@ struct CPUInfo
|
||||||
bool bLAHFSAHF64 = false;
|
bool bLAHFSAHF64 = false;
|
||||||
bool bLongMode = false;
|
bool bLongMode = false;
|
||||||
bool bAtom = false;
|
bool bAtom = false;
|
||||||
|
bool bZen = false;
|
||||||
|
|
||||||
// ARMv8 specific
|
// ARMv8 specific
|
||||||
bool bFP = false;
|
bool bFP = false;
|
||||||
|
|
|
@ -118,6 +118,9 @@ void CPUInfo::Detect()
|
||||||
(model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
|
(model == 0x1C || model == 0x26 || model == 0x27 || model == 0x35 || model == 0x36 ||
|
||||||
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
|
model == 0x37 || model == 0x4A || model == 0x4D || model == 0x5A || model == 0x5D))
|
||||||
bAtom = true;
|
bAtom = true;
|
||||||
|
// Detect AMD Zen via CPU family 23 (17h), which covers Zen, Zen+ and Zen 2
|
||||||
|
if (family == 23)
|
||||||
|
bZen = true;
|
||||||
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
|
logical_cpu_count = (cpu_id[1] >> 16) & 0xFF;
|
||||||
ht = (cpu_id[3] >> 28) & 1;
|
ht = (cpu_id[3] >> 28) & 1;
|
||||||
|
|
||||||
|
@ -172,6 +175,7 @@ void CPUInfo::Detect()
|
||||||
}
|
}
|
||||||
|
|
||||||
bFlushToZero = bSSE;
|
bFlushToZero = bSSE;
|
||||||
|
bFastBMI2 = bBMI2 && !bZen;
|
||||||
|
|
||||||
if (max_ex_fn >= 0x80000004)
|
if (max_ex_fn >= 0x80000004)
|
||||||
{
|
{
|
||||||
|
|
|
@ -68,7 +68,7 @@ void CommonAsmRoutines::GenConvertDoubleToSingle()
|
||||||
|
|
||||||
// Don't Denormalize
|
// Don't Denormalize
|
||||||
|
|
||||||
if (cpu_info.bBMI2)
|
if (cpu_info.bFastBMI2)
|
||||||
{
|
{
|
||||||
// Extract bits 0-1 and 5-34
|
// Extract bits 0-1 and 5-34
|
||||||
MOV(64, R(RSCRATCH), Imm64(0xc7ffffffe0000000));
|
MOV(64, R(RSCRATCH), Imm64(0xc7ffffffe0000000));
|
||||||
|
|
|
@ -284,7 +284,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
|
||||||
// RRRRRGGG GGGBBBBB
|
// RRRRRGGG GGGBBBBB
|
||||||
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
||||||
LoadAndSwap(16, scratch1, data);
|
LoadAndSwap(16, scratch1, data);
|
||||||
if (cpu_info.bBMI1 && cpu_info.bBMI2)
|
if (cpu_info.bBMI1 && cpu_info.bFastBMI2)
|
||||||
{
|
{
|
||||||
MOV(32, R(scratch2), Imm32(0x07C3F7C0));
|
MOV(32, R(scratch2), Imm32(0x07C3F7C0));
|
||||||
PDEP(32, scratch3, scratch1, R(scratch2));
|
PDEP(32, scratch3, scratch1, R(scratch2));
|
||||||
|
@ -324,7 +324,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
|
||||||
// RRRRGGGG BBBBAAAA
|
// RRRRGGGG BBBBAAAA
|
||||||
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
||||||
LoadAndSwap(16, scratch1, data);
|
LoadAndSwap(16, scratch1, data);
|
||||||
if (cpu_info.bBMI2)
|
if (cpu_info.bFastBMI2)
|
||||||
{
|
{
|
||||||
MOV(32, R(scratch2), Imm32(0x0F0F0F0F));
|
MOV(32, R(scratch2), Imm32(0x0F0F0F0F));
|
||||||
PDEP(32, scratch1, scratch1, R(scratch2));
|
PDEP(32, scratch1, scratch1, R(scratch2));
|
||||||
|
@ -353,7 +353,7 @@ void VertexLoaderX64::ReadColor(OpArg data, u64 attribute, int format)
|
||||||
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
// AAAAAAAA BBBBBBBB GGGGGGGG RRRRRRRR
|
||||||
data.AddMemOffset(-1); // subtract one from address so we can use a 32bit load and bswap
|
data.AddMemOffset(-1); // subtract one from address so we can use a 32bit load and bswap
|
||||||
LoadAndSwap(32, scratch1, data);
|
LoadAndSwap(32, scratch1, data);
|
||||||
if (cpu_info.bBMI2)
|
if (cpu_info.bFastBMI2)
|
||||||
{
|
{
|
||||||
MOV(32, R(scratch2), Imm32(0xFCFCFCFC));
|
MOV(32, R(scratch2), Imm32(0xFCFCFCFC));
|
||||||
PDEP(32, scratch1, scratch1, R(scratch2));
|
PDEP(32, scratch1, scratch1, R(scratch2));
|
||||||
|
|
Loading…
Reference in New Issue