diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 0c4d30c5db..600b424d51 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -3131,10 +3131,29 @@ void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) int imm = size * 2 - scale; EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn); } - +void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) /* Presumably encodes AArch64 SQXTN (signed saturating narrow) -- confirm against the ARM reference. */ +{ + Emit2RegMisc(false, 0, dest_size >> 4, 0b10100, Rd, Rn); /* dest_size >> 4 turns 8/16/32 into 0/1/2 */ +} +void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) /* Like SQXTN, but passes true as the first Emit2RegMisc argument. */ +{ + Emit2RegMisc(true, 0, dest_size >> 4, 0b10100, Rd, Rn); +} +void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) /* Like SQXTN, but passes 1 as the second Emit2RegMisc argument (presumably the unsigned form -- confirm). */ +{ + Emit2RegMisc(false, 1, dest_size >> 4, 0b10100, Rd, Rn); +} +void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) /* Like UQXTN, but passes true as the first Emit2RegMisc argument. */ +{ + Emit2RegMisc(true, 1, dest_size >> 4, 0b10100, Rd, Rn); +} void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) { - Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn); + Emit2RegMisc(false, 0, dest_size >> 4, 0b10010, Rd, Rn); /* first argument is fixed to false here; XTN2 is the variant that passes true */ +} +void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) /* Like XTN, but passes true as the first Emit2RegMisc argument. */ +{ + Emit2RegMisc(true, 0, dest_size >> 4, 0b10010, Rd, Rn); } // Move diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h index 5796b877f6..d4d95ba78d 100644 --- a/Source/Core/Common/Arm64Emitter.h +++ b/Source/Core/Common/Arm64Emitter.h @@ -829,7 +829,12 @@ public: void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn); void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale); void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale); + void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); /* the definition shifts dest_size right by 4 before encoding; callers in this patch pass 8 or 16 */ + void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); + void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); + void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); + void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); /* same parameters as XTN; the definition passes true where XTN passes false */ // Move 
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 2e39791e47..c53225ef57 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -99,14 +99,6 @@ void JitArm64AsmRoutineManager::Generate() FlushIcache(); } -static float s_quantize_ranges[] = -{ - 0.0f, 255.0f, // U8 - -128.0, 127.0f, // S8 - 0.0f, 65535.0f, // U16 - -32768.0f, 32767.0f, // S16 -}; - void JitArm64AsmRoutineManager::GenerateCommon() { // X0 is the scale @@ -300,15 +292,9 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1, 0); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[0]); - float_emit.LD2R(32, D1, X2); - float_emit.FMIN(32, D0, D0, D2); - float_emit.FMAX(32, D0, D0, D1); - float_emit.FCVTZU(32, D0, D0); - float_emit.XTN(16, D0, D0); - float_emit.XTN(8, D0, D0); + float_emit.UQXTN(16, D0, D0); /* UQXTN rather than plain XTN: presumably saturates while narrowing, so no separate float clamp precedes the conversion -- confirm */ + float_emit.UQXTN(8, D0, D0); /* second narrowing step, dest_size 8 */ }; storePairedU8 = GetCodePtr(); @@ -334,15 +320,9 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1, 0); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[1]); - float_emit.LD2R(32, D1, X2); - float_emit.FMIN(32, D0, D0, D2); - float_emit.FMAX(32, D0, D0, D1); - float_emit.FCVTZS(32, D0, D0); - float_emit.XTN(16, D0, D0); - float_emit.XTN(8, D0, D0); + float_emit.SQXTN(16, D0, D0); /* SQXTN follows the signed FCVTZS conversion; presumably saturates while narrowing, so no separate float clamp is emitted -- confirm */ + float_emit.SQXTN(8, D0, D0); /* second narrowing step, dest_size 8 */ }; storePairedS8 = GetCodePtr(); @@ -369,14 +349,8 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1, 0); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[2]); - float_emit.LD2R(32, D1, X2); - float_emit.FMIN(32, D0, D0, D2); - float_emit.FMAX(32, D0, D0, D1); - float_emit.FCVTZU(32, D0, D0); - 
float_emit.XTN(16, D0, D0); + float_emit.UQXTN(16, D0, D0); /* single narrowing step to dest_size 16; presumably saturating in place of a float clamp -- confirm */ float_emit.REV16(8, D0, D0); }; @@ -403,15 +377,8 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1, 0); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[3]); - float_emit.LD2R(32, D1, X2); - float_emit.FMIN(32, D0, D0, D2); - float_emit.FMAX(32, D0, D0, D1); - - float_emit.FCVTZS(32, D0, D0); - float_emit.XTN(16, D0, D0); + float_emit.SQXTN(16, D0, D0); /* signed counterpart: follows FCVTZS, single narrowing step to dest_size 16; presumably saturating -- confirm */ float_emit.REV16(8, D0, D0); }; @@ -453,16 +420,9 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[0]); - float_emit.LDR(32, INDEX_UNSIGNED, S1, X2, 0); - float_emit.LDR(32, INDEX_UNSIGNED, S2, X2, 4); - float_emit.FMIN(S0, S0, S2); - float_emit.FMAX(S0, S0, S1); - float_emit.FCVTZU(32, D0, D0); - float_emit.XTN(16, D0, D0); - float_emit.XTN(8, D0, D0); + float_emit.UQXTN(16, D0, D0); /* UQXTN rather than plain XTN: presumably saturates while narrowing, so no separate float clamp precedes the conversion -- confirm */ + float_emit.UQXTN(8, D0, D0); /* second narrowing step, dest_size 8 */ }; storeSingleU8 = GetCodePtr(); @@ -487,16 +447,9 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[1]); - float_emit.LDR(32, INDEX_UNSIGNED, S1, X2, 0); - float_emit.LDR(32, INDEX_UNSIGNED, S2, X2, 4); - float_emit.FMIN(S0, S0, S2); - float_emit.FMAX(S0, S0, S1); - float_emit.FCVTZS(32, D0, D0); - float_emit.XTN(16, D0, D0); - float_emit.XTN(8, D0, D0); + float_emit.SQXTN(16, D0, D0); /* SQXTN follows the signed FCVTZS conversion; presumably saturates while narrowing -- confirm */ + float_emit.SQXTN(8, D0, D0); /* second narrowing step, dest_size 8 */ }; storeSingleS8 = GetCodePtr(); @@ -521,15 +474,8 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[2]); - float_emit.LDR(32, INDEX_UNSIGNED, S1, X2, 0); - 
float_emit.LDR(32, INDEX_UNSIGNED, S2, X2, 4); - float_emit.FMIN(S0, S0, S2); - float_emit.FMAX(S0, S0, S1); - float_emit.FCVTZU(32, D0, D0); - float_emit.XTN(16, D0, D0); + float_emit.UQXTN(16, D0, D0); /* single narrowing step to dest_size 16 after FCVTZU; presumably saturating in place of a float clamp -- confirm */ }; storeSingleU16 = GetCodePtr(); @@ -555,15 +501,8 @@ void JitArm64AsmRoutineManager::GenerateCommon() float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0); float_emit.FMUL(32, D0, D0, D1); - // Have to clamp the result - MOVI2R(X2, (u64)&s_quantize_ranges[3]); - float_emit.LDR(32, INDEX_UNSIGNED, S1, X2, 0); - float_emit.LDR(32, INDEX_UNSIGNED, S2, X2, 4); - float_emit.FMIN(S0, S0, S2); - float_emit.FMAX(S0, S0, S1); - float_emit.FCVTZS(32, D0, D0); - float_emit.XTN(16, D0, D0); + float_emit.SQXTN(16, D0, D0); /* signed counterpart: single narrowing step to dest_size 16 after FCVTZS; presumably saturating -- confirm */ }; storeSingleS16 = GetCodePtr();