From 12b9ada26838cd885f1f93ed76b3b6b540a4c504 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 01:51:18 +0000 Subject: [PATCH] Rearchitect a bit of our AsmCommon routines. JitCommon is becoming a cluster of x86 specific things and things that are common to multiple recompilers. This overlap is beginning to cause us issues. Begin by breaking out the common ASM arrays to have their own file and move the x86 specific routines to their own folder. --- Source/Core/Core/CMakeLists.txt | 1 + Source/Core/Core/Core.vcxproj | 2 + Source/Core/Core/Core.vcxproj.filters | 8 +- Source/Core/Core/PowerPC/Jit64/JitAsm.h | 2 +- .../PowerPC/Jit64Common/Jit64AsmCommon.cpp | 601 ++++++++++++++++++ .../Core/PowerPC/Jit64Common/Jit64AsmCommon.h | 22 + .../Core/PowerPC/JitCommon/JitAsmCommon.cpp | 597 ----------------- .../Core/PowerPC/JitCommon/JitAsmCommon.h | 17 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 2 +- 9 files changed, 636 insertions(+), 616 deletions(-) create mode 100644 Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp create mode 100644 Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index ab0eff73f3..4f7176185e 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -199,6 +199,7 @@ if(_M_X86) PowerPC/Jit64/Jit_Paired.cpp PowerPC/Jit64/JitRegCache.cpp PowerPC/Jit64/Jit_SystemRegisters.cpp + PowerPC/Jit64Common/Jit64AsmCommon.cpp PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/Jit_Util.cpp PowerPC/JitCommon/TrampolineCache.cpp) diff --git a/Source/Core/Core/Core.vcxproj b/Source/Core/Core/Core.vcxproj index bf34fbabbe..23f0167763 100644 --- a/Source/Core/Core/Core.vcxproj +++ b/Source/Core/Core/Core.vcxproj @@ -235,6 +235,7 @@ + @@ -417,6 +418,7 @@ + diff --git a/Source/Core/Core/Core.vcxproj.filters b/Source/Core/Core/Core.vcxproj.filters index d8213794ae..3fc6e92433 100644 --- a/Source/Core/Core/Core.vcxproj.filters +++ b/Source/Core/Core/Core.vcxproj.filters @@ -631,6 +631,9 @@ PowerPC + + PowerPC\Jit64Common + PowerPC\JitCommon @@ -1184,6 +1187,9 @@ PowerPC + + PowerPC\Jit64Common + PowerPC\JitCommon @@ -1229,4 +1235,4 @@ - \ No newline at end of file + diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h index cd6e1bde08..1738ec97b6 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h @@ -4,7 +4,7 @@ #pragma once -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" +#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" // In Dolphin, we don't use inline assembly. Instead, we generate all machine-near // code at runtime. In the case of fixed code like this, after writing it, we write diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp new file mode 100644 index 0000000000..11961dd1a8 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -0,0 +1,601 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include "Common/MathUtil.h" +#include "Common/x64ABI.h" +#include "Common/x64Emitter.h" + +#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" +#include "Core/PowerPC/JitCommon/JitBase.h" + +#define QUANTIZED_REGS_TO_SAVE \ + (ABI_ALL_CALLER_SAVED & ~BitSet32 { \ + RSCRATCH, RSCRATCH2, RSCRATCH_EXTRA, XMM0+16, XMM1+16 \ + }) + +#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | BitSet32 { RSCRATCH2 }) + +using namespace Gen; + +void CommonAsmRoutines::GenFifoWrite(int size) +{ + // Assume value in RSCRATCH + u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe; + _assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!"); + MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount)); + SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH); + ADD(32, R(RSCRATCH2), Imm8(size >> 3)); + MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2)); + RET(); +} + +void CommonAsmRoutines::GenFrsqrte() +{ + // Assume input in XMM0. + // This function clobbers all three RSCRATCH. + MOVQ_xmm(R(RSCRATCH), XMM0); + + // Negative and zero inputs set an exception and take the complex path. + TEST(64, R(RSCRATCH), R(RSCRATCH)); + FixupBranch zero = J_CC(CC_Z, true); + FixupBranch negative = J_CC(CC_S, true); + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); + + // Zero and max exponents (non-normal floats) take the complex path. + FixupBranch complex1 = J_CC(CC_Z, true); + CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); + FixupBranch complex2 = J_CC(CC_E, true); + + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD)); + SAR(32, R(RSCRATCH_EXTRA), Imm8(1)); + MOV(32, R(RSCRATCH2), Imm32(0x3FF)); + SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); + + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(48)); + AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); + XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 16 : 0); + + SHR(64, R(RSCRATCH), Imm8(37)); + AND(32, R(RSCRATCH), Imm32(0x7FF)); + IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); + MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); + SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; + MOVQ_xmm(XMM0, R(RSCRATCH2)); + RET(); + + // Exception flags for zero input. + SetJumpTarget(zero); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); + FixupBranch skip_set_fx1 = J_CC(CC_NZ); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX)); + FixupBranch complex3 = J(); + + // Exception flags for negative input. + SetJumpTarget(negative); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); + FixupBranch skip_set_fx2 = J_CC(CC_NZ); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT)); + + SetJumpTarget(skip_set_fx1); + SetJumpTarget(skip_set_fx2); + SetJumpTarget(complex1); + SetJumpTarget(complex2); + SetJumpTarget(complex3); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + RET(); +} + +void CommonAsmRoutines::GenFres() +{ + // Assume input in XMM0. + // This function clobbers all three RSCRATCH. 
+ MOVQ_xmm(R(RSCRATCH), XMM0); + + // Zero inputs set an exception and take the complex path. + TEST(64, R(RSCRATCH), R(RSCRATCH)); + FixupBranch zero = J_CC(CC_Z); + + MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); + SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); + MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); + AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp + AND(32, R(RSCRATCH2), Imm32(0x800)); // sign + CMP(32, R(RSCRATCH_EXTRA), Imm32(895)); + // Take the complex path for very large/small exponents. + FixupBranch complex1 = J_CC(CC_L); + CMP(32, R(RSCRATCH_EXTRA), Imm32(1149)); + FixupBranch complex2 = J_CC(CC_GE); + + SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD)); + NEG(32, R(RSCRATCH_EXTRA)); + OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2)); + SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent + + MOV(64, R(RSCRATCH2), R(RSCRATCH)); + SHR(64, R(RSCRATCH), Imm8(37)); + SHR(64, R(RSCRATCH2), Imm8(47)); + AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 + AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 + + IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); + ADD(32, R(RSCRATCH), Imm8(1)); + SHR(32, R(RSCRATCH), Imm8(1)); + + MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); + SUB(32, R(RSCRATCH2), R(RSCRATCH)); + SHL(64, R(RSCRATCH2), Imm8(29)); + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 + MOVQ_xmm(XMM0, R(RSCRATCH2)); + RET(); + + // Exception flags for zero input. + SetJumpTarget(zero); + TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); + FixupBranch skip_set_fx1 = J_CC(CC_NZ); + OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX)); + SetJumpTarget(skip_set_fx1); + + SetJumpTarget(complex1); + SetJumpTarget(complex2); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + RET(); +} + +void CommonAsmRoutines::GenMfcr() +{ + // Input: none + // Output: RSCRATCH + // This function clobbers all three RSCRATCH. + X64Reg dst = RSCRATCH; + X64Reg tmp = RSCRATCH2; + X64Reg cr_val = RSCRATCH_EXTRA; + XOR(32, R(dst), R(dst)); + // we only need to zero the high bits of tmp once + XOR(32, R(tmp), R(tmp)); + for (int i = 0; i < 8; i++) + { + static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 }; + if (i != 0) + SHL(32, R(dst), Imm8(4)); + + MOV(64, R(cr_val), PPCSTATE(cr_val[i])); + + // EQ: Bits 31-0 == 0; set flag bit 1 + TEST(32, R(cr_val), R(cr_val)); + // FIXME: is there a better way to do this without the partial register merging? 
+ SETcc(CC_Z, R(tmp)); + LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); + + // GT: Value > 0; set flag bit 2 + TEST(64, R(cr_val), R(cr_val)); + SETcc(CC_G, R(tmp)); + LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0)); + + // SO: Bit 61 set; set flag bit 0 + // LT: Bit 62 set; set flag bit 3 + SHR(64, R(cr_val), Imm8(61)); + OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable)); + } + RET(); +} + +// Safe + Fast Quantizers, originally from JITIL by magumagu +static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; +static const float GC_ALIGNED16(m_32767) = 32767.0f; +static const float GC_ALIGNED16(m_m32768) = -32768.0f; +static const float GC_ALIGNED16(m_255) = 255.0f; +static const float GC_ALIGNED16(m_127) = 127.0f; +static const float GC_ALIGNED16(m_m128) = -128.0f; + +#define QUANTIZE_OVERFLOW_SAFE + +// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range +// while it's OK for large negatives, it isn't for positives +// I don't know whether the overflow actually happens in any games +// but it potentially can cause problems, so we need some clamping + +// See comment in header for in/outs. +void CommonAsmRoutines::GenQuantizedStores() +{ + const u8* storePairedIllegal = AlignCode4(); + UD2(); + + const u8* storePairedFloat = AlignCode4(); + if (cpu_info.bSSSE3) + { + PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); + MOVQ_xmm(R(RSCRATCH), XMM0); + } + else + { + MOVQ_xmm(R(RSCRATCH), XMM0); + ROL(64, R(RSCRATCH), Imm8(32)); + BSWAP(64, RSCRATCH); + } + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 64, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + + RET(); + + const u8* storePairedU8 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MINPS(XMM0, M(m_65535)); +#endif + CVTTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + PACKUSWB(XMM0, R(XMM0)); + MOVD_xmm(R(RSCRATCH), XMM0); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + + RET(); + + const u8* storePairedS8 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MINPS(XMM0, M(m_65535)); +#endif + CVTTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + PACKSSWB(XMM0, R(XMM0)); + MOVD_xmm(R(RSCRATCH), XMM0); + + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + + RET(); + + const u8* storePairedU16 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MULPS(XMM0, R(XMM1)); + + if (cpu_info.bSSE4_1) + { +#ifdef QUANTIZE_OVERFLOW_SAFE + MINPS(XMM0, M(m_65535)); +#endif + CVTTPS2DQ(XMM0, R(XMM0)); + PACKUSDW(XMM0, R(XMM0)); + MOVD_xmm(R(RSCRATCH), XMM0); + BSWAP(32, RSCRATCH); + ROL(32, R(RSCRATCH), Imm8(16)); + } + else + { + XORPS(XMM1, R(XMM1)); + MAXPS(XMM0, R(XMM1)); + MINPS(XMM0, M(m_65535)); + + CVTTPS2DQ(XMM0, R(XMM0)); + PSHUFLW(XMM0, R(XMM0), 2); // AABBCCDD -> CCAA____ + MOVD_xmm(R(RSCRATCH), XMM0); + BSWAP(32, RSCRATCH); + } + + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | 
SAFE_LOADSTORE_NO_FASTMEM); + + RET(); + + const u8* storePairedS16 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MULPS(XMM0, R(XMM1)); +#ifdef QUANTIZE_OVERFLOW_SAFE + MINPS(XMM0, M(m_65535)); +#endif + CVTTPS2DQ(XMM0, R(XMM0)); + PACKSSDW(XMM0, R(XMM0)); + MOVD_xmm(R(RSCRATCH), XMM0); + BSWAP(32, RSCRATCH); + ROL(32, R(RSCRATCH), Imm8(16)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + + RET(); + + pairedStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); + ReserveCodeSpace(8 * sizeof(u8*)); + + pairedStoreQuantized[0] = storePairedFloat; + pairedStoreQuantized[1] = storePairedIllegal; + pairedStoreQuantized[2] = storePairedIllegal; + pairedStoreQuantized[3] = storePairedIllegal; + pairedStoreQuantized[4] = storePairedU8; + pairedStoreQuantized[5] = storePairedU16; + pairedStoreQuantized[6] = storePairedS8; + pairedStoreQuantized[7] = storePairedS16; +} + +// See comment in header for in/outs. +void CommonAsmRoutines::GenQuantizedSingleStores() +{ + const u8* storeSingleIllegal = AlignCode4(); + UD2(); + + // Easy! + const u8* storeSingleFloat = AlignCode4(); + MOVD_xmm(R(RSCRATCH), XMM0); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + RET(); + + const u8* storeSingleU8 = AlignCode4(); // Used by MKWii + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + XORPS(XMM1, R(XMM1)); + MAXSS(XMM0, R(XMM1)); + MINSS(XMM0, M(&m_255)); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + RET(); + + const u8* storeSingleS8 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MAXSS(XMM0, M(&m_m128)); + MINSS(XMM0, M(&m_127)); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + RET(); + + const u8* storeSingleU16 = AlignCode4(); // Used by MKWii + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + XORPS(XMM1, R(XMM1)); + MAXSS(XMM0, R(XMM1)); + MINSS(XMM0, M(m_65535)); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + RET(); + + const u8* storeSingleS16 = AlignCode4(); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); + MAXSS(XMM0, M(&m_m32768)); + MINSS(XMM0, M(&m_32767)); + CVTTSS2SI(RSCRATCH, R(XMM0)); + SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); + RET(); + + singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); + ReserveCodeSpace(8 * sizeof(u8*)); + + singleStoreQuantized[0] = storeSingleFloat; + singleStoreQuantized[1] = storeSingleIllegal; + singleStoreQuantized[2] = storeSingleIllegal; + singleStoreQuantized[3] = storeSingleIllegal; + singleStoreQuantized[4] = storeSingleU8; + singleStoreQuantized[5] = storeSingleU16; + singleStoreQuantized[6] = storeSingleS8; + singleStoreQuantized[7] = storeSingleS16; +} + +void CommonAsmRoutines::GenQuantizedLoads() +{ + const u8* loadPairedIllegal = 
AlignCode4(); + UD2(); + + // FIXME? This code (in non-MMU mode) assumes all accesses are directly to RAM, i.e. + // don't need hardware access handling. This will definitely crash if paired loads occur + // from non-RAM areas, but as far as I know, this never happens. I don't know if this is + // for a good reason, or merely because no game does this. + // If we find something that actually does do this, maybe this should be changed. How + // much of a performance hit would it be? + const u8* loadPairedFloatTwo = AlignCode4(); + if (jit->js.memcheck) + { + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 64, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); + MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); + } + else if (cpu_info.bSSSE3) + { + MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + PSHUFB(XMM0, M(pbswapShuffle2x4)); + } + else + { + LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); + MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); + } + RET(); + + const u8* loadPairedFloatOne = AlignCode4(); + if (jit->js.memcheck) + { + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + UNPCKLPS(XMM0, M(m_one)); + } + else if (cpu_info.bSSSE3) + { + MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + PSHUFB(XMM0, M(pbswapShuffle1x4)); + UNPCKLPS(XMM0, M(m_one)); + } + else + { + LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + UNPCKLPS(XMM0, M(m_one)); + } + RET(); + + const u8* loadPairedU8Two = AlignCode4(); + if (jit->js.memcheck) + { + // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + ROR(16, R(RSCRATCH_EXTRA), Imm8(8)); + } + else + { + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + } + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + if (cpu_info.bSSE4_1) + { + PMOVZXBD(XMM0, R(XMM0)); + } + else + { + PXOR(XMM1, R(XMM1)); + PUNPCKLBW(XMM0, R(XMM1)); + PUNPCKLWD(XMM0, R(XMM1)); + } + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedU8One = AlignCode4(); + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx + CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + UNPCKLPS(XMM0, M(m_one)); + RET(); + + const u8* loadPairedS8Two = AlignCode4(); + if (jit->js.memcheck) + { + // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + ROR(16, R(RSCRATCH_EXTRA), Imm8(8)); + } + else + { + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); + } + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + if (cpu_info.bSSE4_1) + { + PMOVSXBD(XMM0, R(XMM0)); + } + else + { + PUNPCKLBW(XMM0, R(XMM0)); + PUNPCKLWD(XMM0, R(XMM0)); + 
PSRAD(XMM0, 24); + } + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedS8One = AlignCode4(); + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true); + CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + UNPCKLPS(XMM0, M(m_one)); + RET(); + + const u8* loadPairedU16Two = AlignCode4(); + // TODO: Support not swapping in (un)safeLoadToReg to avoid bswapping twice + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + if (cpu_info.bSSE4_1) + { + PMOVZXWD(XMM0, R(XMM0)); + } + else + { + PXOR(XMM1, R(XMM1)); + PUNPCKLWD(XMM0, R(XMM1)); + } + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedU16One = AlignCode4(); + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false); + CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + UNPCKLPS(XMM0, M(m_one)); + RET(); + + const u8* loadPairedS16Two = AlignCode4(); + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); + ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); + MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); + if (cpu_info.bSSE4_1) + { + PMOVSXWD(XMM0, R(XMM0)); + } + else + { + PUNPCKLWD(XMM0, R(XMM0)); + PSRAD(XMM0, 16); + } + CVTDQ2PS(XMM0, R(XMM0)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + MULPS(XMM0, R(XMM1)); + RET(); + + const u8* loadPairedS16One = AlignCode4(); + if (jit->js.memcheck) + SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); + else + UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true); + CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); + SHR(32, R(RSCRATCH2), Imm8(5)); + MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); + UNPCKLPS(XMM0, M(m_one)); + RET(); + + pairedLoadQuantized = reinterpret_cast(const_cast(AlignCode16())); + ReserveCodeSpace(16 * sizeof(u8*)); + + pairedLoadQuantized[0] = loadPairedFloatTwo; + pairedLoadQuantized[1] = loadPairedIllegal; + pairedLoadQuantized[2] = loadPairedIllegal; + pairedLoadQuantized[3] = loadPairedIllegal; + pairedLoadQuantized[4] = loadPairedU8Two; + pairedLoadQuantized[5] = loadPairedU16Two; + pairedLoadQuantized[6] = loadPairedS8Two; + pairedLoadQuantized[7] = loadPairedS16Two; + + pairedLoadQuantized[8] = loadPairedFloatOne; + pairedLoadQuantized[9] = loadPairedIllegal; + 
pairedLoadQuantized[10] = loadPairedIllegal; + pairedLoadQuantized[11] = loadPairedIllegal; + pairedLoadQuantized[12] = loadPairedU8One; + pairedLoadQuantized[13] = loadPairedU16One; + pairedLoadQuantized[14] = loadPairedS8One; + pairedLoadQuantized[15] = loadPairedS16One; +} diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h new file mode 100644 index 0000000000..8d93b4f252 --- /dev/null +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.h @@ -0,0 +1,22 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Core/PowerPC/JitCommon/Jit_Util.h" +#include "Core/PowerPC/JitCommon/JitAsmCommon.h" + +class CommonAsmRoutines : public CommonAsmRoutinesBase, public EmuCodeBlock +{ +protected: + void GenQuantizedLoads(); + void GenQuantizedStores(); + void GenQuantizedSingleStores(); + +public: + void GenFifoWrite(int size); + void GenFrsqrte(); + void GenFres(); + void GenMfcr(); +}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 7514167c86..dd693006ea 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -2,194 +2,7 @@ // Licensed under GPLv2 // Refer to the license.txt file included. -#include "Common/CPUDetect.h" -#include "Common/MathUtil.h" -#include "Common/MemoryUtil.h" - #include "Core/PowerPC/JitCommon/JitAsmCommon.h" -#include "Core/PowerPC/JitCommon/JitBase.h" - -#define QUANTIZED_REGS_TO_SAVE \ - (ABI_ALL_CALLER_SAVED & ~BitSet32 { \ - RSCRATCH, RSCRATCH2, RSCRATCH_EXTRA, XMM0+16, XMM1+16 \ - }) - -#define QUANTIZED_REGS_TO_SAVE_LOAD (QUANTIZED_REGS_TO_SAVE | BitSet32 { RSCRATCH2 }) - -using namespace Gen; - -void CommonAsmRoutines::GenFifoWrite(int size) -{ - // Assume value in RSCRATCH - u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe; - _assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!"); - MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount)); - SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH); - ADD(32, R(RSCRATCH2), Imm8(size >> 3)); - MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2)); - RET(); -} - -void CommonAsmRoutines::GenFrsqrte() -{ - // Assume input in XMM0. - // This function clobbers all three RSCRATCH. - MOVQ_xmm(R(RSCRATCH), XMM0); - - // Negative and zero inputs set an exception and take the complex path. - TEST(64, R(RSCRATCH), R(RSCRATCH)); - FixupBranch zero = J_CC(CC_Z, true); - FixupBranch negative = J_CC(CC_S, true); - MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); - SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); - - // Zero and max exponents (non-normal floats) take the complex path. - FixupBranch complex1 = J_CC(CC_Z, true); - CMP(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); - FixupBranch complex2 = J_CC(CC_E, true); - - SUB(32, R(RSCRATCH_EXTRA), Imm32(0x3FD)); - SAR(32, R(RSCRATCH_EXTRA), Imm8(1)); - MOV(32, R(RSCRATCH2), Imm32(0x3FF)); - SUB(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); - SHL(64, R(RSCRATCH2), Imm8(52)); // exponent = ((0x3FFLL << 52) - ((exponent - (0x3FELL << 52)) / 2)) & (0x7FFLL << 52); - - MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); - SHR(64, R(RSCRATCH_EXTRA), Imm8(48)); - AND(32, R(RSCRATCH_EXTRA), Imm8(0x1F)); - XOR(32, R(RSCRATCH_EXTRA), Imm8(0x10)); // int index = i / 2048 + (odd_exponent ? 
16 : 0); - - SHR(64, R(RSCRATCH), Imm8(37)); - AND(32, R(RSCRATCH), Imm32(0x7FF)); - IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec)); - MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base)); - SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH)); - SHL(64, R(RSCRATCH_EXTRA), Imm8(26)); - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26; - MOVQ_xmm(XMM0, R(RSCRATCH2)); - RET(); - - // Exception flags for zero input. - SetJumpTarget(zero); - TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); - FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX)); - FixupBranch complex3 = J(); - - // Exception flags for negative input. - SetJumpTarget(negative); - TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_VXSQRT)); - FixupBranch skip_set_fx2 = J_CC(CC_NZ); - OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_VXSQRT)); - - SetJumpTarget(skip_set_fx1); - SetJumpTarget(skip_set_fx2); - SetJumpTarget(complex1); - SetJumpTarget(complex2); - SetJumpTarget(complex3); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); - ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); - RET(); -} - -void CommonAsmRoutines::GenFres() -{ - // Assume input in XMM0. - // This function clobbers all three RSCRATCH. - MOVQ_xmm(R(RSCRATCH), XMM0); - - // Zero inputs set an exception and take the complex path. - TEST(64, R(RSCRATCH), R(RSCRATCH)); - FixupBranch zero = J_CC(CC_Z); - - MOV(64, R(RSCRATCH_EXTRA), R(RSCRATCH)); - SHR(64, R(RSCRATCH_EXTRA), Imm8(52)); - MOV(32, R(RSCRATCH2), R(RSCRATCH_EXTRA)); - AND(32, R(RSCRATCH_EXTRA), Imm32(0x7FF)); // exp - AND(32, R(RSCRATCH2), Imm32(0x800)); // sign - CMP(32, R(RSCRATCH_EXTRA), Imm32(895)); - // Take the complex path for very large/small exponents. - FixupBranch complex1 = J_CC(CC_L); - CMP(32, R(RSCRATCH_EXTRA), Imm32(1149)); - FixupBranch complex2 = J_CC(CC_GE); - - SUB(32, R(RSCRATCH_EXTRA), Imm32(0x7FD)); - NEG(32, R(RSCRATCH_EXTRA)); - OR(32, R(RSCRATCH_EXTRA), R(RSCRATCH2)); - SHL(64, R(RSCRATCH_EXTRA), Imm8(52)); // vali = sign | exponent - - MOV(64, R(RSCRATCH2), R(RSCRATCH)); - SHR(64, R(RSCRATCH), Imm8(37)); - SHR(64, R(RSCRATCH2), Imm8(47)); - AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024 - AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024 - - IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec)); - ADD(32, R(RSCRATCH), Imm8(1)); - SHR(32, R(RSCRATCH), Imm8(1)); - - MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base)); - SUB(32, R(RSCRATCH2), R(RSCRATCH)); - SHL(64, R(RSCRATCH2), Imm8(29)); - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29 - MOVQ_xmm(XMM0, R(RSCRATCH2)); - RET(); - - // Exception flags for zero input. 
- SetJumpTarget(zero); - TEST(32, PPCSTATE(fpscr), Imm32(FPSCR_ZX)); - FixupBranch skip_set_fx1 = J_CC(CC_NZ); - OR(32, PPCSTATE(fpscr), Imm32(FPSCR_FX | FPSCR_ZX)); - SetJumpTarget(skip_set_fx1); - - SetJumpTarget(complex1); - SetJumpTarget(complex2); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); - ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); - RET(); -} - -void CommonAsmRoutines::GenMfcr() -{ - // Input: none - // Output: RSCRATCH - // This function clobbers all three RSCRATCH. - X64Reg dst = RSCRATCH; - X64Reg tmp = RSCRATCH2; - X64Reg cr_val = RSCRATCH_EXTRA; - XOR(32, R(dst), R(dst)); - // we only need to zero the high bits of tmp once - XOR(32, R(tmp), R(tmp)); - for (int i = 0; i < 8; i++) - { - static const u32 m_flagTable[8] = { 0x0, 0x1, 0x8, 0x9, 0x0, 0x1, 0x8, 0x9 }; - if (i != 0) - SHL(32, R(dst), Imm8(4)); - - MOV(64, R(cr_val), PPCSTATE(cr_val[i])); - - // EQ: Bits 31-0 == 0; set flag bit 1 - TEST(32, R(cr_val), R(cr_val)); - // FIXME: is there a better way to do this without the partial register merging? - SETcc(CC_Z, R(tmp)); - LEA(32, dst, MComplex(dst, tmp, SCALE_2, 0)); - - // GT: Value > 0; set flag bit 2 - TEST(64, R(cr_val), R(cr_val)); - SETcc(CC_G, R(tmp)); - LEA(32, dst, MComplex(dst, tmp, SCALE_4, 0)); - - // SO: Bit 61 set; set flag bit 0 - // LT: Bit 62 set; set flag bit 3 - SHR(64, R(cr_val), Imm8(61)); - OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable)); - } - RET(); -} - -// Safe + Fast Quantizers, originally from JITIL by magumagu const u8 GC_ALIGNED16(pbswapShuffle1x4[16]) = { 3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = { 3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15 }; @@ -250,414 +63,4 @@ const float GC_ALIGNED16(m_dequantizeTableS[]) = (1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1), }; -static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; -static const float GC_ALIGNED16(m_32767) = 32767.0f; -static const float GC_ALIGNED16(m_m32768) = -32768.0f; -static const float GC_ALIGNED16(m_255) = 255.0f; -static const float GC_ALIGNED16(m_127) = 127.0f; -static const float GC_ALIGNED16(m_m128) = -128.0f; - const float GC_ALIGNED16(m_one[]) = { 1.0f, 0.0f, 0.0f, 0.0f }; - -#define QUANTIZE_OVERFLOW_SAFE - -// according to Intel Docs CVTPS2DQ writes 0x80000000 if the source floating point value is out of int32 range -// while it's OK for large negatives, it isn't for positives -// I don't know whether the overflow actually happens in any games -// but it potentially can cause problems, so we need some clamping - -// See comment in header for in/outs. 
-void CommonAsmRoutines::GenQuantizedStores() -{ - const u8* storePairedIllegal = AlignCode4(); - UD2(); - - const u8* storePairedFloat = AlignCode4(); - if (cpu_info.bSSSE3) - { - PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); - MOVQ_xmm(R(RSCRATCH), XMM0); - } - else - { - MOVQ_xmm(R(RSCRATCH), XMM0); - ROL(64, R(RSCRATCH), Imm8(32)); - BSWAP(64, RSCRATCH); - } - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 64, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - - RET(); - - const u8* storePairedU8 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MINPS(XMM0, M(m_65535)); -#endif - CVTTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - PACKUSWB(XMM0, R(XMM0)); - MOVD_xmm(R(RSCRATCH), XMM0); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - - RET(); - - const u8* storePairedS8 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MINPS(XMM0, M(m_65535)); -#endif - CVTTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - PACKSSWB(XMM0, R(XMM0)); - MOVD_xmm(R(RSCRATCH), XMM0); - - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - - RET(); - - const u8* storePairedU16 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MULPS(XMM0, R(XMM1)); - - if (cpu_info.bSSE4_1) - { -#ifdef QUANTIZE_OVERFLOW_SAFE - MINPS(XMM0, M(m_65535)); -#endif - CVTTPS2DQ(XMM0, R(XMM0)); - PACKUSDW(XMM0, R(XMM0)); - MOVD_xmm(R(RSCRATCH), XMM0); - BSWAP(32, RSCRATCH); - ROL(32, R(RSCRATCH), Imm8(16)); - } - else - { - XORPS(XMM1, R(XMM1)); - MAXPS(XMM0, R(XMM1)); - MINPS(XMM0, M(m_65535)); - - CVTTPS2DQ(XMM0, R(XMM0)); - PSHUFLW(XMM0, R(XMM0), 2); // AABBCCDD -> CCAA____ - MOVD_xmm(R(RSCRATCH), XMM0); - BSWAP(32, RSCRATCH); - } - - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - - RET(); - - const u8* storePairedS16 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MULPS(XMM0, R(XMM1)); -#ifdef QUANTIZE_OVERFLOW_SAFE - MINPS(XMM0, M(m_65535)); -#endif - CVTTPS2DQ(XMM0, R(XMM0)); - PACKSSDW(XMM0, R(XMM0)); - MOVD_xmm(R(RSCRATCH), XMM0); - BSWAP(32, RSCRATCH); - ROL(32, R(RSCRATCH), Imm8(16)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - - RET(); - - pairedStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); - ReserveCodeSpace(8 * sizeof(u8*)); - - pairedStoreQuantized[0] = storePairedFloat; - pairedStoreQuantized[1] = storePairedIllegal; - pairedStoreQuantized[2] = storePairedIllegal; - pairedStoreQuantized[3] = storePairedIllegal; - pairedStoreQuantized[4] = storePairedU8; - pairedStoreQuantized[5] = storePairedU16; - pairedStoreQuantized[6] = storePairedS8; - pairedStoreQuantized[7] = storePairedS16; -} - -// See comment in header for in/outs. -void CommonAsmRoutines::GenQuantizedSingleStores() -{ - const u8* storeSingleIllegal = AlignCode4(); - UD2(); - - // Easy! 
- const u8* storeSingleFloat = AlignCode4(); - MOVD_xmm(R(RSCRATCH), XMM0); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - RET(); - - const u8* storeSingleU8 = AlignCode4(); // Used by MKWii - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - XORPS(XMM1, R(XMM1)); - MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(&m_255)); - CVTTSS2SI(RSCRATCH, R(XMM0)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - RET(); - - const u8* storeSingleS8 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MAXSS(XMM0, M(&m_m128)); - MINSS(XMM0, M(&m_127)); - CVTTSS2SI(RSCRATCH, R(XMM0)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 8, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - RET(); - - const u8* storeSingleU16 = AlignCode4(); // Used by MKWii - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - XORPS(XMM1, R(XMM1)); - MAXSS(XMM0, R(XMM1)); - MINSS(XMM0, M(m_65535)); - CVTTSS2SI(RSCRATCH, R(XMM0)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - RET(); - - const u8* storeSingleS16 = AlignCode4(); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS)); - MAXSS(XMM0, M(&m_m32768)); - MINSS(XMM0, M(&m_32767)); - CVTTSS2SI(RSCRATCH, R(XMM0)); - SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 16, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM); - RET(); - - singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); - ReserveCodeSpace(8 * sizeof(u8*)); - - singleStoreQuantized[0] = storeSingleFloat; - singleStoreQuantized[1] = storeSingleIllegal; - singleStoreQuantized[2] = storeSingleIllegal; - singleStoreQuantized[3] = storeSingleIllegal; - singleStoreQuantized[4] = storeSingleU8; - singleStoreQuantized[5] = storeSingleU16; - singleStoreQuantized[6] = storeSingleS8; - singleStoreQuantized[7] = storeSingleS16; -} - -void CommonAsmRoutines::GenQuantizedLoads() -{ - const u8* loadPairedIllegal = AlignCode4(); - UD2(); - - // FIXME? This code (in non-MMU mode) assumes all accesses are directly to RAM, i.e. - // don't need hardware access handling. This will definitely crash if paired loads occur - // from non-RAM areas, but as far as I know, this never happens. I don't know if this is - // for a good reason, or merely because no game does this. - // If we find something that actually does do this, maybe this should be changed. How - // much of a performance hit would it be? 
- const u8* loadPairedFloatTwo = AlignCode4(); - if (jit->js.memcheck) - { - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 64, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); - MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); - } - else if (cpu_info.bSSSE3) - { - MOVQ_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); - PSHUFB(XMM0, M(pbswapShuffle2x4)); - } - else - { - LoadAndSwap(64, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); - ROL(64, R(RSCRATCH_EXTRA), Imm8(32)); - MOVQ_xmm(XMM0, R(RSCRATCH_EXTRA)); - } - RET(); - - const u8* loadPairedFloatOne = AlignCode4(); - if (jit->js.memcheck) - { - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - UNPCKLPS(XMM0, M(m_one)); - } - else if (cpu_info.bSSSE3) - { - MOVD_xmm(XMM0, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); - PSHUFB(XMM0, M(pbswapShuffle1x4)); - UNPCKLPS(XMM0, M(m_one)); - } - else - { - LoadAndSwap(32, RSCRATCH_EXTRA, MComplex(RMEM, RSCRATCH_EXTRA, 1, 0)); - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - UNPCKLPS(XMM0, M(m_one)); - } - RET(); - - const u8* loadPairedU8Two = AlignCode4(); - if (jit->js.memcheck) - { - // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - ROR(16, R(RSCRATCH_EXTRA), Imm8(8)); - } - else - { - UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); - } - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - if (cpu_info.bSSE4_1) - { - PMOVZXBD(XMM0, R(XMM0)); - } - else - { - PXOR(XMM1, R(XMM1)); - PUNPCKLBW(XMM0, R(XMM1)); - PUNPCKLWD(XMM0, R(XMM1)); - } - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedU8One = AlignCode4(); - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx - CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - UNPCKLPS(XMM0, M(m_one)); - RET(); - - const u8* loadPairedS8Two = AlignCode4(); - if (jit->js.memcheck) - { - // TODO: Support not swapping in safeLoadToReg to avoid bswapping twice - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - ROR(16, R(RSCRATCH_EXTRA), Imm8(8)); - } - else - { - UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0); - } - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - if (cpu_info.bSSE4_1) - { - PMOVSXBD(XMM0, R(XMM0)); - } - else - { - PUNPCKLBW(XMM0, R(XMM0)); - PUNPCKLWD(XMM0, R(XMM0)); - PSRAD(XMM0, 24); - } - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedS8One = AlignCode4(); - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 8, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true); - 
CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - UNPCKLPS(XMM0, M(m_one)); - RET(); - - const u8* loadPairedU16Two = AlignCode4(); - // TODO: Support not swapping in (un)safeLoadToReg to avoid bswapping twice - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); - ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - if (cpu_info.bSSE4_1) - { - PMOVZXWD(XMM0, R(XMM0)); - } - else - { - PXOR(XMM1, R(XMM1)); - PUNPCKLWD(XMM0, R(XMM1)); - } - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedU16One = AlignCode4(); - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false); - CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - UNPCKLPS(XMM0, M(m_one)); - RET(); - - const u8* loadPairedS16Two = AlignCode4(); - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 32, 0, QUANTIZED_REGS_TO_SAVE_LOAD, false, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 32, 0, false); - ROL(32, R(RSCRATCH_EXTRA), Imm8(16)); - MOVD_xmm(XMM0, R(RSCRATCH_EXTRA)); - if (cpu_info.bSSE4_1) - { - PMOVSXWD(XMM0, R(XMM0)); - } - else - { - PUNPCKLWD(XMM0, R(XMM0)); - PSRAD(XMM0, 16); - } - CVTDQ2PS(XMM0, R(XMM0)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - MULPS(XMM0, R(XMM1)); - RET(); - - const u8* loadPairedS16One = AlignCode4(); - if (jit->js.memcheck) - SafeLoadToReg(RSCRATCH_EXTRA, R(RSCRATCH_EXTRA), 16, 0, QUANTIZED_REGS_TO_SAVE_LOAD, true, SAFE_LOADSTORE_NO_FASTMEM | SAFE_LOADSTORE_NO_PROLOG); - else - UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true); - CVTSI2SS(XMM0, R(RSCRATCH_EXTRA)); - SHR(32, R(RSCRATCH2), Imm8(5)); - MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS)); - UNPCKLPS(XMM0, M(m_one)); - RET(); - - pairedLoadQuantized = reinterpret_cast(const_cast(AlignCode16())); - ReserveCodeSpace(16 * sizeof(u8*)); - - pairedLoadQuantized[0] = loadPairedFloatTwo; - pairedLoadQuantized[1] = loadPairedIllegal; - pairedLoadQuantized[2] = loadPairedIllegal; - pairedLoadQuantized[3] = loadPairedIllegal; - pairedLoadQuantized[4] = loadPairedU8Two; - pairedLoadQuantized[5] = loadPairedU16Two; - pairedLoadQuantized[6] = loadPairedS8Two; - pairedLoadQuantized[7] = loadPairedS16Two; - - pairedLoadQuantized[8] = loadPairedFloatOne; - pairedLoadQuantized[9] = loadPairedIllegal; - pairedLoadQuantized[10] = loadPairedIllegal; - pairedLoadQuantized[11] = loadPairedIllegal; - pairedLoadQuantized[12] = loadPairedU8One; - pairedLoadQuantized[13] = loadPairedU16One; - pairedLoadQuantized[14] = loadPairedS8One; - pairedLoadQuantized[15] = loadPairedS16One; -} diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h index c872865fa7..4df759c6c5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h +++ 
b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.h @@ -4,7 +4,7 @@ #pragma once -#include "Core/PowerPC/JitCommon/Jit_Util.h" +#include "Common/CommonTypes.h" extern const u8 GC_ALIGNED16(pbswapShuffle1x4[16]); extern const u8 GC_ALIGNED16(pbswapShuffle2x4[16]); @@ -15,7 +15,6 @@ extern const float GC_ALIGNED16(m_dequantizeTableS[]); class CommonAsmRoutinesBase { public: - const u8 *fifoDirectWrite8; const u8 *fifoDirectWrite16; const u8 *fifoDirectWrite32; @@ -51,19 +50,5 @@ public: // In: ECX: Address to write to. // In: XMM0: Bottom 32-bit slot holds the float to be written. const u8 **singleStoreQuantized; - }; -class CommonAsmRoutines : public CommonAsmRoutinesBase, public EmuCodeBlock -{ -protected: - void GenQuantizedLoads(); - void GenQuantizedStores(); - void GenQuantizedSingleStores(); - -public: - void GenFifoWrite(int size); - void GenFrsqrte(); - void GenFres(); - void GenMfcr(); -}; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 6205f7d420..3ff7d74158 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -23,8 +23,8 @@ #include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PPCAnalyst.h" #include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" #include "Core/PowerPC/JitCommon/Jit_Util.h" -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitCache.h" #include "Core/PowerPC/JitCommon/TrampolineCache.h"
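
For reference, the layering this patch leaves behind can be summarized as below. This is a condensed sketch rather than the literal headers: `EmuCodeBlock` is reduced to an empty stand-in, `u8` is aliased locally, and only a few representative members from the diff are listed. The architecture-neutral container `CommonAsmRoutinesBase` (the shared entry-point pointers and the quantized load/store tables filled in by `GenQuantizedLoads`/`GenQuantizedStores`) stays in `JitCommon/JitAsmCommon.h`, while the x86-64 emitter class `CommonAsmRoutines` moves to the new `Jit64Common/Jit64AsmCommon.h`.

```cpp
// Condensed sketch of the post-patch class split (stand-in types, representative members only).
#include <cstdint>

using u8 = std::uint8_t;

// JitCommon/JitAsmCommon.h: architecture-neutral container of generated-code entry points.
class CommonAsmRoutinesBase
{
public:
	const u8* fifoDirectWrite8;
	const u8* fifoDirectWrite16;
	const u8* fifoDirectWrite32;

	// Tables of entry points, indexed by the quantized load/store type;
	// filled in by GenQuantizedLoads / GenQuantizedStores / GenQuantizedSingleStores.
	const u8** pairedLoadQuantized;
	const u8** pairedStoreQuantized;
	const u8** singleStoreQuantized;
};

// Stand-in for the real x86-64 emitter base class from JitCommon/Jit_Util.h.
class EmuCodeBlock {};

// Jit64Common/Jit64AsmCommon.h: the x86-64-specific code generators.
class CommonAsmRoutines : public CommonAsmRoutinesBase, public EmuCodeBlock
{
protected:
	void GenQuantizedLoads();
	void GenQuantizedStores();
	void GenQuantizedSingleStores();

public:
	void GenFifoWrite(int size);
	void GenFrsqrte();
	void GenFres();
	void GenMfcr();
};
```

The point of the split is that other recompilers can keep depending only on the shared arrays in `JitAsmCommon.h`, without pulling in the x86-64 emitters.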