From b84a1823b288e42be27c9554d5d2e79b1696b3f5 Mon Sep 17 00:00:00 2001 From: hrydgard Date: Sat, 16 Jan 2010 22:44:49 +0000 Subject: [PATCH] Hopefully fix all remaining quantizer issues in Mario Kart Wii: * must use a truncating float-to-int conversion, for example. * introduce optimized variants of the single value psq_st operation (JIT only). * fix bug in SafeWriteRegToReg when swap = false git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4861 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/x64Emitter.cpp | 3 + Source/Core/Common/Src/x64Emitter.h | 3 + Source/Core/Core/Src/Core.cpp | 2 +- Source/Core/Core/Src/HW/Memmap.cpp | 27 +----- Source/Core/Core/Src/HW/Memmap.h | 4 + Source/Core/Core/Src/HW/MemmapFunctions.cpp | 10 ++- .../Interpreter_LoadStorePaired.cpp | 6 +- Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp | 1 + .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 77 +++------------- .../Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp | 1 + .../Src/PowerPC/JitCommon/JitAsmCommon.cpp | 89 +++++++++++++++++-- .../Core/Src/PowerPC/JitCommon/JitAsmCommon.h | 7 ++ .../Core/Src/PowerPC/JitCommon/Jit_Util.cpp | 4 +- 13 files changed, 131 insertions(+), 103 deletions(-) diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 1ea4aa7696..d34019cfc4 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -1150,6 +1150,9 @@ void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5B, true, reg void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);} void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, regOp, arg);} +void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);} +void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);} + void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, 
R(src));} void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);} diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 31e44b1b24..399d698f25 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -512,6 +512,9 @@ public: void CVTDQ2PS(X64Reg regOp, OpArg arg); void CVTPS2DQ(X64Reg regOp, OpArg arg); + void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! + void CVTTPS2DQ(X64Reg regOp, OpArg arg); + // SSE2: Packed integer instructions void PACKSSDW(X64Reg dest, OpArg arg); void PACKSSWB(X64Reg dest, OpArg arg); diff --git a/Source/Core/Core/Src/Core.cpp b/Source/Core/Core/Src/Core.cpp index 593f4743ef..4ced4ca208 100644 --- a/Source/Core/Core/Src/Core.cpp +++ b/Source/Core/Core/Src/Core.cpp @@ -324,7 +324,7 @@ THREAD_RETURN EmuThread(void *pArg) VideoInitialize.Fifo_CPUBase = &ProcessorInterface::Fifo_CPUBase; VideoInitialize.Fifo_CPUEnd = &ProcessorInterface::Fifo_CPUEnd; VideoInitialize.Fifo_CPUWritePointer = &ProcessorInterface::Fifo_CPUWritePointer; - VideoInitialize.bAutoAspectIs16_9 = _CoreParameter.bWii ? SConfig::GetInstance().m_SYSCONF->GetData("IPL.AR") : false; + VideoInitialize.bAutoAspectIs16_9 = _CoreParameter.bWii ? (SConfig::GetInstance().m_SYSCONF->GetData("IPL.AR") ? true : false) : false; Plugins.GetVideo()->Initialize(&VideoInitialize); // Call the dll diff --git a/Source/Core/Core/Src/HW/Memmap.cpp b/Source/Core/Core/Src/HW/Memmap.cpp index 5114d315d0..8182444515 100644 --- a/Source/Core/Core/Src/HW/Memmap.cpp +++ b/Source/Core/Core/Src/HW/Memmap.cpp @@ -48,10 +48,6 @@ may be redirected here (for example to Read_U32()). 
#include "../Debugger/Debugger_SymbolMap.h" #include "../PluginManager.h" - - -// Declarations and definitions -// ---------------- namespace Memory { @@ -75,10 +71,9 @@ u8* base = NULL; MemArena g_arena; // ============== - -// STATE_TO_SAVE (applies to a lot of things in this file) - +// STATE_TO_SAVE bool m_IsInitialized = false; // Save the Init(), Shutdown() state +// END STATE_TO_SAVE // 64-bit: Pointers to low-mem (sub-0x10000000) mirror // 32-bit: Same as the corresponding physical/virtual pointers. @@ -130,8 +125,6 @@ void HW_Default_Write(const T _Data, const u32 _Address){ ERROR_LOG(MASTER_LOG, template void HW_Default_Read(T _Data, const u32 _Address){ ERROR_LOG(MASTER_LOG, "Illegal HW Read%i %08x", sizeof(T)*8, _Address); _dbg_assert_(MEMMAP, 0);} -u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag); - #define PAGE_SHIFT 10 #define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (PAGE_SHIFT - 1) @@ -606,12 +599,10 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits) if(Read) { WARN_LOG(CONSOLE, "Read%i: Program tried to read [%08x] from [%08x]", Bits, Address); - //PanicAlert("Write_U32: Program tried to write [%08x] to [%08x]", _Address); } else { ERROR_LOG(CONSOLE, "Write%i: Program tried to write [%08x] to [%08x]", Bits, Data, Address); - //PanicAlert("Read: Program tried to write [%08x] to [%08x]", Data, Address); } } @@ -620,16 +611,14 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits) if(Read) { WARN_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * 0 * * *", Bits, Data, Address); - //PanicAlert("Read: Program read [%08x] from [%08x]", Data, Address); } else { WARN_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * 0 * * *", Bits, Data, Address); - //PanicAlert("Read: Program wrote [%08x] to [%08x]", Data, Address); } } - /* Try to figure out where the dev/di Ioctl arguments are stored (including buffer out), so we can - find the bad one */ + // Try to figure out where the dev/di 
Ioctl arguments are stored (including buffer out), so we can + // find the bad one if( Data == 0x1090f4c0 // good out buffer right before it, for sound/smashbros_sound.brsar || Data == 0x10913b00 // second one @@ -646,12 +635,10 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits) if(Read) { ERROR_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * * * * * * * * * *", Bits, Data, Address); - //PanicAlert("Read%i: Program read [%08x] from [%08x]", Bits, Data, Address); } else { ERROR_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * * * * * * * * * *", Bits,Data, Address); - //PanicAlert("Write%i: Program wrote [0x%08x] to [0x%08x]", Bits, Data, Address); } } } @@ -683,9 +670,6 @@ void Memset(const u32 _Address, const u8 _iValue, const u32 _iLength) } else { - // (comment for old implementation) : F|RES: rogue squadron and other games use the TLB ... so this cant work - - // fixed implementation: for (u32 i = 0; i < _iLength; i++) Write_U8(_iValue, _Address + i); } @@ -839,12 +823,9 @@ bool IsRAMAddress(const u32 addr, bool allow_locked_cache) return true; else return false; - default: return false; } } - - } // namespace diff --git a/Source/Core/Core/Src/HW/Memmap.h b/Source/Core/Core/Src/HW/Memmap.h index 51039d6fe0..2466ebdd6c 100644 --- a/Source/Core/Core/Src/HW/Memmap.h +++ b/Source/Core/Core/Src/HW/Memmap.h @@ -142,6 +142,10 @@ void Write_U16(const u16 _Data, const u32 _Address); void Write_U32(const u32 _Data, const u32 _Address); void Write_U64(const u64 _Data, const u32 _Address); +void Write_U16_Swap(const u16 _Data, const u32 _Address); +void Write_U32_Swap(const u32 _Data, const u32 _Address); +void Write_U64_Swap(const u64 _Data, const u32 _Address); + void WriteHW_U32(const u32 _Data, const u32 _Address); void GetString(std::string& _string, const u32 _Address); diff --git a/Source/Core/Core/Src/HW/MemmapFunctions.cpp b/Source/Core/Core/Src/HW/MemmapFunctions.cpp index 28e44baecf..2fb7f329e6 100644 --- 
a/Source/Core/Core/Src/HW/MemmapFunctions.cpp +++ b/Source/Core/Core/Src/HW/MemmapFunctions.cpp @@ -409,6 +409,9 @@ void Write_U16(const u16 _Data, const u32 _Address) WriteToHardware(_Address, _Data, _Address, FLAG_WRITE); } +void Write_U16_Swap(const u16 _Data, const u32 _Address) { + Write_U16(Common::swap16(_Data), _Address); +} void Write_U32(const u32 _Data, const u32 _Address) @@ -423,7 +426,9 @@ void Write_U32(const u32 _Data, const u32 _Address) #endif WriteToHardware(_Address, _Data, _Address, FLAG_WRITE); } - +void Write_U32_Swap(const u32 _Data, const u32 _Address) { + Write_U32(Common::swap32(_Data), _Address); +} void Write_U64(const u64 _Data, const u32 _Address) { @@ -438,6 +443,9 @@ void Write_U64(const u64 _Data, const u32 _Address) WriteToHardware(_Address, _Data, _Address + 4, FLAG_WRITE); } +void Write_U64_Swap(const u64 _Data, const u32 _Address) { /* u64 payload, matching the Memmap.h declaration; a u32 parameter would drop the upper 32 bits before the swap */ + Write_U64(Common::swap64(_Data), _Address); +} u8 ReadUnchecked_U8(const u32 _Address) { diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index c0fe7008b3..9b4918d014 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -74,9 +74,9 @@ inline T CLAMP(T a, T bottom, T top) { } void Helper_Quantize(const u32 _Addr, const double _fValue, - const EQuantizeType _quantizeType, const unsigned int _uScale) + const EQuantizeType _quantizeType, const unsigned int _uScale) { - switch(_quantizeType) + switch (_quantizeType) { case QUANTIZE_FLOAT: Memory::Write_U32( ConvertToSingleFTZ( *(u64*)&_fValue ), _Addr ); @@ -222,7 +222,7 @@ void psq_st(UGeckoInstruction _inst) } else { - Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale ); + Helper_Quantize( EA, rPS0(_inst.RS), stType, stScale ); } } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp 
b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp index 60c98721a7..703c43509d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/JitAsm.cpp @@ -245,6 +245,7 @@ void AsmRoutineManager::GenerateCommon() GenQuantizedLoads(); GenQuantizedStores(); + GenQuantizedSingleStores(); //CMPSD(R(XMM0), M(&zero), // TODO diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index e76f5a8b9d..2238cd627e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -72,71 +72,6 @@ void Jit64::psq_st(UGeckoInstruction inst) const EQuantizeType stType = static_cast(gqr.ST_TYPE); int stScale = gqr.ST_SCALE; - - if (inst.W) { - Default(inst); - return; - - // PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update); - // It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only - // floats so that's what we'll work on. - switch (stType) - { - case QUANTIZE_FLOAT: - { - // This one has quite a bit of optimization potential. - if (gpr.R(a).IsImm()) - { - PanicAlert("Imm: %08x", gpr.R(a).offset); - } - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - gpr.Lock(a); - fpr.Lock(s); - // Check that the quantizer is set the way we expect. 
- INT3(); - CMP(16, M(&rSPR(SPR_GQR0 + inst.I)), Imm16(store_gqr)); - FixupBranch skip_opt = J_CC(CC_NE); - - if (update) - gpr.LoadToX64(a, true, true); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); - TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); - if (update && offset) - MOV(32, gpr.R(a), R(ABI_PARAM2)); - CVTSD2SS(XMM0, fpr.R(s)); - MOVD_xmm(M(&temp64), XMM0); - MOV(32, R(ABI_PARAM1), M(&temp64)); - FixupBranch argh = J_CC(CC_NZ); - BSWAP(32, ABI_PARAM1); -#ifdef _M_X64 - MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); -#else - MOV(32, R(EAX), R(ABI_PARAM2)); - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1)); -#endif - FixupBranch skip_call = J(); - SetJumpTarget(argh); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); - SetJumpTarget(skip_call); - gpr.UnlockAll(); - gpr.UnlockAllX(); - fpr.UnlockAll(); - - FixupBranch skip_slow = J(); - SetJumpTarget(skip_opt); - Default(inst); - SetJumpTarget(skip_slow); - return; - } - default: - Default(inst); - return; - } - } - #if 0 // Is this specialization still worth it? Let's keep it for now. It's probably // not very risky since a game most likely wouldn't use the same code to process @@ -176,8 +111,16 @@ void Jit64::psq_st(UGeckoInstruction inst) #else SHL(32, R(EDX), Imm8(3)); #endif - CVTPD2PS(XMM0, fpr.R(s)); - CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized)); + if (inst.W) { + // One value + XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions. 
+ CVTSD2SS(XMM0, fpr.R(s)); + CALLptr(MDisp(EDX, (u32)(u64)asm_routines.singleStoreQuantized)); + } else { + // Pair of values + CVTPD2PS(XMM0, fpr.R(s)); + CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized)); + } gpr.UnlockAll(); gpr.UnlockAllX(); } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp index 0723b97a01..d4ee182024 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitAsm.cpp @@ -251,6 +251,7 @@ void AsmRoutineManager::GenerateCommon() GenQuantizedLoads(); GenQuantizedStores(); + GenQuantizedSingleStores(); //CMPSD(R(XMM0), M(&zero), // TODO diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index ad8e5c31d5..2ffd19100a 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -137,8 +137,12 @@ static const float GC_ALIGNED16(m_dequantizeTableS[]) = static float GC_ALIGNED16(psTemp[4]); -static const float m_65535 = 65535.0f; - +static const float GC_ALIGNED16(m_65535) = 65535.0f; +static const float GC_ALIGNED16(m_32767) = 32767.0f; +static const float GC_ALIGNED16(m_m32768) = -32768.0f; +static const float GC_ALIGNED16(m_255) = 255.0f; +static const float GC_ALIGNED16(m_127) = 127.0f; +static const float GC_ALIGNED16(m_m128) = -128.0f; #define QUANTIZE_OVERFLOW_SAFE @@ -205,7 +209,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PUNPCKLDQ(XMM1, R(XMM1)); MINPS(XMM0, R(XMM1)); #endif - CVTPS2DQ(XMM0, R(XMM0)); + CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); @@ -223,7 +227,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PUNPCKLDQ(XMM1, R(XMM1)); MINPS(XMM0, R(XMM1)); #endif - CVTPS2DQ(XMM0, R(XMM0)); + CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); PACKSSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); @@ -245,7 +249,7 
@@ void CommonAsmRoutines::GenQuantizedStores() { PUNPCKLDQ(XMM1, R(XMM1)); MINPS(XMM0, R(XMM1)); - CVTPS2DQ(XMM0, R(XMM0)); + CVTTPS2DQ(XMM0, R(XMM0)); MOVQ_xmm(M(psTemp), XMM0); // place ps[0] into the higher word, ps[1] into the lower // so no need in ROL after BSWAP @@ -269,7 +273,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PUNPCKLDQ(XMM1, R(XMM1)); MINPS(XMM0, R(XMM1)); #endif - CVTPS2DQ(XMM0, R(XMM0)); + CVTTPS2DQ(XMM0, R(XMM0)); PACKSSDW(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); BSWAP(32, EAX); @@ -288,6 +292,79 @@ void CommonAsmRoutines::GenQuantizedStores() { pairedStoreQuantized[7] = storePairedS16; } +// See comment in header for in/outs. +void CommonAsmRoutines::GenQuantizedSingleStores() { + const u8* storeSingleIllegal = AlignCode4(); + UD2(); + + // Easy! + const u8* storeSingleFloat = AlignCode4(); + if (cpu_info.bSSSE3) { + PSHUFB(XMM0, M((void *)pbswapShuffle2x4)); + // TODO: SafeWriteFloat + MOVSS(M(&psTemp[0]), XMM0); + MOV(32, R(EAX), M(&psTemp[0])); + SafeWriteRegToReg(EAX, ECX, 32, 0, false); + } else { + MOVSS(M(&psTemp[0]), XMM0); + MOV(32, R(EAX), M(&psTemp[0])); + SafeWriteRegToReg(EAX, ECX, 32, 0, true); + } + RET(); + + const u8* storeSingleU8 = AlignCode4(); // Used by MKWii + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + MULSS(XMM0, R(XMM1)); + PXOR(XMM1, R(XMM1)); + MAXSS(XMM0, R(XMM1)); + MINSS(XMM0, M((void *)&m_255)); + CVTTSS2SI(EAX, R(XMM0)); + SafeWriteRegToReg(AL, ECX, 8, 0, true); + RET(); + + const u8* storeSingleS8 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + MULSS(XMM0, R(XMM1)); + MAXSS(XMM0, M((void *)&m_m128)); + MINSS(XMM0, M((void *)&m_127)); + CVTTSS2SI(EAX, R(XMM0)); + SafeWriteRegToReg(AL, ECX, 8, 0, true); + RET(); + + const u8* storeSingleU16 = AlignCode4(); // Used by MKWii + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + PUNPCKLDQ(XMM1, R(XMM1)); + MULPS(XMM0, R(XMM1)); + 
PXOR(XMM1, R(XMM1)); + MAXSS(XMM0, R(XMM1)); + MINSS(XMM0, M((void *)&m_65535)); + CVTTSS2SI(EAX, R(XMM0)); + SafeWriteRegToReg(EAX, ECX, 16, 0, true); + RET(); + + const u8* storeSingleS16 = AlignCode4(); + SHR(32, R(EAX), Imm8(6)); + MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS)); + MULSS(XMM0, R(XMM1)); + MAXSS(XMM0, M((void *)&m_m32768)); + MINSS(XMM0, M((void *)&m_32767)); + CVTTSS2SI(EAX, R(XMM0)); + SafeWriteRegToReg(EAX, ECX, 16, 0, true); + RET(); + + singleStoreQuantized[0] = storeSingleFloat; + singleStoreQuantized[1] = storeSingleIllegal; + singleStoreQuantized[2] = storeSingleIllegal; + singleStoreQuantized[3] = storeSingleIllegal; + singleStoreQuantized[4] = storeSingleU8; + singleStoreQuantized[5] = storeSingleU16; + singleStoreQuantized[6] = storeSingleS8; + singleStoreQuantized[7] = storeSingleS16; +} + void CommonAsmRoutines::GenQuantizedLoads() { const u8* loadPairedIllegal = AlignCode4(); UD2(); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h index f84fa76fbf..c5f48b7782 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.h @@ -24,6 +24,8 @@ class CommonAsmRoutines : public EmuCodeBlock { protected: void GenQuantizedLoads(); void GenQuantizedStores(); + void GenQuantizedSingleStores(); + public: void GenFifoWrite(int size); void GenFifoXmm64Write(); @@ -42,6 +44,11 @@ public: // Out: Nothing. // Trashes: EAX ECX EDX const u8 GC_ALIGNED16(*pairedStoreQuantized[8]); + + // In: array index: GQR to use. + // In: ECX: Address to write to. + // In: XMM0: Bottom 32-bit slot holds the float to be written. 
+ const u8 GC_ALIGNED16(*singleStoreQuantized[8]); }; #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 74784e7d30..5f9251ece1 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -129,8 +129,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce FixupBranch argh = J_CC(CC_Z); switch (accessSize) { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), reg_value, reg_addr); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), reg_value, reg_addr); break; + case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break; + case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break; case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break; } FixupBranch arg2 = J();