diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 072edded23..8989dcfcc5 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -215,6 +215,7 @@ if(_M_ARM) Src/PowerPC/JitArm32/JitArm_LoadStore.cpp Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp Src/PowerPC/JitArm32/JitArm_Paired.cpp + Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp) endif() diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index 072a6d40fa..5d5d2dd19b 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -218,6 +218,9 @@ public: void ps_neg(UGeckoInstruction _inst); void ps_abs(UGeckoInstruction _inst); void ps_nabs(UGeckoInstruction _inst); + + // LoadStore paired + void psq_l(UGeckoInstruction _inst); }; #endif // _JIT64_H diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp new file mode 100644 index 0000000000..5e2cc1f876 --- /dev/null +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp @@ -0,0 +1,59 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+#include "Common.h" +#include "Thunk.h" + +#include "../../Core.h" +#include "../PowerPC.h" +#include "../../CoreTiming.h" +#include "../PPCTables.h" +#include "ArmEmitter.h" + +#include "Jit.h" +#include "JitRegCache.h" +#include "JitAsm.h" + +void JitArm::psq_l(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStorePairedOff) + + bool update = inst.OPCD == 57; + s32 offset = inst.SIMM_12; + + // R12 contains scale + // R11 contains type (converted below into a byte offset into the routine table) + // R10 is the ADDR + + if (js.memcheck) { Default(inst); return; } + + if (inst.W) { + // Enable when supporting single loads + Default(inst); + return; + } + + LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); + //UBFX(R12, R11, 24, 6); // Scale (GQR LD_SCALE, LSB-numbered bits 24-29) + UBFX(R11, R11, 16, 3); // Type (GQR LD_TYPE, LSB-numbered bits 16-18) + + MOVI2R(R10, (u32)offset); + if (inst.RA) + ADD(R10, R10, gpr.R(inst.RA)); + if (update) + MOV(gpr.R(inst.RA), R10); + // pairedLoadQuantized holds 4-byte pointers, so index it by type * 4 + LSL(R11, R11, 2); + MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized); + ADD(R14, R14, R11); + LDR(R14, R14); + + // Values returned in S0, S1 + BL(R14); // Jump to the quantizer Load + + ARMReg vD0 = fpr.R0(inst.RS, false); + ARMReg vD1 = fpr.R1(inst.RS, false); + VCVT(vD0, S0, 0); + VCVT(vD1, S1, 0); +} diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index e1f27560a3..f3ab4f695b 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -39,9 +39,6 @@ void JitArm::mtspr(UGeckoInstruction inst) case SPR_LR: case SPR_CTR: case SPR_XER: - // These are safe to do the easy way, see the bottom of this function. - break; - case SPR_GQR0: case SPR_GQR0 + 1: case SPR_GQR0 + 2: @@ -50,19 +47,9 @@ void JitArm::mtspr(UGeckoInstruction inst) case SPR_GQR0 + 5: case SPR_GQR0 + 6: case SPR_GQR0 + 7: - // Prevent recompiler from compiling in old quantizer values. 
- // If the value changed, destroy all blocks using this quantizer - // This will create a little bit of block churn, but hopefully not too bad. - { - /* - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value - CMP(32, R(EAX), gpr.R(inst.RD)); - FixupBranch skip_destroy = J_CC(CC_E, false); - int gqr = iIndex - SPR_GQR0; - ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr); - SetJumpTarget(skip_destroy);*/ - } - // TODO - break block if quantizers are written to. + // These are safe to do the easy way, see the bottom of this function. + break; + default: Default(inst); return; diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index 4a1aa9d28f..23c5ab56a5 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -107,7 +107,7 @@ static GekkoOPTemplate primarytable[] = {54, &JitArm::Default}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, {55, &JitArm::Default}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, - {56, &JitArm::Default}, //"psq_l", OPTYPE_PS, FL_IN_A}}, + {56, &JitArm::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}}, {57, &JitArm::Default}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, {60, &JitArm::Default}, //"psq_st", OPTYPE_PS, FL_IN_A}}, {61, &JitArm::Default}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp index 0c3a73c38e..6cf761c949 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp @@ -137,40 +137,74 @@ void JitArmAsmRoutineManager::Generate() ADD(_SP, _SP, 4); POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns + + GenerateCommon(); + FlushIcache(); } void JitArmAsmRoutineManager::GenerateCommon() { -/* fifoDirectWrite8 = AlignCode4(); - GenFifoWrite(8); - 
fifoDirectWrite16 = AlignCode4(); - GenFifoWrite(16); - fifoDirectWrite32 = AlignCode4(); - GenFifoWrite(32); - fifoDirectWriteFloat = AlignCode4(); - GenFifoFloatWrite(); - fifoDirectWriteXmm64 = AlignCode4(); - GenFifoXmm64Write(); + // Paired-load routines: address in R10, results in S0/S1; formats not implemented yet are BKPT stubs + const u8* loadPairedIllegal = GetCodePtr(); + BKPT(0x10); - GenQuantizedLoads(); - GenQuantizedStores(); - GenQuantizedSingleStores(); -*/ - //CMPSD(R(XMM0), M(&zero), - // TODO + const u8* loadPairedFloatTwo = GetCodePtr(); + PUSH(2, R12, _LR); + // R12 and R14 are scratch (R12 and LR were saved by the PUSH above) + // R10 is the address on entry; it is masked and offset by Memory::base below + MOVI2R(R14, Memory::MEMVIEW32_MASK); + AND(R10, R10, R14); + MOVI2R(R14, (u32)Memory::base); + ADD(R10, R10, R14); + + LDR(R12, R10); + REV(R12, R12); + VMOV(S0, R12); + + LDR(R12, R10, 4); + REV(R12, R12); + VMOV(S1, R12); + + POP(2, R12, _PC); + const u8* loadPairedFloatOne = GetCodePtr(); + BKPT(0x12); + const u8* loadPairedU8Two = GetCodePtr(); + BKPT(0x13); + const u8* loadPairedU8One = GetCodePtr(); + BKPT(0x14); + const u8* loadPairedS8Two = GetCodePtr(); + BKPT(0x15); + const u8* loadPairedS8One = GetCodePtr(); + BKPT(0x16); + const u8* loadPairedU16Two = GetCodePtr(); + BKPT(0x17); + const u8* loadPairedU16One = GetCodePtr(); + BKPT(0x18); + const u8* loadPairedS16Two = GetCodePtr(); + BKPT(0x19); + const u8* loadPairedS16One = GetCodePtr(); + BKPT(0x20); + + pairedLoadQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16())); + ReserveCodeSpace(16 * sizeof(u8*)); + + pairedLoadQuantized[0] = loadPairedFloatTwo; + pairedLoadQuantized[1] = loadPairedIllegal; + pairedLoadQuantized[2] = loadPairedIllegal; + pairedLoadQuantized[3] = loadPairedIllegal; + pairedLoadQuantized[4] = loadPairedU8Two; + pairedLoadQuantized[5] = loadPairedU16Two; + pairedLoadQuantized[6] = loadPairedS8Two; + pairedLoadQuantized[7] = loadPairedS16Two; + + pairedLoadQuantized[8] = loadPairedFloatOne; + pairedLoadQuantized[9] = loadPairedIllegal; + pairedLoadQuantized[10] = loadPairedIllegal; + pairedLoadQuantized[11] = loadPairedIllegal; + pairedLoadQuantized[12] = 
loadPairedU8One; + pairedLoadQuantized[13] = loadPairedU16One; + pairedLoadQuantized[14] = loadPairedS8One; + pairedLoadQuantized[15] = loadPairedS16One; - // Fast write routines - special case the most common hardware write - // TODO: use this. - // Even in x86, the param values will be in the right registers. - /* - const u8 *fastMemWrite8 = AlignCode16(); - CMP(32, R(ABI_PARAM2), Imm32(0xCC008000)); - FixupBranch skip_fast_write = J_CC(CC_NE, false); - MOV(32, EAX, M(&m_gatherPipeCount)); - MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1); - ADD(32, 1, M(&m_gatherPipeCount)); - RET(); - SetJumpTarget(skip_fast_write); - CALL((void *)&Memory::Write_U8);*/ }