From fb1ebd80225fefc591583b8c70ac45f01bab3142 Mon Sep 17 00:00:00 2001 From: pierre Date: Fri, 14 Jan 2011 18:00:25 +0000 Subject: [PATCH] Core/DSPCore: Consolidation of register accesses(except for the four hw stacks), addition of a register cache(currently accumulators on 64bit only). It feels a bit slower, but the numbers are about the same. Caching accumulators doesn't change speed either. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6848 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/x64Emitter.h | 6 +- Source/Core/DSPCore/CMakeLists.txt | 1 + Source/Core/DSPCore/DSPCore.vcproj | 10 +- Source/Core/DSPCore/Src/DSPEmitter.cpp | 34 +- Source/Core/DSPCore/Src/DSPEmitter.h | 29 +- .../Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp | 23 +- Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp | 137 ++--- Source/Core/DSPCore/Src/Jit/DSPJitCCUtil.cpp | 54 +- Source/Core/DSPCore/Src/Jit/DSPJitExtOps.cpp | 317 +++------- .../Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp | 110 +--- Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp | 184 +++--- .../Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp | 109 ++-- .../Core/DSPCore/Src/Jit/DSPJitRegCache.cpp | 540 ++++++++++++++++++ Source/Core/DSPCore/Src/Jit/DSPJitRegCache.h | 169 ++++++ Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp | 301 ++++------ Source/Core/DSPCore/Src/Jit/DSPJitUtil.h | 53 -- Source/Core/DSPCore/Src/SConscript | 1 + 17 files changed, 1196 insertions(+), 882 deletions(-) create mode 100644 Source/Core/DSPCore/Src/Jit/DSPJitRegCache.cpp create mode 100644 Source/Core/DSPCore/Src/Jit/DSPJitRegCache.h diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 182cde1cf3..4501ad17aa 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -195,16 +195,16 @@ inline u32 PtrOffset(void* ptr, void* base) { _assert_msg_(DYNA_REC, 0, "pointer offset out of range"); return 0; } - return distance; + return (u32)distance; #else return (u32)ptr-(u32)base; 
#endif } //usage: int a[]; ARRAY_OFFSET(a,10) -#define ARRAY_OFFSET(array,index) ((u64)&(array)[index]-(u64)&(array)[0]) +#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0])) //usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFFSET(str,elem) ((u64)&(str).elem-(u64)&(str)) +#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) struct FixupBranch { diff --git a/Source/Core/DSPCore/CMakeLists.txt b/Source/Core/DSPCore/CMakeLists.txt index ed422ef986..4e74becb0f 100644 --- a/Source/Core/DSPCore/CMakeLists.txt +++ b/Source/Core/DSPCore/CMakeLists.txt @@ -18,6 +18,7 @@ set(SRCS Src/assemble.cpp Src/DSPInterpreter.cpp Src/DSPCore.cpp Src/DSPTables.cpp + Src/Jit/DSPJitRegCache.cpp Src/Jit/DSPJitExtOps.cpp Src/Jit/DSPJitBranch.cpp Src/Jit/DSPJitCCUtil.cpp diff --git a/Source/Core/DSPCore/DSPCore.vcproj b/Source/Core/DSPCore/DSPCore.vcproj index 89ecd254a7..43f7401e8a 100644 --- a/Source/Core/DSPCore/DSPCore.vcproj +++ b/Source/Core/DSPCore/DSPCore.vcproj @@ -486,6 +486,14 @@ RelativePath=".\Src\Jit\DSPJitMultiplier.cpp" > + + + + @@ -493,7 +501,7 @@ - + [MAX_BLOCKS]; compileSR = 0; compileSR |= SR_INT_ENABLE; @@ -93,6 +95,9 @@ void DSPEmitter::checkExceptions(u32 retval) MOV(16, MatR(RAX), Imm16(compilePC)); #endif + DSPJitRegCache c(gpr); + + SaveDSPRegs(); ABI_CallFunction((void *)&DSPCore_CheckExceptions); // ABI_RestoreStack(0); @@ -100,6 +105,8 @@ void DSPEmitter::checkExceptions(u32 retval) MOV(32, R(EAX), Imm32(retval)); RET(); + gpr.flushRegs(c,false); + SetJumpTarget(skipCheck); } @@ -206,14 +213,14 @@ void DSPEmitter::Compile(u16 start_addr) return; */ + LoadDSPRegs(); + blockLinkEntry = GetCodePtr(); compilePC = start_addr; bool fixup_pc = false; blockSize[start_addr] = 0; - LoadDSPRegs(); - while (compilePC < start_addr + MAX_BLOCK_SIZE) { if (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_CHECK_INT) @@ -266,8 +273,9 @@ void DSPEmitter::Compile(u16 start_addr) // These functions branch and 
therefore only need to be called in the // end of each block and in this order + DSPJitRegCache c(gpr); HandleLoop(); - // ABI_RestoreStack(0); + SaveDSPRegs(); ABI_PopAllCalleeSavedRegsAndAdjustStack(); if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { @@ -276,8 +284,9 @@ void DSPEmitter::Compile(u16 start_addr) else { MOV(16, R(EAX), Imm16(blockSize[start_addr])); - } + } RET(); + gpr.flushRegs(c,false); SetJumpTarget(rLoopAddressExit); SetJumpTarget(rLoopCounterExit); @@ -303,8 +312,9 @@ void DSPEmitter::Compile(u16 start_addr) CMP(16, R(AX), Imm16(compilePC)); FixupBranch rNoBranch = J_CC(CC_Z); + DSPJitRegCache c(gpr); //don't update g_dsp.pc -- the branch insn already did - // ABI_RestoreStack(0); + SaveDSPRegs(); ABI_PopAllCalleeSavedRegsAndAdjustStack(); if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { @@ -313,8 +323,9 @@ void DSPEmitter::Compile(u16 start_addr) else { MOV(16, R(EAX), Imm16(blockSize[start_addr])); - } + } RET(); + gpr.flushRegs(c,false); SetJumpTarget(rNoBranch); } @@ -342,7 +353,7 @@ void DSPEmitter::Compile(u16 start_addr) // any unresolved CALL's if (unresolvedJumps[start_addr].empty()) { - blockLinks[start_addr] = (CompiledCode)blockLinkEntry; + blockLinks[start_addr] = blockLinkEntry; for(u16 i = 0x0000; i < 0xffff; ++i) { @@ -372,7 +383,6 @@ void DSPEmitter::Compile(u16 start_addr) SaveDSPRegs(); - // ABI_RestoreStack(0); ABI_PopAllCalleeSavedRegsAndAdjustStack(); if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { @@ -389,9 +399,7 @@ const u8 *DSPEmitter::CompileStub() { const u8 *entryPoint = AlignCode16(); ABI_PushAllCalleeSavedRegsAndAdjustStack(); - // ABI_AlignStack(0); ABI_CallFunction((void *)&CompileCurrent); - // ABI_RestoreStack(0); ABI_PopAllCalleeSavedRegsAndAdjustStack(); //MOVZX(32, 16, ECX, M(&g_dsp.pc)); XOR(32, R(EAX), R(EAX)); // Return 0 cycles executed diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 
a4acfcb4fc..401c89ae03 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -22,12 +22,14 @@ #include "DSPCommon.h" #include "x64Emitter.h" +#include "Jit/DSPJitRegCache.h" #define COMPILED_CODE_SIZE sizeof(void *) * 0x200000 #define MAX_BLOCKS 0x10000 typedef u32 (*CompiledCode)(); +typedef const u8 *Block; class DSPEmitter : public Gen::XCodeBlock, NonCopyable { @@ -35,14 +37,14 @@ public: DSPEmitter(); ~DSPEmitter(); - const u8 *m_compiledCode; + Block m_compiledCode; void EmitInstruction(UDSPInstruction inst); void unknown_instruction(UDSPInstruction inst); void ClearIRAM(); void CompileDispatcher(); - const u8 *CompileStub(); + Block CompileStub(); void Compile(u16 start_addr); void ClearCallFlag(); @@ -113,7 +115,7 @@ public: void dsp_op_write_reg_imm(int reg, u16 val); void dsp_conditional_extend_accum(int reg); void dsp_conditional_extend_accum_imm(int reg, u16 val); - void dsp_op_read_reg(int reg, Gen::X64Reg host_dreg); + void dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE); // Commands void dar(const UDSPInstruction opc); @@ -253,13 +255,18 @@ public: const u8 *stubEntryPoint; u16 compilePC; u16 startAddr; - CompiledCode *blockLinks; + Block *blockLinks; u16 *blockSize; - std::list unresolvedJumps[0x10000]; + std::list *unresolvedJumps; + + DSPJitRegCache gpr; + + void LoadDSPRegs(); + void SaveDSPRegs(); private: CompiledCode *blocks; - const u8 *blockLinkEntry; + Block blockLinkEntry; u16 compileSR; // The index of the last stored ext value (compile time). @@ -269,8 +276,6 @@ private: // Counts down. 
// int cycles; - void LoadDSPRegs(); - void SaveDSPRegs(); void Update_SR_Register(Gen::X64Reg val = Gen::EAX); @@ -282,8 +287,12 @@ private: void set_long_prod(); void round_long_acc(Gen::X64Reg long_acc = Gen::EAX); void set_long_acc(int _reg, Gen::X64Reg acc = Gen::EAX); - void get_acc_m(int _reg, Gen::X64Reg acc = Gen::EAX); - void set_acc_m(int _reg); + void get_acc_h(int _reg, Gen::X64Reg acc = Gen::EAX, bool sign = true); + void set_acc_h(int _reg, Gen::OpArg arg = R(Gen::EAX)); + void get_acc_m(int _reg, Gen::X64Reg acc = Gen::EAX, bool sign = true); + void set_acc_m(int _reg, Gen::OpArg arg = R(Gen::EAX)); + void get_acc_l(int _reg, Gen::X64Reg acc = Gen::EAX, bool sign = true); + void set_acc_l(int _reg, Gen::OpArg arg = R(Gen::EAX)); void get_long_acx(int _reg, Gen::X64Reg acx = Gen::EAX); void get_ax_l(int _reg, Gen::X64Reg acx = Gen::EAX); void get_ax_h(int _reg, Gen::X64Reg acc = Gen::EAX); diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp index e3febde0b9..fe396c6b97 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp @@ -98,15 +98,18 @@ void DSPEmitter::andcf(const UDSPInstruction opc) // g_dsp.r.sr |= SR_LOGIC_ZERO; // else // g_dsp.r.sr &= ~SR_LOGIC_ZERO; + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); AND(16, R(RAX), Imm16(imm)); CMP(16, R(RAX), Imm16(imm)); // MOV(64, R(R11), ImmPtr(&g_dsp.r)); FixupBranch notLogicZero = J_CC(CC_NE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_LOGIC_ZERO)); + OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); SetJumpTarget(notLogicZero); - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_LOGIC_ZERO)); + AND(16, sr_reg, Imm16(~SR_LOGIC_ZERO)); SetJumpTarget(exit); + gpr.putReg(DSP_REG_SR); } #else Default(opc); @@ -136,14 +139,17 @@ void DSPEmitter::andf(const UDSPInstruction opc) // g_dsp.r.sr |= SR_LOGIC_ZERO; // else // g_dsp.r.sr &= ~SR_LOGIC_ZERO; 
+ OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); TEST(16, R(RAX), Imm16(imm)); // MOV(64, R(R11), ImmPtr(&g_dsp.r)); FixupBranch notLogicZero = J_CC(CC_NE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_LOGIC_ZERO)); + OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); SetJumpTarget(notLogicZero); - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_LOGIC_ZERO)); + AND(16, sr_reg, Imm16(~SR_LOGIC_ZERO)); SetJumpTarget(exit); + gpr.putReg(DSP_REG_SR); } #else Default(opc); @@ -606,14 +612,13 @@ void DSPEmitter::addr(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - u16 *sregp = reg_ptr(sreg); // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // s64 acc = dsp_get_long_acc(dreg); get_long_acc(dreg, RCX); MOV(64, R(RAX), R(RCX)); // s64 ax = (s16)g_dsp.r[sreg]; - MOVSX(64, 16, RDX, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; SHL(64, R(RDX), Imm8(16)); // s64 res = acc + ax; @@ -937,14 +942,13 @@ void DSPEmitter::subr(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - u16 *sregp = reg_ptr(sreg); // s64 acc = dsp_get_long_acc(dreg); get_long_acc(dreg, RCX); MOV(64, R(RAX), R(RCX)); // s64 ax = (s16)g_dsp.r[sreg]; // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RDX, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; SHL(64, R(RDX), Imm8(16)); // s64 res = acc - ax; @@ -1212,11 +1216,10 @@ void DSPEmitter::movr(const UDSPInstruction opc) #ifdef _M_X64 u8 areg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - u16 *sregp = reg_ptr(sreg); // s64 acc = (s16)g_dsp.r[sreg]; // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RAX, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(sreg, RAX, SIGN); // acc <<= 16; SHL(64, R(RAX), Imm8(16)); // acc &= ~0xffff; diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp 
b/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp index 2cf41b61c5..56f5edcd9a 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp @@ -25,30 +25,17 @@ using namespace Gen; -const int GetCodeSize(void(*jitCode)(const UDSPInstruction, DSPEmitter&), const UDSPInstruction opc, DSPEmitter &emitter) +template +static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) { - u16 pc = g_dsp.pc; - const u8* ptr = emitter.GetCodePtr(); - jitCode(opc, emitter); - //emitter.JMP(emitter.GetCodePtr()); - int size = (int)(emitter.GetCodePtr() - ptr); - emitter.SetCodePtr((u8*)ptr); - g_dsp.pc = pc; - return size; -} - -const u8* CheckCondition(DSPEmitter& emitter, u8 cond, u8 skipCodeSize) -{ - if (cond == 0xf) // Always true. - return NULL; - //emitter.INT3(); + u8 cond = opc & 0xf; + if (cond == 0xf) {// Always true. + jitCode(opc,emitter); + return; + } FixupBranch skipCode2; -#ifdef _M_IX86 // All32 - emitter.MOV(16, R(EAX), M(&g_dsp.r.sr)); -#else - emitter.MOV(64, R(RAX), ImmPtr(&g_dsp.r.sr)); - emitter.MOV(16, R(EAX), MatR(RAX)); -#endif + emitter.dsp_op_read_reg(DSP_REG_SR, RAX); + DSPJitRegCache c2(emitter.gpr); switch(cond) { case 0x0: // GE - Greater Equal @@ -61,19 +48,17 @@ const u8* CheckCondition(DSPEmitter& emitter, u8 cond, u8 skipCodeSize) emitter.SHR(16, R(EAX), Imm8(1)); //SR_OVERFLOW flag emitter.NOT(16, R(EAX)); emitter.XOR(16, R(EAX), R(EDX)); - emitter.TEST(16, R(EAX), Imm16(1)); - if (cond < 0x2) + if (cond < 0x2) { + emitter.TEST(16, R(EAX), Imm16(1)); break; - + } + c2 = emitter.gpr; + emitter.TEST(16, R(EAX), Imm16(1)); + //LE: problem in here, half the tests fail skipCode2 = emitter.J_CC(CC_NE); //skipCode2 = emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); -#ifdef _M_IX86 // All32 - emitter.MOV(16, R(EAX), M(&g_dsp.r.sr)); -#else - emitter.MOV(64, R(RAX), ImmPtr(&g_dsp.r.sr)); - emitter.MOV(16, R(EAX), MatR(RAX)); -#endif + emitter.dsp_op_read_reg(DSP_REG_SR, RAX); emitter.TEST(16, 
R(EAX), Imm16(SR_ARITH_ZERO)); break; case 0x4: // NZ - Not Zero @@ -107,6 +92,7 @@ const u8* CheckCondition(DSPEmitter& emitter, u8 cond, u8 skipCodeSize) emitter.SETcc(CC_E, R(EAX)); emitter.TEST(8, R(EAX), R(EAX)); break; + //c2 = emitter.gpr; //emitter.TEST(16, R(EAX), Imm16(SR_OVER_S32 | SR_TOP2BITS)); //skipCode2 = emitter.J_CC((CCFlags)(CC_E + (cond & 1))); //emitter.TEST(16, R(EAX), Imm16(SR_ARITH_ZERO)); @@ -120,36 +106,22 @@ const u8* CheckCondition(DSPEmitter& emitter, u8 cond, u8 skipCodeSize) emitter.TEST(16, R(EAX), Imm16(SR_OVERFLOW)); break; } + DSPJitRegCache c1(emitter.gpr); FixupBranch skipCode = cond == 0xe ? emitter.J_CC(CC_E) : emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); - const u8* res = emitter.GetCodePtr(); - emitter.NOP(skipCodeSize); + jitCode(opc,emitter); + emitter.gpr.flushRegs(c1); emitter.SetJumpTarget(skipCode); - if ((cond | 1) == 0x3) // || (cond | 1) == 0xb) + if ((cond | 1) == 0x3) {// || (cond | 1) == 0xb) + emitter.gpr.flushRegs(c2); emitter.SetJumpTarget(skipCode2); - return res; + } else { + c2.drop(); + } } -template -void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) +static void WriteBranchExit(DSPEmitter& emitter) { - static const int codeSize = GetCodeSize(jitCode, opc, emitter); - //emitter.INT3(); - const u8* codePtr = CheckCondition(emitter, opc & 0xf, codeSize); - //const u8* afterSkip = emitter.GetCodePtr(); - if (codePtr != NULL) - emitter.SetCodePtr((u8*)codePtr); - jitCode(opc, emitter); - //if (codePtr != NULL) - //{ - // emitter.JMP(afterSkip + 4 + sizeof(void*)); - // emitter.SetCodePtr((u8*)afterSkip); - // emitter.ADD(16, M(&g_dsp.pc), Imm8(1)); //4 bytes + pointer - //} -} - -void WriteBranchExit(DSPEmitter& emitter) -{ - // ABI_RestoreStack(0); + emitter.SaveDSPRegs(); emitter.ABI_PopAllCalleeSavedRegsAndAdjustStack(); if (DSPAnalyzer::code_flags[emitter.startAddr] & DSPAnalyzer::CODE_IDLE_SKIP) { @@ -162,13 +134,14 @@ void WriteBranchExit(DSPEmitter& emitter) emitter.RET(); } -void 
WriteBlockLink(DSPEmitter& emitter, u16 dest) +static void WriteBlockLink(DSPEmitter& emitter, u16 dest) { // Jump directly to the called block if it has already been compiled. if (!(dest >= emitter.startAddr && dest <= emitter.compilePC)) { if (emitter.blockLinks[dest] != 0 ) { + emitter.gpr.flushRegs(); #ifdef _M_IX86 // All32 // Check if we have enough cycles to execute the next block emitter.MOV(16, R(ESI), M(&cyclesLeft)); @@ -177,20 +150,15 @@ void WriteBlockLink(DSPEmitter& emitter, u16 dest) emitter.SUB(16, R(ESI), Imm16(emitter.blockSize[emitter.startAddr])); emitter.MOV(16, M(&cyclesLeft), R(ESI)); - emitter.JMPptr(M(&emitter.blockLinks[dest])); - - emitter.SetJumpTarget(notEnoughCycles); #else // Check if we have enough cycles to execute the next block emitter.CMP(16, R(R12), Imm16(emitter.blockSize[emitter.startAddr] + emitter.blockSize[dest])); FixupBranch notEnoughCycles = emitter.J_CC(CC_BE); emitter.SUB(16, R(R12), Imm16(emitter.blockSize[emitter.startAddr])); - emitter.MOV(64, R(RAX), ImmPtr((void *)emitter.blockLinks[dest])); - emitter.JMPptr(R(RAX)); - - emitter.SetJumpTarget(notEnoughCycles); #endif + emitter.JMP(emitter.blockLinks[dest], true); + emitter.SetJumpTarget(notEnoughCycles); } else { @@ -238,17 +206,14 @@ void DSPEmitter::jcc(const UDSPInstruction opc) void r_jmprcc(const UDSPInstruction opc, DSPEmitter& emitter) { u8 reg = (opc >> 5) & 0x7; - u16 *regp = reg_ptr(reg); //reg can only be DSP_REG_ARx and DSP_REG_IXx now, //no need to handle DSP_REG_STx. 
+ emitter.dsp_op_read_reg(reg, RAX, NONE); #ifdef _M_IX86 // All32 - emitter.MOV(16, R(EAX), M(regp)); emitter.MOV(16, M(&g_dsp.pc), R(EAX)); #else - emitter.MOV(64, R(RSI), ImmPtr(regp)); - emitter.MOV(16, R(RSI), MatR(RSI)); - emitter.MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); - emitter.MOV(16, MatR(RAX), R(RSI)); + emitter.MOV(64, R(RSI), ImmPtr(&(g_dsp.pc))); + emitter.MOV(16, MatR(RSI), R(RAX)); #endif WriteBranchExit(emitter); } @@ -308,17 +273,14 @@ void DSPEmitter::call(const UDSPInstruction opc) void r_callr(const UDSPInstruction opc, DSPEmitter& emitter) { u8 reg = (opc >> 5) & 0x7; - u16 *regp = reg_ptr(reg); emitter.MOV(16, R(DX), Imm16(emitter.compilePC + 1)); emitter.dsp_reg_store_stack(DSP_STACK_C); + emitter.dsp_op_read_reg(reg, RAX, NONE); #ifdef _M_IX86 // All32 - emitter.MOV(16, R(EAX), M(regp)); emitter.MOV(16, M(&g_dsp.pc), R(EAX)); #else - emitter.MOV(64, R(RSI), ImmPtr(regp)); - emitter.MOV(16, R(RSI), MatR(RSI)); - emitter.MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); - emitter.MOV(16, MatR(RAX), R(RSI)); + emitter.MOV(64, R(RSI), ImmPtr(&(g_dsp.pc))); + emitter.MOV(16, MatR(RSI), R(RAX)); #endif WriteBranchExit(emitter); } @@ -404,12 +366,7 @@ void DSPEmitter::rti(const UDSPInstruction opc) { // g_dsp.r[DSP_REG_SR] = dsp_reg_load_stack(DSP_STACK_D); dsp_reg_load_stack(DSP_STACK_D); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.sr), R(DX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), R(DX)); -#endif + dsp_op_write_reg(DSP_REG_SR, RDX); // g_dsp.pc = dsp_reg_load_stack(DSP_STACK_C); dsp_reg_load_stack(DSP_STACK_C); #ifdef _M_IX86 // All32 @@ -504,14 +461,8 @@ void DSPEmitter::HandleLoop() void DSPEmitter::loop(const UDSPInstruction opc) { u16 reg = opc & 0x1f; - u16 *regp = reg_ptr(reg); // u16 cnt = g_dsp.r[reg]; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EDX, M(regp)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EDX, MDisp(R11, PtrOffset(regp, &g_dsp.r))); -#endif + 
dsp_op_read_reg(reg, RDX, ZERO); u16 loop_pc = compilePC + 1; CMP(16, R(EDX), Imm16(0)); @@ -575,14 +526,8 @@ void DSPEmitter::loopi(const UDSPInstruction opc) void DSPEmitter::bloop(const UDSPInstruction opc) { u16 reg = opc & 0x1f; - u16* regp = reg_ptr(reg); // u16 cnt = g_dsp.r[reg]; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EDX, M(regp)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EDX, MDisp(R11, PtrOffset(regp, &g_dsp.r))); -#endif + dsp_op_read_reg(reg, RDX, ZERO); u16 loop_pc = dsp_imem_read(compilePC + 1); CMP(16, R(EDX), Imm16(0)); @@ -609,7 +554,9 @@ void DSPEmitter::bloop(const UDSPInstruction opc) MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size)); #endif + DSPJitRegCache c(gpr); WriteBranchExit(*this); + gpr.flushRegs(c,false); SetJumpTarget(exit); } diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitCCUtil.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitCCUtil.cpp index 196fdb198c..620c2a2c43 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitCCUtil.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitCCUtil.cpp @@ -32,18 +32,20 @@ using namespace Gen; void DSPEmitter::Update_SR_Register(Gen::X64Reg val) { #ifdef _M_X64 + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); // // 0x04 // if (_Value == 0) g_dsp.r[DSP_REG_SR] |= SR_ARITH_ZERO; CMP(64, R(val), Imm8(0)); FixupBranch notZero = J_CC(CC_NZ); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_ARITH_ZERO)); + OR(16, sr_reg, Imm16(SR_ARITH_ZERO)); SetJumpTarget(notZero); // // 0x08 // if (_Value < 0) g_dsp.r[DSP_REG_SR] |= SR_SIGN; CMP(64, R(val), Imm8(0)); FixupBranch greaterThanEqual = J_CC(CC_GE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_SIGN)); + OR(16, sr_reg, Imm16(SR_SIGN)); SetJumpTarget(greaterThanEqual); // // 0x10 @@ -51,7 +53,7 @@ void DSPEmitter::Update_SR_Register(Gen::X64Reg val) MOVSX(64, 32, RDX, R(val)); CMP(64, R(RDX), R(val)); FixupBranch noOverS32 = J_CC(CC_E); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), 
Imm16(SR_OVER_S32)); + OR(16, sr_reg, Imm16(SR_OVER_S32)); SetJumpTarget(noOverS32); // // 0x20 - Checks if top bits of m are equal @@ -63,8 +65,9 @@ void DSPEmitter::Update_SR_Register(Gen::X64Reg val) FixupBranch cC = J_CC(CC_NE); SetJumpTarget(zeroC); // g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS; - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_TOP2BITS)); + OR(16, sr_reg, Imm16(SR_TOP2BITS)); SetJumpTarget(cC); + gpr.putReg(DSP_REG_SR); #endif } @@ -75,7 +78,10 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) { #ifdef _M_X64 // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_CMP_MASK)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + AND(16, sr_reg, Imm16(~SR_CMP_MASK)); + gpr.putReg(DSP_REG_SR); Update_SR_Register(val); #endif } @@ -86,8 +92,10 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) { #ifdef _M_X64 + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_CMP_MASK)); + AND(16, sr_reg, Imm16(~SR_CMP_MASK)); CMP(64, R(RCX), R(val)); @@ -95,7 +103,7 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_CARRY; // Carry = (acc>res) FixupBranch noCarry = J_CC(CC_BE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_CARRY)); + OR(16, sr_reg, Imm16(SR_CARRY)); SetJumpTarget(noCarry); // 0x02 and 0x80 @@ -107,9 +115,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) AND(64, R(RCX), R(RDX)); CMP(64, R(RCX), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); + OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); + gpr.putReg(DSP_REG_SR); Update_SR_Register(val); #endif } @@ -120,8 +129,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) void 
DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) { #ifdef _M_X64 + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_CMP_MASK)); + AND(16, sr_reg, Imm16(~SR_CMP_MASK)); CMP(64, R(RCX), R(val)); @@ -129,7 +140,7 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_CARRY; // Carry2 = (acc>=res) FixupBranch noCarry2 = J_CC(CC_B); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_CARRY)); + OR(16, sr_reg, Imm16(SR_CARRY)); SetJumpTarget(noCarry2); // 0x02 and 0x80 @@ -141,8 +152,9 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) AND(64, R(RCX), R(RDX)); CMP(64, R(RCX), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); + OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); + gpr.putReg(DSP_REG_SR); Update_SR_Register(); #endif @@ -164,20 +176,22 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) { #ifdef _M_X64 - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_CMP_MASK)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + AND(16, sr_reg, Imm16(~SR_CMP_MASK)); // // 0x04 // if (_Value == 0) g_dsp.r[DSP_REG_SR] |= SR_ARITH_ZERO; CMP(64, R(val), Imm8(0)); FixupBranch notZero = J_CC(CC_NZ); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_ARITH_ZERO)); + OR(16, sr_reg, Imm16(SR_ARITH_ZERO)); SetJumpTarget(notZero); // // 0x08 // if (_Value < 0) g_dsp.r[DSP_REG_SR] |= SR_SIGN; CMP(64, R(val), Imm8(0)); FixupBranch greaterThanEqual = J_CC(CC_GE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_SIGN)); + OR(16, sr_reg, Imm16(SR_SIGN)); SetJumpTarget(greaterThanEqual); // // 0x20 - Checks if top bits of m are equal @@ -186,15 +200,16 @@ void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) 
SHR(16, R(val), Imm8(14)); CMP(16, R(val), Imm16(0)); FixupBranch nZero = J_CC(CC_NE); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_TOP2BITS)); + OR(16, sr_reg, Imm16(SR_TOP2BITS)); FixupBranch cC = J(); SetJumpTarget(nZero); CMP(16, R(val), Imm16(3)); FixupBranch notThree = J_CC(CC_NE); // g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS; - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_TOP2BITS)); + OR(16, sr_reg, Imm16(SR_TOP2BITS)); SetJumpTarget(notThree); SetJumpTarget(cC); + gpr.putReg(DSP_REG_SR); #endif } @@ -204,16 +219,19 @@ void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) void DSPEmitter::Update_SR_Register16_OverS32(Gen::X64Reg val) { #ifdef _M_X64 - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~SR_CMP_MASK)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + AND(16, sr_reg, Imm16(~SR_CMP_MASK)); // // 0x10 // if (_Value != (s32)_Value) g_dsp.r[DSP_REG_SR] |= SR_OVER_S32; MOVSX(64, 32, RSI, R(val)); CMP(64, R(RSI), R(val)); FixupBranch noOverS32 = J_CC(CC_E); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_OVER_S32)); + OR(16, sr_reg, Imm16(SR_OVER_S32)); SetJumpTarget(noOverS32); + gpr.putReg(DSP_REG_SR); // // 0x20 - Checks if top bits of m are equal // if ((((u16)_Value >> 14) == 0) || (((u16)_Value >> 14) == 3)) //AND(32, R(val), Imm32(0xc0000000)); diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitExtOps.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitExtOps.cpp index 9493c2857b..084385e2e4 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitExtOps.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitExtOps.cpp @@ -79,29 +79,9 @@ void DSPEmitter::s(const UDSPInstruction opc) { u8 dreg = opc & 0x3; u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; - u16 *sregp = 0; - switch(sreg) { - case DSP_REG_ACL0: - case DSP_REG_ACL1: - sregp = &(g_dsp.r.ac[sreg-DSP_REG_ACL0].l); - break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - sregp = &(g_dsp.r.ac[sreg-DSP_REG_ACM0].m); - break; - default: - sregp = NULL; - break; - } // u16 addr = g_dsp.r[dest]; 
-#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); - MOVZX(32, 16, ECX, M(sregp)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOVZX(64, 16, ECX, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); + dsp_op_read_reg(sreg, RCX, ZERO); // u16 val = g_dsp.r[src]; dmem_write(); increment_addr_reg(dreg); @@ -115,28 +95,8 @@ void DSPEmitter::sn(const UDSPInstruction opc) { u8 dreg = opc & 0x3; u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; - u16 *sregp = 0; - switch(sreg) { - case DSP_REG_ACL0: - case DSP_REG_ACL1: - sregp = &(g_dsp.r.ac[sreg-DSP_REG_ACL0].l); - break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - sregp = &(g_dsp.r.ac[sreg-DSP_REG_ACM0].m); - break; - default: - sregp = NULL; - break; - } -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); - MOVZX(32, 16, ECX, M(sregp)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOVZX(64, 16, ECX, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); + dsp_op_read_reg(sreg, RCX, ZERO); dmem_write(); increase_addr_reg(dreg); } @@ -208,14 +168,8 @@ void DSPEmitter::ls(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[3])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -235,14 +189,8 @@ void DSPEmitter::lsn(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[3])); - 
MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -261,14 +209,8 @@ void DSPEmitter::lsm(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[3])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -288,14 +230,8 @@ void DSPEmitter::lsnm(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[3])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -313,14 +249,8 @@ void DSPEmitter::sl(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[0])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[0]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); + get_acc_m(sreg, 
ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR3); @@ -339,14 +269,8 @@ void DSPEmitter::sln(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[0])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[0]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR3); @@ -365,14 +289,8 @@ void DSPEmitter::slm(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[0])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[0]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR3); @@ -391,14 +309,8 @@ void DSPEmitter::slnm(const UDSPInstruction opc) { u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[0])); - MOVZX(32, 16, ECX, M(&g_dsp.r.ac[sreg].m)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[0]))); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); -#endif + dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); + get_acc_m(sreg, ECX, false); dmem_write(); pushExtValueFromMem(dreg, DSP_REG_AR3); @@ -427,22 +339,19 @@ void DSPEmitter::ld(const UDSPInstruction opc) pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), 
M(&g_dsp.r.ar[sreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + dsp_op_read_reg(sreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); - FixupBranch after = J(); + gpr.flushRegs(c); + FixupBranch after = J(true); SetJumpTarget(not_equal); // else pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increment_addr_reg(sreg); @@ -451,22 +360,19 @@ void DSPEmitter::ld(const UDSPInstruction opc) pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[dreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + dsp_op_read_reg(dreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); FixupBranch not_equal = J_CC(CC_NE, true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); + gpr.flushRegs(c); FixupBranch after = J(true); // else SetJumpTarget(not_equal); pushExtValueFromMem2(rreg + DSP_REG_AXL0, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increment_addr_reg(dreg); @@ -486,23 +392,20 @@ void DSPEmitter::ldn(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); - // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - 
MOV(16, R(ESI), M(&g_dsp.r.ar[sreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { + dsp_op_read_reg(sreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); - FixupBranch after = J(); + gpr.flushRegs(c); + FixupBranch after = J(true); SetJumpTarget(not_equal); // else pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increase_addr_reg(sreg); @@ -510,22 +413,19 @@ void DSPEmitter::ldn(const UDSPInstruction opc) pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[dreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + dsp_op_read_reg(dreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); - FixupBranch after = J(); // else + gpr.flushRegs(c); + FixupBranch after = J(true); // else SetJumpTarget(not_equal); pushExtValueFromMem2(rreg + DSP_REG_AXL0, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increase_addr_reg(dreg); @@ -545,23 +445,20 @@ void DSPEmitter::ldm(const UDSPInstruction opc) if (sreg != 
DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); - // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[sreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { + dsp_op_read_reg(sreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); - FixupBranch after = J(); + gpr.flushRegs(c); + FixupBranch after = J(true); SetJumpTarget(not_equal); // else pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increment_addr_reg(sreg); @@ -569,22 +466,19 @@ void DSPEmitter::ldm(const UDSPInstruction opc) pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[dreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + dsp_op_read_reg(dreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); - FixupBranch after = J(); // else + gpr.flushRegs(c); + FixupBranch after = J(true); // else SetJumpTarget(not_equal); pushExtValueFromMem2(rreg + DSP_REG_AXL0, 
DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increment_addr_reg(dreg); @@ -604,23 +498,20 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); - // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[sreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { + dsp_op_read_reg(sreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); - FixupBranch after = J(); + gpr.flushRegs(c); + FixupBranch after = J(true); SetJumpTarget(not_equal); // else pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increase_addr_reg(sreg); @@ -628,22 +519,19 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { -#ifdef _M_IX86 // All32 - MOV(16, R(ESI), M(&g_dsp.r.ar[dreg])); - MOV(16, R(EDI), M(&g_dsp.r.ar[3])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(ESI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); - MOV(16, R(EDI), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[3]))); -#endif + dsp_op_read_reg(dreg, RSI, NONE); + dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); SHR(16, R(ESI), Imm8(10)); SHR(16, R(EDI), Imm8(10)); + DSPJitRegCache c(gpr); CMP(16, R(ESI), R(EDI)); - FixupBranch not_equal = J_CC(CC_NE); + FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); - 
FixupBranch after = J(); // else + gpr.flushRegs(c); + FixupBranch after = J(true); // else SetJumpTarget(not_equal); pushExtValueFromMem2(rreg + DSP_REG_AXL0, DSP_REG_AR3); + gpr.flushRegs(c); SetJumpTarget(after); increase_addr_reg(dreg); @@ -656,24 +544,13 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) // Push value from g_dsp.r[sreg] into EBX and stores the destinationindex in // storeIndex void DSPEmitter::pushExtValueFromReg(u16 dreg, u16 sreg) { - u16 *sregp = reg_ptr(sreg); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EBX, M(sregp)); -#else - MOV(64, R(RBX), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EBX, MDisp(RBX, PtrOffset(sregp, &g_dsp.r))); -#endif + dsp_op_read_reg(sreg, RBX, ZERO); storeIndex = dreg; } void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[sreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); -#endif + dsp_op_read_reg(sreg, RCX, ZERO); dmem_read(); MOVZX(32, 16, EBX, R(EAX)); @@ -682,12 +559,7 @@ void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { void DSPEmitter::pushExtValueFromMem2(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[sreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, ECX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[sreg]))); -#endif + dsp_op_read_reg(sreg, RCX, ZERO); dmem_read(); SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); @@ -704,16 +576,11 @@ void DSPEmitter::popExtValueToReg() { // [nakeee] the or case never happens in real // [nakeee] it's just how the hardware works so we added it if (storeIndex != -1) { - u16 *dregp = reg_ptr(storeIndex); -#ifdef _M_IX86 // All32 - MOV(16, M(dregp), R(EBX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, PtrOffset(dregp, &g_dsp.r)), R(EBX)); -#endif + dsp_op_write_reg(storeIndex, RBX); if (storeIndex >= DSP_REG_ACM0 
&& storeIndex2 == -1) { TEST(32, R(EBX), Imm32(SR_40_MODE_BIT << 16)); FixupBranch not_40bit = J_CC(CC_Z); + DSPJitRegCache c(gpr); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ // Sign extend into whole accum. @@ -722,16 +589,10 @@ void DSPEmitter::popExtValueToReg() { SHR(32, R(EAX), Imm8(16)); //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000; //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[storeIndex - DSP_REG_ACM0].h), - R(EAX)); - MOV(16, M(&g_dsp.r.ac[storeIndex - DSP_REG_ACM0].l), - Imm16(0)); -#else - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[storeIndex - DSP_REG_ACM0].h)), R(EAX)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[storeIndex - DSP_REG_ACM0].l)), Imm16(0)); -#endif + set_acc_h(storeIndex - DSP_REG_ACM0, R(RAX)); + set_acc_l(storeIndex - DSP_REG_ACM0, Imm16(0)); //} + gpr.flushRegs(c); SetJumpTarget(not_40bit); } } @@ -740,13 +601,7 @@ void DSPEmitter::popExtValueToReg() { if (storeIndex2 != -1) { SHR(32, R(EBX), Imm8(16)); - u16 *dregp = reg_ptr(storeIndex2); -#ifdef _M_IX86 // All32 - MOV(16, M(dregp), R(EBX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, PtrOffset(dregp, &g_dsp.r)), R(EBX)); -#endif + dsp_op_write_reg(storeIndex2, RBX); } storeIndex2 = -1; } diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp index 46ba069f4b..bd4a320bb3 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp @@ -34,16 +34,9 @@ using namespace Gen; void DSPEmitter::srs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; - u16 *regp = reg_ptr(reg); //u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(regp)); - MOVZX(32, 8, EAX, M(&g_dsp.r.cr)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RCX, MDisp(R11, PtrOffset(regp, &g_dsp.r))); - MOVZX(64, 8, RAX, MDisp(R11, 
STRUCT_OFFSET(g_dsp.r, cr))); -#endif + dsp_op_read_reg(reg, RCX, ZERO); + dsp_op_read_reg(DSP_REG_CR, RAX, ZERO); SHL(16, R(EAX), Imm8(8)); OR(8, R(EAX), Imm8(opc & 0xFF)); dmem_write(); @@ -57,23 +50,12 @@ void DSPEmitter::srs(const UDSPInstruction opc) void DSPEmitter::lrs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; - u16 *regp = reg_ptr(reg); //u16 addr = (g_dsp.r[DSP_REG_CR] << 8) | (opc & 0xFF); -#ifdef _M_IX86 // All32 - MOVZX(32, 8, ECX, M(&g_dsp.r.cr)); + dsp_op_read_reg(DSP_REG_CR, RCX, ZERO); SHL(16, R(ECX), Imm8(8)); OR(8, R(ECX), Imm8(opc & 0xFF)); dmem_read(); - MOV(16, M(regp), R(EAX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 8, RCX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, cr))); - SHL(16, R(ECX), Imm8(8)); - OR(8, R(ECX), Imm8(opc & 0xFF)); - dmem_read(); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, PtrOffset(regp, &g_dsp.r)), R(RAX)); -#endif + dsp_op_write_reg(reg, RAX); dsp_conditional_extend_accum(reg); } @@ -194,12 +176,7 @@ void DSPEmitter::srr(const UDSPInstruction opc) u8 sreg = opc & 0x1f; dsp_op_read_reg(sreg, ECX); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(); } @@ -214,12 +191,7 @@ void DSPEmitter::srrd(const UDSPInstruction opc) u8 sreg = opc & 0x1f; dsp_op_read_reg(sreg, ECX); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(); decrement_addr_reg(dreg); } @@ -235,12 +207,7 @@ void DSPEmitter::srri(const UDSPInstruction opc) u8 sreg = opc & 0x1f; dsp_op_read_reg(sreg, ECX); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RAX, 
MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(); increment_addr_reg(dreg); } @@ -256,12 +223,7 @@ void DSPEmitter::srrn(const UDSPInstruction opc) u8 sreg = opc & 0x1f; dsp_op_read_reg(sreg, ECX); -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[dreg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[dreg]))); -#endif + dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(); increase_addr_reg(dreg); } @@ -275,19 +237,9 @@ void DSPEmitter::ilrr(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RCX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); -#endif + dsp_op_read_reg(reg, RCX, ZERO); imem_read(); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[dreg].m), R(EAX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[dreg].m)), R(RAX)); -#endif + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); } @@ -300,19 +252,9 @@ void DSPEmitter::ilrrd(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RCX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); -#endif + dsp_op_read_reg(reg, RCX, ZERO); imem_read(); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[dreg].m), R(EAX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[dreg].m)), R(RAX)); -#endif + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); decrement_addr_reg(reg); } @@ -326,19 +268,9 @@ void DSPEmitter::ilrri(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[reg])); -#else - // MOV(64, R(R11), 
ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RCX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); -#endif + dsp_op_read_reg(reg, RCX, ZERO); imem_read(); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[dreg].m), R(EAX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[dreg].m)), R(RAX)); -#endif + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); increment_addr_reg(reg); } @@ -353,19 +285,9 @@ void DSPEmitter::ilrrn(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; -#ifdef _M_IX86 // All32 - MOVZX(32, 16, ECX, M(&g_dsp.r.ar[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RCX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); -#endif + dsp_op_read_reg(reg, RCX, ZERO); imem_read(); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[dreg].m), R(EAX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[dreg].m)), R(RAX)); -#endif + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); increase_addr_reg(reg); } diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp index 22f9dc1e3f..2c8cc790de 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp @@ -25,9 +25,9 @@ using namespace Gen; //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//R10 = &g_dsp.reg_stack[stack_reg][0] -//R11 = &g_dsp.r //CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: +//R11 = &g_dsp.r void DSPEmitter::dsp_reg_stack_push(int stack_reg) { //g_dsp.reg_stack_ptr[stack_reg]++; @@ -35,15 +35,16 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg) #ifdef _M_IX86 // All32 MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); #else - MOV(64, R(R10), ImmPtr(g_dsp.reg_stack_ptr)); - MOV(8, R(AL), MDisp(R10, stack_reg)); + MOV(8, R(AL), MDisp(R11, PtrOffset(&g_dsp.reg_stack_ptr[stack_reg], + &g_dsp.r))); #endif ADD(8, R(AL), Imm8(1)); AND(8, R(AL), 
Imm8(DSP_STACK_MASK)); #ifdef _M_IX86 // All32 MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); #else - MOV(8, MDisp(R10, stack_reg), R(AL)); + MOV(8, MDisp(R11, PtrOffset(&g_dsp.reg_stack_ptr[stack_reg], + &g_dsp.r)), R(AL)); #endif //g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg]; @@ -55,24 +56,25 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg) // MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(CX), MDisp(R11, STRUCT_OFFSET(g_dsp.r, st[stack_reg]))); MOVZX(64, 8, RAX, R(AL)); - MOV(64, R(R10), ImmPtr(&g_dsp.reg_stack[stack_reg][0])); - MOV(16, MComplex(R10, RAX, 2, 0), R(CX)); + MOV(16, MComplex(R11, RAX, 2, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],&g_dsp.r)), + R(CX)); #endif } //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//R10 = &g_dsp.reg_stack[stack_reg][0] -//R11 = &g_dsp.r //CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: +//R11 = &g_dsp.r void DSPEmitter::dsp_reg_stack_pop(int stack_reg) { //g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]]; #ifdef _M_IX86 // All32 MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); #else - MOV(64, R(R10), ImmPtr(g_dsp.reg_stack_ptr)); - MOV(8, R(AL), MDisp(R10, stack_reg)); + MOV(8, R(AL), + MDisp(R11, PtrOffset(&g_dsp.reg_stack_ptr[stack_reg],&g_dsp.r))); #endif #ifdef _M_IX86 // All32 MOVZX(32, 8, EAX, R(AL)); @@ -80,9 +82,9 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg) MOV(16, M(&g_dsp.r.st[stack_reg]), R(CX)); #else MOVZX(64, 8, RAX, R(AL)); - MOV(64, R(R10), ImmPtr(&g_dsp.reg_stack[stack_reg][0])); - MOV(16, R(CX), MComplex(R10, RAX, 2, 0)); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOV(16, R(CX), MComplex(R11, RAX, 2, + PtrOffset(&g_dsp.reg_stack[stack_reg][0], + &g_dsp.r))); MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, st[stack_reg])), R(CX)); #endif @@ -93,8 +95,8 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg) #ifdef _M_IX86 // All32 MOV(8, 
M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); #else - MOV(64, R(R10), ImmPtr(g_dsp.reg_stack_ptr)); - MOV(8, MDisp(R10, stack_reg), R(AL)); + MOV(8, MDisp(R11, PtrOffset(&g_dsp.reg_stack_ptr[stack_reg],&g_dsp.r)), + R(AL)); #endif } @@ -144,17 +146,10 @@ void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val) void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) { switch (reg & 0x1f) { - // 8-bit sign extended registers. Should look at prod.h too... + // 8-bit sign extended registers. case DSP_REG_ACH0: case DSP_REG_ACH1: - // sign extend from the bottom 8 bits. - MOVSX(16, 8, host_sreg, R(host_sreg)); -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[reg-DSP_REG_ACH0].h), R(host_sreg)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACH0].h)), R(host_sreg)); -#endif + gpr.writeReg(reg, R(host_sreg)); break; // Stack registers. @@ -166,17 +161,9 @@ void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) break; default: - { - u16 *regp = reg_ptr(reg); -#ifdef _M_IX86 // All32 - MOV(16, M(regp), R(host_sreg)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, PtrOffset(regp, &g_dsp.r)), R(host_sreg)); -#endif + gpr.writeReg(reg, R(host_sreg)); break; } - } } void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) @@ -185,15 +172,8 @@ void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) // 8-bit sign extended registers. Should look at prod.h too... case DSP_REG_ACH0: case DSP_REG_ACH1: - // sign extend from the bottom 8 bits. -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[reg-DSP_REG_ACH0].h), Imm16((u16)(s16)(s8)(u8)val)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACH0].h)), Imm16((u16)(s16)(s8)(u8)val)); -#endif + gpr.writeReg(reg, Imm16((u16)(s16)(s8)(u8)val)); break; - // Stack registers. 
case DSP_REG_ST0: case DSP_REG_ST1: @@ -203,17 +183,9 @@ void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) break; default: - { - u16 *regp = reg_ptr(reg); -#ifdef _M_IX86 // All32 - MOV(16, M(regp), Imm16(val)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, PtrOffset(regp, &g_dsp.r)), Imm16(val)); -#endif + gpr.writeReg(reg, Imm16(val)); break; } - } } void DSPEmitter::dsp_conditional_extend_accum(int reg) @@ -223,37 +195,25 @@ void DSPEmitter::dsp_conditional_extend_accum(int reg) case DSP_REG_ACM0: case DSP_REG_ACM1: { -#ifdef _M_IX86 // All32 - MOV(16, R(EAX), M(&g_dsp.r.sr)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(EAX), MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr))); -#endif - TEST(16, R(EAX), Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + DSPJitRegCache c(gpr); + TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); + FixupBranch not_40bit = J_CC(CC_Z,true); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ // Sign extend into whole accum. //u16 val = g_dsp.r[reg]; -#ifdef _M_IX86 // All32 - MOVSX(32, 16, EAX, M(&g_dsp.r.ac[reg-DSP_REG_ACM0].m)); -#else - MOVSX(64, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACM0].m))); -#endif + get_acc_m(reg - DSP_REG_ACM0, EAX); SHR(32, R(EAX), Imm8(16)); //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 
0xFFFF : 0x0000; //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[reg - DSP_REG_ACM0].h), - R(EAX)); - MOV(16, M(&g_dsp.r.ac[reg - DSP_REG_ACM0].l), - Imm16(0)); -#else - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACM0].h)), R(EAX)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACM0].l)), Imm16(0)); -#endif + set_acc_h(reg - DSP_REG_ACM0, R(RAX)); + set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); //} + gpr.flushRegs(c); SetJumpTarget(not_40bit); + gpr.putReg(DSP_REG_SR); } } } @@ -265,55 +225,57 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) case DSP_REG_ACM0: case DSP_REG_ACM1: { -#ifdef _M_IX86 // All32 - MOV(16, R(EAX), M(&g_dsp.r.sr)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(EAX), MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr))); -#endif - TEST(16, R(EAX), Imm16(SR_40_MODE_BIT)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + DSPJitRegCache c(gpr); + TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); FixupBranch not_40bit = J_CC(CC_Z); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ // Sign extend into whole accum. //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 
0xFFFF : 0x0000; //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ac[reg - DSP_REG_ACM0].h), - Imm16((val & 0x8000)?0xffff:0x0000)); - MOV(16, M(&g_dsp.r.ac[reg - DSP_REG_ACM0].l), - Imm16(0)); -#else - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACM0].h)), - Imm16((val & 0x8000)?0xffff:0x0000)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[reg-DSP_REG_ACM0].l)), - Imm16(0)); -#endif + set_acc_h(reg - DSP_REG_ACM0, Imm16((val & 0x8000)?0xffff:0x0000)); + set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); //} + gpr.flushRegs(c); SetJumpTarget(not_40bit); + gpr.putReg(DSP_REG_SR); } } } -void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg) +void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend) { switch (reg & 0x1f) { case DSP_REG_ST0: case DSP_REG_ST1: case DSP_REG_ST2: case DSP_REG_ST3: - return dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg); - default: - { - u16 *regp = reg_ptr(reg); - //return g_dsp.r[reg]; + dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg); + switch(extend) { + case SIGN: #ifdef _M_IX86 // All32 - MOV(16, R(host_dreg), M(regp)); + MOVSX(32, 16, host_dreg, R(host_dreg)); #else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(host_dreg), MDisp(R11, PtrOffset(regp, &g_dsp.r))); + MOVSX(64, 16, host_dreg, R(host_dreg)); #endif - } + break; + case ZERO: +#ifdef _M_IX86 // All32 + MOVZX(32, 16, host_dreg, R(host_dreg)); +#else + MOVZX(64, 16, host_dreg, R(host_dreg)); +#endif + break; + case NONE: + default: + break; + } + return; + default: + gpr.readReg(reg, host_dreg, extend); + return; } } @@ -424,12 +386,10 @@ void DSPEmitter::addarn(const UDSPInstruction opc) void DSPEmitter::setCompileSR(u16 bit) { // g_dsp.r[DSP_REG_SR] |= bit -#ifdef _M_IX86 // All32 - OR(16, M(&g_dsp.r.sr), Imm16(bit)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - OR(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(bit)); -#endif + OpArg sr_reg; + 
gpr.getReg(DSP_REG_SR,sr_reg); + OR(16, sr_reg, Imm16(bit)); + gpr.putReg(DSP_REG_SR); compileSR |= bit; } @@ -437,12 +397,10 @@ void DSPEmitter::setCompileSR(u16 bit) { void DSPEmitter::clrCompileSR(u16 bit) { // g_dsp.r[DSP_REG_SR] &= bit -#ifdef _M_IX86 // All32 - AND(16, M(&g_dsp.r.sr), Imm16(~bit)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - AND(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(~bit)); -#endif + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + AND(16, sr_reg, Imm16(~bit)); + gpr.putReg(DSP_REG_SR); compileSR &= ~bit; } diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp index c37f26717a..0004b33396 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp @@ -41,11 +41,14 @@ void DSPEmitter::multiply() // Conditionally multiply by 2. // if ((g_dsp.r.sr & SR_MUL_MODIFY) == 0) - TEST(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_MUL_MODIFY)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; SHL(64, R(EAX), Imm8(1)); SetJumpTarget(noMult2); + gpr.putReg(DSP_REG_SR); // return prod; #endif } @@ -93,7 +96,9 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) // if ((sign == 1) && (g_dsp.r.sr & SR_MUL_UNSIGNED)) //unsigned - TEST(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_MUL_UNSIGNED)); + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + TEST(16, sr_reg, Imm16(SR_MUL_UNSIGNED)); FixupBranch unsignedMul = J_CC(CC_NZ); // prod = (s16)a * (s16)b; //signed MOVSX(64, 16, RAX, R(RDI)); @@ -135,11 +140,12 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) // Conditionally multiply by 2. 
// if ((g_dsp.r.sr & SR_MUL_MODIFY) == 0) - TEST(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, sr)), Imm16(SR_MUL_MODIFY)); + TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; SHL(64, R(RAX), Imm8(1)); SetJumpTarget(noMult2); + gpr.putReg(DSP_REG_SR); // return prod; } @@ -158,14 +164,12 @@ void DSPEmitter::clrp(const UDSPInstruction opc) { #ifdef _M_X64 // g_dsp.r[DSP_REG_PRODL] = 0x0000; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.l)), Imm16(0x0000)); // g_dsp.r[DSP_REG_PRODM] = 0xfff0; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m)), Imm16(0xfff0)); // g_dsp.r[DSP_REG_PRODH] = 0x00ff; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.h)), Imm16(0x00ff)); // g_dsp.r[DSP_REG_PRODM2] = 0x0010; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m2)), Imm16(0x0010)); + //64bit move to memory does not work. use 2 32bits + MOV(32, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.val)+0), Imm32(0xfff00000U)); + MOV(32, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.val)+4), Imm32(0x001000ffU)); #else Default(opc); #endif @@ -319,8 +323,7 @@ void DSPEmitter::mulaxh(const UDSPInstruction opc) { #ifdef _M_X64 // s64 prod = dsp_multiply(dsp_get_ax_h(0), dsp_get_ax_h(0)); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[0].h))); + dsp_op_read_reg(DSP_REG_AXH0, RSI, SIGN); MOV(64, R(RDI), R(RSI)); multiply(); // dsp_set_long_prod(prod); @@ -342,10 +345,9 @@ void DSPEmitter::mul(const UDSPInstruction opc) u8 sreg = (opc >> 11) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].l))); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); // u16 axh = dsp_get_ax_h(sreg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].h))); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); // s64 prod = dsp_multiply(axh, axl); multiply(); // dsp_set_long_prod(prod); @@ -375,9 +377,9 @@ 
void DSPEmitter::mulac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 axl = dsp_get_ax_l(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].l))); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); // u16 axh = dsp_get_ax_h(sreg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].h))); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); // s64 prod = dsp_multiply(axl, axh); multiply(); // dsp_set_long_prod(prod); @@ -463,14 +465,12 @@ void DSPEmitter::mulx(const UDSPInstruction opc) #ifdef _M_X64 u8 treg = ((opc >> 11) & 0x1); u8 sreg = ((opc >> 12) & 0x1); - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp,&g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp,&g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); @@ -493,17 +493,15 @@ void DSPEmitter::mulxac(const UDSPInstruction opc) u8 rreg = (opc >> 8) & 0x1; u8 treg = (opc >> 11) & 0x1; u8 sreg = (opc >> 12) & 0x1; - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); get_long_acc(rreg, RCX); get_long_prod(); ADD(64, R(RCX), R(RAX)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); @@ -534,15 +532,13 @@ void DSPEmitter::mulxmv(const UDSPInstruction opc) u8 rreg = ((opc >> 8) & 0x1); u8 treg = (opc >> 11) & 0x1; u8 sreg = (opc >> 12) & 0x1; - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); // s64 acc = dsp_get_long_prod(); get_long_prod(RCX); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); @@ -574,15 +570,13 @@ void DSPEmitter::mulxmvz(const UDSPInstruction opc) u8 rreg = (opc >> 8) & 0x1; u8 treg = (opc >> 11) & 0x1; u8 sreg = (opc >> 12) & 0x1; - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); // s64 acc = dsp_get_long_prod_round_prodl(); get_long_prod_round_prodl(RCX); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); @@ -613,10 +607,9 @@ void DSPEmitter::mulc(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, ESI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, EDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -647,9 +640,9 @@ void DSPEmitter::mulcac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -686,9 +679,9 @@ void DSPEmitter::mulcmv(const UDSPInstruction opc) get_long_prod(); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -727,9 +720,9 @@ void DSPEmitter::mulcmvz(const UDSPInstruction opc) get_long_prod_round_prodl(); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, RDI, MDisp(R11, 
STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -759,14 +752,11 @@ void DSPEmitter::maddx(const UDSPInstruction opc) #ifdef _M_X64 u8 treg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_add(val1, val2); multiply_add(); // dsp_set_long_prod(prod); @@ -786,14 +776,11 @@ void DSPEmitter::msubx(const UDSPInstruction opc) #ifdef _M_X64 u8 treg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - u16 *sregp = reg_ptr(DSP_REG_AXL0 + sreg*2); - u16 *tregp = reg_ptr(DSP_REG_AXL1 + treg*2); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - MOVSX(64, 16, RSI, MDisp(R11, PtrOffset(sregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - MOVSX(64, 16, RDI, MDisp(R11, PtrOffset(tregp, &g_dsp.r))); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); // s64 prod = dsp_multiply_sub(val1, val2); multiply_sub(); // dsp_set_long_prod(prod); @@ -814,11 +801,10 @@ void DSPEmitter::maddc(const UDSPInstruction opc) u8 treg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 accm = dsp_get_acc_m(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply_add(accm, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -840,10 +826,9 @@ void DSPEmitter::msubc(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[sreg].m))); + get_acc_m(sreg, ESI); // u16 axh = dsp_get_ax_h(treg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[treg].h))); + dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); // s64 prod = dsp_multiply_sub(accm, axh); multiply_sub(); // dsp_set_long_prod(prod); @@ -863,11 +848,10 @@ void DSPEmitter::madd(const UDSPInstruction opc) #ifdef _M_X64 u8 sreg = (opc >> 8) & 0x1; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 axl = dsp_get_ax_l(sreg); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].l))); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); // u16 axh = dsp_get_ax_h(sreg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].h))); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); // s64 prod = dsp_multiply_add(axl, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -886,12 +870,11 @@ void DSPEmitter::msub(const UDSPInstruction opc) { #ifdef _M_X64 u8 sreg = (opc >> 8) & 0x1; -// + // u16 axl = dsp_get_ax_l(sreg); - // MOV(64, R(R11), 
ImmPtr(&g_dsp.r)); - MOVSX(64, 16, RSI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].l))); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); // u16 axh = dsp_get_ax_h(sreg); - MOVSX(64, 16, RDI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[sreg].h))); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); // s64 prod = dsp_multiply_sub(axl, axh); multiply_sub(); // dsp_set_long_prod(prod); diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.cpp new file mode 100644 index 0000000000..5a08db1982 --- /dev/null +++ b/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.cpp @@ -0,0 +1,540 @@ +// Copyright (C) 2011 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. 
+// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#include "DSPJitRegCache.h" +#include "../DSPEmitter.h" +#include "../DSPMemoryMap.h" + +using namespace Gen; + +static u16 *reg_ptr(int reg) { + switch(reg) { + case DSP_REG_AR0: + case DSP_REG_AR1: + case DSP_REG_AR2: + case DSP_REG_AR3: + return &g_dsp.r.ar[reg - DSP_REG_AR0]; + case DSP_REG_IX0: + case DSP_REG_IX1: + case DSP_REG_IX2: + case DSP_REG_IX3: + return &g_dsp.r.ix[reg - DSP_REG_IX0]; + case DSP_REG_WR0: + case DSP_REG_WR1: + case DSP_REG_WR2: + case DSP_REG_WR3: + return &g_dsp.r.wr[reg - DSP_REG_WR0]; + case DSP_REG_ST0: + case DSP_REG_ST1: + case DSP_REG_ST2: + case DSP_REG_ST3: + return &g_dsp.r.st[reg - DSP_REG_ST0]; + case DSP_REG_ACH0: + case DSP_REG_ACH1: + return &g_dsp.r.ac[reg - DSP_REG_ACH0].h; + case DSP_REG_CR: return &g_dsp.r.cr; + case DSP_REG_SR: return &g_dsp.r.sr; + case DSP_REG_PRODL: return &g_dsp.r.prod.l; + case DSP_REG_PRODM: return &g_dsp.r.prod.m; + case DSP_REG_PRODH: return &g_dsp.r.prod.h; + case DSP_REG_PRODM2: return &g_dsp.r.prod.m2; + case DSP_REG_AXL0: + case DSP_REG_AXL1: + return &g_dsp.r.ax[reg - DSP_REG_AXL0].l; + case DSP_REG_AXH0: + case DSP_REG_AXH1: + return &g_dsp.r.ax[reg - DSP_REG_AXH0].h; + case DSP_REG_ACL0: + case DSP_REG_ACL1: + return &g_dsp.r.ac[reg - DSP_REG_ACL0].l; + case DSP_REG_ACM0: + case DSP_REG_ACM1: + return &g_dsp.r.ac[reg - DSP_REG_ACM0].m; + default: + _assert_msg_(DSPLLE, 0, "cannot happen"); + return NULL; + } +} + +#define ROTATED_REG_ACCS +//#undef ROTATED_REG_ACCS + +DSPJitRegCache::DSPJitRegCache(DSPEmitter &_emitter) + : emitter(_emitter), temporary(false), merged(false) { + for(unsigned int i = 0; i < NUMXREGS; i++) { + xregs[i].guest_reg = DSP_REG_STATIC; + } + xregs[RSP].guest_reg = DSP_REG_STATIC;//stack pointer + xregs[RBX].guest_reg = DSP_REG_STATIC;//extended op backing store + + xregs[RBP].guest_reg = 
DSP_REG_NONE;//definitely usable in dsplle because + //all external calls are protected + +#ifdef _M_X64 + xregs[R8].guest_reg = DSP_REG_STATIC;//acc0 + xregs[R9].guest_reg = DSP_REG_STATIC;//acc1 + xregs[R10].guest_reg = DSP_REG_NONE; + xregs[R11].guest_reg = DSP_REG_STATIC;//&g_dsp.r + xregs[R12].guest_reg = DSP_REG_STATIC;//used for cycle counting + xregs[R13].guest_reg = DSP_REG_NONE; + xregs[R14].guest_reg = DSP_REG_NONE; + xregs[R15].guest_reg = DSP_REG_NONE; +#endif + +#ifdef _M_X64 + acc[0].host_reg = R8; + acc[0].shift = 0; + acc[0].dirty = false; + acc[0].used = false; + acc[0].tmp_reg = INVALID_REG; + + acc[1].host_reg = R9; + acc[1].shift = 0; + acc[1].dirty = false; + acc[1].used = false; + acc[1].tmp_reg = INVALID_REG; +#endif + for(unsigned int i = 0; i < 32; i++) { + regs[i].mem = reg_ptr(i); + regs[i].size = 2; + } +#ifdef _M_X64 + regs[DSP_REG_ACC0_64].mem = &g_dsp.r.ac[0].val; + regs[DSP_REG_ACC0_64].size = 8; + regs[DSP_REG_ACC1_64].mem = &g_dsp.r.ac[1].val; + regs[DSP_REG_ACC1_64].size = 8; + regs[DSP_REG_PROD_64].mem = &g_dsp.r.prod.val; + regs[DSP_REG_PROD_64].size = 8; +#endif + regs[DSP_REG_AX0_32].mem = &g_dsp.r.ax[0].val; + regs[DSP_REG_AX0_32].size = 4; + regs[DSP_REG_AX1_32].mem = &g_dsp.r.ax[1].val; + regs[DSP_REG_AX1_32].size = 4; + for(unsigned int i = 0; i < DSP_REG_MAX_MEM_BACKED+1; i++) { + regs[i].dirty = false; +#ifdef _M_IX86 // All32 + regs[i].loc = M(regs[i].mem); +#else + regs[i].loc = MDisp(R11, PtrOffset(regs[i].mem, &g_dsp.r)); +#endif + } +} + +DSPJitRegCache::DSPJitRegCache(const DSPJitRegCache &cache) + : emitter(cache.emitter), temporary(true), merged(false) +{ + memcpy(xregs,cache.xregs,sizeof(xregs)); +#ifdef _M_X64 + memcpy(acc,cache.acc,sizeof(acc)); +#endif + memcpy(regs,cache.regs,sizeof(regs)); +} + +DSPJitRegCache& DSPJitRegCache::operator=(const DSPJitRegCache &cache) +{ + _assert_msg_(DSPLLE, &emitter == &cache.emitter, "emitter does not match"); + _assert_msg_(DSPLLE, temporary, "register cache not 
temporary??"); + merged = false; + memcpy(xregs,cache.xregs,sizeof(xregs)); +#ifdef _M_X64 + memcpy(acc,cache.acc,sizeof(acc)); +#endif + memcpy(regs,cache.regs,sizeof(regs)); + + return *this; +} + +DSPJitRegCache::~DSPJitRegCache() +{ + _assert_msg_(DSPLLE, !temporary || merged, "temporary cache not merged"); +} + +void DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) +{ + cache.merged = true; + +#ifdef _M_X64 + for(unsigned int i = 0; i < 2; i++) { + if (acc[i].shift > cache.acc[i].shift) { + if (emit) + emitter.ROL(64, R(acc[i].host_reg), + Imm8(acc[i].shift-cache.acc[i].shift)); + acc[i].shift = cache.acc[i].shift; + } + if (acc[i].shift < cache.acc[i].shift) { + if (emit) + emitter.ROR(64, R(acc[i].host_reg), + Imm8(cache.acc[i].shift-acc[i].shift)); + acc[i].shift = cache.acc[i].shift; + } + } +#endif +} + +void DSPJitRegCache::drop() +{ + merged = true; +} + +void DSPJitRegCache::flushRegs() +{ + //also needs to undo any dynamic changes to static allocated regs + //this should have the same effect as + //merge(DSPJitRegCache(emitter)); +#ifdef _M_X64 +#ifdef ROTATED_REG_ACCS + for(unsigned int i = 0; i < 2; i++) { + if (acc[i].shift > 0) { + emitter.ROL(64, R(acc[i].host_reg), + Imm8(acc[i].shift)); + acc[i].shift = 0; + } + _assert_msg_(DSPLLE, !acc[i].used, + "accumulator still in use"); + if (acc[i].used) + emitter.INT3(); + } +#endif +#endif +} + +static u64 ebp_store; + +void DSPJitRegCache::loadStaticRegs() +{ +#ifdef _M_X64 +#ifdef ROTATED_REG_ACCS + emitter.MOV(64, R(R8), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[0].val))); + emitter.MOV(64, R(R9), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[1].val))); +#endif + emitter.MOV(64, MDisp(R11, PtrOffset(&ebp_store, &g_dsp.r)), R(RBP)); +#else + emitter.MOV(32, M(&ebp_store), R(EBP)); +#endif +} + +void DSPJitRegCache::saveStaticRegs() +{ + flushRegs(); +#ifdef _M_X64 +#ifdef ROTATED_REG_ACCS + emitter.MOV(64, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[0].val)), R(R8)); + emitter.MOV(64, MDisp(R11, 
STRUCT_OFFSET(g_dsp.r, ac[1].val)), R(R9)); +#endif + emitter.MOV(64, R(RBP), MDisp(R11, PtrOffset(&ebp_store, &g_dsp.r))); +#else + emitter.MOV(32, R(EBP), M(&ebp_store)); +#endif +} + +void DSPJitRegCache::getReg(int reg, OpArg &oparg, bool load) +{ + switch(reg) { +#ifdef _M_X64 +#ifdef ROTATED_REG_ACCS + case DSP_REG_ACH0: + case DSP_REG_ACH1: + { + _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACH0].used, + "accumulator already in use"); + if (acc[reg-DSP_REG_ACH0].used) + emitter.INT3(); + oparg = R(acc[reg-DSP_REG_ACH0].host_reg); + if (acc[reg-DSP_REG_ACH0].shift < 32) { + emitter.ROR(64, oparg, Imm8(32-acc[reg-DSP_REG_ACH0].shift)); + acc[reg-DSP_REG_ACH0].shift = 32; + } + + acc[reg-DSP_REG_ACH0].used = true; + } + break; + case DSP_REG_ACM0: + case DSP_REG_ACM1: + { + _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACM0].used, + "accumulator already in use"); + if (acc[reg-DSP_REG_ACM0].used) + emitter.INT3(); + oparg = R(acc[reg-DSP_REG_ACM0].host_reg); + if (acc[reg-DSP_REG_ACM0].shift < 16) { + emitter.ROR(64, oparg, Imm8(16-acc[reg-DSP_REG_ACM0].shift)); + acc[reg-DSP_REG_ACM0].shift = 16; + } + if (acc[reg-DSP_REG_ACM0].shift > 16) { + emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACM0].shift-16)); + acc[reg-DSP_REG_ACM0].shift = 16; + } + acc[reg-DSP_REG_ACM0].used = true; + } + break; + case DSP_REG_ACL0: + case DSP_REG_ACL1: + { + _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACL0].used, + "accumulator already in use"); + if (acc[reg-DSP_REG_ACL0].used) + emitter.INT3(); + oparg = R(acc[reg-DSP_REG_ACL0].host_reg); + if (acc[reg-DSP_REG_ACL0].shift > 0) { + emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACL0].shift)); + acc[reg-DSP_REG_ACL0].shift = 0; + } + acc[reg-DSP_REG_ACL0].used = true; + } + break; + case DSP_REG_ACC0_64: + case DSP_REG_ACC1_64: + { + if (acc[reg-DSP_REG_ACC0_64].used) + emitter.INT3(); + _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACC0_64].used, + "accumulator already in use"); + oparg = R(acc[reg-DSP_REG_ACC0_64].host_reg); + if (load) { + if 
(acc[reg-DSP_REG_ACC0_64].shift > 0) { + emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACC0_64].shift)); + } + emitter.SHL(64, oparg, Imm8(64-40));//sign extend + emitter.SAR(64, oparg, Imm8(64-40)); + } + //don't bother to rotate if caller replaces all data + acc[reg-DSP_REG_ACC0_64].shift = 0; + acc[reg-DSP_REG_ACC0_64].used = true; + } + break; +#endif +#endif + default: + { +/* + getFreeXReg(reg[reg].host_reg); + X64Reg tmp = reg[reg].host_reg; + oparg = R(tmp); + + if (load) { + u16 *regp = reg_ptr(reg); +#ifdef _M_IX86 // All32 + emitter.MOV(16, oparg, M(regp)); +#else + emitter.MOV(16, oparg, MDisp(R11, PtrOffset(regp, &g_dsp.r))); +#endif + } +*/ + oparg = regs[reg].loc; //when loading/storing from/to mem, need to consider regs[reg].size + } + break; + } +} + +void DSPJitRegCache::putReg(int reg, bool dirty) +{ + switch(reg) { +#ifdef _M_X64 +#ifdef ROTATED_REG_ACCS + case DSP_REG_ACH0: + case DSP_REG_ACH1: + { + + if (dirty) { + if (acc[reg-DSP_REG_ACH0].shift > 0) { + emitter.ROL(64, R(acc[reg-DSP_REG_ACH0].host_reg), + Imm8(acc[reg-DSP_REG_ACH0].shift)); + acc[reg-DSP_REG_ACH0].shift = 0; + } + emitter.SHL(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40));//sign extend + emitter.SAR(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40)); + } + acc[reg-DSP_REG_ACH0].used = false; + } + break; + case DSP_REG_ACM0: + case DSP_REG_ACM1: + { + acc[reg-DSP_REG_ACM0].used = false; + } + break; + case DSP_REG_ACL0: + case DSP_REG_ACL1: + acc[reg-DSP_REG_ACL0].used = false; + break; + case DSP_REG_ACC0_64: + case DSP_REG_ACC1_64: + { + if (dirty) { + OpArg _reg = R(acc[reg-DSP_REG_ACC0_64].host_reg); + + emitter.SHL(64, _reg, Imm8(64-40));//sign extend + emitter.SAR(64, _reg, Imm8(64-40)); + } + acc[reg-DSP_REG_ACC0_64].used = false; + } + break; +#else + case DSP_REG_ACH0: + case DSP_REG_ACH1: + { + //need to fix in memory for now. 
+ u16 *regp = reg_ptr(reg); + OpArg mem; + mem = MDisp(R11,PtrOffset(regp,&g_dsp.r)); + X64Reg tmp; + getFreeXReg(tmp); + // sign extend from the bottom 8 bits. + emitter.MOVSX(16, 8, tmp, mem); + emitter.MOV(16, mem, R(tmp)); + putXReg(tmp); + } + break; +#endif +#else + case DSP_REG_ACH0: + case DSP_REG_ACH1: + { + //need to fix in memory for now. + u16 *regp = reg_ptr(reg); + OpArg mem; + mem = M(regp); + X64Reg tmp; + getFreeXReg(tmp); + // sign extend from the bottom 8 bits. + emitter.MOVSX(16, 8, tmp, mem); + emitter.MOV(16, mem, R(tmp)); + putXReg(tmp); + } + break; +#endif + default: + { +/* + X64Reg tmp = reg[reg].host_reg; + + if(dirty) { + u16 *regp = reg_ptr(reg); +#ifdef _M_IX86 // All32 + emitter.MOV(16, M(dregp), R(tmp)); +#else + emitter.MOV(16, MDisp(R11, PtrOffset(dregp, &g_dsp.r)), R(tmp)); +#endif + } +*/ + } + break; + } +} + +void DSPJitRegCache::readReg(int sreg, X64Reg host_dreg, DSPJitSignExtend extend) +{ + OpArg reg; + getReg(sreg, reg); + switch(regs[sreg].size) { + case 2: + switch(extend) { +#ifdef _M_X64 + case SIGN: emitter.MOVSX(64, 16, host_dreg, reg); break; + case ZERO: emitter.MOVZX(64, 16, host_dreg, reg); break; +#else + case SIGN: emitter.MOVSX(32, 16, host_dreg, reg); break; + case ZERO: emitter.MOVZX(32, 16, host_dreg, reg); break; +#endif + case NONE: emitter.MOV(16, R(host_dreg), reg); break; + } + break; + case 4: +#ifdef _M_X64 + switch(extend) { + case SIGN: emitter.MOVSX(64, 32, host_dreg, reg); break; + case ZERO: emitter.MOVZX(64, 32, host_dreg, reg); break; + case NONE: emitter.MOV(32, R(host_dreg), reg); break; + } +#else + emitter.MOV(32, R(host_dreg), reg); break; +#endif + break; +#ifdef _M_X64 + case 8: + emitter.MOV(64, R(host_dreg), reg); break; + break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + putReg(sreg, false); +} + +void DSPJitRegCache::writeReg(int dreg, OpArg arg) +{ + OpArg reg; + getReg(dreg, reg, false); + switch(regs[dreg].size) { + case 2: 
emitter.MOV(16, reg, arg); break; + case 4: emitter.MOV(32, reg, arg); break; +#ifdef _M_X64 + case 8: emitter.MOV(64, reg, arg); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + putReg(dreg, true); +} + +X64Reg DSPJitRegCache::spillXReg() +{ + //todo: implement + return INVALID_REG; +} + +void DSPJitRegCache::spillXReg(X64Reg reg) +{ + //todo: implement +} + +X64Reg DSPJitRegCache::findFreeXReg() +{ + int i; + for(i = 0; i < NUMXREGS; i++) { + if (xregs[i].guest_reg == DSP_REG_NONE) { + return (X64Reg)i; + } + } + return INVALID_REG; +} + +void DSPJitRegCache::getFreeXReg(X64Reg &reg) +{ + reg = findFreeXReg(); + if (reg == INVALID_REG) + reg = spillXReg(); + _assert_msg_(DSPLLE, reg != INVALID_REG, "could not find register"); + xregs[reg].guest_reg = DSP_REG_USED; +} + +void DSPJitRegCache::getXReg(X64Reg reg) +{ + if (xregs[reg].guest_reg != DSP_REG_NONE) + spillXReg(reg); + _assert_msg_(DSPLLE, xregs[reg].guest_reg != DSP_REG_NONE, "register already in use"); + xregs[reg].guest_reg = DSP_REG_USED; +} + +void DSPJitRegCache::putXReg(X64Reg reg) +{ + _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_USED, + "putXReg without get(Free)XReg"); + xregs[reg].guest_reg = DSP_REG_NONE; +} diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.h b/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.h new file mode 100644 index 0000000000..a230947c11 --- /dev/null +++ b/Source/Core/DSPCore/Src/Jit/DSPJitRegCache.h @@ -0,0 +1,169 @@ +// Copyright (C) 2011 Dolphin Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details.
+ +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official SVN repository and contact information can be found at +// http://code.google.com/p/dolphin-emu/ + +#ifndef _DSPJITREGCACHE_H +#define _DSPJITREGCACHE_H + +#include "x64Emitter.h" + +class DSPEmitter; + +enum DSPJitRegSpecial { + DSP_REG_ACC0_64 =32, + DSP_REG_ACC1_64 =33, + DSP_REG_AX0_32 =34, + DSP_REG_AX1_32 =35, + DSP_REG_PROD_64 =36, + DSP_REG_MAX_MEM_BACKED = 36, + + DSP_REG_USED =253, + DSP_REG_STATIC =254, + DSP_REG_NONE =255 +}; + +enum DSPJitSignExtend { + SIGN, ZERO, NONE +}; + +#ifdef _M_X64 +#define NUMXREGS 16 +#elif _M_IX86 +#define NUMXREGS 8 +#endif + +class DSPJitRegCache { +private: + struct X64CachedReg + { + int guest_reg; //including DSPJitRegSpecial + }; + struct DynamicReg { + Gen::OpArg loc; + void *mem; + size_t size; + bool dirty; + }; + +#ifdef _M_X64 + //when there is a way to do this efficiently in x86, uncondition + struct { + Gen::X64Reg host_reg; + int shift; + bool dirty; + bool used; + Gen::X64Reg tmp_reg; + } acc[2]; +#endif + + DynamicReg regs[DSP_REG_MAX_MEM_BACKED+1]; + X64CachedReg xregs[NUMXREGS]; + + DSPEmitter &emitter; + bool temporary; + bool merged; +private: + //find a free host reg + Gen::X64Reg findFreeXReg(); + Gen::X64Reg spillXReg(); + void spillXReg(Gen::X64Reg reg); +public: + DSPJitRegCache(DSPEmitter &_emitter); + + //for branching into multiple control flows + DSPJitRegCache(const DSPJitRegCache &cache); + DSPJitRegCache& operator=(const DSPJitRegCache &cache); + + ~DSPJitRegCache(); + + //merge must be done _before_ leaving the code branch, so we can fix + //up any differences in state + void flushRegs(DSPJitRegCache &cache, bool emit = true); + /* since some use cases are non-trivial, some examples: + + //this does not modify the final state of gpr + + FixupBranch b = JCC(); + DSPJitRegCache c = gpr; + + gpr.flushRegs(c); + SetBranchTarget(b); + + + //this does not modify the 
final state of gpr + + DSPJitRegCache c = gpr; + FixupBranch b1 = JCC(); + + gpr.flushRegs(c); + FixupBranch b2 = JMP(); + SetBranchTarget(b1); + + gpr.flushRegs(c); + SetBranchTarget(b2); + + + //this allows gpr to be modified in the second branch + //and fixes gpr according to the results form in the first branch + + DSPJitRegCache c = gpr; + FixupBranch b1 = JCC(); + + FixupBranch b2 = JMP(); + SetBranchTarget(b1); + + gpr.flushRegs(c); + SetBranchTarget(b2); + + + //this does not modify the final state of gpr + + u8* b = GetCodePtr(); + DSPJitRegCache c = gpr; + + gpr.flushRegs(c); + JCC(b); + + + this all is not needed when gpr would not be used at all in the + conditional branch + */ + //drop this copy without warning + void drop(); + + //prepare state so that another flushed DSPJitRegCache can take over + void flushRegs(); + + void loadStaticRegs();//load statically allocated regs from memory + void saveStaticRegs();//save statically allocated regs to memory + + //gives no SCALE_RIP with abs(offset) >= 0x80000000 + void getReg(int reg, Gen::OpArg &oparg, bool load = true); + //done with all usages of OpArg above + void putReg(int reg, bool dirty = true); + + void readReg(int sreg, Gen::X64Reg host_dreg, DSPJitSignExtend extend); + void writeReg(int dreg, Gen::OpArg arg); + + //find a free host reg, spill if used, reserve + void getFreeXReg(Gen::X64Reg &reg); + //spill a specific host reg if used, reserve + void getXReg(Gen::X64Reg reg); + //unreserve the given host reg + void putXReg(Gen::X64Reg reg); +}; + +#endif /*_DSPJITREGCACHE_H*/ diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp index 1fb15ae00f..a9f78e9a60 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp @@ -18,6 +18,7 @@ #include "../DSPMemoryMap.h" #include "../DSPHWInterface.h" #include "../DSPEmitter.h" +#include "DSPJitUtil.h" #include "x64Emitter.h" #include "ABI.h" @@ -39,14 +40,13 @@ using
namespace Gen; // ECX = temp void DSPEmitter::increment_addr_reg(int reg) { -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[reg])); - MOVZX(32, 16, EDX, M(&g_dsp.r.wr[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); - MOVZX(32, 16, EDX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, wr[reg]))); -#endif + OpArg ar_reg; + OpArg wr_reg; + gpr.getReg(DSP_REG_WR0+reg,wr_reg); + MOVZX(32, 16, EDX, wr_reg); + gpr.putReg(DSP_REG_WR0+reg); + gpr.getReg(DSP_REG_AR0+reg,ar_reg); + MOVZX(32, 16, EAX, ar_reg); //u32 nar = ar + 1; MOV(32, R(EDI), R(EAX)); @@ -64,12 +64,8 @@ void DSPEmitter::increment_addr_reg(int reg) SetJumpTarget(nowrap); // g_dsp.r.ar[reg] = nar; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ar[reg]), R(AX)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg])), R(AX)); -#endif + MOV(16, ar_reg, R(AX)); + gpr.putReg(DSP_REG_AR0+reg); } // EAX = g_dsp.r.ar[reg] @@ -78,14 +74,13 @@ void DSPEmitter::increment_addr_reg(int reg) // ECX = temp void DSPEmitter::decrement_addr_reg(int reg) { -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[reg])); - MOVZX(32, 16, EDX, M(&g_dsp.r.wr[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); - MOVZX(32, 16, EDX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, wr[reg]))); -#endif + OpArg ar_reg; + OpArg wr_reg; + gpr.getReg(DSP_REG_WR0+reg,wr_reg); + MOVZX(32, 16, EDX, wr_reg); + gpr.putReg(DSP_REG_WR0+reg); + gpr.getReg(DSP_REG_AR0+reg,ar_reg); + MOVZX(32, 16, EAX, ar_reg); // u32 nar = ar + wr; // edi = nar @@ -104,12 +99,8 @@ void DSPEmitter::decrement_addr_reg(int reg) SetJumpTarget(nowrap); // g_dsp.r.ar[reg] = nar; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ar[reg]), R(DI)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg])), R(DI)); -#endif + MOV(16, ar_reg, R(DI)); + 
gpr.putReg(DSP_REG_AR0+reg); } // Increase addr register according to the correspond ix register @@ -120,16 +111,17 @@ void DSPEmitter::decrement_addr_reg(int reg) // EDI = temp void DSPEmitter::increase_addr_reg(int reg) { -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[reg])); - MOVZX(32, 16, EDX, M(&g_dsp.r.wr[reg])); - MOVSX(32, 16, ESI, M(&g_dsp.r.ix[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); - MOVZX(32, 16, EDX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, wr[reg]))); - MOVSX(32, 16, ESI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ix[reg]))); -#endif + OpArg ar_reg; + OpArg wr_reg; + OpArg ix_reg; + gpr.getReg(DSP_REG_WR0+reg,wr_reg); + gpr.getReg(DSP_REG_IX0+reg,ix_reg); + MOVZX(32, 16, EDX, wr_reg); + MOVSX(32, 16, ESI, ix_reg); + gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_IX0+reg); + gpr.getReg(DSP_REG_AR0+reg,ar_reg); + MOVZX(32, 16, EAX, ar_reg); //u32 nar = ar + ix; //edi = nar @@ -170,12 +162,8 @@ void DSPEmitter::increase_addr_reg(int reg) SetJumpTarget(done3); // g_dsp.r.ar[reg] = nar; -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ar[reg]), R(DI)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg])), R(DI)); -#endif + MOV(16, ar_reg, R(DI)); + gpr.putReg(DSP_REG_AR0+reg); } // Decrease addr register according to the correspond ix register @@ -186,16 +174,17 @@ void DSPEmitter::increase_addr_reg(int reg) // EDI = temp void DSPEmitter::decrease_addr_reg(int reg) { -#ifdef _M_IX86 // All32 - MOVZX(32, 16, EAX, M(&g_dsp.r.ar[reg])); - MOVZX(32, 16, EDX, M(&g_dsp.r.wr[reg])); - MOVSX(32, 16, ESI, M(&g_dsp.r.ix[reg])); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(32, 16, EAX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg]))); - MOVZX(32, 16, EDX, MDisp(R11, STRUCT_OFFSET(g_dsp.r, wr[reg]))); - MOVSX(32, 16, ESI, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ix[reg]))); -#endif + OpArg ar_reg; + OpArg wr_reg; + OpArg ix_reg; + 
gpr.getReg(DSP_REG_WR0+reg,wr_reg); + gpr.getReg(DSP_REG_IX0+reg,ix_reg); + MOVZX(32, 16, EDX, wr_reg); + MOVSX(32, 16, ESI, ix_reg); + gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_IX0+reg); + gpr.getReg(DSP_REG_WR0+reg,ar_reg); + MOVZX(32, 16, EAX, ar_reg); NOT(32, R(ESI)); //esi = ~ix @@ -237,12 +226,8 @@ void DSPEmitter::decrease_addr_reg(int reg) SetJumpTarget(done3); //return nar -#ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.r.ar[reg]), R(DI)); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ar[reg])), R(DI)); -#endif + MOV(16, ar_reg, R(DI)); + gpr.putReg(DSP_REG_WR0+reg); } @@ -268,9 +253,11 @@ void DSPEmitter::dmem_write() // else if (saddr == 0xf) SetJumpTarget(ifx); // Does it mean gdsp_ifx_write needs u32 rather than u16? + DSPJitRegCache c(gpr); SaveDSPRegs(); ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); LoadDSPRegs(); + gpr.flushRegs(c); SetJumpTarget(end); } @@ -374,9 +361,11 @@ void DSPEmitter::dmem_read() SetJumpTarget(ifx); // else if (saddr == 0xf) // return gdsp_ifx_read(addr); + DSPJitRegCache c(gpr); SaveDSPRegs(); ABI_CallFunctionR((void *)gdsp_ifx_read, ECX); LoadDSPRegs(); + gpr.flushRegs(c); SetJumpTarget(end); SetJumpTarget(end2); } @@ -419,34 +408,22 @@ void DSPEmitter::dmem_read_imm(u16 address) void DSPEmitter::get_long_prod(X64Reg long_prod) { #ifdef _M_X64 -#if 0 // MOV(64, R(R11), ImmPtr(&g_dsp.r)); //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; - MOVSX(64, 8, long_prod, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.h))); - //val <<= 32; - SHL(64, R(long_prod), Imm8(16)); - //s64 low_prod = g_dsp.r[DSP_REG_PRODM]; - OR(16, R(long_prod), MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m))); - //low_prod += g_dsp.r[DSP_REG_PRODM2]; - ADD(16, R(long_prod), MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m2))); - //low_prod <<= 16; - SHL(64, R(long_prod), Imm8(16)); - //low_prod |= g_dsp.r[DSP_REG_PRODL]; - OR(16, R(long_prod), MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.l))); - //return val; -#else - // MOV(64, 
R(R11), ImmPtr(&g_dsp.r)); - //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; - MOV(64, R(long_prod), MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.val))); - MOV(64, R(R11), R(long_prod)); + OpArg reg; + gpr.getReg(DSP_REG_PROD_64, reg); + X64Reg tmp; + gpr.getFreeXReg(tmp); + MOV(64, R(long_prod), reg); + MOV(64, R(tmp), R(long_prod)); SHL(64, R(long_prod), Imm8(64-40));//sign extend SAR(64, R(long_prod), Imm8(64-40)); - SHR(64, R(R11), Imm8(48)); - SHL(64, R(R11), Imm8(16)); - ADD(64, R(long_prod), R(R11)); - MOV(64, R(R11), ImmPtr(&g_dsp.r)); + SHR(64, R(tmp), Imm8(48)); + SHL(64, R(tmp), Imm8(16)); + ADD(64, R(long_prod), R(tmp)); + gpr.putXReg(tmp); + gpr.putReg(DSP_REG_PROD_64, false); -#endif #endif } @@ -481,28 +458,18 @@ void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) void DSPEmitter::set_long_prod() { #ifdef _M_X64 -#if 0 + OpArg reg; + gpr.getReg(DSP_REG_PROD_64, reg, false); + X64Reg tmp; + gpr.getFreeXReg(tmp); + + MOV(64, R(tmp), Imm64(0x000000ffffffffffULL)); + AND(64, R(RAX), R(tmp)); // g_dsp.r[DSP_REG_PRODL] = (u16)val; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.l)), R(AX)); - // val >>= 16; - SAR(64, R(RAX), Imm8(16)); - // g_dsp.r[DSP_REG_PRODM] = (u16)val; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m)), R(AX)); - // val >>= 16; - SAR(64, R(RAX), Imm8(16)); - // g_dsp.r[DSP_REG_PRODH] = (u8)val; - MOVSX(64, 8, RAX, R(AL)); - MOV(8, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.h)), R(AL)); - // g_dsp.r[DSP_REG_PRODM2] = 0; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.m2)), Imm16(0)); -#else - MOV(64, R(R11), Imm64(0x000000ffffffffffULL)); - AND(64, R(RAX), R(R11)); - // g_dsp.r[DSP_REG_PRODL] = (u16)val; - MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(64, MDisp(R11, STRUCT_OFFSET(g_dsp.r, prod.val)), R(RAX)); -#endif + MOV(64, reg, R(RAX)); + + gpr.putXReg(tmp); + gpr.putReg(DSP_REG_PROD_64, true); #endif } @@ -528,113 +495,88 @@ void DSPEmitter::round_long_acc(X64Reg long_acc) #endif } -// Returns 
s64 in RAX +// Returns s64 in acc void DSPEmitter::get_long_acc(int _reg, X64Reg acc) { #ifdef _M_X64 -#if 0 -// s64 high = (s64)(s8)g_dsp.r[DSP_REG_ACH0 + reg] << 32; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 8, acc, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].h))); - SHL(64, R(acc), Imm8(16)); -// u32 mid_low = ((u32)g_dsp.r[DSP_REG_ACM0 + reg] << 16) | g_dsp.r[DSP_REG_ACL0 + reg]; - OR(16, R(acc), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].m))); - SHL(64, R(acc), Imm8(16)); - OR(16, R(acc), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].l))); -// return high | mid_low; -#else -// s64 high = (s64)(s8)g_dsp.r[DSP_REG_ACH0 + reg] << 32; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(64, R(acc), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].val))); - SHL(64, R(acc), Imm8(64-40));//sign extend - SAR(64, R(acc), Imm8(64-40)); -#endif + OpArg reg; + gpr.getReg(DSP_REG_ACC0_64+_reg, reg); + MOV(64, R(acc), reg); + gpr.putReg(DSP_REG_ACC0_64+_reg, false); #endif } -// In: RAX = s64 val -// Clobbers the input reg +// In: acc = s64 val void DSPEmitter::set_long_acc(int _reg, X64Reg acc) { #ifdef _M_X64 -#if 0 -// g_dsp.r[DSP_REG_ACL0 + _reg] = (u16)val; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].l)), R(acc)); -// val >>= 16; - SHR(64, R(acc), Imm8(16)); -// g_dsp.r[DSP_REG_ACM0 + _reg] = (u16)val; - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].m)), R(acc)); -// val >>= 16; - SHR(64, R(acc), Imm8(16)); -// g_dsp.r[DSP_REG_ACH0 + _reg] = (u16)(s16)(s8)(u8)val; - MOVSX(64, 8, acc, R(acc)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].h)), R(acc)); -#else - SHL(64, R(acc), Imm8(64-40));//sign extend - SAR(64, R(acc), Imm8(64-40)); -// g_dsp.r[DSP_REG_ACL0 + _reg] = (u16)val; - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(64, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].val)), R(acc)); -#endif + OpArg reg; + gpr.getReg(DSP_REG_ACC0_64+_reg, reg, false); + MOV(64, reg, R(acc)); + gpr.putReg(DSP_REG_ACC0_64+_reg); 
#endif } // Returns s16 in AX -void DSPEmitter::get_acc_m(int _reg, X64Reg acm) -{ -// return g_dsp.r[DSP_REG_ACM0 + _reg]; -#ifdef _M_X64 - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, acm, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].m))); -#endif -} - -// Returns s16 in AX -void DSPEmitter::set_acc_m(int _reg) +void DSPEmitter::get_acc_l(int _reg, X64Reg acl, bool sign) { // return g_dsp.r[DSP_REG_ACM0 + _reg]; -#ifdef _M_X64 - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ac[_reg].m)), R(RAX)); -#endif + gpr.readReg(_reg+DSP_REG_ACL0, acl, sign?SIGN:ZERO); +} + +void DSPEmitter::set_acc_l(int _reg, OpArg arg) +{ + // g_dsp.r[DSP_REG_ACL0 + _reg] = arg; + gpr.writeReg(_reg+DSP_REG_ACL0,arg); +} + +// Returns the 16-bit value in acm +void DSPEmitter::get_acc_m(int _reg, X64Reg acm, bool sign) +{ +// return g_dsp.r[DSP_REG_ACM0 + _reg]; + gpr.readReg(_reg+DSP_REG_ACM0, acm, sign?SIGN:ZERO); +} + +// In: s16 in arg +void DSPEmitter::set_acc_m(int _reg, OpArg arg) +{ + // g_dsp.r.ac[_reg].m = arg; + gpr.writeReg(_reg+DSP_REG_ACM0,arg); +} + +// Returns the 16-bit value in ach +void DSPEmitter::get_acc_h(int _reg, X64Reg ach, bool sign) +{ +// return g_dsp.r.ac[_reg].h; + gpr.readReg(_reg+DSP_REG_ACH0, ach, sign?SIGN:ZERO); +} + +// In: s16 in arg +void DSPEmitter::set_acc_h(int _reg, OpArg arg) +{ + // g_dsp.r[DSP_REG_ACH0 + _reg] = arg; + gpr.writeReg(_reg+DSP_REG_ACH0,arg); } // Returns u32 in EAX void DSPEmitter::get_long_acx(int _reg, X64Reg acx) { // return ((u32)g_dsp.r[DSP_REG_AXH0 + _reg] << 16) | g_dsp.r[DSP_REG_AXL0 + _reg]; -#ifdef _M_X64 -#if 0 - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, acx, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[_reg].h))); - SHL(64, R(acx), Imm8(16)); - OR(16, R(acx), MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[_reg].l))); -#else - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 32, acx, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[_reg].val))); -#endif -#endif + gpr.readReg(_reg+DSP_REG_AX0_32, acx, SIGN); } // Returns
s16 in EAX void DSPEmitter::get_ax_l(int _reg, X64Reg axl) { // return (s16)g_dsp.r[DSP_REG_AXL0 + _reg]; -#ifdef _M_X64 - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, axl, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[_reg].l))); -#endif + gpr.readReg(_reg+DSP_REG_AXL0, axl, SIGN); } // Returns s16 in EAX void DSPEmitter::get_ax_h(int _reg, X64Reg axh) { // return (s16)g_dsp.r[DSP_REG_AXH0 + _reg]; -#ifdef _M_X64 - // MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVSX(64, 16, axh, MDisp(R11, STRUCT_OFFSET(g_dsp.r, ax[_reg].h))); -#endif + gpr.readReg(_reg+DSP_REG_AXH0, axh, SIGN); } void DSPEmitter::LoadDSPRegs() @@ -642,10 +584,13 @@ void DSPEmitter::LoadDSPRegs() #ifdef _M_X64 MOV(64, R(R11), ImmPtr(&g_dsp.r)); #endif + // Load DSP register state here... + gpr.loadStaticRegs(); } void DSPEmitter::SaveDSPRegs() { // Save DSP register state here... + gpr.saveStaticRegs(); } diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.h b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.h index 2e877d63ed..2d28b6d520 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.h +++ b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.h @@ -18,57 +18,4 @@ #ifndef __DSPJITUTIL_H__ #define __DSPJITUTIL_H__ -#include "../DSPMemoryMap.h" -#include "../DSPHWInterface.h" -#include "../DSPEmitter.h" - -static u16 *reg_ptr(int reg) { - switch(reg) { - case DSP_REG_AR0: - case DSP_REG_AR1: - case DSP_REG_AR2: - case DSP_REG_AR3: - return &g_dsp.r.ar[reg - DSP_REG_AR0]; - case DSP_REG_IX0: - case DSP_REG_IX1: - case DSP_REG_IX2: - case DSP_REG_IX3: - return &g_dsp.r.ix[reg - DSP_REG_IX0]; - case DSP_REG_WR0: - case DSP_REG_WR1: - case DSP_REG_WR2: - case DSP_REG_WR3: - return &g_dsp.r.wr[reg - DSP_REG_WR0]; - case DSP_REG_ST0: - case DSP_REG_ST1: - case DSP_REG_ST2: - case DSP_REG_ST3: - return &g_dsp.r.st[reg - DSP_REG_ST0]; - case DSP_REG_ACH0: - case DSP_REG_ACH1: - return &g_dsp.r.ac[reg - DSP_REG_ACH0].h; - case DSP_REG_CR: return &g_dsp.r.cr; - case DSP_REG_SR: return &g_dsp.r.sr; - case DSP_REG_PRODL: return 
&g_dsp.r.prod.l; - case DSP_REG_PRODM: return &g_dsp.r.prod.m; - case DSP_REG_PRODH: return &g_dsp.r.prod.h; - case DSP_REG_PRODM2: return &g_dsp.r.prod.m2; - case DSP_REG_AXL0: - case DSP_REG_AXL1: - return &g_dsp.r.ax[reg - DSP_REG_AXL0].l; - case DSP_REG_AXH0: - case DSP_REG_AXH1: - return &g_dsp.r.ax[reg - DSP_REG_AXH0].h; - case DSP_REG_ACL0: - case DSP_REG_ACL1: - return &g_dsp.r.ac[reg - DSP_REG_ACL0].l; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - return &g_dsp.r.ac[reg - DSP_REG_ACM0].m; - default: - _assert_msg_(DSP_JIT, 0, "cannot happen"); - return NULL; - } -} - #endif /*__DSPJITUTIL_H__*/ diff --git a/Source/Core/DSPCore/Src/SConscript b/Source/Core/DSPCore/Src/SConscript index 11b3d9dc56..f97a15753c 100644 --- a/Source/Core/DSPCore/Src/SConscript +++ b/Source/Core/DSPCore/Src/SConscript @@ -31,6 +31,7 @@ files = [ "Jit/DSPJitLoadStore.cpp", "Jit/DSPJitMultiplier.cpp", "Jit/DSPJitMisc.cpp", + "Jit/DSPJitRegCache.cpp", ] env.StaticLibrary(env['local_libs'] + 'dspcore', files)