From f2e54074f1a986479b70d4a6dec5978240e16384 Mon Sep 17 00:00:00 2001 From: skidau Date: Sun, 26 Dec 2010 03:12:29 +0000 Subject: [PATCH] LLE JIT: * Completed the JIT versions of the DSP Multiplier instructions (5 instructions added). * Bug fixed the dec and lsr16 instructions. * Minor code clean-up. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6657 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/DSPCore/Src/DSPEmitter.h | 7 +- Source/Core/DSPCore/Src/DSPTables.cpp | 12 +- .../Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp | 9 +- .../Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp | 24 +- Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp | 28 +- .../Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp | 254 +++++++++++------- Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp | 35 +-- 7 files changed, 211 insertions(+), 158 deletions(-) diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 101a44445a..9991ca4520 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -204,20 +204,25 @@ public: void asrnr(const UDSPInstruction opc); // Multipliers - void get_multiply_prod(); void multiply(); void multiply_add(); void multiply_sub(); + void multiply_mulx(u8 axh0, u8 axh1); void clrp(const UDSPInstruction opc); void tstprod(const UDSPInstruction opc); void movp(const UDSPInstruction opc); void movnp(const UDSPInstruction opc); void movpz(const UDSPInstruction opc); + void addpaxz(const UDSPInstruction opc); void mulaxh(const UDSPInstruction opc); void mul(const UDSPInstruction opc); void mulac(const UDSPInstruction opc); void mulmv(const UDSPInstruction opc); void mulmvz(const UDSPInstruction opc); + void mulx(const UDSPInstruction opc); + void mulxac(const UDSPInstruction opc); + void mulxmv(const UDSPInstruction opc); + void mulxmvz(const UDSPInstruction opc); void mulc(const UDSPInstruction opc); void mulcac(const UDSPInstruction opc); void mulcmv(const UDSPInstruction opc); diff --git a/Source/Core/DSPCore/Src/DSPTables.cpp b/Source/Core/DSPCore/Src/DSPTables.cpp index 118226eb0c..d2f6f6f175 100644 --- a/Source/Core/DSPCore/Src/DSPTables.cpp +++ b/Source/Core/DSPCore/Src/DSPTables.cpp @@ -269,11 +269,11 @@ const DSPOPCTemplate opcodes[] = {"MULMV", 0x9600, 0xf600, DSPInterpreter::mulmv, &DSPEmitter::mulmv, 1, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, //a-b - {"MULX", 0xa000, 0xe700, DSPInterpreter::mulx, NULL, 1, 2, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}}, true, false, false, false, true}, + {"MULX", 0xa000, 0xe700, DSPInterpreter::mulx, &DSPEmitter::mulx, 1, 2, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}}, true, false, false, false, true}, {"ABS", 0xa100, 0xf700, DSPInterpreter::abs, &DSPEmitter::abs, 1, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true, false, false, false, true}, - {"MULXMVZ", 0xa200, 0xe600, DSPInterpreter::mulxmvz, NULL, 1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, - {"MULXAC", 0xa400, 0xe600, DSPInterpreter::mulxac, NULL, 1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, - {"MULXMV", 0xa600, 0xe600, DSPInterpreter::mulxmv, NULL, 1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, + {"MULXMVZ", 0xa200, 0xe600, DSPInterpreter::mulxmvz, &DSPEmitter::mulxmvz,1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, + {"MULXAC", 0xa400, 0xe600, DSPInterpreter::mulxac, &DSPEmitter::mulxac, 1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, + {"MULXMV", 0xa600, 0xe600, DSPInterpreter::mulxmv, &DSPEmitter::mulxmv, 1, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, {"TST", 0xb100, 0xf700, DSPInterpreter::tst, &DSPEmitter::tst, 1, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true, false, false, false, true}, //c-d @@ -292,9 +292,9 @@ const DSPOPCTemplate opcodes[] = //f {"LSL16", 0xf000, 0xfe00, DSPInterpreter::lsl16, &DSPEmitter::lsl16, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, {"MADD", 0xf200, 0xfe00, DSPInterpreter::madd, &DSPEmitter::madd, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false, false, true}, - {"LSR16", 0xf400, 0xfe00, DSPInterpreter::lsr16, &DSPEmitter::lsl16, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, + {"LSR16", 0xf400, 0xfe00, DSPInterpreter::lsr16, &DSPEmitter::lsr16, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, {"MSUB", 0xf600, 0xfe00, DSPInterpreter::msub, &DSPEmitter::msub, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false, false, true}, - {"ADDPAXZ", 0xf800, 0xfc00, DSPInterpreter::addpaxz, NULL, 1, 2, {{P_ACC, 1, 0, 9, 0x0200}, {P_AX, 1, 0, 8, 0x0100}}, true, false, false, false, true}, + {"ADDPAXZ", 0xf800, 0xfc00, DSPInterpreter::addpaxz, &DSPEmitter::addpaxz,1, 2, {{P_ACC, 1, 0, 9, 0x0200}, {P_AX, 1, 0, 8, 0x0100}}, true, false, false, false, true}, {"CLRL", 0xfc00, 0xfe00, DSPInterpreter::clrl, &DSPEmitter::clrl, 1, 1, {{P_ACCL, 1, 0, 11, 0x0800}}, true, false, false, false, true}, {"MOVPZ", 0xfe00, 0xfe00, DSPInterpreter::movpz, &DSPEmitter::movpz, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, }; diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp index 1b081da30a..cd9677c35f 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitArithmetic.cpp @@ -1115,22 +1115,20 @@ void DSPEmitter::dec(const UDSPInstruction opc) get_long_acc(dreg, RCX); MOV(64, R(RAX), R(RCX)); // s64 res = acc - 1; - SUB(64, R(RAX), Imm8(1)); + SUB(64, R(RAX), Imm32(1)); // dsp_set_long_acc(dreg, res); - set_long_acc(dreg); // res = dsp_get_long_acc(dreg); // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -1, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(8, R(RDX), Imm8(1)); - NEG(8, R(RDX)); + MOV(64, R(RDX), Imm64(-1)); MOV(64, R(RSI), R(RAX)); set_long_acc(dreg, RSI); Update_SR_Register64_Carry2(); } else { - set_long_acc(dreg, RAX); + set_long_acc(dreg); } #else Default(opc); @@ -1282,7 +1280,6 @@ void DSPEmitter::mov(const UDSPInstruction opc) // flags out: --xx xx00 void DSPEmitter::lsl16(const UDSPInstruction opc) { - Default(opc); return; // TODO: Breaks ZTP Wii #ifdef _M_X64 u8 areg = (opc >> 8) & 0x1; // s64 acc = dsp_get_long_acc(areg); diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp index fdd5fc09a6..0b04be9eb2 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitLoadStore.cpp @@ -38,7 +38,7 @@ void DSPEmitter::srs(const UDSPInstruction opc) MOVZX(32, 16, ECX, M(&g_dsp.r[reg])); MOVZX(32, 8, EAX, M(&g_dsp.r[DSP_REG_CR])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RCX, MDisp(R11,reg*2)); MOVZX(64, 8, RAX, MDisp(R11,DSP_REG_CR*2)); #endif @@ -63,12 +63,12 @@ void DSPEmitter::lrs(const UDSPInstruction opc) dmem_read(); MOV(16, M(&g_dsp.r[reg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 8, RCX, MDisp(R11,DSP_REG_CR*2)); SHL(16, R(ECX), Imm8(8)); OR(8, R(ECX), Imm8(opc & 0xFF)); dmem_read(); - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(RAX)); #endif dsp_conditional_extend_accum(reg); @@ -208,7 +208,7 @@ void DSPEmitter::srr(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, EAX, M(&g_dsp.r[dreg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RAX, MDisp(R11,dreg*2)); #endif dmem_write(); @@ -228,7 +228,7 @@ void DSPEmitter::srrd(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, EAX, M(&g_dsp.r[dreg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RAX, MDisp(R11,dreg*2)); #endif dmem_write(); @@ -249,7 +249,7 @@ void DSPEmitter::srri(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, EAX, M(&g_dsp.r[dreg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RAX, MDisp(R11,dreg*2)); #endif dmem_write(); @@ -283,14 +283,14 @@ void DSPEmitter::ilrr(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, ECX, M(&g_dsp.r[reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RCX, MDisp(R11,reg*2)); #endif imem_read(); #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[dreg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,dreg*2), R(RAX)); #endif dsp_conditional_extend_accum(dreg); @@ -308,14 +308,14 @@ void DSPEmitter::ilrrd(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, ECX, M(&g_dsp.r[reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RCX, MDisp(R11,reg*2)); #endif imem_read(); #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[dreg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,dreg*2), R(RAX)); #endif dsp_conditional_extend_accum(dreg); @@ -334,14 +334,14 @@ void DSPEmitter::ilrri(const UDSPInstruction opc) #ifdef _M_IX86 // All32 MOVZX(32, 16, ECX, M(&g_dsp.r[reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOVZX(64, 16, RCX, MDisp(R11,reg*2)); #endif imem_read(); #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[dreg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,dreg*2), R(RAX)); #endif dsp_conditional_extend_accum(dreg); diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp index 9eee86dfb9..543c64603d 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitMisc.cpp @@ -51,7 +51,7 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg) MOVZX(32, 8, EAX, R(AL)); MOV(16, MComplex(EAX,EAX,1,(u32)&g_dsp.reg_stack[stack_reg][0]), R(CX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(CX), MDisp(R11,(DSP_REG_ST0 + stack_reg)*2)); MOVZX(64, 8, RAX, R(AL)); MOV(64, R(R10), ImmPtr(&g_dsp.reg_stack[stack_reg][0])); @@ -81,7 +81,7 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg) MOVZX(64, 8, RAX, R(AL)); MOV(64, R(R10), ImmPtr(&g_dsp.reg_stack[stack_reg][0])); MOV(16, R(CX), MComplex(R10,RAX,2,0)); - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,(DSP_REG_ST0 + stack_reg)*2), R(CX)); #endif @@ -108,7 +108,7 @@ void DSPEmitter::dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[DSP_REG_ST0+stack_reg]), R(EDX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,(DSP_REG_ST0+stack_reg)*2), R(EDX)); #endif } @@ -119,7 +119,7 @@ void DSPEmitter::dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg) #ifdef _M_IX86 // All32 MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_ST0+stack_reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(EDX), MDisp(R11,(DSP_REG_ST0+stack_reg)*2)); #endif dsp_reg_stack_pop(stack_reg); @@ -135,7 +135,7 @@ void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[DSP_REG_ST0+stack_reg]), Imm16(val)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,(DSP_REG_ST0+stack_reg)*2), Imm16(val)); #endif } @@ -151,7 +151,7 @@ void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(host_sreg)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(host_sreg)); #endif break; @@ -168,7 +168,7 @@ void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(host_sreg)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(host_sreg)); #endif break; @@ -185,7 +185,7 @@ void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), Imm16((u16)(s16)(s8)(u8)val)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), Imm16((u16)(s16)(s8)(u8)val)); #endif break; @@ -202,7 +202,7 @@ void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), Imm16(val)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), Imm16(val)); #endif break; @@ -219,7 +219,7 @@ void DSPEmitter::dsp_conditional_extend_accum(int reg) #ifdef _M_IX86 // All32 MOV(16, R(EAX), M(&g_dsp.r[DSP_REG_SR])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(EAX), MDisp(R11,DSP_REG_SR*2)); #endif TEST(16, R(EAX), Imm16(SR_40_MODE_BIT)); @@ -261,7 +261,7 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) #ifdef _M_IX86 // All32 MOV(16, R(EAX), M(&g_dsp.r[DSP_REG_SR])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(EAX), MDisp(R11,DSP_REG_SR*2)); #endif TEST(16, R(EAX), Imm16(SR_40_MODE_BIT)); @@ -301,7 +301,7 @@ void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg) #ifdef _M_IX86 // All32 MOV(16, R(host_dreg), M(&g_dsp.r[reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(host_dreg), MDisp(R11,reg*2)); #endif } @@ -421,7 +421,7 @@ void DSPEmitter::setCompileSR(u16 bit) { #ifdef _M_IX86 // All32 OR(16, M(&g_dsp.r[DSP_REG_SR]), Imm16(bit)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); OR(16, MDisp(R11,DSP_REG_SR*2), Imm16(bit)); #endif @@ -434,7 +434,7 @@ void DSPEmitter::clrCompileSR(u16 bit) { #ifdef _M_IX86 // All32 AND(16, M(&g_dsp.r[DSP_REG_SR]), Imm16(~bit)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); AND(16, MDisp(R11,DSP_REG_SR*2), Imm16(~bit)); #endif diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp index 20c620d5e6..7258f874ed 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp @@ -27,50 +27,6 @@ #include "ABI.h" using namespace Gen; -// Only MULX family instructions have unsigned/mixed support. -// Returns s64 in EAX -// In: RSI = u16 a, RDI = u16 b, RCX = u8 sign -void DSPEmitter::get_multiply_prod() -{ -#ifdef _M_X64 -// if ((sign == 1) && (g_dsp.r[DSP_REG_SR] & SR_MUL_UNSIGNED)) //unsigned - MOV(16, R(RDX), MDisp(R11, DSP_REG_SR * 2)); // TODO check 16bit - AND(16, R(RDX), Imm16(SR_MUL_UNSIGNED)); - TEST(16, R(RDX), R(RDX)); - FixupBranch sign3 = J_CC(CC_Z); - TEST(32, R(ECX), Imm32(1)); - FixupBranch sign1 = J_CC(CC_Z); -// prod = (u32)(a * b); - MOV(64, R(EAX), R(RDI)); - MUL(16, R(ESI)); - FixupBranch mult2 = J(); - SetJumpTarget(sign1); - TEST(32, R(ECX), Imm32(2)); - FixupBranch sign2 = J_CC(CC_Z); -// else if ((sign == 2) && (g_dsp.r[DSP_REG_SR] & SR_MUL_UNSIGNED)) //mixed -// prod = a * (s16)b; - MOVSX(64, 16, RDI, R(RDI)); - MOV(64, R(EAX), R(RDI)); - MUL(16, R(ESI)); -// else - SetJumpTarget(sign2); - SetJumpTarget(sign3); -// prod = (s16)a * (s16)b; //signed - MOV(64, R(EAX), R(RDI)); - IMUL(64, R(ESI)); - -// Conditionally multiply by 2. - SetJumpTarget(mult2); -// if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) - TEST(16, MDisp(R11, DSP_REG_SR * 2), Imm16(SR_MUL_MODIFY)); - FixupBranch noMult2 = J_CC(CC_NZ); -// prod <<= 1; - SHL(64, R(EAX), Imm8(1)); - SetJumpTarget(noMult2); -// return prod; -#endif -} - // Returns s64 in RAX // In: RSI = s16 a, RDI = s16 b void DSPEmitter::multiply() @@ -115,8 +71,12 @@ void DSPEmitter::multiply_sub() // return prod; } -//inline s64 dsp_multiply_mulx(u8 axh0, u8 axh1, u16 val1, u16 val2) -//{ +// Only MULX family instructions have unsigned/mixed support. +// Returns s64 in EAX +// In: RSI = s16 a, RDI = s16 b +// Returns s64 in RAX +void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) +{ // s64 result; // if ((axh0==0) && (axh1==0)) @@ -128,8 +88,57 @@ void DSPEmitter::multiply_sub() // else // result = dsp_multiply(val1, val2, 0); // unsigned support OFF if both ax?.h regs are used -// return result; -//} + +// if ((sign == 1) && (g_dsp.r[DSP_REG_SR] & SR_MUL_UNSIGNED)) //unsigned + TEST(16, MDisp(R11, DSP_REG_SR * 2), Imm16(SR_MUL_UNSIGNED)); + FixupBranch unsignedMul = J_CC(CC_NZ); + // prod = (s16)a * (s16)b; //signed + MOVSX(64, 16, RAX, R(RDI)); + IMUL(64, R(RSI)); + FixupBranch signedMul = J(); + + SetJumpTarget(unsignedMul); + if ((axh0==0) && (axh1==0)) + { + // unsigned support ON if both ax?.l regs are used +// prod = (u32)(a * b); + MOVZX(64, 16, RSI, R(RSI)); + MOVZX(64, 16, RAX, R(RDI)); + MUL(64, R(RSI)); + } + else if ((axh0==0) && (axh1==1)) + { + // mixed support ON (u16)axl.0 * (s16)axh.1 +// prod = a * (s16)b; + MOVZX(64, 16, RAX, R(RSI)); + IMUL(64, R(RDI)); + } + else if ((axh0==1) && (axh1==0)) + { + // mixed support ON (u16)axl.1 * (s16)axh.0 +// prod = (s16)a * b; + MOVZX(64, 16, RAX, R(RDI)); + IMUL(64, R(RSI)); + } + else + { + // unsigned support OFF if both ax?.h regs are used +// prod = (s16)a * (s16)b; //signed + MOVSX(64, 16, RAX, R(RDI)); + IMUL(64, R(RSI)); + } + + SetJumpTarget(signedMul); + + // Conditionally multiply by 2. +// if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) + TEST(16, MDisp(R11, DSP_REG_SR * 2), Imm16(SR_MUL_MODIFY)); + FixupBranch noMult2 = J_CC(CC_NZ); +// prod <<= 1; + SHL(64, R(RAX), Imm8(1)); + SetJumpTarget(noMult2); +// return prod; +} //---- @@ -262,22 +271,41 @@ void DSPEmitter::movpz(const UDSPInstruction opc) // in accumulator register. Low 16-bits of $acD ($acD.l) are set (round) to 0. // flags out: --xx xx0x -//void DSPEmitter::addpaxz(const UDSPInstruction opc) -//{ -// u8 dreg = (opc >> 8) & 0x1; -// u8 sreg = (opc >> 9) & 0x1; +void DSPEmitter::addpaxz(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 dreg = (opc >> 8) & 0x1; + u8 sreg = (opc >> 9) & 0x1; + +// s64 ax = dsp_get_long_acx(sreg); + get_long_acx(sreg, RCX); + MOV(64, R(RDI), R(RCX)); +// s64 res = prod + (ax & ~0xffff); + MOV(64, R(RDX), Imm64(~0xffff)); + AND(64, R(RDI), R(RDX)); +// s64 prod = dsp_get_long_prod_round_prodl(); + get_long_prod_round_prodl(); + ADD(64, R(RAX), R(RDI)); // s64 oldprod = dsp_get_long_prod(); -// s64 prod = dsp_get_long_prod_round_prodl(); -// s64 ax = dsp_get_long_acx(sreg); -// s64 res = prod + (ax & ~0xffff); - -// zeroWriteBackLog(); - // dsp_set_long_acc(dreg, res); // res = dsp_get_long_acc(dreg); // Update_SR_Register64(res, isCarry(oldprod, res), false); -//} + if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) + { + get_long_prod(RDX); + MOV(64, R(RSI), R(RAX)); + set_long_acc(dreg, RSI); + Update_SR_Register64_Carry(); + } + else + { + set_long_acc(dreg, RAX); + } +#else + Default(opc); +#endif +} //---- @@ -427,19 +455,25 @@ void DSPEmitter::mulmvz(const UDSPInstruction opc) // 101s t000 xxxx xxxx // Multiply one part $ax0 by one part $ax1. // Part is selected by S and T bits. Zero selects low part, one selects high part. -//void DSPEmitter::mulx(const UDSPInstruction opc) -//{ -// u8 treg = ((opc >> 11) & 0x1); -// u8 sreg = ((opc >> 12) & 0x1); +void DSPEmitter::mulx(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 treg = ((opc >> 11) & 0x1); + u8 sreg = ((opc >> 12) & 0x1); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg*2) * 2)); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXL1 + treg*2) * 2)); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); - -// zeroWriteBackLog(); - + multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); -//} + set_long_prod(); +#else + Default(opc); +#endif +} // MULXAC $ax0.S, $ax1.T, $acR // 101s t01r xxxx xxxx @@ -448,23 +482,37 @@ void DSPEmitter::mulmvz(const UDSPInstruction opc) // T bits. Zero selects low part, one selects high part. // flags out: --xx xx0x -//void DSPEmitter::mulxac(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulxac(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); + get_long_acc(rreg, RCX); + get_long_prod(); + ADD(64, R(RCX), R(RAX)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg*2) * 2)); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXL1 + treg*2) * 2)); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); -// -// zeroWriteBackLog(); + multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + set_long_acc(rreg, RCX); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) + { + Update_SR_Register64(RCX); + } +#else + Default(opc); +#endif +} // MULXMV $ax0.S, $ax1.T, $acR // 101s t11r xxxx xxxx @@ -473,23 +521,35 @@ void DSPEmitter::mulmvz(const UDSPInstruction opc) // T bits. Zero selects low part, one selects high part. // flags out: --xx xx0x -//void DSPEmitter::mulxmv(const UDSPInstruction opc) -//{ -// u8 rreg = ((opc >> 8) & 0x1); -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulxmv(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = ((opc >> 8) & 0x1); + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod(); + get_long_prod(RCX); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg*2) * 2)); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXL1 + treg*2) * 2)); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); - -// zeroWriteBackLog(); + multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + set_long_acc(rreg, RCX); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) + { + Update_SR_Register64(RCX); + } +#else + Default(opc); +#endif +} // MULXMV $ax0.S, $ax1.T, $acR // 101s t01r xxxx xxxx @@ -499,23 +559,35 @@ void DSPEmitter::mulmvz(const UDSPInstruction opc) // one selects high part. // flags out: --xx xx0x -//void DSPEmitter::mulxmvz(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulxmvz(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod_round_prodl(); + get_long_prod_round_prodl(RCX); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg*2) * 2)); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXL1 + treg*2) * 2)); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); - -// zeroWriteBackLog(); + multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + set_long_acc(rreg, RCX); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) + { + Update_SR_Register64(RCX); + } +#else + Default(opc); +#endif +} //---- diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp index fd41794e5d..5ee0950086 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp @@ -73,7 +73,7 @@ void DSPEmitter::increment_addr_reg(int reg) MOV(16, R(EAX), M(&g_dsp.r[reg])); MOV(16, R(EDX), M(&g_dsp.r[DSP_REG_WR0 + reg])); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, R(EAX), MDisp(R11,reg*2)); MOV(16, R(EDX), MDisp(R11,(DSP_REG_WR0 + reg)*2)); #endif @@ -88,7 +88,7 @@ void DSPEmitter::increment_addr_reg(int reg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(EAX)); #endif } @@ -145,7 +145,7 @@ void DSPEmitter::decrement_addr_reg(int reg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(EAX)); #endif } @@ -197,7 +197,7 @@ void DSPEmitter::increase_addr_reg(int reg) #ifdef _M_IX86 // All32 CMP(16, M(&g_dsp.r[DSP_REG_IX0 + reg]), Imm16(127)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); CMP(16, MDisp(R11,(DSP_REG_IX0 + reg)*2), Imm16(127)); #endif FixupBranch dbg = J_CC(CC_NE); @@ -227,7 +227,7 @@ void DSPEmitter::increase_addr_reg(int reg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(EAX)); #endif @@ -298,7 +298,7 @@ void DSPEmitter::decrease_addr_reg(int reg) #ifdef _M_IX86 // All32 MOV(16, M(&g_dsp.r[reg]), R(EAX)); #else - MOV(64, R(R11), ImmPtr(g_dsp.r)); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); MOV(16, MDisp(R11,reg*2), R(EAX)); #endif @@ -328,7 +328,7 @@ void DSPEmitter::dmem_write() // else if (saddr == 0xf) SetJumpTarget(ifx); // Does it mean gdsp_ifx_write needs u32 rather than u16? - ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); + ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); SetJumpTarget(end); } @@ -465,29 +465,9 @@ void DSPEmitter::dmem_read_imm(u16 address) } // Returns s64 in RAX -// Clobbers RSI, RDI void DSPEmitter::get_long_prod(X64Reg long_prod) { #ifdef _M_X64 - /* - MOV(64, R(R11), ImmPtr(&g_dsp.r)); - //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; - MOVSX(64, 8, long_prod, MDisp(R11,DSP_REG_PRODH*2)); - //val <<= 32; - SHL(64, R(long_prod), Imm8(32)); - //s64 low_prod = g_dsp.r[DSP_REG_PRODM]; - MOVSX(64, 16, RSI, MDisp(R11,DSP_REG_PRODM*2)); - //low_prod += g_dsp.r[DSP_REG_PRODM2]; - MOVSX(64, 16, EDI, MDisp(R11,DSP_REG_PRODM2*2)); - ADD(16, R(RSI), R(EDI)); - //low_prod <<= 16; - SHL(64, R(RSI), Imm8(16)); - OR(64, R(long_prod), R(RSI)); - //low_prod |= g_dsp.r[DSP_REG_PRODL]; - MOV(16, R(long_prod), MDisp(R11,DSP_REG_PRODL*2)); - //return val; - */ - MOV(64, R(R11), ImmPtr(&g_dsp.r)); //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; MOVSX(64, 8, long_prod, MDisp(R11,DSP_REG_PRODH*2)); @@ -502,7 +482,6 @@ void DSPEmitter::get_long_prod(X64Reg long_prod) //low_prod |= g_dsp.r[DSP_REG_PRODL]; OR(16, R(long_prod), MDisp(R11,DSP_REG_PRODL*2)); //return val; - #endif }