From 85d36e38c31c6ccc8d89f59fdb05ddeeabd03acd Mon Sep 17 00:00:00 2001 From: skidau Date: Sat, 4 Dec 2010 23:20:31 +0000 Subject: [PATCH] LLE JIT: Jitted another 8 DSP multiplier instructions (x64 only) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6519 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/DSPCore/Src/DSPEmitter.h | 10 + Source/Core/DSPCore/Src/DSPTables.cpp | 16 +- .../Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp | 269 +++++++++++------- Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp | 8 +- 4 files changed, 195 insertions(+), 108 deletions(-) diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 9865fca6e1..bbb8c97ebd 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -119,6 +119,8 @@ public: // Multipliers void get_multiply_prod(); void multiply(); + void multiply_add(); + void multiply_sub(); void clrp(const UDSPInstruction opc); void tstprod(const UDSPInstruction opc); void movp(const UDSPInstruction opc); @@ -126,9 +128,17 @@ public: void movpz(const UDSPInstruction opc); void mulaxh(const UDSPInstruction opc); void mul(const UDSPInstruction opc); + void mulac(const UDSPInstruction opc); void mulmv(const UDSPInstruction opc); void mulmvz(const UDSPInstruction opc); void mulc(const UDSPInstruction opc); + void mulcac(const UDSPInstruction opc); + void mulcmv(const UDSPInstruction opc); + void mulcmvz(const UDSPInstruction opc); + void maddc(const UDSPInstruction opc); + void msubc(const UDSPInstruction opc); + void madd(const UDSPInstruction opc); + void msub(const UDSPInstruction opc); // CALL this to start the dispatcher const u8 *enterDispatcher; diff --git a/Source/Core/DSPCore/Src/DSPTables.cpp b/Source/Core/DSPCore/Src/DSPTables.cpp index 2edcf03ef1..e9a5e0f7d8 100644 --- a/Source/Core/DSPCore/Src/DSPTables.cpp +++ b/Source/Core/DSPCore/Src/DSPTables.cpp @@ -265,7 +265,7 @@ const DSPOPCTemplate opcodes[] = {"MUL", 0x9000, 0xf700, DSPInterpreter::mul, &DSPEmitter::mul, 1, 2, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}}, true, false, false}, {"ASR16", 0x9100, 0xf700, DSPInterpreter::asr16, NULL, 1, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true, false, false}, {"MULMVZ", 0x9200, 0xf600, DSPInterpreter::mulmvz, &DSPEmitter::mulmvz, 1, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, - {"MULAC", 0x9400, 0xf600, DSPInterpreter::mulac, NULL, 1, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, + {"MULAC", 0x9400, 0xf600, DSPInterpreter::mulac, &DSPEmitter::mulac, 1, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, {"MULMV", 0x9600, 0xf600, DSPInterpreter::mulmv, &DSPEmitter::mulmv, 1, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, //a-b @@ -279,21 +279,21 @@ const DSPOPCTemplate opcodes[] = //c-d {"MULC", 0xc000, 0xe700, DSPInterpreter::mulc, &DSPEmitter::mulc, 1, 2, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}}, true, false, false}, {"CMPAR" , 0xc100, 0xe700, DSPInterpreter::cmpar, NULL, 1, 2, {{P_ACC, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}}, true, false, false}, - {"MULCMVZ", 0xc200, 0xe600, DSPInterpreter::mulcmvz, NULL, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, - {"MULCAC", 0xc400, 0xe600, DSPInterpreter::mulcac, NULL, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, - {"MULCMV", 0xc600, 0xe600, DSPInterpreter::mulcmv, NULL, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, + {"MULCMVZ", 0xc200, 0xe600, DSPInterpreter::mulcmvz, &DSPEmitter::mulcmvz,1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, + {"MULCAC", 0xc400, 0xe600, DSPInterpreter::mulcac, &DSPEmitter::mulcac, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, + {"MULCMV", 0xc600, 0xe600, DSPInterpreter::mulcmv, &DSPEmitter::mulcmv, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, //e {"MADDX", 0xe000, 0xfc00, DSPInterpreter::maddx, NULL, 1, 2, {{P_REGM18, 1, 0, 8, 0x0200}, {P_REGM19, 1, 0, 7, 0x0100}}, true, false, false}, {"MSUBX", 0xe400, 0xfc00, DSPInterpreter::msubx, NULL, 1, 2, {{P_REGM18, 1, 0, 8, 0x0200}, {P_REGM19, 1, 0, 7, 0x0100}}, true, false, false}, - {"MADDC", 0xe800, 0xfc00, DSPInterpreter::maddc, NULL, 1, 2, {{P_ACCM, 1, 0, 9, 0x0200}, {P_REG19, 1, 0, 7, 0x0100}}, true, false, false}, - {"MSUBC", 0xec00, 0xfc00, DSPInterpreter::msubc, NULL, 1, 2, {{P_ACCM, 1, 0, 9, 0x0200}, {P_REG19, 1, 0, 7, 0x0100}}, true, false, false}, + {"MADDC", 0xe800, 0xfc00, DSPInterpreter::maddc, &DSPEmitter::maddc, 1, 2, {{P_ACCM, 1, 0, 9, 0x0200}, {P_REG19, 1, 0, 7, 0x0100}}, true, false, false}, + {"MSUBC", 0xec00, 0xfc00, DSPInterpreter::msubc, &DSPEmitter::msubc, 1, 2, {{P_ACCM, 1, 0, 9, 0x0200}, {P_REG19, 1, 0, 7, 0x0100}}, true, false, false}, //f {"LSL16", 0xf000, 0xfe00, DSPInterpreter::lsl16, NULL, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, - {"MADD", 0xf200, 0xfe00, DSPInterpreter::madd, NULL, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false}, + {"MADD", 0xf200, 0xfe00, DSPInterpreter::madd, &DSPEmitter::madd, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false}, {"LSR16", 0xf400, 0xfe00, DSPInterpreter::lsr16, NULL, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, - {"MSUB", 0xf600, 0xfe00, DSPInterpreter::msub, NULL, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false}, + {"MSUB", 0xf600, 0xfe00, DSPInterpreter::msub, &DSPEmitter::msub, 1, 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false, false}, {"ADDPAXZ", 0xf800, 0xfc00, DSPInterpreter::addpaxz, NULL, 1, 2, {{P_ACC, 1, 0, 9, 0x0200}, {P_AX, 1, 0, 8, 0x0100}}, true, false, false}, {"CLRL", 0xfc00, 0xfe00, DSPInterpreter::clrl, NULL, 1, 1, {{P_ACCL, 1, 0, 11, 0x0800}}, true, false, false}, {"MOVPZ", 0xfe00, 0xfe00, DSPInterpreter::movpz, &DSPEmitter::movpz, 1, 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false, false}, diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp index c48c090ecc..b54995020b 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitMultiplier.cpp @@ -55,17 +55,13 @@ void DSPEmitter::get_multiply_prod() SetJumpTarget(sign2); SetJumpTarget(sign3); // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RSI, R(RSI)); - MOVSX(64, 16, RDI, R(RDI)); MOV(64, R(EAX), R(RDI)); - IMUL(16, R(ESI)); + IMUL(64, R(ESI)); // Conditionally multiply by 2. SetJumpTarget(mult2); -// if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) - MOV(16, R(RDX), MDisp(R11, DSP_REG_SR * 2)); // TODO check 16bit - AND(16, R(RDX), Imm16(SR_MUL_MODIFY)); - TEST(16, R(RDX), R(RDX)); +// if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) + TEST(16, MDisp(R11, DSP_REG_SR * 2), Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; SHL(64, R(EAX), Imm8(1)); @@ -74,39 +70,49 @@ void DSPEmitter::get_multiply_prod() #endif } -// Returns s64 in EAX -// In: RSI = u16 a, RDI = u16 b +// Returns s64 in RAX +// In: RSI = s16 a, RDI = s16 b void DSPEmitter::multiply() { #ifdef _M_X64 - // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RSI, R(RSI)); - MOVSX(64, 16, RDI, R(RDI)); +// prod = (s16)a * (s16)b; //signed MOV(64, R(EAX), R(RDI)); IMUL(64, R(ESI)); - // Conditionally multiply by 2. - // if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) +// Conditionally multiply by 2. +// if ((g_dsp.r[DSP_REG_SR] & SR_MUL_MODIFY) == 0) TEST(16, MDisp(R11, DSP_REG_SR * 2), Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); - // prod <<= 1; +// prod <<= 1; SHL(64, R(EAX), Imm8(1)); SetJumpTarget(noMult2); - // return prod; +// return prod; #endif } -//inline s64 dsp_multiply_add(u16 a, u16 b, u8 sign = 0) -//{ +// Returns s64 in RAX +// Clobbers RDX +void DSPEmitter::multiply_add() +{ // s64 prod = dsp_get_long_prod() + dsp_get_multiply_prod(a, b, sign); + multiply(); + MOV(64, R(RDX), R(RAX)); + get_long_prod(); + ADD(64, R(RAX), R(RDX)); // return prod; -//} +} -//inline s64 dsp_multiply_sub(u16 a, u16 b, u8 sign = 0) -//{ +// Returns s64 in RAX +// Clobbers RDX +void DSPEmitter::multiply_sub() +{ // s64 prod = dsp_get_long_prod() - dsp_get_multiply_prod(a, b, sign); + multiply(); + MOV(64, R(RDX), R(RAX)); + get_long_prod(); + SUB(64, R(RAX), R(RDX)); // return prod; -//} +} //inline s64 dsp_multiply_mulx(u8 axh0, u8 axh1, u16 val1, u16 val2) //{ @@ -270,7 +276,7 @@ void DSPEmitter::mulaxh(const UDSPInstruction opc) #ifdef _M_X64 // s64 prod = dsp_multiply(dsp_get_ax_h(0), dsp_get_ax_h(0)); MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RSI, MDisp(R11, DSP_REG_AXH0 * 2)); + MOVSX(64, 16, RSI, MDisp(R11, DSP_REG_AXH0 * 2)); MOV(64, R(RDI), R(RSI)); multiply(); // dsp_set_long_prod(prod); @@ -293,9 +299,9 @@ void DSPEmitter::mul(const UDSPInstruction opc) // u16 axl = dsp_get_ax_l(sreg); MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg) * 2)); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(sreg); - MOVZX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + sreg) * 2)); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + sreg) * 2)); // s64 prod = dsp_multiply(axh, axl); multiply(); // dsp_set_long_prod(prod); @@ -312,22 +318,35 @@ void DSPEmitter::mul(const UDSPInstruction opc) // accumulator $axS (treat them both as signed). // flags out: --xx xx0x -//void DSPEmitter::mulac(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 sreg = (opc >> 11) & 0x1; +void DSPEmitter::mulac(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 sreg = (opc >> 11) & 0x1; // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); + get_long_acc(rreg); + MOV(64, R(RDX), R(RAX)); + get_long_prod(); + ADD(64, R(RAX), R(RDX)); + PUSH(64, R(RAX)); // u16 axl = dsp_get_ax_l(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(sreg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + sreg) * 2)); // s64 prod = dsp_multiply(axl, axh); -// -// zeroWriteBackLog(); - + multiply(); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + POP(64, R(RAX)); + set_long_acc(rreg); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + Update_SR_Register64(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::mulac, opc); +#endif +} // MULMV $axS.l, $axS.h, $acR // 1001 s11r xxxx xxxx @@ -492,9 +511,9 @@ void DSPEmitter::mulc(const UDSPInstruction opc) // u16 accm = dsp_get_acc_m(sreg); MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOVZX(64, 16, ESI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); + MOVSX(64, 16, ESI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); - MOVZX(64, 16, EDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); + MOVSX(64, 16, EDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -511,23 +530,36 @@ void DSPEmitter::mulc(const UDSPInstruction opc) // register before multiplication to accumulator $acR. // flags out: --xx xx0x -//void DSPEmitter::mulcac(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulcac(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); + get_long_acc(rreg); + MOV(64, R(RDX), R(RAX)); + get_long_prod(); + ADD(64, R(RAX), R(RDX)); + PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply(accm, axh); -// -// zeroWriteBackLog(); - + multiply(); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + POP(64, R(RAX)); + set_long_acc(rreg); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + Update_SR_Register64(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::mulcac, opc); +#endif +} // MULCMV $acS.m, $axT.h, $acR // 110s t11r xxxx xxxx @@ -537,23 +569,33 @@ void DSPEmitter::mulc(const UDSPInstruction opc) // possible mistake in duddie's doc axT.h rather than axS.h // flags out: --xx xx0x -//void DSPEmitter::mulcmv(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulcmv(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod(); + get_long_prod(); + PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply(accm, axh); -// -// zeroWriteBackLog(); - + multiply(); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + POP(64, R(RAX)); + set_long_acc(rreg); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + Update_SR_Register64(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::mulcmv, opc); +#endif +} // MULCMVZ $acS.m, $axT.h, $acR // 110s t01r xxxx xxxx @@ -564,23 +606,34 @@ void DSPEmitter::mulc(const UDSPInstruction opc) // accumulator $acR.l to zero. // flags out: --xx xx0x -//void DSPEmitter::mulcmvz(const UDSPInstruction opc) -//{ -// u8 rreg = (opc >> 8) & 0x1; -// u8 treg = (opc >> 11) & 0x1; -// u8 sreg = (opc >> 12) & 0x1; +void DSPEmitter::mulcmvz(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 rreg = (opc >> 8) & 0x1; + u8 treg = (opc >> 11) & 0x1; + u8 sreg = (opc >> 12) & 0x1; + MOV(64, R(R11), ImmPtr(&g_dsp.r)); // s64 acc = dsp_get_long_prod_round_prodl(); + get_long_prod_round_prodl(); + PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply(accm, axh); -// -// zeroWriteBackLog(); - + multiply(); // dsp_set_long_prod(prod); + set_long_prod(); // dsp_set_long_acc(rreg, acc); + POP(64, R(RAX)); + set_long_acc(rreg); // Update_SR_Register64(dsp_get_long_acc(rreg)); -//} + Update_SR_Register64(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::mulcmvz, opc); +#endif +} //---- @@ -627,71 +680,95 @@ void DSPEmitter::mulc(const UDSPInstruction opc) // Multiply middle part of accumulator $acS.m by high part of secondary // accumulator $axT.h (treat them both as signed) and add result to product // register. -//void DSPEmitter::maddc(const UDSPInstruction opc) -//{ -// u8 treg = (opc >> 8) & 0x1; -// u8 sreg = (opc >> 9) & 0x1; +void DSPEmitter::maddc(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 treg = (opc >> 8) & 0x1; + u8 sreg = (opc >> 9) & 0x1; -// u16 accm = dsp_get_acc_m(sreg); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); +// u16 accm = dsp_get_acc_m(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply_add(accm, axh); - -// zeroWriteBackLog(); - + multiply_add(); // dsp_set_long_prod(prod); -//} + set_long_prod(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::maddc, opc); +#endif +} // MSUBC $acS.m, $axT.h // 1110 11st xxxx xxxx // Multiply middle part of accumulator $acS.m by high part of secondary // accumulator $axT.h (treat them both as signed) and subtract result from // product register. -//void DSPEmitter::msubc(const UDSPInstruction opc) -//{ -// u8 treg = (opc >> 8) & 0x1; -// u8 sreg = (opc >> 9) & 0x1; +void DSPEmitter::msubc(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 treg = (opc >> 8) & 0x1; + u8 sreg = (opc >> 9) & 0x1; // // u16 accm = dsp_get_acc_m(sreg); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_ACM0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(treg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + treg) * 2)); // s64 prod = dsp_multiply_sub(accm, axh); - -// zeroWriteBackLog(); - + multiply_sub(); // dsp_set_long_prod(prod); -//} + set_long_prod(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::msubc, opc); +#endif +} // MADD $axS.l, $axS.h // 1111 001s xxxx xxxx // Multiply low part $axS.l of secondary accumulator $axS by high part // $axS.h of secondary accumulator $axS (treat them both as signed) and add // result to product register. -//void DSPEmitter::madd(const UDSPInstruction opc) -//{ -// u8 sreg = (opc >> 8) & 0x1; -// -// u16 axl = dsp_get_ax_l(sreg); -// u16 axh = dsp_get_ax_h(sreg); -// s64 prod = dsp_multiply_add(axl, axh); -// -// zeroWriteBackLog(); +void DSPEmitter::madd(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 sreg = (opc >> 8) & 0x1; + MOV(64, R(R11), ImmPtr(&g_dsp.r)); +// u16 axl = dsp_get_ax_l(sreg); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg) * 2)); +// u16 axh = dsp_get_ax_h(sreg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + sreg) * 2)); +// s64 prod = dsp_multiply_add(axl, axh); + multiply_add(); // dsp_set_long_prod(prod); -//} + set_long_prod(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::madd, opc); +#endif +} // MSUB $axS.l, $axS.h // 1111 011s xxxx xxxx // Multiply low part $axS.l of secondary accumulator $axS by high part // $axS.h of secondary accumulator $axS (treat them both as signed) and // subtract result from product register. -//void DSPEmitter::msub(const UDSPInstruction opc) -//{ -// u8 sreg = (opc >> 8) & 0x1; +void DSPEmitter::msub(const UDSPInstruction opc) +{ +#ifdef _M_X64 + u8 sreg = (opc >> 8) & 0x1; // // u16 axl = dsp_get_ax_l(sreg); + MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOVSX(64, 16, RSI, MDisp(R11, (DSP_REG_AXL0 + sreg) * 2)); // u16 axh = dsp_get_ax_h(sreg); + MOVSX(64, 16, RDI, MDisp(R11, (DSP_REG_AXH0 + sreg) * 2)); // s64 prod = dsp_multiply_sub(axl, axh); -// -// zeroWriteBackLog(); - + multiply_sub(); // dsp_set_long_prod(prod); -//} + set_long_prod(); +#else + ABI_CallFunctionC((void *)&DSPInterpreter::msub, opc); +#endif +} diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp index fe24dc875d..621b4dd9bb 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitUtil.cpp @@ -403,7 +403,7 @@ void DSPEmitter::ext_dmem_read(u16 addr) } // Returns s64 in RAX -// Clobbers RSI +// Clobbers RSI, RDI void DSPEmitter::get_long_prod() { #ifdef _M_X64 @@ -518,7 +518,7 @@ void DSPEmitter::get_acc_m(int _reg) // return g_dsp.r[DSP_REG_ACM0 + _reg]; #ifdef _M_X64 MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(EAX), MDisp(R11, (DSP_REG_ACM0 + _reg) * 2)); + MOVSX(64, 16, RAX, MDisp(R11, (DSP_REG_ACM0 + _reg) * 2)); #endif } @@ -528,7 +528,7 @@ void DSPEmitter::get_ax_l(int _reg) // return (s16)g_dsp.r[DSP_REG_AXL0 + _reg]; #ifdef _M_X64 MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(EAX), MDisp(R11, (DSP_REG_AXL0 + _reg) * 2)); + MOVSX(64, 16, RAX, MDisp(R11, (DSP_REG_AXL0 + _reg) * 2)); #endif } @@ -538,7 +538,7 @@ void DSPEmitter::get_ax_h(int _reg) // return (s16)g_dsp.r[DSP_REG_AXH0 + _reg]; #ifdef _M_X64 MOV(64, R(R11), ImmPtr(&g_dsp.r)); - MOV(16, R(EAX), MDisp(R11, (DSP_REG_AXH0 + _reg) * 2)); + MOVSX(64, 16, RAX, MDisp(R11, (DSP_REG_AXH0 + _reg) * 2)); #endif }