From f6ce87765f798c36deb9e0c85e20599a16b0896d Mon Sep 17 00:00:00 2001 From: Marko Pusljar Date: Fri, 7 May 2010 19:32:16 +0000 Subject: [PATCH] DSPLLE - AR decrement fixed, +other small fixes git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5437 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/DSPCore/Src/DSPIntExtOps.cpp | 14 ++-- Source/Core/DSPCore/Src/DSPIntUtil.h | 83 ++++++++++---------- Source/Core/DSPCore/Src/DSPTables.cpp | 4 +- Source/Core/DSPCore/Src/DspIntArithmetic.cpp | 6 +- Source/Core/DSPCore/Src/DspIntMisc.cpp | 2 - Source/Core/DSPCore/Src/DspIntMultiplier.cpp | 53 +++++-------- 6 files changed, 72 insertions(+), 90 deletions(-) diff --git a/Source/Core/DSPCore/Src/DSPIntExtOps.cpp b/Source/Core/DSPCore/Src/DSPIntExtOps.cpp index 73527a1ba2..690dccf0a3 100644 --- a/Source/Core/DSPCore/Src/DSPIntExtOps.cpp +++ b/Source/Core/DSPCore/Src/DSPIntExtOps.cpp @@ -214,7 +214,7 @@ void lsm(const UDSPInstruction opc) dsp_dmem_write(g_dsp.r[DSP_REG_AR3], g_dsp.r[sreg]); writeToBackLog(0, dreg, dsp_dmem_read(g_dsp.r[DSP_REG_AR0])); - writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); writeToBackLog(2, DSP_REG_AR0, dsp_increment_addr_reg(DSP_REG_AR0)); } @@ -233,7 +233,7 @@ void lsnm(const UDSPInstruction opc) dsp_dmem_write(g_dsp.r[DSP_REG_AR3], g_dsp.r[sreg]); writeToBackLog(0, dreg, dsp_dmem_read(g_dsp.r[DSP_REG_AR0])); - writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); writeToBackLog(2, DSP_REG_AR0, dsp_increase_addr_reg(DSP_REG_AR0, (s16)g_dsp.r[DSP_REG_IX0])); } @@ -286,7 +286,7 @@ void slm(const UDSPInstruction opc) dsp_dmem_write(g_dsp.r[DSP_REG_AR0], g_dsp.r[sreg]); writeToBackLog(0, dreg, dsp_dmem_read(g_dsp.r[DSP_REG_AR3])); - writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); writeToBackLog(2, DSP_REG_AR0, dsp_increment_addr_reg(DSP_REG_AR0)); } @@ -304,7 +304,7 @@ void slnm(const UDSPInstruction opc) dsp_dmem_write(g_dsp.r[DSP_REG_AR0], g_dsp.r[sreg]); writeToBackLog(0, dreg, dsp_dmem_read(g_dsp.r[DSP_REG_AR3])); - writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + writeToBackLog(1, DSP_REG_AR3, dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); writeToBackLog(2, DSP_REG_AR0, dsp_increase_addr_reg(DSP_REG_AR0, (s16)g_dsp.r[DSP_REG_IX0])); } @@ -406,7 +406,7 @@ void ldm(const UDSPInstruction opc) } writeToBackLog(3, DSP_REG_AR3, - dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); } // LDNM $ax0.d, $ax1.r, @$arS @@ -438,7 +438,7 @@ void ldnm(const UDSPInstruction opc) } writeToBackLog(3, DSP_REG_AR3, - dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX0 + DSP_REG_AR3])); + dsp_increase_addr_reg(DSP_REG_AR3, (s16)g_dsp.r[DSP_REG_IX3])); } @@ -483,7 +483,7 @@ void zeroWriteBackLog() } } -//needed for 0x3... (at least)..., + clrl +//needed for 0x3... //ex. corner case -> 0x3060: main opcode modifies .m, and extended .l -> .l shoudnt be zeroed because of .m write... void zeroWriteBackLogPreserveAcc(u8 acc) { diff --git a/Source/Core/DSPCore/Src/DSPIntUtil.h b/Source/Core/DSPCore/Src/DSPIntUtil.h index c126d76153..c48b049403 100644 --- a/Source/Core/DSPCore/Src/DSPIntUtil.h +++ b/Source/Core/DSPCore/Src/DSPIntUtil.h @@ -54,6 +54,8 @@ inline bool dsp_SR_is_flag_set(int flag) // // HORRIBLE UGLINESS, someone please fix. // See http://code.google.com/p/dolphin-emu/source/detail?r=3125 +// +// increment, decrement 100% ok (as far as i can tell), increase, decrease still probs inline u16 ToMask(u16 a) { @@ -63,72 +65,71 @@ inline u16 ToMask(u16 a) return a | (a >> 1); } -inline s16 dsp_increment_addr_reg(int reg, s16 value) +inline u16 dsp_increment_addr_reg(u16 reg, u16 ar) { - u16 tmb = ToMask(g_dsp.r[DSP_REG_WR0 + reg]); + u16 wr = g_dsp.r[DSP_REG_WR0 + reg]; + u16 tmb = ToMask(wr); - if ((value & tmb) == tmb) - value ^= g_dsp.r[DSP_REG_WR0 + reg]; + if ((ar & tmb) == tmb) + ar-=wr; else - value++; + ar++; - return value; + return ar; } -inline s16 dsp_increment_addr_reg(int reg) +inline u16 dsp_increment_addr_reg(u16 reg) { return dsp_increment_addr_reg(reg, g_dsp.r[reg]); } -// See http://code.google.com/p/dolphin-emu/source/detail?r=3125 -inline s16 dsp_decrement_addr_reg(int reg, s16 value) +inline u16 dsp_decrement_addr_reg(u16 reg, u16 ar) { - // This one is easy. Looks like a hw implementation. Increment is worse... - if ((value & g_dsp.r[DSP_REG_WR0 + reg]) == 0) - value |= g_dsp.r[DSP_REG_WR0 + reg]; - else - value--; + u16 wr = g_dsp.r[DSP_REG_WR0 + reg]; + u16 tmb = ToMask(wr); + u16 min = (tmb+1-ar)&tmb; - return value; + if ((wr < min) || !min) + ar+=wr; + else + ar--; + + return ar; } -inline s16 dsp_decrement_addr_reg(int reg) +inline u16 dsp_decrement_addr_reg(u16 reg) { return dsp_decrement_addr_reg(reg, g_dsp.r[reg]); } -inline s16 dsp_increase_addr_reg(int reg, s16 value) +inline u16 dsp_increase_addr_reg(u16 reg, s16 ix) { - s16 tmp = g_dsp.r[reg]; + u16 ar = g_dsp.r[reg]; - // TODO: DO RIGHT! - if (value > 0) { - for (int i = 0; i < value; i++) { - tmp = dsp_increment_addr_reg(reg, tmp); + if (ix > 0) { + for (s32 i = 0; i < ix; i++) { + ar = dsp_increment_addr_reg(reg, ar); } - } else if (value < 0) { - for (int i = 0; i < (int)(-value); i++) { - tmp = dsp_decrement_addr_reg(reg, tmp); + } else if (ix < 0) { + for (s32 i = 0; i < (-ix); i++) { + ar = dsp_decrement_addr_reg(reg, ar); } - } - - return tmp; + } + return ar; } -inline s16 dsp_decrease_addr_reg(int reg, s16 value) +inline u16 dsp_decrease_addr_reg(u16 reg, s16 ix) { - s16 tmp = g_dsp.r[reg]; + u16 ar = g_dsp.r[reg]; - // TODO: DO RIGHT! - if (value > 0) { - for (int i = 0; i < value; i++) { - tmp = dsp_decrement_addr_reg(reg, tmp); + if (ix > 0) { + for (s32 i = 0; i < ix; i++) { + ar = dsp_decrement_addr_reg(reg, ar); } - } else if (value < 0) { - for (int i = 0; i < (int)(-value); i++) { - tmp = dsp_increment_addr_reg(reg, tmp); + } else if (ix < 0) { + for (s32 i = 0; i < (-ix); i++) { + ar = dsp_increment_addr_reg(reg, ar); } } - - return tmp; + return ar; } // --------------------------------------------------------------------------------------- @@ -203,7 +204,7 @@ inline s64 dsp_get_long_prod() ProfilerAddDelta(g_dsp.err_pc, 1); #endif - s64 val = (s8)g_dsp.r[DSP_REG_PRODH]; + s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; val <<= 32; s64 low_prod = g_dsp.r[DSP_REG_PRODM]; low_prod += g_dsp.r[DSP_REG_PRODM2]; @@ -230,7 +231,7 @@ inline void dsp_set_long_prod(s64 val) val >>= 16; g_dsp.r[DSP_REG_PRODM] = (u16)val; val >>= 16; - g_dsp.r[DSP_REG_PRODH] = (u16)val; + g_dsp.r[DSP_REG_PRODH] = (u8)val; g_dsp.r[DSP_REG_PRODM2] = 0; } diff --git a/Source/Core/DSPCore/Src/DSPTables.cpp b/Source/Core/DSPCore/Src/DSPTables.cpp index 87c94ff607..97b8aabd5d 100644 --- a/Source/Core/DSPCore/Src/DSPTables.cpp +++ b/Source/Core/DSPCore/Src/DSPTables.cpp @@ -295,8 +295,8 @@ const DSPOPCTemplate opcodes[] = {"MADD", 0xf200, 0xfe00, DSPInterpreter::madd, NULL, 1 , 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false}, {"LSR16", 0xf400, 0xfe00, DSPInterpreter::lsr16, NULL, 1 , 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false}, {"MSUB", 0xf600, 0xfe00, DSPInterpreter::msub , NULL, 1 , 2, {{P_REG18, 1, 0, 8, 0x0100}, {P_REG1A, 1, 0, 8, 0x0100}}, true, false}, - {"ADDPAXZ", 0xf800, 0xfc00, DSPInterpreter::addpaxz, NULL, 1 , 2, {{P_ACC, 1, 0, 9, 0x0200}, {P_AX, 1, 0, 8, 0x0100}}, true, false}, //Think the args are wrong - {"CLRL", 0xfc00, 0xfe00, DSPInterpreter::clrl, NULL, 1 , 1, {{P_ACCL, 1, 0, 11, 0x0800}}, true, false}, // clear acl0 + {"ADDPAXZ", 0xf800, 0xfc00, DSPInterpreter::addpaxz, NULL, 1 , 2, {{P_ACC, 1, 0, 9, 0x0200}, {P_AX, 1, 0, 8, 0x0100}}, true, false}, + {"CLRL", 0xfc00, 0xfe00, DSPInterpreter::clrl, NULL, 1 , 1, {{P_ACCL, 1, 0, 11, 0x0800}}, true, false}, {"MOVPZ", 0xfe00, 0xfe00, DSPInterpreter::movpz, NULL, 1 , 1, {{P_ACC, 1, 0, 8, 0x0100}}, true, false}, }; diff --git a/Source/Core/DSPCore/Src/DspIntArithmetic.cpp b/Source/Core/DSPCore/Src/DspIntArithmetic.cpp index 6396b5b96e..480317e8c5 100644 --- a/Source/Core/DSPCore/Src/DspIntArithmetic.cpp +++ b/Source/Core/DSPCore/Src/DspIntArithmetic.cpp @@ -41,14 +41,14 @@ void clr(const UDSPInstruction opc) // CLRL $acR.l // 1111 110r xxxx xxxx -// Clears $acR.l - low 16 bits of accumulator $acR. +// Clears (and rounds!) $acR.l - low 16 bits of accumulator $acR. // // flags out: --xx xx00 void clrl(const UDSPInstruction opc) { u8 reg = (opc >> 8) & 0x1; - s64 acc = (dsp_get_long_acc(reg) + 0x8000) & ~0xffff; + s64 acc = (dsp_get_long_acc(reg) + 0x7fff) & ~0xffff; zeroWriteBackLog(); @@ -853,7 +853,7 @@ void lsl(const UDSPInstruction opc) // LSR $acR, #I // 0001 010r 01ii iiii -// Logically shifts left accumulator $acR by number specified by value +// Logically shifts right accumulator $acR by number specified by value // calculated by negating sign extended bits 0-6. // // flags out: --xx xx00 diff --git a/Source/Core/DSPCore/Src/DspIntMisc.cpp b/Source/Core/DSPCore/Src/DspIntMisc.cpp index 81c0e9ef0f..b87667b259 100644 --- a/Source/Core/DSPCore/Src/DspIntMisc.cpp +++ b/Source/Core/DSPCore/Src/DspIntMisc.cpp @@ -43,7 +43,6 @@ void mrr(const UDSPInstruction opc) // 0000 0000 100d dddd // iiii iiii iiii iiii // Load immediate value I to register $D. -// FIXME: Perform additional operation depending on destination register. // // DSPSpy discovery: This, and possibly other instructions that load a // register, has a different behaviour in S40 mode if loaded to AC0.M: The @@ -60,7 +59,6 @@ void lri(const UDSPInstruction opc) // LRIS $(0x18+D), #I // 0000 1ddd iiii iiii // Load immediate value I (8-bit sign extended) to accumulator register. -// FIXME: Perform additional operation depending on destination register. void lris(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + DSP_REG_AXL0; diff --git a/Source/Core/DSPCore/Src/DspIntMultiplier.cpp b/Source/Core/DSPCore/Src/DspIntMultiplier.cpp index ead12a2313..f4b9ad5115 100644 --- a/Source/Core/DSPCore/Src/DspIntMultiplier.cpp +++ b/Source/Core/DSPCore/Src/DspIntMultiplier.cpp @@ -68,32 +68,14 @@ inline s64 dsp_multiply_mulx(u8 axh0, u8 axh1, u16 val1, u16 val2) { s64 result; - if ((axh0==0) && (axh1==0)) // axl.0 * axl.1 - { + if ((axh0==0) && (axh1==0)) result = dsp_multiply(val1, val2, 1); // unsigned support ON if both ax?.l regs are used - } - else if ((axh0==0) && (axh1==1)) // axl.0 * axh.1 - { - if ((val1 >= 0x8000) && (val2 >= 0x8000)) - result = dsp_multiply(val1, val2, 2); - else if ((val1 >= 0x8000) && (val2 < 0x8000)) - result = dsp_multiply(val1, val2, 1); - else - result = dsp_multiply(val1, val2, 0); - } - else if ((axh0==1) && (axh1==0)) // axh.0 * axl.1 - { - if ((val2 >= 0x8000) && (val1 >= 0x8000)) - result = dsp_multiply(val2, val1, 2); - else if ((val2 >= 0x8000) && (val1 < 0x8000)) - result = dsp_multiply(val2, val1, 1); - else - result = dsp_multiply(val2, val1, 0); - } - else // axh.0 * axh.1 - { + else if ((axh0==0) && (axh1==1)) + result = dsp_multiply(val1, val2, 2); // mixed support ON (u64)axl.0 * (s64)(s16)axh.1 + else if ((axh0==1) && (axh1==0)) + result = dsp_multiply(val2, val1, 2); // mixed support ON (u64)axl.1 * (s64)(s16)axh.0 + else result = dsp_multiply(val1, val2, 0); // unsigned support OFF if both ax?.h regs are used - } return result; } @@ -103,19 +85,20 @@ inline s64 dsp_multiply_mulx(u8 axh0, u8 axh1, u16 val1, u16 val2) // CLRP // 1000 0100 xxxx xxxx // Clears product register $prod. +// Magic numbers taken from duddie's doc +// +// 00ff_(fff0 + 0010)_0000 = 0100_0000_0000, conveniently, lower 40bits = 0 +// +// It's not ok, to just zero all of them, correct values should be set because of +// direct use of prod regs by AX/AXWII (look @that part of ucode). void clrp(const UDSPInstruction opc) { - // Magic numbers taken from duddie's doc - // These are probably a bad idea to put here. zeroWriteBackLog(); -/* + g_dsp.r[DSP_REG_PRODL] = 0x0000; g_dsp.r[DSP_REG_PRODM] = 0xfff0; g_dsp.r[DSP_REG_PRODH] = 0x00ff; g_dsp.r[DSP_REG_PRODM2] = 0x0010; -*/ - // 00ff_(fff0 + 0010)_0000 = 0100_0000_0000, conveniently, lower 40bits = 0 - dsp_set_long_prod(0); // if we are doing it wrong then let's be consistent } // TSTPROD @@ -170,7 +153,7 @@ void movnp(const UDSPInstruction opc) // MOVPZ $acD // 1111 111d xxxx xxxx // Moves multiply product from $prod register to accumulator $acD -// register and sets $acD.l to 0 +// register and sets (rounds) $acD.l to 0 // // flags out: --xx xx0x void movpz(const UDSPInstruction opc) @@ -188,7 +171,7 @@ void movpz(const UDSPInstruction opc) // ADDPAXZ $acD, $axS // 1111 10sd xxxx xxxx // Adds secondary accumulator $axS to product register and stores result -// in accumulator register. Low 16-bits of $acD ($acD.l) are set to 0. +// in accumulator register. Low 16-bits of $acD ($acD.l) are set (round) to 0. // // flags out: --xx xx0x void addpaxz(const UDSPInstruction opc) @@ -291,7 +274,7 @@ void mulmv(const UDSPInstruction opc) // MULMVZ $axS.l, $axS.h, $acR // 1001 s01r xxxx xxxx -// Move product register to accumulator register $acR and clear low part +// Move product register to accumulator register $acR and clear (round) low part // of accumulator register $acR.l. Multiply low part $axS.l of secondary // accumulator $axS by high part $axS.h of secondary accumulator $axS (treat // them both as signed). @@ -386,7 +369,7 @@ void mulxmv(const UDSPInstruction opc) // MULXMV $ax0.S, $ax1.T, $acR // 101s t01r xxxx xxxx -// Move product register to accumulator register $acR and clear low part +// Move product register to accumulator register $acR and clear (round) low part // of accumulator register $acR.l. Multiply one part $ax0 by one part $ax1 // Part is selected by S and T bits. Zero selects low part, // one selects high part. @@ -486,7 +469,7 @@ void mulcmv(const UDSPInstruction opc) // (fixed possible bug in duddie's description, s->t) // Multiply mid part of accumulator register $acS.m by high part $axT.h of // secondary accumulator $axT (treat them both as signed). Move product -// register before multiplication to accumulator $acR, set low part of +// register before multiplication to accumulator $acR, set (round) low part of // accumulator $acR.l to zero. // // flags out: --xx xx0x