From e3b1356f3d72c4c264f4bfeba59eb0071f136567 Mon Sep 17 00:00:00 2001
From: hrydgard <hrydgard@gmail.com>
Date: Sun, 5 Apr 2009 21:04:46 +0000
Subject: [PATCH] DSP: Corrected CMPI implementation. (i think)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2888 8ced0084-cf51-0410-be5f-012b33b47a6e
---
 .../Src/DSPInterpreter.cpp                    | 49 ++++++++-----------
 .../Plugin_DSP_LLE-testing/Src/DSPTables.cpp  | 19 +++++--
 .../Src/gdsp_opcodes_helper.h                 | 37 ++------------
 3 files changed, 39 insertions(+), 66 deletions(-)

diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp
index 3de062b5f8..c14b6eb6db 100644
--- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp
+++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPInterpreter.cpp
@@ -602,36 +602,16 @@ void andf(const UDSPInstruction& opc)
 	}
 }
 
-// FIXME inside
 void cmpi(const UDSPInstruction& opc)
 {
-	if (opc.hex & 0xf)
-	{
-		// FIXME: Implement
-		ERROR_LOG(DSPHLE, "dsp subf opcode");
-	}
-
-#if 1
-	// Old implementation
-	u8 reg  = 0x1e + ((opc.hex >> 8) & 0x1);
-	s64 imm = (s16)dsp_fetch_code();
-
-	s64 val = (s16)g_dsp.r[reg];
-	s64 res = val - imm;
-
-	Update_SR_Register64(res);
-#else
-	// Implementation according to docs
 	int reg  = (opc.hex >> 8) & 0x1;
 
 	// Immediate is considered to be at M level in the 40-bit accumulator.
-	s64 imm = (s64)dsp_fetch_code() << 16;
+	s64 imm = (s64)(s16)dsp_fetch_code() << 16;
 	s64 val = dsp_get_long_acc(reg);
 	s64 res = val - imm;
 
 	Update_SR_Register64(res);
-#endif
-
 }
 
 // FIXME inside
@@ -1061,27 +1041,38 @@ void srbith(const UDSPInstruction& opc)
 {
 	switch ((opc.hex >> 8) & 0xf)
 	{
+	// M0 appears to be the default mode. Functions in Zelda switch to M2
+	// and then switch back to M0 at the end. Like the other mode bits
+	// handled here, this is done around loops with lots of multiplications.
+
 	case 0xa: // M2
-		ERROR_LOG(DSPHLE, "dsp_opc.hex_m2");
+		ERROR_LOG(DSPHLE, "M2");
 		break;
 		// FIXME: Both of these appear in the beginning of the Wind Waker
 	case 0xb: // M0
-		ERROR_LOG(DSPHLE, "dsp_opc.hex_m0");
-		break;
-	case 0xc: // CLR15
-		ERROR_LOG(DSPHLE, "dsp_opc.hex_clr15");
-		break;
-	case 0xd: // SET15
-		ERROR_LOG(DSPHLE, "dsp_opc.hex_set15");
+		ERROR_LOG(DSPHLE, "M0");
 		break;
 
+	// Purpose unknown - possibly 15-bit precision, or clamping?
+	// CLR15 seems to be the default.
+	case 0xc: // CLR15
+		ERROR_LOG(DSPHLE, "CLR15");
+		break;
+	case 0xd: // SET15
+		ERROR_LOG(DSPHLE, "SET15");
+		break;
+
+	// Purpose unknown - possibly 40-bit precision, or clamping?
+	// SET40 seems to be the default.
 	case 0xe: // SET40  (really, clear SR's 0x4000?) something about "set 40-bit operation"?
 		g_dsp.r[R_SR] &= ~(1 << 14);
+		ERROR_LOG(DSPHLE, "SET40");
 		break;
 
 	case 0xf: // SET16  (really, set SR's 0x4000?) something about "set 16-bit operation"?
 		// that doesnt happen on a real console  << what does this comment mean?
 		g_dsp.r[R_SR] |= (1 << 14);
+		ERROR_LOG(DSPHLE, "SET16");
 		break;
 
 	default:
diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPTables.cpp b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPTables.cpp
index f15683fa8f..2b7c1c462a 100644
--- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPTables.cpp
+++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/DSPTables.cpp
@@ -57,8 +57,9 @@ jnz, ifs, retlnz
 
 void nop(const UDSPInstruction& opc) {/*DSPInterpreter::unknown(opc);*/}
  
-
-// "Unrecognized opcode 0x01a2, pc 0x0165"  seems wrong.
+// Unknown Ops
+// All AX games: a100
+// Zelda Four Swords: 02ca
 
 
 // TODO: Fill up the tables with the corresponding instructions
@@ -66,6 +67,7 @@ DSPOPCTemplate opcodes[] =
 {
 	{"NOP",		0x0000, 0xffff, nop, nop, 1, 0, {}, NULL, NULL},
 	{"HALT",	0x0021, 0xffff, DSPInterpreter::halt, nop, 1, 0, {}, NULL, NULL},
+
 	{"RETNS",	0x02d0, 0xffff, DSPInterpreter::ret, nop, 1, 0, {}, NULL, NULL},
 	{"RETS",	0x02d1, 0xffff, DSPInterpreter::ret, nop, 1, 0, {}, NULL, NULL},
 	{"RETG",	0x02d2, 0xffff, DSPInterpreter::ret, nop, 1, 0, {}, NULL, NULL},
@@ -205,10 +207,16 @@ DSPOPCTemplate opcodes[] =
 	{"NX",      0x8000, 0xf700, DSPInterpreter::nx,     nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"M2",      0x8a00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"M0",      0x8b00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
+
+	// These opcodes probably change the precision or range of some operations;
+	// exactly which is still unclear. 16-bit mode vs 40-bit mode sounds plausible for SET40/SET16.
+	// Maybe SET15 makes the DSP drop the top bit from all calculations? Or clamp?
+	// SET15/CLR15 are commonly used around MULXAC in the Zelda games.
+	// SET16 is used around complicated loops with many madds etc.
 	{"CLR15",   0x8c00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"SET15",   0x8d00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
-	{"SET40",		0x8e00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
-	{"SET16",		0x8f00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
+	{"SET40",	0x8e00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
+	{"SET16",	0x8f00, 0xffff, DSPInterpreter::srbith, nop, 1 | P_EXT, 0, {}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 
 	{"DECM",    0x7800, 0xfeff, DSPInterpreter::decm,   nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 8, 0x0100}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"INCM",    0x7400, 0xfeff, DSPInterpreter::incm,   nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 8, 0x0100}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
@@ -218,6 +226,9 @@ DSPOPCTemplate opcodes[] =
 	{"NEG",		0x7c00, 0xfeff, DSPInterpreter::neg,    nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 8, 0x0100}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"MOVNP",   0x7e00, 0xfeff, DSPInterpreter::movnp,  nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 8, 0x0100}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 
+	// Unknown opcode 0xa100, seen in Burnout 2:
+	// {"???",     0xa100, 0xf7ff, DSPInterpreter::unknown,nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 11, 0x0800}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
+
 	{"TST",		0xb100, 0xf7ff, DSPInterpreter::tsta,   nop, 1 | P_EXT, 1, {{P_ACCM, 1, 0, 11, 0x0800}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 	{"TSTAXH",  0x8600, 0xfeff, DSPInterpreter::tstaxh, nop, 1 | P_EXT, 1, {{P_REG1A, 1, 0, 8, 0x0100}}, dsp_op_ext_ops_pro, dsp_op_ext_ops_epi},
 
diff --git a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_opcodes_helper.h b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_opcodes_helper.h
index ab44150f10..2b69a7c63f 100644
--- a/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_opcodes_helper.h
+++ b/Source/Plugins/Plugin_DSP_LLE-testing/Src/gdsp_opcodes_helper.h
@@ -120,7 +120,7 @@ inline s64 dsp_get_long_prod()
 	low_prod <<= 16;
 	low_prod |= g_dsp.r[0x14];
 	val += low_prod;
-	return(val);
+	return val;
 }
 
 
@@ -138,11 +138,8 @@ inline void dsp_set_long_prod(s64 val)
 	g_dsp.r[0x17] = 0;
 }
 
-
 // ---------------------------------------------------------------------------------------
-//
 // --- acc
-//
 // ---------------------------------------------------------------------------------------
 
 inline s64 dsp_get_long_acc(int reg)
@@ -152,37 +149,11 @@ inline s64 dsp_get_long_acc(int reg)
 #endif
 
 	_assert_(reg < 2);
-	s64 val;
-	s64 low_acc;
-	val       = (s8)g_dsp.r[0x10 + reg];
-	val     <<= 32;
-	low_acc   = g_dsp.r[0x1e + reg];
-	low_acc <<= 16;
-	low_acc  |= g_dsp.r[0x1c + reg];
-	val |= low_acc;
-	return val;
+	s64 high = (s64)(s8)g_dsp.r[0x10 + reg] << 32;
+	u32 mid_low = ((u32)g_dsp.r[0x1e + reg] << 16) | g_dsp.r[0x1c + reg];
+	return high | mid_low;
 }
 
-
-inline u64 dsp_get_ulong_acc(int reg)
-{
-#if PROFILE
-	ProfilerAddDelta(g_dsp.err_pc, 1);
-#endif
-
-	_assert_(reg < 2);
-	u64 val;
-	u64 low_acc;
-	val       = (u8)g_dsp.r[0x10 + reg];
-	val     <<= 32;
-	low_acc   = g_dsp.r[0x1e + reg];
-	low_acc <<= 16;
-	low_acc  |= g_dsp.r[0x1c + reg];
-	val |= low_acc;
-	return val;
-}
-
-
 inline void dsp_set_long_acc(int _reg, s64 val)
 {
 #if PROFILE