Merge pull request #10702 from Pokechu22/dsp-cmpaxh

DSP LLE: Rename CMPAR to CMPAXH
2022-05-28 02:32:04 -04:00 · 2022-05-28 02:32:04 -04:00 · 2d6fe6a89f
parent 872821249b ce4aba7d5e
commit 2d6fe6a89f
9 changed files with 349 additions and 16 deletions
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@ -284,7 +284,7 @@ const std::array<DSPOPCTemplate, 230> s_opcodes =

  //c-d
  {"MULC",     0xc000, 0xe700,    1, 2, {{P_ACCM, 1, 0, 12, 0x1000},   {P_REG1A, 1, 0, 11, 0x0800}},                            true, false, false, false, true}, // $prod = $acS.m * $axS.h
-  {"CMPAR",    0xc100, 0xe700,    1, 2, {{P_ACC,  1, 0, 11, 0x0800},   {P_REG1A, 1, 0, 12, 0x1000}},                            true, false, false, false, true}, // FLAGS($acS - axR.h)
+  {"CMPAXH",   0xc100, 0xe700,    1, 2, {{P_ACC,  1, 0, 11, 0x0800},   {P_REG1A, 1, 0, 12, 0x1000}},                            true, false, false, false, true}, // FLAGS($acS - axR.h)
  {"MULCMVZ",  0xc200, 0xe600,    1, 3, {{P_ACCM, 1, 0, 12, 0x1000},   {P_REG1A, 1, 0, 11, 0x0800},  {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR.hm, $acR.l, $prod = $prod.hm, 0, $acS.m * $axS.h
  {"MULCAC",   0xc400, 0xe600,    1, 3, {{P_ACCM, 1, 0, 12, 0x1000},   {P_REG1A, 1, 0, 11, 0x0800},  {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $acR + $prod, $acS.m * $axS.h
  {"MULCMV",   0xc600, 0xe600,    1, 3, {{P_ACCM, 1, 0, 12, 0x1000},   {P_REG1A, 1, 0, 11, 0x0800},  {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $prod, $acS.m * $axS.h
--- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
@ -124,12 +124,12 @@ void Interpreter::cmp(const UDSPInstruction)
  ZeroWriteBackLog();
 }

-// CMPAR $acS axR.h
+// CMPAXH $acS, $axR.h
 // 110r s001 xxxx xxxx
-// Compares accumulator $acS with accumulator $axR.h.
+// Compares accumulator $acS with high part of secondary accumulator $axR.h.
 //
 // flags out: x-xx xxxx
-void Interpreter::cmpar(const UDSPInstruction opc)
+void Interpreter::cmpaxh(const UDSPInstruction opc)
 {
  const u8 rreg = (opc >> 12) & 0x1;
  const u8 sreg = (opc >> 11) & 0x1;
--- a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp
@ -178,7 +178,7 @@ constexpr std::array<InterpreterOpInfo, 125> s_opcodes

  // C-D
  {0xc000, 0xe700, &Interpreter::mulc},
-  {0xc100, 0xe700, &Interpreter::cmpar},
+  {0xc100, 0xe700, &Interpreter::cmpaxh},
  {0xc200, 0xe600, &Interpreter::mulcmvz},
  {0xc400, 0xe600, &Interpreter::mulcac},
  {0xc600, 0xe600, &Interpreter::mulcmv},
--- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
+++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
@ -70,7 +70,7 @@ public:
  void clrl(UDSPInstruction opc);
  void clrp(UDSPInstruction opc);
  void cmp(UDSPInstruction opc);
-  void cmpar(UDSPInstruction opc);
+  void cmpaxh(UDSPInstruction opc);
  void cmpi(UDSPInstruction opc);
  void cmpis(UDSPInstruction opc);
  void dar(UDSPInstruction opc);
--- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h
+++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h
@ -115,7 +115,7 @@ public:
  void tst(UDSPInstruction opc);
  void tstaxh(UDSPInstruction opc);
  void cmp(UDSPInstruction opc);
-  void cmpar(UDSPInstruction opc);
+  void cmpaxh(UDSPInstruction opc);
  void cmpi(UDSPInstruction opc);
  void cmpis(UDSPInstruction opc);
  void xorr(UDSPInstruction opc);
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
@ -188,12 +188,12 @@ void DSPEmitter::cmp(const UDSPInstruction opc)
  }
 }

-// CMPAR $acS axR.h
+// CMPAXH $acS, $axR.h
 // 110r s001 xxxx xxxx
-// Compares accumulator $acS with accumulator $axR.h.
+// Compares accumulator $acS with high part of secondary accumulator $axR.h.
 //
 // flags out: x-xx xxxx
-void DSPEmitter::cmpar(const UDSPInstruction opc)
+void DSPEmitter::cmpaxh(const UDSPInstruction opc)
 {
  if (FlagsNeeded())
  {
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp
@ -178,7 +178,7 @@ const std::array<JITOpInfo, 125> s_opcodes =

  // C-D
  {0xc000, 0xe700, &DSPEmitter::mulc},
-  {0xc100, 0xe700, &DSPEmitter::cmpar},
+  {0xc100, 0xe700, &DSPEmitter::cmpaxh},
  {0xc200, 0xe600, &DSPEmitter::mulcmvz},
  {0xc400, 0xe600, &DSPEmitter::mulcac},
  {0xc600, 0xe600, &DSPEmitter::mulcmv},
--- a/Source/DSPSpy/tests/cmpaxh_test.ds
+++ b/Source/DSPSpy/tests/cmpaxh_test.ds
@ -0,0 +1,332 @@
+incdir "tests"
+include "dsp_base.inc"
+
+; Test values for $ax0.  Each entry is two words: the first is loaded into
+; $ax0.h and the second into $ax0.l by the loops in test_main.
+input_ax:
+; [0] - 0x0000'0000 - 0
+CW 0
+CW 0
+; [1] - 0x0000'0001 - 1 in $ax0.l
+CW 0
+CW 1
+; [2] - 0x0000'ffff - -1 in $ax0.l
+CW 0
+CW 0xffff
+; [3] - 0x0001'0000 - 1 in $ax0.h
+CW 1
+CW 0
+; [4] - 0x7fff'0000 - INT_MAX in $ax0.h
+CW 0x7fff
+CW 0
+; [5] - 0x8000'0000 - INT_MIN in $ax0.h
+CW 0x8000
+CW 0
+; [6] - 0xffff'0000 - -1 in $ax0.h
+CW 0xffff
+CW 0
+input_ax_end:
+
+; Test values for $acc0.  Each entry is three words, loaded by test_main in
+; the order $ac0.h, $ac0.m, $ac0.l.
+input_acc:
+; [0] - 0x00'0000'0000 - 0
+CW 0
+CW 0
+CW 0
+; [1] - 0x00'0000'0001 - 1 in $ac0.l
+CW 0
+CW 0
+CW 1
+; [2] - 0x00'0000'ffff - -1 in $ac0.l
+CW 0
+CW 0
+CW 0xffff
+; [3] - 0x00'0001'0000 - 1 in $ac0.m
+CW 0
+CW 1
+CW 0
+; [4] - 0x00'7fff'0000 - INT_MAX in $ac0.m
+CW 0
+CW 0x7fff
+CW 0
+; [5] - 0x00'8000'0000 - INT_MIN in $ac0.m, but not sign extended
+CW 0
+CW 0x8000
+CW 0
+; [6] - 0x00'ffff'0000 - -1 in $ac0.m, but not sign extended
+CW 0
+CW 0xffff
+CW 0
+; [7] - 0x01'0000'0000 - 1 in $ac0.h
+CW 1
+CW 0
+CW 0
+; [8] - 0x7f'ffff'0000 - true INT_MAX
+CW 0x7f
+CW 0xffff
+CW 0
+; [9] - 0x80'0000'0000 - true INT_MIN
+CW 0x80
+CW 0
+CW 0
+; [10] - 0xff'8000'0000 - INT_MIN in $ac0.m, sign-extended
+CW 0xff
+CW 0x8000
+CW 0
+; [11] - 0xff'ffff'0000 - -1
+CW 0xff
+CW 0xffff
+CW 0
+input_acc_end:
+
+/* Python script to generate the following result tables from a DSP dump:
+import struct
+def gen_tables(name, num_ax, num_acc):
+	with open(name, "rb") as fin:
+		data = fin.read()
+	reg_values = list(struct.iter_unpack(">" + "H"*0x20, data))
+	# Initial register values (there is no corresponding send_back call for these), then our two
+	# default value checks, then the TSTAXH test, then the CMPAXH test, then the test results
+	assert len(reg_values) == 1 + 2 + num_ax + num_ax * num_acc + 1
+	print("result_table_tstaxh:")
+	for ax in range(num_ax):
+		# SR is register 0x13
+		print("CW {:#04x}".format(reg_values[3 + ax][0x13]))
+	print("result_table_tstaxh_end:")
+	print()
+	print("result_table_cmpaxh:")
+	for ax in range(num_ax):
+		print("; ax [{}]".format(ax))
+		for acc in range(num_acc):
+			print("CW {:#04x}".format(reg_values[3 + num_ax + ax * num_acc + acc][0x13]))
+	print("result_table_cmpaxh_end:")
+
+gen_tables("dsp_dump0.bin", 7, 12)
+*/
+
+; Expected $sr after TSTAXH $ax0.h: one word per input_ax entry, in the same
+; order as input_ax.
+result_table_tstaxh:
+CW 0x22a4
+CW 0x22a4
+CW 0x22a4
+CW 0x22a0
+CW 0x2280
+CW 0x2288
+CW 0x22a8
+result_table_tstaxh_end:
+
+; Expected $sr after CMPAXH $acc0, $ax0.h.  Grouped by input_ax entry; within
+; each group there is one word per input_acc entry, in the same order as
+; input_acc.
+result_table_cmpaxh:
+; ax [0]
+CW 0x22a5
+CW 0x22a1
+CW 0x22a1
+CW 0x22a1
+CW 0x2281
+CW 0x2291
+CW 0x22b1
+CW 0x22b1
+CW 0x22b1
+CW 0x22b9
+CW 0x2289
+CW 0x22a9
+; ax [1]
+CW 0x22a5
+CW 0x22a1
+CW 0x22a1
+CW 0x22a1
+CW 0x2281
+CW 0x2291
+CW 0x22b1
+CW 0x22b1
+CW 0x22b1
+CW 0x22b9
+CW 0x2289
+CW 0x22a9
+; ax [2]
+CW 0x22a5
+CW 0x22a1
+CW 0x22a1
+CW 0x22a1
+CW 0x2281
+CW 0x2291
+CW 0x22b1
+CW 0x22b1
+CW 0x22b1
+CW 0x22b9
+CW 0x2289
+CW 0x22a9
+; ax [3]
+CW 0x22a8
+CW 0x22a8
+CW 0x22a8
+CW 0x22a5
+CW 0x2281
+CW 0x2281
+CW 0x22b1
+CW 0x22b1
+CW 0x22b1
+CW 0x22b3
+CW 0x2299
+CW 0x22a9
+; ax [4]
+CW 0x2288
+CW 0x2288
+CW 0x2288
+CW 0x2288
+CW 0x22a5
+CW 0x22a1
+CW 0x2291
+CW 0x2291
+CW 0x2291
+CW 0x2293
+CW 0x22b9
+CW 0x2289
+; ax [5]
+CW 0x2290
+CW 0x2290
+CW 0x2290
+CW 0x2290
+CW 0x22b0
+CW 0x22b0
+CW 0x2290
+CW 0x2290
+CW 0x229a
+CW 0x2298
+CW 0x22a5
+CW 0x2281
+; ax [6]
+CW 0x22a0
+CW 0x22a0
+CW 0x22a0
+CW 0x22a0
+CW 0x2290
+CW 0x2290
+CW 0x22b0
+CW 0x22b0
+CW 0x22ba
+CW 0x22b8
+CW 0x2288
+CW 0x22a5
+result_table_cmpaxh_end:
+
+; Test entry point.  First runs CMPAXH and TSTAXH once on the default register
+; values, then checks TSTAXH for every input_ax entry and CMPAXH for every
+; (input_ax, input_acc) combination against the result tables above.  A TSTAXH
+; mismatch sets $ax1.l to 1 and a CMPAXH mismatch sets $ax1.h to 1; both are
+; reported by the final send_back call.
+test_main:
+	; Perform one test using the default values
+	; ($acc0 is 14 0009 0007 and $ax0 is 8000 0003, but this can be changed in the DSPSpy UI)
+	; Also, as a sanity check, record the computed sizes of the result tables
+	LRI $ar0, #input_ax
+	LRI $ix0, #(input_ax_end - input_ax)
+	LRI $ar1, #input_acc
+	LRI $ix1, #(input_acc_end - input_acc)
+	LRI $ar2, #result_table_tstaxh
+	LRI $ix2, #(input_ax_end - input_ax)/2
+	LRI $ar3, #result_table_cmpaxh
+	LRI $ix3, #((input_ax_end - input_ax)/2)*((input_acc_end - input_acc)/3)
+	; Set the sticky overflow bit just so that we get consistent $sr values
+	; before and after an overflow occurs
+	SBSET #1
+	CMPAXH $acc0, $ax0.h
+	CALL send_back ; Expected $sr: 2290
+	; $ar0 should match $ix0, etc
+	ADDARN $ar0, $ix0
+	LRI $ix0, #input_ax_end
+	ADDARN $ar1, $ix1
+	LRI $ix1, #input_acc_end
+	ADDARN $ar2, $ix2
+	LRI $ix2, #result_table_tstaxh_end
+	ADDARN $ar3, $ix3
+	LRI $ix3, #result_table_cmpaxh_end
+	TSTAXH $ax0.h
+	CALL send_back ; Expected $sr: 2288
+
+	CLR $acc0
+	CLR $acc1
+	LRI $ax0.h, #0
+	LRI $ax0.l, #0
+	LRI $ax1.h, #0
+	LRI $ax1.l, #0
+
+	; Check TSTAXH...
+	LRI $ar0, #input_ax
+	LRI $ar2, #result_table_tstaxh
+
+	; for (int ctr = input_ax.size(); ctr > 0; ctr--) {
+	BLOOPI #(input_ax_end - input_ax)/2, check_tstaxh_last_ins
+		; Note: if DSPSpy supported populating DMEM as well as IMEM, then there are several
+		; instructions that could make this faster and cleaner... but it doesn't currently,
+		; so we're stuck with ILRRI.
+
+		; Load the test value into $ax0.h/$ax0.l via $ac0.m
+		ILRRI $ac0.m, $ar0 ; $ac0.m = IMEM[$ar0++]
+		MRR $ax0.h, $ac0.m
+		ILRRI $ac0.m, $ar0
+		MRR $ax0.l, $ac0.m
+		; Load the expected value into $ac1.m
+		ILRRI $ac1.m, $ar2 ; $ac1.m = IMEM[$ar2++]
+		; Reduce noise in the results
+		LRI $ac0.m, #0
+
+		; Do the test
+		TSTAXH $ax0.h
+		CALL send_back
+
+		; Check if $sr matches the value we expected.  If there is any difference,
+		; note it via a nonzero $ax1.l.  (send_back saves the value of $sr)
+		MRR $ac0.m, $sr
+		CMP
+		IFNZ
+		LRIS $ax1.l, #1
+check_tstaxh_last_ins:
+	NOP
+	; }
+
+	; Check CMPAXH...
+	CLR $acc0
+	CLR $acc1
+	LRI $ar0, #input_ax
+	LRI $ar3, #result_table_cmpaxh
+
+	; for (int ctr_ax = input_ax.size(); ctr_ax > 0; ctr_ax--) {
+	BLOOPI #(input_ax_end - input_ax)/2, check_cmpaxh_last_ins_outer
+		; Load the test value into $ax0.h/$ax0.l via $ac1.m
+		ILRRI $ac1.m, $ar0
+		MRR $ax0.h, $ac1.m
+		ILRRI $ac1.m, $ar0
+		MRR $ax0.l, $ac1.m
+
+		LRI $ar1, #input_acc
+
+		; for (int ctr_acc = input_acc.size(); ctr_acc > 0; ctr_acc--) {
+		BLOOPI #(input_acc_end - input_acc)/3, check_cmpaxh_last_ins_inner
+
+			; Load the test value into $ac0.h/$ac0.m/$ac0.l via $ac1.m
+			ILRRI $ac1.m, $ar1
+			MRR $ac0.h, $ac1.m
+			ILRRI $ac0.m, $ar1 ; we can load it directly here
+			ILRRI $ac1.m, $ar1
+			MRR $ac0.l, $ac1.m
+
+			; Load the expected value into $ac1.m
+			ILRRI $ac1.m, $ar3
+
+			; Do the test
+			CMPAXH $acc0, $ax0.h
+			CALL send_back
+
+			; Check if $sr matches the value we expected.  If there is any difference,
+			; note it via a nonzero $ax1.h.  (send_back saves the value of $sr)
+			; We can overwrite $ac0.m here because we load it on the next iteration.
+			MRR $ac0.m, $sr
+			LRIS $ac0.l, #0
+			LRI $ac0.h, #0
+			CMP
+			IFNZ
+			LRIS $ax1.h, #1
+check_cmpaxh_last_ins_inner:
+		NOP
+		; }
+check_cmpaxh_last_ins_outer:
+	NOP
+	; }
+
+	; We're done testing.  In the final send_back call, if $ax1.l or $ax1.h
+	; is nonzero, the test failed.
+	CALL send_back
+
+	; We're done, DO NOT DELETE THIS LINE
+	JMP end_of_test
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@ -46,7 +46,7 @@
 % Document front page material
 \title{\textbf{\Huge GameCube DSP User's Manual}}
 \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
-\date{\today\\v0.1.2}
+\date{\today\\v0.1.3}

 % Title formatting commands
 \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@ -260,6 +260,7 @@ The purpose of this documentation is purely academic and it aims at understandin
 0.1.0            & 2021.08.21    & Pokechu22       & Added missing instructions, improved documentation of hardware registers, documented additional behaviors, and improved formatting. \\ \hline
 0.1.1            & 2022.05.14    & xperia64        & Added tested DSP bootloading transfer size                                               \\ \hline
 0.1.2            & 2022.05.21    & Pokechu22       & Fixed ``ILLR'' typo in Instruction Memory section                                        \\ \hline
+0.1.3            & 2022.05.27    & Pokechu22       & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH}                                    \\ \hline
 \end{tabular}
 \end{table}

@ -1929,17 +1930,17 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
  \DSPOpcodeFlags{X}{-}{X}{X}{X}{X}{X}{X}
 \end{DSPOpcode}

-\begin{DSPOpcode}{CMPAR}
+\begin{DSPOpcode}{CMPAXH}
  \begin{DSPOpcodeBytefield}{16}
    \monobitbox{4}{110r} & \monobitbox{4}{s001} & \monobitbox{4}{xxxx} & \monobitbox{4}{xxxx}
  \end{DSPOpcodeBytefield}

  \begin{DSPOpcodeFormat}
-    CMPAR $acS $axR.h
+    CMPAXH $acS, $axR.h
  \end{DSPOpcodeFormat}

  \begin{DSPOpcodeDescription}
-    \item Compares accumulator \Register{\$acS} with accumulator \Register{\$axR.h}.
+    \item Compares accumulator \Register{\$acS} with high part of secondary accumulator \Register{\$axR.h}.
  \end{DSPOpcodeDescription}

  \begin{DSPOpcodeOperation}
@ -5065,7 +5066,7 @@ Instruction & Opcode & Page \\ \hline
 \OpcodeRow{101s t11r xxxx xxxx}{MULXMV}
 \OpcodeRowSkip
 \OpcodeRow{110s t000 xxxx xxxx}{MULC}
-\OpcodeRow{110r s001 xxxx xxxx}{CMPAR}
+\OpcodeRow{110r s001 xxxx xxxx}{CMPAXH}
 \OpcodeRow{110s t01r xxxx xxxx}{MULCMVZ}
 \OpcodeRow{110s t10r xxxx xxxx}{MULCAC}
 \OpcodeRow{110s t11r xxxx xxxx}{MULCMV}