diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp index 7f1ab00a6b..af357e6a25 100644 --- a/Source/Core/Core/DSP/DSPTables.cpp +++ b/Source/Core/Core/DSP/DSPTables.cpp @@ -284,7 +284,7 @@ const std::array s_opcodes = //c-d {"MULC", 0xc000, 0xe700, 1, 2, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}}, true, false, false, false, true}, // $prod = $acS.m * $axS.h - {"CMPAR", 0xc100, 0xe700, 1, 2, {{P_ACC, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 12, 0x1000}}, true, false, false, false, true}, // FLAGS($acS - axR.h) + {"CMPAXH", 0xc100, 0xe700, 1, 2, {{P_ACC, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 12, 0x1000}}, true, false, false, false, true}, // FLAGS($acS - axR.h) {"MULCMVZ", 0xc200, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR.hm, $acR.l, $prod = $prod.hm, 0, $acS.m * $axS.h {"MULCAC", 0xc400, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $acR + $prod, $acS.m * $axS.h {"MULCMV", 0xc600, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $prod, $acS.m * $axS.h diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp index 9a6e1335ef..e88fe6ac4a 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp @@ -124,12 +124,12 @@ void Interpreter::cmp(const UDSPInstruction) ZeroWriteBackLog(); } -// CMPAR $acS axR.h +// CMPAXH $acS, $axR.h // 110r s001 xxxx xxxx -// Compares accumulator $acS with accumulator $axR.h. +// Compares accumulator $acS with high part of secondary accumulator $axR.h. 
// // flags out: x-xx xxxx -void Interpreter::cmpar(const UDSPInstruction opc) +void Interpreter::cmpaxh(const UDSPInstruction opc) { const u8 rreg = (opc >> 12) & 0x1; const u8 sreg = (opc >> 11) & 0x1; diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp index eefe8d2fd7..619f5d4164 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp @@ -178,7 +178,7 @@ constexpr std::array s_opcodes // C-D {0xc000, 0xe700, &Interpreter::mulc}, - {0xc100, 0xe700, &Interpreter::cmpar}, + {0xc100, 0xe700, &Interpreter::cmpaxh}, {0xc200, 0xe600, &Interpreter::mulcmvz}, {0xc400, 0xe600, &Interpreter::mulcac}, {0xc600, 0xe600, &Interpreter::mulcmv}, diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h index 422c9a5f45..6d9e3e2709 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h @@ -70,7 +70,7 @@ public: void clrl(UDSPInstruction opc); void clrp(UDSPInstruction opc); void cmp(UDSPInstruction opc); - void cmpar(UDSPInstruction opc); + void cmpaxh(UDSPInstruction opc); void cmpi(UDSPInstruction opc); void cmpis(UDSPInstruction opc); void dar(UDSPInstruction opc); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h index 05d52abe51..f64e211e00 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h +++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h @@ -115,7 +115,7 @@ public: void tst(UDSPInstruction opc); void tstaxh(UDSPInstruction opc); void cmp(UDSPInstruction opc); - void cmpar(UDSPInstruction opc); + void cmpaxh(UDSPInstruction opc); void cmpi(UDSPInstruction opc); void cmpis(UDSPInstruction opc); void xorr(UDSPInstruction opc); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp index 864c488d81..716781750e 100644 --- 
a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp @@ -188,12 +188,12 @@ void DSPEmitter::cmp(const UDSPInstruction opc) } } -// CMPAR $acS axR.h +// CMPAXH $acS, $axR.h // 110r s001 xxxx xxxx -// Compares accumulator $acS with accumulator $axR.h. +// Compares accumulator $acS with high part of secondary accumulator $axR.h. // // flags out: x-xx xxxx -void DSPEmitter::cmpar(const UDSPInstruction opc) +void DSPEmitter::cmpaxh(const UDSPInstruction opc) { if (FlagsNeeded()) { diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp index 87f446f82d..9c38c2e97f 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp @@ -178,7 +178,7 @@ const std::array s_opcodes = // C-D {0xc000, 0xe700, &DSPEmitter::mulc}, - {0xc100, 0xe700, &DSPEmitter::cmpar}, + {0xc100, 0xe700, &DSPEmitter::cmpaxh}, {0xc200, 0xe600, &DSPEmitter::mulcmvz}, {0xc400, 0xe600, &DSPEmitter::mulcac}, {0xc600, 0xe600, &DSPEmitter::mulcmv}, diff --git a/Source/DSPSpy/tests/cmpaxh_test.ds b/Source/DSPSpy/tests/cmpaxh_test.ds new file mode 100644 index 0000000000..0cfbed4010 --- /dev/null +++ b/Source/DSPSpy/tests/cmpaxh_test.ds @@ -0,0 +1,332 @@ +incdir "tests" +include "dsp_base.inc" + +input_ax: +; [0] - 0x0000'0000 - 0 +CW 0 +CW 0 +; [1] - 0x0000'0001 - 1 in $ax0.l +CW 0 +CW 1 +; [2] - 0x0000'ffff - -1 in $ax0.l +CW 0 +CW 0xffff +; [3] - 0x0001'0000 - 1 in $ax0.h +CW 1 +CW 0 +; [4] - 0x7fff'0000 - INT_MAX in $ax0.h +CW 0x7fff +CW 0 +; [5] - 0x8000'0000 - INT_MIN in $ax0.h +CW 0x8000 +CW 0 +; [6] - 0xffff'0000 - -1 in $ax0.h +CW 0xffff +CW 0 +input_ax_end: + +input_acc: +; [0] - 0x00'0000'0000 - 0 +CW 0 +CW 0 +CW 0 +; [1] - 0x00'0000'0001 - 1 in $ac0.l +CW 0 +CW 0 +CW 1 +; [2] - 0x00'0000'ffff - -1 in $ac0.l +CW 0 +CW 0 +CW 0xffff +; [3] - 0x00'0001'0000 - 1 in $ac0.m +CW 0 +CW 1 +CW 0 +; [4] - 0x00'7fff'0000 - INT_MAX in $ac0.m +CW 0 +CW 0x7fff +CW 0
+; [5] - 0x00'8000'0000 - INT_MIN in $ac0.m, but not sign extended +CW 0 +CW 0x8000 +CW 0 +; [6] - 0x00'ffff'0000 - -1 in $ac0.m, but not sign extended +CW 0 +CW 0xffff +CW 0 +; [7] - 0x01'0000'0000 - 1 in $ac0.h +CW 1 +CW 0 +CW 0 +; [8] - 0x7f'ffff'0000 - true INT_MAX +CW 0x7f +CW 0xffff +CW 0 +; [9] - 0x80'0000'0000 - true INT_MIN +CW 0x80 +CW 0 +CW 0 +; [10] - 0xff'8000'0000 - INT_MIN in $ac0.m, sign-extended +CW 0xff +CW 0x8000 +CW 0 +; [11] - 0xff'ffff'0000 - -1 +CW 0xff +CW 0xffff +CW 0 +input_acc_end: + +/* Python script to generate the following result tables from a DSP dump: +import struct +def gen_tables(name, num_ax, num_acc): + with open(name, "rb") as fin: + data = fin.read() + reg_values = list(struct.iter_unpack(">" + "H"*0x20, data)) + # Initial register values (there is no corresponding send_back call for these), then our two + # default value checks, then the TSTAXH test, then the CMPAXH test, then the test results + assert len(reg_values) == 1 + 2 + num_ax + num_ax * num_acc + 1 + print("result_table_tstaxh:") + for ax in range(num_ax): + # SR is register 0x13 + print("CW {:#04x}".format(reg_values[3 + ax][0x13])) + print("result_table_tstaxh_end:") + print() + print("result_table_cmpaxh:") + for ax in range(num_ax): + print("; ax [{}]".format(ax)) + for acc in range(num_acc): + print("CW {:#04x}".format(reg_values[3 + num_ax + ax * num_acc + acc][0x13])) + print("result_table_cmpaxh_end:") + +gen_tables("dsp_dump0.bin", 7, 12) +*/ + +result_table_tstaxh: +CW 0x22a4 +CW 0x22a4 +CW 0x22a4 +CW 0x22a0 +CW 0x2280 +CW 0x2288 +CW 0x22a8 +result_table_tstaxh_end: + +result_table_cmpaxh: +; ax [0] +CW 0x22a5 +CW 0x22a1 +CW 0x22a1 +CW 0x22a1 +CW 0x2281 +CW 0x2291 +CW 0x22b1 +CW 0x22b1 +CW 0x22b1 +CW 0x22b9 +CW 0x2289 +CW 0x22a9 +; ax [1] +CW 0x22a5 +CW 0x22a1 +CW 0x22a1 +CW 0x22a1 +CW 0x2281 +CW 0x2291 +CW 0x22b1 +CW 0x22b1 +CW 0x22b1 +CW 0x22b9 +CW 0x2289 +CW 0x22a9 +; ax [2] +CW 0x22a5 +CW 0x22a1 +CW 0x22a1 +CW 0x22a1 +CW 0x2281 +CW 0x2291 +CW 0x22b1
+CW 0x22b1 +CW 0x22b1 +CW 0x22b9 +CW 0x2289 +CW 0x22a9 +; ax [3] +CW 0x22a8 +CW 0x22a8 +CW 0x22a8 +CW 0x22a5 +CW 0x2281 +CW 0x2281 +CW 0x22b1 +CW 0x22b1 +CW 0x22b1 +CW 0x22b3 +CW 0x2299 +CW 0x22a9 +; ax [4] +CW 0x2288 +CW 0x2288 +CW 0x2288 +CW 0x2288 +CW 0x22a5 +CW 0x22a1 +CW 0x2291 +CW 0x2291 +CW 0x2291 +CW 0x2293 +CW 0x22b9 +CW 0x2289 +; ax [5] +CW 0x2290 +CW 0x2290 +CW 0x2290 +CW 0x2290 +CW 0x22b0 +CW 0x22b0 +CW 0x2290 +CW 0x2290 +CW 0x229a +CW 0x2298 +CW 0x22a5 +CW 0x2281 +; ax [6] +CW 0x22a0 +CW 0x22a0 +CW 0x22a0 +CW 0x22a0 +CW 0x2290 +CW 0x2290 +CW 0x22b0 +CW 0x22b0 +CW 0x22ba +CW 0x22b8 +CW 0x2288 +CW 0x22a5 +result_table_cmpaxh_end: + +test_main: + ; Perform one test using the default values + ; ($acc0 is 14 0009 0007 and $ax0 is 8000 0003, but this can be changed in the DSPSpy UI) + ; Also, as a sanity check, record the computed sizes of the result tables + LRI $ar0, #input_ax + LRI $ix0, #(input_ax_end - input_ax) + LRI $ar1, #input_acc + LRI $ix1, #(input_acc_end - input_acc) + LRI $ar2, #result_table_tstaxh + LRI $ix2, #(input_ax_end - input_ax)/2 + LRI $ar3, #result_table_cmpaxh + LRI $ix3, #((input_ax_end - input_ax)/2)*((input_acc_end - input_acc)/3) + ; Set the sticky overflow bit just so that we get consistent $sr values + ; before and after an overflow occurs + SBSET #1 + CMPAXH $acc0, $ax0.h + CALL send_back ; Expected $sr: 2290 + ; $ar0 should match $ix0, etc + ADDARN $ar0, $ix0 + LRI $ix0, #input_ax_end + ADDARN $ar1, $ix1 + LRI $ix1, #input_acc_end + ADDARN $ar2, $ix2 + LRI $ix2, #result_table_tstaxh_end + ADDARN $ar3, $ix3 + LRI $ix3, #result_table_cmpaxh_end + TSTAXH $ax0.h + CALL send_back ; Expected $sr: 2288 + + CLR $acc0 + CLR $acc1 + LRI $ax0.h, #0 + LRI $ax0.l, #0 + LRI $ax1.h, #0 + LRI $ax1.l, #0 + + ; Check TSTAXH... 
+ LRI $ar0, #input_ax + LRI $ar2, #result_table_tstaxh + + ; for (int ctr = input_ax.size(); ctr > 0; ctr--) { + BLOOPI #(input_ax_end - input_ax)/2, check_tstaxh_last_ins + ; Note: if DSPSpy supported populating DMEM as well as IMEM, then there are several + ; instructions that could make this faster and cleaner... but it doesn't currently, + ; so we're stuck with ILRRI. + + ; Load the test value into $ax0.h/$ax0.l via $ac0.m + ILRRI $ac0.m, $ar0 ; $ac0.m = IMEM[$ar0++] + MRR $ax0.h, $ac0.m + ILRRI $ac0.m, $ar0 + MRR $ax0.l, $ac0.m + ; Load the expected value into $ac1.m + ILRRI $ac1.m, $ar2 ; $ac1.m = IMEM[$ar2++] + ; Reduce noise in the results + LRI $ac0.m, #0 + + ; Do the test + TSTAXH $ax0.h + CALL send_back + + ; Check if $sr matches the value we expected. If there is any difference, + ; note it via a nonzero $ax1.l. (send_back saves the value of $sr) + MRR $ac0.m, $sr + CMP + IFNZ + LRIS $ax1.l, #1 +check_tstaxh_last_ins: + NOP + ; } + + ; Check CMPAXH... + CLR $acc0 + CLR $acc1 + LRI $ar0, #input_ax + LRI $ar3, #result_table_cmpaxh + + ; for (int ctr_ax = input_ax.size(); ctr_ax > 0; ctr_ax--) { + BLOOPI #(input_ax_end - input_ax)/2, check_cmpaxh_last_ins_outer + ; Load the test value into $ax0.h/$ax0.l via $ac1.m + ILRRI $ac1.m, $ar0 + MRR $ax0.h, $ac1.m + ILRRI $ac1.m, $ar0 + MRR $ax0.l, $ac1.m + + LRI $ar1, #input_acc + + ; for (int ctr_acc = input_acc.size(); ctr_acc > 0; ctr_acc--) { + BLOOPI #(input_acc_end - input_acc)/3, check_cmpaxh_last_ins_inner + + ; Load the test value into $ac0.h/$ac0.m/$ac0.l via $ac1.m + ILRRI $ac1.m, $ar1 + MRR $ac0.h, $ac1.m + ILRRI $ac0.m, $ar1 ; we can load it directly here + ILRRI $ac1.m, $ar1 + MRR $ac0.l, $ac1.m + + ; Load the expected value into $ac1.m + ILRRI $ac1.m, $ar3 + + ; Do the test + CMPAXH $acc0, $ax0.h + CALL send_back + + ; Check if $sr matches the value we expected. If there is any difference, + ; note it via a nonzero $ax1.h. 
(send_back saves the value of $sr) + ; We can overwrite $ac0.m here because we load it on the next iteration. + MRR $ac0.m, $sr + LRIS $ac0.l, #0 + LRI $ac0.h, #0 + CMP + IFNZ + LRIS $ax1.h, #1 +check_cmpaxh_last_ins_inner: + NOP + ; } +check_cmpaxh_last_ins_outer: + NOP + ; } + + ; We're done testing. In the final send_back call, if $ax1.l or $ax1.h + ; is nonzero, the test failed. + CALL send_back + + ; We're done, DO NOT DELETE THIS LINE + JMP end_of_test diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex index 1b8186bdfc..c49b10d37f 100644 --- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex +++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex @@ -46,7 +46,7 @@ % Document front page material \title{\textbf{\Huge GameCube DSP User's Manual}} \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}} -\date{\today\\v0.1.2} +\date{\today\\v0.1.3} % Title formatting commands \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}} @@ -260,6 +260,7 @@ The purpose of this documentation is purely academic and it aims at understandin 0.1.0 & 2021.08.21 & Pokechu22 & Added missing instructions, improved documentation of hardware registers, documented additional behaviors, and improved formatting. 
\\ \hline 0.1.1 & 2022.05.14 & xperia64 & Added tested DSP bootloading transfer size \\ \hline 0.1.2 & 2022.05.21 & Pokechu22 & Fixed ``ILLR'' typo in Instruction Memory section \\ \hline +0.1.3 & 2022.05.27 & Pokechu22 & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH} \\ \hline \end{tabular} \end{table} @@ -1929,17 +1930,17 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th \DSPOpcodeFlags{X}{-}{X}{X}{X}{X}{X}{X} \end{DSPOpcode} -\begin{DSPOpcode}{CMPAR} +\begin{DSPOpcode}{CMPAXH} \begin{DSPOpcodeBytefield}{16} \monobitbox{4}{110r} & \monobitbox{4}{s001} & \monobitbox{4}{xxxx} & \monobitbox{4}{xxxx} \end{DSPOpcodeBytefield} \begin{DSPOpcodeFormat} - CMPAR $acS $axR.h + CMPAXH $acS, $axR.h \end{DSPOpcodeFormat} \begin{DSPOpcodeDescription} - \item Compares accumulator \Register{\$acS} with accumulator \Register{\$axR.h}. + \item Compares accumulator \Register{\$acS} with high part of secondary accumulator \Register{\$axR.h}. \end{DSPOpcodeDescription} \begin{DSPOpcodeOperation} @@ -5065,7 +5066,7 @@ Instruction & Opcode & Page \\ \hline \OpcodeRow{101s t11r xxxx xxxx}{MULXMV} \OpcodeRowSkip \OpcodeRow{110s t000 xxxx xxxx}{MULC} -\OpcodeRow{110r s001 xxxx xxxx}{CMPAR} +\OpcodeRow{110r s001 xxxx xxxx}{CMPAXH} \OpcodeRow{110s t01r xxxx xxxx}{MULCMVZ} \OpcodeRow{110s t10r xxxx xxxx}{MULCAC} \OpcodeRow{110s t11r xxxx xxxx}{MULCMV}