Merge pull request #10702 from Pokechu22/dsp-cmpaxh

DSP LLE: Rename CMPAR to CMPAXH
This commit is contained in:
Mai M 2022-05-28 02:32:04 -04:00 committed by GitHub
commit 2d6fe6a89f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 349 additions and 16 deletions

View File

@ -284,7 +284,7 @@ const std::array<DSPOPCTemplate, 230> s_opcodes =
//c-d
{"MULC", 0xc000, 0xe700, 1, 2, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}}, true, false, false, false, true}, // $prod = $acS.m * $axS.h
{"CMPAR", 0xc100, 0xe700, 1, 2, {{P_ACC, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 12, 0x1000}}, true, false, false, false, true}, // FLAGS($acS - axR.h)
{"CMPAXH", 0xc100, 0xe700, 1, 2, {{P_ACC, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 12, 0x1000}}, true, false, false, false, true}, // FLAGS($acS - axR.h)
{"MULCMVZ", 0xc200, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR.hm, $acR.l, $prod = $prod.hm, 0, $acS.m * $axS.h
{"MULCAC", 0xc400, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $acR + $prod, $acS.m * $axS.h
{"MULCMV", 0xc600, 0xe600, 1, 3, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true, false, false, false, true}, // $acR, $prod = $prod, $acS.m * $axS.h

View File

@ -124,12 +124,12 @@ void Interpreter::cmp(const UDSPInstruction)
ZeroWriteBackLog();
}
// CMPAR $acS axR.h
// CMPAXH $acS, $axR.h
// 110r s001 xxxx xxxx
// Compares accumulator $acS with accumulator $axR.h.
// Compares accumulator $acS with high part of secondary accumulator $axR.h.
//
// flags out: x-xx xxxx
void Interpreter::cmpar(const UDSPInstruction opc)
void Interpreter::cmpaxh(const UDSPInstruction opc)
{
const u8 rreg = (opc >> 12) & 0x1;
const u8 sreg = (opc >> 11) & 0x1;

View File

@ -178,7 +178,7 @@ constexpr std::array<InterpreterOpInfo, 125> s_opcodes
// C-D
{0xc000, 0xe700, &Interpreter::mulc},
{0xc100, 0xe700, &Interpreter::cmpar},
{0xc100, 0xe700, &Interpreter::cmpaxh},
{0xc200, 0xe600, &Interpreter::mulcmvz},
{0xc400, 0xe600, &Interpreter::mulcac},
{0xc600, 0xe600, &Interpreter::mulcmv},

View File

@ -70,7 +70,7 @@ public:
void clrl(UDSPInstruction opc);
void clrp(UDSPInstruction opc);
void cmp(UDSPInstruction opc);
void cmpar(UDSPInstruction opc);
void cmpaxh(UDSPInstruction opc);
void cmpi(UDSPInstruction opc);
void cmpis(UDSPInstruction opc);
void dar(UDSPInstruction opc);

View File

@ -115,7 +115,7 @@ public:
void tst(UDSPInstruction opc);
void tstaxh(UDSPInstruction opc);
void cmp(UDSPInstruction opc);
void cmpar(UDSPInstruction opc);
void cmpaxh(UDSPInstruction opc);
void cmpi(UDSPInstruction opc);
void cmpis(UDSPInstruction opc);
void xorr(UDSPInstruction opc);

View File

@ -188,12 +188,12 @@ void DSPEmitter::cmp(const UDSPInstruction opc)
}
}
// CMPAR $acS axR.h
// CMPAXH $acS, $axR.h
// 110r s001 xxxx xxxx
// Compares accumulator $acS with accumulator $axR.h.
// Compares accumulator $acS with high part of secondary accumulator $axR.h.
//
// flags out: x-xx xxxx
void DSPEmitter::cmpar(const UDSPInstruction opc)
void DSPEmitter::cmpaxh(const UDSPInstruction opc)
{
if (FlagsNeeded())
{

View File

@ -178,7 +178,7 @@ const std::array<JITOpInfo, 125> s_opcodes =
// C-D
{0xc000, 0xe700, &DSPEmitter::mulc},
{0xc100, 0xe700, &DSPEmitter::cmpar},
{0xc100, 0xe700, &DSPEmitter::cmpaxh},
{0xc200, 0xe600, &DSPEmitter::mulcmvz},
{0xc400, 0xe600, &DSPEmitter::mulcac},
{0xc600, 0xe600, &DSPEmitter::mulcmv},

View File

@ -0,0 +1,332 @@
incdir "tests"
include "dsp_base.inc"
; Test inputs for the secondary accumulator $ax0, stored in IMEM as pairs of
; words. test_main loads the first word of each pair into $ax0.h and the
; second into $ax0.l. The entry count is computed as
; (input_ax_end - input_ax)/2, so every entry must stay exactly two words long.
input_ax:
; [0] - 0x0000'0000 - 0
CW 0
CW 0
; [1] - 0x0000'0001 - 1 in $ax0.l
CW 0
CW 1
; [2] - 0x0000'ffff - -1 in $ax0.l
CW 0
CW 0xffff
; [3] - 0x0001'0000 - 1 in $ax0.h
CW 1
CW 0
; [4] - 0x7fff'0000 - INT_MAX in $ax0.h
CW 0x7fff
CW 0
; [5] - 0x8000'0000 - INT_MIN in $ax0.h
CW 0x8000
CW 0
; [6] - 0xffff'0000 - -1 in $ax0.h
CW 0xffff
CW 0
input_ax_end:
; Test inputs for the accumulator $acc0, stored in IMEM as triples of words.
; test_main loads the words of each triple into $ac0.h, $ac0.m and $ac0.l, in
; that order. The entry count is computed as (input_acc_end - input_acc)/3, so
; every entry must stay exactly three words long.
input_acc:
; [0] - 0x00'0000'0000 - 0
CW 0
CW 0
CW 0
; [1] - 0x00'0000'0001 - 1 in $ac0.l
CW 0
CW 0
CW 1
; [2] - 0x00'0000'ffff - -1 in $ac0.l
; (the low word must be 0xffff; with 1 here this entry would just repeat [1])
CW 0
CW 0
CW 0xffff
; [3] - 0x00'0001'0000 - 1 in $ac0.m
CW 0
CW 1
CW 0
; [4] - 0x00'7fff'0000 - INT_MAX in $ac0.m
CW 0
CW 0x7fff
CW 0
; [5] - 0x00'8000'0000 - INT_MIN in $ac0.m, but not sign extended
CW 0
CW 0x8000
CW 0
; [6] - 0x00'ffff'0000 - -1 in $ac0.m, but not sign extended
CW 0
CW 0xffff
CW 0
; [7] - 0x01'0000'0000 - 1 in $ac0.h
CW 1
CW 0
CW 0
; [8] - 0x7f'ffff'0000 - true INT_MAX
CW 0x7f
CW 0xffff
CW 0
; [9] - 0x80'0000'0000 - true INT_MIN
CW 0x80
CW 0
CW 0
; [10] - 0xff'8000'0000 - INT_MIN in $ac0.m, sign-extended
CW 0xff
CW 0x8000
CW 0
; [11] - 0xff'ffff'0000 - -1
CW 0xff
CW 0xffff
CW 0
input_acc_end:
/* Python script to generate the following result tables from a DSP dump:
import struct
def gen_tables(name, num_ax, num_acc):
    with open(name, "rb") as fin:
        data = fin.read()
    reg_values = list(struct.iter_unpack(">" + "H"*0x20, data))
    # Initial register values (there is no corresponding send_back call for these), then our two
    # default value checks, then the TSTAXH test, then the CMPAXH test, then the test results
    assert len(reg_values) == 1 + 2 + num_ax + num_ax * num_acc + 1
    print("result_table_tstaxh:")
    for ax in range(num_ax):
        # SR is register 0x13
        print("CW {:#04x}".format(reg_values[3 + ax][0x13]))
    print("result_table_tstaxh_end:")
    print()
    print("result_table_cmpaxh:")
    for ax in range(num_ax):
        print("; ax [{}]".format(ax))
        for acc in range(num_acc):
            print("CW {:#04x}".format(reg_values[3 + num_ax + ax * num_acc + acc][0x13]))
    print("result_table_cmpaxh_end:")
gen_tables("dsp_dump0.bin", 7, 12)
*/
; Expected $sr values after TSTAXH $ax0.h, one word per input_ax entry and in
; the same order as input_ax. test_main reads these sequentially through $ar2
; and compares each against the $sr observed after the instruction.
; Do not edit by hand: the values come from a DSP dump.
result_table_tstaxh:
CW 0x22a4
CW 0x22a4
CW 0x22a4
CW 0x22a0
CW 0x2280
CW 0x2288
CW 0x22a8
result_table_tstaxh_end:
; Expected $sr values after CMPAXH $acc0, $ax0.h. The table is grouped by
; input_ax entry (outer loop); within each group there is one word per
; input_acc entry (inner loop), in input_acc order. test_main reads the whole
; table sequentially through $ar3, so the grouping and order must match the
; nesting of its two loops.
; Do not edit by hand: the values come from a DSP dump.
result_table_cmpaxh:
; ax [0]
CW 0x22a5
CW 0x22a1
CW 0x22a1
CW 0x22a1
CW 0x2281
CW 0x2291
CW 0x22b1
CW 0x22b1
CW 0x22b1
CW 0x22b9
CW 0x2289
CW 0x22a9
; ax [1]
CW 0x22a5
CW 0x22a1
CW 0x22a1
CW 0x22a1
CW 0x2281
CW 0x2291
CW 0x22b1
CW 0x22b1
CW 0x22b1
CW 0x22b9
CW 0x2289
CW 0x22a9
; ax [2]
CW 0x22a5
CW 0x22a1
CW 0x22a1
CW 0x22a1
CW 0x2281
CW 0x2291
CW 0x22b1
CW 0x22b1
CW 0x22b1
CW 0x22b9
CW 0x2289
CW 0x22a9
; ax [3]
CW 0x22a8
CW 0x22a8
CW 0x22a8
CW 0x22a5
CW 0x2281
CW 0x2281
CW 0x22b1
CW 0x22b1
CW 0x22b1
CW 0x22b3
CW 0x2299
CW 0x22a9
; ax [4]
CW 0x2288
CW 0x2288
CW 0x2288
CW 0x2288
CW 0x22a5
CW 0x22a1
CW 0x2291
CW 0x2291
CW 0x2291
CW 0x2293
CW 0x22b9
CW 0x2289
; ax [5]
CW 0x2290
CW 0x2290
CW 0x2290
CW 0x2290
CW 0x22b0
CW 0x22b0
CW 0x2290
CW 0x2290
CW 0x229a
CW 0x2298
CW 0x22a5
CW 0x2281
; ax [6]
CW 0x22a0
CW 0x22a0
CW 0x22a0
CW 0x22a0
CW 0x2290
CW 0x2290
CW 0x22b0
CW 0x22b0
CW 0x22ba
CW 0x22b8
CW 0x2288
CW 0x22a5
result_table_cmpaxh_end:
; Entry point of the test.
;   1. Runs CMPAXH and TSTAXH once with the default register values and, as a
;      sanity check, reports the computed table sizes and end addresses in the
;      $ar/$ix registers.
;   2. Runs TSTAXH for every input_ax entry and compares the resulting $sr with
;      result_table_tstaxh; a mismatch sets $ax1.l to a nonzero value.
;   3. Runs CMPAXH for every (input_ax, input_acc) pair and compares the
;      resulting $sr with result_table_cmpaxh; a mismatch sets $ax1.h to a
;      nonzero value.
; The final send_back call reports $ax1.l/$ax1.h; both must be zero for the
; test to have passed.
test_main:
; Perform one test using the default values
; ($acc0 is 14 0009 0007 and $ax0 is 8000 0003, but this can be changed in the DSPSpy UI)
; Also, as a sanity check, record the computed sizes of the result tables
LRI $ar0, #input_ax
LRI $ix0, #(input_ax_end - input_ax)
LRI $ar1, #input_acc
LRI $ix1, #(input_acc_end - input_acc)
LRI $ar2, #result_table_tstaxh
LRI $ix2, #(input_ax_end - input_ax)/2
LRI $ar3, #result_table_cmpaxh
LRI $ix3, #((input_ax_end - input_ax)/2)*((input_acc_end - input_acc)/3)
; Set the sticky overflow bit just so that we get consistent $sr values
; before and after an overflow occurs
SBSET #1
CMPAXH $acc0, $ax0.h
CALL send_back ; Expected $sr: 2290
; $ar0 should match $ix0, etc
; (each $arN is advanced by the expected table size, and the matching $ixN is
; then loaded with the table's end label, so the two can be compared in the
; register dump produced by the next send_back)
ADDARN $ar0, $ix0
LRI $ix0, #input_ax_end
ADDARN $ar1, $ix1
LRI $ix1, #input_acc_end
ADDARN $ar2, $ix2
LRI $ix2, #result_table_tstaxh_end
ADDARN $ar3, $ix3
LRI $ix3, #result_table_cmpaxh_end
TSTAXH $ax0.h
CALL send_back ; Expected $sr: 2288
; Start the table-driven checks from a known state: both accumulators and both
; secondary accumulators zeroed. $ax1.l/$ax1.h double as the failure flags.
CLR $acc0
CLR $acc1
LRI $ax0.h, #0
LRI $ax0.l, #0
LRI $ax1.h, #0
LRI $ax1.l, #0
; Check TSTAXH...
LRI $ar0, #input_ax
LRI $ar2, #result_table_tstaxh
; for (int ctr = input_ax.size(); ctr > 0; ctr--) {
BLOOPI #(input_ax_end - input_ax)/2, check_tstaxh_last_ins
; Note: if DSPSpy supported populating DMEM as well as IMEM, then there are several
; instructions that could make this faster and cleaner... but it doesn't currently,
; so we're stuck with ILRRI.
; Load the test value into $ax0.h/$ax0.l via $ac0.m
ILRRI $ac0.m, $ar0 ; $ac0.m = IMEM[$ar0++]
MRR $ax0.h, $ac0.m
ILRRI $ac0.m, $ar0
MRR $ax0.l, $ac0.m
; Load the expected value into $ac1.m
ILRRI $ac1.m, $ar2 ; $ac1.m = IMEM[$ar2++]
; Reduce noise in the results
LRI $ac0.m, #0
; Do the test
TSTAXH $ax0.h
CALL send_back
; Check if $sr matches the value we expected. If there is any difference,
; note it via a nonzero $ax1.l. (send_back saves the value of $sr)
; NOTE(review): unlike the CMPAXH loop below, $ac0.l/$ac0.h are not re-zeroed
; before CMP here; this relies on them still being zero from the CLR $acc0
; above -- confirm that send_back preserves them.
MRR $ac0.m, $sr
CMP
IFNZ
LRIS $ax1.l, #1
; NOTE(review): the NOP below appears to exist so that the loop-end label has
; an unconditional instruction to attach to -- confirm against the DSP manual.
check_tstaxh_last_ins:
NOP
; }
; Check CMPAXH...
CLR $acc0
CLR $acc1
LRI $ar0, #input_ax
LRI $ar3, #result_table_cmpaxh
; for (int ctr_ax = input_ax.size(); ctr_ax > 0; ctr_ax--) {
BLOOPI #(input_ax_end - input_ax)/2, check_cmpaxh_last_ins_outer
; Load the test value into $ax0.h/$ax0.l via $ac1.m
ILRRI $ac1.m, $ar0
MRR $ax0.h, $ac1.m
ILRRI $ac1.m, $ar0
MRR $ax0.l, $ac1.m
; Restart the accumulator inputs for every $ax0 value; $ar3 is NOT reset, so
; the expected values are consumed linearly across both loops.
LRI $ar1, #input_acc
; for (int ctr_acc = input_acc.size(); ctr_acc > 0; ctr_acc--) {
BLOOPI #(input_acc_end - input_acc)/3, check_cmpaxh_last_ins_inner
; Load the test value into $ac0.h/$ac0.m/$ac0.l via $ac1.m
ILRRI $ac1.m, $ar1
MRR $ac0.h, $ac1.m
ILRRI $ac0.m, $ar1 ; we can load it directly here
ILRRI $ac1.m, $ar1
MRR $ac0.l, $ac1.m
; Load the expected value into $ac1.m
ILRRI $ac1.m, $ar3
; Do the test
CMPAXH $acc0, $ax0.h
CALL send_back
; Check if $sr matches the value we expected. If there is any difference,
; note it via a nonzero $ax1.h. (send_back saves the value of $sr)
; We can overwrite $ac0.m here because we load it on the next iteration.
MRR $ac0.m, $sr
; Clear the test value's low and high words so that only the $sr copy in
; $ac0.m takes part in the comparison below.
LRIS $ac0.l, #0
LRI $ac0.h, #0
CMP
IFNZ
LRIS $ax1.h, #1
check_cmpaxh_last_ins_inner:
NOP
; }
check_cmpaxh_last_ins_outer:
NOP
; }
; We're done testing. In the final send_back call, if $ax1.l or $ax1.h
; is nonzero, the test failed.
CALL send_back
; We're done, DO NOT DELETE THIS LINE
JMP end_of_test

View File

@ -46,7 +46,7 @@
% Document front page material
\title{\textbf{\Huge GameCube DSP User's Manual}}
\author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
\date{\today\\v0.1.2}
\date{\today\\v0.1.3}
% Title formatting commands
\newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@ -260,6 +260,7 @@ The purpose of this documentation is purely academic and it aims at understandin
0.1.0 & 2021.08.21 & Pokechu22 & Added missing instructions, improved documentation of hardware registers, documented additional behaviors, and improved formatting. \\ \hline
0.1.1 & 2022.05.14 & xperia64 & Added tested DSP bootloading transfer size \\ \hline
0.1.2 & 2022.05.21 & Pokechu22 & Fixed ``ILLR'' typo in Instruction Memory section \\ \hline
0.1.3 & 2022.05.27 & Pokechu22 & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH} \\ \hline
\end{tabular}
\end{table}
@ -1929,17 +1930,17 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
\DSPOpcodeFlags{X}{-}{X}{X}{X}{X}{X}{X}
\end{DSPOpcode}
\begin{DSPOpcode}{CMPAR}
\begin{DSPOpcode}{CMPAXH}
\begin{DSPOpcodeBytefield}{16}
\monobitbox{4}{110r} & \monobitbox{4}{s001} & \monobitbox{4}{xxxx} & \monobitbox{4}{xxxx}
\end{DSPOpcodeBytefield}
\begin{DSPOpcodeFormat}
CMPAR $acS $axR.h
CMPAXH $acS, $axR.h
\end{DSPOpcodeFormat}
\begin{DSPOpcodeDescription}
\item Compares accumulator \Register{\$acS} with accumulator \Register{\$axR.h}.
\item Compares accumulator \Register{\$acS} with high part of secondary accumulator \Register{\$axR.h}.
\end{DSPOpcodeDescription}
\begin{DSPOpcodeOperation}
@ -5065,7 +5066,7 @@ Instruction & Opcode & Page \\ \hline
\OpcodeRow{101s t11r xxxx xxxx}{MULXMV}
\OpcodeRowSkip
\OpcodeRow{110s t000 xxxx xxxx}{MULC}
\OpcodeRow{110r s001 xxxx xxxx}{CMPAR}
\OpcodeRow{110r s001 xxxx xxxx}{CMPAXH}
\OpcodeRow{110s t01r xxxx xxxx}{MULCMVZ}
\OpcodeRow{110s t10r xxxx xxxx}{MULCAC}
\OpcodeRow{110s t11r xxxx xxxx}{MULCMV}