From 83aabbbece91312c38cc94030818bb9734f6b394 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 14:32:14 -0700
Subject: [PATCH 01/11] docs/DSP: Clarify LRS note

$acS.h was a typo, which has been replaced with $acD.h.
---
 .../DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index c49b10d37f..b625c884fa 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -2588,7 +2588,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeOperation}
 
   \begin{DSPOpcodeNote}
-    \item \Opcode{LRS} can use \Register{\$axD} and cannot use \Register{\$acS.h}, while \Opcode{SRS} and \Opcode{SRSH} only work on \Register{\$acS}.
+    \item \Opcode{LRS} can use \Register{\$axD}, but cannot use \Register{\$acD.h}, while \Opcode{SRS} and \Opcode{SRSH} only work on \Register{\$acS}.
   \end{DSPOpcodeNote}
 
   \DSPOpcodeFlagsUnchanged

From bb01ba60d6121b58d84a2ae5af3b04df94137e86 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 15:33:16 -0700
Subject: [PATCH 02/11] docs/DSP: Fix typo in 'NOP comment

---
 .../DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index b625c884fa..8ed85c004b 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -4767,7 +4767,7 @@ Extended opcodes do not modify the program counter (\Register{\$pc} register).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeNote}
-     \item Generally written as by not including any extension operation, such as writing \texttt{INC \$ac0} instead of writing \texttt{INC'NOP \$ac0}.
+     \item Generally written by not including any extension operation, such as writing \texttt{INC \$ac0} instead of writing \texttt{INC'NOP \$ac0}.
   \end{DSPOpcodeNote}
 \end{DSPOpcode}
 

From b349254ff4c10b5a6d7d1052c5bc5bb8bbecd239 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 16:42:20 -0700
Subject: [PATCH 03/11] docs/DSP: Document the behavior when main and extended
 opcodes both write to the same register (the write backlog)

For more information, ApplyWriteBackLog, WriteToBackLog, and ZeroWriteBackLog were added in b787f5f8f7709cff763c2f3a13b90455a0c8ee18 and the explanatory comment was added in fd40513fed84e41e4b4b65cb4f98295223bcbd9b, although it did not mention the specific instructions that could trigger this edge case. The statements about which registers can be written by main opcodes and extension opcodes are based on my own checking of all instructions in the manual.
---
 .../Core/DSP/Interpreter/DSPInterpreter.cpp   | 28 ++++++++++++++++---
 .../Core/DSP/Interpreter/DSPInterpreter.h     |  5 ----
 .../GameCube_DSP_Users_Manual.tex             |  7 +++++
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp
index 8ca18e2a36..c4cc0e5601 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp
@@ -16,8 +16,23 @@
 
 namespace DSP::Interpreter
 {
-// Not needed for game ucodes (it slows down interpreter + easier to compare int VS
-// dspjit64 without it)
+// Correctly handle instructions such as `INC'L $ac0 : $ac0.l, @$ar0` (encoded as 0x7660) where both
+// the main opcode and the extension opcode modify the same register. See the "Extended opcodes"
+// section in the manual for more details.  No official uCode writes to the same register twice like
+// this, so we don't emulate it by default (and also don't support it in the recompiler).
+//
+// Dolphin only supports this behavior in the interpreter when PRECISE_BACKLOG is defined.
+// In ExecuteInstruction, if an extended opcode is in use, the extended opcode's behavior is
+// executed first, followed by the main opcode's behavior. The extended opcode does not directly
+// write to registers, but instead records the writes into a backlog (WriteToBackLog). The main
+// opcode calls ZeroWriteBackLog after it is done reading the register values; this directly
+// writes zero to all registers that have pending writes in the backlog. The main opcode then is
+// free to write directly to registers it changes. Afterwards, ApplyWriteBackLog bitwise-ors the
+// value of the register and the value in the backlog; if the main opcode didn't write to the
+// register then ZeroWriteBackLog means that the pending value is being or'd with zero, so it's
+// used without changes. When PRECISE_BACKLOG is not defined, ZeroWriteBackLog does nothing and
+// ApplyWriteBackLog overwrites the register value with the value from the backlog (so writes from
+// extended opcodes "win" over the main opcode).
 //#define PRECISE_BACKLOG
 
 Interpreter::Interpreter(DSPCore& dsp) : m_dsp_core{dsp}
@@ -809,7 +824,7 @@ void Interpreter::ConditionalExtendAccum(int reg)
 void Interpreter::ApplyWriteBackLog()
 {
   // Always make sure to have an extra entry at the end w/ -1 to avoid
-  // infinitive loops
+  // infinite loops
   for (int i = 0; m_write_back_log_idx[i] != -1; i++)
   {
     u16 value = m_write_back_log[i];
@@ -823,6 +838,11 @@ void Interpreter::ApplyWriteBackLog()
   }
 }
 
+// The ext ops are calculated in parallel with the actual op. That means that
+// both the main op and the ext op see the same register state as input. The
+// output is simple as long as the main and ext ops don't change the same
+// register. If they do, the output is the bitwise OR of the result of both the
+// main and ext ops.
 void Interpreter::WriteToBackLog(int i, int idx, u16 value)
 {
   m_write_back_log[i] = value;
@@ -840,7 +860,7 @@ void Interpreter::ZeroWriteBackLog()
 {
 #ifdef PRECISE_BACKLOG
   // always make sure to have an extra entry at the end w/ -1 to avoid
-  // infinitive loops
+  // infinite loops
   for (int i = 0; m_write_back_log_idx[i] != -1; i++)
   {
     OpWriteRegister(m_write_back_log_idx[i], 0);
diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
index 6d9e3e2709..e7cd266680 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
+++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h
@@ -235,11 +235,6 @@ private:
 
   void ConditionalExtendAccum(int reg);
 
-  // The ext ops are calculated in parallel with the actual op. That means that
-  // both the main op and the ext op see the same register state as input. The
-  // output is simple as long as the main and ext ops don't change the same
-  // register. If they do the output is the bitwise OR of the result of both the
-  // main and ext ops.
   void WriteToBackLog(int i, int idx, u16 value);
   void ZeroWriteBackLog();
   void ZeroWriteBackLogPreserveAcc(u8 acc);
diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 8ed85c004b..87c61b167a 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -4339,9 +4339,16 @@ Extended opcodes do not exist on their own. These opcodes can only be attached t
 Specifically, opcodes where the first nybble is 0, 1, or 2 cannot be extended.
 Opcodes where the first nybble is 4 or higher can be extended, using the 8 lower bits.
 Opcodes where the first nybble is 3 can also be extended, but the main opcode is 9 bits and the extension opcode is 7 bits.  For these instructions, the extension opcode is treated as if the first bit were 0 (i.e. \texttt{0xxxxxxx}).
+(\Opcode{NX} has no behavior of its own, so it can be used to get an extended opcode's behavior on its own.)
 
 Extended opcodes do not modify the program counter (\Register{\$pc} register).
 
+Extended opcodes are run \textit{in parallel} with the main opcode; they see the same register state as the input. (For instance, \texttt{\Opcode{MOVR}\Opcode{'MV} \Register{\$ac1}, \Register{\$ax0.l} : \Register{\$ax0.l}, \Register{\$ac1.m}} (encoded as \Value{0x6113}) \textit{swaps} the values of \Register{\$ac1.m} and \Register{\$ax0.l} (and also extends the new value of \Register{\$ac1.m} into \Register{\$ac1.l} and \Register{\$ac1.h}).)
+
+Since they are executed in parallel, the main and extension opcodes could theoretically write to the same registers. All opcodes that support extension only modify a main accumulator \Register{\$acD}, as well as \Register{\$prod}, \Register{\$sr}, and/or \Register{\$pc}, while the extension opcodes themselves generally only modify an additional accumulator \Register{\$axD} and addressing registers \Register{\$arS}. The exceptions are \Opcode{'L} and \Opcode{'LN}, which have the option of writing to \Register{\$acD}. Thus, \texttt{\Opcode{INC}\Opcode{'L} \Register{\$ac0} : \Register{\$ac0.l}, @\Register{\$ar0}} (encoded as \Value{0x7660}) increments \Register{\$ac0} (and thus \Register{\$ac0.l}), but also sets \Register{\$ac0.l} to the value in data memory at address \Register{\$ar0} and increments \Register{\$ar0}.
+
+When the main and extension opcodes write to the same register, the register is set to the two values bitwise-or'd together. For the above example, \Register{\$ac0.l} would be set to \InlineExpression{(\Register{\$ac0.l} + 1) | MEM[\Register{\$ar0}]}. \textbf{Note that no official uCode writes to the same register twice like this.}
+
 \pagebreak{}
 
 \section{Alphabetical list of extended opcodes}

From 4dc7208195f679a54066b2ad1adfaa3398aea494 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 17:15:49 -0700
Subject: [PATCH 04/11] docs/DSP: Fix typo with arithmetic instructions that
 take a 16-bit immediate

These instructions used an 'r' in their bit list, but a 'd' in the operands.
---
 .../Core/DSP/Interpreter/DSPIntArithmetic.cpp | 16 +++++-----
 .../Core/DSP/Jit/x64/DSPJitArithmetic.cpp     | 16 +++++-----
 .../GameCube_DSP_Users_Manual.tex             | 30 +++++++++----------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
index e88fe6ac4a..90199d74d9 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
@@ -45,7 +45,7 @@ void Interpreter::clrl(const UDSPInstruction opc)
 //----
 
 // ANDCF $acD.m, #I
-// 0000 001r 1100 0000
+// 0000 001d 1100 0000
 // iiii iiii iiii iiii
 // Set logic zero (LZ) flag in status register $sr if result of logic AND of
 // accumulator mid part $acD.m with immediate value I is equal to I.
@@ -61,7 +61,7 @@ void Interpreter::andcf(const UDSPInstruction opc)
 }
 
 // ANDF $acD.m, #I
-// 0000 001r 1010 0000
+// 0000 001d 1010 0000
 // iiii iiii iiii iiii
 // Set logic zero (LZ) flag in status register $sr if result of logical AND
 // operation of accumulator mid part $acD.m with immediate value I is equal
@@ -144,7 +144,7 @@ void Interpreter::cmpaxh(const UDSPInstruction opc)
 }
 
 // CMPI $amD, #I
-// 0000 001r 1000 0000
+// 0000 001d 1000 0000
 // iiii iiii iiii iiii
 // Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
 // Although flags are being set regarding whole accumulator register.
@@ -320,7 +320,7 @@ void Interpreter::notc(const UDSPInstruction opc)
 }
 
 // XORI $acD.m, #I
-// 0000 001r 0010 0000
+// 0000 001d 0010 0000
 // iiii iiii iiii iiii
 // Logic exclusive or (XOR) of accumulator mid part $acD.m with
 // immediate value I.
@@ -337,7 +337,7 @@ void Interpreter::xori(const UDSPInstruction opc)
 }
 
 // ANDI $acD.m, #I
-// 0000 001r 0100 0000
+// 0000 001d 0100 0000
 // iiii iiii iiii iiii
 // Logic AND of accumulator mid part $acD.m with immediate value I.
 //
@@ -354,7 +354,7 @@ void Interpreter::andi(const UDSPInstruction opc)
 }
 
 // ORI $acD.m, #I
-// 0000 001r 0110 0000
+// 0000 001d 0110 0000
 // iiii iiii iiii iiii
 // Logic OR of accumulator mid part $acD.m with immediate value I.
 //
@@ -489,8 +489,8 @@ void Interpreter::addaxl(const UDSPInstruction opc)
   UpdateSR64Add(acc, acx, GetLongAcc(dreg));
 }
 
-// ADDI $amR, #I
-// 0000 001r 0000 0000
+// ADDI $amD, #I
+// 0000 001d 0000 0000
 // iiii iiii iiii iiii
 // Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
 //
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
index 716781750e..37e8a4385e 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
@@ -53,7 +53,7 @@ void DSPEmitter::clrl(const UDSPInstruction opc)
 //----
 
 // ANDCF $acD.m, #I
-// 0000 001r 1100 0000
+// 0000 001d 1100 0000
 // iiii iiii iiii iiii
 // Set logic zero (LZ) flag in status register $sr if result of logic AND of
 // accumulator mid part $acD.m with immediate value I is equal to I.
@@ -88,7 +88,7 @@ void DSPEmitter::andcf(const UDSPInstruction opc)
 }
 
 // ANDF $acD.m, #I
-// 0000 001r 1010 0000
+// 0000 001d 1010 0000
 // iiii iiii iiii iiii
 // Set logic zero (LZ) flag in status register $sr if result of logical AND
 // operation of accumulator mid part $acD.m with immediate value I is equal
@@ -221,7 +221,7 @@ void DSPEmitter::cmpaxh(const UDSPInstruction opc)
 }
 
 // CMPI $amD, #I
-// 0000 001r 1000 0000
+// 0000 001d 1000 0000
 // iiii iiii iiii iiii
 // Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
 // Although flags are being set regarding whole accumulator register.
@@ -458,7 +458,7 @@ void DSPEmitter::notc(const UDSPInstruction opc)
 }
 
 // XORI $acD.m, #I
-// 0000 001r 0010 0000
+// 0000 001d 0010 0000
 // iiii iiii iiii iiii
 // Logic exclusive or (XOR) of accumulator mid part $acD.m with
 // immediate value I.
@@ -482,7 +482,7 @@ void DSPEmitter::xori(const UDSPInstruction opc)
 }
 
 // ANDI $acD.m, #I
-// 0000 001r 0100 0000
+// 0000 001d 0100 0000
 // iiii iiii iiii iiii
 // Logic AND of accumulator mid part $acD.m with immediate value I.
 //
@@ -505,7 +505,7 @@ void DSPEmitter::andi(const UDSPInstruction opc)
 }
 
 // ORI $acD.m, #I
-// 0000 001r 0110 0000
+// 0000 001d 0110 0000
 // iiii iiii iiii iiii
 // Logic OR of accumulator mid part $acD.m with immediate value I.
 //
@@ -686,8 +686,8 @@ void DSPEmitter::addaxl(const UDSPInstruction opc)
   }
 }
 
-// ADDI $amR, #I
-// 0000 001r 0000 0000
+// ADDI $amD, #I
+// 0000 001d 0000 0000
 // iiii iiii iiii iiii
 // Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
 //
diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 87c61b167a..97d4da1aea 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -1217,12 +1217,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{ADDI}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{0000} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{0000} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
   \begin{DSPOpcodeFormat}
-    ADDI $amR, #I
+    ADDI $amD, #I
   \end{DSPOpcodeFormat}
 
   \begin{DSPOpcodeDescription}
@@ -1356,7 +1356,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{ANDCF}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{1100} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{1100} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -1384,7 +1384,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{ANDF}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{1010} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{1010} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -1412,7 +1412,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{ANDI}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{0100} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{0100} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -1953,7 +1953,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{CMPI}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{1000} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{1000} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -3629,7 +3629,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{ORI}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{0110} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{0110} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -4286,7 +4286,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
 \begin{DSPOpcode}{XORI}
   \begin{DSPOpcodeBytefield}{16}
-    \monobitbox{4}{0000} & \monobitbox{4}{001r} & \monobitbox{4}{0010} & \monobitbox{4}{0000} \\
+    \monobitbox{4}{0000} & \monobitbox{4}{001d} & \monobitbox{4}{0010} & \monobitbox{4}{0000} \\
     \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii} & \monobitbox{4}{iiii}
   \end{DSPOpcodeBytefield}
 
@@ -4960,13 +4960,13 @@ Instruction & Opcode & Page \\ \hline
 \OpcodeRow{0000 0010 1101 cccc}{RETcc}
 \OpcodeRow{0000 0010 1111 cccc}{RTIcc}
 \OpcodeRowSkip
-\OpcodeRow{0000 001r 0000 0000 iiii iiii iiii iiii}{ADDI}
-\OpcodeRow{0000 001r 0010 0000 iiii iiii iiii iiii}{XORI}
-\OpcodeRow{0000 001r 0100 0000 iiii iiii iiii iiii}{ANDI}
-\OpcodeRow{0000 001r 0110 0000 iiii iiii iiii iiii}{ORI}
-\OpcodeRow{0000 001r 1000 0000 iiii iiii iiii iiii}{CMPI}
-\OpcodeRow{0000 001r 1010 0000 iiii iiii iiii iiii}{ANDF}
-\OpcodeRow{0000 001r 1100 0000 iiii iiii iiii iiii}{ANDCF}
+\OpcodeRow{0000 001d 0000 0000 iiii iiii iiii iiii}{ADDI}
+\OpcodeRow{0000 001d 0010 0000 iiii iiii iiii iiii}{XORI}
+\OpcodeRow{0000 001d 0100 0000 iiii iiii iiii iiii}{ANDI}
+\OpcodeRow{0000 001d 0110 0000 iiii iiii iiii iiii}{ORI}
+\OpcodeRow{0000 001d 1000 0000 iiii iiii iiii iiii}{CMPI}
+\OpcodeRow{0000 001d 1010 0000 iiii iiii iiii iiii}{ANDF}
+\OpcodeRow{0000 001d 1100 0000 iiii iiii iiii iiii}{ANDCF}
 \OpcodeRowSkip
 \OpcodeRow{0000 0010 1100 1010}{LSRN}
 \OpcodeRow{0000 0010 1100 1011}{ASRN}

From baf2c710ffbee24413315afe70651c170b645bf7 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 17:16:22 -0700
Subject: [PATCH 05/11] DSP: Fix typo with TST

---
 Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp | 2 +-
 Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
index 90199d74d9..5ada6bb669 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
@@ -81,7 +81,7 @@ void Interpreter::andf(const UDSPInstruction opc)
 
 // TST
 // 1011 r001 xxxx xxxx
-// Test accumulator %acR.
+// Test accumulator $acR.
 //
 // flags out: --xx xx00
 void Interpreter::tst(const UDSPInstruction opc)
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
index 37e8a4385e..a0185f73a2 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
@@ -126,7 +126,7 @@ void DSPEmitter::andf(const UDSPInstruction opc)
 
 // TST
 // 1011 r001 xxxx xxxx
-// Test accumulator %acR.
+// Test accumulator $acR.
 //
 // flags out: --xx xx00
 void DSPEmitter::tst(const UDSPInstruction opc)

From 7c63bd189375bba24f5af7703ee765fc7fc88708 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 17:33:12 -0700
Subject: [PATCH 06/11] docs/DSP: Fix inconsistency with ADDI and CMPI

We don't have anything called $amD, though we do have $acsD.  However, these instructions affect flags based on the whole accumulator, so it's better to just use $acD.
---
 .../Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp  | 13 +++++++------
 Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp   | 11 ++++++-----
 .../GameCube_DSP_Users_Manual.tex                   |  8 ++++----
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
index 5ada6bb669..0a9485b0ab 100644
--- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
+++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp
@@ -143,11 +143,12 @@ void Interpreter::cmpaxh(const UDSPInstruction opc)
   ZeroWriteBackLog();
 }
 
-// CMPI $amD, #I
+// CMPI $acD, #I
 // 0000 001d 1000 0000
 // iiii iiii iiii iiii
-// Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
-// Although flags are being set regarding whole accumulator register.
+// Compares accumulator with immediate. Comparison is executed
+// by subtracting the immediate (16-bit sign extended) from mid accumulator
+// $acD.hm and computing flags based on whole accumulator $acD.
 //
 // flags out: x-xx xxxx
 void Interpreter::cmpi(const UDSPInstruction opc)
@@ -166,8 +167,8 @@ void Interpreter::cmpi(const UDSPInstruction opc)
 
 // CMPIS $acD, #I
 // 0000 011d iiii iiii
-// Compares accumulator with short immediate. Comaprison is executed
-// by subtracting short immediate (8bit sign extended) from mid accumulator
+// Compares accumulator with short immediate. Comparison is executed
+// by subtracting the short immediate (8-bit sign extended) from mid accumulator
 // $acD.hm and computing flags based on whole accumulator $acD.
 //
 // flags out: x-xx xxxx
@@ -489,7 +490,7 @@ void Interpreter::addaxl(const UDSPInstruction opc)
   UpdateSR64Add(acc, acx, GetLongAcc(dreg));
 }
 
-// ADDI $amD, #I
+// ADDI $acD, #I
 // 0000 001d 0000 0000
 // iiii iiii iiii iiii
 // Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
index a0185f73a2..e2627f0e85 100644
--- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
+++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp
@@ -220,11 +220,12 @@ void DSPEmitter::cmpaxh(const UDSPInstruction opc)
   }
 }
 
-// CMPI $amD, #I
+// CMPI $acD, #I
 // 0000 001d 1000 0000
 // iiii iiii iiii iiii
-// Compares mid accumulator $acD.hm ($amD) with sign extended immediate value I.
-// Although flags are being set regarding whole accumulator register.
+// Compares accumulator with immediate. Comparison is executed
+// by subtracting the immediate (16-bit sign extended) from mid accumulator
+// $acD.hm and computing flags based on whole accumulator $acD.
 //
 // flags out: x-xx xxxx
 void DSPEmitter::cmpi(const UDSPInstruction opc)
@@ -257,7 +258,7 @@ void DSPEmitter::cmpi(const UDSPInstruction opc)
 // CMPIS $acD, #I
 // 0000 011d iiii iiii
 // Compares accumulator with short immediate. Comparison is executed
-// by subtracting short immediate (8bit sign extended) from mid accumulator
+// by subtracting the short immediate (8-bit sign extended) from mid accumulator
 // $acD.hm and computing flags based on whole accumulator $acD.
 //
 // flags out: x-xx xxxx
@@ -686,7 +687,7 @@ void DSPEmitter::addaxl(const UDSPInstruction opc)
   }
 }
 
-// ADDI $amD, #I
+// ADDI $acD, #I
 // 0000 001d 0000 0000
 // iiii iiii iiii iiii
 // Adds immediate (16-bit sign extended) to mid accumulator $acD.hm.
diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 97d4da1aea..5218829044 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -1222,7 +1222,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeBytefield}
 
   \begin{DSPOpcodeFormat}
-    ADDI $amD, #I
+    ADDI $acD, #I
   \end{DSPOpcodeFormat}
 
   \begin{DSPOpcodeDescription}
@@ -1958,12 +1958,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeBytefield}
 
   \begin{DSPOpcodeFormat}
-    CMPI $amD, #I
+    CMPI $acD, #I
   \end{DSPOpcodeFormat}
 
   \begin{DSPOpcodeDescription}
-    \item Compares mid accumulator \Register{\$acD.hm} (\Register{\$amD}) with sign-extended immediate value \Value{I}.
-          However, flags are set with regards to the whole accumulator register.
+    \item Compares accumulator with immediate. Comparison is performed by subtracting the immediate (16-bit sign-extended)
+          from mid accumulator \Register{\$acD.hm} and computing flags based on whole accumulator \Register{\$acD}.
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}

From 0dd181f46137985142319582da671b0436524599 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Mon, 30 May 2022 12:42:34 -0700
Subject: [PATCH 07/11] docs/DSP: Add missing dollar signs in shift instruction
 operation sections

---
 .../GameCube_DSP_Users_Manual.tex             | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index 5218829044..a0c1753728 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -1519,12 +1519,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (ac1.m & 64)
-        IF (ac1.m & 63) != 0
-            $ac0 >>= (64 - (ac1.m & 63))
+    IF ($ac1.m & 64)
+        IF ($ac1.m & 63) != 0
+            $ac0 >>= (64 - ($ac1.m & 63))
         ENDIF
     ELSE
-        $ac0 <<= ac1.m
+        $ac0 <<= $ac1.m
     ENDIF
     FLAGS($ac0)
     $pc++
@@ -1547,12 +1547,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (ac(1-D).m & 64)
-        IF (ac(1-D).m & 63) != 0
-            $acD >>= (64 - (ac(1-D).m & 63))
+    IF ($ac(1-D).m & 64)
+        IF ($ac(1-D).m & 63) != 0
+            $acD >>= (64 - ($ac(1-D).m & 63))
         ENDIF
     ELSE
-        $acD <<= ac(1-D).m
+        $acD <<= $ac(1-D).m
     ENDIF
     FLAGS($acD)
     $pc++
@@ -1579,12 +1579,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (axS.h & 64)
-        IF (axS.h & 63) != 0
-            $acD >>= (64 - (axS.h & 63))
+    IF ($axS.h & 64)
+        IF ($axS.h & 63) != 0
+            $acD >>= (64 - ($axS.h & 63))
         ENDIF
     ELSE
-        $acD <<= axS.h
+        $acD <<= $axS.h
     ENDIF
     FLAGS($acD)
     $pc++
@@ -2676,12 +2676,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (ac1.m & 64)
-        IF (ac1.m & 63) != 0
-            $ac0 >>= (64 - (ac1.m & 63))
+    IF ($ac1.m & 64)
+        IF ($ac1.m & 63) != 0
+            $ac0 >>= (64 - ($ac1.m & 63))
         ENDIF
     ELSE
-        $ac0 <<= ac1.m
+        $ac0 <<= $ac1.m
     ENDIF
     FLAGS($ac0)
     $pc++
@@ -2704,12 +2704,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (ac(1-D).m & 64)
-        IF (ac(1-D).m & 63) != 0
-            $acD >>= (64 - (ac(1-D).m & 63))
+    IF ($ac(1-D).m & 64)
+        IF ($ac(1-D).m & 63) != 0
+            $acD >>= (64 - ($ac(1-D).m & 63))
         ENDIF
     ELSE
-        $acD <<= ac(1-D).m
+        $acD <<= $ac(1-D).m
     ENDIF
     FLAGS($acD)
     $pc++
@@ -2736,12 +2736,12 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
-    IF (axS.h & 64)
-        IF (axS.h & 63) != 0
-            $acD >>= (64 - (axS.h & 63))
+    IF ($axS.h & 64)
+        IF ($axS.h & 63) != 0
+            $acD >>= (64 - ($axS.h & 63))
         ENDIF
     ELSE
-        $acD <<= axS.h
+        $acD <<= $axS.h
     ENDIF
     FLAGS($acD)
     $pc++

From d297ab18e667bcab45a327b262308b4d61882aeb Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Wed, 1 Jun 2022 22:22:04 -0700
Subject: [PATCH 08/11] docs/DSP: Add notes highlighting the difference between
 'LS and 'SL

---
 .../GameCube_DSP_Users_Manual.tex             | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index a0c1753728..f7ab05677f 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -4670,6 +4670,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0++
     $ar3++
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'SL} in that \Register{\$(0x18+D)} is associated with \Register{\$ar0} instead of \Register{\$ar3} and \Register{\$acS.m} is associated with \Register{\$ar3} instead of \Register{\$ar0}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'LSM}
@@ -4693,6 +4697,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0++
     $ar3 += $ix3
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'SLM} in that \Register{\$(0x18+D)} is associated with \Register{\$ar0} instead of \Register{\$ar3} and \Register{\$acS.m} is associated with \Register{\$ar3} instead of \Register{\$ar0}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'LSNM}
@@ -4717,6 +4725,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0 += $ix0
     $ar3 += $ix3
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'SLNM} in that \Register{\$(0x18+D)} is associated with \Register{\$ar0} instead of \Register{\$ar3} and \Register{\$acS.m} is associated with \Register{\$ar3} instead of \Register{\$ar0}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'LSN}
@@ -4740,6 +4752,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0 += $ix0
     $ar3++
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'SLN} in that \Register{\$(0x18+D)} is associated with \Register{\$ar0} instead of \Register{\$ar3} and \Register{\$acS.m} is associated with \Register{\$ar3} instead of \Register{\$ar0}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'MV}
@@ -4837,6 +4853,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0++
     $ar3++
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'LS} in that \Register{\$(0x18+D)} is associated with \Register{\$ar3} instead of \Register{\$ar0} and \Register{\$acS.m} is associated with \Register{\$ar0} instead of \Register{\$ar3}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'SLM}
@@ -4860,6 +4880,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0++
     $ar3 += $ix3
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'LSM} in that \Register{\$(0x18+D)} is associated with \Register{\$ar3} instead of \Register{\$ar0} and \Register{\$acS.m} is associated with \Register{\$ar0} instead of \Register{\$ar3}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'SLNM}
@@ -4884,6 +4908,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0 += $ix0
     $ar3 += $ix3
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'LSNM} in that \Register{\$(0x18+D)} is associated with \Register{\$ar3} instead of \Register{\$ar0} and \Register{\$acS.m} is associated with \Register{\$ar0} instead of \Register{\$ar3}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'SLN}
@@ -4907,6 +4935,10 @@ When the main and extension opcodes write to the same register, the register is
     $ar0 += $ix0
     $ar3++
   \end{DSPOpcodeOperation}
+
+  \begin{DSPOpcodeNote}
+    \item Differs from \Opcode{'LSN} in that \Register{\$(0x18+D)} is associated with \Register{\$ar3} instead of \Register{\$ar0} and \Register{\$acS.m} is associated with \Register{\$ar0} instead of \Register{\$ar3}. In both cases, \Register{\$(0x18+D)} is loaded and \Register{\$acS.m} is stored.
+  \end{DSPOpcodeNote}
 \end{DSPOpcode}
 
 \begin{DSPOpcode}{'SN}

From 12d34eec9b3c4de21849424cc11d7589a803b40e Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Wed, 25 May 2022 18:47:49 -0700
Subject: [PATCH 09/11] DSPSpy: Add 40-bit mode tests

---
 Source/DSPSpy/tests/40bit_ins_test.ds | 202 ++++++++++++++++++++++++++
 Source/DSPSpy/tests/40bit_test.ds     | 164 +++++++++++++++++++++
 2 files changed, 366 insertions(+)
 create mode 100644 Source/DSPSpy/tests/40bit_ins_test.ds
 create mode 100644 Source/DSPSpy/tests/40bit_test.ds

diff --git a/Source/DSPSpy/tests/40bit_ins_test.ds b/Source/DSPSpy/tests/40bit_ins_test.ds
new file mode 100644
index 0000000000..9ed8ad5dbe
--- /dev/null
+++ b/Source/DSPSpy/tests/40bit_ins_test.ds
@@ -0,0 +1,202 @@
+; This test covers the behavior of 40-bit mode with various instructions.
+incdir  "tests"
+include "dsp_base.inc"
+
+positive_value: EQU #0x1234
+negative_value: EQU #0x9876
+
+negative_imem_value_addr:
+CW negative_value
+
+; DSPSpy doesn't pre-populate DMEM currently, so instead use these addresses to store values.
+positive_dmem_value_addr: EQU #0x100
+negative_dmem_value_addr: EQU #0x101
+readback_dmem_addr: EQU #0x102
+
+test_main:
+	LRI $ar0, #positive_dmem_value_addr
+	LRI $ar1, #negative_dmem_value_addr
+	LRI $ar2, #negative_imem_value_addr
+	LRI $ar3, #readback_dmem_addr
+	LRI $ix0, #0
+	LRI $ix1, #0
+	LRI $ix2, #0
+	LRI $ix3, #0
+
+	LRI $ax0.h, #positive_value
+	LRI $ax1.h, #negative_value
+
+	SR @positive_dmem_value_addr, $ax0.h
+	SR @negative_dmem_value_addr, $ax1.h
+
+	LRI $cr, #(positive_dmem_value_addr / 256)
+
+	SET40
+	; Instructions that perform sign-extension
+	; $acc0 should alternate between being positive and negative here
+	; (though none of these instructions update $sr)
+
+	; [1] ILRR (also ILRRD/ILRRI/ILRRN, not covered)
+	ILRR $ac0.m, @$ar2 ; -
+	CALL send_back
+	; [2] LR
+	LR $ac0.m, @positive_dmem_value_addr ; +
+	CALL send_back
+	; [3] LRI
+	LRI $ac0.m, #negative_value ; -
+	CALL send_back
+	; [4] LRIS
+	LRIS $ac0.m, #42 ; +
+	CALL send_back
+	; [5] LRR (also LRRD/LRRI/LRRN)
+	LRR $ac0.m, @$ar1 ; -
+	CALL send_back
+	; [6] LRS
+	LRS $ac0.m, @(positive_dmem_value_addr & 0xff) ; +
+	CALL send_back
+	; [7] MRR
+	MRR $ac0.m, $ax1.h ; -
+	CALL send_back
+	; [8] 'LN (and 'L, but 'LN lets us set $ix0 to not increment $ar0)
+	NX'LN : $ac0.m, @$ar0 ; +
+	CALL send_back
+
+	; Instructions that experience saturation
+	; $ax1.l should alternate between 0x8000 and 0x7fff.
+	LRI $ac0.m, #0x4231
+	LRI $ac0.h, #0x12 ; positive
+	LRI $ac1.m, #0x2816
+	LRI $ac1.h, #0x99 ; negative
+	; [9] MRR (again)
+	MRR $ax1.l, $ac1.m ; -
+	CALL send_back
+	; [10] SR
+	SR @readback_dmem_addr, $ac0.m
+	LR $ax1.l, @readback_dmem_addr ; +
+	CALL send_back
+	; [11] SRRN (also SRR/SRRD/SRRI)
+	SRRN @$ar3, $ac1.m
+	LR $ax1.l, @readback_dmem_addr ; -
+	CALL send_back
+	; [12] SRS
+	SRS @(readback_dmem_addr & 0xff), $ac0.m
+	LR $ax1.l, @readback_dmem_addr ; +
+	CALL send_back
+	; [13] 'LSNM (also 'LS/'LSM/'LSN) - the $ax0.l read is not relevant
+	NX'LSNM : $ax0.l, $ac1.m
+	LR $ax1.l, @readback_dmem_addr ; -
+	CALL send_back
+	; [14] 'MV
+	NX'MV : $ax1.l, $ac0.m ; +
+	CALL send_back
+	; [15] 'SLNM (also 'SL/'SLM/'SLN) - the $ax0.l read is not relevant
+	; Note that 'SL stores to @$ar0, while 'LS stores to @$ar3
+	LRI $ar0, #readback_dmem_addr
+	NX'SLNM : $ac1.m, $ax0.l
+	LR $ax1.l, @readback_dmem_addr ; -
+	CALL send_back
+	LRI $ar0, #positive_dmem_value_addr
+	; [16] 'SN (also 'S)
+	NX'SN : @$ar3, $ac0.m
+	LR $ax1.l, @readback_dmem_addr ; +
+	CALL send_back
+
+	; Instructions that are not affected
+	; [17] ADDI
+	ADDI $ac0.m, #8
+	CALL send_back
+	; [18] ADDIS
+	ADDIS $ac0.m, #-8
+	CALL send_back
+	; [19] ANDC
+	ANDC $ac1.m, $ac0.m
+	CALL send_back
+	; [20] ANDI
+	ANDI $ac0.m, #0x6666
+	CALL send_back
+	; [21] ANDR
+	ANDR $ac0.m, $ax0.h
+	CALL send_back
+	; [22] ORC
+	ORC $ac0.m, $ac1.m
+	CALL send_back
+	; [23] ORI
+	ORI $ac0.m, #0xfeed
+	CALL send_back
+	; [24] ORR
+	ORR $ac1.m, $ax0.h
+	CALL send_back
+	; [25] NOT
+	NOT $ac1.m
+	CALL send_back
+	; [26] XORC
+	XORC $ac0.m, $ac1.m
+	CALL send_back
+	; [27] XORI
+	XORI $ac0.m, #0x5555
+	CALL send_back
+	; [28] XORR
+	XORR $ac1.m, $ax1.h
+	CALL send_back
+
+	; [29] MOVR always sign extends...
+	MOVR $acc1, $ax0.h
+	CALL send_back
+	; [30] ... even in SET16 mode
+	SET16
+	MOVR $acc1, $ax1.h
+	CALL send_back
+	SET40
+
+	; Shift instructions - do these see saturated $ac1.m?
+	LRI $ac0.m, #positive_value
+	LRI $ac1.m, #2
+	LRI $ac1.h, #1
+	; [31] - for diffs only
+	CALL send_back
+	; [32]
+	LSRNR $acc0, $ac1.m
+	CALL send_back
+	; [33] Shifts $acc0 by $ac1.m (in the other direction)
+	LSRN
+	CALL send_back
+
+	; Does LOOP experience saturation?
+	CLR $acc0
+	LRI $ac1.m, #0x1234
+	LRI $ac1.h, #1
+	; [34] - for diffs only
+	CALL send_back
+	; [35] LOOP
+	LOOP $ac1.m
+	INC $acc0
+	CALL send_back
+	LRI $ac1.h, #0x99
+	; [36] BLOOP
+	BLOOP $ac1.m, bloop_last_ins
+	INCM $ac0.m
+bloop_last_ins:
+	NOP
+	CALL send_back
+
+	; For the sake of clarity, the same LOOP/BLOOP calls in SET16 mode don't have saturation:
+	SET16
+	CLR $acc0
+	LRI $ac1.m, #0x1234
+	LRI $ac1.h, #1
+	; [37] - for diffs only
+	CALL send_back
+	; [38] LOOP
+	LOOP $ac1.m
+	INC $acc0
+	CALL send_back
+	LRI $ac1.h, #0x99
+	; [39] BLOOP
+	BLOOP $ac1.m, bloop2_last_ins
+	INCM $ac0.m
+bloop2_last_ins:
+	NOP
+	CALL send_back
+
+	; We're done, DO NOT DELETE THIS LINE
+	JMP end_of_test
diff --git a/Source/DSPSpy/tests/40bit_test.ds b/Source/DSPSpy/tests/40bit_test.ds
new file mode 100644
index 0000000000..a2f2b24a5f
--- /dev/null
+++ b/Source/DSPSpy/tests/40bit_test.ds
@@ -0,0 +1,164 @@
+; This test covers the behavior of 40-bit mode for a variety of values.
+; It takes a while to run completely (~5 minutes), but progress is indicated via mail shown at the
+; top of the screen in DSPSpy.  The value will go from 80000000 to 8041ffff.
+incdir  "tests"
+include "dsp_base.inc"
+
+
+
+test_main:
+	LRI $ar0, #0
+	LRI $ar1, #0
+	LRI $ar2, #0
+	LRI $ar3, #0
+	LRI $ix0, #0
+	LRI $ix1, #0
+	LRI $ix2, #0
+	LRI $ix3, #0
+
+	; Test with $ac0.l from 0xfff0 to 0x0010
+	LRI $ac0.l, #0xfff0
+BLOOPI #0x21, first_loop_last_ins
+	CALL test_saturation
+	IAR $ar0
+first_loop_last_ins:
+	INC $acc0
+
+	; Test with $ac0.l from 0x7ff0 to 0x8010 (0x21 values, crossing bit 15 of $ac0.l)
+	LRI $ac0.l, #0x7ff0
+BLOOPI #0x21, second_loop_last_ins
+	CALL test_saturation
+	IAR $ar0
+second_loop_last_ins:
+	INC $acc0
+
+	; We're done.  Report the test results.
+	; $ix1 should be 0, or else saturation occurred on $ac0.l or $ac0.h.
+	; $ix2 should be 0, or else sign-extension occurred on $ac0.l or $ac0.h.
+	; $ix3 should be 0, or else we incorrectly predicted saturation on $ac0.m.
+	; $ar1/$ar2/$ar3 record the number of times each of these happened
+	CALL send_back
+
+	; We're done, DO NOT DELETE THIS LINE
+	JMP end_of_test
+
+
+
+test_saturation:
+	; We start with $ac0.h at -0x80 since we can use the overflow flag to check when wrapping around
+	; occurs; starting at 0 and ending when it wraps back to 0 doesn't work since we can't check the
+	; zero flag since $ac0.l may be nonzero ($ac0.l is used as an input to this subroutine)
+	LRI $ac0.m, #0
+	LRI $ac0.h, #-0x80
+
+loop_start:
+	; Compare the value of $ac0.m when in SET16 mode and in SET40 mode
+	SET40
+	; Reading $ac0.m in SET40 mode results in saturation if $ac0.h doesn't match the sign-extension
+	; of $ac0.m. Also, storing to $ac1.m in SET40 mode clears $ac1.l and sets $ac1.h to the
+	; sign-extension of $ac1.m.
+	MRR $ac1.m, $ac0.m
+	SET16
+	; Attempt to compute the saturated value of $ac1.m in $ax1.h,
+	; using what we know of $acc0.
+	TST'MV $acc0 : $ax1.h, $ac0.m
+	JL negative_acc0
+	; $acc0 is nonnegative.
+	JMPx8 check_saturated_ax1h ; If the above s32 bit is not set, we don't need to saturate
+	; If the above s32 bit _is_ set, then saturate $ax1.h.
+	LRI $ax1.h, #0x7fff
+	JMP check_saturated_ax1h
+
+negative_acc0:
+	JMPx8 check_saturated_ax1h ; If the above s32 bit is not set, we don't need to saturate
+	LRI $ax1.h, #0x8000
+	; Fall through to check_saturated_ax1h
+
+check_saturated_ax1h:
+	; $acc1 has the value of $ac0.m in SET40 mode.
+	; And, $ax1.h has what we computed that value should be, and CMPAXH always sign-extends $ax1.h
+	; (and ignores $ax1.l), so we can compare using it directly.
+	CMPAXH $acc1, $ax1.h
+	JZ check_read_low
+	; Our prediction was wrong (shouldn't happen)
+	LRI $ix3, #1
+	IAR $ar3
+	TST $acc0
+	CALL send_back
+	; Fall through to check_read_low
+
+check_read_low:
+	SET40
+	MRR $ac1.m, $ac0.l
+	SET16
+	MRR $ax1.h, $ac0.l
+	CMPAXH $acc1, $ax1.h
+	JZ check_read_high
+	; Reading $ac0.l gave different results in SET40 and SET16 modes (shouldn't happen)
+	LRI $ix1, #1
+	IAR $ar1
+	TST $acc0
+	CALL send_back
+	; Fall through to check_read_high
+
+check_read_high:
+	SET40
+	MRR $ac1.m, $ac0.h
+	SET16
+	MRR $ax1.h, $ac0.h
+	CMPAXH $acc1, $ax1.h
+	JZ check_write_low
+	; Reading $ac0.h gave different results in SET40 and SET16 modes (shouldn't happen)
+	LRI $ix1, #1
+	IAR $ar1
+	TST $acc0
+	CALL send_back
+	; Fall through to check_write_low
+
+check_write_low:
+	MOV $acc1, $acc0
+	SET40
+	MRR $ac1.l, $ac0.l
+	SET16
+	CMP
+	JZ check_write_high
+	; Writing to $ac1.l caused $acc1 to not match $acc0 (shouldn't happen)
+	LRI $ix2, #1
+	IAR $ar2
+	CALL send_back
+	; Fall through to check_write_high
+
+check_write_high:
+	MOV $acc1, $acc0
+	SET40
+	MRR $ac1.h, $ac0.h
+	SET16
+	CMP
+	JZ increment_loop
+	; Writing to $ac1.h caused $acc1 to not match $acc0 (shouldn't happen)
+	LRI $ix2, #1
+	IAR $ar2
+	CALL send_back
+	; Fall through to increment_loop
+
+increment_loop:
+	INCM $ac0.m
+	; If incrementing results in overflowing, then we're done.
+	RETO
+
+	; If ($ac0.m & 0x00ff) != 0, continue the loop without sending mail.
+	ANDF $ac0.m, #0x00ff
+	JLNZ loop_start
+	; Otherwise, send mail to report the progress. (This shows at the top of the screen in DSPSpy,
+	; but otherwise isn't handled in any meaningful way.)
+	MOV $acc1, $acc0
+	LSR $acc1, #-8
+	; Compensate for starting at INT_MIN (0x80'0000'0000) and ending at INT_MAX (0x7f'0000'0000)
+	; instead of going from 0 (0x00'0000'0000) to -1 (0xff'ffff'ffff)
+	XORI $ac1.m, #0x8000
+
+	SR @DMBH, $ar0
+	SR @DMBL, $ac1.m
+	SI @DIRQ, #0x0001
+	; We don't wait for the mail to be read, because we don't care about the response.
+	JMP loop_start

From 24a339f4377a01ab7070a563fbe735e6ac6174c8 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 18:16:31 -0700
Subject: [PATCH 10/11] docs/DSP: Explain 16-bit and 40-bit modes

---
 .../GameCube_DSP_Users_Manual.tex             | 84 ++++++++++++++-----
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index f7ab05677f..e82e25aecc 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -505,7 +505,7 @@ The high parts of the 40-bit accumulators (\Register{acX.h}) are sign-extended 8
 and the upper 8 bits read the same as the 7th bit.  For instance, \Value{0x007F} reads back as \Value{0x007F}, but \Value{0x0080} reads
 back as \Value{0xFF80}.
 
-\textbf{Accumulators \Register{\$acX}:}
+\subsection{Accumulators \Register{\$acX}}
 
 40-bit accumulator \Register{\$acX} (\Register{\$acX.hml}) consists of registers:
 
@@ -513,7 +513,7 @@ back as \Value{0xFF80}.
 $acX = $acX.h << 32 | $acX.m << 16 | $acX.l
 \end{lstlisting}
 
-\textbf{Short accumulators \Register{\$acs.X}:}
+\subsection{Short accumulators \Register{\$acsX}}
 
 24-bit accumulator \Register{\$acsX} (\Register{\$acX.hm}) consists of the upper 24 bits of accumulator \Register{\$acX}.
 
@@ -521,12 +521,30 @@ $acX = $acX.h << 32 | $acX.m << 16 | $acX.l
 $acsX = $acX.h << 16 | $acX.m
 \end{lstlisting}
 
-\textbf{Additional accumulators \Register{\$axX}:}
+\subsection{Additional accumulators \Register{\$axX}}
+
+32-bit accumulators \Register{\$axX} (\Register{\$axX.hl}) consist of registers:
 
 \begin{lstlisting}[language=C++]
 $axX = $axX.h << 16 | $axX.l
 \end{lstlisting}
 
+\subsection{16-bit and 40-bit modes}\label{subsec:SET40}
+
+Depending on the value of \RegisterField{\$sr.SXM} (bit 14), loading to \Register{\$acX.m} may also update \Register{\$acX.h} and \Register{\$acX.l}, and stores from \Register{\$acX.m} may experience saturation based on \Register{\$acX.h}. Regardless of the value of \RegisterField{\$sr.SXM}, arithmetic operations such as \Opcode{ADDI}, \Opcode{INCM}, \Opcode{MOVR}, and \Opcode{LSRN} will still affect the entire accumulator.
+
+If \RegisterField{\$sr.SXM} is set to 0, then 16-bit mode (\Opcode{SET16}) is in use. Loads to \Register{\$acX.m} will only change \Register{\$acX.m}, and storing \Register{\$acX.m} will use the value directly contained in \Register{\$acX.m}; the same applies to loads to and stores from \Register{\$acX.h} or \Register{\$acX.l} or any other register.
+
+If \RegisterField{\$sr.SXM} is set to 1, then 40-bit mode (\Opcode{SET40}) is in use. Loads to \Register{\$acX.m} will set \Register{\$acX.l} to 0 and will sign-extend into \Register{\$acX.h} (setting it to \Value{0xFF} if the sign bit is set (\InlineExpression{(\$acX.m \& 0x8000) != 0}), and to 0 otherwise).  This means that in 40-bit mode, loads to \Register{\$acX.m} are effectively loads to the whole accumulator \Register{\$acX}. Loads to \Register{\$acX.h} and \Register{\$acX.l} do not have this special behavior; they only modify the specified register (as in 16-bit mode).
+
+Additionally, if \RegisterField{\$sr.SXM} is set to 1, then moving or storing from \Register{\$acX.m} may instead result in \Value{0x7fff} or \Value{0x8000} being used. This happens if \Register{\$acX.hml} is not the same as sign-extending \Register{\$acX.ml}; \Value{0x7fff} is used if \Register{\$acX} is positive and \Value{0x8000} is used if \Register{\$acX} is negative.
+
+The conditions for this saturation are the same as the conditions for \RegisterField{\$sr.AS} (bit 4, above s32) to be set when flags are updated. (This does not mean that the saturation happens if and only if \RegisterField{\$sr.AS} is set, as the flags might have been set after an operation on a different register.)
+
+The following instructions perform sign-extension when writing to \Register{\$acX.m}: \Opcode{ILRR}, \Opcode{ILRRD}, \Opcode{ILRRI}, and \Opcode{ILRRN}; \Opcode{LR}; \Opcode{LRI}; \Opcode{LRIS}; \Opcode{LRR}, \Opcode{LRRD}, \Opcode{LRRI}, and \Opcode{LRRN}; \Opcode{LRS}; \Opcode{MRR}; and \Opcode{'L} and \Opcode{'LN}.
+
+The following instructions experience saturation when reading from \Register{\$acX.m}: \Opcode{BLOOP}; \Opcode{LOOP}; \Opcode{MRR}; \Opcode{SR}; \Opcode{SRR}, \Opcode{SRRD}, \Opcode{SRRI}, and \Opcode{SRRN}; \Opcode{SRS}; \Opcode{'LS}, \Opcode{'LSM}, \Opcode{'LSN}, and \Opcode{'LSNM}; \Opcode{'MV}; \Opcode{'SL}, \Opcode{'SLM}, \Opcode{'SLN}, and \Opcode{'SLNM}; and \Opcode{'S} and \Opcode{'SN}.
+
 \pagebreak{}
 
 \section{Stacks}
@@ -569,8 +587,8 @@ Furthermore, it also contains control bits to configure the flow of certain oper
 \begin{tabular}{|l|l|l|}
 \hline
 \textbf{Bit} & \textbf{Name} & \textbf{Comment}                                              \\ \hline
-\texttt{15}  & \texttt{SU}   & Operands are signed (1 = unsigned)                            \\ \hline
-\texttt{14}  & \texttt{SXM}  & Sign extension mode (0 = \texttt{set16}, 1 = \texttt{set40})  \\ \hline
+\texttt{15}  & \texttt{SU}   & Multiplication operands are signed (1 = unsigned)             \\ \hline
+\texttt{14}  & \texttt{SXM}  & Sign extension mode (1 = 40-bit, see \nameref{subsec:SET40})  \\ \hline
 \texttt{13}  & \texttt{AM}   & Product multiply result by 2 (when \texttt{AM = 0})           \\ \hline
 \texttt{12}  &               &                                                               \\ \hline
 \texttt{11}  & \texttt{EIE}  & External interrupt enable                                     \\ \hline
@@ -1634,6 +1652,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
           \Register{\$R} reaches zero. Block ends at specified address \Address{addrA} inclusive. i.e. opcode at \Address{addrA} is the last
           opcode included in loop. Counter is pushed on loop stack \Register{\$st3}, end of block address is pushed on loop stack
           \Register{\$st2} and the repeat address is pushed on call stack \Register{\$st0}. Up to 4 nested loops are allowed.
+    \item When using \Register{\$ac0.m} or \Register{\$ac1.m} as the initial counter value, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2138,6 +2157,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from instruction memory pointed by addressing register \Register{\$arS}
           to mid accumulator register \Register{\$acD.m}.
+    \item Optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2160,6 +2180,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from instruction memory pointed by addressing register \Register{\$arS}
           to mid accumulator register \Register{\$acD.m}. Decrement addressing register \Register{\$arS}.
+    \item Optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2183,6 +2204,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from instruction memory pointed by addressing register \Register{\$arS} to
           mid accumulator register \Register{\$acD.m}. Increment addressing register \Register{\$arS}.
+    \item Optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2207,6 +2229,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
     \item Move value from instruction memory pointed by addressing register \Register{\$arS}
           to mid accumulator register \Register{\$acD.m}. Add corresponding indexing register
           \Register{\$ixS} to addressing register \Register{\$arS}.
+    \item Optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2367,6 +2390,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
     \item Repeatedly execute the following opcode until the counter specified by the value from register \Register{\$R} reaches zero.
           Each execution decrements the counter. Register \Register{\$R} remains unchanged. If register \Register{\$R} is set to zero at the
           beginning of loop then the looped instruction will not get executed.
+    \item When using \Register{\$ac0.m} or \Register{\$ac1.m} as the initial counter value, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2418,7 +2442,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by address \Address{M} to register \Register{\$D}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2441,7 +2465,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Load immediate value \Value{I} to register \Register{\$D}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2463,7 +2487,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Load immediate value \Value{I} (8-bit sign-extended) to accumulator register \Register{\$(0x18+D)}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2485,7 +2509,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by addressing register \Register{\$arS} to register \Register{\$D}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2508,7 +2532,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by addressing register \Register{\$arS} to register \Register{\$D}.
           Decrements register \Register{\$arS}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2532,7 +2556,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by addressing register \Register{\$arS} to register \Register{\$D}.
           Increments register \Register{\$arS}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2556,7 +2580,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by addressing register \Register{\$arS} to register \Register{\$D}.
           Add indexing register \Register{\$ixS} to register \Register{\$arS}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -2579,7 +2603,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Move value from data memory pointed by address \Address{(\$cr << 8) | M} to register \Register{\$(0x18+D)}.
-          Perform an additional operation depending on destination register.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3049,7 +3073,8 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Move value from register \Register{\$S} to register \Register{\$D}.
-          Perform an additional operation depending on destination register.
+    \item When moving to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
+    \item When moving from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3861,6 +3886,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeSeeAlso}
     \item \Opcode{SET40}
+    \item \nameref{subsec:SET40}
   \end{DSPOpcodeSeeAlso}
 
   \DSPOpcodeFlagsUnchanged
@@ -3886,6 +3912,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeSeeAlso}
     \item \Opcode{SET16}
+    \item \nameref{subsec:SET40}
   \end{DSPOpcodeSeeAlso}
 
   \DSPOpcodeFlagsUnchanged
@@ -3929,7 +3956,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from register \Register{\$S} to a memory pointed by address \Address{M}.
-          Perform an additional operation depending on destination register.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3951,7 +3978,8 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from source register \Register{\$S} to a memory location pointed by addressing
-          register \Register{\$arD}. Perform an additional operation depending on source register.
+          register \Register{\$arD}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3973,7 +4001,8 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from source register \Register{\$S} to a memory location pointed by addressing
-          register \Register{\$arD}. Decrement register \Register{\$arD}. Perform an additional operation depending on source register.
+          register \Register{\$arD}. Decrement register \Register{\$arD}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -3996,7 +4025,8 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from source register \Register{\$S} to a memory location pointed by addressing
-          register \Register{\$arD}. Increment register \Register{\$arD}. Perform an additional operation depending on source register.
+          register \Register{\$arD}. Increment register \Register{\$arD}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4020,7 +4050,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
   \begin{DSPOpcodeDescription}
     \item Store value from source register \Register{\$S} to a memory location pointed by addressing
           register \Register{\$arD}. Add indexing register \Register{\$ixD} to register \Register{\$arD}.
-          Perform an additional operation depending on source register.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4043,7 +4073,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from register \Register{\$(0x1C+S)} to a memory pointed by address \Address{(\$cr << 8) | M}.
-          Perform an additional operation depending on destination register.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4070,7 +4100,6 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
 
   \begin{DSPOpcodeDescription}
     \item Store value from register \Register{\$acS.h} to a memory pointed by address \Address{(\$cr << 8) | M}.
-          Perform an additional operation depending on destination register.
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4403,6 +4432,7 @@ When the main and extension opcodes write to the same register, the register is
   \begin{DSPOpcodeDescription}
     \item Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$arS}.
           Post increment register \Register{\$arS}.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4423,6 +4453,7 @@ When the main and extension opcodes write to the same register, the register is
   \begin{DSPOpcodeDescription}
     \item Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$arS}.
           Add indexing register \Register{\$ixS} to register \Register{\$arS}.
+    \item When loading to \Register{\$ac0.m} or \Register{\$ac1.m}, optionally perform sign extension depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4662,6 +4693,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar0}.
           Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar3}.
           Increment both \Register{\$ar0} and \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4689,6 +4721,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar0}.
           Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix3} to addressing register \Register{\$ar3} and increment \Register{\$ar0}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4717,6 +4750,7 @@ When the main and extension opcodes write to the same register, the register is
           Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix0} to addressing register \Register{\$ar0} and add corresponding
           indexing register \Register{\$ix3} to addressing register \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4744,6 +4778,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar0}.
           Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix0} to addressing register \Register{\$ar0} and increment \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4769,6 +4804,7 @@ When the main and extension opcodes write to the same register, the register is
 
   \begin{DSPOpcodeDescription}
     \item Move value of register \Register{\$(0x1c+S)} to the register \Register{\$(0x18+D)}.
+    \item When moving from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4824,6 +4860,7 @@ When the main and extension opcodes write to the same register, the register is
   \begin{DSPOpcodeDescription}
     \item Store value of register \Register{\$(0x1c+S)} in the memory pointed by register \Register{\$arD}.
           Post increment register \Register{\$arD}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4845,6 +4882,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar0}.
           Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar3}.
           Increment both \Register{\$ar0} and \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4872,6 +4910,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar0}.
           Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix3} to addressing register \Register{\$ar3} and increment \Register{\$ar0}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4900,6 +4939,7 @@ When the main and extension opcodes write to the same register, the register is
           Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix0} to addressing register \Register{\$ar0} and add corresponding
           indexing register \Register{\$ix3} to addressing register \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4927,6 +4967,7 @@ When the main and extension opcodes write to the same register, the register is
     \item Store value from register \Register{\$acS.m} to memory location pointed by register \Register{\$ar0}.
           Load register \Register{\$(0x18+D)} with value from memory pointed by register \Register{\$ar3}.
           Add corresponding indexing register \Register{\$ix0} to addressing register \Register{\$ar0} and increment \Register{\$ar3}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}
@@ -4953,6 +4994,7 @@ When the main and extension opcodes write to the same register, the register is
   \begin{DSPOpcodeDescription}
     \item Store value of register \Register{\$(0x1c+S)} in the memory pointed by register \Register{\$arD}.
           Add indexing register \Register{\$ixD} to register \Register{\$arD}.
+    \item When storing from \Register{\$ac0.m} or \Register{\$ac1.m}, optionally apply saturation depending on the value of \RegisterField{\$sr.SXM} (see \nameref{subsec:SET40}).
   \end{DSPOpcodeDescription}
 
   \begin{DSPOpcodeOperation}

From f47dfc3dba8d4f50aac9d198074ab86523489106 Mon Sep 17 00:00:00 2001
From: Pokechu22 <Pokechu022@gmail.com>
Date: Sun, 22 May 2022 21:03:01 -0700
Subject: [PATCH 11/11] docs/DSP: Update version and history

The GFDL requires the history section to be updated.
---
 .../GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
index e82e25aecc..fc352182f5 100644
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@@ -46,7 +46,7 @@
 % Document front page material
 \title{\textbf{\Huge GameCube DSP User's Manual}}
 \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
-\date{\today\\v0.1.3}
+\date{\today\\v0.1.4}
 
 % Title formatting commands
 \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@@ -261,6 +261,7 @@ The purpose of this documentation is purely academic and it aims at understandin
 0.1.1            & 2022.05.14    & xperia64        & Added tested DSP bootloading transfer size                                               \\ \hline
 0.1.2            & 2022.05.21    & Pokechu22       & Fixed ``ILLR'' typo in Instruction Memory section                                        \\ \hline
 0.1.3            & 2022.05.27    & Pokechu22       & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH}                                    \\ \hline
+0.1.4            & 2022.06.02    & Pokechu22       & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcodes writing to the same register. \\ \hline
 \end{tabular}
 \end{table}