[Soft-Float] - Implements fully accurate Mul operations.

Fixes: https://github.com/PCSX2/pcsx2/issues/5169 — all credit belongs to TellowKrinkle from the PCSX2 team. Also removes an unnecessary round-towards-zero step in DoAdd.
2024-11-21 20:45:02 +01:00 · 2024-11-21 20:45:02 +01:00 · 98e3df3cb9
parent d5e50284ff
commit 98e3df3cb9
6 changed files with 132 additions and 109 deletions
--- a/pcsx2/BoothMultiplier.cpp
+++ b/pcsx2/BoothMultiplier.cpp
@ -0,0 +1,64 @@
+// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
+// SPDX-License-Identifier: GPL-3.0+
+
+#include <cstdio>
+#include <cstdint>
+#include <cstring>
+#include <cerrno>
+#include <bit>
+#include "BoothMultiplier.h"
+
+BoothMultiplier::BoothRecode BoothMultiplier::Booth(uint32_t a, uint32_t b, uint32_t bit) // Radix-4 Booth term for digit `bit` of b: returns the (possibly one's-complemented) multiple of a, plus the +1 that would complete its negation.
+{
+	uint32_t test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; // 3-bit window b[2*bit+1 .. 2*bit-1]; digit 0 sees an implicit 0 below bit 0
+	a <<= (bit * 2); // weight the multiplicand by 4^bit (bits shifted past bit 31 are dropped)
+	a += (test == 3 || test == 4) ? a : 0; // windows 011 and 100 select magnitude 2*a
+	uint32_t neg = (test >= 4 && test <= 6) ? ~0u : 0; // windows 100, 101 and 110 select a negative multiple
+	uint32_t pos = 1 << (bit * 2); // lowest bit position this term can occupy
+	a ^= (neg & -pos); // when negative, invert every bit at or above `pos`; the bits below it stay 0
+	a &= (test >= 1 && test <= 6) ? ~0u : 0; // windows 000 and 111 contribute nothing
+	return {a, neg & pos}; // negate == pos for a negative term (adding it would finish the two's-complement negation), otherwise 0
+}
+
+BoothMultiplier::AddResult BoothMultiplier::Add3(uint32_t a, uint32_t b, uint32_t c) // 3-input carry-save add: lo + hi == a + b + c (mod 2^32), with no carry propagation done here.
+{
+	uint32_t u = a ^ b; // per-bit sum of a and b, ignoring carries
+	return {u ^ c, ((u & c) | (a & b)) << 1}; // lo = per-bit sum of all three inputs; hi = per-bit majority (the carries), moved up one position
+}
+
+uint64_t BoothMultiplier::MulMantissa(uint32_t a, uint32_t b) // Returns the 64-bit product a*b, lowered by 0x8000 when bit 15 of the emulated low-order sum disagrees with bit 15 of the exact product.
+{
+	uint64_t full = static_cast<uint64_t>(a) * static_cast<uint64_t>(b); // exact reference product
+	BoothRecode b0 = Booth(a, b, 0); // Booth terms for digits 0..7; together their windows read bits 0..15 of b
+	BoothRecode b1 = Booth(a, b, 1);
+	BoothRecode b2 = Booth(a, b, 2);
+	BoothRecode b3 = Booth(a, b, 3);
+	BoothRecode b4 = Booth(a, b, 4); // NOTE(review): the negate words of terms 0..4 are never read below — presumably intentional, confirm
+	BoothRecode b5 = Booth(a, b, 5);
+	BoothRecode b6 = Booth(a, b, 6);
+	BoothRecode b7 = Booth(a, b, 7);
+
+	// First cycle: two independent 3-input carry-save adds
+	AddResult t0 = Add3(b1.data, b2.data, b3.data);
+	AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); // bits 0..10 of term 4 and bits 0..11 of term 5 are left out of this add
+	// A few adds get skipped, squeeze them back in: term 5's bit 11 and term 6's deferred +1 join t1's carry word; term 5's bit 10 plus its deferred +1 join term 7
+	t1.hi |= b6.negate | (b5.data & 0x800);
+	b7.data |= (b5.data & 0x400) + b5.negate;
+
+	// Second cycle: fold term 0 into t0, and the patched term 7 into t1
+	AddResult t2 = Add3(b0.data, t0.lo, t0.hi);
+	AddResult t3 = Add3(b7.data, t1.lo, t1.hi);
+
+	// Third cycle: combine t2's carry word with both words of t3
+	AddResult t4 = Add3(t2.hi, t3.lo, t3.hi);
+
+	// Fourth cycle: last carry-save add, leaving one sum word and one carry word
+	AddResult t5 = Add3(t2.lo, t4.lo, t4.hi);
+
+	// Discard bits and sum: add term 7's deferred +1, clear bits 0..14 of both words, then do the single carry-propagating add
+	t5.hi += b7.negate;
+	t5.lo &= ~0x7fffu;
+	t5.hi &= ~0x7fffu;
+	uint32_t ps2lo = t5.lo + t5.hi; // NOTE(review): presumably models the low-order sum produced by the PS2 multiplier — confirm against hardware tests
+	return full - ((ps2lo ^ full) & 0x8000); // subtracts 0x8000 only when bit 15 of ps2lo differs from bit 15 of the exact product
+}
--- a/pcsx2/BoothMultiplier.h
+++ b/pcsx2/BoothMultiplier.h
@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
+// SPDX-License-Identifier: GPL-3.0+
+
+#pragma once
+
+class BoothMultiplier // All-static helper (no data members): multiplies two values through Booth recoding and carry-save adds.
+{ // NOTE(review): uint32_t/uint64_t are used below but this header includes nothing itself; it relies on the includer having pulled in <cstdint> — confirm.
+public:
+
+	static uint64_t MulMantissa(uint32_t a, uint32_t b); // 64-bit product of a and b, possibly lowered by 0x8000 (see the definition in BoothMultiplier.cpp)
+
+protected: // NOTE(review): this section is empty
+
+private:
+	struct BoothRecode // one recoded partial product, as returned by Booth()
+	{
+		uint32_t data; // partial-product bits (bit-inverted from the term's lowest position upward when the term is negative)
+		uint32_t negate; // 0, or the single bit whose addition would complete the negation of `data`
+	};
+
+	struct AddResult // output pair of Add3(); lo + hi equals the sum of its three inputs modulo 2^32
+	{
+		uint32_t lo; // per-bit sum word
+		uint32_t hi; // carry word, already shifted left by one
+	};
+
+	static BoothRecode Booth(uint32_t a, uint32_t b, uint32_t bit); // recodes digit `bit` of b into a multiple of a
+
+	static AddResult Add3(uint32_t a, uint32_t b, uint32_t c); // 3-input carry-save add
+};
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@ -93,6 +93,7 @@ set(pcsx2Sources
 	MTGS.cpp
 	MTVU.cpp
 	Patch.cpp
+	BoothMultiplier.cpp
 	Ps2Float.cpp
 	Pcsx2Config.cpp
 	PerformanceMetrics.cpp
@ -174,6 +175,7 @@ set(pcsx2Headers
 	MTVU.h
 	Memory.h
 	MemoryTypes.h
+	BoothMultiplier.h
 	Ps2Float.h
 	Patch.h
 	PerformanceMetrics.h
--- a/pcsx2/Ps2Float.cpp
+++ b/pcsx2/Ps2Float.cpp
@ -9,6 +9,7 @@
 #include <iostream>
 #include <bit>
 #include "Ps2Float.h"
+#include "BoothMultiplier.h"
 #include "Common.h"

 const uint8_t Ps2Float::BIAS = 127;
@ -77,26 +78,26 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
 	int32_t temp = 0;

 	//exponent difference
-	int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
+	int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);

 	//diff = 25 .. 255 , expt < expd
 	if (exp_diff >= 25)
 	{
-		b = b & Ps2Float::SIGNMASK;
+		b = b & SIGNMASK;
 	}

 	//diff = 1 .. 24, expt < expd
 	else if (exp_diff > 0)
 	{
 		exp_diff = exp_diff - 1;
-		temp = 0xffffffff << exp_diff;
+		temp = MIN_FLOATING_POINT_VALUE << exp_diff;
 		b = temp & b;
 	}

 	//diff = -255 .. -25, expd < expt
 	else if (exp_diff <= -25)
 	{
-		a = a & Ps2Float::SIGNMASK;
+		a = a & SIGNMASK;
 	}

 	//diff = -24 .. -1 , expd < expt
@ -104,7 +105,7 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
 	{
 		exp_diff = -exp_diff;
 		exp_diff = exp_diff - 1;
-		temp = 0xffffffff << exp_diff;
+		temp = MIN_FLOATING_POINT_VALUE << exp_diff;
 		a = a & temp;
 	}

@ -124,26 +125,26 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
 	int32_t temp = 0;

 	//exponent difference
-	int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
+	int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);

 	//diff = 25 .. 255 , expt < expd
 	if (exp_diff >= 25)
 	{
-		b = b & Ps2Float::SIGNMASK;
+		b = b & SIGNMASK;
 	}

 	//diff = 1 .. 24, expt < expd
 	else if (exp_diff > 0)
 	{
 		exp_diff = exp_diff - 1;
-		temp = 0xffffffff << exp_diff;
+		temp = MIN_FLOATING_POINT_VALUE << exp_diff;
 		b = temp & b;
 	}

 	//diff = -255 .. -25, expd < expt
 	else if (exp_diff <= -25)
 	{
-		a = a & Ps2Float::SIGNMASK;
+		a = a & SIGNMASK;
 	}

 	//diff = -24 .. -1 , expd < expt
@ -151,7 +152,7 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
 	{
 		exp_diff = -exp_diff;
 		exp_diff = exp_diff - 1;
-		temp = 0xffffffff << exp_diff;
+		temp = MIN_FLOATING_POINT_VALUE << exp_diff;
 		a = a & temp;
 	}

@ -215,7 +216,7 @@ Ps2Float Ps2Float::Sqrt()
 	/* extract mantissa and unbias exponent */
 	int32_t m = (ix >> 23) - BIAS;

-	ix = (ix & 0x007fffff) | 0x00800000;
+	ix = (ix & 0x007FFFFF) | 0x00800000;
 	if ((m & 1) == 1)
 	{
 		/* odd m, double x to make it even */
@ -247,7 +248,7 @@ Ps2Float Ps2Float::Sqrt()
 		q += q & 1;
 	}

-	ix = (q >> 1) + 0x3f000000;
+	ix = (q >> 1) + 0x3F000000;
 	ix += m << 23;

 	return Ps2Float((uint32_t)(ix));
@ -397,114 +398,32 @@ Ps2Float Ps2Float::DoAdd(Ps2Float other)
 	else if (rawExp <= 0)
 		return Ps2Float(man < 0, 0, 0);

-	return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero();
+	return Ps2Float((uint32_t)man & SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF));
 }

 Ps2Float Ps2Float::DoMul(Ps2Float other) // Multiplies *this by other: mantissa product via BoothMultiplier, exponent sum minus bias, sign by XOR.
 {
+	uint8_t selfExponent = Exponent; // NOTE(review): operands with Exponent == 0 are not special-cased here; presumably the caller filters them — confirm
+	uint8_t otherExponent = other.Exponent;
 	uint32_t selfMantissa = Mantissa | 0x800000; // OR in bit 23 (presumably the implicit leading 1 above a 23-bit Mantissa field — confirm)
 	uint32_t otherMantissa = other.Mantissa | 0x800000;
-	int32_t resExponent = Exponent + other.Exponent - BIAS;
+	uint32_t sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; // XOR of both raw encodings, keeping only the SIGNMASK bits

-	Ps2Float result = Ps2Float(0);
+	int32_t resExponent = selfExponent + otherExponent - 127; // NOTE(review): literal 127 duplicates the BIAS constant used by the line this replaces
+	uint32_t resMantissa = (uint32_t)(BoothMultiplier::MulMantissa(selfMantissa, otherMantissa) >> 23); // drop the low 23 bits of the 64-bit mantissa product

-	result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other);
+	if (resMantissa > 0xFFFFFF) // product needs more than 24 bits: shift down once and bump the exponent
+	{
+		resMantissa >>= 1;
+		resExponent++;
+	}

 	if (resExponent > 255)
-		return result.Sign ? Min() : Max();
-	else if (resExponent < 0)
-		return Ps2Float(result.Sign, 0, 0);
+		return Ps2Float(sign | MAX_FLOATING_POINT_VALUE); // exponent overflow: MAX_FLOATING_POINT_VALUE combined with the computed sign
+	else if (resExponent <= 0)
+		return Ps2Float(sign); // exponent underflow: only the sign bits are set

-	uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not
-	int64_t res = 0;
-	uint64_t mask = 0xFFFFFFFFFFFFFFFF;
-
-	result.Exponent = (uint8_t)(resExponent);
-
-	otherMantissa <<= 1;
-
-	uint32_t part[13]; // Partial products
-	uint32_t bit[13]; // More partial products. 0 or 1.
-
-	for (int i = 0; i <= 12; i++, otherMantissa >>= 2)
-	{
-		uint32_t test = otherMantissa & 7;
-		if (test == 0 || test == 7)
-		{
-			part[i] = 0;
-			bit[i] = 0;
-		}
-		else if (test == 3)
-		{
-			part[i] = (selfMantissa << 1);
-			bit[i] = 0;
-		}
-		else if (test == 4)
-		{
-			part[i] = ~(selfMantissa << 1);
-			bit[i] = 1;
-		}
-		else if (test < 4)
-		{
-			part[i] = selfMantissa;
-			bit[i] = 0;
-		}
-		else
-		{
-			part[i] = ~selfMantissa;
-			bit[i] = 1;
-		}
-	}
-
-	for (int i = 0; i <= 12; i++)
-	{
-		res += (uint64_t)(int32_t)part[i] << (i * 2);
-		res &= mask;
-		res += bit[i] << (i * 2);
-	}
-
-	result.Mantissa = (uint32_t)(res >> 23);
-
-	if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF))
-		result.Mantissa -= 1;
-
-	if (result.Mantissa > 0)
-	{
-		int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa);
-
-		while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS)
-		{
-			if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS)
-			{
-				result.Mantissa >>= 1;
-
-				int32_t exp = ((int32_t)result.Exponent + 1);
-
-				if (exp > 255)
-					return result.Sign ? Min() : Max();
-
-				result.Exponent = (uint8_t)exp;
-
-				leadingBitPosition--;
-			}
-			else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS)
-			{
-				result.Mantissa <<= 1;
-
-				int32_t exp = ((int32_t)result.Exponent - 1);
-
-				if (exp <= 0)
-					return Ps2Float(result.Sign, 0, 0);
-
-				result.Exponent = (uint8_t)exp;
-
-				leadingBitPosition++;
-			}
-		}
-	}
-
-	result.Mantissa &= 0x7FFFFF;
-	return result.RoundTowardsZero();
+	return Ps2Float(sign | (uint32_t)(resExponent << 23) | (resMantissa & 0x7FFFFF)); // pack sign, exponent at bit 23, and the low 23 mantissa bits (bit 23 of resMantissa is dropped)
 }

 Ps2Float Ps2Float::DoDiv(Ps2Float other)
--- a/pcsx2/pcsx2.vcxproj
+++ b/pcsx2/pcsx2.vcxproj
@ -126,6 +126,7 @@
    <ClCompile Include="arm64\RecStubs.cpp">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClCompile>
+    <ClCompile Include="BoothMultiplier.cpp" />
    <ClCompile Include="CDVD\BlockdumpFileReader.cpp" />
    <ClCompile Include="CDVD\CDVDdiscReader.cpp" />
    <ClCompile Include="CDVD\CDVDdiscThread.cpp" />
@ -583,6 +584,7 @@
    <ClInclude Include="arm64\newVif_UnpackNEON.h">
      <ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
    </ClInclude>
+    <ClInclude Include="BoothMultiplier.h" />
    <ClInclude Include="CDVD\BlockdumpFileReader.h" />
    <ClInclude Include="CDVD\CDVDdiscReader.h" />
    <ClInclude Include="CDVD\CsoFileReader.h" />
--- a/pcsx2/pcsx2.vcxproj.filters
+++ b/pcsx2/pcsx2.vcxproj.filters
@ -1449,6 +1449,9 @@
    <ClCompile Include="Ps2Float.cpp">
      <Filter>System\Ps2\EmotionEngine\Shared</Filter>
    </ClCompile>
+    <ClCompile Include="BoothMultiplier.cpp">
+      <Filter>System\Ps2\EmotionEngine\Shared</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="Patch.h">
@ -2408,6 +2411,9 @@
    <ClInclude Include="Ps2Float.h">
      <Filter>System\Ps2\EmotionEngine\Shared</Filter>
    </ClInclude>
+    <ClInclude Include="BoothMultiplier.h">
+      <Filter>System\Ps2\EmotionEngine\Shared</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <CustomBuildStep Include="rdebug\deci2.h">