From d33309134b309ad8626a495424383e300a031ea1 Mon Sep 17 00:00:00 2001
From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com>
Date: Sat, 15 Feb 2025 21:12:06 +0100
Subject: [PATCH] [Soft-Float] - Implements faster division support (credits:
 TellowKrinkle).

SQRT still uses the slow path.

Also renames FpgaDiv.cpp/FpgaDiv.h to PS2Div.cpp/PS2Div.h to better fit their use case.
---
 pcsx2/CMakeLists.txt              |   4 +-
 pcsx2/{FpgaDiv.cpp => PS2Div.cpp} | 223 ++++++++++++++++--------------
 pcsx2/{FpgaDiv.h => PS2Div.h}     |  26 +++-
 pcsx2/PS2Float.cpp                |  36 ++---
 pcsx2/pcsx2.vcxproj               |   4 +-
 pcsx2/pcsx2.vcxproj.filters       |   4 +-
 6 files changed, 166 insertions(+), 131 deletions(-)
 rename pcsx2/{FpgaDiv.cpp => PS2Div.cpp} (73%)
 rename pcsx2/{FpgaDiv.h => PS2Div.h} (72%)

diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt
index ae995173ea..0b8c1e5dcc 100644
--- a/pcsx2/CMakeLists.txt
+++ b/pcsx2/CMakeLists.txt
@@ -94,7 +94,7 @@ set(pcsx2Sources
 	MTVU.cpp
 	Patch.cpp
 	PS2Float.cpp
-	FpgaDiv.cpp
+	PS2Div.cpp
 	Pcsx2Config.cpp
 	PerformanceMetrics.cpp
 	PrecompiledHeader.cpp
@@ -176,7 +176,7 @@ set(pcsx2Headers
 	Memory.h
 	MemoryTypes.h
 	PS2Float.h
-	FpgaDiv.h
+	PS2Div.h
 	Patch.h
 	PerformanceMetrics.h
 	PrecompiledHeader.h
diff --git a/pcsx2/FpgaDiv.cpp b/pcsx2/PS2Div.cpp
similarity index 73%
rename from pcsx2/FpgaDiv.cpp
rename to pcsx2/PS2Div.cpp
index 4d0e385319..b773c8379f 100644
--- a/pcsx2/FpgaDiv.cpp
+++ b/pcsx2/PS2Div.cpp
@@ -1,13 +1,14 @@
 // SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
 // SPDX-License-Identifier: GPL-3.0+
 
-#include "FpgaDiv.h"
+#include "common/Pcsx2Defs.h"
+#include "PS2Div.h"
 #include "PS2Float.h"
 #include "Common.h"
 
-FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
+PS2Div::PS2Div(bool divMode, u32 f1, u32 f2)
 {
-	FpgaDiv::divMode = divMode;
+	PS2Div::divMode = divMode;
 
 	if (divMode)
 	{
@@ -34,6 +35,9 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
 			floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31;
 			return;
 		}
+
+		floatResult = fastdiv(f1, f2);
+		return;
 	}
 	else if ((f2 & 0x7F800000) == 0)
 	{
@@ -42,25 +46,16 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
 		return;
 	}
 
-	u32 floatDivisor, floatDividend;
-	s32 i, j, csaRes;
+	u32 floatDivisor = f1;
+	u32 floatDividend = f2;
+
+	s32 i, csaRes;
 	s32 man = 0;
 	s32 QuotientValueDomain = 1;
 
 	Product[0] = 1;
 	Carry[25] = 1;
 
-	if (divMode)
-	{
-		floatDividend = f1;
-		floatDivisor = f2;
-	}
-	else
-	{
-		floatDividend = f2;
-		floatDivisor = f1;
-	}
-
 	u8 Dvdtexp = (u8)((floatDividend >> 23) & 0xFF);
 	u8 Dvsrexp = (u8)((floatDivisor >> 23) & 0xFF);
 	s32 Dvdtsign = (s32)(floatDividend >> 31);
@@ -120,7 +115,7 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
 	Divisor[24] = 0;
 	Divisor[25] = 0;
 
-	if (!divMode && Dvdtexp % 2 == 1)
+	if (Dvdtexp % 2 == 1)
 	{
 		for (i = 0; i <= 24; i++)
 		{
@@ -141,90 +136,22 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
 	s32 sign = SignCalc(Dvdtsign, Dvsrsign) ? 1 : 0;
 	s32 exp = ExpCalc(Dvdtexp, Dvsrexp);
 
-	if (divMode && (Quotient[0] == 0))
-		exp--;
-
-	if (divMode)
+	if (Dvdtexp == 0)
 	{
-		if ((Dvdtexp == 0) && (Dvsrexp == 0))
+		sign = 0;
+		exp = 0;
+		for (i = 0; i < 25; i++)
 		{
-			iv = true;
-			exp = 255;
-			for (i = 0; i < 25; i++)
-			{
-				Quotient[i] = 1;
-			}
-		}
-		else if ((Dvdtexp == 0) || (Dvsrexp != 0))
-		{
-			if ((Dvdtexp == 0) && (Dvsrexp != 0))
-			{
-				exp = 0;
-				for (i = 0; i < 25; i++)
-				{
-					Quotient[i] = 0;
-				}
-			}
-		}
-		else
-		{
-			dz = true;
-			exp = 255;
-			for (i = 0; i < 25; i++)
-			{
-				Quotient[i] = 1;
-			}
+			Quotient[i] = 0;
 		}
 	}
-	else
+	if (Dvdtsign == 1)
 	{
-		if (Dvdtexp == 0)
-		{
-			sign = 0;
-			exp = 0;
-			for (i = 0; i < 25; i++)
-			{
-				Quotient[i] = 0;
-			}
-		}
-		if (Dvdtsign == 1)
-		{
-			iv = true;
-			sign = 0;
-		}
+		iv = true;
+		sign = 0;
 	}
 
-	if (divMode)
-	{
-		if (exp < 256)
-		{
-			if (exp < 1)
-			{
-				uf = true;
-				exp = 0;
-				for (i = 0; i < 25; i++)
-				{
-					Quotient[i] = 0;
-				}
-			}
-		}
-		else
-		{
-			of = true;
-			exp = 255;
-			for (i = 0; i < 25; i++)
-			{
-				Quotient[i] = 1;
-			}
-		}
-	}
-
-	if (divMode)
-		j = 2 - Quotient[0];
-	else
-		j = 1;
-
-	for (i = j; i < j + 23; i++)
+	for (i = 1; i < 24; i++)
 	{
 		man = man * 2 + Quotient[i];
 	}
@@ -238,17 +165,103 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
 	floatResult |= (u32)man & 0x7FFFFF;
 }
 
-bool FpgaDiv::SignCalc(s32 Dvdtsign, s32 Dvsrsign)
+s32 PS2Div::quotientSelect(CSAResult current) // Picks the next quotient digit (-1, 0 or 1) from a sum/carry remainder pair.
+{
+	// Note: Decimal point is between bits 24 and 25
+	u32 mask = (1 << 24) - 1; // Bit 23 needs to be or'd in instead of added
+	s32 test = ((current.sum & ~mask) + current.carry) | (current.sum & mask); // carry is added to sum bits 24 and up only; sum bits 0-23 are or'd back in
+	if (test >= 1 << 23)
+	{ // test >= 0.25
+		return 1;
+	}
+	else if (test < static_cast<s32>(~0u << 24))
+	{ // test < -0.5
+		return -1;
+	}
+	else
+	{ // -0.5 <= test < 0.25
+		return 0;
+	}
+}
+
+u32 PS2Div::mantissa(u32 x) // Returns the low 23 bits of x with bit 23 (0x800000) forced on.
+{
+	return (x & 0x7fffff) | 0x800000;
+}
+
+u32 PS2Div::exponent(u32 x) // Returns bits 23-30 of x (the 8-bit exponent field).
+{
+	return (x >> 23) & 0xff;
+}
+
+u32 PS2Div::fastdiv(u32 a, u32 b) // Divides a by b (both raw float bits); returns the raw result bits and may set iv/dz/of/uf.
+{
+	u32 am = mantissa(a) << 2; // Dividend mantissa, shifted up 2 bits
+	u32 bm = mantissa(b) << 2; // Divisor mantissa, shifted up 2 bits
+	CSAResult current = {am, 0}; // Running remainder kept as separate sum and carry words
+	u32 quotient = 0;
+	s32 quotientBit = 1; // The first digit applied is always 1
+	for (s32 i = 0; i < 25; i++) // One signed quotient digit (-1, 0 or 1) per iteration
+	{
+		quotient = (quotient << 1) + quotientBit; // A -1 digit subtracts from the bits accumulated so far
+		u32 add = quotientBit > 0 ? ~bm : quotientBit < 0 ? bm :
+																 0; // Digit 1: ~bm (the +1 below completes -bm); digit -1: +bm; digit 0: nothing
+		current.carry += quotientBit > 0; // The +1 of the two's-complement negation of bm
+		CSAResult csa = CSA(current.sum, current.carry, add);
+		quotientBit = quotientSelect(quotientBit ? csa : current); // After a 0 digit, selection reads the pre-CSA remainder pair
+		current.sum = csa.sum << 1; // Shift the remainder left one bit for the next digit
+		current.carry = csa.carry << 1;
+	}
+	u32 sign = ((a ^ b) & PS2Float::SIGNMASK); // XOR of the operands, masked with PS2Float::SIGNMASK
+	u32 Dvdtexp = exponent(a); // Dividend exponent field
+	u32 Dvsrexp = exponent(b); // Divisor exponent field
+	s32 cexp = Dvdtexp - Dvsrexp + 126; // Exponent difference plus 126; bumped by 1 below when the quotient reaches bit 24
+	if (quotient >= (1 << 24)) // Quotient needs more than 24 bits: shift it down and raise the exponent
+	{
+		cexp += 1;
+		quotient >>= 1;
+	}
+	if (Dvdtexp == 0 && Dvsrexp == 0) // Both exponent fields zero: flag invalid and return signed MAX_FLOATING_POINT_VALUE
+	{
+		iv = true;
+		return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
+	}
+	else if (Dvdtexp == 0 || Dvsrexp != 0) // Reached when the divisor exponent field is non-zero
+	{
+		if (Dvdtexp == 0 && Dvsrexp != 0) // Dividend exponent field zero: result is only the sign bit
+		{
+			return sign;
+		}
+	}
+	else // Dividend exponent non-zero and divisor exponent zero: flag divide-by-zero
+	{
+		dz = true;
+		return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
+	}
+	if (cexp > 255) // Exponent too large for the 8-bit field: flag overflow
+	{
+		of = true;
+		return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
+	}
+	else if (cexp < 1) // Exponent below 1: flag underflow and return only the sign bit
+	{
+		uf = true;
+		return sign;
+	}
+	return (quotient & 0x7fffff) | (cexp << 23) | sign; // Pack the low 23 quotient bits, exponent and sign
+}
+
+bool PS2Div::SignCalc(s32 Dvdtsign, s32 Dvsrsign) // True only when divMode is set and the two sign arguments differ.
 {
 	return divMode && Dvsrsign != Dvdtsign;
 }
 
-bool FpgaDiv::BitInvert(s32 val)
+bool PS2Div::BitInvert(s32 val) // True when val is below 1 (so 0 maps to true and 1 maps to false).
 {
 	return val < 1;
 }
 
-s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
+s32 PS2Div::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
 {
 	s32 result;
 
@@ -261,7 +274,7 @@ s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
 	return result + 127;
 }
 
-s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain)
+s32 PS2Div::CSAQSLAdder(s32 QuotientValueDomain)
 {
 	s32 CarryArray[4];
 	s32 SumArray[4];
@@ -312,7 +325,7 @@ s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain)
 	return QSLAdder(SumArray, CarryArray);
 }
 
-s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[])
+s32 PS2Div::QSLAdder(s32 SumArray[], s32 CarryArray[])
 {
 	s32 specialCondition = 0;
 	s32 result;
@@ -349,7 +362,7 @@ s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[])
 	return result;
 }
 
-s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain)
+s32 PS2Div::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain)
 {
 	s32 i;
 
@@ -380,7 +393,7 @@ s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueD
 	return 0;
 }
 
-s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
+s32 PS2Div::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
 {
 	s32 addResult = carry + sum + mult;
 	resCarry = 0;
@@ -398,12 +411,12 @@ s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
 	return 0;
 }
 
-s32 FpgaDiv::CLAAdder(s32 SumArray[], s32 CarryArray[])
+s32 PS2Div::CLAAdder(s32 SumArray[], s32 CarryArray[])
 {
 	return (2 * CarryArray[1] + 4 * CarryArray[0] + CarryArray[2] + 2 * SumArray[1] + 4 * SumArray[0] + SumArray[2]) % 8;
 }
 
-s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain)
+s32 PS2Div::MultipleFormation(s32 QuotientValueDomain)
 {
 	s32 i;
 
@@ -421,7 +434,7 @@ s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain)
 	return 0;
 }
 
-s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain)
+s32 PS2Div::DivideModeFormation(s32 QuotientValueDomain)
 {
 	s32 i;
 
@@ -441,7 +454,7 @@ s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain)
 	return 0;
 }
 
-s32 FpgaDiv::RootModeFormation(s32 QuotientValueDomain)
+s32 PS2Div::RootModeFormation(s32 QuotientValueDomain)
 {
 	s32 i;
 
diff --git a/pcsx2/FpgaDiv.h b/pcsx2/PS2Div.h
similarity index 72%
rename from pcsx2/FpgaDiv.h
rename to pcsx2/PS2Div.h
index 752f756868..86aa37b6a7 100644
--- a/pcsx2/FpgaDiv.h
+++ b/pcsx2/PS2Div.h
@@ -6,8 +6,22 @@
 #include <vector>
 #include <array>
 
-class FpgaDiv
+class PS2Div
 {
+	struct CSAResult // Sum/carry pair as produced by CSA().
+	{
+		uint32_t sum; // Per-bit sum word
+		uint32_t carry; // Carry word, already shifted up one bit by CSA()
+	};
+
+	static struct CSAResult CSA(uint32_t a, uint32_t b, uint32_t c) // Adds three words without carry propagation: sum + carry == a + b + c (mod 2^32).
+	{
+		uint32_t u = a ^ b;
+		uint32_t h = (a & b) | (u & c); // Per-bit carry out: set when at least two of a, b, c are set
+		uint32_t l = u ^ c; // Per-bit sum: a ^ b ^ c
+		return {l, h << 1}; // Carries apply one bit position higher
+	}
+
 public:
 
 	bool dz = false;
@@ -17,7 +31,7 @@ public:
 
 	u32 floatResult;
 
-	FpgaDiv(bool divMode, u32 f1, u32 f2);
+	PS2Div(bool divMode, u32 f1, u32 f2);
 
 protected:
 
@@ -39,6 +53,14 @@ private:
 	s32 SubSum0 = 0;
 	s32 SubMult = 0;
 
+	static s32 quotientSelect(CSAResult current);
+
+	static u32 mantissa(u32 x);
+
+	static u32 exponent(u32 x);
+
+	u32 fastdiv(u32 a, u32 b);
+
 	bool SignCalc(s32 Dvdtsign, s32 Dvsrsign);
 
 	bool BitInvert(s32 val);
diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp
index 3bbb3da496..7eb24e561f 100644
--- a/pcsx2/PS2Float.cpp
+++ b/pcsx2/PS2Float.cpp
@@ -10,7 +10,7 @@
 #include <bit>
 #include "common/Pcsx2Defs.h"
 #include "common/BitUtils.h"
-#include "FpgaDiv.h"
+#include "PS2Div.h"
 #include "PS2Float.h"
 #include "Common.h"
 
@@ -266,33 +266,33 @@ PS2Float PS2Float::MulSubAcc(PS2Float opsend, PS2Float optend)
 
 PS2Float PS2Float::Div(PS2Float divend)
 {
-	FpgaDiv fpga = FpgaDiv(true, raw, divend.raw);
-	PS2Float result = PS2Float(fpga.floatResult);
-	result.dz = fpga.dz;
-	result.iv = fpga.iv;
-	result.of = fpga.of;
-	result.uf = fpga.uf;
+	PS2Div div = PS2Div(true, raw, divend.raw); // divMode=true: this value's raw bits are f1, divend's raw bits are f2
+	PS2Float result = PS2Float(div.floatResult); // Wrap the raw result bits
+	result.dz = div.dz; // Copy all four status flags from the divider
+	result.iv = div.iv;
+	result.of = div.of;
+	result.uf = div.uf;
 	return result;
 }
 
 PS2Float PS2Float::Sqrt()
 {
-	FpgaDiv fpga = FpgaDiv(false, 0, PS2Float(false, Exponent(), Mantissa()).raw);
-	PS2Float result = PS2Float(fpga.floatResult);
-	result.dz = fpga.dz;
-	result.iv = fpga.iv;
+	PS2Div sqrt = PS2Div(false, 0, PS2Float(false, Exponent(), Mantissa()).raw); // divMode=false; operand rebuilt from Exponent()/Mantissa() with first ctor arg false (presumably the sign - confirm in PS2Float)
+	PS2Float result = PS2Float(sqrt.floatResult); // Wrap the raw result bits
+	result.dz = sqrt.dz; // Only dz and iv are copied here; of/uf are not
+	result.iv = sqrt.iv;
 	return result;
 }
 
 PS2Float PS2Float::Rsqrt(PS2Float other)
 {
-	FpgaDiv fpgaSqrt = FpgaDiv(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw);
-	FpgaDiv fpgaDiv = FpgaDiv(true, raw, fpgaSqrt.floatResult);
-	PS2Float result = PS2Float(fpgaDiv.floatResult);
-	result.dz = fpgaSqrt.dz || fpgaDiv.dz;
-	result.iv = fpgaSqrt.iv || fpgaDiv.iv;
-	result.of = fpgaDiv.of;
-	result.uf = fpgaDiv.uf;
+	PS2Div sqrt = PS2Div(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw); // Stage 1 (divMode=false): operand rebuilt from other's Exponent()/Mantissa() with first ctor arg false (presumably the sign - confirm in PS2Float)
+	PS2Div div = PS2Div(true, raw, sqrt.floatResult); // Stage 2 (divMode=true): this value's raw bits are f1, the stage-1 result is f2
+	PS2Float result = PS2Float(div.floatResult); // Wrap the raw result bits of stage 2
+	result.dz = sqrt.dz || div.dz; // dz and iv are set if either stage set them
+	result.iv = sqrt.iv || div.iv;
+	result.of = div.of; // of and uf come from the division stage only
+	result.uf = div.uf;
 	return result;
 }
 
diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj
index 6fc7a4c50b..5e89fcb308 100644
--- a/pcsx2/pcsx2.vcxproj
+++ b/pcsx2/pcsx2.vcxproj
@@ -201,7 +201,7 @@
     <ClCompile Include="DEV9\sockets.cpp" />
     <ClCompile Include="DEV9\net.cpp" />
     <ClCompile Include="DEV9\Win32\tap-win32.cpp" />
-    <ClCompile Include="FpgaDiv.cpp" />
+    <ClCompile Include="PS2Div.cpp" />
     <ClCompile Include="GameList.cpp" />
     <ClCompile Include="GS\Renderers\DX11\D3D11ShaderCache.cpp" />
     <ClCompile Include="GS\Renderers\DX12\D3D12Builders.cpp" />
@@ -647,7 +647,7 @@
     <ClInclude Include="DEV9\ThreadSafeMap.h" />
     <ClInclude Include="DEV9\Win32\pcap_io_win32_funcs.h" />
     <ClInclude Include="DEV9\Win32\tap.h" />
-    <ClInclude Include="FpgaDiv.h" />
+    <ClInclude Include="PS2Div.h" />
     <ClInclude Include="GameList.h" />
     <ClInclude Include="GS\GSVector4i_arm64.h" />
     <ClInclude Include="GS\GSVector4_arm64.h" />
diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters
index 8da173ff3f..701cccdb76 100644
--- a/pcsx2/pcsx2.vcxproj.filters
+++ b/pcsx2/pcsx2.vcxproj.filters
@@ -1449,7 +1449,7 @@
     <ClCompile Include="PS2Float.cpp">
       <Filter>System\Ps2\EmotionEngine\Shared</Filter>
     </ClCompile>
-    <ClCompile Include="FpgaDiv.cpp">
+    <ClCompile Include="PS2Div.cpp">
       <Filter>System\Ps2\EmotionEngine\Shared</Filter>
     </ClCompile>
   </ItemGroup>
@@ -2411,7 +2411,7 @@
     <ClInclude Include="PS2Float.h">
       <Filter>System\Ps2\EmotionEngine\Shared</Filter>
     </ClInclude>
-    <ClInclude Include="FpgaDiv.h">
+    <ClInclude Include="PS2Div.h">
       <Filter>System\Ps2\EmotionEngine\Shared</Filter>
     </ClInclude>
   </ItemGroup>