[Soft-Float] - Implements faster division support (credits: TellowKrinkle).

SQRT still uses the slow path.

Also renames FpgaDiv.h/FpgaDiv.cpp to PS2Div.h/PS2Div.cpp to better fit their use case.
This commit is contained in:
GitHubProUser67 2025-02-15 21:12:06 +01:00
parent 8ddee82521
commit d33309134b
6 changed files with 166 additions and 131 deletions

View File

@ -94,7 +94,7 @@ set(pcsx2Sources
MTVU.cpp
Patch.cpp
PS2Float.cpp
FpgaDiv.cpp
PS2Div.cpp
Pcsx2Config.cpp
PerformanceMetrics.cpp
PrecompiledHeader.cpp
@ -176,7 +176,7 @@ set(pcsx2Headers
Memory.h
MemoryTypes.h
PS2Float.h
FpgaDiv.h
PS2Div.h
Patch.h
PerformanceMetrics.h
PrecompiledHeader.h

View File

@ -1,13 +1,14 @@
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#include "FpgaDiv.h"
#include "common/Pcsx2Defs.h"
#include "PS2Div.h"
#include "PS2Float.h"
#include "Common.h"
FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
PS2Div::PS2Div(bool divMode, u32 f1, u32 f2)
{
FpgaDiv::divMode = divMode;
PS2Div::divMode = divMode;
if (divMode)
{
@ -34,6 +35,9 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31;
return;
}
floatResult = fastdiv(f1, f2);
return;
}
else if ((f2 & 0x7F800000) == 0)
{
@ -42,25 +46,16 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
return;
}
u32 floatDivisor, floatDividend;
s32 i, j, csaRes;
u32 floatDivisor = f1;
u32 floatDividend = f2;
s32 i, csaRes;
s32 man = 0;
s32 QuotientValueDomain = 1;
Product[0] = 1;
Carry[25] = 1;
if (divMode)
{
floatDividend = f1;
floatDivisor = f2;
}
else
{
floatDividend = f2;
floatDivisor = f1;
}
u8 Dvdtexp = (u8)((floatDividend >> 23) & 0xFF);
u8 Dvsrexp = (u8)((floatDivisor >> 23) & 0xFF);
s32 Dvdtsign = (s32)(floatDividend >> 31);
@ -120,7 +115,7 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
Divisor[24] = 0;
Divisor[25] = 0;
if (!divMode && Dvdtexp % 2 == 1)
if (Dvdtexp % 2 == 1)
{
for (i = 0; i <= 24; i++)
{
@ -141,90 +136,22 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
s32 sign = SignCalc(Dvdtsign, Dvsrsign) ? 1 : 0;
s32 exp = ExpCalc(Dvdtexp, Dvsrexp);
if (divMode && (Quotient[0] == 0))
exp--;
if (divMode)
if (Dvdtexp == 0)
{
if ((Dvdtexp == 0) && (Dvsrexp == 0))
sign = 0;
exp = 0;
for (i = 0; i < 25; i++)
{
iv = true;
exp = 255;
for (i = 0; i < 25; i++)
{
Quotient[i] = 1;
}
}
else if ((Dvdtexp == 0) || (Dvsrexp != 0))
{
if ((Dvdtexp == 0) && (Dvsrexp != 0))
{
exp = 0;
for (i = 0; i < 25; i++)
{
Quotient[i] = 0;
}
}
}
else
{
dz = true;
exp = 255;
for (i = 0; i < 25; i++)
{
Quotient[i] = 1;
}
Quotient[i] = 0;
}
}
else
if (Dvdtsign == 1)
{
if (Dvdtexp == 0)
{
sign = 0;
exp = 0;
for (i = 0; i < 25; i++)
{
Quotient[i] = 0;
}
}
if (Dvdtsign == 1)
{
iv = true;
sign = 0;
}
iv = true;
sign = 0;
}
if (divMode)
{
if (exp < 256)
{
if (exp < 1)
{
uf = true;
exp = 0;
for (i = 0; i < 25; i++)
{
Quotient[i] = 0;
}
}
}
else
{
of = true;
exp = 255;
for (i = 0; i < 25; i++)
{
Quotient[i] = 1;
}
}
}
if (divMode)
j = 2 - Quotient[0];
else
j = 1;
for (i = j; i < j + 23; i++)
for (i = 1; i < 24; i++)
{
man = man * 2 + Quotient[i];
}
@ -238,17 +165,103 @@ FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2)
floatResult |= (u32)man & 0x7FFFFF;
}
bool FpgaDiv::SignCalc(s32 Dvdtsign, s32 Dvsrsign)
// Picks the next signed quotient digit (-1, 0 or +1) from a partial remainder
// held as a separate sum word and carry word.
//
// The remainder is treated as fixed point with the binary point between
// bits 24 and 25, so bit 24 weighs 0.5 and bit 23 weighs 0.25.
s32 PS2Div::quotientSelect(CSAResult current)
{
	// Bits 0..23 of the sum are OR'd into the estimate rather than added,
	// so only the bits above them take part in the addition with the carry word.
	const u32 lowBits = (1 << 24) - 1;
	const u32 addedPart = (current.sum & ~lowBits) + current.carry;
	const s32 estimate = addedPart | (current.sum & lowBits);

	constexpr s32 plusQuarter = 1 << 23; // +0.25
	constexpr s32 minusHalf = -(1 << 24); // -0.5

	if (estimate >= plusQuarter)
		return 1;
	if (estimate < minusHalf)
		return -1;
	return 0;
}
// Returns the low 23 bits of `x` with bit 23 forced on, i.e. a 24-bit
// significand with its leading one made explicit.
u32 PS2Div::mantissa(u32 x)
{
	constexpr u32 fractionMask = 0x7fffff;
	constexpr u32 leadingOne = 0x800000;
	const u32 fraction = x & fractionMask;
	return fraction | leadingOne;
}
// Returns the 8-bit field stored in bits 23..30 of `x` (the biased exponent).
u32 PS2Div::exponent(u32 x)
{
	constexpr u32 fieldShift = 23;
	constexpr u32 fieldMask = 0xff;
	const u32 shifted = x >> fieldShift;
	return shifted & fieldMask;
}
// Divides the float bit pattern `a` by the float bit pattern `b` and returns
// the bit pattern of the quotient.
//
// The quotient is produced by a radix-2 digit recurrence: each of the 25 steps
// appends one signed digit (-1, 0 or +1, chosen by quotientSelect) while the
// partial remainder is kept as separate sum/carry words combined through CSA.
//
// Side effects on the status flags of this object:
//   iv - set when both operands have a zero exponent field
//   dz - set when only the divisor has a zero exponent field
//   of - set when the result exponent exceeds 255
//   uf - set when the result exponent drops below 1
//
// Operands with a zero exponent field never reach the recurrence: their result
// depends only on the sign, so they are resolved up front.
u32 PS2Div::fastdiv(u32 a, u32 b)
{
	const u32 sign = ((a ^ b) & PS2Float::SIGNMASK);
	const u32 Dvdtexp = exponent(a);
	const u32 Dvsrexp = exponent(b);

	if (Dvdtexp == 0)
	{
		if (Dvsrexp == 0)
		{
			// Zero exponent on both sides: flagged as invalid, saturated result.
			iv = true;
			return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
		}

		// Zero-exponent dividend over a non-zero divisor: only the sign bit
		// is returned and no flag is raised.
		return sign;
	}

	if (Dvsrexp == 0)
	{
		// Non-zero dividend over a zero-exponent divisor: saturated result.
		dz = true;
		return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
	}

	const u32 am = mantissa(a) << 2;
	const u32 bm = mantissa(b) << 2;
	CSAResult current = {am, 0};
	u32 quotient = 0;
	s32 quotientBit = 1;
	for (s32 i = 0; i < 25; i++)
	{
		// Append the digit selected in the previous step (unsigned wraparound
		// makes adding -1 behave as a subtraction).
		quotient = (quotient << 1) + quotientBit;

		// Digit +1 subtracts the divisor (~bm plus the +1 injected into the
		// carry word), digit -1 adds it back, digit 0 leaves the remainder alone.
		const u32 add = quotientBit > 0 ? ~bm : (quotientBit < 0 ? bm : 0);
		current.carry += quotientBit > 0;
		const CSAResult csa = CSA(current.sum, current.carry, add);

		// After a zero digit the next digit is selected from the words as they
		// were before the CSA step; otherwise from the CSA output.
		quotientBit = quotientSelect(quotientBit ? csa : current);
		current.sum = csa.sum << 1;
		current.carry = csa.carry << 1;
	}

	// Both exponent fields are in 0..255, so the difference is computed in
	// signed arithmetic instead of relying on unsigned wraparound.
	s32 cexp = static_cast<s32>(Dvdtexp) - static_cast<s32>(Dvsrexp) + 126;
	if (quotient >= (1u << 24))
	{
		// Quotient spilled into bit 24: renormalize and bump the exponent.
		cexp += 1;
		quotient >>= 1;
	}

	if (cexp > 255)
	{
		of = true;
		return sign | PS2Float::MAX_FLOATING_POINT_VALUE;
	}
	if (cexp < 1)
	{
		uf = true;
		return sign;
	}

	// cexp is in 1..255 here, so the shift cannot reach the sign bit.
	return (quotient & 0x7fffff) | (static_cast<u32>(cexp) << 23) | sign;
}
// Reports whether the result sign bit should be set: only when `divMode` is
// true and the dividend and divisor signs differ.
bool PS2Div::SignCalc(s32 Dvdtsign, s32 Dvsrsign)
{
	if (!divMode)
		return false;

	const bool signsDiffer = (Dvsrsign != Dvdtsign);
	return signsDiffer;
}
bool FpgaDiv::BitInvert(s32 val)
// Returns true for any value below 1 (zero or negative), false otherwise.
bool PS2Div::BitInvert(s32 val)
{
	const bool isSet = (val >= 1);
	return !isSet;
}
s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
s32 PS2Div::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
{
s32 result;
@ -261,7 +274,7 @@ s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp)
return result + 127;
}
s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain)
s32 PS2Div::CSAQSLAdder(s32 QuotientValueDomain)
{
s32 CarryArray[4];
s32 SumArray[4];
@ -312,7 +325,7 @@ s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain)
return QSLAdder(SumArray, CarryArray);
}
s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[])
s32 PS2Div::QSLAdder(s32 SumArray[], s32 CarryArray[])
{
s32 specialCondition = 0;
s32 result;
@ -349,7 +362,7 @@ s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[])
return result;
}
s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain)
s32 PS2Div::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain)
{
s32 i;
@ -380,7 +393,7 @@ s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueD
return 0;
}
s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
s32 PS2Div::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
{
s32 addResult = carry + sum + mult;
resCarry = 0;
@ -398,12 +411,12 @@ s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry)
return 0;
}
s32 FpgaDiv::CLAAdder(s32 SumArray[], s32 CarryArray[])
// Adds the first three entries of the carry and sum arrays as 3-digit numbers
// (index 0 weighs 4, index 1 weighs 2, index 2 weighs 1) and returns the
// total reduced with `% 8`.
s32 PS2Div::CLAAdder(s32 SumArray[], s32 CarryArray[])
{
	const s32 carryValue = 2 * CarryArray[1] + 4 * CarryArray[0] + CarryArray[2];
	const s32 sumValue = 2 * SumArray[1] + 4 * SumArray[0] + SumArray[2];
	const s32 total = carryValue + sumValue;
	return total % 8;
}
s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain)
s32 PS2Div::MultipleFormation(s32 QuotientValueDomain)
{
s32 i;
@ -421,7 +434,7 @@ s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain)
return 0;
}
s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain)
s32 PS2Div::DivideModeFormation(s32 QuotientValueDomain)
{
s32 i;
@ -441,7 +454,7 @@ s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain)
return 0;
}
s32 FpgaDiv::RootModeFormation(s32 QuotientValueDomain)
s32 PS2Div::RootModeFormation(s32 QuotientValueDomain)
{
s32 i;

View File

@ -6,8 +6,22 @@
#include <vector>
#include <array>
class FpgaDiv
class PS2Div
{
// A value held as two words whose (wrapping) sum is the value itself.
struct CSAResult
{
	uint32_t sum; // bitwise sum without carry propagation
	uint32_t carry; // carries, already shifted into the position they apply to
};

// Carry-save addition of three words: returns a sum word and a carry word such
// that sum + carry == a + b + c (mod 2^32), with no carry rippling between bits.
static struct CSAResult CSA(uint32_t a, uint32_t b, uint32_t c)
{
	// Per bit: the sum is the parity of the three inputs, the carry is their
	// majority, moved one position left because it belongs to the next bit.
	const uint32_t parity = a ^ b ^ c;
	const uint32_t majority = (a & b) | (a & c) | (b & c);

	CSAResult out;
	out.sum = parity;
	out.carry = majority << 1;
	return out;
}
public:
bool dz = false;
@ -17,7 +31,7 @@ public:
u32 floatResult;
FpgaDiv(bool divMode, u32 f1, u32 f2);
PS2Div(bool divMode, u32 f1, u32 f2);
protected:
@ -39,6 +53,14 @@ private:
s32 SubSum0 = 0;
s32 SubMult = 0;
static s32 quotientSelect(CSAResult current);
static u32 mantissa(u32 x);
static u32 exponent(u32 x);
u32 fastdiv(u32 a, u32 b);
bool SignCalc(s32 Dvdtsign, s32 Dvsrsign);
bool BitInvert(s32 val);

View File

@ -10,7 +10,7 @@
#include <bit>
#include "common/Pcsx2Defs.h"
#include "common/BitUtils.h"
#include "FpgaDiv.h"
#include "PS2Div.h"
#include "PS2Float.h"
#include "Common.h"
@ -266,33 +266,33 @@ PS2Float PS2Float::MulSubAcc(PS2Float opsend, PS2Float optend)
PS2Float PS2Float::Div(PS2Float divend)
{
FpgaDiv fpga = FpgaDiv(true, raw, divend.raw);
PS2Float result = PS2Float(fpga.floatResult);
result.dz = fpga.dz;
result.iv = fpga.iv;
result.of = fpga.of;
result.uf = fpga.uf;
PS2Div div = PS2Div(true, raw, divend.raw);
PS2Float result = PS2Float(div.floatResult);
result.dz = div.dz;
result.iv = div.iv;
result.of = div.of;
result.uf = div.uf;
return result;
}
PS2Float PS2Float::Sqrt()
{
FpgaDiv fpga = FpgaDiv(false, 0, PS2Float(false, Exponent(), Mantissa()).raw);
PS2Float result = PS2Float(fpga.floatResult);
result.dz = fpga.dz;
result.iv = fpga.iv;
PS2Div sqrt = PS2Div(false, 0, PS2Float(false, Exponent(), Mantissa()).raw);
PS2Float result = PS2Float(sqrt.floatResult);
result.dz = sqrt.dz;
result.iv = sqrt.iv;
return result;
}
PS2Float PS2Float::Rsqrt(PS2Float other)
{
FpgaDiv fpgaSqrt = FpgaDiv(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw);
FpgaDiv fpgaDiv = FpgaDiv(true, raw, fpgaSqrt.floatResult);
PS2Float result = PS2Float(fpgaDiv.floatResult);
result.dz = fpgaSqrt.dz || fpgaDiv.dz;
result.iv = fpgaSqrt.iv || fpgaDiv.iv;
result.of = fpgaDiv.of;
result.uf = fpgaDiv.uf;
PS2Div sqrt = PS2Div(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw);
PS2Div div = PS2Div(true, raw, sqrt.floatResult);
PS2Float result = PS2Float(div.floatResult);
result.dz = sqrt.dz || div.dz;
result.iv = sqrt.iv || div.iv;
result.of = div.of;
result.uf = div.uf;
return result;
}

View File

@ -201,7 +201,7 @@
<ClCompile Include="DEV9\sockets.cpp" />
<ClCompile Include="DEV9\net.cpp" />
<ClCompile Include="DEV9\Win32\tap-win32.cpp" />
<ClCompile Include="FpgaDiv.cpp" />
<ClCompile Include="PS2Div.cpp" />
<ClCompile Include="GameList.cpp" />
<ClCompile Include="GS\Renderers\DX11\D3D11ShaderCache.cpp" />
<ClCompile Include="GS\Renderers\DX12\D3D12Builders.cpp" />
@ -647,7 +647,7 @@
<ClInclude Include="DEV9\ThreadSafeMap.h" />
<ClInclude Include="DEV9\Win32\pcap_io_win32_funcs.h" />
<ClInclude Include="DEV9\Win32\tap.h" />
<ClInclude Include="FpgaDiv.h" />
<ClInclude Include="PS2Div.h" />
<ClInclude Include="GameList.h" />
<ClInclude Include="GS\GSVector4i_arm64.h" />
<ClInclude Include="GS\GSVector4_arm64.h" />

View File

@ -1449,7 +1449,7 @@
<ClCompile Include="PS2Float.cpp">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClCompile>
<ClCompile Include="FpgaDiv.cpp">
<ClCompile Include="PS2Div.cpp">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClCompile>
</ItemGroup>
@ -2411,7 +2411,7 @@
<ClInclude Include="PS2Float.h">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClInclude>
<ClInclude Include="FpgaDiv.h">
<ClInclude Include="PS2Div.h">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClInclude>
</ItemGroup>