[Soft-Float] - Implements fully accurate Mul operations.

Fixes: https://github.com/PCSX2/pcsx2/issues/5169

All credit belongs to TellowKrinkle from the PCSX2 team.

Also removes a useless rounding towards zero in DoAdd.
This commit is contained in:
GitHubProUser67 2024-11-21 20:45:02 +01:00
parent d5e50284ff
commit 98e3df3cb9
6 changed files with 132 additions and 109 deletions

64
pcsx2/BoothMultiplier.cpp Normal file
View File

@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#include <cstdio>
#include <cstdint>
#include <cstring>
#include <cerrno>
#include <bit>
#include "BoothMultiplier.h"
// Produces one radix-4 Booth term of the product a * b.
//
// `bit` selects the term: the 3-bit window examined is bits
// [2*bit - 1, 2*bit + 1] of `b` (for bit 0 an implicit zero sits below bit 0).
// The window value picks the multiple of `a` that is generated:
//   0, 7 -> 0      1, 2 -> +a      3 -> +2a      4 -> -2a      5, 6 -> -a
//
// Returns {data, negate}. `data` is the selected multiple shifted left by
// 2*bit. For negative multiples only the bits at and above 2*bit are inverted
// and `negate` carries the missing "+1" (1 << 2*bit), so that
// data + negate equals the negated term modulo 2^32. For non-negative
// multiples `negate` is 0.
BoothMultiplier::BoothRecode BoothMultiplier::Booth(uint32_t a, uint32_t b, uint32_t bit)
{
	const uint32_t shift = bit * 2;
	const uint32_t window = ((bit != 0) ? (b >> (shift - 1)) : (b << 1)) & 7;

	// All-zero and all-one windows contribute nothing to the product.
	if (window == 0 || window == 7)
		return {0, 0};

	uint32_t partial = a << shift;
	if (window == 3 || window == 4)
		partial += partial; // +/-2a

	// Windows 1..3 are positive multiples; nothing left to do.
	if (window < 4)
		return {partial, 0};

	// Windows 4..6 are negative multiples: invert everything from the term's
	// lowest bit upwards and hand the two's-complement "+1" back separately.
	const uint32_t unit = 1u << shift;
	partial ^= ~(unit - 1u);
	return {partial, unit};
}
// 3:2 carry-save compressor: reduces three addends to two words without
// propagating carries. `lo` holds the per-bit sum (a ^ b ^ c) and `hi` holds
// the per-bit carries moved up one position, so lo + hi == a + b + c
// modulo 2^32.
BoothMultiplier::AddResult BoothMultiplier::Add3(uint32_t a, uint32_t b, uint32_t c)
{
	const uint32_t halfSum = a ^ b;
	const uint32_t sum = halfSum ^ c;
	// A carry is produced where a and b are both set, or where exactly one of
	// them is set together with c.
	const uint32_t carry = (a & b) | (halfSum & c);
	return {sum, carry << 1};
}
uint64_t BoothMultiplier::MulMantissa(uint32_t a, uint32_t b)
{
uint64_t full = static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
BoothRecode b0 = Booth(a, b, 0);
BoothRecode b1 = Booth(a, b, 1);
BoothRecode b2 = Booth(a, b, 2);
BoothRecode b3 = Booth(a, b, 3);
BoothRecode b4 = Booth(a, b, 4);
BoothRecode b5 = Booth(a, b, 5);
BoothRecode b6 = Booth(a, b, 6);
BoothRecode b7 = Booth(a, b, 7);
// First cycle
AddResult t0 = Add3(b1.data, b2.data, b3.data);
AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data);
// A few adds get skipped, squeeze them back in
t1.hi |= b6.negate | (b5.data & 0x800);
b7.data |= (b5.data & 0x400) + b5.negate;
// Second cycle
AddResult t2 = Add3(b0.data, t0.lo, t0.hi);
AddResult t3 = Add3(b7.data, t1.lo, t1.hi);
// Third cycle
AddResult t4 = Add3(t2.hi, t3.lo, t3.hi);
// Fourth cycle
AddResult t5 = Add3(t2.lo, t4.lo, t4.hi);
// Discard bits and sum
t5.hi += b7.negate;
t5.lo &= ~0x7fffu;
t5.hi &= ~0x7fffu;
uint32_t ps2lo = t5.lo + t5.hi;
return full - ((ps2lo ^ full) & 0x8000);
}

30
pcsx2/BoothMultiplier.h Normal file
View File

@ -0,0 +1,30 @@
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
// SPDX-License-Identifier: GPL-3.0+
#pragma once
#include <cstdint>
/// Static helper that multiplies two 32-bit values through a radix-4 Booth
/// recoding and a carry-save adder tree (see BoothMultiplier.cpp).
/// The class holds no state; every member function is static.
class BoothMultiplier
{
public:
	/// Returns the 64-bit product of `a` and `b`, with 0x8000 subtracted when
	/// bit 15 of the adder-tree result disagrees with bit 15 of the exact
	/// product.
	static uint64_t MulMantissa(uint32_t a, uint32_t b);

private:
	/// One Booth term.
	struct BoothRecode
	{
		/// Selected multiple of the multiplicand, shifted into position
		/// (bits at and above the term's position are inverted for negative
		/// multiples).
		uint32_t data;
		/// 1 << (2 * bit) for negative multiples, otherwise 0: the "+1" that
		/// completes the two's-complement negation of `data`.
		uint32_t negate;
	};

	/// Output of a 3:2 carry-save addition.
	struct AddResult
	{
		/// Per-bit sum of the three inputs (a ^ b ^ c).
		uint32_t lo;
		/// Per-bit carries, already shifted left by one.
		uint32_t hi;
	};

	/// Builds Booth term number `bit` of the product a * b.
	static BoothRecode Booth(uint32_t a, uint32_t b, uint32_t bit);

	/// Compresses three addends into a sum word and a carry word.
	static AddResult Add3(uint32_t a, uint32_t b, uint32_t c);
};

View File

@ -93,6 +93,7 @@ set(pcsx2Sources
MTGS.cpp
MTVU.cpp
Patch.cpp
BoothMultiplier.cpp
Ps2Float.cpp
Pcsx2Config.cpp
PerformanceMetrics.cpp
@ -174,6 +175,7 @@ set(pcsx2Headers
MTVU.h
Memory.h
MemoryTypes.h
BoothMultiplier.h
Ps2Float.h
Patch.h
PerformanceMetrics.h

View File

@ -9,6 +9,7 @@
#include <iostream>
#include <bit>
#include "Ps2Float.h"
#include "BoothMultiplier.h"
#include "Common.h"
const uint8_t Ps2Float::BIAS = 127;
@ -77,26 +78,26 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
int32_t temp = 0;
//exponent difference
int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);
//diff = 25 .. 255 , expt < expd
if (exp_diff >= 25)
{
b = b & Ps2Float::SIGNMASK;
b = b & SIGNMASK;
}
//diff = 1 .. 24, expt < expd
else if (exp_diff > 0)
{
exp_diff = exp_diff - 1;
temp = 0xffffffff << exp_diff;
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
b = temp & b;
}
//diff = -255 .. -25, expd < expt
else if (exp_diff <= -25)
{
a = a & Ps2Float::SIGNMASK;
a = a & SIGNMASK;
}
//diff = -24 .. -1 , expd < expt
@ -104,7 +105,7 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
{
exp_diff = -exp_diff;
exp_diff = exp_diff - 1;
temp = 0xffffffff << exp_diff;
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
a = a & temp;
}
@ -124,26 +125,26 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
int32_t temp = 0;
//exponent difference
int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);
//diff = 25 .. 255 , expt < expd
if (exp_diff >= 25)
{
b = b & Ps2Float::SIGNMASK;
b = b & SIGNMASK;
}
//diff = 1 .. 24, expt < expd
else if (exp_diff > 0)
{
exp_diff = exp_diff - 1;
temp = 0xffffffff << exp_diff;
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
b = temp & b;
}
//diff = -255 .. -25, expd < expt
else if (exp_diff <= -25)
{
a = a & Ps2Float::SIGNMASK;
a = a & SIGNMASK;
}
//diff = -24 .. -1 , expd < expt
@ -151,7 +152,7 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
{
exp_diff = -exp_diff;
exp_diff = exp_diff - 1;
temp = 0xffffffff << exp_diff;
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
a = a & temp;
}
@ -215,7 +216,7 @@ Ps2Float Ps2Float::Sqrt()
/* extract mantissa and unbias exponent */
int32_t m = (ix >> 23) - BIAS;
ix = (ix & 0x007fffff) | 0x00800000;
ix = (ix & 0x007FFFFF) | 0x00800000;
if ((m & 1) == 1)
{
/* odd m, double x to make it even */
@ -247,7 +248,7 @@ Ps2Float Ps2Float::Sqrt()
q += q & 1;
}
ix = (q >> 1) + 0x3f000000;
ix = (q >> 1) + 0x3F000000;
ix += m << 23;
return Ps2Float((uint32_t)(ix));
@ -397,114 +398,32 @@ Ps2Float Ps2Float::DoAdd(Ps2Float other)
else if (rawExp <= 0)
return Ps2Float(man < 0, 0, 0);
return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero();
return Ps2Float((uint32_t)man & SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF));
}
Ps2Float Ps2Float::DoMul(Ps2Float other)
{
uint8_t selfExponent = Exponent;
uint8_t otherExponent = other.Exponent;
uint32_t selfMantissa = Mantissa | 0x800000;
uint32_t otherMantissa = other.Mantissa | 0x800000;
int32_t resExponent = Exponent + other.Exponent - BIAS;
uint32_t sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK;
Ps2Float result = Ps2Float(0);
int32_t resExponent = selfExponent + otherExponent - 127;
uint32_t resMantissa = (uint32_t)(BoothMultiplier::MulMantissa(selfMantissa, otherMantissa) >> 23);
result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other);
if (resMantissa > 0xFFFFFF)
{
resMantissa >>= 1;
resExponent++;
}
if (resExponent > 255)
return result.Sign ? Min() : Max();
else if (resExponent < 0)
return Ps2Float(result.Sign, 0, 0);
return Ps2Float(sign | MAX_FLOATING_POINT_VALUE);
else if (resExponent <= 0)
return Ps2Float(sign);
uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not
int64_t res = 0;
uint64_t mask = 0xFFFFFFFFFFFFFFFF;
result.Exponent = (uint8_t)(resExponent);
otherMantissa <<= 1;
uint32_t part[13]; // Partial products
uint32_t bit[13]; // More partial products. 0 or 1.
for (int i = 0; i <= 12; i++, otherMantissa >>= 2)
{
uint32_t test = otherMantissa & 7;
if (test == 0 || test == 7)
{
part[i] = 0;
bit[i] = 0;
}
else if (test == 3)
{
part[i] = (selfMantissa << 1);
bit[i] = 0;
}
else if (test == 4)
{
part[i] = ~(selfMantissa << 1);
bit[i] = 1;
}
else if (test < 4)
{
part[i] = selfMantissa;
bit[i] = 0;
}
else
{
part[i] = ~selfMantissa;
bit[i] = 1;
}
}
for (int i = 0; i <= 12; i++)
{
res += (uint64_t)(int32_t)part[i] << (i * 2);
res &= mask;
res += bit[i] << (i * 2);
}
result.Mantissa = (uint32_t)(res >> 23);
if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF))
result.Mantissa -= 1;
if (result.Mantissa > 0)
{
int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa);
while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS)
{
if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS)
{
result.Mantissa >>= 1;
int32_t exp = ((int32_t)result.Exponent + 1);
if (exp > 255)
return result.Sign ? Min() : Max();
result.Exponent = (uint8_t)exp;
leadingBitPosition--;
}
else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS)
{
result.Mantissa <<= 1;
int32_t exp = ((int32_t)result.Exponent - 1);
if (exp <= 0)
return Ps2Float(result.Sign, 0, 0);
result.Exponent = (uint8_t)exp;
leadingBitPosition++;
}
}
}
result.Mantissa &= 0x7FFFFF;
return result.RoundTowardsZero();
return Ps2Float(sign | (uint32_t)(resExponent << 23) | (resMantissa & 0x7FFFFF));
}
Ps2Float Ps2Float::DoDiv(Ps2Float other)

View File

@ -126,6 +126,7 @@
<ClCompile Include="arm64\RecStubs.cpp">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="BoothMultiplier.cpp" />
<ClCompile Include="CDVD\BlockdumpFileReader.cpp" />
<ClCompile Include="CDVD\CDVDdiscReader.cpp" />
<ClCompile Include="CDVD\CDVDdiscThread.cpp" />
@ -583,6 +584,7 @@
<ClInclude Include="arm64\newVif_UnpackNEON.h">
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="BoothMultiplier.h" />
<ClInclude Include="CDVD\BlockdumpFileReader.h" />
<ClInclude Include="CDVD\CDVDdiscReader.h" />
<ClInclude Include="CDVD\CsoFileReader.h" />

View File

@ -1449,6 +1449,9 @@
<ClCompile Include="Ps2Float.cpp">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClCompile>
<ClCompile Include="BoothMultiplier.cpp">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Patch.h">
@ -2408,6 +2411,9 @@
<ClInclude Include="Ps2Float.h">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClInclude>
<ClInclude Include="BoothMultiplier.h">
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<CustomBuildStep Include="rdebug\deci2.h">