mirror of https://github.com/PCSX2/pcsx2.git
[Soft-Float] - Implements fully accurate Mul operations.
Fixes: https://github.com/PCSX2/pcsx2/issues/5169 — all credit belongs to TellowKrinkle from the PCSX2 team. Also removes a useless rounding towards zero in DoAdd.
This commit is contained in:
parent
d5e50284ff
commit
98e3df3cb9
|
@ -0,0 +1,64 @@
|
|||
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <cerrno>
|
||||
#include <bit>
|
||||
#include "BoothMultiplier.h"
|
||||
|
||||
// Builds one radix-4 Booth partial product of a * b.
//
// a   - multiplicand.
// b   - multiplier; only the three bits around position 2*bit are examined.
// bit - index of the Booth digit (the partial product is weighted by 4^bit).
//
// Returns {data, negate}:
//   data   - the multiplicand shifted to the digit's weight, doubled for the
//            +/-2 digits, bitwise-inverted from the weight bit upwards for
//            negative digits, or zero for the zero digits.
//   negate - the digit's weight bit when the digit is negative (the "+1" that
//            completes the two's complement of data), otherwise zero.
BoothMultiplier::BoothRecode BoothMultiplier::Booth(uint32_t a, uint32_t b, uint32_t bit)
{
	const uint32_t shift = bit * 2;

	// Three-bit window b[2*bit+1 : 2*bit-1]. Digit 0 has no bit below it, so a zero is shifted in.
	const uint32_t window = ((bit != 0) ? (b >> (shift - 1)) : (b << 1)) & 7;

	const bool isDouble = (window == 3) || (window == 4);
	const bool isNegative = (window >= 4) && (window <= 6);
	const bool isNonZero = (window >= 1) && (window <= 6);

	const uint32_t weight = uint32_t{1} << shift;
	const uint32_t negMask = isNegative ? ~0u : 0u;

	uint32_t product = a << shift;
	if (isDouble)
		product += product;

	// (0 - weight) has every bit from the weight bit upwards set, so this inverts only those bits.
	product ^= negMask & (0u - weight);

	if (!isNonZero)
		product = 0;

	return {product, negMask & weight};
}
|
||||
|
||||
// Carry-save addition of three words: reduces a + b + c to two words whose
// sum (modulo 2^32) equals the sum of the inputs.
//
// Returns {lo, hi}:
//   lo - per-bit sum without carries (a ^ b ^ c).
//   hi - per-bit carries, already shifted left by one to their own weight.
BoothMultiplier::AddResult BoothMultiplier::Add3(uint32_t a, uint32_t b, uint32_t c)
{
	const uint32_t partial = a ^ b;
	const uint32_t sum = partial ^ c;
	// A carry is produced where a and b are both set, or where exactly one of them is set together with c.
	const uint32_t carry = (a & b) | (partial & c);
	return {sum, carry << 1};
}
|
||||
|
||||
uint64_t BoothMultiplier::MulMantissa(uint32_t a, uint32_t b)
|
||||
{
|
||||
uint64_t full = static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
|
||||
BoothRecode b0 = Booth(a, b, 0);
|
||||
BoothRecode b1 = Booth(a, b, 1);
|
||||
BoothRecode b2 = Booth(a, b, 2);
|
||||
BoothRecode b3 = Booth(a, b, 3);
|
||||
BoothRecode b4 = Booth(a, b, 4);
|
||||
BoothRecode b5 = Booth(a, b, 5);
|
||||
BoothRecode b6 = Booth(a, b, 6);
|
||||
BoothRecode b7 = Booth(a, b, 7);
|
||||
|
||||
// First cycle
|
||||
AddResult t0 = Add3(b1.data, b2.data, b3.data);
|
||||
AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data);
|
||||
// A few adds get skipped, squeeze them back in
|
||||
t1.hi |= b6.negate | (b5.data & 0x800);
|
||||
b7.data |= (b5.data & 0x400) + b5.negate;
|
||||
|
||||
// Second cycle
|
||||
AddResult t2 = Add3(b0.data, t0.lo, t0.hi);
|
||||
AddResult t3 = Add3(b7.data, t1.lo, t1.hi);
|
||||
|
||||
// Third cycle
|
||||
AddResult t4 = Add3(t2.hi, t3.lo, t3.hi);
|
||||
|
||||
// Fourth cycle
|
||||
AddResult t5 = Add3(t2.lo, t4.lo, t4.hi);
|
||||
|
||||
// Discard bits and sum
|
||||
t5.hi += b7.negate;
|
||||
t5.lo &= ~0x7fffu;
|
||||
t5.hi &= ~0x7fffu;
|
||||
uint32_t ps2lo = t5.lo + t5.hi;
|
||||
return full - ((ps2lo ^ full) & 0x8000);
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: GPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Needed for the fixed-width integer types used below, so that this header
// compiles regardless of what the including file pulled in beforehand.
#include <cstdint>

// Stateless helper that multiplies two mantissas using Booth recoding and
// carry-save addition. All members are static; the class is never instantiated
// by the code in this header.
class BoothMultiplier
{
public:
	// Returns the 64-bit product of the two mantissas a and b, as computed by
	// the Booth/carry-save model implemented in BoothMultiplier.cpp.
	static uint64_t MulMantissa(uint32_t a, uint32_t b);

private:
	// Result of recoding one Booth digit.
	struct BoothRecode
	{
		uint32_t data;   // Partial product for the digit.
		uint32_t negate; // Correction bit accompanying a negative digit, else 0.
	};

	// Result of one three-input carry-save addition.
	struct AddResult
	{
		uint32_t lo; // Sum word.
		uint32_t hi; // Carry word.
	};

	// Produces the partial product of a for Booth digit number `bit` of b.
	static BoothRecode Booth(uint32_t a, uint32_t b, uint32_t bit);

	// Reduces three addends to a sum word and a carry word.
	static AddResult Add3(uint32_t a, uint32_t b, uint32_t c);
};
|
|
@ -93,6 +93,7 @@ set(pcsx2Sources
|
|||
MTGS.cpp
|
||||
MTVU.cpp
|
||||
Patch.cpp
|
||||
BoothMultiplier.cpp
|
||||
Ps2Float.cpp
|
||||
Pcsx2Config.cpp
|
||||
PerformanceMetrics.cpp
|
||||
|
@ -174,6 +175,7 @@ set(pcsx2Headers
|
|||
MTVU.h
|
||||
Memory.h
|
||||
MemoryTypes.h
|
||||
BoothMultiplier.h
|
||||
Ps2Float.h
|
||||
Patch.h
|
||||
PerformanceMetrics.h
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <iostream>
|
||||
#include <bit>
|
||||
#include "Ps2Float.h"
|
||||
#include "BoothMultiplier.h"
|
||||
#include "Common.h"
|
||||
|
||||
const uint8_t Ps2Float::BIAS = 127;
|
||||
|
@ -77,26 +78,26 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
|
|||
int32_t temp = 0;
|
||||
|
||||
//exponent difference
|
||||
int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
|
||||
int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);
|
||||
|
||||
//diff = 25 .. 255 , expt < expd
|
||||
if (exp_diff >= 25)
|
||||
{
|
||||
b = b & Ps2Float::SIGNMASK;
|
||||
b = b & SIGNMASK;
|
||||
}
|
||||
|
||||
//diff = 1 .. 24, expt < expd
|
||||
else if (exp_diff > 0)
|
||||
{
|
||||
exp_diff = exp_diff - 1;
|
||||
temp = 0xffffffff << exp_diff;
|
||||
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
|
||||
b = temp & b;
|
||||
}
|
||||
|
||||
//diff = -255 .. -25, expd < expt
|
||||
else if (exp_diff <= -25)
|
||||
{
|
||||
a = a & Ps2Float::SIGNMASK;
|
||||
a = a & SIGNMASK;
|
||||
}
|
||||
|
||||
//diff = -24 .. -1 , expd < expt
|
||||
|
@ -104,7 +105,7 @@ Ps2Float Ps2Float::Add(Ps2Float addend)
|
|||
{
|
||||
exp_diff = -exp_diff;
|
||||
exp_diff = exp_diff - 1;
|
||||
temp = 0xffffffff << exp_diff;
|
||||
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
|
||||
a = a & temp;
|
||||
}
|
||||
|
||||
|
@ -124,26 +125,26 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
|
|||
int32_t temp = 0;
|
||||
|
||||
//exponent difference
|
||||
int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff);
|
||||
int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF);
|
||||
|
||||
//diff = 25 .. 255 , expt < expd
|
||||
if (exp_diff >= 25)
|
||||
{
|
||||
b = b & Ps2Float::SIGNMASK;
|
||||
b = b & SIGNMASK;
|
||||
}
|
||||
|
||||
//diff = 1 .. 24, expt < expd
|
||||
else if (exp_diff > 0)
|
||||
{
|
||||
exp_diff = exp_diff - 1;
|
||||
temp = 0xffffffff << exp_diff;
|
||||
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
|
||||
b = temp & b;
|
||||
}
|
||||
|
||||
//diff = -255 .. -25, expd < expt
|
||||
else if (exp_diff <= -25)
|
||||
{
|
||||
a = a & Ps2Float::SIGNMASK;
|
||||
a = a & SIGNMASK;
|
||||
}
|
||||
|
||||
//diff = -24 .. -1 , expd < expt
|
||||
|
@ -151,7 +152,7 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend)
|
|||
{
|
||||
exp_diff = -exp_diff;
|
||||
exp_diff = exp_diff - 1;
|
||||
temp = 0xffffffff << exp_diff;
|
||||
temp = MIN_FLOATING_POINT_VALUE << exp_diff;
|
||||
a = a & temp;
|
||||
}
|
||||
|
||||
|
@ -215,7 +216,7 @@ Ps2Float Ps2Float::Sqrt()
|
|||
/* extract mantissa and unbias exponent */
|
||||
int32_t m = (ix >> 23) - BIAS;
|
||||
|
||||
ix = (ix & 0x007fffff) | 0x00800000;
|
||||
ix = (ix & 0x007FFFFF) | 0x00800000;
|
||||
if ((m & 1) == 1)
|
||||
{
|
||||
/* odd m, double x to make it even */
|
||||
|
@ -247,7 +248,7 @@ Ps2Float Ps2Float::Sqrt()
|
|||
q += q & 1;
|
||||
}
|
||||
|
||||
ix = (q >> 1) + 0x3f000000;
|
||||
ix = (q >> 1) + 0x3F000000;
|
||||
ix += m << 23;
|
||||
|
||||
return Ps2Float((uint32_t)(ix));
|
||||
|
@ -397,114 +398,32 @@ Ps2Float Ps2Float::DoAdd(Ps2Float other)
|
|||
else if (rawExp <= 0)
|
||||
return Ps2Float(man < 0, 0, 0);
|
||||
|
||||
return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero();
|
||||
return Ps2Float((uint32_t)man & SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF));
|
||||
}
|
||||
|
||||
Ps2Float Ps2Float::DoMul(Ps2Float other)
|
||||
{
|
||||
uint8_t selfExponent = Exponent;
|
||||
uint8_t otherExponent = other.Exponent;
|
||||
uint32_t selfMantissa = Mantissa | 0x800000;
|
||||
uint32_t otherMantissa = other.Mantissa | 0x800000;
|
||||
int32_t resExponent = Exponent + other.Exponent - BIAS;
|
||||
uint32_t sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK;
|
||||
|
||||
Ps2Float result = Ps2Float(0);
|
||||
int32_t resExponent = selfExponent + otherExponent - 127;
|
||||
uint32_t resMantissa = (uint32_t)(BoothMultiplier::MulMantissa(selfMantissa, otherMantissa) >> 23);
|
||||
|
||||
result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other);
|
||||
if (resMantissa > 0xFFFFFF)
|
||||
{
|
||||
resMantissa >>= 1;
|
||||
resExponent++;
|
||||
}
|
||||
|
||||
if (resExponent > 255)
|
||||
return result.Sign ? Min() : Max();
|
||||
else if (resExponent < 0)
|
||||
return Ps2Float(result.Sign, 0, 0);
|
||||
return Ps2Float(sign | MAX_FLOATING_POINT_VALUE);
|
||||
else if (resExponent <= 0)
|
||||
return Ps2Float(sign);
|
||||
|
||||
uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not
|
||||
int64_t res = 0;
|
||||
uint64_t mask = 0xFFFFFFFFFFFFFFFF;
|
||||
|
||||
result.Exponent = (uint8_t)(resExponent);
|
||||
|
||||
otherMantissa <<= 1;
|
||||
|
||||
uint32_t part[13]; // Partial products
|
||||
uint32_t bit[13]; // More partial products. 0 or 1.
|
||||
|
||||
for (int i = 0; i <= 12; i++, otherMantissa >>= 2)
|
||||
{
|
||||
uint32_t test = otherMantissa & 7;
|
||||
if (test == 0 || test == 7)
|
||||
{
|
||||
part[i] = 0;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 3)
|
||||
{
|
||||
part[i] = (selfMantissa << 1);
|
||||
bit[i] = 0;
|
||||
}
|
||||
else if (test == 4)
|
||||
{
|
||||
part[i] = ~(selfMantissa << 1);
|
||||
bit[i] = 1;
|
||||
}
|
||||
else if (test < 4)
|
||||
{
|
||||
part[i] = selfMantissa;
|
||||
bit[i] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
part[i] = ~selfMantissa;
|
||||
bit[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i <= 12; i++)
|
||||
{
|
||||
res += (uint64_t)(int32_t)part[i] << (i * 2);
|
||||
res &= mask;
|
||||
res += bit[i] << (i * 2);
|
||||
}
|
||||
|
||||
result.Mantissa = (uint32_t)(res >> 23);
|
||||
|
||||
if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF))
|
||||
result.Mantissa -= 1;
|
||||
|
||||
if (result.Mantissa > 0)
|
||||
{
|
||||
int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa);
|
||||
|
||||
while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS)
|
||||
{
|
||||
if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS)
|
||||
{
|
||||
result.Mantissa >>= 1;
|
||||
|
||||
int32_t exp = ((int32_t)result.Exponent + 1);
|
||||
|
||||
if (exp > 255)
|
||||
return result.Sign ? Min() : Max();
|
||||
|
||||
result.Exponent = (uint8_t)exp;
|
||||
|
||||
leadingBitPosition--;
|
||||
}
|
||||
else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS)
|
||||
{
|
||||
result.Mantissa <<= 1;
|
||||
|
||||
int32_t exp = ((int32_t)result.Exponent - 1);
|
||||
|
||||
if (exp <= 0)
|
||||
return Ps2Float(result.Sign, 0, 0);
|
||||
|
||||
result.Exponent = (uint8_t)exp;
|
||||
|
||||
leadingBitPosition++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.Mantissa &= 0x7FFFFF;
|
||||
return result.RoundTowardsZero();
|
||||
return Ps2Float(sign | (uint32_t)(resExponent << 23) | (resMantissa & 0x7FFFFF));
|
||||
}
|
||||
|
||||
Ps2Float Ps2Float::DoDiv(Ps2Float other)
|
||||
|
|
|
@ -126,6 +126,7 @@
|
|||
<ClCompile Include="arm64\RecStubs.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="BoothMultiplier.cpp" />
|
||||
<ClCompile Include="CDVD\BlockdumpFileReader.cpp" />
|
||||
<ClCompile Include="CDVD\CDVDdiscReader.cpp" />
|
||||
<ClCompile Include="CDVD\CDVDdiscThread.cpp" />
|
||||
|
@ -583,6 +584,7 @@
|
|||
<ClInclude Include="arm64\newVif_UnpackNEON.h">
|
||||
<ExcludedFromBuild Condition="'$(Platform)'!='ARM64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="BoothMultiplier.h" />
|
||||
<ClInclude Include="CDVD\BlockdumpFileReader.h" />
|
||||
<ClInclude Include="CDVD\CDVDdiscReader.h" />
|
||||
<ClInclude Include="CDVD\CsoFileReader.h" />
|
||||
|
|
|
@ -1449,6 +1449,9 @@
|
|||
<ClCompile Include="Ps2Float.cpp">
|
||||
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="BoothMultiplier.cpp">
|
||||
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Patch.h">
|
||||
|
@ -2408,6 +2411,9 @@
|
|||
<ClInclude Include="Ps2Float.h">
|
||||
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="BoothMultiplier.h">
|
||||
<Filter>System\Ps2\EmotionEngine\Shared</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuildStep Include="rdebug\deci2.h">
|
||||
|
|
Loading…
Reference in New Issue