Hopefully fix all remaining quantizer issues in Mario Kart Wii:
* must use a truncating float-to-int conversion, for example.
* introduce optimized variants of the single value psq_st operation (JIT only).
* fix bug in SafeWriteRegToReg when swap = false

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4861 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
734b0f5dd4
commit
b84a1823b2
|
@ -1150,6 +1150,9 @@ void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(32, 0x5B, true, reg
|
||||||
void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);}
|
void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0xE6, false, regOp, arg);}
|
||||||
void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, regOp, arg);}
|
void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(64, 0x5B, true, regOp, arg);}
|
||||||
|
|
||||||
|
// Emits CVTTSS2SI: converts the scalar single-precision float in arg to a 32-bit
// integer using truncation (round toward zero). Note that xregdest names a
// general-purpose register (e.g. EAX), not an XMM register.
void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
|
||||||
|
// Emits CVTTPS2DQ: converts packed single-precision floats in arg to packed
// 32-bit integers using truncation (round toward zero), unlike CVTPS2DQ which
// rounds according to the current MXCSR rounding mode.
void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
|
||||||
|
|
||||||
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
|
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
|
||||||
|
|
||||||
void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);}
|
void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(32, 0x50, true, dest, arg);}
|
||||||
|
|
|
@ -512,6 +512,9 @@ public:
|
||||||
void CVTDQ2PS(X64Reg regOp, OpArg arg);
|
void CVTDQ2PS(X64Reg regOp, OpArg arg);
|
||||||
void CVTPS2DQ(X64Reg regOp, OpArg arg);
|
void CVTPS2DQ(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
|
void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
||||||
|
void CVTTPS2DQ(X64Reg regOp, OpArg arg);
|
||||||
|
|
||||||
// SSE2: Packed integer instructions
|
// SSE2: Packed integer instructions
|
||||||
void PACKSSDW(X64Reg dest, OpArg arg);
|
void PACKSSDW(X64Reg dest, OpArg arg);
|
||||||
void PACKSSWB(X64Reg dest, OpArg arg);
|
void PACKSSWB(X64Reg dest, OpArg arg);
|
||||||
|
|
|
@ -324,7 +324,7 @@ THREAD_RETURN EmuThread(void *pArg)
|
||||||
VideoInitialize.Fifo_CPUBase = &ProcessorInterface::Fifo_CPUBase;
|
VideoInitialize.Fifo_CPUBase = &ProcessorInterface::Fifo_CPUBase;
|
||||||
VideoInitialize.Fifo_CPUEnd = &ProcessorInterface::Fifo_CPUEnd;
|
VideoInitialize.Fifo_CPUEnd = &ProcessorInterface::Fifo_CPUEnd;
|
||||||
VideoInitialize.Fifo_CPUWritePointer = &ProcessorInterface::Fifo_CPUWritePointer;
|
VideoInitialize.Fifo_CPUWritePointer = &ProcessorInterface::Fifo_CPUWritePointer;
|
||||||
VideoInitialize.bAutoAspectIs16_9 = _CoreParameter.bWii ? SConfig::GetInstance().m_SYSCONF->GetData<u8>("IPL.AR") : false;
|
VideoInitialize.bAutoAspectIs16_9 = _CoreParameter.bWii ? (SConfig::GetInstance().m_SYSCONF->GetData<u8>("IPL.AR") ? true : false) : false;
|
||||||
|
|
||||||
Plugins.GetVideo()->Initialize(&VideoInitialize); // Call the dll
|
Plugins.GetVideo()->Initialize(&VideoInitialize); // Call the dll
|
||||||
|
|
||||||
|
|
|
@ -48,10 +48,6 @@ may be redirected here (for example to Read_U32()).
|
||||||
#include "../Debugger/Debugger_SymbolMap.h"
|
#include "../Debugger/Debugger_SymbolMap.h"
|
||||||
#include "../PluginManager.h"
|
#include "../PluginManager.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Declarations and definitions
|
|
||||||
// ----------------
|
|
||||||
namespace Memory
|
namespace Memory
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -75,10 +71,9 @@ u8* base = NULL;
|
||||||
MemArena g_arena;
|
MemArena g_arena;
|
||||||
// ==============
|
// ==============
|
||||||
|
|
||||||
|
// STATE_TO_SAVE
|
||||||
// STATE_TO_SAVE (applies to a lot of things in this file)
|
|
||||||
|
|
||||||
bool m_IsInitialized = false; // Save the Init(), Shutdown() state
|
bool m_IsInitialized = false; // Save the Init(), Shutdown() state
|
||||||
|
// END STATE_TO_SAVE
|
||||||
|
|
||||||
// 64-bit: Pointers to low-mem (sub-0x10000000) mirror
|
// 64-bit: Pointers to low-mem (sub-0x10000000) mirror
|
||||||
// 32-bit: Same as the corresponding physical/virtual pointers.
|
// 32-bit: Same as the corresponding physical/virtual pointers.
|
||||||
|
@ -130,8 +125,6 @@ void HW_Default_Write(const T _Data, const u32 _Address){ ERROR_LOG(MASTER_LOG,
|
||||||
template <class T>
|
template <class T>
|
||||||
void HW_Default_Read(T _Data, const u32 _Address){ ERROR_LOG(MASTER_LOG, "Illegal HW Read%i %08x", sizeof(T)*8, _Address); _dbg_assert_(MEMMAP, 0);}
|
void HW_Default_Read(T _Data, const u32 _Address){ ERROR_LOG(MASTER_LOG, "Illegal HW Read%i %08x", sizeof(T)*8, _Address); _dbg_assert_(MEMMAP, 0);}
|
||||||
|
|
||||||
u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag);
|
|
||||||
|
|
||||||
#define PAGE_SHIFT 10
|
#define PAGE_SHIFT 10
|
||||||
#define PAGE_SIZE (1 << PAGE_SHIFT)
|
#define PAGE_SIZE (1 << PAGE_SHIFT)
|
||||||
#define PAGE_MASK (PAGE_SHIFT - 1)
|
#define PAGE_MASK (PAGE_SHIFT - 1)
|
||||||
|
@ -606,12 +599,10 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits)
|
||||||
if(Read)
|
if(Read)
|
||||||
{
|
{
|
||||||
WARN_LOG(CONSOLE, "Read%i: Program tried to read [%08x] from [%08x]", Bits, Address);
|
WARN_LOG(CONSOLE, "Read%i: Program tried to read [%08x] from [%08x]", Bits, Address);
|
||||||
//PanicAlert("Write_U32: Program tried to write [%08x] to [%08x]", _Address);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ERROR_LOG(CONSOLE, "Write%i: Program tried to write [%08x] to [%08x]", Bits, Data, Address);
|
ERROR_LOG(CONSOLE, "Write%i: Program tried to write [%08x] to [%08x]", Bits, Data, Address);
|
||||||
//PanicAlert("Read: Program tried to write [%08x] to [%08x]", Data, Address);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -620,16 +611,14 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits)
|
||||||
if(Read)
|
if(Read)
|
||||||
{
|
{
|
||||||
WARN_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * 0 * * *", Bits, Data, Address);
|
WARN_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * 0 * * *", Bits, Data, Address);
|
||||||
//PanicAlert("Read: Program read [%08x] from [%08x]", Data, Address);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
WARN_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * 0 * * *", Bits, Data, Address);
|
WARN_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * 0 * * *", Bits, Data, Address);
|
||||||
//PanicAlert("Read: Program wrote [%08x] to [%08x]", Data, Address);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Try to figure out where the dev/di Ioctl arguments are stored (including buffer out), so we can
|
// Try to figure out where the dev/di Ioctl arguments are stored (including buffer out), so we can
|
||||||
find the bad one */
|
// find the bad one
|
||||||
if(
|
if(
|
||||||
Data == 0x1090f4c0 // good out buffer right before it, for sound/smashbros_sound.brsar
|
Data == 0x1090f4c0 // good out buffer right before it, for sound/smashbros_sound.brsar
|
||||||
|| Data == 0x10913b00 // second one
|
|| Data == 0x10913b00 // second one
|
||||||
|
@ -646,12 +635,10 @@ void CheckForBadAddresses(u32 Address, u32 Data, bool Read, int Bits)
|
||||||
if(Read)
|
if(Read)
|
||||||
{
|
{
|
||||||
ERROR_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * * * * * * * * * *", Bits, Data, Address);
|
ERROR_LOG(CONSOLE, "Read%i: Program read [0x%08x] from [0x%08x] * * * * * * * * * * * *", Bits, Data, Address);
|
||||||
//PanicAlert("Read%i: Program read [%08x] from [%08x]", Bits, Data, Address);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ERROR_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * * * * * * * * * *", Bits,Data, Address);
|
ERROR_LOG(CONSOLE, "Write%i: Program wrote [0x%08x] to [0x%08x] * * * * * * * * * * * *", Bits,Data, Address);
|
||||||
//PanicAlert("Write%i: Program wrote [0x%08x] to [0x%08x]", Bits, Data, Address);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -683,9 +670,6 @@ void Memset(const u32 _Address, const u8 _iValue, const u32 _iLength)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// (comment for old implementation) : F|RES: rogue squadron and other games use the TLB ... so this cant work
|
|
||||||
|
|
||||||
// fixed implementation:
|
|
||||||
for (u32 i = 0; i < _iLength; i++)
|
for (u32 i = 0; i < _iLength; i++)
|
||||||
Write_U8(_iValue, _Address + i);
|
Write_U8(_iValue, _Address + i);
|
||||||
}
|
}
|
||||||
|
@ -839,12 +823,9 @@ bool IsRAMAddress(const u32 addr, bool allow_locked_cache)
|
||||||
return true;
|
return true;
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -142,6 +142,10 @@ void Write_U16(const u16 _Data, const u32 _Address);
|
||||||
void Write_U32(const u32 _Data, const u32 _Address);
|
void Write_U32(const u32 _Data, const u32 _Address);
|
||||||
void Write_U64(const u64 _Data, const u32 _Address);
|
void Write_U64(const u64 _Data, const u32 _Address);
|
||||||
|
|
||||||
|
void Write_U16_Swap(const u16 _Data, const u32 _Address);
|
||||||
|
void Write_U32_Swap(const u32 _Data, const u32 _Address);
|
||||||
|
void Write_U64_Swap(const u64 _Data, const u32 _Address);
|
||||||
|
|
||||||
void WriteHW_U32(const u32 _Data, const u32 _Address);
|
void WriteHW_U32(const u32 _Data, const u32 _Address);
|
||||||
void GetString(std::string& _string, const u32 _Address);
|
void GetString(std::string& _string, const u32 _Address);
|
||||||
|
|
||||||
|
|
|
@ -409,6 +409,9 @@ void Write_U16(const u16 _Data, const u32 _Address)
|
||||||
|
|
||||||
WriteToHardware<u16>(_Address, _Data, _Address, FLAG_WRITE);
|
WriteToHardware<u16>(_Address, _Data, _Address, FLAG_WRITE);
|
||||||
}
|
}
|
||||||
|
void Write_U16_Swap(const u16 _Data, const u32 _Address) {
|
||||||
|
Write_U16(Common::swap16(_Data), _Address);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Write_U32(const u32 _Data, const u32 _Address)
|
void Write_U32(const u32 _Data, const u32 _Address)
|
||||||
|
@ -423,7 +426,9 @@ void Write_U32(const u32 _Data, const u32 _Address)
|
||||||
#endif
|
#endif
|
||||||
WriteToHardware<u32>(_Address, _Data, _Address, FLAG_WRITE);
|
WriteToHardware<u32>(_Address, _Data, _Address, FLAG_WRITE);
|
||||||
}
|
}
|
||||||
|
void Write_U32_Swap(const u32 _Data, const u32 _Address) {
|
||||||
|
Write_U32(Common::swap32(_Data), _Address);
|
||||||
|
}
|
||||||
|
|
||||||
void Write_U64(const u64 _Data, const u32 _Address)
|
void Write_U64(const u64 _Data, const u32 _Address)
|
||||||
{
|
{
|
||||||
|
@ -438,6 +443,9 @@ void Write_U64(const u64 _Data, const u32 _Address)
|
||||||
|
|
||||||
WriteToHardware<u64>(_Address, _Data, _Address + 4, FLAG_WRITE);
|
WriteToHardware<u64>(_Address, _Data, _Address + 4, FLAG_WRITE);
|
||||||
}
|
}
|
||||||
|
void Write_U64_Swap(const u32 _Data, const u32 _Address) {
|
||||||
|
Write_U64(Common::swap64(_Data), _Address);
|
||||||
|
}
|
||||||
|
|
||||||
u8 ReadUnchecked_U8(const u32 _Address)
|
u8 ReadUnchecked_U8(const u32 _Address)
|
||||||
{
|
{
|
||||||
|
|
|
@ -76,7 +76,7 @@ inline T CLAMP(T a, T bottom, T top) {
|
||||||
void Helper_Quantize(const u32 _Addr, const double _fValue,
|
void Helper_Quantize(const u32 _Addr, const double _fValue,
|
||||||
const EQuantizeType _quantizeType, const unsigned int _uScale)
|
const EQuantizeType _quantizeType, const unsigned int _uScale)
|
||||||
{
|
{
|
||||||
switch(_quantizeType)
|
switch (_quantizeType)
|
||||||
{
|
{
|
||||||
case QUANTIZE_FLOAT:
|
case QUANTIZE_FLOAT:
|
||||||
Memory::Write_U32( ConvertToSingleFTZ( *(u64*)&_fValue ), _Addr );
|
Memory::Write_U32( ConvertToSingleFTZ( *(u64*)&_fValue ), _Addr );
|
||||||
|
@ -222,7 +222,7 @@ void psq_st(UGeckoInstruction _inst)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Helper_Quantize( EA, (float)rPS0(_inst.RS), stType, stScale );
|
Helper_Quantize( EA, rPS0(_inst.RS), stType, stScale );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -245,6 +245,7 @@ void AsmRoutineManager::GenerateCommon()
|
||||||
|
|
||||||
GenQuantizedLoads();
|
GenQuantizedLoads();
|
||||||
GenQuantizedStores();
|
GenQuantizedStores();
|
||||||
|
GenQuantizedSingleStores();
|
||||||
|
|
||||||
//CMPSD(R(XMM0), M(&zero),
|
//CMPSD(R(XMM0), M(&zero),
|
||||||
// TODO
|
// TODO
|
||||||
|
|
|
@ -72,71 +72,6 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
|
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
|
||||||
int stScale = gqr.ST_SCALE;
|
int stScale = gqr.ST_SCALE;
|
||||||
|
|
||||||
|
|
||||||
if (inst.W) {
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
|
|
||||||
// PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update);
|
|
||||||
// It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only
|
|
||||||
// floats so that's what we'll work on.
|
|
||||||
switch (stType)
|
|
||||||
{
|
|
||||||
case QUANTIZE_FLOAT:
|
|
||||||
{
|
|
||||||
// This one has quite a bit of optimization potential.
|
|
||||||
if (gpr.R(a).IsImm())
|
|
||||||
{
|
|
||||||
PanicAlert("Imm: %08x", gpr.R(a).offset);
|
|
||||||
}
|
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
|
||||||
gpr.Lock(a);
|
|
||||||
fpr.Lock(s);
|
|
||||||
// Check that the quantizer is set the way we expect.
|
|
||||||
INT3();
|
|
||||||
CMP(16, M(&rSPR(SPR_GQR0 + inst.I)), Imm16(store_gqr));
|
|
||||||
FixupBranch skip_opt = J_CC(CC_NE);
|
|
||||||
|
|
||||||
if (update)
|
|
||||||
gpr.LoadToX64(a, true, true);
|
|
||||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
|
||||||
if (offset)
|
|
||||||
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
|
|
||||||
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
|
|
||||||
if (update && offset)
|
|
||||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
|
||||||
CVTSD2SS(XMM0, fpr.R(s));
|
|
||||||
MOVD_xmm(M(&temp64), XMM0);
|
|
||||||
MOV(32, R(ABI_PARAM1), M(&temp64));
|
|
||||||
FixupBranch argh = J_CC(CC_NZ);
|
|
||||||
BSWAP(32, ABI_PARAM1);
|
|
||||||
#ifdef _M_X64
|
|
||||||
MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
|
|
||||||
#else
|
|
||||||
MOV(32, R(EAX), R(ABI_PARAM2));
|
|
||||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
|
|
||||||
#endif
|
|
||||||
FixupBranch skip_call = J();
|
|
||||||
SetJumpTarget(argh);
|
|
||||||
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
|
|
||||||
SetJumpTarget(skip_call);
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
fpr.UnlockAll();
|
|
||||||
|
|
||||||
FixupBranch skip_slow = J();
|
|
||||||
SetJumpTarget(skip_opt);
|
|
||||||
Default(inst);
|
|
||||||
SetJumpTarget(skip_slow);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// Is this specialization still worth it? Let's keep it for now. It's probably
|
// Is this specialization still worth it? Let's keep it for now. It's probably
|
||||||
// not very risky since a game most likely wouldn't use the same code to process
|
// not very risky since a game most likely wouldn't use the same code to process
|
||||||
|
@ -176,8 +111,16 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
#else
|
#else
|
||||||
SHL(32, R(EDX), Imm8(3));
|
SHL(32, R(EDX), Imm8(3));
|
||||||
#endif
|
#endif
|
||||||
|
if (inst.W) {
|
||||||
|
// One value
|
||||||
|
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
|
||||||
|
CVTSD2SS(XMM0, fpr.R(s));
|
||||||
|
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||||
|
} else {
|
||||||
|
// Pair of values
|
||||||
CVTPD2PS(XMM0, fpr.R(s));
|
CVTPD2PS(XMM0, fpr.R(s));
|
||||||
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
|
CALLptr(MDisp(EDX, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||||
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
|
@ -251,6 +251,7 @@ void AsmRoutineManager::GenerateCommon()
|
||||||
|
|
||||||
GenQuantizedLoads();
|
GenQuantizedLoads();
|
||||||
GenQuantizedStores();
|
GenQuantizedStores();
|
||||||
|
GenQuantizedSingleStores();
|
||||||
|
|
||||||
//CMPSD(R(XMM0), M(&zero),
|
//CMPSD(R(XMM0), M(&zero),
|
||||||
// TODO
|
// TODO
|
||||||
|
|
|
@ -137,8 +137,12 @@ static const float GC_ALIGNED16(m_dequantizeTableS[]) =
|
||||||
|
|
||||||
static float GC_ALIGNED16(psTemp[4]);
|
static float GC_ALIGNED16(psTemp[4]);
|
||||||
|
|
||||||
static const float m_65535 = 65535.0f;
|
static const float GC_ALIGNED16(m_65535) = 65535.0f;
|
||||||
|
static const float GC_ALIGNED16(m_32767) = 32767.0f;
|
||||||
|
static const float GC_ALIGNED16(m_m32768) = -32768.0f;
|
||||||
|
static const float GC_ALIGNED16(m_255) = 255.0f;
|
||||||
|
static const float GC_ALIGNED16(m_127) = 127.0f;
|
||||||
|
static const float GC_ALIGNED16(m_m128) = -128.0f;
|
||||||
|
|
||||||
#define QUANTIZE_OVERFLOW_SAFE
|
#define QUANTIZE_OVERFLOW_SAFE
|
||||||
|
|
||||||
|
@ -205,7 +209,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||||
PUNPCKLDQ(XMM1, R(XMM1));
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
MINPS(XMM0, R(XMM1));
|
MINPS(XMM0, R(XMM1));
|
||||||
#endif
|
#endif
|
||||||
CVTPS2DQ(XMM0, R(XMM0));
|
CVTTPS2DQ(XMM0, R(XMM0));
|
||||||
PACKSSDW(XMM0, R(XMM0));
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
PACKUSWB(XMM0, R(XMM0));
|
PACKUSWB(XMM0, R(XMM0));
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
@ -223,7 +227,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||||
PUNPCKLDQ(XMM1, R(XMM1));
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
MINPS(XMM0, R(XMM1));
|
MINPS(XMM0, R(XMM1));
|
||||||
#endif
|
#endif
|
||||||
CVTPS2DQ(XMM0, R(XMM0));
|
CVTTPS2DQ(XMM0, R(XMM0));
|
||||||
PACKSSDW(XMM0, R(XMM0));
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
PACKSSWB(XMM0, R(XMM0));
|
PACKSSWB(XMM0, R(XMM0));
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
|
@ -245,7 +249,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||||
PUNPCKLDQ(XMM1, R(XMM1));
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
MINPS(XMM0, R(XMM1));
|
MINPS(XMM0, R(XMM1));
|
||||||
|
|
||||||
CVTPS2DQ(XMM0, R(XMM0));
|
CVTTPS2DQ(XMM0, R(XMM0));
|
||||||
MOVQ_xmm(M(psTemp), XMM0);
|
MOVQ_xmm(M(psTemp), XMM0);
|
||||||
// place ps[0] into the higher word, ps[1] into the lower
|
// place ps[0] into the higher word, ps[1] into the lower
|
||||||
// so no need in ROL after BSWAP
|
// so no need in ROL after BSWAP
|
||||||
|
@ -269,7 +273,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||||
PUNPCKLDQ(XMM1, R(XMM1));
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
MINPS(XMM0, R(XMM1));
|
MINPS(XMM0, R(XMM1));
|
||||||
#endif
|
#endif
|
||||||
CVTPS2DQ(XMM0, R(XMM0));
|
CVTTPS2DQ(XMM0, R(XMM0));
|
||||||
PACKSSDW(XMM0, R(XMM0));
|
PACKSSDW(XMM0, R(XMM0));
|
||||||
MOVD_xmm(R(EAX), XMM0);
|
MOVD_xmm(R(EAX), XMM0);
|
||||||
BSWAP(32, EAX);
|
BSWAP(32, EAX);
|
||||||
|
@ -288,6 +292,79 @@ void CommonAsmRoutines::GenQuantizedStores() {
|
||||||
pairedStoreQuantized[7] = storePairedS16;
|
pairedStoreQuantized[7] = storePairedS16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See comment in header for in/outs.
|
||||||
|
void CommonAsmRoutines::GenQuantizedSingleStores() {
|
||||||
|
const u8* storeSingleIllegal = AlignCode4();
|
||||||
|
UD2();
|
||||||
|
|
||||||
|
// Easy!
|
||||||
|
const u8* storeSingleFloat = AlignCode4();
|
||||||
|
if (cpu_info.bSSSE3) {
|
||||||
|
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||||
|
// TODO: SafeWriteFloat
|
||||||
|
MOVSS(M(&psTemp[0]), XMM0);
|
||||||
|
MOV(32, R(EAX), M(&psTemp[0]));
|
||||||
|
SafeWriteRegToReg(EAX, ECX, 32, 0, false);
|
||||||
|
} else {
|
||||||
|
MOVSS(M(&psTemp[0]), XMM0);
|
||||||
|
MOV(32, R(EAX), M(&psTemp[0]));
|
||||||
|
SafeWriteRegToReg(EAX, ECX, 32, 0, true);
|
||||||
|
}
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||||
|
MULSS(XMM0, R(XMM1));
|
||||||
|
PXOR(XMM1, R(XMM1));
|
||||||
|
MAXSS(XMM0, R(XMM1));
|
||||||
|
MINSS(XMM0, M((void *)&m_255));
|
||||||
|
CVTTSS2SI(EAX, R(XMM0));
|
||||||
|
SafeWriteRegToReg(AL, ECX, 8, 0, true);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storeSingleS8 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||||
|
MULSS(XMM0, R(XMM1));
|
||||||
|
MAXSS(XMM0, M((void *)&m_m128));
|
||||||
|
MINSS(XMM0, M((void *)&m_127));
|
||||||
|
CVTTSS2SI(EAX, R(XMM0));
|
||||||
|
SafeWriteRegToReg(AL, ECX, 8, 0, true);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||||
|
PUNPCKLDQ(XMM1, R(XMM1));
|
||||||
|
MULPS(XMM0, R(XMM1));
|
||||||
|
PXOR(XMM1, R(XMM1));
|
||||||
|
MAXSS(XMM0, R(XMM1));
|
||||||
|
MINSS(XMM0, M((void *)&m_65535));
|
||||||
|
CVTTSS2SI(EAX, R(XMM0));
|
||||||
|
SafeWriteRegToReg(EAX, ECX, 16, 0, true);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
const u8* storeSingleS16 = AlignCode4();
|
||||||
|
SHR(32, R(EAX), Imm8(6));
|
||||||
|
MOVSS(XMM1, MDisp(EAX, (u32)(u64)m_quantizeTableS));
|
||||||
|
MULSS(XMM0, R(XMM1));
|
||||||
|
MAXSS(XMM0, M((void *)&m_m32768));
|
||||||
|
MINSS(XMM0, M((void *)&m_32767));
|
||||||
|
CVTTSS2SI(EAX, R(XMM0));
|
||||||
|
SafeWriteRegToReg(EAX, ECX, 16, 0, true);
|
||||||
|
RET();
|
||||||
|
|
||||||
|
singleStoreQuantized[0] = storeSingleFloat;
|
||||||
|
singleStoreQuantized[1] = storeSingleIllegal;
|
||||||
|
singleStoreQuantized[2] = storeSingleIllegal;
|
||||||
|
singleStoreQuantized[3] = storeSingleIllegal;
|
||||||
|
singleStoreQuantized[4] = storeSingleU8;
|
||||||
|
singleStoreQuantized[5] = storeSingleU16;
|
||||||
|
singleStoreQuantized[6] = storeSingleS8;
|
||||||
|
singleStoreQuantized[7] = storeSingleS16;
|
||||||
|
}
|
||||||
|
|
||||||
void CommonAsmRoutines::GenQuantizedLoads() {
|
void CommonAsmRoutines::GenQuantizedLoads() {
|
||||||
const u8* loadPairedIllegal = AlignCode4();
|
const u8* loadPairedIllegal = AlignCode4();
|
||||||
UD2();
|
UD2();
|
||||||
|
|
|
@ -24,6 +24,8 @@ class CommonAsmRoutines : public EmuCodeBlock {
|
||||||
protected:
|
protected:
|
||||||
void GenQuantizedLoads();
|
void GenQuantizedLoads();
|
||||||
void GenQuantizedStores();
|
void GenQuantizedStores();
|
||||||
|
void GenQuantizedSingleStores();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void GenFifoWrite(int size);
|
void GenFifoWrite(int size);
|
||||||
void GenFifoXmm64Write();
|
void GenFifoXmm64Write();
|
||||||
|
@ -42,6 +44,11 @@ public:
|
||||||
// Out: Nothing.
|
// Out: Nothing.
|
||||||
// Trashes: EAX ECX EDX
|
// Trashes: EAX ECX EDX
|
||||||
const u8 GC_ALIGNED16(*pairedStoreQuantized[8]);
|
const u8 GC_ALIGNED16(*pairedStoreQuantized[8]);
|
||||||
|
|
||||||
|
// In: array index: GQR to use.
|
||||||
|
// In: ECX: Address to write to.
|
||||||
|
// In: XMM0: Bottom 32-bit slot holds the float to be written.
|
||||||
|
const u8 GC_ALIGNED16(*singleStoreQuantized[8]);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -129,8 +129,8 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
||||||
FixupBranch argh = J_CC(CC_Z);
|
FixupBranch argh = J_CC(CC_Z);
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), reg_value, reg_addr); break;
|
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break;
|
||||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), reg_value, reg_addr); break;
|
case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break;
|
||||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
|
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
|
||||||
}
|
}
|
||||||
FixupBranch arg2 = J();
|
FixupBranch arg2 = J();
|
||||||
|
|
Loading…
Reference in New Issue