Refactor fastmem/trampoline code.

Simplification to avoid reading back the generated instructions, allowing
us to handle all possible cases.
Matt Mastracci 2016-02-28 14:33:53 -07:00
parent ddc9e414ee
commit b1296a7825
20 changed files with 385 additions and 894 deletions
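
In broad terms, the change works like this: instead of disassembling the faulting MOV at backpatch time (the old x64Analyzer path removed below), the emitter records everything the backpatcher will need in a TrampolineInfo at code-generation time, keyed by the address of the generated MOV; the fault handler then just looks that entry up and emits a slowmem trampoline. The following is a minimal, hedged C++ sketch of that shape; the stand-in names (TrampolineInfoSketch, backPatchInfoSketch, RecordFastmemAccess, BackPatchSketch) are illustrative, not the actual classes touched in this commit.

// Sketch only: stand-in types, not the real Dolphin code from this commit.
#include <cstdint>
#include <map>

struct TrampolineInfoSketch
{
  uint8_t* start;  // first byte of the patchable fastmem access
  uint32_t len;    // its length, including any NOP padding up to BACKPATCH_SIZE
  bool read;       // load or store
};

// Keyed by the address of the emitted MOV; filled in while emitting the access.
static std::map<uint8_t*, TrampolineInfoSketch> backPatchInfoSketch;

static void RecordFastmemAccess(uint8_t* movAddress, uint8_t* start, uint32_t len, bool read)
{
  backPatchInfoSketch[movAddress] = {start, len, read};
}

// At fault time the handler only does a lookup; no instruction decoding needed.
static bool BackPatchSketch(uint8_t* faultingInstruction)
{
  auto it = backPatchInfoSketch.find(faultingInstruction);
  if (it == backPatchInfoSketch.end())
    return false;  // not a fastmem access we emitted
  const TrampolineInfoSketch& info = it->second;
  // ...generate a slowmem trampoline from 'info', then overwrite
  // [info.start, info.start + info.len) with a JMP to it plus INT3 padding...
  (void)info;
  return true;
}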


@ -29,7 +29,6 @@ set(SRCS Analytics.cpp
TraversalClient.cpp TraversalClient.cpp
Version.cpp Version.cpp
x64ABI.cpp x64ABI.cpp
x64Analyzer.cpp
x64Emitter.cpp x64Emitter.cpp
Crypto/bn.cpp Crypto/bn.cpp
Crypto/ec.cpp Crypto/ec.cpp


@ -133,7 +133,6 @@
<ClInclude Include="TraversalClient.h" /> <ClInclude Include="TraversalClient.h" />
<ClInclude Include="TraversalProto.h" /> <ClInclude Include="TraversalProto.h" />
<ClInclude Include="x64ABI.h" /> <ClInclude Include="x64ABI.h" />
<ClInclude Include="x64Analyzer.h" />
<ClInclude Include="x64Emitter.h" /> <ClInclude Include="x64Emitter.h" />
<ClInclude Include="Crypto\bn.h" /> <ClInclude Include="Crypto\bn.h" />
<ClInclude Include="Crypto\ec.h" /> <ClInclude Include="Crypto\ec.h" />
@ -178,7 +177,6 @@
<ClCompile Include="ucrtFreadWorkaround.cpp" /> <ClCompile Include="ucrtFreadWorkaround.cpp" />
<ClCompile Include="Version.cpp" /> <ClCompile Include="Version.cpp" />
<ClCompile Include="x64ABI.cpp" /> <ClCompile Include="x64ABI.cpp" />
<ClCompile Include="x64Analyzer.cpp" />
<ClCompile Include="x64CPUDetect.cpp" /> <ClCompile Include="x64CPUDetect.cpp" />
<ClCompile Include="x64Emitter.cpp" /> <ClCompile Include="x64Emitter.cpp" />
<ClCompile Include="x64FPURoundMode.cpp" /> <ClCompile Include="x64FPURoundMode.cpp" />


@ -62,7 +62,6 @@
<ClInclude Include="Thread.h" /> <ClInclude Include="Thread.h" />
<ClInclude Include="Timer.h" /> <ClInclude Include="Timer.h" />
<ClInclude Include="x64ABI.h" /> <ClInclude Include="x64ABI.h" />
<ClInclude Include="x64Analyzer.h" />
<ClInclude Include="x64Emitter.h" /> <ClInclude Include="x64Emitter.h" />
<ClInclude Include="Logging\ConsoleListener.h"> <ClInclude Include="Logging\ConsoleListener.h">
<Filter>Logging</Filter> <Filter>Logging</Filter>
@ -253,7 +252,6 @@
<ClCompile Include="Timer.cpp" /> <ClCompile Include="Timer.cpp" />
<ClCompile Include="Version.cpp" /> <ClCompile Include="Version.cpp" />
<ClCompile Include="x64ABI.cpp" /> <ClCompile Include="x64ABI.cpp" />
<ClCompile Include="x64Analyzer.cpp" />
<ClCompile Include="x64CPUDetect.cpp" /> <ClCompile Include="x64CPUDetect.cpp" />
<ClCompile Include="x64Emitter.cpp" /> <ClCompile Include="x64Emitter.cpp" />
<ClCompile Include="x64FPURoundMode.cpp" /> <ClCompile Include="x64FPURoundMode.cpp" />


@ -1,233 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "Common/x64Analyzer.h"
bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info)
{
unsigned const char* startCodePtr = codePtr;
u8 rex = 0;
u32 opcode;
int opcode_length;
// Check for regular prefix
info->operandSize = 4;
info->zeroExtend = false;
info->signExtend = false;
info->hasImmediate = false;
info->isMemoryWrite = false;
info->byteSwap = false;
u8 modRMbyte = 0;
u8 sibByte = 0;
bool hasModRM = false;
int displacementSize = 0;
if (*codePtr == 0x66)
{
info->operandSize = 2;
codePtr++;
}
else if (*codePtr == 0x67)
{
codePtr++;
}
// Check for REX prefix
if ((*codePtr & 0xF0) == 0x40)
{
rex = *codePtr;
if (rex & 8) // REX.W
{
info->operandSize = 8;
}
codePtr++;
}
opcode = *codePtr++;
opcode_length = 1;
if (opcode == 0x0F)
{
opcode = (opcode << 8) | *codePtr++;
opcode_length = 2;
if ((opcode & 0xFB) == 0x38)
{
opcode = (opcode << 8) | *codePtr++;
opcode_length = 3;
}
}
switch (opcode_length)
{
case 1:
if ((opcode & 0xF0) == 0x80 || ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
case 2:
if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) || (opcode & 0xF0) == 0x30 ||
(opcode & 0xFF) == 0x77 || (opcode & 0xF0) == 0x80 || (opcode & 0xF8) == 0xC8)
{
// No mod R/M byte
}
else
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
case 3:
// TODO: support more 3-byte opcode instructions
if ((opcode & 0xFE) == 0xF0)
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
}
if (hasModRM)
{
ModRM mrm(modRMbyte, rex);
info->regOperandReg = mrm.reg;
if (mrm.mod < 3)
{
if (mrm.rm == 4)
{
// SIB byte
sibByte = *codePtr++;
info->scaledReg = (sibByte >> 3) & 7;
info->otherReg = (sibByte & 7);
if (rex & 2)
info->scaledReg += 8;
if (rex & 1)
info->otherReg += 8;
}
else
{
// info->scaledReg =
}
}
if (mrm.mod == 1 || mrm.mod == 2)
{
if (mrm.mod == 1)
displacementSize = 1;
else
displacementSize = 4;
}
}
if (displacementSize == 1)
info->displacement = (s32)(s8)*codePtr;
else
info->displacement = *((s32*)codePtr);
codePtr += displacementSize;
switch (opcode)
{
case 0xC6: // mem <- imm8
info->isMemoryWrite = true;
info->hasImmediate = true;
info->immediate = *codePtr;
info->operandSize = 1;
codePtr++;
break;
case 0xC7: // mem <- imm16/32
info->isMemoryWrite = true;
switch (info->operandSize)
{
case 2:
info->hasImmediate = true;
info->immediate = *(u16*)codePtr;
codePtr += 2;
break;
case 4:
info->hasImmediate = true;
info->immediate = *(u32*)codePtr;
codePtr += 4;
break;
case 8:
info->zeroExtend = true;
info->immediate = *(u32*)codePtr;
codePtr += 4;
break;
}
break;
case 0x88: // mem <- r8
info->isMemoryWrite = true;
if (info->operandSize != 4)
{
return false;
}
info->operandSize = 1;
break;
case 0x89: // mem <- r16/32/64
info->isMemoryWrite = true;
break;
case 0x8A: // r8 <- mem
if (info->operandSize != 4)
{
return false;
}
info->operandSize = 1;
break;
case 0x8B: // r16/32/64 <- mem
break;
case 0x0FB6: // movzx on byte
info->zeroExtend = true;
info->operandSize = 1;
break;
case 0x0FB7: // movzx on short
info->zeroExtend = true;
info->operandSize = 2;
break;
case 0x0FBE: // movsx on byte
info->signExtend = true;
info->operandSize = 1;
break;
case 0x0FBF: // movsx on short
info->signExtend = true;
info->operandSize = 2;
break;
case 0x0F38F0: // movbe read
info->byteSwap = true;
break;
case 0x0F38F1: // movbe write
info->byteSwap = true;
info->isMemoryWrite = true;
break;
default:
return false;
}
info->instructionSize = (int)(codePtr - startCodePtr);
return true;
}
bool InstructionInfo::operator==(const InstructionInfo& other) const
{
return operandSize == other.operandSize && instructionSize == other.instructionSize &&
regOperandReg == other.regOperandReg && otherReg == other.otherReg &&
scaledReg == other.scaledReg && zeroExtend == other.zeroExtend &&
signExtend == other.signExtend && hasImmediate == other.hasImmediate &&
isMemoryWrite == other.isMemoryWrite && byteSwap == other.byteSwap &&
immediate == other.immediate && displacement == other.displacement;
}


@ -1,44 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/CommonTypes.h"
struct InstructionInfo
{
int operandSize; // 8, 16, 32, 64
int instructionSize;
int regOperandReg;
int otherReg;
int scaledReg;
bool zeroExtend;
bool signExtend;
bool hasImmediate;
bool isMemoryWrite;
bool byteSwap;
u64 immediate;
s32 displacement;
bool operator==(const InstructionInfo& other) const;
};
struct ModRM
{
int mod, reg, rm;
ModRM(u8 modRM, u8 rex)
{
mod = modRM >> 6;
reg = ((modRM >> 3) & 7) | ((rex & 4) ? 8 : 0);
rm = modRM & 7;
}
};
enum AccessType
{
OP_ACCESS_READ = 0,
OP_ACCESS_WRITE = 1
};
bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info);


@ -1046,8 +1046,14 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src)
WriteMOVBE(bits, 0xF1, src, dest); WriteMOVBE(bits, 0xF1, src, dest);
} }
void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend) void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend, MovInfo* info)
{ {
if (info)
{
info->address = GetWritableCodePtr();
info->nonAtomicSwapStore = false;
}
switch (size) switch (size)
{ {
case 8: case 8:
@ -1083,20 +1089,28 @@ void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_ext
} }
} }
u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src) void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info)
{ {
u8* mov_location = GetWritableCodePtr();
if (cpu_info.bMOVBE) if (cpu_info.bMOVBE)
{ {
if (info)
{
info->address = GetWritableCodePtr();
info->nonAtomicSwapStore = false;
}
MOVBE(size, dst, src); MOVBE(size, dst, src);
} }
else else
{ {
BSWAP(size, src); BSWAP(size, src);
mov_location = GetWritableCodePtr(); if (info)
{
info->address = GetWritableCodePtr();
info->nonAtomicSwapStore = true;
info->nonAtomicSwapStoreSrc = src;
}
MOV(size, dst, R(src)); MOV(size, dst, R(src));
} }
return mov_location;
} }
void XEmitter::LEA(int bits, X64Reg dest, OpArg src) void XEmitter::LEA(int bits, X64Reg dest, OpArg src)


@ -203,6 +203,15 @@ enum FloatOp
class XEmitter; class XEmitter;
// Information about a generated MOV op
struct MovInfo final
{
u8* address;
bool nonAtomicSwapStore;
// valid iff nonAtomicSwapStore is true
X64Reg nonAtomicSwapStoreSrc;
};
// RIP addressing does not benefit from micro op fusion on Core arch // RIP addressing does not benefit from micro op fusion on Core arch
struct OpArg struct OpArg
{ {
@ -272,6 +281,27 @@ struct OpArg
return (s8)offset; return (s8)offset;
} }
OpArg AsImm64() const
{
_dbg_assert_(DYNA_REC, IsImm());
return OpArg((u64)offset, SCALE_IMM64);
}
OpArg AsImm32() const
{
_dbg_assert_(DYNA_REC, IsImm());
return OpArg((u32)offset, SCALE_IMM32);
}
OpArg AsImm16() const
{
_dbg_assert_(DYNA_REC, IsImm());
return OpArg((u16)offset, SCALE_IMM16);
}
OpArg AsImm8() const
{
_dbg_assert_(DYNA_REC, IsImm());
return OpArg((u8)offset, SCALE_IMM8);
}
void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const; void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const;
bool IsImm() const bool IsImm() const
{ {
@ -625,8 +655,9 @@ public:
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
void MOVBE(int bits, X64Reg dest, const OpArg& src); void MOVBE(int bits, X64Reg dest, const OpArg& src);
void MOVBE(int bits, const OpArg& dest, X64Reg src); void MOVBE(int bits, const OpArg& dest, X64Reg src);
void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false); void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false,
u8* SwapAndStore(int size, const OpArg& dst, X64Reg src); MovInfo* info = nullptr);
void SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info = nullptr);
// Available only on AMD >= Phenom or Intel >= Haswell // Available only on AMD >= Phenom or Intel >= Haswell
void LZCNT(int bits, X64Reg dest, const OpArg& src); void LZCNT(int bits, X64Reg dest, const OpArg& src);
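
For reference, a short hedged sketch of how a caller uses the new MovInfo out-parameter; only the LoadAndSwap/SwapAndStore signatures and the MovInfo fields come from this diff, while the wrapper function and its operands are illustrative.

#include "Common/x64Emitter.h"

// Illustrative helper: emit a byte-swapped 32-bit store and report which
// instruction a backpatcher would need to patch.
static u8* EmitSwappedStoreSketch(Gen::XEmitter& emit, const Gen::OpArg& dst, Gen::X64Reg src)
{
  Gen::MovInfo mov{};
  emit.SwapAndStore(32, dst, src, &mov);  // MOVBE, or BSWAP + MOV without MOVBE
  if (mov.nonAtomicSwapStore)
  {
    // src was byte-swapped by a separate BSWAP before the MOV, so a fault
    // handler must swap mov.nonAtomicSwapStoreSrc back before retrying.
  }
  return mov.address;  // the MOVBE/MOV to overwrite when backpatching
}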


@ -8,7 +8,6 @@
#include "Common/CommonFuncs.h" #include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/Thread.h" #include "Common/Thread.h"
#include "Common/x64Analyzer.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "Core/MachineContext.h" #include "Core/MachineContext.h"


@ -19,6 +19,7 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/PowerPC/Jit64/JitAsm.h" #include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h" #include "Core/PowerPC/Jit64/JitRegCache.h"


@ -287,17 +287,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend); SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
if (update && storeAddress) if (update && storeAddress)
{
MemoryExceptionCheck();
MOV(32, gpr.R(a), opAddress); MOV(32, gpr.R(a), opAddress);
}
// TODO: support no-swap in SafeLoadToReg instead // TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed) if (byte_reversed)
{
MemoryExceptionCheck();
BSWAP(accessSize, gpr.RX(d)); BSWAP(accessSize, gpr.RX(d));
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -507,10 +501,7 @@ void Jit64::stX(UGeckoInstruction inst)
} }
if (update) if (update)
{
MemoryExceptionCheck();
ADD(32, gpr.R(a), Imm32((u32)offset)); ADD(32, gpr.R(a), Imm32((u32)offset));
}
} }
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -589,10 +580,7 @@ void Jit64::stXx(UGeckoInstruction inst)
} }
if (update) if (update)
{
MemoryExceptionCheck();
MOV(32, gpr.R(a), R(RSCRATCH2)); MOV(32, gpr.R(a), R(RSCRATCH2));
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();


@ -80,7 +80,6 @@ void Jit64::lfXXX(UGeckoInstruction inst)
registersInUse[RSCRATCH2] = true; registersInUse[RSCRATCH2] = true;
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false); SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
MemoryExceptionCheck();
if (single) if (single)
{ {
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true); ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
@ -193,10 +192,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse); SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
if (update) if (update)
{
MemoryExceptionCheck();
MOV(32, gpr.R(a), R(RSCRATCH2)); MOV(32, gpr.R(a), R(RSCRATCH2));
}
fpr.UnlockAll(); fpr.UnlockAll();
gpr.UnlockAll(); gpr.UnlockAll();


@ -40,74 +40,6 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0; u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
gpr.Lock(a, b); gpr.Lock(a, b);
if (gqrIsConstant && gqrValue == 0)
{
int storeOffset = 0;
gpr.BindToRegister(a, true, update);
X64Reg addr = gpr.RX(a);
// TODO: this is kind of ugly :/ we should probably create a universal load/store address
// calculation
// function that handles all these weird cases, e.g. how non-fastmem loadstores clobber
// addresses.
bool storeAddress = (update && jo.memcheck) || !jo.fastmem;
if (storeAddress)
{
addr = RSCRATCH2;
MOV(32, R(addr), gpr.R(a));
}
if (indexed)
{
if (update)
{
ADD(32, R(addr), gpr.R(b));
}
else
{
addr = RSCRATCH2;
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
}
else
{
MOV(32, R(addr), gpr.R(b));
if (a)
ADD(32, R(addr), gpr.R(a));
}
}
}
else
{
if (update)
ADD(32, R(addr), Imm32(offset));
else
storeOffset = offset;
}
fpr.Lock(s);
if (w)
{
CVTSD2SS(XMM0, fpr.R(s));
MOVD_xmm(R(RSCRATCH), XMM0);
}
else
{
CVTPD2PS(XMM0, fpr.R(s));
MOVQ_xmm(R(RSCRATCH), XMM0);
ROL(64, R(RSCRATCH), Imm8(32));
}
BitSet32 registersInUse = CallerSavedRegistersInUse();
if (update && storeAddress)
registersInUse[addr] = true;
SafeWriteRegToReg(RSCRATCH, addr, w ? 32 : 64, storeOffset, registersInUse);
MemoryExceptionCheck();
if (update && storeAddress)
MOV(32, gpr.R(a), R(addr));
gpr.UnlockAll();
fpr.UnlockAll();
return;
}
gpr.FlushLockX(RSCRATCH_EXTRA); gpr.FlushLockX(RSCRATCH_EXTRA);
if (update) if (update)
gpr.BindToRegister(a, true, true); gpr.BindToRegister(a, true, true);
@ -130,44 +62,35 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
if (update && !jo.memcheck) if (update && !jo.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA)); MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
if (w)
CVTSD2SS(XMM0, fpr.R(s)); // one
else
CVTPD2PS(XMM0, fpr.R(s)); // pair
if (gqrIsConstant) if (gqrIsConstant)
{ {
// Paired stores don't yield any real change in performance right now, but if we can
// improve fastmem support this might change
//#define INLINE_PAIRED_STORES
#ifdef INLINE_PAIRED_STORES
if (w)
{
// One value
CVTSD2SS(XMM0, fpr.R(s));
GenQuantizedStore(true, static_cast<EQuantizeType>(gqrValue & 0x7), (gqrValue & 0x3F00) >> 8);
}
else
{
// Pair of values
CVTPD2PS(XMM0, fpr.R(s));
GenQuantizedStore(false, static_cast<EQuantizeType>(gqrValue & 0x7),
(gqrValue & 0x3F00) >> 8);
}
#else
// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
// with just the scale bits.
int type = gqrValue & 0x7; int type = gqrValue & 0x7;
MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
if (w) // Paired stores (other than w/type zero) don't yield any real change in
// performance right now, but if we can improve fastmem support this might change
if (gqrValue == 0)
{ {
// One value if (w)
CVTSD2SS(XMM0, fpr.R(s)); GenQuantizedStore(true, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
CALL(asm_routines.singleStoreQuantized[type]); else
GenQuantizedStore(false, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
} }
else else
{ {
// Pair of values // We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
CVTPD2PS(XMM0, fpr.R(s)); // with just the scale bits.
CALL(asm_routines.pairedStoreQuantized[type]); MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
if (w)
CALL(asm_routines.singleStoreQuantized[type]);
else
CALL(asm_routines.pairedStoreQuantized[type]);
} }
#endif
} }
else else
{ {
@ -180,22 +103,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2)); MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
if (w) if (w)
{
// One value
CVTSD2SS(XMM0, fpr.R(s));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized)); CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
}
else else
{
// Pair of values
CVTPD2PS(XMM0, fpr.R(s));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized)); CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
}
} }
if (update && jo.memcheck) if (update && jo.memcheck)
{ {
MemoryExceptionCheck();
if (indexed) if (indexed)
ADD(32, gpr.R(a), gpr.R(b)); ADD(32, gpr.R(a), gpr.R(b));
else else
@ -226,113 +140,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
gpr.Lock(a, b); gpr.Lock(a, b);
if (gqrIsConstant && gqrValue == 0)
{
s32 loadOffset = 0;
gpr.BindToRegister(a, true, update);
X64Reg addr = gpr.RX(a);
if (update && jo.memcheck)
{
addr = RSCRATCH2;
MOV(32, R(addr), gpr.R(a));
}
if (indexed)
{
if (update)
{
ADD(32, R(addr), gpr.R(b));
}
else
{
addr = RSCRATCH2;
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
}
else
{
MOV(32, R(addr), gpr.R(b));
if (a)
ADD(32, R(addr), gpr.R(a));
}
}
}
else
{
if (update)
ADD(32, R(addr), Imm32(offset));
else
loadOffset = offset;
}
fpr.Lock(s);
if (jo.memcheck)
{
fpr.StoreFromRegister(s);
js.revertFprLoad = s;
}
fpr.BindToRegister(s, false);
// Let's mirror the JitAsmCommon code and assume all non-MMU loads go to RAM.
if (!jo.memcheck)
{
if (w)
{
if (cpu_info.bSSSE3)
{
MOVD_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
PSHUFB(XMM0, M(pbswapShuffle1x4));
UNPCKLPS(XMM0, M(m_one));
}
else
{
LoadAndSwap(32, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
MOVD_xmm(XMM0, R(RSCRATCH));
UNPCKLPS(XMM0, M(m_one));
}
}
else
{
if (cpu_info.bSSSE3)
{
MOVQ_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
PSHUFB(XMM0, M(pbswapShuffle2x4));
}
else
{
LoadAndSwap(64, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
ROL(64, R(RSCRATCH), Imm8(32));
MOVQ_xmm(XMM0, R(RSCRATCH));
}
}
CVTPS2PD(fpr.RX(s), R(XMM0));
}
else
{
BitSet32 registersInUse = CallerSavedRegistersInUse();
registersInUse[fpr.RX(s) << 16] = false;
if (update)
registersInUse[addr] = true;
SafeLoadToReg(RSCRATCH, R(addr), w ? 32 : 64, loadOffset, registersInUse, false);
MemoryExceptionCheck();
if (w)
{
MOVD_xmm(XMM0, R(RSCRATCH));
UNPCKLPS(XMM0, M(m_one));
}
else
{
ROL(64, R(RSCRATCH), Imm8(32));
MOVQ_xmm(XMM0, R(RSCRATCH));
}
CVTPS2PD(fpr.RX(s), R(XMM0));
if (update)
MOV(32, gpr.R(a), R(addr));
}
gpr.UnlockAll();
fpr.UnlockAll();
return;
}
gpr.FlushLockX(RSCRATCH_EXTRA); gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update); gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true); fpr.BindToRegister(s, false, true);
@ -373,7 +180,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8]))); CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
} }
MemoryExceptionCheck();
CVTPS2PD(fpr.RX(s), R(XMM0)); CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && jo.memcheck) if (update && jo.memcheck)
{ {


@ -572,8 +572,6 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
MULPS(XMM0, R(XMM1)); MULPS(XMM0, R(XMM1));
} }
} }
return;
} }
void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline) void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)


@ -17,6 +17,7 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Jit64/JitAsm.h" #include "Core/PowerPC/Jit64/JitAsm.h"


@ -12,27 +12,12 @@
#include "Common/CommonFuncs.h" #include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
using namespace Gen; using namespace Gen;
static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
{
u64 code_addr = (u64)codePtr;
disassembler disasm;
char disbuf[256];
memset(disbuf, 0, 256);
disasm.disasm64(0, code_addr, codePtr, disbuf);
PanicAlert("%s\n\n"
"Error encountered accessing emulated address %08x.\n"
"Culprit instruction: \n%s\nat %#" PRIx64,
text.c_str(), emAddress, disbuf, code_addr);
return;
}
// This generates some fairly heavy trampolines, but it doesn't really hurt. // This generates some fairly heavy trampolines, but it doesn't really hurt.
// Only instructions that access I/O will get these, and there won't be that // Only instructions that access I/O will get these, and there won't be that
// many of them in a typical program/game. // many of them in a typical program/game.
@ -56,36 +41,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
if (!IsInSpace(codePtr)) if (!IsInSpace(codePtr))
return false; // this will become a regular crash real soon after this return false; // this will become a regular crash real soon after this
InstructionInfo info = {}; auto it = backPatchInfo.find(codePtr);
if (it == backPatchInfo.end())
if (!DisassembleMov(codePtr, &info))
{
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
return false;
}
if (info.otherReg != RMEM)
{
PanicAlert("BackPatch : Base reg not RMEM."
"\n\nAttempted to access %08x.",
emAddress);
return false;
}
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
{
PanicAlert("BackPatch: MOVBE is too small");
return false;
}
auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end())
{ {
PanicAlert("BackPatch: no register use entry for address %p", codePtr); PanicAlert("BackPatch: no register use entry for address %p", codePtr);
return false; return false;
} }
BitSet32 registersInUse = it->second; TrampolineInfo& info = it->second;
u8* exceptionHandler = nullptr; u8* exceptionHandler = nullptr;
if (jit->jo.memcheck) if (jit->jo.memcheck)
@ -95,110 +58,67 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
exceptionHandler = it2->second; exceptionHandler = it2->second;
} }
// Compute the start and length of the memory operation, including
// any byteswapping.
int totalSize = info.instructionSize;
u8* start = codePtr;
if (!info.isMemoryWrite)
{
// MOVBE and single bytes don't need to be swapped.
if (!info.byteSwap && info.operandSize > 1)
{
// REX
if ((codePtr[totalSize] & 0xF0) == 0x40)
totalSize++;
// BSWAP
if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
totalSize += 2;
if (info.operandSize == 2)
{
// operand size override
if (codePtr[totalSize] == 0x66)
totalSize++;
// REX
if ((codePtr[totalSize] & 0xF0) == 0x40)
totalSize++;
// SAR/ROL
_assert_(codePtr[totalSize] == 0xC1 &&
(codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
totalSize += 3;
}
}
}
else
{
if (info.byteSwap || info.hasImmediate)
{
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
}
else
{
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64* ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
switch (info.operandSize)
{
case 1:
bswapSize = 0;
break;
case 2:
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap16((u16)*ptr);
break;
case 4:
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
*ptr = Common::swap32((u32)*ptr);
break;
case 8:
bswapSize = 3;
*ptr = Common::swap64(*ptr);
break;
}
start = codePtr - bswapSize;
totalSize += bswapSize;
}
}
// In the trampoline code, we jump back into the block at the beginning // In the trampoline code, we jump back into the block at the beginning
// of the next instruction. The next instruction comes immediately // of the next instruction. The next instruction comes immediately
// after the backpatched operation, or BACKPATCH_SIZE bytes after the start // after the backpatched operation, or BACKPATCH_SIZE bytes after the start
// of the backpatched operation, whichever comes last. (The JIT inserts NOPs // of the backpatched operation, whichever comes last. (The JIT inserts NOPs
// into the original code if necessary to ensure there is enough space // into the original code if necessary to ensure there is enough space
// to insert the backpatch jump.) // to insert the backpatch jump.)
int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
u8* returnPtr = start + 5 + padding; jit->js.generatingTrampoline = true;
jit->js.trampolineExceptionHandler = exceptionHandler;
// Generate the trampoline. // Generate the trampoline.
const u8* trampoline; const u8* trampoline = trampolines.GenerateTrampoline(info);
if (info.isMemoryWrite) jit->js.generatingTrampoline = false;
{ jit->js.trampolineExceptionHandler = nullptr;
// TODO: special case FIFO writes.
auto it3 = pcAtLoc.find(codePtr);
if (it3 == pcAtLoc.end())
{
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
return false;
}
u32 pc = it3->second; u8* start = info.start;
trampoline =
trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
}
else
{
trampoline =
trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
}
// Patch the original memory operation. // Patch the original memory operation.
XEmitter emitter(start); XEmitter emitter(start);
emitter.JMP(trampoline, true); emitter.JMP(trampoline, true);
for (int i = 0; i < padding; ++i) // NOPs become dead code
const u8* end = info.start + info.len;
for (const u8* i = emitter.GetCodePtr(); i < end; ++i)
emitter.INT3(); emitter.INT3();
ctx->CTX_PC = (u64)start;
// Rewind time to just before the start of the write block. If we swapped memory
// before faulting (eg: the store+swap was not an atomic op like MOVBE), let's
// swap it back so that the swap can happen again (this double swap isn't ideal but
// only happens the first time we fault).
if (info.nonAtomicSwapStoreSrc != INVALID_REG)
{
u64* ptr = ContextRN(ctx, info.nonAtomicSwapStoreSrc);
switch (info.accessSize << 3)
{
case 8:
// No need to swap a byte
break;
case 16:
*ptr = Common::swap16(static_cast<u16>(*ptr));
break;
case 32:
*ptr = Common::swap32(static_cast<u32>(*ptr));
break;
case 64:
*ptr = Common::swap64(static_cast<u64>(*ptr));
break;
default:
_dbg_assert_(DYNA_REC, 0);
break;
}
}
// This is special code to undo the LEA in SafeLoadToReg if it clobbered the address
// register in the case where reg_value shared the same location as opAddress.
if (info.offsetAddedToAddress)
{
u64* ptr = ContextRN(ctx, info.op_arg.GetSimpleReg());
*ptr -= static_cast<u32>(info.offset);
}
ctx->CTX_PC = reinterpret_cast<u64>(trampoline);
return true; return true;
} }
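
A worked example of the "double swap" note above, as a small self-contained C++ program (swap32 here is a stand-in for Common::swap32; the value is arbitrary):

#include <cassert>
#include <cstdint>

// Stand-in for Common::swap32.
static uint32_t swap32(uint32_t v)
{
  return (v >> 24) | ((v >> 8) & 0xFF00u) | ((v << 8) & 0xFF0000u) | (v << 24);
}

int main()
{
  uint32_t reg = 0x11223344u;  // value the guest wants to store
  reg = swap32(reg);           // the emitted BSWAP ran, then the MOV faulted
  reg = swap32(reg);           // BackPatch swaps it back before the trampoline runs
  // The slowmem path in the trampoline now sees the original value and performs
  // the byte swap itself, so memory still ends up big-endian exactly once.
  assert(reg == 0x11223344u);
  return 0;
}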


@ -96,6 +96,9 @@ protected:
bool carryFlagSet; bool carryFlagSet;
bool carryFlagInverted; bool carryFlagInverted;
bool generatingTrampoline;
u8* trampolineExceptionHandler;
int fifoBytesThisBlock; int fifoBytesThisBlock;
PPCAnalyst::BlockStats st; PPCAnalyst::BlockStats st;


@ -18,6 +18,26 @@ using namespace Gen;
void EmuCodeBlock::MemoryExceptionCheck() void EmuCodeBlock::MemoryExceptionCheck()
{ {
// TODO: We really should untangle the trampolines, exception handlers and
// memory checks.
// If we are currently generating a trampoline for a failed fastmem
// load/store, the trampoline generator will have stashed the exception
// handler (that we previously generated after the fastmem instruction) in
// trampolineExceptionHandler.
if (jit->js.generatingTrampoline)
{
if (jit->js.trampolineExceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, jit->js.trampolineExceptionHandler);
}
return;
}
// If memcheck (ie: MMU) mode is enabled and we haven't generated an
// exception handler for this instruction yet, we will generate an
// exception check.
if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler) if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
{ {
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI)); TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
@ -42,10 +62,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
} }
u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
bool signExtend) bool signExtend, MovInfo* info)
{ {
u8* result; bool offsetAddedToAddress = false;
OpArg memOperand; OpArg memOperand;
if (opAddress.IsSimpleReg()) if (opAddress.IsSimpleReg())
{ {
@ -57,6 +77,11 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
// place to address the issue.) // place to address the issue.)
if ((u32)offset >= 0x1000) if ((u32)offset >= 0x1000)
{ {
// This method can potentially clobber the address if it shares a register
// with the load target. In this case we can just subtract offset from the
// register (see JitBackpatch for this implementation).
offsetAddedToAddress = (reg_value == opAddress.GetSimpleReg());
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset)); LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
opAddress = R(reg_value); opAddress = R(reg_value);
offset = 0; offset = 0;
@ -74,9 +99,8 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
memOperand = MComplex(RMEM, reg_value, SCALE_1, offset); memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
} }
result = GetWritableCodePtr(); LoadAndSwap(accessSize, reg_value, memOperand, signExtend, info);
LoadAndSwap(accessSize, reg_value, memOperand, signExtend); return offsetAddedToAddress;
return result;
} }
// Visitor that generates code to read a MMIO value. // Visitor that generates code to read a MMIO value.
@ -231,72 +255,43 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
s32 offset, BitSet32 registersInUse, bool signExtend, int flags) s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
{ {
registersInUse[reg_value] = false; bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
if (jit->jo.fastmem && !opAddress.IsImm() &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)))
{
u8* mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
registersInUseAtLoc[mov] = registersInUse; registersInUse[reg_value] = false;
jit->js.fastmemLoadStore = mov; if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
{
u8* backpatchStart = GetWritableCodePtr();
MovInfo mov;
bool offsetAddedToAddress =
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov);
TrampolineInfo& info = backPatchInfo[mov.address];
info.pc = jit->js.compilerPC;
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
info.start = backpatchStart;
info.read = true;
info.op_reg = reg_value;
info.op_arg = opAddress;
info.offsetAddedToAddress = offsetAddedToAddress;
info.accessSize = accessSize >> 3;
info.offset = offset;
info.registersInUse = registersInUse;
info.flags = flags;
info.signExtend = signExtend;
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
if (padding > 0)
{
NOP(padding);
}
info.len = static_cast<u32>(GetCodePtr() - info.start);
jit->js.fastmemLoadStore = mov.address;
return; return;
} }
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
if (opAddress.IsImm()) if (opAddress.IsImm())
{ {
u32 address = opAddress.Imm32() + offset; u32 address = opAddress.Imm32() + offset;
SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend);
// If the address is known to be RAM, just load it directly.
if (PowerPC::IsOptimizableRAMAddress(address))
{
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
return;
}
// If the address maps to an MMIO register, inline MMIO read code.
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
if (accessSize != 64 && mmioAddress)
{
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
signExtend);
return;
}
// Fall back to general-case code.
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64:
ABI_CallFunctionC((void*)&PowerPC::Read_U64, address);
break;
case 32:
ABI_CallFunctionC((void*)&PowerPC::Read_U32, address);
break;
case 16:
ABI_CallFunctionC((void*)&PowerPC::Read_U16_ZX, address);
break;
case 8:
ABI_CallFunctionC((void*)&PowerPC::Read_U8_ZX, address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MemoryExceptionCheck();
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
}
else if (reg_value != ABI_RETURN)
{
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
}
return; return;
} }
@ -310,8 +305,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
} }
FixupBranch exit; FixupBranch exit;
if (!jit->jo.alwaysUseMemFuncs) if (!jit->jo.alwaysUseMemFuncs && !slowmem)
{ {
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask); FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend); UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
if (farcode.Enabled()) if (farcode.Enabled())
@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
MOVZX(64, accessSize, reg_value, R(ABI_RETURN)); MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
} }
if (!jit->jo.alwaysUseMemFuncs) if (!jit->jo.alwaysUseMemFuncs && !slowmem)
{ {
if (farcode.Enabled()) if (farcode.Enabled())
{ {
@ -361,6 +361,56 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
} }
} }
void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
BitSet32 registersInUse, bool signExtend)
{
// If the address is known to be RAM, just load it directly.
if (PowerPC::IsOptimizableRAMAddress(address))
{
UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
return;
}
// If the address maps to an MMIO register, inline MMIO read code.
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
if (accessSize != 64 && mmioAddress)
{
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
signExtend);
return;
}
// Fall back to general-case code.
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
{
case 64:
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U64), address);
break;
case 32:
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U32), address);
break;
case 16:
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U16_ZX), address);
break;
case 8:
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U8_ZX), address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
MemoryExceptionCheck();
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
}
else if (reg_value != ABI_RETURN)
{
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
}
}
static OpArg SwapImmediate(int accessSize, const OpArg& reg_value) static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
{ {
if (accessSize == 32) if (accessSize == 32)
@ -371,10 +421,15 @@ static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
return Imm8(reg_value.Imm8()); return Imm8(reg_value.Imm8());
} }
u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
bool swap) bool swap, MovInfo* info)
{ {
u8* result = GetWritableCodePtr(); if (info)
{
info->address = GetWritableCodePtr();
info->nonAtomicSwapStore = false;
}
OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset); OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
if (reg_value.IsImm()) if (reg_value.IsImm())
{ {
@ -384,22 +439,19 @@ u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
} }
else if (swap) else if (swap)
{ {
result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg()); SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
} }
else else
{ {
MOV(accessSize, dest, reg_value); MOV(accessSize, dest, reg_value);
} }
return result;
} }
static OpArg FixImmediate(int accessSize, OpArg arg) static OpArg FixImmediate(int accessSize, OpArg arg)
{ {
if (arg.IsImm()) if (arg.IsImm())
{ {
arg = accessSize == 8 ? Imm8((u8)arg.Imm32()) : accessSize == 16 ? Imm16((u16)arg.Imm32()) : arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
Imm32((u32)arg.Imm32());
} }
return arg; return arg;
} }
@ -475,25 +527,38 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset, void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
BitSet32 registersInUse, int flags) BitSet32 registersInUse, int flags)
{ {
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
// set the correct immediate format // set the correct immediate format
reg_value = FixImmediate(accessSize, reg_value); reg_value = FixImmediate(accessSize, reg_value);
// TODO: support byte-swapped non-immediate fastmem stores if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)))
{ {
const u8* backpatchStart = GetCodePtr(); u8* backpatchStart = GetWritableCodePtr();
u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, MovInfo mov;
!(flags & SAFE_LOADSTORE_NO_SWAP)); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov);
TrampolineInfo& info = backPatchInfo[mov.address];
info.pc = jit->js.compilerPC;
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
info.start = backpatchStart;
info.read = false;
info.op_arg = reg_value;
info.op_reg = reg_addr;
info.offsetAddedToAddress = false;
info.accessSize = accessSize >> 3;
info.offset = offset;
info.registersInUse = registersInUse;
info.flags = flags;
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart); ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
if (padding > 0) if (padding > 0)
{ {
NOP(padding); NOP(padding);
} }
info.len = static_cast<u32>(GetCodePtr() - info.start);
jit->js.fastmemLoadStore = mov.address;
registersInUseAtLoc[mov] = registersInUse;
pcAtLoc[mov] = jit->js.compilerPC;
jit->js.fastmemLoadStore = mov;
return; return;
} }
@ -510,21 +575,22 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
} }
} }
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
// The following masks the region used by the GC/Wii virtual memory lib
mem_mask |= Memory::ADDR_MASK_MEM1;
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
FixupBranch slow, exit; FixupBranch slow, exit;
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask); if (!slowmem)
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); {
if (farcode.Enabled()) u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
SwitchToFarCode();
else // The following masks the region used by the GC/Wii virtual memory lib
exit = J(true); mem_mask |= Memory::ADDR_MASK_MEM1;
SetJumpTarget(slow);
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
if (farcode.Enabled())
SwitchToFarCode();
else
exit = J(true);
SetJumpTarget(slow);
}
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
@ -563,12 +629,18 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
break; break;
} }
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
if (farcode.Enabled())
MemoryExceptionCheck();
if (!slowmem)
{ {
exit = J(true); if (farcode.Enabled())
SwitchToNearCode(); {
exit = J(true);
SwitchToNearCode();
}
SetJumpTarget(exit);
} }
SetJumpTarget(exit);
} }
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap) void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
@ -1055,7 +1127,6 @@ void EmuCodeBlock::JitClearCA()
void EmuCodeBlock::Clear() void EmuCodeBlock::Clear()
{ {
registersInUseAtLoc.clear(); backPatchInfo.clear();
pcAtLoc.clear();
exceptionHandlerAtLoc.clear(); exceptionHandlerAtLoc.clear();
} }


@ -59,6 +59,47 @@ static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8; static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32; static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
// Stores information we need to back-patch a MOV with a call to the slow read/write path after
// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
// big.
struct TrampolineInfo final
{
// The start of the store operation that failed -- we will patch a JMP here
u8* start;
// The start + len = end of the store operation (points to the next instruction)
u32 len;
// The PPC PC for the current load/store block
u32 pc;
// Saved because we need these to make the ABI call in the trampoline
BitSet32 registersInUse;
// The MOV operation
Gen::X64Reg nonAtomicSwapStoreSrc;
// src/dest for load/store
s32 offset;
Gen::X64Reg op_reg;
Gen::OpArg op_arg;
// Original SafeLoadXXX/SafeStoreXXX flags
u8 flags;
// Memory access size (in bytes)
u8 accessSize : 4;
// true if this is a read op vs a write
bool read : 1;
// for read operations, true if needs sign-extension after load
bool signExtend : 1;
// Set to true if we added the offset to the address and need to undo it
bool offsetAddedToAddress : 1;
};
// Like XCodeBlock but has some utilities for memory access. // Like XCodeBlock but has some utilities for memory access.
class EmuCodeBlock : public Gen::X64CodeBlock class EmuCodeBlock : public Gen::X64CodeBlock
{ {
@ -88,15 +129,15 @@ public:
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
s32 offset, bool signExtend = false); s32 offset, bool signExtend = false);
// these return the address of the MOV, for backpatching // these return the address of the MOV, for backpatching
u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize, void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
s32 offset = 0, bool swap = true); s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
s32 offset = 0, bool swap = true) s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
{ {
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap); UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
} }
u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
bool signExtend); bool signExtend, Gen::MovInfo* info = nullptr);
void UnsafeWriteGatherPipe(int accessSize); void UnsafeWriteGatherPipe(int accessSize);
// Generate a load/write from the MMIO handler for a given address. Only // Generate a load/write from the MMIO handler for a given address. Only
@ -108,12 +149,18 @@ public:
{ {
SAFE_LOADSTORE_NO_SWAP = 1, SAFE_LOADSTORE_NO_SWAP = 1,
SAFE_LOADSTORE_NO_PROLOG = 2, SAFE_LOADSTORE_NO_PROLOG = 2,
// This indicates that the write being generated cannot be patched (and thus can't use fastmem)
SAFE_LOADSTORE_NO_FASTMEM = 4, SAFE_LOADSTORE_NO_FASTMEM = 4,
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8 SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
// Force slowmem (used when generating fallbacks in trampolines)
SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
}; };
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset, void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
BitSet32 registersInUse, bool signExtend, int flags = 0); BitSet32 registersInUse, bool signExtend, int flags = 0);
void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize,
BitSet32 registersInUse, bool signExtend);
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
// reg_value if the load fails and js.memcheck is enabled. // reg_value if the load fails and js.memcheck is enabled.
// Works with immediate inputs and simple registers only. // Works with immediate inputs and simple registers only.
@ -158,7 +205,6 @@ public:
void Clear(); void Clear();
protected: protected:
std::unordered_map<u8*, BitSet32> registersInUseAtLoc; std::unordered_map<u8*, TrampolineInfo> backPatchInfo;
std::unordered_map<u8*, u32> pcAtLoc;
std::unordered_map<u8*, u8*> exceptionHandlerAtLoc; std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
}; };
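
The SAFE_LOADSTORE_FORCE_SLOWMEM flag added here is what lets the trampoline generators below re-emit a faulted access through SafeLoadToReg/SafeWriteRegToReg without ever taking the fastmem path again. A brief hedged usage sketch; the helper function and its operands are illustrative, and only the SafeLoadToReg signature and the flag come from this diff.

#include "Common/BitSet.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"

// Illustrative helper: emit a 32-bit load that always goes through the slow
// (non-fastmem) path, the way the trampoline generators do.
static void EmitSlowmemLoadSketch(EmuCodeBlock& code, Gen::X64Reg dst, const Gen::OpArg& addr,
                                  BitSet32 registersInUse)
{
  code.SafeLoadToReg(dst, addr, 32, 0, registersInUse, /*signExtend=*/false,
                     EmuCodeBlock::SAFE_LOADSTORE_FORCE_SLOWMEM);
}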


@ -9,7 +9,6 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/JitRegister.h" #include "Common/JitRegister.h"
#include "Common/x64ABI.h" #include "Common/x64ABI.h"
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h" #include "Core/PowerPC/JitCommon/Jit_Util.h"
@ -37,150 +36,50 @@ void TrampolineCache::Shutdown()
FreeCodeSpace(); FreeCodeSpace();
} }
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info, const u8* TrampolineCache::GenerateTrampoline(const TrampolineInfo& info)
BitSet32 registersInUse, u8* exceptionHandler, {
u8* returnPtr) if (info.read)
{
return GenerateReadTrampoline(info);
}
return GenerateWriteTrampoline(info);
}
const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
{ {
if (GetSpaceLeft() < 1024) if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr(); const u8* trampoline = GetCodePtr();
X64Reg addrReg = (X64Reg)info.scaledReg;
X64Reg dataReg = (X64Reg)info.regOperandReg;
int stack_offset = 0;
bool push_param1 = registersInUse[ABI_PARAM1];
if (push_param1) SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
{ info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
PUSH(ABI_PARAM1);
stack_offset = 8;
registersInUse[ABI_PARAM1] = 0;
}
int dataRegSize = info.operandSize == 8 ? 64 : 32; JMP(info.start + info.len, true);
if (addrReg != ABI_PARAM1 && info.displacement)
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset); JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_%x", info.pc);
switch (info.operandSize)
{
case 8:
CALL((void*)&PowerPC::Read_U64);
break;
case 4:
CALL((void*)&PowerPC::Read_U32);
break;
case 2:
CALL((void*)&PowerPC::Read_U16);
break;
case 1:
CALL((void*)&PowerPC::Read_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
if (push_param1)
POP(ABI_PARAM1);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
if (info.signExtend)
MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
else if (dataReg != ABI_RETURN || info.operandSize < 4)
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
JMP(returnPtr, true);
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
return trampoline; return trampoline;
} }
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info, const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
BitSet32 registersInUse, u8* exceptionHandler,
u8* returnPtr, u32 pc)
{ {
if (GetSpaceLeft() < 1024) if (GetSpaceLeft() < 1024)
PanicAlert("Trampoline cache full"); PanicAlert("Trampoline cache full");
const u8* trampoline = GetCodePtr(); const u8* trampoline = GetCodePtr();
X64Reg dataReg = (X64Reg)info.regOperandReg;
X64Reg addrReg = (X64Reg)info.scaledReg;
// Don't treat FIFO writes specially for now because they require a burst // Don't treat FIFO writes specially for now because they require a burst
// check anyway. // check anyway.
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
MOV(32, PPCSTATE(pc), Imm32(pc)); MOV(32, PPCSTATE(pc), Imm32(info.pc));
ABI_PushRegistersAndAdjustStack(registersInUse, 0); SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
if (info.hasImmediate) JMP(info.start + info.len, true);
{
if (addrReg != ABI_PARAM2 && info.displacement)
LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
else if (addrReg != ABI_PARAM2)
MOV(32, R(ABI_PARAM2), R(addrReg));
else if (info.displacement)
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
// we have to swap back the immediate to pass it to the write functions JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", info.pc);
switch (info.operandSize)
{
case 8:
PanicAlert("Invalid 64-bit immediate!");
break;
case 4:
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
break;
case 2:
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
break;
case 1:
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
break;
}
}
else
{
int dataRegSize = info.operandSize == 8 ? 64 : 32;
MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
}
switch (info.operandSize)
{
case 8:
CALL((void*)&PowerPC::Write_U64);
break;
case 4:
CALL((void*)&PowerPC::Write_U32);
break;
case 2:
CALL((void*)&PowerPC::Write_U16);
break;
case 1:
CALL((void*)&PowerPC::Write_U8);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (exceptionHandler)
{
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
J_CC(CC_NZ, exceptionHandler);
}
JMP(returnPtr, true);
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
return trampoline; return trampoline;
} }


@ -7,21 +7,21 @@
#include "Common/BitSet.h" #include "Common/BitSet.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
#include "Core/PowerPC/JitCommon/Jit_Util.h"
struct InstructionInfo; struct InstructionInfo;
// We need at least this many bytes for backpatching. // We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5; const int BACKPATCH_SIZE = 5;
class TrampolineCache : public Gen::X64CodeBlock class TrampolineCache : public EmuCodeBlock
{ {
const u8* GenerateReadTrampoline(const TrampolineInfo& info);
const u8* GenerateWriteTrampoline(const TrampolineInfo& info);
public: public:
void Init(int size); void Init(int size);
void Shutdown(); void Shutdown();
const u8* GenerateTrampoline(const TrampolineInfo& info);
const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
u8* exceptionHandler, u8* returnPtr);
const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
u8* exceptionHandler, u8* returnPtr, u32 pc);
void ClearCodeSpace(); void ClearCodeSpace();
}; };