Merge pull request #3496 from mmastrac/fastmem_refactor
Refactor the fastmem/trampoline code
This commit is contained in:
commit
adcef046f6
|
@ -29,7 +29,6 @@ set(SRCS Analytics.cpp
|
|||
TraversalClient.cpp
|
||||
Version.cpp
|
||||
x64ABI.cpp
|
||||
x64Analyzer.cpp
|
||||
x64Emitter.cpp
|
||||
Crypto/bn.cpp
|
||||
Crypto/ec.cpp
|
||||
|
|
|
@ -133,7 +133,6 @@
|
|||
<ClInclude Include="TraversalClient.h" />
|
||||
<ClInclude Include="TraversalProto.h" />
|
||||
<ClInclude Include="x64ABI.h" />
|
||||
<ClInclude Include="x64Analyzer.h" />
|
||||
<ClInclude Include="x64Emitter.h" />
|
||||
<ClInclude Include="Crypto\bn.h" />
|
||||
<ClInclude Include="Crypto\ec.h" />
|
||||
|
@ -178,7 +177,6 @@
|
|||
<ClCompile Include="ucrtFreadWorkaround.cpp" />
|
||||
<ClCompile Include="Version.cpp" />
|
||||
<ClCompile Include="x64ABI.cpp" />
|
||||
<ClCompile Include="x64Analyzer.cpp" />
|
||||
<ClCompile Include="x64CPUDetect.cpp" />
|
||||
<ClCompile Include="x64Emitter.cpp" />
|
||||
<ClCompile Include="x64FPURoundMode.cpp" />
|
||||
|
|
|
@ -62,7 +62,6 @@
|
|||
<ClInclude Include="Thread.h" />
|
||||
<ClInclude Include="Timer.h" />
|
||||
<ClInclude Include="x64ABI.h" />
|
||||
<ClInclude Include="x64Analyzer.h" />
|
||||
<ClInclude Include="x64Emitter.h" />
|
||||
<ClInclude Include="Logging\ConsoleListener.h">
|
||||
<Filter>Logging</Filter>
|
||||
|
@ -253,7 +252,6 @@
|
|||
<ClCompile Include="Timer.cpp" />
|
||||
<ClCompile Include="Version.cpp" />
|
||||
<ClCompile Include="x64ABI.cpp" />
|
||||
<ClCompile Include="x64Analyzer.cpp" />
|
||||
<ClCompile Include="x64CPUDetect.cpp" />
|
||||
<ClCompile Include="x64Emitter.cpp" />
|
||||
<ClCompile Include="x64FPURoundMode.cpp" />
|
||||
|
|
|
@ -1,233 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Common/x64Analyzer.h"
|
||||
|
||||
bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info)
|
||||
{
|
||||
unsigned const char* startCodePtr = codePtr;
|
||||
u8 rex = 0;
|
||||
u32 opcode;
|
||||
int opcode_length;
|
||||
|
||||
// Check for regular prefix
|
||||
info->operandSize = 4;
|
||||
info->zeroExtend = false;
|
||||
info->signExtend = false;
|
||||
info->hasImmediate = false;
|
||||
info->isMemoryWrite = false;
|
||||
info->byteSwap = false;
|
||||
|
||||
u8 modRMbyte = 0;
|
||||
u8 sibByte = 0;
|
||||
bool hasModRM = false;
|
||||
|
||||
int displacementSize = 0;
|
||||
|
||||
if (*codePtr == 0x66)
|
||||
{
|
||||
info->operandSize = 2;
|
||||
codePtr++;
|
||||
}
|
||||
else if (*codePtr == 0x67)
|
||||
{
|
||||
codePtr++;
|
||||
}
|
||||
|
||||
// Check for REX prefix
|
||||
if ((*codePtr & 0xF0) == 0x40)
|
||||
{
|
||||
rex = *codePtr;
|
||||
if (rex & 8) // REX.W
|
||||
{
|
||||
info->operandSize = 8;
|
||||
}
|
||||
codePtr++;
|
||||
}
|
||||
|
||||
opcode = *codePtr++;
|
||||
opcode_length = 1;
|
||||
if (opcode == 0x0F)
|
||||
{
|
||||
opcode = (opcode << 8) | *codePtr++;
|
||||
opcode_length = 2;
|
||||
if ((opcode & 0xFB) == 0x38)
|
||||
{
|
||||
opcode = (opcode << 8) | *codePtr++;
|
||||
opcode_length = 3;
|
||||
}
|
||||
}
|
||||
|
||||
switch (opcode_length)
|
||||
{
|
||||
case 1:
|
||||
if ((opcode & 0xF0) == 0x80 || ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
|
||||
((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) || (opcode & 0xF0) == 0x30 ||
|
||||
(opcode & 0xFF) == 0x77 || (opcode & 0xF0) == 0x80 || (opcode & 0xF8) == 0xC8)
|
||||
{
|
||||
// No mod R/M byte
|
||||
}
|
||||
else
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// TODO: support more 3-byte opcode instructions
|
||||
if ((opcode & 0xFE) == 0xF0)
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasModRM)
|
||||
{
|
||||
ModRM mrm(modRMbyte, rex);
|
||||
info->regOperandReg = mrm.reg;
|
||||
if (mrm.mod < 3)
|
||||
{
|
||||
if (mrm.rm == 4)
|
||||
{
|
||||
// SIB byte
|
||||
sibByte = *codePtr++;
|
||||
info->scaledReg = (sibByte >> 3) & 7;
|
||||
info->otherReg = (sibByte & 7);
|
||||
if (rex & 2)
|
||||
info->scaledReg += 8;
|
||||
if (rex & 1)
|
||||
info->otherReg += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
// info->scaledReg =
|
||||
}
|
||||
}
|
||||
if (mrm.mod == 1 || mrm.mod == 2)
|
||||
{
|
||||
if (mrm.mod == 1)
|
||||
displacementSize = 1;
|
||||
else
|
||||
displacementSize = 4;
|
||||
}
|
||||
}
|
||||
|
||||
if (displacementSize == 1)
|
||||
info->displacement = (s32)(s8)*codePtr;
|
||||
else
|
||||
info->displacement = *((s32*)codePtr);
|
||||
codePtr += displacementSize;
|
||||
|
||||
switch (opcode)
|
||||
{
|
||||
case 0xC6: // mem <- imm8
|
||||
info->isMemoryWrite = true;
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *codePtr;
|
||||
info->operandSize = 1;
|
||||
codePtr++;
|
||||
break;
|
||||
|
||||
case 0xC7: // mem <- imm16/32
|
||||
info->isMemoryWrite = true;
|
||||
switch (info->operandSize)
|
||||
{
|
||||
case 2:
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u16*)codePtr;
|
||||
codePtr += 2;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
break;
|
||||
|
||||
case 8:
|
||||
info->zeroExtend = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x88: // mem <- r8
|
||||
info->isMemoryWrite = true;
|
||||
if (info->operandSize != 4)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x89: // mem <- r16/32/64
|
||||
info->isMemoryWrite = true;
|
||||
break;
|
||||
|
||||
case 0x8A: // r8 <- mem
|
||||
if (info->operandSize != 4)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x8B: // r16/32/64 <- mem
|
||||
break;
|
||||
|
||||
case 0x0FB6: // movzx on byte
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x0FB7: // movzx on short
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
|
||||
case 0x0FBE: // movsx on byte
|
||||
info->signExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x0FBF: // movsx on short
|
||||
info->signExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
|
||||
case 0x0F38F0: // movbe read
|
||||
info->byteSwap = true;
|
||||
break;
|
||||
|
||||
case 0x0F38F1: // movbe write
|
||||
info->byteSwap = true;
|
||||
info->isMemoryWrite = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
info->instructionSize = (int)(codePtr - startCodePtr);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InstructionInfo::operator==(const InstructionInfo& other) const
|
||||
{
|
||||
return operandSize == other.operandSize && instructionSize == other.instructionSize &&
|
||||
regOperandReg == other.regOperandReg && otherReg == other.otherReg &&
|
||||
scaledReg == other.scaledReg && zeroExtend == other.zeroExtend &&
|
||||
signExtend == other.signExtend && hasImmediate == other.hasImmediate &&
|
||||
isMemoryWrite == other.isMemoryWrite && byteSwap == other.byteSwap &&
|
||||
immediate == other.immediate && displacement == other.displacement;
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
struct InstructionInfo
|
||||
{
|
||||
int operandSize; // 8, 16, 32, 64
|
||||
int instructionSize;
|
||||
int regOperandReg;
|
||||
int otherReg;
|
||||
int scaledReg;
|
||||
bool zeroExtend;
|
||||
bool signExtend;
|
||||
bool hasImmediate;
|
||||
bool isMemoryWrite;
|
||||
bool byteSwap;
|
||||
u64 immediate;
|
||||
s32 displacement;
|
||||
|
||||
bool operator==(const InstructionInfo& other) const;
|
||||
};
|
||||
|
||||
struct ModRM
|
||||
{
|
||||
int mod, reg, rm;
|
||||
ModRM(u8 modRM, u8 rex)
|
||||
{
|
||||
mod = modRM >> 6;
|
||||
reg = ((modRM >> 3) & 7) | ((rex & 4) ? 8 : 0);
|
||||
rm = modRM & 7;
|
||||
}
|
||||
};
|
||||
|
||||
// Direction of a memory access.
enum AccessType
{
  OP_ACCESS_READ = 0,   // access reads from memory
  OP_ACCESS_WRITE = 1,  // access writes to memory
};
|
||||
|
||||
bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info);
|
|
@ -1046,8 +1046,14 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src)
|
|||
WriteMOVBE(bits, 0xF1, src, dest);
|
||||
}
|
||||
|
||||
void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend)
|
||||
void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend, MovInfo* info)
|
||||
{
|
||||
if (info)
|
||||
{
|
||||
info->address = GetWritableCodePtr();
|
||||
info->nonAtomicSwapStore = false;
|
||||
}
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 8:
|
||||
|
@ -1083,20 +1089,28 @@ void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_ext
|
|||
}
|
||||
}
|
||||
|
||||
u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
|
||||
void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info)
|
||||
{
|
||||
u8* mov_location = GetWritableCodePtr();
|
||||
if (cpu_info.bMOVBE)
|
||||
{
|
||||
if (info)
|
||||
{
|
||||
info->address = GetWritableCodePtr();
|
||||
info->nonAtomicSwapStore = false;
|
||||
}
|
||||
MOVBE(size, dst, src);
|
||||
}
|
||||
else
|
||||
{
|
||||
BSWAP(size, src);
|
||||
mov_location = GetWritableCodePtr();
|
||||
if (info)
|
||||
{
|
||||
info->address = GetWritableCodePtr();
|
||||
info->nonAtomicSwapStore = true;
|
||||
info->nonAtomicSwapStoreSrc = src;
|
||||
}
|
||||
MOV(size, dst, R(src));
|
||||
}
|
||||
return mov_location;
|
||||
}
|
||||
|
||||
void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
|
||||
|
|
|
@ -203,6 +203,15 @@ enum FloatOp
|
|||
|
||||
class XEmitter;
|
||||
|
||||
// Information about a generated MOV op
|
||||
struct MovInfo final
|
||||
{
|
||||
u8* address;
|
||||
bool nonAtomicSwapStore;
|
||||
// valid iff nonAtomicSwapStore is true
|
||||
X64Reg nonAtomicSwapStoreSrc;
|
||||
};
|
||||
|
||||
// RIP addressing does not benefit from micro op fusion on Core arch
|
||||
struct OpArg
|
||||
{
|
||||
|
@ -272,6 +281,27 @@ struct OpArg
|
|||
return (s8)offset;
|
||||
}
|
||||
|
||||
// Returns a copy of this immediate operand re-tagged as a 64-bit immediate.
// Asserts (debug builds) that the operand is an immediate.
OpArg AsImm64() const
{
  _dbg_assert_(DYNA_REC, IsImm());
  const u64 value = static_cast<u64>(offset);
  return OpArg(value, SCALE_IMM64);
}
|
||||
// Returns a copy of this immediate operand truncated to its low 32 bits and re-tagged
// as a 32-bit immediate. Asserts (debug builds) that the operand is an immediate.
OpArg AsImm32() const
{
  _dbg_assert_(DYNA_REC, IsImm());
  const u32 value = static_cast<u32>(offset);
  return OpArg(value, SCALE_IMM32);
}
|
||||
// Returns a copy of this immediate operand truncated to its low 16 bits and re-tagged
// as a 16-bit immediate. Asserts (debug builds) that the operand is an immediate.
OpArg AsImm16() const
{
  _dbg_assert_(DYNA_REC, IsImm());
  const u16 value = static_cast<u16>(offset);
  return OpArg(value, SCALE_IMM16);
}
|
||||
// Returns a copy of this immediate operand truncated to its low 8 bits and re-tagged
// as an 8-bit immediate. Asserts (debug builds) that the operand is an immediate.
OpArg AsImm8() const
{
  _dbg_assert_(DYNA_REC, IsImm());
  const u8 value = static_cast<u8>(offset);
  return OpArg(value, SCALE_IMM8);
}
|
||||
|
||||
void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const;
|
||||
bool IsImm() const
|
||||
{
|
||||
|
@ -625,8 +655,9 @@ public:
|
|||
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
|
||||
void MOVBE(int bits, X64Reg dest, const OpArg& src);
|
||||
void MOVBE(int bits, const OpArg& dest, X64Reg src);
|
||||
void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false);
|
||||
u8* SwapAndStore(int size, const OpArg& dst, X64Reg src);
|
||||
void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false,
|
||||
MovInfo* info = nullptr);
|
||||
void SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info = nullptr);
|
||||
|
||||
// Available only on AMD >= Phenom or Intel >= Haswell
|
||||
void LZCNT(int bits, X64Reg dest, const OpArg& src);
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Thread.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/MachineContext.h"
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
|
|
|
@ -287,17 +287,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
|||
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
|
||||
|
||||
if (update && storeAddress)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
MOV(32, gpr.R(a), opAddress);
|
||||
}
|
||||
|
||||
// TODO: support no-swap in SafeLoadToReg instead
|
||||
if (byte_reversed)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
BSWAP(accessSize, gpr.RX(d));
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
@ -507,10 +501,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
if (update)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
}
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
@ -589,10 +580,7 @@ void Jit64::stXx(UGeckoInstruction inst)
|
|||
}
|
||||
|
||||
if (update)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
MOV(32, gpr.R(a), R(RSCRATCH2));
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
|
|
@ -80,7 +80,6 @@ void Jit64::lfXXX(UGeckoInstruction inst)
|
|||
registersInUse[RSCRATCH2] = true;
|
||||
SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
|
||||
|
||||
MemoryExceptionCheck();
|
||||
if (single)
|
||||
{
|
||||
ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
|
||||
|
@ -193,10 +192,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
|
|||
SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
|
||||
|
||||
if (update)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
MOV(32, gpr.R(a), R(RSCRATCH2));
|
||||
}
|
||||
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAll();
|
||||
|
|
|
@ -40,74 +40,6 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
|
||||
|
||||
gpr.Lock(a, b);
|
||||
if (gqrIsConstant && gqrValue == 0)
|
||||
{
|
||||
int storeOffset = 0;
|
||||
gpr.BindToRegister(a, true, update);
|
||||
X64Reg addr = gpr.RX(a);
|
||||
// TODO: this is kind of ugly :/ we should probably create a universal load/store address
|
||||
// calculation
|
||||
// function that handles all these weird cases, e.g. how non-fastmem loadstores clobber
|
||||
// addresses.
|
||||
bool storeAddress = (update && jo.memcheck) || !jo.fastmem;
|
||||
if (storeAddress)
|
||||
{
|
||||
addr = RSCRATCH2;
|
||||
MOV(32, R(addr), gpr.R(a));
|
||||
}
|
||||
if (indexed)
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
ADD(32, R(addr), gpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = RSCRATCH2;
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
{
|
||||
LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(addr), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(addr), gpr.R(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (update)
|
||||
ADD(32, R(addr), Imm32(offset));
|
||||
else
|
||||
storeOffset = offset;
|
||||
}
|
||||
|
||||
fpr.Lock(s);
|
||||
if (w)
|
||||
{
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||
}
|
||||
else
|
||||
{
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||
ROL(64, R(RSCRATCH), Imm8(32));
|
||||
}
|
||||
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
if (update && storeAddress)
|
||||
registersInUse[addr] = true;
|
||||
SafeWriteRegToReg(RSCRATCH, addr, w ? 32 : 64, storeOffset, registersInUse);
|
||||
MemoryExceptionCheck();
|
||||
if (update && storeAddress)
|
||||
MOV(32, gpr.R(a), R(addr));
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
|
@ -130,44 +62,35 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
if (update && !jo.memcheck)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
|
||||
if (w)
|
||||
CVTSD2SS(XMM0, fpr.R(s)); // one
|
||||
else
|
||||
CVTPD2PS(XMM0, fpr.R(s)); // pair
|
||||
|
||||
if (gqrIsConstant)
|
||||
{
|
||||
// Paired stores don't yield any real change in performance right now, but if we can
|
||||
// improve fastmem support this might change
|
||||
//#define INLINE_PAIRED_STORES
|
||||
#ifdef INLINE_PAIRED_STORES
|
||||
if (w)
|
||||
{
|
||||
// One value
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
GenQuantizedStore(true, static_cast<EQuantizeType>(gqrValue & 0x7), (gqrValue & 0x3F00) >> 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pair of values
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
GenQuantizedStore(false, static_cast<EQuantizeType>(gqrValue & 0x7),
|
||||
(gqrValue & 0x3F00) >> 8);
|
||||
}
|
||||
#else
|
||||
// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
|
||||
// with just the scale bits.
|
||||
int type = gqrValue & 0x7;
|
||||
MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
|
||||
|
||||
if (w)
|
||||
// Paired stores (other than w/type zero) don't yield any real change in
|
||||
// performance right now, but if we can improve fastmem support this might change
|
||||
if (gqrValue == 0)
|
||||
{
|
||||
// One value
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
CALL(asm_routines.singleStoreQuantized[type]);
|
||||
if (w)
|
||||
GenQuantizedStore(true, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
|
||||
else
|
||||
GenQuantizedStore(false, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pair of values
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
CALL(asm_routines.pairedStoreQuantized[type]);
|
||||
// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
|
||||
// with just the scale bits.
|
||||
MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
|
||||
|
||||
if (w)
|
||||
CALL(asm_routines.singleStoreQuantized[type]);
|
||||
else
|
||||
CALL(asm_routines.pairedStoreQuantized[type]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -180,22 +103,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
|
|||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||
|
||||
if (w)
|
||||
{
|
||||
// One value
|
||||
CVTSD2SS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Pair of values
|
||||
CVTPD2PS(XMM0, fpr.R(s));
|
||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||
}
|
||||
}
|
||||
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
MemoryExceptionCheck();
|
||||
if (indexed)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
|
@ -226,113 +140,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
|
||||
gpr.Lock(a, b);
|
||||
|
||||
if (gqrIsConstant && gqrValue == 0)
|
||||
{
|
||||
s32 loadOffset = 0;
|
||||
gpr.BindToRegister(a, true, update);
|
||||
X64Reg addr = gpr.RX(a);
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
addr = RSCRATCH2;
|
||||
MOV(32, R(addr), gpr.R(a));
|
||||
}
|
||||
if (indexed)
|
||||
{
|
||||
if (update)
|
||||
{
|
||||
ADD(32, R(addr), gpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = RSCRATCH2;
|
||||
if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||
{
|
||||
LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(addr), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(addr), gpr.R(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (update)
|
||||
ADD(32, R(addr), Imm32(offset));
|
||||
else
|
||||
loadOffset = offset;
|
||||
}
|
||||
|
||||
fpr.Lock(s);
|
||||
if (jo.memcheck)
|
||||
{
|
||||
fpr.StoreFromRegister(s);
|
||||
js.revertFprLoad = s;
|
||||
}
|
||||
fpr.BindToRegister(s, false);
|
||||
|
||||
// Let's mirror the JitAsmCommon code and assume all non-MMU loads go to RAM.
|
||||
if (!jo.memcheck)
|
||||
{
|
||||
if (w)
|
||||
{
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVD_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||
PSHUFB(XMM0, M(pbswapShuffle1x4));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadAndSwap(32, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||
MOVD_xmm(XMM0, R(RSCRATCH));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
MOVQ_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadAndSwap(64, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
|
||||
ROL(64, R(RSCRATCH), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
}
|
||||
else
|
||||
{
|
||||
BitSet32 registersInUse = CallerSavedRegistersInUse();
|
||||
registersInUse[fpr.RX(s) << 16] = false;
|
||||
if (update)
|
||||
registersInUse[addr] = true;
|
||||
SafeLoadToReg(RSCRATCH, R(addr), w ? 32 : 64, loadOffset, registersInUse, false);
|
||||
MemoryExceptionCheck();
|
||||
if (w)
|
||||
{
|
||||
MOVD_xmm(XMM0, R(RSCRATCH));
|
||||
UNPCKLPS(XMM0, M(m_one));
|
||||
}
|
||||
else
|
||||
{
|
||||
ROL(64, R(RSCRATCH), Imm8(32));
|
||||
MOVQ_xmm(XMM0, R(RSCRATCH));
|
||||
}
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
if (update)
|
||||
MOV(32, gpr.R(a), R(addr));
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
fpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
gpr.BindToRegister(a, true, update);
|
||||
fpr.BindToRegister(s, false, true);
|
||||
|
@ -373,7 +180,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
|
|||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
|
||||
}
|
||||
|
||||
MemoryExceptionCheck();
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
if (update && jo.memcheck)
|
||||
{
|
||||
|
|
|
@ -572,8 +572,6 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
|
|||
MULPS(XMM0, R(XMM1));
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/Gekko.h"
|
||||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
|
|
|
@ -12,27 +12,12 @@
|
|||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
|
||||
{
|
||||
u64 code_addr = (u64)codePtr;
|
||||
disassembler disasm;
|
||||
char disbuf[256];
|
||||
memset(disbuf, 0, 256);
|
||||
disasm.disasm64(0, code_addr, codePtr, disbuf);
|
||||
PanicAlert("%s\n\n"
|
||||
"Error encountered accessing emulated address %08x.\n"
|
||||
"Culprit instruction: \n%s\nat %#" PRIx64,
|
||||
text.c_str(), emAddress, disbuf, code_addr);
|
||||
return;
|
||||
}
|
||||
|
||||
// This generates some fairly heavy trampolines, but it doesn't really hurt.
|
||||
// Only instructions that access I/O will get these, and there won't be that
|
||||
// many of them in a typical program/game.
|
||||
|
@ -56,36 +41,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
|||
if (!IsInSpace(codePtr))
|
||||
return false; // this will become a regular crash real soon after this
|
||||
|
||||
InstructionInfo info = {};
|
||||
|
||||
if (!DisassembleMov(codePtr, &info))
|
||||
{
|
||||
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info.otherReg != RMEM)
|
||||
{
|
||||
PanicAlert("BackPatch : Base reg not RMEM."
|
||||
"\n\nAttempted to access %08x.",
|
||||
emAddress);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
|
||||
{
|
||||
PanicAlert("BackPatch: MOVBE is too small");
|
||||
return false;
|
||||
}
|
||||
|
||||
auto it = registersInUseAtLoc.find(codePtr);
|
||||
if (it == registersInUseAtLoc.end())
|
||||
auto it = backPatchInfo.find(codePtr);
|
||||
if (it == backPatchInfo.end())
|
||||
{
|
||||
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
|
||||
return false;
|
||||
}
|
||||
|
||||
BitSet32 registersInUse = it->second;
|
||||
TrampolineInfo& info = it->second;
|
||||
|
||||
u8* exceptionHandler = nullptr;
|
||||
if (jit->jo.memcheck)
|
||||
|
@ -95,110 +58,67 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
|||
exceptionHandler = it2->second;
|
||||
}
|
||||
|
||||
// Compute the start and length of the memory operation, including
|
||||
// any byteswapping.
|
||||
int totalSize = info.instructionSize;
|
||||
u8* start = codePtr;
|
||||
if (!info.isMemoryWrite)
|
||||
{
|
||||
// MOVBE and single bytes don't need to be swapped.
|
||||
if (!info.byteSwap && info.operandSize > 1)
|
||||
{
|
||||
// REX
|
||||
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||
totalSize++;
|
||||
|
||||
// BSWAP
|
||||
if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
|
||||
totalSize += 2;
|
||||
|
||||
if (info.operandSize == 2)
|
||||
{
|
||||
// operand size override
|
||||
if (codePtr[totalSize] == 0x66)
|
||||
totalSize++;
|
||||
// REX
|
||||
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||
totalSize++;
|
||||
// SAR/ROL
|
||||
_assert_(codePtr[totalSize] == 0xC1 &&
|
||||
(codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
|
||||
info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
|
||||
totalSize += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (info.byteSwap || info.hasImmediate)
|
||||
{
|
||||
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
|
||||
}
|
||||
else
|
||||
{
|
||||
// We entered here with a BSWAP-ed register. We'll have to swap it back.
|
||||
u64* ptr = ContextRN(ctx, info.regOperandReg);
|
||||
int bswapSize = 0;
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 1:
|
||||
bswapSize = 0;
|
||||
break;
|
||||
case 2:
|
||||
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap16((u16)*ptr);
|
||||
break;
|
||||
case 4:
|
||||
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap32((u32)*ptr);
|
||||
break;
|
||||
case 8:
|
||||
bswapSize = 3;
|
||||
*ptr = Common::swap64(*ptr);
|
||||
break;
|
||||
}
|
||||
start = codePtr - bswapSize;
|
||||
totalSize += bswapSize;
|
||||
}
|
||||
}
|
||||
|
||||
// In the trampoline code, we jump back into the block at the beginning
|
||||
// of the next instruction. The next instruction comes immediately
|
||||
// after the backpatched operation, or BACKPATCH_SIZE bytes after the start
|
||||
// of the backpatched operation, whichever comes last. (The JIT inserts NOPs
|
||||
// into the original code if necessary to ensure there is enough space
|
||||
// to insert the backpatch jump.)
|
||||
int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
|
||||
u8* returnPtr = start + 5 + padding;
|
||||
|
||||
jit->js.generatingTrampoline = true;
|
||||
jit->js.trampolineExceptionHandler = exceptionHandler;
|
||||
|
||||
// Generate the trampoline.
|
||||
const u8* trampoline;
|
||||
if (info.isMemoryWrite)
|
||||
{
|
||||
// TODO: special case FIFO writes.
|
||||
auto it3 = pcAtLoc.find(codePtr);
|
||||
if (it3 == pcAtLoc.end())
|
||||
{
|
||||
PanicAlert("BackPatch: no pc entry for address %p", codePtr);
|
||||
return false;
|
||||
}
|
||||
const u8* trampoline = trampolines.GenerateTrampoline(info);
|
||||
jit->js.generatingTrampoline = false;
|
||||
jit->js.trampolineExceptionHandler = nullptr;
|
||||
|
||||
u32 pc = it3->second;
|
||||
trampoline =
|
||||
trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
|
||||
}
|
||||
else
|
||||
{
|
||||
trampoline =
|
||||
trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
|
||||
}
|
||||
u8* start = info.start;
|
||||
|
||||
// Patch the original memory operation.
|
||||
XEmitter emitter(start);
|
||||
emitter.JMP(trampoline, true);
|
||||
for (int i = 0; i < padding; ++i)
|
||||
// NOPs become dead code
|
||||
const u8* end = info.start + info.len;
|
||||
for (const u8* i = emitter.GetCodePtr(); i < end; ++i)
|
||||
emitter.INT3();
|
||||
ctx->CTX_PC = (u64)start;
|
||||
|
||||
// Rewind time to just before the start of the write block. If we swapped memory
|
||||
// before faulting (eg: the store+swap was not an atomic op like MOVBE), let's
|
||||
// swap it back so that the swap can happen again (this double swap isn't ideal but
|
||||
// only happens the first time we fault).
|
||||
if (info.nonAtomicSwapStoreSrc != INVALID_REG)
|
||||
{
|
||||
u64* ptr = ContextRN(ctx, info.nonAtomicSwapStoreSrc);
|
||||
switch (info.accessSize << 3)
|
||||
{
|
||||
case 8:
|
||||
// No need to swap a byte
|
||||
break;
|
||||
case 16:
|
||||
*ptr = Common::swap16(static_cast<u16>(*ptr));
|
||||
break;
|
||||
case 32:
|
||||
*ptr = Common::swap32(static_cast<u32>(*ptr));
|
||||
break;
|
||||
case 64:
|
||||
*ptr = Common::swap64(static_cast<u64>(*ptr));
|
||||
break;
|
||||
default:
|
||||
_dbg_assert_(DYNA_REC, 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// This is special code to undo the LEA in SafeLoadToReg if it clobbered the address
|
||||
// register in the case where reg_value shared the same location as opAddress.
|
||||
if (info.offsetAddedToAddress)
|
||||
{
|
||||
u64* ptr = ContextRN(ctx, info.op_arg.GetSimpleReg());
|
||||
*ptr -= static_cast<u32>(info.offset);
|
||||
}
|
||||
|
||||
ctx->CTX_PC = reinterpret_cast<u64>(trampoline);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -96,6 +96,9 @@ protected:
|
|||
bool carryFlagSet;
|
||||
bool carryFlagInverted;
|
||||
|
||||
bool generatingTrampoline;
|
||||
u8* trampolineExceptionHandler;
|
||||
|
||||
int fifoBytesThisBlock;
|
||||
|
||||
PPCAnalyst::BlockStats st;
|
||||
|
|
|
@ -18,6 +18,26 @@ using namespace Gen;
|
|||
|
||||
void EmuCodeBlock::MemoryExceptionCheck()
|
||||
{
|
||||
// TODO: We really should untangle the trampolines, exception handlers and
|
||||
// memory checks.
|
||||
|
||||
// If we are currently generating a trampoline for a failed fastmem
|
||||
// load/store, the trampoline generator will have stashed the exception
|
||||
// handler (that we previously generated after the fastmem instruction) in
|
||||
// trampolineExceptionHandler.
|
||||
if (jit->js.generatingTrampoline)
|
||||
{
|
||||
if (jit->js.trampolineExceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, jit->js.trampolineExceptionHandler);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If memcheck (ie: MMU) mode is enabled and we haven't generated an
|
||||
// exception handler for this instruction yet, we will generate an
|
||||
// exception check.
|
||||
if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
|
||||
|
@ -42,10 +62,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
|
|||
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
|
||||
}
|
||||
|
||||
u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend)
|
||||
bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend, MovInfo* info)
|
||||
{
|
||||
u8* result;
|
||||
bool offsetAddedToAddress = false;
|
||||
OpArg memOperand;
|
||||
if (opAddress.IsSimpleReg())
|
||||
{
|
||||
|
@ -57,6 +77,11 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
|||
// place to address the issue.)
|
||||
if ((u32)offset >= 0x1000)
|
||||
{
|
||||
// This method can potentially clobber the address if it shares a register
|
||||
// with the load target. In this case we can just subtract offset from the
|
||||
// register (see JitBackpatch for this implementation).
|
||||
offsetAddedToAddress = (reg_value == opAddress.GetSimpleReg());
|
||||
|
||||
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
|
||||
opAddress = R(reg_value);
|
||||
offset = 0;
|
||||
|
@ -74,9 +99,8 @@ u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
|||
memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
|
||||
}
|
||||
|
||||
result = GetWritableCodePtr();
|
||||
LoadAndSwap(accessSize, reg_value, memOperand, signExtend);
|
||||
return result;
|
||||
LoadAndSwap(accessSize, reg_value, memOperand, signExtend, info);
|
||||
return offsetAddedToAddress;
|
||||
}
|
||||
|
||||
// Visitor that generates code to read a MMIO value.
|
||||
|
@ -231,72 +255,43 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
|
|||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
|
||||
s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
|
||||
{
|
||||
registersInUse[reg_value] = false;
|
||||
if (jit->jo.fastmem && !opAddress.IsImm() &&
|
||||
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)))
|
||||
{
|
||||
u8* mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||
bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
|
||||
|
||||
registersInUseAtLoc[mov] = registersInUse;
|
||||
jit->js.fastmemLoadStore = mov;
|
||||
registersInUse[reg_value] = false;
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
|
||||
{
|
||||
u8* backpatchStart = GetWritableCodePtr();
|
||||
MovInfo mov;
|
||||
bool offsetAddedToAddress =
|
||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov);
|
||||
TrampolineInfo& info = backPatchInfo[mov.address];
|
||||
info.pc = jit->js.compilerPC;
|
||||
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
|
||||
info.start = backpatchStart;
|
||||
info.read = true;
|
||||
info.op_reg = reg_value;
|
||||
info.op_arg = opAddress;
|
||||
info.offsetAddedToAddress = offsetAddedToAddress;
|
||||
info.accessSize = accessSize >> 3;
|
||||
info.offset = offset;
|
||||
info.registersInUse = registersInUse;
|
||||
info.flags = flags;
|
||||
info.signExtend = signExtend;
|
||||
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
|
||||
if (padding > 0)
|
||||
{
|
||||
NOP(padding);
|
||||
}
|
||||
info.len = static_cast<u32>(GetCodePtr() - info.start);
|
||||
|
||||
jit->js.fastmemLoadStore = mov.address;
|
||||
return;
|
||||
}
|
||||
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
if (opAddress.IsImm())
|
||||
{
|
||||
u32 address = opAddress.Imm32() + offset;
|
||||
|
||||
// If the address is known to be RAM, just load it directly.
|
||||
if (PowerPC::IsOptimizableRAMAddress(address))
|
||||
{
|
||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// If the address maps to an MMIO register, inline MMIO read code.
|
||||
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
|
||||
if (accessSize != 64 && mmioAddress)
|
||||
{
|
||||
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
|
||||
signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
// Fall back to general-case code.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U64, address);
|
||||
break;
|
||||
case 32:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U32, address);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U16_ZX, address);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionC((void*)&PowerPC::Read_U8_ZX, address);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
MemoryExceptionCheck();
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -310,8 +305,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
|||
}
|
||||
|
||||
FixupBranch exit;
|
||||
if (!jit->jo.alwaysUseMemFuncs)
|
||||
if (!jit->jo.alwaysUseMemFuncs && !slowmem)
|
||||
{
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
|
||||
UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
|
||||
if (farcode.Enabled())
|
||||
|
@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
|||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
|
||||
if (!jit->jo.alwaysUseMemFuncs)
|
||||
if (!jit->jo.alwaysUseMemFuncs && !slowmem)
|
||||
{
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
|
@ -361,6 +361,56 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
|
|||
}
|
||||
}
|
||||
|
||||
// Emits code that loads accessSize bits from a compile-time-constant guest address into
// reg_value. Three strategies are tried in order:
//   1. direct load, when PowerPC::IsOptimizableRAMAddress() accepts the address;
//   2. inlined MMIO read, when the address maps to an MMIO register (never for 64-bit accesses);
//   3. a call to the matching PowerPC::Read_U* function, followed by a memory exception check.
// registersInUse is the set of host registers saved and restored around the call in path 3
// (it is also forwarded to the MMIO read generator). signExtend requests sign-extension of
// sub-32-bit results.
void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
|
||||
BitSet32 registersInUse, bool signExtend)
|
||||
{
|
||||
// If the address is known to be RAM, just load it directly.
|
||||
if (PowerPC::IsOptimizableRAMAddress(address))
|
||||
{
|
||||
UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
// If the address maps to an MMIO register, inline MMIO read code.
|
||||
u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
|
||||
// 64-bit accesses are excluded from the inlined MMIO path and fall through to the call below.
if (accessSize != 64 && mmioAddress)
|
||||
{
|
||||
MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
|
||||
signExtend);
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
// Fall back to general-case code.
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
// Pick the read function matching the access width; any other width emits no call at all.
switch (accessSize)
|
||||
{
|
||||
case 64:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U64), address);
|
||||
break;
|
||||
case 32:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U32), address);
|
||||
break;
|
||||
case 16:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U16_ZX), address);
|
||||
break;
|
||||
case 8:
|
||||
ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U8_ZX), address);
|
||||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
|
||||
// The exception check is emitted before the result is copied out of ABI_RETURN.
MemoryExceptionCheck();
|
||||
if (signExtend && accessSize < 32)
|
||||
{
|
||||
// Need to sign extend values coming from the Read_U* functions.
|
||||
MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
// No move is emitted when the result is already in the destination register.
// NOTE(review): presumably the Read_U*_ZX helpers already return a zero-extended value -- confirm.
else if (reg_value != ABI_RETURN)
|
||||
{
|
||||
MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
|
||||
}
|
||||
}
|
||||
|
||||
static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
|
||||
{
|
||||
if (accessSize == 32)
|
||||
|
@ -371,10 +421,15 @@ static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
|
|||
return Imm8(reg_value.Imm8());
|
||||
}
|
||||
|
||||
u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
bool swap)
|
||||
void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
bool swap, MovInfo* info)
|
||||
{
|
||||
u8* result = GetWritableCodePtr();
|
||||
if (info)
|
||||
{
|
||||
info->address = GetWritableCodePtr();
|
||||
info->nonAtomicSwapStore = false;
|
||||
}
|
||||
|
||||
OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
|
||||
if (reg_value.IsImm())
|
||||
{
|
||||
|
@ -384,22 +439,19 @@ u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
|||
}
|
||||
else if (swap)
|
||||
{
|
||||
result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg());
|
||||
SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(accessSize, dest, reg_value);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static OpArg FixImmediate(int accessSize, OpArg arg)
|
||||
{
|
||||
if (arg.IsImm())
|
||||
{
|
||||
arg = accessSize == 8 ? Imm8((u8)arg.Imm32()) : accessSize == 16 ? Imm16((u16)arg.Imm32()) :
|
||||
Imm32((u32)arg.Imm32());
|
||||
arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
|
||||
}
|
||||
return arg;
|
||||
}
|
||||
|
@ -475,25 +527,38 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
|
|||
void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
|
||||
BitSet32 registersInUse, int flags)
|
||||
{
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
|
||||
|
||||
// set the correct immediate format
|
||||
reg_value = FixImmediate(accessSize, reg_value);
|
||||
|
||||
// TODO: support byte-swapped non-immediate fastmem stores
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
|
||||
(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)))
|
||||
if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
|
||||
{
|
||||
const u8* backpatchStart = GetCodePtr();
|
||||
u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset,
|
||||
!(flags & SAFE_LOADSTORE_NO_SWAP));
|
||||
u8* backpatchStart = GetWritableCodePtr();
|
||||
MovInfo mov;
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov);
|
||||
TrampolineInfo& info = backPatchInfo[mov.address];
|
||||
info.pc = jit->js.compilerPC;
|
||||
info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
|
||||
info.start = backpatchStart;
|
||||
info.read = false;
|
||||
info.op_arg = reg_value;
|
||||
info.op_reg = reg_addr;
|
||||
info.offsetAddedToAddress = false;
|
||||
info.accessSize = accessSize >> 3;
|
||||
info.offset = offset;
|
||||
info.registersInUse = registersInUse;
|
||||
info.flags = flags;
|
||||
ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
|
||||
if (padding > 0)
|
||||
{
|
||||
NOP(padding);
|
||||
}
|
||||
info.len = static_cast<u32>(GetCodePtr() - info.start);
|
||||
|
||||
jit->js.fastmemLoadStore = mov.address;
|
||||
|
||||
registersInUseAtLoc[mov] = registersInUse;
|
||||
pcAtLoc[mov] = jit->js.compilerPC;
|
||||
jit->js.fastmemLoadStore = mov;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -510,21 +575,22 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
|||
}
|
||||
}
|
||||
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
|
||||
|
||||
FixupBranch slow, exit;
|
||||
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
if (!slowmem)
|
||||
{
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
// The following masks the region used by the GC/Wii virtual memory lib
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
|
||||
if (farcode.Enabled())
|
||||
SwitchToFarCode();
|
||||
else
|
||||
exit = J(true);
|
||||
SetJumpTarget(slow);
|
||||
}
|
||||
|
||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||
MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
|
||||
|
@ -563,12 +629,18 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
|
|||
break;
|
||||
}
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
|
||||
if (farcode.Enabled())
|
||||
|
||||
MemoryExceptionCheck();
|
||||
|
||||
if (!slowmem)
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
if (farcode.Enabled())
|
||||
{
|
||||
exit = J(true);
|
||||
SwitchToNearCode();
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
|
||||
|
@ -1055,7 +1127,6 @@ void EmuCodeBlock::JitClearCA()
|
|||
|
||||
void EmuCodeBlock::Clear()
|
||||
{
|
||||
registersInUseAtLoc.clear();
|
||||
pcAtLoc.clear();
|
||||
backPatchInfo.clear();
|
||||
exceptionHandlerAtLoc.clear();
|
||||
}
|
||||
|
|
|
@ -59,6 +59,47 @@ static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
|
|||
static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
|
||||
static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
|
||||
|
||||
// Stores information we need to backpatch a MOV with a call to the slow read/write path after
|
||||
// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
|
||||
// big.
// One entry is recorded per fastmem load/store, keyed by the address of its MOV.
|
||||
struct TrampolineInfo final
|
||||
{
|
||||
// The start of the load/store operation that failed -- we will patch a JMP here
|
||||
u8* start;
|
||||
|
||||
// The start + len = end of the load/store operation (points to the next instruction)
// The length includes any NOP padding added to reach BACKPATCH_SIZE bytes.
|
||||
u32 len;
|
||||
|
||||
// The PPC PC for the current load/store block
|
||||
u32 pc;
|
||||
|
||||
// Saved because we need these to make the ABI call in the trampoline
|
||||
BitSet32 registersInUse;
|
||||
|
||||
// Source register of the MOV when it was emitted as a non-atomic swap+store,
// otherwise INVALID_REG
|
||||
Gen::X64Reg nonAtomicSwapStoreSrc;
|
||||
|
||||
// src/dest for load/store
// Loads: op_reg is the destination register and op_arg is the address operand.
// Stores: op_arg is the value being written and op_reg is the address register.
|
||||
s32 offset;
|
||||
Gen::X64Reg op_reg;
|
||||
Gen::OpArg op_arg;
|
||||
|
||||
// Original SafeLoadXXX/SafeStoreXXX flags
|
||||
u8 flags;
|
||||
|
||||
// Memory access size (in bytes)
|
||||
u8 accessSize : 4;
|
||||
|
||||
// true if this is a read op vs a write
|
||||
bool read : 1;
|
||||
|
||||
// for read operations, true if needs sign-extension after load
|
||||
bool signExtend : 1;
|
||||
|
||||
// Set to true if we added the offset to the address and need to undo it
|
||||
bool offsetAddedToAddress : 1;
|
||||
};
|
||||
|
||||
// Like XCodeBlock but has some utilities for memory access.
|
||||
class EmuCodeBlock : public Gen::X64CodeBlock
|
||||
{
|
||||
|
@ -88,15 +129,15 @@ public:
|
|||
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
|
||||
s32 offset, bool signExtend = false);
|
||||
// these report the address of the MOV, for backpatching, through the optional MovInfo argument
|
||||
u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true);
|
||||
u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true)
|
||||
void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
|
||||
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
|
||||
s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
|
||||
{
|
||||
return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
|
||||
UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
|
||||
}
|
||||
u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend);
|
||||
bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
|
||||
bool signExtend, Gen::MovInfo* info = nullptr);
|
||||
void UnsafeWriteGatherPipe(int accessSize);
|
||||
|
||||
// Generate a load/write from the MMIO handler for a given address. Only
|
||||
|
@ -108,12 +149,18 @@ public:
|
|||
{
|
||||
SAFE_LOADSTORE_NO_SWAP = 1,
|
||||
SAFE_LOADSTORE_NO_PROLOG = 2,
|
||||
// This indicates that the load/store being generated cannot be patched (and thus can't use fastmem)
|
||||
SAFE_LOADSTORE_NO_FASTMEM = 4,
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
|
||||
SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
|
||||
// Force slowmem (used when generating fallbacks in trampolines)
|
||||
SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
|
||||
};
|
||||
|
||||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
|
||||
BitSet32 registersInUse, bool signExtend, int flags = 0);
|
||||
void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize,
|
||||
BitSet32 registersInUse, bool signExtend);
|
||||
|
||||
// Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
|
||||
// reg_value if the load fails and jo.memcheck is enabled.
|
||||
// Works with immediate inputs and simple registers only.
|
||||
|
@ -158,7 +205,6 @@ public:
|
|||
void Clear();
|
||||
|
||||
protected:
|
||||
std::unordered_map<u8*, BitSet32> registersInUseAtLoc;
|
||||
std::unordered_map<u8*, u32> pcAtLoc;
|
||||
std::unordered_map<u8*, TrampolineInfo> backPatchInfo;
|
||||
std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
|
||||
};
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
#include "Common/CommonTypes.h"
|
||||
#include "Common/JitRegister.h"
|
||||
#include "Common/x64ABI.h"
|
||||
#include "Common/x64Analyzer.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/JitCommon/JitBase.h"
|
||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||
|
@ -37,150 +36,50 @@ void TrampolineCache::Shutdown()
|
|||
FreeCodeSpace();
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info,
|
||||
BitSet32 registersInUse, u8* exceptionHandler,
|
||||
u8* returnPtr)
|
||||
// Generates the slow-path trampoline for the fastmem access described by info and returns the
// pointer produced by the read or write generator, chosen by info.read.
const u8* TrampolineCache::GenerateTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
// Loads and stores need different trampolines; info.read tells them apart.
if (info.read)
|
||||
{
|
||||
return GenerateReadTrampoline(info);
|
||||
}
|
||||
|
||||
return GenerateWriteTrampoline(info);
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
const u8* trampoline = GetCodePtr();
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
int stack_offset = 0;
|
||||
bool push_param1 = registersInUse[ABI_PARAM1];
|
||||
|
||||
if (push_param1)
|
||||
{
|
||||
PUSH(ABI_PARAM1);
|
||||
stack_offset = 8;
|
||||
registersInUse[ABI_PARAM1] = 0;
|
||||
}
|
||||
SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
|
||||
info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
|
||||
|
||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||
if (addrReg != ABI_PARAM1 && info.displacement)
|
||||
LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
|
||||
else if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(addrReg));
|
||||
else if (info.displacement)
|
||||
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
||||
JMP(info.start + info.len, true);
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void*)&PowerPC::Read_U64);
|
||||
break;
|
||||
case 4:
|
||||
CALL((void*)&PowerPC::Read_U32);
|
||||
break;
|
||||
case 2:
|
||||
CALL((void*)&PowerPC::Read_U16);
|
||||
break;
|
||||
case 1:
|
||||
CALL((void*)&PowerPC::Read_U8);
|
||||
break;
|
||||
}
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
|
||||
|
||||
if (push_param1)
|
||||
POP(ABI_PARAM1);
|
||||
|
||||
if (exceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, exceptionHandler);
|
||||
}
|
||||
|
||||
if (info.signExtend)
|
||||
MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
|
||||
else if (dataReg != ABI_RETURN || info.operandSize < 4)
|
||||
MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
|
||||
|
||||
JMP(returnPtr, true);
|
||||
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_%x", info.pc);
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info,
|
||||
BitSet32 registersInUse, u8* exceptionHandler,
|
||||
u8* returnPtr, u32 pc)
|
||||
const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
const u8* trampoline = GetCodePtr();
|
||||
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
|
||||
// Don't treat FIFO writes specially for now because they require a burst
|
||||
// check anyway.
|
||||
|
||||
// PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
|
||||
MOV(32, PPCSTATE(pc), Imm32(pc));
|
||||
MOV(32, PPCSTATE(pc), Imm32(info.pc));
|
||||
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
|
||||
SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
|
||||
info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
|
||||
|
||||
if (info.hasImmediate)
|
||||
{
|
||||
if (addrReg != ABI_PARAM2 && info.displacement)
|
||||
LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
|
||||
else if (addrReg != ABI_PARAM2)
|
||||
MOV(32, R(ABI_PARAM2), R(addrReg));
|
||||
else if (info.displacement)
|
||||
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
||||
JMP(info.start + info.len, true);
|
||||
|
||||
// we have to swap back the immediate to pass it to the write functions
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
PanicAlert("Invalid 64-bit immediate!");
|
||||
break;
|
||||
case 4:
|
||||
MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
|
||||
break;
|
||||
case 2:
|
||||
MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
|
||||
break;
|
||||
case 1:
|
||||
MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int dataRegSize = info.operandSize == 8 ? 64 : 32;
|
||||
MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
|
||||
}
|
||||
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 8:
|
||||
CALL((void*)&PowerPC::Write_U64);
|
||||
break;
|
||||
case 4:
|
||||
CALL((void*)&PowerPC::Write_U32);
|
||||
break;
|
||||
case 2:
|
||||
CALL((void*)&PowerPC::Write_U16);
|
||||
break;
|
||||
case 1:
|
||||
CALL((void*)&PowerPC::Write_U8);
|
||||
break;
|
||||
}
|
||||
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
|
||||
if (exceptionHandler)
|
||||
{
|
||||
TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
|
||||
J_CC(CC_NZ, exceptionHandler);
|
||||
}
|
||||
JMP(returnPtr, true);
|
||||
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
|
||||
JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", info.pc);
|
||||
return trampoline;
|
||||
}
|
||||
|
|
|
@ -7,21 +7,21 @@
|
|||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
#include "Core/PowerPC/JitCommon/Jit_Util.h"
|
||||
|
||||
struct InstructionInfo;
|
||||
|
||||
// We need at least this many bytes for backpatching.
|
||||
const int BACKPATCH_SIZE = 5;
|
||||
|
||||
class TrampolineCache : public Gen::X64CodeBlock
|
||||
class TrampolineCache : public EmuCodeBlock
|
||||
{
|
||||
const u8* GenerateReadTrampoline(const TrampolineInfo& info);
|
||||
const u8* GenerateWriteTrampoline(const TrampolineInfo& info);
|
||||
|
||||
public:
|
||||
void Init(int size);
|
||||
void Shutdown();
|
||||
|
||||
const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
|
||||
u8* exceptionHandler, u8* returnPtr);
|
||||
const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
|
||||
u8* exceptionHandler, u8* returnPtr, u32 pc);
|
||||
const u8* GenerateTrampoline(const TrampolineInfo& info);
|
||||
void ClearCodeSpace();
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue