Refactor fastmem/trampoline code.
Simplification to avoid reading back the generated instructions, allowing us to handle all possible cases.
commit b1296a7825
parent ddc9e414ee
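In rough terms, the change replaces "disassemble the faulting MOV to figure out what the JIT emitted" with "record what we emitted at emit time and look it up by address when a fault occurs". The stand-alone sketch below illustrates that idea only; the names and types are simplified placeholders, not Dolphin's real API (see the actual diff that follows for the real TrampolineInfo/MovInfo structures):

#include <cstdint>
#include <map>

// Simplified stand-in for the real TrampolineInfo recorded per fastmem access.
struct TrampolineInfoSketch
{
  uint8_t* start;   // first byte of the fastmem access; the JMP to the trampoline is patched here
  uint32_t len;     // bytes covered by the access (including padding NOPs)
  bool read;        // load vs. store
  int accessSize;   // access size in bytes
};

static std::map<const uint8_t*, TrampolineInfoSketch> s_backpatch_info;

// Called by the emitter right after generating a fastmem load/store.
void RecordFastmemAccess(const uint8_t* mov_address, const TrampolineInfoSketch& info)
{
  s_backpatch_info[mov_address] = info;
}

// Called from the fault handler: no instruction decoding is required any more,
// the faulting address is simply looked up in the map filled at emit time.
const TrampolineInfoSketch* LookUpFastmemAccess(const uint8_t* faulting_instruction)
{
  const auto it = s_backpatch_info.find(faulting_instruction);
  return it == s_backpatch_info.end() ? nullptr : &it->second;
}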
@@ -29,7 +29,6 @@ set(SRCS Analytics.cpp
 TraversalClient.cpp
 Version.cpp
 x64ABI.cpp
-x64Analyzer.cpp
 x64Emitter.cpp
 Crypto/bn.cpp
 Crypto/ec.cpp
@@ -133,7 +133,6 @@
 <ClInclude Include="TraversalClient.h" />
 <ClInclude Include="TraversalProto.h" />
 <ClInclude Include="x64ABI.h" />
-<ClInclude Include="x64Analyzer.h" />
 <ClInclude Include="x64Emitter.h" />
 <ClInclude Include="Crypto\bn.h" />
 <ClInclude Include="Crypto\ec.h" />
@@ -178,7 +177,6 @@
 <ClCompile Include="ucrtFreadWorkaround.cpp" />
 <ClCompile Include="Version.cpp" />
 <ClCompile Include="x64ABI.cpp" />
-<ClCompile Include="x64Analyzer.cpp" />
 <ClCompile Include="x64CPUDetect.cpp" />
 <ClCompile Include="x64Emitter.cpp" />
 <ClCompile Include="x64FPURoundMode.cpp" />
@@ -62,7 +62,6 @@
 <ClInclude Include="Thread.h" />
 <ClInclude Include="Timer.h" />
 <ClInclude Include="x64ABI.h" />
-<ClInclude Include="x64Analyzer.h" />
 <ClInclude Include="x64Emitter.h" />
 <ClInclude Include="Logging\ConsoleListener.h">
 <Filter>Logging</Filter>
@@ -253,7 +252,6 @@
 <ClCompile Include="Timer.cpp" />
 <ClCompile Include="Version.cpp" />
 <ClCompile Include="x64ABI.cpp" />
-<ClCompile Include="x64Analyzer.cpp" />
 <ClCompile Include="x64CPUDetect.cpp" />
 <ClCompile Include="x64Emitter.cpp" />
 <ClCompile Include="x64FPURoundMode.cpp" />
@@ -1,233 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#include "Common/x64Analyzer.h"
-
-bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info)
-{
-unsigned const char* startCodePtr = codePtr;
-u8 rex = 0;
-u32 opcode;
-int opcode_length;
-
-// Check for regular prefix
-info->operandSize = 4;
-info->zeroExtend = false;
-info->signExtend = false;
-info->hasImmediate = false;
-info->isMemoryWrite = false;
-info->byteSwap = false;
-
-u8 modRMbyte = 0;
-u8 sibByte = 0;
-bool hasModRM = false;
-
-int displacementSize = 0;
-
-if (*codePtr == 0x66)
-{
-info->operandSize = 2;
-codePtr++;
-}
-else if (*codePtr == 0x67)
-{
-codePtr++;
-}
-
-// Check for REX prefix
-if ((*codePtr & 0xF0) == 0x40)
-{
-rex = *codePtr;
-if (rex & 8) // REX.W
-{
-info->operandSize = 8;
-}
-codePtr++;
-}
-
-opcode = *codePtr++;
-opcode_length = 1;
-if (opcode == 0x0F)
-{
-opcode = (opcode << 8) | *codePtr++;
-opcode_length = 2;
-if ((opcode & 0xFB) == 0x38)
-{
-opcode = (opcode << 8) | *codePtr++;
-opcode_length = 3;
-}
-}
-
-switch (opcode_length)
-{
-case 1:
-if ((opcode & 0xF0) == 0x80 || ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
-{
-modRMbyte = *codePtr++;
-hasModRM = true;
-}
-break;
-case 2:
-if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
-((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) || (opcode & 0xF0) == 0x30 ||
-(opcode & 0xFF) == 0x77 || (opcode & 0xF0) == 0x80 || (opcode & 0xF8) == 0xC8)
-{
-// No mod R/M byte
-}
-else
-{
-modRMbyte = *codePtr++;
-hasModRM = true;
-}
-break;
-case 3:
-// TODO: support more 3-byte opcode instructions
-if ((opcode & 0xFE) == 0xF0)
-{
-modRMbyte = *codePtr++;
-hasModRM = true;
-}
-break;
-}
-
-if (hasModRM)
-{
-ModRM mrm(modRMbyte, rex);
-info->regOperandReg = mrm.reg;
-if (mrm.mod < 3)
-{
-if (mrm.rm == 4)
-{
-// SIB byte
-sibByte = *codePtr++;
-info->scaledReg = (sibByte >> 3) & 7;
-info->otherReg = (sibByte & 7);
-if (rex & 2)
-info->scaledReg += 8;
-if (rex & 1)
-info->otherReg += 8;
-}
-else
-{
-// info->scaledReg =
-}
-}
-if (mrm.mod == 1 || mrm.mod == 2)
-{
-if (mrm.mod == 1)
-displacementSize = 1;
-else
-displacementSize = 4;
-}
-}
-
-if (displacementSize == 1)
-info->displacement = (s32)(s8)*codePtr;
-else
-info->displacement = *((s32*)codePtr);
-codePtr += displacementSize;
-
-switch (opcode)
-{
-case 0xC6: // mem <- imm8
-info->isMemoryWrite = true;
-info->hasImmediate = true;
-info->immediate = *codePtr;
-info->operandSize = 1;
-codePtr++;
-break;
-
-case 0xC7: // mem <- imm16/32
-info->isMemoryWrite = true;
-switch (info->operandSize)
-{
-case 2:
-info->hasImmediate = true;
-info->immediate = *(u16*)codePtr;
-codePtr += 2;
-break;
-
-case 4:
-info->hasImmediate = true;
-info->immediate = *(u32*)codePtr;
-codePtr += 4;
-break;
-
-case 8:
-info->zeroExtend = true;
-info->immediate = *(u32*)codePtr;
-codePtr += 4;
-break;
-}
-break;
-
-case 0x88: // mem <- r8
-info->isMemoryWrite = true;
-if (info->operandSize != 4)
-{
-return false;
-}
-info->operandSize = 1;
-break;
-
-case 0x89: // mem <- r16/32/64
-info->isMemoryWrite = true;
-break;
-
-case 0x8A: // r8 <- mem
-if (info->operandSize != 4)
-{
-return false;
-}
-info->operandSize = 1;
-break;
-
-case 0x8B: // r16/32/64 <- mem
-break;
-
-case 0x0FB6: // movzx on byte
-info->zeroExtend = true;
-info->operandSize = 1;
-break;
-
-case 0x0FB7: // movzx on short
-info->zeroExtend = true;
-info->operandSize = 2;
-break;
-
-case 0x0FBE: // movsx on byte
-info->signExtend = true;
-info->operandSize = 1;
-break;
-
-case 0x0FBF: // movsx on short
-info->signExtend = true;
-info->operandSize = 2;
-break;
-
-case 0x0F38F0: // movbe read
-info->byteSwap = true;
-break;
-
-case 0x0F38F1: // movbe write
-info->byteSwap = true;
-info->isMemoryWrite = true;
-break;
-
-default:
-return false;
-}
-info->instructionSize = (int)(codePtr - startCodePtr);
-return true;
-}
-
-bool InstructionInfo::operator==(const InstructionInfo& other) const
-{
-return operandSize == other.operandSize && instructionSize == other.instructionSize &&
-regOperandReg == other.regOperandReg && otherReg == other.otherReg &&
-scaledReg == other.scaledReg && zeroExtend == other.zeroExtend &&
-signExtend == other.signExtend && hasImmediate == other.hasImmediate &&
-isMemoryWrite == other.isMemoryWrite && byteSwap == other.byteSwap &&
-immediate == other.immediate && displacement == other.displacement;
-}
@@ -1,44 +0,0 @@
-// Copyright 2008 Dolphin Emulator Project
-// Licensed under GPLv2+
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "Common/CommonTypes.h"
-
-struct InstructionInfo
-{
-int operandSize; // 8, 16, 32, 64
-int instructionSize;
-int regOperandReg;
-int otherReg;
-int scaledReg;
-bool zeroExtend;
-bool signExtend;
-bool hasImmediate;
-bool isMemoryWrite;
-bool byteSwap;
-u64 immediate;
-s32 displacement;
-
-bool operator==(const InstructionInfo& other) const;
-};
-
-struct ModRM
-{
-int mod, reg, rm;
-ModRM(u8 modRM, u8 rex)
-{
-mod = modRM >> 6;
-reg = ((modRM >> 3) & 7) | ((rex & 4) ? 8 : 0);
-rm = modRM & 7;
-}
-};
-
-enum AccessType
-{
-OP_ACCESS_READ = 0,
-OP_ACCESS_WRITE = 1
-};
-
-bool DisassembleMov(const unsigned char* codePtr, InstructionInfo* info);
@@ -1046,8 +1046,14 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src)
 WriteMOVBE(bits, 0xF1, src, dest);
 }
 
-void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend)
+void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend, MovInfo* info)
 {
+if (info)
+{
+info->address = GetWritableCodePtr();
+info->nonAtomicSwapStore = false;
+}
+
 switch (size)
 {
 case 8:
@@ -1083,20 +1089,28 @@ void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_ext
 }
 }
 
-u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
+void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info)
 {
-u8* mov_location = GetWritableCodePtr();
 if (cpu_info.bMOVBE)
 {
+if (info)
+{
+info->address = GetWritableCodePtr();
+info->nonAtomicSwapStore = false;
+}
 MOVBE(size, dst, src);
 }
 else
 {
 BSWAP(size, src);
-mov_location = GetWritableCodePtr();
+if (info)
+{
+info->address = GetWritableCodePtr();
+info->nonAtomicSwapStore = true;
+info->nonAtomicSwapStoreSrc = src;
+}
 MOV(size, dst, R(src));
 }
-return mov_location;
 }
 
 void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
@@ -203,6 +203,15 @@ enum FloatOp
 
 class XEmitter;
 
+// Information about a generated MOV op
+struct MovInfo final
+{
+u8* address;
+bool nonAtomicSwapStore;
+// valid iff nonAtomicSwapStore is true
+X64Reg nonAtomicSwapStoreSrc;
+};
+
 // RIP addressing does not benefit from micro op fusion on Core arch
 struct OpArg
 {
@@ -272,6 +281,27 @@ struct OpArg
 return (s8)offset;
 }
 
+OpArg AsImm64() const
+{
+_dbg_assert_(DYNA_REC, IsImm());
+return OpArg((u64)offset, SCALE_IMM64);
+}
+OpArg AsImm32() const
+{
+_dbg_assert_(DYNA_REC, IsImm());
+return OpArg((u32)offset, SCALE_IMM32);
+}
+OpArg AsImm16() const
+{
+_dbg_assert_(DYNA_REC, IsImm());
+return OpArg((u16)offset, SCALE_IMM16);
+}
+OpArg AsImm8() const
+{
+_dbg_assert_(DYNA_REC, IsImm());
+return OpArg((u8)offset, SCALE_IMM8);
+}
+
 void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const;
 bool IsImm() const
 {
@@ -625,8 +655,9 @@ public:
 // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
 void MOVBE(int bits, X64Reg dest, const OpArg& src);
 void MOVBE(int bits, const OpArg& dest, X64Reg src);
-void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false);
-u8* SwapAndStore(int size, const OpArg& dst, X64Reg src);
+void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false,
+MovInfo* info = nullptr);
+void SwapAndStore(int size, const OpArg& dst, X64Reg src, MovInfo* info = nullptr);
 
 // Available only on AMD >= Phenom or Intel >= Haswell
 void LZCNT(int bits, X64Reg dest, const OpArg& src);
@@ -8,7 +8,6 @@
 #include "Common/CommonFuncs.h"
 #include "Common/CommonTypes.h"
 #include "Common/Thread.h"
-#include "Common/x64Analyzer.h"
 
 #include "Core/HW/Memmap.h"
 #include "Core/MachineContext.h"
@@ -19,6 +19,7 @@
 #pragma once
 
 #include "Common/CommonTypes.h"
+#include "Common/x64ABI.h"
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
@@ -287,17 +287,11 @@ void Jit64::lXXx(UGeckoInstruction inst)
 SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, registersInUse, signExtend);
 
 if (update && storeAddress)
-{
-MemoryExceptionCheck();
 MOV(32, gpr.R(a), opAddress);
-}
 
 // TODO: support no-swap in SafeLoadToReg instead
 if (byte_reversed)
-{
-MemoryExceptionCheck();
 BSWAP(accessSize, gpr.RX(d));
-}
 
 gpr.UnlockAll();
 gpr.UnlockAllX();
@@ -507,10 +501,7 @@ void Jit64::stX(UGeckoInstruction inst)
 }
 
 if (update)
-{
-MemoryExceptionCheck();
 ADD(32, gpr.R(a), Imm32((u32)offset));
-}
 }
 gpr.UnlockAll();
 }
@@ -589,10 +580,7 @@ void Jit64::stXx(UGeckoInstruction inst)
 }
 
 if (update)
-{
-MemoryExceptionCheck();
 MOV(32, gpr.R(a), R(RSCRATCH2));
-}
 
 gpr.UnlockAll();
 gpr.UnlockAllX();
@@ -80,7 +80,6 @@ void Jit64::lfXXX(UGeckoInstruction inst)
 registersInUse[RSCRATCH2] = true;
 SafeLoadToReg(RSCRATCH, addr, single ? 32 : 64, offset, registersInUse, false);
 
-MemoryExceptionCheck();
 if (single)
 {
 ConvertSingleToDouble(fpr.RX(d), RSCRATCH, true);
@@ -193,10 +192,7 @@ void Jit64::stfXXX(UGeckoInstruction inst)
 SafeWriteRegToReg(RSCRATCH, RSCRATCH2, accessSize, offset, registersInUse);
 
 if (update)
-{
-MemoryExceptionCheck();
 MOV(32, gpr.R(a), R(RSCRATCH2));
-}
 
 fpr.UnlockAll();
 gpr.UnlockAll();
@@ -40,74 +40,6 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 u32 gqrValue = gqrIsConstant ? it->second & 0xffff : 0;
 
 gpr.Lock(a, b);
-if (gqrIsConstant && gqrValue == 0)
-{
-int storeOffset = 0;
-gpr.BindToRegister(a, true, update);
-X64Reg addr = gpr.RX(a);
-// TODO: this is kind of ugly :/ we should probably create a universal load/store address
-// calculation
-// function that handles all these weird cases, e.g. how non-fastmem loadstores clobber
-// addresses.
-bool storeAddress = (update && jo.memcheck) || !jo.fastmem;
-if (storeAddress)
-{
-addr = RSCRATCH2;
-MOV(32, R(addr), gpr.R(a));
-}
-if (indexed)
-{
-if (update)
-{
-ADD(32, R(addr), gpr.R(b));
-}
-else
-{
-addr = RSCRATCH2;
-if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-{
-LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
-}
-else
-{
-MOV(32, R(addr), gpr.R(b));
-if (a)
-ADD(32, R(addr), gpr.R(a));
-}
-}
-}
-else
-{
-if (update)
-ADD(32, R(addr), Imm32(offset));
-else
-storeOffset = offset;
-}
-
-fpr.Lock(s);
-if (w)
-{
-CVTSD2SS(XMM0, fpr.R(s));
-MOVD_xmm(R(RSCRATCH), XMM0);
-}
-else
-{
-CVTPD2PS(XMM0, fpr.R(s));
-MOVQ_xmm(R(RSCRATCH), XMM0);
-ROL(64, R(RSCRATCH), Imm8(32));
-}
-
-BitSet32 registersInUse = CallerSavedRegistersInUse();
-if (update && storeAddress)
-registersInUse[addr] = true;
-SafeWriteRegToReg(RSCRATCH, addr, w ? 32 : 64, storeOffset, registersInUse);
-MemoryExceptionCheck();
-if (update && storeAddress)
-MOV(32, gpr.R(a), R(addr));
-gpr.UnlockAll();
-fpr.UnlockAll();
-return;
-}
 gpr.FlushLockX(RSCRATCH_EXTRA);
 if (update)
 gpr.BindToRegister(a, true, true);
@@ -130,44 +62,35 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 if (update && !jo.memcheck)
 MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
 
+if (w)
+CVTSD2SS(XMM0, fpr.R(s)); // one
+else
+CVTPD2PS(XMM0, fpr.R(s)); // pair
+
 if (gqrIsConstant)
 {
-// Paired stores don't yield any real change in performance right now, but if we can
-// improve fastmem support this might change
-//#define INLINE_PAIRED_STORES
-#ifdef INLINE_PAIRED_STORES
-if (w)
-{
-// One value
-CVTSD2SS(XMM0, fpr.R(s));
-GenQuantizedStore(true, static_cast<EQuantizeType>(gqrValue & 0x7), (gqrValue & 0x3F00) >> 8);
-}
-else
-{
-// Pair of values
-CVTPD2PS(XMM0, fpr.R(s));
-GenQuantizedStore(false, static_cast<EQuantizeType>(gqrValue & 0x7),
-(gqrValue & 0x3F00) >> 8);
-}
-#else
-// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
-// with just the scale bits.
 int type = gqrValue & 0x7;
-MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
 
-if (w)
+// Paired stores (other than w/type zero) don't yield any real change in
+// performance right now, but if we can improve fastmem support this might change
+if (gqrValue == 0)
 {
-// One value
-CVTSD2SS(XMM0, fpr.R(s));
-CALL(asm_routines.singleStoreQuantized[type]);
+if (w)
+GenQuantizedStore(true, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
+else
+GenQuantizedStore(false, static_cast<EQuantizeType>(type), (gqrValue & 0x3F00) >> 8);
 }
 else
 {
-// Pair of values
-CVTPD2PS(XMM0, fpr.R(s));
-CALL(asm_routines.pairedStoreQuantized[type]);
+// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
+// with just the scale bits.
+MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
+
+if (w)
+CALL(asm_routines.singleStoreQuantized[type]);
+else
+CALL(asm_routines.pairedStoreQuantized[type]);
 }
-#endif
 }
 else
 {
@@ -180,22 +103,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
 
 if (w)
-{
-// One value
-CVTSD2SS(XMM0, fpr.R(s));
 CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
-}
 else
-{
-// Pair of values
-CVTPD2PS(XMM0, fpr.R(s));
 CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
-}
 }
 
 if (update && jo.memcheck)
 {
-MemoryExceptionCheck();
 if (indexed)
 ADD(32, gpr.R(a), gpr.R(b));
 else
@@ -226,113 +140,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
 
 gpr.Lock(a, b);
 
-if (gqrIsConstant && gqrValue == 0)
-{
-s32 loadOffset = 0;
-gpr.BindToRegister(a, true, update);
-X64Reg addr = gpr.RX(a);
-if (update && jo.memcheck)
-{
-addr = RSCRATCH2;
-MOV(32, R(addr), gpr.R(a));
-}
-if (indexed)
-{
-if (update)
-{
-ADD(32, R(addr), gpr.R(b));
-}
-else
-{
-addr = RSCRATCH2;
-if (a && gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
-{
-LEA(32, addr, MRegSum(gpr.RX(a), gpr.RX(b)));
-}
-else
-{
-MOV(32, R(addr), gpr.R(b));
-if (a)
-ADD(32, R(addr), gpr.R(a));
-}
-}
-}
-else
-{
-if (update)
-ADD(32, R(addr), Imm32(offset));
-else
-loadOffset = offset;
-}
-
-fpr.Lock(s);
-if (jo.memcheck)
-{
-fpr.StoreFromRegister(s);
-js.revertFprLoad = s;
-}
-fpr.BindToRegister(s, false);
-
-// Let's mirror the JitAsmCommon code and assume all non-MMU loads go to RAM.
-if (!jo.memcheck)
-{
-if (w)
-{
-if (cpu_info.bSSSE3)
-{
-MOVD_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
-PSHUFB(XMM0, M(pbswapShuffle1x4));
-UNPCKLPS(XMM0, M(m_one));
-}
-else
-{
-LoadAndSwap(32, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
-MOVD_xmm(XMM0, R(RSCRATCH));
-UNPCKLPS(XMM0, M(m_one));
-}
-}
-else
-{
-if (cpu_info.bSSSE3)
-{
-MOVQ_xmm(XMM0, MComplex(RMEM, addr, SCALE_1, loadOffset));
-PSHUFB(XMM0, M(pbswapShuffle2x4));
-}
-else
-{
-LoadAndSwap(64, RSCRATCH, MComplex(RMEM, addr, SCALE_1, loadOffset));
-ROL(64, R(RSCRATCH), Imm8(32));
-MOVQ_xmm(XMM0, R(RSCRATCH));
-}
-}
-CVTPS2PD(fpr.RX(s), R(XMM0));
-}
-else
-{
-BitSet32 registersInUse = CallerSavedRegistersInUse();
-registersInUse[fpr.RX(s) << 16] = false;
-if (update)
-registersInUse[addr] = true;
-SafeLoadToReg(RSCRATCH, R(addr), w ? 32 : 64, loadOffset, registersInUse, false);
-MemoryExceptionCheck();
-if (w)
-{
-MOVD_xmm(XMM0, R(RSCRATCH));
-UNPCKLPS(XMM0, M(m_one));
-}
-else
-{
-ROL(64, R(RSCRATCH), Imm8(32));
-MOVQ_xmm(XMM0, R(RSCRATCH));
-}
-CVTPS2PD(fpr.RX(s), R(XMM0));
-if (update)
-MOV(32, gpr.R(a), R(addr));
-}
-gpr.UnlockAll();
-fpr.UnlockAll();
-return;
-}
 gpr.FlushLockX(RSCRATCH_EXTRA);
 gpr.BindToRegister(a, true, update);
 fpr.BindToRegister(s, false, true);
@@ -373,7 +180,6 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
 CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
 }
 
-MemoryExceptionCheck();
 CVTPS2PD(fpr.RX(s), R(XMM0));
 if (update && jo.memcheck)
 {
@@ -572,8 +572,6 @@ void QuantizedMemoryRoutines::GenQuantizedLoad(bool single, EQuantizeType type,
 MULPS(XMM0, R(XMM1));
 }
 }
-
-return;
 }
 
 void QuantizedMemoryRoutines::GenQuantizedLoadFloat(bool single, bool isInline)
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "Common/CommonTypes.h"
+#include "Common/x64ABI.h"
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/Gekko.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
@@ -12,27 +12,12 @@
 #include "Common/CommonFuncs.h"
 #include "Common/CommonTypes.h"
 #include "Common/MsgHandler.h"
-#include "Common/x64Analyzer.h"
 #include "Common/x64Emitter.h"
 #include "Core/HW/Memmap.h"
 #include "Core/PowerPC/JitCommon/JitBase.h"
 
 using namespace Gen;
 
-static void BackPatchError(const std::string& text, u8* codePtr, u32 emAddress)
-{
-u64 code_addr = (u64)codePtr;
-disassembler disasm;
-char disbuf[256];
-memset(disbuf, 0, 256);
-disasm.disasm64(0, code_addr, codePtr, disbuf);
-PanicAlert("%s\n\n"
-"Error encountered accessing emulated address %08x.\n"
-"Culprit instruction: \n%s\nat %#" PRIx64,
-text.c_str(), emAddress, disbuf, code_addr);
-return;
-}
-
 // This generates some fairly heavy trampolines, but it doesn't really hurt.
 // Only instructions that access I/O will get these, and there won't be that
 // many of them in a typical program/game.
@ -56,36 +41,14 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
if (!IsInSpace(codePtr))
|
if (!IsInSpace(codePtr))
|
||||||
return false; // this will become a regular crash real soon after this
|
return false; // this will become a regular crash real soon after this
|
||||||
|
|
||||||
InstructionInfo info = {};
|
auto it = backPatchInfo.find(codePtr);
|
||||||
|
if (it == backPatchInfo.end())
|
||||||
if (!DisassembleMov(codePtr, &info))
|
|
||||||
{
|
|
||||||
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.otherReg != RMEM)
|
|
||||||
{
|
|
||||||
PanicAlert("BackPatch : Base reg not RMEM."
|
|
||||||
"\n\nAttempted to access %08x.",
|
|
||||||
emAddress);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
|
|
||||||
{
|
|
||||||
PanicAlert("BackPatch: MOVBE is too small");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto it = registersInUseAtLoc.find(codePtr);
|
|
||||||
if (it == registersInUseAtLoc.end())
|
|
||||||
{
|
{
|
||||||
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
|
PanicAlert("BackPatch: no register use entry for address %p", codePtr);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet32 registersInUse = it->second;
|
TrampolineInfo& info = it->second;
|
||||||
|
|
||||||
u8* exceptionHandler = nullptr;
|
u8* exceptionHandler = nullptr;
|
||||||
if (jit->jo.memcheck)
|
if (jit->jo.memcheck)
|
||||||
|
@@ -95,110 +58,67 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
 exceptionHandler = it2->second;
 }
 
-// Compute the start and length of the memory operation, including
-// any byteswapping.
-int totalSize = info.instructionSize;
-u8* start = codePtr;
-if (!info.isMemoryWrite)
-{
-// MOVBE and single bytes don't need to be swapped.
-if (!info.byteSwap && info.operandSize > 1)
-{
-// REX
-if ((codePtr[totalSize] & 0xF0) == 0x40)
-totalSize++;
-
-// BSWAP
-if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
-totalSize += 2;
-
-if (info.operandSize == 2)
-{
-// operand size override
-if (codePtr[totalSize] == 0x66)
-totalSize++;
-// REX
-if ((codePtr[totalSize] & 0xF0) == 0x40)
-totalSize++;
-// SAR/ROL
-_assert_(codePtr[totalSize] == 0xC1 &&
-(codePtr[totalSize + 2] == 0x10 || codePtr[totalSize + 2] == 0x08));
-info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
-totalSize += 3;
-}
-}
-}
-else
-{
-if (info.byteSwap || info.hasImmediate)
-{
-// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
-}
-else
-{
-// We entered here with a BSWAP-ed register. We'll have to swap it back.
-u64* ptr = ContextRN(ctx, info.regOperandReg);
-int bswapSize = 0;
-switch (info.operandSize)
-{
-case 1:
-bswapSize = 0;
-break;
-case 2:
-bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
-*ptr = Common::swap16((u16)*ptr);
-break;
-case 4:
-bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
-*ptr = Common::swap32((u32)*ptr);
-break;
-case 8:
-bswapSize = 3;
-*ptr = Common::swap64(*ptr);
-break;
-}
-start = codePtr - bswapSize;
-totalSize += bswapSize;
-}
-}
-
 // In the trampoline code, we jump back into the block at the beginning
 // of the next instruction. The next instruction comes immediately
 // after the backpatched operation, or BACKPATCH_SIZE bytes after the start
 // of the backpatched operation, whichever comes last. (The JIT inserts NOPs
 // into the original code if necessary to ensure there is enough space
 // to insert the backpatch jump.)
-int padding = totalSize > BACKPATCH_SIZE ? totalSize - BACKPATCH_SIZE : 0;
-u8* returnPtr = start + 5 + padding;
+jit->js.generatingTrampoline = true;
+jit->js.trampolineExceptionHandler = exceptionHandler;
 
 // Generate the trampoline.
-const u8* trampoline;
-if (info.isMemoryWrite)
-{
-// TODO: special case FIFO writes.
-auto it3 = pcAtLoc.find(codePtr);
-if (it3 == pcAtLoc.end())
-{
-PanicAlert("BackPatch: no pc entry for address %p", codePtr);
-return false;
-}
-
-u32 pc = it3->second;
-trampoline =
-trampolines.GenerateWriteTrampoline(info, registersInUse, exceptionHandler, returnPtr, pc);
-}
-else
-{
-trampoline =
-trampolines.GenerateReadTrampoline(info, registersInUse, exceptionHandler, returnPtr);
-}
+const u8* trampoline = trampolines.GenerateTrampoline(info);
+jit->js.generatingTrampoline = false;
+jit->js.trampolineExceptionHandler = nullptr;
+
+u8* start = info.start;
 
 // Patch the original memory operation.
 XEmitter emitter(start);
 emitter.JMP(trampoline, true);
-for (int i = 0; i < padding; ++i)
+// NOPs become dead code
+const u8* end = info.start + info.len;
+for (const u8* i = emitter.GetCodePtr(); i < end; ++i)
 emitter.INT3();
-ctx->CTX_PC = (u64)start;
+
+// Rewind time to just before the start of the write block. If we swapped memory
+// before faulting (eg: the store+swap was not an atomic op like MOVBE), let's
+// swap it back so that the swap can happen again (this double swap isn't ideal but
+// only happens the first time we fault).
+if (info.nonAtomicSwapStoreSrc != INVALID_REG)
+{
+u64* ptr = ContextRN(ctx, info.nonAtomicSwapStoreSrc);
+switch (info.accessSize << 3)
+{
+case 8:
+// No need to swap a byte
+break;
+case 16:
+*ptr = Common::swap16(static_cast<u16>(*ptr));
+break;
+case 32:
+*ptr = Common::swap32(static_cast<u32>(*ptr));
+break;
+case 64:
+*ptr = Common::swap64(static_cast<u64>(*ptr));
+break;
+default:
+_dbg_assert_(DYNA_REC, 0);
+break;
+}
+}
+
+// This is special code to undo the LEA in SafeLoadToReg if it clobbered the address
+// register in the case where reg_value shared the same location as opAddress.
+if (info.offsetAddedToAddress)
+{
+u64* ptr = ContextRN(ctx, info.op_arg.GetSimpleReg());
+*ptr -= static_cast<u32>(info.offset);
+}
+
+ctx->CTX_PC = reinterpret_cast<u64>(trampoline);
+
 return true;
 }
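For orientation only, here is a hedged, self-contained sketch of the swap-back logic introduced in the hunk above (it uses the GCC/Clang __builtin_bswap* intrinsics in place of Dolphin's Common::swap helpers, and a plain integer in place of the saved register context):

#include <cstdint>

// When a store that byte-swapped its source register in place (BSWAP + MOV rather
// than MOVBE) faults, the handler swaps the register value back before re-running
// the access through the trampoline, so the second swap yields the original value.
static uint64_t UndoNonAtomicSwap(uint64_t reg, int access_size_bytes)
{
  switch (access_size_bytes)
  {
  case 1:
    return reg;  // single bytes were never swapped
  case 2:
    return __builtin_bswap16(static_cast<uint16_t>(reg));
  case 4:
    return __builtin_bswap32(static_cast<uint32_t>(reg));
  case 8:
    return __builtin_bswap64(reg);
  default:
    return reg;  // unexpected size; leave the value untouched
  }
}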
@@ -96,6 +96,9 @@ protected:
 bool carryFlagSet;
 bool carryFlagInverted;
 
+bool generatingTrampoline;
+u8* trampolineExceptionHandler;
+
 int fifoBytesThisBlock;
 
 PPCAnalyst::BlockStats st;
@@ -18,6 +18,26 @@ using namespace Gen;
 
 void EmuCodeBlock::MemoryExceptionCheck()
 {
+// TODO: We really should untangle the trampolines, exception handlers and
+// memory checks.
+
+// If we are currently generating a trampoline for a failed fastmem
+// load/store, the trampoline generator will have stashed the exception
+// handler (that we previously generated after the fastmem instruction) in
+// trampolineExceptionHandler.
+if (jit->js.generatingTrampoline)
+{
+if (jit->js.trampolineExceptionHandler)
+{
+TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
+J_CC(CC_NZ, jit->js.trampolineExceptionHandler);
+}
+return;
+}
+
+// If memcheck (ie: MMU) mode is enabled and we haven't generated an
+// exception handler for this instruction yet, we will generate an
+// exception check.
 if (jit->jo.memcheck && !jit->js.fastmemLoadStore && !jit->js.fixupExceptionHandler)
 {
 TEST(32, PPCSTATE(Exceptions), Gen::Imm32(EXCEPTION_DSI));
@@ -42,10 +62,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
 MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
 }
 
-u8* EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
-bool signExtend)
+bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset,
+bool signExtend, MovInfo* info)
 {
-u8* result;
+bool offsetAddedToAddress = false;
 OpArg memOperand;
 if (opAddress.IsSimpleReg())
 {
|
||||||
// place to address the issue.)
|
// place to address the issue.)
|
||||||
if ((u32)offset >= 0x1000)
|
if ((u32)offset >= 0x1000)
|
||||||
{
|
{
|
||||||
|
// This method can potentially clobber the address if it shares a register
|
||||||
|
// with the load target. In this case we can just subtract offset from the
|
||||||
|
// register (see JitBackpatch for this implementation).
|
||||||
|
offsetAddedToAddress = (reg_value == opAddress.GetSimpleReg());
|
||||||
|
|
||||||
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
|
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
|
||||||
opAddress = R(reg_value);
|
opAddress = R(reg_value);
|
||||||
offset = 0;
|
offset = 0;
|
||||||
|
@@ -74,9 +99,8 @@ bool EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
 memOperand = MComplex(RMEM, reg_value, SCALE_1, offset);
 }
 
-result = GetWritableCodePtr();
-LoadAndSwap(accessSize, reg_value, memOperand, signExtend);
-return result;
+LoadAndSwap(accessSize, reg_value, memOperand, signExtend, info);
+return offsetAddedToAddress;
 }
 
 // Visitor that generates code to read a MMIO value.
@@ -231,72 +255,43 @@ FixupBranch EmuCodeBlock::CheckIfSafeAddress(const OpArg& reg_value, X64Reg reg_
 void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize,
 s32 offset, BitSet32 registersInUse, bool signExtend, int flags)
 {
-registersInUse[reg_value] = false;
-if (jit->jo.fastmem && !opAddress.IsImm() &&
-!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)))
-{
-u8* mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
+bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
 
-registersInUseAtLoc[mov] = registersInUse;
-jit->js.fastmemLoadStore = mov;
+registersInUse[reg_value] = false;
+if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
+{
+u8* backpatchStart = GetWritableCodePtr();
+MovInfo mov;
+bool offsetAddedToAddress =
+UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend, &mov);
+TrampolineInfo& info = backPatchInfo[mov.address];
+info.pc = jit->js.compilerPC;
+info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
+info.start = backpatchStart;
+info.read = true;
+info.op_reg = reg_value;
+info.op_arg = opAddress;
+info.offsetAddedToAddress = offsetAddedToAddress;
+info.accessSize = accessSize >> 3;
+info.offset = offset;
+info.registersInUse = registersInUse;
+info.flags = flags;
+info.signExtend = signExtend;
+ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
+if (padding > 0)
+{
+NOP(padding);
+}
+info.len = static_cast<u32>(GetCodePtr() - info.start);
+
+jit->js.fastmemLoadStore = mov.address;
 return;
 }
 
-u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-
-// The following masks the region used by the GC/Wii virtual memory lib
-mem_mask |= Memory::ADDR_MASK_MEM1;
-
 if (opAddress.IsImm())
 {
 u32 address = opAddress.Imm32() + offset;
-// If the address is known to be RAM, just load it directly.
-if (PowerPC::IsOptimizableRAMAddress(address))
-{
-UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
-return;
-}
-
-// If the address maps to an MMIO register, inline MMIO read code.
-u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
-if (accessSize != 64 && mmioAddress)
-{
-MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
-signExtend);
-return;
-}
-
-// Fall back to general-case code.
-ABI_PushRegistersAndAdjustStack(registersInUse, 0);
-switch (accessSize)
-{
-case 64:
-ABI_CallFunctionC((void*)&PowerPC::Read_U64, address);
-break;
-case 32:
-ABI_CallFunctionC((void*)&PowerPC::Read_U32, address);
-break;
-case 16:
-ABI_CallFunctionC((void*)&PowerPC::Read_U16_ZX, address);
-break;
-case 8:
-ABI_CallFunctionC((void*)&PowerPC::Read_U8_ZX, address);
-break;
-}
-ABI_PopRegistersAndAdjustStack(registersInUse, 0);
-
-MemoryExceptionCheck();
-if (signExtend && accessSize < 32)
-{
-// Need to sign extend values coming from the Read_U* functions.
-MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
-}
-else if (reg_value != ABI_RETURN)
-{
-MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
-}
-
+SafeLoadToRegImmediate(reg_value, address, accessSize, registersInUse, signExtend);
 return;
 }
@@ -310,8 +305,13 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
 }
 
 FixupBranch exit;
-if (!jit->jo.alwaysUseMemFuncs)
+if (!jit->jo.alwaysUseMemFuncs && !slowmem)
 {
+u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
+
+// The following masks the region used by the GC/Wii virtual memory lib
+mem_mask |= Memory::ADDR_MASK_MEM1;
+
 FixupBranch slow = CheckIfSafeAddress(R(reg_value), reg_addr, registersInUse, mem_mask);
 UnsafeLoadToReg(reg_value, R(reg_addr), accessSize, 0, signExtend);
 if (farcode.Enabled())
@@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
 MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
 }
 
-if (!jit->jo.alwaysUseMemFuncs)
+if (!jit->jo.alwaysUseMemFuncs && !slowmem)
 {
 if (farcode.Enabled())
 {
@@ -361,6 +361,56 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
 }
 }
 
+void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int accessSize,
+BitSet32 registersInUse, bool signExtend)
+{
+// If the address is known to be RAM, just load it directly.
+if (PowerPC::IsOptimizableRAMAddress(address))
+{
+UnsafeLoadToReg(reg_value, Imm32(address), accessSize, 0, signExtend);
+return;
+}
+
+// If the address maps to an MMIO register, inline MMIO read code.
+u32 mmioAddress = PowerPC::IsOptimizableMMIOAccess(address, accessSize);
+if (accessSize != 64 && mmioAddress)
+{
+MMIOLoadToReg(Memory::mmio_mapping.get(), reg_value, registersInUse, mmioAddress, accessSize,
+signExtend);
+return;
+}
+
+// Fall back to general-case code.
+ABI_PushRegistersAndAdjustStack(registersInUse, 0);
+switch (accessSize)
+{
+case 64:
+ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U64), address);
+break;
+case 32:
+ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U32), address);
+break;
+case 16:
+ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U16_ZX), address);
+break;
+case 8:
+ABI_CallFunctionC(reinterpret_cast<void*>(&PowerPC::Read_U8_ZX), address);
+break;
+}
+ABI_PopRegistersAndAdjustStack(registersInUse, 0);
+
+MemoryExceptionCheck();
+if (signExtend && accessSize < 32)
+{
+// Need to sign extend values coming from the Read_U* functions.
+MOVSX(32, accessSize, reg_value, R(ABI_RETURN));
+}
+else if (reg_value != ABI_RETURN)
+{
+MOVZX(64, accessSize, reg_value, R(ABI_RETURN));
+}
+}
+
 static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
 {
 if (accessSize == 32)
@@ -371,10 +421,15 @@ static OpArg SwapImmediate(int accessSize, const OpArg& reg_value)
 return Imm8(reg_value.Imm8());
 }
 
-u8* EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
-bool swap)
+void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
+bool swap, MovInfo* info)
 {
-u8* result = GetWritableCodePtr();
+if (info)
+{
+info->address = GetWritableCodePtr();
+info->nonAtomicSwapStore = false;
+}
+
 OpArg dest = MComplex(RMEM, reg_addr, SCALE_1, offset);
 if (reg_value.IsImm())
 {
@@ -384,22 +439,19 @@ void EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
 }
 else if (swap)
 {
-result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg());
+SwapAndStore(accessSize, dest, reg_value.GetSimpleReg(), info);
 }
 else
 {
 MOV(accessSize, dest, reg_value);
 }
-
-return result;
 }
 
 static OpArg FixImmediate(int accessSize, OpArg arg)
 {
 if (arg.IsImm())
 {
-arg = accessSize == 8 ? Imm8((u8)arg.Imm32()) : accessSize == 16 ? Imm16((u16)arg.Imm32()) :
-Imm32((u32)arg.Imm32());
+arg = accessSize == 8 ? arg.AsImm8() : accessSize == 16 ? arg.AsImm16() : arg.AsImm32();
 }
 return arg;
 }
@@ -475,25 +527,38 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
 void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int accessSize, s32 offset,
 BitSet32 registersInUse, int flags)
 {
+bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
+bool slowmem = (flags & SAFE_LOADSTORE_FORCE_SLOWMEM) != 0;
+
 // set the correct immediate format
 reg_value = FixImmediate(accessSize, reg_value);
 
-// TODO: support byte-swapped non-immediate fastmem stores
-if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
-(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)))
+if (jit->jo.fastmem && !(flags & SAFE_LOADSTORE_NO_FASTMEM) && !slowmem)
 {
-const u8* backpatchStart = GetCodePtr();
-u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset,
-!(flags & SAFE_LOADSTORE_NO_SWAP));
+u8* backpatchStart = GetWritableCodePtr();
+MovInfo mov;
+UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, swap, &mov);
+TrampolineInfo& info = backPatchInfo[mov.address];
+info.pc = jit->js.compilerPC;
+info.nonAtomicSwapStoreSrc = mov.nonAtomicSwapStore ? mov.nonAtomicSwapStoreSrc : INVALID_REG;
+info.start = backpatchStart;
+info.read = false;
+info.op_arg = reg_value;
+info.op_reg = reg_addr;
+info.offsetAddedToAddress = false;
+info.accessSize = accessSize >> 3;
+info.offset = offset;
+info.registersInUse = registersInUse;
+info.flags = flags;
 ptrdiff_t padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
 if (padding > 0)
 {
 NOP(padding);
 }
+info.len = static_cast<u32>(GetCodePtr() - info.start);
+
+jit->js.fastmemLoadStore = mov.address;
 
-registersInUseAtLoc[mov] = registersInUse;
-pcAtLoc[mov] = jit->js.compilerPC;
-jit->js.fastmemLoadStore = mov;
 return;
 }
@@ -510,21 +575,22 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
 }
 }
 
-u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
-
-// The following masks the region used by the GC/Wii virtual memory lib
-mem_mask |= Memory::ADDR_MASK_MEM1;
-
-bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP);
-
 FixupBranch slow, exit;
-slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
-UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
-if (farcode.Enabled())
-SwitchToFarCode();
-else
-exit = J(true);
-SetJumpTarget(slow);
+if (!slowmem)
+{
+u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
+
+// The following masks the region used by the GC/Wii virtual memory lib
+mem_mask |= Memory::ADDR_MASK_MEM1;
+
+slow = CheckIfSafeAddress(reg_value, reg_addr, registersInUse, mem_mask);
+UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
+if (farcode.Enabled())
+SwitchToFarCode();
+else
+exit = J(true);
+SetJumpTarget(slow);
+}
 
 // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
 MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC));
@@ -563,12 +629,18 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
     break;
   }
   ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
-  if (farcode.Enabled())
+  MemoryExceptionCheck();
+
+  if (!slowmem)
   {
-    exit = J(true);
-    SwitchToNearCode();
+    if (farcode.Enabled())
+    {
+      exit = J(true);
+      SwitchToNearCode();
+    }
+    SetJumpTarget(exit);
   }
-  SetJumpTarget(exit);
 }
 
 void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address, bool swap)
@@ -1055,7 +1127,6 @@ void EmuCodeBlock::JitClearCA()
 
 void EmuCodeBlock::Clear()
 {
-  registersInUseAtLoc.clear();
-  pcAtLoc.clear();
+  backPatchInfo.clear();
   exceptionHandlerAtLoc.clear();
 }

@@ -59,6 +59,47 @@ static const int FARCODE_SIZE_MMU = 1024 * 1024 * 48;
 static const int TRAMPOLINE_CODE_SIZE = 1024 * 1024 * 8;
 static const int TRAMPOLINE_CODE_SIZE_MMU = 1024 * 1024 * 32;
 
+// Stores information we need to batch-patch a MOV with a call to the slow read/write path after
+// it faults. There will be 10s of thousands of these structs live, so be wary of making this too
+// big.
+struct TrampolineInfo final
+{
+  // The start of the store operation that failed -- we will patch a JMP here
+  u8* start;
+
+  // The start + len = end of the store operation (points to the next instruction)
+  u32 len;
+
+  // The PPC PC for the current load/store block
+  u32 pc;
+
+  // Saved because we need these to make the ABI call in the trampoline
+  BitSet32 registersInUse;
+
+  // The MOV operation
+  Gen::X64Reg nonAtomicSwapStoreSrc;
+
+  // src/dest for load/store
+  s32 offset;
+  Gen::X64Reg op_reg;
+  Gen::OpArg op_arg;
+
+  // Original SafeLoadXXX/SafeStoreXXX flags
+  u8 flags;
+
+  // Memory access size (in bytes)
+  u8 accessSize : 4;
+
+  // true if this is a read op vs a write
+  bool read : 1;
+
+  // for read operations, true if needs sign-extension after load
+  bool signExtend : 1;
+
+  // Set to true if we added the offset to the address and need to undo it
+  bool offsetAddedToAddress : 1;
+};
+
 // Like XCodeBlock but has some utilities for memory access.
 class EmuCodeBlock : public Gen::X64CodeBlock
 {
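
The size warning in the comment above is why the access size and the boolean state are packed into bit-fields at the end of the struct. One way to enforce that intent at compile time would be a static_assert like the sketch below; it is not part of this commit, and the 64-byte bound is only an assumed budget.

    // Keep TrampolineInfo compact: tens of thousands of instances stay resident
    // alongside the JIT code cache.
    static_assert(sizeof(TrampolineInfo) <= 64, "TrampolineInfo is getting too large");
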
@@ -88,15 +129,15 @@ public:
   void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize,
                                 s32 offset, bool signExtend = false);
   // these return the address of the MOV, for backpatching
-  u8* UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
-                          s32 offset = 0, bool swap = true);
-  u8* UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
-                          s32 offset = 0, bool swap = true)
+  void UnsafeWriteRegToReg(Gen::OpArg reg_value, Gen::X64Reg reg_addr, int accessSize,
+                           s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr);
+  void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize,
+                           s32 offset = 0, bool swap = true, Gen::MovInfo* info = nullptr)
   {
-    return UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap);
+    UnsafeWriteRegToReg(R(reg_value), reg_addr, accessSize, offset, swap, info);
   }
-  u8* UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
-                      bool signExtend);
+  bool UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset,
+                       bool signExtend, Gen::MovInfo* info = nullptr);
   void UnsafeWriteGatherPipe(int accessSize);
 
   // Generate a load/write from the MMIO handler for a given address. Only
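
With this change UnsafeWriteRegToReg and UnsafeLoadToReg no longer return the address of the emitted MOV; they report it through an optional Gen::MovInfo out-parameter, which also lets them describe stores whose byte swap is emitted as a separate instruction. The sketch below is the assumed shape, inferred from the fields used in the SafeWriteRegToReg hunk above; the actual definition lives in Common/x64Emitter.h.

    struct MovInfo
    {
      u8* address;                        // address of the emitted MOV; used as the backpatch key
      bool nonAtomicSwapStore;            // true when the swap was emitted as a separate BSWAP + MOV
      Gen::X64Reg nonAtomicSwapStoreSrc;  // register holding the swapped value in that case
    };
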
@@ -108,12 +149,18 @@ public:
   {
     SAFE_LOADSTORE_NO_SWAP = 1,
     SAFE_LOADSTORE_NO_PROLOG = 2,
+    // This indicates that the write being generated cannot be patched (and thus can't use fastmem)
     SAFE_LOADSTORE_NO_FASTMEM = 4,
-    SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8
+    SAFE_LOADSTORE_CLOBBER_RSCRATCH_INSTEAD_OF_ADDR = 8,
+    // Force slowmem (used when generating fallbacks in trampolines)
+    SAFE_LOADSTORE_FORCE_SLOWMEM = 16,
   };
 
   void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg& opAddress, int accessSize, s32 offset,
                      BitSet32 registersInUse, bool signExtend, int flags = 0);
+  void SafeLoadToRegImmediate(Gen::X64Reg reg_value, u32 address, int accessSize,
+                              BitSet32 registersInUse, bool signExtend);
+
   // Clobbers RSCRATCH or reg_addr depending on the relevant flag. Preserves
   // reg_value if the load fails and js.memcheck is enabled.
   // Works with immediate inputs and simple registers only.
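
SAFE_LOADSTORE_FORCE_SLOWMEM is OR'd into the original flags when a trampoline re-emits an access, so the fastmem branch is skipped and the C++ fallback is generated directly. The write-trampoline hunk later in this commit uses it like this:

    SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
                      info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
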
@@ -158,7 +205,6 @@ public:
   void Clear();
 
 protected:
-  std::unordered_map<u8*, BitSet32> registersInUseAtLoc;
-  std::unordered_map<u8*, u32> pcAtLoc;
+  std::unordered_map<u8*, TrampolineInfo> backPatchInfo;
   std::unordered_map<u8*, u8*> exceptionHandlerAtLoc;
 };

@@ -9,7 +9,6 @@
 #include "Common/CommonTypes.h"
 #include "Common/JitRegister.h"
 #include "Common/x64ABI.h"
-#include "Common/x64Analyzer.h"
 #include "Common/x64Emitter.h"
 #include "Core/PowerPC/JitCommon/JitBase.h"
 #include "Core/PowerPC/JitCommon/Jit_Util.h"
@@ -37,150 +36,50 @@ void TrampolineCache::Shutdown()
   FreeCodeSpace();
 }
 
-const u8* TrampolineCache::GenerateReadTrampoline(const InstructionInfo& info,
-                                                  BitSet32 registersInUse, u8* exceptionHandler,
-                                                  u8* returnPtr)
+const u8* TrampolineCache::GenerateTrampoline(const TrampolineInfo& info)
+{
+  if (info.read)
+  {
+    return GenerateReadTrampoline(info);
+  }
+
+  return GenerateWriteTrampoline(info);
+}
+
+const u8* TrampolineCache::GenerateReadTrampoline(const TrampolineInfo& info)
 {
   if (GetSpaceLeft() < 1024)
     PanicAlert("Trampoline cache full");
 
   const u8* trampoline = GetCodePtr();
-  X64Reg addrReg = (X64Reg)info.scaledReg;
-  X64Reg dataReg = (X64Reg)info.regOperandReg;
-  int stack_offset = 0;
-  bool push_param1 = registersInUse[ABI_PARAM1];
-
-  if (push_param1)
-  {
-    PUSH(ABI_PARAM1);
-    stack_offset = 8;
-    registersInUse[ABI_PARAM1] = 0;
-  }
-
-  int dataRegSize = info.operandSize == 8 ? 64 : 32;
-  if (addrReg != ABI_PARAM1 && info.displacement)
-    LEA(32, ABI_PARAM1, MDisp(addrReg, info.displacement));
-  else if (addrReg != ABI_PARAM1)
-    MOV(32, R(ABI_PARAM1), R(addrReg));
-  else if (info.displacement)
-    ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
-
-  ABI_PushRegistersAndAdjustStack(registersInUse, stack_offset);
-
-  switch (info.operandSize)
-  {
-  case 8:
-    CALL((void*)&PowerPC::Read_U64);
-    break;
-  case 4:
-    CALL((void*)&PowerPC::Read_U32);
-    break;
-  case 2:
-    CALL((void*)&PowerPC::Read_U16);
-    break;
-  case 1:
-    CALL((void*)&PowerPC::Read_U8);
-    break;
-  }
-
-  ABI_PopRegistersAndAdjustStack(registersInUse, stack_offset);
-
-  if (push_param1)
-    POP(ABI_PARAM1);
-
-  if (exceptionHandler)
-  {
-    TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
-    J_CC(CC_NZ, exceptionHandler);
-  }
-
-  if (info.signExtend)
-    MOVSX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
-  else if (dataReg != ABI_RETURN || info.operandSize < 4)
-    MOVZX(dataRegSize, info.operandSize * 8, dataReg, R(ABI_RETURN));
-
-  JMP(returnPtr, true);
-
-  JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline");
+
+  SafeLoadToReg(info.op_reg, info.op_arg, info.accessSize << 3, info.offset, info.registersInUse,
+                info.signExtend, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+
+  JMP(info.start + info.len, true);
+
+  JitRegister::Register(trampoline, GetCodePtr(), "JIT_ReadTrampoline_%x", info.pc);
   return trampoline;
 }
 
-const u8* TrampolineCache::GenerateWriteTrampoline(const InstructionInfo& info,
-                                                   BitSet32 registersInUse, u8* exceptionHandler,
-                                                   u8* returnPtr, u32 pc)
+const u8* TrampolineCache::GenerateWriteTrampoline(const TrampolineInfo& info)
 {
   if (GetSpaceLeft() < 1024)
     PanicAlert("Trampoline cache full");
 
   const u8* trampoline = GetCodePtr();
 
-  X64Reg dataReg = (X64Reg)info.regOperandReg;
-  X64Reg addrReg = (X64Reg)info.scaledReg;
-
   // Don't treat FIFO writes specially for now because they require a burst
   // check anyway.
 
   // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs
-  MOV(32, PPCSTATE(pc), Imm32(pc));
+  MOV(32, PPCSTATE(pc), Imm32(info.pc));
 
-  ABI_PushRegistersAndAdjustStack(registersInUse, 0);
-
-  if (info.hasImmediate)
-  {
-    if (addrReg != ABI_PARAM2 && info.displacement)
-      LEA(32, ABI_PARAM2, MDisp(addrReg, info.displacement));
-    else if (addrReg != ABI_PARAM2)
-      MOV(32, R(ABI_PARAM2), R(addrReg));
-    else if (info.displacement)
-      ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
-
-    // we have to swap back the immediate to pass it to the write functions
-    switch (info.operandSize)
-    {
-    case 8:
-      PanicAlert("Invalid 64-bit immediate!");
-      break;
-    case 4:
-      MOV(32, R(ABI_PARAM1), Imm32(Common::swap32((u32)info.immediate)));
-      break;
-    case 2:
-      MOV(16, R(ABI_PARAM1), Imm16(Common::swap16((u16)info.immediate)));
-      break;
-    case 1:
-      MOV(8, R(ABI_PARAM1), Imm8((u8)info.immediate));
-      break;
-    }
-  }
-  else
-  {
-    int dataRegSize = info.operandSize == 8 ? 64 : 32;
-    MOVTwo(dataRegSize, ABI_PARAM2, addrReg, info.displacement, ABI_PARAM1, dataReg);
-  }
-
-  switch (info.operandSize)
-  {
-  case 8:
-    CALL((void*)&PowerPC::Write_U64);
-    break;
-  case 4:
-    CALL((void*)&PowerPC::Write_U32);
-    break;
-  case 2:
-    CALL((void*)&PowerPC::Write_U16);
-    break;
-  case 1:
-    CALL((void*)&PowerPC::Write_U8);
-    break;
-  }
-
-  ABI_PopRegistersAndAdjustStack(registersInUse, 0);
-  if (exceptionHandler)
-  {
-    TEST(32, PPCSTATE(Exceptions), Imm32(EXCEPTION_DSI));
-    J_CC(CC_NZ, exceptionHandler);
-  }
-  JMP(returnPtr, true);
-
-  JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", pc);
+  SafeWriteRegToReg(info.op_arg, info.op_reg, info.accessSize << 3, info.offset,
+                    info.registersInUse, info.flags | SAFE_LOADSTORE_FORCE_SLOWMEM);
+
+  JMP(info.start + info.len, true);
+
+  JitRegister::Register(trampoline, GetCodePtr(), "JIT_WriteTrampoline_%x", info.pc);
   return trampoline;
 }

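
The rewritten trampolines stay short because they reuse the Safe* emitters with SAFE_LOADSTORE_FORCE_SLOWMEM and then jump straight back into the block. Once a faulting store has been backpatched, the runtime control flow looks roughly like the sketch below (labels are illustrative, not names from the commit):

    //   JIT block:    info.start:              jmp write_trampoline   ; overwrites the faulting MOV
    //                 info.start + info.len:   <next emitted instruction>
    //
    //   trampoline:   mov dword [ppcState.pc], info.pc
    //                 <slow store emitted by SafeWriteRegToReg(..., FORCE_SLOWMEM)>
    //                 jmp info.start + info.len
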
@@ -7,21 +7,21 @@
 #include "Common/BitSet.h"
 #include "Common/CommonTypes.h"
 #include "Common/x64Emitter.h"
+#include "Core/PowerPC/JitCommon/Jit_Util.h"
 
 struct InstructionInfo;
 
 // We need at least this many bytes for backpatching.
 const int BACKPATCH_SIZE = 5;
 
-class TrampolineCache : public Gen::X64CodeBlock
+class TrampolineCache : public EmuCodeBlock
 {
+  const u8* GenerateReadTrampoline(const TrampolineInfo& info);
+  const u8* GenerateWriteTrampoline(const TrampolineInfo& info);
+
 public:
   void Init(int size);
   void Shutdown();
-  const u8* GenerateReadTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
-                                   u8* exceptionHandler, u8* returnPtr);
-  const u8* GenerateWriteTrampoline(const InstructionInfo& info, BitSet32 registersInUse,
-                                    u8* exceptionHandler, u8* returnPtr, u32 pc);
+  const u8* GenerateTrampoline(const TrampolineInfo& info);
   void ClearCodeSpace();
 };