Finish replacing ThunkManager with ABI_PushRegistersAndAdjustStack.

As part of that, change SafeLoadToEAX to SafeLoadToReg, and have JitIL
use that, which should fix fastmem on JitIL.

This should also fix a potential stack corruption issue with x86.
This commit is contained in:
comex 2013-09-29 22:51:07 -04:00
parent a53dc6f981
commit 5e4665301b
37 changed files with 220 additions and 406 deletions

View File

@ -37,7 +37,6 @@ else()
if(NOT _M_GENERIC) #X86
set(SRCS ${SRCS}
Src/x64FPURoundMode.cpp
Src/x64Thunk.cpp
)
endif()
set(SRCS ${SRCS} Src/x64CPUDetect.cpp)

View File

@ -217,7 +217,6 @@
<ClCompile Include="Src\x64CPUDetect.cpp" />
<ClCompile Include="Src\x64Emitter.cpp" />
<ClCompile Include="Src\x64FPURoundMode.cpp" />
<ClCompile Include="Src\x64Thunk.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Atomic.h" />
@ -263,7 +262,6 @@
<ClInclude Include="Src\SymbolDB.h" />
<ClInclude Include="Src\SysConf.h" />
<ClInclude Include="Src\Thread.h" />
<ClInclude Include="Src\Thunk.h" />
<ClInclude Include="Src\Timer.h" />
<ClInclude Include="Src\x64ABI.h" />
<ClInclude Include="Src\x64Analyzer.h" />

View File

@ -47,7 +47,6 @@
<ClCompile Include="Src\x64ABI.cpp" />
<ClCompile Include="Src\x64CPUDetect.cpp" />
<ClCompile Include="Src\x64FPURoundMode.cpp" />
<ClCompile Include="Src\x64Thunk.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Atomic.h" />
@ -84,7 +83,6 @@
<ClInclude Include="Src\SymbolDB.h" />
<ClInclude Include="Src\SysConf.h" />
<ClInclude Include="Src\Thread.h" />
<ClInclude Include="Src\Thunk.h" />
<ClInclude Include="Src\Timer.h" />
<ClInclude Include="Src\x64Analyzer.h" />
<ClInclude Include="Src\x64Emitter.h" />

View File

@ -1,46 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#ifndef _THUNK_H_
#define _THUNK_H_
#include <map>
#include "Common.h"
#include "x64Emitter.h"
// This simple class creates a wrapper around a C/C++ function that saves all fp state
// before entering it, and restores it upon exit. This is required to be able to selectively
// call functions from generated code, without inflicting the performance hit and increase
// of complexity that it means to protect the generated code from this problem.
// This process is called thunking.
// There will only ever be one level of thunking on the stack, plus,
// we don't want to pollute the stack, so we store away regs somewhere global.
// NOT THREAD SAFE. This may only be used from the CPU thread.
// Any other thread using this stuff will be FATAL.
class ThunkManager : public Gen::XCodeBlock
{
std::map<void *, const u8 *> thunks;
const u8 *save_regs;
const u8 *load_regs;
public:
ThunkManager() {
Init();
}
~ThunkManager() {
Shutdown();
}
void *ProtectFunction(void *function, int num_params);
private:
void Init();
void Shutdown();
void Reset();
};
#endif // _THUNK_H_

View File

@ -57,6 +57,86 @@ void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) {
}
}
void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog)
{
int regSize =
#ifdef _M_X64
8;
#else
4;
#endif
int shadow = 0;
#if defined(_WIN32) && defined(_M_X64)
shadow = 0x20;
#endif
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
{
PUSH((X64Reg) r);
count++;
}
}
int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf;
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
size += 16;
}
size += shadow;
if (size)
SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
int offset = shadow;
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVAPD(MDisp(RSP, offset), (X64Reg) x);
offset += 16;
}
}
}
void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog)
{
int regSize =
#ifdef _M_X64
8;
#else
4;
#endif
int size = 0;
#if defined(_WIN32) && defined(_M_X64)
size += 0x20;
#endif
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVAPD((X64Reg) x, MDisp(RSP, size));
size += 16;
}
}
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
count++;
}
size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf;
if (size)
ADD(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
for (int r = 15; r >= 0; r--)
{
if (mask & (1 << r))
{
POP((X64Reg) r);
}
}
}
#ifdef _M_IX86 // All32
// Shared code between Win32 and Unix32

View File

@ -1634,74 +1634,6 @@ void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u
CALLptr(M(impptr));
}
void XEmitter::PushRegistersAndAlignStack(u32 mask)
{
int shadow = 0;
#ifdef _WIN32
shadow = 0x20;
#endif
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
{
PUSH((X64Reg) r);
count++;
}
}
int size = (count & 1) ? 0 : 8;
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
size += 16;
}
size += shadow;
if (size)
SUB(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
int offset = shadow;
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVAPD(MDisp(RSP, offset), (X64Reg) x);
offset += 16;
}
}
}
void XEmitter::PopRegistersAndAlignStack(u32 mask)
{
int size = 0;
#ifdef _WIN32
size += 0x20;
#endif
for (int x = 0; x < 16; x++)
{
if (mask & (1 << (16 + x)))
{
MOVAPD((X64Reg) x, MDisp(RSP, size));
size += 16;
}
}
int count = 0;
for (int r = 0; r < 16; r++)
{
if (mask & (1 << r))
count++;
}
size += (count & 1) ? 0 : 8;
if (size)
ADD(64, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size));
for (int r = 15; r >= 0; r--)
{
if (mask & (1 << r))
{
POP((X64Reg) r);
}
}
}
#endif
}

View File

@ -646,6 +646,10 @@ public:
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
// A more flexible version of the above.
void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog);
void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog);
unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false);
void ABI_AlignStack(unsigned int frameSize, bool noProlog = false);
void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false);
@ -691,9 +695,6 @@ public:
#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
void PushRegistersAndAlignStack(u32 mask);
void PopRegistersAndAlignStack(u32 mask);
#endif
}; // class XEmitter

View File

@ -1,121 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include <map>
#include "Common.h"
#include "MemoryUtil.h"
#include "x64ABI.h"
#include "Thunk.h"
#define THUNK_ARENA_SIZE 1024*1024*1
namespace
{
static u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
static u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
static u16 saved_mxcsr;
} // namespace
using namespace Gen;
void ThunkManager::Init()
{
AllocCodeSpace(THUNK_ARENA_SIZE);
save_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
STMXCSR(M(&saved_mxcsr));
#ifdef _M_X64
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
MOV(64, M(saved_gpr_state + 16), R(R8) );
MOV(64, M(saved_gpr_state + 24), R(R9) );
MOV(64, M(saved_gpr_state + 32), R(R10));
MOV(64, M(saved_gpr_state + 40), R(R11));
#ifndef _WIN32
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
#endif
MOV(64, M(saved_gpr_state + 64), R(RBX));
#else
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
#endif
RET();
load_regs = GetCodePtr();
LDMXCSR(M(&saved_mxcsr));
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
#ifdef _M_X64
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
MOV(64, R(R8) , M(saved_gpr_state + 16));
MOV(64, R(R9) , M(saved_gpr_state + 24));
MOV(64, R(R10), M(saved_gpr_state + 32));
MOV(64, R(R11), M(saved_gpr_state + 40));
#ifndef _WIN32
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
#endif
MOV(64, R(RBX), M(saved_gpr_state + 64));
#else
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
RET();
}
void ThunkManager::Reset()
{
thunks.clear();
ResetCodePtr();
}
void ThunkManager::Shutdown()
{
Reset();
FreeCodeSpace();
}
void *ThunkManager::ProtectFunction(void *function, int num_params)
{
std::map<void *, const u8 *>::iterator iter;
iter = thunks.find(function);
if (iter != thunks.end())
return (void *)iter->second;
if (!region)
PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
const u8 *call_point = GetCodePtr();
#ifdef _M_X64
// Make sure to align stack.
ABI_AlignStack(0, true);
CALL((void*)save_regs);
CALL((void*)function);
CALL((void*)load_regs);
ABI_RestoreStack(0, true);
RET();
#else
CALL((void*)save_regs);
// Since parameters are in the previous stack frame, not in registers, this takes some
// trickery : we simply re-push the parameters. might not be optimal, but that doesn't really
// matter.
ABI_AlignStack(num_params * 4, true);
unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4, true);
for (int i = 0; i < num_params; i++) {
// ESP is changing, so we do not need i
PUSH(32, MDisp(ESP, alignedSize));
}
CALL(function);
ABI_RestoreStack(num_params * 4, true);
CALL((void*)load_regs);
RET();
#endif
thunks[function] = call_point;
return (void *)call_point;
}

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "Common.h"
#include "Thunk.h"
#include "../Core.h"
#include "HW.h"
#include "../PowerPC/PowerPC.h"

View File

@ -12,7 +12,6 @@
#include "Common.h"
#include "x64Emitter.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "../../HLE/HLE.h"
#include "../../Core.h"
#include "../../PatchEngine.h"
@ -552,7 +551,10 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
{
js.fifoBytesThisBlock -= 32;
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
u32 registersInUse = RegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
}
u32 function = HLE::GetFunctionIndex(ops[i].address);

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -6,7 +6,6 @@
// Should give a very noticable speed boost to paired single heavy code.
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
@ -120,11 +119,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
// do our job at first
s32 offset = (s32)(s16)inst.SIMM_16;
gpr.Lock(d);
SafeLoadToEAX(gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
gpr.KillImmediate(d, false, true);
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
gpr.BindToRegister(d, false, true);
SafeLoadToReg(gpr.RX(d), gpr.R(a), accessSize, offset, RegistersInUse(), signExtend);
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
@ -174,18 +170,32 @@ void Jit64::lXXx(UGeckoInstruction inst)
{
if ((inst.OPCD != 31) && gpr.R(a).IsImm())
{
opAddress = Imm32((u32)gpr.R(a).offset + (s32)inst.SIMM_16);
u32 val = (u32)gpr.R(a).offset + (s32)inst.SIMM_16;
opAddress = Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
else if ((inst.OPCD == 31) && gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
opAddress = Imm32((u32)gpr.R(a).offset + (u32)gpr.R(b).offset);
u32 val = (u32)gpr.R(a).offset + (u32)gpr.R(b).offset;
opAddress = Imm32(val);
if (update)
gpr.SetImmediate32(a, val);
}
else
{
gpr.FlushLockX(ABI_PARAM1);
opAddress = R(ABI_PARAM1);
MOV(32, opAddress, gpr.R(a));
if (update || (inst.OPCD != 31 && inst.SIMM_16 == 0))
{
gpr.BindToRegister(a, true, update);
opAddress = gpr.R(a);
}
else
{
gpr.FlushLockX(ABI_PARAM1);
opAddress = R(ABI_PARAM1);
MOV(32, opAddress, gpr.R(a));
}
if (inst.OPCD == 31)
ADD(32, opAddress, gpr.R(b));
else
@ -193,29 +203,9 @@ void Jit64::lXXx(UGeckoInstruction inst)
}
}
SafeLoadToEAX(opAddress, accessSize, 0, RegistersInUse(), signExtend);
// We must flush immediate values from the following registers because
// they may change at runtime if no MMU exception has been raised
gpr.KillImmediate(d, true, true);
if (update)
{
gpr.Lock(a);
gpr.BindToRegister(a, true, true);
}
MEMCHECK_START
if (update)
{
if (inst.OPCD == 31)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
}
MOV(32, gpr.R(d), R(EAX));
MEMCHECK_END
gpr.Lock(a, b, d);
gpr.BindToRegister(d, false, true);
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, RegistersInUse(), signExtend);
gpr.UnlockAll();
gpr.UnlockAllX();
@ -318,12 +308,15 @@ void Jit64::stX(UGeckoInstruction inst)
else
{
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u32 registersInUse = RegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize)
{
case 32: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), gpr.R(s), addr); break;
case 16: ABI_CallFunctionAC(thunks.ProtectFunction(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), gpr.R(s), addr); break;
case 8: ABI_CallFunctionAC(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), gpr.R(s), addr); break;
case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break;
case 16: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break;
case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
if (update)
gpr.SetImmediate32(a, addr);
return;

View File

@ -50,7 +50,7 @@ void Jit64::lfs(UGeckoInstruction inst)
}
s32 offset = (s32)(s16)inst.SIMM_16;
SafeLoadToEAX(gpr.R(a), 32, offset, RegistersInUse(), false);
SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false);
MEMCHECK_START
@ -338,7 +338,7 @@ void Jit64::lfsx(UGeckoInstruction inst)
MEMCHECK_END
} else {
SafeLoadToEAX(R(EAX), 32, 0, RegistersInUse(), false);
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
MEMCHECK_START

View File

@ -7,7 +7,6 @@
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"
@ -102,20 +101,19 @@ void Jit64::psq_st(UGeckoInstruction inst)
#else
int addr_scale = SCALE_8;
#endif
u32 registersInUse = RegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, false);
if (inst.W) {
// One value
XORPS(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
CVTSD2SS(XMM0, fpr.R(s));
ABI_AlignStack(0);
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.singleStoreQuantized));
ABI_RestoreStack(0);
} else {
// Pair of values
CVTPD2PS(XMM0, fpr.R(s));
ABI_AlignStack(0);
CALLptr(MScaled(EDX, addr_scale, (u32)(u64)asm_routines.pairedStoreQuantized));
ABI_RestoreStack(0);
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
gpr.UnlockAll();
gpr.UnlockAllX();
}

View File

@ -11,7 +11,6 @@
#include "../PPCTables.h"
#include "x64Emitter.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "Jit.h"
#include "JitRegCache.h"

View File

@ -123,7 +123,6 @@ Fix profiled loads/stores to work safely. On 32-bit, one solution is to
#include "IR.h"
#include "../PPCTables.h"
#include "../../CoreTiming.h"
#include "Thunk.h"
#include "../../HW/Memmap.h"
#include "JitILAsm.h"
#include "JitIL.h"

View File

@ -27,7 +27,6 @@ The register allocation is linear scan allocation.
#include "IR.h"
#include "../PPCTables.h"
#include "../../CoreTiming.h"
#include "Thunk.h"
#include "../../HW/Memmap.h"
#include "JitILAsm.h"
#include "JitIL.h"
@ -39,8 +38,6 @@ The register allocation is linear scan allocation.
#include "../../Core.h"
#include "HW/ProcessorInterface.h"
static ThunkManager thunks;
using namespace IREmitter;
using namespace Gen;
@ -435,13 +432,14 @@ static void regMarkMemAddress(RegInfo& RI, InstLoc I, InstLoc AI, unsigned OpNum
}
// in 64-bit build, this returns a completely bizarre address sometimes!
static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
unsigned OpNum, unsigned Size, X64Reg* dest) {
static std::pair<OpArg, u32> regBuildMemAddress(RegInfo& RI, InstLoc I,
InstLoc AI, unsigned OpNum, unsigned Size, X64Reg* dest) {
if (isImm(*AI)) {
unsigned addr = RI.Build->GetImmValue(AI);
unsigned addr = RI.Build->GetImmValue(AI);
if (Memory::IsRAMAddress(addr)) {
if (dest)
*dest = regFindFreeReg(RI);
return std::make_pair(Imm32(addr), 0);
}
}
unsigned offset;
@ -473,38 +471,15 @@ static OpArg regBuildMemAddress(RegInfo& RI, InstLoc I, InstLoc AI,
} else {
baseReg = regEnsureInReg(RI, AddrBase);
}
return MDisp(baseReg, offset);
return std::make_pair(R(baseReg), offset);
}
static void regEmitMemLoad(RegInfo& RI, InstLoc I, unsigned Size) {
X64Reg reg;
OpArg addr = regBuildMemAddress(RI, I, getOp1(I), 1, Size, &reg);
auto info = regBuildMemAddress(RI, I, getOp1(I), 1, Size, &reg);
RI.Jit->TEST(32, R(ECX), Imm32(0x0C000000 | mem_mask));
FixupBranch argh = RI.Jit->J_CC(CC_Z);
// Slow safe read using Memory::Read_Ux routines
#ifdef _M_IX86 // we don't allocate EAX on x64 so no reason to save it.
if (reg != EAX) {
RI.Jit->PUSH(32, R(EAX));
}
#endif
switch (Size)
{
case 32: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), ECX); break;
case 16: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), ECX); break;
case 8: RI.Jit->ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), ECX); break;
}
if (reg != EAX) {
RI.Jit->MOV(32, R(reg), R(EAX));
#ifdef _M_IX86
RI.Jit->POP(32, R(EAX));
#endif
}
FixupBranch arg2 = RI.Jit->J();
RI.Jit->SetJumpTarget(argh);
RI.Jit->UnsafeLoadRegToReg(ECX, reg, Size, 0, false);
RI.Jit->SetJumpTarget(arg2);
RI.Jit->SafeLoadToReg(reg, info.first, Size, info.second, regsInUse(RI), false);
if (regReadUse(RI, I))
RI.regs[reg] = I;
}
@ -521,8 +496,11 @@ static OpArg regImmForConst(RegInfo& RI, InstLoc I, unsigned Size) {
}
static void regEmitMemStore(RegInfo& RI, InstLoc I, unsigned Size) {
OpArg addr = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0);
RI.Jit->LEA(32, ECX, addr);
auto info = regBuildMemAddress(RI, I, getOp2(I), 2, Size, 0);
if (info.first.IsImm())
RI.Jit->MOV(32, R(ECX), info.first);
else
RI.Jit->LEA(32, ECX, MDisp(info.first.GetSimpleReg(), info.second));
regSpill(RI, EAX);
if (isImm(*getOp1(I))) {
RI.Jit->MOV(Size, R(EAX), regImmForConst(RI, getOp1(I), Size));

View File

@ -7,7 +7,6 @@
#include "Common.h"
#include "x64Emitter.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "../../HLE/HLE.h"
#include "../../Core.h"
#include "../../PatchEngine.h"

View File

@ -13,7 +13,6 @@
#include "CPUDetect.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "../../HW/GPFifo.h"
#include "../../Core.h"

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "Common.h"
#include "Thunk.h"
#include "../../ConfigManager.h"
#include "../PowerPC.h"

View File

@ -6,7 +6,6 @@
// Should give a very noticable speed boost to paired single heavy code.
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"

View File

@ -4,7 +4,6 @@
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"

View File

@ -11,7 +11,6 @@
#include "../PPCTables.h"
#include "x64Emitter.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "JitIL.h"

View File

@ -15,7 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -16,7 +16,6 @@
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -15,7 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -16,7 +16,6 @@
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -16,7 +16,6 @@
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -2,7 +2,6 @@
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -15,7 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -15,7 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../../Core.h"
#include "../PowerPC.h"

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "x64ABI.h"
#include "Thunk.h"
#include "CPUDetect.h"
#include "x64Emitter.h"
@ -167,7 +166,7 @@ void CommonAsmRoutines::GenQuantizedStores() {
MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX));
FixupBranch skip_complex = J();
SetJumpTarget(too_complex);
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX, /* noProlog = */ true);
ABI_CallFunctionRR((void *)&WriteDual32, RAX, RCX, /* noProlog = */ true);
SetJumpTarget(skip_complex);
RET();
#else
@ -184,10 +183,10 @@ void CommonAsmRoutines::GenQuantizedStores() {
FixupBranch arg2 = J();
SetJumpTarget(argh);
MOV(32, R(EAX), M(((char*)&psTemp)));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true);
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true);
MOV(32, R(EAX), M(((char*)&psTemp)+4));
ADD(32, R(ECX), Imm32(4));
ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true);
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, ECX, /* noProlog = */ true);
SetJumpTarget(arg2);
RET();
#endif

View File

@ -6,7 +6,6 @@
#define _JITASMCOMMON_H
#include "../JitCommon/Jit_Util.h"
#include "Thunk.h"
class CommonAsmRoutinesBase {
public:
@ -65,9 +64,6 @@ public:
void GenFifoXmm64Write();
void GenFifoFloatWrite();
private:
ThunkManager thunks;
};
#endif

View File

@ -13,7 +13,6 @@
#include "x64Emitter.h"
#include "x64ABI.h"
#include "Thunk.h"
#include "x64Analyzer.h"
#include "StringUtil.h"
@ -76,7 +75,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
if (info.displacement) {
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
}
PushRegistersAndAlignStack(registersInUse);
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize)
{
case 4:
@ -96,7 +95,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
MOV(32, R(dataReg), R(EAX));
}
PopRegistersAndAlignStack(registersInUse);
ABI_PopRegistersAndAdjustStack(registersInUse, true);
RET();
#endif
return trampoline;
@ -137,7 +136,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
}
PushRegistersAndAlignStack(registersInUse);
ABI_PushRegistersAndAdjustStack(registersInUse, true);
switch (info.operandSize)
{
case 8:
@ -154,7 +153,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r
break;
}
PopRegistersAndAlignStack(registersInUse);
ABI_PopRegistersAndAdjustStack(registersInUse, true);
RET();
#endif

View File

@ -8,7 +8,6 @@
#include "Common.h"
#include "x64Emitter.h"
#include "x64Analyzer.h"
#include "Thunk.h"
// meh.
#if defined(_WIN32)
@ -234,8 +233,6 @@ public:
const u8 *GetReadTrampoline(const InstructionInfo &info, u32 registersInUse);
const u8 *GetWriteTrampoline(const InstructionInfo &info, u32 registersInUse);
private:
ThunkManager thunks;
};
#endif

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include "Common.h"
#include "Thunk.h"
#include "CPUDetect.h"
#include "../PowerPC.h"
@ -58,34 +57,46 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
#endif
}
u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend)
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend)
{
u8 *result;
#ifdef _M_X64
if (opAddress.IsSimpleReg())
{
// Deal with potential wraparound. (This is just a heuristic, and it would
// be more correct to actually mirror the first page at the end, but the
// only case where it probably actually matters is JitIL turning adds into
// offsets with the wrong sign, so whatever. Since the original code
// *could* try to wrap an address around, however, this is the correct
// place to address the issue.)
if ((u32) offset >= 0x1000) {
LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
opAddress = R(reg_value);
offset = 0;
}
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
}
else
{
MOV(32, R(EAX), opAddress);
MOV(32, R(reg_value), opAddress);
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MComplex(RBX, EAX, SCALE_1, offset));
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
}
#else
if (opAddress.IsImm())
{
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
}
else
{
if (!opAddress.IsSimpleReg(EAX))
MOV(32, R(EAX), opAddress);
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
if (!opAddress.IsSimpleReg(reg_value))
MOV(32, R(reg_value), opAddress);
AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOVZX(32, accessSize, EAX, MDisp(EAX, (u32)Memory::base + offset));
MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
}
#endif
@ -95,26 +106,27 @@ u8 *EmuCodeBlock::UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize,
if (accessSize == 32)
{
BSWAP(32, EAX);
BSWAP(32, reg_value);
}
else if (accessSize == 16)
{
BSWAP(32, EAX);
BSWAP(32, reg_value);
if (signExtend)
SAR(32, R(EAX), Imm8(16));
SAR(32, R(reg_value), Imm8(16));
else
SHR(32, R(EAX), Imm8(16));
SHR(32, R(reg_value), Imm8(16));
}
else if (signExtend)
{
// TODO: bake 8-bit into the original load.
MOVSX(32, accessSize, EAX, R(EAX));
MOVSX(32, accessSize, reg_value, R(reg_value));
}
return result;
}
void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend)
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend)
{
registersInUse &= ~(1 << RAX | 1 << reg_value);
#if defined(_M_X64)
#ifdef ENABLE_MEM_CHECK
if (!Core::g_CoreStartupParameter.bMMU && !Core::g_CoreStartupParameter.bEnableDebugging && Core::g_CoreStartupParameter.bFastmem)
@ -122,10 +134,8 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
if (!Core::g_CoreStartupParameter.bMMU && Core::g_CoreStartupParameter.bFastmem)
#endif
{
u8 *mov = UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
u8 *mov = UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
// XXX: are these dead anyway?
registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
registersInUseAtLoc[mov] = registersInUse;
}
else
@ -149,20 +159,26 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
u32 address = (u32)opAddress.offset + offset;
if ((address & mem_mask) == 0)
{
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
}
else
{
ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize)
{
case 32: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), address); break;
case 16: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), address); break;
case 8: ABI_CallFunctionC(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), address); break;
case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
MOVSX(32, accessSize, reg_value, R(EAX));
}
else if (reg_value != EAX)
{
MOVZX(32, accessSize, reg_value, R(EAX));
}
}
}
@ -173,45 +189,57 @@ void EmuCodeBlock::SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s
MOV(32, R(EAX), opAddress);
ADD(32, R(EAX), Imm32(offset));
TEST(32, R(EAX), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
FixupBranch fast = J_CC(CC_Z, true);
ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize)
{
case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), EAX); break;
case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), EAX); break;
case 8: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), EAX); break;
case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, EAX); break;
case 16: ABI_CallFunctionR((void *)&Memory::Read_U16_ZX, EAX); break;
case 8: ABI_CallFunctionR((void *)&Memory::Read_U8_ZX, EAX); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
MOVSX(32, accessSize, reg_value, R(EAX));
}
else if (reg_value != EAX)
{
MOVZX(32, accessSize, reg_value, R(EAX));
}
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeLoadToEAX(R(EAX), accessSize, 0, signExtend);
UnsafeLoadToReg(reg_value, R(EAX), accessSize, 0, signExtend);
SetJumpTarget(exit);
}
else
{
TEST(32, opAddress, Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
FixupBranch fast = J_CC(CC_Z, true);
ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize)
{
case 32: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), opAddress); break;
case 16: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U16_ZX, 1), opAddress); break;
case 8: ABI_CallFunctionA(thunks.ProtectFunction((void *)&Memory::Read_U8_ZX, 1), opAddress); break;
case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, opAddress); break;
case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, opAddress); break;
case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, opAddress); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
if (signExtend && accessSize < 32)
{
// Need to sign extend values coming from the Read_U* functions.
MOVSX(32, accessSize, EAX, R(EAX));
MOVSX(32, accessSize, reg_value, R(EAX));
}
else if (reg_value != EAX)
{
MOVZX(32, accessSize, reg_value, R(EAX));
}
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeLoadToEAX(opAddress, accessSize, offset, signExtend);
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
SetJumpTarget(exit);
}
}
@ -239,6 +267,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
// Destroys both arg registers
void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags)
{
registersInUse &= ~(1 << RAX);
#if defined(_M_X64)
if (!Core::g_CoreStartupParameter.bMMU &&
Core::g_CoreStartupParameter.bFastmem &&
@ -255,8 +284,6 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
NOP(1);
}
// XXX: are these dead anyway?
registersInUse &= ~((1 << ABI_PARAM1) | (1 << ABI_PARAM2) | (1 << RAX));
registersInUseAtLoc[mov] = registersInUse;
return;
}
@ -280,16 +307,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
#endif
TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch fast = J_CC(CC_Z);
FixupBranch fast = J_CC(CC_Z, true);
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
bool noProlog = flags & SAFE_WRITE_NO_PROLOG;
bool swap = !(flags & SAFE_WRITE_NO_SWAP);
ABI_PushRegistersAndAdjustStack(registersInUse, false);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr, noProlog); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr, noProlog); break;
case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, noProlog); break;
case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, noProlog); break;
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, noProlog); break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, false);
FixupBranch exit = J();
SetJumpTarget(fast);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap);
@ -315,7 +344,9 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re
MOV(32, R(EAX), M(&float_buffer));
BSWAP(32, EAX);
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
ABI_CallFunctionRR(thunks.ProtectFunction(((void *)&Memory::Write_U32), 2), EAX, reg_addr);
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);
ABI_PopRegistersAndAdjustStack(registersInUse, false);
FixupBranch arg2 = J();
SetJumpTarget(argh);
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));

View File

@ -6,7 +6,6 @@
#define _JITUTIL_H
#include "x64Emitter.h"
#include "Thunk.h"
#include <unordered_map>
// Like XCodeBlock but has some utilities for memory access.
@ -16,8 +15,8 @@ public:
void UnsafeLoadRegToRegNoSwap(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset);
// these return the address of the MOV, for backpatching
u8 *UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true);
u8 *UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend);
u8 *UnsafeLoadToReg(Gen::X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend);
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend);
enum SafeWriteFlags
{
SAFE_WRITE_NO_SWAP = 1,
@ -38,7 +37,6 @@ public:
void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm);
protected:
ThunkManager thunks;
std::unordered_map<u8 *, u32> registersInUseAtLoc;
};