small code cleanup in JIT: use JITIL's nice JITDISABLE macro
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4477 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
31e61da40d
commit
5b4d12c1f3
|
@ -77,7 +77,10 @@ void Jit(u32 em_address);
|
||||||
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
|
// #define INSTRUCTION_START PPCTables::CountInstruction(inst);
|
||||||
#define INSTRUCTION_START
|
#define INSTRUCTION_START
|
||||||
|
|
||||||
|
// JITDISABLE(type): early-out guard placed at the top of a JIT instruction emitter.
// If either the global bJITOff flag or the per-category bJIT<type>Off flag in
// Core::g_CoreStartupParameter is set, it calls Default(inst) and returns from
// the enclosing function. `type` is token-pasted into the flag name, e.g.
// JITDISABLE(LoadStore) tests bJITLoadStoreOff.
// The expansion refers to a variable named `inst`, so one must be in scope.
// It expands to a complete if-statement with a braced body; the call sites in
// this change use it without a trailing semicolon.
// NOTE(review): because it is a bare `if`, using it as the un-braced body of an
// outer if/else would make the outer `else` bind to this `if`.
#define JITDISABLE(type) \
|
||||||
|
if (Core::g_CoreStartupParameter.bJITOff || \
|
||||||
|
Core::g_CoreStartupParameter.bJIT##type##Off) \
|
||||||
|
{Default(inst); return;}
|
||||||
|
|
||||||
class TrampolineCache : public Gen::XCodeBlock
|
class TrampolineCache : public Gen::XCodeBlock
|
||||||
{
|
{
|
||||||
|
|
|
@ -73,9 +73,8 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
|
||||||
|
|
||||||
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(FloatingPoint)
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
if (inst.Rc) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
@ -104,9 +103,8 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::fmaddXX(UGeckoInstruction inst)
|
void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(FloatingPoint)
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
if (inst.Rc) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
@ -162,9 +160,8 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::fsign(UGeckoInstruction inst)
|
void Jit64::fsign(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(FloatingPoint)
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
if (inst.Rc) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
@ -195,8 +192,7 @@ void Jit64::fsign(UGeckoInstruction inst)
|
||||||
void Jit64::fmrx(UGeckoInstruction inst)
|
void Jit64::fmrx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
|
JITDISABLE(FloatingPoint)
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
if (inst.Rc) {
|
if (inst.Rc) {
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
}
|
}
|
||||||
|
@ -213,9 +209,9 @@ void Jit64::fcmpx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
// TODO : This still causes crashes in Nights, and broken graphics
|
// TODO : This still causes crashes in Nights, and broken graphics
|
||||||
// in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :(
|
// in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :(
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || jo.fpAccurateFcmp
|
JITDISABLE(FloatingPoint)
|
||||||
|| Core::g_CoreStartupParameter.bJITFloatingPointOff) {
|
if (jo.fpAccurateFcmp) {
|
||||||
Default(inst); return; // turn off from debugger
|
Default(inst); return; // turn off from debugger
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -33,441 +33,434 @@
|
||||||
#include "JitAsm.h"
|
#include "JitAsm.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
void Jit64::lbzx(UGeckoInstruction inst)
|
void Jit64::lbzx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff
|
INSTRUCTION_START
|
||||||
|| Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
|
JITDISABLE(LoadStore)
|
||||||
{Default(inst); return;} // turn off from debugger
|
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
|
||||||
INSTRUCTION_START;
|
{Default(inst); return;} // braces required: without them the return runs unconditionally and lbzx is never JIT-compiled
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
if (b == d || a == d)
|
if (b == d || a == d)
|
||||||
gpr.LoadToX64(d, true, true);
|
gpr.LoadToX64(d, true, true);
|
||||||
else
|
else
|
||||||
gpr.LoadToX64(d, false, true);
|
gpr.LoadToX64(d, false, true);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||||
if (a)
|
if (a)
|
||||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
#if 0
|
#if 0
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
|
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
#else
|
#else
|
||||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
|
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
|
||||||
#endif
|
#endif
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::lwzx(UGeckoInstruction inst)
|
void Jit64::lwzx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStore)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
if (b == d || a == d)
|
if (b == d || a == d)
|
||||||
gpr.LoadToX64(d, true, true);
|
gpr.LoadToX64(d, true, true);
|
||||||
else
|
else
|
||||||
gpr.LoadToX64(d, false, true);
|
gpr.LoadToX64(d, false, true);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||||
if (a)
|
if (a)
|
||||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
#if 1
|
#if 1
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
|
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
#else
|
#else
|
||||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
|
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
|
||||||
#endif
|
#endif
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::lhax(UGeckoInstruction inst)
|
void Jit64::lhax(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||||
|
gpr.Lock(a, b, d);
|
||||||
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
|
if (b == d || a == d)
|
||||||
|
gpr.LoadToX64(d, true, true);
|
||||||
|
else
|
||||||
|
gpr.LoadToX64(d, false, true);
|
||||||
|
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||||
|
if (a)
|
||||||
|
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
|
|
||||||
|
// Some homebrew actually loads from a hw reg with this instruction
|
||||||
|
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
|
||||||
|
gpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::lXz(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
|
||||||
|
{Default(inst); return;} // braces required: without them the return runs unconditionally and the load paths below are dead code
|
||||||
|
|
||||||
|
int d = inst.RD;
|
||||||
|
int a = inst.RA;
|
||||||
|
|
||||||
|
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||||
|
// Will give nice boost to dual core mode
|
||||||
|
// (mb2): I agree,
|
||||||
|
// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode
|
||||||
|
// ... maybe the throttle one already do that :p
|
||||||
|
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
||||||
|
if (Core::GetStartupParameter().bSkipIdle &&
|
||||||
|
inst.OPCD == 32 &&
|
||||||
|
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
|
||||||
|
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
|
||||||
|
(Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
|
||||||
|
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
// TODO(LinesPrower):
|
||||||
{Default(inst); return;} // turn off from debugger
|
// - Rewrite this!
|
||||||
INSTRUCTION_START;
|
// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
|
||||||
|
// It only demonstrates the idea
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
// do our job at first
|
||||||
gpr.Lock(a, b, d);
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
if (b == d || a == d)
|
gpr.Lock(d, a);
|
||||||
gpr.LoadToX64(d, true, true);
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
else
|
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
||||||
gpr.LoadToX64(d, false, true);
|
gpr.LoadToX64(d, false, true);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
|
||||||
if (a)
|
|
||||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
|
||||||
|
|
||||||
// Some homebrew actually loads from a hw reg with this instruction
|
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
|
||||||
|
gpr.Flush(FLUSH_ALL);
|
||||||
|
|
||||||
|
// if it's still 0, we can wait until the next event
|
||||||
|
CMP(32, R(RAX), Imm32(0));
|
||||||
|
FixupBranch noIdle = J_CC(CC_NE);
|
||||||
|
|
||||||
|
gpr.Flush(FLUSH_ALL);
|
||||||
|
fpr.Flush(FLUSH_ALL);
|
||||||
|
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
||||||
|
|
||||||
|
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
|
||||||
|
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
|
||||||
|
JMP(asm_routines.testExceptions, true);
|
||||||
|
|
||||||
|
SetJumpTarget(noIdle);
|
||||||
|
|
||||||
|
//js.compilerPC += 8;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::lXz(UGeckoInstruction inst)
|
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
|
||||||
|
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
|
||||||
|
|
||||||
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
|
if (!a)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff
|
Default(inst);
|
||||||
|| Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
|
return;
|
||||||
{Default(inst); return;} // turn off from debugger
|
}
|
||||||
INSTRUCTION_START;
|
int accessSize;
|
||||||
|
switch (inst.OPCD)
|
||||||
int d = inst.RD;
|
{
|
||||||
int a = inst.RA;
|
case 32:
|
||||||
|
accessSize = 32;
|
||||||
|
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
|
||||||
|
break; //lwz
|
||||||
|
case 40: accessSize = 16; break; //lhz
|
||||||
|
case 34: accessSize = 8; break; //lbz
|
||||||
|
default:
|
||||||
|
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
|
||||||
|
PanicAlert("lXz: invalid access size");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
//Still here? Do regular path.
|
||||||
// Will give nice boost to dual core mode
|
|
||||||
// (mb2): I agree,
|
|
||||||
// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode
|
|
||||||
// ... maybe the throttle one already do that :p
|
|
||||||
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
|
||||||
if (Core::GetStartupParameter().bSkipIdle &&
|
|
||||||
inst.OPCD == 32 &&
|
|
||||||
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
|
|
||||||
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
|
|
||||||
(Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
|
|
||||||
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
|
||||||
{
|
|
||||||
// TODO(LinesPrower):
|
|
||||||
// - Rewrite this!
|
|
||||||
// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
|
|
||||||
// It only demonstrates the idea
|
|
||||||
|
|
||||||
// do our job at first
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
|
||||||
gpr.Lock(d, a);
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
|
||||||
gpr.LoadToX64(d, false, true);
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
|
||||||
|
|
||||||
// if it's still 0, we can wait until the next event
|
|
||||||
CMP(32, R(RAX), Imm32(0));
|
|
||||||
FixupBranch noIdle = J_CC(CC_NE);
|
|
||||||
|
|
||||||
gpr.Flush(FLUSH_ALL);
|
|
||||||
fpr.Flush(FLUSH_ALL);
|
|
||||||
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
|
||||||
|
|
||||||
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
|
|
||||||
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
|
|
||||||
JMP(asm_routines.testExceptions, true);
|
|
||||||
|
|
||||||
SetJumpTarget(noIdle);
|
|
||||||
|
|
||||||
//js.compilerPC += 8;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
|
|
||||||
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
|
|
||||||
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
|
||||||
if (!a)
|
|
||||||
{
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
int accessSize;
|
|
||||||
switch (inst.OPCD)
|
|
||||||
{
|
|
||||||
case 32:
|
|
||||||
accessSize = 32;
|
|
||||||
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
|
|
||||||
break; //lwz
|
|
||||||
case 40: accessSize = 16; break; //lhz
|
|
||||||
case 34: accessSize = 8; break; //lbz
|
|
||||||
default:
|
|
||||||
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
|
|
||||||
PanicAlert("lXz: invalid access size");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Still here? Do regular path.
|
|
||||||
#if defined(_M_X64)
|
#if defined(_M_X64)
|
||||||
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
|
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
|
||||||
#else
|
#else
|
||||||
if (true) {
|
if (true) {
|
||||||
#endif
|
#endif
|
||||||
// Safe and boring
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
|
||||||
gpr.Lock(d, a);
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
|
|
||||||
gpr.LoadToX64(d, false, true);
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fast and daring
|
|
||||||
gpr.Lock(a, d);
|
|
||||||
gpr.LoadToX64(a, true, false);
|
|
||||||
gpr.LoadToX64(d, a == d, true);
|
|
||||||
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
|
|
||||||
switch (accessSize) {
|
|
||||||
case 32:
|
|
||||||
BSWAP(32, gpr.R(d).GetSimpleReg());
|
|
||||||
break;
|
|
||||||
// Careful in the backpatch - need to properly nop over first
|
|
||||||
// case 16:
|
|
||||||
// BSWAP(32, gpr.R(d).GetSimpleReg());
|
|
||||||
// SHR(32, gpr.R(d), Imm8(16));
|
|
||||||
// break;
|
|
||||||
}
|
|
||||||
gpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::lha(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int d = inst.RD;
|
|
||||||
int a = inst.RA;
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
|
||||||
// Safe and boring
|
// Safe and boring
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
gpr.Lock(d, a);
|
gpr.Lock(d, a);
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
|
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
|
||||||
gpr.LoadToX64(d, d == a, true);
|
gpr.LoadToX64(d, false, true);
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::lwzux(UGeckoInstruction inst)
|
// Fast and daring
|
||||||
|
gpr.Lock(a, d);
|
||||||
|
gpr.LoadToX64(a, true, false);
|
||||||
|
gpr.LoadToX64(d, a == d, true);
|
||||||
|
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
|
||||||
|
switch (accessSize) {
|
||||||
|
case 32:
|
||||||
|
BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||||
|
break;
|
||||||
|
// Careful in the backpatch - need to properly nop over first
|
||||||
|
// case 16:
|
||||||
|
// BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||||
|
// SHR(32, gpr.R(d), Imm8(16));
|
||||||
|
// break;
|
||||||
|
}
|
||||||
|
gpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::lha(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
int d = inst.RD;
|
||||||
|
int a = inst.RA;
|
||||||
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
|
// Safe and boring
|
||||||
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
|
gpr.Lock(d, a);
|
||||||
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
|
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
|
||||||
|
gpr.LoadToX64(d, d == a, true);
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::lwzux(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||||
|
if (!a || a == d || a == b)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
Default(inst);
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
|
||||||
if (!a || a == d || a == b)
|
|
||||||
{
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
gpr.Lock(a, b, d);
|
|
||||||
|
|
||||||
gpr.LoadToX64(d, b == d, true);
|
|
||||||
gpr.LoadToX64(a, true, true);
|
|
||||||
ADD(32, gpr.R(a), gpr.R(b));
|
|
||||||
MOV(32, R(EAX), gpr.R(a));
|
|
||||||
SafeLoadRegToEAX(EAX, 32, 0, false);
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
gpr.Lock(a, b, d);
|
||||||
|
|
||||||
// Zero cache line.
|
gpr.LoadToX64(d, b == d, true);
|
||||||
void Jit64::dcbz(UGeckoInstruction inst)
|
gpr.LoadToX64(a, true, true);
|
||||||
{
|
ADD(32, gpr.R(a), gpr.R(b));
|
||||||
Default(inst); return;
|
MOV(32, R(EAX), gpr.R(a));
|
||||||
|
SafeLoadRegToEAX(EAX, 32, 0, false);
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
gpr.UnlockAll();
|
||||||
{Default(inst); return;} // turn off from debugger
|
return;
|
||||||
INSTRUCTION_START;
|
}
|
||||||
|
|
||||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
// Zero cache line.
|
||||||
if (inst.RA)
|
void Jit64::dcbz(UGeckoInstruction inst)
|
||||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
{
|
||||||
AND(32, R(EAX), Imm32(~31));
|
INSTRUCTION_START
|
||||||
XORPD(XMM0, R(XMM0));
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
Default(inst); return;
|
||||||
|
|
||||||
|
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||||
|
if (inst.RA)
|
||||||
|
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||||
|
AND(32, R(EAX), Imm32(~31));
|
||||||
|
XORPD(XMM0, R(XMM0));
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||||
#else
|
#else
|
||||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
|
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
|
||||||
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
|
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::stX(UGeckoInstruction inst)
|
void Jit64::stX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
int s = inst.RS;
|
||||||
|
int a = inst.RA;
|
||||||
|
|
||||||
|
bool update = inst.OPCD & 1;
|
||||||
|
|
||||||
|
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||||
|
if (a || update)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
int accessSize;
|
||||||
{Default(inst); return;} // turn off from debugger
|
switch (inst.OPCD & ~1)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int s = inst.RS;
|
|
||||||
int a = inst.RA;
|
|
||||||
|
|
||||||
bool update = inst.OPCD & 1;
|
|
||||||
|
|
||||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
|
||||||
if (a || update)
|
|
||||||
{
|
{
|
||||||
int accessSize;
|
case 36: accessSize = 32; break; //stw
|
||||||
switch (inst.OPCD & ~1)
|
case 44: accessSize = 16; break; //sth
|
||||||
{
|
case 38: accessSize = 8; break; //stb
|
||||||
case 36: accessSize = 32; break; //stw
|
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
||||||
case 44: accessSize = 16; break; //sth
|
}
|
||||||
case 38: accessSize = 8; break; //stb
|
|
||||||
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gpr.R(a).IsImm())
|
if (gpr.R(a).IsImm())
|
||||||
|
{
|
||||||
|
// If we already know the address through constant folding, we can do some
|
||||||
|
// fun tricks...
|
||||||
|
u32 addr = (u32)gpr.R(a).offset;
|
||||||
|
addr += offset;
|
||||||
|
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||||
{
|
{
|
||||||
// If we already know the address through constant folding, we can do some
|
if (offset && update)
|
||||||
// fun tricks...
|
gpr.SetImmediate32(a, addr);
|
||||||
u32 addr = (u32)gpr.R(a).offset;
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
addr += offset;
|
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
||||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
if (offset && update)
|
|
||||||
gpr.SetImmediate32(a, addr);
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
|
||||||
switch (accessSize)
|
|
||||||
{
|
|
||||||
// No need to protect these, they don't touch any state
|
// No need to protect these, they don't touch any state
|
||||||
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
||||||
case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
|
case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
|
||||||
case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
|
case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
|
||||||
case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
|
case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
|
||||||
}
|
|
||||||
js.fifoBytesThisBlock += accessSize >> 3;
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
js.fifoBytesThisBlock += accessSize >> 3;
|
||||||
{
|
|
||||||
if (offset && update)
|
|
||||||
gpr.SetImmediate32(a, addr);
|
|
||||||
MOV(accessSize, R(EAX), gpr.R(s));
|
|
||||||
BSWAP(accessSize, EAX);
|
|
||||||
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Other IO not worth the trouble.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Optimized stack access?
|
|
||||||
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
|
|
||||||
{
|
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
|
||||||
MOV(32, R(EAX), gpr.R(s));
|
|
||||||
BSWAP(32, EAX);
|
|
||||||
#ifdef _M_X64
|
|
||||||
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
|
|
||||||
#elif _M_IX86
|
|
||||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
|
|
||||||
#endif
|
|
||||||
if (update)
|
|
||||||
ADD(32, gpr.R(a), Imm32(offset));
|
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
||||||
/* // TODO - figure out why Beyond Good and Evil hates this
|
|
||||||
#ifdef _M_X64
|
|
||||||
if (accessSize == 32 && !update && jo.enableFastMem)
|
|
||||||
{
|
{
|
||||||
// Fast and daring - requires 64-bit
|
if (offset && update)
|
||||||
MOV(32, R(EAX), gpr.R(s));
|
gpr.SetImmediate32(a, addr);
|
||||||
gpr.LoadToX64(a, true, false);
|
MOV(accessSize, R(EAX), gpr.R(s));
|
||||||
BSWAP(32, EAX);
|
BSWAP(accessSize, EAX);
|
||||||
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
|
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
#endif*/
|
// Other IO not worth the trouble.
|
||||||
|
}
|
||||||
|
|
||||||
//Still here? Do regular path.
|
// Optimized stack access?
|
||||||
gpr.Lock(s, a);
|
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
|
||||||
gpr.FlushLockX(ECX, EDX);
|
{
|
||||||
MOV(32, R(EDX), gpr.R(a));
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
MOV(32, R(ECX), gpr.R(s));
|
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||||
if (offset)
|
MOV(32, R(EAX), gpr.R(s));
|
||||||
ADD(32, R(EDX), Imm32((u32)offset));
|
BSWAP(32, EAX);
|
||||||
if (update && offset)
|
#ifdef _M_X64
|
||||||
{
|
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
|
||||||
gpr.LoadToX64(a, true, true);
|
#elif _M_IX86
|
||||||
MOV(32, gpr.R(a), R(EDX));
|
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
}
|
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
|
||||||
TEST(32, R(EDX), Imm32(0x0C000000));
|
|
||||||
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
|
||||||
BSWAP(accessSize, ECX);
|
|
||||||
#ifdef _M_X64
|
|
||||||
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
|
||||||
#else
|
|
||||||
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
|
|
||||||
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
|
|
||||||
#endif
|
#endif
|
||||||
FixupBranch skip_call = J();
|
if (update)
|
||||||
SetJumpTarget(unsafe_addr);
|
ADD(32, gpr.R(a), Imm32(offset));
|
||||||
switch (accessSize)
|
|
||||||
{
|
|
||||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
|
|
||||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
|
|
||||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
|
|
||||||
}
|
|
||||||
SetJumpTarget(skip_call);
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Default(inst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::stXx(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int a = inst.RA, b = inst.RB, s = inst.RS;
|
|
||||||
if (!a || a == s || a == b)
|
|
||||||
{
|
|
||||||
Default(inst);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
gpr.Lock(a, b, s);
|
|
||||||
gpr.FlushLockX(ECX, EDX);
|
|
||||||
|
|
||||||
if (inst.SUBOP10 & 32) {
|
/* // TODO - figure out why Beyond Good and Evil hates this
|
||||||
gpr.LoadToX64(a, true, true);
|
#ifdef _M_X64
|
||||||
ADD(32, gpr.R(a), gpr.R(b));
|
if (accessSize == 32 && !update && jo.enableFastMem)
|
||||||
MOV(32, R(EDX), gpr.R(a));
|
{
|
||||||
} else {
|
// Fast and daring - requires 64-bit
|
||||||
MOV(32, R(EDX), gpr.R(a));
|
MOV(32, R(EAX), gpr.R(s));
|
||||||
ADD(32, R(EDX), gpr.R(b));
|
gpr.LoadToX64(a, true, false);
|
||||||
|
BSWAP(32, EAX);
|
||||||
|
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
unsigned accessSize;
|
#endif*/
|
||||||
switch (inst.SUBOP10 & ~32) {
|
|
||||||
|
//Still here? Do regular path.
|
||||||
|
gpr.Lock(s, a);
|
||||||
|
gpr.FlushLockX(ECX, EDX);
|
||||||
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
|
MOV(32, R(ECX), gpr.R(s));
|
||||||
|
if (offset)
|
||||||
|
ADD(32, R(EDX), Imm32((u32)offset));
|
||||||
|
if (update && offset)
|
||||||
|
{
|
||||||
|
gpr.LoadToX64(a, true, true);
|
||||||
|
MOV(32, gpr.R(a), R(EDX));
|
||||||
|
}
|
||||||
|
TEST(32, R(EDX), Imm32(0x0C000000));
|
||||||
|
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
||||||
|
BSWAP(accessSize, ECX);
|
||||||
|
#ifdef _M_X64
|
||||||
|
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
||||||
|
#else
|
||||||
|
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
|
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
|
||||||
|
#endif
|
||||||
|
FixupBranch skip_call = J();
|
||||||
|
SetJumpTarget(unsafe_addr);
|
||||||
|
switch (accessSize)
|
||||||
|
{
|
||||||
|
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
|
||||||
|
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
|
||||||
|
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
|
||||||
|
}
|
||||||
|
SetJumpTarget(skip_call);
|
||||||
|
gpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Default(inst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::stXx(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(LoadStore)
|
||||||
|
|
||||||
|
int a = inst.RA, b = inst.RB, s = inst.RS;
|
||||||
|
if (!a || a == s || a == b)
|
||||||
|
{
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
gpr.Lock(a, b, s);
|
||||||
|
gpr.FlushLockX(ECX, EDX);
|
||||||
|
|
||||||
|
if (inst.SUBOP10 & 32) {
|
||||||
|
gpr.LoadToX64(a, true, true);
|
||||||
|
ADD(32, gpr.R(a), gpr.R(b));
|
||||||
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
|
} else {
|
||||||
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
|
ADD(32, R(EDX), gpr.R(b));
|
||||||
|
}
|
||||||
|
unsigned accessSize;
|
||||||
|
switch (inst.SUBOP10 & ~32) {
|
||||||
case 151: accessSize = 32; break;
|
case 151: accessSize = 32; break;
|
||||||
case 407: accessSize = 16; break;
|
case 407: accessSize = 16; break;
|
||||||
case 215: accessSize = 8; break;
|
case 215: accessSize = 8; break;
|
||||||
}
|
|
||||||
|
|
||||||
MOV(32, R(ECX), gpr.R(s));
|
|
||||||
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MOV(32, R(ECX), gpr.R(s));
|
||||||
|
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
|
||||||
|
|
||||||
|
gpr.UnlockAll();
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// A few games use these heavily in video codecs.
|
// A few games use these heavily in video codecs.
|
||||||
void Jit64::lmw(UGeckoInstruction inst)
|
void Jit64::lmw(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
|
@ -512,4 +505,4 @@ void Jit64::icbi(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
Default(inst);
|
Default(inst);
|
||||||
WriteExit(js.compilerPC + 4, 0);
|
WriteExit(js.compilerPC + 4, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,9 +51,8 @@ u32 GC_ALIGNED16(temp32);
|
||||||
|
|
||||||
void Jit64::lfs(UGeckoInstruction inst)
|
void Jit64::lfs(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
|
@ -88,9 +87,8 @@ void Jit64::lfs(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::lfd(UGeckoInstruction inst)
|
void Jit64::lfd(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
|
@ -155,10 +153,8 @@ void Jit64::lfd(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::stfd(UGeckoInstruction inst)
|
void Jit64::stfd(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
|
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
|
@ -234,9 +230,8 @@ void Jit64::stfd(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::stfs(UGeckoInstruction inst)
|
void Jit64::stfs(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
bool update = inst.OPCD & 1;
|
bool update = inst.OPCD & 1;
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
|
@ -291,9 +286,8 @@ void Jit64::stfs(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::stfsx(UGeckoInstruction inst)
|
void Jit64::stfsx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
|
@ -311,9 +305,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::lfsx(UGeckoInstruction inst)
|
void Jit64::lfsx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStoreFloating)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
fpr.Lock(inst.RS);
|
fpr.Lock(inst.RS);
|
||||||
fpr.LoadToX64(inst.RS, false, true);
|
fpr.LoadToX64(inst.RS, false, true);
|
||||||
|
|
|
@ -91,9 +91,8 @@ const double GC_ALIGNED16(m_dequantizeTableD[]) =
|
||||||
// We will have to break block after quantizers are written to.
|
// We will have to break block after quantizers are written to.
|
||||||
void Jit64::psq_st(UGeckoInstruction inst)
|
void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStorePaired)
|
||||||
INSTRUCTION_START;
|
|
||||||
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
||||||
|
|
||||||
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
|
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
|
||||||
|
@ -296,9 +295,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
||||||
|
|
||||||
void Jit64::psq_l(UGeckoInstruction inst)
|
void Jit64::psq_l(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(LoadStorePaired)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
js.block_flags |= BLOCK_USE_GQR0 << inst.I;
|
||||||
|
|
||||||
|
|
|
@ -34,370 +34,361 @@
|
||||||
// cmppd, andpd, andnpd, or
|
// cmppd, andpd, andnpd, or
|
||||||
// lfsx, ps_merge01 etc
|
// lfsx, ps_merge01 etc
|
||||||
|
|
||||||
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||||
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||||
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
||||||
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
|
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
|
||||||
|
|
||||||
void Jit64::ps_mr(UGeckoInstruction inst)
|
void Jit64::ps_mr(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int d = inst.FD;
|
||||||
|
int b = inst.FB;
|
||||||
|
if (d == b)
|
||||||
|
return;
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::ps_sel(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
|
||||||
|
Default(inst); return;
|
||||||
|
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
|
||||||
|
int d = inst.FD;
|
||||||
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
int c = inst.FC;
|
||||||
|
fpr.FlushLockX(XMM7);
|
||||||
|
fpr.FlushLockX(XMM6);
|
||||||
|
fpr.Lock(a, b, c, d);
|
||||||
|
fpr.LoadToX64(a, true, false);
|
||||||
|
fpr.LoadToX64(d, false, true);
|
||||||
|
// BLENDPD would have been nice...
|
||||||
|
MOVAPD(XMM7, fpr.R(a));
|
||||||
|
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
|
||||||
|
MOVAPD(XMM6, R(XMM7));
|
||||||
|
ANDPD(XMM7, fpr.R(d));
|
||||||
|
ANDNPD(XMM6, fpr.R(c));
|
||||||
|
MOVAPD(fpr.RX(d), R(XMM7));
|
||||||
|
ORPD(fpr.RX(d), R(XMM6));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
fpr.UnlockAllX();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::ps_sign(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int d = inst.FD;
|
||||||
|
int b = inst.FB;
|
||||||
|
|
||||||
|
fpr.Lock(d, b);
|
||||||
|
if (d != b)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
if (d == b)
|
|
||||||
return;
|
|
||||||
fpr.LoadToX64(d, false);
|
fpr.LoadToX64(d, false);
|
||||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
void Jit64::ps_sel(UGeckoInstruction inst)
|
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
fpr.LoadToX64(d, true);
|
||||||
{Default(inst); return;} // turn off from debugger
|
}
|
||||||
INSTRUCTION_START;
|
|
||||||
|
switch (inst.SUBOP10)
|
||||||
|
{
|
||||||
|
case 40: //neg
|
||||||
|
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||||
|
break;
|
||||||
|
case 136: //nabs
|
||||||
|
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||||
|
break;
|
||||||
|
case 264: //abs
|
||||||
|
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int d = inst.FD;
|
||||||
|
int b = inst.FB;
|
||||||
|
fpr.Lock(d, b);
|
||||||
|
SQRTPD(XMM0, fpr.R(b));
|
||||||
|
MOVAPD(XMM1, M((void*)&psOneOne));
|
||||||
|
DIVPD(XMM1, R(XMM0));
|
||||||
|
MOVAPD(fpr.R(d), XMM1);
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
//add a, b, c
|
||||||
|
|
||||||
|
//mov a, b
|
||||||
|
//add a, c
|
||||||
|
//we need:
|
||||||
|
/*
|
||||||
|
psq_l
|
||||||
|
psq_stu
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
add a,b,a
|
||||||
|
*/
|
||||||
|
|
||||||
|
//There's still a little bit more optimization that can be squeezed out of this
|
||||||
|
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
|
||||||
|
{
|
||||||
|
fpr.Lock(d, a, b);
|
||||||
|
|
||||||
|
if (d == a)
|
||||||
|
{
|
||||||
|
fpr.LoadToX64(d, true);
|
||||||
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
|
}
|
||||||
|
else if (d == b && reversible)
|
||||||
|
{
|
||||||
|
fpr.LoadToX64(d, true);
|
||||||
|
(this->*op)(fpr.RX(d), fpr.R(a));
|
||||||
|
}
|
||||||
|
else if (a != d && b != d)
|
||||||
|
{
|
||||||
|
//sources different from d, can use rather quick solution
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||||
|
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||||
|
}
|
||||||
|
else if (b != d)
|
||||||
|
{
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
MOVAPD(XMM0, fpr.R(b));
|
||||||
|
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||||
|
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
}
|
||||||
|
else //Other combo, must use two temps :(
|
||||||
|
{
|
||||||
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
|
MOVAPD(XMM1, fpr.R(b));
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
(this->*op)(XMM0, Gen::R(XMM1));
|
||||||
|
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
}
|
||||||
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::ps_arith(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
|
||||||
|
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
|
||||||
|
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
|
||||||
|
case 23://sel
|
||||||
Default(inst);
|
Default(inst);
|
||||||
return;
|
break;
|
||||||
|
case 24://res
|
||||||
if (inst.Rc) {
|
Default(inst);
|
||||||
Default(inst); return;
|
break;
|
||||||
}
|
case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
|
||||||
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
|
default:
|
||||||
int d = inst.FD;
|
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int c = inst.FC;
|
|
||||||
fpr.FlushLockX(XMM7);
|
|
||||||
fpr.FlushLockX(XMM6);
|
|
||||||
fpr.Lock(a, b, c, d);
|
|
||||||
fpr.LoadToX64(a, true, false);
|
|
||||||
fpr.LoadToX64(d, false, true);
|
|
||||||
// BLENDPD would have been nice...
|
|
||||||
MOVAPD(XMM7, fpr.R(a));
|
|
||||||
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
|
|
||||||
MOVAPD(XMM6, R(XMM7));
|
|
||||||
ANDPD(XMM7, fpr.R(d));
|
|
||||||
ANDNPD(XMM6, fpr.R(c));
|
|
||||||
MOVAPD(fpr.RX(d), R(XMM7));
|
|
||||||
ORPD(fpr.RX(d), R(XMM6));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
fpr.UnlockAllX();
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::ps_sign(UGeckoInstruction inst)
|
void Jit64::ps_sum(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(Paired)
|
||||||
INSTRUCTION_START;
|
if (inst.Rc) {
|
||||||
if (inst.Rc) {
|
Default(inst); return;
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int b = inst.FB;
|
|
||||||
|
|
||||||
fpr.Lock(d, b);
|
|
||||||
if (d != b)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (inst.SUBOP10)
|
|
||||||
{
|
|
||||||
case 40: //neg
|
|
||||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
case 136: //nabs
|
|
||||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
case 264: //abs
|
|
||||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
int d = inst.FD;
|
||||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
int c = inst.FC;
|
||||||
|
fpr.Lock(a,b,c,d);
|
||||||
|
fpr.LoadToX64(d, d == a || d == b || d == c, true);
|
||||||
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
case 10:
|
||||||
{Default(inst); return;} // turn off from debugger
|
// Do the sum in upper subregisters, merge uppers
|
||||||
INSTRUCTION_START;
|
MOVDDUP(XMM0, fpr.R(a));
|
||||||
if (inst.Rc) {
|
MOVAPD(XMM1, fpr.R(b));
|
||||||
Default(inst); return;
|
ADDPD(XMM0, R(XMM1));
|
||||||
}
|
UNPCKHPD(XMM0, fpr.R(c)); //merge
|
||||||
int d = inst.FD;
|
MOVAPD(fpr.R(d), XMM0);
|
||||||
int b = inst.FB;
|
break;
|
||||||
fpr.Lock(d, b);
|
case 11:
|
||||||
SQRTPD(XMM0, fpr.R(b));
|
// Do the sum in lower subregisters, merge lowers
|
||||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
DIVPD(XMM1, R(XMM0));
|
MOVAPD(XMM1, fpr.R(b));
|
||||||
|
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
|
||||||
|
ADDPD(XMM0, R(XMM1)); // sum lowers
|
||||||
|
MOVAPD(XMM1, fpr.R(c));
|
||||||
|
UNPCKLPD(XMM1, R(XMM0)); // merge
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
MOVAPD(fpr.R(d), XMM1);
|
||||||
fpr.UnlockAll();
|
break;
|
||||||
|
default:
|
||||||
|
PanicAlert("ps_sum WTF!!!");
|
||||||
}
|
}
|
||||||
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
//add a, b, c
|
|
||||||
|
|
||||||
//mov a, b
|
|
||||||
//add a, c
|
|
||||||
//we need:
|
|
||||||
/*
|
|
||||||
psq_l
|
|
||||||
psq_stu
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
add a,b,a
|
|
||||||
*/
|
|
||||||
|
|
||||||
//There's still a little bit more optimization that can be squeezed out of this
|
void Jit64::ps_muls(UGeckoInstruction inst)
|
||||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int d = inst.FD;
|
||||||
|
int a = inst.FA;
|
||||||
|
int c = inst.FC;
|
||||||
|
fpr.Lock(a, c, d);
|
||||||
|
fpr.LoadToX64(d, d == a || d == c, true);
|
||||||
|
switch (inst.SUBOP5)
|
||||||
{
|
{
|
||||||
fpr.Lock(d, a, b);
|
case 12:
|
||||||
|
// Single multiply scalar high
|
||||||
if (d == a)
|
// TODO - faster version for when regs are different
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else if (d == b && reversible)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, true);
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(a));
|
|
||||||
}
|
|
||||||
else if (a != d && b != d)
|
|
||||||
{
|
|
||||||
//sources different from d, can use rather quick solution
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
|
||||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
|
||||||
}
|
|
||||||
else if (b != d)
|
|
||||||
{
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(XMM0, fpr.R(b));
|
|
||||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
|
||||||
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
}
|
|
||||||
else //Other combo, must use two temps :(
|
|
||||||
{
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(b));
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
(this->*op)(XMM0, Gen::R(XMM1));
|
|
||||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
}
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::ps_arith(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
|
||||||
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
|
|
||||||
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
|
|
||||||
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
|
|
||||||
case 23://sel
|
|
||||||
Default(inst);
|
|
||||||
break;
|
|
||||||
case 24://res
|
|
||||||
Default(inst);
|
|
||||||
break;
|
|
||||||
case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
|
|
||||||
default:
|
|
||||||
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::ps_sum(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int c = inst.FC;
|
|
||||||
fpr.Lock(a,b,c,d);
|
|
||||||
fpr.LoadToX64(d, d == a || d == b || d == c, true);
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
|
||||||
case 10:
|
|
||||||
// Do the sum in upper subregisters, merge uppers
|
|
||||||
MOVDDUP(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(b));
|
|
||||||
ADDPD(XMM0, R(XMM1));
|
|
||||||
UNPCKHPD(XMM0, fpr.R(c)); //merge
|
|
||||||
MOVAPD(fpr.R(d), XMM0);
|
|
||||||
break;
|
|
||||||
case 11:
|
|
||||||
// Do the sum in lower subregisters, merge lowers
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(b));
|
|
||||||
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
|
|
||||||
ADDPD(XMM0, R(XMM1)); // sum lowers
|
|
||||||
MOVAPD(XMM1, fpr.R(c));
|
|
||||||
UNPCKLPD(XMM1, R(XMM0)); // merge
|
|
||||||
MOVAPD(fpr.R(d), XMM1);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
PanicAlert("ps_sum WTF!!!");
|
|
||||||
}
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Jit64::ps_muls(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int a = inst.FA;
|
|
||||||
int c = inst.FC;
|
|
||||||
fpr.Lock(a, c, d);
|
|
||||||
fpr.LoadToX64(d, d == a || d == c, true);
|
|
||||||
switch (inst.SUBOP5)
|
|
||||||
{
|
|
||||||
case 12:
|
|
||||||
// Single multiply scalar high
|
|
||||||
// TODO - faster version for when regs are different
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
|
||||||
MOVDDUP(XMM1, fpr.R(c));
|
|
||||||
MULPD(XMM0, R(XMM1));
|
|
||||||
MOVAPD(fpr.R(d), XMM0);
|
|
||||||
break;
|
|
||||||
case 13:
|
|
||||||
// TODO - faster version for when regs are different
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
|
||||||
MOVAPD(XMM1, fpr.R(c));
|
|
||||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
|
||||||
MULPD(XMM0, R(XMM1));
|
|
||||||
MOVAPD(fpr.R(d), XMM0);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
PanicAlert("ps_muls WTF!!!");
|
|
||||||
}
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
|
||||||
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int d = inst.FD;
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
fpr.Lock(a,b,d);
|
|
||||||
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
switch (inst.SUBOP10)
|
MOVDDUP(XMM1, fpr.R(c));
|
||||||
{
|
MULPD(XMM0, R(XMM1));
|
||||||
case 528:
|
MOVAPD(fpr.R(d), XMM0);
|
||||||
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf
|
break;
|
||||||
break; //00
|
case 13:
|
||||||
case 560:
|
// TODO - faster version for when regs are different
|
||||||
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
|
|
||||||
break; //01
|
|
||||||
case 592:
|
|
||||||
SHUFPD(XMM0, fpr.R(b), 1);
|
|
||||||
break; //10
|
|
||||||
case 624:
|
|
||||||
UNPCKHPD(XMM0, fpr.R(b));
|
|
||||||
break; //11
|
|
||||||
default:
|
|
||||||
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
|
|
||||||
}
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//TODO: add optimized cases
|
|
||||||
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
if (inst.Rc) {
|
|
||||||
Default(inst); return;
|
|
||||||
}
|
|
||||||
int a = inst.FA;
|
|
||||||
int b = inst.FB;
|
|
||||||
int c = inst.FC;
|
|
||||||
int d = inst.FD;
|
|
||||||
fpr.Lock(a,b,c,d);
|
|
||||||
|
|
||||||
MOVAPD(XMM0, fpr.R(a));
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
switch (inst.SUBOP5)
|
MOVAPD(XMM1, fpr.R(c));
|
||||||
{
|
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||||
case 14: //madds0
|
MULPD(XMM0, R(XMM1));
|
||||||
MOVDDUP(XMM1, fpr.R(c));
|
MOVAPD(fpr.R(d), XMM0);
|
||||||
MULPD(XMM0, R(XMM1));
|
break;
|
||||||
ADDPD(XMM0, fpr.R(b));
|
default:
|
||||||
break;
|
PanicAlert("ps_muls WTF!!!");
|
||||||
case 15: //madds1
|
|
||||||
MOVAPD(XMM1, fpr.R(c));
|
|
||||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
|
||||||
MULPD(XMM0, R(XMM1));
|
|
||||||
ADDPD(XMM0, fpr.R(b));
|
|
||||||
break;
|
|
||||||
case 28: //msub
|
|
||||||
MULPD(XMM0, fpr.R(c));
|
|
||||||
SUBPD(XMM0, fpr.R(b));
|
|
||||||
break;
|
|
||||||
case 29: //madd
|
|
||||||
MULPD(XMM0, fpr.R(c));
|
|
||||||
ADDPD(XMM0, fpr.R(b));
|
|
||||||
break;
|
|
||||||
case 30: //nmsub
|
|
||||||
MULPD(XMM0, fpr.R(c));
|
|
||||||
SUBPD(XMM0, fpr.R(b));
|
|
||||||
XORPD(XMM0, M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
case 31: //nmadd
|
|
||||||
MULPD(XMM0, fpr.R(c));
|
|
||||||
ADDPD(XMM0, fpr.R(b));
|
|
||||||
XORPD(XMM0, M((void*)&psSignBits));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
|
||||||
//Default(inst);
|
|
||||||
//fpr.UnlockAll();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
fpr.LoadToX64(d, false);
|
|
||||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
|
||||||
ForceSinglePrecisionP(fpr.RX(d));
|
|
||||||
fpr.UnlockAll();
|
|
||||||
}
|
}
|
||||||
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
||||||
|
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int d = inst.FD;
|
||||||
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
fpr.Lock(a,b,d);
|
||||||
|
|
||||||
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
|
switch (inst.SUBOP10)
|
||||||
|
{
|
||||||
|
case 528:
|
||||||
|
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf
|
||||||
|
break; //00
|
||||||
|
case 560:
|
||||||
|
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
|
||||||
|
break; //01
|
||||||
|
case 592:
|
||||||
|
SHUFPD(XMM0, fpr.R(b), 1);
|
||||||
|
break; //10
|
||||||
|
case 624:
|
||||||
|
UNPCKHPD(XMM0, fpr.R(b));
|
||||||
|
break; //11
|
||||||
|
default:
|
||||||
|
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
|
||||||
|
}
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//TODO: add optimized cases
|
||||||
|
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(Paired)
|
||||||
|
if (inst.Rc) {
|
||||||
|
Default(inst); return;
|
||||||
|
}
|
||||||
|
int a = inst.FA;
|
||||||
|
int b = inst.FB;
|
||||||
|
int c = inst.FC;
|
||||||
|
int d = inst.FD;
|
||||||
|
fpr.Lock(a,b,c,d);
|
||||||
|
|
||||||
|
MOVAPD(XMM0, fpr.R(a));
|
||||||
|
switch (inst.SUBOP5)
|
||||||
|
{
|
||||||
|
case 14: //madds0
|
||||||
|
MOVDDUP(XMM1, fpr.R(c));
|
||||||
|
MULPD(XMM0, R(XMM1));
|
||||||
|
ADDPD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 15: //madds1
|
||||||
|
MOVAPD(XMM1, fpr.R(c));
|
||||||
|
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||||
|
MULPD(XMM0, R(XMM1));
|
||||||
|
ADDPD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 28: //msub
|
||||||
|
MULPD(XMM0, fpr.R(c));
|
||||||
|
SUBPD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 29: //madd
|
||||||
|
MULPD(XMM0, fpr.R(c));
|
||||||
|
ADDPD(XMM0, fpr.R(b));
|
||||||
|
break;
|
||||||
|
case 30: //nmsub
|
||||||
|
MULPD(XMM0, fpr.R(c));
|
||||||
|
SUBPD(XMM0, fpr.R(b));
|
||||||
|
XORPD(XMM0, M((void*)&psSignBits));
|
||||||
|
break;
|
||||||
|
case 31: //nmadd
|
||||||
|
MULPD(XMM0, fpr.R(c));
|
||||||
|
ADDPD(XMM0, fpr.R(b));
|
||||||
|
XORPD(XMM0, M((void*)&psSignBits));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||||
|
//Default(inst);
|
||||||
|
//fpr.UnlockAll();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fpr.LoadToX64(d, false);
|
||||||
|
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||||
|
ForceSinglePrecisionP(fpr.RX(d));
|
||||||
|
fpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
|
@ -29,172 +29,165 @@
|
||||||
#include "Jit.h"
|
#include "Jit.h"
|
||||||
#include "JitRegCache.h"
|
#include "JitRegCache.h"
|
||||||
|
|
||||||
void Jit64::mtspr(UGeckoInstruction inst)
|
void Jit64::mtspr(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(SystemRegisters)
|
||||||
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
|
int d = inst.RD;
|
||||||
|
|
||||||
|
switch (iIndex)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
case SPR_LR:
|
||||||
{Default(inst); return;} // turn off from debugger
|
case SPR_CTR:
|
||||||
INSTRUCTION_START;
|
case SPR_XER:
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
// These are safe to do the easy way, see the bottom of this function.
|
||||||
int d = inst.RD;
|
break;
|
||||||
|
|
||||||
switch (iIndex)
|
case SPR_GQR0:
|
||||||
|
case SPR_GQR0 + 1:
|
||||||
|
case SPR_GQR0 + 2:
|
||||||
|
case SPR_GQR0 + 3:
|
||||||
|
case SPR_GQR0 + 4:
|
||||||
|
case SPR_GQR0 + 5:
|
||||||
|
case SPR_GQR0 + 6:
|
||||||
|
case SPR_GQR0 + 7:
|
||||||
|
js.blockSetsQuantizers = true;
|
||||||
|
// Prevent recompiler from compiling in old quantizer values.
|
||||||
|
// If the value changed, destroy all blocks using this quantizer
|
||||||
|
// This will create a little bit of block churn, but hopefully not too bad.
|
||||||
{
|
{
|
||||||
case SPR_LR:
|
/*
|
||||||
case SPR_CTR:
|
|
||||||
case SPR_XER:
|
|
||||||
// These are safe to do the easy way, see the bottom of this function.
|
|
||||||
break;
|
|
||||||
|
|
||||||
case SPR_GQR0:
|
|
||||||
case SPR_GQR0 + 1:
|
|
||||||
case SPR_GQR0 + 2:
|
|
||||||
case SPR_GQR0 + 3:
|
|
||||||
case SPR_GQR0 + 4:
|
|
||||||
case SPR_GQR0 + 5:
|
|
||||||
case SPR_GQR0 + 6:
|
|
||||||
case SPR_GQR0 + 7:
|
|
||||||
js.blockSetsQuantizers = true;
|
|
||||||
// Prevent recompiler from compiling in old quantizer values.
|
|
||||||
// If the value changed, destroy all blocks using this quantizer
|
|
||||||
// This will create a little bit of block churn, but hopefully not too bad.
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value
|
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value
|
||||||
CMP(32, R(EAX), gpr.R(inst.RD));
|
CMP(32, R(EAX), gpr.R(inst.RD));
|
||||||
FixupBranch skip_destroy = J_CC(CC_E, false);
|
FixupBranch skip_destroy = J_CC(CC_E, false);
|
||||||
int gqr = iIndex - SPR_GQR0;
|
int gqr = iIndex - SPR_GQR0;
|
||||||
ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr);
|
ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr);
|
||||||
SetJumpTarget(skip_destroy);*/
|
SetJumpTarget(skip_destroy);*/
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
// TODO - break block if quantizers are written to.
|
// TODO - break block if quantizers are written to.
|
||||||
default:
|
default:
|
||||||
Default(inst);
|
Default(inst);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// OK, this is easy.
|
// OK, this is easy.
|
||||||
|
gpr.Lock(d);
|
||||||
|
gpr.LoadToX64(d, true);
|
||||||
|
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d));
|
||||||
|
gpr.UnlockAll();
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
|
JITDISABLE(SystemRegisters)
|
||||||
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
|
int d = inst.RD;
|
||||||
|
switch (iIndex)
|
||||||
|
{
|
||||||
|
case SPR_WPAR:
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
|
// case SPR_DEC:
|
||||||
|
//MessageBox(NULL, "Read from DEC", "????", MB_OK);
|
||||||
|
//break;
|
||||||
|
case SPR_TL:
|
||||||
|
case SPR_TU:
|
||||||
|
//CALL((void Jit64::*)&CoreTiming::Advance);
|
||||||
|
// fall through
|
||||||
|
default:
|
||||||
gpr.Lock(d);
|
gpr.Lock(d);
|
||||||
gpr.LoadToX64(d, true);
|
gpr.LoadToX64(d, false);
|
||||||
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d));
|
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
void Jit64::mfspr(UGeckoInstruction inst)
|
|
||||||
{
|
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
|
||||||
{Default(inst); return;} // turn off from debugger
|
|
||||||
INSTRUCTION_START;
|
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
|
||||||
int d = inst.RD;
|
|
||||||
switch (iIndex)
|
|
||||||
{
|
|
||||||
case SPR_WPAR:
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
// case SPR_DEC:
|
|
||||||
//MessageBox(NULL, "Read from DEC", "????", MB_OK);
|
|
||||||
//break;
|
|
||||||
case SPR_TL:
|
|
||||||
case SPR_TU:
|
|
||||||
//CALL((void Jit64::*)&CoreTiming::Advance);
|
|
||||||
// fall through
|
|
||||||
default:
|
|
||||||
gpr.Lock(d);
|
|
||||||
gpr.LoadToX64(d, false);
|
|
||||||
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
|
|
||||||
gpr.UnlockAll();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// =======================================================================================
|
// =======================================================================================
|
||||||
// Don't interpret this, if we do we get thrown out
|
// Don't interpret this, if we do we get thrown out
|
||||||
// --------------
|
// --------------
|
||||||
void Jit64::mtmsr(UGeckoInstruction inst)
|
void Jit64::mtmsr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(SystemRegisters)
|
||||||
INSTRUCTION_START;
|
gpr.LoadToX64(inst.RS, true, false);
|
||||||
gpr.LoadToX64(inst.RS, true, false);
|
MOV(32, M(&MSR), gpr.R(inst.RS));
|
||||||
MOV(32, M(&MSR), gpr.R(inst.RS));
|
gpr.Flush(FLUSH_ALL);
|
||||||
gpr.Flush(FLUSH_ALL);
|
fpr.Flush(FLUSH_ALL);
|
||||||
fpr.Flush(FLUSH_ALL);
|
WriteExit(js.compilerPC + 4, 0);
|
||||||
WriteExit(js.compilerPC + 4, 0);
|
}
|
||||||
}
|
// ==============
|
||||||
// ==============
|
|
||||||
|
|
||||||
|
|
||||||
void Jit64::mfmsr(UGeckoInstruction inst)
|
void Jit64::mfmsr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(SystemRegisters)
|
||||||
INSTRUCTION_START;
|
//Privileged?
|
||||||
//Privileged?
|
gpr.LoadToX64(inst.RD, false);
|
||||||
gpr.LoadToX64(inst.RD, false);
|
MOV(32, gpr.R(inst.RD), M(&MSR));
|
||||||
MOV(32, gpr.R(inst.RD), M(&MSR));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::mftb(UGeckoInstruction inst)
|
void Jit64::mftb(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(SystemRegisters)
|
||||||
INSTRUCTION_START;
|
mfspr(inst);
|
||||||
mfspr(inst);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void Jit64::mfcr(UGeckoInstruction inst)
|
void Jit64::mfcr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(SystemRegisters)
|
||||||
INSTRUCTION_START;
|
// USES_CR
|
||||||
// USES_CR
|
int d = inst.RD;
|
||||||
int d = inst.RD;
|
gpr.LoadToX64(d, false, true);
|
||||||
gpr.LoadToX64(d, false, true);
|
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
|
||||||
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
|
SHL(32, R(EAX), Imm8(4));
|
||||||
|
for (int i = 1; i < 7; i++) {
|
||||||
|
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
|
||||||
SHL(32, R(EAX), Imm8(4));
|
SHL(32, R(EAX), Imm8(4));
|
||||||
for (int i = 1; i < 7; i++) {
|
|
||||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
|
|
||||||
SHL(32, R(EAX), Imm8(4));
|
|
||||||
}
|
|
||||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
|
||||||
}
|
}
|
||||||
|
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::mtcrf(UGeckoInstruction inst)
|
void Jit64::mtcrf(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
INSTRUCTION_START
|
||||||
{Default(inst); return;} // turn off from debugger
|
JITDISABLE(SystemRegisters)
|
||||||
INSTRUCTION_START;
|
|
||||||
|
|
||||||
// USES_CR
|
// USES_CR
|
||||||
u32 mask = 0;
|
u32 mask = 0;
|
||||||
u32 crm = inst.CRM;
|
u32 crm = inst.CRM;
|
||||||
if (crm == 0xFF) {
|
if (crm == 0xFF) {
|
||||||
gpr.FlushLockX(ECX);
|
gpr.FlushLockX(ECX);
|
||||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
MOV(32, R(ECX), R(EAX));
|
MOV(32, R(ECX), R(EAX));
|
||||||
SHR(32, R(ECX), Imm8(28 - (i * 4)));
|
SHR(32, R(ECX), Imm8(28 - (i * 4)));
|
||||||
AND(32, R(ECX), Imm32(0xF));
|
AND(32, R(ECX), Imm32(0xF));
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
|
||||||
}
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
} else {
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
|
|
||||||
// TODO: translate this to work in new CR model.
|
|
||||||
for (int i = 0; i < 8; i++) {
|
|
||||||
if (crm & (1 << i))
|
|
||||||
mask |= 0xF << (i*4);
|
|
||||||
}
|
|
||||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
|
||||||
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
|
|
||||||
AND(32, R(EAX), Imm32(mask));
|
|
||||||
AND(32, R(ECX), Imm32(~mask));
|
|
||||||
OR(32, R(EAX), R(ECX));
|
|
||||||
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
|
||||||
}
|
}
|
||||||
|
gpr.UnlockAllX();
|
||||||
|
} else {
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
|
|
||||||
|
// TODO: translate this to work in new CR model.
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
if (crm & (1 << i))
|
||||||
|
mask |= 0xF << (i*4);
|
||||||
|
}
|
||||||
|
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||||
|
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
|
||||||
|
AND(32, R(EAX), Imm32(mask));
|
||||||
|
AND(32, R(ECX), Imm32(~mask));
|
||||||
|
OR(32, R(EAX), R(ECX));
|
||||||
|
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -31,12 +31,12 @@
|
||||||
//#define INSTRUCTION_START Default(inst); return;
|
//#define INSTRUCTION_START Default(inst); return;
|
||||||
#define INSTRUCTION_START
|
#define INSTRUCTION_START
|
||||||
|
|
||||||
void Jit64::mtspr(UGeckoInstruction inst)
|
void Jit64::mtspr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(SystemRegisters)
|
JITDISABLE(SystemRegisters)
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
switch(iIndex) {
|
switch(iIndex) {
|
||||||
case SPR_LR:
|
case SPR_LR:
|
||||||
ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD));
|
ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD));
|
||||||
return;
|
return;
|
||||||
|
@ -60,121 +60,121 @@
|
||||||
default:
|
default:
|
||||||
Default(inst);
|
Default(inst);
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::mfspr(UGeckoInstruction inst)
|
void Jit64::mfspr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(SystemRegisters)
|
JITDISABLE(SystemRegisters)
|
||||||
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
|
||||||
switch (iIndex)
|
switch (iIndex)
|
||||||
{
|
|
||||||
case SPR_LR:
|
|
||||||
ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
|
|
||||||
return;
|
|
||||||
case SPR_CTR:
|
|
||||||
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
|
|
||||||
return;
|
|
||||||
case SPR_GQR0:
|
|
||||||
case SPR_GQR0 + 1:
|
|
||||||
case SPR_GQR0 + 2:
|
|
||||||
case SPR_GQR0 + 3:
|
|
||||||
case SPR_GQR0 + 4:
|
|
||||||
case SPR_GQR0 + 5:
|
|
||||||
case SPR_GQR0 + 6:
|
|
||||||
case SPR_GQR0 + 7:
|
|
||||||
ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD);
|
|
||||||
return;
|
|
||||||
default:
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// =======================================================================================
|
|
||||||
// Don't interpret this, if we do we get thrown out
|
|
||||||
// --------------
|
|
||||||
void Jit64::mtmsr(UGeckoInstruction inst)
|
|
||||||
{
|
{
|
||||||
ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS));
|
case SPR_LR:
|
||||||
ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
|
ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
|
||||||
|
return;
|
||||||
|
case SPR_CTR:
|
||||||
|
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
|
||||||
|
return;
|
||||||
|
case SPR_GQR0:
|
||||||
|
case SPR_GQR0 + 1:
|
||||||
|
case SPR_GQR0 + 2:
|
||||||
|
case SPR_GQR0 + 3:
|
||||||
|
case SPR_GQR0 + 4:
|
||||||
|
case SPR_GQR0 + 5:
|
||||||
|
case SPR_GQR0 + 6:
|
||||||
|
case SPR_GQR0 + 7:
|
||||||
|
ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD);
|
||||||
|
return;
|
||||||
|
default:
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
// ==============
|
}
|
||||||
|
|
||||||
|
|
||||||
void Jit64::mfmsr(UGeckoInstruction inst)
|
// =======================================================================================
|
||||||
{
|
// Don't interpret this, if we do we get thrown out
|
||||||
INSTRUCTION_START
|
// --------------
|
||||||
|
void Jit64::mtmsr(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS));
|
||||||
|
ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
|
||||||
|
}
|
||||||
|
// ==============
|
||||||
|
|
||||||
|
|
||||||
|
void Jit64::mfmsr(UGeckoInstruction inst)
|
||||||
|
{
|
||||||
|
INSTRUCTION_START
|
||||||
JITDISABLE(SystemRegisters)
|
JITDISABLE(SystemRegisters)
|
||||||
ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
|
ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::mftb(UGeckoInstruction inst)
|
void Jit64::mftb(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
JITDISABLE(SystemRegisters)
|
JITDISABLE(SystemRegisters)
|
||||||
mfspr(inst);
|
mfspr(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::mfcr(UGeckoInstruction inst)
|
void Jit64::mfcr(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
#if 0
|
#if 0
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
// USES_CR
|
// USES_CR
|
||||||
int d = inst.RD;
|
int d = inst.RD;
|
||||||
gpr.LoadToX64(d, false, true);
|
gpr.LoadToX64(d, false, true);
|
||||||
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
|
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
|
||||||
|
SHL(32, R(EAX), Imm8(4));
|
||||||
|
for (int i = 1; i < 7; i++) {
|
||||||
|
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
|
||||||
SHL(32, R(EAX), Imm8(4));
|
SHL(32, R(EAX), Imm8(4));
|
||||||
for (int i = 1; i < 7; i++) {
|
|
||||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
|
|
||||||
SHL(32, R(EAX), Imm8(4));
|
|
||||||
}
|
|
||||||
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
|
|
||||||
MOV(32, gpr.R(d), R(EAX));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
|
||||||
|
MOV(32, gpr.R(d), R(EAX));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::mtcrf(UGeckoInstruction inst)
|
void Jit64::mtcrf(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
Default(inst); return;
|
Default(inst); return;
|
||||||
#if 0
|
#if 0
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
INSTRUCTION_START;
|
INSTRUCTION_START;
|
||||||
|
|
||||||
// USES_CR
|
// USES_CR
|
||||||
u32 mask = 0;
|
u32 mask = 0;
|
||||||
u32 crm = inst.CRM;
|
u32 crm = inst.CRM;
|
||||||
if (crm == 0xFF) {
|
if (crm == 0xFF) {
|
||||||
gpr.FlushLockX(ECX);
|
gpr.FlushLockX(ECX);
|
||||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
MOV(32, R(ECX), R(EAX));
|
MOV(32, R(ECX), R(EAX));
|
||||||
SHR(32, R(ECX), Imm8(28 - (i * 4)));
|
SHR(32, R(ECX), Imm8(28 - (i * 4)));
|
||||||
AND(32, R(ECX), Imm32(0xF));
|
AND(32, R(ECX), Imm32(0xF));
|
||||||
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
|
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
|
||||||
}
|
|
||||||
gpr.UnlockAllX();
|
|
||||||
} else {
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
|
|
||||||
// TODO: translate this to work in new CR model.
|
|
||||||
for (int i = 0; i < 8; i++) {
|
|
||||||
if (crm & (1 << i))
|
|
||||||
mask |= 0xF << (i*4);
|
|
||||||
}
|
|
||||||
MOV(32, R(EAX), gpr.R(inst.RS));
|
|
||||||
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
|
|
||||||
AND(32, R(EAX), Imm32(mask));
|
|
||||||
AND(32, R(ECX), Imm32(~mask));
|
|
||||||
OR(32, R(EAX), R(ECX));
|
|
||||||
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
|
||||||
}
|
}
|
||||||
#endif
|
gpr.UnlockAllX();
|
||||||
|
} else {
|
||||||
|
Default(inst);
|
||||||
|
return;
|
||||||
|
|
||||||
|
// TODO: translate this to work in new CR model.
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
if (crm & (1 << i))
|
||||||
|
mask |= 0xF << (i*4);
|
||||||
|
}
|
||||||
|
MOV(32, R(EAX), gpr.R(inst.RS));
|
||||||
|
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
|
||||||
|
AND(32, R(EAX), Imm32(mask));
|
||||||
|
AND(32, R(ECX), Imm32(~mask));
|
||||||
|
OR(32, R(EAX), R(ECX));
|
||||||
|
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue