small code cleanup in JIT: use JITIL's nice JITDISABLE macro
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4477 8ced0084-cf51-0410-be5f-012b33b47a6e
commit 5b4d12c1f3
parent 31e61da40d
@@ -77,7 +77,10 @@ void Jit(u32 em_address);
 // #define INSTRUCTION_START PPCTables::CountInstruction(inst);
 #define INSTRUCTION_START
 
+#define JITDISABLE(type) \
+	if (Core::g_CoreStartupParameter.bJITOff || \
+		Core::g_CoreStartupParameter.bJIT##type##Off) \
+		{Default(inst); return;}
 
 class TrampolineCache : public Gen::XCodeBlock
 {
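For reference, a minimal sketch of the pattern this change applies in every handler; it is illustrative only (not part of the commit), and the handler shown (Jit64::fp_arith_s with the FloatingPoint group) is taken from the diff below:

	// Before: each JIT handler repeated the debugger "JIT off" fallback by hand.
	void Jit64::fp_arith_s(UGeckoInstruction inst)
	{
		if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
			{Default(inst); return;} // turn off from debugger
		INSTRUCTION_START;
		// ... emit code ...
	}

	// After: JITDISABLE(type) expands to the same check against bJITOff and
	// bJIT##type##Off and falls back to the interpreter via Default(inst).
	void Jit64::fp_arith_s(UGeckoInstruction inst)
	{
		INSTRUCTION_START
		JITDISABLE(FloatingPoint)
		// ... emit code ...
	}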
@@ -73,9 +73,8 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
 
 void Jit64::fp_arith_s(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(FloatingPoint)
 	if (inst.Rc) {
 		Default(inst); return;
 	}
@@ -104,9 +103,8 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
 
 void Jit64::fmaddXX(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(FloatingPoint)
 	if (inst.Rc) {
 		Default(inst); return;
 	}
@@ -162,9 +160,8 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
 
 void Jit64::fsign(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(FloatingPoint)
 	if (inst.Rc) {
 		Default(inst); return;
 	}
@@ -195,8 +192,7 @@ void Jit64::fsign(UGeckoInstruction inst)
 void Jit64::fmrx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
-		{Default(inst); return;} // turn off from debugger
+	JITDISABLE(FloatingPoint)
 	if (inst.Rc) {
 		Default(inst); return;
 	}
@@ -213,9 +209,9 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 {
 	// TODO : This still causes crashes in Nights, and broken graphics
 	// in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :(
-	INSTRUCTION_START;
-	if(Core::g_CoreStartupParameter.bJITOff || jo.fpAccurateFcmp
-		|| Core::g_CoreStartupParameter.bJITFloatingPointOff) {
+	INSTRUCTION_START
+	JITDISABLE(FloatingPoint)
+	if (jo.fpAccurateFcmp) {
 		Default(inst); return; // turn off from debugger
 	}
 
@@ -33,441 +33,434 @@
 #include "JitAsm.h"
 #include "JitRegCache.h"
 
void Jit64::lbzx(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff
|
||||
|| Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
void Jit64::lbzx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
|
||||
Default(inst); return;
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
#if 0
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
#else
|
||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
|
||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
|
||||
#endif
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lwzx(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
void Jit64::lwzx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
#if 1
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
#else
|
||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
|
||||
UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false);
|
||||
#endif
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lhax(UGeckoInstruction inst)
|
||||
void Jit64::lhax(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
|
||||
// Some homebrew actually loads from a hw reg with this instruction
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::lXz(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
|
||||
Default(inst); return;
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
|
||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||
// Will give nice boost to dual core mode
|
||||
// (mb2): I agree,
|
||||
// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode
|
||||
// ... maybe the throttle one already do that :p
|
||||
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
||||
if (Core::GetStartupParameter().bSkipIdle &&
|
||||
inst.OPCD == 32 &&
|
||||
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
|
||||
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
|
||||
(Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
|
||||
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
// TODO(LinesPrower):
|
||||
// - Rewrite this!
|
||||
// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
|
||||
// It only demonstrates the idea
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
gpr.Lock(a, b, d);
|
||||
// do our job at first
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
if (b == d || a == d)
|
||||
gpr.LoadToX64(d, true, true);
|
||||
else
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(b));
|
||||
if (a)
|
||||
ADD(32, R(ABI_PARAM1), gpr.R(a));
|
||||
|
||||
// Some homebrew actually loads from a hw reg with this instruction
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
|
||||
gpr.Lock(d, a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
|
||||
// if it's still 0, we can wait until the next event
|
||||
CMP(32, R(RAX), Imm32(0));
|
||||
FixupBranch noIdle = J_CC(CC_NE);
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
||||
|
||||
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
|
||||
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
|
||||
JMP(asm_routines.testExceptions, true);
|
||||
|
||||
SetJumpTarget(noIdle);
|
||||
|
||||
//js.compilerPC += 8;
|
||||
return;
|
||||
}
|
||||
|
||||
void Jit64::lXz(UGeckoInstruction inst)
|
||||
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
|
||||
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (!a)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff
|
||||
|| Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
int accessSize;
|
||||
switch (inst.OPCD)
|
||||
{
|
||||
case 32:
|
||||
accessSize = 32;
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
|
||||
break; //lwz
|
||||
case 40: accessSize = 16; break; //lhz
|
||||
case 34: accessSize = 8; break; //lbz
|
||||
default:
|
||||
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
|
||||
PanicAlert("lXz: invalid access size");
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
|
||||
// Will give nice boost to dual core mode
|
||||
// (mb2): I agree,
|
||||
// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode
|
||||
// ... maybe the throttle one already do that :p
|
||||
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
|
||||
if (Core::GetStartupParameter().bSkipIdle &&
|
||||
inst.OPCD == 32 &&
|
||||
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
|
||||
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
|
||||
(Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
|
||||
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
|
||||
{
|
||||
// TODO(LinesPrower):
|
||||
// - Rewrite this!
|
||||
// It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right
|
||||
// It only demonstrates the idea
|
||||
|
||||
// do our job at first
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(d, a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
|
||||
// if it's still 0, we can wait until the next event
|
||||
CMP(32, R(RAX), Imm32(0));
|
||||
FixupBranch noIdle = J_CC(CC_NE);
|
||||
|
||||
gpr.Flush(FLUSH_ALL);
|
||||
fpr.Flush(FLUSH_ALL);
|
||||
ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
|
||||
|
||||
// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
|
||||
//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
|
||||
JMP(asm_routines.testExceptions, true);
|
||||
|
||||
SetJumpTarget(noIdle);
|
||||
|
||||
//js.compilerPC += 8;
|
||||
return;
|
||||
}
|
||||
|
||||
// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
|
||||
// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (!a)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
int accessSize;
|
||||
switch (inst.OPCD)
|
||||
{
|
||||
case 32:
|
||||
accessSize = 32;
|
||||
if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
|
||||
break; //lwz
|
||||
case 40: accessSize = 16; break; //lhz
|
||||
case 34: accessSize = 8; break; //lbz
|
||||
default:
|
||||
//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
|
||||
PanicAlert("lXz: invalid access size");
|
||||
return;
|
||||
}
|
||||
|
||||
//Still here? Do regular path.
|
||||
//Still here? Do regular path.
|
||||
#if defined(_M_X64)
|
||||
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
|
||||
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
|
||||
#else
|
||||
if (true) {
|
||||
if (true) {
|
||||
#endif
|
||||
// Safe and boring
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(d, a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
// Fast and daring
|
||||
gpr.Lock(a, d);
|
||||
gpr.LoadToX64(a, true, false);
|
||||
gpr.LoadToX64(d, a == d, true);
|
||||
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
|
||||
switch (accessSize) {
|
||||
case 32:
|
||||
BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||
break;
|
||||
// Careful in the backpatch - need to properly nop over first
|
||||
// case 16:
|
||||
// BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||
// SHR(32, gpr.R(d), Imm8(16));
|
||||
// break;
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::lha(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
// Safe and boring
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(d, a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
|
||||
gpr.LoadToX64(d, d == a, true);
|
||||
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
|
||||
gpr.LoadToX64(d, false, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
void Jit64::lwzux(UGeckoInstruction inst)
|
||||
// Fast and daring
|
||||
gpr.Lock(a, d);
|
||||
gpr.LoadToX64(a, true, false);
|
||||
gpr.LoadToX64(d, a == d, true);
|
||||
MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
|
||||
switch (accessSize) {
|
||||
case 32:
|
||||
BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||
break;
|
||||
// Careful in the backpatch - need to properly nop over first
|
||||
// case 16:
|
||||
// BSWAP(32, gpr.R(d).GetSimpleReg());
|
||||
// SHR(32, gpr.R(d), Imm8(16));
|
||||
// break;
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::lha(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
// Safe and boring
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(d, a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
|
||||
gpr.LoadToX64(d, d == a, true);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
void Jit64::lwzux(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
if (!a || a == d || a == b)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
int a = inst.RA, b = inst.RB, d = inst.RD;
|
||||
if (!a || a == d || a == b)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
gpr.Lock(a, b, d);
|
||||
|
||||
gpr.LoadToX64(d, b == d, true);
|
||||
gpr.LoadToX64(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
SafeLoadRegToEAX(EAX, 32, 0, false);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
gpr.UnlockAll();
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
gpr.Lock(a, b, d);
|
||||
|
||||
// Zero cache line.
|
||||
void Jit64::dcbz(UGeckoInstruction inst)
|
||||
{
|
||||
Default(inst); return;
|
||||
gpr.LoadToX64(d, b == d, true);
|
||||
gpr.LoadToX64(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EAX), gpr.R(a));
|
||||
SafeLoadRegToEAX(EAX, 32, 0, false);
|
||||
MOV(32, gpr.R(d), R(EAX));
|
||||
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
gpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
// Zero cache line.
|
||||
void Jit64::dcbz(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
Default(inst); return;
|
||||
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
AND(32, R(EAX), Imm32(~31));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
#ifdef _M_X64
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
|
||||
#else
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
|
||||
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::stX(UGeckoInstruction inst)
|
||||
void Jit64::stX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
|
||||
bool update = inst.OPCD & 1;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (a || update)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
|
||||
bool update = inst.OPCD & 1;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
if (a || update)
|
||||
int accessSize;
|
||||
switch (inst.OPCD & ~1)
|
||||
{
|
||||
int accessSize;
|
||||
switch (inst.OPCD & ~1)
|
||||
{
|
||||
case 36: accessSize = 32; break; //stw
|
||||
case 44: accessSize = 16; break; //sth
|
||||
case 38: accessSize = 8; break; //stb
|
||||
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
||||
}
|
||||
case 36: accessSize = 32; break; //stw
|
||||
case 44: accessSize = 16; break; //sth
|
||||
case 38: accessSize = 8; break; //stb
|
||||
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
|
||||
}
|
||||
|
||||
if (gpr.R(a).IsImm())
|
||||
if (gpr.R(a).IsImm())
|
||||
{
|
||||
// If we already know the address through constant folding, we can do some
|
||||
// fun tricks...
|
||||
u32 addr = (u32)gpr.R(a).offset;
|
||||
addr += offset;
|
||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||
{
|
||||
// If we already know the address through constant folding, we can do some
|
||||
// fun tricks...
|
||||
u32 addr = (u32)gpr.R(a).offset;
|
||||
addr += offset;
|
||||
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
|
||||
{
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
||||
switch (accessSize)
|
||||
{
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
||||
switch (accessSize)
|
||||
{
|
||||
// No need to protect these, they don't touch any state
|
||||
// question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
|
||||
case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
|
||||
case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
|
||||
case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
|
||||
}
|
||||
js.fifoBytesThisBlock += accessSize >> 3;
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
|
||||
case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
|
||||
case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
|
||||
}
|
||||
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
||||
{
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
MOV(accessSize, R(EAX), gpr.R(s));
|
||||
BSWAP(accessSize, EAX);
|
||||
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
||||
return;
|
||||
}
|
||||
// Other IO not worth the trouble.
|
||||
}
|
||||
|
||||
// Optimized stack access?
|
||||
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
BSWAP(32, EAX);
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
|
||||
#elif _M_IX86
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
|
||||
#endif
|
||||
if (update)
|
||||
ADD(32, gpr.R(a), Imm32(offset));
|
||||
js.fifoBytesThisBlock += accessSize >> 3;
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
/* // TODO - figure out why Beyond Good and Evil hates this
|
||||
#ifdef _M_X64
|
||||
if (accessSize == 32 && !update && jo.enableFastMem)
|
||||
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
|
||||
{
|
||||
// Fast and daring - requires 64-bit
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
gpr.LoadToX64(a, true, false);
|
||||
BSWAP(32, EAX);
|
||||
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
|
||||
if (offset && update)
|
||||
gpr.SetImmediate32(a, addr);
|
||||
MOV(accessSize, R(EAX), gpr.R(s));
|
||||
BSWAP(accessSize, EAX);
|
||||
WriteToConstRamAddress(accessSize, R(EAX), addr);
|
||||
return;
|
||||
}
|
||||
#endif*/
|
||||
// Other IO not worth the trouble.
|
||||
}
|
||||
|
||||
//Still here? Do regular path.
|
||||
gpr.Lock(s, a);
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
MOV(32, R(ECX), gpr.R(s));
|
||||
if (offset)
|
||||
ADD(32, R(EDX), Imm32((u32)offset));
|
||||
if (update && offset)
|
||||
{
|
||||
gpr.LoadToX64(a, true, true);
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
}
|
||||
TEST(32, R(EDX), Imm32(0x0C000000));
|
||||
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
||||
BSWAP(accessSize, ECX);
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
||||
#else
|
||||
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
|
||||
// Optimized stack access?
|
||||
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
BSWAP(32, EAX);
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
|
||||
#elif _M_IX86
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
|
||||
#endif
|
||||
FixupBranch skip_call = J();
|
||||
SetJumpTarget(unsafe_addr);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
|
||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
|
||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
|
||||
}
|
||||
SetJumpTarget(skip_call);
|
||||
gpr.UnlockAll();
|
||||
if (update)
|
||||
ADD(32, gpr.R(a), Imm32(offset));
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
else
|
||||
{
|
||||
Default(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::stXx(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
|
||||
int a = inst.RA, b = inst.RB, s = inst.RS;
|
||||
if (!a || a == s || a == b)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
gpr.Lock(a, b, s);
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
|
||||
if (inst.SUBOP10 & 32) {
|
||||
gpr.LoadToX64(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
} else {
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
ADD(32, R(EDX), gpr.R(b));
|
||||
/* // TODO - figure out why Beyond Good and Evil hates this
|
||||
#ifdef _M_X64
|
||||
if (accessSize == 32 && !update && jo.enableFastMem)
|
||||
{
|
||||
// Fast and daring - requires 64-bit
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
gpr.LoadToX64(a, true, false);
|
||||
BSWAP(32, EAX);
|
||||
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
|
||||
return;
|
||||
}
|
||||
unsigned accessSize;
|
||||
switch (inst.SUBOP10 & ~32) {
|
||||
#endif*/
|
||||
|
||||
//Still here? Do regular path.
|
||||
gpr.Lock(s, a);
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
MOV(32, R(ECX), gpr.R(s));
|
||||
if (offset)
|
||||
ADD(32, R(EDX), Imm32((u32)offset));
|
||||
if (update && offset)
|
||||
{
|
||||
gpr.LoadToX64(a, true, true);
|
||||
MOV(32, gpr.R(a), R(EDX));
|
||||
}
|
||||
TEST(32, R(EDX), Imm32(0x0C000000));
|
||||
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
||||
BSWAP(accessSize, ECX);
|
||||
#ifdef _M_X64
|
||||
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
||||
#else
|
||||
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
|
||||
#endif
|
||||
FixupBranch skip_call = J();
|
||||
SetJumpTarget(unsafe_addr);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
|
||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
|
||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
|
||||
}
|
||||
SetJumpTarget(skip_call);
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
else
|
||||
{
|
||||
Default(inst);
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::stXx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(LoadStore)
|
||||
|
||||
int a = inst.RA, b = inst.RB, s = inst.RS;
|
||||
if (!a || a == s || a == b)
|
||||
{
|
||||
Default(inst);
|
||||
return;
|
||||
}
|
||||
gpr.Lock(a, b, s);
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
|
||||
if (inst.SUBOP10 & 32) {
|
||||
gpr.LoadToX64(a, true, true);
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
} else {
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
ADD(32, R(EDX), gpr.R(b));
|
||||
}
|
||||
unsigned accessSize;
|
||||
switch (inst.SUBOP10 & ~32) {
|
||||
case 151: accessSize = 32; break;
|
||||
case 407: accessSize = 16; break;
|
||||
case 215: accessSize = 8; break;
|
||||
}
|
||||
|
||||
MOV(32, R(ECX), gpr.R(s));
|
||||
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
MOV(32, R(ECX), gpr.R(s));
|
||||
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
// A few games use these heavily in video codecs.
|
||||
void Jit64::lmw(UGeckoInstruction inst)
|
||||
{
|
||||
|
@@ -512,4 +505,4 @@ void Jit64::icbi(UGeckoInstruction inst)
 {
 	Default(inst);
 	WriteExit(js.compilerPC + 4, 0);
 }
@@ -51,9 +51,8 @@ u32 GC_ALIGNED16(temp32);
 
 void Jit64::lfs(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	int d = inst.RD;
 	int a = inst.RA;
@@ -88,9 +87,8 @@ void Jit64::lfs(UGeckoInstruction inst)
 
 void Jit64::lfd(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	int d = inst.RD;
 	int a = inst.RA;
@@ -155,10 +153,8 @@ void Jit64::lfd(UGeckoInstruction inst)
 
 void Jit64::stfd(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	int s = inst.RS;
 	int a = inst.RA;
@@ -234,9 +230,8 @@ void Jit64::stfd(UGeckoInstruction inst)
 
 void Jit64::stfs(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	bool update = inst.OPCD & 1;
 	int s = inst.RS;
@@ -291,9 +286,8 @@ void Jit64::stfs(UGeckoInstruction inst)
 
 void Jit64::stfsx(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
 	gpr.FlushLockX(ABI_PARAM1);
@@ -311,9 +305,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
 
 void Jit64::lfsx(UGeckoInstruction inst)
 {
-	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStoreFloating)
 
 	fpr.Lock(inst.RS);
 	fpr.LoadToX64(inst.RS, false, true);
@@ -91,9 +91,8 @@ const double GC_ALIGNED16(m_dequantizeTableD[]) =
 // We will have to break block after quantizers are written to.
 void Jit64::psq_st(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStorePaired)
 	js.block_flags |= BLOCK_USE_GQR0 << inst.I;
 
 	if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
@@ -296,9 +295,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
 
 void Jit64::psq_l(UGeckoInstruction inst)
 {
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
-		{Default(inst); return;} // turn off from debugger
-	INSTRUCTION_START;
+	INSTRUCTION_START
+	JITDISABLE(LoadStorePaired)
 
 	js.block_flags |= BLOCK_USE_GQR0 << inst.I;
 
@@ -34,370 +34,361 @@
 // cmppd, andpd, andnpd, or
 // lfsx, ps_merge01 etc
 
 const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
 const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
 const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
 const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
 
void Jit64::ps_mr(UGeckoInstruction inst)
|
||||
void Jit64::ps_mr(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
if (d == b)
|
||||
return;
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
|
||||
void Jit64::ps_sel(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
|
||||
Default(inst); return;
|
||||
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
fpr.FlushLockX(XMM7);
|
||||
fpr.FlushLockX(XMM6);
|
||||
fpr.Lock(a, b, c, d);
|
||||
fpr.LoadToX64(a, true, false);
|
||||
fpr.LoadToX64(d, false, true);
|
||||
// BLENDPD would have been nice...
|
||||
MOVAPD(XMM7, fpr.R(a));
|
||||
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
|
||||
MOVAPD(XMM6, R(XMM7));
|
||||
ANDPD(XMM7, fpr.R(d));
|
||||
ANDNPD(XMM6, fpr.R(c));
|
||||
MOVAPD(fpr.RX(d), R(XMM7));
|
||||
ORPD(fpr.RX(d), R(XMM6));
|
||||
fpr.UnlockAll();
|
||||
fpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::ps_sign(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
|
||||
fpr.Lock(d, b);
|
||||
if (d != b)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
if (d == b)
|
||||
return;
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
|
||||
void Jit64::ps_sel(UGeckoInstruction inst)
|
||||
else
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
fpr.LoadToX64(d, true);
|
||||
}
|
||||
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: //neg
|
||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 136: //nabs
|
||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 264: //abs
|
||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
||||
break;
|
||||
}
|
||||
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(d, b);
|
||||
SQRTPD(XMM0, fpr.R(b));
|
||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
||||
DIVPD(XMM1, R(XMM0));
|
||||
MOVAPD(fpr.R(d), XMM1);
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
//add a, b, c
|
||||
|
||||
//mov a, b
|
||||
//add a, c
|
||||
//we need:
|
||||
/*
|
||||
psq_l
|
||||
psq_stu
|
||||
*/
|
||||
|
||||
/*
|
||||
add a,b,a
|
||||
*/
|
||||
|
||||
//There's still a little bit more optimization that can be squeezed out of this
|
||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
|
||||
{
|
||||
fpr.Lock(d, a, b);
|
||||
|
||||
if (d == a)
|
||||
{
|
||||
fpr.LoadToX64(d, true);
|
||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
else if (d == b && reversible)
|
||||
{
|
||||
fpr.LoadToX64(d, true);
|
||||
(this->*op)(fpr.RX(d), fpr.R(a));
|
||||
}
|
||||
else if (a != d && b != d)
|
||||
{
|
||||
//sources different from d, can use rather quick solution
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
else if (b != d)
|
||||
{
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(XMM0, fpr.R(b));
|
||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
||||
}
|
||||
else //Other combo, must use two temps :(
|
||||
{
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
fpr.LoadToX64(d, false);
|
||||
(this->*op)(XMM0, Gen::R(XMM1));
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::ps_arith(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
|
||||
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
|
||||
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
|
||||
case 23://sel
|
||||
Default(inst);
|
||||
return;
|
||||
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
fpr.FlushLockX(XMM7);
|
||||
fpr.FlushLockX(XMM6);
|
||||
fpr.Lock(a, b, c, d);
|
||||
fpr.LoadToX64(a, true, false);
|
||||
fpr.LoadToX64(d, false, true);
|
||||
// BLENDPD would have been nice...
|
||||
MOVAPD(XMM7, fpr.R(a));
|
||||
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
|
||||
MOVAPD(XMM6, R(XMM7));
|
||||
ANDPD(XMM7, fpr.R(d));
|
||||
ANDNPD(XMM6, fpr.R(c));
|
||||
MOVAPD(fpr.RX(d), R(XMM7));
|
||||
ORPD(fpr.RX(d), R(XMM6));
|
||||
fpr.UnlockAll();
|
||||
fpr.UnlockAllX();
|
||||
break;
|
||||
case 24://res
|
||||
Default(inst);
|
||||
break;
|
||||
case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::ps_sign(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
|
||||
fpr.Lock(d, b);
|
||||
if (d != b)
|
||||
{
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
else
|
||||
{
|
||||
fpr.LoadToX64(d, true);
|
||||
}
|
||||
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: //neg
|
||||
XORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 136: //nabs
|
||||
ORPD(fpr.RX(d), M((void*)&psSignBits));
|
||||
break;
|
||||
case 264: //abs
|
||||
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
|
||||
break;
|
||||
}
|
||||
|
||||
fpr.UnlockAll();
|
||||
void Jit64::ps_sum(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
|
||||
void Jit64::ps_rsqrte(UGeckoInstruction inst)
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
fpr.Lock(a,b,c,d);
|
||||
fpr.LoadToX64(d, d == a || d == b || d == c, true);
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(d, b);
|
||||
SQRTPD(XMM0, fpr.R(b));
|
||||
MOVAPD(XMM1, M((void*)&psOneOne));
|
||||
DIVPD(XMM1, R(XMM0));
|
||||
case 10:
|
||||
// Do the sum in upper subregisters, merge uppers
|
||||
MOVDDUP(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
ADDPD(XMM0, R(XMM1));
|
||||
UNPCKHPD(XMM0, fpr.R(c)); //merge
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
case 11:
|
||||
// Do the sum in lower subregisters, merge lowers
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
|
||||
ADDPD(XMM0, R(XMM1)); // sum lowers
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
UNPCKLPD(XMM1, R(XMM0)); // merge
|
||||
MOVAPD(fpr.R(d), XMM1);
|
||||
fpr.UnlockAll();
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_sum WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
//add a, b, c
|
||||
|
||||
//mov a, b
|
||||
//add a, c
|
||||
//we need:
|
||||
/*
|
||||
psq_l
|
||||
psq_stu
|
||||
*/
|
||||
|
||||
/*
|
||||
add a,b,a
|
||||
*/
|
||||
|
||||
//There's still a little bit more optimization that can be squeezed out of this
|
||||
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
|
||||
void Jit64::ps_muls(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int c = inst.FC;
|
||||
fpr.Lock(a, c, d);
|
||||
fpr.LoadToX64(d, d == a || d == c, true);
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
fpr.Lock(d, a, b);
|
||||
|
||||
if (d == a)
|
||||
{
|
||||
fpr.LoadToX64(d, true);
|
||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
else if (d == b && reversible)
|
||||
{
|
||||
fpr.LoadToX64(d, true);
|
||||
(this->*op)(fpr.RX(d), fpr.R(a));
|
||||
}
|
||||
else if (a != d && b != d)
|
||||
{
|
||||
//sources different from d, can use rather quick solution
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||
(this->*op)(fpr.RX(d), fpr.R(b));
|
||||
}
|
||||
else if (b != d)
|
||||
{
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(XMM0, fpr.R(b));
|
||||
MOVAPD(fpr.RX(d), fpr.R(a));
|
||||
(this->*op)(fpr.RX(d), Gen::R(XMM0));
|
||||
}
|
||||
else //Other combo, must use two temps :(
|
||||
{
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
fpr.LoadToX64(d, false);
|
||||
(this->*op)(XMM0, Gen::R(XMM1));
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
void Jit64::ps_arith(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
|
||||
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
|
||||
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
|
||||
case 23://sel
|
||||
Default(inst);
|
||||
break;
|
||||
case 24://res
|
||||
Default(inst);
|
||||
break;
|
||||
case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
|
||||
}
|
||||
}
|
||||
|
||||
void Jit64::ps_sum(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
fpr.Lock(a,b,c,d);
|
||||
fpr.LoadToX64(d, d == a || d == b || d == c, true);
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 10:
|
||||
// Do the sum in upper subregisters, merge uppers
|
||||
MOVDDUP(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
ADDPD(XMM0, R(XMM1));
|
||||
UNPCKHPD(XMM0, fpr.R(c)); //merge
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
case 11:
|
||||
// Do the sum in lower subregisters, merge lowers
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(b));
|
||||
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
|
||||
ADDPD(XMM0, R(XMM1)); // sum lowers
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
UNPCKLPD(XMM1, R(XMM0)); // merge
|
||||
MOVAPD(fpr.R(d), XMM1);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_sum WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
void Jit64::ps_muls(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int c = inst.FC;
|
||||
fpr.Lock(a, c, d);
|
||||
fpr.LoadToX64(d, d == a || d == c, true);
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 12:
|
||||
// Single multiply scalar high
|
||||
// TODO - faster version for when regs are different
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
case 13:
|
||||
// TODO - faster version for when regs are different
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||
MULPD(XMM0, R(XMM1));
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_muls WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
||||
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(a,b,d);
|
||||
|
||||
case 12:
|
||||
// Single multiply scalar high
|
||||
// TODO - faster version for when regs are different
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 528:
|
||||
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf
|
||||
break; //00
|
||||
case 560:
|
||||
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
|
||||
break; //01
|
||||
case 592:
|
||||
SHUFPD(XMM0, fpr.R(b), 1);
|
||||
break; //10
|
||||
case 624:
|
||||
UNPCKHPD(XMM0, fpr.R(b));
|
||||
break; //11
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
//TODO: add optimized cases
|
||||
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
int d = inst.FD;
|
||||
fpr.Lock(a,b,c,d);
|
||||
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
case 13:
|
||||
// TODO - faster version for when regs are different
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 14: //madds0
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 15: //madds1
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 28: //msub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 29: //madd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
//Default(inst);
|
||||
//fpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||
MULPD(XMM0, R(XMM1));
|
||||
MOVAPD(fpr.R(d), XMM0);
|
||||
break;
|
||||
default:
|
||||
PanicAlert("ps_muls WTF!!!");
|
||||
}
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
||||
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(a,b,d);
|
||||
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 528:
|
||||
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf
|
||||
break; //00
|
||||
case 560:
|
||||
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
|
||||
break; //01
|
||||
case 592:
|
||||
SHUFPD(XMM0, fpr.R(b), 1);
|
||||
break; //10
|
||||
case 624:
|
||||
UNPCKHPD(XMM0, fpr.R(b));
|
||||
break; //11
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
//TODO: add optimized cases
|
||||
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(Paired)
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
int d = inst.FD;
|
||||
fpr.Lock(a,b,c,d);
|
||||
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 14: //madds0
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 15: //madds1
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 28: //msub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 29: //madd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
//Default(inst);
|
||||
//fpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
|
|
@ -29,172 +29,165 @@
#include "Jit.h"
#include "JitRegCache.h"

void Jit64::mtspr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	int d = inst.RD;

	switch (iIndex)
	{
	case SPR_LR:
	case SPR_CTR:
	case SPR_XER:
		// These are safe to do the easy way, see the bottom of this function.
		break;

	case SPR_GQR0:
	case SPR_GQR0 + 1:
	case SPR_GQR0 + 2:
	case SPR_GQR0 + 3:
	case SPR_GQR0 + 4:
	case SPR_GQR0 + 5:
	case SPR_GQR0 + 6:
	case SPR_GQR0 + 7:
		js.blockSetsQuantizers = true;
		// Prevent recompiler from compiling in old quantizer values.
		// If the value changed, destroy all blocks using this quantizer
		// This will create a little bit of block churn, but hopefully not too bad.
		{
		/*
			MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex]));  // Load old value
			CMP(32, R(EAX), gpr.R(inst.RD));
			FixupBranch skip_destroy = J_CC(CC_E, false);
			int gqr = iIndex - SPR_GQR0;
			ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr);
			SetJumpTarget(skip_destroy);*/
		}
		break;
		// TODO - break block if quantizers are written to.
	default:
		Default(inst);
		return;
	}

	// OK, this is easy.
	gpr.Lock(d);
	gpr.LoadToX64(d, true);
	MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d));
	gpr.UnlockAll();
}

void Jit64::mfspr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	int d = inst.RD;
	switch (iIndex)
	{
	case SPR_WPAR:
		Default(inst);
		return;
	// case SPR_DEC:
		//MessageBox(NULL, "Read from DEC", "????", MB_OK);
		//break;
	case SPR_TL:
	case SPR_TU:
		//CALL((void Jit64::*)&CoreTiming::Advance);
		// fall through
	default:
		gpr.Lock(d);
		gpr.LoadToX64(d, false);
		MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
		gpr.UnlockAll();
		break;
	}
}


// =======================================================================================
// Don't interpret this, if we do we get thrown out
// --------------
void Jit64::mtmsr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	gpr.LoadToX64(inst.RS, true, false);
	MOV(32, M(&MSR), gpr.R(inst.RS));
	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);
	WriteExit(js.compilerPC + 4, 0);
}
// ==============


void Jit64::mfmsr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	//Privileged?
	gpr.LoadToX64(inst.RD, false);
	MOV(32, gpr.R(inst.RD), M(&MSR));
}

void Jit64::mftb(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	mfspr(inst);
}

void Jit64::mfcr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	// USES_CR
	int d = inst.RD;
	gpr.LoadToX64(d, false, true);
	MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
	SHL(32, R(EAX), Imm8(4));
	for (int i = 1; i < 7; i++) {
		OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
		SHL(32, R(EAX), Imm8(4));
	}
	OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
	MOV(32, gpr.R(d), R(EAX));
}

void Jit64::mtcrf(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)

	// USES_CR
	u32 mask = 0;
	u32 crm = inst.CRM;
	if (crm == 0xFF) {
		gpr.FlushLockX(ECX);
		MOV(32, R(EAX), gpr.R(inst.RS));
		for (int i = 0; i < 8; i++) {
			MOV(32, R(ECX), R(EAX));
			SHR(32, R(ECX), Imm8(28 - (i * 4)));
			AND(32, R(ECX), Imm32(0xF));
			MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
		}
		gpr.UnlockAllX();
	} else {
		Default(inst);
		return;

		// TODO: translate this to work in new CR model.
		for (int i = 0; i < 8; i++) {
			if (crm & (1 << i))
				mask |= 0xF << (i*4);
		}
		MOV(32, R(EAX), gpr.R(inst.RS));
		MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
		AND(32, R(EAX), Imm32(mask));
		AND(32, R(ECX), Imm32(~mask));
		OR(32, R(EAX), R(ECX));
		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
	}
}

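For reference, the mfcr sequence above (load cr_fast[0], then alternate SHL by 4 and OR for the remaining fields) packs the eight 4-bit condition-register fields into a single 32-bit CR value, with cr_fast[0] ending up in the top nibble. A small C++ sketch of the same packing, assuming each cr_fast entry holds a value in 0..15 as the emitted code expects:

#include <cstdint>

// Mirror of the SHL/OR chain mfcr emits: cr_fast[0] lands in bits 28..31,
// cr_fast[7] in bits 0..3.
static uint32_t PackCR(const uint8_t cr_fast[8])
{
	uint32_t cr = cr_fast[0];
	for (int i = 1; i < 8; i++)
		cr = (cr << 4) | (cr_fast[i] & 0xF);
	return cr;
}
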
@ -31,12 +31,12 @@
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START

void Jit64::mtspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	switch(iIndex) {
	case SPR_LR:
		ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD));
		return;

@ -60,121 +60,121 @@
	default:
		Default(inst);
		return;
	}
}

void Jit64::mfspr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	switch (iIndex)
	{
	case SPR_LR:
		ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
		return;
	case SPR_CTR:
		ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
		return;
	case SPR_GQR0:
	case SPR_GQR0 + 1:
	case SPR_GQR0 + 2:
	case SPR_GQR0 + 3:
	case SPR_GQR0 + 4:
	case SPR_GQR0 + 5:
	case SPR_GQR0 + 6:
	case SPR_GQR0 + 7:
		ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD);
		return;
	default:
		Default(inst);
		return;
	}
}


// =======================================================================================
// Don't interpret this, if we do we get thrown out
// --------------
void Jit64::mtmsr(UGeckoInstruction inst)
{
	ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS));
	ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}
// ==============


void Jit64::mfmsr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(SystemRegisters)
	ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
}

void Jit64::mftb(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	JITDISABLE(SystemRegisters)
	mfspr(inst);
}

void Jit64::mfcr(UGeckoInstruction inst)
{
	Default(inst); return;
#if 0
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	// USES_CR
	int d = inst.RD;
	gpr.LoadToX64(d, false, true);
	MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
	SHL(32, R(EAX), Imm8(4));
	for (int i = 1; i < 7; i++) {
		OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
		SHL(32, R(EAX), Imm8(4));
	}
	OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
	MOV(32, gpr.R(d), R(EAX));
#endif
}

void Jit64::mtcrf(UGeckoInstruction inst)
{
	Default(inst); return;
#if 0
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	// USES_CR
	u32 mask = 0;
	u32 crm = inst.CRM;
	if (crm == 0xFF) {
		gpr.FlushLockX(ECX);
		MOV(32, R(EAX), gpr.R(inst.RS));
		for (int i = 0; i < 8; i++) {
			MOV(32, R(ECX), R(EAX));
			SHR(32, R(ECX), Imm8(28 - (i * 4)));
			AND(32, R(ECX), Imm32(0xF));
			MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
		}
		gpr.UnlockAllX();
	} else {
		Default(inst);
		return;

		// TODO: translate this to work in new CR model.
		for (int i = 0; i < 8; i++) {
			if (crm & (1 << i))
				mask |= 0xF << (i*4);
		}
		MOV(32, R(EAX), gpr.R(inst.RS));
		MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
		AND(32, R(EAX), Imm32(mask));
		AND(32, R(ECX), Imm32(~mask));
		OR(32, R(EAX), R(ECX));
		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
	}
#endif
}

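Conversely, the crm == 0xFF fast path in mtcrf (shown in both files above, and still #if 0'd out in the IL version) splits a full 32-bit CR value back into the eight 4-bit cr_fast fields with one shift-and-mask per field. A small C++ sketch of that unpacking, using the same bit layout as the emitted SHR/AND pair:

#include <cstdint>

// Inverse of the packing: field i comes from bits (28 - 4*i) .. (31 - 4*i),
// matching SHR(ECX, 28 - i*4) followed by AND(ECX, 0xF).
static void UnpackCR(uint32_t cr, uint8_t cr_fast[8])
{
	for (int i = 0; i < 8; i++)
		cr_fast[i] = static_cast<uint8_t>((cr >> (28 - i * 4)) & 0xF);
}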