small code cleanup in JIT: use JITIL's nice JITDISABLE macro

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4477 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Shawn Hoffman 2009-10-29 04:40:26 +00:00
parent 31e61da40d
commit 5b4d12c1f3
9 changed files with 1788 additions and 1867 deletions

View File

@ -77,7 +77,10 @@ void Jit(u32 em_address);
// #define INSTRUCTION_START PPCTables::CountInstruction(inst); // #define INSTRUCTION_START PPCTables::CountInstruction(inst);
#define INSTRUCTION_START #define INSTRUCTION_START
// Early-out for Jit64 instruction handlers: when the JIT is switched off
// globally (bJITOff) or for one instruction category (bJIT<type>Off), the
// instruction is handed to Default(inst) and the enclosing handler returns.
// Expands to a bare `if` statement that refers to a variable named `inst`,
// so it has to be used as a standalone statement inside a handler.
#define JITDISABLE(type) \
if (Core::g_CoreStartupParameter.bJITOff || \
Core::g_CoreStartupParameter.bJIT##type##Off) \
{Default(inst); return;}
class TrampolineCache : public Gen::XCodeBlock class TrampolineCache : public Gen::XCodeBlock
{ {

View File

@ -73,9 +73,8 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm
void Jit64::fp_arith_s(UGeckoInstruction inst) void Jit64::fp_arith_s(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(FloatingPoint)
INSTRUCTION_START;
if (inst.Rc) { if (inst.Rc) {
Default(inst); return; Default(inst); return;
} }
@ -104,9 +103,8 @@ void Jit64::fp_arith_s(UGeckoInstruction inst)
void Jit64::fmaddXX(UGeckoInstruction inst) void Jit64::fmaddXX(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(FloatingPoint)
INSTRUCTION_START;
if (inst.Rc) { if (inst.Rc) {
Default(inst); return; Default(inst); return;
} }
@ -162,9 +160,8 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
void Jit64::fsign(UGeckoInstruction inst) void Jit64::fsign(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(FloatingPoint)
INSTRUCTION_START;
if (inst.Rc) { if (inst.Rc) {
Default(inst); return; Default(inst); return;
} }
@ -195,8 +192,7 @@ void Jit64::fsign(UGeckoInstruction inst)
void Jit64::fmrx(UGeckoInstruction inst) void Jit64::fmrx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) JITDISABLE(FloatingPoint)
{Default(inst); return;} // turn off from debugger
if (inst.Rc) { if (inst.Rc) {
Default(inst); return; Default(inst); return;
} }
@ -213,9 +209,9 @@ void Jit64::fcmpx(UGeckoInstruction inst)
{ {
// TODO : This still causes crashes in Nights, and broken graphics // TODO : This still causes crashes in Nights, and broken graphics
// in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :( // in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. :(
INSTRUCTION_START; INSTRUCTION_START
if(Core::g_CoreStartupParameter.bJITOff || jo.fpAccurateFcmp JITDISABLE(FloatingPoint)
|| Core::g_CoreStartupParameter.bJITFloatingPointOff) { if (jo.fpAccurateFcmp) {
Default(inst); return; // turn off from debugger Default(inst); return; // turn off from debugger
} }

File diff suppressed because it is too large Load Diff

View File

@ -33,441 +33,434 @@
#include "JitAsm.h" #include "JitAsm.h"
#include "JitRegCache.h" #include "JitRegCache.h"
// Emits an 8-bit indexed load: the address is RB, plus RA when RA != 0,
// and the loaded value ends up in RD.
void Jit64::lbzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)
	// Per-instruction debugger switch. The braces are essential: without them
	// the `return` is not part of the `if` and runs unconditionally, so no
	// code at all would be emitted for this instruction.
	if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff)
	{
		Default(inst);
		return;
	}

	int a = inst.RA, b = inst.RB, d = inst.RD;
	gpr.Lock(a, b, d);
	gpr.FlushLockX(ABI_PARAM1);
	// Only load the old value of RD when it is also one of the sources.
	if (b == d || a == d)
		gpr.LoadToX64(d, true, true);
	else
		gpr.LoadToX64(d, false, true);
	MOV(32, R(ABI_PARAM1), gpr.R(b));
	if (a)
		ADD(32, R(ABI_PARAM1), gpr.R(a));
#if 0
	SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
	MOV(32, gpr.R(d), R(EAX));
#else
	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false);
#endif
	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// Emits a 32-bit indexed load: the address is RB, plus RA when RA != 0,
// and the loaded value ends up in RD.
void Jit64::lwzx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	const int ra = inst.RA;
	const int rb = inst.RB;
	const int rd = inst.RD;

	gpr.Lock(ra, rb, rd);
	gpr.FlushLockX(ABI_PARAM1);

	// The previous value of RD only has to be loaded when RD doubles as a source.
	const bool destIsSource = (rb == rd) || (ra == rd);
	gpr.LoadToX64(rd, destIsSource, true);

	MOV(32, R(ABI_PARAM1), gpr.R(rb));
	if (ra != 0)
	{
		ADD(32, R(ABI_PARAM1), gpr.R(ra));
	}
#if 1
	SafeLoadRegToEAX(ABI_PARAM1, 32, 0);
	MOV(32, gpr.R(rd), R(EAX));
#else
	UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(rd), 32, 0, false);
#endif

	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// Emits a 16-bit indexed load through SafeLoadRegToEAX (last argument true);
// the address is RB, plus RA when RA != 0, and the result ends up in RD.
void Jit64::lhax(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	const int ra = inst.RA;
	const int rb = inst.RB;
	const int rd = inst.RD;

	gpr.Lock(ra, rb, rd);
	gpr.FlushLockX(ABI_PARAM1);

	// The previous value of RD only has to be loaded when RD doubles as a source.
	const bool destIsSource = (rb == rd) || (ra == rd);
	gpr.LoadToX64(rd, destIsSource, true);

	MOV(32, R(ABI_PARAM1), gpr.R(rb));
	if (ra != 0)
	{
		ADD(32, R(ABI_PARAM1), gpr.R(ra));
	}

	// Some homebrew actually loads from a hw reg with this instruction
	SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true);
	MOV(32, gpr.R(rd), R(EAX));

	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// Emits the displacement-form zero-extending loads, selected by opcode:
// 32 = lwz, 40 = lhz, 34 = lbz. Also recognises one specific idle-loop
// instruction pattern and emits a call to PowerPC::OnIdle for it.
void Jit64::lXz(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)
	// Per-instruction debugger switch. The braces are essential: without them
	// the `return` is not part of the `if` and runs unconditionally, which
	// turns everything below into dead code.
	if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff)
	{
		Default(inst);
		return;
	}

	int d = inst.RD;
	int a = inst.RA;

	// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
	// Will give nice boost to dual core mode
	// (mb2): I agree,
	// IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle method
	// ... maybe the throttle one already does that :p
	// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
	if (Core::GetStartupParameter().bSkipIdle &&
		inst.OPCD == 32 &&
		(inst.hex & 0xFFFF0000) == 0x800D0000 &&
		(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
		(Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
		Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		// TODO(LinesPrower):
		// - Rewrite this!
		// It seems to be ugly and inefficient, but I don't know JIT stuff enough to make it right
		// It only demonstrates the idea

		// do our job at first
		s32 offset = (s32)(s16)inst.SIMM_16;
		gpr.FlushLockX(ABI_PARAM1);
		gpr.Lock(d, a);
		MOV(32, R(ABI_PARAM1), gpr.R(a));
		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
		gpr.LoadToX64(d, false, true);
		MOV(32, gpr.R(d), R(EAX));
		gpr.UnlockAll();
		gpr.UnlockAllX();

		gpr.Flush(FLUSH_ALL);

		// if it's still 0, we can wait until the next event
		CMP(32, R(RAX), Imm32(0));
		FixupBranch noIdle = J_CC(CC_NE);

		gpr.Flush(FLUSH_ALL);
		fpr.Flush(FLUSH_ALL);
		ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);

		// ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0
		//MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC));
		JMP(asm_routines.testExceptions, true);

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}

	// R2 always points to the small read-only data area. We could bake R2-relative loads into immediates.
	// R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode.

	s32 offset = (s32)(s16)inst.SIMM_16;
	if (!a)
	{
		// RA == 0 is left to the fallback.
		Default(inst);
		return;
	}
	int accessSize;
	switch (inst.OPCD)
	{
	case 32:
		accessSize = 32;
		if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;}
		break; //lwz
	case 40: accessSize = 16; break; //lhz
	case 34: accessSize = 8; break; //lbz
	default:
		//_assert_msg_(DYNA_REC, 0, "lXz: invalid access size");
		PanicAlert("lXz: invalid access size");
		return;
	}

	//Still here? Do regular path.
#if defined(_M_X64)
	if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
#else
	if (true) {
#endif
		// Safe and boring
		gpr.FlushLockX(ABI_PARAM1);
		gpr.Lock(d, a);
		MOV(32, R(ABI_PARAM1), gpr.R(a));
		SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
		gpr.LoadToX64(d, false, true);
		MOV(32, gpr.R(d), R(EAX));
		gpr.UnlockAll();
		gpr.UnlockAllX();
		return;
	}

	// Fast and daring
	gpr.Lock(a, d);
	gpr.LoadToX64(a, true, false);
	gpr.LoadToX64(d, a == d, true);
	MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset));
	switch (accessSize) {
	case 32:
		BSWAP(32, gpr.R(d).GetSimpleReg());
		break;
	// Careful in the backpatch - need to properly nop over first
	//	case 16:
	//		BSWAP(32, gpr.R(d).GetSimpleReg());
	//		SHR(32, gpr.R(d), Imm8(16));
	//		break;
	}
	gpr.UnlockAll();
}
// Emits a 16-bit displacement-form load through SafeLoadRegToEAX (last
// argument true); the address is RA plus the sign-extended 16-bit immediate.
void Jit64::lha(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	const int rd = inst.RD;
	const int ra = inst.RA;
	const s32 displacement = (s32)(s16)inst.SIMM_16;

	// Safe and boring
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(rd, ra);

	MOV(32, R(ABI_PARAM1), gpr.R(ra));
	SafeLoadRegToEAX(ABI_PARAM1, 16, displacement, true);

	// Keep the old value of RD only when RD and RA are the same register.
	gpr.LoadToX64(rd, rd == ra, true);
	MOV(32, gpr.R(rd), R(EAX));

	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// Emits a 32-bit indexed load with update: RA is overwritten with RA + RB
// and the word at that address is loaded into RD.
void Jit64::lwzux(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	const int ra = inst.RA;
	const int rb = inst.RB;
	const int rd = inst.RD;

	// RA == 0 and register aliasing with RA are left to the fallback.
	const bool needsFallback = (ra == 0) || (ra == rd) || (ra == rb);
	if (needsFallback)
	{
		Default(inst);
		return;
	}

	gpr.Lock(ra, rb, rd);

	gpr.LoadToX64(rd, rb == rd, true);
	gpr.LoadToX64(ra, true, true);

	// Update RA in place, then load through EAX.
	ADD(32, gpr.R(ra), gpr.R(rb));
	MOV(32, R(EAX), gpr.R(ra));
	SafeLoadRegToEAX(EAX, 32, 0, false);
	MOV(32, gpr.R(rd), R(EAX));

	gpr.UnlockAll();
}
MOV(32, R(EAX), gpr.R(inst.RB)); // Zero cache line.
if (inst.RA) void Jit64::dcbz(UGeckoInstruction inst)
ADD(32, R(EAX), gpr.R(inst.RA)); {
AND(32, R(EAX), Imm32(~31)); INSTRUCTION_START
XORPD(XMM0, R(XMM0)); JITDISABLE(LoadStore)
Default(inst); return;
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
AND(32, R(EAX), Imm32(~31));
XORPD(XMM0, R(XMM0));
#ifdef _M_X64 #ifdef _M_X64
MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else #else
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif #endif
} }
// Emits the displacement-form integer stores, selected by opcode with the low
// bit masked off: 36 = stw, 44 = sth, 38 = stb. The low opcode bit is read as
// the "update" flag.
void Jit64::stX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	int s = inst.RS;
	int a = inst.RA;

	bool update = inst.OPCD & 1;

	s32 offset = (s32)(s16)inst.SIMM_16;

	// RA == 0 without update is left to the fallback.
	if (!a && !update)
	{
		Default(inst);
		return;
	}

	int accessSize;
	switch (inst.OPCD & ~1)
	{
	case 36: accessSize = 32; break; //stw
	case 44: accessSize = 16; break; //sth
	case 38: accessSize = 8; break; //stb
	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
	}

	if (gpr.R(a).IsImm())
	{
		// If we already know the address through constant folding, we can do some
		// fun tricks...
		u32 addr = (u32)gpr.R(a).offset;
		addr += offset;
		if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
		{
			if (offset && update)
				gpr.SetImmediate32(a, addr);
			gpr.FlushLockX(ABI_PARAM1);
			MOV(32, R(ABI_PARAM1), gpr.R(s));
			switch (accessSize)
			{
			// No need to protect these, they don't touch any state
			// question - should we inline them instead? Pro: Lose a CALL   Con: Code bloat
			case 8:  CALL((void *)asm_routines.fifoDirectWrite8);  break;
			case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
			case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
			}
			js.fifoBytesThisBlock += accessSize >> 3;
			gpr.UnlockAllX();
			return;
		}
		else if (Memory::IsRAMAddress(addr) && accessSize == 32)
		{
			if (offset && update)
				gpr.SetImmediate32(a, addr);
			MOV(accessSize, R(EAX), gpr.R(s));
			BSWAP(accessSize, EAX);
			WriteToConstRamAddress(accessSize, R(EAX), addr);
			return;
		}
		// Other IO not worth the trouble.
	}

	// Optimized stack access?
	if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
	{
		gpr.FlushLockX(ABI_PARAM1);
		MOV(32, R(ABI_PARAM1), gpr.R(a));
		MOV(32, R(EAX), gpr.R(s));
		BSWAP(32, EAX);
#ifdef _M_X64
		MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
#elif _M_IX86
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
#endif
		if (update)
			ADD(32, gpr.R(a), Imm32(offset));
		gpr.UnlockAllX();
		return;
	}

	/* // TODO - figure out why Beyond Good and Evil hates this
#ifdef _M_X64
	if (accessSize == 32 && !update && jo.enableFastMem)
	{
		// Fast and daring - requires 64-bit
		MOV(32, R(EAX), gpr.R(s));
		gpr.LoadToX64(a, true, false);
		BSWAP(32, EAX);
		MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
		return;
	}
#endif*/

	//Still here? Do regular path.
	gpr.Lock(s, a);
	gpr.FlushLockX(ECX, EDX);
	MOV(32, R(EDX), gpr.R(a));
	MOV(32, R(ECX), gpr.R(s));
	if (offset)
		ADD(32, R(EDX), Imm32((u32)offset));
	if (update && offset)
	{
		gpr.LoadToX64(a, true, true);
		MOV(32, gpr.R(a), R(EDX));
	}

	// Addresses with any of the 0x0C000000 bits set take the function-call path.
	TEST(32, R(EDX), Imm32(0x0C000000));
	FixupBranch unsafe_addr = J_CC(CC_NZ);
	BSWAP(accessSize, ECX);
#ifdef _M_X64
	MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
#else
	AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
#endif
	FixupBranch skip_call = J();
	SetJumpTarget(unsafe_addr);
	switch (accessSize)
	{
	case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
	case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
	case 8:  ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8,  2), ECX, EDX); break;
	}
	SetJumpTarget(skip_call);
	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// Emits the indexed integer stores. With bit 5 (value 32) of SUBOP10 masked
// off, the sub-opcode selects the width: 151 = 32-bit, 407 = 16-bit,
// 215 = 8-bit. When that bit is set, RA is also overwritten with RA + RB.
void Jit64::stXx(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	const int ra = inst.RA;
	const int rb = inst.RB;
	const int rs = inst.RS;

	// RA == 0 and register aliasing with RA are left to the fallback.
	const bool needsFallback = (ra == 0) || (ra == rs) || (ra == rb);
	if (needsFallback)
	{
		Default(inst);
		return;
	}

	gpr.Lock(ra, rb, rs);
	gpr.FlushLockX(ECX, EDX);

	const bool writesBackAddress = (inst.SUBOP10 & 32) != 0;
	if (writesBackAddress)
	{
		// Compute the address in RA itself, then copy it to EDX.
		gpr.LoadToX64(ra, true, true);
		ADD(32, gpr.R(ra), gpr.R(rb));
		MOV(32, R(EDX), gpr.R(ra));
	}
	else
	{
		// Compute the address in EDX only.
		MOV(32, R(EDX), gpr.R(ra));
		ADD(32, R(EDX), gpr.R(rb));
	}

	// NOTE(review): no default case - accessSize stays uninitialized if the
	// sub-opcode is none of the three below; presumably the dispatcher
	// guarantees that cannot happen. Confirm against the opcode tables.
	unsigned accessSize;
	switch (inst.SUBOP10 & ~32)
	{
	case 151:
		accessSize = 32;
		break;
	case 407:
		accessSize = 16;
		break;
	case 215:
		accessSize = 8;
		break;
	}

	MOV(32, R(ECX), gpr.R(rs));
	SafeWriteRegToReg(ECX, EDX, accessSize, 0);

	gpr.UnlockAll();
	gpr.UnlockAllX();
}
// A few games use these heavily in video codecs. // A few games use these heavily in video codecs.
void Jit64::lmw(UGeckoInstruction inst) void Jit64::lmw(UGeckoInstruction inst)
{ {
@ -512,4 +505,4 @@ void Jit64::icbi(UGeckoInstruction inst)
{ {
Default(inst); Default(inst);
WriteExit(js.compilerPC + 4, 0); WriteExit(js.compilerPC + 4, 0);
} }

View File

@ -51,9 +51,8 @@ u32 GC_ALIGNED16(temp32);
void Jit64::lfs(UGeckoInstruction inst) void Jit64::lfs(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
int d = inst.RD; int d = inst.RD;
int a = inst.RA; int a = inst.RA;
@ -88,9 +87,8 @@ void Jit64::lfs(UGeckoInstruction inst)
void Jit64::lfd(UGeckoInstruction inst) void Jit64::lfd(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
int d = inst.RD; int d = inst.RD;
int a = inst.RA; int a = inst.RA;
@ -155,10 +153,8 @@ void Jit64::lfd(UGeckoInstruction inst)
void Jit64::stfd(UGeckoInstruction inst) void Jit64::stfd(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
int s = inst.RS; int s = inst.RS;
int a = inst.RA; int a = inst.RA;
@ -234,9 +230,8 @@ void Jit64::stfd(UGeckoInstruction inst)
void Jit64::stfs(UGeckoInstruction inst) void Jit64::stfs(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
bool update = inst.OPCD & 1; bool update = inst.OPCD & 1;
int s = inst.RS; int s = inst.RS;
@ -291,9 +286,8 @@ void Jit64::stfs(UGeckoInstruction inst)
void Jit64::stfsx(UGeckoInstruction inst) void Jit64::stfsx(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
// We can take a shortcut here - it's not likely that a hardware access would use this instruction. // We can take a shortcut here - it's not likely that a hardware access would use this instruction.
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
@ -311,9 +305,8 @@ void Jit64::stfsx(UGeckoInstruction inst)
void Jit64::lfsx(UGeckoInstruction inst) void Jit64::lfsx(UGeckoInstruction inst)
{ {
if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStoreFloating)
INSTRUCTION_START;
fpr.Lock(inst.RS); fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true); fpr.LoadToX64(inst.RS, false, true);

View File

@ -91,9 +91,8 @@ const double GC_ALIGNED16(m_dequantizeTableD[]) =
// We will have to break block after quantizers are written to. // We will have to break block after quantizers are written to.
void Jit64::psq_st(UGeckoInstruction inst) void Jit64::psq_st(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStorePaired)
INSTRUCTION_START;
js.block_flags |= BLOCK_USE_GQR0 << inst.I; js.block_flags |= BLOCK_USE_GQR0 << inst.I;
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
@ -296,9 +295,8 @@ void Jit64::psq_st(UGeckoInstruction inst)
void Jit64::psq_l(UGeckoInstruction inst) void Jit64::psq_l(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(LoadStorePaired)
INSTRUCTION_START;
js.block_flags |= BLOCK_USE_GQR0 << inst.I; js.block_flags |= BLOCK_USE_GQR0 << inst.I;

View File

@ -34,370 +34,361 @@
// cmppd, andpd, andnpd, or // cmppd, andpd, andnpd, or
// lfsx, ps_merge01 etc // lfsx, ps_merge01 etc
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0}; const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
// Emits a register move from FB to FD using MOVAPD; nothing is emitted when
// FD == FB. The Rc form is left to the fallback.
void Jit64::ps_mr(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(Paired)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	const int dst = inst.FD;
	const int src = inst.FB;

	// Moving a register onto itself needs no code.
	if (dst != src)
	{
		fpr.LoadToX64(dst, false);
		MOVAPD(fpr.RX(dst), fpr.R(src));
	}
}
// Paired select. The handler currently always defers to Default(); the
// emitter sequence after the early return is unreachable and kept for
// reference only.
void Jit64::ps_sel(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(Paired)

	// Unconditional fallback.
	Default(inst);
	return;

	// --- unreachable from here on ---
	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
	const int fd = inst.FD;
	const int fa = inst.FA;
	const int fb = inst.FB;
	const int fc = inst.FC;

	fpr.FlushLockX(XMM7);
	fpr.FlushLockX(XMM6);
	fpr.Lock(fa, fb, fc, fd);
	fpr.LoadToX64(fa, true, false);
	fpr.LoadToX64(fd, false, true);

	// BLENDPD would have been nice...
	// Build a mask from (fa < 0.0), then combine fd and fc through AND/ANDN/OR.
	MOVAPD(XMM7, fpr.R(fa));
	CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
	MOVAPD(XMM6, R(XMM7));
	ANDPD(XMM7, fpr.R(fd));
	ANDNPD(XMM6, fpr.R(fc));
	MOVAPD(fpr.RX(fd), R(XMM7));
	ORPD(fpr.RX(fd), R(XMM6));

	fpr.UnlockAll();
	fpr.UnlockAllX();
}
// Emits the paired sign-manipulation ops, selected by SUBOP10:
// 40 = neg (XOR sign bits), 136 = nabs (OR sign bits), 264 = abs (AND mask).
// The Rc form is left to the fallback.
void Jit64::ps_sign(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(Paired)

	if (inst.Rc)
	{
		Default(inst);
		return;
	}

	const int dst = inst.FD;
	const int src = inst.FB;

	fpr.Lock(dst, src);

	if (dst == src)
	{
		// Operating in place: the current value has to be loaded.
		fpr.LoadToX64(dst, true);
	}
	else
	{
		// Separate destination: copy the source in first.
		fpr.LoadToX64(dst, false);
		MOVAPD(fpr.RX(dst), fpr.R(src));
	}

	switch (inst.SUBOP10)
	{
	case 40: //neg
		XORPD(fpr.RX(dst), M((void*)&psSignBits));
		break;
	case 136: //nabs
		ORPD(fpr.RX(dst), M((void*)&psSignBits));
		break;
	case 264: //abs
		ANDPD(fpr.RX(dst), M((void*)&psAbsMask));
		break;
	}

	fpr.UnlockAll();
}
// ps_rsqrte: reciprocal square root of both lanes of FB, stored to FD.
// Emitted as 1.0 / sqrt(b) on packed doubles, with XMM0 and XMM1 as scratch.
// Falls back to Default(inst) when JITDISABLE(Paired) triggers or Rc is set.
void Jit64::ps_rsqrte(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(Paired)
    if (inst.Rc)
    {
        Default(inst);
        return;
    }

    const int dst = inst.FD;
    const int src = inst.FB;

    fpr.Lock(dst, src);
    SQRTPD(XMM0, fpr.R(src));            // XMM0 = sqrt(b)
    MOVAPD(XMM1, M((void*)&psOneOne));   // XMM1 = {1.0, 1.0}
    DIVPD(XMM1, R(XMM0));                // XMM1 = 1.0 / sqrt(b)
    MOVAPD(fpr.R(dst), XMM1);            // write the result to d
    fpr.UnlockAll();
}
//add a, b, c
//mov a, b
//add a, c
//we need:
/*
psq_l
psq_stu
*/
/*
add a,b,a
*/
//There's still a little bit more optimization that can be squeezed out of this
// Emits "d = a op b" on packed doubles, choosing the instruction sequence by
// how the destination overlaps the two sources.
//   d, a, b     - paired register indices: destination, first and second source
//   reversible  - true when "b op a" may be emitted in place of "a op b"
//                 (callers pass true for ADDPD/MULPD, false for SUBPD/DIVPD)
//   op          - XEmitter member emitting the two-operand instruction
//                 (first argument is both destination and left operand)
// The result in d is passed through ForceSinglePrecisionP before the register
// locks are released.
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
{
    fpr.Lock(d, a, b);
    if (d == a)
    {
        // Destination is the left operand: d op= b, in place.
        fpr.LoadToX64(d, true);
        (this->*op)(fpr.RX(d), fpr.R(b));
    }
    else if (d != b)
    {
        // Destination overlaps neither source: copy a into d, then apply b.
        fpr.LoadToX64(d, false);
        MOVAPD(fpr.RX(d), fpr.R(a));
        (this->*op)(fpr.RX(d), fpr.R(b));
    }
    else if (reversible)
    {
        // d == b and operand order does not matter: d op= a, in place.
        fpr.LoadToX64(d, true);
        (this->*op)(fpr.RX(d), fpr.R(a));
    }
    else
    {
        // Only case left: d == b with an order-sensitive op. Both sources are
        // copied to temporaries before d is rebound, then the result is moved in.
        MOVAPD(XMM0, fpr.R(a));
        MOVAPD(XMM1, fpr.R(b));
        fpr.LoadToX64(d, false);
        (this->*op)(XMM0, Gen::R(XMM1));
        MOVAPD(fpr.RX(d), Gen::R(XMM0));
    }
    ForceSinglePrecisionP(fpr.RX(d));
    fpr.UnlockAll();
}
void Jit64::ps_arith(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Paired)
if (inst.Rc) {
Default(inst); return;
}
switch (inst.SUBOP5)
{
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
case 23://sel
Default(inst); Default(inst);
return; break;
case 24://res
if (inst.Rc) { Default(inst);
Default(inst); return; break;
} case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro. default:
int d = inst.FD; _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.FlushLockX(XMM7);
fpr.FlushLockX(XMM6);
fpr.Lock(a, b, c, d);
fpr.LoadToX64(a, true, false);
fpr.LoadToX64(d, false, true);
// BLENDPD would have been nice...
MOVAPD(XMM7, fpr.R(a));
CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
MOVAPD(XMM6, R(XMM7));
ANDPD(XMM7, fpr.R(d));
ANDNPD(XMM6, fpr.R(c));
MOVAPD(fpr.RX(d), R(XMM7));
ORPD(fpr.RX(d), R(XMM6));
fpr.UnlockAll();
fpr.UnlockAllX();
} }
}
void Jit64::ps_sign(UGeckoInstruction inst) void Jit64::ps_sum(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(Paired)
INSTRUCTION_START; if (inst.Rc) {
if (inst.Rc) { Default(inst); return;
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
fpr.Lock(d, b);
if (d != b)
{
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), fpr.R(b));
}
else
{
fpr.LoadToX64(d, true);
}
switch (inst.SUBOP10)
{
case 40: //neg
XORPD(fpr.RX(d), M((void*)&psSignBits));
break;
case 136: //nabs
ORPD(fpr.RX(d), M((void*)&psSignBits));
break;
case 264: //abs
ANDPD(fpr.RX(d), M((void*)&psAbsMask));
break;
}
fpr.UnlockAll();
} }
int d = inst.FD;
void Jit64::ps_rsqrte(UGeckoInstruction inst) int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.Lock(a,b,c,d);
fpr.LoadToX64(d, d == a || d == b || d == c, true);
switch (inst.SUBOP5)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) case 10:
{Default(inst); return;} // turn off from debugger // Do the sum in upper subregisters, merge uppers
INSTRUCTION_START; MOVDDUP(XMM0, fpr.R(a));
if (inst.Rc) { MOVAPD(XMM1, fpr.R(b));
Default(inst); return; ADDPD(XMM0, R(XMM1));
} UNPCKHPD(XMM0, fpr.R(c)); //merge
int d = inst.FD; MOVAPD(fpr.R(d), XMM0);
int b = inst.FB; break;
fpr.Lock(d, b); case 11:
SQRTPD(XMM0, fpr.R(b)); // Do the sum in lower subregisters, merge lowers
MOVAPD(XMM1, M((void*)&psOneOne)); MOVAPD(XMM0, fpr.R(a));
DIVPD(XMM1, R(XMM0)); MOVAPD(XMM1, fpr.R(b));
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
ADDPD(XMM0, R(XMM1)); // sum lowers
MOVAPD(XMM1, fpr.R(c));
UNPCKLPD(XMM1, R(XMM0)); // merge
MOVAPD(fpr.R(d), XMM1); MOVAPD(fpr.R(d), XMM1);
fpr.UnlockAll(); break;
default:
PanicAlert("ps_sum WTF!!!");
} }
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
//add a, b, c
//mov a, b
//add a, c
//we need:
/*
psq_l
psq_stu
*/
/*
add a,b,a
*/
//There's still a little bit more optimization that can be squeezed out of this void Jit64::ps_muls(UGeckoInstruction inst)
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg)) {
INSTRUCTION_START
JITDISABLE(Paired)
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int a = inst.FA;
int c = inst.FC;
fpr.Lock(a, c, d);
fpr.LoadToX64(d, d == a || d == c, true);
switch (inst.SUBOP5)
{ {
fpr.Lock(d, a, b); case 12:
// Single multiply scalar high
if (d == a) // TODO - faster version for when regs are different
{
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (d == b && reversible)
{
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(a));
}
else if (a != d && b != d)
{
//sources different from d, can use rather quick solution
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (b != d)
{
fpr.LoadToX64(d, false);
MOVAPD(XMM0, fpr.R(b));
MOVAPD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
else //Other combo, must use two temps :(
{
MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
fpr.LoadToX64(d, false);
(this->*op)(XMM0, Gen::R(XMM1));
MOVAPD(fpr.RX(d), Gen::R(XMM0));
}
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
void Jit64::ps_arith(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
switch (inst.SUBOP5)
{
case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
case 23://sel
Default(inst);
break;
case 24://res
Default(inst);
break;
case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
}
}
void Jit64::ps_sum(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
fpr.Lock(a,b,c,d);
fpr.LoadToX64(d, d == a || d == b || d == c, true);
switch (inst.SUBOP5)
{
case 10:
// Do the sum in upper subregisters, merge uppers
MOVDDUP(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
ADDPD(XMM0, R(XMM1));
UNPCKHPD(XMM0, fpr.R(c)); //merge
MOVAPD(fpr.R(d), XMM0);
break;
case 11:
// Do the sum in lower subregisters, merge lowers
MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(b));
SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower
ADDPD(XMM0, R(XMM1)); // sum lowers
MOVAPD(XMM1, fpr.R(c));
UNPCKLPD(XMM1, R(XMM0)); // merge
MOVAPD(fpr.R(d), XMM1);
break;
default:
PanicAlert("ps_sum WTF!!!");
}
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
void Jit64::ps_muls(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int a = inst.FA;
int c = inst.FC;
fpr.Lock(a, c, d);
fpr.LoadToX64(d, d == a || d == c, true);
switch (inst.SUBOP5)
{
case 12:
// Single multiply scalar high
// TODO - faster version for when regs are different
MOVAPD(XMM0, fpr.R(a));
MOVDDUP(XMM1, fpr.R(c));
MULPD(XMM0, R(XMM1));
MOVAPD(fpr.R(d), XMM0);
break;
case 13:
// TODO - faster version for when regs are different
MOVAPD(XMM0, fpr.R(a));
MOVAPD(XMM1, fpr.R(c));
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
MULPD(XMM0, R(XMM1));
MOVAPD(fpr.R(d), XMM0);
break;
default:
PanicAlert("ps_muls WTF!!!");
}
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
//TODO: find easy cases and optimize them, do a breakout like ps_arith
void Jit64::ps_mergeXX(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int a = inst.FA;
int b = inst.FB;
fpr.Lock(a,b,d);
MOVAPD(XMM0, fpr.R(a)); MOVAPD(XMM0, fpr.R(a));
switch (inst.SUBOP10) MOVDDUP(XMM1, fpr.R(c));
{ MULPD(XMM0, R(XMM1));
case 528: MOVAPD(fpr.R(d), XMM0);
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf break;
break; //00 case 13:
case 560: // TODO - faster version for when regs are different
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
break; //01
case 592:
SHUFPD(XMM0, fpr.R(b), 1);
break; //10
case 624:
UNPCKHPD(XMM0, fpr.R(b));
break; //11
default:
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
}
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0));
fpr.UnlockAll();
}
//TODO: add optimized cases
void Jit64::ps_maddXX(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
int d = inst.FD;
fpr.Lock(a,b,c,d);
MOVAPD(XMM0, fpr.R(a)); MOVAPD(XMM0, fpr.R(a));
switch (inst.SUBOP5) MOVAPD(XMM1, fpr.R(c));
{ SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
case 14: //madds0 MULPD(XMM0, R(XMM1));
MOVDDUP(XMM1, fpr.R(c)); MOVAPD(fpr.R(d), XMM0);
MULPD(XMM0, R(XMM1)); break;
ADDPD(XMM0, fpr.R(b)); default:
break; PanicAlert("ps_muls WTF!!!");
case 15: //madds1
MOVAPD(XMM1, fpr.R(c));
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
MULPD(XMM0, R(XMM1));
ADDPD(XMM0, fpr.R(b));
break;
case 28: //msub
MULPD(XMM0, fpr.R(c));
SUBPD(XMM0, fpr.R(b));
break;
case 29: //madd
MULPD(XMM0, fpr.R(c));
ADDPD(XMM0, fpr.R(b));
break;
case 30: //nmsub
MULPD(XMM0, fpr.R(c));
SUBPD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits));
break;
case 31: //nmadd
MULPD(XMM0, fpr.R(c));
ADDPD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits));
break;
default:
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
//Default(inst);
//fpr.UnlockAll();
return;
}
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), Gen::R(XMM0));
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
} }
ForceSinglePrecisionP(fpr.RX(d));
fpr.UnlockAll();
}
//TODO: find easy cases and optimize them, do a breakout like ps_arith
// ps_mergeXX: build FD from one lane of FA and one lane of FB; SUBOP10 picks
// which lane of each source is used. The result is assembled in XMM0 first so
// that d may alias either source, then moved into d.
// Falls back to Default(inst) when JITDISABLE(Paired) triggers or Rc is set.
void Jit64::ps_mergeXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(Paired)
    if (inst.Rc)
    {
        Default(inst);
        return;
    }

    const int dst = inst.FD;
    const int srcA = inst.FA;
    const int srcB = inst.FB;
    fpr.Lock(srcA, srcB, dst);

    // XMM0 starts as a copy of a; each case then pulls the needed lane of b in.
    MOVAPD(XMM0, fpr.R(srcA));
    switch (inst.SUBOP10)
    {
    case 528: //00: low lane of a, low lane of b
        UNPCKLPD(XMM0, fpr.R(srcB)); //unpck is faster than shuf
        break;
    case 560: //01: low lane of a, high lane of b
        SHUFPD(XMM0, fpr.R(srcB), 2); //must use shuf here
        break;
    case 592: //10: high lane of a, low lane of b
        SHUFPD(XMM0, fpr.R(srcB), 1);
        break;
    case 624: //11: high lane of a, high lane of b
        UNPCKHPD(XMM0, fpr.R(srcB));
        break;
    default:
        // Execution continues after the assert, storing the plain copy of a.
        _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
    }

    fpr.LoadToX64(dst, false);
    MOVAPD(fpr.RX(dst), Gen::R(XMM0));
    fpr.UnlockAll();
}
//TODO: add optimized cases
// ps_maddXX: paired multiply-add family, selected by SUBOP5.
// The product/sum is built in XMM0 (XMM1 as extra scratch) so that d may alias
// any source, then moved into d and passed through ForceSinglePrecisionP.
// Falls back to Default(inst) when JITDISABLE(Paired) triggers or Rc is set.
void Jit64::ps_maddXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(Paired)
    if (inst.Rc) {
        Default(inst); return;
    }
    int a = inst.FA;
    int b = inst.FB;
    int c = inst.FC;
    int d = inst.FD;
    fpr.Lock(a,b,c,d);
    MOVAPD(XMM0, fpr.R(a));
    switch (inst.SUBOP5)
    {
    case 14: //madds0: a * (low lane of c in both lanes) + b
        MOVDDUP(XMM1, fpr.R(c));
        MULPD(XMM0, R(XMM1));
        ADDPD(XMM0, fpr.R(b));
        break;
    case 15: //madds1: a * (high lane of c in both lanes) + b
        MOVAPD(XMM1, fpr.R(c));
        SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
        MULPD(XMM0, R(XMM1));
        ADDPD(XMM0, fpr.R(b));
        break;
    case 28: //msub: a * c - b
        MULPD(XMM0, fpr.R(c));
        SUBPD(XMM0, fpr.R(b));
        break;
    case 29: //madd: a * c + b
        MULPD(XMM0, fpr.R(c));
        ADDPD(XMM0, fpr.R(b));
        break;
    case 30: //nmsub: -(a * c - b), negated by flipping the sign bits
        MULPD(XMM0, fpr.R(c));
        SUBPD(XMM0, fpr.R(b));
        XORPD(XMM0, M((void*)&psSignBits));
        break;
    case 31: //nmadd: -(a * c + b), negated by flipping the sign bits
        MULPD(XMM0, fpr.R(c));
        ADDPD(XMM0, fpr.R(b));
        XORPD(XMM0, M((void*)&psSignBits));
        break;
    default:
        _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
        //Default(inst);
        // Release the locks taken by fpr.Lock above; without this the
        // registers would stay locked after this early exit.
        fpr.UnlockAll();
        return;
    }
    // d is fully overwritten; bind it only now that all sources were read.
    fpr.LoadToX64(d, false);
    MOVAPD(fpr.RX(d), Gen::R(XMM0));
    ForceSinglePrecisionP(fpr.RX(d));
    fpr.UnlockAll();
}

View File

@ -29,172 +29,165 @@
#include "Jit.h" #include "Jit.h"
#include "JitRegCache.h" #include "JitRegCache.h"
void Jit64::mtspr(UGeckoInstruction inst) void Jit64::mtspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(SystemRegisters)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
int d = inst.RD;
switch (iIndex)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) case SPR_LR:
{Default(inst); return;} // turn off from debugger case SPR_CTR:
INSTRUCTION_START; case SPR_XER:
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); // These are safe to do the easy way, see the bottom of this function.
int d = inst.RD; break;
switch (iIndex) case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
js.blockSetsQuantizers = true;
// Prevent recompiler from compiling in old quantizer values.
// If the value changed, destroy all blocks using this quantizer
// This will create a little bit of block churn, but hopefully not too bad.
{ {
case SPR_LR: /*
case SPR_CTR:
case SPR_XER:
// These are safe to do the easy way, see the bottom of this function.
break;
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
js.blockSetsQuantizers = true;
// Prevent recompiler from compiling in old quantizer values.
// If the value changed, destroy all blocks using this quantizer
// This will create a little bit of block churn, but hopefully not too bad.
{
/*
MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value
CMP(32, R(EAX), gpr.R(inst.RD)); CMP(32, R(EAX), gpr.R(inst.RD));
FixupBranch skip_destroy = J_CC(CC_E, false); FixupBranch skip_destroy = J_CC(CC_E, false);
int gqr = iIndex - SPR_GQR0; int gqr = iIndex - SPR_GQR0;
ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr); ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr);
SetJumpTarget(skip_destroy);*/ SetJumpTarget(skip_destroy);*/
} }
break; break;
// TODO - break block if quantizers are written to. // TODO - break block if quantizers are written to.
default: default:
Default(inst); Default(inst);
return; return;
} }
// OK, this is easy. // OK, this is easy.
gpr.Lock(d);
gpr.LoadToX64(d, true);
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d));
gpr.UnlockAll();
}
void Jit64::mfspr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(SystemRegisters)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
int d = inst.RD;
switch (iIndex)
{
case SPR_WPAR:
Default(inst);
return;
// case SPR_DEC:
//MessageBox(NULL, "Read from DEC", "????", MB_OK);
//break;
case SPR_TL:
case SPR_TU:
//CALL((void Jit64::*)&CoreTiming::Advance);
// fall through
default:
gpr.Lock(d); gpr.Lock(d);
gpr.LoadToX64(d, true); gpr.LoadToX64(d, false);
MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
gpr.UnlockAll(); gpr.UnlockAll();
break;
} }
}
void Jit64::mfspr(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
int d = inst.RD;
switch (iIndex)
{
case SPR_WPAR:
Default(inst);
return;
// case SPR_DEC:
//MessageBox(NULL, "Read from DEC", "????", MB_OK);
//break;
case SPR_TL:
case SPR_TU:
//CALL((void Jit64::*)&CoreTiming::Advance);
// fall through
default:
gpr.Lock(d);
gpr.LoadToX64(d, false);
MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex]));
gpr.UnlockAll();
break;
}
}
// ======================================================================================= // =======================================================================================
// Don't interpret this, if we do we get thrown out // Don't interpret this, if we do we get thrown out
// -------------- // --------------
void Jit64::mtmsr(UGeckoInstruction inst) void Jit64::mtmsr(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(SystemRegisters)
INSTRUCTION_START; gpr.LoadToX64(inst.RS, true, false);
gpr.LoadToX64(inst.RS, true, false); MOV(32, M(&MSR), gpr.R(inst.RS));
MOV(32, M(&MSR), gpr.R(inst.RS)); gpr.Flush(FLUSH_ALL);
gpr.Flush(FLUSH_ALL); fpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL); WriteExit(js.compilerPC + 4, 0);
WriteExit(js.compilerPC + 4, 0); }
} // ==============
// ==============
void Jit64::mfmsr(UGeckoInstruction inst) void Jit64::mfmsr(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(SystemRegisters)
INSTRUCTION_START; //Privileged?
//Privileged? gpr.LoadToX64(inst.RD, false);
gpr.LoadToX64(inst.RD, false); MOV(32, gpr.R(inst.RD), M(&MSR));
MOV(32, gpr.R(inst.RD), M(&MSR)); }
}
void Jit64::mftb(UGeckoInstruction inst) void Jit64::mftb(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(SystemRegisters)
INSTRUCTION_START; mfspr(inst);
mfspr(inst); }
}
void Jit64::mfcr(UGeckoInstruction inst) void Jit64::mfcr(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(SystemRegisters)
INSTRUCTION_START; // USES_CR
// USES_CR int d = inst.RD;
int d = inst.RD; gpr.LoadToX64(d, false, true);
gpr.LoadToX64(d, false, true); MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); SHL(32, R(EAX), Imm8(4));
for (int i = 1; i < 7; i++) {
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
SHL(32, R(EAX), Imm8(4)); SHL(32, R(EAX), Imm8(4));
for (int i = 1; i < 7; i++) {
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
SHL(32, R(EAX), Imm8(4));
}
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
MOV(32, gpr.R(d), R(EAX));
} }
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
MOV(32, gpr.R(d), R(EAX));
}
void Jit64::mtcrf(UGeckoInstruction inst) void Jit64::mtcrf(UGeckoInstruction inst)
{ {
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) INSTRUCTION_START
{Default(inst); return;} // turn off from debugger JITDISABLE(SystemRegisters)
INSTRUCTION_START;
// USES_CR // USES_CR
u32 mask = 0; u32 mask = 0;
u32 crm = inst.CRM; u32 crm = inst.CRM;
if (crm == 0xFF) { if (crm == 0xFF) {
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
MOV(32, R(EAX), gpr.R(inst.RS)); MOV(32, R(EAX), gpr.R(inst.RS));
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
MOV(32, R(ECX), R(EAX)); MOV(32, R(ECX), R(EAX));
SHR(32, R(ECX), Imm8(28 - (i * 4))); SHR(32, R(ECX), Imm8(28 - (i * 4)));
AND(32, R(ECX), Imm32(0xF)); AND(32, R(ECX), Imm32(0xF));
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
}
gpr.UnlockAllX();
} else {
Default(inst);
return;
// TODO: translate this to work in new CR model.
for (int i = 0; i < 8; i++) {
if (crm & (1 << i))
mask |= 0xF << (i*4);
}
MOV(32, R(EAX), gpr.R(inst.RS));
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
AND(32, R(EAX), Imm32(mask));
AND(32, R(ECX), Imm32(~mask));
OR(32, R(EAX), R(ECX));
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
} }
gpr.UnlockAllX();
} else {
Default(inst);
return;
// TODO: translate this to work in new CR model.
for (int i = 0; i < 8; i++) {
if (crm & (1 << i))
mask |= 0xF << (i*4);
}
MOV(32, R(EAX), gpr.R(inst.RS));
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
AND(32, R(EAX), Imm32(mask));
AND(32, R(ECX), Imm32(~mask));
OR(32, R(EAX), R(ECX));
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
} }
}

View File

@ -31,12 +31,12 @@
//#define INSTRUCTION_START Default(inst); return; //#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START #define INSTRUCTION_START
void Jit64::mtspr(UGeckoInstruction inst) void Jit64::mtspr(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch(iIndex) { switch(iIndex) {
case SPR_LR: case SPR_LR:
ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD)); ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD));
return; return;
@ -60,121 +60,121 @@
default: default:
Default(inst); Default(inst);
return; return;
}
} }
}
void Jit64::mfspr(UGeckoInstruction inst) void Jit64::mfspr(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
switch (iIndex) switch (iIndex)
{
case SPR_LR:
ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
return;
case SPR_CTR:
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
return;
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD);
return;
default:
Default(inst);
return;
}
}
// =======================================================================================
// Don't interpret this, if we do we get thrown out
// --------------
void Jit64::mtmsr(UGeckoInstruction inst)
{ {
ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS)); case SPR_LR:
ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
return;
case SPR_CTR:
ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
return;
case SPR_GQR0:
case SPR_GQR0 + 1:
case SPR_GQR0 + 2:
case SPR_GQR0 + 3:
case SPR_GQR0 + 4:
case SPR_GQR0 + 5:
case SPR_GQR0 + 6:
case SPR_GQR0 + 7:
ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD);
return;
default:
Default(inst);
return;
} }
// ============== }
void Jit64::mfmsr(UGeckoInstruction inst) // =======================================================================================
{ // Don't interpret this, if we do we get thrown out
INSTRUCTION_START // --------------
void Jit64::mtmsr(UGeckoInstruction inst)
{
ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS));
ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}
// ==============
void Jit64::mfmsr(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD); ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
} }
void Jit64::mftb(UGeckoInstruction inst) void Jit64::mftb(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
JITDISABLE(SystemRegisters) JITDISABLE(SystemRegisters)
mfspr(inst); mfspr(inst);
} }
void Jit64::mfcr(UGeckoInstruction inst) void Jit64::mfcr(UGeckoInstruction inst)
{ {
Default(inst); return; Default(inst); return;
#if 0 #if 0
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
INSTRUCTION_START; INSTRUCTION_START;
// USES_CR // USES_CR
int d = inst.RD; int d = inst.RD;
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
SHL(32, R(EAX), Imm8(4));
for (int i = 1; i < 7; i++) {
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
SHL(32, R(EAX), Imm8(4)); SHL(32, R(EAX), Imm8(4));
for (int i = 1; i < 7; i++) {
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
SHL(32, R(EAX), Imm8(4));
}
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
MOV(32, gpr.R(d), R(EAX));
#endif
} }
OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
MOV(32, gpr.R(d), R(EAX));
#endif
}
void Jit64::mtcrf(UGeckoInstruction inst) void Jit64::mtcrf(UGeckoInstruction inst)
{ {
Default(inst); return; Default(inst); return;
#if 0 #if 0
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
{Default(inst); return;} // turn off from debugger {Default(inst); return;} // turn off from debugger
INSTRUCTION_START; INSTRUCTION_START;
// USES_CR // USES_CR
u32 mask = 0; u32 mask = 0;
u32 crm = inst.CRM; u32 crm = inst.CRM;
if (crm == 0xFF) { if (crm == 0xFF) {
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
MOV(32, R(EAX), gpr.R(inst.RS)); MOV(32, R(EAX), gpr.R(inst.RS));
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++) {
MOV(32, R(ECX), R(EAX)); MOV(32, R(ECX), R(EAX));
SHR(32, R(ECX), Imm8(28 - (i * 4))); SHR(32, R(ECX), Imm8(28 - (i * 4)));
AND(32, R(ECX), Imm32(0xF)); AND(32, R(ECX), Imm32(0xF));
MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
}
gpr.UnlockAllX();
} else {
Default(inst);
return;
// TODO: translate this to work in new CR model.
for (int i = 0; i < 8; i++) {
if (crm & (1 << i))
mask |= 0xF << (i*4);
}
MOV(32, R(EAX), gpr.R(inst.RS));
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
AND(32, R(EAX), Imm32(mask));
AND(32, R(ECX), Imm32(~mask));
OR(32, R(EAX), R(ECX));
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
} }
#endif gpr.UnlockAllX();
} else {
Default(inst);
return;
// TODO: translate this to work in new CR model.
for (int i = 0; i < 8; i++) {
if (crm & (1 << i))
mask |= 0xF << (i*4);
}
MOV(32, R(EAX), gpr.R(inst.RS));
MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
AND(32, R(EAX), Imm32(mask));
AND(32, R(ECX), Imm32(~mask));
OR(32, R(EAX), R(ECX));
MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
} }
#endif
}