Core: Clean up body/brace placements in Jit64 and JitCommon

Lioncash 2014-08-20 10:50:40 -04:00
parent a09cf1d8f1
commit e7f49692e8
16 changed files with 397 additions and 167 deletions
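
Every hunk below applies the same mechanical restyling, with no behavioral change: opening braces move onto their own line, single-statement bodies drop onto their own line, one-line switch cases are unfolded to one statement per line, and trailing /* */ tags become // comments. A condensed before/after sketch of the pattern, assembled from fragments of the hunks below rather than quoted verbatim:

    // Before: bodies and braces share a line with their condition or case label.
    if (p2 != 0xFF) regs[p2].locked = true;
    switch (accessSize)
    {
    case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
    }

    // After: braces on their own line, one statement per line.
    if (p2 != 0xFF)
        regs[p2].locked = true;

    switch (accessSize)
    {
    case 8:
        CALL((void *)asm_routines.fifoDirectWrite8);
        break;
    }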


@@ -162,8 +162,10 @@ void Jit64::Init()
             jo.enableBlocklink = false;
         }
         else
+        {
             jo.enableBlocklink = !Core::g_CoreStartupParameter.bMMU;
+        }
     }
     jo.fpAccurateFcmp = Core::g_CoreStartupParameter.bEnableFPRF;
     jo.optimizeGatherPipe = true;
     jo.fastInterrupts = false;
@@ -435,7 +437,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
     ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful

     // Conditionally add profiling code.
-    if (Profiler::g_ProfileBlocks) {
+    if (Profiler::g_ProfileBlocks)
+    {
         ADD(32, M(&b->runCount), Imm8(1));
 #ifdef _WIN32
         b->ticCounter = 0;
@@ -617,7 +620,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
             //NOTICE_LOG(DYNA_REC, "Unflushed register: %s", ppc_inst.c_str());
         }
 #endif
-        if (js.skipnext) {
+        if (js.skipnext)
+        {
             js.skipnext = false;
             i++; // Skip next instruction
         }


@@ -68,18 +68,22 @@ public:
     void ClearCache() override;

-    const u8 *GetDispatcher() {
+    const u8 *GetDispatcher()
+    {
         return asm_routines.dispatcher;
     }

-    const CommonAsmRoutines *GetAsmRoutines() override {
+    const CommonAsmRoutines *GetAsmRoutines() override
+    {
         return &asm_routines;
     }

-    const char *GetName() override {
+    const char *GetName() override
+    {
         return "JIT64";
     }
-    // Run!
+
+    // Run!
     void Run() override;
     void SingleStep() override;


@@ -27,13 +27,15 @@ private:
     void GenerateCommon();

 public:
-    void Init() {
+    void Init()
+    {
         AllocCodeSpace(8192);
         Generate();
         WriteProtect();
     }

-    void Shutdown() {
+    void Shutdown()
+    {
         FreeCodeSpace();
     }
 };


@@ -52,21 +52,35 @@ void RegCache::Start()
 void RegCache::Lock(int p1, int p2, int p3, int p4)
 {
     regs[p1].locked = true;
-    if (p2 != 0xFF) regs[p2].locked = true;
-    if (p3 != 0xFF) regs[p3].locked = true;
-    if (p4 != 0xFF) regs[p4].locked = true;
+
+    if (p2 != 0xFF)
+        regs[p2].locked = true;
+
+    if (p3 != 0xFF)
+        regs[p3].locked = true;
+
+    if (p4 != 0xFF)
+        regs[p4].locked = true;
 }

 // these are x64 reg indices
 void RegCache::LockX(int x1, int x2, int x3, int x4)
 {
-    if (xregs[x1].locked) {
+    if (xregs[x1].locked)
+    {
         PanicAlert("RegCache: x %i already locked!", x1);
     }
+
     xregs[x1].locked = true;
-    if (x2 != 0xFF) xregs[x2].locked = true;
-    if (x3 != 0xFF) xregs[x3].locked = true;
-    if (x4 != 0xFF) xregs[x4].locked = true;
+
+    if (x2 != 0xFF)
+        xregs[x2].locked = true;
+
+    if (x3 != 0xFF)
+        xregs[x3].locked = true;
+
+    if (x4 != 0xFF)
+        xregs[x4].locked = true;
 }

 void RegCache::UnlockAll()
@@ -321,6 +335,7 @@ void RegCache::Flush(FlushMode mode)
         {
             PanicAlert("Someone forgot to unlock PPC reg %" PRIx64 " (X64 reg %i).", i, RX(i));
         }
+
         if (regs[i].away)
         {
             if (regs[i].location.IsSimpleReg() || regs[i].location.IsImm())


@@ -47,23 +47,34 @@ protected:
 public:
     RegCache();
     virtual ~RegCache() {}

     void Start();

     void DiscardRegContentsIfCached(size_t preg);
-    void SetEmitter(Gen::XEmitter *emitter) {emit = emitter;}
+    void SetEmitter(Gen::XEmitter *emitter)
+    {
+        emit = emitter;
+    }

     void FlushR(Gen::X64Reg reg);
-    void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2) {FlushR(reg); FlushR(reg2);}
-    void FlushLockX(Gen::X64Reg reg) {
+    void FlushR(Gen::X64Reg reg, Gen::X64Reg reg2)
+    {
+        FlushR(reg);
+        FlushR(reg2);
+    }
+
+    void FlushLockX(Gen::X64Reg reg)
+    {
         FlushR(reg);
         LockX(reg);
     }
-    void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2) {
+
+    void FlushLockX(Gen::X64Reg reg1, Gen::X64Reg reg2)
+    {
         FlushR(reg1); FlushR(reg2);
         LockX(reg1); LockX(reg2);
     }
+
     void Flush(FlushMode mode = FLUSH_ALL);
     void Flush(PPCAnalyst::CodeOp *op) {Flush();}
     int SanityCheck() const;
@@ -76,7 +87,11 @@ public:
     virtual void StoreRegister(size_t preg, Gen::OpArg newLoc) = 0;
     virtual void LoadRegister(size_t preg, Gen::X64Reg newLoc) = 0;

-    const Gen::OpArg &R(size_t preg) const {return regs[preg].location;}
+    const Gen::OpArg &R(size_t preg) const
+    {
+        return regs[preg].location;
+    }
+
     Gen::X64Reg RX(size_t preg) const
     {
         if (IsBound(preg))


@@ -67,7 +67,8 @@ void Jit64::bx(UGeckoInstruction inst)
     // If this is not the last instruction of a block,
     // we will skip the rest process.
     // Because PPCAnalyst::Flatten() merged the blocks.
-    if (!js.isLastInstruction) {
+    if (!js.isLastInstruction)
+    {
         return;
     }


@@ -136,10 +136,13 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
     fpr.BindToRegister(d, false);
     //YES it is necessary to dupe the result :(
     //TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
-    if (single_precision) {
+    if (single_precision)
+    {
         ForceSinglePrecisionS(XMM0);
         MOVDDUP(fpr.RX(d), R(XMM0));
-    } else {
+    }
+    else
+    {
         MOVSD(fpr.RX(d), R(XMM0));
     }
     // SMB checks flags after this op. Let's lie.
@@ -159,7 +162,8 @@ void Jit64::fsign(UGeckoInstruction inst)
     fpr.Lock(b, d);
     fpr.BindToRegister(d, true, true);
     MOVSD(XMM0, fpr.R(b));
-    switch (inst.SUBOP10) {
+    switch (inst.SUBOP10)
+    {
     case 40: // fnegx
         PXOR(XMM0, M((void*)&psSignBits2));
         break;


@@ -137,10 +137,26 @@ void Jit64::ComputeRC(const Gen::OpArg & arg)
     }
 }

-static u32 Add(u32 a, u32 b) {return a + b;}
-static u32 Or (u32 a, u32 b) {return a | b;}
-static u32 And(u32 a, u32 b) {return a & b;}
-static u32 Xor(u32 a, u32 b) {return a ^ b;}
+// Following static functions are used in conjunction with regimmop
+static u32 Add(u32 a, u32 b)
+{
+    return a + b;
+}
+
+static u32 Or(u32 a, u32 b)
+{
+    return a | b;
+}
+
+static u32 And(u32 a, u32 b)
+{
+    return a & b;
+}
+
+static u32 Xor(u32 a, u32 b)
+{
+    return a ^ b;
+}

 void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
 {
@@ -244,18 +260,36 @@ void Jit64::reg_imm(UGeckoInstruction inst)
             regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD);
         }
         break;
-    case 24:
+    case 24: // ori
         if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc)  //check for nop
-            {NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
+        {
+            // Make the nop visible in the generated code. not much use but interesting if we see one.
+            NOP();
+            return;
+        }
         regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR);
-        break; //ori
-    case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris
-    case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break;
-    case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break;
-    case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori
-    case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris
-    case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic
-    case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc
+        break;
+    case 25: // oris
+        regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false);
+        break;
+    case 28: // andi
+        regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true);
+        break;
+    case 29: // andis
+        regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true);
+        break;
+    case 26: // xori
+        regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false);
+        break;
+    case 27: // xoris
+        regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false);
+        break;
+    case 12: // addic
+        regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true);
+        break;
+    case 13: // addic_rc
+        regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true);
+        break;
     default:
         FALLBACK_IF(true);
     }
@@ -277,17 +311,20 @@ void Jit64::cmpXX(UGeckoInstruction inst)
          ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) ||
          ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) &&
         (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
-        !(js.next_inst.BO & BO_DONT_CHECK_CONDITION)) {
+        !(js.next_inst.BO & BO_DONT_CHECK_CONDITION))
+    {
         // Looks like a decent conditional branch that we can merge with.
         // It only test CR, not CTR.
-        if (test_crf == crf) {
+        if (test_crf == crf)
+        {
             merge_branch = true;
         }
     }

     OpArg comparand;
     bool signedCompare;
-    if (inst.OPCD == 31) {
+    if (inst.OPCD == 31)
+    {
         // cmp / cmpl
         gpr.Lock(a, b);
         comparand = gpr.R(b);
@@ -402,6 +439,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
                 MOV(64, R(RAX), Imm32((s32)gpr.R(a).offset));
             else
                 MOVSX(64, 32, RAX, gpr.R(a));
+
             if (!comparand.IsImm())
             {
                 MOVSX(64, 32, ABI_PARAM1, comparand);
@@ -419,6 +457,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
                 MOV(32, R(ABI_PARAM1), comparand);
             else
                 MOVZX(64, 32, ABI_PARAM1, comparand);
+
             comparand = R(ABI_PARAM1);
         }
         SUB(64, R(RAX), comparand);
@@ -466,6 +505,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
             {
                 if (js.next_inst.LK)
                     MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
                 MOV(32, R(EAX), M(&CTR));
                 AND(32, R(EAX), Imm32(0xFFFFFFFC));
                 WriteExitDestInEAX();
@@ -474,8 +514,10 @@ void Jit64::cmpXX(UGeckoInstruction inst)
             {
                 MOV(32, R(EAX), M(&LR));
                 AND(32, R(EAX), Imm32(0xFFFFFFFC));
+
                 if (js.next_inst.LK)
                     MOV(32, M(&LR), Imm32(js.compilerPC + 4));
+
                 WriteExitDestInEAX();
             }
             else
@@ -506,22 +548,23 @@ void Jit64::boolX(UGeckoInstruction inst)
     if (gpr.R(s).IsImm() && gpr.R(b).IsImm())
     {
-        if (inst.SUBOP10 == 28) /* andx */
+        if (inst.SUBOP10 == 28) // andx
             gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (u32)gpr.R(b).offset);
-        else if (inst.SUBOP10 == 476) /* nandx */
+        else if (inst.SUBOP10 == 476) // nandx
             gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset & (u32)gpr.R(b).offset));
-        else if (inst.SUBOP10 == 60) /* andcx */
+        else if (inst.SUBOP10 == 60) // andcx
             gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (~(u32)gpr.R(b).offset));
-        else if (inst.SUBOP10 == 444) /* orx */
+        else if (inst.SUBOP10 == 444) // orx
             gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (u32)gpr.R(b).offset);
-        else if (inst.SUBOP10 == 124) /* norx */
+        else if (inst.SUBOP10 == 124) // norx
             gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset | (u32)gpr.R(b).offset));
-        else if (inst.SUBOP10 == 412) /* orcx */
+        else if (inst.SUBOP10 == 412) // orcx
             gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (~(u32)gpr.R(b).offset));
-        else if (inst.SUBOP10 == 316) /* xorx */
+        else if (inst.SUBOP10 == 316) // xorx
             gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset);
-        else if (inst.SUBOP10 == 284) /* eqvx */
+        else if (inst.SUBOP10 == 284) // eqvx
             gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset));
+
         if (inst.Rc)
         {
             ComputeRC(gpr.R(a));
@@ -575,16 +618,16 @@ void Jit64::boolX(UGeckoInstruction inst)
         OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s));
         gpr.BindToRegister(a, true, true);

-        if (inst.SUBOP10 == 28) /* andx */
+        if (inst.SUBOP10 == 28) // andx
         {
             AND(32, gpr.R(a), operand);
         }
-        else if (inst.SUBOP10 == 476) /* nandx */
+        else if (inst.SUBOP10 == 476) // nandx
         {
             AND(32, gpr.R(a), operand);
             NOT(32, gpr.R(a));
         }
-        else if (inst.SUBOP10 == 60) /* andcx */
+        else if (inst.SUBOP10 == 60) // andcx
         {
             if (a == b)
             {
@@ -598,16 +641,16 @@ void Jit64::boolX(UGeckoInstruction inst)
                 AND(32, gpr.R(a), R(EAX));
             }
         }
-        else if (inst.SUBOP10 == 444) /* orx */
+        else if (inst.SUBOP10 == 444) // orx
         {
             OR(32, gpr.R(a), operand);
         }
-        else if (inst.SUBOP10 == 124) /* norx */
+        else if (inst.SUBOP10 == 124) // norx
         {
             OR(32, gpr.R(a), operand);
             NOT(32, gpr.R(a));
         }
-        else if (inst.SUBOP10 == 412) /* orcx */
+        else if (inst.SUBOP10 == 412) // orcx
         {
             if (a == b)
             {
@@ -621,11 +664,11 @@ void Jit64::boolX(UGeckoInstruction inst)
                 OR(32, gpr.R(a), R(EAX));
             }
         }
-        else if (inst.SUBOP10 == 316) /* xorx */
+        else if (inst.SUBOP10 == 316) // xorx
         {
             XOR(32, gpr.R(a), operand);
         }
-        else if (inst.SUBOP10 == 284) /* eqvx */
+        else if (inst.SUBOP10 == 284) // eqvx
         {
             NOT(32, gpr.R(a));
             XOR(32, gpr.R(a), operand);
@@ -643,46 +686,46 @@ void Jit64::boolX(UGeckoInstruction inst)
         gpr.Lock(a,s,b);
         gpr.BindToRegister(a, false, true);

-        if (inst.SUBOP10 == 28) /* andx */
+        if (inst.SUBOP10 == 28) // andx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             AND(32, gpr.R(a), gpr.R(b));
         }
-        else if (inst.SUBOP10 == 476) /* nandx */
+        else if (inst.SUBOP10 == 476) // nandx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             AND(32, gpr.R(a), gpr.R(b));
             NOT(32, gpr.R(a));
         }
-        else if (inst.SUBOP10 == 60) /* andcx */
+        else if (inst.SUBOP10 == 60) // andcx
         {
             MOV(32, gpr.R(a), gpr.R(b));
             NOT(32, gpr.R(a));
             AND(32, gpr.R(a), gpr.R(s));
         }
-        else if (inst.SUBOP10 == 444) /* orx */
+        else if (inst.SUBOP10 == 444) // orx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             OR(32, gpr.R(a), gpr.R(b));
         }
-        else if (inst.SUBOP10 == 124) /* norx */
+        else if (inst.SUBOP10 == 124) // norx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             OR(32, gpr.R(a), gpr.R(b));
             NOT(32, gpr.R(a));
         }
-        else if (inst.SUBOP10 == 412) /* orcx */
+        else if (inst.SUBOP10 == 412) // orcx
         {
             MOV(32, gpr.R(a), gpr.R(b));
             NOT(32, gpr.R(a));
             OR(32, gpr.R(a), gpr.R(s));
         }
-        else if (inst.SUBOP10 == 316) /* xorx */
+        else if (inst.SUBOP10 == 316) // xorx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             XOR(32, gpr.R(a), gpr.R(b));
         }
-        else if (inst.SUBOP10 == 284) /* eqvx */
+        else if (inst.SUBOP10 == 284) // eqvx
         {
             MOV(32, gpr.R(a), gpr.R(s));
             NOT(32, gpr.R(a));
@@ -992,13 +1035,25 @@ void Jit64::mulli(UGeckoInstruction inst)
         else if ((imm & (imm - 1)) == 0)
         {
             u32 shift = 0;
-            if (imm & 0xFFFF0000) shift |= 16;
-            if (imm & 0xFF00FF00) shift |= 8;
-            if (imm & 0xF0F0F0F0) shift |= 4;
-            if (imm & 0xCCCCCCCC) shift |= 2;
-            if (imm & 0xAAAAAAAA) shift |= 1;
+
+            if (imm & 0xFFFF0000)
+                shift |= 16;
+
+            if (imm & 0xFF00FF00)
+                shift |= 8;
+
+            if (imm & 0xF0F0F0F0)
+                shift |= 4;
+
+            if (imm & 0xCCCCCCCC)
+                shift |= 2;
+
+            if (imm & 0xAAAAAAAA)
+                shift |= 1;
+
             if (d != a)
                 MOV(32, gpr.R(d), gpr.R(a));
+
             if (shift)
                 SHL(32, gpr.R(d), Imm8(shift));
         }
@@ -1047,13 +1102,25 @@ void Jit64::mullwx(UGeckoInstruction inst)
         else if ((imm & (imm - 1)) == 0 && !inst.OE)
         {
             u32 shift = 0;
-            if (imm & 0xFFFF0000) shift |= 16;
-            if (imm & 0xFF00FF00) shift |= 8;
-            if (imm & 0xF0F0F0F0) shift |= 4;
-            if (imm & 0xCCCCCCCC) shift |= 2;
-            if (imm & 0xAAAAAAAA) shift |= 1;
+
+            if (imm & 0xFFFF0000)
+                shift |= 16;
+
+            if (imm & 0xFF00FF00)
+                shift |= 8;
+
+            if (imm & 0xF0F0F0F0)
+                shift |= 4;
+
+            if (imm & 0xCCCCCCCC)
+                shift |= 2;
+
+            if (imm & 0xAAAAAAAA)
+                shift |= 1;
+
             if (d != src)
                 MOV(32, gpr.R(d), gpr.R(src));
+
             if (shift)
                 SHL(32, gpr.R(d), Imm8(shift));
         }
@@ -1554,6 +1621,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
         {
             ROL(32, gpr.R(a), Imm8(inst.SH));
         }
+
         if (!(inst.MB==0 && inst.ME==31))
         {
             AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME)));
@@ -1604,10 +1672,12 @@ void Jit64::rlwimix(UGeckoInstruction inst)
         {
             MOV(32, gpr.R(a), gpr.R(s));
         }
+
         if (inst.SH)
         {
             ROL(32, gpr.R(a), Imm8(inst.SH));
         }
+
         if (inst.Rc)
         {
             ComputeRC(gpr.R(a));
@@ -1637,6 +1707,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
             AND(32, R(EAX), Imm32(mask));
             XOR(32, gpr.R(a), R(EAX));
         }
+
         if (inst.Rc)
             ComputeRC(gpr.R(a));
     }
@@ -1700,6 +1771,7 @@ void Jit64::negx(UGeckoInstruction inst)
         {
             ComputeRC(gpr.R(d));
         }
+
         if (inst.OE)
         {
             GenerateConstantOverflow(gpr.R(d).offset == 0x80000000);
@@ -1821,7 +1893,9 @@ void Jit64::srawx(UGeckoInstruction inst)
         SetJumpTarget(nocarry);
         gpr.UnlockAll();
         gpr.UnlockAllX();
-        if (inst.Rc) {
+
+        if (inst.Rc)
+        {
             ComputeRC(gpr.R(a));
         }
     }
@@ -1888,8 +1962,10 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
         u32 mask = 0x80000000;
         u32 i = 0;
         for (; i < 32; i++, mask >>= 1)
+        {
             if ((u32)gpr.R(s).offset & mask)
                 break;
+        }
         gpr.SetImmediate32(a, i);
     }
     else


@@ -30,26 +30,26 @@ void Jit64::lXXx(UGeckoInstruction inst)
     bool signExtend = false;
     switch (inst.OPCD)
     {
-    case 32: /* lwz */
-    case 33: /* lwzu */
+    case 32: // lwz
+    case 33: // lwzu
         accessSize = 32;
         signExtend = false;
         break;

-    case 34: /* lbz */
-    case 35: /* lbzu */
+    case 34: // lbz
+    case 35: // lbzu
         accessSize = 8;
         signExtend = false;
         break;

-    case 40: /* lhz */
-    case 41: /* lhzu */
+    case 40: // lhz
+    case 41: // lhzu
         accessSize = 16;
         signExtend = false;
         break;

-    case 42: /* lha */
-    case 43: /* lhau */
+    case 42: // lha
+    case 43: // lhau
         accessSize = 16;
         signExtend = true;
         break;
@@ -57,25 +57,25 @@ void Jit64::lXXx(UGeckoInstruction inst)
     case 31:
         switch (inst.SUBOP10)
         {
-        case 23: /* lwzx */
-        case 55: /* lwzux */
+        case 23: // lwzx
+        case 55: // lwzux
            accessSize = 32;
            signExtend = false;
            break;

-        case 87: /* lbzx */
-        case 119: /* lbzux */
+        case 87: // lbzx
+        case 119: // lbzux
            accessSize = 8;
            signExtend = false;
            break;

-        case 279: /* lhzx */
-        case 311: /* lhzux */
+        case 279: // lhzx
+        case 311: // lhzux
            accessSize = 16;
            signExtend = false;
            break;

-        case 343: /* lhax */
-        case 375: /* lhaux */
+        case 343: // lhax
+        case 375: // lhaux
            accessSize = 16;
            signExtend = true;
            break;
@@ -259,10 +259,18 @@ void Jit64::stX(UGeckoInstruction inst)
     int accessSize;
     switch (inst.OPCD & ~1)
     {
-    case 36: accessSize = 32; break; //stw
-    case 44: accessSize = 16; break; //sth
-    case 38: accessSize = 8; break; //stb
-    default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
+    case 36: // stw
+        accessSize = 32;
+        break;
+    case 44: // sth
+        accessSize = 16;
+        break;
+    case 38: // stb
+        accessSize = 8;
+        break;
+    default:
+        _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF");
+        return;
     }

     if ((a == 0) || gpr.R(a).IsImm())
@@ -273,18 +281,27 @@ void Jit64::stX(UGeckoInstruction inst)
         addr += offset;
         if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
         {
-            MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
+            // Helps external systems know which instruction triggered the write
+            MOV(32, M(&PC), Imm32(jit->js.compilerPC));
             gpr.FlushLockX(ABI_PARAM1);
             MOV(32, R(ABI_PARAM1), gpr.R(s));
             if (update)
                 gpr.SetImmediate32(a, addr);
-            switch (accessSize)
-            {
+
             // No need to protect these, they don't touch any state
             // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat
-            case 8: CALL((void *)asm_routines.fifoDirectWrite8); break;
-            case 16: CALL((void *)asm_routines.fifoDirectWrite16); break;
-            case 32: CALL((void *)asm_routines.fifoDirectWrite32); break;
+            switch (accessSize)
+            {
+            case 8:
+                CALL((void *)asm_routines.fifoDirectWrite8);
+                break;
+            case 16:
+                CALL((void *)asm_routines.fifoDirectWrite16);
+                break;
+            case 32:
+                CALL((void *)asm_routines.fifoDirectWrite32);
+                break;
             }
             js.fifoBytesThisBlock += accessSize >> 3;
             gpr.UnlockAllX();
@@ -300,14 +317,22 @@ void Jit64::stX(UGeckoInstruction inst)
         }
         else
         {
-            MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
+            // Helps external systems know which instruction triggered the write
+            MOV(32, M(&PC), Imm32(jit->js.compilerPC));
             u32 registersInUse = RegistersInUse();
             ABI_PushRegistersAndAdjustStack(registersInUse, false);
             switch (accessSize)
             {
-            case 32: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr); break;
-            case 16: ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr); break;
-            case 8: ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break;
+            case 32:
+                ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), gpr.R(s), addr);
+                break;
+            case 16:
+                ABI_CallFunctionAC(true ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), gpr.R(s), addr);
+                break;
+            case 8:
+                ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr);
+                break;
             }
             ABI_PopRegistersAndAdjustStack(registersInUse, false);
             if (update)
@@ -359,17 +384,29 @@ void Jit64::stXx(UGeckoInstruction inst)
         ADD(32, gpr.R(a), gpr.R(b));
         MOV(32, R(EDX), gpr.R(a));
         MEMCHECK_END
-    } else {
+    }
+    else
+    {
         MOV(32, R(EDX), gpr.R(a));
         ADD(32, R(EDX), gpr.R(b));
     }

     int accessSize;
-    switch (inst.SUBOP10 & ~32) {
-    case 151: accessSize = 32; break;
-    case 407: accessSize = 16; break;
-    case 215: accessSize = 8; break;
-    default: PanicAlert("stXx: invalid access size");
-        accessSize = 0; break;
+    switch (inst.SUBOP10 & ~32)
+    {
+    case 151:
+        accessSize = 32;
+        break;
+    case 407:
+        accessSize = 16;
+        break;
+    case 215:
+        accessSize = 8;
+        break;
+    default:
+        PanicAlert("stXx: invalid access size");
+        accessSize = 0;
+        break;
     }

     MOV(32, R(ECX), gpr.R(s));


@@ -47,12 +47,15 @@ void Jit64::psq_st(UGeckoInstruction inst)
     MOVZX(32, 8, EDX, R(AL));

     // FIXME: Fix ModR/M encoding to allow [EDX*4+disp32] without a base register!
-    if (inst.W) {
+    if (inst.W)
+    {
         // One value
         PXOR(XMM0, R(XMM0)); // TODO: See if we can get rid of this cheaply by tweaking the code in the singleStore* functions.
         CVTSD2SS(XMM0, fpr.R(s));
         CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
-    } else {
+    }
+    else
+    {
         // Pair of values
         CVTPD2PS(XMM0, fpr.R(s));
         CALLptr(MScaled(EDX, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));


@@ -156,12 +156,21 @@ void Jit64::ps_arith(UGeckoInstruction inst)
     switch (inst.SUBOP5)
     {
-    case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div
-    case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub
-    case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add
-    case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul
+    case 18: // div
+        tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
+        break;
+    case 20: // sub
+        tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
+        break;
+    case 21: // add
+        tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
+        break;
+    case 25: // mul
+        tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD);
+        break;
     default:
         _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
+        break;
     }
 }


@@ -247,13 +247,16 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
     SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
     RET();
     /*
-    if (cpu_info.bSSSE3) {
+    if (cpu_info.bSSSE3)
+    {
         PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
         // TODO: SafeWriteFloat
         MOVSS(M(&psTemp[0]), XMM0);
         MOV(32, R(EAX), M(&psTemp[0]));
         SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
-    } else {
+    }
+    else
+    {
         MOVSS(M(&psTemp[0]), XMM0);
         MOV(32, R(EAX), M(&psTemp[0]));
         SafeWriteRegToReg(EAX, ECX, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
@@ -320,10 +323,13 @@ void CommonAsmRoutines::GenQuantizedLoads()
     UD2();

     const u8* loadPairedFloatTwo = AlignCode4();
-    if (cpu_info.bSSSE3) {
+    if (cpu_info.bSSSE3)
+    {
         MOVQ_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
         PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
-    } else {
+    }
+    else
+    {
         LoadAndSwap(64, RCX, MComplex(RBX, RCX, 1, 0));
         ROL(64, R(RCX), Imm8(32));
         MOVQ_xmm(XMM0, R(RCX));
@@ -331,11 +337,14 @@ void CommonAsmRoutines::GenQuantizedLoads()
     RET();

     const u8* loadPairedFloatOne = AlignCode4();
-    if (cpu_info.bSSSE3) {
+    if (cpu_info.bSSSE3)
+    {
         MOVD_xmm(XMM0, MComplex(RBX, RCX, 1, 0));
         PSHUFB(XMM0, M((void *)pbswapShuffle1x4));
         UNPCKLPS(XMM0, M((void*)m_one));
-    } else {
+    }
+    else
+    {
         LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
         MOVD_xmm(XMM0, R(RCX));
         UNPCKLPS(XMM0, M((void*)m_one));


@@ -21,7 +21,8 @@ using namespace Gen;
 extern u8 *trampolineCodePtr;

-static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) {
+static void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress)
+{
     u64 code_addr = (u64)codePtr;
     disassembler disasm;
     char disbuf[256];
@@ -61,9 +62,10 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re
     if (addrReg != ABI_PARAM1)
         MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
-    if (info.displacement) {
+
+    if (info.displacement)
         ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
-    }
+
     ABI_PushRegistersAndAdjustStack(registersInUse, true);
     switch (info.operandSize)
     {


@@ -360,11 +360,13 @@ using namespace Gen;
         }
     }
 }
+
 void JitBlockCache::WriteLinkBlock(u8* location, const u8* address)
 {
     XEmitter emit(location);
     emit.JMP(address, true);
 }
+
 void JitBlockCache::WriteDestroyBlock(const u8* location, u32 address)
 {
     XEmitter emit((u8 *)location);


@@ -42,7 +42,8 @@ struct JitBlock
     bool invalid;

-    struct LinkData {
+    struct LinkData
+    {
         u8 *exitPtrs; // to be able to rewrite the exit jum
         u32 exitAddress;
         bool linkStatus; // is it already linked?
@@ -81,18 +82,22 @@ public:
         m_valid_block.reset(new u32[VALID_BLOCK_ALLOC_ELEMENTS]);
         ClearAll();
     }
+
     void Set(u32 bit)
     {
         m_valid_block[bit / 32] |= 1u << (bit % 32);
     }
+
     void Clear(u32 bit)
     {
         m_valid_block[bit / 32] &= ~(1u << (bit % 32));
     }
+
     void ClearAll()
     {
         memset(m_valid_block.get(), 0, sizeof(u32) * VALID_BLOCK_ALLOC_ELEMENTS);
     }
+
     bool Test(u32 bit)
     {
         return (m_valid_block[bit / 32] & (1u << (bit % 32))) != 0;
@@ -125,7 +130,10 @@ class JitBaseBlockCache
 public:
     JitBaseBlockCache() :
         blockCodePointers(nullptr), blocks(nullptr), num_blocks(0),
-        iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr) {}
+        iCache(nullptr), iCacheEx(nullptr), iCacheVMEM(nullptr)
+    {
+    }
+
     int AllocateBlock(u32 em_address);
     void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr);


@@ -77,7 +77,8 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
         // offsets with the wrong sign, so whatever. Since the original code
         // *could* try to wrap an address around, however, this is the correct
         // place to address the issue.)
-        if ((u32) offset >= 0x1000) {
+        if ((u32) offset >= 0x1000)
+        {
            LEA(32, reg_value, MDisp(opAddress.GetSimpleReg(), offset));
            opAddress = R(reg_value);
            offset = 0;
@@ -186,7 +187,9 @@ private:
         // then mask, then sign extend if needed (1 instr vs. 2/3).
         u32 all_ones = (1ULL << sbits) - 1;
         if ((all_ones & mask) == all_ones)
+        {
             MoveOpArgToReg(sbits, MDisp(EAX, 0));
+        }
         else
         {
             m_code->MOVZX(32, sbits, m_dst_reg, MDisp(EAX, 0));
@@ -342,10 +345,18 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
         ABI_PushRegistersAndAdjustStack(registersInUse, false);
         switch (accessSize)
         {
-        case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break;
-        case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break;
-        case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break;
-        case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break;
+        case 64:
+            ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc);
+            break;
+        case 32:
+            ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc);
+            break;
+        case 16:
+            ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc);
+            break;
+        case 8:
+            ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc);
+            break;
         }
         ABI_PopRegistersAndAdjustStack(registersInUse, false);
@@ -373,11 +384,12 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
 u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap)
 {
-    u8 *result;
-    if (accessSize == 8 && reg_value >= 4) {
+    if (accessSize == 8 && reg_value >= 4)
+    {
         PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
     }
-    result = GetWritableCodePtr();
+
+    u8* result = GetWritableCodePtr();
     OpArg dest = MComplex(RBX, reg_addr, SCALE_1, offset);
     if (swap)
     {
@@ -396,6 +408,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
     {
         MOV(accessSize, dest, R(reg_value));
     }
+
     return result;
 }
@@ -450,10 +463,18 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
     ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
     switch (accessSize)
     {
-    case 64: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break;
-    case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break;
-    case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break;
-    case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break;
+    case 64:
+        ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false);
+        break;
+    case 32:
+        ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false);
+        break;
+    case 16:
+        ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false);
+        break;
+    case 8:
+        ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false);
+        break;
     }
     ABI_PopRegistersAndAdjustStack(registersInUse, noProlog);
     FixupBranch exit = J();
@@ -478,7 +499,8 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 a
     MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), R(arg));
 }

-void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
+void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)
+{
     // Most games don't need these. Zelda requires it though - some platforms get stuck without them.
     if (jit->jo.accurateSinglePrecision)
     {
@@ -487,7 +509,8 @@ void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm)
     }
 }

-void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm) {
+void EmuCodeBlock::ForceSinglePrecisionP(X64Reg xmm)
+{
     // Most games don't need these. Zelda requires it though - some platforms get stuck without them.
     if (jit->jo.accurateSinglePrecision)
     {
@@ -600,10 +623,13 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
     MOVSD(XMM1, R(src));
     FLD(64, M(&temp64));
     CCFlags cond;
-    if (cpu_info.bSSE4_1) {
+    if (cpu_info.bSSE4_1)
+    {
         PTEST(XMM1, M((void *)&double_exponent));
         cond = CC_NC;
-    } else {
+    }
+    else
+    {
         // emulate PTEST; checking FPU flags is incorrect because the NaN bits
         // are sticky (persist between instructions)
         MOVSD(XMM0, M((void *)&double_exponent));
@@ -619,9 +645,12 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)
     PANDN(XMM1, M((void *)&double_qnan_bit));
     PSRLQ(XMM1, 29);
-    if (cpu_info.bAVX) {
+
+    if (cpu_info.bAVX)
+    {
         VPANDN(XMM0, XMM1, R(XMM0));
-    } else {
+    }
+    else
+    {
         PANDN(XMM1, R(XMM0));
         MOVSS(XMM0, R(XMM1));
     }
@@ -633,19 +662,26 @@ void EmuCodeBlock::ConvertDoubleToSingle(X64Reg dst, X64Reg src)

 void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr)
 {
-    if (src_is_gpr) {
+    if (src_is_gpr)
+    {
         MOV(32, M(&temp32), R(src));
         MOVD_xmm(XMM1, R(src));
-    } else {
+    }
+    else
+    {
         MOVSS(M(&temp32), src);
         MOVSS(R(XMM1), src);
     }

     FLD(32, M(&temp32));
     CCFlags cond;
-    if (cpu_info.bSSE4_1) {
+    if (cpu_info.bSSE4_1)
+    {
         PTEST(XMM1, M((void *)&single_exponent));
         cond = CC_NC;
-    } else {
+    }
+    else
+    {
         // emulate PTEST; checking FPU flags is incorrect because the NaN bits
         // are sticky (persist between instructions)
         MOVSS(XMM0, M((void *)&single_exponent));
@@ -661,9 +697,12 @@ void EmuCodeBlock::ConvertSingleToDouble(X64Reg dst, X64Reg src, bool src_is_gpr
     PANDN(XMM1, M((void *)&single_qnan_bit));
     PSLLQ(XMM1, 29);
-    if (cpu_info.bAVX) {
+
+    if (cpu_info.bAVX)
+    {
         VPANDN(dst, XMM1, R(dst));
-    } else {
+    }
+    else
+    {
         PANDN(XMM1, R(dst));
         MOVSD(dst, R(XMM1));
     }