Merge pull request #1100 from FioraAeterna/psq_insts

JIT: implement remaining psq_l/st instruction variants
This commit is contained in:
Ryan Houdek 2014-09-19 15:16:44 -05:00
commit 7cc586d615
4 changed files with 61 additions and 35 deletions

View File

@ -267,7 +267,8 @@ union UGeckoInstruction
// paired single quantized load/store
struct
{
u32 : 7;
u32 : 1;
u32 SUBOP6 : 6;
// Graphics quantization register to use
u32 Ix : 3;
// 0: paired single, 1: scalar

View File

@ -220,8 +220,8 @@ public:
void lfXXX(UGeckoInstruction inst);
void stfXXX(UGeckoInstruction inst);
void stfiwx(UGeckoInstruction inst);
void psq_l(UGeckoInstruction inst);
void psq_st(UGeckoInstruction inst);
void psq_lXX(UGeckoInstruction inst);
void psq_stXX(UGeckoInstruction inst);
void fmaddXX(UGeckoInstruction inst);
void fsign(UGeckoInstruction inst);

View File

@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] =
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}},
{57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
{60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}},
{61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
//missing: 0, 5, 6, 9, 22, 30, 62, 58
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] =
static GekkoOPTemplate table4_3[] =
{
{6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
{7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}},
{38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}},
{39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}},
{6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}},
{7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}},
{38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}},
{39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}},
};
static GekkoOPTemplate table19[] =

View File

@ -16,33 +16,41 @@ using namespace Gen;
// The big problem is likely instructions that set the quantizers in the same block.
// We will have to break block after quantizers are written to.
void Jit64::psq_st(UGeckoInstruction inst)
void Jit64::psq_stXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA);
s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 61 && offset;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA;
int s = inst.RS;
int b = indexed ? inst.RB : a;
int s = inst.FS;
gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
if (update)
gpr.BindToRegister(a, true, true);
fpr.BindToRegister(s, true, false);
if (offset && gpr.R(a).IsSimpleReg())
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset)
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
// In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck)
if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
// Hence, we need to mask out the unused bits. The layout of the GQR register is
@ -67,56 +75,73 @@ void Jit64::psq_st(UGeckoInstruction inst)
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
}
if (update && offset && js.memcheck)
if (update && js.memcheck)
{
MEMCHECK_START(false)
ADD(32, gpr.R(a), Imm32((u32)offset));
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
MEMCHECK_END
}
gpr.UnlockAll();
gpr.UnlockAllX();
}
void Jit64::psq_l(UGeckoInstruction inst)
void Jit64::psq_lXX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITLoadStorePairedOff);
FALLBACK_IF(!inst.RA);
s32 offset = inst.SIMM_12;
bool update = inst.OPCD == 57 && offset;
bool indexed = inst.OPCD == 4;
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
int a = inst.RA;
int s = inst.RS;
int b = indexed ? inst.RB : a;
int s = inst.FS;
gpr.Lock(a, b);
gpr.FlushLockX(RSCRATCH_EXTRA);
gpr.BindToRegister(a, true, update && offset);
gpr.BindToRegister(a, true, update);
fpr.BindToRegister(s, false, true);
if (offset && gpr.R(a).IsSimpleReg())
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
{
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
if (indexed)
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
else
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
}
else
{
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
if (offset)
if (indexed)
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
else if (offset)
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
}
// In memcheck mode, don't update the address until the exception check
if (update && offset && !js.memcheck)
if (update && !js.memcheck)
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
if (inst.W)
OR(32, R(RSCRATCH), Imm8(8));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
// Get the high part of the GQR register
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + inst.I]);
gqr.offset += 2;
AND(32, R(RSCRATCH2), gqr);
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[inst.W * 8])));
MEMCHECK_START(false)
CVTPS2PD(fpr.RX(s), R(XMM0));
if (update && offset && js.memcheck)
if (update && js.memcheck)
{
ADD(32, gpr.R(a), Imm32((u32)offset));
if (indexed)
ADD(32, gpr.R(a), gpr.R(b));
else
ADD(32, gpr.R(a), Imm32((u32)offset));
}
MEMCHECK_END