Merge pull request #1100 from FioraAeterna/psq_insts
JIT: implement remaining psq_l/st instruction variants
This commit is contained in:
commit
7cc586d615
|
@ -267,7 +267,8 @@ union UGeckoInstruction
|
|||
// paired single quantized load/store
|
||||
struct
|
||||
{
|
||||
u32 : 7;
|
||||
u32 : 1;
|
||||
u32 SUBOP6 : 6;
|
||||
// Graphics quantization register to use
|
||||
u32 Ix : 3;
|
||||
// 0: paired single, 1: scalar
|
||||
|
|
|
@ -220,8 +220,8 @@ public:
|
|||
void lfXXX(UGeckoInstruction inst);
|
||||
void stfXXX(UGeckoInstruction inst);
|
||||
void stfiwx(UGeckoInstruction inst);
|
||||
void psq_l(UGeckoInstruction inst);
|
||||
void psq_st(UGeckoInstruction inst);
|
||||
void psq_lXX(UGeckoInstruction inst);
|
||||
void psq_stXX(UGeckoInstruction inst);
|
||||
|
||||
void fmaddXX(UGeckoInstruction inst);
|
||||
void fsign(UGeckoInstruction inst);
|
||||
|
|
|
@ -92,10 +92,10 @@ static GekkoOPTemplate primarytable[] =
|
|||
{54, &Jit64::stfXXX}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{55, &Jit64::stfXXX}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{56, &Jit64::psq_l}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||
{57, &Jit64::psq_l}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{60, &Jit64::psq_st}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
||||
{61, &Jit64::psq_st}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{56, &Jit64::psq_lXX}, //"psq_l", OPTYPE_PS, FL_IN_A}},
|
||||
{57, &Jit64::psq_lXX}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
{60, &Jit64::psq_stXX}, //"psq_st", OPTYPE_PS, FL_IN_A}},
|
||||
{61, &Jit64::psq_stXX}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
//missing: 0, 5, 6, 9, 22, 30, 62, 58
|
||||
{0, &Jit64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}},
|
||||
|
@ -150,10 +150,10 @@ static GekkoOPTemplate table4_2[] =
|
|||
|
||||
static GekkoOPTemplate table4_3[] =
|
||||
{
|
||||
{6, &Jit64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}},
|
||||
{7, &Jit64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}},
|
||||
{38, &Jit64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}},
|
||||
{39, &Jit64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}},
|
||||
{6, &Jit64::psq_lXX}, //"psq_lx", OPTYPE_PS, 0}},
|
||||
{7, &Jit64::psq_stXX}, //"psq_stx", OPTYPE_PS, 0}},
|
||||
{38, &Jit64::psq_lXX}, //"psq_lux", OPTYPE_PS, 0}},
|
||||
{39, &Jit64::psq_stXX}, //"psq_stux", OPTYPE_PS, 0}},
|
||||
};
|
||||
|
||||
static GekkoOPTemplate table19[] =
|
||||
|
|
|
@ -16,33 +16,41 @@ using namespace Gen;
|
|||
|
||||
// The big problem is likely instructions that set the quantizers in the same block.
|
||||
// We will have to break block after quantizers are written to.
|
||||
void Jit64::psq_st(UGeckoInstruction inst)
|
||||
void Jit64::psq_stXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStorePairedOff);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
s32 offset = inst.SIMM_12;
|
||||
bool update = inst.OPCD == 61 && offset;
|
||||
bool indexed = inst.OPCD == 4;
|
||||
bool update = (inst.OPCD == 61 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
|
||||
int a = inst.RA;
|
||||
int s = inst.RS;
|
||||
int b = indexed ? inst.RB : a;
|
||||
int s = inst.FS;
|
||||
|
||||
gpr.Lock(a, b);
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
if (update)
|
||||
gpr.BindToRegister(a, true, true);
|
||||
fpr.BindToRegister(s, true, false);
|
||||
if (offset && gpr.R(a).IsSimpleReg())
|
||||
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
||||
{
|
||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||
if (indexed)
|
||||
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
||||
if (offset)
|
||||
if (indexed)
|
||||
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
|
||||
else if (offset)
|
||||
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
||||
}
|
||||
// In memcheck mode, don't update the address until the exception check
|
||||
if (update && offset && !js.memcheck)
|
||||
if (update && !js.memcheck)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
|
||||
// Hence, we need to mask out the unused bits. The layout of the GQR register is
|
||||
|
@ -67,56 +75,73 @@ void Jit64::psq_st(UGeckoInstruction inst)
|
|||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
|
||||
}
|
||||
|
||||
if (update && offset && js.memcheck)
|
||||
if (update && js.memcheck)
|
||||
{
|
||||
MEMCHECK_START(false)
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
if (indexed)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
MEMCHECK_END
|
||||
}
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
void Jit64::psq_l(UGeckoInstruction inst)
|
||||
void Jit64::psq_lXX(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStorePairedOff);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
s32 offset = inst.SIMM_12;
|
||||
bool update = inst.OPCD == 57 && offset;
|
||||
bool indexed = inst.OPCD == 4;
|
||||
bool update = (inst.OPCD == 57 && offset) || (inst.OPCD == 4 && inst.SUBOP6 & 32);
|
||||
int a = inst.RA;
|
||||
int s = inst.RS;
|
||||
int b = indexed ? inst.RB : a;
|
||||
int s = inst.FS;
|
||||
|
||||
gpr.Lock(a, b);
|
||||
gpr.FlushLockX(RSCRATCH_EXTRA);
|
||||
gpr.BindToRegister(a, true, update && offset);
|
||||
gpr.BindToRegister(a, true, update);
|
||||
fpr.BindToRegister(s, false, true);
|
||||
if (offset && gpr.R(a).IsSimpleReg())
|
||||
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && (indexed || offset))
|
||||
{
|
||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||
if (indexed)
|
||||
LEA(32, RSCRATCH_EXTRA, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||
else
|
||||
LEA(32, RSCRATCH_EXTRA, MDisp(gpr.RX(a), offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH_EXTRA), gpr.R(a));
|
||||
if (offset)
|
||||
if (indexed)
|
||||
ADD(32, R(RSCRATCH_EXTRA), gpr.R(b));
|
||||
else if (offset)
|
||||
ADD(32, R(RSCRATCH_EXTRA), Imm32((u32)offset));
|
||||
}
|
||||
// In memcheck mode, don't update the address until the exception check
|
||||
if (update && offset && !js.memcheck)
|
||||
if (update && !js.memcheck)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH_EXTRA));
|
||||
MOV(32, R(RSCRATCH2), Imm32(0x3F07));
|
||||
AND(32, R(RSCRATCH2), M(((char *)&GQR(inst.I)) + 2));
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||
if (inst.W)
|
||||
OR(32, R(RSCRATCH), Imm8(8));
|
||||
|
||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized));
|
||||
// Get the high part of the GQR register
|
||||
OpArg gqr = PPCSTATE(spr[SPR_GQR0 + inst.I]);
|
||||
gqr.offset += 2;
|
||||
|
||||
AND(32, R(RSCRATCH2), gqr);
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
|
||||
|
||||
CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[inst.W * 8])));
|
||||
|
||||
MEMCHECK_START(false)
|
||||
CVTPS2PD(fpr.RX(s), R(XMM0));
|
||||
if (update && offset && js.memcheck)
|
||||
if (update && js.memcheck)
|
||||
{
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
if (indexed)
|
||||
ADD(32, gpr.R(a), gpr.R(b));
|
||||
else
|
||||
ADD(32, gpr.R(a), Imm32((u32)offset));
|
||||
}
|
||||
MEMCHECK_END
|
||||
|
||||
|
|
Loading…
Reference in New Issue