Jit64: turn 32 bit addresses into offsets

parent 9f15054358
commit a0597f0d62
@@ -218,17 +218,17 @@ inline OpArg M(const T* ptr) {return OpArg((u64)(const void*)ptr, (int)SCALE_
 inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
 
-inline OpArg MDisp(X64Reg value, int offset)
+inline OpArg MDisp(X64Reg value, ptrdiff_t offset)
 {
-	return OpArg((u32)offset, SCALE_ATREG, value);
+	return OpArg(offset, SCALE_ATREG, value);
 }
 
-inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
+inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, ptrdiff_t offset)
 {
 	return OpArg(offset, scale, base, scaled);
 }
 
-inline OpArg MScaled(X64Reg scaled, int scale, int offset)
+inline OpArg MScaled(X64Reg scaled, int scale, ptrdiff_t offset)
 {
 	if (scale == SCALE_1)
 		return OpArg(offset, SCALE_ATREG, scaled);
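
Widening the offset parameters from int to ptrdiff_t lets callers pass raw pointer differences without a narrowing cast, and dropping the (u32) truncation in MDisp preserves negative displacements. A small sketch of what becomes expressible (the table name is illustrative, not from the commit):

    // A negative, pointer-sized displacement now round-trips unharmed:
    ptrdiff_t disp = (u8*)some_table - (u8*)&PowerPC::ppcState - 0x80;
    OpArg arg = MDisp(RPPCSTATE, disp);  // presumably encoded as disp8/disp32 by the emitter
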
@@ -247,17 +247,10 @@ inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
 inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
 inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
 
-inline u32 PtrOffset(const void* ptr, const void* base)
+inline bool FitsInS32(const ptrdiff_t distance)
 {
-	s64 distance = (s64)ptr-(s64)base;
-	if (distance >= 0x80000000LL ||
-	    distance < -0x80000000LL)
-	{
-		_assert_msg_(DYNA_REC, 0, "pointer offset out of range");
-		return 0;
-	}
-
-	return (u32)distance;
+	return distance < 0x80000000LL &&
+	       distance >= -0x80000000LL;
 }
 
 //usage: int a[]; ARRAY_OFFSET(a,10)
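
For context: an x86-64 memory operand encodes at most a signed 32-bit displacement, so an absolute address or pointer difference can only be folded into an addressing mode when it passes this check. A minimal sketch of the intended call pattern; `table`, `base`, `base_reg`, and `index_reg` are illustrative names, not from the commit:

    // Index a static table: fold the displacement when it fits in an s32,
    // otherwise materialize the full 64-bit pointer in a scratch register.
    ptrdiff_t offset = (u8*)table - (u8*)base;
    if (FitsInS32(offset))
    {
        MOV(32, R(RSCRATCH), MComplex(base_reg, index_reg, SCALE_4, offset));
    }
    else
    {
        MOV(64, R(RSCRATCH2), ImmPtr(table));
        MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, index_reg, SCALE_4, 0));
    }
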
@@ -20,14 +20,16 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg)
 	AND(8, R(AL), Imm8(DSP_STACK_MASK));
 	MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
 
-	X64Reg tmp1;
+	X64Reg tmp1, tmp2;
 	gpr.getFreeXReg(tmp1);
+	gpr.getFreeXReg(tmp2);
 	//g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg];
 	MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg]));
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(16, MComplex(EAX, EAX, 1,
-	        PtrOffset(&g_dsp.reg_stack[stack_reg][0],nullptr)), R(tmp1));
+	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
+	MOV(16, MComplex(tmp2, EAX, SCALE_2, 0), R(tmp1));
 	gpr.putXReg(tmp1);
+	gpr.putXReg(tmp2);
 }
 
 //clobbers:
@@ -37,13 +39,15 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg)
 {
 	//g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]];
 	MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
-	X64Reg tmp1;
+	X64Reg tmp1, tmp2;
 	gpr.getFreeXReg(tmp1);
+	gpr.getFreeXReg(tmp2);
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(16, R(tmp1), MComplex(EAX, EAX, 1,
-	        PtrOffset(&g_dsp.reg_stack[stack_reg][0],nullptr)));
+	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
+	MOV(16, R(tmp1), MComplex(tmp2, EAX, SCALE_2, 0));
 	MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1));
 	gpr.putXReg(tmp1);
+	gpr.putXReg(tmp2);
 
 	//g_dsp.reg_stack_ptr[stack_reg]--;
 	//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
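
For context: the deleted PtrOffset(&g_dsp.reg_stack[stack_reg][0], nullptr) call encoded the table's absolute address as a 32-bit displacement, and MComplex(EAX, EAX, 1, offset) computed EAX + EAX*1 + offset, i.e. index*2 plus the table base, which only assembles when the table sits in the low 2 GB of address space. The replacement performs the same address arithmetic with no placement constraint. A sketch of the equivalence (the index n stands in for stack_reg):

    // before: [EAX + EAX*1 + (u32)&reg_stack[n][0]]        // 2*index + table; table must fit in a disp32
    // after:  MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[n])); // any 64-bit table address
    //         [tmp2 + EAX*2]                                // table + 2*index, the same element
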
@@ -103,9 +103,9 @@ void Jit64AsmRoutineManager::Generate()
 	// optimizations safe, because IR and DR are usually set/cleared together.
 	// TODO: Branching based on the 20 most significant bits of instruction
 	// addresses without translating them is wrong.
-	u64 icache = (u64)jit->GetBlockCache()->iCache.data();
-	u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
-	u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
+	u8* icache = jit->GetBlockCache()->iCache.data();
+	u8* icacheVmem = jit->GetBlockCache()->iCacheVMEM.data();
+	u8* icacheEx = jit->GetBlockCache()->iCacheEx.data();
 	u32 mask = 0;
 	FixupBranch no_mem;
 	FixupBranch exit_mem;
@@ -117,13 +117,13 @@ void Jit64AsmRoutineManager::Generate()
 	no_mem = J_CC(CC_NZ);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
 
-	if (icache <= INT_MAX)
+	if (FitsInS32(PPCSTATE_OFS(icache)))
 	{
-		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
+		MOV(32, R(RSCRATCH), MPIC(icache, RSCRATCH));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), Imm64(icache));
+		MOV(64, R(RSCRATCH2), ImmPtr(icache));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
 
@@ -132,13 +132,14 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
 	FixupBranch no_vmem = J_CC(CC_Z);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
-	if (icacheVmem <= INT_MAX)
+
+	if (FitsInS32(PPCSTATE_OFS(icacheVmem)))
 	{
-		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
+		MOV(32, R(RSCRATCH), MPIC(icacheVmem, RSCRATCH));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
+		MOV(64, R(RSCRATCH2), ImmPtr(icacheVmem));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
 
@@ -149,14 +150,13 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
 	FixupBranch no_exram = J_CC(CC_Z);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
-
-	if (icacheEx <= INT_MAX)
+	if (FitsInS32(PPCSTATE_OFS(icacheEx)))
 	{
-		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
+		MOV(32, R(RSCRATCH), MPIC(icacheEx, RSCRATCH));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), Imm64(icacheEx));
+		MOV(64, R(RSCRATCH2), ImmPtr(icacheEx));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
 
@@ -169,16 +169,17 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), R(RSCRATCH));
 	FixupBranch notfound = J_CC(CC_L);
 	//grab from list and jump to it
-	u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
-	if (codePointers <= INT_MAX)
+	const u8** codePointers = jit->GetBlockCache()->GetCodePointers();
+	if (FitsInS32(PPCSTATE_OFS(codePointers)))
 	{
-		JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
+		JMPptr(MPIC(codePointers, RSCRATCH, SCALE_8));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), Imm64(codePointers));
+		MOV(64, R(RSCRATCH2), ImmPtr(codePointers));
 		JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
 	}
 
 	SetJumpTarget(notfound);
 
 	//Ok, no block, let's jit
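
Each of these dispatch blocks now tests whether the table's distance from ppcState fits in a signed 32-bit displacement and, when it does, addresses the table off the pinned RPPCSTATE register through MPIC instead of requiring the table itself to live below 2 GB. Using the PPCSTATE_OFS and MPIC definitions added later in this diff, the fast path is conceptually equivalent to the following sketch (not code from the commit):

    // JMPptr(MPIC(codePointers, RSCRATCH, SCALE_8)) encodes roughly:
    //   jmp qword [rbp + rscratch*8 + disp32]
    OpArg dispatch = MComplex(RPPCSTATE, RSCRATCH, SCALE_8,
                              (u8*)codePointers - (u8*)&PowerPC::ppcState - 0x80);
    JMPptr(dispatch);
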
@@ -271,7 +272,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
 	CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
 	FixupBranch skip_fast_write = J_CC(CC_NE, false);
 	MOV(32, RSCRATCH, M(&m_gatherPipeCount));
-	MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
+	MOV(8, MPIC(&m_gatherPipe, RSCRATCH), ABI_PARAM1);
 	ADD(32, 1, M(&m_gatherPipeCount));
 	RET();
 	SetJumpTarget(skip_fast_write);
@@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
 	//rare).
 	static const u8 ovtable[4] = {0, 0, XER_SO_MASK, XER_SO_MASK};
 	MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
-	MOV(8, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)ovtable));
+	MOV(8, R(RSCRATCH), MPIC(ovtable, RSCRATCH));
 	MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
 	SetJumpTarget(exit);
 }
@@ -132,13 +132,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 	{
 		// One value
 		CVTSD2SS(XMM0, fpr.R(s));
-		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
+		CALLptr(MPIC(asm_routines.singleStoreQuantized, RSCRATCH, SCALE_8));
 	}
 	else
 	{
 		// Pair of values
 		CVTPD2PS(XMM0, fpr.R(s));
-		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
+		CALLptr(MPIC(asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
 	}
 
 	if (update && jo.memcheck)
@@ -306,7 +306,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
 	AND(32, R(RSCRATCH2), gqr);
 	MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
 
-	CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
+	CALLptr(MPIC(&asm_routines.pairedLoadQuantized[w * 8], RSCRATCH, SCALE_8));
 
 	MemoryExceptionCheck();
 	CVTPS2PD(fpr.RX(s), R(XMM0));
@@ -458,7 +458,7 @@ void Jit64::mtcrf(UGeckoInstruction inst)
 			SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
 			if (i != 0)
 				AND(32, R(RSCRATCH), Imm8(0xF));
-			MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
+			MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH, SCALE_8));
 			MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
 		}
 	}
@@ -493,7 +493,7 @@ void Jit64::mcrxr(UGeckoInstruction inst)
 	// [SO OV CA 0] << 3
 	SHL(32, R(RSCRATCH), Imm8(4));
 
-	MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
+	MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH));
 	MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
 
 	// Clear XER[0-3]
@@ -24,10 +24,8 @@ void CommonAsmRoutines::GenFifoWrite(int size)
 	const void* start = GetCodePtr();
 
 	// Assume value in RSCRATCH
-	u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe;
-	_assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!");
 	MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount));
-	SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH);
+	SwapAndStore(size, MPIC(GPFifo::m_gatherPipe, RSCRATCH2), RSCRATCH);
 	ADD(32, R(RSCRATCH2), Imm8(size >> 3));
 	MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2));
 	RET();
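
The dropped assertion existed because MDisp(RSCRATCH2, gather_pipe) baked the gather pipe's absolute address into a 32-bit displacement, so the buffer had to sit in the low 2 GB. MPIC instead only needs the gather pipe to be within a signed 32-bit range of ppcState, which presumably always holds since both are statics in the same binary; the _dbg_assert_ inside MPIC still checks the distance in debug builds. Conceptually:

    // SwapAndStore(size, MPIC(GPFifo::m_gatherPipe, RSCRATCH2), RSCRATCH)
    // stores through:
    //   [RPPCSTATE + RSCRATCH2*1 + ((u8*)GPFifo::m_gatherPipe - (u8*)&PowerPC::ppcState - 0x80)]
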
@@ -68,8 +66,8 @@ void CommonAsmRoutines::GenFrsqrte()
 
 	SHR(64, R(RSCRATCH), Imm8(37));
 	AND(32, R(RSCRATCH), Imm32(0x7FF));
-	IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
-	MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
+	IMUL(32, RSCRATCH, MPIC(MathUtil::frsqrte_expected_dec, RSCRATCH_EXTRA, SCALE_4));
+	MOV(32, R(RSCRATCH_EXTRA), MPIC(MathUtil::frsqrte_expected_base, RSCRATCH_EXTRA, SCALE_4));
 	SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
 	SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
@@ -136,11 +134,11 @@ void CommonAsmRoutines::GenFres()
 	AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
 	AND(32, R(RSCRATCH2), Imm8(0x1F)); // i / 1024
 
-	IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
+	IMUL(32, RSCRATCH, MPIC(MathUtil::fres_expected_dec, RSCRATCH2, SCALE_4));
 	ADD(32, R(RSCRATCH), Imm8(1));
 	SHR(32, R(RSCRATCH), Imm8(1));
 
-	MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
+	MOV(32, R(RSCRATCH2), MPIC(MathUtil::fres_expected_base, RSCRATCH2, SCALE_4));
 	SUB(32, R(RSCRATCH2), R(RSCRATCH));
 	SHL(64, R(RSCRATCH2), Imm8(29));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
@@ -199,7 +197,7 @@ void CommonAsmRoutines::GenMfcr()
 		// SO: Bit 61 set; set flag bit 0
 		// LT: Bit 62 set; set flag bit 3
 		SHR(64, R(cr_val), Imm8(61));
-		OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
+		OR(32, R(dst), MPIC(m_flagTable, cr_val, SCALE_4));
 	}
 	RET();
 
@@ -247,7 +245,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedU8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -262,7 +260,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -278,7 +276,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedU16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 
 	if (cpu_info.bSSE4_1)
@@ -310,7 +308,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -355,7 +353,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(&m_255));
@@ -365,7 +363,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	MAXSS(XMM0, M(&m_m128));
 	MINSS(XMM0, M(&m_127));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -374,7 +372,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(m_65535));
@@ -384,7 +382,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	MAXSS(XMM0, M(&m_m32768));
 	MINSS(XMM0, M(&m_32767));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -484,7 +482,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -495,7 +493,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -523,7 +521,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -534,7 +532,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -557,7 +555,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -568,7 +566,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -590,7 +588,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -601,7 +599,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -1611,7 +1611,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 			Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
 
 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
+			Jit->CALLptr(MPIC(Jit->asm_routines.pairedLoadQuantized, RSCRATCH, SCALE_8));
 			Jit->MOVAPD(reg, R(XMM0));
 			RI.fregs[reg] = I;
 			regNormalRegClear(RI, I);
@@ -1669,7 +1669,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 
 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp2(I)));
 			Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedStoreQuantized)));
+			Jit->CALLptr(MPIC(Jit->asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
 			if (RI.IInfo[I - RI.FirstI] & 4)
 				fregClearInst(RI, getOp1(I));
 			if (RI.IInfo[I - RI.FirstI] & 8)
@@ -43,6 +43,18 @@
 // to address as much as possible in a one-byte offset form.
 #define RPPCSTATE RBP
 
+namespace Gen
+{
+
+inline OpArg MPIC(const void* address, X64Reg scale_reg, int scale = SCALE_1)
+{
+	ptrdiff_t offset = PPCSTATE_OFS(address);
+	_dbg_assert_(DYNA_REC, FitsInS32(offset));
+	return MComplex(RPPCSTATE, scale_reg, scale, offset);
+}
+
+}
+
 // Use these to control the instruction selection
 // #define INSTRUCTION_START FallBackToInterpreter(inst); return;
 // #define INSTRUCTION_START PPCTables::CountInstruction(inst);
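
This helper is the heart of the commit: it rewrites an absolute table address as a displacement from RPPCSTATE (RBP), asserting in debug builds that the distance fits in a signed 32 bits. A hedged usage sketch; the lookup table below is invented for illustration:

    // A static 8-byte-per-entry table somewhere in the binary.
    static const u64 lookup[16] = {};

    // Load entry RSCRATCH of the table without burning a second register:
    MOV(64, R(RSCRATCH), MPIC(lookup, RSCRATCH, SCALE_8));
    // equivalent to:
    // MOV(64, R(RSCRATCH), MComplex(RPPCSTATE, RSCRATCH, SCALE_8,
    //                               (u8*)lookup - (u8*)&PowerPC::ppcState - 0x80));

The defaulted scale of SCALE_1 serves the byte-addressed callers above, such as the ovtable and gather-pipe sites.
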
@@ -173,11 +173,11 @@ private:
 		u32 all_ones = (1ULL << sbits) - 1;
 		if ((all_ones & mask) == all_ones)
 		{
-			MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0));
+			MoveOpArgToReg(sbits, MatR(RSCRATCH));
 		}
 		else
 		{
-			m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0));
+			m_code->MOVZX(32, sbits, m_dst_reg, MatR(RSCRATCH));
 			m_code->AND(32, R(m_dst_reg), Imm32(mask));
 			if (m_sign_extend)
 				m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
@@ -14,8 +14,8 @@ namespace MMIO { class Mapping; }
 
 // We offset by 0x80 because the range of one byte memory offsets is
 // -0x80..0x7f.
-#define PPCSTATE(x) MDisp(RPPCSTATE, \
-	(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
+#define PPCSTATE_OFS(x) ((u8*)(x) - (u8*)&PowerPC::ppcState - 0x80)
+#define PPCSTATE(x) MDisp(RPPCSTATE, PPCSTATE_OFS(&PowerPC::ppcState.x))
 // In case you want to disable the ppcstate register:
 // #define PPCSTATE(x) M(&PowerPC::ppcState.x)
 #define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
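
Splitting PPCSTATE into PPCSTATE_OFS makes the ppcState-relative offset computable for arbitrary pointers, not just ppcState members, which is what lets MPIC and the FitsInS32 checks above reuse it. The 0x80 bias keeps the hottest fields inside the one-byte displacement window of -0x80..0x7f, as the comment notes. A worked example of how the pieces compose (illustrative; pc is a real ppcState member):

    ptrdiff_t ofs = (u8*)&PowerPC::ppcState.pc - (u8*)&PowerPC::ppcState - 0x80;  // PPCSTATE_OFS(&PowerPC::ppcState.pc)
    OpArg pc_arg  = MDisp(RPPCSTATE, ofs);                                        // the same operand as PPCSTATE(pc)
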