Merge pull request #4205 from degasus/arm

JitArm64: Cleanup + small fix.
This commit is contained in:
Markus Wick 2016-09-12 11:47:51 +02:00 committed by GitHub
commit bdcee1c585
9 changed files with 98 additions and 99 deletions

View File

@ -95,7 +95,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
Interpreter::Instruction instr = GetInterpreterOp(inst);
MOVI2R(W0, inst.hex);
MOVI2R(X30, (u64)instr);
MOVP2R(X30, instr);
BLR(X30);
if (js.op->opinfo->flags & FL_ENDBLOCK)
@ -129,7 +129,7 @@ void JitArm64::HLEFunction(UGeckoInstruction inst)
MOVI2R(W0, js.compilerPC);
MOVI2R(W1, inst.hex);
MOVI2R(X30, (u64)&HLE::Execute);
MOVP2R(X30, &HLE::Execute);
BLR(X30);
ARM64Reg WA = gpr.GetReg();
@ -153,7 +153,7 @@ void JitArm64::Cleanup()
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
{
gpr.Lock(W0);
MOVI2R(X0, (u64)&GPFifo::FastCheckGatherPipe);
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
BLR(X0);
gpr.Unlock(W0);
}
@ -227,9 +227,9 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
if (only_external)
MOVI2R(X30, (u64)&PowerPC::CheckExternalExceptions);
MOVP2R(X30, &PowerPC::CheckExternalExceptions);
else
MOVI2R(X30, (u64)&PowerPC::CheckExceptions);
MOVP2R(X30, &PowerPC::CheckExceptions);
BLR(X30);
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
@ -254,9 +254,9 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
if (only_external)
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExternalExceptions);
else
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions);
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExceptions);
BLR(EncodeRegTo64(dest));
LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
@ -307,13 +307,13 @@ void JitArm64::BeginTimeProfile(JitBlock* b)
{
EmitResetCycleCounters();
EmitGetCycles(X1);
MOVI2R(X0, (u64)&b->ticStart);
MOVP2R(X0, &b->ticStart);
STR(INDEX_UNSIGNED, X1, X0, 0);
}
else
{
MOVI2R(X1, (u64)QueryPerformanceCounter);
MOVI2R(X0, (u64)&b->ticStart);
MOVP2R(X1, &QueryPerformanceCounter);
MOVP2R(X0, &b->ticStart);
BLR(X1);
}
}
@ -323,15 +323,15 @@ void JitArm64::EndTimeProfile(JitBlock* b)
if (m_supports_cycle_counter)
{
EmitGetCycles(X2);
MOVI2R(X0, (u64)&b->ticStart);
MOVP2R(X0, &b->ticStart);
}
else
{
MOVI2R(X1, (u64)QueryPerformanceCounter);
MOVI2R(X0, (u64)&b->ticStop);
MOVP2R(X1, &QueryPerformanceCounter);
MOVP2R(X0, &b->ticStop);
BLR(X1);
MOVI2R(X0, (u64)&b->ticStart);
MOVP2R(X0, &b->ticStart);
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
}
@ -434,7 +434,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
ARM64Reg WB = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg XB = EncodeRegTo64(WB);
MOVI2R(XA, (u64)&b->runCount);
MOVP2R(XA, &b->runCount);
LDR(INDEX_UNSIGNED, XB, XA, 0);
ADD(XB, XB, 1);
STR(INDEX_UNSIGNED, XB, XA, 0);
@ -457,7 +457,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
MOVI2R(DISPATCHER_PC, js.blockStart);
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck);
MOVP2R(X1, &JitInterface::CompileExceptionCheck);
BLR(X1);
B(dispatcher);
SwitchToNearCode();
@ -508,7 +508,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
FixupBranch exit = B();
SetJumpTarget(Exception);
ABI_PushRegisters(regs_in_use);
MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
MOVP2R(X30, &GPFifo::FastCheckGatherPipe);
BLR(X30);
ABI_PopRegisters(regs_in_use);
@ -517,7 +517,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(msr));
TBZ(W30, 11, done_here);
MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause);
MOVP2R(X30, &ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, W30, X30, 0);
TST(W30, 23, 2);
B(CC_EQ, done_here);
@ -548,7 +548,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
SetJumpTarget(Exception);
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
TBZ(WA, 11, done_here);
MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause);
MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
LDR(INDEX_UNSIGNED, WA, XA, 0);
TST(WA, 23, 2);
B(CC_EQ, done_here);

View File

@ -18,7 +18,7 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
s64 distance = ((s64)dest->normalEntry - (s64)location) >> 2;
if (distance >= -0x40000 && distance <= 0x3FFFF)
{
emit.B(CC_LE, dest->normalEntry);
emit.B(CC_PL, dest->normalEntry);
}
// Use the checked entry if either downcount is smaller than zero,

View File

@ -181,13 +181,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
m_float_emit.FCVT(32, 64, D0, RS);
m_float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U32);
MOVP2R(X30, &PowerPC::Write_U32);
BLR(X30);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.UMOV(32, W0, RS, 0);
MOVI2R(X30, (u64)&PowerPC::Write_U32);
MOVP2R(X30, &PowerPC::Write_U32);
BLR(X30);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
@ -195,19 +195,19 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
m_float_emit.FCVTN(32, D0, RS);
m_float_emit.UMOV(64, X0, D0, 0);
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
MOVI2R(X30, (u64)PowerPC::Write_U64);
MOVP2R(X30, &PowerPC::Write_U64);
BLR(X30);
}
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.UMOV(64, X0, RS, 0);
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
MOVI2R(X30, (u64)PowerPC::Write_U64);
MOVP2R(X30, &PowerPC::Write_U64);
BLR(X30);
}
else
{
MOVI2R(X30, (u64)&PowerPC::Write_U64);
MOVP2R(X30, &PowerPC::Write_U64);
m_float_emit.UMOV(64, X0, RS, 0);
BLR(X30);
}
@ -216,13 +216,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
{
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
MOVI2R(X30, (u64)&PowerPC::Read_U32);
MOVP2R(X30, &PowerPC::Read_U32);
BLR(X30);
m_float_emit.INS(32, RS, 0, X0);
}
else
{
MOVI2R(X30, (u64)&PowerPC::Read_F64);
MOVP2R(X30, &PowerPC::Read_F64);
BLR(X30);
m_float_emit.INS(64, RS, 0, X0);
}
@ -232,27 +232,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
MOV(W0, RS);
if (flags & BackPatchInfo::FLAG_SIZE_32)
MOVI2R(X30, (u64)&PowerPC::Write_U32);
MOVP2R(X30, &PowerPC::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
MOVI2R(X30, (u64)&PowerPC::Write_U16);
MOVP2R(X30, &PowerPC::Write_U16);
else
MOVI2R(X30, (u64)&PowerPC::Write_U8);
MOVP2R(X30, &PowerPC::Write_U8);
BLR(X30);
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
MOVI2R(X30, (u64)&PowerPC::ClearCacheLine);
MOVP2R(X30, &PowerPC::ClearCacheLine);
BLR(X30);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
MOVI2R(X30, (u64)&PowerPC::Read_U32);
MOVP2R(X30, &PowerPC::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
MOVI2R(X30, (u64)&PowerPC::Read_U16);
MOVP2R(X30, &PowerPC::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
MOVI2R(X30, (u64)&PowerPC::Read_U8);
MOVP2R(X30, &PowerPC::Read_U8);
BLR(X30);

View File

@ -103,7 +103,7 @@ void JitArm64::bx(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(XA, (u64)&CoreTiming::Idle);
MOVP2R(XA, &CoreTiming::Idle);
BLR(XA);
gpr.Unlock(WA);

View File

@ -448,7 +448,7 @@ void JitArm64::lXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
MOVI2R(XA, (u64)&CoreTiming::Idle);
MOVP2R(XA, &CoreTiming::Idle);
BLR(XA);
gpr.Unlock(WA);
@ -666,8 +666,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
}
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
MOVK(XA, ((u64)base >> 32) & 0xFFFF, SHIFT_32);
ADD(XA, XA, MEM_REG);
for (int i = inst.RD; i < 32; i++)
{
@ -701,7 +700,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
AND(value, addr, 32 - 10, 28 - 10); // upper three bits and last 10 bit are masked for the bitset
// of cachelines, 0x1ffffc00
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
MOVI2R(EncodeRegTo64(WA), (u64)jit->GetBlockCache()->GetBlockBitSet());
MOVP2R(EncodeRegTo64(WA), jit->GetBlockCache()->GetBlockBitSet());
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
@ -722,7 +721,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
LSL(W0, addr, 5);
MOVI2R(X1, 32);
MOVI2R(X2, 0);
MOVI2R(X3, (u64)(void*)JitInterface::InvalidateICache);
MOVP2R(X3, &JitInterface::InvalidateICache);
BLR(X3);
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);

View File

@ -82,7 +82,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
UBFM(type_reg, scale_reg, 16, 18); // Type
UBFM(scale_reg, scale_reg, 24, 29); // Scale
MOVI2R(X30, (u64)&pairedLoadQuantized[inst.W * 8]);
MOVP2R(X30, &pairedLoadQuantized[inst.W * 8]);
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30);
@ -193,7 +193,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(fail);
// Slow
MOVI2R(X30, (u64)&pairedStoreQuantized[16 + inst.W * 8]);
MOVP2R(X30, &pairedStoreQuantized[16 + inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
ABI_PushRegisters(gprs_in_use);
@ -206,7 +206,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
SetJumpTarget(pass);
// Fast
MOVI2R(X30, (u64)&pairedStoreQuantized[inst.W * 8]);
MOVP2R(X30, &pairedStoreQuantized[inst.W * 8]);
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(EncodeRegTo64(type_reg));

View File

@ -616,7 +616,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
ARM64Reg XA = EncodeRegTo64(WA);
ARM64Reg WB = gpr.GetReg();
ARM64Reg XB = EncodeRegTo64(WB);
MOVI2R(XB, (u64)m_crTable);
MOVP2R(XB, m_crTable);
for (int i = 0; i < 8; ++i)
{
if ((crm & (0x80 >> i)) != 0)

View File

@ -26,7 +26,7 @@ void JitArm64::GenerateAsm()
ABI_PushRegisters(regs_to_save);
MOVI2R(PPC_REG, (u64)&PowerPC::ppcState);
MOVP2R(PPC_REG, &PowerPC::ppcState);
// Load the current PC into DISPATCHER_PC
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
@ -54,10 +54,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVI2R(MEM_REG, (u64)Memory::physical_base);
MOVP2R(MEM_REG, Memory::physical_base);
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVI2R(MEM_REG, (u64)Memory::logical_base);
MOVP2R(MEM_REG, Memory::logical_base);
SetJumpTarget(membaseend);
// iCache[(address >> 2) & iCache_Mask];
@ -104,10 +104,10 @@ void JitArm64::GenerateAsm()
// set the mem_base based on MSR flags
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
MOVI2R(MEM_REG, (u64)Memory::physical_base);
MOVP2R(MEM_REG, Memory::physical_base);
FixupBranch membaseend = B();
SetJumpTarget(physmem);
MOVI2R(MEM_REG, (u64)Memory::logical_base);
MOVP2R(MEM_REG, Memory::logical_base);
SetJumpTarget(membaseend);
// Jump to next block.
@ -119,7 +119,7 @@ void JitArm64::GenerateAsm()
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
MOVI2R(X30, (u64)&CoreTiming::Advance);
MOVP2R(X30, &CoreTiming::Advance);
BLR(X30);
// Load the PC back into DISPATCHER_PC (the exception handler might have changed it)
@ -127,7 +127,7 @@ void JitArm64::GenerateAsm()
// Check the state pointer to see if we are exiting
// This is checked at the end of every slice
MOVI2R(X0, (u64)CPU::GetStatePtr());
MOVP2R(X0, CPU::GetStatePtr());
LDR(INDEX_UNSIGNED, W0, X0, 0);
CMP(W0, 0);
@ -166,20 +166,20 @@ void JitArm64::GenerateCommonAsm()
BRK(100);
const u8* loadPairedFloatTwo = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(32, 1, D0, addr_reg);
float_emit.REV32(8, D0, D0);
RET(X30);
}
const u8* loadPairedU8Two = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.UXTL(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -187,13 +187,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedS8Two = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.SXTL(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -201,13 +201,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedU16Two = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(16, 1, D0, addr_reg);
float_emit.REV16(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -215,13 +215,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedS16Two = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LD1(16, 1, D0, addr_reg);
float_emit.REV16(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -230,20 +230,20 @@ void JitArm64::GenerateCommonAsm()
const u8* loadPairedFloatOne = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.REV32(8, D0, D0);
RET(X30);
}
const u8* loadPairedU8One = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.UXTL(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -251,13 +251,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedS8One = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.SXTL(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -265,13 +265,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedU16One = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.REV16(8, D0, D0);
float_emit.UXTL(16, D0, D0);
float_emit.UCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -279,13 +279,13 @@ void JitArm64::GenerateCommonAsm()
}
const u8* loadPairedS16One = GetCodePtr();
{
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
float_emit.REV16(8, D0, D0);
float_emit.SXTL(16, D0, D0);
float_emit.SCVTF(32, D0, D0);
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
MOVP2R(addr_reg, &m_dequantizeTableS);
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -324,14 +324,14 @@ void JitArm64::GenerateCommonAsm()
{
storePairedFloat = GetCodePtr();
float_emit.REV32(8, D0, D0);
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(64, Q0, 0, addr_reg, SP);
RET(X30);
storePairedFloatSlow = GetCodePtr();
float_emit.UMOV(64, X0, Q0, 0);
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
MOVI2R(X2, (u64)PowerPC::Write_U64);
MOVP2R(X2, &PowerPC::Write_U64);
BR(X2);
}
@ -339,7 +339,7 @@ void JitArm64::GenerateCommonAsm()
const u8* storePairedU8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -351,7 +351,7 @@ void JitArm64::GenerateCommonAsm()
storePairedU8 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(16, Q0, 0, addr_reg, SP);
RET(X30);
@ -359,14 +359,14 @@ void JitArm64::GenerateCommonAsm()
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0);
MOVI2R(X2, (u64)PowerPC::Write_U16);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
}
const u8* storePairedS8;
const u8* storePairedS8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -378,7 +378,7 @@ void JitArm64::GenerateCommonAsm()
storePairedS8 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(16, Q0, 0, addr_reg, SP);
RET(X30);
@ -386,7 +386,7 @@ void JitArm64::GenerateCommonAsm()
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
REV16(W0, W0);
MOVI2R(X2, (u64)PowerPC::Write_U16);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
}
@ -394,7 +394,7 @@ void JitArm64::GenerateCommonAsm()
const u8* storePairedU16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -406,7 +406,7 @@ void JitArm64::GenerateCommonAsm()
storePairedU16 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(32, Q0, 0, addr_reg, SP);
RET(X30);
@ -414,14 +414,14 @@ void JitArm64::GenerateCommonAsm()
emit_quantize();
float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X2, (u64)PowerPC::Write_U32);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
}
const u8* storePairedS16; // Used by Viewtiful Joe's intro movie
const u8* storePairedS16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1, 0);
@ -433,7 +433,7 @@ void JitArm64::GenerateCommonAsm()
storePairedS16 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(32, Q0, 0, addr_reg, SP);
RET(X30);
@ -441,7 +441,7 @@ void JitArm64::GenerateCommonAsm()
emit_quantize();
float_emit.REV32(8, D0, D0);
float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X2, (u64)PowerPC::Write_U32);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
}
@ -450,20 +450,20 @@ void JitArm64::GenerateCommonAsm()
{
storeSingleFloat = GetCodePtr();
float_emit.REV32(8, D0, D0);
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.STR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
RET(X30);
storeSingleFloatSlow = GetCodePtr();
float_emit.UMOV(32, W0, Q0, 0);
MOVI2R(X2, (u64)&PowerPC::Write_U32);
MOVP2R(X2, &PowerPC::Write_U32);
BR(X2);
}
const u8* storeSingleU8; // Used by MKWii
const u8* storeSingleU8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
@ -475,21 +475,21 @@ void JitArm64::GenerateCommonAsm()
storeSingleU8 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(8, Q0, 0, addr_reg);
RET(X30);
storeSingleU8Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(8, W0, Q0, 0);
MOVI2R(X2, (u64)&PowerPC::Write_U8);
MOVP2R(X2, &PowerPC::Write_U8);
BR(X2);
}
const u8* storeSingleS8;
const u8* storeSingleS8Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
@ -501,21 +501,21 @@ void JitArm64::GenerateCommonAsm()
storeSingleS8 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.ST1(8, Q0, 0, addr_reg);
RET(X30);
storeSingleS8Slow = GetCodePtr();
emit_quantize();
float_emit.SMOV(8, W0, Q0, 0);
MOVI2R(X2, (u64)&PowerPC::Write_U8);
MOVP2R(X2, &PowerPC::Write_U8);
BR(X2);
}
const u8* storeSingleU16; // Used by MKWii
const u8* storeSingleU16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
@ -526,7 +526,7 @@ void JitArm64::GenerateCommonAsm()
storeSingleU16 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.REV16(8, D0, D0);
float_emit.ST1(16, Q0, 0, addr_reg);
RET(X30);
@ -534,14 +534,14 @@ void JitArm64::GenerateCommonAsm()
storeSingleU16Slow = GetCodePtr();
emit_quantize();
float_emit.UMOV(16, W0, Q0, 0);
MOVI2R(X2, (u64)&PowerPC::Write_U16);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
}
const u8* storeSingleS16;
const u8* storeSingleS16Slow;
{
auto emit_quantize = [this, &float_emit, scale_reg]() {
MOVI2R(X2, (u64)&m_quantizeTableS);
MOVP2R(X2, &m_quantizeTableS);
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
float_emit.FMUL(32, D0, D0, D1);
@ -552,7 +552,7 @@ void JitArm64::GenerateCommonAsm()
storeSingleS16 = GetCodePtr();
emit_quantize();
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
ADD(addr_reg, addr_reg, MEM_REG);
float_emit.REV16(8, D0, D0);
float_emit.ST1(16, Q0, 0, addr_reg);
RET(X30);
@ -560,7 +560,7 @@ void JitArm64::GenerateCommonAsm()
storeSingleS16Slow = GetCodePtr();
emit_quantize();
float_emit.SMOV(16, W0, Q0, 0);
MOVI2R(X2, (u64)&PowerPC::Write_U16);
MOVP2R(X2, &PowerPC::Write_U16);
BR(X2);
}

View File

@ -52,7 +52,7 @@ private:
void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVI2R(X0, (u64)ptr);
m_emit->MOVP2R(X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,
@ -146,7 +146,7 @@ private:
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
{
m_emit->MOVI2R(X0, (u64)ptr);
m_emit->MOVP2R(X0, ptr);
// If we do not need to mask, we can do the sign extend while loading
// from memory. If masking is required, we have to first zero extend,