Merge pull request #4205 from degasus/arm
JitArm64: Cleanup + small fix.
This commit is contained in:
commit
bdcee1c585
|
@ -95,7 +95,7 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
|
||||||
|
|
||||||
Interpreter::Instruction instr = GetInterpreterOp(inst);
|
Interpreter::Instruction instr = GetInterpreterOp(inst);
|
||||||
MOVI2R(W0, inst.hex);
|
MOVI2R(W0, inst.hex);
|
||||||
MOVI2R(X30, (u64)instr);
|
MOVP2R(X30, instr);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
if (js.op->opinfo->flags & FL_ENDBLOCK)
|
||||||
|
@ -129,7 +129,7 @@ void JitArm64::HLEFunction(UGeckoInstruction inst)
|
||||||
|
|
||||||
MOVI2R(W0, js.compilerPC);
|
MOVI2R(W0, js.compilerPC);
|
||||||
MOVI2R(W1, inst.hex);
|
MOVI2R(W1, inst.hex);
|
||||||
MOVI2R(X30, (u64)&HLE::Execute);
|
MOVP2R(X30, &HLE::Execute);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
|
@ -153,7 +153,7 @@ void JitArm64::Cleanup()
|
||||||
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
|
||||||
{
|
{
|
||||||
gpr.Lock(W0);
|
gpr.Lock(W0);
|
||||||
MOVI2R(X0, (u64)&GPFifo::FastCheckGatherPipe);
|
MOVP2R(X0, &GPFifo::FastCheckGatherPipe);
|
||||||
BLR(X0);
|
BLR(X0);
|
||||||
gpr.Unlock(W0);
|
gpr.Unlock(W0);
|
||||||
}
|
}
|
||||||
|
@ -227,9 +227,9 @@ void JitArm64::WriteExceptionExit(u32 destination, bool only_external)
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
if (only_external)
|
if (only_external)
|
||||||
MOVI2R(X30, (u64)&PowerPC::CheckExternalExceptions);
|
MOVP2R(X30, &PowerPC::CheckExternalExceptions);
|
||||||
else
|
else
|
||||||
MOVI2R(X30, (u64)&PowerPC::CheckExceptions);
|
MOVP2R(X30, &PowerPC::CheckExceptions);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
|
|
||||||
|
@ -254,9 +254,9 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest, bool only_external)
|
||||||
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
STR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
if (only_external)
|
if (only_external)
|
||||||
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExternalExceptions);
|
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExternalExceptions);
|
||||||
else
|
else
|
||||||
MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions);
|
MOVP2R(EncodeRegTo64(dest), &PowerPC::CheckExceptions);
|
||||||
BLR(EncodeRegTo64(dest));
|
BLR(EncodeRegTo64(dest));
|
||||||
LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
LDR(INDEX_UNSIGNED, dest, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
|
|
||||||
|
@ -307,13 +307,13 @@ void JitArm64::BeginTimeProfile(JitBlock* b)
|
||||||
{
|
{
|
||||||
EmitResetCycleCounters();
|
EmitResetCycleCounters();
|
||||||
EmitGetCycles(X1);
|
EmitGetCycles(X1);
|
||||||
MOVI2R(X0, (u64)&b->ticStart);
|
MOVP2R(X0, &b->ticStart);
|
||||||
STR(INDEX_UNSIGNED, X1, X0, 0);
|
STR(INDEX_UNSIGNED, X1, X0, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVI2R(X1, (u64)QueryPerformanceCounter);
|
MOVP2R(X1, &QueryPerformanceCounter);
|
||||||
MOVI2R(X0, (u64)&b->ticStart);
|
MOVP2R(X0, &b->ticStart);
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -323,15 +323,15 @@ void JitArm64::EndTimeProfile(JitBlock* b)
|
||||||
if (m_supports_cycle_counter)
|
if (m_supports_cycle_counter)
|
||||||
{
|
{
|
||||||
EmitGetCycles(X2);
|
EmitGetCycles(X2);
|
||||||
MOVI2R(X0, (u64)&b->ticStart);
|
MOVP2R(X0, &b->ticStart);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVI2R(X1, (u64)QueryPerformanceCounter);
|
MOVP2R(X1, &QueryPerformanceCounter);
|
||||||
MOVI2R(X0, (u64)&b->ticStop);
|
MOVP2R(X0, &b->ticStop);
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
|
|
||||||
MOVI2R(X0, (u64)&b->ticStart);
|
MOVP2R(X0, &b->ticStart);
|
||||||
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
LDR(INDEX_UNSIGNED, X2, X0, 8); // Stop
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -434,7 +434,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
||||||
ARM64Reg WB = gpr.GetReg();
|
ARM64Reg WB = gpr.GetReg();
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
ARM64Reg XB = EncodeRegTo64(WB);
|
ARM64Reg XB = EncodeRegTo64(WB);
|
||||||
MOVI2R(XA, (u64)&b->runCount);
|
MOVP2R(XA, &b->runCount);
|
||||||
LDR(INDEX_UNSIGNED, XB, XA, 0);
|
LDR(INDEX_UNSIGNED, XB, XA, 0);
|
||||||
ADD(XB, XB, 1);
|
ADD(XB, XB, 1);
|
||||||
STR(INDEX_UNSIGNED, XB, XA, 0);
|
STR(INDEX_UNSIGNED, XB, XA, 0);
|
||||||
|
@ -457,7 +457,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
||||||
MOVI2R(DISPATCHER_PC, js.blockStart);
|
MOVI2R(DISPATCHER_PC, js.blockStart);
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
|
MOVI2R(W0, (u32)JitInterface::ExceptionType::EXCEPTIONS_PAIRED_QUANTIZE);
|
||||||
MOVI2R(X1, (u64)&JitInterface::CompileExceptionCheck);
|
MOVP2R(X1, &JitInterface::CompileExceptionCheck);
|
||||||
BLR(X1);
|
BLR(X1);
|
||||||
B(dispatcher);
|
B(dispatcher);
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
|
@ -508,7 +508,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
||||||
FixupBranch exit = B();
|
FixupBranch exit = B();
|
||||||
SetJumpTarget(Exception);
|
SetJumpTarget(Exception);
|
||||||
ABI_PushRegisters(regs_in_use);
|
ABI_PushRegisters(regs_in_use);
|
||||||
MOVI2R(X30, (u64)&GPFifo::FastCheckGatherPipe);
|
MOVP2R(X30, &GPFifo::FastCheckGatherPipe);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
ABI_PopRegisters(regs_in_use);
|
ABI_PopRegisters(regs_in_use);
|
||||||
|
|
||||||
|
@ -517,7 +517,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
||||||
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
|
TBZ(W30, 3, done_here); // EXCEPTION_EXTERNAL_INT
|
||||||
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(INDEX_UNSIGNED, W30, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
TBZ(W30, 11, done_here);
|
TBZ(W30, 11, done_here);
|
||||||
MOVI2R(X30, (u64)&ProcessorInterface::m_InterruptCause);
|
MOVP2R(X30, &ProcessorInterface::m_InterruptCause);
|
||||||
LDR(INDEX_UNSIGNED, W30, X30, 0);
|
LDR(INDEX_UNSIGNED, W30, X30, 0);
|
||||||
TST(W30, 23, 2);
|
TST(W30, 23, 2);
|
||||||
B(CC_EQ, done_here);
|
B(CC_EQ, done_here);
|
||||||
|
@ -548,7 +548,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
|
||||||
SetJumpTarget(Exception);
|
SetJumpTarget(Exception);
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
TBZ(WA, 11, done_here);
|
TBZ(WA, 11, done_here);
|
||||||
MOVI2R(XA, (u64)&ProcessorInterface::m_InterruptCause);
|
MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
|
||||||
LDR(INDEX_UNSIGNED, WA, XA, 0);
|
LDR(INDEX_UNSIGNED, WA, XA, 0);
|
||||||
TST(WA, 23, 2);
|
TST(WA, 23, 2);
|
||||||
B(CC_EQ, done_here);
|
B(CC_EQ, done_here);
|
||||||
|
|
|
@ -18,7 +18,7 @@ void JitArm64BlockCache::WriteLinkBlock(const JitBlock::LinkData& source, const
|
||||||
s64 distance = ((s64)dest->normalEntry - (s64)location) >> 2;
|
s64 distance = ((s64)dest->normalEntry - (s64)location) >> 2;
|
||||||
if (distance >= -0x40000 && distance <= 0x3FFFF)
|
if (distance >= -0x40000 && distance <= 0x3FFFF)
|
||||||
{
|
{
|
||||||
emit.B(CC_LE, dest->normalEntry);
|
emit.B(CC_PL, dest->normalEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the checked entry if either downcount is smaller than zero,
|
// Use the checked entry if either downcount is smaller than zero,
|
||||||
|
|
|
@ -181,13 +181,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
{
|
{
|
||||||
m_float_emit.FCVT(32, 64, D0, RS);
|
m_float_emit.FCVT(32, 64, D0, RS);
|
||||||
m_float_emit.UMOV(32, W0, Q0, 0);
|
m_float_emit.UMOV(32, W0, Q0, 0);
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
MOVP2R(X30, &PowerPC::Write_U32);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
|
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
|
||||||
{
|
{
|
||||||
m_float_emit.UMOV(32, W0, RS, 0);
|
m_float_emit.UMOV(32, W0, RS, 0);
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
MOVP2R(X30, &PowerPC::Write_U32);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
|
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2)
|
||||||
|
@ -195,19 +195,19 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
m_float_emit.FCVTN(32, D0, RS);
|
m_float_emit.FCVTN(32, D0, RS);
|
||||||
m_float_emit.UMOV(64, X0, D0, 0);
|
m_float_emit.UMOV(64, X0, D0, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
||||||
MOVI2R(X30, (u64)PowerPC::Write_U64);
|
MOVP2R(X30, &PowerPC::Write_U64);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
|
||||||
{
|
{
|
||||||
m_float_emit.UMOV(64, X0, RS, 0);
|
m_float_emit.UMOV(64, X0, RS, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
||||||
MOVI2R(X30, (u64)PowerPC::Write_U64);
|
MOVP2R(X30, &PowerPC::Write_U64);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U64);
|
MOVP2R(X30, &PowerPC::Write_U64);
|
||||||
m_float_emit.UMOV(64, X0, RS, 0);
|
m_float_emit.UMOV(64, X0, RS, 0);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
|
@ -216,13 +216,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
if (flags & BackPatchInfo::FLAG_SIZE_F32)
|
||||||
{
|
{
|
||||||
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
MOVP2R(X30, &PowerPC::Read_U32);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
m_float_emit.INS(32, RS, 0, X0);
|
m_float_emit.INS(32, RS, 0, X0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOVI2R(X30, (u64)&PowerPC::Read_F64);
|
MOVP2R(X30, &PowerPC::Read_F64);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
m_float_emit.INS(64, RS, 0, X0);
|
m_float_emit.INS(64, RS, 0, X0);
|
||||||
}
|
}
|
||||||
|
@ -232,27 +232,27 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
|
||||||
MOV(W0, RS);
|
MOV(W0, RS);
|
||||||
|
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U32);
|
MOVP2R(X30, &PowerPC::Write_U32);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U16);
|
MOVP2R(X30, &PowerPC::Write_U16);
|
||||||
else
|
else
|
||||||
MOVI2R(X30, (u64)&PowerPC::Write_U8);
|
MOVP2R(X30, &PowerPC::Write_U8);
|
||||||
|
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||||
{
|
{
|
||||||
MOVI2R(X30, (u64)&PowerPC::ClearCacheLine);
|
MOVP2R(X30, &PowerPC::ClearCacheLine);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||||
MOVI2R(X30, (u64)&PowerPC::Read_U32);
|
MOVP2R(X30, &PowerPC::Read_U32);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
else if (flags & BackPatchInfo::FLAG_SIZE_16)
|
||||||
MOVI2R(X30, (u64)&PowerPC::Read_U16);
|
MOVP2R(X30, &PowerPC::Read_U16);
|
||||||
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
else if (flags & BackPatchInfo::FLAG_SIZE_8)
|
||||||
MOVI2R(X30, (u64)&PowerPC::Read_U8);
|
MOVP2R(X30, &PowerPC::Read_U8);
|
||||||
|
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
|
|
|
@ -103,7 +103,7 @@ void JitArm64::bx(UGeckoInstruction inst)
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
|
|
||||||
MOVI2R(XA, (u64)&CoreTiming::Idle);
|
MOVP2R(XA, &CoreTiming::Idle);
|
||||||
BLR(XA);
|
BLR(XA);
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
|
|
|
@ -448,7 +448,7 @@ void JitArm64::lXX(UGeckoInstruction inst)
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
MOVI2R(XA, (u64)&CoreTiming::Idle);
|
MOVP2R(XA, &CoreTiming::Idle);
|
||||||
BLR(XA);
|
BLR(XA);
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA);
|
||||||
|
|
||||||
|
@ -666,8 +666,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
|
||||||
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
|
MOVI2R(WA, (u32)(s32)(s16)inst.SIMM_16);
|
||||||
}
|
}
|
||||||
|
|
||||||
u8* base = UReg_MSR(MSR).DR ? Memory::logical_base : Memory::physical_base;
|
ADD(XA, XA, MEM_REG);
|
||||||
MOVK(XA, ((u64)base >> 32) & 0xFFFF, SHIFT_32);
|
|
||||||
|
|
||||||
for (int i = inst.RD; i < 32; i++)
|
for (int i = inst.RD; i < 32; i++)
|
||||||
{
|
{
|
||||||
|
@ -701,7 +700,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
AND(value, addr, 32 - 10, 28 - 10); // upper three bits and last 10 bits are masked for the bitset
|
AND(value, addr, 32 - 10, 28 - 10); // upper three bits and last 10 bits are masked for the bitset
|
||||||
// of cachelines, 0x1ffffc00
|
// of cachelines, 0x1ffffc00
|
||||||
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
LSR(value, value, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
MOVI2R(EncodeRegTo64(WA), (u64)jit->GetBlockCache()->GetBlockBitSet());
|
MOVP2R(EncodeRegTo64(WA), jit->GetBlockCache()->GetBlockBitSet());
|
||||||
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
||||||
|
|
||||||
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
LSR(addr, addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
||||||
|
@ -722,7 +721,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
LSL(W0, addr, 5);
|
LSL(W0, addr, 5);
|
||||||
MOVI2R(X1, 32);
|
MOVI2R(X1, 32);
|
||||||
MOVI2R(X2, 0);
|
MOVI2R(X2, 0);
|
||||||
MOVI2R(X3, (u64)(void*)JitInterface::InvalidateICache);
|
MOVP2R(X3, &JitInterface::InvalidateICache);
|
||||||
BLR(X3);
|
BLR(X3);
|
||||||
|
|
||||||
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
|
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
|
||||||
|
|
|
@ -82,7 +82,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
|
||||||
UBFM(type_reg, scale_reg, 16, 18); // Type
|
UBFM(type_reg, scale_reg, 16, 18); // Type
|
||||||
UBFM(scale_reg, scale_reg, 24, 29); // Scale
|
UBFM(scale_reg, scale_reg, 24, 29); // Scale
|
||||||
|
|
||||||
MOVI2R(X30, (u64)&pairedLoadQuantized[inst.W * 8]);
|
MOVP2R(X30, &pairedLoadQuantized[inst.W * 8]);
|
||||||
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
|
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(fail);
|
SetJumpTarget(fail);
|
||||||
// Slow
|
// Slow
|
||||||
MOVI2R(X30, (u64)&pairedStoreQuantized[16 + inst.W * 8]);
|
MOVP2R(X30, &pairedStoreQuantized[16 + inst.W * 8]);
|
||||||
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
|
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
|
||||||
|
|
||||||
ABI_PushRegisters(gprs_in_use);
|
ABI_PushRegisters(gprs_in_use);
|
||||||
|
@ -206,7 +206,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
|
||||||
SetJumpTarget(pass);
|
SetJumpTarget(pass);
|
||||||
|
|
||||||
// Fast
|
// Fast
|
||||||
MOVI2R(X30, (u64)&pairedStoreQuantized[inst.W * 8]);
|
MOVP2R(X30, &pairedStoreQuantized[inst.W * 8]);
|
||||||
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
|
LDR(EncodeRegTo64(type_reg), X30, ArithOption(EncodeRegTo64(type_reg), true));
|
||||||
BLR(EncodeRegTo64(type_reg));
|
BLR(EncodeRegTo64(type_reg));
|
||||||
|
|
||||||
|
|
|
@ -616,7 +616,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
ARM64Reg XA = EncodeRegTo64(WA);
|
||||||
ARM64Reg WB = gpr.GetReg();
|
ARM64Reg WB = gpr.GetReg();
|
||||||
ARM64Reg XB = EncodeRegTo64(WB);
|
ARM64Reg XB = EncodeRegTo64(WB);
|
||||||
MOVI2R(XB, (u64)m_crTable);
|
MOVP2R(XB, m_crTable);
|
||||||
for (int i = 0; i < 8; ++i)
|
for (int i = 0; i < 8; ++i)
|
||||||
{
|
{
|
||||||
if ((crm & (0x80 >> i)) != 0)
|
if ((crm & (0x80 >> i)) != 0)
|
||||||
|
|
|
@ -26,7 +26,7 @@ void JitArm64::GenerateAsm()
|
||||||
|
|
||||||
ABI_PushRegisters(regs_to_save);
|
ABI_PushRegisters(regs_to_save);
|
||||||
|
|
||||||
MOVI2R(PPC_REG, (u64)&PowerPC::ppcState);
|
MOVP2R(PPC_REG, &PowerPC::ppcState);
|
||||||
|
|
||||||
// Load the current PC into DISPATCHER_PC
|
// Load the current PC into DISPATCHER_PC
|
||||||
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
LDR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
|
@ -54,10 +54,10 @@ void JitArm64::GenerateAsm()
|
||||||
// set the mem_base based on MSR flags
|
// set the mem_base based on MSR flags
|
||||||
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||||
MOVI2R(MEM_REG, (u64)Memory::physical_base);
|
MOVP2R(MEM_REG, Memory::physical_base);
|
||||||
FixupBranch membaseend = B();
|
FixupBranch membaseend = B();
|
||||||
SetJumpTarget(physmem);
|
SetJumpTarget(physmem);
|
||||||
MOVI2R(MEM_REG, (u64)Memory::logical_base);
|
MOVP2R(MEM_REG, Memory::logical_base);
|
||||||
SetJumpTarget(membaseend);
|
SetJumpTarget(membaseend);
|
||||||
|
|
||||||
// iCache[(address >> 2) & iCache_Mask];
|
// iCache[(address >> 2) & iCache_Mask];
|
||||||
|
@ -104,10 +104,10 @@ void JitArm64::GenerateAsm()
|
||||||
// set the mem_base based on MSR flags
|
// set the mem_base based on MSR flags
|
||||||
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(INDEX_UNSIGNED, ARM64Reg::W28, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
FixupBranch physmem = TBNZ(ARM64Reg::W28, 31 - 27);
|
||||||
MOVI2R(MEM_REG, (u64)Memory::physical_base);
|
MOVP2R(MEM_REG, Memory::physical_base);
|
||||||
FixupBranch membaseend = B();
|
FixupBranch membaseend = B();
|
||||||
SetJumpTarget(physmem);
|
SetJumpTarget(physmem);
|
||||||
MOVI2R(MEM_REG, (u64)Memory::logical_base);
|
MOVP2R(MEM_REG, Memory::logical_base);
|
||||||
SetJumpTarget(membaseend);
|
SetJumpTarget(membaseend);
|
||||||
|
|
||||||
// Jump to next block.
|
// Jump to next block.
|
||||||
|
@ -119,7 +119,7 @@ void JitArm64::GenerateAsm()
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(pc));
|
||||||
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
STR(INDEX_UNSIGNED, DISPATCHER_PC, PPC_REG, PPCSTATE_OFF(npc));
|
||||||
|
|
||||||
MOVI2R(X30, (u64)&CoreTiming::Advance);
|
MOVP2R(X30, &CoreTiming::Advance);
|
||||||
BLR(X30);
|
BLR(X30);
|
||||||
|
|
||||||
// Load the PC back into DISPATCHER_PC (the exception handler might have changed it)
|
// Load the PC back into DISPATCHER_PC (the exception handler might have changed it)
|
||||||
|
@ -127,7 +127,7 @@ void JitArm64::GenerateAsm()
|
||||||
|
|
||||||
// Check the state pointer to see if we are exiting
|
// Check the state pointer to see if we are exiting
|
||||||
// Gets checked on at the end of every slice
|
// Gets checked on at the end of every slice
|
||||||
MOVI2R(X0, (u64)CPU::GetStatePtr());
|
MOVP2R(X0, CPU::GetStatePtr());
|
||||||
LDR(INDEX_UNSIGNED, W0, X0, 0);
|
LDR(INDEX_UNSIGNED, W0, X0, 0);
|
||||||
|
|
||||||
CMP(W0, 0);
|
CMP(W0, 0);
|
||||||
|
@ -166,20 +166,20 @@ void JitArm64::GenerateCommonAsm()
|
||||||
BRK(100);
|
BRK(100);
|
||||||
const u8* loadPairedFloatTwo = GetCodePtr();
|
const u8* loadPairedFloatTwo = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LD1(32, 1, D0, addr_reg);
|
float_emit.LD1(32, 1, D0, addr_reg);
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
}
|
}
|
||||||
const u8* loadPairedU8Two = GetCodePtr();
|
const u8* loadPairedU8Two = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.UXTL(8, D0, D0);
|
float_emit.UXTL(8, D0, D0);
|
||||||
float_emit.UXTL(16, D0, D0);
|
float_emit.UXTL(16, D0, D0);
|
||||||
float_emit.UCVTF(32, D0, D0);
|
float_emit.UCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -187,13 +187,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedS8Two = GetCodePtr();
|
const u8* loadPairedS8Two = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.SXTL(8, D0, D0);
|
float_emit.SXTL(8, D0, D0);
|
||||||
float_emit.SXTL(16, D0, D0);
|
float_emit.SXTL(16, D0, D0);
|
||||||
float_emit.SCVTF(32, D0, D0);
|
float_emit.SCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -201,13 +201,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedU16Two = GetCodePtr();
|
const u8* loadPairedU16Two = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LD1(16, 1, D0, addr_reg);
|
float_emit.LD1(16, 1, D0, addr_reg);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.UXTL(16, D0, D0);
|
float_emit.UXTL(16, D0, D0);
|
||||||
float_emit.UCVTF(32, D0, D0);
|
float_emit.UCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -215,13 +215,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedS16Two = GetCodePtr();
|
const u8* loadPairedS16Two = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LD1(16, 1, D0, addr_reg);
|
float_emit.LD1(16, 1, D0, addr_reg);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.SXTL(16, D0, D0);
|
float_emit.SXTL(16, D0, D0);
|
||||||
float_emit.SCVTF(32, D0, D0);
|
float_emit.SCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -230,20 +230,20 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
const u8* loadPairedFloatOne = GetCodePtr();
|
const u8* loadPairedFloatOne = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
}
|
}
|
||||||
const u8* loadPairedU8One = GetCodePtr();
|
const u8* loadPairedU8One = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.UXTL(8, D0, D0);
|
float_emit.UXTL(8, D0, D0);
|
||||||
float_emit.UXTL(16, D0, D0);
|
float_emit.UXTL(16, D0, D0);
|
||||||
float_emit.UCVTF(32, D0, D0);
|
float_emit.UCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -251,13 +251,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedS8One = GetCodePtr();
|
const u8* loadPairedS8One = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(8, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.SXTL(8, D0, D0);
|
float_emit.SXTL(8, D0, D0);
|
||||||
float_emit.SXTL(16, D0, D0);
|
float_emit.SXTL(16, D0, D0);
|
||||||
float_emit.SCVTF(32, D0, D0);
|
float_emit.SCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -265,13 +265,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedU16One = GetCodePtr();
|
const u8* loadPairedU16One = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.UXTL(16, D0, D0);
|
float_emit.UXTL(16, D0, D0);
|
||||||
float_emit.UCVTF(32, D0, D0);
|
float_emit.UCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -279,13 +279,13 @@ void JitArm64::GenerateCommonAsm()
|
||||||
}
|
}
|
||||||
const u8* loadPairedS16One = GetCodePtr();
|
const u8* loadPairedS16One = GetCodePtr();
|
||||||
{
|
{
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.LDR(16, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.SXTL(16, D0, D0);
|
float_emit.SXTL(16, D0, D0);
|
||||||
float_emit.SCVTF(32, D0, D0);
|
float_emit.SCVTF(32, D0, D0);
|
||||||
|
|
||||||
MOVI2R(addr_reg, (u64)&m_dequantizeTableS);
|
MOVP2R(addr_reg, &m_dequantizeTableS);
|
||||||
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, addr_reg, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -324,14 +324,14 @@ void JitArm64::GenerateCommonAsm()
|
||||||
{
|
{
|
||||||
storePairedFloat = GetCodePtr();
|
storePairedFloat = GetCodePtr();
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(64, Q0, 0, addr_reg, SP);
|
float_emit.ST1(64, Q0, 0, addr_reg, SP);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
storePairedFloatSlow = GetCodePtr();
|
storePairedFloatSlow = GetCodePtr();
|
||||||
float_emit.UMOV(64, X0, Q0, 0);
|
float_emit.UMOV(64, X0, Q0, 0);
|
||||||
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
|
||||||
MOVI2R(X2, (u64)PowerPC::Write_U64);
|
MOVP2R(X2, &PowerPC::Write_U64);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -339,7 +339,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
const u8* storePairedU8Slow;
|
const u8* storePairedU8Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -351,7 +351,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storePairedU8 = GetCodePtr();
|
storePairedU8 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(16, Q0, 0, addr_reg, SP);
|
float_emit.ST1(16, Q0, 0, addr_reg, SP);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
|
@ -359,14 +359,14 @@ void JitArm64::GenerateCommonAsm()
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.UMOV(16, W0, Q0, 0);
|
float_emit.UMOV(16, W0, Q0, 0);
|
||||||
REV16(W0, W0);
|
REV16(W0, W0);
|
||||||
MOVI2R(X2, (u64)PowerPC::Write_U16);
|
MOVP2R(X2, &PowerPC::Write_U16);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storePairedS8;
|
const u8* storePairedS8;
|
||||||
const u8* storePairedS8Slow;
|
const u8* storePairedS8Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -378,7 +378,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storePairedS8 = GetCodePtr();
|
storePairedS8 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(16, Q0, 0, addr_reg, SP);
|
float_emit.ST1(16, Q0, 0, addr_reg, SP);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
|
@ -386,7 +386,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.UMOV(16, W0, Q0, 0);
|
float_emit.UMOV(16, W0, Q0, 0);
|
||||||
REV16(W0, W0);
|
REV16(W0, W0);
|
||||||
MOVI2R(X2, (u64)PowerPC::Write_U16);
|
MOVP2R(X2, &PowerPC::Write_U16);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,7 +394,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
const u8* storePairedU16Slow;
|
const u8* storePairedU16Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -406,7 +406,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storePairedU16 = GetCodePtr();
|
storePairedU16 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(32, Q0, 0, addr_reg, SP);
|
float_emit.ST1(32, Q0, 0, addr_reg, SP);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
|
@ -414,14 +414,14 @@ void JitArm64::GenerateCommonAsm()
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
float_emit.UMOV(32, W0, Q0, 0);
|
float_emit.UMOV(32, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)PowerPC::Write_U32);
|
MOVP2R(X2, &PowerPC::Write_U32);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storePairedS16; // Used by Viewtiful Joe's intro movie
|
const u8* storePairedS16; // Used by Viewtiful Joe's intro movie
|
||||||
const u8* storePairedS16Slow;
|
const u8* storePairedS16Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1, 0);
|
float_emit.FMUL(32, D0, D0, D1, 0);
|
||||||
|
@ -433,7 +433,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storePairedS16 = GetCodePtr();
|
storePairedS16 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(32, Q0, 0, addr_reg, SP);
|
float_emit.ST1(32, Q0, 0, addr_reg, SP);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
|
@ -441,7 +441,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
float_emit.UMOV(32, W0, Q0, 0);
|
float_emit.UMOV(32, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)PowerPC::Write_U32);
|
MOVP2R(X2, &PowerPC::Write_U32);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -450,20 +450,20 @@ void JitArm64::GenerateCommonAsm()
|
||||||
{
|
{
|
||||||
storeSingleFloat = GetCodePtr();
|
storeSingleFloat = GetCodePtr();
|
||||||
float_emit.REV32(8, D0, D0);
|
float_emit.REV32(8, D0, D0);
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.STR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
float_emit.STR(32, INDEX_UNSIGNED, D0, addr_reg, 0);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
storeSingleFloatSlow = GetCodePtr();
|
storeSingleFloatSlow = GetCodePtr();
|
||||||
float_emit.UMOV(32, W0, Q0, 0);
|
float_emit.UMOV(32, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)&PowerPC::Write_U32);
|
MOVP2R(X2, &PowerPC::Write_U32);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storeSingleU8; // Used by MKWii
|
const u8* storeSingleU8; // Used by MKWii
|
||||||
const u8* storeSingleU8Slow;
|
const u8* storeSingleU8Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1);
|
float_emit.FMUL(32, D0, D0, D1);
|
||||||
|
@ -475,21 +475,21 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storeSingleU8 = GetCodePtr();
|
storeSingleU8 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(8, Q0, 0, addr_reg);
|
float_emit.ST1(8, Q0, 0, addr_reg);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
storeSingleU8Slow = GetCodePtr();
|
storeSingleU8Slow = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.UMOV(8, W0, Q0, 0);
|
float_emit.UMOV(8, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)&PowerPC::Write_U8);
|
MOVP2R(X2, &PowerPC::Write_U8);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storeSingleS8;
|
const u8* storeSingleS8;
|
||||||
const u8* storeSingleS8Slow;
|
const u8* storeSingleS8Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1);
|
float_emit.FMUL(32, D0, D0, D1);
|
||||||
|
@ -501,21 +501,21 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storeSingleS8 = GetCodePtr();
|
storeSingleS8 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.ST1(8, Q0, 0, addr_reg);
|
float_emit.ST1(8, Q0, 0, addr_reg);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
|
||||||
storeSingleS8Slow = GetCodePtr();
|
storeSingleS8Slow = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.SMOV(8, W0, Q0, 0);
|
float_emit.SMOV(8, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)&PowerPC::Write_U8);
|
MOVP2R(X2, &PowerPC::Write_U8);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storeSingleU16; // Used by MKWii
|
const u8* storeSingleU16; // Used by MKWii
|
||||||
const u8* storeSingleU16Slow;
|
const u8* storeSingleU16Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1);
|
float_emit.FMUL(32, D0, D0, D1);
|
||||||
|
@ -526,7 +526,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storeSingleU16 = GetCodePtr();
|
storeSingleU16 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.ST1(16, Q0, 0, addr_reg);
|
float_emit.ST1(16, Q0, 0, addr_reg);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
@ -534,14 +534,14 @@ void JitArm64::GenerateCommonAsm()
|
||||||
storeSingleU16Slow = GetCodePtr();
|
storeSingleU16Slow = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.UMOV(16, W0, Q0, 0);
|
float_emit.UMOV(16, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)&PowerPC::Write_U16);
|
MOVP2R(X2, &PowerPC::Write_U16);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
const u8* storeSingleS16;
|
const u8* storeSingleS16;
|
||||||
const u8* storeSingleS16Slow;
|
const u8* storeSingleS16Slow;
|
||||||
{
|
{
|
||||||
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
auto emit_quantize = [this, &float_emit, scale_reg]() {
|
||||||
MOVI2R(X2, (u64)&m_quantizeTableS);
|
MOVP2R(X2, &m_quantizeTableS);
|
||||||
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
ADD(scale_reg, X2, scale_reg, ArithOption(scale_reg, ST_LSL, 3));
|
||||||
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
float_emit.LDR(32, INDEX_UNSIGNED, D1, scale_reg, 0);
|
||||||
float_emit.FMUL(32, D0, D0, D1);
|
float_emit.FMUL(32, D0, D0, D1);
|
||||||
|
@ -552,7 +552,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
|
|
||||||
storeSingleS16 = GetCodePtr();
|
storeSingleS16 = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
MOVK(addr_reg, ((u64)Memory::logical_base >> 32) & 0xFFFF, SHIFT_32);
|
ADD(addr_reg, addr_reg, MEM_REG);
|
||||||
float_emit.REV16(8, D0, D0);
|
float_emit.REV16(8, D0, D0);
|
||||||
float_emit.ST1(16, Q0, 0, addr_reg);
|
float_emit.ST1(16, Q0, 0, addr_reg);
|
||||||
RET(X30);
|
RET(X30);
|
||||||
|
@ -560,7 +560,7 @@ void JitArm64::GenerateCommonAsm()
|
||||||
storeSingleS16Slow = GetCodePtr();
|
storeSingleS16Slow = GetCodePtr();
|
||||||
emit_quantize();
|
emit_quantize();
|
||||||
float_emit.SMOV(16, W0, Q0, 0);
|
float_emit.SMOV(16, W0, Q0, 0);
|
||||||
MOVI2R(X2, (u64)&PowerPC::Write_U16);
|
MOVP2R(X2, &PowerPC::Write_U16);
|
||||||
BR(X2);
|
BR(X2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ private:
|
||||||
|
|
||||||
void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
|
void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
|
||||||
{
|
{
|
||||||
m_emit->MOVI2R(X0, (u64)ptr);
|
m_emit->MOVP2R(X0, ptr);
|
||||||
|
|
||||||
// If we do not need to mask, we can do the sign extend while loading
|
// If we do not need to mask, we can do the sign extend while loading
|
||||||
// from memory. If masking is required, we have to first zero extend,
|
// from memory. If masking is required, we have to first zero extend,
|
||||||
|
@ -146,7 +146,7 @@ private:
|
||||||
|
|
||||||
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
|
void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
|
||||||
{
|
{
|
||||||
m_emit->MOVI2R(X0, (u64)ptr);
|
m_emit->MOVP2R(X0, ptr);
|
||||||
|
|
||||||
// If we do not need to mask, we can do the sign extend while loading
|
// If we do not need to mask, we can do the sign extend while loading
|
||||||
// from memory. If masking is required, we have to first zero extend,
|
// from memory. If masking is required, we have to first zero extend,
|
||||||
|
|
Loading…
Reference in New Issue