Compile fixes for Windows-on-ARM64
This commit is contained in:
parent
6fcb1c6c46
commit
d744c5a148
|
@ -364,6 +364,8 @@ void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
|
||||||
#if defined(IOS)
|
#if defined(IOS)
|
||||||
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
|
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
|
||||||
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
|
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
|
||||||
|
#elif defined(WIN32)
|
||||||
|
FlushInstructionCache(GetCurrentProcess(), start, end - start);
|
||||||
#else
|
#else
|
||||||
// Don't rely on GCC's __clear_cache implementation, as it caches
|
// Don't rely on GCC's __clear_cache implementation, as it caches
|
||||||
// icache/dcache cache line sizes, that can vary between cores on
|
// icache/dcache cache line sizes, that can vary between cores on
|
||||||
|
@ -2172,6 +2174,8 @@ void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
|
||||||
ARM64Reg second;
|
ARM64Reg second;
|
||||||
if (!(num_regs & 1))
|
if (!(num_regs & 1))
|
||||||
second = (ARM64Reg)(X0 + *it++);
|
second = (ARM64Reg)(X0 + *it++);
|
||||||
|
else
|
||||||
|
second = {};
|
||||||
|
|
||||||
// 8 byte per register, but 16 byte alignment, so we may have to padd one register.
|
// 8 byte per register, but 16 byte alignment, so we may have to padd one register.
|
||||||
// Only update the SP on the last load to avoid the dependency between those loads.
|
// Only update the SP on the last load to avoid the dependency between those loads.
|
||||||
|
@ -4164,20 +4168,19 @@ void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
|
||||||
void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative,
|
void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative,
|
||||||
bool flags)
|
bool flags)
|
||||||
{
|
{
|
||||||
switch ((negative << 1) | flags)
|
if (!negative)
|
||||||
{
|
{
|
||||||
case 0:
|
if (!flags)
|
||||||
ADD(Rd, Rn, imm, shift);
|
ADD(Rd, Rn, imm, shift);
|
||||||
break;
|
else
|
||||||
case 1:
|
ADDS(Rd, Rn, imm, shift);
|
||||||
ADDS(Rd, Rn, imm, shift);
|
}
|
||||||
break;
|
else
|
||||||
case 2:
|
{
|
||||||
SUB(Rd, Rn, imm, shift);
|
if (!flags)
|
||||||
break;
|
SUB(Rd, Rn, imm, shift);
|
||||||
case 3:
|
else
|
||||||
SUBS(Rd, Rn, imm, shift);
|
SUBS(Rd, Rn, imm, shift);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4185,7 +4188,7 @@ void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool nega
|
||||||
ARM64Reg scratch)
|
ARM64Reg scratch)
|
||||||
{
|
{
|
||||||
bool has_scratch = scratch != INVALID_REG;
|
bool has_scratch = scratch != INVALID_REG;
|
||||||
u64 imm_neg = Is64Bit(Rd) ? -imm : -imm & 0xFFFFFFFFuLL;
|
u64 imm_neg = Is64Bit(Rd) ? u64(-s64(imm)) : u64(-s64(imm)) & 0xFFFFFFFFuLL;
|
||||||
bool neg_neg = negative ? false : true;
|
bool neg_neg = negative ? false : true;
|
||||||
|
|
||||||
// Fast paths, aarch64 immediate instructions
|
// Fast paths, aarch64 immediate instructions
|
||||||
|
@ -4232,20 +4235,19 @@ void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool nega
|
||||||
(u32)imm);
|
(u32)imm);
|
||||||
|
|
||||||
negative ^= MOVI2R2(scratch, imm, imm_neg);
|
negative ^= MOVI2R2(scratch, imm, imm_neg);
|
||||||
switch ((negative << 1) | flags)
|
if (!negative)
|
||||||
{
|
{
|
||||||
case 0:
|
if (!flags)
|
||||||
ADD(Rd, Rn, scratch);
|
ADD(Rd, Rn, scratch);
|
||||||
break;
|
else
|
||||||
case 1:
|
ADDS(Rd, Rn, scratch);
|
||||||
ADDS(Rd, Rn, scratch);
|
}
|
||||||
break;
|
else
|
||||||
case 2:
|
{
|
||||||
SUB(Rd, Rn, scratch);
|
if (!flags)
|
||||||
break;
|
SUB(Rd, Rn, scratch);
|
||||||
case 3:
|
else
|
||||||
SUBS(Rd, Rn, scratch);
|
SUBS(Rd, Rn, scratch);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,17 @@
|
||||||
// Licensed under GPLv2+
|
// Licensed under GPLv2+
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <asm/hwcap.h>
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
#include <asm/hwcap.h>
|
||||||
#include <sys/auxv.h>
|
#include <sys/auxv.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
@ -16,6 +20,8 @@
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/FileUtil.h"
|
#include "Common/FileUtil.h"
|
||||||
|
|
||||||
|
#ifndef WIN32
|
||||||
|
|
||||||
const char procfile[] = "/proc/cpuinfo";
|
const char procfile[] = "/proc/cpuinfo";
|
||||||
|
|
||||||
static std::string GetCPUString()
|
static std::string GetCPUString()
|
||||||
|
@ -42,6 +48,8 @@ static std::string GetCPUString()
|
||||||
return cpu_string;
|
return cpu_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
CPUInfo cpu_info;
|
CPUInfo cpu_info;
|
||||||
|
|
||||||
CPUInfo::CPUInfo()
|
CPUInfo::CPUInfo()
|
||||||
|
@ -60,6 +68,21 @@ void CPUInfo::Detect()
|
||||||
Mode64bit = true;
|
Mode64bit = true;
|
||||||
vendor = CPUVendor::ARM;
|
vendor = CPUVendor::ARM;
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
num_cores = std::thread::hardware_concurrency();
|
||||||
|
|
||||||
|
// Windows does not provide any mechanism for querying the system registers on ARMv8, unlike Linux
|
||||||
|
// which traps the register reads and emulates them in the kernel. There are environment variables
|
||||||
|
// containing some of the CPU-specific values, which we could use for a lookup table in the
|
||||||
|
// future. For now, assume all features are present as all known devices which are Windows-on-ARM
|
||||||
|
// compatible also support these extensions.
|
||||||
|
bFP = true;
|
||||||
|
bASIMD = true;
|
||||||
|
bAES = true;
|
||||||
|
bCRC32 = true;
|
||||||
|
bSHA1 = true;
|
||||||
|
bSHA2 = true;
|
||||||
|
#else
|
||||||
// Get the information about the CPU
|
// Get the information about the CPU
|
||||||
num_cores = sysconf(_SC_NPROCESSORS_CONF);
|
num_cores = sysconf(_SC_NPROCESSORS_CONF);
|
||||||
strncpy(cpu_string, GetCPUString().c_str(), sizeof(cpu_string));
|
strncpy(cpu_string, GetCPUString().c_str(), sizeof(cpu_string));
|
||||||
|
@ -71,6 +94,7 @@ void CPUInfo::Detect()
|
||||||
bCRC32 = hwcaps & HWCAP_CRC32;
|
bCRC32 = hwcaps & HWCAP_CRC32;
|
||||||
bSHA1 = hwcaps & HWCAP_SHA1;
|
bSHA1 = hwcaps & HWCAP_SHA1;
|
||||||
bSHA2 = hwcaps & HWCAP_SHA2;
|
bSHA2 = hwcaps & HWCAP_SHA2;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Turn the CPU info into a string we can show
|
// Turn the CPU info into a string we can show
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
#include "Common/CommonFuncs.h"
|
#include "Common/CommonFuncs.h"
|
||||||
#include "Common/Intrinsics.h"
|
#include "Common/Intrinsics.h"
|
||||||
|
|
||||||
#ifdef _M_ARM_64
|
#if defined(_M_ARM_64) && !defined(_MSC_VER)
|
||||||
#include <arm_acle.h>
|
#include <arm_acle.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,10 @@ typedef CONTEXT SContext;
|
||||||
#define CTX_R14 R14
|
#define CTX_R14 R14
|
||||||
#define CTX_R15 R15
|
#define CTX_R15 R15
|
||||||
#define CTX_RIP Rip
|
#define CTX_RIP Rip
|
||||||
|
#elif _M_ARM64
|
||||||
|
#define CTX_REG(x) X[x]
|
||||||
|
#define CTX_SP Sp
|
||||||
|
#define CTX_PC Pc
|
||||||
#else
|
#else
|
||||||
#error No context definition for architecture
|
#error No context definition for architecture
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -754,9 +754,9 @@ void JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(msr));
|
||||||
FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit
|
FixupBranch b1 = TBNZ(WA, 13); // Test FP enabled bit
|
||||||
|
|
||||||
FixupBranch far = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far);
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
|
|
|
@ -143,9 +143,9 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE));
|
JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), !(inst.BO_2 & BO_BRANCH_IF_TRUE));
|
||||||
}
|
}
|
||||||
|
|
||||||
FixupBranch far = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far);
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
if (inst.LK)
|
if (inst.LK)
|
||||||
{
|
{
|
||||||
|
@ -160,12 +160,12 @@ void JitArm64::bcx(UGeckoInstruction inst)
|
||||||
if (js.op->branchIsIdleLoop)
|
if (js.op->branchIsIdleLoop)
|
||||||
{
|
{
|
||||||
// make idle loops go faster
|
// make idle loops go faster
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA2 = gpr.GetReg();
|
||||||
ARM64Reg XA = EncodeRegTo64(WA);
|
ARM64Reg XA2 = EncodeRegTo64(WA2);
|
||||||
|
|
||||||
MOVP2R(XA, &CoreTiming::Idle);
|
MOVP2R(XA2, &CoreTiming::Idle);
|
||||||
BLR(XA);
|
BLR(XA2);
|
||||||
gpr.Unlock(WA);
|
gpr.Unlock(WA2);
|
||||||
|
|
||||||
WriteExceptionExit(js.op->branchTo);
|
WriteExceptionExit(js.op->branchTo);
|
||||||
}
|
}
|
||||||
|
@ -260,9 +260,9 @@ void JitArm64::bclrx(UGeckoInstruction inst)
|
||||||
|
|
||||||
if (conditional)
|
if (conditional)
|
||||||
{
|
{
|
||||||
FixupBranch far = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far);
|
SetJumpTarget(far_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
|
LDR(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(spr[SPR_LR]));
|
||||||
|
|
|
@ -35,7 +35,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||||
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
|
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) &&
|
||||||
(!use_c || fpr.IsSingle(c, !packed));
|
(!use_c || fpr.IsSingle(c, !packed));
|
||||||
|
|
||||||
ARM64Reg VA, VB, VC, VD;
|
ARM64Reg VA{}, VB{}, VC{}, VD{};
|
||||||
|
|
||||||
if (packed)
|
if (packed)
|
||||||
{
|
{
|
||||||
|
|
|
@ -368,7 +368,11 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
|
||||||
|
|
||||||
if (gpr.IsImm(s))
|
if (gpr.IsImm(s))
|
||||||
{
|
{
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
gpr.SetImmediate(a, _CountLeadingZeros(gpr.GetImm(s)));
|
||||||
|
#else
|
||||||
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
|
gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
|
||||||
|
#endif
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC0(gpr.GetImm(a));
|
ComputeRC0(gpr.GetImm(a));
|
||||||
}
|
}
|
||||||
|
@ -931,7 +935,7 @@ void JitArm64::subfex(UGeckoInstruction inst)
|
||||||
|
|
||||||
// d = ~a + b + carry;
|
// d = ~a + b + carry;
|
||||||
if (gpr.IsImm(a))
|
if (gpr.IsImm(a))
|
||||||
MOVI2R(WA, ~gpr.GetImm(a));
|
MOVI2R(WA, u32(~gpr.GetImm(a)));
|
||||||
else
|
else
|
||||||
MVN(WA, gpr.R(a));
|
MVN(WA, gpr.R(a));
|
||||||
ADCS(gpr.R(d), WA, gpr.R(b));
|
ADCS(gpr.R(d), WA, gpr.R(b));
|
||||||
|
@ -1187,7 +1191,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
|
||||||
if (inst.Rc)
|
if (inst.Rc)
|
||||||
ComputeRC0(imm_d);
|
ComputeRC0(imm_d);
|
||||||
}
|
}
|
||||||
else if (gpr.IsImm(b) && gpr.GetImm(b) != 0 && gpr.GetImm(b) != -1u)
|
else if (gpr.IsImm(b) && gpr.GetImm(b) != 0 && gpr.GetImm(b) != UINT32_C(0xFFFFFFFF))
|
||||||
{
|
{
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
MOVI2R(WA, gpr.GetImm(b));
|
MOVI2R(WA, gpr.GetImm(b));
|
||||||
|
|
|
@ -408,7 +408,7 @@ void JitArm64::stX(UGeckoInstruction inst)
|
||||||
gpr.BindToRegister(a, false);
|
gpr.BindToRegister(a, false);
|
||||||
|
|
||||||
ARM64Reg WA = gpr.GetReg();
|
ARM64Reg WA = gpr.GetReg();
|
||||||
ARM64Reg RB;
|
ARM64Reg RB = {};
|
||||||
ARM64Reg RA = gpr.R(a);
|
ARM64Reg RA = gpr.R(a);
|
||||||
if (regOffset != -1)
|
if (regOffset != -1)
|
||||||
RB = gpr.R(regOffset);
|
RB = gpr.R(regOffset);
|
||||||
|
@ -549,9 +549,9 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
LSR(value, value, addr); // move current bit to bit 0
|
LSR(value, value, addr); // move current bit to bit 0
|
||||||
|
|
||||||
FixupBranch bit_not_set = TBZ(value, 0);
|
FixupBranch bit_not_set = TBZ(value, 0);
|
||||||
FixupBranch far = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far);
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
|
||||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||||
|
@ -568,10 +568,10 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
|
m_float_emit.ABI_PopRegisters(fprs_to_push, X30);
|
||||||
ABI_PopRegisters(gprs_to_push);
|
ABI_PopRegisters(gprs_to_push);
|
||||||
|
|
||||||
FixupBranch near = B();
|
FixupBranch near_addr = B();
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(bit_not_set);
|
SetJumpTarget(bit_not_set);
|
||||||
SetJumpTarget(near);
|
SetJumpTarget(near_addr);
|
||||||
|
|
||||||
gpr.Unlock(addr, value, W30);
|
gpr.Unlock(addr, value, W30);
|
||||||
}
|
}
|
||||||
|
|
|
@ -149,6 +149,7 @@ Arm64GPRCache::GuestRegInfo Arm64GPRCache::GetGuestByIndex(size_t index)
|
||||||
if (index >= GUEST_CR_OFFSET && index < GUEST_CR_OFFSET + GUEST_CR_COUNT)
|
if (index >= GUEST_CR_OFFSET && index < GUEST_CR_OFFSET + GUEST_CR_COUNT)
|
||||||
return GetGuestCR(index - GUEST_CR_OFFSET);
|
return GetGuestCR(index - GUEST_CR_OFFSET);
|
||||||
ASSERT_MSG(DYNA_REC, false, "Invalid index for guest register");
|
ASSERT_MSG(DYNA_REC, false, "Invalid index for guest register");
|
||||||
|
return GetGuestGPR(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
|
void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
|
||||||
|
@ -161,7 +162,7 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
|
||||||
{
|
{
|
||||||
ARM64Reg host_reg = reg.GetReg();
|
ARM64Reg host_reg = reg.GetReg();
|
||||||
if (reg.IsDirty())
|
if (reg.IsDirty())
|
||||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
|
||||||
|
|
||||||
if (!maintain_state)
|
if (!maintain_state)
|
||||||
{
|
{
|
||||||
|
@ -173,14 +174,14 @@ void Arm64GPRCache::FlushRegister(size_t index, bool maintain_state)
|
||||||
{
|
{
|
||||||
if (!reg.GetImm())
|
if (!reg.GetImm())
|
||||||
{
|
{
|
||||||
m_emit->STR(INDEX_UNSIGNED, bitsize == 64 ? ZR : WZR, PPC_REG, guest_reg.ppc_offset);
|
m_emit->STR(INDEX_UNSIGNED, bitsize == 64 ? ZR : WZR, PPC_REG, u32(guest_reg.ppc_offset));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||||
|
|
||||||
m_emit->MOVI2R(host_reg, reg.GetImm());
|
m_emit->MOVI2R(host_reg, reg.GetImm());
|
||||||
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
m_emit->STR(INDEX_UNSIGNED, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
|
||||||
|
|
||||||
UnlockRegister(DecodeReg(host_reg));
|
UnlockRegister(DecodeReg(host_reg));
|
||||||
}
|
}
|
||||||
|
@ -207,7 +208,7 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, bool maintain_state)
|
||||||
size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
|
size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
|
||||||
ARM64Reg RX1 = R(GetGuestByIndex(i));
|
ARM64Reg RX1 = R(GetGuestByIndex(i));
|
||||||
ARM64Reg RX2 = R(GetGuestByIndex(i + 1));
|
ARM64Reg RX2 = R(GetGuestByIndex(i + 1));
|
||||||
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, ppc_offset);
|
m_emit->STP(INDEX_SIGNED, RX1, RX2, PPC_REG, u32(ppc_offset));
|
||||||
if (!maintain_state)
|
if (!maintain_state)
|
||||||
{
|
{
|
||||||
UnlockRegister(DecodeReg(RX1));
|
UnlockRegister(DecodeReg(RX1));
|
||||||
|
@ -285,7 +286,7 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
|
||||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||||
reg.Load(host_reg);
|
reg.Load(host_reg);
|
||||||
reg.SetDirty(false);
|
reg.SetDirty(false);
|
||||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
|
||||||
return host_reg;
|
return host_reg;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -318,7 +319,7 @@ void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool do_load)
|
||||||
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
|
||||||
reg.Load(host_reg);
|
reg.Load(host_reg);
|
||||||
if (do_load)
|
if (do_load)
|
||||||
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, guest_reg.ppc_offset);
|
m_emit->LDR(INDEX_UNSIGNED, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -450,7 +451,7 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||||
// Load the high 64bits from the file and insert them in to the high 64bits of the host
|
// Load the high 64bits from the file and insert them in to the high 64bits of the host
|
||||||
// register
|
// register
|
||||||
ARM64Reg tmp_reg = GetReg();
|
ARM64Reg tmp_reg = GetReg();
|
||||||
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
|
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1)));
|
||||||
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
|
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
|
||||||
UnlockRegister(tmp_reg);
|
UnlockRegister(tmp_reg);
|
||||||
|
|
||||||
|
@ -503,7 +504,8 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
|
||||||
reg.Load(host_reg, REG_LOWER_PAIR);
|
reg.Load(host_reg, REG_LOWER_PAIR);
|
||||||
}
|
}
|
||||||
reg.SetDirty(false);
|
reg.SetDirty(false);
|
||||||
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
|
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, PPC_REG,
|
||||||
|
u32(PPCSTATE_OFF(ps[preg].ps0)));
|
||||||
return host_reg;
|
return host_reg;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -551,7 +553,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||||
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
|
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit
|
||||||
// store.
|
// store.
|
||||||
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
|
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
|
||||||
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
|
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps0)));
|
||||||
break;
|
break;
|
||||||
case REG_DUP_SINGLE:
|
case REG_DUP_SINGLE:
|
||||||
flush_reg = GetReg();
|
flush_reg = GetReg();
|
||||||
|
@ -559,7 +561,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type)
|
||||||
[[fallthrough]];
|
[[fallthrough]];
|
||||||
case REG_DUP:
|
case REG_DUP:
|
||||||
// Store PSR1 (which is equal to PSR0) in memory.
|
// Store PSR1 (which is equal to PSR0) in memory.
|
||||||
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
|
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1)));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// All other types doesn't store anything in PSR1.
|
// All other types doesn't store anything in PSR1.
|
||||||
|
@ -684,7 +686,10 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||||
store_size = 64;
|
store_size = 64;
|
||||||
|
|
||||||
if (dirty)
|
if (dirty)
|
||||||
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
|
{
|
||||||
|
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, PPC_REG,
|
||||||
|
u32(PPCSTATE_OFF(ps[preg].ps0)));
|
||||||
|
}
|
||||||
|
|
||||||
if (!maintain_state)
|
if (!maintain_state)
|
||||||
{
|
{
|
||||||
|
@ -700,8 +705,8 @@ void Arm64FPRCache::FlushRegister(size_t preg, bool maintain_state)
|
||||||
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
|
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
|
||||||
// m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG,
|
// m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, PPC_REG,
|
||||||
// PPCSTATE_OFF(ps[preg].ps0));
|
// PPCSTATE_OFF(ps[preg].ps0));
|
||||||
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps0));
|
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps0)));
|
||||||
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, PPCSTATE_OFF(ps[preg].ps1));
|
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, PPC_REG, u32(PPCSTATE_OFF(ps[preg].ps1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!maintain_state)
|
if (!maintain_state)
|
||||||
|
|
|
@ -32,6 +32,7 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
|
||||||
return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
|
return jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62);
|
||||||
default:
|
default:
|
||||||
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
|
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
|
||||||
|
return {};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,9 +197,9 @@ void JitArm64::twx(UGeckoInstruction inst)
|
||||||
SetJumpTarget(fixup);
|
SetJumpTarget(fixup);
|
||||||
}
|
}
|
||||||
|
|
||||||
FixupBranch far = B();
|
FixupBranch far_addr = B();
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
SetJumpTarget(far);
|
SetJumpTarget(far_addr);
|
||||||
|
|
||||||
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
|
gpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
|
||||||
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
|
fpr.Flush(FlushMode::FLUSH_MAINTAIN_STATE);
|
||||||
|
|
|
@ -379,7 +379,7 @@ void VertexLoaderARM64::GenerateVertexLoader()
|
||||||
bool has_tc_scale = false;
|
bool has_tc_scale = false;
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
has_tc |= tc[i];
|
has_tc |= tc[i] != 0;
|
||||||
has_tc_scale |= !!m_VtxAttr.texCoord[i].Frac;
|
has_tc_scale |= !!m_VtxAttr.texCoord[i].Frac;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,14 +17,21 @@
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
|
|
||||||
|
// OpenGL is not available on Windows-on-ARM64
|
||||||
|
#if !defined(_WIN32) || !defined(_M_ARM64)
|
||||||
|
#define HAS_OPENGL 1
|
||||||
|
#endif
|
||||||
|
|
||||||
// TODO: ugly
|
// TODO: ugly
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#include "VideoBackends/D3D/VideoBackend.h"
|
#include "VideoBackends/D3D/VideoBackend.h"
|
||||||
#include "VideoBackends/D3D12/VideoBackend.h"
|
#include "VideoBackends/D3D12/VideoBackend.h"
|
||||||
#endif
|
#endif
|
||||||
#include "VideoBackends/Null/VideoBackend.h"
|
#include "VideoBackends/Null/VideoBackend.h"
|
||||||
|
#ifdef HAS_OPENGL
|
||||||
#include "VideoBackends/OGL/VideoBackend.h"
|
#include "VideoBackends/OGL/VideoBackend.h"
|
||||||
#include "VideoBackends/Software/VideoBackend.h"
|
#include "VideoBackends/Software/VideoBackend.h"
|
||||||
|
#endif
|
||||||
#include "VideoBackends/Vulkan/VideoBackend.h"
|
#include "VideoBackends/Vulkan/VideoBackend.h"
|
||||||
|
|
||||||
#include "VideoCommon/AsyncRequests.h"
|
#include "VideoCommon/AsyncRequests.h"
|
||||||
|
@ -182,13 +189,17 @@ u16 VideoBackendBase::Video_GetBoundingBox(int index)
|
||||||
void VideoBackendBase::PopulateList()
|
void VideoBackendBase::PopulateList()
|
||||||
{
|
{
|
||||||
// OGL > D3D11 > Vulkan > SW > Null
|
// OGL > D3D11 > Vulkan > SW > Null
|
||||||
|
#ifdef HAS_OPENGL
|
||||||
g_available_video_backends.push_back(std::make_unique<OGL::VideoBackend>());
|
g_available_video_backends.push_back(std::make_unique<OGL::VideoBackend>());
|
||||||
|
#endif
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
g_available_video_backends.push_back(std::make_unique<DX11::VideoBackend>());
|
g_available_video_backends.push_back(std::make_unique<DX11::VideoBackend>());
|
||||||
g_available_video_backends.push_back(std::make_unique<DX12::VideoBackend>());
|
g_available_video_backends.push_back(std::make_unique<DX12::VideoBackend>());
|
||||||
#endif
|
#endif
|
||||||
g_available_video_backends.push_back(std::make_unique<Vulkan::VideoBackend>());
|
g_available_video_backends.push_back(std::make_unique<Vulkan::VideoBackend>());
|
||||||
|
#ifdef HAS_OPENGL
|
||||||
g_available_video_backends.push_back(std::make_unique<SW::VideoSoftware>());
|
g_available_video_backends.push_back(std::make_unique<SW::VideoSoftware>());
|
||||||
|
#endif
|
||||||
g_available_video_backends.push_back(std::make_unique<Null::VideoBackend>());
|
g_available_video_backends.push_back(std::make_unique<Null::VideoBackend>());
|
||||||
|
|
||||||
const auto iter =
|
const auto iter =
|
||||||
|
|
Loading…
Reference in New Issue