Merge pull request #1579 from Sonicadvance1/armv7-fastmem-rewrite

Rewrites ARMv7 fastmem entirely.
Ryan Houdek 2014-11-21 14:54:05 -06:00
commit e84999386b
5 changed files with 1065 additions and 520 deletions
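For context on the mechanism being rewritten: fastmem emits a direct load/store against a mapped view of guest memory and lets the rare miss fault. A segfault handler then hands the saved context to the JIT, which patches the faulting routine into a slow C++ call. A minimal sketch of that top-level flow, assuming POSIX signals (the shim names here are illustrative, not Dolphin's exact ones):

#include <csignal>
#include <cstdlib>

// Illustrative handler: SContext stands for the saved register state that
// JitArm::BackPatch() receives (see the header changes below).
static void segv_handler(int sig, siginfo_t* info, void* raw_ctx)
{
    SContext* ctx = GetSContextFromUContext(raw_ctx); // assumed platform shim
    if (jit->BackPatch(ctx))  // rewrites the faulting fastmem routine
        return;               // execution resumes in the patched slow path
    std::abort();             // fault was not a fastmem access
}

// Installed once at startup, roughly:
//   struct sigaction sa = {};
//   sa.sa_sigaction = segv_handler;
//   sa.sa_flags = SA_SIGINFO;
//   sigaction(SIGSEGV, &sa, nullptr);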

View File

@@ -40,6 +40,7 @@ void JitArm::Init()
code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
InitBackpatch();
}
void JitArm::ClearCache()

View File

@@ -48,6 +48,26 @@ private:
ArmFPRCache fpr;
PPCAnalyst::CodeBuffer code_buffer;
struct BackPatchInfo
{
enum
{
FLAG_STORE = (1 << 0),
FLAG_LOAD = (1 << 1),
FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5),
FLAG_SIZE_F64 = (1 << 6),
FLAG_REVERSE = (1 << 7),
};
u32 m_fastmem_size;
u32 m_fastmem_trouble_inst_offset;
u32 m_slowmem_size;
};
// The key is the flags
std::map<u32, BackPatchInfo> m_backpatch_info;
void DoDownCount();
@@ -57,11 +77,19 @@ private:
ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
bool BackPatch(SContext* ctx);
void BeginTimeProfile(JitBlock* b);
void EndTimeProfile(JitBlock* b);
bool BackPatch(SContext* ctx);
bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1);
// Initializes the information that backpatching needs
// This is required so we know the backpatch routine sizes and trouble offsets
void InitBackpatch();
// Returns the trouble instruction offset
// Zero if it isn't a fastmem routine
u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG);
public:
JitArm() : code_buffer(32000) {}
~JitArm() {}
@@ -118,13 +146,8 @@ public:
void GetCarryAndClear(ArmGen::ARMReg reg);
void FinalizeCarry(ArmGen::ARMReg reg);
// TODO: This shouldn't be here
void UnsafeStoreFromReg(ArmGen::ARMReg dest, ArmGen::ARMReg value, int accessSize, s32 offset);
void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void UnsafeLoadToReg(ArmGen::ARMReg dest, ArmGen::ARMReg addr, int accessSize, s32 offsetReg, s32 offset);
void SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse);
void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update);
// OPCODES
void unknown_instruction(UGeckoInstruction _inst);
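The flag bits declared above are OR'd into a single u32 that keys m_backpatch_info, so every load/store shape gets one pre-measured entry. A sketch of a lookup, using the enum as declared:

// A byte-reversed 16-bit load maps to exactly one table entry.
u32 key = BackPatchInfo::FLAG_LOAD |
          BackPatchInfo::FLAG_SIZE_16 |
          BackPatchInfo::FLAG_REVERSE;
const BackPatchInfo& info = m_backpatch_info[key];
// Sizes are in 4-byte ARM instructions, filled in by InitBackpatch().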

View File

@@ -16,47 +16,65 @@ using namespace ArmGen;
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system)
bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1)
{
u32 inst = *(u32*)ptr;
u32 prev_inst = *(u32*)(ptr - 4);
u32 next_inst = *(u32*)(ptr + 4);
u8 op = (inst >> 20) & 0xFF;
rD = (ARMReg)((inst >> 12) & 0xF);
*rD = (ARMReg)((inst >> 12) & 0xF);
switch (op)
{
case 0x58: // STR
{
Store = true;
accessSize = 32;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
case 0x59: // LDR
{
Store = false;
accessSize = 32;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
// REV
if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0x1D: // LDRH
{
Store = false;
accessSize = 16;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
// REV16
if ((next_inst & 0x0FFF0FF0) != 0x06BF0FB0)
*flags |= BackPatchInfo::FLAG_REVERSE;
}
break;
case 0x45 + 0x18: // LDRB
{
Store = false;
accessSize = 8;
*flags |=
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
}
break;
case 0x5C: // STRB
{
Store = true;
accessSize = 8;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
*rD = (ARMReg)((inst >> 12) & 0xF);
}
break;
case 0x1C: // STRH
{
Store = true;
accessSize = 16;
*flags |=
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
*rD = (ARMReg)(prev_inst & 0xF);
}
break;
default:
@@ -66,10 +84,92 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto
switch (op2)
{
case 0xD: // VLDR/VSTR
*new_system = true;
{
bool load = (inst >> 20) & 1;
bool single = !((inst >> 8) & 1);
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (single)
*flags |= BackPatchInfo::FLAG_SIZE_F32;
else
*flags |= BackPatchInfo::FLAG_SIZE_F64;
if (single)
{
if (!load)
{
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
}
}
break;
case 0x4: // VST1/VLD1
*new_system = true;
{
u32 size = (inst >> 6) & 0x3;
bool load = (inst >> 21) & 1;
if (load)
*flags |= BackPatchInfo::FLAG_LOAD;
else
*flags |= BackPatchInfo::FLAG_STORE;
if (size == 2) // 32bit
{
if (load)
{
// For 32bit loads we are loading to a temporary
// So we need to read PC+8,PC+12 to get the two destination registers
u32 vcvt_1 = *(u32*)(ptr + 8);
u32 vcvt_2 = *(u32*)(ptr + 12);
u32 dest_register_1 = (vcvt_1 >> 12) & 0xF;
dest_register_1 |= (vcvt_1 >> 18) & 0x10;
u32 dest_register_2 = (vcvt_2 >> 12) & 0xF;
dest_register_2 |= (vcvt_2 >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register_1 + D0);
*V1 = (ARMReg)(dest_register_2 + D0);
}
else
{
// For 32bit stores we are storing from a temporary
// So we need to check the VCVT at PC-8 for the source register
u32 vcvt = *(u32*)(ptr - 8);
u32 src_register = vcvt & 0xF;
src_register |= (vcvt >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F32;
}
else if (size == 3) // 64bit
{
if (load)
{
// For 64bit loads we load directly in to the VFP register
u32 dest_register = (inst >> 12) & 0xF;
dest_register |= (inst >> 18) & 0x10;
// Make sure to encode the destination register to something our emitter understands
*rD = (ARMReg)(dest_register + D0);
}
else
{
// For 64bit stores we are storing from a temporary
// Check the previous VREV64 instruction for the real register
u32 src_register = prev_inst & 0xF;
src_register |= (prev_inst >> 1) & 0x10;
*rD = (ARMReg)(src_register + D0);
}
*flags |= BackPatchInfo::FLAG_SIZE_F64;
}
}
break;
default:
printf("Op is 0x%02x\n", op);
@@ -95,94 +195,484 @@ bool JitArm::BackPatch(SContext* ctx)
// We need to get the destination register before we start
u8* codePtr = (u8*)ctx->CTX_PC;
u32 Value = *(u32*)codePtr;
ARMReg rD;
u8 accessSize;
bool Store;
bool new_system = false;
ARMReg rD = INVALID_REG;
ARMReg V1 = INVALID_REG;
u32 flags = 0;
if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system))
if (!DisasmLoadStore(codePtr, &flags, &rD, &V1))
{
printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value);
exit(0);
}
if (new_system)
{
// The new system is a lot easier to backpatch than the old crap.
// Instead of backpatching over code and making sure we NOP pad and other crap
// We emit both the slow and fast path and branch over the slow path each time
// We search backwards until we find the second branch instruction
// Then proceed to replace it with a NOP and set that to the new PC.
// This ensures that we run the slow path and then branch over the fast path.
BackPatchInfo& info = m_backpatch_info[flags];
ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4);
u32 new_pc = (u32)emitter.GetCodePtr();
EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1);
emitter.FlushIcache();
ctx->CTX_PC = new_pc;
return true;
}
// Run backwards until we find the branch we want to NOP
for (int branches = 2; branches > 0; ctx->CTX_PC -= 4)
if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B
--branches;
u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1)
{
ARMReg addr = R12;
ARMReg temp = R11;
u32 trouble_offset = 0;
const u8* code_base = emit->GetCodePtr();
ctx->CTX_PC += 4;
ARMXEmitter emitter((u8*)ctx->CTX_PC);
emitter.NOP(1);
emitter.FlushIcache();
return true;
}
else
if (fastmem)
{
if (Store)
ARMReg temp2 = R10;
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
emit->BIC(temp, addr, mask); // 1
emit->MOVI2R(temp2, (u32)Memory::base); // 2-3
emit->ADD(temp, temp, temp2); // 4
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
const u32 ARMREGOFFSET = 4 * 5;
ARMXEmitter emitter(codePtr - ARMREGOFFSET);
switch (accessSize)
NEONXEmitter nemit(emit);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
case 8: // 8bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2
return 0;
break;
case 16: // 16bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2
return 0;
break;
case 32: // 32bit
emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2
break;
emit->VCVT(S0, RS, 0);
nemit.VREV32(I_8, D0, D0);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
emit->VSTR(S0, temp, 0);
}
emitter.PUSH(4, R0, R1, R2, R3); // 3
emitter.MOV(R0, rD); // Value - 4
emitter.MOV(R1, R10); // Addr- 5
emitter.BL(R14); // 6
emitter.POP(4, R0, R1, R2, R3); // 7
u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4);
ctx->CTX_PC = newPC;
emitter.FlushIcache();
return true;
else
{
nemit.VREV64(I_8, D0, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
nemit.VST1(I_64, D0, temp);
}
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
NEONXEmitter nemit(emit);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
nemit.VLD1(F_32, D0, temp);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
nemit.VLD1(I_64, RS, temp);
nemit.VREV64(I_8, RS, RS); // Byte swap to result
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(temp2, RS);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(temp2, RS);
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->STR(temp2, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->STRH(temp2, temp);
else
emit->STRB(RS, temp);
}
else
{
const u32 ARMREGOFFSET = 4 * 4;
ARMXEmitter emitter(codePtr - ARMREGOFFSET);
switch (accessSize)
trouble_offset = (emit->GetCodePtr() - code_base) / 4;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->LDR(RS, temp); // 5
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->LDRH(RS, temp);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->LDRB(RS, temp);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
case 8: // 8bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2
break;
case 16: // 16bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2
break;
case 32: // 32bit
emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2
break;
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, RS); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, RS);
}
emitter.PUSH(4, R0, R1, R2, R3); // 3
emitter.MOV(R0, R10); // 4
emitter.BL(R14); // 5
emitter.MOV(R14, R0); // 6
emitter.POP(4, R0, R1, R2, R3); // 7
emitter.MOV(rD, R14); // 8
ctx->CTX_PC -= ARMREGOFFSET + (4 * 4);
emitter.FlushIcache();
return true;
}
}
return 0;
else
{
if (flags & BackPatchInfo::FLAG_STORE &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOV(R1, addr);
emit->VCVT(S0, RS, 0);
emit->VMOV(R0, S0);
emit->MOVI2R(temp, (u32)&Memory::Write_U32);
emit->BL(temp);
}
else
{
emit->MOVI2R(temp, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(R0, RS);
emit->MOV(R2, addr);
#else
emit->VMOV(D0, RS);
emit->MOV(R0, addr);
#endif
emit->BL(temp);
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_LOAD &&
flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
emit->MOVI2R(temp, (u32)&Memory::Read_U32);
emit->BL(temp);
emit->VMOV(S0, R0);
emit->VCVT(RS, S0, 0);
emit->VCVT(V1, S0, 0);
}
else
{
emit->MOVI2R(temp, (u32)&Memory::Read_F64);
emit->BL(temp);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
emit->VMOV(RS, R0);
#else
emit->VMOV(RS, D0);
#endif
}
emit->POP(4, R0, R1, R2, R3);
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, RS);
emit->MOV(R1, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&Memory::Write_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&Memory::Write_U16);
else
emit->MOVI2R(temp, (u32)&Memory::Write_U8);
emit->BL(temp);
emit->POP(4, R0, R1, R2, R3);
}
else
{
emit->PUSH(4, R0, R1, R2, R3);
emit->MOV(R0, addr);
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->MOVI2R(temp, (u32)&Memory::Read_U32);
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->MOVI2R(temp, (u32)&Memory::Read_U16);
else if (flags & BackPatchInfo::FLAG_SIZE_8)
emit->MOVI2R(temp, (u32)&Memory::Read_U8);
emit->BL(temp);
emit->MOV(temp, R0);
emit->POP(4, R0, R1, R2, R3);
if (!(flags & BackPatchInfo::FLAG_REVERSE))
{
emit->MOV(RS, temp);
}
else
{
if (flags & BackPatchInfo::FLAG_SIZE_32)
emit->REV(RS, temp); // 6
else if (flags & BackPatchInfo::FLAG_SIZE_16)
emit->REV16(RS, temp);
}
}
}
if (do_padding)
{
BackPatchInfo& info = m_backpatch_info[flags];
u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size);
u32 code_size = emit->GetCodePtr() - code_base;
code_size /= 4;
emit->NOP(num_insts_max - code_size);
}
return trouble_offset;
}
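EmitBackpatchRoutine is also called directly by the load/store compilers later in this diff. A typical call-site shape, mirroring SafeLoadToReg below (the address is expected in R12 and the value/destination register is passed as RS; 'this' doubles as the emitter):

u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags,
                     SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
                     true /* do_padding: keep fast/slow variants equal-sized */,
                     RD);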
void JitArm::InitBackpatch()
{
u32 flags = 0;
BackPatchInfo info;
u8* code_base = GetWritableCodePtr();
u8* code_end;
// Writes
{
// 8bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_STORE |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
// Loads
{
// 8bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 16bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_16 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit - reverse
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_32 |
BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags, false, false, R0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, R0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 32bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F32;
EmitBackpatchRoutine(this, flags, false, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0, D1);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
// 64bit float
{
flags =
BackPatchInfo::FLAG_LOAD |
BackPatchInfo::FLAG_SIZE_F64;
EmitBackpatchRoutine(this, flags, false, false, D0);
code_end = GetWritableCodePtr();
info.m_slowmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
info.m_fastmem_trouble_inst_offset =
EmitBackpatchRoutine(this, flags, true, false, D0);
code_end = GetWritableCodePtr();
info.m_fastmem_size = (code_end - code_base) / 4;
SetCodePtr(code_base);
m_backpatch_info[flags] = info;
}
}
}
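Each block in InitBackpatch repeats the same measure-and-rewind dance: emit the slowmem variant, record its size, rewind, emit the fastmem variant, record its size and trouble offset, rewind. Folded into one hypothetical helper, the pattern would read (a sketch; this refactor is not part of the PR):

void JitArm::MeasureBackpatch(u32 flags, ARMReg rs, ARMReg v1)
{
    BackPatchInfo info;
    u8* code_base = GetWritableCodePtr();

    EmitBackpatchRoutine(this, flags, false, false, rs, v1);    // slowmem
    info.m_slowmem_size = (GetWritableCodePtr() - code_base) / 4;
    SetCodePtr(code_base);                                      // rewind

    info.m_fastmem_trouble_inst_offset =
        EmitBackpatchRoutine(this, flags, true, false, rs, v1); // fastmem
    info.m_fastmem_size = (GetWritableCodePtr() - code_base) / 4;
    SetCodePtr(code_base);

    m_backpatch_info[flags] = info;
}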

View File

@@ -18,114 +18,149 @@
using namespace ArmGen;
void JitArm::UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset)
void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
// All this gets replaced on backpatch
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(dest, dest, mask); // 1
MOVI2R(R14, (u32)Memory::base, false); // 2-3
ADD(dest, dest, R14); // 4
switch (accessSize)
{
case 32:
REV(value, value); // 5
break;
case 16:
REV16(value, value);
break;
case 8:
NOP(1);
break;
}
switch (accessSize)
{
case 32:
STR(value, dest); // 6
break;
case 16:
STRH(value, dest);
break;
case 8:
STRB(value, dest);
break;
}
NOP(1); // 7
}
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
void JitArm::SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset)
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem)
{
ARMReg RA;
ARMReg RB;
ARMReg RS = gpr.R(value);
u32 imm_addr = 0;
bool is_immediate = false;
if (dest != -1)
RA = gpr.R(dest);
if (regOffset != -1)
{
RB = gpr.R(regOffset);
MOV(R10, RB);
NOP(1);
}
else
{
MOVI2R(R10, (u32)offset, false);
}
if (dest != -1)
ADD(R10, R10, RA);
else
NOP(1);
MOV(R12, RS);
UnsafeStoreFromReg(R10, R12, accessSize, 0);
return;
}
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg rC = gpr.GetReg();
ARMReg RA = INVALID_REG;
ARMReg RB = INVALID_REG;
if (dest != -1)
RA = gpr.R(dest);
if (regOffset != -1)
RB = gpr.R(regOffset);
ARMReg RS = gpr.R(value);
switch (accessSize)
{
case 32:
MOVI2R(rA, (u32)&Memory::Write_U32);
break;
case 16:
MOVI2R(rA, (u32)&Memory::Write_U16);
break;
case 8:
MOVI2R(rA, (u32)&Memory::Write_U8);
break;
}
MOV(rB, RS);
if (regOffset == -1)
{
MOVI2R(rC, offset);
if (dest != -1)
ADD(rC, rC, RA);
{
if (gpr.IsImm(dest))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + offset;
}
else
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(dest));
}
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (dest != -1)
ADD(rC, RA, RB);
{
if (gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset);
}
else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(dest), off))
{
ADD(rA, gpr.R(regOffset), off);
}
else
{
MOVI2R(rA, gpr.GetImm(dest));
ADD(rA, rA, gpr.R(regOffset));
}
}
else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(regOffset), off))
{
ADD(rA, gpr.R(dest), off);
}
else
{
MOVI2R(rA, gpr.GetImm(regOffset));
ADD(rA, rA, gpr.R(dest));
}
}
else
{
ADD(rA, gpr.R(dest), gpr.R(regOffset));
}
}
else
MOV(rC, RB);
{
if (gpr.IsImm(regOffset))
{
is_immediate = true;
imm_addr = gpr.GetImm(regOffset);
}
else
{
MOV(rA, gpr.R(regOffset));
}
}
}
ARMReg RS = gpr.R(value);
u32 flags = BackPatchInfo::FLAG_STORE;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (is_immediate)
{
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
if (accessSize == 32)
{
REV(RS, RS);
STR(RS, R10, R11);
REV(RS, RS);
}
else if (accessSize == 16)
{
REV16(RS, RS);
STRH(RS, R10, R11);
REV16(RS, RS);
}
else
{
STRB(RS, R10, R11);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
}
else if (Memory::IsRAMAddress(imm_addr))
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, RS);
}
else
{
MOVI2R(rA, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, RS);
}
}
else
{
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, RS);
}
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
MOV(R1, rC);
BL(rA);
POP(4, R0, R1, R2, R3);
gpr.Unlock(rA, rB, rC);
}
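The optimizeGatherPipe branch above special-cases stores to 0xCC008xxx, the GameCube gather pipe: rather than a full memory call, it appends the byteswapped value at m_gatherPipe[m_gatherPipeCount] and bumps the count. The runtime effect of the emitted ARM, written out in C++ (a sketch of the semantics, not code from the PR):

#include <cstdint>

// What the REV + STR + count update compute at runtime for a 32-bit store.
static void GatherPipeStore32(uint8_t* pipe, uint32_t& count, uint32_t value)
{
    uint32_t swapped = __builtin_bswap32(value);          // the REV
    *reinterpret_cast<uint32_t*>(pipe + count) = swapped; // STR RS, R10, R11
    count += 4;                                           // accessSize >> 3
}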
void JitArm::stX(UGeckoInstruction inst)
@@ -138,7 +173,6 @@ void JitArm::stX(UGeckoInstruction inst)
u32 accessSize = 0;
s32 regOffset = -1;
bool update = false;
bool fastmem = false;
switch (inst.OPCD)
{
case 45: // sthu
@@ -152,7 +186,6 @@ void JitArm::stX(UGeckoInstruction inst)
case 183: // stwux
update = true;
case 151: // stwx
fastmem = true;
accessSize = 32;
regOffset = b;
break;
@@ -173,7 +206,6 @@ void JitArm::stX(UGeckoInstruction inst)
case 37: // stwu
update = true;
case 36: // stw
fastmem = true;
accessSize = 32;
break;
case 39: // stbu
@@ -182,7 +214,9 @@ void JitArm::stX(UGeckoInstruction inst)
accessSize = 8;
break;
}
SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, accessSize, offset);
if (update)
{
ARMReg rA = gpr.GetReg();
@@ -193,143 +227,135 @@ void JitArm::stX(UGeckoInstruction inst)
// Check for DSI exception prior to writing back address
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch DoNotWrite = B_CC(CC_NEQ);
if (a)
SetCC(CC_EQ);
if (regOffset == -1)
{
if (regOffset == -1)
{
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, RB);
}
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
if (regOffset == -1)
MOVI2R(RA, (u32)offset);
else
MOV(RA, RB);
ADD(RA, RA, RB);
}
SetJumpTarget(DoNotWrite);
SetCC();
gpr.Unlock(rA);
}
}
void JitArm::UnsafeLoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offsetReg, s32 offset)
void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update)
{
ARMReg rA = gpr.GetReg();
// We want to make sure to not get LR as a temp register
ARMReg rA = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (offsetReg == -1)
{
MOVI2R(rA, offset, false); // -3
ADD(addr, addr, rA); // - 1
}
else
{
NOP(2); // -3, -2
// offsetReg is preloaded here
ADD(addr, addr, gpr.R(offsetReg)); // -1
}
// All this gets replaced on backpatch
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
BIC(addr, addr, mask); // 1
MOVI2R(rA, (u32)Memory::base, false); // 2-3
ADD(addr, addr, rA); // 4
switch (accessSize)
{
case 32:
LDR(dest, addr); // 5
break;
case 16:
LDRH(dest, addr);
break;
case 8:
LDRB(dest, addr);
break;
}
switch (accessSize)
{
case 32:
REV(dest, dest); // 6
break;
case 16:
REV16(dest, dest);
break;
case 8:
NOP(1);
break;
}
NOP(2); // 7-8
gpr.Unlock(rA);
}
void JitArm::SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse)
{
ARMReg RD = gpr.R(dest);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem)
{
// Preload for fastmem
if (offsetReg != -1)
gpr.R(offsetReg);
if (addr != -1)
MOV(R10, gpr.R(addr));
{
if (gpr.IsImm(addr))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + offset;
}
else
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, offset);
ADD(rA, rA, gpr.R(addr));
}
}
}
else
MOV(R10, 0);
UnsafeLoadToReg(RD, R10, accessSize, offsetReg, offset);
return;
}
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
if (offsetReg == -1)
{
MOVI2R(rA, offset);
if (addr != -1)
ADD(rA, rA, gpr.R(addr));
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (addr != -1)
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
{
if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
}
else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(addr), off))
{
ADD(rA, gpr.R(offsetReg), off);
}
else
{
MOVI2R(rA, gpr.GetImm(addr));
ADD(rA, rA, gpr.R(offsetReg));
}
}
else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
{
Operand2 off;
if (TryMakeOperand2(gpr.GetImm(offsetReg), off))
{
ADD(rA, gpr.R(addr), off);
}
else
{
MOVI2R(rA, gpr.GetImm(offsetReg));
ADD(rA, rA, gpr.R(addr));
}
}
else
{
ADD(rA, gpr.R(addr), gpr.R(offsetReg));
}
}
else
MOV(rA, gpr.R(offsetReg));
{
if (gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else
{
MOV(rA, gpr.R(offsetReg));
}
}
}
switch (accessSize)
{
case 8:
MOVI2R(rB, (u32)&Memory::Read_U8);
break;
case 16:
MOVI2R(rB, (u32)&Memory::Read_U16);
break;
case 32:
MOVI2R(rB, (u32)&Memory::Read_U32);
break;
}
PUSH(4, R0, R1, R2, R3);
MOV(R0, rA);
BL(rB);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
if (signExtend) // Only on 16 loads
SXTH(RD, RD);
if (is_immediate)
MOVI2R(rA, imm_addr);
u32 flags = BackPatchInfo::FLAG_LOAD;
if (accessSize == 32)
flags |= BackPatchInfo::FLAG_SIZE_32;
else if (accessSize == 16)
flags |= BackPatchInfo::FLAG_SIZE_16;
else
flags |= BackPatchInfo::FLAG_SIZE_8;
if (reverse)
{
if (accessSize == 32)
REV(RD, RD);
else if (accessSize == 16)
REV16(RD, RD);
}
gpr.Unlock(rA, rB);
flags |= BackPatchInfo::FLAG_REVERSE;
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
!(is_immediate && Memory::IsRAMAddress(imm_addr)), dest);
if (signExtend) // Only on 16 loads
SXTH(dest, dest);
if (update)
MOV(gpr.R(addr), rA);
}
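Both address builders above lean on TryMakeOperand2, which succeeds only when the constant fits ARM's Operand2 immediate form: an 8-bit value rotated right by an even amount. A standalone version of that test (my sketch, not Dolphin's implementation):

#include <cstdint>

// True if imm can be encoded as ror(imm8, 2*n) for some n in 0..15.
static bool FitsOperand2(uint32_t imm)
{
    for (unsigned rot = 0; rot < 32; rot += 2)
    {
        // Rotate left to undo a rotate-right of the same amount.
        uint32_t v = rot ? ((imm << rot) | (imm >> (32 - rot))) : imm;
        if (v <= 0xFF)
            return true;
    }
    return false;
}

When the test fails, the code falls back to MOVI2R plus a register-register ADD, as seen in each else branch.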
void JitArm::lXX(UGeckoInstruction inst)
@@ -344,7 +370,6 @@ void JitArm::lXX(UGeckoInstruction inst)
bool update = false;
bool signExtend = false;
bool reverse = false;
bool fastmem = false;
switch (inst.OPCD)
{
@@ -354,21 +379,18 @@ void JitArm::lXX(UGeckoInstruction inst)
case 55: // lwzux
update = true;
case 23: // lwzx
fastmem = true;
accessSize = 32;
offsetReg = b;
break;
case 119: //lbzux
update = true;
case 87: // lbzx
fastmem = true;
accessSize = 8;
offsetReg = b;
break;
case 311: // lhzux
update = true;
case 279: // lhzx
fastmem = true;
accessSize = 16;
offsetReg = b;
break;
@@ -392,19 +414,16 @@ void JitArm::lXX(UGeckoInstruction inst)
case 33: // lwzu
update = true;
case 32: // lwz
fastmem = true;
accessSize = 32;
break;
case 35: // lbzu
update = true;
case 34: // lbz
fastmem = true;
accessSize = 8;
break;
case 41: // lhzu
update = true;
case 40: // lhz
fastmem = true;
accessSize = 16;
break;
case 43: // lhau
@@ -417,27 +436,13 @@ void JitArm::lXX(UGeckoInstruction inst)
// Check for exception before loading
ARMReg rA = gpr.GetReg(false);
ARMReg RD = gpr.R(d);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
TST(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_NEQ);
SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse);
if (update)
{
ARMReg RA = gpr.R(a);
if (offsetReg == -1)
{
rA = gpr.GetReg(false);
MOVI2R(rA, offset);
ADD(RA, RA, rA);
}
else
{
ADD(RA, RA, gpr.R(offsetReg));
}
}
SafeLoadToReg(RD, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse, update);
SetJumpTarget(DoNotLoad);
@@ -449,8 +454,6 @@ void JitArm::lXX(UGeckoInstruction inst)
(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
ARMReg RD = gpr.R(d);
// if it's still 0, we can wait until the next event
TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ);
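The surrounding condition recognizes a guest idle loop: a load whose next instruction compares the result against zero (0x2C000000, cmpwi) and whose third instruction branches back on equal (0x4182fff8, beq -8). As a standalone predicate built from those constants (simplified sketch; the real check also gates on OPCD and Wii mode as shown above):

// Matches: lwz rD, ...; cmpwi rD, 0; beq -8  -- a spin on a flag in memory.
static bool LooksLikeIdleLoop(u32 compilerPC)
{
    return Memory::ReadUnchecked_U32(compilerPC + 4) == 0x2C000000 &&
           Memory::ReadUnchecked_U32(compilerPC + 8) == 0x4182fff8;
}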

View File

@@ -24,16 +24,13 @@ void JitArm::lfXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
bool single = false;
u32 flags = BackPatchInfo::FLAG_LOAD;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
@@ -42,157 +39,152 @@ void JitArm::lfXX(UGeckoInstruction inst)
switch (inst.SUBOP10)
{
case 567: // lfsux
single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
offsetReg = b;
break;
case 535: // lfsx
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offsetReg = b;
break;
case 599: // lfdx
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
offsetReg = b;
break;
}
break;
case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
single = true;
break;
case 48: // lfs
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 50: // lfd
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
ARMReg v0 = fpr.R0(inst.FD, false), v1;
if (single)
ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG;
if (flags & BackPatchInfo::FLAG_SIZE_F32)
v1 = fpr.R1(inst.FD, false);
ARMReg rA = R11;
ARMReg addr = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
// Always uses RA
if (gpr.IsImm(a) && offsetReg == -1)
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else
{
ADD(rB, gpr.R(offsetReg), RA);
}
}
else
{
if (zeroA)
{
if (offsetReg == -1)
{
if (a)
Operand2 off;
if (TryMakeOperand2(offset, off))
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(rB, (u32)offset);
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
{
MOV(rB, RB);
}
ADD(addr, gpr.R(offsetReg), gpr.R(a));
}
}
}
else
{
if (offsetReg == -1)
{
if (a && gpr.IsImm(a))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else if (!a && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else if (a)
{
ADD(addr, gpr.R(a), gpr.R(offsetReg));
}
else
{
MOV(addr, gpr.R(offsetReg));
}
}
}
if (update)
RA = gpr.R(a);
if (is_immediate)
MOVI2R(addr, imm_addr);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
if (update)
MOV(RA, rB);
MOV(RA, addr);
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
fast_path = B();
EmitBackpatchRoutine(this, flags,
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
!(is_immediate && Memory::IsRAMAddress(imm_addr)), v0, v1);
{
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
if (single)
{
MOVI2R(rA, (u32)&Memory::Read_U32);
BL(rA);
VMOV(S0, R0);
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
MOVI2R(rA, (u32)&Memory::Read_F64);
BL(rA);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(v0, R0);
#else
VMOV(v0, D0);
#endif
}
POP(4, R0, R1, R2, R3);
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
{
FixupBranch slow_out = B();
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
NEONXEmitter nemit(this);
if (single)
{
nemit.VLD1(F_32, D0, rC);
nemit.VREV32(I_8, D0, D0); // Byte swap to result
VCVT(v0, S0, 0);
VCVT(v1, S0, 0);
}
else
{
nemit.VLD1(I_64, v0, rC);
nemit.VREV64(I_8, v0, v0); // Byte swap to result
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
}
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
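Note the two VCVTs in the lfs paths above: the loaded single is converted once and the result written to both halves of the paired-single register (v0 and v1). The net effect, spelled out (sketch; __builtin_bswap32 stands in for the VREV32 byteswap):

#include <cstdint>
#include <cstring>

// What the fast path computes for lfs: load, byteswap, widen, duplicate.
static void LoadSinglePaired(const uint8_t* src, double& ps0, double& ps1)
{
    uint32_t word;
    std::memcpy(&word, src, 4);
    word = __builtin_bswap32(word);  // guest memory is big-endian
    float f;
    std::memcpy(&f, &word, 4);
    ps0 = static_cast<double>(f);    // first VCVT
    ps1 = ps0;                       // second VCVT from the same S0
}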
@@ -201,16 +193,13 @@ void JitArm::stfXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITLoadStoreFloatingOff);
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RA;
u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16;
bool single = false;
u32 flags = BackPatchInfo::FLAG_STORE;
bool update = false;
bool zeroA = false;
s32 offsetReg = -1;
switch (inst.OPCD)
@@ -219,157 +208,196 @@ void JitArm::stfXX(UGeckoInstruction inst)
switch (inst.SUBOP10)
{
case 663: // stfsx
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 695: // stfsux
single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
offsetReg = b;
break;
case 727: // stfdx
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
offsetReg = b;
break;
case 759: // stfdux
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
offsetReg = b;
break;
}
break;
case 53: // stfsu
flags |= BackPatchInfo::FLAG_SIZE_F32;
update = true;
single = true;
break;
case 52: // stfs
single = true;
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F32;
break;
case 55: // stfdu
flags |= BackPatchInfo::FLAG_SIZE_F64;
update = true;
break;
case 54: // stfd
zeroA = true;
flags |= BackPatchInfo::FLAG_SIZE_F64;
break;
}
ARMReg v0 = fpr.R0(inst.FS);
ARMReg rA = R11;
ARMReg addr = R12;
u32 imm_addr = 0;
bool is_immediate = false;
if (update)
{
RA = gpr.R(a);
// Update path /always/ uses RA
if (offsetReg == -1) // uses SIMM_16
// Always uses RA
if (gpr.IsImm(a) && offsetReg == -1)
{
MOVI2R(rB, offset);
ADD(rB, rB, RA);
is_immediate = true;
imm_addr = offset + gpr.GetImm(a);
}
else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else
{
ADD(rB, gpr.R(offsetReg), RA);
if (offsetReg == -1)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ADD(addr, gpr.R(offsetReg), gpr.R(a));
}
}
}
else
{
if (zeroA)
if (offsetReg == -1)
{
if (offsetReg == -1)
if (a && gpr.IsImm(a))
{
if (a)
is_immediate = true;
imm_addr = gpr.GetImm(a) + offset;
}
else if (a)
{
Operand2 off;
if (TryMakeOperand2(offset, off))
{
RA = gpr.R(a);
MOVI2R(rB, offset);
ADD(rB, rB, RA);
ADD(addr, gpr.R(a), off);
}
else
{
MOVI2R(rB, (u32)offset);
MOVI2R(addr, offset);
ADD(addr, addr, gpr.R(a));
}
}
else
{
ARMReg RB = gpr.R(offsetReg);
if (a)
{
RA = gpr.R(a);
ADD(rB, RB, RA);
}
else
{
MOV(rB, RB);
}
is_immediate = true;
imm_addr = offset;
}
}
else
{
if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg);
}
else if (!a && gpr.IsImm(offsetReg))
{
is_immediate = true;
imm_addr = gpr.GetImm(offsetReg);
}
else if (a)
{
ADD(addr, gpr.R(a), gpr.R(offsetReg));
}
else
{
MOV(addr, gpr.R(offsetReg));
}
}
}
if (is_immediate)
MOVI2R(addr, imm_addr);
if (update)
{
RA = gpr.R(a);
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
SetCC(CC_NEQ);
MOV(RA, rB);
MOV(RA, addr);
SetCC();
}
// This branch gets changed to a NOP when the fastpath fails
FixupBranch fast_path;
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
fast_path = B();
if (is_immediate)
{
PUSH(4, R0, R1, R2, R3);
if (single)
if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe)
{
MOV(R1, rB);
VCVT(S0, v0, 0);
VMOV(R0, S0);
MOVI2R(rA, (u32)&Memory::Write_U32);
BL(rA);
}
else
{
MOVI2R(rA, (u32)&Memory::Write_F64);
#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
VMOV(R0, v0);
MOV(R2, rB);
#else
VMOV(D0, v0);
MOV(R0, rB);
#endif
BL(rA);
}
POP(4, R0, R1, R2, R3);
}
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem)
{
FixupBranch slow_out = B();
SetJumpTarget(fast_path);
{
Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
ARMReg rC = gpr.GetReg();
BIC(rC, rB, mask);
MOVI2R(rA, (u32)Memory::base);
ADD(rC, rC, rA);
int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_F64)
accessSize = 64;
else
accessSize = 32;
MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount);
MOVI2R(R10, (u32)GPFifo::m_gatherPipe);
LDR(R11, R14);
ADD(R10, R10, R11);
NEONXEmitter nemit(this);
if (single)
if (accessSize == 64)
{
PUSH(2, R0, R1);
nemit.VREV64(I_8, D0, v0);
VMOV(R0, D0);
STR(R0, R10, 0);
STR(R1, R10, 4);
POP(2, R0, R1);
}
else if (accessSize == 32)
{
VCVT(S0, v0, 0);
nemit.VREV32(I_8, D0, D0);
VSTR(S0, rC, 0);
VMOV(addr, S0);
STR(addr, R10);
}
else
{
nemit.VREV64(I_8, D0, v0);
VSTR(D0, rC, 0);
}
gpr.Unlock(rC);
}
SetJumpTarget(slow_out);
}
ADD(R11, R11, accessSize >> 3);
STR(R11, R14);
jit->js.fifoBytesThisBlock += accessSize >> 3;
gpr.Unlock(rA, rB);
}
else if (Memory::IsRAMAddress(imm_addr))
{
MOVI2R(addr, imm_addr);
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, v0);
}
else
{
MOVI2R(addr, imm_addr);
EmitBackpatchRoutine(this, flags, false, false, v0);
}
}
else
{
EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, v0);
}
}
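Finally, the __ARM_PCS_VFP blocks in both float paths exist because the JIT builds its calls to Memory::Read_F64/Write_F64 by hand, so it must match the target float ABI: softfp passes a double in core registers (r0/r1), hard-float passes it in d0 with the address in r0. A compile-time probe of which ABI is active (standalone sketch):

#include <cstdio>

int main()
{
#if defined(__ARM_PCS_VFP)
    std::puts("hard-float ABI: doubles in VFP registers (d0)");
#else
    std::puts("softfp ABI: doubles in core registers (r0/r1)");
#endif
}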