diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp index 7a95e38f5a..59ddf184e9 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp @@ -40,6 +40,7 @@ void JitArm::Init() code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); + InitBackpatch(); } void JitArm::ClearCache() diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 4d9493a463..1f8684bcc9 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -48,6 +48,26 @@ private: ArmFPRCache fpr; PPCAnalyst::CodeBuffer code_buffer; + struct BackPatchInfo + { + enum + { + FLAG_STORE = (1 << 0), + FLAG_LOAD = (1 << 1), + FLAG_SIZE_8 = (1 << 2), + FLAG_SIZE_16 = (1 << 3), + FLAG_SIZE_32 = (1 << 4), + FLAG_SIZE_F32 = (1 << 5), + FLAG_SIZE_F64 = (1 << 6), + FLAG_REVERSE = (1 << 7), + }; + + u32 m_fastmem_size; + u32 m_fastmem_trouble_inst_offset; + u32 m_slowmem_size; + }; + // The key is the flags + std::map<u32, BackPatchInfo> m_backpatch_info; void DoDownCount(); @@ -57,11 +77,19 @@ private: ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); - bool BackPatch(SContext* ctx); - void BeginTimeProfile(JitBlock* b); void EndTimeProfile(JitBlock* b); + bool BackPatch(SContext* ctx); + bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1); + // Initializes the information that backpatching needs + // This is required so we know the backpatch routine sizes and trouble offsets + void InitBackpatch(); + + // Returns the trouble instruction offset + // Zero if it isn't a fastmem routine + u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG); + public: JitArm() : code_buffer(32000) {} ~JitArm() {} @@ -118,13 +146,8 @@ public: void GetCarryAndClear(ArmGen::ARMReg reg); void FinalizeCarry(ArmGen::ARMReg reg); - // TODO: This shouldn't be here - void UnsafeStoreFromReg(ArmGen::ARMReg dest, ArmGen::ARMReg value, int accessSize, s32 offset); - void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset); - - void UnsafeLoadToReg(ArmGen::ARMReg dest, ArmGen::ARMReg addr, int accessSize, s32 offsetReg, s32 offset); - void SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse); - + void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset); + void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update); // OPCODES void unknown_instruction(UGeckoInstruction _inst); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp index 43349f1eca..c703e5a88d 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp @@ -16,47 +16,65 @@ using namespace ArmGen; // 1) It's really necessary. We don't know anything about the context. // 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be // that many of them in a typical program/game.
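The BackPatchInfo table declared in Jit.h above is what makes the in-place rewrite below possible: each flag combination keys one entry recording how many ARM instructions the fastmem and slowmem variants occupy, and at which instruction inside the fastmem variant the fault can occur. A minimal self-contained sketch of that bookkeeping follows; the stand-in struct mirrors the declaration above, while PaddedSizeInInstructions, RoutineStart and fault_pc are hypothetical names used only for illustration and are not part of this change.

#include <algorithm>
#include <cstdint>

// Stand-in for the BackPatchInfo struct declared in Jit.h above.
struct BackPatchInfo
{
    uint32_t m_fastmem_size;                // ARM instructions in the fastmem variant
    uint32_t m_fastmem_trouble_inst_offset; // index of the instruction that may fault
    uint32_t m_slowmem_size;                // ARM instructions in the slowmem variant
};

// Both variants are padded to the longer of the two, so the slowmem version
// can later be written over the fastmem version in place.
uint32_t PaddedSizeInInstructions(const BackPatchInfo& info)
{
    return std::max(info.m_fastmem_size, info.m_slowmem_size);
}

// Given the PC of the faulting ARM instruction (4 bytes each), recover where
// the fastmem routine started; BackPatch() re-emits the slowmem variant there.
const uint8_t* RoutineStart(const uint8_t* fault_pc, const BackPatchInfo& info)
{
    return fault_pc - info.m_fastmem_trouble_inst_offset * 4;
}

A 32-bit integer load that skips the byte swap, for example, is keyed as FLAG_LOAD | FLAG_SIZE_32 | FLAG_REVERSE and looked up via m_backpatch_info[flags] inside BackPatch() below.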
-static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Store, bool *new_system) +bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1) { + u32 inst = *(u32*)ptr; + u32 prev_inst = *(u32*)(ptr - 4); + u32 next_inst = *(u32*)(ptr + 4); u8 op = (inst >> 20) & 0xFF; - rD = (ARMReg)((inst >> 12) & 0xF); + *rD = (ARMReg)((inst >> 12) & 0xF); switch (op) { case 0x58: // STR { - Store = true; - accessSize = 32; + *flags |= + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_32; + *rD = (ARMReg)(prev_inst & 0xF); } break; case 0x59: // LDR { - Store = false; - accessSize = 32; + *flags |= + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_32; + // REV + if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30) + *flags |= BackPatchInfo::FLAG_REVERSE; } break; case 0x1D: // LDRH { - Store = false; - accessSize = 16; + *flags |= + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_16; + // REV16 + if((next_inst & 0x0FFF0FF0) != 0x06BF0FB0) + *flags |= BackPatchInfo::FLAG_REVERSE; } break; case 0x45 + 0x18: // LDRB { - Store = false; - accessSize = 8; + *flags |= + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_8; } break; case 0x5C: // STRB { - Store = true; - accessSize = 8; + *flags |= + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_8; + *rD = (ARMReg)((inst >> 12) & 0xF); } break; case 0x1C: // STRH { - Store = true; - accessSize = 16; + *flags |= + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_16; + *rD = (ARMReg)(prev_inst & 0xF); } break; default: @@ -66,10 +84,92 @@ static bool DisamLoadStore(const u32 inst, ARMReg &rD, u8 &accessSize, bool &Sto switch (op2) { case 0xD: // VLDR/VSTR - *new_system = true; + { + bool load = (inst >> 20) & 1; + bool single = !((inst >> 8) & 1); + + if (load) + *flags |= BackPatchInfo::FLAG_LOAD; + else + *flags |= BackPatchInfo::FLAG_STORE; + + if (single) + *flags |= BackPatchInfo::FLAG_SIZE_F32; + else + *flags |= BackPatchInfo::FLAG_SIZE_F64; + if (single) + { + if (!load) + { + u32 vcvt = *(u32*)(ptr - 8); + u32 src_register = vcvt & 0xF; + src_register |= (vcvt >> 1) & 0x10; + *rD = (ARMReg)(src_register + D0); + } + } + } break; case 0x4: // VST1/VLD1 - *new_system = true; + { + u32 size = (inst >> 6) & 0x3; + bool load = (inst >> 21) & 1; + if (load) + *flags |= BackPatchInfo::FLAG_LOAD; + else + *flags |= BackPatchInfo::FLAG_STORE; + + + if (size == 2) // 32bit + { + if (load) + { + // For 32bit loads we are loading to a temporary + // So we need to read PC+8,PC+12 to get the two destination registers + u32 vcvt_1 = *(u32*)(ptr + 8); + u32 vcvt_2 = *(u32*)(ptr + 12); + + u32 dest_register_1 = (vcvt_1 >> 12) & 0xF; + dest_register_1 |= (vcvt_1 >> 18) & 0x10; + + u32 dest_register_2 = (vcvt_2 >> 12) & 0xF; + dest_register_2 |= (vcvt_2 >> 18) & 0x10; + + // Make sure to encode the destination register to something our emitter understands + *rD = (ARMReg)(dest_register_1 + D0); + *V1 = (ARMReg)(dest_register_2 + D0); + } + else + { + // For 32bit stores we are storing from a temporary + // So we need to check the VCVT at PC-8 for the source register + u32 vcvt = *(u32*)(ptr - 8); + u32 src_register = vcvt & 0xF; + src_register |= (vcvt >> 1) & 0x10; + *rD = (ARMReg)(src_register + D0); + } + *flags |= BackPatchInfo::FLAG_SIZE_F32; + } + else if (size == 3) // 64bit + { + if (load) + { + // For 64bit loads we load directly in to the VFP register + u32 dest_register = (inst >> 12) & 0xF; + dest_register |= (inst >> 18) & 0x10; + // Make sure to encode the destination register to something 
our emitter understands + *rD = (ARMReg)(dest_register + D0); + } + else + { + // For 64bit stores we are storing from a temporary + // Check the previous VREV64 instruction for the real register + u32 src_register = prev_inst & 0xF; + src_register |= (prev_inst >> 1) & 0x10; + *rD = (ARMReg)(src_register + D0); + } + *flags |= BackPatchInfo::FLAG_SIZE_F64; + } + } break; default: printf("Op is 0x%02x\n", op); @@ -95,94 +195,484 @@ bool JitArm::BackPatch(SContext* ctx) // We need to get the destination register before we start u8* codePtr = (u8*)ctx->CTX_PC; u32 Value = *(u32*)codePtr; - ARMReg rD; - u8 accessSize; - bool Store; - bool new_system = false; + ARMReg rD = INVALID_REG; + ARMReg V1 = INVALID_REG; + u32 flags = 0; - if (!DisamLoadStore(Value, rD, accessSize, Store, &new_system)) + if (!DisasmLoadStore(codePtr, &flags, &rD, &V1)) { printf("Invalid backpatch at location 0x%08lx(0x%08x)\n", ctx->CTX_PC, Value); exit(0); } - if (new_system) - { - // The new system is a lot easier to backpatch than the old crap. - // Instead of backpatching over code and making sure we NOP pad and other crap - // We emit both the slow and fast path and branch over the slow path each time - // We search backwards until we find the second branch instruction - // Then proceed to replace it with a NOP and set that to the new PC. - // This ensures that we run the slow path and then branch over the fast path. + BackPatchInfo& info = m_backpatch_info[flags]; + ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4); + u32 new_pc = (u32)emitter.GetCodePtr(); + EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1); + emitter.FlushIcache(); + ctx->CTX_PC = new_pc; + return true; +} - // Run backwards until we find the branch we want to NOP - for (int branches = 2; branches > 0; ctx->CTX_PC -= 4) - if ((*(u32*)ctx->CTX_PC & 0x0F000000) == 0x0A000000) // B - --branches; +u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1) +{ + ARMReg addr = R12; + ARMReg temp = R11; + u32 trouble_offset = 0; + const u8* code_base = emit->GetCodePtr(); - ctx->CTX_PC += 4; - ARMXEmitter emitter((u8*)ctx->CTX_PC); - emitter.NOP(1); - emitter.FlushIcache(); - return true; - } - else + if (fastmem) { - if (Store) + ARMReg temp2 = R10; + Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) + emit->BIC(temp, addr, mask); // 1 + emit->MOVI2R(temp2, (u32)Memory::base); // 2-3 + emit->ADD(temp, temp, temp2); // 4 + + if (flags & BackPatchInfo::FLAG_STORE && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) { - const u32 ARMREGOFFSET = 4 * 5; - ARMXEmitter emitter(codePtr - ARMREGOFFSET); - switch (accessSize) + NEONXEmitter nemit(emit); + if (flags & BackPatchInfo::FLAG_SIZE_F32) { - case 8: // 8bit - emitter.MOVI2R(R14, (u32)&Memory::Write_U8, false); // 1-2 - return 0; - break; - case 16: // 16bit - emitter.MOVI2R(R14, (u32)&Memory::Write_U16, false); // 1-2 - return 0; - break; - case 32: // 32bit - emitter.MOVI2R(R14, (u32)&Memory::Write_U32, false); // 1-2 - break; + emit->VCVT(S0, RS, 0); + nemit.VREV32(I_8, D0, D0); + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + emit->VSTR(S0, temp, 0); } - emitter.PUSH(4, R0, R1, R2, R3); // 3 - emitter.MOV(R0, rD); // Value - 4 - emitter.MOV(R1, R10); // Addr- 5 - emitter.BL(R14); // 6 - emitter.POP(4, R0, R1, R2, R3); // 7 - u32 newPC = ctx->CTX_PC - (ARMREGOFFSET + 4 * 4); - ctx->CTX_PC = newPC; - emitter.FlushIcache(); - return true; + else + { + nemit.VREV64(I_8, D0, RS); + 
trouble_offset = (emit->GetCodePtr() - code_base) / 4; + nemit.VST1(I_64, D0, temp); + } + } + else if (flags & BackPatchInfo::FLAG_LOAD && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + NEONXEmitter nemit(emit); + + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + nemit.VLD1(F_32, D0, temp); + nemit.VREV32(I_8, D0, D0); // Byte swap to result + emit->VCVT(RS, S0, 0); + emit->VCVT(V1, S0, 0); + } + else + { + nemit.VLD1(I_64, RS, temp); + nemit.VREV64(I_8, RS, RS); // Byte swap to result + } + } + else if (flags & BackPatchInfo::FLAG_STORE) + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV(temp2, RS); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(temp2, RS); + + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->STR(temp2, temp); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->STRH(temp2, temp); + else + emit->STRB(RS, temp); } else { - const u32 ARMREGOFFSET = 4 * 4; - ARMXEmitter emitter(codePtr - ARMREGOFFSET); - switch (accessSize) + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->LDR(RS, temp); // 5 + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->LDRH(RS, temp); + else if (flags & BackPatchInfo::FLAG_SIZE_8) + emit->LDRB(RS, temp); + + + if (!(flags & BackPatchInfo::FLAG_REVERSE)) { - case 8: // 8bit - emitter.MOVI2R(R14, (u32)&Memory::Read_U8, false); // 2 - break; - case 16: // 16bit - emitter.MOVI2R(R14, (u32)&Memory::Read_U16, false); // 2 - break; - case 32: // 32bit - emitter.MOVI2R(R14, (u32)&Memory::Read_U32, false); // 2 - break; + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV(RS, RS); // 6 + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(RS, RS); } - emitter.PUSH(4, R0, R1, R2, R3); // 3 - emitter.MOV(R0, R10); // 4 - emitter.BL(R14); // 5 - emitter.MOV(R14, R0); // 6 - emitter.POP(4, R0, R1, R2, R3); // 7 - emitter.MOV(rD, R14); // 8 - ctx->CTX_PC -= ARMREGOFFSET + (4 * 4); - emitter.FlushIcache(); - return true; } } - return 0; + else + { + if (flags & BackPatchInfo::FLAG_STORE && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + emit->PUSH(4, R0, R1, R2, R3); + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + emit->MOV(R1, addr); + emit->VCVT(S0, RS, 0); + emit->VMOV(R0, S0); + emit->MOVI2R(temp, (u32)&Memory::Write_U32); + emit->BL(temp); + } + else + { + emit->MOVI2R(temp, (u32)&Memory::Write_F64); +#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 + emit->VMOV(R0, RS); + emit->MOV(R2, addr); +#else + emit->VMOV(D0, RS); + emit->MOV(R0, addr); +#endif + emit->BL(temp); + } + emit->POP(4, R0, R1, R2, R3); + } + else if (flags & BackPatchInfo::FLAG_LOAD && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + emit->PUSH(4, R0, R1, R2, R3); + emit->MOV(R0, addr); + if (flags & BackPatchInfo::FLAG_SIZE_F32) + { + emit->MOVI2R(temp, (u32)&Memory::Read_U32); + emit->BL(temp); + emit->VMOV(S0, R0); + emit->VCVT(RS, S0, 0); + emit->VCVT(V1, S0, 0); + } + else + { + emit->MOVI2R(temp, (u32)&Memory::Read_F64); + emit->BL(temp); + +#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 + emit->VMOV(RS, R0); +#else + emit->VMOV(RS, D0); +#endif + } + emit->POP(4, R0, R1, R2, R3); + } + else if (flags & BackPatchInfo::FLAG_STORE) + { + emit->PUSH(4, R0, R1, R2, R3); + emit->MOV(R0, RS); + emit->MOV(R1, addr); + + if (flags & BackPatchInfo::FLAG_SIZE_32) + 
emit->MOVI2R(temp, (u32)&Memory::Write_U32); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->MOVI2R(temp, (u32)&Memory::Write_U16); + else + emit->MOVI2R(temp, (u32)&Memory::Write_U8); + + emit->BL(temp); + emit->POP(4, R0, R1, R2, R3); + } + else + { + emit->PUSH(4, R0, R1, R2, R3); + emit->MOV(R0, addr); + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->MOVI2R(temp, (u32)&Memory::Read_U32); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->MOVI2R(temp, (u32)&Memory::Read_U16); + else if (flags & BackPatchInfo::FLAG_SIZE_8) + emit->MOVI2R(temp, (u32)&Memory::Read_U8); + + emit->BL(temp); + emit->MOV(temp, R0); + emit->POP(4, R0, R1, R2, R3); + + if (!(flags & BackPatchInfo::FLAG_REVERSE)) + { + emit->MOV(RS, temp); + } + else + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV(RS, temp); // 6 + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(RS, temp); + } + } + } + + if (do_padding) + { + BackPatchInfo& info = m_backpatch_info[flags]; + u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size); + + u32 code_size = emit->GetCodePtr() - code_base; + code_size /= 4; + + emit->NOP(num_insts_max - code_size); + } + + return trouble_offset; } +void JitArm::InitBackpatch() +{ + u32 flags = 0; + BackPatchInfo info; + u8* code_base = GetWritableCodePtr(); + u8* code_end; + + // Writes + { + // 8bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_8; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_16; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_32; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit float + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_F32; + EmitBackpatchRoutine(this, flags, false, false, D0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, D0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 64bit float + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_F64; + EmitBackpatchRoutine(this, flags, false, false, D0); + code_end = 
GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, D0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + + } + + // Loads + { + // 8bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_8; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_16; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_32; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + + // 16bit - reverse + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_16 | + BackPatchInfo::FLAG_REVERSE; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit - reverse + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_32 | + BackPatchInfo::FLAG_REVERSE; + EmitBackpatchRoutine(this, flags, false, false, R0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, R0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit float + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_F32; + EmitBackpatchRoutine(this, flags, false, false, D0, D1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, D0, D1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 64bit float + { + flags = + BackPatchInfo::FLAG_LOAD | + 
BackPatchInfo::FLAG_SIZE_F64; + EmitBackpatchRoutine(this, flags, false, false, D0); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, D0); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + } +} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp index c1f13f288f..1f50c7fb49 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -18,114 +18,149 @@ using namespace ArmGen; -void JitArm::UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) +void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset) { - // All this gets replaced on backpatch - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(dest, dest, mask); // 1 - MOVI2R(R14, (u32)Memory::base, false); // 2-3 - ADD(dest, dest, R14); // 4 - switch (accessSize) - { - case 32: - REV(value, value); // 5 - break; - case 16: - REV16(value, value); - break; - case 8: - NOP(1); - break; - } - switch (accessSize) - { - case 32: - STR(value, dest); // 6 - break; - case 16: - STRH(value, dest); - break; - case 8: - STRB(value, dest); - break; - } - NOP(1); // 7 -} + // We want to make sure to not get LR as a temp register + ARMReg rA = R12; -void JitArm::SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset) -{ - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem) - { - ARMReg RA; - ARMReg RB; - ARMReg RS = gpr.R(value); + u32 imm_addr = 0; + bool is_immediate = false; - if (dest != -1) - RA = gpr.R(dest); - - if (regOffset != -1) - { - RB = gpr.R(regOffset); - MOV(R10, RB); - NOP(1); - } - else - { - MOVI2R(R10, (u32)offset, false); - } - - if (dest != -1) - ADD(R10, R10, RA); - else - NOP(1); - - MOV(R12, RS); - UnsafeStoreFromReg(R10, R12, accessSize, 0); - return; - } - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - ARMReg rC = gpr.GetReg(); - ARMReg RA = INVALID_REG; - ARMReg RB = INVALID_REG; - if (dest != -1) - RA = gpr.R(dest); - if (regOffset != -1) - RB = gpr.R(regOffset); - ARMReg RS = gpr.R(value); - switch (accessSize) - { - case 32: - MOVI2R(rA, (u32)&Memory::Write_U32); - break; - case 16: - MOVI2R(rA, (u32)&Memory::Write_U16); - break; - case 8: - MOVI2R(rA, (u32)&Memory::Write_U8); - break; - } - MOV(rB, RS); if (regOffset == -1) { - MOVI2R(rC, offset); if (dest != -1) - ADD(rC, rC, RA); + { + if (gpr.IsImm(dest)) + { + is_immediate = true; + imm_addr = gpr.GetImm(dest) + offset; + } + else + { + Operand2 off; + if (TryMakeOperand2(offset, off)) + { + ADD(rA, gpr.R(dest), off); + } + else + { + MOVI2R(rA, offset); + ADD(rA, rA, gpr.R(dest)); + } + } + } + else + { + is_immediate = true; + imm_addr = offset; + } } else { if (dest != -1) - ADD(rC, RA, RB); + { + if (gpr.IsImm(dest) && gpr.IsImm(regOffset)) + { + is_immediate = true; + imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset); + } + else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset)) + { + Operand2 off; + if (TryMakeOperand2(gpr.GetImm(dest), off)) + { + ADD(rA, gpr.R(regOffset), off); + } + else + { + MOVI2R(rA, gpr.GetImm(dest)); + ADD(rA, rA, gpr.R(regOffset)); + } + } + else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset)) + { + Operand2 
off; + if (TryMakeOperand2(gpr.GetImm(regOffset), off)) + { + ADD(rA, gpr.R(dest), off); + } + else + { + MOVI2R(rA, gpr.GetImm(regOffset)); + ADD(rA, rA, gpr.R(dest)); + } + } + else + { + ADD(rA, gpr.R(dest), gpr.R(regOffset)); + } + } else - MOV(rC, RB); + { + if (gpr.IsImm(regOffset)) + { + is_immediate = true; + imm_addr = gpr.GetImm(regOffset); + } + else + { + MOV(rA, gpr.R(regOffset)); + } + } + } + ARMReg RS = gpr.R(value); + + u32 flags = BackPatchInfo::FLAG_STORE; + if (accessSize == 32) + flags |= BackPatchInfo::FLAG_SIZE_32; + else if (accessSize == 16) + flags |= BackPatchInfo::FLAG_SIZE_16; + else + flags |= BackPatchInfo::FLAG_SIZE_8; + + if (is_immediate) + { + if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe) + { + MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount); + MOVI2R(R10, (u32)GPFifo::m_gatherPipe); + LDR(R11, R14); + if (accessSize == 32) + { + REV(RS, RS); + STR(RS, R10, R11); + REV(RS, RS); + } + else if (accessSize == 16) + { + REV16(RS, RS); + STRH(RS, R10, R11); + REV16(RS, RS); + } + else + { + STRB(RS, R10, R11); + } + ADD(R11, R11, accessSize >> 3); + STR(R11, R14); + jit->js.fifoBytesThisBlock += accessSize >> 3; + } + else if (Memory::IsRAMAddress(imm_addr)) + { + MOVI2R(rA, imm_addr); + EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, RS); + } + else + { + MOVI2R(rA, imm_addr); + EmitBackpatchRoutine(this, flags, false, false, RS); + } + } + else + { + EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, RS); } - PUSH(4, R0, R1, R2, R3); - MOV(R0, rB); - MOV(R1, rC); - BL(rA); - POP(4, R0, R1, R2, R3); - gpr.Unlock(rA, rB, rC); } void JitArm::stX(UGeckoInstruction inst) @@ -138,7 +173,6 @@ void JitArm::stX(UGeckoInstruction inst) u32 accessSize = 0; s32 regOffset = -1; bool update = false; - bool fastmem = false; switch (inst.OPCD) { case 45: // sthu @@ -152,7 +186,6 @@ void JitArm::stX(UGeckoInstruction inst) case 183: // stwux update = true; case 151: // stwx - fastmem = true; accessSize = 32; regOffset = b; break; @@ -173,7 +206,6 @@ void JitArm::stX(UGeckoInstruction inst) case 37: // stwu update = true; case 36: // stw - fastmem = true; accessSize = 32; break; case 39: // stbu @@ -182,7 +214,9 @@ void JitArm::stX(UGeckoInstruction inst) accessSize = 8; break; } - SafeStoreFromReg(fastmem, update ? a : (a ? a : -1), s, regOffset, accessSize, offset); + + SafeStoreFromReg(update ? a : (a ? 
a : -1), s, regOffset, accessSize, offset); + if (update) { ARMReg rA = gpr.GetReg(); @@ -193,143 +227,135 @@ void JitArm::stX(UGeckoInstruction inst) // Check for DSI exception prior to writing back address LDR(rA, R9, PPCSTATE_OFF(Exceptions)); TST(rA, EXCEPTION_DSI); - FixupBranch DoNotWrite = B_CC(CC_NEQ); - if (a) + SetCC(CC_EQ); + if (regOffset == -1) { - if (regOffset == -1) - { - MOVI2R(rA, offset); - ADD(RA, RA, rA); - } - else - { - ADD(RA, RA, RB); - } + MOVI2R(rA, offset); + ADD(RA, RA, rA); } else { - if (regOffset == -1) - MOVI2R(RA, (u32)offset); - else - MOV(RA, RB); + ADD(RA, RA, RB); } - SetJumpTarget(DoNotWrite); + SetCC(); gpr.Unlock(rA); } } -void JitArm::UnsafeLoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offsetReg, s32 offset) +void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update) { - ARMReg rA = gpr.GetReg(); + // We want to make sure to not get LR as a temp register + ARMReg rA = R12; + + u32 imm_addr = 0; + bool is_immediate = false; + if (offsetReg == -1) { - MOVI2R(rA, offset, false); // -3 - ADD(addr, addr, rA); // - 1 - } - else - { - NOP(2); // -3, -2 - // offsetReg is preloaded here - ADD(addr, addr, gpr.R(offsetReg)); // -1 - } - - // All this gets replaced on backpatch - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(addr, addr, mask); // 1 - MOVI2R(rA, (u32)Memory::base, false); // 2-3 - ADD(addr, addr, rA); // 4 - switch (accessSize) - { - case 32: - LDR(dest, addr); // 5 - break; - case 16: - LDRH(dest, addr); - break; - case 8: - LDRB(dest, addr); - break; - } - switch (accessSize) - { - case 32: - REV(dest, dest); // 6 - break; - case 16: - REV16(dest, dest); - break; - case 8: - NOP(1); - break; - - } - NOP(2); // 7-8 - gpr.Unlock(rA); -} - -void JitArm::SafeLoadToReg(bool fastmem, u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse) -{ - ARMReg RD = gpr.R(dest); - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && fastmem) - { - // Preload for fastmem - if (offsetReg != -1) - gpr.R(offsetReg); - if (addr != -1) - MOV(R10, gpr.R(addr)); + { + if (gpr.IsImm(addr)) + { + is_immediate = true; + imm_addr = gpr.GetImm(addr) + offset; + } + else + { + Operand2 off; + if (TryMakeOperand2(offset, off)) + { + ADD(rA, gpr.R(addr), off); + } + else + { + MOVI2R(rA, offset); + ADD(rA, rA, gpr.R(addr)); + } + } + } else - MOV(R10, 0); - - UnsafeLoadToReg(RD, R10, accessSize, offsetReg, offset); - return; - } - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - - if (offsetReg == -1) - { - MOVI2R(rA, offset); - if (addr != -1) - ADD(rA, rA, gpr.R(addr)); + { + is_immediate = true; + imm_addr = offset; + } } else { if (addr != -1) - ADD(rA, gpr.R(addr), gpr.R(offsetReg)); + { + if (gpr.IsImm(addr) && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg); + } + else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg)) + { + Operand2 off; + if (TryMakeOperand2(gpr.GetImm(addr), off)) + { + ADD(rA, gpr.R(offsetReg), off); + } + else + { + MOVI2R(rA, gpr.GetImm(addr)); + ADD(rA, rA, gpr.R(offsetReg)); + } + } + else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg)) + { + Operand2 off; + if (TryMakeOperand2(gpr.GetImm(offsetReg), off)) + { + ADD(rA, gpr.R(addr), off); + } + else + { + MOVI2R(rA, gpr.GetImm(offsetReg)); + ADD(rA, rA, gpr.R(addr)); + } + } + else + { + ADD(rA, gpr.R(addr), gpr.R(offsetReg)); + } + } else - MOV(rA, gpr.R(offsetReg)); + { + if 
(gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(offsetReg); + } + else + { + MOV(rA, gpr.R(offsetReg)); + } + } } - switch (accessSize) - { - case 8: - MOVI2R(rB, (u32)&Memory::Read_U8); - break; - case 16: - MOVI2R(rB, (u32)&Memory::Read_U16); - break; - case 32: - MOVI2R(rB, (u32)&Memory::Read_U32); - break; - } - PUSH(4, R0, R1, R2, R3); - MOV(R0, rA); - BL(rB); - MOV(rA, R0); - POP(4, R0, R1, R2, R3); - MOV(RD, rA); - if (signExtend) // Only on 16 loads - SXTH(RD, RD); + if (is_immediate) + MOVI2R(rA, imm_addr); + + u32 flags = BackPatchInfo::FLAG_LOAD; + if (accessSize == 32) + flags |= BackPatchInfo::FLAG_SIZE_32; + else if (accessSize == 16) + flags |= BackPatchInfo::FLAG_SIZE_16; + else + flags |= BackPatchInfo::FLAG_SIZE_8; + if (reverse) - { - if (accessSize == 32) - REV(RD, RD); - else if (accessSize == 16) - REV16(RD, RD); - } - gpr.Unlock(rA, rB); + flags |= BackPatchInfo::FLAG_REVERSE; + + EmitBackpatchRoutine(this, flags, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + !(is_immediate && Memory::IsRAMAddress(imm_addr)), dest); + + if (signExtend) // Only on 16 loads + SXTH(dest, dest); + + if (update) + MOV(gpr.R(addr), rA); } void JitArm::lXX(UGeckoInstruction inst) @@ -344,7 +370,6 @@ void JitArm::lXX(UGeckoInstruction inst) bool update = false; bool signExtend = false; bool reverse = false; - bool fastmem = false; switch (inst.OPCD) { @@ -354,21 +379,18 @@ void JitArm::lXX(UGeckoInstruction inst) case 55: // lwzux update = true; case 23: // lwzx - fastmem = true; accessSize = 32; offsetReg = b; break; case 119: //lbzux update = true; case 87: // lbzx - fastmem = true; accessSize = 8; offsetReg = b; break; case 311: // lhzux update = true; case 279: // lhzx - fastmem = true; accessSize = 16; offsetReg = b; break; @@ -392,19 +414,16 @@ void JitArm::lXX(UGeckoInstruction inst) case 33: // lwzu update = true; case 32: // lwz - fastmem = true; accessSize = 32; break; case 35: // lbzu update = true; case 34: // lbz - fastmem = true; accessSize = 8; break; case 41: // lhzu update = true; case 40: // lhz - fastmem = true; accessSize = 16; break; case 43: // lhau @@ -417,27 +436,13 @@ void JitArm::lXX(UGeckoInstruction inst) // Check for exception before loading ARMReg rA = gpr.GetReg(false); + ARMReg RD = gpr.R(d); LDR(rA, R9, PPCSTATE_OFF(Exceptions)); TST(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_NEQ); - SafeLoadToReg(fastmem, d, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse); - - if (update) - { - ARMReg RA = gpr.R(a); - if (offsetReg == -1) - { - rA = gpr.GetReg(false); - MOVI2R(rA, offset); - ADD(RA, RA, rA); - } - else - { - ADD(RA, RA, gpr.R(offsetReg)); - } - } + SafeLoadToReg(RD, update ? a : (a ? 
a : -1), offsetReg, accessSize, offset, signExtend, reverse, update); SetJumpTarget(DoNotLoad); @@ -449,8 +454,6 @@ void JitArm::lXX(UGeckoInstruction inst) (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { - ARMReg RD = gpr.R(d); - // if it's still 0, we can wait until the next event TST(RD, RD); FixupBranch noIdle = B_CC(CC_NEQ); diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp index ea5fecf5ce..e0c95152c1 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp @@ -24,16 +24,13 @@ void JitArm::lfXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; - bool single = false; + u32 flags = BackPatchInfo::FLAG_LOAD; bool update = false; - bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) @@ -42,157 +39,152 @@ void JitArm::lfXX(UGeckoInstruction inst) switch (inst.SUBOP10) { case 567: // lfsux - single = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; update = true; offsetReg = b; break; case 535: // lfsx - single = true; - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; offsetReg = b; break; case 631: // lfdux + flags |= BackPatchInfo::FLAG_SIZE_F64; update = true; offsetReg = b; break; case 599: // lfdx - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F64; offsetReg = b; break; } break; case 49: // lfsu + flags |= BackPatchInfo::FLAG_SIZE_F32; update = true; - single = true; break; case 48: // lfs - single = true; - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; break; case 51: // lfdu + flags |= BackPatchInfo::FLAG_SIZE_F64; update = true; break; case 50: // lfd - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F64; break; } - ARMReg v0 = fpr.R0(inst.FD, false), v1; - if (single) + ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG; + if (flags & BackPatchInfo::FLAG_SIZE_F32) v1 = fpr.R1(inst.FD, false); + ARMReg rA = R11; + ARMReg addr = R12; + + u32 imm_addr = 0; + bool is_immediate = false; if (update) { - RA = gpr.R(a); - // Update path /always/ uses RA - if (offsetReg == -1) // uses SIMM_16 + // Always uses RA + if (gpr.IsImm(a) && offsetReg == -1) { - MOVI2R(rB, offset); - ADD(rB, rB, RA); + is_immediate = true; + imm_addr = offset + gpr.GetImm(a); + } + else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); } else - { - ADD(rB, gpr.R(offsetReg), RA); - } - } - else - { - if (zeroA) { if (offsetReg == -1) { - if (a) + Operand2 off; + if (TryMakeOperand2(offset, off)) { - RA = gpr.R(a); - MOVI2R(rB, offset); - ADD(rB, rB, RA); + ADD(addr, gpr.R(a), off); } else { - MOVI2R(rB, (u32)offset); + MOVI2R(addr, offset); + ADD(addr, addr, gpr.R(a)); } } else { - ARMReg RB = gpr.R(offsetReg); - if (a) - { - RA = gpr.R(a); - ADD(rB, RB, RA); - } - else - { - MOV(rB, RB); - } + ADD(addr, gpr.R(offsetReg), gpr.R(a)); } } } + else + { + if (offsetReg == -1) + { + if (a && gpr.IsImm(a)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + offset; + } + else if (a) + { + Operand2 off; + if (TryMakeOperand2(offset, off)) + { + ADD(addr, gpr.R(a), off); + } + else + { + MOVI2R(addr, offset); + ADD(addr, addr, gpr.R(a)); + 
} + } + else + { + is_immediate = true; + imm_addr = offset; + } + } + else + { + if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); + } + else if (!a && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(offsetReg); + } + else if (a) + { + ADD(addr, gpr.R(a), gpr.R(offsetReg)); + } + else + { + MOV(addr, gpr.R(offsetReg)); + } + } + } + + if (update) + RA = gpr.R(a); + + if (is_immediate) + MOVI2R(addr, imm_addr); + LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); FixupBranch DoNotLoad = B_CC(CC_EQ); if (update) - MOV(RA, rB); + MOV(RA, addr); - // This branch gets changed to a NOP when the fastpath fails - FixupBranch fast_path; - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem) - fast_path = B(); + EmitBackpatchRoutine(this, flags, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + !(is_immediate && Memory::IsRAMAddress(imm_addr)), v0, v1); - { - PUSH(4, R0, R1, R2, R3); - MOV(R0, rB); - if (single) - { - MOVI2R(rA, (u32)&Memory::Read_U32); - BL(rA); - VMOV(S0, R0); - VCVT(v0, S0, 0); - VCVT(v1, S0, 0); - } - else - { - MOVI2R(rA, (u32)&Memory::Read_F64); - BL(rA); - -#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 - VMOV(v0, R0); -#else - VMOV(v0, D0); -#endif - } - POP(4, R0, R1, R2, R3); - } - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem) - { - FixupBranch slow_out = B(); - SetJumpTarget(fast_path); - { - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - ARMReg rC = gpr.GetReg(); - BIC(rC, rB, mask); - MOVI2R(rA, (u32)Memory::base); - ADD(rC, rC, rA); - - NEONXEmitter nemit(this); - if (single) - { - nemit.VLD1(F_32, D0, rC); - nemit.VREV32(I_8, D0, D0); // Byte swap to result - VCVT(v0, S0, 0); - VCVT(v1, S0, 0); - } - else - { - nemit.VLD1(I_64, v0, rC); - nemit.VREV64(I_8, v0, v0); // Byte swap to result - } - gpr.Unlock(rC); - } - SetJumpTarget(slow_out); - } - - gpr.Unlock(rA, rB); SetJumpTarget(DoNotLoad); } @@ -201,16 +193,13 @@ void JitArm::stfXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITLoadStoreFloatingOff); - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); ARMReg RA; u32 a = inst.RA, b = inst.RB; s32 offset = inst.SIMM_16; - bool single = false; + u32 flags = BackPatchInfo::FLAG_STORE; bool update = false; - bool zeroA = false; s32 offsetReg = -1; switch (inst.OPCD) @@ -219,157 +208,196 @@ void JitArm::stfXX(UGeckoInstruction inst) switch (inst.SUBOP10) { case 663: // stfsx - single = true; - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; offsetReg = b; break; case 695: // stfsux - single = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; offsetReg = b; break; case 727: // stfdx - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F64; offsetReg = b; break; case 759: // stfdux + flags |= BackPatchInfo::FLAG_SIZE_F64; update = true; offsetReg = b; break; } break; case 53: // stfsu + flags |= BackPatchInfo::FLAG_SIZE_F32; update = true; - single = true; break; case 52: // stfs - single = true; - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F32; break; case 55: // stfdu + flags |= BackPatchInfo::FLAG_SIZE_F64; update = true; break; case 54: // stfd - zeroA = true; + flags |= BackPatchInfo::FLAG_SIZE_F64; break; } ARMReg v0 = fpr.R0(inst.FS); + ARMReg rA = R11; + ARMReg addr = R12; + + u32 imm_addr = 0; + bool is_immediate = false; if (update) { - RA = gpr.R(a); - // Update path /always/ uses RA - if (offsetReg == -1) // uses SIMM_16 + // Always uses RA + if (gpr.IsImm(a) && 
offsetReg == -1) { - MOVI2R(rB, offset); - ADD(rB, rB, RA); + is_immediate = true; + imm_addr = offset + gpr.GetImm(a); + } + else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); } else { - ADD(rB, gpr.R(offsetReg), RA); + if (offsetReg == -1) + { + Operand2 off; + if (TryMakeOperand2(offset, off)) + { + ADD(addr, gpr.R(a), off); + } + else + { + MOVI2R(addr, offset); + ADD(addr, addr, gpr.R(a)); + } + } + else + { + ADD(addr, gpr.R(offsetReg), gpr.R(a)); + } } } else { - if (zeroA) + if (offsetReg == -1) { - if (offsetReg == -1) + if (a && gpr.IsImm(a)) { - if (a) + is_immediate = true; + imm_addr = gpr.GetImm(a) + offset; + } + else if (a) + { + Operand2 off; + if (TryMakeOperand2(offset, off)) { - RA = gpr.R(a); - MOVI2R(rB, offset); - ADD(rB, rB, RA); + ADD(addr, gpr.R(a), off); } else { - MOVI2R(rB, (u32)offset); + MOVI2R(addr, offset); + ADD(addr, addr, gpr.R(a)); } } else { - ARMReg RB = gpr.R(offsetReg); - if (a) - { - RA = gpr.R(a); - ADD(rB, RB, RA); - } - else - { - MOV(rB, RB); - } + is_immediate = true; + imm_addr = offset; + } + } + else + { + if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); + } + else if (!a && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(offsetReg); + } + else if (a) + { + ADD(addr, gpr.R(a), gpr.R(offsetReg)); + } + else + { + MOV(addr, gpr.R(offsetReg)); } } } + if (is_immediate) + MOVI2R(addr, imm_addr); + if (update) { + RA = gpr.R(a); LDR(rA, R9, PPCSTATE_OFF(Exceptions)); CMP(rA, EXCEPTION_DSI); SetCC(CC_NEQ); - MOV(RA, rB); + MOV(RA, addr); SetCC(); } - // This branch gets changed to a NOP when the fastpath fails - FixupBranch fast_path; - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem) - fast_path = B(); - + if (is_immediate) { - PUSH(4, R0, R1, R2, R3); - if (single) + if ((imm_addr & 0xFFFFF000) == 0xCC008000 && jit->jo.optimizeGatherPipe) { - MOV(R1, rB); - VCVT(S0, v0, 0); - VMOV(R0, S0); - MOVI2R(rA, (u32)&Memory::Write_U32); - BL(rA); - } - else - { - MOVI2R(rA, (u32)&Memory::Write_F64); -#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 - VMOV(R0, v0); - MOV(R2, rB); -#else - VMOV(D0, v0); - MOV(R0, rB); -#endif - BL(rA); - } - POP(4, R0, R1, R2, R3); - } - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem) - { - FixupBranch slow_out = B(); - SetJumpTarget(fast_path); - { - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - ARMReg rC = gpr.GetReg(); - BIC(rC, rB, mask); - MOVI2R(rA, (u32)Memory::base); - ADD(rC, rC, rA); + int accessSize; + if (flags & BackPatchInfo::FLAG_SIZE_F64) + accessSize = 64; + else + accessSize = 32; + MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount); + MOVI2R(R10, (u32)GPFifo::m_gatherPipe); + LDR(R11, R14); + ADD(R10, R10, R11); NEONXEmitter nemit(this); - if (single) + if (accessSize == 64) + { + PUSH(2, R0, R1); + nemit.VREV64(I_8, D0, v0); + VMOV(R0, D0); + STR(R0, R10, 0); + STR(R1, R10, 4); + POP(2, R0, R1); + } + else if (accessSize == 32) { VCVT(S0, v0, 0); nemit.VREV32(I_8, D0, D0); - VSTR(S0, rC, 0); + VMOV(addr, S0); + STR(addr, R10); } - else - { - nemit.VREV64(I_8, D0, v0); - VSTR(D0, rC, 0); - } - gpr.Unlock(rC); - } - SetJumpTarget(slow_out); - } + ADD(R11, R11, accessSize >> 3); + STR(R11, R14); + jit->js.fifoBytesThisBlock += accessSize >> 3; - gpr.Unlock(rA, rB); + } + else if (Memory::IsRAMAddress(imm_addr)) + { + MOVI2R(addr, imm_addr); + EmitBackpatchRoutine(this, 
flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, v0); + } + else + { + MOVI2R(addr, imm_addr); + EmitBackpatchRoutine(this, flags, false, false, v0); + } + } + else + { + EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, v0); + } }
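Taken together, the store and load emitters above choose between four emission strategies depending on whether the effective address is known at compile time. The helper below is only a rough, non-authoritative summary of that dispatch: the enum and function names are invented for illustration, while the real code simply calls EmitBackpatchRoutine with the fastmem and do_padding arguments shown above, and the gather-pipe case is the (imm_addr & 0xFFFFF000) == 0xCC008000 test combined with jo.optimizeGatherPipe.

// Illustrative decision tree for the integer/float store paths above; loads
// behave the same minus the gather-pipe case.
enum class AccessPath
{
    GatherPipeInline, // write straight into GPFifo::m_gatherPipe and bump the count
    FastmemNoPadding, // known RAM address: direct access, expected not to fault, no padding
    SlowmemOnly,      // known non-RAM address: call Memory::Read_*/Write_*
    FastmemPadded,    // runtime address: fastmem now, padded so BackPatch() can swap it later
};

AccessPath ChoosePath(bool is_immediate, bool is_gather_pipe, bool is_ram, bool fastmem_enabled)
{
    if (is_immediate)
    {
        if (is_gather_pipe)
            return AccessPath::GatherPipeInline;
        if (is_ram && fastmem_enabled)
            return AccessPath::FastmemNoPadding;
        return AccessPath::SlowmemOnly;
    }
    return fastmem_enabled ? AccessPath::FastmemPadded : AccessPath::SlowmemOnly;
}

Note that for addresses only known at runtime the emitters above pass do_padding = true even when Fastmem is disabled, so the emitted block keeps the padded, backpatch-sized shape regardless of which path actually runs.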