diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 0695b853..c7387c99 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -161,6 +161,27 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags) } } +bool DecodeLiteral(const FetchedInstr& instr, u32& addr) /* Computes into addr the address targeted by a PC-based store (base is instr.Addr + 8, immediate offset added or subtracted according to bit 23); returns false and leaves addr untouched for instruction kinds it does not handle. */ +{ + switch (instr.Info.Kind) + { + case ARMInstrInfo::ak_STR_IMM: + case ARMInstrInfo::ak_STRB_IMM: + addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + case ARMInstrInfo::ak_STRD_IMM: + case ARMInstrInfo::ak_STRH_IMM: + addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1)); + return true; + case ARMInstrInfo::ak_STM: // I honestly hope no one was ever crazy enough to do stm pc, {whatever} + addr = instr.Addr + 8; + return true; + default: + JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr); + return false; + } +} + bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link, u32& linkAddr, u32& targetAddr) { @@ -463,6 +484,23 @@ void CompileBlock(ARM* cpu) instrs[i].DataCycles = cpu->DataCycles; instrs[i].DataRegion = cpu->DataRegion; + if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem + && instrs[i].Info.SrcRegs == (1 << 15) + && instrs[i].Info.DstRegs == 0) + { + assert (!thumb); + + u32 addr; + if (DecodeLiteral(instrs[i], addr)) + { + JIT_DEBUGPRINT("pc relative write detected\n"); + u32 translatedAddr = cpu->Num == 0 ? 
TranslateAddr<0>(addr) : TranslateAddr<1>(addr); + + ARMJIT::InvalidateByAddr(translatedAddr, false); + CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 32)); /* InvalidLiterals is a u16: one flag per 32-byte chunk of the 512-byte range (index 0..15); a 16-byte chunk index would reach 31 and be truncated away */ + } + } + if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) { @@ -631,7 +669,7 @@ void CompileBlock(ARM* cpu) JitBlocks.Add(block); } -void InvalidateByAddr(u32 pseudoPhysical) +void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore) /* NOTE(review): generated code reaches this function through a raw JMP (see Gen_MemoryRoutine9), where the default argument from ARMJIT.h does not apply - confirm the second argument register is set on those paths */ { JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical); AddressRange* range = &CodeRanges[pseudoPhysical / 512]; @@ -657,11 +695,14 @@ void InvalidateByAddr(u32 pseudoPhysical) FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL; - u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr); - if (RestoreCandidates[slot] && RestoreCandidates[slot] != block) - delete RestoreCandidates[slot]; + if (mayRestore) + { + u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr); + if (RestoreCandidates[slot] && RestoreCandidates[slot] != block) + delete RestoreCandidates[slot]; - RestoreCandidates[slot] = block; + RestoreCandidates[slot] = block; + } } if ((range->TimesInvalidated + 1) > range->TimesInvalidated) range->TimesInvalidated++; @@ -732,6 +773,7 @@ void ResetBlockCache() u32 addr = block->AddressRanges()[j]; CodeRanges[addr / 512].Blocks.Clear(); CodeRanges[addr / 512].TimesInvalidated = 0; + CodeRanges[addr / 512].InvalidLiterals = 0; } delete block; } diff --git a/src/ARMJIT.h b/src/ARMJIT.h index 1db4d66e..09cc4636 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -61,7 +61,7 @@ inline JitBlockEntry LookUpBlock(u32 addr) void Init(); void DeInit(); -void InvalidateByAddr(u32 pseudoPhysical); +void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true); void InvalidateAll(); void InvalidateITCM(u32 addr); diff --git a/src/ARMJIT_Internal.h b/src/ARMJIT_Internal.h index 9e6713d9..fb05f75c 100644 --- a/src/ARMJIT_Internal.h +++ 
b/src/ARMJIT_Internal.h @@ -63,7 +63,7 @@ struct __attribute__((packed)) TinyVector { T* Data = NULL; u16 Capacity = 0; - u32 Length = 0; // make it 32 bit so we don't need movzx + u16 Length = 0; ~TinyVector() { @@ -181,6 +181,7 @@ private: struct __attribute__((packed)) AddressRange { TinyVector Blocks; + u16 InvalidLiterals; u16 TimesInvalidated; }; diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 37997743..82f80a75 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -108,7 +108,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size) MOV(32, R(RSCRATCH), R(ABI_PARAM1)); SHR(32, R(RSCRATCH), Imm8(9)); SHL(32, R(RSCRATCH), Imm8(4)); - CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); + CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); FixupBranch noCode = J_CC(CC_Z); JMP((u8*)InvalidateByAddr, true); SetJumpTarget(noCode); @@ -206,7 +206,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc) MOV(32, R(ABI_PARAM4), R(RSCRATCH)); SHR(32, R(RSCRATCH), Imm8(9)); SHL(32, R(RSCRATCH), Imm8(4)); - CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); + CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); FixupBranch noCode = J_CC(CC_Z); ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8); MOV(32, R(ABI_PARAM1), R(ABI_PARAM4)); @@ -278,10 +278,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr) Comp_AddCycles_CDI(); } -void fault(u32 a, u32 b) +/*void fault(u32 a, u32 b, u32 c, u32 d) { - printf("actually not static! %x %x\n", a, b); -} + printf("actually not static! 
%x %x %x %x\n", a, b, c, d); +}*/ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags) { @@ -291,11 +291,17 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz if (size == 16) addressMask = ~1; + //bool check = false; if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback))) { u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); - Comp_MemLoadLiteral(size, rd, addr); - return; + u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr); + + if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 32)))) /* must use the same 32-byte chunk index that CompileBlock sets when it detects a pc relative write */ + { + Comp_MemLoadLiteral(size, rd, addr); + return; + } } { @@ -438,6 +444,20 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz CALL(memoryFunc); + /*if (Num == 0 && check) + { + CMP(32, R(EAX), rdMapped); + FixupBranch notEqual = J_CC(CC_E); + ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0); + MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8))); + MOV(32, R(ABI_PARAM2), R(EAX)); + MOV(32, R(ABI_PARAM3), rdMapped); + MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr)); + CALL((u8*)fault); + ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0); + SetJumpTarget(notEqual); + }*/ + if (!(flags & memop_Store)) { if (inlinePreparation && size == 32)