make literal optimisation more reliable

Fixes the Spanish version of Pokémon HeartGold.
This commit is contained in:
RSDuck 2019-11-03 15:33:20 +01:00
parent d1d96d2236
commit 3e7483636f
4 changed files with 77 additions and 14 deletions

View File

@ -161,6 +161,27 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
} }
} }
bool DecodeLiteral(const FetchedInstr& instr, u32& addr)
{
switch (instr.Info.Kind)
{
case ARMInstrInfo::ak_STR_IMM:
case ARMInstrInfo::ak_STRB_IMM:
addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1));
return true;
case ARMInstrInfo::ak_STRD_IMM:
case ARMInstrInfo::ak_STRH_IMM:
addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1));
return true;
case ARMInstrInfo::ak_STM: // I honestly hope noone was ever crazy enough to do stm pc, {whatever}
addr = instr.Addr + 8;
return true;
default:
JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr);
return false;
}
}
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link, bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
u32& linkAddr, u32& targetAddr) u32& linkAddr, u32& targetAddr)
{ {
@ -463,6 +484,23 @@ void CompileBlock(ARM* cpu)
instrs[i].DataCycles = cpu->DataCycles; instrs[i].DataCycles = cpu->DataCycles;
instrs[i].DataRegion = cpu->DataRegion; instrs[i].DataRegion = cpu->DataRegion;
if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem
&& instrs[i].Info.SrcRegs == (1 << 15)
&& instrs[i].Info.DstRegs == 0)
{
assert (!thumb);
u32 addr;
if (DecodeLiteral(instrs[i], addr))
{
JIT_DEBUGPRINT("pc relative write detected\n");
u32 translatedAddr = cpu->Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
ARMJIT::InvalidateByAddr(translatedAddr, false);
CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 16));
}
}
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0 if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1) && instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
{ {
@ -631,7 +669,7 @@ void CompileBlock(ARM* cpu)
JitBlocks.Add(block); JitBlocks.Add(block);
} }
void InvalidateByAddr(u32 pseudoPhysical) void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
{ {
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical); JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
AddressRange* range = &CodeRanges[pseudoPhysical / 512]; AddressRange* range = &CodeRanges[pseudoPhysical / 512];
@ -657,11 +695,14 @@ void InvalidateByAddr(u32 pseudoPhysical)
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL; FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr); if (mayRestore)
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block) {
delete RestoreCandidates[slot]; u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
delete RestoreCandidates[slot];
RestoreCandidates[slot] = block; RestoreCandidates[slot] = block;
}
} }
if ((range->TimesInvalidated + 1) > range->TimesInvalidated) if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
range->TimesInvalidated++; range->TimesInvalidated++;
@ -732,6 +773,7 @@ void ResetBlockCache()
u32 addr = block->AddressRanges()[j]; u32 addr = block->AddressRanges()[j];
CodeRanges[addr / 512].Blocks.Clear(); CodeRanges[addr / 512].Blocks.Clear();
CodeRanges[addr / 512].TimesInvalidated = 0; CodeRanges[addr / 512].TimesInvalidated = 0;
CodeRanges[addr / 512].InvalidLiterals = 0;
} }
delete block; delete block;
} }

View File

@ -61,7 +61,7 @@ inline JitBlockEntry LookUpBlock(u32 addr)
void Init(); void Init();
void DeInit(); void DeInit();
void InvalidateByAddr(u32 pseudoPhysical); void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
void InvalidateAll(); void InvalidateAll();
void InvalidateITCM(u32 addr); void InvalidateITCM(u32 addr);

View File

@ -63,7 +63,7 @@ struct __attribute__((packed)) TinyVector
{ {
T* Data = NULL; T* Data = NULL;
u16 Capacity = 0; u16 Capacity = 0;
u32 Length = 0; // make it 32 bit so we don't need movzx u16 Length = 0;
~TinyVector() ~TinyVector()
{ {
@ -181,6 +181,7 @@ private:
struct __attribute__((packed)) AddressRange struct __attribute__((packed)) AddressRange
{ {
TinyVector<JitBlock*> Blocks; TinyVector<JitBlock*> Blocks;
u16 InvalidLiterals;
u16 TimesInvalidated; u16 TimesInvalidated;
}; };

View File

@ -108,7 +108,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
MOV(32, R(RSCRATCH), R(ABI_PARAM1)); MOV(32, R(RSCRATCH), R(ABI_PARAM1));
SHR(32, R(RSCRATCH), Imm8(9)); SHR(32, R(RSCRATCH), Imm8(9));
SHL(32, R(RSCRATCH), Imm8(4)); SHL(32, R(RSCRATCH), Imm8(4));
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
FixupBranch noCode = J_CC(CC_Z); FixupBranch noCode = J_CC(CC_Z);
JMP((u8*)InvalidateByAddr, true); JMP((u8*)InvalidateByAddr, true);
SetJumpTarget(noCode); SetJumpTarget(noCode);
@ -206,7 +206,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
MOV(32, R(ABI_PARAM4), R(RSCRATCH)); MOV(32, R(ABI_PARAM4), R(RSCRATCH));
SHR(32, R(RSCRATCH), Imm8(9)); SHR(32, R(RSCRATCH), Imm8(9));
SHL(32, R(RSCRATCH), Imm8(4)); SHL(32, R(RSCRATCH), Imm8(4));
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0)); CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
FixupBranch noCode = J_CC(CC_Z); FixupBranch noCode = J_CC(CC_Z);
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8); ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4)); MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
@ -278,10 +278,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
Comp_AddCycles_CDI(); Comp_AddCycles_CDI();
} }
void fault(u32 a, u32 b) /*void fault(u32 a, u32 b, u32 c, u32 d)
{ {
printf("actually not static! %x %x\n", a, b); printf("actually not static! %x %x %x %x\n", a, b, c, d);
} }*/
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags) void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
{ {
@ -291,11 +291,17 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (size == 16) if (size == 16)
addressMask = ~1; addressMask = ~1;
//bool check = false;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback))) if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
{ {
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1); u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
Comp_MemLoadLiteral(size, rd, addr); u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
return;
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
{
Comp_MemLoadLiteral(size, rd, addr);
return;
}
} }
{ {
@ -438,6 +444,20 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
CALL(memoryFunc); CALL(memoryFunc);
/*if (Num == 0 && check)
{
CMP(32, R(EAX), rdMapped);
FixupBranch notEqual = J_CC(CC_E);
ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
MOV(32, R(ABI_PARAM2), R(EAX));
MOV(32, R(ABI_PARAM3), rdMapped);
MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
CALL((u8*)fault);
ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
SetJumpTarget(notEqual);
}*/
if (!(flags & memop_Store)) if (!(flags & memop_Store))
{ {
if (inlinePreparation && size == 32) if (inlinePreparation && size == 32)