Make literal optimisation more reliable
Fixes the Spanish version of Pokémon HeartGold
This commit is contained in:
parent
d1d96d2236
commit
3e7483636f
|
@ -161,6 +161,27 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DecodeLiteral(const FetchedInstr& instr, u32& addr)
|
||||||
|
{
|
||||||
|
switch (instr.Info.Kind)
|
||||||
|
{
|
||||||
|
case ARMInstrInfo::ak_STR_IMM:
|
||||||
|
case ARMInstrInfo::ak_STRB_IMM:
|
||||||
|
addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1));
|
||||||
|
return true;
|
||||||
|
case ARMInstrInfo::ak_STRD_IMM:
|
||||||
|
case ARMInstrInfo::ak_STRH_IMM:
|
||||||
|
addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1));
|
||||||
|
return true;
|
||||||
|
case ARMInstrInfo::ak_STM: // I honestly hope noone was ever crazy enough to do stm pc, {whatever}
|
||||||
|
addr = instr.Addr + 8;
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
|
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
|
||||||
u32& linkAddr, u32& targetAddr)
|
u32& linkAddr, u32& targetAddr)
|
||||||
{
|
{
|
||||||
|
@ -463,6 +484,23 @@ void CompileBlock(ARM* cpu)
|
||||||
instrs[i].DataCycles = cpu->DataCycles;
|
instrs[i].DataCycles = cpu->DataCycles;
|
||||||
instrs[i].DataRegion = cpu->DataRegion;
|
instrs[i].DataRegion = cpu->DataRegion;
|
||||||
|
|
||||||
|
if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem
|
||||||
|
&& instrs[i].Info.SrcRegs == (1 << 15)
|
||||||
|
&& instrs[i].Info.DstRegs == 0)
|
||||||
|
{
|
||||||
|
assert (!thumb);
|
||||||
|
|
||||||
|
u32 addr;
|
||||||
|
if (DecodeLiteral(instrs[i], addr))
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("pc relative write detected\n");
|
||||||
|
u32 translatedAddr = cpu->Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||||
|
|
||||||
|
ARMJIT::InvalidateByAddr(translatedAddr, false);
|
||||||
|
CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 16));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
|
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
|
||||||
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
|
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
|
||||||
{
|
{
|
||||||
|
@ -631,7 +669,7 @@ void CompileBlock(ARM* cpu)
|
||||||
JitBlocks.Add(block);
|
JitBlocks.Add(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
void InvalidateByAddr(u32 pseudoPhysical)
|
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
|
||||||
{
|
{
|
||||||
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
|
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
|
||||||
AddressRange* range = &CodeRanges[pseudoPhysical / 512];
|
AddressRange* range = &CodeRanges[pseudoPhysical / 512];
|
||||||
|
@ -657,11 +695,14 @@ void InvalidateByAddr(u32 pseudoPhysical)
|
||||||
|
|
||||||
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
|
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
|
||||||
|
|
||||||
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
if (mayRestore)
|
||||||
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
{
|
||||||
delete RestoreCandidates[slot];
|
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
||||||
|
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
||||||
|
delete RestoreCandidates[slot];
|
||||||
|
|
||||||
RestoreCandidates[slot] = block;
|
RestoreCandidates[slot] = block;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
|
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
|
||||||
range->TimesInvalidated++;
|
range->TimesInvalidated++;
|
||||||
|
@ -732,6 +773,7 @@ void ResetBlockCache()
|
||||||
u32 addr = block->AddressRanges()[j];
|
u32 addr = block->AddressRanges()[j];
|
||||||
CodeRanges[addr / 512].Blocks.Clear();
|
CodeRanges[addr / 512].Blocks.Clear();
|
||||||
CodeRanges[addr / 512].TimesInvalidated = 0;
|
CodeRanges[addr / 512].TimesInvalidated = 0;
|
||||||
|
CodeRanges[addr / 512].InvalidLiterals = 0;
|
||||||
}
|
}
|
||||||
delete block;
|
delete block;
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,7 +61,7 @@ inline JitBlockEntry LookUpBlock(u32 addr)
|
||||||
void Init();
|
void Init();
|
||||||
void DeInit();
|
void DeInit();
|
||||||
|
|
||||||
void InvalidateByAddr(u32 pseudoPhysical);
|
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
|
||||||
void InvalidateAll();
|
void InvalidateAll();
|
||||||
|
|
||||||
void InvalidateITCM(u32 addr);
|
void InvalidateITCM(u32 addr);
|
||||||
|
|
|
@ -63,7 +63,7 @@ struct __attribute__((packed)) TinyVector
|
||||||
{
|
{
|
||||||
T* Data = NULL;
|
T* Data = NULL;
|
||||||
u16 Capacity = 0;
|
u16 Capacity = 0;
|
||||||
u32 Length = 0; // make it 32 bit so we don't need movzx
|
u16 Length = 0;
|
||||||
|
|
||||||
~TinyVector()
|
~TinyVector()
|
||||||
{
|
{
|
||||||
|
@ -181,6 +181,7 @@ private:
|
||||||
struct __attribute__((packed)) AddressRange
|
struct __attribute__((packed)) AddressRange
|
||||||
{
|
{
|
||||||
TinyVector<JitBlock*> Blocks;
|
TinyVector<JitBlock*> Blocks;
|
||||||
|
u16 InvalidLiterals;
|
||||||
u16 TimesInvalidated;
|
u16 TimesInvalidated;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -108,7 +108,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
SHR(32, R(RSCRATCH), Imm8(9));
|
SHR(32, R(RSCRATCH), Imm8(9));
|
||||||
SHL(32, R(RSCRATCH), Imm8(4));
|
SHL(32, R(RSCRATCH), Imm8(4));
|
||||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||||
FixupBranch noCode = J_CC(CC_Z);
|
FixupBranch noCode = J_CC(CC_Z);
|
||||||
JMP((u8*)InvalidateByAddr, true);
|
JMP((u8*)InvalidateByAddr, true);
|
||||||
SetJumpTarget(noCode);
|
SetJumpTarget(noCode);
|
||||||
|
@ -206,7 +206,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||||
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
|
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
|
||||||
SHR(32, R(RSCRATCH), Imm8(9));
|
SHR(32, R(RSCRATCH), Imm8(9));
|
||||||
SHL(32, R(RSCRATCH), Imm8(4));
|
SHL(32, R(RSCRATCH), Imm8(4));
|
||||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||||
FixupBranch noCode = J_CC(CC_Z);
|
FixupBranch noCode = J_CC(CC_Z);
|
||||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||||
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
|
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
|
||||||
|
@ -278,10 +278,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
||||||
Comp_AddCycles_CDI();
|
Comp_AddCycles_CDI();
|
||||||
}
|
}
|
||||||
|
|
||||||
void fault(u32 a, u32 b)
|
/*void fault(u32 a, u32 b, u32 c, u32 d)
|
||||||
{
|
{
|
||||||
printf("actually not static! %x %x\n", a, b);
|
printf("actually not static! %x %x %x %x\n", a, b, c, d);
|
||||||
}
|
}*/
|
||||||
|
|
||||||
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
|
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
|
||||||
{
|
{
|
||||||
|
@ -291,11 +291,17 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||||
if (size == 16)
|
if (size == 16)
|
||||||
addressMask = ~1;
|
addressMask = ~1;
|
||||||
|
|
||||||
|
//bool check = false;
|
||||||
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
||||||
{
|
{
|
||||||
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||||
Comp_MemLoadLiteral(size, rd, addr);
|
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||||
return;
|
|
||||||
|
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
|
||||||
|
{
|
||||||
|
Comp_MemLoadLiteral(size, rd, addr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -438,6 +444,20 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||||
|
|
||||||
CALL(memoryFunc);
|
CALL(memoryFunc);
|
||||||
|
|
||||||
|
/*if (Num == 0 && check)
|
||||||
|
{
|
||||||
|
CMP(32, R(EAX), rdMapped);
|
||||||
|
FixupBranch notEqual = J_CC(CC_E);
|
||||||
|
ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||||
|
MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
|
||||||
|
MOV(32, R(ABI_PARAM2), R(EAX));
|
||||||
|
MOV(32, R(ABI_PARAM3), rdMapped);
|
||||||
|
MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
|
||||||
|
CALL((u8*)fault);
|
||||||
|
ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||||
|
SetJumpTarget(notEqual);
|
||||||
|
}*/
|
||||||
|
|
||||||
if (!(flags & memop_Store))
|
if (!(flags & memop_Store))
|
||||||
{
|
{
|
||||||
if (inlinePreparation && size == 32)
|
if (inlinePreparation && size == 32)
|
||||||
|
|
Loading…
Reference in New Issue