decrease jit block cache address granularity
fixes Dragon Quest IX
move code with side effects out of assert, fixes release build (thanks to m4wx for this one)
also remove some leftovers of jit pipelining
parent 52dd0ee75a
commit 9cf7780e46
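The core change in the diff below is that each AddressRange entry in CodeRanges now covers a 512-byte window of the pseudo-physical code space instead of a 256-byte one, so the table has half as many entries and every "/ 256" index computation becomes "/ 512". A minimal sketch of the mapping, reusing names from the diff (the helper itself is illustrative, not part of the code):

    #include <cstdint>

    // One AddressRange tracks the JIT blocks that overlap one window of code memory.
    constexpr uint32_t RangeGranularity = 512; // was 256 before this commit

    // Hypothetical helper: which CodeRanges[] entry an address belongs to.
    inline uint32_t RangeIndex(uint32_t pseudoPhysicalAddr)
    {
        return pseudoPhysicalAddr / RangeGranularity;
    }

Coarser ranges make a write slightly more likely to land in a range that holds code and trigger an invalidation, in exchange for a table half the size.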
@@ -106,7 +106,7 @@ u32 AddrTranslate9[0x2000];
 u32 AddrTranslate7[0x4000];
 
 JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
-AddressRange CodeRanges[ExeMemSpaceSize / 256];
+AddressRange CodeRanges[ExeMemSpaceSize / 512];
 
 TinyVector<JitBlock*> JitBlocks;
 JitBlock* RestoreCandidates[0x1000] = {NULL};
@@ -285,6 +285,13 @@ InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] =
 #undef F_MEM_HD
 #undef F
 
+void T_BL_LONG(ARM* cpu)
+{
+    ARMInterpreter::T_BL_LONG_1(cpu);
+    cpu->R[15] += 2;
+    ARMInterpreter::T_BL_LONG_2(cpu);
+}
+
 #define F(x) ARMInterpreter::T_##x
 InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
 {
@@ -302,7 +309,7 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
     F(PUSH), F(POP), F(LDMIA), F(STMIA),
     F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2),
     F(UNK), F(SVC),
-    NULL // BL_LONG psudo opcode
+    T_BL_LONG // BL_LONG psudo opcode
 };
 #undef F
 
@@ -341,7 +348,7 @@ void CompileBlock(ARM* cpu)
     JIT_DEBUGPRINT("start block %x (%x) %p %p (region invalidates %dx)\n",
         blockAddr, pseudoPhysicalAddr, FastBlockAccess[pseudoPhysicalAddr / 2],
         cpu->Num == 0 ? LookUpBlock<0>(blockAddr) : LookUpBlock<1>(blockAddr),
-        CodeRanges[pseudoPhysicalAddr / 256].TimesInvalidated);
+        CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
 
     u32 lastSegmentStart = blockAddr;
 
@@ -352,7 +359,7 @@ void CompileBlock(ARM* cpu)
         instrs[i].BranchFlags = 0;
         instrs[i].SetFlags = 0;
         instrs[i].Instr = nextInstr[0];
-        instrs[i].NextInstr[0] = nextInstr[0] = nextInstr[1];
+        nextInstr[0] = nextInstr[1];
 
         instrs[i].Addr = nextInstrAddr[0];
         nextInstrAddr[0] = nextInstrAddr[1];
@@ -361,7 +368,7 @@ void CompileBlock(ARM* cpu)
 
         u32 translatedAddr = (cpu->Num == 0
             ? TranslateAddr<0>(instrs[i].Addr)
-            : TranslateAddr<1>(instrs[i].Addr)) & ~0xFF;
+            : TranslateAddr<1>(instrs[i].Addr)) & ~0x1FF;
         if (i == 0 || translatedAddr != addresseRanges[numAddressRanges - 1])
         {
             bool returning = false;
@@ -400,7 +407,6 @@ void CompileBlock(ARM* cpu)
             nextInstr[1] = cpuv4->CodeRead32(r15);
             instrs[i].CodeCycles = cpu->CodeCycles;
         }
-        instrs[i].NextInstr[1] = nextInstr[1];
         instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr);
 
         cpu->R[15] = r15;
@@ -584,7 +590,7 @@ void CompileBlock(ARM* cpu)
     for (int j = 0; j < numAddressRanges; j++)
    {
         assert(addresseRanges[j] == block->AddressRanges()[j]);
-        CodeRanges[addresseRanges[j] / 256].Blocks.Add(block);
+        CodeRanges[addresseRanges[j] / 512].Blocks.Add(block);
     }
 
     FastBlockAccess[block->PseudoPhysicalAddr / 2] = block->EntryPoint;
@@ -595,7 +601,7 @@ void CompileBlock(ARM* cpu)
 void InvalidateByAddr(u32 pseudoPhysical)
 {
     JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
-    AddressRange* range = &CodeRanges[pseudoPhysical / 256];
+    AddressRange* range = &CodeRanges[pseudoPhysical / 512];
     int startLength = range->Blocks.Length;
     for (int i = 0; i < range->Blocks.Length; i++)
     {
@@ -604,15 +610,17 @@ void InvalidateByAddr(u32 pseudoPhysical)
         for (int j = 0; j < block->NumAddresses; j++)
         {
             u32 addr = block->AddressRanges()[j];
-            if ((addr / 256) != (pseudoPhysical / 256))
+            if ((addr / 512) != (pseudoPhysical / 512))
             {
-                AddressRange* otherRange = &CodeRanges[addr / 256];
+                AddressRange* otherRange = &CodeRanges[addr / 512];
                 assert(otherRange != range);
-                assert(otherRange->Blocks.RemoveByValue(block));
+                bool removed = otherRange->Blocks.RemoveByValue(block);
+                assert(removed);
             }
         }
 
-        assert(JitBlocks.RemoveByValue(block));
+        bool removed = JitBlocks.RemoveByValue(block);
+        assert(removed);
 
         FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
 
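The bool-removed rewrite in this hunk is the release-build fix from the commit message: with NDEBUG defined, as it is in release builds, assert(expr) expands to nothing, so a call with side effects written inside the assert is dropped entirely and the block would never actually be unregistered. A self-contained illustration of the pattern (stand-in code, not the project's TinyVector):

    #include <cassert>
    #include <algorithm>
    #include <vector>

    // Stand-in for TinyVector::RemoveByValue: erase one occurrence, report success.
    static bool RemoveByValue(std::vector<int>& v, int x)
    {
        auto it = std::find(v.begin(), v.end(), x);
        if (it == v.end()) return false;
        v.erase(it);
        return true;
    }

    void Broken(std::vector<int>& v)
    {
        assert(RemoveByValue(v, 42)); // the entire call vanishes under NDEBUG
    }

    void Fixed(std::vector<int>& v)
    {
        bool removed = RemoveByValue(v, 42); // the removal always runs
        assert(removed);                     // only the check is compiled out
        (void)removed;                       // silence unused-variable warning in release
    }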
@@ -631,14 +639,14 @@ void InvalidateByAddr(u32 pseudoPhysical)
 void InvalidateByAddr7(u32 addr)
 {
     u32 pseudoPhysical = TranslateAddr<1>(addr);
-    if (__builtin_expect(CodeRanges[pseudoPhysical / 256].Blocks.Length > 0, false))
+    if (__builtin_expect(CodeRanges[pseudoPhysical / 512].Blocks.Length > 0, false))
         InvalidateByAddr(pseudoPhysical);
 }
 
 void InvalidateITCM(u32 addr)
 {
     u32 pseudoPhysical = addr + ExeMemRegionOffsets[exeMem_ITCM];
-    if (CodeRanges[pseudoPhysical / 256].Blocks.Length > 0)
+    if (CodeRanges[pseudoPhysical / 512].Blocks.Length > 0)
         InvalidateByAddr(pseudoPhysical);
 }
 
@@ -654,7 +662,7 @@ void InvalidateAll()
         for (int j = 0; j < block->NumAddresses; j++)
         {
             u32 addr = block->AddressRanges()[j];
-            AddressRange* range = &CodeRanges[addr / 256];
+            AddressRange* range = &CodeRanges[addr / 512];
             range->Blocks.Clear();
             if (range->TimesInvalidated + 1 > range->TimesInvalidated)
                 range->TimesInvalidated++;
@@ -689,8 +697,8 @@ void ResetBlockCache()
         for (int j = 0; j < block->NumAddresses; j++)
         {
             u32 addr = block->AddressRanges()[j];
-            CodeRanges[addr / 256].Blocks.Clear();
-            CodeRanges[addr / 256].TimesInvalidated = 0;
+            CodeRanges[addr / 512].Blocks.Clear();
+            CodeRanges[addr / 512].TimesInvalidated = 0;
         }
         delete block;
     }
@@ -38,7 +38,6 @@ struct FetchedInstr
     u8 BranchFlags;
     u8 SetFlags;
     u32 Instr;
-    u32 NextInstr[2];
     u32 Addr;
 
     u8 CodeCycles;
@@ -185,7 +184,7 @@ struct __attribute__((packed)) AddressRange
     u16 TimesInvalidated;
 };
 
-extern AddressRange CodeRanges[ExeMemSpaceSize / 256];
+extern AddressRange CodeRanges[ExeMemSpaceSize / 512];
 
 typedef void (*InterpreterFunc)(ARM* cpu);
 extern InterpreterFunc InterpretARM[];
@@ -105,7 +105,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
     static_assert(sizeof(AddressRange) == 16);
     LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
     MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    SHR(32, R(RSCRATCH), Imm8(8));
+    SHR(32, R(RSCRATCH), Imm8(9));
     SHL(32, R(RSCRATCH), Imm8(4));
     CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
     FixupBranch noCode = J_CC(CC_Z);
@@ -203,7 +203,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
 
     ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
     MOV(32, R(ABI_PARAM4), R(RSCRATCH));
-    SHR(32, R(RSCRATCH), Imm8(8));
+    SHR(32, R(RSCRATCH), Imm8(9));
     SHL(32, R(RSCRATCH), Imm8(4));
     CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
     FixupBranch noCode = J_CC(CC_Z);
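In both memory routines the emitted x64 turns an address into a byte offset into CodeRanges: shifting right by 9 selects the 512-byte range (8 previously selected a 256-byte range), and shifting left by 4 scales by sizeof(AddressRange), which the static_assert pins to 16 bytes; the CMP then tests that range's Blocks.Length. The C equivalent of the address math would be roughly (illustrative only):

    // Byte offset of CodeRanges[addr / 512] from the start of CodeRanges,
    // computed with shifts exactly as the emitter does:
    // (addr >> 9) << 4  ==  (addr / 512) * 16
    uint32_t offset = (addr >> 9) << 4;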
@@ -284,28 +284,29 @@ void fault(u32 a, u32 b)
 
 void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
 {
-    if (flags & memop_Store)
-    {
-        Comp_AddCycles_CD();
-    }
-    else
-    {
-        Comp_AddCycles_CDI();
-    }
-
     u32 addressMask = ~0;
     if (size == 32)
         addressMask = ~3;
     if (size == 16)
         addressMask = ~1;
 
-    if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+    if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
     {
-        Comp_MemLoadLiteral(size, rd,
-            R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1));
+        u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+        Comp_MemLoadLiteral(size, rd, addr);
         return;
     }
-    else
+
     {
+        if (flags & memop_Store)
+        {
+            Comp_AddCycles_CD();
+        }
+        else
+        {
+            Comp_AddCycles_CDI();
+        }
+
         OpArg rdMapped = MapReg(rd);
         OpArg rnMapped = MapReg(rn);
+
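The Comp_MemAccess reshuffle above does two things: sign-extending loads are excluded from the literal-load shortcut, and the cycle accounting (Comp_AddCycles_CD/CDI) now happens only on the generic path, after the literal case has already returned. The shortcut itself relies on a PC-relative load with an immediate offset having an address that is fully known while the block is compiled; in the diff's terms (sketch of the added lines, with explanatory comments):

    // For a load like LDR rd, [PC, #imm], the effective address is a
    // compile-time constant, so the JIT can hand it to Comp_MemLoadLiteral
    // and special-case it instead of emitting a generic memory access.
    u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
    Comp_MemLoadLiteral(size, rd, addr);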
@@ -5,7 +5,7 @@
 namespace ARMInstrInfo
 {
 
-#define ak(x) ((x) << 21)
+#define ak(x) ((x) << 22)
 
 enum {
     A_Read0 = 1 << 0,
@@ -36,7 +36,8 @@ enum {
     A_StaticShiftSetC = 1 << 18,
     A_SetC = 1 << 19,
 
-    A_WriteMem = 1 << 20
+    A_WriteMem = 1 << 20,
+    A_LoadMem = 1 << 21
 };
 
 #define A_BIOP A_Read16
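The new A_LoadMem flag is why the ak() macro in the preceding hunk moves from << 21 to << 22: the per-instruction data word keeps behaviour flags in its low bits and packs the instruction kind above them, so adding a flag at bit 21 pushes the kind field up one bit, and Decode() further down extracts it with >> 22 to match. Schematically (layout inferred from the diff):

    // behaviour flags in the low bits ...  ... instruction kind packed above them
    enum { A_WriteMem = 1 << 20, A_LoadMem = 1 << 21 };
    #define ak(x) ((x) << 22)              // kind field now starts above A_LoadMem
    // res.Kind = (data >> 22) & 0x1FF;    // matching extraction in Decode()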
@@ -122,7 +123,7 @@ const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QSUB);
 const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDADD);
 const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDSUB);
 
-#define A_LDR A_Write12
+#define A_LDR A_Write12 | A_LoadMem
 #define A_STR A_Read12 | A_WriteMem
 
 #define A_IMPLEMENT_WB_LDRSTR(x,k) \
@@ -143,7 +144,7 @@ A_IMPLEMENT_WB_LDRSTR(STRB,STR)
 A_IMPLEMENT_WB_LDRSTR(LDR,LDR)
 A_IMPLEMENT_WB_LDRSTR(LDRB,LDR)
 
-#define A_LDRD A_Write12Double
+#define A_LDRD A_Write12Double | A_LoadMem
 #define A_STRD A_Read12Double | A_WriteMem
 
 #define A_IMPLEMENT_HD_LDRSTR(x,k) \
@@ -159,10 +160,10 @@ A_IMPLEMENT_HD_LDRSTR(LDRH,LDR)
 A_IMPLEMENT_HD_LDRSTR(LDRSB,LDR)
 A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)
 
-const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_WriteMem | ak(ak_SWP);
-const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_WriteMem | ak(ak_SWPB);
+const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWP);
+const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_LoadMem | A_WriteMem | ak(ak_SWPB);
 
-const u32 A_LDM = A_Read16 | A_MemWriteback | ak(ak_LDM);
+const u32 A_LDM = A_Read16 | A_MemWriteback | A_LoadMem | ak(ak_LDM);
 const u32 A_STM = A_Read16 | A_MemWriteback | A_WriteMem | ak(ak_STM);
 
 const u32 A_B = A_BranchAlways | ak(ak_B);
@@ -360,6 +361,9 @@ Info Decode(bool thumb, u32 num, u32 instr)
 
         if (data & T_WriteMem)
             res.SpecialKind = special_WriteMem;
 
+        if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
+            res.SpecialKind = special_LoadLiteral;
+
         res.EndBlock |= res.Branches();
 
@@ -377,7 +381,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
         if (data & A_UnkOnARM7 && num != 0)
             data = A_UNK;
 
-        res.Kind = (data >> 21) & 0x1FF;
+        res.Kind = (data >> 22) & 0x1FF;
 
         if (res.Kind == ak_MCR)
         {
@@ -454,12 +458,15 @@ Info Decode(bool thumb, u32 num, u32 instr)
             res.ReadFlags |= flag_C;
         if ((data & A_RRXReadC) && !((instr >> 7) & 0x1F))
             res.ReadFlags |= flag_C;
-        if ((data & A_SetC) || (data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F))
+        if ((data & A_SetC) || ((data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F)))
             res.WriteFlags |= flag_C;
 
         if (data & A_WriteMem)
             res.SpecialKind = special_WriteMem;
 
+        if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
+            res.SpecialKind = special_LoadLiteral;
+
         if ((instr >> 28) < 0xE)
         {
             // make non conditional flag sets conditional
@@ -230,7 +230,8 @@ enum
 {
     special_NotSpecialAtAll = 0,
     special_WriteMem,
-    special_WaitForInterrupt
+    special_WaitForInterrupt,
+    special_LoadLiteral
 };
 
 struct Info