wip

parent 9f6c3c8d44
commit 5f947655f5
@@ -76,7 +76,7 @@ u32 m_ram_code_page_count = 0;
 u8* g_ram = nullptr; // 2MB RAM
 u32 g_ram_size = 0;
 u32 g_ram_mask = 0;
-u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
+alignas(HOST_PAGE_SIZE) u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
 
 static std::array<TickCount, 3> m_exp1_access_time = {};
 static std::array<TickCount, 3> m_exp2_access_time = {};
@@ -315,9 +315,11 @@ static ALWAYS_INLINE u32 FastmemAddressToLUTPageIndex(u32 address)
   return address >> 12;
 }
 
-static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable)
+static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable, u32 read_ticks)
 {
-  m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] = ptr;
+  DebugAssert((reinterpret_cast<uintptr_t>(ptr) & HOST_PAGE_OFFSET_MASK) == 0);
+  m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] =
+    reinterpret_cast<u8*>(reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(read_ticks));
   m_fastmem_lut[FASTMEM_LUT_NUM_PAGES + FastmemAddressToLUTPageIndex(address)] = writable ? ptr : nullptr;
 }
 
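For context, a minimal standalone sketch of the packing scheme the new SetLUTFastmemPage appears to rely on: every pointer placed in the read half of the LUT is host-page aligned (hence the alignas additions and the DebugAssert above), so its low bits are free to carry a small per-page read tick count. The helper names and the 4 KiB page mask below are illustrative assumptions, not identifiers from the codebase.

#include <cassert>
#include <cstdint>

constexpr uintptr_t kPageOffsetMask = 0xFFF; // assumed 4 KiB host pages

// Pack a small tick count into the low bits of a page-aligned pointer.
static uint8_t* PackLUTEntry(uint8_t* page_ptr, uint32_t read_ticks)
{
  assert((reinterpret_cast<uintptr_t>(page_ptr) & kPageOffsetMask) == 0);
  assert(read_ticks <= kPageOffsetMask);
  return reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(page_ptr) | static_cast<uintptr_t>(read_ticks));
}

// Recover the pointer and the tick count on the read side.
static void UnpackLUTEntry(uint8_t* entry, uint8_t** page_ptr, uint32_t* read_ticks)
{
  const uintptr_t bits = reinterpret_cast<uintptr_t>(entry);
  *page_ptr = reinterpret_cast<uint8_t*>(bits & ~kPageOffsetMask);
  *read_ticks = static_cast<uint32_t>(bits & kPageOffsetMask);
}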
@@ -451,21 +453,25 @@ void UpdateFastmemViews(CPUFastmemMode mode)
     for (u32 address = 0; address < g_ram_size; address += HOST_PAGE_SIZE)
     {
       SetLUTFastmemPage(base_address + address, &g_ram[address],
-                        !m_ram_code_bits[FastmemAddressToLUTPageIndex(address)]);
+                        !m_ram_code_bits[FastmemAddressToLUTPageIndex(address)], RAM_READ_TICKS);
     }
   };
 
+  auto MapScratchpad = [](u32 base_address) { SetLUTFastmemPage(base_address, CPU::g_scratchpad.data(), true, 0); };
+
   // KUSEG - cached
   MapRAM(0x00000000);
   MapRAM(0x00200000);
   MapRAM(0x00400000);
   MapRAM(0x00600000);
+  MapScratchpad(0x1F800000);
 
   // KSEG0 - cached
   MapRAM(0x80000000);
   MapRAM(0x80200000);
   MapRAM(0x80400000);
   MapRAM(0x80600000);
+  MapScratchpad(0x8F800000);
 
   // KSEG1 - uncached
   MapRAM(0xA0000000);
@@ -490,7 +496,7 @@ bool CanUseFastmemForAddress(VirtualMemoryAddress address)
 #endif
 
     case CPUFastmemMode::LUT:
-      return (paddr < g_ram_size);
+      return (paddr < g_ram_size) || ((paddr & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION);
 
     case CPUFastmemMode::Disabled:
     default:
@@ -548,7 +554,7 @@ void SetCodePageFastmemProtection(u32 page_index, bool writable)
     // mirrors...
     const u32 ram_address = page_index * HOST_PAGE_SIZE;
     for (u32 mirror_start : m_fastmem_ram_mirrors)
-      SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable);
+      SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable, RAM_READ_TICKS);
   }
 }
 
@@ -576,7 +582,7 @@ void ClearRAMCodePageFlags()
     {
       const u32 addr = (i * HOST_PAGE_SIZE);
       for (u32 mirror_start : m_fastmem_ram_mirrors)
-        SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true);
+        SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true, RAM_READ_TICKS);
     }
   }
 }
@@ -664,7 +670,7 @@ u8* GetMemoryRegionPointer(MemoryRegion region)
       return nullptr;
 
     case MemoryRegion::Scratchpad:
-      return CPU::g_state.dcache.data();
+      return CPU::g_scratchpad.data();
 
     case MemoryRegion::BIOS:
       return g_bios;
@@ -1451,30 +1457,30 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address,
   if constexpr (size == MemoryAccessSize::Byte)
   {
     if constexpr (type == MemoryAccessType::Read)
-      value = ZeroExtend32(g_state.dcache[cache_offset]);
+      value = ZeroExtend32(g_scratchpad[cache_offset]);
     else
-      g_state.dcache[cache_offset] = Truncate8(value);
+      g_scratchpad[cache_offset] = Truncate8(value);
   }
   else if constexpr (size == MemoryAccessSize::HalfWord)
   {
     if constexpr (type == MemoryAccessType::Read)
     {
       u16 temp;
-      std::memcpy(&temp, &g_state.dcache[cache_offset], sizeof(temp));
+      std::memcpy(&temp, &g_scratchpad[cache_offset], sizeof(temp));
       value = ZeroExtend32(temp);
     }
     else
     {
       u16 temp = Truncate16(value);
-      std::memcpy(&g_state.dcache[cache_offset], &temp, sizeof(temp));
+      std::memcpy(&g_scratchpad[cache_offset], &temp, sizeof(temp));
     }
   }
   else if constexpr (size == MemoryAccessSize::Word)
   {
     if constexpr (type == MemoryAccessType::Read)
-      std::memcpy(&value, &g_state.dcache[cache_offset], sizeof(value));
+      std::memcpy(&value, &g_scratchpad[cache_offset], sizeof(value));
     else
-      std::memcpy(&g_state.dcache[cache_offset], &value, sizeof(value));
+      std::memcpy(&g_scratchpad[cache_offset], &value, sizeof(value));
   }
 
   return 0;
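The half-word and word scratchpad paths above go through std::memcpy rather than casting the byte array to u16*/u32*, which keeps the access well defined regardless of alignment and strict-aliasing rules. A hedged, generic sketch of the same pattern; the helpers are illustrative and not part of the codebase:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Read a T out of a raw byte buffer; memcpy is valid for any alignment and
// compiles down to a plain load on the usual targets.
template <typename T>
static T LoadFromBytes(const uint8_t* base, size_t offset)
{
  T value;
  std::memcpy(&value, base + offset, sizeof(T));
  return value;
}

// Store a T back into the buffer the same way.
template <typename T>
static void StoreToBytes(uint8_t* base, size_t offset, T value)
{
  std::memcpy(base + offset, &value, sizeof(T));
}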
@@ -1924,7 +1930,7 @@ void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize
     if (read_ticks)
       *read_ticks = 0;
 
-    return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
+    return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
   }
 
   if (paddr >= BIOS_BASE && paddr < (BIOS_BASE + BIOS_SIZE))
@@ -1955,7 +1961,7 @@ void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize
 #endif
 
   if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
-    return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
+    return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
 
   return nullptr;
 }
@@ -46,7 +46,7 @@ static T DoMemoryRead(PhysicalMemoryAddress address)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&result, &CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
+    std::memcpy(&result, &CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
     return result;
   }
 
@@ -74,7 +74,7 @@ static void DoMemoryWrite(PhysicalMemoryAddress address, T value)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], &value, sizeof(value));
+    std::memcpy(&CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], &value, sizeof(value));
     return;
   }
 
@@ -25,6 +25,7 @@ static void Branch(u32 target);
 static void FlushPipeline();
 
 State g_state;
+alignas(HOST_PAGE_SIZE) std::array<u8, DCACHE_SIZE> g_scratchpad;
 bool g_using_interpreter = false;
 bool TRACE_EXECUTION = false;
 
@@ -164,7 +165,7 @@ bool DoState(StateWrapper& sw)
   sw.Do(&g_state.next_load_delay_reg);
   sw.Do(&g_state.next_load_delay_value);
   sw.Do(&g_state.cache_control.bits);
-  sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
+  sw.DoBytes(g_scratchpad.data(), g_scratchpad.size());
 
   if (!GTE::DoState(sw))
     return false;
@@ -81,7 +81,6 @@ struct State
   u8* fastmem_base = nullptr;
 
   // data cache (used as scratchpad)
-  std::array<u8, DCACHE_SIZE> dcache = {};
   std::array<u32, ICACHE_LINES> icache_tags = {};
   std::array<u8, ICACHE_SIZE> icache_data = {};
 
@@ -90,6 +89,7 @@ struct State
 };
 
 extern State g_state;
+extern std::array<u8, DCACHE_SIZE> g_scratchpad;
 extern bool g_using_interpreter;
 
 void Initialize();
@@ -2868,7 +2868,7 @@ CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemo
   if ((phys_addr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
   {
     u32 scratchpad_offset = phys_addr & DCACHE_OFFSET_MASK;
-    std::memcpy(&value, &CPU::g_state.dcache[scratchpad_offset], sizeof(value));
+    std::memcpy(&value, &CPU::g_scratchpad[scratchpad_offset], sizeof(value));
     return value;
   }
 
@@ -1203,9 +1203,10 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   }
 
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12); // offset = addr & 0xfff
   m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
+  m_emit->bic(GetHostReg32(RARG1), GetHostReg32(RARG1), 0xFF); // ptr &= ~cycles
 
   switch (size)
   {
@@ -1249,10 +1250,12 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     address_reg = address.host_reg;
   }
 
-  m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); // page = addr >> 12
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12); // offset = addr & 0xfff
   m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
+  m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(RARG1), 0xFF); // scratch = ptr & cycles
+  m_emit->bic(GetHostReg32(RARG1), GetHostReg32(RARG1), 0xFF); // ptr &= ~cycles
 
   m_register_cache.InhibitAllocation();
   bpi.host_pc = GetCurrentNearCodePointer();
@@ -1285,16 +1288,8 @@
   bpi.host_slowmem_pc = GetCurrentFarCodePointer();
   SwitchToFarCode();
 
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
   EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
-
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
 
   // restore fastmem base state for the next instruction
   if (old_store_fastmem_base)
     fastmem_base = GetFastmemStoreBase();
@@ -1409,7 +1404,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
   // TODO: if this gets backpatched, these instructions are wasted
 
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12);
   m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
 
@@ -1368,6 +1368,7 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
   m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
   m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
+  m_emit->and_(GetHostReg64(RARG1), GetHostReg64(RARG1), ~static_cast<u64>(HOST_PAGE_OFFSET_MASK));
 
   switch (size)
   {
@@ -1434,12 +1435,37 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
         UnreachableCode();
         break;
     }
 
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
+    DebugAssert(m_delayed_cycles_add > 0);
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
+    m_delayed_cycles_add += Bus::RAM_READ_TICKS;
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
+
+    // return to the block code
+    EmitBranch(GetCurrentNearCodePointer(), false);
+
+    SwitchToNearCode();
   }
   else
   {
     m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
     m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
     m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
+    m_emit->and_(GetHostReg64(RARG3), GetHostReg64(RARG1), static_cast<u64>(HOST_PAGE_OFFSET_MASK));
+    m_emit->and_(GetHostReg64(RARG1), GetHostReg64(RARG1), ~static_cast<u64>(HOST_PAGE_OFFSET_MASK));
+
     bpi.host_pc = GetCurrentNearCodePointer();
 
@@ -1461,29 +1487,24 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
         UnreachableCode();
         break;
     }
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromHostReg(&m_register_cache, RARG3, RegSize_32));
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    // return to the block code
+    EmitBranch(GetCurrentNearCodePointer(), false);
+
+    SwitchToNearCode();
   }
 
-  bpi.host_code_size = static_cast<u32>(
-    static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
-
-  // generate slowmem fallback
-  bpi.host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
-
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
-
-  SwitchToNearCode();
   m_register_cache.UninhibitAllocation();
 
   m_block->loadstore_backpatch_info.push_back(bpi);
@@ -1838,6 +1838,7 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   m_emit->shr(GetHostReg32(RARG1), 12);
   m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
   m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
+  m_emit->and_(GetHostReg64(RARG1), ~static_cast<u32>(HOST_PAGE_OFFSET_MASK));
 
   switch (size)
   {
@@ -1926,6 +1927,35 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
      }
      break;
    }
+
+    // insert nops, we need at least 5 bytes for a relative jump
+    const u32 fastmem_size =
+      static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
+    const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
+    for (u32 i = 0; i < nops; i++)
+      m_emit->nop();
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    m_far_emitter.align(16);
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
+    DebugAssert(m_delayed_cycles_add > 0);
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
+    m_delayed_cycles_add += Bus::RAM_READ_TICKS;
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
+
+    // return to the block code
+    m_emit->jmp(GetCurrentNearCodePointer());
   }
   else
   {
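The 5-byte minimum behind the nop padding matches the size of an x86-64 near jump with a 32-bit displacement (opcode 0xE9 plus rel32), which is presumably what backpatching writes over the fastmem sequence once a load faults. A hedged sketch of such a patch, under that assumption; this is not the project's actual backpatch routine:

#include <cstdint>
#include <cstring>

// Overwrite five bytes at 'code' with a rel32 jump to 'target' (x86-64).
static void WriteRel32Jump(uint8_t* code, const uint8_t* target)
{
  const int32_t rel = static_cast<int32_t>(target - (code + 5)); // displacement is relative to the next instruction
  code[0] = 0xE9;                                                // jmp rel32
  std::memcpy(&code[1], &rel, sizeof(rel));
}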
@@ -1937,6 +1967,9 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     m_emit->shr(GetHostReg32(RARG1), 12);
     m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
     m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
+    m_emit->mov(GetHostReg32(RARG3), GetHostReg32(RARG1));
+    m_emit->and_(GetHostReg64(RARG1), ~static_cast<u32>(HOST_PAGE_OFFSET_MASK));
+    m_emit->and_(GetHostReg32(RARG3), static_cast<u32>(HOST_PAGE_OFFSET_MASK));
     bpi.host_pc = GetCurrentNearCodePointer();
 
     switch (size)
@@ -1953,36 +1986,30 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
        m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
        break;
    }
+
+    m_emit->add(m_emit->dword[GetHostReg64(RCPUPTR) + offsetof(State, pending_ticks)], GetHostReg32(RARG3));
+
+    // insert nops, we need at least 5 bytes for a relative jump
+    const u32 fastmem_size =
+      static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
+    const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
+    for (u32 i = 0; i < nops; i++)
+      m_emit->nop();
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    m_far_emitter.align(16);
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    // return to the block code
+    m_emit->jmp(GetCurrentNearCodePointer());
   }
 
-  // insert nops, we need at least 5 bytes for a relative jump
-  const u32 fastmem_size =
-    static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
-  const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
-  for (u32 i = 0; i < nops; i++)
-    m_emit->nop();
-
-  bpi.host_code_size = static_cast<u32>(
-    static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
-
-  // generate slowmem fallback
-  m_far_emitter.align(16);
-  bpi.host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
-
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  m_emit->jmp(GetCurrentNearCodePointer());
-
   SwitchToNearCode();
   m_register_cache.UninhibitAllocation();
 
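Putting the x86-64 LUT pieces together, the emitted fast path now behaves roughly like the C++ below: fetch the packed entry, accumulate its low bits into pending_ticks, mask them off, and read through the clean pointer. This is an illustrative mirror under the assumption that HOST_PAGE_OFFSET_MASK is 0xFFF; the 32-bit ARM backend masks with 0xFF instead, and none of the names below come from the codebase:

#include <cstdint>
#include <cstring>

static uint32_t FastmemLUTReadWord(uint8_t* const* lut, uint32_t address, int32_t* pending_ticks)
{
  const uint32_t page = address >> 12;                       // shr RARG1, 12
  const uint32_t offset = address & 0xFFFu;                  // and RARG2, HOST_PAGE_OFFSET_MASK
  uintptr_t entry = reinterpret_cast<uintptr_t>(lut[page]);  // LUT pointer load
  *pending_ticks += static_cast<int32_t>(entry & 0xFFFu);    // packed read ticks
  entry &= ~static_cast<uintptr_t>(0xFFFu);                  // recover the page pointer
  uint32_t value;
  std::memcpy(&value, reinterpret_cast<const uint8_t*>(entry) + offset, sizeof(value));
  return value;
}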
@@ -1401,7 +1401,7 @@ static T DoMemoryRead(PhysicalMemoryAddress address)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&result, &CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
+    std::memcpy(&result, &CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
     return result;
   }
 