commit 5f947655f5
parent 9f6c3c8d44

    wip
@@ -76,7 +76,7 @@ u32 m_ram_code_page_count = 0;
 u8* g_ram = nullptr; // 2MB RAM
 u32 g_ram_size = 0;
 u32 g_ram_mask = 0;
-u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
+alignas(HOST_PAGE_SIZE) u8 g_bios[BIOS_SIZE]{}; // 512K BIOS ROM
 
 static std::array<TickCount, 3> m_exp1_access_time = {};
 static std::array<TickCount, 3> m_exp2_access_time = {};
@@ -315,9 +315,11 @@ static ALWAYS_INLINE u32 FastmemAddressToLUTPageIndex(u32 address)
   return address >> 12;
 }
 
-static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable)
+static ALWAYS_INLINE_RELEASE void SetLUTFastmemPage(u32 address, u8* ptr, bool writable, u32 read_ticks)
 {
-  m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] = ptr;
+  DebugAssert((reinterpret_cast<uintptr_t>(ptr) & HOST_PAGE_OFFSET_MASK) == 0);
+  m_fastmem_lut[FastmemAddressToLUTPageIndex(address)] =
+    reinterpret_cast<u8*>(reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(read_ticks));
   m_fastmem_lut[FASTMEM_LUT_NUM_PAGES + FastmemAddressToLUTPageIndex(address)] = writable ? ptr : nullptr;
 }
 
@@ -451,21 +453,25 @@ void UpdateFastmemViews(CPUFastmemMode mode)
     for (u32 address = 0; address < g_ram_size; address += HOST_PAGE_SIZE)
     {
       SetLUTFastmemPage(base_address + address, &g_ram[address],
-                        !m_ram_code_bits[FastmemAddressToLUTPageIndex(address)]);
+                        !m_ram_code_bits[FastmemAddressToLUTPageIndex(address)], RAM_READ_TICKS);
     }
   };
 
+  auto MapScratchpad = [](u32 base_address) { SetLUTFastmemPage(base_address, CPU::g_scratchpad.data(), true, 0); };
+
   // KUSEG - cached
   MapRAM(0x00000000);
   MapRAM(0x00200000);
   MapRAM(0x00400000);
   MapRAM(0x00600000);
+  MapScratchpad(0x1F800000);
 
   // KSEG0 - cached
   MapRAM(0x80000000);
   MapRAM(0x80200000);
   MapRAM(0x80400000);
   MapRAM(0x80600000);
+  MapScratchpad(0x8F800000);
 
   // KSEG1 - uncached
   MapRAM(0xA0000000);
@@ -490,7 +496,7 @@ bool CanUseFastmemForAddress(VirtualMemoryAddress address)
 #endif
 
     case CPUFastmemMode::LUT:
-      return (paddr < g_ram_size);
+      return (paddr < g_ram_size) || ((paddr & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION);
 
     case CPUFastmemMode::Disabled:
     default:
@@ -548,7 +554,7 @@ void SetCodePageFastmemProtection(u32 page_index, bool writable)
     // mirrors...
     const u32 ram_address = page_index * HOST_PAGE_SIZE;
     for (u32 mirror_start : m_fastmem_ram_mirrors)
-      SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable);
+      SetLUTFastmemPage(mirror_start + ram_address, &g_ram[ram_address], writable, RAM_READ_TICKS);
   }
 }
 
@@ -576,7 +582,7 @@ void ClearRAMCodePageFlags()
     {
       const u32 addr = (i * HOST_PAGE_SIZE);
       for (u32 mirror_start : m_fastmem_ram_mirrors)
-        SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true);
+        SetLUTFastmemPage(mirror_start + addr, &g_ram[addr], true, RAM_READ_TICKS);
     }
   }
 }
@@ -664,7 +670,7 @@ u8* GetMemoryRegionPointer(MemoryRegion region)
       return nullptr;
 
     case MemoryRegion::Scratchpad:
-      return CPU::g_state.dcache.data();
+      return CPU::g_scratchpad.data();
 
     case MemoryRegion::BIOS:
       return g_bios;
@@ -1451,30 +1457,30 @@ ALWAYS_INLINE static TickCount DoScratchpadAccess(PhysicalMemoryAddress address,
   if constexpr (size == MemoryAccessSize::Byte)
   {
     if constexpr (type == MemoryAccessType::Read)
-      value = ZeroExtend32(g_state.dcache[cache_offset]);
+      value = ZeroExtend32(g_scratchpad[cache_offset]);
     else
-      g_state.dcache[cache_offset] = Truncate8(value);
+      g_scratchpad[cache_offset] = Truncate8(value);
   }
   else if constexpr (size == MemoryAccessSize::HalfWord)
   {
     if constexpr (type == MemoryAccessType::Read)
     {
       u16 temp;
-      std::memcpy(&temp, &g_state.dcache[cache_offset], sizeof(temp));
+      std::memcpy(&temp, &g_scratchpad[cache_offset], sizeof(temp));
       value = ZeroExtend32(temp);
     }
     else
     {
       u16 temp = Truncate16(value);
-      std::memcpy(&g_state.dcache[cache_offset], &temp, sizeof(temp));
+      std::memcpy(&g_scratchpad[cache_offset], &temp, sizeof(temp));
     }
   }
   else if constexpr (size == MemoryAccessSize::Word)
   {
     if constexpr (type == MemoryAccessType::Read)
-      std::memcpy(&value, &g_state.dcache[cache_offset], sizeof(value));
+      std::memcpy(&value, &g_scratchpad[cache_offset], sizeof(value));
     else
-      std::memcpy(&g_state.dcache[cache_offset], &value, sizeof(value));
+      std::memcpy(&g_scratchpad[cache_offset], &value, sizeof(value));
   }
 
   return 0;
@@ -1924,7 +1930,7 @@ void* GetDirectReadMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize
     if (read_ticks)
       *read_ticks = 0;
 
-    return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
+    return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
   }
 
   if (paddr >= BIOS_BASE && paddr < (BIOS_BASE + BIOS_SIZE))
@@ -1955,7 +1961,7 @@ void* GetDirectWriteMemoryPointer(VirtualMemoryAddress address, MemoryAccessSize
 #endif
 
   if ((paddr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
-    return &g_state.dcache[paddr & DCACHE_OFFSET_MASK];
+    return &g_scratchpad[paddr & DCACHE_OFFSET_MASK];
 
   return nullptr;
 }
@@ -46,7 +46,7 @@ static T DoMemoryRead(PhysicalMemoryAddress address)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&result, &CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
+    std::memcpy(&result, &CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
     return result;
   }
 
@@ -74,7 +74,7 @@ static void DoMemoryWrite(PhysicalMemoryAddress address, T value)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], &value, sizeof(value));
+    std::memcpy(&CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], &value, sizeof(value));
     return;
   }
 
@@ -25,6 +25,7 @@ static void Branch(u32 target);
 static void FlushPipeline();
 
 State g_state;
+alignas(HOST_PAGE_SIZE) std::array<u8, DCACHE_SIZE> g_scratchpad;
 bool g_using_interpreter = false;
 bool TRACE_EXECUTION = false;
 
@@ -164,7 +165,7 @@ bool DoState(StateWrapper& sw)
   sw.Do(&g_state.next_load_delay_reg);
   sw.Do(&g_state.next_load_delay_value);
   sw.Do(&g_state.cache_control.bits);
-  sw.DoBytes(g_state.dcache.data(), g_state.dcache.size());
+  sw.DoBytes(g_scratchpad.data(), g_scratchpad.size());
 
   if (!GTE::DoState(sw))
     return false;
@@ -81,7 +81,6 @@ struct State
   u8* fastmem_base = nullptr;
 
   // data cache (used as scratchpad)
-  std::array<u8, DCACHE_SIZE> dcache = {};
   std::array<u32, ICACHE_LINES> icache_tags = {};
   std::array<u8, ICACHE_SIZE> icache_data = {};
 
@@ -90,6 +89,7 @@ struct State
 };
 
 extern State g_state;
+extern std::array<u8, DCACHE_SIZE> g_scratchpad;
 extern bool g_using_interpreter;
 
 void Initialize();
@@ -2868,7 +2868,7 @@ CodeGenerator::SpeculativeValue CodeGenerator::SpeculativeReadMemory(VirtualMemo
   if ((phys_addr & DCACHE_LOCATION_MASK) == DCACHE_LOCATION)
   {
     u32 scratchpad_offset = phys_addr & DCACHE_OFFSET_MASK;
-    std::memcpy(&value, &CPU::g_state.dcache[scratchpad_offset], sizeof(value));
+    std::memcpy(&value, &CPU::g_scratchpad[scratchpad_offset], sizeof(value));
     return value;
   }
 
@@ -1203,9 +1203,10 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   }
 
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12); // offset = addr & 0xfff
  m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
+  m_emit->bic(GetHostReg32(RARG1), GetHostReg32(RARG1), 0xFF); // ptr &= ~cycles
 
   switch (size)
   {
@@ -1249,10 +1250,12 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     address_reg = address.host_reg;
   }
 
-  m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12); // page = addr >> 12
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12); // offset = addr & 0xfff
   m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
+  m_emit->and_(GetHostReg32(RSCRATCH), GetHostReg32(RARG1), 0xFF); // scratch = ptr & cycles
+  m_emit->bic(GetHostReg32(RARG1), GetHostReg32(RARG1), 0xFF); // ptr &= ~cycles
 
   m_register_cache.InhibitAllocation();
   bpi.host_pc = GetCurrentNearCodePointer();
@@ -1285,16 +1288,8 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
   bpi.host_slowmem_pc = GetCurrentFarCodePointer();
   SwitchToFarCode();
 
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
   EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
 
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
   // restore fastmem base state for the next instruction
   if (old_store_fastmem_base)
     fastmem_base = GetFastmemStoreBase();
@@ -1409,7 +1404,7 @@ void CodeGenerator::EmitStoreGuestMemoryFastmem(const CodeBlockInstruction& cbi,
   // TODO: if this gets backpatched, these instructions are wasted
 
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
-  m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
+  m_emit->ubfx(GetHostReg32(RARG2), GetHostReg32(address_reg), 0, 12);
   m_emit->ldr(GetHostReg32(RARG1),
               a32::MemOperand(GetHostReg32(fastmem_base), GetHostReg32(RARG1), a32::LSL, 2)); // pointer load
 
@@ -1368,6 +1368,7 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
   m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
   m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
+  m_emit->and_(GetHostReg64(RARG1), GetHostReg64(RARG1), ~static_cast<u64>(HOST_PAGE_OFFSET_MASK));
 
   switch (size)
   {
@@ -1434,12 +1435,37 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
         UnreachableCode();
         break;
     }
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
+    DebugAssert(m_delayed_cycles_add > 0);
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
+    m_delayed_cycles_add += Bus::RAM_READ_TICKS;
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
+
+    // return to the block code
+    EmitBranch(GetCurrentNearCodePointer(), false);
+
+    SwitchToNearCode();
   }
   else
   {
     m_emit->lsr(GetHostReg32(RARG1), GetHostReg32(address_reg), 12);
     m_emit->and_(GetHostReg32(RARG2), GetHostReg32(address_reg), HOST_PAGE_OFFSET_MASK);
     m_emit->ldr(GetHostReg64(RARG1), a64::MemOperand(GetFastmemBasePtrReg(), GetHostReg32(RARG1), a64::LSL, 3));
+    m_emit->and_(GetHostReg64(RARG3), GetHostReg64(RARG1), static_cast<u64>(HOST_PAGE_OFFSET_MASK));
+    m_emit->and_(GetHostReg64(RARG1), GetHostReg64(RARG1), ~static_cast<u64>(HOST_PAGE_OFFSET_MASK));
 
     bpi.host_pc = GetCurrentNearCodePointer();
 
@@ -1461,29 +1487,24 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
         UnreachableCode();
         break;
     }
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromHostReg(&m_register_cache, RARG3, RegSize_32));
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    // return to the block code
+    EmitBranch(GetCurrentNearCodePointer(), false);
+
+    SwitchToNearCode();
   }
 
-  bpi.host_code_size = static_cast<u32>(
-    static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
-
-  // generate slowmem fallback
-  bpi.host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
-
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  EmitBranch(GetCurrentNearCodePointer(), false);
-
-  SwitchToNearCode();
   m_register_cache.UninhibitAllocation();
 
   m_block->loadstore_backpatch_info.push_back(bpi);
@@ -1838,6 +1838,7 @@ void CodeGenerator::EmitLoadGuestRAMFastmem(const Value& address, RegSize size,
   m_emit->shr(GetHostReg32(RARG1), 12);
   m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
   m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
+  m_emit->and_(GetHostReg64(RARG1), ~static_cast<u32>(HOST_PAGE_OFFSET_MASK));
 
   switch (size)
   {
@@ -1926,6 +1927,35 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
       }
       break;
     }
+
+    // insert nops, we need at least 5 bytes for a relative jump
+    const u32 fastmem_size =
+      static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
+    const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
+    for (u32 i = 0; i < nops; i++)
+      m_emit->nop();
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    m_far_emitter.align(16);
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
+    DebugAssert(m_delayed_cycles_add > 0);
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
+    m_delayed_cycles_add += Bus::RAM_READ_TICKS;
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    EmitAddCPUStructField(offsetof(State, pending_ticks),
+                          Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
+
+    // return to the block code
+    m_emit->jmp(GetCurrentNearCodePointer());
   }
   else
   {
@@ -1937,6 +1967,9 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
     m_emit->shr(GetHostReg32(RARG1), 12);
     m_emit->and_(GetHostReg32(RARG2), HOST_PAGE_OFFSET_MASK);
     m_emit->mov(GetHostReg64(RARG1), m_emit->qword[GetFastmemBasePtrReg() + GetHostReg64(RARG1) * 8]);
+    m_emit->mov(GetHostReg32(RARG3), GetHostReg32(RARG1));
+    m_emit->and_(GetHostReg64(RARG1), ~static_cast<u32>(HOST_PAGE_OFFSET_MASK));
+    m_emit->and_(GetHostReg32(RARG3), static_cast<u32>(HOST_PAGE_OFFSET_MASK));
     bpi.host_pc = GetCurrentNearCodePointer();
 
     switch (size)
@@ -1953,36 +1986,30 @@ void CodeGenerator::EmitLoadGuestMemoryFastmem(const CodeBlockInstruction& cbi,
         m_emit->mov(GetHostReg32(result.host_reg), m_emit->dword[GetHostReg64(RARG1) + GetHostReg64(RARG2)]);
         break;
     }
+
+    m_emit->add(m_emit->dword[GetHostReg64(RCPUPTR) + offsetof(State, pending_ticks)], GetHostReg32(RARG3));
+
+    // insert nops, we need at least 5 bytes for a relative jump
+    const u32 fastmem_size =
+      static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
+    const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
+    for (u32 i = 0; i < nops; i++)
+      m_emit->nop();
+
+    bpi.host_code_size = static_cast<u32>(
+      static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
+
+    // generate slowmem fallback
+    m_far_emitter.align(16);
+    bpi.host_slowmem_pc = GetCurrentFarCodePointer();
+    SwitchToFarCode();
+
+    EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
+
+    // return to the block code
+    m_emit->jmp(GetCurrentNearCodePointer());
   }
 
-  // insert nops, we need at least 5 bytes for a relative jump
-  const u32 fastmem_size =
-    static_cast<u32>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc));
-  const u32 nops = (fastmem_size < 5 ? 5 - fastmem_size : 0);
-  for (u32 i = 0; i < nops; i++)
-    m_emit->nop();
-
-  bpi.host_code_size = static_cast<u32>(
-    static_cast<ptrdiff_t>(static_cast<u8*>(GetCurrentNearCodePointer()) - static_cast<u8*>(bpi.host_pc)));
-
-  // generate slowmem fallback
-  m_far_emitter.align(16);
-  bpi.host_slowmem_pc = GetCurrentFarCodePointer();
-  SwitchToFarCode();
-
-  // we add the ticks *after* the add here, since we counted incorrectly, then correct for it below
-  DebugAssert(m_delayed_cycles_add > 0);
-  EmitAddCPUStructField(offsetof(State, pending_ticks), Value::FromConstantU32(static_cast<u32>(m_delayed_cycles_add)));
-  m_delayed_cycles_add += Bus::RAM_READ_TICKS;
-
-  EmitLoadGuestMemorySlowmem(cbi, address, size, result, true);
-
-  EmitAddCPUStructField(offsetof(State, pending_ticks),
-                        Value::FromConstantU32(static_cast<u32>(-m_delayed_cycles_add)));
-
-  // return to the block code
-  m_emit->jmp(GetCurrentNearCodePointer());
-
   SwitchToNearCode();
   m_register_cache.UninhibitAllocation();
 
@@ -1401,7 +1401,7 @@ static T DoMemoryRead(PhysicalMemoryAddress address)
   if ((address & CPU::DCACHE_LOCATION_MASK) == CPU::DCACHE_LOCATION &&
       (address & CPU::DCACHE_OFFSET_MASK) < CPU::DCACHE_SIZE)
   {
-    std::memcpy(&result, &CPU::g_state.dcache[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
+    std::memcpy(&result, &CPU::g_scratchpad[address & CPU::DCACHE_OFFSET_MASK], sizeof(result));
     return result;
   }
 