wince: 32-bit virtual mem space
use fast mem read/write for x64 and arm64 dynarecs
This commit is contained in:
parent 693a6c97f3
commit 810b8a59da
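The idea behind vmem32: reserve a contiguous 4 GB block of host address space that mirrors the SH4's 32-bit guest address space, so a JITed guest load/store becomes a single base+offset host access. Pages are filled in lazily: the first touch of an unmapped guest page faults, the handler resolves the page through the SH4 TLB and maps the backing RAM/VRAM/ARAM in place, and the faulting instruction simply re-executes. The sketch below shows the reserve-and-fault-in skeleton on POSIX with illustrative names only; the committed code (vmem32.cpp below) additionally handles mirroring, VRAM write protection and SH4 exception delivery.

```cpp
// Minimal sketch of the technique, not the emulator's code (names assumed).
#include <sys/mman.h>
#include <signal.h>
#include <stdint.h>
#include <stdlib.h>

static uint8_t* guest_base;                      // base of the 4 GB window
static int backing_fd = -1;                      // shared-mem fd backing guest RAM
static const uint64_t GUEST_SPACE = 0x100000000ULL;

static void segv_handler(int, siginfo_t* si, void*)
{
	uint8_t* fault = (uint8_t*)si->si_addr;
	if (fault < guest_base || fault >= guest_base + GUEST_SPACE)
		abort();                                 // not a guest access: real crash
	// The guest address is just the offset into the window.
	size_t page = (size_t)(fault - guest_base) & ~(size_t)4095;
	// A real implementation translates 'page' through the guest TLB first;
	// here we map it 1:1 from the backing fd and let the access restart.
	mmap(guest_base + page, 4096, PROT_READ | PROT_WRITE,
	     MAP_SHARED | MAP_FIXED, backing_fd, page);
}

void reserve_guest_space()
{
	// PROT_NONE keeps the whole range reserved but uncommitted.
	guest_base = (uint8_t*)mmap(NULL, GUEST_SPACE, PROT_NONE,
	                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct sigaction sa = {};
	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);
}
```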
@@ -288,6 +288,10 @@
 #define FEAT_HAS_SOFTREND BUILD_COMPILER == COMPILER_VC //GCC wants us to enable sse4 globally to enable intrins
 #endif

+#if HOST_CPU == CPU_X64 || HOST_CPU == CPU_ARM64
+#define HOST_64BIT_CPU
+#endif
+
 #define RAM_SIZE_MAX (32*1024*1024)
 #define VRAM_SIZE_MAX (16*1024*1024)
 #define ARAM_SIZE_MAX (8*1024*1024)
@@ -0,0 +1,396 @@
+/*
+ * vmem32.cpp
+ *
+ *  Created on: Apr 11, 2019
+ *      Author: Flyinghead
+ */
+#include <unordered_set>
+#include "build.h"
+#include "vmem32.h"
+#include "_vmem.h"
+
+#if HOST_OS == OS_WINDOWS
+#include <Windows.h>
+#else
+#include <sys/mman.h>
+#include <sys/stat.h>	/* For mode constants */
+#include <fcntl.h>	/* For O_* constants */
+#include <unistd.h>
+#include <errno.h>
+#ifdef _ANDROID
+#include <linux/ashmem.h>
+#endif
+#endif
+
+#ifndef MAP_NOSYNC
+#define MAP_NOSYNC 0
+#endif
+
+#include "types.h"
+#include "hw/sh4/dyna/ngen.h"
+#include "hw/sh4/modules/mmu.h"
+
+extern bool VramLockedWriteOffset(size_t offset);
+extern cMutex vramlist_lock;
+
+#if HOST_OS == OS_WINDOWS
+extern HANDLE mem_handle;
+#else
+extern int vmem_fd;
+#endif
+
+#define VMEM32_ERROR_NOT_MAPPED 0x100
+
+// FIXME stolen from _vmem.cpp
+#define MAP_RAM_START_OFFSET 0
+#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
+#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
+
+static const u64 VMEM32_SIZE = 0x100000000L;
+static const u64 KERNEL_SPACE = 0x80000000L;
+static const u64 AREA7_ADDRESS = 0x7C000000L;
+
+#define VRAM_PROT_SEGMENT (1024 * 1024)	// vram protection regions are grouped by 1MB segment
+
+u8* vmem32_base;
+unordered_set<u32> vram_mapped_pages;
+vector<vram_block*> vram_blocks[VRAM_SIZE / VRAM_PROT_SEGMENT];
+
+// stats
+u64 vmem32_page_faults;
+u64 vmem32_flush;
+
+static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool write)
+{
+	void* ptr;
+	void* rv;
+
+	//printf("MAP32 %08X w/ %d\n",dst,offset);
+	u32 map_times = addrsz / size;
+#if HOST_OS == OS_WINDOWS
+	rv = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
+	if (rv == NULL)
+		return NULL;
+
+	for (u32 i = 1; i < map_times; i++)
+	{
+		dst += size;
+		ptr = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
+		if (ptr == NULL)
+			return NULL;
+	}
+#else
+	u32 prot = PROT_READ | (write ? PROT_WRITE : 0);
+	rv = mmap(&vmem32_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
+	if (MAP_FAILED == rv)
+	{
+		printf("MAP1 failed %d\n", errno);
+		return NULL;
+	}
+
+	for (u32 i = 1; i < map_times; i++)
+	{
+		dst += size;
+		ptr = mmap(&vmem32_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
+		if (MAP_FAILED == ptr)
+		{
+			printf("MAP2 failed %d\n", errno);
+			return NULL;
+		}
+	}
+#endif
+	return rv;
+}
+
+static void vmem32_unmap_buffer(u32 start, u64 end)
+{
+#if HOST_OS == OS_LINUX
+	mmap(&vmem32_base[start], end - start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
+#elif HOST_OS == OS_WINDOWS
+	VirtualAlloc(&vmem32_base[start], end - start, MEM_RESERVE, PAGE_NOACCESS);
+#else
+#error Unsupported OS
+#endif
+}
+
+static void vmem32_protect_buffer(u32 start, u32 size)
+{
+	verify((start & PAGE_MASK) == 0);
+#if HOST_OS == OS_LINUX
+	mprotect(&vmem32_base[start], size, PROT_READ);
+#elif HOST_OS == OS_WINDOWS
+	DWORD old;
+	VirtualProtect(vmem32_base + start, size, PAGE_READONLY, &old);
+#else
+#error Unsupported OS
+#endif
+}
+
+static void vmem32_unprotect_buffer(u32 start, u32 size)
+{
+	verify((start & PAGE_MASK) == 0);
+#if HOST_OS == OS_LINUX
+	mprotect(&vmem32_base[start], size, PROT_READ | PROT_WRITE);
+#elif HOST_OS == OS_WINDOWS
+	DWORD old;
+	VirtualProtect(vmem32_base + start, size, PAGE_READWRITE, &old);
+#else
+#error Unsupported OS
+#endif
+}
+
+void vmem32_protect_vram(vram_block *block)
+{
+	if (vmem32_base == NULL)
+		return;
+	for (int i = block->start / VRAM_PROT_SEGMENT; i <= block->end / VRAM_PROT_SEGMENT; i++)
+	{
+		vram_blocks[i].push_back(block);
+	}
+}
+void vmem32_unprotect_vram(vram_block *block)
+{
+	if (vmem32_base == NULL)
+		return;
+	for (int page = block->start / VRAM_PROT_SEGMENT; page <= block->end / VRAM_PROT_SEGMENT; page++)
+	{
+		for (int i = 0; i < vram_blocks[page].size(); i++)
+			if (vram_blocks[page][i] == block)
+			{
+				vram_blocks[page].erase(vram_blocks[page].begin() + i);
+				break;
+			}
+	}
+}
+
+static bool vmem32_map_areas()
+{
+	// Aica ram
+	vmem32_map_buffer(0x80800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);	// P1
+	vmem32_map_buffer(0x82800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
+	vmem32_map_buffer(0xA0800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);	// P2
+	vmem32_map_buffer(0xA2800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
+
+	// Vram
+	// Note: this should be mapped read/write but doesn't seem to be used
+	vmem32_map_buffer(0x84000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);	// P1
+	vmem32_map_buffer(0x86000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
+	vmem32_map_buffer(0xA4000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);	// P2
+	vmem32_map_buffer(0xA6000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
+
+	// System ram
+	vmem32_map_buffer(0x8C000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true);	// P1
+	vmem32_map_buffer(0xAC000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true);	// P2
+
+	return true;
+}
+
+static const u32 page_sizes[] = { 1024, 4 * 1024, 64 * 1024, 1024 * 1024 };
+
+static u32 vmem32_paddr_to_offset(u32 address)
+{
+	u32 low_addr = address & 0x1FFFFFFF;
+	switch ((address >> 26) & 7)
+	{
+	case 0:	// area 0
+		// Aica ram
+		if (low_addr >= 0x00800000 && low_addr < 0x00800000 + 0x00800000)
+		{
+			return ((low_addr - 0x00800000) & (ARAM_SIZE - 1)) + MAP_ARAM_START_OFFSET;
+		}
+		else if (low_addr >= 0x02800000 && low_addr < 0x02800000 + 0x00800000)
+		{
+			return low_addr - 0x02800000 + MAP_ARAM_START_OFFSET;
+		}
+		break;
+	case 1:	// area 1
+		// Vram
+		if (low_addr >= 0x04000000 && low_addr < 0x04000000 + 0x01000000)
+		{
+			return ((low_addr - 0x04000000) & (VRAM_SIZE - 1)) + MAP_VRAM_START_OFFSET;
+		}
+		else if (low_addr >= 0x06000000 && low_addr < 0x06000000 + 0x01000000)
+		{
+			return ((low_addr - 0x06000000) & (VRAM_SIZE - 1)) + MAP_VRAM_START_OFFSET;
+		}
+		break;
+	case 3:	// area 3
+		// System ram
+		if (low_addr >= 0x0C000000 && low_addr < 0x0C000000 + 0x04000000)
+		{
+			return ((low_addr - 0x0C000000) & (RAM_SIZE - 1)) + MAP_RAM_START_OFFSET;
+		}
+		break;
+	//case 4:
+		// TODO vram?
+		//break;
+	default:
+		break;
+	}
+	// Unmapped address
+	return -1;
+}
+
+static u32 vmem32_map_mmu(u32 address, bool write)
+{
+#ifndef NO_MMU
+	u32 pa;
+	const TLB_Entry *entry;
+	u32 rc = mmu_full_lookup<false>(address, &entry, pa);
+	if (rc == MMU_ERROR_NONE)
+	{
+		//0X & User mode-> protection violation
+		//if ((entry->Data.PR >> 1) == 0 && p_sh4rcb->cntx.sr.MD == 0)
+		//	return MMU_ERROR_PROTECTED;
+
+		//if (write)
+		//{
+		//	if ((entry->Data.PR & 1) == 0)
+		//		return MMU_ERROR_PROTECTED;
+		//	if (entry->Data.D == 0)
+		//		return MMU_ERROR_FIRSTWRITE;
+		//}
+		u32 page_size = page_sizes[entry->Data.SZ1 * 2 + entry->Data.SZ0];
+		if (page_size == 1024)
+			return VMEM32_ERROR_NOT_MAPPED;
+
+		u32 vpn = (entry->Address.VPN << 10) & ~(page_size - 1);
+		u32 ppn = (entry->Data.PPN << 10) & ~(page_size - 1);
+		u32 offset = vmem32_paddr_to_offset(ppn);
+		if (offset == -1)
+			return VMEM32_ERROR_NOT_MAPPED;
+
+		if (offset >= MAP_VRAM_START_OFFSET && offset < MAP_VRAM_START_OFFSET + VRAM_SIZE)
+		{
+			// Check vram protected regions
+			u32 start = offset - MAP_VRAM_START_OFFSET;
+			if (!vram_mapped_pages.insert(vpn).second)
+			{
+				// page has been mapped already: vram locked write
+				vmem32_unprotect_buffer(address & ~PAGE_MASK, PAGE_SIZE);
+				u32 addr_offset = start + (address & (page_size - 1));
+				VramLockedWriteOffset(addr_offset);
+
+				return MMU_ERROR_NONE;
+			}
+			verify(vmem32_map_buffer(vpn, page_size, offset, page_size, (entry->Data.PR & 1) != 0) != NULL);
+			u32 end = start + page_size;
+			const vector<vram_block *>& blocks = vram_blocks[start / VRAM_PROT_SEGMENT];
+
+			vramlist_lock.Lock();
+			for (int i = blocks.size() - 1; i >= 0; i--)
+			{
+				if (blocks[i]->start < end && blocks[i]->end >= start)
+				{
+					u32 prot_start = max(start, blocks[i]->start);
+					u32 prot_size = min(end, blocks[i]->end + 1) - prot_start;
+					prot_size += prot_start % PAGE_SIZE;
+					prot_start &= ~PAGE_MASK;
+					vmem32_protect_buffer(vpn + (prot_start & (page_size - 1)), prot_size);
+				}
+			}
+			vramlist_lock.Unlock();
+		}
+		else
+			// Not vram
+			verify(vmem32_map_buffer(vpn, page_size, offset, page_size, (entry->Data.PR & 1) != 0) != NULL);
+
+		return MMU_ERROR_NONE;
+	}
+#else
+	u32 rc = MMU_ERROR_PROTECTED;
+#endif
+	return rc;
+}
+
+static u32 vmem32_map_address(u32 address, bool write)
+{
+	u32 area = address >> 29;
+	switch (area)
+	{
+	case 3:	// P0/U0
+		if (address >= AREA7_ADDRESS)
+			// area 7: unmapped
+			return VMEM32_ERROR_NOT_MAPPED;
+		/* no break */
+	case 0:
+	case 1:
+	case 2:
+	case 6:	// P3
+		return vmem32_map_mmu(address, write);
+
+	default:
+		break;
+	}
+	return VMEM32_ERROR_NOT_MAPPED;
+}
+
+#if !defined(NO_MMU) && defined(HOST_64BIT_CPU)
+bool vmem32_handle_signal(void *fault_addr, bool write)
+{
+	if ((u8*)fault_addr < vmem32_base || (u8*)fault_addr >= vmem32_base + VMEM32_SIZE)
+		return false;
+	vmem32_page_faults++;
+	u32 guest_addr = (u8*)fault_addr - vmem32_base;
+	u32 rv = vmem32_map_address(guest_addr, write);
+	//printf("vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d\n", write ? "W" : "R", fault_addr, guest_addr, rv);
+	if (rv == MMU_ERROR_NONE)
+		return true;
+	if (rv == VMEM32_ERROR_NOT_MAPPED)
+		return false;
+	p_sh4rcb->cntx.pc = p_sh4rcb->cntx.exception_pc;
+	DoMMUException(guest_addr, rv, write ? MMU_TT_DWRITE : MMU_TT_DREAD);
+	ngen_HandleException();
+	// not reached
+	return true;
+}
+#endif
+
+void vmem32_flush_mmu()
+{
+	vmem32_flush++;
+	vram_mapped_pages.clear();
+	vmem32_unmap_buffer(0, KERNEL_SPACE);
+	// TODO flush P3?
+}
+
+bool vmem32_init()
+{
+	if (!_nvmem_enabled())
+		return false;
+#ifdef HOST_64BIT_CPU
+#if HOST_OS == OS_LINUX
+	void* rv = mmap(0, VMEM32_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	verify(rv != MAP_FAILED);
+	munmap(rv, VMEM32_SIZE);
+	vmem32_base = (u8*)rv;
+#elif HOST_OS == OS_WINDOWS
+	void* rv = (u8 *)VirtualAlloc(0, VMEM32_SIZE, MEM_RESERVE, PAGE_NOACCESS);
+	if (rv != NULL)
+		VirtualFree(rv, 0, MEM_RELEASE);
+	vmem32_base = (u8*)rv;
+#else
+#error Unsupported OS
+#endif
+
+	vmem32_unmap_buffer(0, VMEM32_SIZE);
+	printf("vmem32_init: allocated %zx bytes from %p to %p\n", VMEM32_SIZE, vmem32_base, vmem32_base + VMEM32_SIZE);
+
+	if (!vmem32_map_areas())
+	{
+		vmem32_term();
+		return false;
+	}
+#endif
+	return true;
+}
+
+void vmem32_term()
+{
+	if (vmem32_base != NULL)
+	{
+#if HOST_OS == OS_WINDOWS
+		// address space was reserved with VirtualAlloc: release, don't munmap
+		VirtualFree(vmem32_base, 0, MEM_RELEASE);
+#else
+		munmap(vmem32_base, VMEM32_SIZE);
+#endif
+		vmem32_base = NULL;
+	}
+}
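Note how `vmem32_map_buffer` maps the same file offset `addrsz / size` times at consecutive destinations: that is what reproduces the Dreamcast's hardware mirrors (8 MB ARAM repeating through a 16 MB window, VRAM and system RAM likewise) without copying anything. In isolation the loop looks like this, as a sketch assuming a POSIX fd-backed buffer:

```cpp
// Sketch: replicate one fd-backed buffer across a larger guest window by
// mapping the same file offset at consecutive host addresses.
#include <sys/mman.h>
#include <stdint.h>

void* map_mirrored(uint8_t* base, uint32_t dst, uint32_t window_size,
                   uint32_t file_offset, uint32_t size, int fd)
{
	void* first = NULL;
	for (uint32_t done = 0; done < window_size; done += size)
	{
		void* p = mmap(base + dst + done, size, PROT_READ | PROT_WRITE,
		               MAP_SHARED | MAP_FIXED, fd, file_offset);
		if (p == MAP_FAILED)
			return NULL;          // caller reports the failure and unwinds
		if (first == NULL)
			first = p;
	}
	return first;                 // address of the first mirror
}
```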
@@ -0,0 +1,11 @@
+#include "types.h"
+
+bool vmem32_init();
+void vmem32_term();
+bool vmem32_handle_signal(void *fault_addr, bool write);
+void vmem32_flush_mmu();
+void vmem32_protect_vram(vram_block *block);
+void vmem32_unprotect_vram(vram_block *block);
+static inline bool vmem32_enabled() {
+	return !settings.dynarec.disable_vmem32;
+}
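This is the entire public surface. Once the window exists, the dynarec fast path is conceptually nothing more than the following sketch; everything hard (TLB lookup, VRAM protection, SH4 exception delivery) hides behind the fault handler reached through `vmem32_handle_signal`:

```cpp
// Sketch: what a generated fast-path load boils down to.
static inline u32 vmem32_read32(u8 *vmem32_base, u32 guest_addr)
{
	// If the page is unmapped or write-protected this access faults; the
	// host signal/exception handler calls vmem32_handle_signal(), which
	// either maps the page (the access restarts) or raises the SH4 MMU
	// exception at the recorded guest PC.
	return *(u32 *)(vmem32_base + guest_addr);
}
```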
@@ -88,12 +88,11 @@ void clear_temp_cache(bool full)

 void recSh4_ClearCache()
 {
-	printf("recSh4:Dynarec Cache clear at %08X free space %d\n",curr_pc, emit_FreeSpace());
 	LastAddr=LastAddr_min;
 	bm_Reset();
 	smc_hotspots.clear();
 	clear_temp_cache(true);

 	printf("recSh4:Dynarec Cache clear at %08X\n",curr_pc);
 }

 void recSh4_Run()

@@ -279,7 +278,7 @@ DynarecCodeEntryPtr rdv_CompilePC(u32 blockcheck_failures)
 		emit_ptr_limit = (u32 *)(TempCodeCache + TEMP_CODE_SIZE);
 		rbi->temp_block = true;
 	}
-	bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
+	bool do_opts = !rbi->temp_block; //((rbi->addr&0x3FFFFFFF)>0x0C010100);
 	rbi->staging_runs=do_opts?100:-100;
 	ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
 	verify(rbi->code!=0);
@@ -100,6 +100,7 @@ extern void (*ngen_FailedToFindBlock)();
 void ngen_mainloop(void* cntx);

 void ngen_GetFeatures(ngen_features* dst);
+void ngen_HandleException();

 //Canonical callback interface
 enum CanonicalParamType
@@ -124,6 +124,8 @@ void Sh4_int_Skip()
 	}
 }

+extern u8 *vmem32_base;
+
 void Sh4_int_Reset(bool Manual)
 {
 	if (sh4_int_bCpuRun)

@@ -148,6 +150,8 @@ void Sh4_int_Reset(bool Manual)
 	old_fpscr=fpscr;
 	UpdateFPSCR();

+	p_sh4rcb->cntx.vmem32_base = vmem32_base;
+
 	//Any more registers have default value ?
 	printf("Sh4 Reset\n");
 }
@@ -2066,7 +2066,7 @@ sh4op(i0000_nnnn_0110_1010)
 {
 	u32 n = GetN(op);
 	r[n] = fpscr.full;
-	UpdateFPSCR();
+	//UpdateFPSCR();
 }

 //sts.l FPSCR,@-<REG_N>
@@ -8,6 +8,7 @@
 #include "../sh4_core.h"
 #include "hw/pvr/pvr_mem.h"
 #include "hw/mem/_vmem.h"
+#include "hw/mem/vmem32.h"
 #include "mmu.h"

 //Types

@@ -41,6 +42,16 @@ void CCN_QACR_write(u32 addr, u32 value)
 	}
 }

+void CCN_PTEH_write(u32 addr, u32 value)
+{
+	CCN_PTEH_type temp;
+	temp.reg_data = value;
+	if (temp.ASID != CCN_PTEH.ASID && vmem32_enabled())
+		vmem32_flush_mmu();
+
+	CCN_PTEH = temp;
+}
+
 void CCN_MMUCR_write(u32 addr, u32 value)
 {
 	CCN_MMUCR_type temp;

@@ -52,6 +63,8 @@ void CCN_MMUCR_write(u32 addr, u32 value)
 	{
 		//sh4_cpu.ResetCache();
 		mmu_flush_table();
+		if (vmem32_enabled())
+			vmem32_flush_mmu();

 		temp.TI = 0;
 	}

@@ -99,7 +112,7 @@ static u32 CCN_PRR_read(u32 addr)
 void ccn_init()
 {
 	//CCN PTEH 0xFF000000 0x1F000000 32 Undefined Undefined Held Held Iclk
-	sh4_rio_reg(CCN,CCN_PTEH_addr,RIO_DATA,32);
+	sh4_rio_reg(CCN,CCN_PTEH_addr,RIO_WF,32,0,&CCN_PTEH_write);

 	//CCN PTEL 0xFF000004 0x1F000004 32 Undefined Undefined Held Held Iclk
 	sh4_rio_reg(CCN,CCN_PTEL_addr,RIO_DATA,32);
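Hooking PTEH writes matters because the lazily created mappings are only valid for the current address-space ID: after the guest OS switches processes, the same virtual page may resolve differently. Rather than tagging mappings per ASID, the user half of the 4 GB window is dropped wholesale and faulted back in on demand. The rule in isolation:

```cpp
// Sketch of the invalidation rule implemented by CCN_PTEH_write above.
void on_asid_change(u32 old_asid, u32 new_asid)
{
	if (old_asid != new_asid)
		vmem32_flush_mmu();	// unmaps 0x00000000-0x7FFFFFFF; pages fault back in
}
```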
@@ -282,6 +282,9 @@ struct Sh4Context

 			int sh4_sched_next;
 			u32 interrupt_pend;
+
+			u32 exception_pc;
+			u8 *vmem32_base;
 		};
 		u64 raw[64-8];
 	};
@@ -29,6 +29,7 @@
 #endif
 #include <unistd.h>
 #include "hw/sh4/dyna/blockmanager.h"
+#include "hw/mem/vmem32.h"

 #include "linux/context.h"

@@ -48,7 +49,7 @@ void sigill_handler(int sn, siginfo_t * si, void *segfault_ctx) {
 	context_from_segfault(&ctx, segfault_ctx);

 	unat pc = (unat)ctx.pc;
-	bool dyna_cde = (pc>(unat)CodeCache) && (pc<(unat)(CodeCache + CODE_SIZE));
+	bool dyna_cde = (pc>(unat)CodeCache) && (pc<(unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE));

 	printf("SIGILL @ %lx -> %p was not in vram, dynacode:%d\n", pc, si->si_addr, dyna_cde);

@@ -64,12 +65,21 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)

 	context_from_segfault(&ctx, segfault_ctx);

-	bool dyna_cde = ((unat)ctx.pc>(unat)CodeCache) && ((unat)ctx.pc<(unat)(CodeCache + CODE_SIZE));
+	bool dyna_cde = ((unat)ctx.pc>(unat)CodeCache) && ((unat)ctx.pc<(unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE));

 	//ucontext_t* ctx=(ucontext_t*)ctxr;
 	//printf("mprot hit @ ptr 0x%08X @@ code: %08X, %d\n",si->si_addr,ctx->uc_mcontext.arm_pc,dyna_cde);

+#if !defined(NO_MMU) && defined(HOST_64BIT_CPU)
+#if HOST_CPU == CPU_ARM64
+	u32 op = *(u32*)ctx.pc;
+	bool write = (op & 0x00400000) == 0;
+#elif HOST_CPU == CPU_X64
+	bool write = false;	// TODO?
+#endif
+	if (vmem32_handle_signal(si->si_addr, write))
+		return;
+#endif
 	if (VramLockedWrite((u8*)si->si_addr) || BM_LockedWrite((u8*)si->si_addr))
 		return;
 #if FEAT_SHREC == DYNAREC_JIT

@@ -91,7 +101,10 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
 		context_to_segfault(&ctx, segfault_ctx);
 	}
 #elif HOST_CPU == CPU_X64
-	//x64 has no rewrite support
+	else if (dyna_cde && ngen_Rewrite((unat&)ctx.pc, 0, 0))
+	{
+		context_to_segfault(&ctx, segfault_ctx);
+	}
 #elif HOST_CPU == CPU_ARM64
 	else if (dyna_cde && ngen_Rewrite(ctx.pc, 0, 0))
 	{
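To raise the correct SH4 exception the handler must classify the faulting access. On arm64 it decodes the faulting host instruction: in the LDR/STR encodings this dynarec emits, bit 22 (0x00400000) is the load bit, so a clear bit means a store. The x64 path punts with `write = false` for now (note the TODO). The test in isolation:

```cpp
// Sketch: read/write classification used above. Valid for the A64 load/store
// forms the JIT emits, not for every A64 instruction.
static inline bool insn_is_write(u32 opcode)
{
	return (opcode & 0x00400000) == 0;	// L bit clear => store
}
```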
@@ -6,6 +6,7 @@
 #include "oslib/oslib.h"
 #include "oslib/audiostream.h"
 #include "hw/mem/_vmem.h"
+#include "hw/mem/vmem32.h"
 #include "stdclass.h"
 #include "cfg/cfg.h"

@@ -140,7 +141,9 @@ void LoadSpecialSettings()
 	extra_depth_game = false;
 	full_mmu_game = false;

-	if (reios_windows_ce)
+	if (reios_windows_ce
+			// Half-life
+			|| !strncmp("MK-51035", reios_product_number, 8))
 	{
 		printf("Enabling Full MMU and Extra depth scaling for Windows CE game\n");
 		settings.rend.ExtraDepthScale = 0.1;

@@ -275,6 +278,13 @@ int reicast_init(int argc, char* argv[])
 		printf("Failed to alloc mem\n");
 		return -1;
 	}
+#ifdef HOST_64BIT_CPU
+	if (!vmem32_init())
+	{
+		printf("Failed to alloc 32-bit mem space\n");
+		return -1;
+	}
+#endif
 	if (ParseCommandLine(argc, argv))
 	{
 		return 69;

@@ -461,6 +471,7 @@ void InitSettings()
 	settings.dynarec.idleskip = true;
 	settings.dynarec.unstable_opt = false;
 	settings.dynarec.safemode = true;
+	settings.dynarec.disable_vmem32 = false;
 	settings.dreamcast.cable = 3;	// TV composite
 	settings.dreamcast.region = 3;	// default
 	settings.dreamcast.broadcast = 4;	// default

@@ -534,6 +545,7 @@ void LoadSettings(bool game_specific)
 	settings.dynarec.idleskip = cfgLoadBool(config_section, "Dynarec.idleskip", settings.dynarec.idleskip);
 	settings.dynarec.unstable_opt = cfgLoadBool(config_section, "Dynarec.unstable-opt", settings.dynarec.unstable_opt);
 	settings.dynarec.safemode = cfgLoadBool(config_section, "Dynarec.safe-mode", settings.dynarec.safemode);
+	settings.dynarec.disable_vmem32 = cfgLoadBool(config_section, "Dynarec.DisableVmem32", settings.dynarec.disable_vmem32);
 	//disable_nvmem can't be loaded, because nvmem init is before cfg load
 	settings.dreamcast.cable = cfgLoadInt(config_section, "Dreamcast.Cable", settings.dreamcast.cable);
 	settings.dreamcast.region = cfgLoadInt(config_section, "Dreamcast.Region", settings.dreamcast.region);

@@ -670,6 +682,7 @@ void SaveSettings()
 	cfgSaveBool("config", "Dynarec.unstable-opt", settings.dynarec.unstable_opt);
 	if (!safemode_game || !settings.dynarec.safemode)
 		cfgSaveBool("config", "Dynarec.safe-mode", settings.dynarec.safemode);
+	cfgSaveBool("config", "Dynarec.DisableVmem32", settings.dynarec.disable_vmem32);
 	cfgSaveInt("config", "Dreamcast.Language", settings.dreamcast.language);
 	cfgSaveBool("config", "aica.LimitFPS", settings.aica.LimitFPS);
 	cfgSaveBool("config", "aica.NoBatch", settings.aica.NoBatch);
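The new fast path can be turned off without rebuilding. Based on the Load/SaveSettings keys above, a plausible config-file entry (file name and section per the usual reicast config layout; not verified against a shipped config) would be:

```ini
[config]
Dynarec.DisableVmem32 = 1
```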
@@ -39,6 +39,7 @@ using namespace vixl::aarch64;
 #include "hw/sh4/dyna/ngen.h"
 #include "hw/sh4/sh4_mem.h"
 #include "hw/sh4/sh4_rom.h"
+#include "hw/mem/vmem32.h"
 #include "arm64_regalloc.h"

 #undef do_sqw_nommu

@@ -185,8 +186,8 @@ void ngen_mainloop(void* v_cntx)
 		"stp x29, x30, [sp, #144]	\n\t"

 		"stp %[cntx], %[cycle_counter], [sp, #-16]!	\n\t"	// Push context, cycle_counter address
-		"mov w27, %[_SH4_TIMESLICE]	\n\t"
-		"str w27, [%[cycle_counter]]	\n\t"
+		"mov w1, %[_SH4_TIMESLICE]	\n\t"
+		"str w1, [%[cycle_counter]]	\n\t"

 		"mov x0, %[jmp_env]	\n\t"	// SETJMP
 		"bl setjmp	\n\t"

@@ -195,15 +196,17 @@ void ngen_mainloop(void* v_cntx)
 		"ldr x28, [sp]	\n\t"	// Set context
 		// w29 is next_pc
 		"ldr w29, [x28, %[pc]]	\n\t"
+		// x27 is vmem32_base
+		"ldr x27, [x28, %[vmem32_base]]	\n\t"
 		"b no_update	\n"

 		".hidden intc_sched	\n\t"
 		".globl intc_sched	\n\t"
 		"intc_sched:	\n\t"
-		"ldr x27, [sp, #8]	\n\t"	// &cycle_counter
-		"ldr w0, [x27]	\n\t"	// cycle_counter
+		"ldr x1, [sp, #8]	\n\t"	// &cycle_counter
+		"ldr w0, [x1]	\n\t"	// cycle_counter
 		"add w0, w0, %[_SH4_TIMESLICE]	\n\t"
-		"str w0, [x27]	\n\t"
+		"str w0, [x1]	\n\t"
 		"mov x29, lr	\n\t"	// Trashing pc here but it will be reset at the end of the block or in DoInterrupts
 		"bl UpdateSystem	\n\t"
 		"mov lr, x29	\n\t"

@@ -260,7 +263,8 @@ void ngen_mainloop(void* v_cntx)
 		[RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
 		[SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
 		[jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
-		[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
+		[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter)),
+		[vmem32_base] "i"(offsetof(Sh4Context, vmem32_base))
 	: "memory"
 	);
 }
@@ -476,10 +480,10 @@ public:
 		regalloc.DoAlloc(block);

 		// scheduler
-		Mov(x27, reinterpret_cast<uintptr_t>(&cycle_counter));
-		Ldr(w0, MemOperand(x27));
+		Mov(x1, reinterpret_cast<uintptr_t>(&cycle_counter));
+		Ldr(w0, MemOperand(x1));
 		Subs(w0, w0, block->guest_cycles);
-		Str(w0, MemOperand(x27));
+		Str(w0, MemOperand(x1));
 		Label cycles_remaining;
 		B(&cycles_remaining, pl);
 		GenCallRuntime(intc_sched);
@@ -568,11 +572,11 @@ public:
 			break;

 		case shop_readm:
-			GenReadMemory(op, i);
+			GenReadMemory(op, i, optimise);
 			break;

 		case shop_writem:
-			GenWriteMemory(op, i);
+			GenWriteMemory(op, i, optimise);
 			break;

 		case shop_sync_sr:
@@ -1073,10 +1077,10 @@ public:

 	void GenWriteMemorySlow(const shil_opcode& op)
 	{
-		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 		if (mmu_enabled())
 			Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

+		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 		u32 size = op.flags & 0x7f;
 		switch (size)
 		{
@@ -1117,7 +1121,10 @@ public:

 	void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
 	{
-		regalloc.DoAlloc(block);
+		this->block = block;
+		// with full mmu, all regs are flushed before mem ops
+		if (!mmu_enabled())
+			regalloc.DoAlloc(block);
 		regalloc.current_opid = opid;
 	}

@@ -1308,14 +1315,14 @@ private:
 		B(&code_label, cond);
 	}

-	void GenReadMemory(const shil_opcode& op, size_t opid)
+	void GenReadMemory(const shil_opcode& op, size_t opid, bool optimise)
 	{
 		if (GenReadMemoryImmediate(op))
 			return;

 		GenMemAddr(op, call_regs[0]);

-		if (GenReadMemoryFast(op, opid))
+		if (optimise && GenReadMemoryFast(op, opid))
 			return;

 		GenReadMemorySlow(op);
@@ -1431,59 +1438,104 @@ private:
 	bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
 	{
 		// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-		if (!_nvmem_enabled() || mmu_enabled())
+		if (!_nvmem_enabled() || (mmu_enabled() && !vmem32_enabled()))
 			return false;

 		Instruction *start_instruction = GetCursorAddress<Instruction *>();

-		// WARNING: the rewrite code relies on having two ops before the memory access
+		const XRegister* base_reg;
+		const XRegister* offset_reg;
+		// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
 		// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
-		Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
-		Bfc(w1, 29, 3);		// addr &= ~0xE0000000
+		if (!mmu_enabled())
+		{
+			Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
+			Bfc(w1, 29, 3);		// addr &= ~0xE0000000
+			base_reg = &x28;
+			offset_reg = &x1;
+		}
+		else
+		{
+			u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
+			// 3 ops before memory access
+			Mov(w8, exception_pc & 0xFFFF);
+			Movk(w8, exception_pc >> 16, 16);
+			Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
+			base_reg = &x27;
+			offset_reg = call_regs64[0];
+		}

 		//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
 		this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;

 		u32 size = op.flags & 0x7f;
-		switch(size)
+		if (regalloc.IsAllocAny(op.rd))
 		{
-		case 1:
-			Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
-			break;
+			switch(size)
+			{
+			case 1:
+				Ldrsb(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				break;

-		case 2:
-			Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1, SXTW));
-			break;
+			case 2:
+				Ldrsh(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				break;

-		case 4:
-			if (!op.rd.is_r32f())
-				Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
-			else
-				Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
-			break;
+			case 4:
+				if (!op.rd.is_r32f())
+					Ldr(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				else
+					Ldr(regalloc.MapVRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				break;

-		case 8:
-			Ldr(x1, MemOperand(x28, x1));
-			break;
-		}
+			case 8:
+				Ldr(x1, MemOperand(*base_reg, *offset_reg));
+				break;
+			}

-		if (size == 8)
-		{
+			if (size == 8)
+			{
 #ifdef EXPLODE_SPANS
-			verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
-			Fmov(regalloc.MapVRegister(op.rd, 0), w1);
-			Lsr(x1, x1, 32);
-			Fmov(regalloc.MapVRegister(op.rd, 1), w1);
+				verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
+				Fmov(regalloc.MapVRegister(op.rd, 0), w1);
+				Lsr(x1, x1, 32);
+				Fmov(regalloc.MapVRegister(op.rd, 1), w1);
 #else
-			Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
+				Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
 #endif
+			}
 		}
+		else
+		{
+			switch(size)
+			{
+			case 1:
+				Ldrsb(w1, MemOperand(*base_reg, *offset_reg));
+				break;
+
+			case 2:
+				Ldrsh(w1, MemOperand(*base_reg, *offset_reg));
+				break;
+
+			case 4:
+				Ldr(w1, MemOperand(*base_reg, *offset_reg));
+				break;
+
+			case 8:
+				Ldr(x1, MemOperand(*base_reg, *offset_reg));
+				break;
+			}
+			if (size == 8)
+				Str(x1, sh4_context_mem_operand(op.rd.reg_ptr()));
+			else
+				Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
+		}
 		EnsureCodeSize(start_instruction, read_memory_rewrite_size);

 		return true;
 	}

-	void GenWriteMemory(const shil_opcode& op, size_t opid)
+	void GenWriteMemory(const shil_opcode& op, size_t opid, bool optimise)
 	{
 		GenMemAddr(op, call_regs[0]);

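The MMU-enabled fast path stores the guest PC of the access into `cntx.exception_pc` (the Mov/Movk/Str trio above) before touching memory. That is what keeps the faulting access precise: if the fault turns out to be a real SH4 MMU exception rather than a missing mapping, the handler can resume the guest at exactly that instruction. Condensed from `vmem32_handle_signal` in vmem32.cpp above, not new logic:

```cpp
// Sketch of the handler-side protocol.
bool on_fast_path_fault(u8 *fault_addr, bool write)
{
	if (fault_addr < vmem32_base || fault_addr >= vmem32_base + VMEM32_SIZE)
		return false;                               // not a vmem32 access
	u32 guest_addr = (u32)(fault_addr - vmem32_base);
	u32 rv = vmem32_map_address(guest_addr, write);
	if (rv == MMU_ERROR_NONE)
		return true;                                // page mapped: access restarts
	if (rv == VMEM32_ERROR_NOT_MAPPED)
		return false;                               // let the other handlers look
	p_sh4rcb->cntx.pc = p_sh4rcb->cntx.exception_pc;	// precise resume point
	DoMMUException(guest_addr, rv, write ? MMU_TT_DWRITE : MMU_TT_DREAD);
	ngen_HandleException();                         // longjmp back to the mainloop
	return true;                                    // not reached
}
```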
@@ -1502,7 +1554,7 @@ private:
 			shil_param_to_host_reg(op.rs2, *call_regs64[1]);
 #endif
 		}
-		if (GenWriteMemoryFast(op, opid))
+		if (optimise && GenWriteMemoryFast(op, opid))
 			return;

 		GenWriteMemorySlow(op);
@@ -1511,15 +1563,31 @@ private:
 	bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
 	{
 		// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-		if (!_nvmem_enabled() || mmu_enabled())
+		if (!_nvmem_enabled() || (mmu_enabled() && !vmem32_enabled()))
 			return false;

 		Instruction *start_instruction = GetCursorAddress<Instruction *>();

-		// WARNING: the rewrite code relies on having two ops before the memory access
+		const XRegister* base_reg;
+		const XRegister* offset_reg;
+		// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
 		// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
-		Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
-		Bfc(w7, 29, 3);		// addr &= ~0xE0000000
+		if (!mmu_enabled())
+		{
+			Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
+			Bfc(w7, 29, 3);		// addr &= ~0xE0000000
+			base_reg = &x28;
+			offset_reg = &x7;
+		}
+		else
+		{
+			u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
+			Mov(w8, exception_pc & 0xFFFF);
+			Movk(w8, exception_pc >> 16, 16);
+			Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
+			base_reg = &x27;
+			offset_reg = call_regs64[0];
+		}

 		//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
 		this->block->memory_accesses[GetCursorAddress<void *>()] = (u32)opid;
@@ -1528,19 +1596,19 @@ private:
 		switch(size)
 		{
 		case 1:
-			Strb(w1, MemOperand(x28, x7, SXTW));
+			Strb(w1, MemOperand(*base_reg, *offset_reg));
 			break;

 		case 2:
-			Strh(w1, MemOperand(x28, x7, SXTW));
+			Strh(w1, MemOperand(*base_reg, *offset_reg));
 			break;

 		case 4:
-			Str(w1, MemOperand(x28, x7));
+			Str(w1, MemOperand(*base_reg, *offset_reg));
 			break;

 		case 8:
-			Str(x1, MemOperand(x28, x7));
+			Str(x1, MemOperand(*base_reg, *offset_reg));
 			break;
 		}
 		EnsureCodeSize(start_instruction, write_memory_rewrite_size);
@@ -1699,7 +1767,7 @@ private:
 	RuntimeBlockInfo* block = NULL;
 	const int read_memory_rewrite_size = 6;	// worst case for u64: add, bfc, ldr, fmov, lsr, fmov
 	// FIXME rewrite size per read/write size?
-	const int write_memory_rewrite_size = 3;
+	const int write_memory_rewrite_size = 4;
 };

 static Arm64Assembler* compiler;
@@ -1755,7 +1823,7 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
 	u32 opid = it->second;
 	verify(opid < block->oplist.size());
 	const shil_opcode& op = block->oplist[opid];
-	Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2);	// Skip the 2 preceding ops (bic, add)
+	Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2 - (mmu_enabled() ? 1 : 0));	// Skip the 2 preceding ops (bic, add)
 	assembler->InitializeRewrite(block, opid);
 	if (op.op == shop_readm)
 		assembler->GenReadMemorySlow(op);

@@ -1763,11 +1831,16 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
 		assembler->GenWriteMemorySlow(op);
 	assembler->Finalize(true);
 	delete assembler;
-	host_pc = (unat)(code_ptr - 2);
+	host_pc = (unat)(code_ptr - 2 - (mmu_enabled() ? 1 : 0));

 	return true;
 }

+void ngen_HandleException()
+{
+	longjmp(jmp_env, 1);
+}
+
 u32 DynaRBI::Relink()
 {
 	if (mmu_enabled())
@@ -19,6 +19,7 @@
 #include "hw/sh4/sh4_core.h"
 #include "hw/sh4/sh4_mem.h"
 #include "hw/sh4/sh4_rom.h"
+#include "hw/mem/vmem32.h"
 #include "emitter/x86_emitter.h"
 #include "profiler/profiler.h"
 #include "oslib/oslib.h"
@@ -130,7 +131,6 @@ WIN32_ONLY( ".seh_pushreg %r14	\n\t")
 		"lea " _U "jmp_env(%rip), %rdi	\n\t"
 #endif
 		"call " _U "setjmp	\n\t"
-//		"testl %rax, %rax	\n\t"

 	"1:	\n\t"	// run_loop
 		"movq " _U "p_sh4rcb(%rip), %rax	\n\t"
@@ -219,13 +219,8 @@ static void ngen_blockcheckfail(u32 pc) {
 	rdv_BlockCheckFail(pc);
 }

-static u32 exception_raised;
-
-template<typename T>
-static T ReadMemNoEx(u32 addr, u32 pc)
+static void handle_mem_exception(u32 exception_raised, u32 pc)
 {
-#ifndef NO_MMU
-	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
 	if (exception_raised)
 	{
 		if (pc & 1)

@@ -233,8 +228,19 @@ static T ReadMemNoEx(u32 addr, u32 pc)
 			spc = pc - 1;
 		else
 			spc = pc;
+		cycle_counter += CPU_RATIO * 2;	// probably more is needed but no easy way to find out
 		longjmp(jmp_env, 1);
 	}
+}
+
+template<typename T>
+static T ReadMemNoEx(u32 addr, u32 pc)
+{
+#ifndef NO_MMU
+	u32 exception_raised;
+	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
+	handle_mem_exception(exception_raised, pc);
+
 	return rv;
 #else
 	// not used
@@ -246,32 +252,30 @@
 static void WriteMemNoEx(u32 addr, T data, u32 pc)
 {
 #ifndef NO_MMU
-	exception_raised = mmu_WriteMemNoEx<T>(addr, data);
-	if (exception_raised)
-	{
-		if (pc & 1)
-			// Delay slot
-			spc = pc - 1;
-		else
-			spc = pc;
-		longjmp(jmp_env, 1);
-	}
+	u32 exception_raised = mmu_WriteMemNoEx<T>(addr, data);
+	handle_mem_exception(exception_raised, pc);
 #endif
 }

+static void handle_sh4_exception(SH4ThrownException& ex, u32 pc)
+{
+	if (pc & 1)
+	{
+		// Delay slot
+		AdjustDelaySlotException(ex);
+		pc--;
+	}
+	Do_Exception(pc, ex.expEvn, ex.callVect);
+	cycle_counter += CPU_RATIO * 4;	// probably more is needed
+	longjmp(jmp_env, 1);
+}
+
 static void interpreter_fallback(u16 op, OpCallFP *oph, u32 pc)
 {
 	try {
 		oph(op);
 	} catch (SH4ThrownException& ex) {
-		if (pc & 1)
-		{
-			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		longjmp(jmp_env, 1);
+		handle_sh4_exception(ex, pc);
 	}
 }

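A detail worth noting in these helpers: the `pc` handed to the runtime callouts has its low bit set when the op sits in a branch delay slot (SH4 PCs are even, so the bit is free). The handlers strip it to resume at the branch itself and to adjust the raised exception accordingly. In isolation:

```cpp
// Sketch: the delay-slot tag carried in the low bit of the passed-in pc.
static inline u32 resume_pc(u32 tagged_pc)
{
	// Bit 0 set means "op was in a delay slot": resume at the branch.
	return (tagged_pc & 1) ? tagged_pc - 1 : tagged_pc;
}
```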
@@ -279,16 +283,8 @@ static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
 {
 	try {
 		do_sqw_mmu(addr);
-		exception_raised = 0;
 	} catch (SH4ThrownException& ex) {
-		if (pc & 1)
-		{
-			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
+		handle_sh4_exception(ex, pc);
 	}
 }

@@ -300,7 +296,9 @@ static void do_sqw_nommu_local(u32 addr, u8* sqb)
 class BlockCompiler : public Xbyak::CodeGenerator
 {
 public:
-	BlockCompiler() : Xbyak::CodeGenerator(emit_FreeSpace(), emit_GetCCPtr()), regalloc(this)
+	BlockCompiler() : BlockCompiler((u8 *)emit_GetCCPtr()) {}
+
+	BlockCompiler(u8 *code_ptr) : Xbyak::CodeGenerator(emit_FreeSpace(), code_ptr), regalloc(this)
 	{
 #if HOST_OS == OS_WINDOWS
 		call_regs.push_back(ecx);
@@ -333,17 +331,11 @@ public:
 	void compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise)
 	{
 		//printf("X86_64 compiling %08x to %p\n", block->addr, emit_GetCCPtr());
+		current_opid = -1;
 		if (force_checks) {
 			CheckBlock(block);
 		}
-		regalloc.DoAlloc(block);

-		sub(dword[rip + &cycle_counter], block->guest_cycles);
-#ifdef PROFILING
-		mov(rax, (uintptr_t)&guest_cpu_cycles);
-		mov(ecx, block->guest_cycles);
-		add(qword[rax], rcx);
-#endif
 #ifdef _WIN32
 		sub(rsp, 0x28);		// 32-byte shadow space + 8 byte alignment
 #else
@@ -364,6 +356,13 @@ public:
 			jmp(exit_block, T_NEAR);
 			L(fpu_enabled);
 		}
+		sub(dword[rip + &cycle_counter], block->guest_cycles);
+#ifdef PROFILING
+		mov(rax, (uintptr_t)&guest_cpu_cycles);
+		mov(ecx, block->guest_cycles);
+		add(qword[rax], rcx);
+#endif
+		regalloc.DoAlloc(block);

 		for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
 		{
@@ -440,98 +439,7 @@ public:
 			break;

 		case shop_readm:
-			{
-				u32 size = op.flags & 0x7f;
-				bool immediate_address = op.rs1.is_imm();
-				u32 addr = op.rs1._imm;
-				if (immediate_address && mmu_enabled())
-				{
-					if ((op.rs1._imm >> 12) != (block->vaddr >> 12))
-					{
-						// When full mmu is on, only consider addresses in the same 4k page
-						immediate_address = false;
-					}
-					else
-					{
-						u32 paddr;
-						u32 rv;
-						if (size == 2)
-							rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
-						else if (size == 4)
-							rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
-						else
-							die("Invalid immediate size");
-						if (rv != MMU_ERROR_NONE)
-							immediate_address = false;
-						else
-							addr = paddr;
-					}
-				}
-				if (immediate_address)
-				{
-					bool isram = false;
-					void* ptr = _vmem_read_const(addr, isram, size);
-
-					if (isram)
-					{
-						// Immediate pointer to RAM: super-duper fast access
-						mov(rax, reinterpret_cast<uintptr_t>(ptr));
-						switch (size)
-						{
-						case 2:
-							if (regalloc.IsAllocg(op.rd))
-								movsx(regalloc.MapRegister(op.rd), word[rax]);
-							else
-							{
-								movsx(eax, word[rax]);
-								mov(rcx, (uintptr_t)op.rd.reg_ptr());
-								mov(dword[rcx], eax);
-							}
-							break;
-
-						case 4:
-							if (regalloc.IsAllocg(op.rd))
-								mov(regalloc.MapRegister(op.rd), dword[rax]);
-							else if (regalloc.IsAllocf(op.rd))
-								movd(regalloc.MapXRegister(op.rd), dword[rax]);
-							else
-							{
-								mov(eax, dword[rax]);
-								mov(rcx, (uintptr_t)op.rd.reg_ptr());
-								mov(dword[rcx], eax);
-							}
-							break;
-
-						default:
-							die("Invalid immediate size");
-							break;
-						}
-					}
-					else
-					{
-						// Not RAM: the returned pointer is a memory handler
-						mov(call_regs[0], addr);
-
-						switch(size)
-						{
-						case 2:
-							GenCall((void (*)())ptr);
-							movsx(ecx, ax);
-							break;
-
-						case 4:
-							GenCall((void (*)())ptr);
-							mov(ecx, eax);
-							break;
-
-						default:
-							die("Invalid immediate size");
-							break;
-						}
-						host_reg_to_shil_param(op.rd, ecx);
-					}
-				}
-				else
+			if (!GenReadMemImmediate(op, block))
 			{
 				// Not an immediate address
 				shil_param_to_host_reg(op.rs1, call_regs[0]);
@@ -547,47 +455,10 @@ public:
 					add(call_regs[0], dword[rax]);
 				}
 			}
-			if (mmu_enabled())
-				mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
-
-			if (size == 1) {
-				if (!mmu_enabled())
-					GenCall(ReadMem8);
-				else
-					GenCall(ReadMemNoEx<u8>);
-				movsx(ecx, al);
-			}
-			else if (size == 2) {
-				if (!mmu_enabled())
-					GenCall(ReadMem16);
-				else
-					GenCall(ReadMemNoEx<u16>);
-				movsx(ecx, ax);
-			}
-			else if (size == 4) {
-				if (!mmu_enabled())
-					GenCall(ReadMem32);
-				else
-					GenCall(ReadMemNoEx<u32>);
-				mov(ecx, eax);
-			}
-			else if (size == 8) {
-				if (!mmu_enabled())
-					GenCall(ReadMem64);
-				else
-					GenCall(ReadMemNoEx<u64>);
-				mov(rcx, rax);
-			}
-			else {
-				die("1..8 bytes");
-			}
-
-			// if (mmu_enabled())
-			// {
-			//	test(dword[(void *)&exception_raised], 1);
-			//	jnz(exit_block, T_NEAR);
-			// }
+			if (!optimise || !GenReadMemoryFast(op, block))
+				GenReadMemorySlow(op, block);

+			u32 size = op.flags & 0x7f;
 			if (size != 8)
 				host_reg_to_shil_param(op.rd, ecx);
 			else {
@@ -606,12 +477,10 @@ public:
 					}
 				}
 			}
-		}
-		break;
+			break;

 		case shop_writem:
 			{
-				u32 size = op.flags & 0x7f;
 				shil_param_to_host_reg(op.rs1, call_regs[0]);
 				if (!op.rs3.is_null())
 				{
@@ -626,6 +495,7 @@ public:
 				}
 			}

+			u32 size = op.flags & 0x7f;
 			if (size != 8)
 				shil_param_to_host_reg(op.rs2, call_regs[1]);
 			else {
@@ -644,42 +514,8 @@ public:
 				mov(call_regs64[1], qword[rax]);
 			}
 		}
-			if (mmu_enabled())
-				mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
-
-			if (size == 1) {
-				if (!mmu_enabled())
-					GenCall(WriteMem8);
-				else
-					GenCall(WriteMemNoEx<u8>);
-			}
-			else if (size == 2) {
-				if (!mmu_enabled())
-					GenCall(WriteMem16);
-				else
-					GenCall(WriteMemNoEx<u16>);
-			}
-			else if (size == 4) {
-				if (!mmu_enabled())
-					GenCall(WriteMem32);
-				else
-					GenCall(WriteMemNoEx<u32>);
-			}
-			else if (size == 8) {
-				if (!mmu_enabled())
-					GenCall(WriteMem64);
-				else
-					GenCall(WriteMemNoEx<u64>);
-			}
-			else {
-				die("1..8 bytes");
-			}
-
-			// if (mmu_enabled())
-			// {
-			//	test(dword[(void *)&exception_raised], 1);
-			//	jnz(exit_block, T_NEAR);
-			// }
+			if (!optimise || !GenWriteMemoryFast(op, block))
+				GenWriteMemorySlow(op, block);
 			}
 			break;

@@ -729,8 +565,8 @@ public:
 			mov(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1)); \
 			if (op.rs2.is_imm()) \
 				natop(regalloc.MapRegister(op.rd), op.rs2._imm); \
-			else if (op.rs2.is_reg()) \
-				natop(regalloc.MapRegister(op.rd), Xbyak::Reg8(regalloc.MapRegister(op.rs2).getIdx()));
+			else \
+				die("Unsupported operand");
 		case shop_shl:
 			SHIFT_OP(shl)
 			break;
@@ -925,9 +761,6 @@ public:
 			mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc

 			GenCall(do_sqw_mmu_no_ex);
-
-			test(dword[(void *)&exception_raised], 1);
-			jnz(exit_block, T_NEAR);
 		}
 		else
 		{
@@ -1163,6 +996,7 @@ public:
 			regalloc.OpEnd(&op);
 		}
 		regalloc.Cleanup();
+		current_opid = -1;

 		mov(rax, (size_t)&next_pc);

@@ -1242,6 +1076,118 @@ public:
 		emit_Skip(getSize());
 	}

+	void GenReadMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
+	{
+		const u8 *start_addr = getCurr();
+		if (mmu_enabled())
+			mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
+
+		u32 size = op.flags & 0x7f;
+		switch (size) {
+		case 1:
+			if (!mmu_enabled())
+				GenCall(ReadMem8);
+			else
+				GenCall(ReadMemNoEx<u8>);
+			movsx(ecx, al);
+			break;
+		case 2:
+			if (!mmu_enabled())
+				GenCall(ReadMem16);
+			else
+				GenCall(ReadMemNoEx<u16>);
+			movsx(ecx, ax);
+			break;
+
+		case 4:
+			if (!mmu_enabled())
+				GenCall(ReadMem32);
+			else
+				GenCall(ReadMemNoEx<u32>);
+			mov(ecx, eax);
+			break;
+		case 8:
+			if (!mmu_enabled())
+				GenCall(ReadMem64);
+			else
+				GenCall(ReadMemNoEx<u64>);
+			mov(rcx, rax);
+			break;
+		default:
+			die("1..8 bytes");
+		}
+
+		if (mmu_enabled())
+		{
+			Xbyak::Label quick_exit;
+			if (getCurr() - start_addr <= read_mem_op_size - 6)
+				jmp(quick_exit, T_NEAR);
+			while (getCurr() - start_addr < read_mem_op_size)
+				nop();
+			L(quick_exit);
+			verify(getCurr() - start_addr == read_mem_op_size);
+		}
+	}
+
+	void GenWriteMemorySlow(const shil_opcode& op, RuntimeBlockInfo* block)
+	{
+		const u8 *start_addr = getCurr();
+		if (mmu_enabled())
+			mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
+
+		u32 size = op.flags & 0x7f;
+		switch (size) {
+		case 1:
+			if (!mmu_enabled())
+				GenCall(WriteMem8);
+			else
+				GenCall(WriteMemNoEx<u8>);
+			break;
+		case 2:
+			if (!mmu_enabled())
+				GenCall(WriteMem16);
+			else
+				GenCall(WriteMemNoEx<u16>);
+			break;
+		case 4:
+			if (!mmu_enabled())
+				GenCall(WriteMem32);
+			else
+				GenCall(WriteMemNoEx<u32>);
+			break;
+		case 8:
+			if (!mmu_enabled())
+				GenCall(WriteMem64);
+			else
+				GenCall(WriteMemNoEx<u64>);
+			break;
+		default:
+			die("1..8 bytes");
+		}
+		if (mmu_enabled())
+		{
+			Xbyak::Label quick_exit;
+			if (getCurr() - start_addr <= write_mem_op_size - 6)
+				jmp(quick_exit, T_NEAR);
+			while (getCurr() - start_addr < write_mem_op_size)
+				nop();
+			L(quick_exit);
+			verify(getCurr() - start_addr == write_mem_op_size);
+		}
+	}
+
+	void InitializeRewrite(RuntimeBlockInfo *block, size_t opid)
+	{
+		// shouldn't be necessary since all regs are flushed before mem access when mmu is enabled
+		//regalloc.DoAlloc(block);
+		regalloc.current_opid = opid;
+	}
+
+	void FinalizeRewrite()
+	{
+		ready();
+	}
+
 	void ngen_CC_Start(const shil_opcode& op)
 	{
 		CC_pars.clear();

@@ -1346,16 +1292,188 @@ private:
 	typedef void (BlockCompiler::*X64BinaryOp)(const Xbyak::Operand&, const Xbyak::Operand&);
 	typedef void (BlockCompiler::*X64BinaryFOp)(const Xbyak::Xmm&, const Xbyak::Operand&);

+	bool GenReadMemImmediate(const shil_opcode& op, RuntimeBlockInfo* block)
+	{
+		if (!op.rs1.is_imm())
+			return false;
+		u32 size = op.flags & 0x7f;
+		u32 addr = op.rs1._imm;
+		if (mmu_enabled())
+		{
+			if ((addr >> 12) != (block->vaddr >> 12))
+				// When full mmu is on, only consider addresses in the same 4k page
+				return false;
+
+			u32 paddr;
+			u32 rv;
+			if (size == 2)
+				rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+			else if (size == 4)
+				rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+			else
+				die("Invalid immediate size");
+			if (rv != MMU_ERROR_NONE)
+				return false;
+
+			addr = paddr;
+		}
+		bool isram = false;
+		void* ptr = _vmem_read_const(addr, isram, size);
+
+		if (isram)
+		{
+			// Immediate pointer to RAM: super-duper fast access
+			mov(rax, reinterpret_cast<uintptr_t>(ptr));
+			switch (size)
+			{
+			case 2:
+				if (regalloc.IsAllocg(op.rd))
+					movsx(regalloc.MapRegister(op.rd), word[rax]);
+				else
+				{
+					movsx(eax, word[rax]);
+					mov(rcx, (uintptr_t)op.rd.reg_ptr());
+					mov(dword[rcx], eax);
+				}
+				break;
+
+			case 4:
+				if (regalloc.IsAllocg(op.rd))
+					mov(regalloc.MapRegister(op.rd), dword[rax]);
+				else if (regalloc.IsAllocf(op.rd))
+					movd(regalloc.MapXRegister(op.rd), dword[rax]);
+				else
+				{
+					mov(eax, dword[rax]);
+					mov(rcx, (uintptr_t)op.rd.reg_ptr());
+					mov(dword[rcx], eax);
+				}
+				break;
+
+			default:
+				die("Invalid immediate size");
+				break;
+			}
+		}
+		else
+		{
+			// Not RAM: the returned pointer is a memory handler
+			mov(call_regs[0], addr);
+
+			switch(size)
+			{
+			case 2:
+				GenCall((void (*)())ptr);
+				movsx(ecx, ax);
+				break;
+
+			case 4:
+				GenCall((void (*)())ptr);
+				mov(ecx, eax);
+				break;
+
+			default:
+				die("Invalid immediate size");
+				break;
+			}
+			host_reg_to_shil_param(op.rd, ecx);
+		}
+
+		return true;
+	}
+
+	bool GenReadMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
+	{
+		if (!mmu_enabled() || !vmem32_enabled())
+			return false;
+		const u8 *start_addr = getCurr();
+
+		mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
+		mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
+
+		mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
+
+		u32 size = op.flags & 0x7f;
+		verify(getCurr() - start_addr == 26);
+
+		block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
+		switch (size)
+		{
+		case 1:
+			movsx(ecx, byte[rax + call_regs64[0]]);
+			break;
+
+		case 2:
+			movsx(ecx, word[rax + call_regs64[0]]);
+			break;
+
+		case 4:
+			mov(ecx, dword[rax + call_regs64[0]]);
+			break;
+
+		case 8:
+			mov(rcx, qword[rax + call_regs64[0]]);
+			break;
+
+		default:
+			die("1..8 bytes");
+		}
+
+		while (getCurr() - start_addr < read_mem_op_size)
+			nop();
+		verify(getCurr() - start_addr == read_mem_op_size);
+
+		return true;
+	}
+
+	bool GenWriteMemoryFast(const shil_opcode& op, RuntimeBlockInfo* block)
+	{
+		if (!mmu_enabled() || !vmem32_enabled())
+			return false;
+		const u8 *start_addr = getCurr();
+
+		mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
+		mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
+
+		mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
+
+		u32 size = op.flags & 0x7f;
+		verify(getCurr() - start_addr == 26);
+
+		block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
+		switch (size)
+		{
+		case 1:
+			mov(byte[rax + call_regs64[0] + 0], Xbyak::Reg8(call_regs[1].getIdx(), call_regs[1] == edi || call_regs[1] == esi));
+			break;
+
+		case 2:
+			mov(word[rax + call_regs64[0]], Xbyak::Reg16(call_regs[1].getIdx()));
+			break;
+
+		case 4:
+			mov(dword[rax + call_regs64[0]], call_regs[1]);
+			break;
+
+		case 8:
+			mov(qword[rax + call_regs64[0]], call_regs64[1]);
+			break;
+
+		default:
+			die("1..8 bytes");
+		}
+
+		while (getCurr() - start_addr < write_mem_op_size)
+			nop();
+		verify(getCurr() - start_addr == write_mem_op_size);
+
+		return true;
+	}
+
 	void CheckBlock(RuntimeBlockInfo* block) {
 		mov(call_regs[0], block->addr);

 //		if (mmu_enabled() && block->asid != 0xFFFFFFFF)
 //		{
 //			mov(rax, (uintptr_t)&CCN_PTEH.reg_data);
 //			cmp(byte[rax], block->asid);
 //			jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
 //		}
-		// FIXME Neither of these tests should be necessary
+		// FIXME This test shouldn't be necessary
 		// However the decoder makes various assumptions about the current PC value, which are simply not
 		// true in a virtualized memory model. So this can only work if virtual and phy addresses are the
 		// same at compile and run times.

@@ -1424,10 +1542,10 @@ private:
 	void GenCall(Ret(*function)(Params...))
 	{
 #ifndef _WIN32
-		bool xmm8_mapped = regalloc.IsMapped(xmm8, current_opid);
-		bool xmm9_mapped = regalloc.IsMapped(xmm9, current_opid);
-		bool xmm10_mapped = regalloc.IsMapped(xmm10, current_opid);
-		bool xmm11_mapped = regalloc.IsMapped(xmm11, current_opid);
+		bool xmm8_mapped = current_opid != -1 && regalloc.IsMapped(xmm8, current_opid);
+		bool xmm9_mapped = current_opid != -1 && regalloc.IsMapped(xmm9, current_opid);
+		bool xmm10_mapped = current_opid != -1 && regalloc.IsMapped(xmm10, current_opid);
+		bool xmm11_mapped = current_opid != -1 && regalloc.IsMapped(xmm11, current_opid);

 		// Need to save xmm registers as they are not preserved in linux/mach
 		int offset = 0;

@@ -1587,11 +1705,15 @@ private:
 	static const u32 float_sign_mask;
 	static const u32 float_abs_mask;
 	static const f32 cvtf2i_pos_saturation;
+	static const u32 read_mem_op_size;
+	static const u32 write_mem_op_size;
 };

 const u32 BlockCompiler::float_sign_mask = 0x80000000;
 const u32 BlockCompiler::float_abs_mask = 0x7fffffff;
 const f32 BlockCompiler::cvtf2i_pos_saturation = 2147483520.0f;	// IEEE 754: 0x4effffff;
+const u32 BlockCompiler::read_mem_op_size = 30;
+const u32 BlockCompiler::write_mem_op_size = 30;

 void X64RegAlloc::Preload(u32 reg, Xbyak::Operand::Code nreg)
 {

@@ -1641,4 +1763,47 @@ void ngen_CC_Call(shil_opcode* op, void* function)
 void ngen_CC_Finish(shil_opcode* op)
 {
 }
+
+bool ngen_Rewrite(unat& host_pc, unat, unat)
+{
+	if (!mmu_enabled() || !vmem32_enabled())
+		return false;
+
+	//printf("ngen_Rewrite pc %p\n", host_pc);
+	RuntimeBlockInfo *block = bm_GetBlock((void *)host_pc);
+	if (block == NULL)
+	{
+		printf("ngen_Rewrite: Block at %p not found\n", (void *)host_pc);
+		return false;
+	}
+	u8 *code_ptr = (u8*)host_pc;
+	auto it = block->memory_accesses.find(code_ptr);
+	if (it == block->memory_accesses.end())
+	{
+		printf("ngen_Rewrite: memory access at %p not found (%zu entries)\n", code_ptr, block->memory_accesses.size());
+		return false;
+	}
+	u32 opid = it->second;
+	verify(opid < block->oplist.size());
+	const shil_opcode& op = block->oplist[opid];
+
+	BlockCompiler *assembler = new BlockCompiler(code_ptr - 26);
+	assembler->InitializeRewrite(block, opid);
+	if (op.op == shop_readm)
+		assembler->GenReadMemorySlow(op, block);
+	else
+		assembler->GenWriteMemorySlow(op, block);
+	assembler->FinalizeRewrite();
+	verify(block->host_code_size >= assembler->getSize());
+	delete assembler;
+	block->memory_accesses.erase(it);
+	host_pc = (unat)(code_ptr - 26);
+
+	return true;
+}
+
+void ngen_HandleException()
+{
+	longjmp(jmp_env, 1);
+}
 #endif
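The x64 constants are worth decoding. `mov rax, imm64` encodes to 10 bytes and `mov dword [rax], imm32` to 6, so the exception-PC store plus the base-pointer load above is exactly 10 + 6 + 10 = 26 bytes; that is what `verify(getCurr() - start_addr == 26)` checks and why `ngen_Rewrite` rebuilds from `code_ptr - 26`. Fast and slow sequences are then nop-padded (with the `quick_exit` jump skipping the filler at run time) to the same `read_mem_op_size`/`write_mem_op_size` of 30 bytes, so a faulting fast access can be overwritten in place with the slow call without moving any surrounding code. The byte budget as a sketch:

```cpp
// Sketch: x64 fast-path byte accounting (encoding sizes per the x86-64 ISA).
const int MOV_RAX_IMM64   = 10;	// REX.W B8+r imm64
const int MOV_MEM32_IMM32 = 6;	// C7 /0 imm32, [rax] addressing
const int FAST_PROLOGUE   = MOV_RAX_IMM64 + MOV_MEM32_IMM32 + MOV_RAX_IMM64;	// = 26
const int FAST_OP_SIZE    = 30;	// prologue + access instruction + nop padding
```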
|
|
@ -7,6 +7,8 @@
|
|||
#include "TexCache.h"
|
||||
#include "hw/pvr/pvr_regs.h"
|
||||
#include "hw/mem/_vmem.h"
|
||||
#include "hw/mem/vmem32.h"
|
||||
#include "hw/sh4/modules/mmu.h"
|
||||
#include "deps/xbrz/xbrz.h"
|
||||
#include "deps/xxhash/xxhash.h"
|
||||
|
||||
|
@@ -213,6 +215,8 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* userdata)
 	if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
 		vram.LockRegion(block->start + VRAM_SIZE, block->len);
 	}
+	if (mmu_enabled())
+		vmem32_protect_vram(block);

 	vramlock_list_add(block);

@@ -222,11 +226,8 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* userdata)
 	return block;
 }

-
-bool VramLockedWrite(u8* address)
+bool VramLockedWriteOffset(size_t offset)
 {
-	size_t offset=address-vram.data;
-
 	if (offset<VRAM_SIZE)
 	{

@@ -268,6 +269,16 @@ bool VramLockedWrite(u8* address)
 	return false;
 }

+bool VramLockedWrite(u8* address)
+{
+	size_t offset=address-vram.data;
+
+	if (offset < 0x01000000)
+		return VramLockedWriteOffset(offset & (VRAM_SIZE - 1));
+	else
+		return false;
+}
+
 //unlocks mem
 //also frees the handle
 void libCore_vramlock_Unlock_block(vram_block* block)
@@ -284,6 +295,8 @@ void libCore_vramlock_Unlock_block_wb(vram_block* block)
 		msgboxf("Error : block end is after vram , skipping unlock",MBX_OK);
 	else
 	{
+		if (mmu_enabled())
+			vmem32_unprotect_vram(block);
 		vramlock_list_remove(block);
 		//more work needed
 		free(block);
|
@ -613,19 +613,5 @@ template void texture_VQ<convBMP_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,
|
|||
#define tex1555_VQ32 texture_VQ<conv1555_TW32<pp_8888>, u32>
|
||||
#define tex4444_VQ32 texture_VQ<conv4444_TW32<pp_8888>, u32>
|
||||
|
||||
#define Is_64_Bit(addr) ((addr &0x1000000)==0)
|
||||
|
||||
//vram_block, vramLockCBFP on plugin headers
|
||||
|
||||
|
||||
u32 vramlock_ConvAddrtoOffset64(u32 Address);
|
||||
u32 vramlock_ConvOffset32toOffset64(u32 offset32);
|
||||
|
||||
void vramlock_Unlock_block(vram_block* block);
|
||||
vram_block* vramlock_Lock_32(u32 start_offset32,u32 end_offset32,void* userdata);
|
||||
vram_block* vramlock_Lock_64(u32 start_offset64,u32 end_offset64,void* userdata);
|
||||
|
||||
void vram_LockedWrite(u32 offset64);
|
||||
|
||||
void DePosterize(u32* source, u32* dest, int width, int height);
|
||||
void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha);
|
||||
|
|
|
@@ -736,6 +736,7 @@ struct settings_t
 		bool unstable_opt;
 		bool safemode;
 		bool disable_nvmem;
+		bool disable_vmem32;
 	} dynarec;

 	struct
@@ -1,6 +1,7 @@
 #include "oslib\oslib.h"
 #include "oslib\audiostream.h"
 #include "imgread\common.h"
+#include "hw\mem\vmem32.h"
 #include "xinput_gamepad.h"
 #include "win_keyboard.h"

@@ -141,6 +142,11 @@ LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo)
 	u8* address=(u8*)pExceptionRecord->ExceptionInformation[1];

 	//printf("[EXC] During access to : 0x%X\n", address);
+#if !defined(NO_MMU) && defined(HOST_64BIT_CPU)
+	bool write = false;	// TODO?
+	if (vmem32_handle_signal(address, write))
+		return EXCEPTION_CONTINUE_EXECUTION;
+#endif

 	if (VramLockedWrite(address))
 	{

@@ -152,7 +158,8 @@ LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo)
 		return EXCEPTION_CONTINUE_EXECUTION;
 	}
 #endif
-#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X86
+#if FEAT_SHREC == DYNAREC_JIT
+#if HOST_CPU == CPU_X86
 	else if ( ngen_Rewrite((unat&)ep->ContextRecord->Eip,*(unat*)ep->ContextRecord->Esp,ep->ContextRecord->Eax) )
 	{
 		//remove the call from call stack

@@ -161,6 +168,11 @@ LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo)
 		ep->ContextRecord->Ecx=ep->ContextRecord->Eax;
 		return EXCEPTION_CONTINUE_EXECUTION;
 	}
+#elif HOST_CPU == CPU_X64
+	else if (dyna_cde && ngen_Rewrite((unat&)ep->ContextRecord->Rip, 0, 0))
+	{
+		return EXCEPTION_CONTINUE_EXECUTION;
+	}
+#endif
 	else
 	{

@@ -576,7 +588,7 @@ _In_opt_ PVOID Context
 //	(DWORD)((u8 *)__gnat_SEH_error_handler - CodeCache);
 	/* Set its scope to the entire program.  */
 	Table[0].BeginAddress = 0;// (CodeCache - (u8*)__ImageBase);
-	Table[0].EndAddress = /*(CodeCache - (u8*)__ImageBase) +*/ CODE_SIZE;
+	Table[0].EndAddress = /*(CodeCache - (u8*)__ImageBase) +*/ CODE_SIZE + TEMP_CODE_SIZE;
 	Table[0].UnwindData = (DWORD)((u8 *)unwind_info - CodeCache);
 	printf("TABLE CALLBACK\n");
 	//for (;;);

@@ -605,13 +617,13 @@ void setup_seh() {
 	//(DWORD)((u8 *)__gnat_SEH_error_handler - CodeCache);
 	/* Set its scope to the entire program.  */
 	Table[0].BeginAddress = 0;// (CodeCache - (u8*)__ImageBase);
-	Table[0].EndAddress = /*(CodeCache - (u8*)__ImageBase) +*/ CODE_SIZE;
+	Table[0].EndAddress = /*(CodeCache - (u8*)__ImageBase) +*/ CODE_SIZE + TEMP_CODE_SIZE;
 	Table[0].UnwindData = (DWORD)((u8 *)unwind_info - CodeCache);
 	/* Register the unwind information.  */
 	RtlAddFunctionTable(Table, 1, (DWORD64)CodeCache);
 #endif

-	//verify(RtlInstallFunctionTableCallback((unat)CodeCache | 0x3, (DWORD64)CodeCache, CODE_SIZE, seh_callback, 0, 0));
+	//verify(RtlInstallFunctionTableCallback((unat)CodeCache | 0x3, (DWORD64)CodeCache, CODE_SIZE + TEMP_CODE_SIZE, seh_callback, 0, 0));
 }
 #endif