wince: unify _vmem and vmem32. Use 4GB virtual space on 64-bit arch

On 64-bit architectures, _vmem first tries to allocate a 4GB virtual address space, then falls back to 512MB.
The same virtual space is now used by both _vmem and vmem32 (MMU).
Flyinghead 2019-05-23 11:40:33 +02:00
parent bc42f8a785
commit e3d95f2258
16 changed files with 246 additions and 212 deletions
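For orientation, here is a minimal sketch of the reservation strategy this commit introduces, condensed from the vmem_platform_init hunk further down (mem_region_reserve, Sh4RCB, ARAM_SIZE_MAX and the VMemType values are the names used in that hunk):

void *first_ptr = NULL;
VMemType vmemstatus = MemTypeError;
#ifdef HOST_64BIT_CPU
// Try the full 4GB guest space first: SH4 addresses (P0..P3 included)
// then become direct offsets from virt_ram_base.
size_t bigsize = 0x100000000L + sizeof(Sh4RCB) + 0x10000; // 4GB + context + 64KB padding
first_ptr = mem_region_reserve(NULL, bigsize);
vmemstatus = MemType4GB;
#endif
if (first_ptr == NULL)
{
	// Fall back to a 512MB window covering P0/U0 only (29-bit addresses).
	unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000;
	first_ptr = mem_region_reserve(NULL, memsize);
	vmemstatus = first_ptr ? MemType512MB : MemTypeError;
}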


@ -1,4 +1,5 @@
#include "_vmem.h"
#include "vmem32.h"
#include "hw/aica/aica_if.h"
#include "hw/sh4/dyna/blockmanager.h"
@ -399,6 +400,7 @@ void _vmem_term() {}
#include "hw/sh4/sh4_mem.h"
u8* virt_ram_base;
bool vmem_4gb_space;
void* malloc_pages(size_t size) {
#if HOST_OS == OS_WINDOWS
@ -446,6 +448,26 @@ bool BM_LockedWrite(u8* address) {
return false;
}
static void _vmem_set_p0_mappings()
{
const vmem_mapping mem_mappings[] = {
// P0/U0
{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
{0x00800000, 0x00800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica
{0x00800000 + ARAM_SIZE, 0x02800000, 0, 0, false}, // unused
{0x02800000, 0x02800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror
{0x02800000 + ARAM_SIZE, 0x04000000, 0, 0, false}, // unused
{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0x10000000, 0x80000000, 0, 0, false}, // Area 4-7 (unused)
};
vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
}
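The Area 1 entries above also encode the Dreamcast VRAM wrap: each 16MB window is backed by only VRAM_SIZE bytes of the memory file, so the mapping code (see the vmem_platform_create_mappings hunk below) maps the same data repeatedly. A worked example, assuming the common 8MB VRAM_SIZE:

// {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}
u64 window = 0x05000000 - 0x04000000; // 16MB Area 1 window
unsigned views = window / VRAM_SIZE;  // 2 when VRAM_SIZE == 8MB: the 2x8MB wrap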
bool _vmem_reserve() {
// TODO: Static assert?
verify((sizeof(Sh4RCB)%PAGE_SIZE)==0);
@ -480,33 +502,85 @@ bool _vmem_reserve() {
printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? "4GB" : "512MB");
printf("Info: p_sh4rcb: %p virt_ram_base: %p\n", p_sh4rcb, virt_ram_base);
// Map the different parts of the memory file into the new memory range we got.
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
const vmem_mapping mem_mappings[] = {
{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
{0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too
{0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},
{0x01000000, 0x04000000, 0, 0, false}, // More unused
{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused)
};
vmem_platform_create_mappings(&mem_mappings[0], sizeof(mem_mappings) / sizeof(mem_mappings[0]));
if (vmemstatus == MemType512MB)
{
const vmem_mapping mem_mappings[] = {
{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
{0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica
{0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},
{0x01000000, 0x04000000, 0, 0, false}, // More unused
{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused)
};
vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
// Point buffers to actual data pointers
aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writable AICA addrspace
vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writable and lockable)
mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror
}
else
{
_vmem_set_p0_mappings();
const vmem_mapping mem_mappings[] = {
// P1
{0x80000000, 0x80800000, 0, 0, false}, // Area 0 -> unused
{0x80800000, 0x80800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
{0x80800000 + ARAM_SIZE, 0x82800000, 0, 0, false}, // unused
{0x82800000, 0x82800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
{0x82800000 + ARAM_SIZE, 0x84000000, 0, 0, false}, // unused
{0x84000000, 0x85000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0x85000000, 0x86000000, 0, 0, false}, // 32 bit path (unused)
{0x86000000, 0x87000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0x87000000, 0x88000000, 0, 0, false}, // 32 bit path (unused) mirror
{0x88000000, 0x8C000000, 0, 0, false}, // Area 2
{0x8C000000, 0x90000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0x90000000, 0xA0000000, 0, 0, false}, // Area 4-7 (unused)
// P2
{0xA0000000, 0xA0800000, 0, 0, false}, // Area 0 -> unused
{0xA0800000, 0xA0800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
{0xA0800000 + ARAM_SIZE, 0xA2800000, 0, 0, false}, // unused
{0xA2800000, 0xA2800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
{0xA2800000 + ARAM_SIZE, 0xA4000000, 0, 0, false}, // unused
{0xA4000000, 0xA5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0xA5000000, 0xA6000000, 0, 0, false}, // 32 bit path (unused)
{0xA6000000, 0xA7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0xA7000000, 0xA8000000, 0, 0, false}, // 32 bit path (unused) mirror
{0xA8000000, 0xAC000000, 0, 0, false}, // Area 2
{0xAC000000, 0xB0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0xB0000000, 0xC0000000, 0, 0, false}, // Area 4-7 (unused)
// P3
{0xC0000000, 0xC0800000, 0, 0, false}, // Area 0 -> unused
{0xC0800000, 0xC0800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
{0xC0800000 + ARAM_SIZE, 0xC2800000, 0, 0, false}, // unused
{0xC2800000, 0xC2800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
{0xC2800000 + ARAM_SIZE, 0xC4000000, 0, 0, false}, // unused
{0xC4000000, 0xC5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
{0xC5000000, 0xC6000000, 0, 0, false}, // 32 bit path (unused)
{0xC6000000, 0xC7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
{0xC7000000, 0xC8000000, 0, 0, false}, // 32 bit path (unused) mirror
{0xC8000000, 0xCC000000, 0, 0, false}, // Area 2
{0xCC000000, 0xD0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
{0xD0000000, 0x100000000L, 0, 0, false}, // Area 4-7 (unused)
};
vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
// Point buffers to actual data pointers
aica_ram.data = &virt_ram_base[0x80800000]; // Points to the first AICA addrspace in P1
vram.data = &virt_ram_base[0x84000000]; // Points to first vram mirror (writable and lockable) in P1
mem_b.data = &virt_ram_base[0x8C000000]; // Main memory, first mirror in P1
vmem_4gb_space = true;
}
// Point buffers to actual data pointers
aica_ram.size = ARAM_SIZE;
aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writable AICA addrspace
vram.size = VRAM_SIZE;
vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writable and lockable)
mem_b.size = RAM_SIZE;
mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror
}
// Clear out memory
@ -531,3 +605,12 @@ void _vmem_release() {
}
}
void _vmem_disable_mmu()
{
if (vmem32_enabled())
{
// Restore P0/U0 mem mappings
vmem32_flush_mmu();
_vmem_set_p0_mappings();
}
}


@ -8,8 +8,8 @@ enum VMemType {
};
struct vmem_mapping {
u32 start_address, end_address;
unsigned memoffset, memsize;
u64 start_address, end_address;
u64 memoffset, memsize;
bool allow_writes;
};
@ -102,9 +102,17 @@ void* _vmem_get_ptr2(u32 addr,u32& mask);
void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
extern u8* virt_ram_base;
extern bool vmem_4gb_space;
static inline bool _nvmem_enabled() {
return virt_ram_base != 0;
}
static inline bool _nvmem_4gb_space() {
return vmem_4gb_space;
}
void _vmem_bm_reset();
void _vmem_disable_mmu();
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
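These offsets fix the layout of the single backing memory file that every view is mapped from: main RAM first, then VRAM, then AICA RAM. A worked example with purely illustrative sizes (the real values depend on the platform configuration):

// Hypothetical sizes for illustration only.
constexpr u64 RAM_SIZE_EX  = 16 * 1024 * 1024; // 0x1000000
constexpr u64 VRAM_SIZE_EX =  8 * 1024 * 1024; // 0x0800000
constexpr u64 MAP_RAM_EX   = 0;
constexpr u64 MAP_VRAM_EX  = MAP_RAM_EX + RAM_SIZE_EX;   // 0x1000000
constexpr u64 MAP_ARAM_EX  = MAP_VRAM_EX + VRAM_SIZE_EX; // 0x1800000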


@ -41,24 +41,18 @@ extern int vmem_fd;
#define VMEM32_ERROR_NOT_MAPPED 0x100
// FIXME stolen from _vmem.cpp
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
static const u64 VMEM32_SIZE = 0x100000000L;
static const u64 KERNEL_SPACE = 0x80000000L;
static const u64 AREA7_ADDRESS = 0x7C000000L;
#define VRAM_PROT_SEGMENT (1024 * 1024) // vram protection regions are grouped by 1MB segment
u8* vmem32_base;
std::unordered_set<u32> vram_mapped_pages;
std::vector<vram_block*> vram_blocks[VRAM_SIZE / VRAM_PROT_SEGMENT];
static std::unordered_set<u32> vram_mapped_pages;
static std::vector<vram_block*> vram_blocks[VRAM_SIZE / VRAM_PROT_SEGMENT];
// stats
u64 vmem32_page_faults;
u64 vmem32_flush;
//u64 vmem32_page_faults;
//u64 vmem32_flush;
static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool write)
{
@ -68,20 +62,20 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
//printf("MAP32 %08X w/ %d\n",dst,offset);
u32 map_times = addrsz / size;
#if HOST_OS == OS_WINDOWS
rv = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
rv = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &virt_ram_base[dst]);
if (rv == NULL)
return NULL;
for (u32 i = 1; i < map_times; i++)
{
dst += size;
ptr = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
ptr = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &virt_ram_base[dst]);
if (ptr == NULL)
return NULL;
}
#else
u32 prot = PROT_READ | (write ? PROT_WRITE : 0);
rv = mmap(&vmem32_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
rv = mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
if (MAP_FAILED == rv)
{
printf("MAP1 failed %d\n", errno);
@ -91,7 +85,7 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
for (u32 i = 1; i < map_times; i++)
{
dst += size;
ptr = mmap(&vmem32_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
ptr = mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
if (MAP_FAILED == ptr)
{
printf("MAP2 failed %d\n", errno);
@ -105,9 +99,9 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
static void vmem32_unmap_buffer(u32 start, u64 end)
{
#if HOST_OS == OS_WINDOWS
UnmapViewOfFile(&vmem32_base[start]);
UnmapViewOfFile(&virt_ram_base[start]);
#else
mmap(&vmem32_base[start], end - start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
mmap(&virt_ram_base[start], end - start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
#endif
}
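Note the design choice in vmem32_unmap_buffer: on POSIX it does not munmap() the range but installs a fresh PROT_NONE anonymous mapping over it with MAP_FIXED, so the address range stays reserved and cannot be grabbed by an unrelated allocation before it is remapped. A standalone sketch of the same idiom:

#include <sys/mman.h>

// Replace [base, base+len) with an inaccessible placeholder mapping.
// Unlike munmap(), the range remains reserved for later MAP_FIXED remaps.
static void unmap_keep_reservation(void *base, size_t len)
{
	mmap(base, len, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
}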
@ -116,9 +110,9 @@ static void vmem32_protect_buffer(u32 start, u32 size)
verify((start & PAGE_MASK) == 0);
#if HOST_OS == OS_WINDOWS
DWORD old;
VirtualProtect(vmem32_base + start, size, PAGE_READONLY, &old);
VirtualProtect(virt_ram_base + start, size, PAGE_READONLY, &old);
#else
mprotect(&vmem32_base[start], size, PROT_READ);
mprotect(&virt_ram_base[start], size, PROT_READ);
#endif
}
@ -127,15 +121,15 @@ static void vmem32_unprotect_buffer(u32 start, u32 size)
verify((start & PAGE_MASK) == 0);
#if HOST_OS == OS_WINDOWS
DWORD old;
VirtualProtect(vmem32_base + start, size, PAGE_READWRITE, &old);
VirtualProtect(virt_ram_base + start, size, PAGE_READWRITE, &old);
#else
mprotect(&vmem32_base[start], size, PROT_READ | PROT_WRITE);
mprotect(&virt_ram_base[start], size, PROT_READ | PROT_WRITE);
#endif
}
void vmem32_protect_vram(vram_block *block)
{
if (vmem32_base == NULL)
if (virt_ram_base == NULL)
return;
for (int i = block->start / VRAM_PROT_SEGMENT; i <= block->end / VRAM_PROT_SEGMENT; i++)
{
@ -144,7 +138,7 @@ void vmem32_protect_vram(vram_block *block)
}
void vmem32_unprotect_vram(vram_block *block)
{
if (vmem32_base == NULL)
if (virt_ram_base == NULL)
return;
for (int page = block->start / VRAM_PROT_SEGMENT; page <= block->end / VRAM_PROT_SEGMENT; page++)
{
@ -157,28 +151,6 @@ void vmem32_unprotect_vram(vram_block *block)
}
}
static bool vmem32_map_areas()
{
// Aica ram
vmem32_map_buffer(0x80800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true); // P1
vmem32_map_buffer(0x82800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
vmem32_map_buffer(0xA0800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true); // P2
vmem32_map_buffer(0xA2800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
// Vram
// Note: this should be mapped read/write but doesn't seem to be used
vmem32_map_buffer(0x84000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false); // P1
vmem32_map_buffer(0x86000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
vmem32_map_buffer(0xA4000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false); // P2
vmem32_map_buffer(0xA6000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
// System ram
vmem32_map_buffer(0x8C000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true); // P1
vmem32_map_buffer(0xAC000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true); // P2
return true;
}
static const u32 page_sizes[] = { 1024, 4 * 1024, 64 * 1024, 1024 * 1024 };
static u32 vmem32_paddr_to_offset(u32 address)
@ -322,10 +294,10 @@ static u32 vmem32_map_address(u32 address, bool write)
#if !defined(NO_MMU) && defined(HOST_64BIT_CPU)
bool vmem32_handle_signal(void *fault_addr, bool write)
{
if ((u8*)fault_addr < vmem32_base || (u8*)fault_addr >= vmem32_base + VMEM32_SIZE)
if ((u8*)fault_addr < virt_ram_base || (u8*)fault_addr >= virt_ram_base + VMEM32_SIZE)
return false;
vmem32_page_faults++;
u32 guest_addr = (u8*)fault_addr - vmem32_base;
//vmem32_page_faults++;
u32 guest_addr = (u8*)fault_addr - virt_ram_base;
u32 rv = vmem32_map_address(guest_addr, write);
//printf("vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d\n", write ? "W" : "R", fault_addr, guest_addr, rv);
if (rv == MMU_ERROR_NONE)
@ -342,70 +314,17 @@ bool vmem32_handle_signal(void *fault_addr, bool write)
void vmem32_flush_mmu()
{
vmem32_flush++;
//vmem32_flush++;
vram_mapped_pages.clear();
vmem32_unmap_buffer(0, KERNEL_SPACE);
// TODO flush P3?
}
bool vmem32_init()
{
if (!_nvmem_enabled())
return false;
#if HOST_OS == OS_WINDOWS
// disabled on windows for now
return true;
#endif
#ifdef HOST_64BIT_CPU
#if HOST_OS == OS_WINDOWS
void* rv = (u8 *)VirtualAlloc(0, VMEM32_SIZE, MEM_RESERVE, PAGE_NOACCESS);
if (rv != NULL)
VirtualFree(rv, 0, MEM_RELEASE);
vmem32_base = (u8*)rv;
#else
void* rv = mmap(0, VMEM32_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
verify(rv != NULL);
munmap(rv, VMEM32_SIZE);
vmem32_base = (u8*)rv;
#endif
vmem32_unmap_buffer(0, VMEM32_SIZE);
printf("vmem32_init: allocated %zx bytes from %p to %p\n", VMEM32_SIZE, vmem32_base, vmem32_base + VMEM32_SIZE);
if (!vmem32_map_areas())
{
vmem32_term();
return false;
}
#endif
return true;
}
void vmem32_term()
{
if (vmem32_base != NULL)
if (virt_ram_base != NULL)
{
#if HOST_OS == OS_WINDOWS
vmem32_flush_mmu();
// Aica ram
vmem32_unmap_buffer(0x80800000, 0x80800000 + 0x00800000); // P1
vmem32_unmap_buffer(0x82800000, 0x82800000 + ARAM_SIZE);
vmem32_unmap_buffer(0xA0800000, 0xA0800000 + 0x00800000); // P2
vmem32_unmap_buffer(0xA2800000, 0xA2800000 + ARAM_SIZE);
// Vram
vmem32_unmap_buffer(0x84000000, 0x84000000 + 0x01000000); // P1
vmem32_unmap_buffer(0x86000000, 0x86000000 + 0x01000000);
vmem32_unmap_buffer(0xA4000000, 0xA4000000 + 0x01000000); // P2
vmem32_unmap_buffer(0xA6000000, 0xA6000000 + 0x01000000);
// System ram
vmem32_unmap_buffer(0x8C000000, 0x8C000000 + 0x04000000); // P1
vmem32_unmap_buffer(0xAC000000, 0xAC000000 + 0x04000000); // P2
#else
munmap(vmem32_base, VMEM32_SIZE);
#endif
vmem32_base = NULL;
}
}


@ -1,6 +1,5 @@
#include "types.h"
#include "_vmem.h"
bool vmem32_init();
void vmem32_term();
bool vmem32_handle_signal(void *fault_addr, bool write);
void vmem32_flush_mmu();
@ -10,6 +9,6 @@ static inline bool vmem32_enabled() {
#if HOST_OS == OS_WINDOWS
return false;
#else
return !settings.dynarec.disable_vmem32;
return !settings.dynarec.disable_vmem32 && _nvmem_4gb_space();
#endif
}
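Since vmem32 now piggybacks on the _vmem reservation, it can only operate when the full 4GB space was obtained; callers gate all vmem32 work on this predicate, as in the _vmem_disable_mmu hunk above:

if (vmem32_enabled())
{
	vmem32_flush_mmu();      // drop the per-page MMU mappings
	_vmem_set_p0_mappings(); // restore the flat P0/U0 layout
}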


@ -297,7 +297,6 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb)
if (SB_LMMODE0 == 0)
{
// 64b path
u8* vram=sqb+512+0x04000000;
MemWrite32(&vram[address_w&(VRAM_MASK-0x1F)],sq);
}
else


@ -80,7 +80,7 @@ void clear_temp_cache(bool full)
void recSh4_ClearCache()
{
printf("recSh4:Dynarec Cache clear at %08X free space %d\n",curr_pc, emit_FreeSpace());
printf("recSh4:Dynarec Cache clear at %08X free space %d\n", curr_pc, emit_FreeSpace());
LastAddr=LastAddr_min;
bm_Reset();
smc_hotspots.clear();
@ -130,7 +130,7 @@ void emit_Skip(u32 sz)
if (emit_ptr)
emit_ptr = (u32*)((u8*)emit_ptr + sz);
else
LastAddr+=sz;
LastAddr += sz;
}
u32 emit_FreeSpace()
@ -138,7 +138,7 @@ u32 emit_FreeSpace()
if (emit_ptr)
return (emit_ptr_limit - emit_ptr) * sizeof(u32);
else
return CODE_SIZE-LastAddr;
return CODE_SIZE - LastAddr;
}
// pc must be a physical address
@ -262,7 +262,7 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
oplist.clear();
if (!dec_DecodeBlock(this,SH4_TIMESLICE/2))
if (!dec_DecodeBlock(this, SH4_TIMESLICE / 2))
return false;
AnalyseBlock(this);
@ -293,7 +293,7 @@ DynarecCodeEntryPtr rdv_CompilePC(u32 blockcheck_failures)
emit_ptr_limit = (u32 *)(TempCodeCache + TEMP_CODE_SIZE);
rbi->temp_block = true;
}
bool do_opts = !rbi->temp_block; //((rbi->addr&0x3FFFFFFF)>0x0C010100);
bool do_opts = !rbi->temp_block;
rbi->staging_runs=do_opts?100:-100;
ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
verify(rbi->code!=0);
@ -505,7 +505,14 @@ void recSh4_Init()
verify(rcb_noffs(&p_sh4rcb->cntx.interrupt_pend) == -148);
if (_nvmem_enabled()) {
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
if (!_nvmem_4gb_space())
{
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
}
else
{
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x8C000000));
}
}
// Prepare some pointer to the pre-allocated code cache:


@ -124,8 +124,6 @@ void Sh4_int_Skip()
}
}
extern u8 *vmem32_base;
void Sh4_int_Reset(bool Manual)
{
if (sh4_int_bCpuRun)
@ -150,8 +148,6 @@ void Sh4_int_Reset(bool Manual)
old_fpscr=fpscr;
UpdateFPSCR();
p_sh4rcb->cntx.vmem32_base = vmem32_base;
//Any more registers have default value ?
printf("Sh4 Reset\n");
}


@ -681,6 +681,7 @@ void mmu_set_state()
WriteMem16 = &_vmem_WriteMem16;
WriteMem32 = &_vmem_WriteMem32;
WriteMem64 = &_vmem_WriteMem64;
_vmem_disable_mmu();
}
}


@ -142,7 +142,6 @@ void SetFloatStatusReg()
//called when fpscr is changed and we must check for reg banks etc..
void UpdateFPSCR()
{
verify(fpscr.PR == 0 || fpscr.SZ == 0);
if (fpscr.FR !=old_fpscr.FR)
ChangeFP(); // FPU bank change


@ -284,7 +284,6 @@ struct Sh4Context
u32 interrupt_pend;
u32 exception_pc;
u8 *vmem32_base;
};
u64 raw[64-8];
};


@ -60,9 +60,6 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
bool dyna_cde = ((unat)CC_RX2RW(ctx.pc) > (unat)CodeCache) && ((unat)CC_RX2RW(ctx.pc) < (unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE));
//ucontext_t* ctx=(ucontext_t*)ctxr;
//printf("mprot hit @ ptr 0x%08X @@ code: %08X, %d\n",si->si_addr,ctx->uc_mcontext.arm_pc,dyna_cde);
#if !defined(NO_MMU) && defined(HOST_64BIT_CPU) && HOST_OS != OS_WINDOWS
#if HOST_CPU == CPU_ARM64
u32 op = *(u32*)ctx.pc;


@ -156,11 +156,22 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) {
return MemTypeError;
// Now try to allocate a contiguous piece of memory.
unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000;
void *first_ptr = mem_region_reserve(NULL, memsize);
if (!first_ptr) {
close(vmem_fd);
return MemTypeError;
void *first_ptr = NULL;
VMemType rv;
#ifdef HOST_64BIT_CPU
size_t bigsize = 0x100000000L + sizeof(Sh4RCB) + 0x10000; // 4GB + context size + 64K padding
first_ptr = mem_region_reserve(NULL, bigsize);
rv = MemType4GB;
#endif
if (first_ptr == NULL)
{
unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000;
first_ptr = mem_region_reserve(NULL, memsize);
if (!first_ptr) {
close(vmem_fd);
return MemTypeError;
}
rv = MemType512MB;
}
// Align pointer to 64KB too, some Linaro bug (no idea but let's just be safe I guess).
@ -173,12 +184,15 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) {
// Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand).
mem_region_unlock(sh4rcb_base_ptr, sizeof(Sh4RCB) - FPCB_SIZE);
return MemType512MB;
return rv;
}
// Just tries to wipe as much as possible in the relevant area.
void vmem_platform_destroy() {
mem_region_release(virt_ram_base, 0x20000000);
if (vmem_4gb_space)
mem_region_release(virt_ram_base, 0x100000000);
else
mem_region_release(virt_ram_base, 0x20000000);
}
// Resets a chunk of memory by deleting its data and setting its protection back.
@ -207,12 +221,12 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
continue;
// Calculate the number of mirrors
unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
u64 address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize;
verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1);
for (unsigned j = 0; j < num_mirrors; j++) {
unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
u64 offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
verify(mem_region_unmap_file(&virt_ram_base[offset], vmem_maps[i].memsize));
verify(mem_region_map_file((void*)(uintptr_t)vmem_fd, &virt_ram_base[offset],
vmem_maps[i].memsize, vmem_maps[i].memoffset, vmem_maps[i].allow_writes) != NULL);
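The loop above derives the mirror count directly from the mapping table. A worked example for the Area 3 entry, assuming a 16MB RAM_SIZE:

// {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}
u64 address_range_size = 0x10000000 - 0x0C000000;         // 0x04000000 (64MB)
unsigned num_mirrors   = address_range_size / 0x01000000; // 4: main RAM + 3 mirrors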


@ -6,7 +6,6 @@
#include "oslib/oslib.h"
#include "oslib/audiostream.h"
#include "hw/mem/_vmem.h"
#include "hw/mem/vmem32.h"
#include "stdclass.h"
#include "cfg/cfg.h"
@ -298,13 +297,6 @@ int reicast_init(int argc, char* argv[])
printf("Failed to alloc mem\n");
return -1;
}
#ifdef HOST_64BIT_CPU
if (!vmem32_init())
{
printf("Failed to alloc 32-bit mem space\n");
return -1;
}
#endif
if (ParseCommandLine(argc, argv))
{
return 69;


@ -153,8 +153,6 @@ void ngen_mainloop(void* v_cntx)
"ldr x28, [sp] \n\t" // Set context
// w29 is next_pc
"ldr w29, [x28, %[pc]] \n\t"
// x27 is vmem32_base
"ldr x27, [x28, %[vmem32_base]] \n\t"
"b no_update \n"
".hidden intc_sched \n\t"
@ -220,8 +218,7 @@ void ngen_mainloop(void* v_cntx)
[RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
[SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
[jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter)),
[vmem32_base] "i"(offsetof(Sh4Context, vmem32_base))
[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
: "memory"
);
}
@ -1419,26 +1416,20 @@ private:
Instruction *start_instruction = GetCursorAddress<Instruction *>();
const XRegister* base_reg;
const XRegister* offset_reg;
// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
// WARNING: the rewrite code relies on having 1-2 ops before the memory access (4 when mmu is enabled)
// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
if (!mmu_enabled())
Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
if (!_nvmem_4gb_space())
{
Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
Bfc(w1, 29, 3); // addr &= ~0xE0000000
base_reg = &x28;
offset_reg = &x1;
}
else
else if (mmu_enabled())
{
u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
// 3 ops before memory access
Mov(w8, exception_pc & 0xFFFF);
Movk(w8, exception_pc >> 16, 16);
Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
base_reg = &x27;
offset_reg = call_regs64[0];
}
//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
@ -1450,22 +1441,22 @@ private:
switch(size)
{
case 1:
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
break;
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
break;
case 4:
if (!op.rd.is_r32f())
Ldr(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
else
Ldr(regalloc.MapVRegister(op.rd), MemOperand(*base_reg, *offset_reg));
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
break;
case 8:
Ldr(x1, MemOperand(*base_reg, *offset_reg));
Ldr(x1, MemOperand(x28, x1));
break;
}
@ -1486,19 +1477,19 @@ private:
switch(size)
{
case 1:
Ldrsb(w1, MemOperand(*base_reg, *offset_reg));
Ldrsb(w1, MemOperand(x28, x1));
break;
case 2:
Ldrsh(w1, MemOperand(*base_reg, *offset_reg));
Ldrsh(w1, MemOperand(x28, x1));
break;
case 4:
Ldr(w1, MemOperand(*base_reg, *offset_reg));
Ldr(w1, MemOperand(x28, x1));
break;
case 8:
Ldr(x1, MemOperand(*base_reg, *offset_reg));
Ldr(x1, MemOperand(x28, x1));
break;
}
if (size == 8)
@ -1544,25 +1535,19 @@ private:
Instruction *start_instruction = GetCursorAddress<Instruction *>();
const XRegister* base_reg;
const XRegister* offset_reg;
// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
// WARNING: the rewrite code relies on having 1-2 ops before the memory access (4 when mmu is enabled)
// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
if (!mmu_enabled())
Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
if (!_nvmem_4gb_space())
{
Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
Bfc(w7, 29, 3); // addr &= ~0xE0000000
base_reg = &x28;
offset_reg = &x7;
}
else
else if (mmu_enabled())
{
u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
Mov(w8, exception_pc & 0xFFFF);
Movk(w8, exception_pc >> 16, 16);
Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
base_reg = &x27;
offset_reg = call_regs64[0];
}
//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
@ -1572,19 +1557,19 @@ private:
switch(size)
{
case 1:
Strb(w1, MemOperand(*base_reg, *offset_reg));
Strb(w1, MemOperand(x28, x7));
break;
case 2:
Strh(w1, MemOperand(*base_reg, *offset_reg));
Strh(w1, MemOperand(x28, x7));
break;
case 4:
Str(w1, MemOperand(*base_reg, *offset_reg));
Str(w1, MemOperand(x28, x7));
break;
case 8:
Str(x1, MemOperand(*base_reg, *offset_reg));
Str(x1, MemOperand(x28, x7));
break;
}
EnsureCodeSize(start_instruction, write_memory_rewrite_size);
@ -1767,7 +1752,7 @@ private:
RuntimeBlockInfo* block = NULL;
const int read_memory_rewrite_size = 6; // worst case for u64: add, bfc, ldr, fmov, lsr, fmov
// FIXME rewrite size per read/write size?
const int write_memory_rewrite_size = 4; // TODO only 3 if !mmu
const int write_memory_rewrite_size = 5; // TODO only 2 if !mmu & 4gb
};
static Arm64Assembler* compiler;
@ -1824,7 +1809,9 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
u32 opid = it->second;
verify(opid < block->oplist.size());
const shil_opcode& op = block->oplist[opid];
Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2 - (mmu_enabled() ? 1 : 0)); // Skip the 2 preceding ops (bic, add)
// Skip the preceding ops (add, bic, ...)
u32 *code_rewrite = code_ptr - 1 - (!_nvmem_4gb_space() ? 1 : 0) - (mmu_enabled() ? 3 : 0);
Arm64Assembler *assembler = new Arm64Assembler(code_rewrite);
assembler->InitializeRewrite(block, opid);
if (op.op == shop_readm)
assembler->GenReadMemorySlow(op);
@ -1832,7 +1819,7 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
assembler->GenWriteMemorySlow(op);
assembler->Finalize(true);
delete assembler;
host_pc = (unat)CC_RW2RX(code_ptr - 2 - (mmu_enabled() ? 1 : 0));
host_pc = (unat)CC_RW2RX(code_rewrite);
return true;
}
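The back-off arithmetic in ngen_Rewrite matches the warning comments in the emitters above: one unconditional op (Add), one more in 512MB mode (Bfc), and three more when the MMU is enabled (Mov/Movk/Str of exception_pc). The accounting, spelled out:

// Ops emitted before the actual load/store (see GenReadMemory/GenWriteMemory):
//   always:       Add          -> 1
//   512MB mode:   Bfc          -> +1
//   MMU enabled:  Mov/Movk/Str -> +3
// e.g. MMU on in 4GB mode: step back 1 + 0 + 3 = 4 instructions.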


@ -1402,7 +1402,7 @@ private:
mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26);
@ -1450,7 +1450,7 @@ private:
mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26);


@ -214,8 +214,27 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user
if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
vram.LockRegion(block->start + VRAM_SIZE, block->len);
}
if (mmu_enabled())
if (!mmu_enabled())
{
if (_nvmem_4gb_space())
{
// In 4GB mode, vram.LockRegion() locks in the P1 area only so we also need to lock P0
// We should also lock P2 and P3 but they don't seem to be used...
mem_region_lock(virt_ram_base + 0x04000000 + block->start, block->len);
//mem_region_lock(virt_ram_base + 0xA4000000 + block->start, block->len);
//mem_region_lock(virt_ram_base + 0xC4000000 + block->start, block->len);
if (VRAM_SIZE == 0x800000)
{
mem_region_lock(virt_ram_base + 0x04000000 + block->start + VRAM_SIZE, block->len);
//mem_region_lock(virt_ram_base + 0xA4000000 + block->start + VRAM_SIZE, block->len);
//mem_region_lock(virt_ram_base + 0xC4000000 + block->start + VRAM_SIZE, block->len);
}
}
}
else
{
vmem32_protect_vram(block);
}
vramlock_list_add(block);
@ -252,13 +271,20 @@ bool VramLockedWriteOffset(size_t offset)
}
list->clear();
vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE);
u32 aligned_offset = (u32)offset & ~(PAGE_SIZE - 1);
vram.UnLockRegion(aligned_offset, PAGE_SIZE);
//TODO: Fix this for 32M wrap as well
if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
vram.UnLockRegion(aligned_offset + VRAM_SIZE, PAGE_SIZE);
}
if (_nvmem_4gb_space() && !mmu_enabled())
{
mem_region_unlock(virt_ram_base + 0x04000000 + aligned_offset, PAGE_SIZE);
if (VRAM_SIZE == 0x800000)
mem_region_unlock(virt_ram_base + 0x04000000 + aligned_offset + VRAM_SIZE, PAGE_SIZE);
}
vramlist_lock.Unlock();
}
@ -274,8 +300,16 @@ bool VramLockedWrite(u8* address)
if (offset < 0x01000000)
return VramLockedWriteOffset(offset & (VRAM_SIZE - 1));
else
return false;
if (_nvmem_4gb_space() && !mmu_enabled())
{
offset = address - virt_ram_base;
if (offset >= 0x04000000 && offset < 0x05000000)
return VramLockedWriteOffset((offset - 0x04000000) & (VRAM_SIZE - 1));
// 32MB wrap not set yet
//if (offset >= 0x06000000 && offset < 0x070000000)
// return VramLockedWriteOffset((offset - 0x06000000) & (VRAM_SIZE - 1));
}
return false;
}
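As a worked example of the translation above (4GB mode, MMU off): a fault at virt_ram_base + 0x04200000 lands in the first P0 VRAM window and resolves to a VRAM offset as follows.

// Hypothetical fault address inside the first P0 VRAM window:
u8 *fault = virt_ram_base + 0x04200000;
u32 offset = (u32)(fault - virt_ram_base);              // 0x04200000
// 0x04000000 <= offset < 0x05000000 -> VRAM:
u32 vram_off = (offset - 0x04000000) & (VRAM_SIZE - 1); // 0x200000 with 8MB VRAM
VramLockedWriteOffset(vram_off);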
//unlocks mem