diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp
index 3f7cb5947..a14787aec 100644
--- a/core/hw/mem/_vmem.cpp
+++ b/core/hw/mem/_vmem.cpp
@@ -1,4 +1,5 @@
 #include "_vmem.h"
+#include "vmem32.h"
 #include "hw/aica/aica_if.h"
 #include "hw/sh4/dyna/blockmanager.h"
 
@@ -399,6 +400,7 @@ void _vmem_term() {}
 #include "hw/sh4/sh4_mem.h"
 
 u8* virt_ram_base;
+bool vmem_4gb_space;
 
 void* malloc_pages(size_t size) {
 #if HOST_OS == OS_WINDOWS
@@ -446,6 +448,26 @@ bool BM_LockedWrite(u8* address) {
 	return false;
 }
 
+static void _vmem_set_p0_mappings()
+{
+	const vmem_mapping mem_mappings[] = {
+		// P0/U0
+		{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
+		{0x00800000, 0x00800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica
+		{0x00800000 + ARAM_SIZE, 0x02800000, 0, 0, false}, // unused
+		{0x02800000, 0x02800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, // Aica mirror
+		{0x02800000 + ARAM_SIZE, 0x04000000, 0, 0, false}, // unused
+		{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
+		{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
+		{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
+		{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
+		{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
+		{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
+		{0x10000000, 0x80000000, 0, 0, false}, // Area 4-7 (unused)
+	};
+	vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
+}
+
 bool _vmem_reserve() {
 	// TODO: Static assert?
 	verify((sizeof(Sh4RCB)%PAGE_SIZE)==0);
@@ -480,33 +502,85 @@ bool _vmem_reserve() {
 	printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? "4GB" : "512MB");
 	printf("Info: p_sh4rcb: %p virt_ram_base: %p\n", p_sh4rcb, virt_ram_base);
 
 	// Map the different parts of the memory file into the new memory range we got.
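+	// With a 512MB reservation only the 29-bit physical address space is laid
+	// out below; with a 4GB reservation the whole SH4 address space (P0/U0 plus
+	// the P1/P2/P3 mirrors) is mapped, so the dynarec can use guest addresses
+	// directly without masking off the top 3 bits.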
-	#define MAP_RAM_START_OFFSET 0
-	#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
-	#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
-	const vmem_mapping mem_mappings[] = {
-		{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
-		{0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too
-		{0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},
-		{0x01000000, 0x04000000, 0, 0, false}, // More unused
-		{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
-		{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
-		{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
-		{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
-		{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
-		{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
-		{0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused)
-	};
-	vmem_platform_create_mappings(&mem_mappings[0], sizeof(mem_mappings) / sizeof(mem_mappings[0]));
+	if (vmemstatus == MemType512MB)
+	{
+		const vmem_mapping mem_mappings[] = {
+			{0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused
+			{0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica
+			{0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},
+			{0x01000000, 0x04000000, 0, 0, false}, // More unused
+			{0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
+			{0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused)
+			{0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
+			{0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror
+			{0x08000000, 0x0C000000, 0, 0, false}, // Area 2
+			{0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
+			{0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused)
+		};
+		vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
+
+		// Point buffers to actual data pointers
+		aica_ram.data = &virt_ram_base[0x20000000];	// Points to the writable AICA addrspace
+		vram.data = &virt_ram_base[0x04000000];		// Points to first vram mirror (writable and lockable)
+		mem_b.data = &virt_ram_base[0x0C000000];	// Main memory, first mirror
+	}
+	else
+	{
+		_vmem_set_p0_mappings();
+		const vmem_mapping mem_mappings[] = {
+			// P1
+			{0x80000000, 0x80800000, 0, 0, false}, // Area 0 -> unused
+			{0x80800000, 0x80800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
+			{0x80800000 + ARAM_SIZE, 0x82800000, 0, 0, false}, // unused
+			{0x82800000, 0x82800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
+			{0x82800000 + ARAM_SIZE, 0x84000000, 0, 0, false}, // unused
+			{0x84000000, 0x85000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
+			{0x85000000, 0x86000000, 0, 0, false}, // 32 bit path (unused)
+			{0x86000000, 0x87000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
+			{0x87000000, 0x88000000, 0, 0, false}, // 32 bit path (unused) mirror
+			{0x88000000, 0x8C000000, 0, 0, false}, // Area 2
+			{0x8C000000, 0x90000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
+			{0x90000000, 0xA0000000, 0, 0, false}, // Area 4-7 (unused)
+			// P2
+			{0xA0000000, 0xA0800000, 0, 0, false}, // Area 0 -> unused
+			{0xA0800000, 0xA0800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
+			{0xA0800000 + ARAM_SIZE, 0xA2800000, 0, 0, false}, // unused
+			{0xA2800000, 0xA2800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
+			{0xA2800000 + ARAM_SIZE, 0xA4000000, 0, 0, false}, // unused
+			{0xA4000000, 0xA5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
+			{0xA5000000, 0xA6000000, 0, 0, false}, // 32 bit path (unused)
+			{0xA6000000, 0xA7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
+			{0xA7000000, 0xA8000000, 0, 0, false}, // 32 bit path (unused) mirror
+			{0xA8000000, 0xAC000000, 0, 0, false}, // Area 2
+			{0xAC000000, 0xB0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
+			{0xB0000000, 0xC0000000, 0, 0, false}, // Area 4-7 (unused)
+			// P3
+			{0xC0000000, 0xC0800000, 0, 0, false}, // Area 0 -> unused
+			{0xC0800000, 0xC0800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica
+			{0xC0800000 + ARAM_SIZE, 0xC2800000, 0, 0, false}, // unused
+			{0xC2800000, 0xC2800000 + ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true},// Aica mirror
+			{0xC2800000 + ARAM_SIZE, 0xC4000000, 0, 0, false}, // unused
+			{0xC4000000, 0xC5000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB)
+			{0xC5000000, 0xC6000000, 0, 0, false}, // 32 bit path (unused)
+			{0xC6000000, 0xC7000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror
+			{0xC7000000, 0xC8000000, 0, 0, false}, // 32 bit path (unused) mirror
+			{0xC8000000, 0xCC000000, 0, 0, false}, // Area 2
+			{0xCC000000, 0xD0000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors)
+			{0xD0000000, 0x100000000L, 0, 0, false}, // Area 4-7 (unused)
+		};
+		vmem_platform_create_mappings(&mem_mappings[0], ARRAY_SIZE(mem_mappings));
+
+		// Point buffers to actual data pointers
+		aica_ram.data = &virt_ram_base[0x80800000];	// Points to the first AICA addrspace in P1
+		vram.data = &virt_ram_base[0x84000000];		// Points to first vram mirror (writable and lockable) in P1
+		mem_b.data = &virt_ram_base[0x8C000000];	// Main memory, first mirror in P1
+
+		vmem_4gb_space = true;
+	}
 
-	// Point buffers to actual data pointers
 	aica_ram.size = ARAM_SIZE;
-	aica_ram.data = &virt_ram_base[0x20000000];	// Points to the writtable AICA addrspace
-
 	vram.size = VRAM_SIZE;
-	vram.data = &virt_ram_base[0x04000000];	// Points to first vram mirror (writtable and lockable)
-
 	mem_b.size = RAM_SIZE;
-	mem_b.data = &virt_ram_base[0x0C000000];	// Main memory, first mirror
 }
 
 // Clear out memory
@@ -531,3 +605,12 @@ void _vmem_release() {
 	}
 }
 
+void _vmem_disable_mmu()
+{
+	if (vmem32_enabled())
+	{
+		// Restore P0/U0 mem mappings
+		vmem32_flush_mmu();
+		_vmem_set_p0_mappings();
+	}
+}
diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h
index e609b1b09..c702b2fe1 100644
--- a/core/hw/mem/_vmem.h
+++ b/core/hw/mem/_vmem.h
@@ -8,8 +8,8 @@ enum VMemType {
 };
 
 struct vmem_mapping {
-	u32 start_address, end_address;
-	unsigned memoffset, memsize;
+	u64 start_address, end_address;
+	u64 memoffset, memsize;
 	bool allow_writes;
 };
 
@@ -102,9 +102,17 @@ void* _vmem_get_ptr2(u32 addr,u32& mask);
 void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
 
 extern u8* virt_ram_base;
+extern bool vmem_4gb_space;
 
 static inline bool _nvmem_enabled() {
 	return virt_ram_base != 0;
 }
-
+static inline bool _nvmem_4gb_space() {
	return vmem_4gb_space;
+}
 void _vmem_bm_reset();
+void _vmem_disable_mmu();
+
+#define MAP_RAM_START_OFFSET 0
+#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
+#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
diff --git a/core/hw/mem/vmem32.cpp b/core/hw/mem/vmem32.cpp
index e7236cffa..ad06af617 100644
--- a/core/hw/mem/vmem32.cpp
+++ b/core/hw/mem/vmem32.cpp
@@ -41,24 +41,18 @@ extern int vmem_fd;
 
 #define VMEM32_ERROR_NOT_MAPPED 0x100
 
-// FIXME stolen from _vmem.cpp
-#define MAP_RAM_START_OFFSET 0
-#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
-#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
-
 static const u64 VMEM32_SIZE = 0x100000000L;
 static const u64 KERNEL_SPACE = 0x80000000L;
 static const u64 AREA7_ADDRESS = 0x7C000000L;
 
 #define VRAM_PROT_SEGMENT (1024 * 1024)	// vram protection regions are grouped by 1MB segment
 
-u8* vmem32_base;
-std::unordered_set<u32> vram_mapped_pages;
-std::vector<vram_block*> vram_blocks[VRAM_SIZE / VRAM_PROT_SEGMENT];
+static std::unordered_set<u32> vram_mapped_pages;
+static std::vector<vram_block*> vram_blocks[VRAM_SIZE / VRAM_PROT_SEGMENT];
 
 // stats
-u64 vmem32_page_faults;
-u64 vmem32_flush;
+//u64 vmem32_page_faults;
+//u64 vmem32_flush;
 
 static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool write)
 {
@@ -68,20 +62,20 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
 	//printf("MAP32 %08X w/ %d\n",dst,offset);
 	u32 map_times = addrsz / size;
 #if HOST_OS == OS_WINDOWS
-	rv = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
+	rv = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &virt_ram_base[dst]);
 	if (rv == NULL)
 		return NULL;
 	for (u32 i = 1; i < map_times; i++)
 	{
 		dst += size;
-		ptr = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &vmem32_base[dst]);
+		ptr = MapViewOfFileEx(mem_handle, FILE_MAP_READ | (write ? FILE_MAP_WRITE : 0), 0, offset, size, &virt_ram_base[dst]);
 		if (ptr == NULL)
 			return NULL;
 	}
 #else
 	u32 prot = PROT_READ | (write ? PROT_WRITE : 0);
-	rv = mmap(&vmem32_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
+	rv = mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
 	if (MAP_FAILED == rv)
 	{
 		printf("MAP1 failed %d\n", errno);
@@ -91,7 +85,7 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
 	for (u32 i = 1; i < map_times; i++)
 	{
 		dst += size;
-		ptr = mmap(&vmem32_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
+		ptr = mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
 		if (MAP_FAILED == ptr)
 		{
 			printf("MAP2 failed %d\n", errno);
@@ -105,9 +99,9 @@ static void* vmem32_map_buffer(u32 dst, u32 addrsz, u32 offset, u32 size, bool w
 static void vmem32_unmap_buffer(u32 start, u64 end)
 {
 #if HOST_OS == OS_WINDOWS
-	UnmapViewOfFile(&vmem32_base[start]);
+	UnmapViewOfFile(&virt_ram_base[start]);
 #else
-	mmap(&vmem32_base[start], end - start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
+	mmap(&virt_ram_base[start], end - start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
 #endif
 }
 
@@ -116,9 +110,9 @@ static void vmem32_protect_buffer(u32 start, u32 size)
 {
 	verify((start & PAGE_MASK) == 0);
 #if HOST_OS == OS_WINDOWS
 	DWORD old;
-	VirtualProtect(vmem32_base + start, size, PAGE_READONLY, &old);
+	VirtualProtect(virt_ram_base + start, size, PAGE_READONLY, &old);
 #else
-	mprotect(&vmem32_base[start], size, PROT_READ);
+	mprotect(&virt_ram_base[start], size, PROT_READ);
 #endif
 }
 
@@ -127,15 +121,15 @@ static void vmem32_unprotect_buffer(u32 start, u32 size)
 {
 	verify((start & PAGE_MASK) == 0);
 #if HOST_OS == OS_WINDOWS
 	DWORD old;
-	VirtualProtect(vmem32_base + start, size, PAGE_READWRITE, &old);
+	VirtualProtect(virt_ram_base + start, size, PAGE_READWRITE, &old);
 #else
-	mprotect(&vmem32_base[start], size, PROT_READ | PROT_WRITE);
+	mprotect(&virt_ram_base[start], size, PROT_READ | PROT_WRITE);
 #endif
 }
 
 void vmem32_protect_vram(vram_block *block)
 {
-	if (vmem32_base == NULL)
+	if (virt_ram_base == NULL)
 		return;
 	for (int i = block->start / VRAM_PROT_SEGMENT; i <= block->end / VRAM_PROT_SEGMENT; i++)
 	{
@@ -144,7 +138,7 @@ void vmem32_protect_vram(vram_block *block)
 }
 void vmem32_unprotect_vram(vram_block *block)
 {
-	if (vmem32_base == NULL)
+	if (virt_ram_base == NULL)
 		return;
 	for (int page = block->start / VRAM_PROT_SEGMENT; page <= block->end / VRAM_PROT_SEGMENT; page++)
 	{
@@ -157,28 +151,6 @@ void vmem32_unprotect_vram(vram_block *block)
 	}
 }
 
-static bool vmem32_map_areas()
-{
-	// Aica ram
-	vmem32_map_buffer(0x80800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);	// P1
-	vmem32_map_buffer(0x82800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
-	vmem32_map_buffer(0xA0800000, 0x00800000, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);	// P2
-	vmem32_map_buffer(0xA2800000, ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true);
-
-	// Vram
-	// Note: this should be mapped read/write but doesn't seem to be used
-	vmem32_map_buffer(0x84000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);	// P1
-	vmem32_map_buffer(0x86000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
-	vmem32_map_buffer(0xA4000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);	// P2
-	vmem32_map_buffer(0xA6000000, 0x01000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, false);
-
-	// System ram
-	vmem32_map_buffer(0x8C000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true);	// P1
-	vmem32_map_buffer(0xAC000000, 0x04000000, MAP_RAM_START_OFFSET, RAM_SIZE, true);	// P2
-
-	return true;
-}
-
 static const u32 page_sizes[] = { 1024, 4 * 1024, 64 * 1024, 1024 * 1024 };
 
 static u32 vmem32_paddr_to_offset(u32 address)
@@ -322,10 +294,10 @@ static u32 vmem32_map_address(u32 address, bool write)
 
 #if !defined(NO_MMU) && defined(HOST_64BIT_CPU)
 bool vmem32_handle_signal(void *fault_addr, bool write)
 {
-	if ((u8*)fault_addr < vmem32_base || (u8*)fault_addr >= vmem32_base + VMEM32_SIZE)
+	if ((u8*)fault_addr < virt_ram_base || (u8*)fault_addr >= virt_ram_base + VMEM32_SIZE)
 		return false;
-	vmem32_page_faults++;
-	u32 guest_addr = (u8*)fault_addr - vmem32_base;
+	//vmem32_page_faults++;
+	u32 guest_addr = (u8*)fault_addr - virt_ram_base;
 	u32 rv = vmem32_map_address(guest_addr, write);
 	//printf("vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d\n", write ? "W" : "R", fault_addr, guest_addr, rv);
 	if (rv == MMU_ERROR_NONE)
@@ -342,70 +314,17 @@ bool vmem32_handle_signal(void *fault_addr, bool write)
 
 void vmem32_flush_mmu()
 {
-	vmem32_flush++;
+	//vmem32_flush++;
 	vram_mapped_pages.clear();
 	vmem32_unmap_buffer(0, KERNEL_SPACE);
 	// TODO flush P3?
 }
 
-bool vmem32_init()
-{
-	if (!_nvmem_enabled())
-		return false;
-#if HOST_OS == OS_WINDOWS
-	// disabled on windows for now
-	return true;
-#endif
-#ifdef HOST_64BIT_CPU
-#if HOST_OS == OS_WINDOWS
-	void* rv = (u8 *)VirtualAlloc(0, VMEM32_SIZE, MEM_RESERVE, PAGE_NOACCESS);
-	if (rv != NULL)
-		VirtualFree(rv, 0, MEM_RELEASE);
-	vmem32_base = (u8*)rv;
-#else
-	void* rv = mmap(0, VMEM32_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
-	verify(rv != NULL);
-	munmap(rv, VMEM32_SIZE);
-	vmem32_base = (u8*)rv;
-#endif
-
-	vmem32_unmap_buffer(0, VMEM32_SIZE);
-	printf("vmem32_init: allocated %zx bytes from %p to %p\n", VMEM32_SIZE, vmem32_base, vmem32_base + VMEM32_SIZE);
-
-	if (!vmem32_map_areas())
-	{
-		vmem32_term();
-		return false;
-	}
-#endif
-	return true;
-}
-
 void vmem32_term()
 {
-	if (vmem32_base != NULL)
+	if (virt_ram_base != NULL)
 	{
-#if HOST_OS == OS_WINDOWS
 		vmem32_flush_mmu();
-		// Aica ram
-		vmem32_unmap_buffer(0x80800000, 0x80800000 + 0x00800000);	// P1
-		vmem32_unmap_buffer(0x82800000, 0x82800000 + ARAM_SIZE);
-		vmem32_unmap_buffer(0xA0800000, 0xA0800000 + 0x00800000);	// P2
-		vmem32_unmap_buffer(0xA2800000, 0xA2800000 + ARAM_SIZE);
-
-		// Vram
-		vmem32_unmap_buffer(0x84000000, 0x84000000 + 0x01000000);	// P1
-		vmem32_unmap_buffer(0x86000000, 0x86000000 + 0x01000000);
-		vmem32_unmap_buffer(0xA4000000, 0xA4000000 + 0x01000000);	// P2
-		vmem32_unmap_buffer(0xA6000000, 0xA6000000 + 0x01000000);
-
-		// System ram
-		vmem32_unmap_buffer(0x8C000000, 0x8C000000 + 0x04000000);	// P1
-		vmem32_unmap_buffer(0xAC000000, 0xAC000000 + 0x04000000);	// P2
-#else
-		munmap(vmem32_base, VMEM32_SIZE);
-#endif
-		vmem32_base = NULL;
 	}
 }
diff --git a/core/hw/mem/vmem32.h b/core/hw/mem/vmem32.h
index f900c472f..809cdf192 100644
--- a/core/hw/mem/vmem32.h
+++ b/core/hw/mem/vmem32.h
@@ -1,6 +1,5 @@
-#include "types.h"
+#include "_vmem.h"
 
-bool vmem32_init();
 void vmem32_term();
 bool vmem32_handle_signal(void *fault_addr, bool write);
 void vmem32_flush_mmu();
@@ -10,6 +9,6 @@ static inline bool vmem32_enabled() {
 #if HOST_OS == OS_WINDOWS
 	return false;
 #else
-	return !settings.dynarec.disable_vmem32;
+	return !settings.dynarec.disable_vmem32 && _nvmem_4gb_space();
 #endif
 }
diff --git a/core/hw/pvr/pvr_mem.cpp b/core/hw/pvr/pvr_mem.cpp
index 45fb1a721..a9d125ec7 100644
--- a/core/hw/pvr/pvr_mem.cpp
+++ b/core/hw/pvr/pvr_mem.cpp
@@ -297,7 +297,6 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb)
 
 	if (SB_LMMODE0 == 0)
 	{
 		// 64b path
-		u8* vram=sqb+512+0x04000000;
 		MemWrite32(&vram[address_w&(VRAM_MASK-0x1F)],sq);
 	}
 	else
diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp
index d7756268b..324881bb0 100644
--- a/core/hw/sh4/dyna/driver.cpp
+++ b/core/hw/sh4/dyna/driver.cpp
@@ -80,7 +80,7 @@ void clear_temp_cache(bool full)
 
 void recSh4_ClearCache()
 {
-	printf("recSh4:Dynarec Cache clear at %08X free space %d\n",curr_pc, emit_FreeSpace());
+	printf("recSh4:Dynarec Cache clear at %08X free space %d\n", curr_pc, emit_FreeSpace());
 	LastAddr=LastAddr_min;
 	bm_Reset();
 	smc_hotspots.clear();
@@ -130,7 +130,7 @@ void emit_Skip(u32 sz)
 	if (emit_ptr)
 		emit_ptr = (u32*)((u8*)emit_ptr + sz);
 	else
-		LastAddr+=sz;
+		LastAddr += sz;
 }
 
 u32 emit_FreeSpace()
@@ -138,7 +138,7 @@ u32 emit_FreeSpace()
 	if (emit_ptr)
 		return (emit_ptr_limit - emit_ptr) * sizeof(u32);
 	else
-		return CODE_SIZE-LastAddr;
+		return CODE_SIZE - LastAddr;
 }
 
 // pc must be a physical address
@@ -262,7 +262,7 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
 
 	oplist.clear();
 
-	if (!dec_DecodeBlock(this,SH4_TIMESLICE/2))
+	if (!dec_DecodeBlock(this, SH4_TIMESLICE / 2))
 		return false;
 
 	AnalyseBlock(this);
@@ -293,7 +293,7 @@ DynarecCodeEntryPtr rdv_CompilePC(u32 blockcheck_failures)
 		emit_ptr_limit = (u32 *)(TempCodeCache + TEMP_CODE_SIZE);
 		rbi->temp_block = true;
 	}
-	bool do_opts = !rbi->temp_block; //((rbi->addr&0x3FFFFFFF)>0x0C010100);
+	bool do_opts = !rbi->temp_block;
 	rbi->staging_runs=do_opts?100:-100;
 	ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
 	verify(rbi->code!=0);
@@ -505,7 +505,14 @@ void recSh4_Init()
 	verify(rcb_noffs(&p_sh4rcb->cntx.interrupt_pend) == -148);
 	if (_nvmem_enabled())
 	{
-		verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
+		if (!_nvmem_4gb_space())
+		{
+			verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
+		}
+		else
+		{
+			verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x8C000000));
+		}
 	}
 
 	// Prepare some pointer to the pre-allocated code cache:
diff --git a/core/hw/sh4/interpr/sh4_interpreter.cpp b/core/hw/sh4/interpr/sh4_interpreter.cpp
index 40e7519b5..a2074443d 100644
--- a/core/hw/sh4/interpr/sh4_interpreter.cpp
+++ b/core/hw/sh4/interpr/sh4_interpreter.cpp
@@ -124,8 +124,6 @@ void Sh4_int_Skip()
 	}
 }
 
-extern u8 *vmem32_base;
-
 void Sh4_int_Reset(bool Manual)
 {
 	if (sh4_int_bCpuRun)
@@ -150,8 +148,6 @@ void Sh4_int_Reset(bool Manual)
 	old_fpscr=fpscr;
 	UpdateFPSCR();
 
-	p_sh4rcb->cntx.vmem32_base = vmem32_base;
-
 	//Any more registers have default value ?
 	printf("Sh4 Reset\n");
 }
diff --git a/core/hw/sh4/modules/mmu.cpp b/core/hw/sh4/modules/mmu.cpp
index 4799cb0fb..4d346aced 100644
--- a/core/hw/sh4/modules/mmu.cpp
+++ b/core/hw/sh4/modules/mmu.cpp
@@ -681,6 +681,7 @@ void mmu_set_state()
 		WriteMem16 = &_vmem_WriteMem16;
 		WriteMem32 = &_vmem_WriteMem32;
 		WriteMem64 = &_vmem_WriteMem64;
+		_vmem_disable_mmu();
 	}
 }
diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp
index 0680fc494..5bbfff5c7 100644
--- a/core/hw/sh4/sh4_core_regs.cpp
+++ b/core/hw/sh4/sh4_core_regs.cpp
@@ -142,7 +142,6 @@ void SetFloatStatusReg()
 //called when fpscr is changed and we must check for reg banks etc..
 void UpdateFPSCR()
 {
-	verify(fpscr.PR == 0 || fpscr.SZ == 0);
 	if (fpscr.FR !=old_fpscr.FR)
 		ChangeFP();	// FPU bank change
diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h
index a74ba3a8c..be40f47a2 100644
--- a/core/hw/sh4/sh4_if.h
+++ b/core/hw/sh4/sh4_if.h
@@ -284,7 +284,6 @@ struct Sh4Context
 
 			u32 interrupt_pend;
 			u32 exception_pc;
-			u8 *vmem32_base;
 		};
 		u64 raw[64-8];
 	};
diff --git a/core/linux/common.cpp b/core/linux/common.cpp
index 358244aee..5d860c9de 100644
--- a/core/linux/common.cpp
+++ b/core/linux/common.cpp
@@ -60,9 +60,6 @@ void fault_handler (int sn, siginfo_t * si, void *segfault_ctx)
 	bool dyna_cde = ((unat)CC_RX2RW(ctx.pc) > (unat)CodeCache) && ((unat)CC_RX2RW(ctx.pc) < (unat)(CodeCache + CODE_SIZE + TEMP_CODE_SIZE));
 
-	//ucontext_t* ctx=(ucontext_t*)ctxr;
-	//printf("mprot hit @ ptr 0x%08X @@ code: %08X, %d\n",si->si_addr,ctx->uc_mcontext.arm_pc,dyna_cde);
-
 #if !defined(NO_MMU) && defined(HOST_64BIT_CPU) && HOST_OS != OS_WINDOWS
 #if HOST_CPU == CPU_ARM64
 	u32 op = *(u32*)ctx.pc;
diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp
index dacb9ce61..59d0687c3 100644
--- a/core/linux/posix_vmem.cpp
+++ b/core/linux/posix_vmem.cpp
@@ -156,11 +156,22 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) {
 		return MemTypeError;
 
 	// Now try to allocate a contiguous piece of memory.
-	unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000;
-	void *first_ptr = mem_region_reserve(NULL, memsize);
-	if (!first_ptr) {
-		close(vmem_fd);
-		return MemTypeError;
+	void *first_ptr = NULL;
+	VMemType rv;
+#ifdef HOST_64BIT_CPU
+	size_t bigsize = 0x100000000L + sizeof(Sh4RCB) + 0x10000;	// 4GB + context size + 64K padding
+	first_ptr = mem_region_reserve(NULL, bigsize);
+	rv = MemType4GB;
+#endif
+	if (first_ptr == NULL)
+	{
+		unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000;
+		first_ptr = mem_region_reserve(NULL, memsize);
+		if (!first_ptr) {
+			close(vmem_fd);
+			return MemTypeError;
+		}
+		rv = MemType512MB;
 	}
 
 	// Align pointer to 64KB too, some Linaro bug (no idea but let's just be safe I guess).
@@ -173,12 +184,15 @@ VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) {
 	// Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand).
 	mem_region_unlock(sh4rcb_base_ptr, sizeof(Sh4RCB) - FPCB_SIZE);
 
-	return MemType512MB;
+	return rv;
 }
 
 // Just tries to wipe as much as possible in the relevant area.
 void vmem_platform_destroy() {
-	mem_region_release(virt_ram_base, 0x20000000);
+	if (vmem_4gb_space)
+		mem_region_release(virt_ram_base, 0x100000000);
+	else
+		mem_region_release(virt_ram_base, 0x20000000);
 }
 
 // Resets a chunk of memory by deleting its data and setting its protection back.
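Note: the reserve-then-fallback logic in vmem_platform_init above amounts to the following pattern (a minimal standalone sketch using raw mmap; reserve_guest_space and its error handling are illustrative only, not part of the patch):

    #include <sys/mman.h>
    #include <cstddef>

    // Reserve (but do not commit) address space for the guest: try the full
    // 4GB space first, then fall back to the 512MB physical-only layout.
    static void *reserve_guest_space(size_t &out_size)
    {
        const size_t big = 0x100000000ULL;       // 4GB; meaningful on 64-bit hosts only
        void *p = mmap(nullptr, big, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
        if (p != MAP_FAILED) {
            out_size = big;
            return p;
        }
        const size_t small = 512 * 1024 * 1024;  // 29-bit physical space
        p = mmap(nullptr, small, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
        if (p == MAP_FAILED)
            return nullptr;
        out_size = small;
        return p;
    }

mem_region_reserve appears to wrap exactly this kind of PROT_NONE reservation; the actual RAM/VRAM/ARAM file views are then installed on top of it by vmem_platform_create_mappings.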
@@ -207,12 +221,12 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
 			continue;
 
 		// Calculate the number of mirrors
-		unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
+		u64 address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
 		unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize;
 		verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1);
 
 		for (unsigned j = 0; j < num_mirrors; j++) {
-			unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
+			u64 offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
 			verify(mem_region_unmap_file(&virt_ram_base[offset], vmem_maps[i].memsize));
 			verify(mem_region_map_file((void*)(uintptr_t)vmem_fd, &virt_ram_base[offset], vmem_maps[i].memsize, vmem_maps[i].memoffset, vmem_maps[i].allow_writes) != NULL);
diff --git a/core/nullDC.cpp b/core/nullDC.cpp
index be9042423..3bf1546ff 100755
--- a/core/nullDC.cpp
+++ b/core/nullDC.cpp
@@ -6,7 +6,6 @@
 #include "oslib/oslib.h"
 #include "oslib/audiostream.h"
 #include "hw/mem/_vmem.h"
-#include "hw/mem/vmem32.h"
 #include "stdclass.h"
 #include "cfg/cfg.h"
 
@@ -298,13 +297,6 @@ int reicast_init(int argc, char* argv[])
 		printf("Failed to alloc mem\n");
 		return -1;
 	}
-#ifdef HOST_64BIT_CPU
-	if (!vmem32_init())
-	{
-		printf("Failed to alloc 32-bit mem space\n");
-		return -1;
-	}
-#endif
 
 	if (ParseCommandLine(argc, argv))
 	{
 		return 69;
 	}
diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp
index 5cf45ef76..701177924 100644
--- a/core/rec-ARM64/rec_arm64.cpp
+++ b/core/rec-ARM64/rec_arm64.cpp
@@ -153,8 +153,6 @@ void ngen_mainloop(void* v_cntx)
 		"ldr x28, [sp] \n\t"	// Set context
 		// w29 is next_pc
 		"ldr w29, [x28, %[pc]] \n\t"
-		// x27 is vmem32_base
-		"ldr x27, [x28, %[vmem32_base]] \n\t"
 		"b no_update \n"
 
 		".hidden intc_sched \n\t"
@@ -220,8 +218,7 @@ void ngen_mainloop(void* v_cntx)
 		  [RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
 		  [SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
 		  [jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
-		  [cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter)),
-		  [vmem32_base] "i"(offsetof(Sh4Context, vmem32_base))
+		  [cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
		: "memory"
 	);
 }
@@ -1419,26 +1416,20 @@ private:
 
 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 
-		const XRegister* base_reg;
-		const XRegister* offset_reg;
-		// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
+		// WARNING: the rewrite code relies on having 1-2 ops before the memory access (4 when mmu is enabled)
 		// Update ngen_Rewrite (and perhaps read_memory_rewrite_size) if adding or removing code
-		if (!mmu_enabled())
+		Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
+		if (!_nvmem_4gb_space())
 		{
-			Add(w1, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
 			Bfc(w1, 29, 3);		// addr &= ~0xE0000000
-			base_reg = &x28;
-			offset_reg = &x1;
 		}
-		else
+		else if (mmu_enabled())
 		{
 			u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
 			// 3 ops before memory access
 			Mov(w8, exception_pc & 0xFFFF);
 			Movk(w8, exception_pc >> 16, 16);
 			Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
-			base_reg = &x27;
-			offset_reg = call_regs64[0];
 		}
 
 		//printf("direct read memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
@@ -1450,22 +1441,22 @@
 		switch(size)
 		{
 		case 1:
-			Ldrsb(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+			Ldrsb(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
 			break;
 
 		case 2:
-			Ldrsh(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+			Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
 			break;
 
 		case 4:
 			if (!op.rd.is_r32f())
-				Ldr(regalloc.MapRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				Ldr(regalloc.MapRegister(op.rd), MemOperand(x28, x1));
 			else
-				Ldr(regalloc.MapVRegister(op.rd), MemOperand(*base_reg, *offset_reg));
+				Ldr(regalloc.MapVRegister(op.rd), MemOperand(x28, x1));
 			break;
 
 		case 8:
-			Ldr(x1, MemOperand(*base_reg, *offset_reg));
+			Ldr(x1, MemOperand(x28, x1));
 			break;
 		}
 
@@ -1486,19 +1477,19 @@
 		switch(size)
 		{
 		case 1:
-			Ldrsb(w1, MemOperand(*base_reg, *offset_reg));
+			Ldrsb(w1, MemOperand(x28, x1));
 			break;
 
 		case 2:
-			Ldrsh(w1, MemOperand(*base_reg, *offset_reg));
+			Ldrsh(w1, MemOperand(x28, x1));
 			break;
 
 		case 4:
-			Ldr(w1, MemOperand(*base_reg, *offset_reg));
+			Ldr(w1, MemOperand(x28, x1));
 			break;
 
 		case 8:
-			Ldr(x1, MemOperand(*base_reg, *offset_reg));
+			Ldr(x1, MemOperand(x28, x1));
 			break;
 		}
 		if (size == 8)
@@ -1544,25 +1535,19 @@ private:
 
 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 
-		const XRegister* base_reg;
-		const XRegister* offset_reg;
-		// WARNING: the rewrite code relies on having two ops before the memory access (3 when mmu is enabled)
+		// WARNING: the rewrite code relies on having 1-2 ops before the memory access (4 when mmu is enabled)
 		// Update ngen_Rewrite (and perhaps write_memory_rewrite_size) if adding or removing code
-		if (!mmu_enabled())
+		Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
+		if (!_nvmem_4gb_space())
 		{
-			Add(w7, *call_regs[0], sizeof(Sh4Context), LeaveFlags);
 			Bfc(w7, 29, 3);		// addr &= ~0xE0000000
-			base_reg = &x28;
-			offset_reg = &x7;
 		}
-		else
+		else if (mmu_enabled())
 		{
 			u32 exception_pc = block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0);
 			Mov(w8, exception_pc & 0xFFFF);
 			Movk(w8, exception_pc >> 16, 16);
 			Str(w8, sh4_context_mem_operand(&p_sh4rcb->cntx.exception_pc));
-			base_reg = &x27;
-			offset_reg = call_regs64[0];
 		}
 
 		//printf("direct write memory access opid %d pc %p code addr %08x\n", opid, GetCursorAddress<void *>(), this->block->addr);
 
 		switch(size)
 		{
 		case 1:
-			Strb(w1, MemOperand(*base_reg, *offset_reg));
+			Strb(w1, MemOperand(x28, x7));
 			break;
 
 		case 2:
-			Strh(w1, MemOperand(*base_reg, *offset_reg));
+			Strh(w1, MemOperand(x28, x7));
 			break;
 
 		case 4:
-			Str(w1, MemOperand(*base_reg, *offset_reg));
+			Str(w1, MemOperand(x28, x7));
 			break;
 
 		case 8:
-			Str(x1, MemOperand(*base_reg, *offset_reg));
+			Str(x1, MemOperand(x28, x7));
 			break;
 		}
 		EnsureCodeSize(start_instruction, write_memory_rewrite_size);
@@ -1767,7 +1752,7 @@ private:
 	RuntimeBlockInfo* block = NULL;
 	const int read_memory_rewrite_size = 6;	// worst case for u64: add, bfc, ldr, fmov, lsr, fmov
 											// FIXME rewrite size per read/write size?
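+											// Ops emitted before the actual access in
+											// GenReadMemoryFast/GenWriteMemoryFast: add+bfc (2)
+											// without a 4GB space, add alone (1) with it, and
+											// add+mov+movk+str (4) when the MMU is enabled;
+											// ngen_Rewrite below mirrors this arithmetic.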
-	const int write_memory_rewrite_size = 4;	// TODO only 3 if !mmu
+	const int write_memory_rewrite_size = 5;	// TODO only 2 if !mmu & 4gb
 };
 
 static Arm64Assembler* compiler;
@@ -1824,7 +1809,9 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
 	u32 opid = it->second;
 	verify(opid < block->oplist.size());
 	const shil_opcode& op = block->oplist[opid];
-	Arm64Assembler *assembler = new Arm64Assembler(code_ptr - 2 - (mmu_enabled() ? 1 : 0));	// Skip the 2 preceding ops (bic, add)
+	// Skip the preceding ops (add, bic, ...)
+	u32 *code_rewrite = code_ptr - 1 - (!_nvmem_4gb_space() ? 1 : 0) - (mmu_enabled() ? 3 : 0);
+	Arm64Assembler *assembler = new Arm64Assembler(code_rewrite);
 	assembler->InitializeRewrite(block, opid);
 	if (op.op == shop_readm)
 		assembler->GenReadMemorySlow(op);
@@ -1832,7 +1819,7 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
 		assembler->GenWriteMemorySlow(op);
 	assembler->Finalize(true);
 	delete assembler;
-	host_pc = (unat)CC_RW2RX(code_ptr - 2 - (mmu_enabled() ? 1 : 0));
+	host_pc = (unat)CC_RW2RX(code_rewrite);
 	return true;
 }
diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp
index e932f7f48..61531ba36 100644
--- a/core/rec-x64/rec_x64.cpp
+++ b/core/rec-x64/rec_x64.cpp
@@ -1402,7 +1402,7 @@ private:
 
 			mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
 			mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
 
-			mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
+			mov(rax, (uintptr_t)virt_ram_base);
 
 			u32 size = op.flags & 0x7f;
 			//verify(getCurr() - start_addr == 26);
@@ -1450,7 +1450,7 @@ private:
 
 			mov(rax, (uintptr_t)&p_sh4rcb->cntx.exception_pc);
 			mov(dword[rax], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0));
 
-			mov(rax, (uintptr_t)p_sh4rcb->cntx.vmem32_base);
+			mov(rax, (uintptr_t)virt_ram_base);
 
 			u32 size = op.flags & 0x7f;
 			//verify(getCurr() - start_addr == 26);
diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp
index ad993b6d8..f906a785d 100644
--- a/core/rend/TexCache.cpp
+++ b/core/rend/TexCache.cpp
@@ -214,8 +214,27 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user
 	if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
 		vram.LockRegion(block->start + VRAM_SIZE, block->len);
 	}
-	if (mmu_enabled())
+	if (!mmu_enabled())
+	{
+		if (_nvmem_4gb_space())
+		{
+			// In 4GB mode, vram.LockRegion() locks in the P1 area only so we also need to lock P0
+			// We should also lock P2 and P3 but they don't seem to be used...
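+			// (vram.data points into P1 in 4GB mode, so vram.LockRegion() only
+			// protects the P1 pages; the P0/U0 window at 0x04000000 must be
+			// locked explicitly for texture invalidation to see those writes.)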
+			mem_region_lock(virt_ram_base + 0x04000000 + block->start, block->len);
+			//mem_region_lock(virt_ram_base + 0xA4000000 + block->start, block->len);
+			//mem_region_lock(virt_ram_base + 0xC4000000 + block->start, block->len);
+			if (VRAM_SIZE == 0x800000)
+			{
+				mem_region_lock(virt_ram_base + 0x04000000 + block->start + VRAM_SIZE, block->len);
+				//mem_region_lock(virt_ram_base + 0xA4000000 + block->start + VRAM_SIZE, block->len);
+				//mem_region_lock(virt_ram_base + 0xC4000000 + block->start + VRAM_SIZE, block->len);
+			}
+		}
+	}
+	else
+	{
 		vmem32_protect_vram(block);
+	}
 
 	vramlock_list_add(block);
@@ -252,13 +271,20 @@ bool VramLockedWriteOffset(size_t offset)
 	}
 	list->clear();
 
-	vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE);
+	u32 aligned_offset = (u32)offset & ~(PAGE_SIZE - 1);
+	vram.UnLockRegion(aligned_offset, PAGE_SIZE);
 
 	//TODO: Fix this for 32M wrap as well
 	if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
-		vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
+		vram.UnLockRegion(aligned_offset + VRAM_SIZE, PAGE_SIZE);
 	}
-
+	if (_nvmem_4gb_space() && !mmu_enabled())
+	{
+		mem_region_unlock(virt_ram_base + 0x04000000 + aligned_offset, PAGE_SIZE);
+		if (VRAM_SIZE == 0x800000)
+			mem_region_unlock(virt_ram_base + 0x04000000 + aligned_offset + VRAM_SIZE, PAGE_SIZE);
+	}
+
 	vramlist_lock.Unlock();
 }
@@ -274,8 +300,16 @@ bool VramLockedWrite(u8* address)
 
 	if (offset < 0x01000000)
 		return VramLockedWriteOffset(offset & (VRAM_SIZE - 1));
-	else
-		return false;
+	if (_nvmem_4gb_space() && !mmu_enabled())
+	{
+		offset = address - virt_ram_base;
+		if (offset >= 0x04000000 && offset < 0x05000000)
+			return VramLockedWriteOffset((offset - 0x04000000) & (VRAM_SIZE - 1));
+		// 32MB wrap not set yet
+		//if (offset >= 0x06000000 && offset < 0x07000000)
+		//	return VramLockedWriteOffset((offset - 0x06000000) & (VRAM_SIZE - 1));
+	}
+	return false;
 }
 
 //unlocks mem
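
The VramLockedWrite change above can be summarized by this standalone model of the fault-address classification (an illustrative helper under the same layout assumptions as the patch; classify_vram_fault is not a real function in the tree):

    #include <cstdint>
    #include <cstddef>

    // Maps a faulting host address to a VRAM offset, or -1 if the fault is not
    // in a locked VRAM page. vram_data is the canonical (writable) mapping;
    // ram_base is virt_ram_base, the bottom of the guest address space.
    static ptrdiff_t classify_vram_fault(const uint8_t *addr, const uint8_t *vram_data,
                                         const uint8_t *ram_base, bool four_gb,
                                         bool mmu_on, size_t vram_size)
    {
        ptrdiff_t offset = addr - vram_data;        // offset into the locked mapping
        if (offset >= 0 && offset < 0x01000000)
            return offset & (ptrdiff_t)(vram_size - 1);
        if (four_gb && !mmu_on) {
            offset = addr - ram_base;               // guest address in the 4GB space
            if (offset >= 0x04000000 && offset < 0x05000000)  // P0/U0 VRAM window
                return (offset - 0x04000000) & (ptrdiff_t)(vram_size - 1);
        }
        return -1;                                  // not a protected VRAM page
    }

When the MMU is enabled, the same faults are instead routed through vmem32_handle_signal, which rebuilds the per-page mappings lazily.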