Move cache invalidation to vmem_platform and implement NO_RWX on arm64

Not yet functional: this commit still breaks arm64 on Android, for instance.
Rewrites are not figured out yet; they will be pushed in the next commit.
This commit is contained in:
David Guillen Fandos 2019-05-12 23:25:43 +02:00
parent 3b760f9869
commit aa4fc8dd60
7 changed files with 95 additions and 59 deletions

View File

@ -27,7 +27,7 @@
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
extern void Arm64CacheFlush(void* start, void* end);
extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
class DSPAssembler : public MacroAssembler
{
@ -54,9 +54,9 @@ public:
Stp(xzr, xzr, MemOperand(x0, 48));
Ret();
FinalizeCode();
#ifdef _ANDROID
Arm64CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#endif
vmem_platform_flush_cache(
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>(),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
return;
}
@ -387,9 +387,9 @@ public:
#endif
FinalizeCode();
#ifdef _ANDROID
Arm64CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#endif
vmem_platform_flush_cache(
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>(),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
}
private:

View File

@ -28,7 +28,7 @@
using namespace vixl::aarch64;
//#include "deps/vixl/aarch32/disasm-aarch32.h"
extern void Arm64CacheFlush(void* start, void* end);
extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
extern u32 arm_single_op(u32 opcode);
extern "C" void arm_dispatch();
extern "C" void arm_exit();
@ -41,7 +41,7 @@ extern reg_pair arm_Reg[RN_ARM_REG_COUNT];
MacroAssembler *assembler;
extern "C" void armFlushICache(void *bgn, void *end) {
Arm64CacheFlush(bgn, end);
vmem_platform_flush_cache(bgn, end, bgn, end);
}
static MemOperand arm_reg_operand(u32 regn)
@ -143,7 +143,9 @@ void armv_end(void* codestart, u32 cycl)
assembler->FinalizeCode();
verify(assembler->GetBuffer()->GetCursorOffset() <= assembler->GetBuffer()->GetCapacity());
Arm64CacheFlush(codestart, assembler->GetBuffer()->GetEndAddress<void*>());
vmem_platform_flush_cache(
codestart, assembler->GetBuffer()->GetEndAddress<void*>(),
codestart, assembler->GetBuffer()->GetEndAddress<void*>());
icPtr += assembler->GetBuffer()->GetSizeInBytes();
#if 0

View File

@ -469,6 +469,7 @@ bool _vmem_reserve() {
}
else {
printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? "4GB" : "512MB");
printf("Info: p_sh4rcb: %p virt_ram_base: %p\n", p_sh4rcb, virt_ram_base);
// Map the different parts of the memory file into the new memory range we got.
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)

View File

@ -30,6 +30,8 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
// Same as above but uses two address spaces one with RX and RW protections.
// Note: this function doesn't have to be implemented; it's a fallback for the above one.
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, uintptr_t *rx_offset);
// This might not need an implementation (ie x86/64 cpus).
void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
// Note: if you want to disable vmem magic in any given platform, implement the
// above functions as empty functions and make vmem_platform_init return MemTypeError.

View File

@ -215,9 +215,63 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
*code_area_rw = ptr_rw;
*rx_offset = (char*)ptr_rx - (char*)ptr_rw;
printf("Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %p\n", ptr_rx, ptr_rw, *rx_offset);
printf("Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %lu\n", ptr_rx, ptr_rw, (unsigned long)*rx_offset);
return (ptr_rw != MAP_FAILED);
}
// Some OSes restrict cache flushing, cause why not right? :D
#if HOST_CPU == CPU_ARM64
// Code borrowed from Dolphin https://github.com/dolphin-emu/dolphin
// Cleans the data cache and invalidates the instruction cache over
// [start, end). Must be called after writing freshly generated code so the
// CPU fetches the new instructions rather than stale icache contents.
static void Arm64_CacheFlush(void* start, void* end) {
if (start == end)
return;
#if HOST_OS == OS_DARWIN
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#else
// Don't rely on GCC's __clear_cache implementation, as it caches
// icache/dcache cache line sizes, that can vary between cores on
// big.LITTLE architectures.
u64 addr, ctr_el0;
// Persist the smallest line sizes observed so far: if this thread migrates
// between heterogeneous cores, using the global minimum stays safe.
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
size_t isize, dsize;
// CTR_EL0 encodes IminLine in bits [3:0] and DminLine in bits [19:16],
// both as log2(number of 4-byte words) — hence the "4 <<" scaling.
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
isize = 4 << ((ctr_el0 >> 0) & 0xf);
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
// use the global minimum cache line size
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
// Clean+invalidate the dcache, one line at a time, from an aligned base.
addr = (u64)start & ~(u64)(dsize - 1);
for (; addr < (u64)end; addr += dsize)
// use "civac" instead of "cvau", as this is the suggested workaround for
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
__asm__ volatile("dsb ish" : : : "memory");
// Invalidate the icache over the same range (icache-line aligned).
addr = (u64)start & ~(u64)(isize - 1);
for (; addr < (u64)end; addr += isize)
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
__asm__ volatile("dsb ish" : : : "memory");
// Flush the pipeline so subsequent fetches see the new instructions.
__asm__ volatile("isb" : : : "memory");
#endif
}
// Flush/invalidate the caches for a freshly written code region. The icache
// and dcache ranges normally alias the same memory; when they differ (split
// RW/RX mappings), conservatively clean+invalidate both ranges.
void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end) {
	Arm64_CacheFlush(dcache_start, dcache_end);
	if (icache_start == dcache_start)
		return;
	// Distinct mapping: flush and invalidate the icache alias as well.
	Arm64_CacheFlush(icache_start, icache_end);
}
#endif // #if HOST_CPU == CPU_ARM64

View File

@ -45,12 +45,11 @@ using namespace vixl::aarch64;
extern "C" void no_update();
extern "C" void intc_sched();
extern "C" void ngen_blockcheckfail(u32 pc);
extern "C" void ngen_LinkBlock_Generic_stub();
extern "C" void ngen_LinkBlock_cond_Branch_stub();
extern "C" void ngen_LinkBlock_cond_Next_stub();
extern "C" void ngen_FailedToFindBlock_();
extern void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
struct DynaRBI : RuntimeBlockInfo
{
@ -61,47 +60,6 @@ struct DynaRBI : RuntimeBlockInfo
}
};
// Code borrowed from Dolphin https://github.com/dolphin-emu/dolphin
// Cleans the dcache and invalidates the icache over [start, end) so newly
// emitted code becomes visible to instruction fetch.
void Arm64CacheFlush(void* start, void* end)
{
if (start == end)
return;
#if HOST_OS == OS_DARWIN
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#else
// Don't rely on GCC's __clear_cache implementation, as it caches
// icache/dcache cache line sizes, that can vary between cores on
// big.LITTLE architectures.
u64 addr, ctr_el0;
// Track the global minimum line sizes across calls (safe under core migration).
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
size_t isize, dsize;
// CTR_EL0: IminLine bits [3:0], DminLine bits [19:16], as log2(words).
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
isize = 4 << ((ctr_el0 >> 0) & 0xf);
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
// use the global minimum cache line size
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
// Clean+invalidate dcache lines covering the range.
addr = (u64)start & ~(u64)(dsize - 1);
for (; addr < (u64)end; addr += dsize)
// use "civac" instead of "cvau", as this is the suggested workaround for
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
__asm__ volatile("dsb ish" : : : "memory");
// Then invalidate the corresponding icache lines.
addr = (u64)start & ~(u64)(isize - 1);
for (; addr < (u64)end; addr += isize)
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
__asm__ volatile("dsb ish" : : : "memory");
// Synchronize the instruction stream before returning.
__asm__ volatile("isb" : : : "memory");
#endif
}
double host_cpu_time;
u64 guest_cpu_cycles;
@ -147,7 +105,7 @@ __asm__
"ngen_LinkBlock_Shared_stub: \n\t"
"mov x0, lr \n\t"
"sub x0, x0, #4 \n\t" // go before the call
"bl rdv_LinkBlock \n\t"
"bl rdv_LinkBlock \n\t" // returns an RX addr
"br x0 \n"
".hidden ngen_FailedToFindBlock_ \n\t"
@ -1013,7 +971,7 @@ public:
Ldr(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
GenBranchRuntime(no_update);
break;
default:
@ -1038,7 +996,12 @@ public:
emit_Skip(block->host_code_size);
}
Arm64CacheFlush(GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
// Flush and invalidate caches
vmem_platform_flush_cache(
CC_RW2RX(GetBuffer()->GetStartAddress<void*>()), CC_RW2RX(GetBuffer()->GetEndAddress<void*>()),
GetBuffer()->GetStartAddress<void*>(), GetBuffer()->GetEndAddress<void*>());
#if 0
// if (rewrite)
{
@ -1060,10 +1023,13 @@ public:
}
private:
// Runtime branches/calls need to be adjusted if rx space is different to rw space.
// Therefore can't mix GenBranch with GenBranchRuntime!
template <typename R, typename... P>
void GenCallRuntime(R (*function)(P...))
{
ptrdiff_t offset = reinterpret_cast<uintptr_t>(function) - GetBuffer()->GetStartAddress<uintptr_t>();
ptrdiff_t offset = reinterpret_cast<uintptr_t>(function) - reinterpret_cast<uintptr_t>(CC_RW2RX(GetBuffer()->GetStartAddress<void*>()));
verify(offset >= -128 * 1024 * 1024 && offset <= 128 * 1024 * 1024);
verify((offset & 3) == 0);
Label function_label;
@ -1071,6 +1037,17 @@ private:
Bl(&function_label);
}
// Emits a direct PC-relative branch (B) to a runtime routine. The
// displacement is measured from the RX (executable) alias of the code buffer
// via CC_RW2RX — not from the RW write pointer — so the branch lands
// correctly when NO_RWX maps the code at two different addresses.
template <typename R, typename... P>
void GenBranchRuntime(R (*target)(P...))
{
// Offset from the RX view of the buffer start to the target function.
ptrdiff_t offset = reinterpret_cast<uintptr_t>(target) - reinterpret_cast<uintptr_t>(CC_RW2RX(GetBuffer()->GetStartAddress<void*>()));
// Enforce the +/-128MB reach checked here, and 4-byte instruction alignment.
verify(offset >= -128 * 1024 * 1024 && offset <= 128 * 1024 * 1024);
verify((offset & 3) == 0);
Label target_label;
BindToOffset(&target_label, offset);
B(&target_label);
}
template <typename R, typename... P>
void GenBranch(R (*code)(P...), Condition cond = al)
{

View File

@ -183,7 +183,7 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
*code_area_rw = ptr_rw;
*rx_offset = (char*)ptr_rx - (char*)ptr_rw;
printf("Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %p\n", ptr_rx, ptr_rw, *rx_offset);
printf("Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %lu\n", ptr_rx, ptr_rw, (unsigned long)*rx_offset);
return (ptr_rw != NULL);
}