diff --git a/CMakeLists.txt b/CMakeLists.txt index 387b7f49..0ca9a1ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,28 @@ target_compile_definitions(redream PRIVATE ${REDREAM_DEFS}) target_compile_options(redream PRIVATE ${REDREAM_COMPILE_FLAGS}) +#-------------------------------------------------- +# tools +#-------------------------------------------------- +set(RECC_SOURCES tools/recc.cc) + +foreach(file ${REDREAM_SOURCES}) + if(file MATCHES "(deps|src/(core|jit|sys))") + list(APPEND RECC_SOURCES ${file}) + endif() +endforeach() + +# assign source groups for visual studio projects +source_group_by_dir(RECC_SOURCES) + +add_executable(recc ${RECC_SOURCES}) +target_include_directories(recc SYSTEM PUBLIC ${REDREAM_INCLUDE_DIRS}) +target_include_directories(recc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) +target_link_libraries(recc ${REDREAM_LIBS}) +target_compile_definitions(recc PRIVATE MICROPROFILE_ENABLED=0 ${REDREAM_DEFS}) +target_compile_options(recc PRIVATE ${REDREAM_COMPILE_FLAGS}) + + #-------------------------------------------------- # tests #-------------------------------------------------- @@ -336,7 +358,7 @@ else() endif() # build test binary -set(REDREAM_TEST_SOURCES +set(RETEST_SOURCES ${REDREAM_SOURCES} test/test_interval_tree.cc test/test_intrusive_list.cc @@ -345,13 +367,13 @@ set(REDREAM_TEST_SOURCES test/test_minmax_heap.cc test/test_sh4.cc ${asm_inc}) -list(REMOVE_ITEM REDREAM_TEST_SOURCES src/main.cc) +list(REMOVE_ITEM RETEST_SOURCES src/main.cc) # assign source groups for visual studio projects -source_group_by_dir(REDREAM_TEST_SOURCES) +source_group_by_dir(RETEST_SOURCES) -add_executable(redream_test ${REDREAM_TEST_SOURCES}) -target_include_directories(redream_test PUBLIC deps/gtest-1.7.0/include src/ test/ ${REDREAM_INCLUDE_DIRS}) -target_link_libraries(redream_test gtest gtest_main ${REDREAM_LIBS}) -target_compile_definitions(redream_test PRIVATE REDREAM_TEST=1 ${REDREAM_DEFS}) -target_compile_options(redream_test PRIVATE 
${REDREAM_COMPILE_FLAGS}) +add_executable(retest ${RETEST_SOURCES}) +target_include_directories(retest PUBLIC deps/gtest-1.7.0/include src/ test/ ${REDREAM_INCLUDE_DIRS}) +target_link_libraries(retest gtest gtest_main ${REDREAM_LIBS}) +target_compile_definitions(retest PRIVATE ${REDREAM_DEFS}) +target_compile_options(retest PRIVATE ${REDREAM_COMPILE_FLAGS}) diff --git a/README.md b/README.md index b131d8f2..388f4780 100644 --- a/README.md +++ b/README.md @@ -58,5 +58,5 @@ target remote localhost:24690 ## Running tests ```shell -redream_test +retest ``` diff --git a/src/core/log.cc b/src/core/log.cc index 225f20c5..cafeb51b 100644 --- a/src/core/log.cc +++ b/src/core/log.cc @@ -2,20 +2,25 @@ #include #include "core/log.h" -#define ANSI_COLOR_RED "\x1b[31m" -#define ANSI_COLOR_GREEN "\x1b[32m" -#define ANSI_COLOR_YELLOW "\x1b[33m" -#define ANSI_COLOR_BLUE "\x1b[34m" -#define ANSI_COLOR_MAGENTA "\x1b[35m" -#define ANSI_COLOR_CYAN "\x1b[36m" -#define ANSI_COLOR_RESET "\x1b[0m" +namespace re { void Log(LogLevel level, const char *format, ...) { - static char buffer[0x20000]; + static char sbuffer[0x1000]; + int buffer_size = sizeof(sbuffer); + char *buffer = sbuffer; va_list args; + // allocate a temporary buffer when the string plus its NUL terminator won't fit in sbuffer (vsnprintf's return value excludes the NUL, hence >=) va_start(args, format); - vsnprintf(buffer, sizeof(buffer), format, args); + int len = vsnprintf(0, 0, format, args); + if (len >= buffer_size) { + buffer_size = len + 1; + buffer = reinterpret_cast(malloc(buffer_size)); + } + va_end(args); + + va_start(args, format); + vsnprintf(buffer, buffer_size, format, args); va_end(args); #if defined(PLATFORM_LINUX) || defined(PLATFORM_DARWIN) @@ -33,4 +38,10 @@ void Log(LogLevel level, const char *format, ...) 
{ #else printf("%s\n", buffer); #endif + + // cleanup the temporary buffer + if (buffer != sbuffer) { + free(buffer); + } +} } diff --git a/src/core/log.h b/src/core/log.h index e3f9f817..81238a8c 100644 --- a/src/core/log.h +++ b/src/core/log.h @@ -3,7 +3,21 @@ #include -enum LogLevel { LOG_LEVEL_INFO, LOG_LEVEL_WARNING, LOG_LEVEL_FATAL }; +namespace re { + +enum LogLevel { + LOG_LEVEL_INFO, + LOG_LEVEL_WARNING, + LOG_LEVEL_FATAL, +}; + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_RESET "\x1b[0m" void Log(LogLevel level, const char *format, ...); @@ -31,5 +45,6 @@ void Log(LogLevel level, const char *format, ...); exit(1); \ } while (0) #endif +} #endif diff --git a/src/emu/profiler.h b/src/core/profiler.h similarity index 100% rename from src/emu/profiler.h rename to src/core/profiler.h diff --git a/src/hw/holly/tile_accelerator.cc b/src/hw/holly/tile_accelerator.cc index fe910e02..f37d9f21 100644 --- a/src/hw/holly/tile_accelerator.cc +++ b/src/hw/holly/tile_accelerator.cc @@ -1,5 +1,5 @@ #include "core/memory.h" -#include "emu/profiler.h" +#include "core/profiler.h" #include "hw/holly/holly.h" #include "hw/holly/pixel_convert.h" #include "hw/holly/tile_accelerator.h" diff --git a/src/hw/holly/tile_renderer.cc b/src/hw/holly/tile_renderer.cc index f681a7cb..c5d8faff 100644 --- a/src/hw/holly/tile_renderer.cc +++ b/src/hw/holly/tile_renderer.cc @@ -1,5 +1,5 @@ #include "core/assert.h" -#include "emu/profiler.h" +#include "core/profiler.h" #include "hw/holly/pixel_convert.h" #include "hw/holly/tile_accelerator.h" #include "hw/holly/tile_renderer.h" diff --git a/src/hw/memory.cc b/src/hw/memory.cc index 710e796f..cde983db 100644 --- a/src/hw/memory.cc +++ b/src/hw/memory.cc @@ -78,27 +78,29 @@ MapEntry *MemoryMap::AllocEntry() { } // helpers for emitted assembly -uint8_t 
Memory::R8(Memory *memory, uint32_t addr) { return memory->R8(addr); } -uint16_t Memory::R16(Memory *memory, uint32_t addr) { - return memory->R16(addr); +uint8_t Memory::R8(void *memory, uint32_t addr) { + return reinterpret_cast(memory)->R8(addr); } -uint32_t Memory::R32(Memory *memory, uint32_t addr) { - return memory->R32(addr); +uint16_t Memory::R16(void *memory, uint32_t addr) { + return reinterpret_cast(memory)->R16(addr); } -uint64_t Memory::R64(Memory *memory, uint32_t addr) { - return memory->R64(addr); +uint32_t Memory::R32(void *memory, uint32_t addr) { + return reinterpret_cast(memory)->R32(addr); } -void Memory::W8(Memory *memory, uint32_t addr, uint8_t value) { - memory->W8(addr, value); +uint64_t Memory::R64(void *memory, uint32_t addr) { + return reinterpret_cast(memory)->R64(addr); } -void Memory::W16(Memory *memory, uint32_t addr, uint16_t value) { - memory->W16(addr, value); +void Memory::W8(void *memory, uint32_t addr, uint8_t value) { + reinterpret_cast(memory)->W8(addr, value); } -void Memory::W32(Memory *memory, uint32_t addr, uint32_t value) { - memory->W32(addr, value); +void Memory::W16(void *memory, uint32_t addr, uint16_t value) { + reinterpret_cast(memory)->W16(addr, value); } -void Memory::W64(Memory *memory, uint32_t addr, uint64_t value) { - memory->W64(addr, value); +void Memory::W32(void *memory, uint32_t addr, uint32_t value) { + reinterpret_cast(memory)->W32(addr, value); +} +void Memory::W64(void *memory, uint32_t addr, uint64_t value) { + reinterpret_cast(memory)->W64(addr, value); } Memory::Memory(Machine &machine) diff --git a/src/hw/memory.h b/src/hw/memory.h index 8a9dce64..a5460ef1 100644 --- a/src/hw/memory.h +++ b/src/hw/memory.h @@ -146,14 +146,14 @@ struct MemoryRegion { class Memory { public: - static uint8_t R8(Memory *memory, uint32_t addr); - static uint16_t R16(Memory *memory, uint32_t addr); - static uint32_t R32(Memory *memory, uint32_t addr); - static uint64_t R64(Memory *memory, uint32_t addr); - static void 
W8(Memory *memory, uint32_t addr, uint8_t value); - static void W16(Memory *memory, uint32_t addr, uint16_t value); - static void W32(Memory *memory, uint32_t addr, uint32_t value); - static void W64(Memory *memory, uint32_t addr, uint64_t value); + static uint8_t R8(void *memory, uint32_t addr); + static uint16_t R16(void *memory, uint32_t addr); + static uint32_t R32(void *memory, uint32_t addr); + static uint64_t R64(void *memory, uint32_t addr); + static void W8(void *memory, uint32_t addr, uint8_t value); + static void W16(void *memory, uint32_t addr, uint16_t value); + static void W32(void *memory, uint32_t addr, uint32_t value); + static void W64(void *memory, uint32_t addr, uint64_t value); Memory(Machine &machine); ~Memory(); diff --git a/src/hw/sh4/sh4.cc b/src/hw/sh4/sh4.cc index bafca903..5c18664d 100644 --- a/src/hw/sh4/sh4.cc +++ b/src/hw/sh4/sh4.cc @@ -1,7 +1,7 @@ #include #include "core/math.h" #include "core/memory.h" -#include "emu/profiler.h" +#include "core/profiler.h" #include "hw/sh4/sh4.h" #include "hw/dreamcast.h" #include "hw/debugger.h" @@ -12,6 +12,7 @@ using namespace re; using namespace re::hw; using namespace re::hw::sh4; using namespace re::jit; +using namespace re::jit::backend; using namespace re::jit::frontend::sh4; using namespace re::sys; @@ -57,7 +58,11 @@ bool SH4::Init() { memory_ = dc_.memory; scheduler_ = dc_.scheduler; - code_cache_ = new SH4CodeCache(memory_, &ctx_, &SH4::CompilePC); + code_cache_ = + new SH4CodeCache({&ctx_, memory_->protected_base(), memory_, &Memory::R8, + &Memory::R16, &Memory::R32, &Memory::R64, &Memory::W8, + &Memory::W16, &Memory::W32, &Memory::W64}, + &SH4::CompilePC); // initialize context memset(&ctx_, 0, sizeof(ctx_)); @@ -238,7 +243,12 @@ void SH4::Step() { code_cache_->RemoveBlocks(ctx_.pc); // recompile it with only one instruction and run it - SH4BlockEntry *block = code_cache_->CompileBlock(ctx_.pc, 1); + uint32_t guest_addr = ctx_.pc; + uint8_t *host_addr = 
memory_->TranslateVirtual(guest_addr); + int flags = GetCompileFlags() | SH4_SINGLE_INSTR; + + SH4BlockEntry *block = + code_cache_->CompileBlock(guest_addr, host_addr, flags); ctx_.pc = block->run(); // let the debugger know we've stopped @@ -414,9 +424,13 @@ void SH4::OnPaint(bool show_main_menu) { } uint32_t SH4::CompilePC() { - SH4CodeCache *code_cache = s_current_cpu->code_cache_; - SH4Context *ctx = &s_current_cpu->ctx_; - SH4BlockEntry *block = code_cache->CompileBlock(ctx->pc, 0); + uint32_t guest_addr = s_current_cpu->ctx_.pc; + uint8_t *host_addr = s_current_cpu->memory_->TranslateVirtual(guest_addr); + int flags = s_current_cpu->GetCompileFlags(); + + SH4BlockEntry *block = + s_current_cpu->code_cache_->CompileBlock(guest_addr, host_addr, flags); + return block->run(); } @@ -479,6 +493,17 @@ void SH4::FPSCRUpdated(SH4Context *ctx, uint64_t old_fpscr) { } } +int SH4::GetCompileFlags() { + int flags = 0; + if (ctx_.fpscr & PR) { + flags |= SH4_DOUBLE_PR; + } + if (ctx_.fpscr & SZ) { + flags |= SH4_DOUBLE_SZ; + } + return flags; +} + void SH4::SwapRegisterBank() { for (int s = 0; s < 8; s++) { uint32_t tmp = ctx_.r[s]; diff --git a/src/hw/sh4/sh4.h b/src/hw/sh4/sh4.h index dde1818a..901b4d6e 100644 --- a/src/hw/sh4/sh4.h +++ b/src/hw/sh4/sh4.h @@ -104,6 +104,7 @@ class SH4 : public Device, static void FPSCRUpdated(jit::frontend::sh4::SH4Context *ctx, uint64_t old_fpscr); + int GetCompileFlags(); void SwapRegisterBank(); void SwapFPRegisterBank(); diff --git a/src/hw/sh4/sh4_code_cache.cc b/src/hw/sh4/sh4_code_cache.cc index 750c3f0d..fd1fd7a5 100644 --- a/src/hw/sh4/sh4_code_cache.cc +++ b/src/hw/sh4/sh4_code_cache.cc @@ -1,10 +1,12 @@ #include -#include "emu/profiler.h" +#include "core/profiler.h" #include "hw/sh4/sh4_code_cache.h" +#include "hw/memory.h" #include "jit/backend/x64/x64_backend.h" #include "jit/frontend/sh4/sh4_frontend.h" #include "jit/ir/ir_builder.h" // #include "jit/ir/passes/constant_propagation_pass.h" +// #include 
"jit/ir/passes/conversion_elimination_pass.h" #include "jit/ir/passes/dead_code_elimination_pass.h" #include "jit/ir/passes/load_store_elimination_pass.h" #include "jit/ir/passes/register_allocation_pass.h" @@ -19,7 +21,7 @@ using namespace re::jit::ir; using namespace re::jit::ir::passes; using namespace re::sys; -SH4CodeCache::SH4CodeCache(Memory *memory, void *guest_ctx, +SH4CodeCache::SH4CodeCache(const MemoryInterface &memif, BlockPointer default_block) : default_block_(default_block) { // add exception handler to help recompile blocks when protected memory is @@ -28,12 +30,13 @@ SH4CodeCache::SH4CodeCache(Memory *memory, void *guest_ctx, this, &SH4CodeCache::HandleException); // setup parser and emitter - frontend_ = new SH4Frontend(*memory, guest_ctx); - backend_ = new X64Backend(*memory, guest_ctx); + frontend_ = new SH4Frontend(); + backend_ = new X64Backend(memif); // setup optimization passes pass_runner_.AddPass(std::unique_ptr(new LoadStoreEliminationPass())); // pass_runner_.AddPass(std::unique_ptr(new ConstantPropagationPass())); + // pass_runner_.AddPass(std::unique_ptr(new ConversionEliminationPass())); pass_runner_.AddPass(std::unique_ptr(new DeadCodeEliminationPass())); pass_runner_.AddPass( std::unique_ptr(new RegisterAllocationPass(*backend_))); @@ -59,10 +62,11 @@ SH4CodeCache::~SH4CodeCache() { delete[] blocks_; } -SH4BlockEntry *SH4CodeCache::CompileBlock(uint32_t addr, int max_instrs) { +SH4BlockEntry *SH4CodeCache::CompileBlock(uint32_t guest_addr, + uint8_t *host_addr, int flags) { PROFILER_RUNTIME("SH4CodeCache::CompileBlock"); - int offset = BLOCK_OFFSET(addr); + int offset = BLOCK_OFFSET(guest_addr); CHECK_LT(offset, MAX_BLOCKS); SH4BlockEntry *block = &blocks_[offset]; @@ -79,7 +83,7 @@ SH4BlockEntry *SH4CodeCache::CompileBlock(uint32_t addr, int max_instrs) { } // compile the SH4 into IR - IRBuilder &builder = frontend_->BuildBlock(addr, max_instrs); + IRBuilder &builder = frontend_->BuildBlock(guest_addr, host_addr, flags); 
pass_runner_.Run(builder, false); @@ -100,7 +104,7 @@ SH4BlockEntry *SH4CodeCache::CompileBlock(uint32_t addr, int max_instrs) { } // add the cache entry to the lookup maps - auto res = block_map_.insert(std::make_pair(addr, block)); + auto res = block_map_.insert(std::make_pair(guest_addr, block)); CHECK(res.second); auto rres = reverse_block_map_.insert( @@ -115,10 +119,10 @@ SH4BlockEntry *SH4CodeCache::CompileBlock(uint32_t addr, int max_instrs) { return block; } -void SH4CodeCache::RemoveBlocks(uint32_t addr) { +void SH4CodeCache::RemoveBlocks(uint32_t guest_addr) { // remove any block which overlaps the address while (true) { - SH4BlockEntry *block = LookupBlock(addr); + SH4BlockEntry *block = LookupBlock(guest_addr); if (!block) { break; diff --git a/src/hw/sh4/sh4_code_cache.h b/src/hw/sh4/sh4_code_cache.h index 57834dc9..aae2467e 100644 --- a/src/hw/sh4/sh4_code_cache.h +++ b/src/hw/sh4/sh4_code_cache.h @@ -1,7 +1,9 @@ #ifndef SH4_CODE_CACHE_H #define SH4_CODE_CACHE_H +#include #include "jit/backend/x64/x64_backend.h" +#include "jit/frontend/sh4/sh4_context.h" #include "jit/frontend/sh4/sh4_frontend.h" #include "jit/ir/passes/pass_runner.h" #include "sys/exception_handler.h" @@ -32,7 +34,7 @@ struct SH4BlockEntry { class SH4CodeCache { public: - SH4CodeCache(hw::Memory *memory, void *guest_ctx, + SH4CodeCache(const jit::backend::MemoryInterface &memif, jit::backend::BlockPointer default_block); ~SH4CodeCache(); @@ -52,13 +54,14 @@ class SH4CodeCache { // block, and the cache is initialized with all entries pointing to a special // default block. 
this default block, when called, will compile the actual // block and update the cache to point to it - SH4BlockEntry *GetBlock(uint32_t addr) { - int offset = BLOCK_OFFSET(addr); + SH4BlockEntry *GetBlock(uint32_t guest_addr) { + int offset = BLOCK_OFFSET(guest_addr); CHECK_LT(offset, MAX_BLOCKS); return &blocks_[offset]; } - SH4BlockEntry *CompileBlock(uint32_t addr, int max_instrs); - void RemoveBlocks(uint32_t addr); + SH4BlockEntry *CompileBlock(uint32_t guest_addr, uint8_t *host_addr, + int flags); + void RemoveBlocks(uint32_t guest_addr); void UnlinkBlocks(); void ClearBlocks(); diff --git a/src/jit/backend/backend.h b/src/jit/backend/backend.h index 1b3c0982..e752762a 100644 --- a/src/jit/backend/backend.h +++ b/src/jit/backend/backend.h @@ -1,40 +1,47 @@ #ifndef BACKEND_H #define BACKEND_H -#include #include "jit/ir/ir_builder.h" namespace re { -namespace hw { -class Memory; -} - namespace sys { struct Exception; } namespace jit { - namespace backend { +enum BlockFlags { + // compile the block without fast memory access optimizations + BF_SLOWMEM = 0x1, +}; + +struct MemoryInterface { + void *ctx_base; + void *mem_base; + void *mem_self; + uint8_t (*r8)(void *, uint32_t); + uint16_t (*r16)(void *, uint32_t); + uint32_t (*r32)(void *, uint32_t); + uint64_t (*r64)(void *, uint32_t); + void (*w8)(void *, uint32_t, uint8_t); + void (*w16)(void *, uint32_t, uint16_t); + void (*w32)(void *, uint32_t, uint32_t); + void (*w64)(void *, uint32_t, uint64_t); +}; + struct Register { const char *name; int value_types; const void *data; }; -enum BlockFlags { - // compile the block without fast memory access optimizations - BF_SLOWMEM = 0x1, -}; - typedef uint32_t (*BlockPointer)(); class Backend { public: - Backend(hw::Memory &memory, void *guest_ctx) - : memory_(memory), guest_ctx_(guest_ctx) {} + Backend(const MemoryInterface &memif) : memif_(memif) {} virtual ~Backend() {} virtual const Register *registers() const = 0; @@ -49,8 +56,7 @@ class Backend { virtual bool 
HandleFastmemException(sys::Exception &ex) = 0; protected: - hw::Memory &memory_; - void *guest_ctx_; + MemoryInterface memif_; }; } } diff --git a/src/jit/backend/x64/x64_backend.cc b/src/jit/backend/x64/x64_backend.cc index 3274f0f3..65c1c0eb 100644 --- a/src/jit/backend/x64/x64_backend.cc +++ b/src/jit/backend/x64/x64_backend.cc @@ -3,14 +3,12 @@ #include #include #include "core/memory.h" -#include "emu/profiler.h" -#include "hw/memory.h" +#include "core/profiler.h" #include "jit/backend/x64/x64_backend.h" #include "jit/backend/x64/x64_disassembler.h" #include "sys/exception_handler.h" using namespace re; -using namespace re::hw; using namespace re::jit; using namespace re::jit::backend; using namespace re::jit::backend::x64; @@ -109,8 +107,8 @@ const int x64_tmp1_idx = Xbyak::Operand::R11; static const size_t x64_code_size = 1024 * 1024 * 8; static uint8_t x64_codegen[x64_code_size]; -X64Backend::X64Backend(Memory &memory, void *guest_ctx) - : Backend(memory, guest_ctx), emitter_(x64_codegen, x64_code_size) { +X64Backend::X64Backend(const MemoryInterface &memif) + : Backend(memif), emitter_(memif, x64_codegen, x64_code_size) { Xbyak::CodeArray::protect(x64_codegen, x64_code_size, true); Reset(); @@ -136,7 +134,7 @@ BlockPointer X64Backend::AssembleBlock(ir::IRBuilder &builder, // know so it can reset the cache and try again BlockPointer fn; try { - fn = emitter_.Emit(builder, memory_, guest_ctx_, block_flags); + fn = emitter_.Emit(builder, block_flags); } catch (const Xbyak::Error &e) { if (e == Xbyak::ERR_CODE_IS_TOO_BIG) { return nullptr; @@ -196,7 +194,8 @@ bool X64Backend::HandleFastmemException(Exception &ex) { // figure out the guest address that was being accessed const uint8_t *fault_addr = reinterpret_cast(ex.fault_addr); - const uint8_t *protected_start = memory_.protected_base(); + const uint8_t *protected_start = + reinterpret_cast(memif_.mem_base); uint32_t guest_addr = static_cast(fault_addr - protected_start); // instead of handling the dynamic 
callback from inside of the exception @@ -215,26 +214,23 @@ bool X64Backend::HandleFastmemException(Exception &ex) { if (mov.is_load) { // prep argument registers (memory object, guest_addr) for read function - ex.thread_state.r[x64_arg0_idx] = reinterpret_cast(&memory_); + ex.thread_state.r[x64_arg0_idx] = + reinterpret_cast(memif_.mem_self); ex.thread_state.r[x64_arg1_idx] = static_cast(guest_addr); // prep function call address for thunk switch (mov.operand_size) { case 1: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::R8)); + ex.thread_state.rax = reinterpret_cast(memif_.r8); break; case 2: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::R16)); + ex.thread_state.rax = reinterpret_cast(memif_.r16); break; case 4: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::R32)); + ex.thread_state.rax = reinterpret_cast(memif_.r32); break; case 8: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::R64)); + ex.thread_state.rax = reinterpret_cast(memif_.r64); break; } @@ -243,27 +239,24 @@ bool X64Backend::HandleFastmemException(Exception &ex) { } else { // prep argument registers (memory object, guest_addr, value) for write // function - ex.thread_state.r[x64_arg0_idx] = reinterpret_cast(&memory_); + ex.thread_state.r[x64_arg0_idx] = + reinterpret_cast(memif_.mem_self); ex.thread_state.r[x64_arg1_idx] = static_cast(guest_addr); ex.thread_state.r[x64_arg2_idx] = ex.thread_state.r[mov.reg]; // prep function call address for thunk switch (mov.operand_size) { case 1: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::W8)); + ex.thread_state.rax = reinterpret_cast(memif_.w8); break; case 2: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::W16)); + ex.thread_state.rax = reinterpret_cast(memif_.w16); break; case 4: - ex.thread_state.rax = reinterpret_cast( - static_cast(&Memory::W32)); + ex.thread_state.rax = reinterpret_cast(memif_.w32); break; case 8: - ex.thread_state.rax = 
reinterpret_cast( - static_cast(&Memory::W64)); + ex.thread_state.rax = reinterpret_cast(memif_.w64); break; } diff --git a/src/jit/backend/x64/x64_backend.h b/src/jit/backend/x64/x64_backend.h index c8ef5034..2f4b32d6 100644 --- a/src/jit/backend/x64/x64_backend.h +++ b/src/jit/backend/x64/x64_backend.h @@ -21,7 +21,7 @@ typedef void (*SlowmemThunk)(); class X64Backend : public Backend { public: - X64Backend(hw::Memory &memory, void *guest_ctx); + X64Backend(const MemoryInterface &memif); ~X64Backend(); const Register *registers() const; diff --git a/src/jit/backend/x64/x64_emitter.cc b/src/jit/backend/x64/x64_emitter.cc index d095e972..835e7dca 100644 --- a/src/jit/backend/x64/x64_emitter.cc +++ b/src/jit/backend/x64/x64_emitter.cc @@ -1,13 +1,11 @@ #include #include "core/math.h" #include "core/memory.h" -#include "emu/profiler.h" -#include "hw/memory.h" +#include "core/profiler.h" #include "jit/backend/x64/x64_backend.h" #include "jit/backend/x64/x64_emitter.h" using namespace re; -using namespace re::hw; using namespace re::jit; using namespace re::jit::backend; using namespace re::jit::backend::x64; @@ -63,11 +61,9 @@ static bool IsCalleeSaved(const Xbyak::Reg ®) { return callee_saved[reg.getIdx()]; } -X64Emitter::X64Emitter(void *buffer, size_t buffer_size) - : CodeGenerator(buffer_size, buffer), - memory_(nullptr), - guest_ctx_(nullptr), - block_flags_(0) { +X64Emitter::X64Emitter(const MemoryInterface &memif, void *buffer, + size_t buffer_size) + : CodeGenerator(buffer_size, buffer), memif_(memif), block_flags_(0) { // temporary registers aren't tracked to be pushed and popped CHECK(!IsCalleeSaved(tmp0) && !IsCalleeSaved(tmp1)); @@ -88,13 +84,10 @@ void X64Emitter::Reset() { EmitConstants(); } -BlockPointer X64Emitter::Emit(IRBuilder &builder, Memory &memory, - void *guest_ctx, int block_flags) { +BlockPointer X64Emitter::Emit(IRBuilder &builder, int block_flags) { PROFILER_RUNTIME("X64Emitter::Emit"); // save off parameters for ease of access - memory_ = 
&memory; - guest_ctx_ = guest_ctx; block_flags_ = block_flags; // getCurr returns the current spot in the codegen buffer which the function @@ -178,8 +171,8 @@ void X64Emitter::EmitProlog(IRBuilder &builder, int *out_stack_size) { sub(rsp, stack_size); // copy guest context and memory base to argument registers - mov(r14, reinterpret_cast(guest_ctx_)); - mov(r15, reinterpret_cast(memory_->protected_base())); + mov(r14, reinterpret_cast(memif_.ctx_base)); + mov(r15, reinterpret_cast(memif_.mem_base)); *out_stack_size = stack_size; } @@ -392,44 +385,45 @@ EMITTER(STORE_HOST) { EMITTER(LOAD_GUEST) { const Xbyak::Reg result = e.GetRegister(instr); - if (instr->arg0()->constant()) { - // try to resolve the address to a physical page - uint32_t addr = static_cast(instr->arg0()->i32()); - uint8_t *host_addr = nullptr; - MemoryRegion *region = nullptr; - uint32_t offset = 0; + // TODO move to new constprop + // if (instr->arg0()->constant()) { + // // try to resolve the address to a physical page + // uint32_t addr = static_cast(instr->arg0()->i32()); + // uint8_t *host_addr = nullptr; + // MemoryRegion *region = nullptr; + // uint32_t offset = 0; - e.memory()->Lookup(addr, &host_addr, ®ion, &offset); + // e.memory()->Lookup(addr, &host_addr, ®ion, &offset); - // if the address maps to a physical page, not a dynamic handler, make it - // fast - if (host_addr) { - // FIXME it'd be nice if xbyak had a mov operation which would convert - // the displacement to a RIP-relative address when finalizing code so - // we didn't have to store the absolute address in the scratch register - e.mov(e.rax, reinterpret_cast(host_addr)); + // // if the address maps to a physical page, not a dynamic handler, make it + // // fast + // if (host_addr) { + // // FIXME it'd be nice if xbyak had a mov operation which would convert + // // the displacement to a RIP-relative address when finalizing code so + // // we didn't have to store the absolute address in the scratch register + // e.mov(e.rax, 
reinterpret_cast(host_addr)); - switch (instr->type()) { - case VALUE_I8: - e.mov(result, e.byte[e.rax]); - break; - case VALUE_I16: - e.mov(result, e.word[e.rax]); - break; - case VALUE_I32: - e.mov(result, e.dword[e.rax]); - break; - case VALUE_I64: - e.mov(result, e.qword[e.rax]); - break; - default: - LOG_FATAL("Unexpected load result type"); - break; - } + // switch (instr->type()) { + // case VALUE_I8: + // e.mov(result, e.byte[e.rax]); + // break; + // case VALUE_I16: + // e.mov(result, e.word[e.rax]); + // break; + // case VALUE_I32: + // e.mov(result, e.dword[e.rax]); + // break; + // case VALUE_I64: + // e.mov(result, e.qword[e.rax]); + // break; + // default: + // LOG_FATAL("Unexpected load result type"); + // break; + // } - return; - } - } + // return; + // } + // } const Xbyak::Reg a = e.GetRegister(instr->arg0()); @@ -437,27 +431,23 @@ EMITTER(LOAD_GUEST) { void *fn = nullptr; switch (instr->type()) { case VALUE_I8: - fn = reinterpret_cast( - static_cast(&Memory::R8)); + fn = reinterpret_cast(e.memif().r8); break; case VALUE_I16: - fn = reinterpret_cast( - static_cast(&Memory::R16)); + fn = reinterpret_cast(e.memif().r16); break; case VALUE_I32: - fn = reinterpret_cast( - static_cast(&Memory::R32)); + fn = reinterpret_cast(e.memif().r32); break; case VALUE_I64: - fn = reinterpret_cast( - static_cast(&Memory::R64)); + fn = reinterpret_cast(e.memif().r64); break; default: LOG_FATAL("Unexpected load result type"); break; } - e.mov(arg0, reinterpret_cast(e.memory())); + e.mov(arg0, reinterpret_cast(e.memif().mem_self)); e.mov(arg1, a); e.call(reinterpret_cast(fn)); e.mov(result, e.rax); @@ -483,44 +473,45 @@ EMITTER(LOAD_GUEST) { } EMITTER(STORE_GUEST) { - if (instr->arg0()->constant()) { - // try to resolve the address to a physical page - uint32_t addr = static_cast(instr->arg0()->i32()); - uint8_t *host_addr = nullptr; - MemoryRegion *bank = nullptr; - uint32_t offset = 0; + // TODO move to new constprop + // if (instr->arg0()->constant()) { + // // 
try to resolve the address to a physical page + // uint32_t addr = static_cast(instr->arg0()->i32()); + // uint8_t *host_addr = nullptr; + // MemoryRegion *bank = nullptr; + // uint32_t offset = 0; - e.memory()->Lookup(addr, &host_addr, &bank, &offset); + // e.memory()->Lookup(addr, &host_addr, &bank, &offset); - if (host_addr) { - const Xbyak::Reg b = e.GetRegister(instr->arg1()); + // if (host_addr) { + // const Xbyak::Reg b = e.GetRegister(instr->arg1()); - // FIXME it'd be nice if xbyak had a mov operation which would convert - // the displacement to a RIP-relative address when finalizing code so - // we didn't have to store the absolute address in the scratch register - e.mov(e.rax, reinterpret_cast(host_addr)); + // // FIXME it'd be nice if xbyak had a mov operation which would convert + // // the displacement to a RIP-relative address when finalizing code so + // // we didn't have to store the absolute address in the scratch register + // e.mov(e.rax, reinterpret_cast(host_addr)); - switch (instr->arg1()->type()) { - case VALUE_I8: - e.mov(e.byte[e.rax], b); - break; - case VALUE_I16: - e.mov(e.word[e.rax], b); - break; - case VALUE_I32: - e.mov(e.dword[e.rax], b); - break; - case VALUE_I64: - e.mov(e.qword[e.rax], b); - break; - default: - LOG_FATAL("Unexpected store value type"); - break; - } + // switch (instr->arg1()->type()) { + // case VALUE_I8: + // e.mov(e.byte[e.rax], b); + // break; + // case VALUE_I16: + // e.mov(e.word[e.rax], b); + // break; + // case VALUE_I32: + // e.mov(e.dword[e.rax], b); + // break; + // case VALUE_I64: + // e.mov(e.qword[e.rax], b); + // break; + // default: + // LOG_FATAL("Unexpected store value type"); + // break; + // } - return; - } - } + // return; + // } + // } const Xbyak::Reg a = e.GetRegister(instr->arg0()); const Xbyak::Reg b = e.GetRegister(instr->arg1()); @@ -529,27 +520,23 @@ EMITTER(STORE_GUEST) { void *fn = nullptr; switch (instr->arg1()->type()) { case VALUE_I8: - fn = reinterpret_cast( - 
static_cast(&Memory::W8)); + fn = reinterpret_cast(e.memif().w8); break; case VALUE_I16: - fn = reinterpret_cast( - static_cast(&Memory::W16)); + fn = reinterpret_cast(e.memif().w16); break; case VALUE_I32: - fn = reinterpret_cast( - static_cast(&Memory::W32)); + fn = reinterpret_cast(e.memif().w32); break; case VALUE_I64: - fn = reinterpret_cast( - static_cast(&Memory::W64)); + fn = reinterpret_cast(e.memif().w64); break; default: LOG_FATAL("Unexpected store value type"); break; } - e.mov(arg0, reinterpret_cast(e.memory())); + e.mov(arg0, reinterpret_cast(e.memif().mem_self)); e.mov(arg1, a); e.mov(arg2, b); e.call(reinterpret_cast(fn)); @@ -1434,7 +1421,7 @@ EMITTER(BRANCH_COND) { EMITTER(CALL_EXTERNAL) { const Xbyak::Reg addr = e.GetRegister(instr->arg0()); - e.mov(arg0, reinterpret_cast(e.guest_ctx())); + e.mov(arg0, reinterpret_cast(e.memif().ctx_base)); if (instr->arg1()) { const Xbyak::Reg arg = e.GetRegister(instr->arg1()); e.mov(arg1, arg); diff --git a/src/jit/backend/x64/x64_emitter.h b/src/jit/backend/x64/x64_emitter.h index 6da9d31f..3b528456 100644 --- a/src/jit/backend/x64/x64_emitter.h +++ b/src/jit/backend/x64/x64_emitter.h @@ -2,13 +2,9 @@ #define X64_EMITTER_H #include +#include "jit/backend/backend.h" namespace re { - -namespace hw { -class Memory; -} - namespace jit { namespace backend { namespace x64 { @@ -33,17 +29,15 @@ enum XmmConstant { class X64Emitter : public Xbyak::CodeGenerator { public: - X64Emitter(void *buffer, size_t buffer_size); + X64Emitter(const MemoryInterface &memif, void *buffer, size_t buffer_size); ~X64Emitter(); - void *guest_ctx() { return guest_ctx_; } - hw::Memory *memory() { return memory_; } + const MemoryInterface &memif() { return memif_; } int block_flags() { return block_flags_; } void Reset(); - BlockPointer Emit(ir::IRBuilder &builder, hw::Memory &memory, void *guest_ctx, - int block_flags); + BlockPointer Emit(ir::IRBuilder &builder, int block_flags); // helpers for the emitter callbacks const Xbyak::Reg 
GetRegister(const ir::Value *v); @@ -58,8 +52,7 @@ class X64Emitter : public Xbyak::CodeGenerator { void EmitBody(ir::IRBuilder &builder); void EmitEpilog(ir::IRBuilder &builder, int stack_size); - hw::Memory *memory_; - void *guest_ctx_; + MemoryInterface memif_; int block_flags_; int modified_marker_; int *modified_; diff --git a/src/jit/frontend/frontend.h b/src/jit/frontend/frontend.h index ed55579a..33b4e79f 100644 --- a/src/jit/frontend/frontend.h +++ b/src/jit/frontend/frontend.h @@ -1,29 +1,18 @@ #ifndef FRONTEND_H #define FRONTEND_H -#include #include "jit/ir/ir_builder.h" namespace re { - -namespace hw { -class Memory; -} - namespace jit { namespace frontend { class Frontend { public: - Frontend(hw::Memory &memory, void *guest_ctx) - : memory_(memory), guest_ctx_(guest_ctx) {} virtual ~Frontend() {} - virtual ir::IRBuilder &BuildBlock(uint32_t addr, int max_instrs) = 0; - - protected: - hw::Memory &memory_; - void *guest_ctx_; + virtual ir::IRBuilder &BuildBlock(uint32_t guest_addr, uint8_t *host_addr, + int flags) = 0; }; } } diff --git a/src/jit/frontend/sh4/sh4_builder.cc b/src/jit/frontend/sh4/sh4_builder.cc index 631ecc46..95994825 100644 --- a/src/jit/frontend/sh4/sh4_builder.cc +++ b/src/jit/frontend/sh4/sh4_builder.cc @@ -1,10 +1,10 @@ #include "core/assert.h" -#include "emu/profiler.h" -#include "hw/memory.h" +#include "core/memory.h" +#include "core/profiler.h" #include "jit/frontend/sh4/sh4_builder.h" +#include "jit/frontend/sh4/sh4_frontend.h" using namespace re; -using namespace re::hw; using namespace re::jit; using namespace re::jit::frontend; using namespace re::jit::frontend::sh4; @@ -14,17 +14,17 @@ static uint32_t s_fsca_table[0x20000] = { #include "jit/frontend/sh4/sh4_fsca.inc" }; -typedef void (*EmitCallback)(SH4Builder &b, const FPUState &, - const sh4::Instr &i, bool *endblock); +typedef void (*EmitCallback)(SH4Builder &b, const sh4::Instr &i, int, + bool *endblock); -#define EMITTER(name) \ - void Emit_OP_##name(SH4Builder &b, 
const FPUState &fpu, const sh4::Instr &i, \ +#define EMITTER(name) \ + void Emit_OP_##name(SH4Builder &b, const sh4::Instr &i, int flags, \ bool *endblock) -#define EMIT_DELAYED() \ - if (!b.EmitDelayInstr(i)) { \ - *endblock = true; \ - return; \ +#define EMIT_DELAYED() \ + if (!b.EmitDelayInstr(i, flags)) { \ + *endblock = true; \ + return; \ } #define SH4_INSTR(name, desc, instr_code, cycles, flags) static EMITTER(name); @@ -37,23 +37,20 @@ EmitCallback emit_callbacks[sh4::NUM_OPCODES] = { #undef SH4_INSTR }; -SH4Builder::SH4Builder(Arena &arena, Memory &memory, - const SH4Context &guest_ctx) - : IRBuilder(arena), memory_(memory), guest_ctx_(guest_ctx) {} +SH4Builder::SH4Builder(Arena &arena) : IRBuilder(arena) {} -void SH4Builder::Emit(uint32_t start_addr, int max_instrs) { +void SH4Builder::Emit(uint32_t guest_addr, uint8_t *host_addr, int flags) { PROFILER_RUNTIME("SH4Builder::Emit"); - pc_ = start_addr; + pc_ = guest_addr; + host_addr_ = host_addr; guest_cycles_ = 0; - fpu_state_.double_pr = guest_ctx_.fpscr & PR; - fpu_state_.double_sz = guest_ctx_.fpscr & SZ; // clamp block to max_instrs if non-zero - for (int i = 0; !max_instrs || i < max_instrs; i++) { + while (true) { Instr instr; instr.addr = pc_; - instr.opcode = memory_.R16(instr.addr); + instr.opcode = re::load(host_addr_); Disasm(&instr); if (!instr.type) { @@ -62,11 +59,12 @@ void SH4Builder::Emit(uint32_t start_addr, int max_instrs) { } pc_ += 2; + host_addr_ += 2; guest_cycles_ += instr.type->cycles; // emit the current instruction bool endblock = false; - (emit_callbacks[instr.type->op])(*this, fpu_state_, instr, &endblock); + (emit_callbacks[instr.type->op])(*this, instr, flags, &endblock); // end block if delay instruction is invalid if (endblock) { @@ -81,6 +79,11 @@ void SH4Builder::Emit(uint32_t start_addr, int max_instrs) { (OP_FLAG_BRANCH | OP_FLAG_SET_FPSCR | OP_FLAG_SET_SR)) { break; } + + // used by gdb server when stepping through instructions + if (flags & SH4_SINGLE_INSTR) { + 
break; + } } ir::Instr *tail_instr = instrs_.tail(); @@ -100,7 +103,7 @@ void SH4Builder::Emit(uint32_t start_addr, int max_instrs) { StoreContext(offsetof(SH4Context, num_cycles), num_cycles); // update num instructions - int sh4_num_instrs = (pc_ - start_addr) >> 1; + int sh4_num_instrs = (pc_ - guest_addr) >> 1; Value *num_instrs = LoadContext(offsetof(SH4Context, num_instrs), VALUE_I32); num_instrs = Add(num_instrs, AllocConstant(sh4_num_instrs)); StoreContext(offsetof(SH4Context, num_instrs), num_instrs); @@ -144,7 +147,6 @@ void SH4Builder::StoreXFR(int n, Value *v) { return StoreContext(offsetof(SH4Context, xf[n]), v); } - Value *SH4Builder::LoadSR() { return LoadContext(offsetof(SH4Context, sr), VALUE_I32); } @@ -208,12 +210,12 @@ void SH4Builder::InvalidInstruction(uint32_t guest_addr) { AllocConstant(static_cast(guest_addr))); } -bool SH4Builder::EmitDelayInstr(const sh4::Instr &prev) { +bool SH4Builder::EmitDelayInstr(const sh4::Instr &prev, int flags) { CHECK(prev.type->flags & OP_FLAG_DELAYED); Instr delay; delay.addr = prev.addr + 2; - delay.opcode = memory_.R16(delay.addr); + delay.opcode = re::load(host_addr_); Disasm(&delay); if (!delay.type) { @@ -224,10 +226,11 @@ bool SH4Builder::EmitDelayInstr(const sh4::Instr &prev) { CHECK(!(delay.type->flags & OP_FLAG_DELAYED)); pc_ += 2; + host_addr_ += 2; guest_cycles_ += delay.type->cycles; bool endblock = false; - (emit_callbacks[delay.type->op])(*this, fpu_state_, delay, &endblock); + (emit_callbacks[delay.type->op])(*this, delay, flags, &endblock); return true; } @@ -1677,7 +1680,7 @@ EMITTER(FLDI1) { b.StoreFPR(i.Rn, b.AllocConstant(0x3F800000)); } // FMOV DRm,XDn 1111nnn1mmm01100 // FMOV XDm,XDn 1111nnn1mmm11100 EMITTER(FMOV) { - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { if (i.Rm & 1) { Value *rm = b.LoadXFR(i.Rm & 0xe, VALUE_I64); if (i.Rn & 1) { @@ -1704,7 +1707,7 @@ EMITTER(FMOV) { EMITTER(FMOV_LOAD) { Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - if (fpu.double_sz) { + if (flags & 
SH4_DOUBLE_SZ) { Value *v_low = b.LoadGuest(addr, VALUE_I32); Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); if (i.Rn & 1) { @@ -1725,7 +1728,7 @@ EMITTER(FMOV_LOAD) { EMITTER(FMOV_INDEX_LOAD) { Value *addr = b.Add(b.LoadGPR(0, VALUE_I32), b.LoadGPR(i.Rm, VALUE_I32)); - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { Value *v_low = b.LoadGuest(addr, VALUE_I32); Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); if (i.Rn & 1) { @@ -1746,7 +1749,7 @@ EMITTER(FMOV_INDEX_LOAD) { EMITTER(FMOV_STORE) { Value *addr = b.LoadGPR(i.Rn, VALUE_I32); - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { Value *addr_low = addr; Value *addr_high = b.Add(addr, b.AllocConstant(4)); if (i.Rm & 1) { @@ -1767,7 +1770,7 @@ EMITTER(FMOV_STORE) { EMITTER(FMOV_INDEX_STORE) { Value *addr = b.Add(b.LoadGPR(0, VALUE_I32), b.LoadGPR(i.Rn, VALUE_I32)); - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { Value *addr_low = addr; Value *addr_high = b.Add(addr, b.AllocConstant(4)); if (i.Rm & 1) { @@ -1786,7 +1789,7 @@ EMITTER(FMOV_INDEX_STORE) { // FMOV DRm,@-Rn 1111nnnnmmm01011 // FMOV XDm,@-Rn 1111nnnnmmm11011 EMITTER(FMOV_SAVE) { - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { Value *addr = b.Sub(b.LoadGPR(i.Rn, VALUE_I32), b.AllocConstant(8)); b.StoreGPR(i.Rn, addr); @@ -1813,7 +1816,7 @@ EMITTER(FMOV_SAVE) { EMITTER(FMOV_RESTORE) { Value *addr = b.LoadGPR(i.Rm, VALUE_I32); - if (fpu.double_sz) { + if (flags & SH4_DOUBLE_SZ) { Value *v_low = b.LoadGuest(addr, VALUE_I32); Value *v_high = b.LoadGuest(b.Add(addr, b.AllocConstant(4)), VALUE_I32); if (i.Rn & 1) { @@ -1845,7 +1848,7 @@ EMITTER(FSTS) { // FABS FRn PR=0 1111nnnn01011101 // FABS DRn PR=1 1111nnn001011101 EMITTER(FABS) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; Value *v = b.FAbs(b.LoadFPR(n, VALUE_F64)); b.StoreFPR(n, v); @@ -1865,7 +1868,7 @@ EMITTER(FSRRA) { // FADD FRm,FRn PR=0 1111nnnnmmmm0000 // FADD DRm,DRn PR=1 1111nnn0mmm00000 
EMITTER(FADD) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -1883,7 +1886,7 @@ EMITTER(FADD) { // FCMP/EQ FRm,FRn PR=0 1111nnnnmmmm0100 // FCMP/EQ DRm,DRn PR=1 1111nnn0mmm00100 EMITTER(FCMPEQ) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -1901,7 +1904,7 @@ EMITTER(FCMPEQ) { // FCMP/GT FRm,FRn PR=0 1111nnnnmmmm0101 // FCMP/GT DRm,DRn PR=1 1111nnn0mmm00101 EMITTER(FCMPGT) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -1919,7 +1922,7 @@ EMITTER(FCMPGT) { // FDIV FRm,FRn PR=0 1111nnnnmmmm0011 // FDIV DRm,DRn PR=1 1111nnn0mmm00011 EMITTER(FDIV) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -1939,7 +1942,7 @@ EMITTER(FDIV) { EMITTER(FLOAT) { Value *fpul = b.LoadContext(offsetof(SH4Context, fpul), VALUE_I32); - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; Value *v = b.IToF(b.SExt(fpul, VALUE_I64), VALUE_F64); b.StoreFPR(n, v); @@ -1951,7 +1954,7 @@ EMITTER(FLOAT) { // FMAC FR0,FRm,FRn PR=0 1111nnnnmmmm1110 EMITTER(FMAC) { - CHECK(!fpu.double_pr); + CHECK(!(flags & SH4_DOUBLE_PR)); Value *frn = b.LoadFPR(i.Rn, VALUE_F32); Value *frm = b.LoadFPR(i.Rm, VALUE_F32); @@ -1963,7 +1966,7 @@ EMITTER(FMAC) { // FMUL FRm,FRn PR=0 1111nnnnmmmm0010 // FMUL DRm,DRn PR=1 1111nnn0mmm00010 EMITTER(FMUL) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -1981,7 +1984,7 @@ EMITTER(FMUL) { // FNEG FRn PR=0 1111nnnn01001101 // FNEG DRn PR=1 1111nnn001001101 EMITTER(FNEG) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); Value *v = b.FNeg(drn); @@ -1996,7 
+1999,7 @@ EMITTER(FNEG) { // FSQRT FRn PR=0 1111nnnn01101101 // FSQRT DRn PR=1 1111nnnn01101101 EMITTER(FSQRT) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); Value *v = b.Sqrt(drn); @@ -2011,7 +2014,7 @@ EMITTER(FSQRT) { // FSUB FRm,FRn PR=0 1111nnnnmmmm0001 // FSUB DRm,DRn PR=1 1111nnn0mmm00001 EMITTER(FSUB) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int n = i.Rn & 0xe; int m = i.Rm & 0xe; Value *drn = b.LoadFPR(n, VALUE_F64); @@ -2029,7 +2032,7 @@ EMITTER(FSUB) { // FTRC FRm,FPUL PR=0 1111mmmm00111101 // FTRC DRm,FPUL PR=1 1111mmm000111101 EMITTER(FTRC) { - if (fpu.double_pr) { + if (flags & SH4_DOUBLE_PR) { int m = i.Rm & 0xe; Value *drm = b.LoadFPR(m, VALUE_F64); Value *dpv = b.Trunc(b.FToI(drm, VALUE_I64), VALUE_I32); @@ -2043,7 +2046,7 @@ EMITTER(FTRC) { // FCNVDS DRm,FPUL PR=1 1111mmm010111101 EMITTER(FCNVDS) { - CHECK(fpu.double_pr); + CHECK(flags & SH4_DOUBLE_PR); // TODO rounding modes? @@ -2055,7 +2058,7 @@ EMITTER(FCNVDS) { // FCNVSD FPUL, DRn PR=1 1111nnn010101101 EMITTER(FCNVSD) { - CHECK(fpu.double_pr); + CHECK(flags & SH4_DOUBLE_PR); // TODO rounding modes? 
@@ -2155,16 +2158,9 @@ EMITTER(FSCA) { EMITTER(FTRV) { int n = i.Rn << 2; - // XF0 XF4 XF8 XF12 FR0 XF0 * FR0 + XF4 * FR1 + XF8 * FR2 + XF12 * FR3 - // XF1 XF5 XF9 XF13 * FR1 = XF1 * FR0 + XF5 * FR1 + XF9 * FR2 + XF13 * FR3 - // XF2 XF6 XF10 XF14 FR2 XF2 * FR0 + XF6 * FR1 + XF10 * FR2 + XF14 * FR3 - // XF3 XF7 XF11 XF15 FR3 XF3 * FR0 + XF7 * FR1 + XF11 * FR2 + XF15 * FR3 - - Value *result = nullptr; - Value *col0 = b.LoadXFR(0, VALUE_V128); Value *row0 = b.VBroadcast(b.LoadFPR(n + 0, VALUE_F32)); - result = b.VMul(col0, row0, VALUE_F32); + Value *result = b.VMul(col0, row0, VALUE_F32); Value *col1 = b.LoadXFR(4, VALUE_V128); Value *row1 = b.VBroadcast(b.LoadFPR(n + 1, VALUE_F32)); diff --git a/src/jit/frontend/sh4/sh4_builder.h b/src/jit/frontend/sh4/sh4_builder.h index c8e71fa1..c667a189 100644 --- a/src/jit/frontend/sh4/sh4_builder.h +++ b/src/jit/frontend/sh4/sh4_builder.h @@ -15,16 +15,11 @@ namespace jit { namespace frontend { namespace sh4 { -struct FPUState { - bool double_pr; - bool double_sz; -}; - class SH4Builder : public ir::IRBuilder { public: - SH4Builder(Arena &arena, hw::Memory &memory, const SH4Context &guest_ctx); + SH4Builder(Arena &arena); - void Emit(uint32_t addr, int max_instrs); + void Emit(uint32_t guest_addr, uint8_t *host_addr, int flags); ir::Value *LoadGPR(int n, ir::ValueType type); void StoreGPR(int n, ir::Value *v); @@ -45,14 +40,12 @@ class SH4Builder : public ir::IRBuilder { void InvalidInstruction(uint32_t guest_addr); - bool EmitDelayInstr(const Instr &prev); + bool EmitDelayInstr(const Instr &prev, int flags); private: - hw::Memory &memory_; - const SH4Context &guest_ctx_; uint32_t pc_; + uint8_t *host_addr_; int guest_cycles_; - FPUState fpu_state_; }; } } diff --git a/src/jit/frontend/sh4/sh4_disassembler.cc b/src/jit/frontend/sh4/sh4_disassembler.cc index cc865918..309b2201 100644 --- a/src/jit/frontend/sh4/sh4_disassembler.cc +++ b/src/jit/frontend/sh4/sh4_disassembler.cc @@ -1,6 +1,6 @@ +#include "core/assert.h" #include 
"core/memory.h" #include "core/string.h" -#include "hw/memory.h" #include "jit/frontend/sh4/sh4_disassembler.h" using namespace re; diff --git a/src/jit/frontend/sh4/sh4_frontend.cc b/src/jit/frontend/sh4/sh4_frontend.cc index 9f2128fc..f9ca26b8 100644 --- a/src/jit/frontend/sh4/sh4_frontend.cc +++ b/src/jit/frontend/sh4/sh4_frontend.cc @@ -1,23 +1,21 @@ #include "jit/frontend/sh4/sh4_builder.h" #include "jit/frontend/sh4/sh4_frontend.h" -using namespace re::hw; using namespace re::jit; using namespace re::jit::frontend::sh4; using namespace re::jit::ir; -SH4Frontend::SH4Frontend(Memory &memory, void *guest_ctx) - : Frontend(memory, guest_ctx), arena_(4096) {} +SH4Frontend::SH4Frontend() : arena_(4096) {} -IRBuilder &SH4Frontend::BuildBlock(uint32_t addr, int max_instrs) { +IRBuilder &SH4Frontend::BuildBlock(uint32_t guest_addr, uint8_t *host_addr, + int flags) { arena_.Reset(); SH4Builder *builder = arena_.Alloc(); - new (builder) SH4Builder(arena_, memory_, - *reinterpret_cast(guest_ctx_)); + new (builder) SH4Builder(arena_); - builder->Emit(addr, max_instrs); + builder->Emit(guest_addr, host_addr, flags); return *builder; } diff --git a/src/jit/frontend/sh4/sh4_frontend.h b/src/jit/frontend/sh4/sh4_frontend.h index fa573585..fd071ef2 100644 --- a/src/jit/frontend/sh4/sh4_frontend.h +++ b/src/jit/frontend/sh4/sh4_frontend.h @@ -9,11 +9,17 @@ namespace jit { namespace frontend { namespace sh4 { +enum SH4BlockFlags { + SH4_SINGLE_INSTR = 0x1, + SH4_DOUBLE_PR = 0x2, + SH4_DOUBLE_SZ = 0x4, +}; + class SH4Frontend : public Frontend { public: - SH4Frontend(hw::Memory &memory, void *guest_ctx); + SH4Frontend(); - ir::IRBuilder &BuildBlock(uint32_t addr, int max_instrs); + ir::IRBuilder &BuildBlock(uint32_t guest_addr, uint8_t *host_addr, int flags); private: Arena arena_; diff --git a/src/jit/ir/ir_writer.cc b/src/jit/ir/ir_writer.cc index cd0f37c6..17470ac8 100644 --- a/src/jit/ir/ir_writer.cc +++ b/src/jit/ir/ir_writer.cc @@ -33,6 +33,9 @@ void 
IRWriter::PrintType(ValueType type, std::ostream &output) const { case VALUE_F64: output << "f64"; break; + case VALUE_V128: + output << "v128"; + break; default: LOG_FATAL("Unexpected value type"); break; @@ -53,17 +56,7 @@ void IRWriter::PrintValue(const Value *value, std::ostream &output) { output << " "; - if (!value->constant()) { - uintptr_t key = reinterpret_cast(value); - auto it = slots_.find(key); - - if (it == slots_.end()) { - auto res = slots_.insert(std::make_pair(key, next_slot_++)); - it = res.first; - } - - output << "%" << it->second; - } else { + if (value->constant()) { switch (value->type()) { case VALUE_I8: output << "0x" << std::hex << value->i8() << std::dec; @@ -87,6 +80,16 @@ void IRWriter::PrintValue(const Value *value, std::ostream &output) { LOG_FATAL("Unexpected value type"); break; } + } else { + uintptr_t key = reinterpret_cast(value); + auto it = slots_.find(key); + + if (it == slots_.end()) { + auto res = slots_.insert(std::make_pair(key, next_slot_++)); + it = res.first; + } + + output << "%" << it->second; } } diff --git a/src/jit/ir/passes/constant_propagation_pass.cc b/src/jit/ir/passes/constant_propagation_pass.cc index 22838534..6c8d2600 100644 --- a/src/jit/ir/passes/constant_propagation_pass.cc +++ b/src/jit/ir/passes/constant_propagation_pass.cc @@ -1,6 +1,5 @@ #include #include -#include "emu/profiler.h" #include "jit/ir/passes/constant_propagation_pass.h" using namespace re::jit::ir; @@ -91,8 +90,6 @@ static int GetConstantSig(Instr *instr) { } void ConstantPropagationPass::Run(IRBuilder &builder, bool debug) { - PROFILER_RUNTIME("ConstantPropagationPass::Run"); - auto it = builder.instrs().begin(); auto end = builder.instrs().end(); diff --git a/src/jit/ir/passes/constant_propagation_pass.h b/src/jit/ir/passes/constant_propagation_pass.h index 442ad712..228ad024 100644 --- a/src/jit/ir/passes/constant_propagation_pass.h +++ b/src/jit/ir/passes/constant_propagation_pass.h @@ -10,7 +10,7 @@ namespace passes { class 
ConstantPropagationPass : public Pass { public: - const char *name() { return "Constant Propagation Pass"; } + const char *name() { return "constprop"; } void Run(IRBuilder &builder, bool debug); }; diff --git a/src/jit/ir/passes/dead_code_elimination_pass.cc b/src/jit/ir/passes/dead_code_elimination_pass.cc index 2154a64e..f1a6cb00 100644 --- a/src/jit/ir/passes/dead_code_elimination_pass.cc +++ b/src/jit/ir/passes/dead_code_elimination_pass.cc @@ -1,4 +1,3 @@ -#include "emu/profiler.h" #include "jit/ir/passes/dead_code_elimination_pass.h" using namespace re::jit::backend; @@ -6,8 +5,6 @@ using namespace re::jit::ir; using namespace re::jit::ir::passes; void DeadCodeEliminationPass::Run(IRBuilder &builder, bool debug) { - PROFILER_RUNTIME("DeadCodeEliminationPass::Run"); - // iterate in reverse in order to remove groups of dead instructions that // only use eachother auto it = builder.instrs().rbegin(); diff --git a/src/jit/ir/passes/dead_code_elimination_pass.h b/src/jit/ir/passes/dead_code_elimination_pass.h index b2068f23..5fb3ad70 100644 --- a/src/jit/ir/passes/dead_code_elimination_pass.h +++ b/src/jit/ir/passes/dead_code_elimination_pass.h @@ -11,7 +11,7 @@ namespace passes { class DeadCodeEliminationPass : public Pass { public: - const char *name() { return "Dead Code Elimination Pass"; } + const char *name() { return "dce"; } void Run(IRBuilder &builder, bool debug); }; diff --git a/src/jit/ir/passes/load_store_elimination_pass.cc b/src/jit/ir/passes/load_store_elimination_pass.cc index 99d21c80..2f6a8d66 100644 --- a/src/jit/ir/passes/load_store_elimination_pass.cc +++ b/src/jit/ir/passes/load_store_elimination_pass.cc @@ -1,4 +1,3 @@ -#include "emu/profiler.h" #include "jit/ir/passes/load_store_elimination_pass.h" using namespace re::jit::ir; @@ -8,8 +7,6 @@ LoadStoreEliminationPass::LoadStoreEliminationPass() : available_(nullptr), num_available_(0) {} void LoadStoreEliminationPass::Run(IRBuilder &builder, bool debug) { - 
PROFILER_RUNTIME("LoadStoreEliminationPass::Run"); - Reset(); // eliminate redundant loads diff --git a/src/jit/ir/passes/load_store_elimination_pass.h b/src/jit/ir/passes/load_store_elimination_pass.h index fb55675c..289bb0c4 100644 --- a/src/jit/ir/passes/load_store_elimination_pass.h +++ b/src/jit/ir/passes/load_store_elimination_pass.h @@ -17,7 +17,7 @@ class LoadStoreEliminationPass : public Pass { public: LoadStoreEliminationPass(); - const char *name() { return "Load / Store Elimination Pass"; } + const char *name() { return "lse"; } void Run(IRBuilder &builder, bool debug); diff --git a/src/jit/ir/passes/pass_runner.cc b/src/jit/ir/passes/pass_runner.cc index 077e984d..8decf9b0 100644 --- a/src/jit/ir/passes/pass_runner.cc +++ b/src/jit/ir/passes/pass_runner.cc @@ -1,4 +1,4 @@ -#include "emu/profiler.h" +#include "core/profiler.h" #include "jit/ir/ir_builder.h" #include "jit/ir/passes/pass_runner.h" @@ -15,12 +15,14 @@ void PassRunner::Run(IRBuilder &builder, bool debug) { PROFILER_RUNTIME("PassRunner::Run"); if (debug) { - LOG_INFO("Original:"); + LOG_INFO("original:"); builder.Dump(); LOG_INFO(""); } for (auto &pass : passes_) { + PROFILER_RUNTIME(pass->name()); + pass->Run(builder, debug); if (debug) { diff --git a/src/jit/ir/passes/register_allocation_pass.cc b/src/jit/ir/passes/register_allocation_pass.cc index ac3d7b64..4a02d074 100644 --- a/src/jit/ir/passes/register_allocation_pass.cc +++ b/src/jit/ir/passes/register_allocation_pass.cc @@ -1,5 +1,4 @@ #include "core/minmax_heap.h" -#include "emu/profiler.h" #include "jit/ir/passes/register_allocation_pass.h" using namespace re::jit::backend; @@ -93,8 +92,6 @@ RegisterAllocationPass::RegisterAllocationPass(const Backend &backend) RegisterAllocationPass::~RegisterAllocationPass() { delete[] intervals_; } void RegisterAllocationPass::Run(IRBuilder &builder, bool debug) { - PROFILER_RUNTIME("RegisterAllocationPass::Run"); - Reset(); AssignOrdinals(builder); diff --git 
a/src/jit/ir/passes/register_allocation_pass.h b/src/jit/ir/passes/register_allocation_pass.h index 517b0f53..bcad830d 100644 --- a/src/jit/ir/passes/register_allocation_pass.h +++ b/src/jit/ir/passes/register_allocation_pass.h @@ -49,7 +49,7 @@ class RegisterAllocationPass : public Pass { RegisterAllocationPass(const backend::Backend &backend); ~RegisterAllocationPass(); - const char *name() { return "Register Allocation Pass"; } + const char *name() { return "ra"; } void Run(IRBuilder &builder, bool debug); diff --git a/src/renderer/gl_backend.cc b/src/renderer/gl_backend.cc index 4fcf9e8d..2871a126 100644 --- a/src/renderer/gl_backend.cc +++ b/src/renderer/gl_backend.cc @@ -1,5 +1,5 @@ #include "core/assert.h" -#include "emu/profiler.h" +#include "core/profiler.h" #include "renderer/gl_backend.h" using namespace re; diff --git a/src/sys/exception_handler.cc b/src/sys/exception_handler.cc index 86e62ff0..28060b05 100644 --- a/src/sys/exception_handler.cc +++ b/src/sys/exception_handler.cc @@ -1,5 +1,5 @@ #include "core/interval_tree.h" -#include "emu/profiler.h" +#include "core/profiler.h" #include "sys/exception_handler.h" using namespace re::sys; diff --git a/tools/recc.cc b/tools/recc.cc new file mode 100644 index 00000000..01faead7 --- /dev/null +++ b/tools/recc.cc @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include "jit/backend/x64/x64_backend.h" +#include "jit/frontend/sh4/sh4_frontend.h" +#include "jit/ir/ir_builder.h" +#include "jit/ir/ir_reader.h" +#include "jit/ir/passes/dead_code_elimination_pass.h" +#include "jit/ir/passes/load_store_elimination_pass.h" + +using namespace re; +using namespace re::jit::ir; +using namespace re::jit::ir::passes; + +DEFINE_string(pass, "lse,dce", "Comma-separated list of passes to run"); +DEFINE_bool(debug, false, "Enable debug spew for passes"); + +static std::vector split(const std::string &s, char delim) { + std::stringstream ss(s); + std::string item; + std::vector elems; + while (std::getline(ss, item, 
delim)) { + elems.push_back(std::move(item)); + } + return elems; +} + +static int get_num_instrs(IRBuilder &builder) { + int n = 0; + + for (auto instr : builder.instrs()) { + ((void)instr); + n++; + } + + return n; +} + +int main(int argc, const char **argv) { + const char *file = argv[1]; + + Arena arena(4096); + IRBuilder builder(arena); + + // read in the input ir + IRReader reader; + std::ifstream input_stream(file); + CHECK(reader.Parse(input_stream, builder)); + + // run optimization passes + std::vector passes = split(FLAGS_pass, ','); + for (auto name : passes) { + std::unique_ptr pass; + + int num_instrs_before = get_num_instrs(builder); + + if (name == "lse") { + pass = std::unique_ptr(new LoadStoreEliminationPass()); + } else if (name == "dce") { + pass = std::unique_ptr(new DeadCodeEliminationPass()); + } else { + LOG_WARNING("Unknown pass %s", name.c_str()); + } + pass->Run(builder, FLAGS_debug); + + int num_instrs_after = get_num_instrs(builder); + + // print out the resulting ir + LOG_INFO("%s:", pass->name()); + builder.Dump(); + + // print out stats about the optimization pass + if (num_instrs_after <= num_instrs_before) { + int delta = num_instrs_before - num_instrs_after; + LOG_INFO(ANSI_COLOR_GREEN "%d (%.2f%%) instructions removed" ANSI_COLOR_RESET, + delta, (delta / static_cast(num_instrs_before)) * 100.0f); + } else { + int delta = num_instrs_after - num_instrs_before; + LOG_INFO(ANSI_COLOR_RED "%d (%.2f%%) instructions added" ANSI_COLOR_RESET, + delta, (delta / static_cast(num_instrs_before)) * 100.0f); + } + LOG_INFO(""); + } + + return 0; +}