Attempting to inline a lot of MMIO operations. Works for GPU stuff mainly.

This commit is contained in:
Ben Vanik 2015-06-02 20:15:43 -07:00
parent 6618bcf2db
commit 40a6a12800
15 changed files with 199 additions and 22 deletions

View File

@ -156,11 +156,12 @@ class AudioSystem {
void ProcessXmaContext(XMAContext& context, XMAContextData& data);
static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint32_t addr) {
static uint64_t MMIOReadRegisterThunk(void* ppc_context, AudioSystem* as,
uint32_t addr) {
return as->ReadRegister(addr);
}
static void MMIOWriteRegisterThunk(AudioSystem* as, uint32_t addr,
uint64_t value) {
static void MMIOWriteRegisterThunk(void* ppc_context, AudioSystem* as,
uint32_t addr, uint64_t value) {
as->WriteRegister(addr, value);
}

View File

@ -481,9 +481,10 @@ void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
void X64Emitter::CallNativeSafe(void* fn) {
// rcx = context
// rdx = target host function
// rdx = target function
// r8 = arg0
// r9 = arg1
// r10 = arg2
mov(rdx, reinterpret_cast<uint64_t>(fn));
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));

View File

@ -1425,6 +1425,68 @@ EMITTER_OPCODE_TABLE(
STORE_CONTEXT_V128);
// ============================================================================
// OPCODE_LOAD_MMIO
// ============================================================================
// Note: only 32-bit loads are handled; src1 carries the MMIORange pointer and src2 the guest address.
// Emits an inlined 32-bit MMIO read. The MMIORange pointer (src1) and the
// guest address (src2) are compile-time constants, so the range's read
// callback is invoked directly through the guest-to-host thunk.
EMITTER(LOAD_MMIO_I32, MATCH(I<OPCODE_LOAD_MMIO, I32<>, OffsetOp, OffsetOp>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // Callback: uint64_t (ppc_context, callback_context, addr)
    // CallNativeSafe passes rcx = context, r8 = arg0, r9 = arg1.
    auto mmio_range = reinterpret_cast<MMIORange*>(i.src1.value);
    auto read_address = uint32_t(i.src2.value);
    e.mov(e.r8, uint64_t(mmio_range->callback_context));
    e.mov(e.r9d, read_address);
    // CallNativeSafe takes a void*; a function pointer needs an explicit
    // reinterpret_cast (the implicit conversion is a compiler extension).
    e.CallNativeSafe(reinterpret_cast<void*>(mmio_range->read));
    // Byte-swap the 32-bit callback result before handing it to the guest.
    e.bswap(e.eax);
    e.mov(i.dest, e.eax);
    if (IsTracingData()) {
      // Use the 32-bit view of r8 so the operand widths match the I32 dest
      // (same form the STORE_MMIO_I32 trace path uses).
      e.mov(e.r8d, i.dest);
      e.mov(e.edx, read_address);
      e.CallNative(reinterpret_cast<void*>(TraceContextLoadI32));
    }
  }
};
EMITTER_OPCODE_TABLE(
    OPCODE_LOAD_MMIO,
    LOAD_MMIO_I32);
// ============================================================================
// OPCODE_STORE_MMIO
// ============================================================================
// Note: only 32-bit stores are handled; src1 carries the MMIORange pointer, src2 the guest address, and src3 the value.
// Emits an inlined 32-bit MMIO write. The MMIORange pointer (src1) and the
// guest address (src2) are compile-time constants, so the range's write
// callback is invoked directly through the guest-to-host thunk.
EMITTER(STORE_MMIO_I32, MATCH(I<OPCODE_STORE_MMIO, VoidOp, OffsetOp, OffsetOp, I32<>>)) {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // Callback: void (ppc_context, callback_context, addr, value)
    // CallNativeSafe passes rcx = context, r8 = arg0, r9 = arg1, r10 = arg2.
    auto mmio_range = reinterpret_cast<MMIORange*>(i.src1.value);
    auto write_address = uint32_t(i.src2.value);
    e.mov(e.r8, uint64_t(mmio_range->callback_context));
    e.mov(e.r9d, write_address);
    // The callback receives the byte-swapped value; constants are swapped
    // at emit time, register values at run time.
    if (i.src3.is_constant) {
      e.mov(e.r10d, xe::byte_swap(i.src3.constant()));
    } else {
      e.mov(e.r10d, i.src3);
      e.bswap(e.r10d);
    }
    // CallNativeSafe takes a void*; a function pointer needs an explicit
    // reinterpret_cast (the implicit conversion is a compiler extension).
    e.CallNativeSafe(reinterpret_cast<void*>(mmio_range->write));
    if (IsTracingData()) {
      // The trace hook is given the unswapped value.
      if (i.src3.is_constant) {
        e.mov(e.r8d, i.src3.constant());
      } else {
        e.mov(e.r8d, i.src3);
      }
      e.mov(e.edx, write_address);
      e.CallNative(reinterpret_cast<void*>(TraceContextStoreI32));
    }
  }
};
EMITTER_OPCODE_TABLE(
    OPCODE_STORE_MMIO,
    STORE_MMIO_I32);
// ============================================================================
// OPCODE_LOAD
// ============================================================================
@ -6374,6 +6436,8 @@ void RegisterSequences() {
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_CONTEXT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_CONTEXT);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_MMIO);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_MMIO);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE);
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MEMSET);

View File

@ -97,6 +97,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// rdx = target function
// r8 = arg0
// r9 = arg1
// r10 = arg2
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address

View File

@ -174,6 +174,51 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) {
}
break;
case OPCODE_LOAD:
  // A load from a constant guest address can be resolved at compile time:
  // addresses inside a registered MMIO range become LOAD_MMIO, and loads
  // from non-writable memory are folded into constants.
  if (i->src1.value->IsConstant()) {
    auto memory = processor_->memory();
    // Treat the constant as an unsigned 32-bit guest address so it is not
    // sign-extended when stored into the 64-bit offset operand.
    auto address = static_cast<uint32_t>(i->src1.value->constant.i32);
    auto mmio_range = memory->LookupVirtualMappedRange(address);
    if (mmio_range) {
      // Only rewrite 32-bit loads: the x64 backend registers a LOAD_MMIO
      // sequence for I32 only. Other widths stay as regular loads.
      if (v->type == INT32_TYPE) {
        i->Replace(&OPCODE_LOAD_MMIO_info, 0);
        i->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
        i->src2.offset = address;
      }
    } else {
      auto heap = memory->LookupHeap(address);
      uint32_t protect = 0;
      // Skip folding when no heap is returned for the address (avoids
      // dereferencing a null heap).
      if (heap && heap->QueryProtect(address, &protect) &&
          !(protect & kMemoryProtectWrite)) {
        // Memory is readonly - can just return the value.
        switch (v->type) {
          case INT32_TYPE:
            v->set_constant(xe::load_and_swap<uint32_t>(
                memory->TranslateVirtual(address)));
            // Remove the load only once its value has been folded.
            i->Remove();
            break;
          default:
            // Unhandled type: keep the load in place rather than removing
            // it without a replacement constant.
            assert_unhandled_case(v->type);
            break;
        }
      }
    }
  }
  break;
case OPCODE_STORE:
  // A store to a constant guest address inside a registered MMIO range is
  // rewritten to STORE_MMIO so the range's write callback is called directly.
  if (i->src1.value->IsConstant()) {
    // Treat the constant as an unsigned 32-bit guest address so it is not
    // sign-extended when stored into the 64-bit offset operand.
    auto address = static_cast<uint32_t>(i->src1.value->constant.i32);
    auto mmio_range =
        processor_->memory()->LookupVirtualMappedRange(address);
    // Only rewrite 32-bit stores: the x64 backend registers a STORE_MMIO
    // sequence for I32 only. Other widths stay as regular stores.
    if (mmio_range && i->src2.value->type == INT32_TYPE) {
      // Capture the stored value before Replace() changes the operands.
      auto value = i->src2.value;
      i->Replace(&OPCODE_STORE_MMIO_info, 0);
      i->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
      i->src2.offset = address;
      i->set_src3(value);
    }
  }
  break;
case OPCODE_SELECT:
if (i->src1.value->IsConstant()) {
if (i->src1.value->IsConstantTrue()) {

View File

@ -1087,6 +1087,23 @@ void HIRBuilder::StoreContext(size_t offset, Value* value) {
i->src3.value = NULL;
}
// Appends a LOAD_MMIO instruction that reads `address` through `mmio_range`
// and returns the instruction's destination value of the requested type.
Value* HIRBuilder::LoadMmio(cpu::MMIORange* mmio_range, uint32_t address,
                            TypeName type) {
  Value* result = AllocValue(type);
  Instr* instr = AppendInstr(OPCODE_LOAD_MMIO_info, 0, result);
  // The range pointer and guest address travel as raw offset operands.
  instr->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
  instr->src2.offset = address;
  instr->src3.value = nullptr;
  return instr->dest;
}
// Appends a STORE_MMIO instruction that writes `value` to `address` through
// `mmio_range`.
void HIRBuilder::StoreMmio(cpu::MMIORange* mmio_range, uint32_t address,
                           Value* value) {
  Instr* instr = AppendInstr(OPCODE_STORE_MMIO_info, 0);
  // The range pointer and guest address travel as raw offset operands; the
  // stored value is the third source.
  instr->src1.offset = reinterpret_cast<uint64_t>(mmio_range);
  instr->src2.offset = address;
  instr->set_src3(value);
}
Value* HIRBuilder::Load(Value* address, TypeName type, uint32_t load_flags) {
ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_LOAD_info, load_flags, AllocValue(type));

View File

@ -19,6 +19,7 @@
#include "xenia/cpu/hir/label.h"
#include "xenia/cpu/hir/opcodes.h"
#include "xenia/cpu/hir/value.h"
#include "xenia/cpu/mmio_handler.h"
namespace xe {
namespace cpu {
@ -130,6 +131,9 @@ class HIRBuilder {
Value* LoadContext(size_t offset, TypeName type);
void StoreContext(size_t offset, Value* value);
// Appends a load_mmio instruction reading `address` via `mmio_range`; returns
// the destination value of the given type.
Value* LoadMmio(cpu::MMIORange* mmio_range, uint32_t address, TypeName type);
// Appends a store_mmio instruction writing `value` to `address` via
// `mmio_range`.
void StoreMmio(cpu::MMIORange* mmio_range, uint32_t address, Value* value);
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0);
void Memset(Value* address, Value* value, Value* length);

View File

@ -140,6 +140,8 @@ enum Opcode {
OPCODE_STORE_LOCAL,
OPCODE_LOAD_CONTEXT,
OPCODE_STORE_CONTEXT,
OPCODE_LOAD_MMIO,
OPCODE_STORE_MMIO,
OPCODE_LOAD,
OPCODE_STORE,
OPCODE_MEMSET,
@ -243,6 +245,8 @@ enum OpcodeSignature {
(OPCODE_SIG_TYPE_X) | (OPCODE_SIG_TYPE_O << 3) | (OPCODE_SIG_TYPE_V << 6),
OPCODE_SIG_X_O_V_V = (OPCODE_SIG_TYPE_X) | (OPCODE_SIG_TYPE_O << 3) |
(OPCODE_SIG_TYPE_V << 6) | (OPCODE_SIG_TYPE_V << 9),
OPCODE_SIG_X_O_O_V = (OPCODE_SIG_TYPE_X) | (OPCODE_SIG_TYPE_O << 3) |
(OPCODE_SIG_TYPE_O << 6) | (OPCODE_SIG_TYPE_V << 9),
OPCODE_SIG_X_S = (OPCODE_SIG_TYPE_X) | (OPCODE_SIG_TYPE_S << 3),
OPCODE_SIG_X_V = (OPCODE_SIG_TYPE_X) | (OPCODE_SIG_TYPE_V << 3),
OPCODE_SIG_X_V_L =
@ -260,6 +264,8 @@ enum OpcodeSignature {
OPCODE_SIG_V = (OPCODE_SIG_TYPE_V),
OPCODE_SIG_V_O = (OPCODE_SIG_TYPE_V) | (OPCODE_SIG_TYPE_O << 3),
OPCODE_SIG_V_V = (OPCODE_SIG_TYPE_V) | (OPCODE_SIG_TYPE_V << 3),
OPCODE_SIG_V_O_O =
(OPCODE_SIG_TYPE_V) | (OPCODE_SIG_TYPE_O << 3) | (OPCODE_SIG_TYPE_O << 6),
OPCODE_SIG_V_V_O =
(OPCODE_SIG_TYPE_V) | (OPCODE_SIG_TYPE_V << 3) | (OPCODE_SIG_TYPE_O << 6),
OPCODE_SIG_V_V_O_V = (OPCODE_SIG_TYPE_V) | (OPCODE_SIG_TYPE_V << 3) |

View File

@ -212,6 +212,18 @@ DEFINE_OPCODE(
OPCODE_SIG_X_O_V,
0)
// load_mmio: produces a value from two offset operands (V_O_O). The builder
// stores the MMIORange pointer in src1 and the guest address in src2.
DEFINE_OPCODE(
OPCODE_LOAD_MMIO,
"load_mmio",
OPCODE_SIG_V_O_O,
OPCODE_FLAG_MEMORY)
// store_mmio: no result; two offset operands plus a value (X_O_O_V). The
// builder stores the MMIORange pointer in src1, the guest address in src2 and
// the value to write in src3.
DEFINE_OPCODE(
OPCODE_STORE_MMIO,
"store_mmio",
OPCODE_SIG_X_O_O_V,
OPCODE_FLAG_MEMORY)
DEFINE_OPCODE(
OPCODE_LOAD,
"load",

View File

@ -62,11 +62,20 @@ bool MMIOHandler::RegisterRange(uint32_t virtual_address, uint32_t mask,
return true;
}
// Returns the first registered range whose masked address matches
// `virtual_address`, or nullptr when no range matches. The returned pointer
// refers to an element of mapped_ranges_.
MMIORange* MMIOHandler::LookupRange(uint32_t virtual_address) {
  for (auto it = mapped_ranges_.begin(); it != mapped_ranges_.end(); ++it) {
    const bool matches = (virtual_address & it->mask) == it->address;
    if (matches) {
      return &*it;
    }
  }
  return nullptr;
}
bool MMIOHandler::CheckLoad(uint32_t virtual_address, uint64_t* out_value) {
for (const auto& range : mapped_ranges_) {
if ((virtual_address & range.mask) == range.address) {
*out_value =
static_cast<uint32_t>(range.read(range.context, virtual_address));
*out_value = static_cast<uint32_t>(
range.read(nullptr, range.callback_context, virtual_address));
return true;
}
}
@ -76,7 +85,7 @@ bool MMIOHandler::CheckLoad(uint32_t virtual_address, uint64_t* out_value) {
bool MMIOHandler::CheckStore(uint32_t virtual_address, uint64_t value) {
for (const auto& range : mapped_ranges_) {
if ((virtual_address & range.mask) == range.address) {
range.write(range.context, virtual_address, value);
range.write(nullptr, range.callback_context, virtual_address, value);
return true;
}
}
@ -243,7 +252,8 @@ bool MMIOHandler::HandleAccessFault(void* thread_state,
if (is_load) {
// Load of a memory value - read from range, swap, and store in the
// register.
uint64_t value = range->read(range->context, fault_address & 0xFFFFFFFF);
uint64_t value = range->read(nullptr, range->callback_context,
fault_address & 0xFFFFFFFF);
uint32_t be_reg_index;
if (!xe::bit_scan_forward(arg1_type & 0xFFFF, &be_reg_index)) {
be_reg_index = 0;
@ -293,7 +303,8 @@ bool MMIOHandler::HandleAccessFault(void* thread_state,
value = xe::byte_swap(static_cast<uint64_t>(value));
break;
}
range->write(range->context, fault_address & 0xFFFFFFFF, value);
range->write(nullptr, range->callback_context, fault_address & 0xFFFFFFFF,
value);
} else {
assert_always("Unknown MMIO instruction type");
return false;

View File

@ -20,12 +20,23 @@
namespace xe {
namespace cpu {
typedef uint64_t (*MMIOReadCallback)(void* context, uint32_t addr);
typedef void (*MMIOWriteCallback)(void* context, uint32_t addr, uint64_t value);
typedef uint64_t (*MMIOReadCallback)(void* ppc_context, void* callback_context,
uint32_t addr);
typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context,
uint32_t addr, uint64_t value);
typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr,
uint32_t address);
// Describes one registered MMIO region and the callbacks that service it.
struct MMIORange {
// Value compared against (virtual_address & mask) when matching an access.
uint32_t address;
// Mask applied to an accessed address before comparing with `address`.
uint32_t mask;
// Size recorded at registration — presumably the region length in bytes;
// TODO confirm against RegisterRange.
uint32_t size;
// Opaque pointer passed to the callbacks as their callback_context argument.
void* callback_context;
// Invoked to service reads from the range.
MMIOReadCallback read;
// Invoked to service writes to the range.
MMIOWriteCallback write;
};
// NOTE: only one can exist at a time!
class MMIOHandler {
public:
@ -38,6 +49,7 @@ class MMIOHandler {
bool RegisterRange(uint32_t virtual_address, uint32_t mask, uint32_t size,
void* context, MMIOReadCallback read_callback,
MMIOWriteCallback write_callback);
MMIORange* LookupRange(uint32_t virtual_address);
bool CheckLoad(uint32_t virtual_address, uint64_t* out_value);
bool CheckStore(uint32_t virtual_address, uint64_t value);
@ -76,14 +88,6 @@ class MMIOHandler {
uint8_t* virtual_membase_;
uint8_t* physical_membase_;
struct MMIORange {
uint32_t address;
uint32_t mask;
uint32_t size;
void* context;
MMIOReadCallback read;
MMIOWriteCallback write;
};
std::vector<MMIORange> mapped_ranges_;
// TODO(benvanik): data structure magic.

View File

@ -63,6 +63,11 @@ WinMMIOHandler::~WinMMIOHandler() {
// addresses in our range and call into the registered handlers, if any.
// If there are none, we continue.
LONG CALLBACK MMIOExceptionHandler(PEXCEPTION_POINTERS ex_info) {
// Fast path for SetThreadName.
if (ex_info->ExceptionRecord->ExceptionCode == 0x406D1388) {
return EXCEPTION_CONTINUE_SEARCH;
}
SCOPE_profile_cpu_i("cpu", "MMIOExceptionHandler");
// http://msdn.microsoft.com/en-us/library/ms679331(v=vs.85).aspx

View File

@ -54,11 +54,12 @@ class GL4GraphicsSystem : public GraphicsSystem {
uint64_t ReadRegister(uint32_t addr);
void WriteRegister(uint32_t addr, uint64_t value);
static uint64_t MMIOReadRegisterThunk(GL4GraphicsSystem* gs, uint32_t addr) {
static uint64_t MMIOReadRegisterThunk(void* ppc_context,
GL4GraphicsSystem* gs, uint32_t addr) {
return gs->ReadRegister(addr);
}
static void MMIOWriteRegisterThunk(GL4GraphicsSystem* gs, uint32_t addr,
uint64_t value) {
static void MMIOWriteRegisterThunk(void* ppc_context, GL4GraphicsSystem* gs,
uint32_t addr, uint64_t value) {
gs->WriteRegister(addr, value);
}

View File

@ -362,6 +362,10 @@ bool Memory::AddVirtualMappedRange(uint32_t virtual_address, uint32_t mask,
read_callback, write_callback);
}
// Forwards to the MMIO handler's range lookup for `virtual_address` and
// returns its result unchanged.
cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
  cpu::MMIORange* mapped_range = mmio_handler_->LookupRange(virtual_address);
  return mapped_range;
}
uintptr_t Memory::AddPhysicalWriteWatch(uint32_t physical_address,
uint32_t length,
cpu::WriteWatchCallback callback,

View File

@ -195,6 +195,7 @@ class Memory {
uint32_t size, void* context,
cpu::MMIOReadCallback read_callback,
cpu::MMIOWriteCallback write_callback);
cpu::MMIORange* LookupVirtualMappedRange(uint32_t virtual_address);
uintptr_t AddPhysicalWriteWatch(uint32_t physical_address, uint32_t length,
cpu::WriteWatchCallback callback,