implement dynamically allocatable guest-to-host callbacks

parent d0a6cec024
commit 43fd396db7
src/xenia/base/bit_map.cc
@@ -25,37 +25,57 @@ BitMap::BitMap(uint64_t* data, size_t size_bits) {
   data_.resize(size_bits / kDataSizeBits);
   std::memcpy(data_.data(), data, size_bits / kDataSizeBits);
 }
 
+inline size_t BitMap::TryAcquireAt(size_t i) {
+  uint64_t entry = 0;
+  uint64_t new_entry = 0;
+  int64_t acquired_idx = -1LL;
+
+  do {
+    entry = data_[i];
+    uint8_t index = lzcnt(entry);
+    if (index == kDataSizeBits) {
+      // None free.
+      acquired_idx = -1;
+      break;
+    }
+
+    // Entry has a free bit. Acquire it.
+    uint64_t bit = 1ull << (kDataSizeBits - index - 1);
+    new_entry = entry & ~bit;
+    assert_not_zero(entry & bit);
+
+    acquired_idx = index;
+  } while (!atomic_cas(entry, new_entry, &data_[i]));
+
+  if (acquired_idx != -1) {
+    // Acquired.
+    return (i * kDataSizeBits) + acquired_idx;
+  }
+  return -1LL;
+}
+
 size_t BitMap::Acquire() {
   for (size_t i = 0; i < data_.size(); i++) {
-    uint64_t entry = 0;
-    uint64_t new_entry = 0;
-    int64_t acquired_idx = -1;
-
-    do {
-      entry = data_[i];
-      uint8_t index = lzcnt(entry);
-      if (index == kDataSizeBits) {
-        // None free.
-        acquired_idx = -1;
-        break;
-      }
-
-      // Entry has a free bit. Acquire it.
-      uint64_t bit = 1ull << (kDataSizeBits - index - 1);
-      new_entry = entry & ~bit;
-      assert_not_zero(entry & bit);
-
-      acquired_idx = index;
-    } while (!atomic_cas(entry, new_entry, &data_[i]));
-
-    if (acquired_idx != -1) {
-      // Acquired.
-      return (i * kDataSizeBits) + acquired_idx;
+    size_t attempt_result = TryAcquireAt(i);
+    if (attempt_result != -1LL) {
+      return attempt_result;
     }
   }
 
-  return -1;
+  return -1LL;
 }
 
+size_t BitMap::AcquireFromBack() {
+  if (!data_.size()) {
+    return -1LL;
+  }
+  for (ptrdiff_t i = data_.size() - 1; i >= 0; i--) {
+    size_t attempt_result = TryAcquireAt(static_cast<size_t>(i));
+    if (attempt_result != -1LL) {
+      return attempt_result;
+    }
+  }
+
+  return -1LL;
+}
+
 void BitMap::Release(size_t index) {
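A note on the lock-free loop above: atomic_cas(entry, new_entry, &data_[i]) is
assumed to compare-and-swap data_[i] from entry to new_entry, so TryAcquireAt
simply retries whenever another thread modified the word between the load and
the swap; a set bit marks a free slot. A minimal self-contained sketch of the
same claim loop using std::atomic (illustrative only, not Xenia code):

    #include <atomic>
    #include <cstdint>

    // Claim the first free (set) bit of a 64-bit word, counting from the MSB.
    // Returns the bit index, or -1 if every bit is already taken.
    inline int try_acquire_bit(std::atomic<uint64_t>& word) {
      uint64_t entry = word.load(std::memory_order_relaxed);
      for (;;) {
        if (entry == 0) return -1;            // no free bits left
        int index = __builtin_clzll(entry);   // leading-zero count (GCC/Clang)
        uint64_t bit = 1ull << (63 - index);  // the bit we want to claim
        // On failure the current value is reloaded into entry and we retry.
        if (word.compare_exchange_weak(entry, entry & ~bit,
                                       std::memory_order_acq_rel)) {
          return index;
        }
      }
    }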
src/xenia/base/bit_map.h
@@ -32,7 +32,7 @@ class BitMap {
   // (threadsafe) Acquires an entry and returns its index. Returns -1 if there
   // are no more free entries.
   size_t Acquire();
+  size_t AcquireFromBack();
   // (threadsafe) Releases an entry by an index.
   void Release(size_t index);
 
@@ -49,6 +49,7 @@ class BitMap {
   const static size_t kDataSize = 8;
   const static size_t kDataSizeBits = kDataSize * 8;
   std::vector<uint64_t> data_;
+  inline size_t TryAcquireAt(size_t i);
 };
 
 }  // namespace xe
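Taken together, the bitmap is a small thread-safe index allocator: Acquire
scans words from the front, AcquireFromBack from the back, and both return a
global bit index or -1. A hypothetical usage sketch:

    xe::BitMap slots;
    slots.Resize(1024);                     // 1024 allocatable indices

    size_t idx = slots.Acquire();           // short-lived: front of the map
    size_t back = slots.AcquireFromBack();  // long-lived: back of the map
    if (idx != (size_t)-1) {
      // ... use slot idx ...
      slots.Release(idx);                   // make it allocatable again
    }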
src/xenia/cpu/backend/backend.h
@@ -38,7 +38,9 @@ struct GuestPseudoStackTrace {
 };
 class Assembler;
 class CodeCache;
+using GuestTrampolineProc = void (*)(ppc::PPCContext* context, void* userarg1,
+                                     void* userarg2);
+using SimpleGuestTrampolineProc = void (*)(ppc::PPCContext*);
 class Backend {
  public:
  explicit Backend();
@@ -95,11 +97,74 @@ class Backend {
   virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) {
     return false;
   }
 
+  virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc,
+                                         void* userdata1, void* userdata2,
+                                         bool long_term = false) {
+    return 0;
+  }
+  uint32_t CreateGuestTrampoline(void (*func)(ppc::PPCContext*),
+                                 bool long_term = false) {
+    return CreateGuestTrampoline(
+        reinterpret_cast<GuestTrampolineProc>(reinterpret_cast<void*>(func)),
+        nullptr, nullptr);
+  }
+  // If long-term, allocate towards the back of the bitset to make allocating
+  // short-term ones faster.
+  uint32_t CreateLongTermGuestTrampoline(void (*func)(ppc::PPCContext*)) {
+    return CreateGuestTrampoline(
+        reinterpret_cast<GuestTrampolineProc>(reinterpret_cast<void*>(func)),
+        nullptr, nullptr, true);
+  }
+  virtual void FreeGuestTrampoline(uint32_t trampoline_addr) {}
 
  protected:
   Processor* processor_ = nullptr;
   MachineInfo machine_info_;
   CodeCache* code_cache_ = nullptr;
 };
+
+/*
+ * A set of guest trampolines that all have shared ownership.
+ */
+struct GuestTrampolineGroup
+    : public std::map<SimpleGuestTrampolineProc, uint32_t> {
+  Backend* const m_backend;
+  xe_mutex m_mutex;
+
+  uint32_t _NewTrampoline(SimpleGuestTrampolineProc proc, bool longterm) {
+    uint32_t result;
+    m_mutex.lock();
+    auto iter = this->find(proc);
+    if (iter == this->end()) {
+      uint32_t new_entry = longterm
+                               ? m_backend->CreateLongTermGuestTrampoline(proc)
+                               : m_backend->CreateGuestTrampoline(proc);
+      this->emplace_hint(iter, proc, new_entry);
+      result = new_entry;
+    } else {
+      result = iter->second;
+    }
+    m_mutex.unlock();
+    return result;
+  }
+
+ public:
+  GuestTrampolineGroup(Backend* backend) : m_backend(backend) {}
+  ~GuestTrampolineGroup() {
+    m_mutex.lock();
+    for (auto&& entry : *this) {
+      m_backend->FreeGuestTrampoline(entry.second);
+    }
+    m_mutex.unlock();
+  }
+
+  uint32_t NewLongtermTrampoline(SimpleGuestTrampolineProc proc) {
+    return _NewTrampoline(proc, true);
+  }
+  uint32_t NewTrampoline(SimpleGuestTrampolineProc proc) {
+    return _NewTrampoline(proc, false);
+  }
+};
 
 }  // namespace backend
 }  // namespace cpu
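GuestTrampolineGroup memoizes one trampoline per host function, so repeated
registrations of the same callback hand out the same guest address, and every
entry is freed when the group is destroyed. A hedged usage sketch (the handler
itself is a made-up example; backend is assumed to be a valid Backend*):

    static void OnGuestEvent(ppc::PPCContext* ctx) {
      // host-side logic, with full access to the guest context
    }

    GuestTrampolineGroup group(backend);
    uint32_t guest_fn = group.NewTrampoline(OnGuestEvent);
    // A second NewTrampoline(OnGuestEvent) returns the same guest_fn;
    // NewLongtermTrampoline would place the slot at the back of the bitmap.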
src/xenia/cpu/backend/x64/x64_backend.cc
@@ -90,6 +90,25 @@ class X64HelperEmitter : public X64Emitter {
   void EmitLoadNonvolatileRegs();
 };
 
+#if XE_PLATFORM_WIN32
+static constexpr unsigned char guest_trampoline_template[] = {
+    0x48, 0xBA, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x49,
+    0xB8, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x48, 0xB9,
+    0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x48, 0xB8, 0x99,
+    0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0xFF, 0xE0};
+
+#else
+// SysV x64 ABI; the argument immediates sit at the exact same offsets.
+static constexpr unsigned char guest_trampoline_template[] = {
+    0x48, 0xBF, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x48,
+    0xBE, 0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x48, 0xB9,
+    0x99, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0x48, 0xB8, 0x99,
+    0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x00, 0xFF, 0xE0};
+#endif
+static constexpr uint32_t guest_trampoline_template_offset_arg1 = 2,
+                          guest_trampoline_template_offset_arg2 = 0xC,
+                          guest_trampoline_template_offset_rcx = 0x16,
+                          guest_trampoline_template_offset_rax = 0x20;
 X64Backend::X64Backend() : Backend(), code_cache_(nullptr) {
   if (cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_) != CS_ERR_OK) {
     assert_always("Failed to initialize capstone");
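Decoded, the template is four movabs instructions and an indirect jump; the
0x99, 0x88, ... runs are placeholder imm64 operands that CreateGuestTrampoline
patches at the offsets defined above. A best-effort disassembly of the Win64
variant (the SysV one targets rdi/rsi for the first two loads instead):

    48 BA imm64   mov rdx, userdata1            ; imm64 at offset 0x02 (arg1)
    49 B8 imm64   mov r8,  userdata2            ; imm64 at offset 0x0C (arg2)
    48 B9 imm64   mov rcx, proc                 ; imm64 at offset 0x16 (rcx)
    48 B8 imm64   mov rax, guest_to_host_thunk  ; imm64 at offset 0x20 (rax)
    FF E0         jmp rax

Under the Win64 convention this lines up with GuestToHostThunk(target, arg0,
arg1): rcx carries the target proc and rdx/r8 carry the two user arguments.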
@@ -97,6 +116,23 @@ X64Backend::X64Backend() : Backend(), code_cache_(nullptr) {
   cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
   cs_option(capstone_handle_, CS_OPT_DETAIL, CS_OPT_ON);
   cs_option(capstone_handle_, CS_OPT_SKIPDATA, CS_OPT_OFF);
+  uint32_t base_address = 0x10000;
+  void* buf_trampoline_code = nullptr;
+  while (base_address < 0x80000000) {
+    buf_trampoline_code = memory::AllocFixed(
+        (void*)(uintptr_t)base_address,
+        sizeof(guest_trampoline_template) * MAX_GUEST_TRAMPOLINES,
+        xe::memory::AllocationType::kReserveCommit,
+        xe::memory::PageAccess::kExecuteReadWrite);
+    if (!buf_trampoline_code) {
+      base_address += 65536;
+    } else {
+      break;
+    }
+  }
+  xenia_assert(buf_trampoline_code);
+  guest_trampoline_memory_ = (uint8_t*)buf_trampoline_code;
+  guest_trampoline_address_bitmap_.Resize(MAX_GUEST_TRAMPOLINES);
 }
 
 X64Backend::~X64Backend() {
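The probe loop is not just about finding free pages: AddIndirection (used in
CreateGuestTrampoline below) stores the host address of each trampoline as a
uint32_t, so the whole buffer must land below 0x80000000 for that cast to be
lossless. An illustrative statement of the invariant the loop establishes
(not code from this commit):

    uintptr_t host = reinterpret_cast<uintptr_t>(guest_trampoline_memory_);
    size_t span = sizeof(guest_trampoline_template) * MAX_GUEST_TRAMPOLINES;
    xenia_assert(host + span <= 0x80000000ull);  // every write_pos fits in 32 bits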
@@ -106,6 +142,13 @@ X64Backend::~X64Backend() {
   X64Emitter::FreeConstData(emitter_data_);
   ExceptionHandler::Uninstall(&ExceptionCallbackThunk, this);
+  if (guest_trampoline_memory_) {
+    memory::DeallocFixed(
+        guest_trampoline_memory_,
+        sizeof(guest_trampoline_template) * MAX_GUEST_TRAMPOLINES,
+        memory::DeallocationType::kRelease);
+    guest_trampoline_memory_ = nullptr;
+  }
 }
 
 static void ForwardMMIOAccessForRecording(void* context, void* hostaddr) {
@@ -212,6 +255,9 @@ bool X64Backend::Initialize(Processor* processor) {
   if (!code_cache_->Initialize()) {
     return false;
   }
+  // HV range
+  code_cache()->CommitExecutableRange(GUEST_TRAMPOLINE_BASE,
+                                      GUEST_TRAMPOLINE_END);
   // Allocate emitter constant data.
   emitter_data_ = X64Emitter::PlaceConstData();
@@ -241,7 +287,8 @@ bool X64Backend::Initialize(Processor* processor) {
   reserved_store_32_helper = thunk_emitter.EmitReservedStoreHelper(false);
   reserved_store_64_helper = thunk_emitter.EmitReservedStoreHelper(true);
   vrsqrtefp_scalar_helper = thunk_emitter.EmitScalarVRsqrteHelper();
-  vrsqrtefp_vector_helper = thunk_emitter.EmitVectorVRsqrteHelper(vrsqrtefp_scalar_helper);
+  vrsqrtefp_vector_helper =
+      thunk_emitter.EmitVectorVRsqrteHelper(vrsqrtefp_scalar_helper);
   frsqrtefp_helper = thunk_emitter.EmitFrsqrteHelper();
   // Set the code cache to use the ResolveFunction thunk for default
   // indirections.
@@ -850,7 +897,7 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackSizeLoadThunk(
   _code_offsets code_offsets = {};
   code_offsets.prolog = getSize();
   pop(r8);  // return address
 
   switch (stack_element_size) {
     case 4:
       mov(r11d, ptr[r8]);
@@ -919,11 +966,11 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
 
   bt(GetBackendFlagsPtr(), kX64BackendNJMOn);
   jnc(handle_denormal_input, CodeGenerator::T_NEAR);
 
   // handle denormal input with NJM on
   // denorms get converted to zero w/ input sign, jump to our label
   // that handles inputs of 0 for this
 
   jmp(convert_to_signed_inf_and_ret);
   L(L35);
@@ -1038,7 +1085,6 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
   L(L1);
   ret();
 
-
   L(handle_denormal_input);
   mov(r9d, r8d);
   and_(r9d, 0x7FFFFFFF);
@@ -1089,7 +1135,6 @@ void* X64HelperEmitter::EmitScalarVRsqrteHelper() {
   dd(0x7FC00000);
   dd(0x5F34FD00);
 
-
   code_offsets.prolog_stack_alloc = getSize();
   code_offsets.body = getSize();
   code_offsets.prolog = getSize();
@@ -1126,18 +1171,16 @@ void* X64HelperEmitter::EmitVectorVRsqrteHelper(void* scalar_helper) {
   jnz(actual_vector_version);
   vshufps(xmm0, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
   call(scalar_helper);
   // this->DebugBreak();
   vinsertps(xmm0, xmm0, (3 << 4) | (0 << 6));
 
   vblendps(xmm0, xmm0, ptr[backend()->LookupXMMConstantAddress(XMMFloatInf)],
            0b0111);
 
   ret();
-
-
   L(actual_vector_version);
 
   xor_(ecx, ecx);
   vmovaps(result_ptr, xmm0);
@@ -1172,7 +1215,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
   code_offsets.epilog = getSize();
   code_offsets.tail = getSize();
   code_offsets.prolog = getSize();
 
   Xbyak::Label L2, L7, L6, L9, L1, L12, L24, L3, L25, frsqrte_table2, LC1;
   bt(GetBackendFlagsPtr(), kX64BackendNonIEEEMode);
   vmovq(rax, xmm0);
@@ -1190,7 +1233,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
     not_(rcx);
     and_(rcx, rdx);
   }
 
   jne(L6);
   cmp(rax, rdx);
   je(L1, CodeGenerator::T_NEAR);
@@ -1199,7 +1242,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
   jne(L7);
   vcomisd(xmm0, xmm1);
   jb(L12, CodeGenerator::T_NEAR);
 
   L(L7);
   mov(rdx, 0x7ff8000000000000ULL);
   or_(rax, rdx);
@@ -1236,7 +1279,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
   sal(rax, 44);
   or_(rax, rdx);
   vmovq(xmm1, rax);
 
   L(L1);
   vmovapd(xmm0, xmm1);
   ret();
@@ -1255,7 +1298,7 @@ void* X64HelperEmitter::EmitFrsqrteHelper() {
   jne(L2);
   mov(rdx, 0x8000000000000000ULL);
   and_(rax, rdx);
 
   L(L3);
   mov(rdx, 0x8000000000000000ULL);
   and_(rax, rdx);
@@ -1617,6 +1660,53 @@ uint64_t* X64Backend::GetProfilerRecordForFunction(uint32_t guest_address) {
 }
 
 #endif
 
+// TODO: flush cache
+uint32_t X64Backend::CreateGuestTrampoline(GuestTrampolineProc proc,
+                                           void* userdata1, void* userdata2,
+                                           bool longterm) {
+  size_t new_index;
+  if (longterm) {
+    new_index = guest_trampoline_address_bitmap_.AcquireFromBack();
+  } else {
+    new_index = guest_trampoline_address_bitmap_.Acquire();
+  }
+
+  xenia_assert(new_index != (size_t)-1);
+
+  uint8_t* write_pos =
+      &guest_trampoline_memory_[sizeof(guest_trampoline_template) * new_index];
+
+  memcpy(write_pos, guest_trampoline_template,
+         sizeof(guest_trampoline_template));
+
+  *reinterpret_cast<void**>(
+      &write_pos[guest_trampoline_template_offset_arg1]) = userdata1;
+  *reinterpret_cast<void**>(
+      &write_pos[guest_trampoline_template_offset_arg2]) = userdata2;
+  *reinterpret_cast<GuestTrampolineProc*>(
+      &write_pos[guest_trampoline_template_offset_rcx]) = proc;
+  *reinterpret_cast<GuestToHostThunk*>(
+      &write_pos[guest_trampoline_template_offset_rax]) = guest_to_host_thunk_;
+
+  uint32_t indirection_guest_addr =
+      GUEST_TRAMPOLINE_BASE +
+      (static_cast<uint32_t>(new_index) * GUEST_TRAMPOLINE_MIN_LEN);
+
+  code_cache()->AddIndirection(
+      indirection_guest_addr,
+      static_cast<uint32_t>(reinterpret_cast<uintptr_t>(write_pos)));
+
+  return indirection_guest_addr;
+}
+
+void X64Backend::FreeGuestTrampoline(uint32_t trampoline_addr) {
+  xenia_assert(trampoline_addr >= GUEST_TRAMPOLINE_BASE &&
+               trampoline_addr < GUEST_TRAMPOLINE_END);
+  size_t index =
+      (trampoline_addr - GUEST_TRAMPOLINE_BASE) / GUEST_TRAMPOLINE_MIN_LEN;
+  guest_trampoline_address_bitmap_.Release(index);
+}
 }  // namespace x64
 }  // namespace backend
 }  // namespace cpu
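End to end, a host callback becomes guest-callable in three steps: acquire a
slot, stamp the template, and map a guest address onto it via the indirection
table. A hypothetical caller (the surrounding kernel wiring is outside this
commit):

    static void MyHandler(ppc::PPCContext* ctx, void* userarg1, void* userarg2) {
      // runs on the host whenever the guest calls the returned address
    }

    uint32_t guest_fn =
        backend->CreateGuestTrampoline(MyHandler, some_object, nullptr);
    // guest_fn lies in [GUEST_TRAMPOLINE_BASE, GUEST_TRAMPOLINE_END) and can
    // be handed to guest code as an ordinary function pointer.
    // ...
    backend->FreeGuestTrampoline(guest_fn);  // slot becomes reusable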
src/xenia/cpu/backend/x64/x64_backend.h
@@ -13,6 +13,7 @@
 #include <memory>
 
 #include "xenia/base/cvar.h"
+#include "xenia/base/bit_map.h"
 #include "xenia/cpu/backend/backend.h"
 
 #if XE_PLATFORM_WIN32 == 1
@@ -42,6 +43,19 @@ typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
 typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
 typedef void (*ResolveFunctionThunk)();
 
+/*
+  Place guest trampolines in the memory range that the HV normally occupies.
+  This way guests can call in via the indirection table and we don't have to
+  clobber/reuse an existing memory range. The xboxkrnl range is already used
+  by export trampolines (see kernel/kernel_module.cc).
+*/
+static constexpr uint32_t GUEST_TRAMPOLINE_BASE = 0x80000000;
+static constexpr uint32_t GUEST_TRAMPOLINE_END = 0x80040000;
+
+static constexpr uint32_t GUEST_TRAMPOLINE_MIN_LEN = 8;
+
+static constexpr uint32_t MAX_GUEST_TRAMPOLINES =
+    (GUEST_TRAMPOLINE_END - GUEST_TRAMPOLINE_BASE) / GUEST_TRAMPOLINE_MIN_LEN;
+
 #define RESERVE_BLOCK_SHIFT 16
 
 #define RESERVE_NUM_ENTRIES \
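For concreteness, the window is 0x80040000 - 0x80000000 = 0x40000 bytes
(256 KiB of guest address space), and at 8 bytes of indirection spacing that
yields 32768 trampoline slots (an illustrative check, not part of the commit):

    static_assert((0x80040000u - 0x80000000u) / 8 == 32768,
                  "the HV window yields 32768 trampoline slots");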
@@ -155,6 +169,11 @@ class X64Backend : public Backend {
     return reinterpret_cast<X64BackendContext*>(
         reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
   }
+  virtual uint32_t CreateGuestTrampoline(GuestTrampolineProc proc,
+                                         void* userdata1, void* userdata2,
+                                         bool long_term) override;
+
+  virtual void FreeGuestTrampoline(uint32_t trampoline_addr) override;
   virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override;
   virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) override;
   void RecordMMIOExceptionForGuestInstruction(void* host_address);
@@ -200,6 +219,11 @@ class X64Backend : public Backend {
 #endif
 
   alignas(64) ReserveHelper reserve_helper_;
+  // Allocates 8-byte-aligned addresses in a normally non-executable guest
+  // address range; those addresses are used to dispatch to host code.
+  BitMap guest_trampoline_address_bitmap_;
+  uint8_t* guest_trampoline_memory_;
 };
 
 }  // namespace x64