[x64] Change the parameters to guest_to_host_thunk

Shuffle some code around in x64_backend.cc
Add GetNativeParam to avoid hardcoding parameters
Dr. Chat 2018-11-18 14:23:16 -06:00
parent c451fda819
commit b57bb74965
6 changed files with 268 additions and 233 deletions
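Editor's sketch (not part of the commit): the guest-to-host thunk no longer receives the PPC context in rcx. Instead, rcx carries the target host function and rdx/r8/r9 carry arg0-arg2; the thunk re-reads the context from the JIT's dedicated context register before calling out. The hypothetical helper below (names are illustrative, not from the repository) restates the GetNativeParam mapping this commit introduces, assuming the Windows x64 calling convention the backend targets.

#include <cassert>
#include <cstdint>
#include <string>

// Mirrors X64Emitter::GetNativeParam: native-call argument i lands in the
// i-th integer-argument register after rcx, because rcx now holds the
// target function pointer on entry to the guest-to-host thunk.
std::string NativeParamRegister(uint32_t param) {
  switch (param) {
    case 0:
      return "rdx";  // arg0
    case 1:
      return "r8";   // arg1
    case 2:
      return "r9";   // arg2
    default:
      assert(false && "only three native parameters are supported");
      return "r9";
  }
}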

View File

@ -42,6 +42,15 @@ class X64ThunkEmitter : public X64Emitter {
HostToGuestThunk EmitHostToGuestThunk();
GuestToHostThunk EmitGuestToHostThunk();
ResolveFunctionThunk EmitResolveFunctionThunk();
private:
// The following four functions provide save/load functionality for registers.
// They assume at least StackLayout::THUNK_STACK_SIZE bytes have been
// allocated on the stack.
void EmitSaveVolatileRegs();
void EmitLoadVolatileRegs();
void EmitSaveNonvolatileRegs();
void EmitLoadNonvolatileRegs();
};
X64Backend::X64Backend() : Backend(), code_cache_(nullptr) {
@ -406,53 +415,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
// Preserve nonvolatile registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm9);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm10);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm11);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[6])], xmm12);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
// Save nonvolatile registers.
EmitSaveNonvolatileRegs();
mov(rax, rcx);
mov(rsi, rdx); // context
mov(rcx, r8); // return address
call(rax);
movaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
movaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
movaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
movaps(xmm9, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
movaps(xmm10, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
movaps(xmm11, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
movaps(xmm12, qword[rsp + offsetof(StackLayout::Thunk, xmm[6])]);
movaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
movaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
movaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
EmitLoadNonvolatileRegs();
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
@ -465,56 +436,26 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
}
GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// rcx = context
// rdx = target function
// r8 = arg0
// r9 = arg1
// r10 = arg2
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
// rsp + 0 = return address
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
sub(rsp, stack_size);
// Save off volatile registers.
// TODO(DrChat): Enable this when we actually need this.
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rcx);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdx);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r8);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r9);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r10);
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r11);
// TODO(DrChat): Enable when necessary.
// EmitSaveVolatileRegs();
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4);
// movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5);
mov(rax, rdx);
mov(rcx, rsi); // context
mov(rdx, r8);
mov(r8, r9);
mov(r9, r10);
mov(rax, rcx); // function
mov(rcx, GetContextReg()); // context
call(rax);
// movaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
// movaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
// movaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
// movaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
// movaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
// mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
// mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
// mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
// mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
// mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
// mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
// EmitLoadVolatileRegs();
add(rsp, stack_size);
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
ret();
void* fn = Emplace(stack_size);
@ -527,7 +468,6 @@ extern "C" uint64_t ResolveFunction(void* raw_context, uint32_t target_address);
ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
// ebx = target PPC address
// rcx = context
uint32_t stack_size = 0x18;
// rsp + 0 = return address
@ -549,6 +489,85 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
return (ResolveFunctionThunk)fn;
}
void X64ThunkEmitter::EmitSaveVolatileRegs() {
// Save off volatile registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r8);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r9);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r10);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r11);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm1);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm2);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm3);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm4);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm5);
}
void X64ThunkEmitter::EmitLoadVolatileRegs() {
// Load volatile registers from our stack frame.
movaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
movaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
movaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
movaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
movaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
}
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
// Preserve nonvolatile registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm9);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm10);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm11);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[6])], xmm12);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
movaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
}
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
movaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
movaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
movaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
movaps(xmm9, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
movaps(xmm10, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
movaps(xmm11, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
movaps(xmm12, qword[rsp + offsetof(StackLayout::Thunk, xmm[6])]);
movaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
movaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
movaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
}
} // namespace x64
} // namespace backend
} // namespace cpu
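A note on the frame the new helpers assume: EmitSaveNonvolatileRegs/EmitLoadNonvolatileRegs index r[0..8] and xmm[0..9], and the volatile pair indexes r[0..5] and xmm[0..4], so the StackLayout::THUNK_STACK_SIZE reservation must leave room for at least nine 8-byte integer slots and ten 16-byte-aligned XMM slots. The struct below only illustrates that constraint; it is not the actual StackLayout::Thunk definition.

#include <cstdint>

// Hypothetical stand-in for StackLayout::Thunk, sized to what the helpers
// above actually touch: rbx, rcx, rbp, rsi, rdi, r12-r15 and xmm6-xmm15
// (or rcx, rdx, r8-r11 and xmm1-xmm5 in the volatile case).
struct alignas(16) ThunkFrameSketch {
  uint64_t r[9];                    // integer register slots
  alignas(16) uint8_t xmm[10][16];  // 16-byte slots, movaps requires alignment
};
static_assert(sizeof(ThunkFrameSketch) % 16 == 0,
              "keep the thunk frame a multiple of 16 bytes for rsp alignment");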

View File

@ -224,6 +224,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, size_t* out_stack_size) {
const Instr* new_tail = instr;
if (!SelectSequence(this, instr, &new_tail)) {
// No sequence found!
// NOTE: If you encounter this after adding a new instruction, do a full
// rebuild!
assert_always();
XELOGE("Unable to process HIR opcode %s", instr->opcode->name);
break;
@ -458,16 +460,15 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
auto builtin_function = static_cast<const BuiltinFunction*>(function);
if (builtin_function->handler()) {
undefined = false;
// rcx = context
// rdx = target host function
// r8 = arg0
// r9 = arg1
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(builtin_function->handler()));
mov(r8, reinterpret_cast<uint64_t>(builtin_function->arg0()));
mov(r9, reinterpret_cast<uint64_t>(builtin_function->arg1()));
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, reinterpret_cast<uint64_t>(builtin_function->handler()));
mov(rdx, reinterpret_cast<uint64_t>(builtin_function->arg0()));
mov(r8, reinterpret_cast<uint64_t>(builtin_function->arg1()));
call(rax);
// rax = host return
}
@ -475,13 +476,15 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
auto extern_function = static_cast<const GuestFunction*>(function);
if (extern_function->extern_handler()) {
undefined = false;
// rcx = context
// rdx = target host function
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, kernel_state)]);
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
mov(rdx,
qword[GetContextReg() + offsetof(ppc::PPCContext, kernel_state)]);
call(rax);
// rax = host return
}
@ -518,15 +521,13 @@ void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
}
void X64Emitter::CallNativeSafe(void* fn) {
// rcx = context
// rdx = target function
// r8 = arg0
// r9 = arg1
// r10 = arg2
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, GetContextReg());
mov(rdx, reinterpret_cast<uint64_t>(fn));
mov(rcx, reinterpret_cast<uint64_t>(fn));
call(rax);
// rax = host return
}
@ -536,6 +537,19 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], rax);
}
Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
if (param == 0)
return rdx;
else if (param == 1)
return r8;
else if (param == 2)
return r9;
assert_always();
return r9;
}
// Important: If you change these, you must update the thunks in x64_backend.cc!
Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; }
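For orientation, this is an assumed shape for an extern handler reached through the rewritten thunk; it is a sketch, not a signature copied from the repository. The CallExtern hunk above loads the handler into rcx and kernel_state into GetNativeParam(0) (rdx), and the thunk swaps the PPC context back into rcx before the call, so the handler still sees the context as its first C argument and returns its result in rax.

#include <cstdint>

// Illustrative handler shape only - the parameter and return types are
// assumptions, not copied from xenia's headers.
uint64_t ExampleExternHandler(void* ppc_context, void* kernel_state) {
  // A real handler would service the guest call using the kernel state that
  // the emitter fetched from PPCContext::kernel_state above.
  return 0;  // host return value, delivered back to the jitted code in rax
}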

View File

@ -187,6 +187,8 @@ class X64Emitter : public Xbyak::CodeGenerator {
void CallNativeSafe(void* fn);
void SetReturnAddress(uint64_t value);
Xbyak::Reg64 GetNativeParam(uint32_t param);
Xbyak::Reg64 GetContextReg();
Xbyak::Reg64 GetMembaseReg();
void ReloadContext();

View File

@ -285,8 +285,8 @@ struct LOAD_CONTEXT_I8
auto addr = ComputeContextAddress(e, i.src1);
e.mov(i.dest, e.byte[addr]);
if (IsTracingData()) {
e.mov(e.r8, e.byte[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(0), i.src1.value);
e.mov(e.GetNativeParam(1), e.byte[addr]);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadI8));
}
}
@ -297,8 +297,8 @@ struct LOAD_CONTEXT_I16
auto addr = ComputeContextAddress(e, i.src1);
e.mov(i.dest, e.word[addr]);
if (IsTracingData()) {
e.mov(e.r8, e.word[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.word[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadI16));
}
}
@ -309,8 +309,8 @@ struct LOAD_CONTEXT_I32
auto addr = ComputeContextAddress(e, i.src1);
e.mov(i.dest, e.dword[addr]);
if (IsTracingData()) {
e.mov(e.r8, e.dword[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.dword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadI32));
}
}
@ -321,8 +321,8 @@ struct LOAD_CONTEXT_I64
auto addr = ComputeContextAddress(e, i.src1);
e.mov(i.dest, e.qword[addr]);
if (IsTracingData()) {
e.mov(e.r8, e.qword[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.qword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadI64));
}
}
@ -333,8 +333,8 @@ struct LOAD_CONTEXT_F32
auto addr = ComputeContextAddress(e, i.src1);
e.vmovss(i.dest, e.dword[addr]);
if (IsTracingData()) {
e.lea(e.r8, e.dword[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.dword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadF32));
}
}
@ -345,8 +345,8 @@ struct LOAD_CONTEXT_F64
auto addr = ComputeContextAddress(e, i.src1);
e.vmovsd(i.dest, e.qword[addr]);
if (IsTracingData()) {
e.lea(e.r8, e.qword[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.qword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadF64));
}
}
@ -357,8 +357,8 @@ struct LOAD_CONTEXT_V128
auto addr = ComputeContextAddress(e, i.src1);
e.vmovaps(i.dest, e.ptr[addr]);
if (IsTracingData()) {
e.lea(e.r8, e.ptr[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadV128));
}
}
@ -382,8 +382,8 @@ struct STORE_CONTEXT_I8
e.mov(e.byte[addr], i.src2);
}
if (IsTracingData()) {
e.mov(e.r8, e.byte[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.byte[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreI8));
}
}
@ -399,8 +399,8 @@ struct STORE_CONTEXT_I16
e.mov(e.word[addr], i.src2);
}
if (IsTracingData()) {
e.mov(e.r8, e.word[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.word[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreI16));
}
}
@ -416,8 +416,8 @@ struct STORE_CONTEXT_I32
e.mov(e.dword[addr], i.src2);
}
if (IsTracingData()) {
e.mov(e.r8, e.dword[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.dword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreI32));
}
}
@ -433,8 +433,8 @@ struct STORE_CONTEXT_I64
e.mov(e.qword[addr], i.src2);
}
if (IsTracingData()) {
e.mov(e.r8, e.qword[addr]);
e.mov(e.rdx, i.src1.value);
e.mov(e.GetNativeParam(1), e.qword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreI64));
}
}
@ -450,8 +450,8 @@ struct STORE_CONTEXT_F32
e.vmovss(e.dword[addr], i.src2);
}
if (IsTracingData()) {
e.lea(e.r8, e.dword[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.dword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreF32));
}
}
@ -467,8 +467,8 @@ struct STORE_CONTEXT_F64
e.vmovsd(e.qword[addr], i.src2);
}
if (IsTracingData()) {
e.lea(e.r8, e.qword[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.qword[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreF64));
}
}
@ -485,8 +485,8 @@ struct STORE_CONTEXT_V128
e.vmovaps(e.ptr[addr], i.src2);
}
if (IsTracingData()) {
e.lea(e.r8, e.ptr[addr]);
e.mov(e.rdx, i.src1.value);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.mov(e.GetNativeParam(0), i.src1.value);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreV128));
}
}
@ -505,13 +505,13 @@ struct LOAD_MMIO_I32
// uint64_t (context, addr)
auto mmio_range = reinterpret_cast<MMIORange*>(i.src1.value);
auto read_address = uint32_t(i.src2.value);
e.mov(e.r8, uint64_t(mmio_range->callback_context));
e.mov(e.r9d, read_address);
e.mov(e.GetNativeParam(0), uint64_t(mmio_range->callback_context));
e.mov(e.GetNativeParam(1).cvt32(), read_address);
e.CallNativeSafe(reinterpret_cast<void*>(mmio_range->read));
e.bswap(e.eax);
e.mov(i.dest, e.eax);
if (IsTracingData()) {
e.mov(e.r8, i.dest);
e.mov(e.GetNativeParam(0), i.dest);
e.mov(e.edx, read_address);
e.CallNative(reinterpret_cast<void*>(TraceContextLoadI32));
}
@ -530,20 +530,20 @@ struct STORE_MMIO_I32
// void (context, addr, value)
auto mmio_range = reinterpret_cast<MMIORange*>(i.src1.value);
auto write_address = uint32_t(i.src2.value);
e.mov(e.r8, uint64_t(mmio_range->callback_context));
e.mov(e.r9d, write_address);
e.mov(e.GetNativeParam(0), uint64_t(mmio_range->callback_context));
e.mov(e.GetNativeParam(1).cvt32(), write_address);
if (i.src3.is_constant) {
e.mov(e.r10d, xe::byte_swap(i.src3.constant()));
e.mov(e.GetNativeParam(2).cvt32(), xe::byte_swap(i.src3.constant()));
} else {
e.mov(e.r10d, i.src3);
e.bswap(e.r10d);
e.mov(e.GetNativeParam(2).cvt32(), i.src3);
e.bswap(e.GetNativeParam(2).cvt32());
}
e.CallNativeSafe(reinterpret_cast<void*>(mmio_range->write));
if (IsTracingData()) {
if (i.src3.is_constant) {
e.mov(e.r8d, i.src3.constant());
e.mov(e.GetNativeParam(0).cvt32(), i.src3.constant());
} else {
e.mov(e.r8d, i.src3);
e.mov(e.GetNativeParam(0).cvt32(), i.src3);
}
e.mov(e.edx, write_address);
e.CallNative(reinterpret_cast<void*>(TraceContextStoreI32));
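The comments "uint64_t (context, addr)" and "void (context, addr, value)" above describe the MMIO callbacks as seen from the emitter; after this change the callback context, address, and value ride in GetNativeParam(0..2) and the thunk re-inserts the PPC context as the leading argument. The signatures below are assumptions that restate that mapping, not declarations taken from the MMIO headers.

#include <cstdint>

// Assumed callback shapes; parameter names and exact types are illustrative.
uint64_t ExampleMmioRead(void* ppc_context, void* callback_context,
                         uint32_t addr) {
  return 0;  // the emitted code byte-swaps the result after the call
}
void ExampleMmioWrite(void* ppc_context, void* callback_context, uint32_t addr,
                      uint32_t value) {
  // value arrives already byte-swapped by the emitted code
}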
@ -708,8 +708,8 @@ struct LOAD_I8 : Sequence<LOAD_I8, I<OPCODE_LOAD, I8Op, I64Op>> {
auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(i.dest, e.byte[addr]);
if (IsTracingData()) {
e.mov(e.r8b, i.dest);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt8(), i.dest);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadI8));
}
}
@ -728,8 +728,8 @@ struct LOAD_I16 : Sequence<LOAD_I16, I<OPCODE_LOAD, I16Op, I64Op>> {
e.mov(i.dest, e.word[addr]);
}
if (IsTracingData()) {
e.mov(e.r8w, i.dest);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt16(), i.dest);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadI16));
}
}
@ -748,8 +748,8 @@ struct LOAD_I32 : Sequence<LOAD_I32, I<OPCODE_LOAD, I32Op, I64Op>> {
e.mov(i.dest, e.dword[addr]);
}
if (IsTracingData()) {
e.mov(e.r8d, i.dest);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt32(), i.dest);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadI32));
}
}
@ -768,8 +768,8 @@ struct LOAD_I64 : Sequence<LOAD_I64, I<OPCODE_LOAD, I64Op, I64Op>> {
e.mov(i.dest, e.qword[addr]);
}
if (IsTracingData()) {
e.mov(e.r8, i.dest);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1), i.dest);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadI64));
}
}
@ -782,8 +782,8 @@ struct LOAD_F32 : Sequence<LOAD_F32, I<OPCODE_LOAD, F32Op, I64Op>> {
assert_always("not implemented yet");
}
if (IsTracingData()) {
e.lea(e.r8, e.dword[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.dword[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadF32));
}
}
@ -796,8 +796,8 @@ struct LOAD_F64 : Sequence<LOAD_F64, I<OPCODE_LOAD, F64Op, I64Op>> {
assert_always("not implemented yet");
}
if (IsTracingData()) {
e.lea(e.r8, e.qword[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.qword[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadF64));
}
}
@ -812,8 +812,8 @@ struct LOAD_V128 : Sequence<LOAD_V128, I<OPCODE_LOAD, V128Op, I64Op>> {
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteSwapMask));
}
if (IsTracingData()) {
e.lea(e.r8, e.ptr[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryLoadV128));
}
}
@ -835,8 +835,8 @@ struct STORE_I8 : Sequence<STORE_I8, I<OPCODE_STORE, VoidOp, I64Op, I8Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8b, e.byte[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt8(), e.byte[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreI8));
}
}
@ -860,8 +860,8 @@ struct STORE_I16 : Sequence<STORE_I16, I<OPCODE_STORE, VoidOp, I64Op, I16Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8w, e.word[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt16(), e.word[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreI16));
}
}
@ -885,8 +885,8 @@ struct STORE_I32 : Sequence<STORE_I32, I<OPCODE_STORE, VoidOp, I64Op, I32Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8d, e.dword[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1).cvt32(), e.dword[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreI32));
}
}
@ -910,8 +910,8 @@ struct STORE_I64 : Sequence<STORE_I64, I<OPCODE_STORE, VoidOp, I64Op, I64Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8, e.qword[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(1), e.qword[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreI64));
}
}
@ -931,8 +931,8 @@ struct STORE_F32 : Sequence<STORE_F32, I<OPCODE_STORE, VoidOp, I64Op, F32Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreF32));
}
}
@ -952,8 +952,8 @@ struct STORE_F64 : Sequence<STORE_F64, I<OPCODE_STORE, VoidOp, I64Op, F64Op>> {
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreF64));
}
}
@ -976,8 +976,8 @@ struct STORE_V128
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]);
e.lea(e.rdx, e.ptr[addr]);
e.lea(e.GetNativeParam(1), e.ptr[addr]);
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreV128));
}
}
@ -1038,9 +1038,9 @@ struct MEMSET_I64_I8_I64
}
if (IsTracingData()) {
addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r9, i.src3.constant());
e.mov(e.r8, i.src2.constant());
e.lea(e.rdx, e.ptr[addr]);
e.mov(e.GetNativeParam(2), i.src3.constant());
e.mov(e.GetNativeParam(1), i.src2.constant());
e.lea(e.GetNativeParam(0), e.ptr[addr]);
e.CallNative(reinterpret_cast<void*>(TraceMemset));
}
}

View File

@ -710,11 +710,11 @@ struct VECTOR_SHL_V128
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint8_t>));
e.vmovaps(i.dest, e.xmm0);
}
@ -768,11 +768,11 @@ struct VECTOR_SHL_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint16_t>));
e.vmovaps(i.dest, e.xmm0);
@ -845,11 +845,11 @@ struct VECTOR_SHL_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShl<uint32_t>));
e.vmovaps(i.dest, e.xmm0);
@ -902,11 +902,11 @@ struct VECTOR_SHR_V128
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint8_t>));
e.vmovaps(i.dest, e.xmm0);
}
@ -952,11 +952,11 @@ struct VECTOR_SHR_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint16_t>));
e.vmovaps(i.dest, e.xmm0);
@ -1029,11 +1029,11 @@ struct VECTOR_SHR_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<uint32_t>));
e.vmovaps(i.dest, e.xmm0);
@ -1069,11 +1069,11 @@ struct VECTOR_SHA_V128
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int8_t>));
e.vmovaps(i.dest, e.xmm0);
}
@ -1119,11 +1119,11 @@ struct VECTOR_SHA_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int16_t>));
e.vmovaps(i.dest, e.xmm0);
@ -1181,11 +1181,11 @@ struct VECTOR_SHA_V128
e.L(emu);
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateVectorShr<int32_t>));
e.vmovaps(i.dest, e.xmm0);
@ -1223,12 +1223,12 @@ struct VECTOR_ROTATE_LEFT_V128
switch (i.instr->flags) {
case INT8_TYPE:
// TODO(benvanik): native version (with shift magic).
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
@ -1236,12 +1236,12 @@ struct VECTOR_ROTATE_LEFT_V128
break;
case INT16_TYPE:
// TODO(benvanik): native version (with shift magic).
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
@ -1264,12 +1264,12 @@ struct VECTOR_ROTATE_LEFT_V128
e.vpor(i.dest, e.xmm1);
} else {
// TODO(benvanik): non-AVX2 native version.
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
@ -1338,22 +1338,22 @@ struct VECTOR_AVERAGE
if (is_unsigned) {
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorAverage<uint32_t>));
e.vmovaps(i.dest, e.xmm0);
} else {
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulateVectorAverage<int32_t>));
e.vmovaps(i.dest, e.xmm0);
@ -1888,7 +1888,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
} else {
src = i.src1;
}
e.lea(e.r8, e.StashXmm(0, src));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateFLOAT16_2));
e.vmovaps(i.dest, e.xmm0);
}
@ -1928,7 +1928,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
} else {
src = i.src1;
}
e.lea(e.r8, e.StashXmm(0, src));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateFLOAT16_4));
e.vmovaps(i.dest, e.xmm0);
}
@ -2032,19 +2032,19 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// unsigned -> unsigned + saturate
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(
reinterpret_cast<void*>(EmulatePack8_IN_16_UN_UN_SAT));
e.vmovaps(i.dest, e.xmm0);
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask));
} else {
// unsigned -> unsigned
e.lea(e.r9, e.StashXmm(1, i.src2));
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePack8_IN_16_UN_UN));
e.vmovaps(i.dest, e.xmm0);
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMByteOrderMask));
@ -2296,7 +2296,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
} else {
src = i.src1;
}
e.lea(e.r8, e.StashXmm(0, src));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateFLOAT16_2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2332,7 +2332,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
} else {
src = i.src1;
}
e.lea(e.r8, e.StashXmm(0, src));
e.lea(e.GetNativeParam(0), e.StashXmm(0, src));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateFLOAT16_4));
e.vmovaps(i.dest, e.xmm0);
}

View File

@ -2360,7 +2360,7 @@ struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2374,7 +2374,7 @@ struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2389,7 +2389,7 @@ struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
return _mm_load_ps(values);
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2411,7 +2411,7 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2425,7 +2425,7 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_always();
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2440,7 +2440,7 @@ struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
return _mm_load_ps(values);
}
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
e.vmovaps(i.dest, e.xmm0);
}
@ -2705,11 +2705,11 @@ struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.mov(e.r9, i.src2.constant());
e.mov(e.GetNativeParam(1), i.src2.constant());
} else {
e.mov(e.r9, i.src2);
e.mov(e.GetNativeParam(1), i.src2);
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
e.vmovaps(i.dest, e.xmm0);
}
@ -2782,11 +2782,11 @@ struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.mov(e.r9, i.src2.constant());
e.mov(e.GetNativeParam(1), i.src2.constant());
} else {
e.mov(e.r9, i.src2);
e.mov(e.GetNativeParam(1), i.src2);
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
e.vmovaps(i.dest, e.xmm0);
}