diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc
index 29b29b805..91de2ef9b 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.cc
+++ b/src/xenia/cpu/backend/x64/x64_emitter.cc
@@ -784,6 +784,7 @@ Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
                                      sizeof(vec128_t) * id)];
 }
 
+// Implies possible StashXmm(0, ...)!
 void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
   // https://www.agner.org/optimize/optimizing_assembly.pdf
   // 13.4 Generating constants
@@ -846,6 +847,35 @@ Xbyak::Address X64Emitter::StashXmm(int index, const Xbyak::Xmm& r) {
   return addr;
 }
 
+Xbyak::Address X64Emitter::StashConstantXmm(int index, float v) {
+  union {  // Writes constant v into stash slot `index` via its raw bits.
+    float f;
+    uint32_t i;
+  } x = {v};  // initializes f; the same bits are read back through i
+  auto addr = rsp + kStashOffset + (index * 16);  // 16 bytes per index
+  MovMem64(addr, x.i);  // v's bits to slot start; presumably a 64-bit store
+  MovMem64(addr + 8, 0);  // zero at offset 8 of the slot
+  return ptr[addr];  // memory operand addressing the slot
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, double v) {
+  union {  // Writes constant v into stash slot `index` via its raw bits.
+    double d;
+    uint64_t i;
+  } x = {v};  // initializes d; the same bits are read back through i
+  auto addr = rsp + kStashOffset + (index * 16);  // 16 bytes per index
+  MovMem64(addr, x.i);  // v's bits to slot start; presumably a 64-bit store
+  MovMem64(addr + 8, 0);  // zero at offset 8 of the slot
+  return ptr[addr];  // memory operand addressing the slot
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, const vec128_t& v) {
+  auto addr = rsp + kStashOffset + (index * 16);  // 16 bytes per index
+  MovMem64(addr, v.low);  // v.low at the start of the slot
+  MovMem64(addr + 8, v.high);  // v.high at offset 8 of the slot
+  return ptr[addr];  // memory operand addressing the slot
+}
+
 }  // namespace x64
 }  // namespace backend
 }  // namespace cpu
diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h
index af38fb739..4f661a331 100644
--- a/src/xenia/cpu/backend/x64/x64_emitter.h
+++ b/src/xenia/cpu/backend/x64/x64_emitter.h
@@ -214,6 +214,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
   void LoadConstantXmm(Xbyak::Xmm dest, double v);
   void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
   Xbyak::Address StashXmm(int index, const Xbyak::Xmm& r);
+  Xbyak::Address StashConstantXmm(int index, float v);
+  Xbyak::Address StashConstantXmm(int index, double v);
+  Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
 
   bool IsFeatureEnabled(uint32_t feature_flag) const {
     return (feature_flags_ & feature_flag) != 0;
diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
index dc9aa7186..8cc36eb8d 100644
--- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
@@ -1223,26 +1223,24 @@ struct VECTOR_ROTATE_LEFT_V128
     switch (i.instr->flags) {
       case INT8_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast(EmulateVectorRotateLeft));
         e.vmovaps(i.dest, e.xmm0);
         break;
       case INT16_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast(EmulateVectorRotateLeft));
         e.vmovaps(i.dest, e.xmm0);
@@ -1264,13 +1262,13 @@ struct VECTOR_ROTATE_LEFT_V128
           e.vpor(i.dest, e.xmm1);
         } else {
           // TODO(benvanik): non-AVX2 native version.
-          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           if (i.src2.is_constant) {
-            e.LoadConstantXmm(e.xmm0, i.src2.constant());
-            e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+            e.lea(e.GetNativeParam(1),
+                  e.StashConstantXmm(1, i.src2.constant()));
           } else {
             e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
           }
+          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           e.CallNativeSafe(
               reinterpret_cast(EmulateVectorRotateLeft));
           e.vmovaps(i.dest, e.xmm0);