diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
index dc9aa7186..b5f648989 100644
--- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc
@@ -671,7 +671,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB, VECTOR_SUB);
 // OPCODE_VECTOR_SHL
 // ============================================================================
 template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
+static __m128i EmulateVectorShl(void*, __m128i& src1, __m128i& src2) {
   alignas(16) T value[16 / sizeof(T)];
   alignas(16) T shamt[16 / sizeof(T)];
 
@@ -863,7 +863,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHL, VECTOR_SHL_V128);
 // OPCODE_VECTOR_SHR
 // ============================================================================
 template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
+static __m128i EmulateVectorShr(void*, __m128i& src1, __m128i& src2) {
   alignas(16) T value[16 / sizeof(T)];
   alignas(16) T shamt[16 / sizeof(T)];
 
@@ -1199,7 +1199,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA, VECTOR_SHA_V128);
 // OPCODE_VECTOR_ROTATE_LEFT
 // ============================================================================
 template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
+static __m128i EmulateVectorRotateLeft(void*, __m128i& src1, __m128i& src2) {
   alignas(16) T value[16 / sizeof(T)];
   alignas(16) T shamt[16 / sizeof(T)];
 
@@ -1289,7 +1289,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
 // OPCODE_VECTOR_AVERAGE
 // ============================================================================
 template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
-static __m128i EmulateVectorAverage(void*, __m128i src1, __m128i src2) {
+static __m128i EmulateVectorAverage(void*, __m128i& src1, __m128i& src2) {
   alignas(16) T src1v[16 / sizeof(T)];
   alignas(16) T src2v[16 / sizeof(T)];
   alignas(16) T value[16 / sizeof(T)];
@@ -1857,7 +1857,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
     //   ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
     e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
   }
-  static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
+  static __m128i EmulateFLOAT16_2(void*, __m128& src1) {
     alignas(16) float a[4];
     alignas(16) uint16_t b[8];
     _mm_store_ps(a, src1);
@@ -1898,7 +1898,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
       e.vmovaps(i.dest, e.xmm0);
     }
   }
-  static __m128i EmulateFLOAT16_4(void*, __m128 src1) {
+  static __m128i EmulateFLOAT16_4(void*, __m128& src1) {
     alignas(16) float a[4];
     alignas(16) uint16_t b[8];
     _mm_store_ps(a, src1);
@@ -2031,8 +2031,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
     // Merge XZ and YW.
     e.vorps(i.dest, e.xmm0);
   }
-  static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i src1,
-                                              __m128i src2) {
+  static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i& src1,
+                                              __m128i& src2) {
     alignas(16) uint16_t a[8];
     alignas(16) uint16_t b[8];
     alignas(16) uint8_t c[16];
@@ -2044,7 +2044,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
     }
     return _mm_load_si128(reinterpret_cast<__m128i*>(c));
   }
-  static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i src1, __m128i src2) {
+  static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i& src1, __m128i& src2) {
     alignas(16) uint8_t a[16];
     alignas(16) uint8_t b[16];
     alignas(16) uint8_t c[16];
@@ -2277,7 +2277,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
     e.vpor(i.dest, e.GetXmmConstPtr(XMMOne));
     // To convert to 0 to 1, games multiply by 0x47008081 and add 0xC7008081.
   }
-  static __m128 EmulateFLOAT16_2(void*, __m128i src1) {
+  static __m128 EmulateFLOAT16_2(void*, __m128i& src1) {
     alignas(16) uint16_t a[8];
     alignas(16) float b[4];
     _mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@@ -2336,7 +2336,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
       e.vmovaps(i.dest, e.xmm0);
     }
   }
-  static __m128 EmulateFLOAT16_4(void*, __m128i src1) {
+  static __m128 EmulateFLOAT16_4(void*, __m128i& src1) {
     alignas(16) uint16_t a[8];
     alignas(16) float b[4];
     _mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@@ -2616,4 +2616,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
 }  // namespace x64
 }  // namespace backend
 }  // namespace cpu
-}  // namespace xe
\ No newline at end of file
+}  // namespace xe
diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc
index 7d18cb4d0..1e4d193ad 100644
--- a/src/xenia/cpu/backend/x64/x64_sequences.cc
+++ b/src/xenia/cpu/backend/x64/x64_sequences.cc
@@ -2352,7 +2352,7 @@ EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);
 // TODO(benvanik): use approx here:
 // https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
 struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
-  static __m128 EmulatePow2(void*, __m128 src) {
+  static __m128 EmulatePow2(void*, __m128& src) {
     float src_value;
     _mm_store_ss(&src_value, src);
     float result = std::exp2(src_value);
@@ -2366,7 +2366,7 @@ struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
   }
 };
 struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
-  static __m128d EmulatePow2(void*, __m128d src) {
+  static __m128d EmulatePow2(void*, __m128d& src) {
     double src_value;
     _mm_store_sd(&src_value, src);
     double result = std::exp2(src_value);
@@ -2380,7 +2380,7 @@ struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
   }
 };
 struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
-  static __m128 EmulatePow2(void*, __m128 src) {
+  static __m128 EmulatePow2(void*, __m128& src) {
     alignas(16) float values[4];
     _mm_store_ps(values, src);
     for (size_t i = 0; i < 4; ++i) {
@@ -2403,7 +2403,7 @@ EMITTER_OPCODE_TABLE(OPCODE_POW2, POW2_F32, POW2_F64, POW2_V128);
 // https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
 // TODO(benvanik): this emulated fn destroys all xmm registers! don't do it!
 struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
-  static __m128 EmulateLog2(void*, __m128 src) {
+  static __m128 EmulateLog2(void*, __m128& src) {
     float src_value;
     _mm_store_ss(&src_value, src);
     float result = std::log2(src_value);
@@ -2417,7 +2417,7 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
   }
 };
 struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
-  static __m128d EmulateLog2(void*, __m128d src) {
+  static __m128d EmulateLog2(void*, __m128d& src) {
     double src_value;
     _mm_store_sd(&src_value, src);
     double result = std::log2(src_value);
@@ -2431,7 +2431,7 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
   }
 };
 struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
-  static __m128 EmulateLog2(void*, __m128 src) {
+  static __m128 EmulateLog2(void*, __m128& src) {
     alignas(16) float values[4];
     _mm_store_ps(values, src);
     for (size_t i = 0; i < 4; ++i) {
@@ -2713,7 +2713,7 @@ struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
     e.vmovaps(i.dest, e.xmm0);
   }
-  static __m128i EmulateShlV128(void*, __m128i src1, uint8_t src2) {
+  static __m128i EmulateShlV128(void*, __m128i& src1, uint8_t src2) {
     // Almost all instances are shamt = 1, but non-constant.
     // shamt is [0,7]
     uint8_t shamt = src2 & 0x7;
@@ -2790,7 +2790,7 @@ struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
     e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
     e.vmovaps(i.dest, e.xmm0);
  }
-  static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
+  static __m128i EmulateShrV128(void*, __m128i& src1, uint8_t src2) {
     // Almost all instances are shamt = 1, but non-constant.
     // shamt is [0,7]
     uint8_t shamt = src2 & 0x7;