[x64] Add AVX512 optimization for `VECTOR_ROTATE_LEFT(Int32)`
`vprolvd` is an almost 1:1 analog of this opcode and can be conditionally emitted when the host supports AVX512{F,VL}. The AltiVec docs say that `vrl{bhw}` uses only the low-order log2(n) bits of each shift element, where n is the element size in bits. [vprolvd](https://www.felixcloutier.com/x86/vprold:vprolvd:vprolq:vprolvq) takes each per-element rotate count modulo the element size in bits, which is the same as keeping only the low-order log2(n) bits. So `vrlw` maps exactly to `vprolvd`.
This commit is contained in:
parent
7e894d10a7
commit
337f0b2948
|
@ -2,7 +2,7 @@
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
* Copyright 2018 Xenia Developers. All rights reserved. *
|
* Copyright 2022 Xenia Developers. All rights reserved. *
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*/
|
*/
|
||||||
|
@ -1287,7 +1287,6 @@ static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
|
||||||
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
return _mm_load_si128(reinterpret_cast<__m128i*>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(benvanik): AVX512 has a native variable rotate (rolv).
|
|
||||||
struct VECTOR_ROTATE_LEFT_V128
|
struct VECTOR_ROTATE_LEFT_V128
|
||||||
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
: Sequence<VECTOR_ROTATE_LEFT_V128,
|
||||||
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
I<OPCODE_VECTOR_ROTATE_LEFT, V128Op, V128Op, V128Op>> {
|
||||||
|
@ -1318,7 +1317,9 @@ struct VECTOR_ROTATE_LEFT_V128
|
||||||
e.vmovaps(i.dest, e.xmm0);
|
e.vmovaps(i.dest, e.xmm0);
|
||||||
break;
|
break;
|
||||||
case INT32_TYPE: {
|
case INT32_TYPE: {
|
||||||
if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) {
|
||||||
|
e.vprolvd(i.dest, i.src1, i.src2);
|
||||||
|
} else if (e.IsFeatureEnabled(kX64EmitAVX2)) {
|
||||||
Xmm temp = i.dest;
|
Xmm temp = i.dest;
|
||||||
if (i.dest == i.src1 || i.dest == i.src2) {
|
if (i.dest == i.src1 || i.dest == i.src2) {
|
||||||
temp = e.xmm2;
|
temp = e.xmm2;
|
||||||
|
@ -2683,4 +2684,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
|
||||||
} // namespace x64
|
} // namespace x64
|
||||||
} // namespace backend
|
} // namespace backend
|
||||||
} // namespace cpu
|
} // namespace cpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
Loading…
Reference in New Issue