[x64] Fix corruption of src1 in calls to EmulateVectorRotateLeft.

To mitigate this kind of mistake in the future, introduce new
StashConstantXmm helper functions.
Author: gibbed, 2019-11-30 18:45:25 -06:00 (committed by Gliniak)
Parent: b992cf430e
Commit: 55c4a1e4cb
3 changed files with 40 additions and 9 deletions
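What went wrong: the rotate-left sequences stashed src1 into stash slot 0 first and only then materialized src2's constant, but LoadConstantXmm may itself route a constant through stash slot 0 (see the "Implies possible StashXmm(0, ...)!" comment added below), overwriting the stashed src1 before the native helper reads it. The ordering problem can be modeled with the small standalone C++ sketch below; Vec128, stash_slots, and the two free functions are illustrative stand-ins, not the emitter's real types.

// Standalone model of the ordering bug (illustrative names only; this is
// not the emitter's real code).
#include <array>
#include <cstdint>
#include <cstdio>

struct Vec128 {
  uint64_t low;
  uint64_t high;
};

// Stand-in for the stash area at [rsp + kStashOffset + index * 16].
static std::array<Vec128, 2> stash_slots;

void StashXmm(int index, const Vec128& value) { stash_slots[index] = value; }

// Stand-in for LoadConstantXmm: per the comment in the real emitter, it may
// itself perform a StashXmm(0, ...) while materializing the constant.
void LoadConstantXmm(Vec128& dest, const Vec128& constant) {
  StashXmm(0, constant);
  dest = constant;
}

int main() {
  const Vec128 src1 = {0x1111111111111111ull, 0x2222222222222222ull};
  const Vec128 src2_const = {0x0303030303030303ull, 0x0303030303030303ull};
  Vec128 xmm0 = {};

  // Old emission order: src1 is stashed first, then the constant for src2
  // is materialized, clobbering slot 0 that the native call will read.
  StashXmm(0, src1);
  LoadConstantXmm(xmm0, src2_const);
  StashXmm(1, xmm0);
  std::printf("old order: slot0.low = %016llx (src1.low was %016llx)\n",
              static_cast<unsigned long long>(stash_slots[0].low),
              static_cast<unsigned long long>(src1.low));

  // Fixed emission order: handle src2 first (StashConstantXmm in the real
  // code writes the constant straight into slot 1), then stash src1 last.
  StashXmm(1, src2_const);
  StashXmm(0, src1);
  std::printf("new order: slot0.low = %016llx (src1.low was %016llx)\n",
              static_cast<unsigned long long>(stash_slots[0].low),
              static_cast<unsigned long long>(src1.low));
  return 0;
}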


@@ -784,6 +784,7 @@ Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
                         sizeof(vec128_t) * id)];
 }
 
+// Implies possible StashXmm(0, ...)!
 void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
   // https://www.agner.org/optimize/optimizing_assembly.pdf
   // 13.4 Generating constants
@@ -846,6 +847,35 @@ Xbyak::Address X64Emitter::StashXmm(int index, const Xbyak::Xmm& r) {
   return addr;
 }
 
+Xbyak::Address X64Emitter::StashConstantXmm(int index, float v) {
+  union {
+    float f;
+    uint32_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, double v) {
+  union {
+    double d;
+    uint64_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, const vec128_t& v) {
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, v.low);
+  MovMem64(addr + 8, v.high);
+  return ptr[addr];
+}
+
 } // namespace x64
 } // namespace backend
 } // namespace cpu

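For reference, here is a quick standalone check of what the new overloads leave in a stash slot; StashSlot and the two helper functions are invented for illustration. The float and double overloads place the value's bit pattern in the low qword (zero-extended) and clear the high qword, matching the MovMem64 pairs above. std::memcpy is used here instead of the patch's union purely to keep the sketch well-defined C++.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Invented stand-in for one 16-byte stash slot at
// [rsp + kStashOffset + index * 16].
struct StashSlot {
  uint64_t low;
  uint64_t high;
};

// Mirrors StashConstantXmm(int, float): the 32-bit pattern is zero-extended
// into the low qword and the high qword is cleared.
StashSlot StashFloatConstant(float v) {
  uint32_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  return {bits, 0};
}

// Mirrors StashConstantXmm(int, double).
StashSlot StashDoubleConstant(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  return {bits, 0};
}

int main() {
  StashSlot f = StashFloatConstant(1.0f);  // low should be 0x3f800000
  StashSlot d = StashDoubleConstant(1.0);  // low should be 0x3ff0000000000000
  std::printf("float  1.0f -> low=%016llx high=%016llx\n",
              static_cast<unsigned long long>(f.low),
              static_cast<unsigned long long>(f.high));
  std::printf("double 1.0  -> low=%016llx high=%016llx\n",
              static_cast<unsigned long long>(d.low),
              static_cast<unsigned long long>(d.high));
  return 0;
}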

@@ -214,6 +214,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
   void LoadConstantXmm(Xbyak::Xmm dest, double v);
   void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
   Xbyak::Address StashXmm(int index, const Xbyak::Xmm& r);
+  Xbyak::Address StashConstantXmm(int index, float v);
+  Xbyak::Address StashConstantXmm(int index, double v);
+  Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
 
   bool IsFeatureEnabled(uint32_t feature_flag) const {
     return (feature_flags_ & feature_flag) != 0;

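For context on the hunks below: EmulateVectorRotateLeft<T> is the native fallback invoked through CallNativeSafe with pointers to the two stash slots, rotating each lane of src1 left by the corresponding lane of src2 (masked to the lane width). The standalone sketch below reproduces only that per-lane behaviour for 8-bit lanes; the types, names, and calling convention are stand-ins, not the helper's actual signature.

// Standalone sketch of the per-lane rotate that EmulateVectorRotateLeft<uint8_t>
// is expected to perform (illustrative types and names, not the real helper).
#include <cstdint>
#include <cstdio>

struct Vec128 {
  uint8_t u8[16];
};

// Rotate each 8-bit lane of src1 left by the corresponding lane of src2,
// with the rotation amount masked to the lane width.
Vec128 RotateLeftPerLane(const Vec128& src1, const Vec128& src2) {
  Vec128 out{};
  for (int i = 0; i < 16; ++i) {
    unsigned shift = src2.u8[i] & 7u;
    unsigned v = src1.u8[i];
    out.u8[i] = static_cast<uint8_t>((v << shift) | (v >> ((8u - shift) & 7u)));
  }
  return out;
}

int main() {
  Vec128 a{};
  Vec128 b{};
  for (int i = 0; i < 16; ++i) {
    a.u8[i] = static_cast<uint8_t>(0x80 | i);  // high bit set so rotation shows
    b.u8[i] = static_cast<uint8_t>(i);         // rotate lane i left by i bits
  }
  Vec128 r = RotateLeftPerLane(a, b);
  for (int i = 0; i < 16; ++i) {
    std::printf("%02x ", static_cast<unsigned>(r.u8[i]));
  }
  std::printf("\n");
  return 0;
}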

@@ -1223,26 +1223,24 @@ struct VECTOR_ROTATE_LEFT_V128
     switch (i.instr->flags) {
       case INT8_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
         e.vmovaps(i.dest, e.xmm0);
         break;
       case INT16_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
         e.vmovaps(i.dest, e.xmm0);
@@ -1264,13 +1262,13 @@ struct VECTOR_ROTATE_LEFT_V128
           e.vpor(i.dest, e.xmm1);
         } else {
           // TODO(benvanik): non-AVX2 native version.
-          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           if (i.src2.is_constant) {
-            e.LoadConstantXmm(e.xmm0, i.src2.constant());
-            e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+            e.lea(e.GetNativeParam(1),
+                  e.StashConstantXmm(1, i.src2.constant()));
           } else {
             e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
           }
+          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           e.CallNativeSafe(
               reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
           e.vmovaps(i.dest, e.xmm0);