[x64] Fix corruption of src1 in calls to EmulateVectorRotateLeft.

To help prevent this kind of mistake in the future, new
StashConstantXmm helper functions were implemented.
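
Background, paraphrasing the diff below (read this as an interpretation of the change, not authoritative documentation): the INT8/INT16 and non-AVX2 INT32 paths stashed src1 into stash slot 0 and took its address for native parameter 0, and only then materialized a constant src2 via LoadConstantXmm, which, per the comment added in this commit, can itself go through StashXmm(0, ...). By the time EmulateVectorRotateLeft ran, parameter 0 could point at the freshly written constant instead of src1. The fix handles src2 first, introduces StashConstantXmm so constants are written straight into their own stash slot without detouring through xmm0, and stashes src1 last. The following self-contained C++ sketch models the ordering hazard with an ordinary array standing in for the stash area; the names and the two-slot layout are illustrative only, not emulator code:

#include <cstdint>
#include <cstdio>

// Illustrative model only, not emulator code: two 16-byte "stash" slots
// standing in for the emitter's rsp-relative scratch area.
static uint64_t stash[2][2];

// Models StashXmm(index, value): park a value in a slot, return its address.
static const uint64_t* stash_value(int index, uint64_t value) {
  stash[index][0] = value;
  stash[index][1] = 0;
  return stash[index];
}

// Models the hazard flagged by the new "Implies possible StashXmm(0, ...)!"
// comment: materializing a constant may spill through stash slot 0.
static uint64_t load_constant(uint64_t c) {
  stash_value(0, c);  // side effect: slot 0 is overwritten
  return c;
}

int main() {
  const uint64_t src1 = 0x1111;
  const uint64_t src2_constant = 0x2222;

  // Old ordering: src1 stashed first, constant for src2 materialized second.
  const uint64_t* param0 = stash_value(0, src1);
  stash_value(1, load_constant(src2_constant));
  std::printf("old order: param0 -> %llx (wanted %llx)\n",
              (unsigned long long)param0[0], (unsigned long long)src1);

  // Fixed ordering: src2 handled first, src1 stashed last, so nothing runs
  // between stashing src1 and the (modeled) native call.
  stash_value(1, load_constant(src2_constant));
  param0 = stash_value(0, src1);
  std::printf("new order: param0 -> %llx (wanted %llx)\n",
              (unsigned long long)param0[0], (unsigned long long)src1);
  return 0;
}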
gibbed 2019-11-30 18:45:25 -06:00 committed by Gliniak
parent b992cf430e
commit 55c4a1e4cb
3 changed files with 40 additions and 9 deletions


@@ -784,6 +784,7 @@ Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
                      sizeof(vec128_t) * id)];
 }
 
+// Implies possible StashXmm(0, ...)!
 void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
   // https://www.agner.org/optimize/optimizing_assembly.pdf
   // 13.4 Generating constants
@@ -846,6 +847,35 @@ Xbyak::Address X64Emitter::StashXmm(int index, const Xbyak::Xmm& r) {
   return addr;
 }
 
+Xbyak::Address X64Emitter::StashConstantXmm(int index, float v) {
+  union {
+    float f;
+    uint32_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, double v) {
+  union {
+    double d;
+    uint64_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, const vec128_t& v) {
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, v.low);
+  MovMem64(addr + 8, v.high);
+  return ptr[addr];
+}
+
 }  // namespace x64
 }  // namespace backend
 }  // namespace cpu
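
The new helpers above let a sequence pass the address of a constant operand without bouncing it through xmm0 (and therefore without the possible slot-0 detour inside LoadConstantXmm); the rotate-left sequences below use them as e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant())). The float and double overloads rely on union type punning to get at the raw bit pattern before it is written into the 16-byte stash slot; a minimal standalone illustration of that reinterpretation (not emulator code) is:

#include <cstdint>
#include <cstdio>

int main() {
  // Same trick as StashConstantXmm(int, float): alias a float with a
  // uint32_t to obtain its raw IEEE-754 bit pattern.
  union {
    float f;
    uint32_t i;
  } x = {1.0f};

  // The emitter then writes this pattern into the low lane of a 16-byte
  // stash slot and zero-fills the rest, so the slot reads back as
  // { v, 0, 0, 0 } when interpreted as four floats.
  uint64_t slot[2] = {x.i, 0};

  std::printf("bits of 1.0f = 0x%08x\n", x.i);  // 0x3f800000
  std::printf("stash slot   = %016llx %016llx\n",
              (unsigned long long)slot[0], (unsigned long long)slot[1]);
  return 0;
}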


@@ -214,6 +214,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
   void LoadConstantXmm(Xbyak::Xmm dest, double v);
   void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
   Xbyak::Address StashXmm(int index, const Xbyak::Xmm& r);
+  Xbyak::Address StashConstantXmm(int index, float v);
+  Xbyak::Address StashConstantXmm(int index, double v);
+  Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
 
   bool IsFeatureEnabled(uint32_t feature_flag) const {
     return (feature_flags_ & feature_flag) != 0;
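
These declarations parallel the LoadConstantXmm overloads directly above them, so a sequence can stash a float, double, or full vec128_t constant and get back an address suitable for GetNativeParam without touching any XMM register.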


@@ -1223,26 +1223,24 @@ struct VECTOR_ROTATE_LEFT_V128
     switch (i.instr->flags) {
       case INT8_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
         e.vmovaps(i.dest, e.xmm0);
         break;
       case INT16_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
         e.vmovaps(i.dest, e.xmm0);
@@ -1264,13 +1262,13 @@ struct VECTOR_ROTATE_LEFT_V128
           e.vpor(i.dest, e.xmm1);
         } else {
           // TODO(benvanik): non-AVX2 native version.
-          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           if (i.src2.is_constant) {
-            e.LoadConstantXmm(e.xmm0, i.src2.constant());
-            e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+            e.lea(e.GetNativeParam(1),
+                  e.StashConstantXmm(1, i.src2.constant()));
          } else {
            e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
          }
+          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
          e.CallNativeSafe(
              reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
          e.vmovaps(i.dest, e.xmm0);
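
For context on the helper being called: EmulateVectorRotateLeft<T> is the scalar fallback that receives the two stashed operands and rotates each element of src1 left by the corresponding element of src2, with the count masked to the element width. A rough, self-contained sketch of that per-element operation for byte lanes follows; it is purely illustrative and not the emulator's actual helper, which takes its operands through the stashed vec128_t pointers:

#include <array>
#include <cstdint>
#include <cstdio>

// Rotate-left of one element; the shift amount is masked to the element
// width, matching how a vector rotate treats out-of-range counts.
template <typename T>
T rotate_left(T v, unsigned n) {
  constexpr unsigned bits = sizeof(T) * 8;
  n &= bits - 1;
  return n ? static_cast<T>((v << n) | (v >> (bits - n))) : v;
}

int main() {
  // 16 byte lanes, as in the INT8_TYPE case of VECTOR_ROTATE_LEFT_V128.
  std::array<uint8_t, 16> src1{}, src2{}, dest{};
  for (size_t i = 0; i < 16; ++i) {
    src1[i] = 0x81;                     // 1000'0001
    src2[i] = static_cast<uint8_t>(i);  // rotate each lane by its index
  }
  for (size_t i = 0; i < 16; ++i) {
    dest[i] = rotate_left(src1[i], src2[i]);
  }
  for (uint8_t b : dest) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}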