[x64] Fix corruption of src1 in calls to EmulateVectorRotateLeft.
To mitigate this kind of mistake in the future, new StashConstantXmm helper functions were implemented.
This commit is contained in:
parent
b992cf430e
commit
55c4a1e4cb
|
@ -784,6 +784,7 @@ Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
|
|||
sizeof(vec128_t) * id)];
|
||||
}
|
||||
|
||||
// Implies possible StashXmm(0, ...)!
|
||||
void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
|
||||
// https://www.agner.org/optimize/optimizing_assembly.pdf
|
||||
// 13.4 Generating constants
|
||||
|
@ -846,6 +847,35 @@ Xbyak::Address X64Emitter::StashXmm(int index, const Xbyak::Xmm& r) {
|
|||
return addr;
|
||||
}
|
||||
|
||||
// Writes the 32-bit pattern of `v` into stash slot `index` (low lane),
// zeroing the remaining 12 bytes, and returns the slot's address so it can
// be passed to native helpers without clobbering any XMM register.
Xbyak::Address X64Emitter::StashConstantXmm(int index, float v) {
  // Reinterpret the float's bits as an integer so MovMem64 can emit it.
  union {
    float f;
    uint32_t i;
  } bits = {v};
  const auto slot = rsp + kStashOffset + (index * 16);
  // Low 8 bytes: float bits zero-extended; high 8 bytes: cleared.
  MovMem64(slot, bits.i);
  MovMem64(slot + 8, 0);
  return ptr[slot];
}
|
||||
|
||||
// Writes the 64-bit pattern of `v` into stash slot `index` (low lane),
// zeroing the upper 8 bytes, and returns the slot's address so it can be
// passed to native helpers without clobbering any XMM register.
Xbyak::Address X64Emitter::StashConstantXmm(int index, double v) {
  // Reinterpret the double's bits as an integer so MovMem64 can emit it.
  union {
    double d;
    uint64_t i;
  } bits = {v};
  const auto slot = rsp + kStashOffset + (index * 16);
  // Low 8 bytes: double bits; high 8 bytes: cleared.
  MovMem64(slot, bits.i);
  MovMem64(slot + 8, 0);
  return ptr[slot];
}
|
||||
|
||||
// Writes the full 128-bit constant `v` into stash slot `index` and returns
// the slot's address so it can be passed to native helpers without
// clobbering any XMM register.
Xbyak::Address X64Emitter::StashConstantXmm(int index, const vec128_t& v) {
  const auto slot = rsp + kStashOffset + (index * 16);
  // Emit both 64-bit halves of the vector constant into the stash slot.
  MovMem64(slot, v.low);
  MovMem64(slot + 8, v.high);
  return ptr[slot];
}
|
||||
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
|
|
|
@ -214,6 +214,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
void LoadConstantXmm(Xbyak::Xmm dest, double v);
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
|
||||
Xbyak::Address StashXmm(int index, const Xbyak::Xmm& r);
|
||||
Xbyak::Address StashConstantXmm(int index, float v);
|
||||
Xbyak::Address StashConstantXmm(int index, double v);
|
||||
Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
|
||||
|
||||
bool IsFeatureEnabled(uint32_t feature_flag) const {
|
||||
return (feature_flags_ & feature_flag) != 0;
|
||||
|
|
|
@ -1223,26 +1223,24 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
switch (i.instr->flags) {
|
||||
case INT8_TYPE:
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
|
||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
||||
} else {
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||
}
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
e.CallNativeSafe(
|
||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
|
||||
e.vmovaps(i.dest, e.xmm0);
|
||||
break;
|
||||
case INT16_TYPE:
|
||||
// TODO(benvanik): native version (with shift magic).
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
|
||||
e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
|
||||
} else {
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||
}
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
e.CallNativeSafe(
|
||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
|
||||
e.vmovaps(i.dest, e.xmm0);
|
||||
|
@ -1264,13 +1262,13 @@ struct VECTOR_ROTATE_LEFT_V128
|
|||
e.vpor(i.dest, e.xmm1);
|
||||
} else {
|
||||
// TODO(benvanik): non-AVX2 native version.
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
if (i.src2.is_constant) {
|
||||
e.LoadConstantXmm(e.xmm0, i.src2.constant());
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
|
||||
e.lea(e.GetNativeParam(1),
|
||||
e.StashConstantXmm(1, i.src2.constant()));
|
||||
} else {
|
||||
e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
|
||||
}
|
||||
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
||||
e.CallNativeSafe(
|
||||
reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
|
||||
e.vmovaps(i.dest, e.xmm0);
|
||||
|
|
Loading…
Reference in New Issue