[x64] Fix corruption of src1 in calls to EmulateVectorRotateLeft.
To mitigate this mistake in the future, implement new StashConstantXmm functions.
This commit is contained in:
parent b992cf430e
commit 55c4a1e4cb
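Why src1 was being corrupted: each rotate sequence stashed src1 into stash slot 0 and then, for a constant src2, called LoadConstantXmm, which (as the comment added in the first hunk warns) can itself spill the constant through stash slot 0, overwriting the stashed src1 before EmulateVectorRotateLeft ever reads it. A minimal host-side sketch of that clobber, under that stated assumption about LoadConstantXmm; the slot arithmetic mirrors the `rsp + kStashOffset + index * 16` addressing used by the new helpers:

```cpp
#include <cstdint>
#include <cstdio>

// Four 16-byte slots, standing in for [rsp + kStashOffset + index * 16].
static uint64_t stash[4][2];

void StashXmm(int index, uint64_t lo, uint64_t hi) {
  stash[index][0] = lo;
  stash[index][1] = hi;
}

// Assumed behavior of LoadConstantXmm for a general vec128_t constant:
// the value bounces through stash slot 0 on its way into a register.
void LoadConstantXmm(uint64_t lo, uint64_t hi) { StashXmm(0, lo, hi); }

int main() {
  StashXmm(0, 0x1111111111111111ull, 0x1111111111111111ull);      // stash src1
  LoadConstantXmm(0x2222222222222222ull, 0x2222222222222222ull);  // load src2
  // Slot 0 no longer holds src1; the native helper would read 0x2222...
  std::printf("slot0.lo = %016llx\n",
              static_cast<unsigned long long>(stash[0][0]));
  return 0;
}
```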
@@ -784,6 +784,7 @@ Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) {
                        sizeof(vec128_t) * id)];
 }
 
+// Implies possible StashXmm(0, ...)!
 void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) {
   // https://www.agner.org/optimize/optimizing_assembly.pdf
   // 13.4 Generating constants
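The new comment is the guard rail for the underlying hazard: once a caller has stashed something at slot 0, a later LoadConstantXmm of a non-trivial constant may silently overwrite it, which is exactly the trap the rotate sequences below fell into.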
@@ -846,6 +847,35 @@ Xbyak::Address X64Emitter::StashXmm(int index, const Xbyak::Xmm& r) {
   return addr;
 }
 
+Xbyak::Address X64Emitter::StashConstantXmm(int index, float v) {
+  union {
+    float f;
+    uint32_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, double v) {
+  union {
+    double d;
+    uint64_t i;
+  } x = {v};
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, x.i);
+  MovMem64(addr + 8, 0);
+  return ptr[addr];
+}
+
+Xbyak::Address X64Emitter::StashConstantXmm(int index, const vec128_t& v) {
+  auto addr = rsp + kStashOffset + (index * 16);
+  MovMem64(addr, v.low);
+  MovMem64(addr + 8, v.high);
+  return ptr[addr];
+}
+
 }  // namespace x64
 }  // namespace backend
 }  // namespace cpu
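All three overloads write the constant's bits straight into the requested stash slot with MovMem64 (the float/double forms zero the upper eight bytes, leaving the scalar in the low lane), so no XMM register and no slot-0 spill are involved. A sketch of the calling pattern they enable, modeled on the VECTOR_ROTATE_LEFT_V128 hunks below; EmitCallWithStashedOperands is a hypothetical wrapper, not a function from this commit:

```cpp
// Hypothetical helper showing the safe operand-stashing order.
// `e` and `i` come from the surrounding sequence machinery.
template <typename EmitArgType>
void EmitCallWithStashedOperands(X64Emitter& e, const EmitArgType& i,
                                 void* emulate_fn) {
  // A constant src2 goes straight into stash slot 1 - no register needed.
  if (i.src2.is_constant) {
    e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
  } else {
    e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
  }
  // Stash src1 last: nothing after this point can clobber slot 0.
  e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
  e.CallNativeSafe(emulate_fn);
  e.vmovaps(i.dest, e.xmm0);  // native helpers return their result in xmm0
}
```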
@@ -214,6 +214,9 @@ class X64Emitter : public Xbyak::CodeGenerator {
   void LoadConstantXmm(Xbyak::Xmm dest, double v);
   void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v);
   Xbyak::Address StashXmm(int index, const Xbyak::Xmm& r);
+  Xbyak::Address StashConstantXmm(int index, float v);
+  Xbyak::Address StashConstantXmm(int index, double v);
+  Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
 
   bool IsFeatureEnabled(uint32_t feature_flag) const {
     return (feature_flags_ & feature_flag) != 0;
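The declarations mirror the existing LoadConstantXmm overload set, so sequence code can swap a load-then-stash pair for a single StashConstantXmm call without changing shape.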
@@ -1223,26 +1223,24 @@ struct VECTOR_ROTATE_LEFT_V128
     switch (i.instr->flags) {
       case INT8_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint8_t>));
         e.vmovaps(i.dest, e.xmm0);
         break;
       case INT16_TYPE:
         // TODO(benvanik): native version (with shift magic).
-        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         if (i.src2.is_constant) {
-          e.LoadConstantXmm(e.xmm0, i.src2.constant());
-          e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+          e.lea(e.GetNativeParam(1), e.StashConstantXmm(1, i.src2.constant()));
         } else {
           e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
         }
+        e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
         e.CallNativeSafe(
             reinterpret_cast<void*>(EmulateVectorRotateLeft<uint16_t>));
         e.vmovaps(i.dest, e.xmm0);
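Two changes per case: the constant path now writes src2 directly into slot 1 via StashConstantXmm instead of routing it through xmm0 with LoadConstantXmm, and the StashXmm(0, i.src1) moves below the src2 handling, so even a helper that spills through slot 0 can no longer corrupt the stashed src1.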
@@ -1264,13 +1262,13 @@ struct VECTOR_ROTATE_LEFT_V128
           e.vpor(i.dest, e.xmm1);
         } else {
           // TODO(benvanik): non-AVX2 native version.
-          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           if (i.src2.is_constant) {
-            e.LoadConstantXmm(e.xmm0, i.src2.constant());
-            e.lea(e.GetNativeParam(1), e.StashXmm(1, e.xmm0));
+            e.lea(e.GetNativeParam(1),
+                  e.StashConstantXmm(1, i.src2.constant()));
           } else {
             e.lea(e.GetNativeParam(1), e.StashXmm(1, i.src2));
           }
+          e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
           e.CallNativeSafe(
               reinterpret_cast<void*>(EmulateVectorRotateLeft<uint32_t>));
           e.vmovaps(i.dest, e.xmm0);
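For reference, a hedged model of what a scalar fallback like EmulateVectorRotateLeft&lt;T&gt; has to compute: the real helper receives the two stashed vectors through the pointers set up with GetNativeParam, while this standalone version just takes arrays. The masked-shift form avoids undefined behavior on a zero rotate amount:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Per-lane rotate-left over a 16-byte vector, one lane at a time.
template <typename T>
void EmulateVectorRotateLeftModel(T* value, const T* amount,
                                  std::size_t lanes) {
  const unsigned bits = sizeof(T) * 8;
  for (std::size_t n = 0; n < lanes; ++n) {
    unsigned r = static_cast<unsigned>(amount[n]) % bits;
    value[n] = static_cast<T>((value[n] << r) |
                              (value[n] >> ((bits - r) % bits)));
  }
}

int main() {
  uint8_t v[16] = {0x81};  // remaining lanes zero-initialized
  uint8_t r[16] = {1};
  EmulateVectorRotateLeftModel(v, r, 16);
  std::printf("%02x\n", v[0]);  // 0x81 rotated left by 1 -> 03
  return 0;
}
```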