Merge pull request #221 from DrChat/alt_pack
Vmaddfp test fix / Change some SSE instructions to AVX
This commit is contained in:
commit
89a499bb47
|
@ -5551,15 +5551,14 @@ EMITTER(SPLAT_I8, MATCH(I<OPCODE_SPLAT, V128<>, I8<>>)) {
|
||||||
} else {
|
} else {
|
||||||
if (i.src1.is_constant) {
|
if (i.src1.is_constant) {
|
||||||
e.mov(e.eax, i.src1.constant());
|
e.mov(e.eax, i.src1.constant());
|
||||||
e.movd(e.xmm0, e.eax);
|
e.vmovd(e.xmm0, e.eax);
|
||||||
} else {
|
} else {
|
||||||
e.movd(e.xmm0, i.src1.reg().cvt32());
|
e.vmovd(e.xmm0, i.src1.reg().cvt32());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Credits: VC++ compiler (i love you so much)
|
e.vpunpcklbw(e.xmm0, e.xmm0);
|
||||||
e.punpcklbw(e.xmm0, e.xmm0);
|
e.vpunpcklwd(e.xmm0, e.xmm0);
|
||||||
e.punpcklwd(e.xmm0, e.xmm0);
|
e.vpshufd(i.dest, e.xmm0, 0);
|
||||||
e.pshufd(i.dest, e.xmm0, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -5578,14 +5577,13 @@ EMITTER(SPLAT_I16, MATCH(I<OPCODE_SPLAT, V128<>, I16<>>)) {
|
||||||
} else {
|
} else {
|
||||||
if (i.src1.is_constant) {
|
if (i.src1.is_constant) {
|
||||||
e.mov(e.eax, i.src1.constant());
|
e.mov(e.eax, i.src1.constant());
|
||||||
e.movd(e.xmm0, e.eax);
|
e.vmovd(e.xmm0, e.eax);
|
||||||
} else {
|
} else {
|
||||||
e.movd(e.xmm0, i.src1.reg().cvt32());
|
e.vmovd(e.xmm0, i.src1.reg().cvt32());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Credits: VC++ compiler (i love you so much)
|
e.vpunpcklwd(e.xmm0, e.xmm0); // unpack low word data
|
||||||
e.punpcklwd(e.xmm0, e.xmm0); // unpack low word data
|
e.vpshufd(i.dest, e.xmm0, 0);
|
||||||
e.pshufd(i.dest, e.xmm0, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -5609,7 +5607,7 @@ EMITTER(SPLAT_I32, MATCH(I<OPCODE_SPLAT, V128<>, I32<>>)) {
|
||||||
e.vmovd(e.xmm0, i.src1.reg().cvt32());
|
e.vmovd(e.xmm0, i.src1.reg().cvt32());
|
||||||
}
|
}
|
||||||
|
|
||||||
e.pshufd(i.dest, e.xmm0, 0);
|
e.vpshufd(i.dest, e.xmm0, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -5632,7 +5630,7 @@ EMITTER(SPLAT_F32, MATCH(I<OPCODE_SPLAT, V128<>, F32<>>)) {
|
||||||
e.vmovd(i.dest, i.src1.reg().cvt32());
|
e.vmovd(i.dest, i.src1.reg().cvt32());
|
||||||
}
|
}
|
||||||
|
|
||||||
e.shufps(i.dest, i.dest, 0);
|
e.vshufps(i.dest, i.dest, i.dest, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -3,8 +3,37 @@ test_vmaddfp_1:
|
||||||
# 1.0, 1.5, 1.1, 1.9
|
# 1.0, 1.5, 1.1, 1.9
|
||||||
vmaddfp v3, v4, v4, v4
|
vmaddfp v3, v4, v4, v4
|
||||||
blr
|
blr
|
||||||
#_ REGISTER_OUT v3 [00000001, 00000001, 00000001, 00000001]
|
#_ REGISTER_OUT v3 [40000000, 40700000, 4013d70a, 40b051eb]
|
||||||
#_ REGISTER_OUT v4 [40000000, 40700000, 4013d70a, 40b051eb]
|
#_ REGISTER_OUT v4 [3f800000, 3fc00000, 3f8ccccd, 3ff33333]
|
||||||
# 2.0, 3.75, 2.31, 5.51
|
# 2.0, 3.75, 2.31, 5.51
|
||||||
# 40b051eb is actually 5.50999975, not 5.51?
|
# 40b051eb is actually 5.50999975, not 5.51?
|
||||||
# 40b051ec is 5.51
|
# 40b051ec is 5.51
|
||||||
|
|
||||||
|
test_vmaddfp_2:
|
||||||
|
#_ REGISTER_IN v4 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_IN v5 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
# v4 = v5 = v6 = 1.0, 1.0, 1.0, 1.0
|
||||||
|
vmaddfp v3, v4, v5, v6
|
||||||
|
blr
|
||||||
|
#_ REGISTER_OUT v3 [40000000, 40000000, 40000000, 40000000]
|
||||||
|
#_ REGISTER_OUT v4 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_OUT v5 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
|
||||||
|
test_vmaddfp_3:
|
||||||
|
# v4 = 5, 5, 1, 1
|
||||||
|
# v5 = 5, 5, 1, 1
|
||||||
|
# v6 = 1, 1, 1, 1
|
||||||
|
#_ REGISTER_IN v4 [40a00000, 40a00000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_IN v5 [40a00000, 40a00000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_IN v6 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
# v3 = (v4 * v5) + v6
|
||||||
|
vmaddfp v3, v4, v5, v6
|
||||||
|
blr
|
||||||
|
# v3 = 26.0, 26.0, 2.0, 2.0
|
||||||
|
#_ REGISTER_OUT v3 [41d00000, 41d00000, 40000000, 40000000]
|
||||||
|
#_ REGISTER_OUT v4 [40a00000, 40a00000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_OUT v5 [40a00000, 40a00000, 3f800000, 3f800000]
|
||||||
|
#_ REGISTER_OUT v6 [3f800000, 3f800000, 3f800000, 3f800000]
|
||||||
|
|
Loading…
Reference in New Issue