[a64] Optimize vector-constant generation

Uses MOVI instead of EOR to generate all-zero vector constants, and adds a
MOVI fast path for all-ones constants. MOVI is handled as a register-rename
idiom on many microarchitectures, whereas EOR(x, x, x) is not.
This commit is contained in:
Wunkolo 2024-05-21 09:31:06 -07:00
parent a7ae117c90
commit e2d1e5d7f8
1 changed file with 17 additions and 23 deletions

View File

@ -827,17 +827,15 @@ std::byte* A64Emitter::GetVConstPtr(VConst id) {
// Implies possible StashV(0, ...)! // Implies possible StashV(0, ...)!
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) { void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
// https://www.agner.org/optimize/optimizing_assembly.pdf
// 13.4 Generating constants
if (!v.low && !v.high) { if (!v.low && !v.high) {
// 0000... // 0000...
EOR(dest.B16(), dest.B16(), dest.B16()); // MOVI is implemented as a register-rename while EOR(x, x, x) is not
} // https://dougallj.github.io/applecpu/firestorm.html
// else if (v.low == ~uint64_t(0) && v.high == ~uint64_t(0)) { MOVI(dest.B16(), 0);
// // 1111... } else if (v.low == ~uint64_t(0) && v.high == ~uint64_t(0)) {
// vpcmpeqb(dest, dest); // 1111...
// } MOVI(dest.B16(), 0xFF);
else { } else {
// TODO(benvanik): see what other common values are. // TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused. // TODO(benvanik): build constant table - 99% are reused.
MovMem64(SP, kStashOffset, v.low); MovMem64(SP, kStashOffset, v.low);
@ -853,13 +851,11 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, float v) {
} x = {v}; } x = {v};
if (!x.i) { if (!x.i) {
// +0.0f (but not -0.0f because it may be used to flip the sign via xor). // +0.0f (but not -0.0f because it may be used to flip the sign via xor).
EOR(dest.B16(), dest.B16(), dest.B16()); MOVI(dest.B16(), 0);
} } else if (x.i == ~uint32_t(0)) {
// else if (x.i == ~uint32_t(0)) { // 1111...
// // 1111... MOVI(dest.B16(), 0xFF);
// vpcmpeqb(dest, dest); } else {
// }
else {
// TODO(benvanik): see what other common values are. // TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused. // TODO(benvanik): build constant table - 99% are reused.
MOV(W0, x.i); MOV(W0, x.i);
@ -874,13 +870,11 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, double v) {
} x = {v}; } x = {v};
if (!x.i) { if (!x.i) {
// +0.0 (but not -0.0 because it may be used to flip the sign via xor). // +0.0 (but not -0.0 because it may be used to flip the sign via xor).
EOR(dest.B16(), dest.B16(), dest.B16()); MOVI(dest.toD(), oaknut::RepImm(0));
} } else if (x.i == ~uint64_t(0)) {
// else if (x.i == ~uint64_t(0)) { // 1111...
// // 1111... MOVI(dest.toD(), oaknut::RepImm(0xFF));
// vpcmpeqb(dest, dest); } else {
// }
else {
// TODO(benvanik): see what other common values are. // TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused. // TODO(benvanik): build constant table - 99% are reused.
MOV(X0, x.i); MOV(X0, x.i);