[a64] Optimize `OPCODE_SPLAT` with `MOVI`/`FMOV`
Moves the `FMOV` constant functions into `a64_util` so that they are available to other translation units. Optimizes constant splats by conditionally using `MOVI` and `FMOV`.
This commit is contained in:
parent
539a03d5f6
commit
9c8b0678a5
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_emitter.h"
|
||||
#include "xenia/cpu/backend/a64/a64_util.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
|
@ -810,74 +811,6 @@ uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
|
|||
return GetVConstPtr() + GetVConstOffset(id);
|
||||
}
|
||||
|
||||
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
|
||||
// returns false if the value cannot be represented
|
||||
// C2.2.3 Modified immediate constants in A64 floating-point instructions
|
||||
// abcdefgh
|
||||
// V
|
||||
// aBbbbbbc defgh000 00000000 00000000
|
||||
// B = NOT(b)
|
||||
static bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
|
||||
const uint32_t sign = (u32 >> 31) & 1;
|
||||
int32_t exp = ((u32 >> 23) & 0xff) - 127;
|
||||
int64_t mantissa = u32 & 0x7fffff;
|
||||
|
||||
// Too many mantissa bits
|
||||
if (mantissa & 0x7ffff) {
|
||||
return false;
|
||||
}
|
||||
// Too many exp bits
|
||||
if (exp < -3 || exp > 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// mantissa = (16 + e:f:g:h) / 16.
|
||||
mantissa >>= 19;
|
||||
if ((mantissa & 0b1111) != mantissa) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// exp = (NOT(b):c:d) - 3
|
||||
exp = ((exp + 3) & 0b111) ^ 0b100;
|
||||
|
||||
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
|
||||
// returns false if the value cannot be represented
|
||||
// C2.2.3 Modified immediate constants in A64 floating-point instructions
|
||||
// abcdefgh
|
||||
// V
|
||||
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
|
||||
// B = NOT(b)
|
||||
static bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
|
||||
const uint32_t sign = (u64 >> 63) & 1;
|
||||
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
|
||||
int64_t mantissa = u64 & 0xfffffffffffffULL;
|
||||
|
||||
// Too many mantissa bits
|
||||
if (mantissa & 0xffffffffffffULL) {
|
||||
return false;
|
||||
}
|
||||
// Too many exp bits
|
||||
if (exp < -3 || exp > 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// mantissa = (16 + e:f:g:h) / 16.
|
||||
mantissa >>= 48;
|
||||
if ((mantissa & 0b1111) != mantissa) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// exp = (NOT(b):c:d) - 3
|
||||
exp = ((exp + 3) & 0b111) ^ 0b100;
|
||||
|
||||
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Implies possible StashV(0, ...)!
|
||||
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
|
||||
if (!v.low && !v.high) {
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_sequences.h"
|
||||
#include "xenia/cpu/backend/a64/a64_util.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
@ -1026,12 +1027,7 @@ EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);
|
|||
struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
if (i.src1.is_constant) {
|
||||
if (i.src1.constant() <= 0xFF) {
|
||||
e.MOVI(i.dest.reg().B16(), i.src1.constant());
|
||||
return;
|
||||
}
|
||||
e.MOV(W0, i.src1.constant());
|
||||
e.DUP(i.dest.reg().B16(), W0);
|
||||
e.MOVI(i.dest.reg().B16(), i.src1.constant());
|
||||
} else {
|
||||
e.DUP(i.dest.reg().B16(), i.src1);
|
||||
}
|
||||
|
@ -1040,9 +1036,12 @@ struct SPLAT_I8 : Sequence<SPLAT_I8, I<OPCODE_SPLAT, V128Op, I8Op>> {
|
|||
struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
if (i.src1.is_constant) {
|
||||
if (i.src1.constant() <= 0xFF) {
|
||||
if ((i.src1.constant() & 0xFF'00) == 0) {
|
||||
e.MOVI(i.dest.reg().H8(), i.src1.constant());
|
||||
return;
|
||||
} else if ((i.src1.constant() & 0x00'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().H8(), i.src1.constant(), oaknut::util::LSL, 8);
|
||||
return;
|
||||
}
|
||||
e.MOV(W0, i.src1.constant());
|
||||
e.DUP(i.dest.reg().H8(), W0);
|
||||
|
@ -1054,9 +1053,22 @@ struct SPLAT_I16 : Sequence<SPLAT_I16, I<OPCODE_SPLAT, V128Op, I16Op>> {
|
|||
struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
if (i.src1.is_constant) {
|
||||
if (i.src1.constant() <= 0xFF) {
|
||||
oaknut::FImm8 fp8(0);
|
||||
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
|
||||
e.FMOV(i.dest.reg().S4(), fp8);
|
||||
return;
|
||||
} else if ((i.src1.constant() & 0xFF'FF'FF'00) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.constant());
|
||||
return;
|
||||
} else if ((i.src1.constant() & 0xFF'FF'00'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 8);
|
||||
return;
|
||||
} else if ((i.src1.constant() & 0xFF'00'FF'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 16);
|
||||
return;
|
||||
} else if ((i.src1.constant() & 0x00'FF'FF'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.constant(), oaknut::util::LSL, 24);
|
||||
return;
|
||||
}
|
||||
e.MOV(W0, i.src1.constant());
|
||||
e.DUP(i.dest.reg().S4(), W0);
|
||||
|
@ -1068,8 +1080,24 @@ struct SPLAT_I32 : Sequence<SPLAT_I32, I<OPCODE_SPLAT, V128Op, I32Op>> {
|
|||
struct SPLAT_F32 : Sequence<SPLAT_F32, I<OPCODE_SPLAT, V128Op, F32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
if (i.src1.is_constant) {
|
||||
if (i.src1.value->constant.i32 <= 0xFF) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.i32);
|
||||
oaknut::FImm8 fp8(0);
|
||||
if (f32_to_fimm8(i.src1.value->constant.u32, fp8)) {
|
||||
e.FMOV(i.dest.reg().S4(), fp8);
|
||||
return;
|
||||
} else if ((i.src1.value->constant.u32 & 0xFF'FF'FF'00) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32);
|
||||
return;
|
||||
} else if ((i.src1.value->constant.u32 & 0xFF'FF'00'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
|
||||
8);
|
||||
return;
|
||||
} else if ((i.src1.value->constant.u32 & 0xFF'00'FF'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
|
||||
16);
|
||||
return;
|
||||
} else if ((i.src1.value->constant.u32 & 0x00'FF'FF'FF) == 0) {
|
||||
e.MOVI(i.dest.reg().S4(), i.src1.value->constant.u32, oaknut::util::LSL,
|
||||
24);
|
||||
return;
|
||||
}
|
||||
e.MOV(W0, i.src1.value->constant.i32);
|
||||
|
|
|
@ -17,7 +17,77 @@
|
|||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {} // namespace a64
|
||||
namespace a64 {
|
||||
|
||||
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
|
||||
// returns false if the value cannot be represented
|
||||
// C2.2.3 Modified immediate constants in A64 ing-point instructions
|
||||
// abcdefgh
|
||||
// V
|
||||
// aBbbbbbc defgh000 00000000 00000000
|
||||
// B = NOT(b)
|
||||
constexpr bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
|
||||
const uint32_t sign = (u32 >> 31) & 1;
|
||||
int32_t exp = ((u32 >> 23) & 0xff) - 127;
|
||||
int64_t mantissa = u32 & 0x7fffff;
|
||||
|
||||
// Too many mantissa bits
|
||||
if (mantissa & 0x7ffff) {
|
||||
return false;
|
||||
}
|
||||
// Too many exp bits
|
||||
if (exp < -3 || exp > 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// mantissa = (16 + e:f:g:h) / 16.
|
||||
mantissa >>= 19;
|
||||
if ((mantissa & 0b1111) != mantissa) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// exp = (NOT(b):c:d) - 3
|
||||
exp = ((exp + 3) & 0b111) ^ 0b100;
|
||||
|
||||
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
|
||||
// returns false if the value cannot be represented
|
||||
// C2.2.3 Modified immediate constants in A64 floating-point instructions
|
||||
// abcdefgh
|
||||
// V
|
||||
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
|
||||
// B = NOT(b)
|
||||
constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
|
||||
const uint32_t sign = (u64 >> 63) & 1;
|
||||
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
|
||||
int64_t mantissa = u64 & 0xfffffffffffffULL;
|
||||
|
||||
// Too many mantissa bits
|
||||
if (mantissa & 0xffffffffffffULL) {
|
||||
return false;
|
||||
}
|
||||
// Too many exp bits
|
||||
if (exp < -3 || exp > 4) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// mantissa = (16 + e:f:g:h) / 16.
|
||||
mantissa >>= 48;
|
||||
if ((mantissa & 0b1111) != mantissa) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// exp = (NOT(b):c:d) - 3
|
||||
exp = ((exp + 3) & 0b111) ^ 0b100;
|
||||
|
||||
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
|
Loading…
Reference in New Issue