Possibly working LOAD_VECTOR_SHL/SHR.

This commit is contained in:
Ben Vanik 2014-02-02 02:18:59 -08:00
parent ae02dc7eba
commit 44c29a6691
4 changed files with 109 additions and 7 deletions

View File

@ -63,6 +63,7 @@ enum XmmConst {
XMMSignMaskPS = 8, XMMSignMaskPS = 8,
XMMSignMaskPD = 9, XMMSignMaskPD = 9,
XMMByteSwapMask = 10, XMMByteSwapMask = 10,
XMMPermuteControl15 = 11,
}; };
static const vec128_t xmm_consts[] = { static const vec128_t xmm_consts[] = {
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f), /* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
@ -76,6 +77,7 @@ static const vec128_t xmm_consts[] = {
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u), /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
}; };
// Use consts by first loading the base register then accessing memory: // Use consts by first loading the base register then accessing memory:
// e.mov(e.rax, XMMCONSTBASE) // e.mov(e.rax, XMMCONSTBASE)
@ -84,6 +86,45 @@ static const vec128_t xmm_consts[] = {
#define XMMCONSTBASE (uint64_t)&xmm_consts[0] #define XMMCONSTBASE (uint64_t)&xmm_consts[0]
#define XMMCONST(base_reg, name) e.ptr[base_reg + name * 16] #define XMMCONST(base_reg, name) e.ptr[base_reg + name * 16]
// Lookup table used by the OPCODE_LOAD_VECTOR_SHL sequence.
// Row n is built from the byte values n, n+1, ..., n+15 (in vec128b argument
// order) for n in [0, 16]. The sequence clamps the shift amount to 16 before
// indexing, which is why there are 17 rows rather than 16.
// The code visible here only takes the table's address and reads from it, so
// it is declared const like xmm_consts.
// NOTE(review): rows are fetched with movaps, which requires a 16-byte
// aligned address - presumably vec128_t guarantees that; confirm.
static const vec128_t lvsl_table[17] = {
  vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
  vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16),
  vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
  vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
  vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
  vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
  vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
  vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
  vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
  vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
  vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
  vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
  vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
  vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
  vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
  vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
  vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
};
// Lookup table used by the OPCODE_LOAD_VECTOR_SHR sequence.
// Row n is built from the byte values 16-n, 17-n, ..., 31-n (in vec128b
// argument order) for n in [0, 16], i.e. the rows of the SHL table in reverse
// order. The sequence clamps the shift amount to 16 before indexing, which is
// why there are 17 rows rather than 16.
// The code visible here only takes the table's address and reads from it, so
// it is declared const like xmm_consts.
// NOTE(review): rows are fetched with movaps, which requires a 16-byte
// aligned address - presumably vec128_t guarantees that; confirm.
static const vec128_t lvsr_table[17] = {
  vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
  vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
  vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
  vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
  vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
  vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
  vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
  vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
  vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
  vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
  vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
  vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
  vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
  vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
  vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
  vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16),
  vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
};
// A note about vectors: // A note about vectors:
// Alloy represents vectors as xyzw pairs, with indices 0123. // Alloy represents vectors as xyzw pairs, with indices 0123.
// XMM registers are xyzw pairs with indices 3210, making them more like wzyx. // XMM registers are xyzw pairs with indices 3210, making them more like wzyx.
@ -792,14 +833,56 @@ table->AddSequence(OPCODE_VECTOR_CONVERT_F2I, [](X64Emitter& e, Instr*& i) {
// OPCODE_LOAD_VECTOR_SHL: loads one row of lvsl_table into the VEC128
// destination. The row index is src1, clamped so it never exceeds 16.
table->AddSequence(OPCODE_LOAD_VECTOR_SHL, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_LOAD_VECTOR_SHL, [](X64Emitter& e, Instr*& i) {
XEASSERT(i->dest->type == VEC128_TYPE); XEASSERT(i->dest->type == VEC128_TYPE);
// Constant src1: the row is picked while emitting, so the generated code is
// only an address load into rax followed by a movaps from that address.
UNIMPLEMENTED_SEQ(); if (i->src1.value->IsConstant()) {
Xmm dest;
e.BeginOp(i->dest, dest, REG_DEST);
auto sh = MIN(16, i->src1.value->AsUint32());
e.mov(e.rax, (uintptr_t)&lvsl_table[sh]);
e.movaps(dest, e.ptr[e.rax]);
e.EndOp(dest);
} else {
// Non-constant src1: the clamp and the table lookup happen in generated code.
Xmm dest;
Reg8 src;
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src, 0);
// TODO(benvanik): probably a way to do this with addressing.
// TEMP_REG = (src < 16) ? src : 16. cmovb is an unsigned "below" move, and
// rax holds the zero-extended 8-bit source.
e.mov(TEMP_REG, 16);
e.movzx(e.rax, src);
e.cmp(src, 16);
e.cmovb(TEMP_REG, e.rax);
// Turn the row index into a byte offset (16 bytes per row).
e.shl(TEMP_REG, 4);
// rax is reused for the table base once the clamp no longer needs it.
e.mov(e.rax, (uintptr_t)lvsl_table);
e.movaps(dest, e.ptr[e.rax + TEMP_REG]);
e.EndOp(dest, src);
}
i = e.Advance(i); i = e.Advance(i);
return true; return true;
}); });
// OPCODE_LOAD_VECTOR_SHR: loads one row of lvsr_table into the VEC128
// destination. The row index is src1, clamped so it never exceeds 16.
table->AddSequence(OPCODE_LOAD_VECTOR_SHR, [](X64Emitter& e, Instr*& i) { table->AddSequence(OPCODE_LOAD_VECTOR_SHR, [](X64Emitter& e, Instr*& i) {
XEASSERT(i->dest->type == VEC128_TYPE); XEASSERT(i->dest->type == VEC128_TYPE);
// Constant src1: the row is picked while emitting, so the generated code is
// only an address load into rax followed by a movaps from that address.
UNIMPLEMENTED_SEQ(); if (i->src1.value->IsConstant()) {
Xmm dest;
e.BeginOp(i->dest, dest, REG_DEST);
auto sh = MIN(16, i->src1.value->AsUint32());
e.mov(e.rax, (uintptr_t)&lvsr_table[sh]);
e.movaps(dest, e.ptr[e.rax]);
e.EndOp(dest);
} else {
// Non-constant src1: the clamp and the table lookup happen in generated code.
Xmm dest;
Reg8 src;
e.BeginOp(i->dest, dest, REG_DEST,
i->src1.value, src, 0);
// TODO(benvanik): probably a way to do this with addressing.
// TEMP_REG = (src < 16) ? src : 16. cmovb is an unsigned "below" move, and
// rax holds the zero-extended 8-bit source.
e.mov(TEMP_REG, 16);
e.movzx(e.rax, src);
e.cmp(src, 16);
e.cmovb(TEMP_REG, e.rax);
// Turn the row index into a byte offset (16 bytes per row).
e.shl(TEMP_REG, 4);
// rax is reused for the table base once the clamp no longer needs it.
e.mov(e.rax, (uintptr_t)lvsr_table);
e.movaps(dest, e.ptr[e.rax + TEMP_REG]);
e.EndOp(dest, src);
}
i = e.Advance(i); i = e.Advance(i);
return true; return true;
}); });

View File

@ -51,6 +51,12 @@ Address Stash(X64Emitter& e, const Xmm& r) {
return addr; return addr;
} }
// Emits code that materializes the 128-bit constant |v| in |dest|.
// The constant is written to the stack scratch slot at [rsp + STASH_OFFSET]
// and then loaded from there with movaps; the slot's previous contents are
// overwritten.
//   e    - emitter that receives the generated instructions.
//   dest - XMM register that receives the constant.
//   v    - constant to load; v.low goes to the lower 8 bytes of the slot and
//          v.high to the upper 8 bytes.
// x86-64 has no "mov m64, imm64" encoding: a qword store to memory takes a
// 32-bit immediate that the CPU sign-extends, so a single qword mov can only
// store values that fit a sign-extended imm32. Each half is therefore written
// as two 32-bit immediates, which is exact for every value and needs no
// scratch register.
void LoadXmmConstant(X64Emitter& e, Xmm& dest, const vec128_t& v) {
  // Work on unsigned copies so the shifts below are plain logical shifts.
  const uint64_t lo = v.low;
  const uint64_t hi = v.high;
  // Little-endian layout: the low dword of each half sits at the lower address.
  e.mov(e.dword[e.rsp + STASH_OFFSET], static_cast<uint32_t>(lo));
  e.mov(e.dword[e.rsp + STASH_OFFSET + 4], static_cast<uint32_t>(lo >> 32));
  e.mov(e.dword[e.rsp + STASH_OFFSET + 8], static_cast<uint32_t>(hi));
  e.mov(e.dword[e.rsp + STASH_OFFSET + 12], static_cast<uint32_t>(hi >> 32));
  e.movaps(dest, e.ptr[e.rsp + STASH_OFFSET]);
}
// Moves a 64bit immediate into memory. // Moves a 64bit immediate into memory.
void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) { void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) { if ((v & ~0x7FFFFFFF) == 0) {

View File

@ -55,6 +55,18 @@ XEFORCEINLINE vec128_t vec128f(float x, float y, float z, float w) {
v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w; v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w;
return v; return v;
} }
// Builds a vec128_t from sixteen bytes given as four groups of four
// (x, y, z, w). The bytes of each group are written to b16 in reverse order:
// b16[0..3] = x3,x2,x1,x0, b16[4..7] = y3,y2,y1,y0, and likewise for z and w.
XEFORCEINLINE vec128_t vec128b(
    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
    uint8_t y0, uint8_t y1, uint8_t y2, uint8_t y3,
    uint8_t z0, uint8_t z1, uint8_t z2, uint8_t z3,
    uint8_t w0, uint8_t w1, uint8_t w2, uint8_t w3) {
  const uint8_t groups[4][4] = {
    { x0, x1, x2, x3 },
    { y0, y1, y2, y3 },
    { z0, z1, z2, z3 },
    { w0, w1, w2, w3 },
  };
  vec128_t result;
  for (int group = 0; group < 4; ++group) {
    for (int slot = 0; slot < 4; ++slot) {
      // Slot k of a group receives that group's argument 3 - k.
      result.b16[group * 4 + slot] = groups[group][3 - slot];
    }
  }
  return result;
}
} // namespace alloy } // namespace alloy

View File

@ -184,25 +184,26 @@ public:
} }
// Returns true when this value is flagged VALUE_IS_CONSTANT and its
// constant.i64 payload is non-zero.
// VEC128_TYPE values are not supported here: the old code returned false for
// them, the new code raises XEASSERTALWAYS() instead.
// NOTE(review): if XEASSERTALWAYS() can return (e.g. asserts compiled out),
// the VEC128 case falls through to the i64 test below - confirm that is
// intended.
bool IsConstantTrue() const { bool IsConstantTrue() const {
if (type == VEC128_TYPE) { if (type == VEC128_TYPE) {
return false; XEASSERTALWAYS();
} }
return (flags & VALUE_IS_CONSTANT) && !!constant.i64; return (flags & VALUE_IS_CONSTANT) && !!constant.i64;
} }
// Returns true when this value is flagged VALUE_IS_CONSTANT and its
// constant.i64 payload is zero.
// VEC128_TYPE values are not supported here: the old code returned false for
// them, the new code raises XEASSERTALWAYS() instead.
// NOTE(review): if XEASSERTALWAYS() can return (e.g. asserts compiled out),
// the VEC128 case falls through to the i64 test below - confirm that is
// intended.
bool IsConstantFalse() const { bool IsConstantFalse() const {
if (type == VEC128_TYPE) { if (type == VEC128_TYPE) {
return false; XEASSERTALWAYS();
} }
return (flags & VALUE_IS_CONSTANT) && !constant.i64; return (flags & VALUE_IS_CONSTANT) && !constant.i64;
} }
// Returns true when this value is flagged VALUE_IS_CONSTANT and is zero.
// For VEC128_TYPE the new code requires both constant.v128.low and
// constant.v128.high to be zero (the old code always returned false for
// vectors); for every other type it tests constant.i64.
bool IsConstantZero() const { bool IsConstantZero() const {
if (type == VEC128_TYPE) { if (type == VEC128_TYPE) {
return false; return (flags & VALUE_IS_CONSTANT) &&
!constant.v128.low && !constant.v128.high;
} }
return (flags & VALUE_IS_CONSTANT) && !constant.i64; return (flags & VALUE_IS_CONSTANT) && !constant.i64;
} }
bool IsConstantEQ(Value* other) const { bool IsConstantEQ(Value* other) const {
if (type == VEC128_TYPE) { if (type == VEC128_TYPE) {
return false; XEASSERTALWAYS();
} }
return (flags & VALUE_IS_CONSTANT) && return (flags & VALUE_IS_CONSTANT) &&
(other->flags & VALUE_IS_CONSTANT) && (other->flags & VALUE_IS_CONSTANT) &&
@ -210,7 +211,7 @@ public:
} }
bool IsConstantNE(Value* other) const { bool IsConstantNE(Value* other) const {
if (type == VEC128_TYPE) { if (type == VEC128_TYPE) {
return false; XEASSERTALWAYS();
} }
return (flags & VALUE_IS_CONSTANT) && return (flags & VALUE_IS_CONSTANT) &&
(other->flags & VALUE_IS_CONSTANT) && (other->flags & VALUE_IS_CONSTANT) &&