Possibly working LOAD_VECTOR_SHL/SHR.
This commit is contained in:
parent
ae02dc7eba
commit
44c29a6691
|
@ -63,6 +63,7 @@ enum XmmConst {
|
||||||
XMMSignMaskPS = 8,
|
XMMSignMaskPS = 8,
|
||||||
XMMSignMaskPD = 9,
|
XMMSignMaskPD = 9,
|
||||||
XMMByteSwapMask = 10,
|
XMMByteSwapMask = 10,
|
||||||
|
XMMPermuteControl15 = 11,
|
||||||
};
|
};
|
||||||
static const vec128_t xmm_consts[] = {
|
static const vec128_t xmm_consts[] = {
|
||||||
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
|
/* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f),
|
||||||
|
@ -76,6 +77,7 @@ static const vec128_t xmm_consts[] = {
|
||||||
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
/* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||||
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
/* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
||||||
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
/* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
||||||
|
/* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15),
|
||||||
};
|
};
|
||||||
// Use consts by first loading the base register then accessing memory:
|
// Use consts by first loading the base register then accessing memory:
|
||||||
// e.mov(e.rax, XMMCONSTBASE)
|
// e.mov(e.rax, XMMCONSTBASE)
|
||||||
|
@ -84,6 +86,45 @@ static const vec128_t xmm_consts[] = {
|
||||||
// Address of the first entry of xmm_consts as an integer, suitable for loading
// into a base register. Fully parenthesized so the cast cannot re-associate
// with whatever surrounds the macro at the use site.
#define XMMCONSTBASE ((uint64_t)&xmm_consts[0])
// Memory operand for the constant `name` (an XmmConst index) relative to
// `base_reg`, which must already hold XMMCONSTBASE. Each entry is addressed
// with a 16-byte stride. Relies on an emitter named `e` being in scope at the
// use site. Arguments are parenthesized so expressions can be passed safely.
#define XMMCONST(base_reg, name) e.ptr[(base_reg) + (name) * 16]
|
||||||
|
|
||||||
|
// Lookup table used by the LOAD_VECTOR_SHL sequence: entry `sh` holds the
// sixteen byte values sh, sh+1, ..., sh+15. There are 17 entries so that a
// shift amount clamped to 16 still indexes a valid row.
// The table is only ever read (its address is handed to generated code), so it
// is const, matching xmm_consts.
// NOTE(review): the generated code reads rows with movaps, which requires
// 16-byte aligned memory - assumes vec128_t carries that alignment; confirm.
static const vec128_t lvsl_table[17] = {
  vec128b( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15),
  vec128b( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16),
  vec128b( 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17),
  vec128b( 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
  vec128b( 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
  vec128b( 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
  vec128b( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
  vec128b( 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
  vec128b( 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
  vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
  vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
  vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
  vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
  vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
  vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
  vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
  vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
};
|
||||||
|
// Lookup table used by the LOAD_VECTOR_SHR sequence: entry `sh` holds the
// sixteen byte values 16-sh, 17-sh, ..., 31-sh. There are 17 entries so that a
// shift amount clamped to 16 still indexes a valid row.
// The table is only ever read (its address is handed to generated code), so it
// is const, matching xmm_consts.
// NOTE(review): the generated code reads rows with movaps, which requires
// 16-byte aligned memory - assumes vec128_t carries that alignment; confirm.
static const vec128_t lvsr_table[17] = {
  vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
  vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30),
  vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29),
  vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28),
  vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
  vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26),
  vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
  vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24),
  vec128b( 8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23),
  vec128b( 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22),
  vec128b( 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21),
  vec128b( 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
  vec128b( 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19),
  vec128b( 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18),
  vec128b( 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17),
  vec128b( 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16),
  vec128b( 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15),
};
|
||||||
|
|
||||||
// A note about vectors:
|
// A note about vectors:
|
||||||
// Alloy represents vectors as xyzw pairs, with indices 0123.
|
// Alloy represents vectors as xyzw pairs, with indices 0123.
|
||||||
// XMM registers are xyzw pairs with indices 3210, making them more like wzyx.
|
// XMM registers are xyzw pairs with indices 3210, making them more like wzyx.
|
||||||
|
@ -792,14 +833,56 @@ table->AddSequence(OPCODE_VECTOR_CONVERT_F2I, [](X64Emitter& e, Instr*& i) {
|
||||||
|
|
||||||
// LOAD_VECTOR_SHL: loads into dest the lvsl_table row selected by src1, with
// the selector clamped to at most 16 (the last row of the 17-entry table).
// A constant selector is resolved while emitting; otherwise the clamp and the
// table lookup are emitted as machine code.
table->AddSequence(OPCODE_LOAD_VECTOR_SHL, [](X64Emitter& e, Instr*& i) {
  XEASSERT(i->dest->type == VEC128_TYPE);
  auto shift = i->src1.value;
  if (!shift->IsConstant()) {
    Xmm result;
    Reg8 amount;
    e.BeginOp(i->dest, result, REG_DEST,
              shift, amount, 0);
    // TODO(benvanik): probably a way to do this with addressing.
    // TEMP_REG = (amount < 16) ? amount : 16, using an unsigned compare.
    e.mov(TEMP_REG, 16);
    e.movzx(e.rax, amount);
    e.cmp(amount, 16);
    e.cmovb(TEMP_REG, e.rax);
    // Scale the row index to a byte offset (16 bytes per row).
    e.shl(TEMP_REG, 4);
    e.mov(e.rax, (uintptr_t)lvsl_table);
    e.movaps(result, e.ptr[e.rax + TEMP_REG]);
    e.EndOp(result, amount);
  } else {
    Xmm result;
    e.BeginOp(i->dest, result, REG_DEST);
    // Clamp now and bake the address of the chosen row into the code.
    auto row = MIN(16, shift->AsUint32());
    e.mov(e.rax, (uintptr_t)(lvsl_table + row));
    e.movaps(result, e.ptr[e.rax]);
    e.EndOp(result);
  }
  i = e.Advance(i);
  return true;
});
|
||||||
|
|
||||||
// LOAD_VECTOR_SHR: loads into dest the lvsr_table row selected by src1, with
// the selector clamped to at most 16 (the last row of the 17-entry table).
// A constant selector is resolved while emitting; otherwise the clamp and the
// table lookup are emitted as machine code.
table->AddSequence(OPCODE_LOAD_VECTOR_SHR, [](X64Emitter& e, Instr*& i) {
  XEASSERT(i->dest->type == VEC128_TYPE);
  auto shift = i->src1.value;
  if (!shift->IsConstant()) {
    Xmm result;
    Reg8 amount;
    e.BeginOp(i->dest, result, REG_DEST,
              shift, amount, 0);
    // TODO(benvanik): probably a way to do this with addressing.
    // TEMP_REG = (amount < 16) ? amount : 16, using an unsigned compare.
    e.mov(TEMP_REG, 16);
    e.movzx(e.rax, amount);
    e.cmp(amount, 16);
    e.cmovb(TEMP_REG, e.rax);
    // Scale the row index to a byte offset (16 bytes per row).
    e.shl(TEMP_REG, 4);
    e.mov(e.rax, (uintptr_t)lvsr_table);
    e.movaps(result, e.ptr[e.rax + TEMP_REG]);
    e.EndOp(result, amount);
  } else {
    Xmm result;
    e.BeginOp(i->dest, result, REG_DEST);
    // Clamp now and bake the address of the chosen row into the code.
    auto row = MIN(16, shift->AsUint32());
    e.mov(e.rax, (uintptr_t)(lvsr_table + row));
    e.movaps(result, e.ptr[e.rax]);
    e.EndOp(result);
  }
  i = e.Advance(i);
  return true;
});
|
||||||
|
|
|
@ -51,6 +51,12 @@ Address Stash(X64Emitter& e, const Xmm& r) {
|
||||||
return addr;
|
return addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits code that materializes the 128-bit constant `v` in `dest` by writing
// it to the stash slot at [rsp + STASH_OFFSET] and loading it with movaps.
//
// The constant is written as four 32-bit immediates: x86-64 has no
// "mov m64, imm64" form (a qword store only takes a sign-extended imm32), so
// storing v.low / v.high directly as qword immediates cannot represent
// arbitrary values. Splitting into dwords needs no scratch register and leaves
// flags untouched. Little-endian layout keeps v.low at offset 0 and v.high at
// offset 8, exactly as before.
// NOTE(review): assumes v.low / v.high are 64-bit unsigned halves and that the
// stash slot is 16-byte aligned (required by movaps) - confirm.
void LoadXmmConstant(X64Emitter& e, Xmm& dest, const vec128_t& v) {
  const uint64_t low_lo = v.low & 0xFFFFFFFFull;
  const uint64_t low_hi = v.low >> 32;
  const uint64_t high_lo = v.high & 0xFFFFFFFFull;
  const uint64_t high_hi = v.high >> 32;
  e.mov(e.dword[e.rsp + STASH_OFFSET], low_lo);
  e.mov(e.dword[e.rsp + STASH_OFFSET + 4], low_hi);
  e.mov(e.dword[e.rsp + STASH_OFFSET + 8], high_lo);
  e.mov(e.dword[e.rsp + STASH_OFFSET + 12], high_hi);
  e.movaps(dest, e.ptr[e.rsp + STASH_OFFSET]);
}
|
||||||
|
|
||||||
// Moves a 64bit immediate into memory.
|
// Moves a 64bit immediate into memory.
|
||||||
void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
|
void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) {
|
||||||
if ((v & ~0x7FFFFFFF) == 0) {
|
if ((v & ~0x7FFFFFFF) == 0) {
|
||||||
|
|
|
@ -55,6 +55,18 @@ XEFORCEINLINE vec128_t vec128f(float x, float y, float z, float w) {
|
||||||
v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w;
|
v.f4[0] = x; v.f4[1] = y; v.f4[2] = z; v.f4[3] = w;
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
// Builds a vec128_t from sixteen bytes given as four groups of four
// (x0..x3, y0..y3, z0..z3, w0..w3). Within each group the bytes are stored in
// reverse order: b16[0..3] = x3,x2,x1,x0, b16[4..7] = y3,y2,y1,y0, and so on.
XEFORCEINLINE vec128_t vec128b(
    uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
    uint8_t y0, uint8_t y1, uint8_t y2, uint8_t y3,
    uint8_t z0, uint8_t z1, uint8_t z2, uint8_t z3,
    uint8_t w0, uint8_t w1, uint8_t w2, uint8_t w3) {
  // Storage order: each group of four arguments reversed.
  const uint8_t stored[16] = {
    x3, x2, x1, x0,
    y3, y2, y1, y0,
    z3, z2, z1, z0,
    w3, w2, w1, w0,
  };
  vec128_t result;
  for (int n = 0; n < 16; ++n) {
    result.b16[n] = stored[n];
  }
  return result;
}
|
||||||
|
|
||||||
} // namespace alloy
|
} // namespace alloy
|
||||||
|
|
||||||
|
|
|
@ -184,25 +184,26 @@ public:
|
||||||
}
|
}
|
||||||
bool IsConstantTrue() const {
|
bool IsConstantTrue() const {
|
||||||
if (type == VEC128_TYPE) {
|
if (type == VEC128_TYPE) {
|
||||||
return false;
|
XEASSERTALWAYS();
|
||||||
}
|
}
|
||||||
return (flags & VALUE_IS_CONSTANT) && !!constant.i64;
|
return (flags & VALUE_IS_CONSTANT) && !!constant.i64;
|
||||||
}
|
}
|
||||||
bool IsConstantFalse() const {
|
bool IsConstantFalse() const {
|
||||||
if (type == VEC128_TYPE) {
|
if (type == VEC128_TYPE) {
|
||||||
return false;
|
XEASSERTALWAYS();
|
||||||
}
|
}
|
||||||
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
||||||
}
|
}
|
||||||
bool IsConstantZero() const {
|
bool IsConstantZero() const {
|
||||||
if (type == VEC128_TYPE) {
|
if (type == VEC128_TYPE) {
|
||||||
return false;
|
return (flags & VALUE_IS_CONSTANT) &&
|
||||||
|
!constant.v128.low && !constant.v128.high;
|
||||||
}
|
}
|
||||||
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
return (flags & VALUE_IS_CONSTANT) && !constant.i64;
|
||||||
}
|
}
|
||||||
bool IsConstantEQ(Value* other) const {
|
bool IsConstantEQ(Value* other) const {
|
||||||
if (type == VEC128_TYPE) {
|
if (type == VEC128_TYPE) {
|
||||||
return false;
|
XEASSERTALWAYS();
|
||||||
}
|
}
|
||||||
return (flags & VALUE_IS_CONSTANT) &&
|
return (flags & VALUE_IS_CONSTANT) &&
|
||||||
(other->flags & VALUE_IS_CONSTANT) &&
|
(other->flags & VALUE_IS_CONSTANT) &&
|
||||||
|
@ -210,7 +211,7 @@ public:
|
||||||
}
|
}
|
||||||
bool IsConstantNE(Value* other) const {
|
bool IsConstantNE(Value* other) const {
|
||||||
if (type == VEC128_TYPE) {
|
if (type == VEC128_TYPE) {
|
||||||
return false;
|
XEASSERTALWAYS();
|
||||||
}
|
}
|
||||||
return (flags & VALUE_IS_CONSTANT) &&
|
return (flags & VALUE_IS_CONSTANT) &&
|
||||||
(other->flags & VALUE_IS_CONSTANT) &&
|
(other->flags & VALUE_IS_CONSTANT) &&
|
||||||
|
|
Loading…
Reference in New Issue