I don't like it, but fixing permute by int16.

This commit is contained in:
Ben Vanik 2015-01-19 12:15:10 -08:00
parent acc1286b72
commit b2e03fa628
6 changed files with 73 additions and 12 deletions

View File

@ -41,7 +41,7 @@ using alloy::runtime::ThreadState;
static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024;
static const size_t STASH_OFFSET = 32;
static const size_t STASH_OFFSET_HIGH = 32 + 16;
static const size_t STASH_OFFSET_HIGH = 32 + 32;
// If we are running with tracing on we have to store the EFLAGS in the stack,
// otherwise our calls out to C to print will clear it before DID_CARRY/etc

View File

@ -4958,6 +4958,7 @@ EMITTER_OPCODE_TABLE(
// ============================================================================
EMITTER(PERMUTE_I32, MATCH(I<OPCODE_PERMUTE, V128<>, I32<>, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_true(i.instr->flags == INT32_TYPE);
// Permute words between src2 and src3.
// TODO(benvanik): check src3 for zero. if 0, we can use pshufb.
if (i.src1.is_constant) {
@ -5006,8 +5007,7 @@ EMITTER(PERMUTE_I32, MATCH(I<OPCODE_PERMUTE, V128<>, I32<>, V128<>, V128<>>)) {
}
};
EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
assert_true(i.instr->flags == INT8_TYPE);
static void EmitByInt8(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): find out how to do this with only one temp register!
// Permute bytes between src2 and src3.
if (i.src3.value->IsConstantZero()) {
@ -5068,6 +5068,65 @@ EMITTER(PERMUTE_V128, MATCH(I<OPCODE_PERMUTE, V128<>, V128<>, V128<>, V128<>>))
e.vpblendvb(i.dest, src2_shuf, src3_shuf, i.dest);
}
}
// Scalar fallback for a permute at 16-bit granularity.
//
// Each of the eight 16-bit lanes of `control` contributes its low four bits as
// a selector into the sixteen halfwords formed by `src1` (selectors 0-7) and
// `src2` (selectors 8-15). Before the lookup the selector's lowest bit is
// flipped, so each adjacent pair of halfwords is read in swapped order
// (presumably to account for lane ordering on the host - confirm).
// The first parameter is unused.
static __m128i EmulateByInt16(void*, __m128i control, __m128i src1,
                              __m128i src2) {
  alignas(16) uint16_t selectors[8];
  alignas(16) uint16_t first[8];
  alignas(16) uint16_t second[8];
  alignas(16) uint16_t picked[8];
  _mm_store_si128(reinterpret_cast<__m128i*>(selectors), control);
  _mm_store_si128(reinterpret_cast<__m128i*>(first), src1);
  _mm_store_si128(reinterpret_cast<__m128i*>(second), src2);
  for (size_t lane = 0; lane != 8; ++lane) {
    // Only the low nibble is significant; XOR with 1 swaps neighbours.
    const uint16_t index = (selectors[lane] & 0xF) ^ 0x1;
    if (index < 8) {
      picked[lane] = first[index];
    } else {
      picked[lane] = second[index - 8];
    }
  }
  return _mm_load_si128(reinterpret_cast<__m128i*>(picked));
}
// Emits a 16-bit-granularity permute by calling out to the EmulateByInt16
// helper instead of generating inline vector code.
//
// Each of the three operands (src1 = control, src2, src3) is written to its
// own stash slot (0, 1, 2) and the result of StashXmm for that slot is lea'd
// into r8, r9 and r10 respectively. Constant operands are first materialized
// in xmm0, which is therefore reused as a staging register for every operand.
// After the native call the value left in xmm0 is moved into the destination.
static void EmitByInt16(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): replace with proper version.
// NOTE(review): this assert demands a constant control operand, yet the else
// branch just below handles a non-constant one - confirm which is intended.
assert_true(i.src1.is_constant);
// Control vector -> stash slot 0, referenced through r8.
if (i.src1.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src1.constant());
e.lea(e.r8, e.StashXmm(0, e.xmm0));
} else {
e.lea(e.r8, e.StashXmm(0, i.src1));
}
// First source vector -> stash slot 1, referenced through r9.
if (i.src2.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src2.constant());
e.lea(e.r9, e.StashXmm(1, e.xmm0));
} else {
e.lea(e.r9, e.StashXmm(1, i.src2));
}
// Second source vector -> stash slot 2, referenced through r10.
if (i.src3.is_constant) {
e.LoadConstantXmm(e.xmm0, i.src3.constant());
e.lea(e.r10, e.StashXmm(2, e.xmm0));
} else {
e.lea(e.r10, e.StashXmm(2, i.src3));
}
// Presumably the guest-to-host thunk forwards r8/r9/r10 as the helper's
// control/src1/src2 arguments - verify against the thunk emitter.
e.CallNativeSafe(reinterpret_cast<void*>(EmulateByInt16));
// The helper's result is taken from xmm0.
e.vmovaps(i.dest, e.xmm0);
}
// Placeholder for a 32-bit-granularity permute with a vector control operand.
// No code is emitted; the body consists solely of assert_always(), so reaching
// this path is treated as an error (presumably fires unconditionally - confirm
// against the assert macro's definition).
static void EmitByInt32(X64Emitter& e, const EmitArgType& i) {
assert_always();
}
// Entry point for the vector-controlled permute: selects the emitter that
// matches the element width recorded in the instruction's flags. Any width
// other than INT8/INT16/INT32 is reported through assert_unhandled_case.
static void Emit(X64Emitter& e, const EmitArgType& i) {
  const auto part_type = i.instr->flags;
  if (part_type == INT8_TYPE) {
    EmitByInt8(e, i);
  } else if (part_type == INT16_TYPE) {
    EmitByInt16(e, i);
  } else if (part_type == INT32_TYPE) {
    EmitByInt32(e, i);
  } else {
    // Argument kept verbatim in case the macro stringifies its operand.
    assert_unhandled_case(i.instr->flags);
  }
}
};
EMITTER_OPCODE_TABLE(
OPCODE_PERMUTE,

View File

@ -118,6 +118,7 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
mov(rax, rdx);
mov(rdx, r8);
mov(r8, r9);
mov(r9, r10);
call(rax);
mov(rbx, qword[rsp + 48]);

View File

@ -96,14 +96,14 @@ namespace x64 {
* | |
* | |
* +------------------+
* | scratch, 32b | rsp + 32
* | scratch, 48b | rsp + 32
* | |
* +------------------+
* | rcx / context | rsp + 64
* | rcx / context | rsp + 80
* +------------------+
* | guest ret addr | rsp + 72
* | guest ret addr | rsp + 88
* +------------------+
* | call ret addr | rsp + 80
* | call ret addr | rsp + 96
* +------------------+
* ... locals ...
* +------------------+
@ -116,10 +116,10 @@ class StackLayout {
public:
const static size_t THUNK_STACK_SIZE = 120;
const static size_t GUEST_STACK_SIZE = 88;
const static size_t GUEST_RCX_HOME = 64;
const static size_t GUEST_RET_ADDR = 72;
const static size_t GUEST_CALL_RET_ADDR = 80;
const static size_t GUEST_STACK_SIZE = 104;
const static size_t GUEST_RCX_HOME = 80;
const static size_t GUEST_RET_ADDR = 88;
const static size_t GUEST_CALL_RET_ADDR = 96;
};
class X64ThunkEmitter : public X64Emitter {

View File

@ -1852,6 +1852,7 @@ Value* HIRBuilder::Splat(Value* value, TypeName target_type) {
Value* HIRBuilder::Permute(Value* control, Value* value1, Value* value2,
TypeName part_type) {
ASSERT_TYPES_EQUAL(value1, value2);
assert_true(part_type >= INT8_TYPE && part_type <= INT32_TYPE);
// TODO(benvanik): could do some of this as constants.

View File

@ -81,7 +81,7 @@ TEST_CASE("PERMUTE_V128_BY_INT32_CONSTANT", "[instr]") {
TEST_CASE("PERMUTE_V128_BY_V128", "[instr]") {
TestFunction test([](hir::HIRBuilder& b) {
StoreVR(b, 3,
b.Permute(LoadVR(b, 3), LoadVR(b, 4), LoadVR(b, 5), VEC128_TYPE));
b.Permute(LoadVR(b, 3), LoadVR(b, 4), LoadVR(b, 5), INT8_TYPE));
b.Return();
});
test.Run([](PPCContext* ctx) {