Emulated SHR_V128 implementation.

This commit is contained in:
Ben Vanik 2015-01-08 23:35:25 -08:00
parent dfba5943da
commit fde3904130
2 changed files with 255 additions and 161 deletions

View File

@ -4192,12 +4192,39 @@ EMITTER(SHR_I64, MATCH(I<OPCODE_SHR, I64<>, I64<>, I8<>>)) {
EmitShrXX<SHR_I64, Reg64>(e, i);
}
};
EMITTER(SHR_V128, MATCH(I<OPCODE_SHR, V128<>, V128<>, I8<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): native version (with shift magic).
if (i.src2.is_constant) {
e.mov(e.r9, i.src2.constant());
} else {
e.mov(e.r9, i.src2);
}
e.lea(e.r8, e.StashXmm(0, i.src1));
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
e.vmovaps(i.dest, e.xmm0);
}
static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
// Almost all instances are shamt = 1, but non-constant.
// shamt is [0,7]
uint8_t shamt = src2 & 0x7;
alignas(16) vec128_t value;
_mm_store_si128(reinterpret_cast<__m128i*>(&value), src1);
value.u8[0 ^ 0x3] = value.u8[0 ^ 0x3] >> shamt;
for (int i = 15; i > 0; --i) {
value.u8[i ^ 0x3] = (value.u8[i ^ 0x3] >> shamt) |
(value.u8[(i - 1) ^ 0x3] << (8 - shamt));
}
return _mm_load_si128(reinterpret_cast<__m128i*>(&value));
}
};
EMITTER_OPCODE_TABLE(
OPCODE_SHR,
SHR_I8,
SHR_I16,
SHR_I32,
SHR_I64);
SHR_I64,
SHR_V128);
// ============================================================================

View File

@ -22,46 +22,51 @@ TEST_CASE("SHR_I8", "[instr]") {
INT64_TYPE));
b.Return();
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xF0;
ctx->r[5] = 4;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x0F);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0xFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFF;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x7F);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x80;
ctx->r[5] = 8;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x7F;
ctx->r[5] = 7;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xF0;
ctx->r[5] = 4;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x0F);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0xFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x7F);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x80;
ctx->r[5] = 8;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7F;
ctx->r[5] = 7;
},
[](PPCContext* ctx) {
auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0);
});
}
TEST_CASE("SHR_I16", "[instr]") {
@ -71,46 +76,51 @@ TEST_CASE("SHR_I16", "[instr]") {
INT64_TYPE));
b.Return();
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFF00;
ctx->r[5] = 8;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x00FF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0xFFFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFE;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x7FFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x8000;
ctx->r[5] = 16;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x7FFF;
ctx->r[5] = 15;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF00;
ctx->r[5] = 8;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x00FF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0xFFFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFE;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x7FFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x8000;
ctx->r[5] = 16;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFF;
ctx->r[5] = 15;
},
[](PPCContext* ctx) {
auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0);
});
}
TEST_CASE("SHR_I32", "[instr]") {
@ -120,46 +130,51 @@ TEST_CASE("SHR_I32", "[instr]") {
INT64_TYPE));
b.Return();
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFF0000;
ctx->r[5] = 16;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x0000FFFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFE;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFF);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x80000000;
ctx->r[5] = 32;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x80000000);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFF;
ctx->r[5] = 31;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFF0000;
ctx->r[5] = 16;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x0000FFFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFE;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFF);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x80000000;
ctx->r[5] = 32;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x80000000);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFF;
ctx->r[5] = 31;
},
[](PPCContext* ctx) {
auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0);
});
}
TEST_CASE("SHR_I64", "[instr]") {
@ -168,44 +183,96 @@ TEST_CASE("SHR_I64", "[instr]") {
b.Truncate(LoadGPR(b, 5), INT8_TYPE)));
b.Return();
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF00000000ull;
ctx->r[5] = 32;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x00000000FFFFFFFFull);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFFull;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFFFFFFFFFFull);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFEull;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFFFFFFFFFFull);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x8000000000000000ull;
ctx->r[5] = 64;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x8000000000000000ull);
});
test.Run([](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFFFFFFFFFFull;
ctx->r[5] = 63;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF00000000ull;
ctx->r[5] = 32;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x00000000FFFFFFFFull);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFFull;
ctx->r[5] = 0;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFFFFFFFFFFull);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFEull;
ctx->r[5] = 1;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFFFFFFFFFFull);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x8000000000000000ull;
ctx->r[5] = 64;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x8000000000000000ull);
});
test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFFFFFFFFFFull;
ctx->r[5] = 63;
},
[](PPCContext* ctx) {
auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0);
});
}
TEST_CASE("SHR_V128", "[instr]") {
TestFunction test([](hir::HIRBuilder& b) {
StoreVR(b, 3, b.Shr(LoadVR(b, 4), b.Truncate(LoadGPR(b, 1), INT8_TYPE)));
b.Return();
});
test.Run(
[](PPCContext* ctx) {
ctx->r[1] = 0;
ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
},
[](PPCContext* ctx) {
auto result1 = ctx->v[3];
REQUIRE(result1 ==
vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
});
test.Run(
[](PPCContext* ctx) {
ctx->r[1] = 1;
ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
},
[](PPCContext* ctx) {
auto result1 = ctx->v[3];
REQUIRE(result1 ==
vec128i(0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
});
test.Run(
[](PPCContext* ctx) {
ctx->r[1] = 2;
ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
},
[](PPCContext* ctx) {
auto result1 = ctx->v[3];
REQUIRE(result1 ==
vec128i(0x3FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
});
test.Run(
[](PPCContext* ctx) {
ctx->r[1] = 8;
ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
},
[](PPCContext* ctx) {
auto result1 = ctx->v[3];
REQUIRE(result1 ==
vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
});
}