diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc
index fcc4649d9..a3b207425 100644
--- a/src/alloy/backend/x64/x64_sequences.cc
+++ b/src/alloy/backend/x64/x64_sequences.cc
@@ -4192,12 +4192,50 @@ EMITTER(SHR_I64, MATCH(I<OPCODE_SHR, I64<>, I64<>, I8<>>)) {
     EmitShrXX(e, i);
   }
 };
+// Right shift of a whole 128-bit vector by a bit count in [0,7].
+// Emulated through a native helper call until a native sequence exists.
+EMITTER(SHR_V128, MATCH(I<OPCODE_SHR, V128<>, V128<>, I8<>>)) {
+  static void Emit(X64Emitter& e, const EmitArgType& i) {
+    // TODO(benvanik): native version (with shift magic).
+    // r9 receives the shift amount and r8 the address of the stashed
+    // source vector before the helper is called.
+    if (i.src2.is_constant) {
+      e.mov(e.r9, i.src2.constant());
+    } else {
+      e.mov(e.r9, i.src2);
+    }
+    e.lea(e.r8, e.StashXmm(0, i.src1));
+    e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
+    // The helper's result is copied out of xmm0 into the destination.
+    e.vmovaps(i.dest, e.xmm0);
+  }
+  // Shifts src1 right by (src2 & 7) bits. Bytes are addressed through
+  // `index ^ 0x3`; byte i pulls its carry bits from byte i - 1.
+  static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
+    // Almost all instances are shamt = 1, but non-constant.
+    // shamt is [0,7]
+    uint8_t shamt = src2 & 0x7;
+    alignas(16) vec128_t value;
+    _mm_store_si128(reinterpret_cast<__m128i*>(&value), src1);
+    // Walk from byte 15 down to byte 1 so each byte reads its neighbour
+    // (byte i - 1) before that neighbour has been shifted.
+    for (int i = 15; i > 0; --i) {
+      value.u8[i ^ 0x3] = (value.u8[i ^ 0x3] >> shamt) |
+                          (value.u8[(i - 1) ^ 0x3] << (8 - shamt));
+    }
+    // Byte 0 has nothing to borrow from; it must be shifted last, after
+    // byte 1 has consumed its original low bits.
+    value.u8[0 ^ 0x3] = value.u8[0 ^ 0x3] >> shamt;
+    return _mm_load_si128(reinterpret_cast<__m128i*>(&value));
+  }
+};
 EMITTER_OPCODE_TABLE(
     OPCODE_SHR,
     SHR_I8,
     SHR_I16,
     SHR_I32,
-    SHR_I64);
+    SHR_I64,
+    SHR_V128);
 
 
 // ============================================================================
diff --git a/src/alloy/test/test_shr.cc b/src/alloy/test/test_shr.cc
index ff5b10634..d79237000 100644
--- a/src/alloy/test/test_shr.cc
+++ b/src/alloy/test/test_shr.cc
@@ -22,46 +22,51 @@ TEST_CASE("SHR_I8", "[instr]") {
                                 INT64_TYPE));
     b.Return();
   });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xF0;
-             ctx->r[5] = 4;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint8_t>(ctx->r[3]);
-             REQUIRE(result == 0x0F);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFF;
-             ctx->r[5] = 0;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint8_t>(ctx->r[3]);
-             REQUIRE(result == 0xFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFF;
-             ctx->r[5] = 1;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint8_t>(ctx->r[3]);
-             REQUIRE(result == 0x7F);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x80;
-             ctx->r[5] = 8;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint8_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x7F;
-             ctx->r[5] = 7;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint8_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xF0;
+        ctx->r[5] = 4;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint8_t>(ctx->r[3]);
+        REQUIRE(result == 0x0F);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFF;
+        ctx->r[5] = 0;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint8_t>(ctx->r[3]);
+        REQUIRE(result == 0xFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFF;
+        ctx->r[5] = 1;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint8_t>(ctx->r[3]);
+        REQUIRE(result == 0x7F);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x80;
+        ctx->r[5] = 8;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint8_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x7F;
+        ctx->r[5] = 7;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint8_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
 }
 
 TEST_CASE("SHR_I16", "[instr]") {
@@ -71,46 +76,51 @@ TEST_CASE("SHR_I16", "[instr]") {
                                 INT64_TYPE));
     b.Return();
   });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFF00;
-             ctx->r[5] = 8;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint16_t>(ctx->r[3]);
-             REQUIRE(result == 0x00FF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFF;
-             ctx->r[5] = 0;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint16_t>(ctx->r[3]);
-             REQUIRE(result == 0xFFFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFE;
-             ctx->r[5] = 1;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint16_t>(ctx->r[3]);
-             REQUIRE(result == 0x7FFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x8000;
-             ctx->r[5] = 16;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint16_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x7FFF;
-             ctx->r[5] = 15;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint16_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFF00;
+        ctx->r[5] = 8;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint16_t>(ctx->r[3]);
+        REQUIRE(result == 0x00FF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFF;
+        ctx->r[5] = 0;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint16_t>(ctx->r[3]);
+        REQUIRE(result == 0xFFFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFE;
+        ctx->r[5] = 1;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint16_t>(ctx->r[3]);
+        REQUIRE(result == 0x7FFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x8000;
+        ctx->r[5] = 16;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint16_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x7FFF;
+        ctx->r[5] = 15;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint16_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
 }
 
 TEST_CASE("SHR_I32", "[instr]") {
@@ -120,46 +130,51 @@ TEST_CASE("SHR_I32", "[instr]") {
                                 INT64_TYPE));
     b.Return();
   });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFF0000;
-             ctx->r[5] = 16;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint32_t>(ctx->r[3]);
-             REQUIRE(result == 0x0000FFFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFFFFFF;
-             ctx->r[5] = 0;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint32_t>(ctx->r[3]);
-             REQUIRE(result == 0xFFFFFFFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFFFFFE;
-             ctx->r[5] = 1;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint32_t>(ctx->r[3]);
-             REQUIRE(result == 0x7FFFFFFF);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x80000000;
-             ctx->r[5] = 32;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint32_t>(ctx->r[3]);
-             REQUIRE(result == 0x80000000);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x7FFFFFFF;
-             ctx->r[5] = 31;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint32_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFF0000;
+        ctx->r[5] = 16;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint32_t>(ctx->r[3]);
+        REQUIRE(result == 0x0000FFFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFFFFFF;
+        ctx->r[5] = 0;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint32_t>(ctx->r[3]);
+        REQUIRE(result == 0xFFFFFFFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFFFFFE;
+        ctx->r[5] = 1;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint32_t>(ctx->r[3]);
+        REQUIRE(result == 0x7FFFFFFF);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x80000000;
+        ctx->r[5] = 32;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint32_t>(ctx->r[3]);
+        REQUIRE(result == 0x80000000);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x7FFFFFFF;
+        ctx->r[5] = 31;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint32_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
 }
 
 TEST_CASE("SHR_I64", "[instr]") {
@@ -168,44 +183,108 @@ TEST_CASE("SHR_I64", "[instr]") {
                          b.Truncate(LoadGPR(b, 5), INT8_TYPE)));
     b.Return();
   });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFFFFFF00000000ull;
-             ctx->r[5] = 32;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint64_t>(ctx->r[3]);
-             REQUIRE(result == 0x00000000FFFFFFFFull);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFFFFFFFFFFFFFFull;
-             ctx->r[5] = 0;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint64_t>(ctx->r[3]);
-             REQUIRE(result == 0xFFFFFFFFFFFFFFFFull);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0xFFFFFFFFFFFFFFFEull;
-             ctx->r[5] = 1;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint64_t>(ctx->r[3]);
-             REQUIRE(result == 0x7FFFFFFFFFFFFFFFull);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x8000000000000000ull;
-             ctx->r[5] = 64;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint64_t>(ctx->r[3]);
-             REQUIRE(result == 0x8000000000000000ull);
-           });
-  test.Run([](PPCContext* ctx) {
-             ctx->r[4] = 0x7FFFFFFFFFFFFFFFull;
-             ctx->r[5] = 63;
-           },
-           [](PPCContext* ctx) {
-             auto result = static_cast<uint64_t>(ctx->r[3]);
-             REQUIRE(result == 0);
-           });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFFFFFF00000000ull;
+        ctx->r[5] = 32;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint64_t>(ctx->r[3]);
+        REQUIRE(result == 0x00000000FFFFFFFFull);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFFFFFFFFFFFFFFull;
+        ctx->r[5] = 0;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint64_t>(ctx->r[3]);
+        REQUIRE(result == 0xFFFFFFFFFFFFFFFFull);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0xFFFFFFFFFFFFFFFEull;
+        ctx->r[5] = 1;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint64_t>(ctx->r[3]);
+        REQUIRE(result == 0x7FFFFFFFFFFFFFFFull);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x8000000000000000ull;
+        ctx->r[5] = 64;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint64_t>(ctx->r[3]);
+        REQUIRE(result == 0x8000000000000000ull);
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[4] = 0x7FFFFFFFFFFFFFFFull;
+        ctx->r[5] = 63;
+      },
+      [](PPCContext* ctx) {
+        auto result = static_cast<uint64_t>(ctx->r[3]);
+        REQUIRE(result == 0);
+      });
 }
+
+TEST_CASE("SHR_V128", "[instr]") {
+  TestFunction test([](hir::HIRBuilder& b) {
+    StoreVR(b, 3, b.Shr(LoadVR(b, 4), b.Truncate(LoadGPR(b, 1), INT8_TYPE)));
+    b.Return();
+  });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[1] = 0;
+        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+      },
+      [](PPCContext* ctx) {
+        auto result1 = ctx->v[3];
+        REQUIRE(result1 ==
+                vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[1] = 1;
+        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+      },
+      [](PPCContext* ctx) {
+        auto result1 = ctx->v[3];
+        REQUIRE(result1 ==
+                vec128i(0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[1] = 2;
+        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+      },
+      [](PPCContext* ctx) {
+        auto result1 = ctx->v[3];
+        REQUIRE(result1 ==
+                vec128i(0x3FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+      });
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[1] = 8;
+        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+      },
+      [](PPCContext* ctx) {
+        auto result1 = ctx->v[3];
+        REQUIRE(result1 ==
+                vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+      });
+  // The low bit of the first byte must carry into the top bit of the
+  // second byte; all-ones inputs cannot detect a lost carry.
+  test.Run(
+      [](PPCContext* ctx) {
+        ctx->r[1] = 1;
+        ctx->v[4] = vec128i(0x01000000, 0x00000000, 0x00000000, 0x00000000);
+      },
+      [](PPCContext* ctx) {
+        auto result1 = ctx->v[3];
+        REQUIRE(result1 ==
+                vec128i(0x00800000, 0x00000000, 0x00000000, 0x00000000));
+      });
+}
\ No newline at end of file