Emulated SHR_V128 implementation.

This commit is contained in:
Ben Vanik 2015-01-08 23:35:25 -08:00
parent dfba5943da
commit fde3904130
2 changed files with 255 additions and 161 deletions

View File

@ -4192,12 +4192,39 @@ EMITTER(SHR_I64, MATCH(I<OPCODE_SHR, I64<>, I64<>, I8<>>)) {
EmitShrXX<SHR_I64, Reg64>(e, i); EmitShrXX<SHR_I64, Reg64>(e, i);
} }
}; };
EMITTER(SHR_V128, MATCH(I<OPCODE_SHR, V128<>, V128<>, I8<>>)) {
  // Emits a call into EmulateShrV128 rather than a native sequence.
  // The shift amount (constant or register) is placed in r9, the operand
  // produced by StashXmm(0, src1) is loaded into r8 via lea, and after the
  // call the value left in xmm0 is copied into the destination register.
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    // TODO(benvanik): native version (with shift magic).
    if (i.src2.is_constant) {
      e.mov(e.r9, i.src2.constant());
    } else {
      e.mov(e.r9, i.src2);
    }
    e.lea(e.r8, e.StashXmm(0, i.src1));
    e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
    e.vmovaps(i.dest, e.xmm0);
  }

  // Shifts the whole 128-bit value right by (src2 & 0x7) bits, carrying bits
  // across byte boundaries, and returns the shifted vector.
  //
  // Logical byte 0 is the most significant byte: it receives no carry-in, and
  // every other logical byte i receives the low bits of logical byte i - 1.
  // Logical indices are mapped to storage with `^ 0x3`
  // (NOTE(review): presumably to walk each 32-bit lane in big-endian byte
  // order - confirm against vec128_t's layout).
  //
  // The first (void*) parameter is unused here.
  static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
    // Almost all instances are shamt = 1, but non-constant.
    // shamt is [0,7]
    uint8_t shamt = src2 & 0x7;
    alignas(16) vec128_t value;
    _mm_store_si128(reinterpret_cast<__m128i*>(&value), src1);
    // Walk from the least significant byte upward so that each step reads the
    // still-unshifted neighbour (i - 1) as its carry source. When shamt is 0
    // the carry term is shifted left by 8 and truncates to zero on the store.
    for (int i = 15; i > 0; --i) {
      value.u8[i ^ 0x3] = (value.u8[i ^ 0x3] >> shamt) |
                          (value.u8[(i - 1) ^ 0x3] << (8 - shamt));
    }
    // The top byte must be shifted last: shifting it before the loop would
    // discard the bits that need to carry into byte 1.
    value.u8[0 ^ 0x3] = value.u8[0 ^ 0x3] >> shamt;
    return _mm_load_si128(reinterpret_cast<__m128i*>(&value));
  }
};
EMITTER_OPCODE_TABLE( EMITTER_OPCODE_TABLE(
OPCODE_SHR, OPCODE_SHR,
SHR_I8, SHR_I8,
SHR_I16, SHR_I16,
SHR_I32, SHR_I32,
SHR_I64); SHR_I64,
SHR_V128);
// ============================================================================ // ============================================================================

View File

@ -22,7 +22,8 @@ TEST_CASE("SHR_I8", "[instr]") {
INT64_TYPE)); INT64_TYPE));
b.Return(); b.Return();
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xF0; ctx->r[4] = 0xF0;
ctx->r[5] = 4; ctx->r[5] = 4;
}, },
@ -30,7 +31,8 @@ TEST_CASE("SHR_I8", "[instr]") {
auto result = static_cast<uint8_t>(ctx->r[3]); auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x0F); REQUIRE(result == 0x0F);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF; ctx->r[4] = 0xFF;
ctx->r[5] = 0; ctx->r[5] = 0;
}, },
@ -38,7 +40,8 @@ TEST_CASE("SHR_I8", "[instr]") {
auto result = static_cast<uint8_t>(ctx->r[3]); auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0xFF); REQUIRE(result == 0xFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF; ctx->r[4] = 0xFF;
ctx->r[5] = 1; ctx->r[5] = 1;
}, },
@ -46,7 +49,8 @@ TEST_CASE("SHR_I8", "[instr]") {
auto result = static_cast<uint8_t>(ctx->r[3]); auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0x7F); REQUIRE(result == 0x7F);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x80; ctx->r[4] = 0x80;
ctx->r[5] = 8; ctx->r[5] = 8;
}, },
@ -54,7 +58,8 @@ TEST_CASE("SHR_I8", "[instr]") {
auto result = static_cast<uint8_t>(ctx->r[3]); auto result = static_cast<uint8_t>(ctx->r[3]);
REQUIRE(result == 0); REQUIRE(result == 0);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7F; ctx->r[4] = 0x7F;
ctx->r[5] = 7; ctx->r[5] = 7;
}, },
@ -71,7 +76,8 @@ TEST_CASE("SHR_I16", "[instr]") {
INT64_TYPE)); INT64_TYPE));
b.Return(); b.Return();
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFF00; ctx->r[4] = 0xFF00;
ctx->r[5] = 8; ctx->r[5] = 8;
}, },
@ -79,7 +85,8 @@ TEST_CASE("SHR_I16", "[instr]") {
auto result = static_cast<uint16_t>(ctx->r[3]); auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x00FF); REQUIRE(result == 0x00FF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFF; ctx->r[4] = 0xFFFF;
ctx->r[5] = 0; ctx->r[5] = 0;
}, },
@ -87,7 +94,8 @@ TEST_CASE("SHR_I16", "[instr]") {
auto result = static_cast<uint16_t>(ctx->r[3]); auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0xFFFF); REQUIRE(result == 0xFFFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFE; ctx->r[4] = 0xFFFE;
ctx->r[5] = 1; ctx->r[5] = 1;
}, },
@ -95,7 +103,8 @@ TEST_CASE("SHR_I16", "[instr]") {
auto result = static_cast<uint16_t>(ctx->r[3]); auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0x7FFF); REQUIRE(result == 0x7FFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x8000; ctx->r[4] = 0x8000;
ctx->r[5] = 16; ctx->r[5] = 16;
}, },
@ -103,7 +112,8 @@ TEST_CASE("SHR_I16", "[instr]") {
auto result = static_cast<uint16_t>(ctx->r[3]); auto result = static_cast<uint16_t>(ctx->r[3]);
REQUIRE(result == 0); REQUIRE(result == 0);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFF; ctx->r[4] = 0x7FFF;
ctx->r[5] = 15; ctx->r[5] = 15;
}, },
@ -120,7 +130,8 @@ TEST_CASE("SHR_I32", "[instr]") {
INT64_TYPE)); INT64_TYPE));
b.Return(); b.Return();
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFF0000; ctx->r[4] = 0xFFFF0000;
ctx->r[5] = 16; ctx->r[5] = 16;
}, },
@ -128,7 +139,8 @@ TEST_CASE("SHR_I32", "[instr]") {
auto result = static_cast<uint32_t>(ctx->r[3]); auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x0000FFFF); REQUIRE(result == 0x0000FFFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF; ctx->r[4] = 0xFFFFFFFF;
ctx->r[5] = 0; ctx->r[5] = 0;
}, },
@ -136,7 +148,8 @@ TEST_CASE("SHR_I32", "[instr]") {
auto result = static_cast<uint32_t>(ctx->r[3]); auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFF); REQUIRE(result == 0xFFFFFFFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFE; ctx->r[4] = 0xFFFFFFFE;
ctx->r[5] = 1; ctx->r[5] = 1;
}, },
@ -144,7 +157,8 @@ TEST_CASE("SHR_I32", "[instr]") {
auto result = static_cast<uint32_t>(ctx->r[3]); auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFF); REQUIRE(result == 0x7FFFFFFF);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x80000000; ctx->r[4] = 0x80000000;
ctx->r[5] = 32; ctx->r[5] = 32;
}, },
@ -152,7 +166,8 @@ TEST_CASE("SHR_I32", "[instr]") {
auto result = static_cast<uint32_t>(ctx->r[3]); auto result = static_cast<uint32_t>(ctx->r[3]);
REQUIRE(result == 0x80000000); REQUIRE(result == 0x80000000);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFF; ctx->r[4] = 0x7FFFFFFF;
ctx->r[5] = 31; ctx->r[5] = 31;
}, },
@ -168,7 +183,8 @@ TEST_CASE("SHR_I64", "[instr]") {
b.Truncate(LoadGPR(b, 5), INT8_TYPE))); b.Truncate(LoadGPR(b, 5), INT8_TYPE)));
b.Return(); b.Return();
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFF00000000ull; ctx->r[4] = 0xFFFFFFFF00000000ull;
ctx->r[5] = 32; ctx->r[5] = 32;
}, },
@ -176,7 +192,8 @@ TEST_CASE("SHR_I64", "[instr]") {
auto result = static_cast<uint64_t>(ctx->r[3]); auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x00000000FFFFFFFFull); REQUIRE(result == 0x00000000FFFFFFFFull);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFFull; ctx->r[4] = 0xFFFFFFFFFFFFFFFFull;
ctx->r[5] = 0; ctx->r[5] = 0;
}, },
@ -184,7 +201,8 @@ TEST_CASE("SHR_I64", "[instr]") {
auto result = static_cast<uint64_t>(ctx->r[3]); auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0xFFFFFFFFFFFFFFFFull); REQUIRE(result == 0xFFFFFFFFFFFFFFFFull);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0xFFFFFFFFFFFFFFFEull; ctx->r[4] = 0xFFFFFFFFFFFFFFFEull;
ctx->r[5] = 1; ctx->r[5] = 1;
}, },
@ -192,7 +210,8 @@ TEST_CASE("SHR_I64", "[instr]") {
auto result = static_cast<uint64_t>(ctx->r[3]); auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x7FFFFFFFFFFFFFFFull); REQUIRE(result == 0x7FFFFFFFFFFFFFFFull);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x8000000000000000ull; ctx->r[4] = 0x8000000000000000ull;
ctx->r[5] = 64; ctx->r[5] = 64;
}, },
@ -200,7 +219,8 @@ TEST_CASE("SHR_I64", "[instr]") {
auto result = static_cast<uint64_t>(ctx->r[3]); auto result = static_cast<uint64_t>(ctx->r[3]);
REQUIRE(result == 0x8000000000000000ull); REQUIRE(result == 0x8000000000000000ull);
}); });
test.Run([](PPCContext* ctx) { test.Run(
[](PPCContext* ctx) {
ctx->r[4] = 0x7FFFFFFFFFFFFFFFull; ctx->r[4] = 0x7FFFFFFFFFFFFFFFull;
ctx->r[5] = 63; ctx->r[5] = 63;
}, },
@ -209,3 +229,50 @@ TEST_CASE("SHR_I64", "[instr]") {
REQUIRE(result == 0); REQUIRE(result == 0);
}); });
} }
TEST_CASE("SHR_V128", "[instr]") {
  // Function under test: v3 = v4 >> truncate_to_i8(r1).
  TestFunction test([](hir::HIRBuilder& b) {
    StoreVR(b, 3, b.Shr(LoadVR(b, 4), b.Truncate(LoadGPR(b, 1), INT8_TYPE)));
    b.Return();
  });

  // Shift by 0: the all-ones input comes back unchanged.
  test.Run(
      [](PPCContext* ctx) {
        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
        ctx->r[1] = 0;
      },
      [](PPCContext* ctx) {
        auto shifted = ctx->v[3];
        REQUIRE(shifted ==
                vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
      });

  // Shift by 1: only the top bit of the first lane is cleared.
  test.Run(
      [](PPCContext* ctx) {
        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
        ctx->r[1] = 1;
      },
      [](PPCContext* ctx) {
        auto shifted = ctx->v[3];
        REQUIRE(shifted ==
                vec128i(0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
      });

  // Shift by 2: the top two bits of the first lane are cleared.
  test.Run(
      [](PPCContext* ctx) {
        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
        ctx->r[1] = 2;
      },
      [](PPCContext* ctx) {
        auto shifted = ctx->v[3];
        REQUIRE(shifted ==
                vec128i(0x3FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
      });

  // Shift by 8: the expected result equals the input, i.e. the same outcome
  // as the shift-by-0 case.
  test.Run(
      [](PPCContext* ctx) {
        ctx->v[4] = vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
        ctx->r[1] = 8;
      },
      [](PPCContext* ctx) {
        auto shifted = ctx->v[3];
        REQUIRE(shifted ==
                vec128i(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
      });
}