// Patch summary (commentary only; the diff text below this preamble is unchanged).
//
// Purpose: fix saturating VECTOR_ADD for 32-bit lanes in two backends, add
// splat-style vec128 constructors, and add a test file for VECTOR_ADD.
//
// src/alloy/backend/ivm/ivm_intcode.cc  (Translate_VECTOR_ADD_I32)
//   - Unsigned saturate path: each VECI4 operand is cast to uint64_t before
//     the addition, so the sum is formed in 64 bits and the following
//     `v > 0xFFFFFFFF` check can observe a carry out of 32 bits.
//     (Presumably VECI4 yields a 32-bit lane, so the old sum wrapped before
//     being widened -- confirm against the VECI4 macro.)
//   - Signed saturate path: each operand is cast to int32_t and then to
//     int64_t before the addition. Previously the two int32_t values were
//     added first and only the result was widened into the int64_t `v`.
//
// src/alloy/backend/x64/x64_sequences.cc  (VECTOR_ADD emitter)
//   - The final vblendvps of the signed-saturate sequence now names `dest`
//     as its first operand instead of e.xmm2.
//   - NOTE(review): the call uses the 3-operand form. If that form treats the
//     first operand as both destination and first source, the intermediate
//     value built in e.xmm2 by the earlier blend is not an input here unless
//     dest aliases e.xmm2 -- confirm against the emitter's overloads.
//
// src/alloy/vec128.h
//   - Adds vec128i(uint32_t src): returns a vec128_t with i4[0..3] all set to
//     src.
//   - Adds vec128f(float src): returns a vec128_t with f4[0..3] all set to src.
//
// tools/alloy-test/test_vector_add.cc  (new file)
//   - TEST_CASE blocks for b.VectorAdd with INT8_TYPE, INT16_TYPE and
//     INT32_TYPE, each with no flags, ARITHMETIC_SATURATE, and
//     ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED, plus FLOAT32_TYPE.
//   - Each case stores inputs in ctx->v[4] and ctx->v[5], then checks ctx->v[3]
//     with REQUIRE.
//   - The FLOAT32 case compares against raw bit patterns built with vec128i.
//   - Uses vec128b(...) and vec128s(...) helpers that this patch does not add;
//     presumably they already exist in vec128.h -- verify.
//
// NOTE(review): this copy of the patch has had its line breaks collapsed into
// spaces, so it will not apply as-is. Text that looked like angle-bracket
// content also appears to be missing: the new test file's `#include` has no
// header name, and the EMITTER hunk context reads `MATCH(I, V128<>, V128<>>)`.
// Recover both from the upstream commit before using this patch.
diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 2327458f3..09da2add0 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -2630,7 +2630,7 @@ uint32_t Translate_VECTOR_ADD_I32(IntCodeState& ics, const IntCode* i) { if (arithmetic_flags & ARITHMETIC_SATURATE) { if (arithmetic_flags & ARITHMETIC_UNSIGNED) { for (int n = 0; n < 4; n++) { - uint64_t v = VECI4(src1, n) + VECI4(src2, n); + uint64_t v = (uint64_t)VECI4(src1, n) + (uint64_t)VECI4(src2, n); if (v > 0xFFFFFFFF) { VECI4(dest, n) = 0xFFFFFFFF; ics.did_saturate = 1; @@ -2640,7 +2640,7 @@ uint32_t Translate_VECTOR_ADD_I32(IntCodeState& ics, const IntCode* i) { } } else { for (int n = 0; n < 4; n++) { - int64_t v = (int32_t)VECI4(src1, n) + (int32_t)VECI4(src2, n); + int64_t v = (int64_t)(int32_t)VECI4(src1, n) + (int64_t)(int32_t)VECI4(src2, n); if (v > 0x7FFFFFFF) { VECI4(dest, n) = 0x7FFFFFFF; ics.did_saturate = 1; diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index a97c58782..2edda3870 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -2749,7 +2749,7 @@ EMITTER(VECTOR_ADD, MATCH(I, V128<>, V128<>>)) { e.vblendvps(e.xmm2, e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS), e.xmm2); e.vpor(e.xmm1, src1, src2); // sign_or e.vpandn(e.xmm1, e.xmm0); // max_sat_mask - e.vblendvps(e.xmm2, e.GetXmmConstPtr(XMMAbsMaskPS), e.xmm1); + e.vblendvps(dest, e.GetXmmConstPtr(XMMAbsMaskPS), e.xmm1); } } else { e.vpaddd(dest, src1, src2); diff --git a/src/alloy/vec128.h b/src/alloy/vec128.h index a77a5aa79..9a7b8f728 100644 --- a/src/alloy/vec128.h +++ b/src/alloy/vec128.h @@ -42,6 +42,13 @@ typedef struct alignas(16) vec128_s { return low == b.low && high == b.high; } } vec128_t; +static inline vec128_t vec128i(uint32_t src) { + vec128_t v; + for (auto i = 0; i < 4; ++i) { + v.i4[i] = src; + } + return v; +} static inline vec128_t vec128i(uint32_t x, 
uint32_t y, uint32_t z, uint32_t w) { vec128_t v; v.i4[0] = x; @@ -50,6 +57,13 @@ static inline vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { v.i4[3] = w; return v; } +static inline vec128_t vec128f(float src) { + vec128_t v; + for (auto i = 0; i < 4; ++i) { + v.f4[i] = src; + } + return v; +} static inline vec128_t vec128f(float x, float y, float z, float w) { vec128_t v; v.f4[0] = x; diff --git a/tools/alloy-test/test_vector_add.cc b/tools/alloy-test/test_vector_add.cc new file mode 100644 index 000000000..08a60715d --- /dev/null +++ b/tools/alloy-test/test_vector_add.cc @@ -0,0 +1,280 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +using namespace alloy; +using namespace alloy::hir; +using namespace alloy::runtime; +using namespace alloy::test; +using alloy::frontend::ppc::PPCContext; + +TEST_CASE("VECTOR_ADD_I8", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT8_TYPE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = + vec128b(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + ctx->v[5] = vec128b(100, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128b(100, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128b(UINT8_MAX); + ctx->v[5] = vec128b(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128b(0)); + }); +} + +TEST_CASE("VECTOR_ADD_I8_SAT_SIGNED", "[instr]") { + 
TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT8_TYPE, + ARITHMETIC_SATURATE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128b(INT8_MAX); + ctx->v[5] = vec128b(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128b(INT8_MAX)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128b(INT8_MIN); + ctx->v[5] = vec128b(-1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128b(INT8_MIN)); + }); +} + +TEST_CASE("VECTOR_ADD_I8_SAT_UNSIGNED", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT8_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128b(UINT8_MAX); + ctx->v[5] = vec128b(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128b(UINT8_MAX)); + }); +} + +TEST_CASE("VECTOR_ADD_I16", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT16_TYPE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(0, 1, 2, 3, 4, 5, 6, 7); + ctx->v[5] = vec128s(100, 1, 2, 3, 4, 5, 6, 7); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(100, 2, 4, 6, 8, 10, 12, 14)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(UINT16_MAX); + ctx->v[5] = vec128s(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(0)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(0); + ctx->v[5] = vec128s(-1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(UINT16_MAX)); + }); +} + +TEST_CASE("VECTOR_ADD_I16_SAT_SIGNED", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT16_TYPE, + ARITHMETIC_SATURATE)); + b.Return(); 
+ }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(INT16_MAX); + ctx->v[5] = vec128s(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(INT16_MAX)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(INT16_MIN); + ctx->v[5] = vec128s(-1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(INT16_MIN)); + }); +} + +TEST_CASE("VECTOR_ADD_I16_SAT_UNSIGNED", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT16_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128s(UINT16_MAX); + ctx->v[5] = vec128s(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128s(UINT16_MAX)); + }); +} + +TEST_CASE("VECTOR_ADD_I32", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT32_TYPE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(0, 1, 2, 3); + ctx->v[5] = vec128i(100, 1, 2, 3); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(100, 2, 4, 6)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(UINT32_MAX); + ctx->v[5] = vec128i(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(0)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(0); + ctx->v[5] = vec128i(-1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(UINT32_MAX)); + }); +} + +TEST_CASE("VECTOR_ADD_I32_SAT_SIGNED", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT32_TYPE, + ARITHMETIC_SATURATE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(5); + ctx->v[5] = vec128i(5); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(10)); 
+ }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(INT32_MAX); + ctx->v[5] = vec128i(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(INT32_MAX)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(INT32_MIN); + ctx->v[5] = vec128i(-1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(INT32_MIN)); + }); +} + +TEST_CASE("VECTOR_ADD_I32_SAT_UNSIGNED", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), INT32_TYPE, + ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(5); + ctx->v[5] = vec128i(5); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(10)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128i(UINT32_MAX); + ctx->v[5] = vec128i(1); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(UINT32_MAX)); + }); +} + +TEST_CASE("VECTOR_ADD_F32", "[instr]") { + TestFunction test([](hir::HIRBuilder& b) { + StoreVR(b, 3, b.VectorAdd(LoadVR(b, 4), LoadVR(b, 5), FLOAT32_TYPE)); + b.Return(); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128f(0.12f, 0.34f, 0.56f, 0.78f); + ctx->v[5] = vec128f(0.12f, 0.34f, 0.56f, 0.78f); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == + vec128i(0x3E75C28F, 0x3F2E147B, 0x3F8F5C29, 0x3FC7AE14)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128f(FLT_MAX); + ctx->v[5] = vec128f(FLT_MAX); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(0x7F800000)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128f(FLT_MIN); + ctx->v[5] = vec128f(-1.0f); + }, + [](PPCContext* ctx) { + auto result = ctx->v[3]; + REQUIRE(result == vec128i(0xBF800000)); + }); + test.Run([](PPCContext* ctx) { + ctx->v[4] = vec128f(FLT_MAX); + ctx->v[5] = vec128f(1.0f); + }, + [](PPCContext* ctx) 
{ + auto result = ctx->v[3]; + REQUIRE(result == vec128i(0x7F7FFFFF)); + }); +}