LLVM DSL: rewrite extract and insert

This commit is contained in:
Nekotekina 2019-04-23 15:07:04 +03:00
parent b7b93eae13
commit c83e65f29e
2 changed files with 86 additions and 37 deletions

View File

@ -1405,6 +1405,55 @@ struct llvm_sub_sat
} }
}; };
// DSL expression node that reads a single element out of a vector expression.
//   A1 - vector operand expression, I2 - index expression.
//   T / U - their llvm_common_t types; T must be a vector, U a scalar integer.
template <typename A1, typename I2, typename T = llvm_common_t<A1>, typename U = llvm_common_t<I2>>
struct llvm_extract
{
	// Result type: T with its array extent stripped.
	using type = std::remove_extent_t<T>;

	// Operands (order matters: callers aggregate-initialise {vector, index}).
	llvm_expr_t<A1> a1;
	llvm_expr_t<I2> i2;

	static_assert(llvm_value_t<T>::is_vector, "llvm_extract<>: invalid type");
	static_assert(llvm_value_t<U>::is_int && !llvm_value_t<U>::is_vector, "llvm_extract<>: invalid index type");

	// The same two conditions as the assertions above, exposed as a constant.
	static constexpr bool is_ok =
		llvm_value_t<T>::is_vector &&
		llvm_value_t<U>::is_int &&
		!llvm_value_t<U>::is_vector;

	// Evaluate the vector operand first, then the index, and emit the
	// extractelement through the supplied builder.
	llvm::Value* eval(llvm::IRBuilder<>* ir) const
	{
		const auto vec = a1.eval(ir);
		const auto idx = i2.eval(ir);

		return ir->CreateExtractElement(vec, idx);
	}
};
// DSL expression node that yields a vector with one element replaced.
//   A1 - vector operand, I2 - index expression, A3 - new element expression.
//   T / U / V - their llvm_common_t types; V must equal the element type of T.
template <typename A1, typename I2, typename A3, typename T = llvm_common_t<A1>, typename U = llvm_common_t<I2>, typename V = llvm_common_t<A3>>
struct llvm_insert
{
	// Result type: the vector type itself.
	using type = T;

	// Operands (order matters: callers aggregate-initialise {vector, index, element}).
	llvm_expr_t<A1> a1;
	llvm_expr_t<I2> i2;
	llvm_expr_t<A3> a3;

	static_assert(llvm_value_t<T>::is_vector, "llvm_insert<>: invalid type");
	static_assert(llvm_value_t<U>::is_int && !llvm_value_t<U>::is_vector, "llvm_insert<>: invalid index type");
	static_assert(std::is_same_v<V, std::remove_extent_t<T>>, "llvm_insert<>: invalid element type");

	// Vector/index validity is delegated to llvm_extract; the element type
	// must additionally match T with its extent removed.
	static constexpr bool is_ok =
		llvm_extract<A1, I2>::is_ok &&
		std::is_same_v<V, std::remove_extent_t<T>>;

	// Operands are evaluated in declaration order (vector, index, element).
	llvm::Value* eval(llvm::IRBuilder<>* ir) const
	{
		const auto vec = a1.eval(ir);
		const auto idx = i2.eval(ir);
		const auto elem = a3.eval(ir);

		// Builder argument order is (vector, new element, index).
		return ir->CreateInsertElement(vec, elem, idx);
	}
};
class cpu_translator class cpu_translator
{ {
protected: protected:
@ -1559,6 +1608,30 @@ public:
return llvm_sub_sat<T, U>{std::forward<T>(a), std::forward<U>(b)}; return llvm_sub_sat<T, U>{std::forward<T>(a), std::forward<U>(b)};
} }
template <typename T, typename U, typename = std::enable_if_t<llvm_extract<T, U>::is_ok>>
static auto extract(T&& v, U&& i)
{
return llvm_extract<T, U>{std::forward<T>(v), std::forward<U>(i)};
}
template <typename T, typename = std::enable_if_t<llvm_extract<T, llvm_const_int<u32>>::is_ok>>
static auto extract(T&& v, u32 i)
{
return llvm_extract<T, llvm_const_int<u32>>{std::forward<T>(v), llvm_const_int<u32>{i}};
}
template <typename T, typename U, typename V, typename = std::enable_if_t<llvm_insert<T, U, V>::is_ok>>
static auto insert(T&& v, U&& i, V&& e)
{
return llvm_insert<T, U, V>{std::forward<T>(v), std::forward<U>(i), std::forward<V>(e)};
}
template <typename T, typename V, typename = std::enable_if_t<llvm_insert<T, llvm_const_int<u32>, V>::is_ok>>
static auto insert(T&& v, u32 i, V&& e)
{
return llvm_insert<T, llvm_const_int<u32>, V>{std::forward<T>(v), llvm_const_int<u32>{i}, std::forward<V>(e)};
}
// Average: (a + b + 1) >> 1 // Average: (a + b + 1) >> 1
template <typename T> template <typename T>
inline auto avg(T a, T b) inline auto avg(T a, T b)
@ -1580,22 +1653,6 @@ public:
return result; return result;
} }
template <typename T, typename E>
auto insert(T v, u64 i, E e)
{
value_t<typename T::type> result;
result.value = m_ir->CreateInsertElement(v.eval(m_ir), e.eval(m_ir), i);
return result;
}
template <typename T>
auto extract(T v, u64 i)
{
typename value_t<typename T::type>::base result;
result.value = m_ir->CreateExtractElement(v.eval(m_ir), i);
return result;
}
template <typename T> template <typename T>
auto splat(u64 c) auto splat(u64 c)
{ {

View File

@ -4326,7 +4326,7 @@ public:
void WRCH(spu_opcode_t op) // void WRCH(spu_opcode_t op) //
{ {
const auto val = extract(get_vr(op.rt), 3); const auto val = eval(extract(get_vr(op.rt), 3));
if (m_interp_magn) if (m_interp_magn)
{ {
@ -5051,8 +5051,7 @@ public:
const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3));
const auto i = eval(~s & 0xf); const auto i = eval(~s & 0xf);
auto r = build<u8[16]>(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); auto r = build<u8[16]>(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); set_vr(op.rt, insert(r, i, splat<u8>(0x03)));
set_vr(op.rt, r);
} }
void CHX(spu_opcode_t op) void CHX(spu_opcode_t op)
@ -5060,8 +5059,7 @@ public:
const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3));
const auto i = eval(~s >> 1 & 0x7); const auto i = eval(~s >> 1 & 0x7);
auto r = build<u16[8]>(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); auto r = build<u16[8]>(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); set_vr(op.rt, insert(r, i, splat<u16>(0x0203)));
set_vr(op.rt, r);
} }
void CWX(spu_opcode_t op) void CWX(spu_opcode_t op)
@ -5069,8 +5067,7 @@ public:
const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3));
const auto i = eval(~s >> 2 & 0x3); const auto i = eval(~s >> 2 & 0x3);
auto r = build<u32[4]>(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); auto r = build<u32[4]>(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); set_vr(op.rt, insert(r, i, splat<u32>(0x010203)));
set_vr(op.rt, r);
} }
void CDX(spu_opcode_t op) void CDX(spu_opcode_t op)
@ -5078,8 +5075,7 @@ public:
const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3)); const auto s = eval(extract(get_vr(op.ra), 3) + extract(get_vr(op.rb), 3));
const auto i = eval(~s >> 3 & 0x1); const auto i = eval(~s >> 3 & 0x1);
auto r = build<u64[2]>(0x18191a1b1c1d1e1f, 0x1011121314151617); auto r = build<u64[2]>(0x18191a1b1c1d1e1f, 0x1011121314151617);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); set_vr(op.rt, insert(r, i, splat<u64>(0x01020304050607)));
set_vr(op.rt, r);
} }
void ROTQBI(spu_opcode_t op) void ROTQBI(spu_opcode_t op)
@ -5143,8 +5139,7 @@ public:
const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7)); const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7));
const auto i = eval(~a & 0xf); const auto i = eval(~a & 0xf);
auto r = build<u8[16]>(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10); auto r = build<u8[16]>(0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt8(0x3), i.value); set_vr(op.rt, insert(r, i, splat<u8>(0x03)));
set_vr(op.rt, r);
} }
void CHD(spu_opcode_t op) void CHD(spu_opcode_t op)
@ -5152,8 +5147,7 @@ public:
const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7)); const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7));
const auto i = eval(~a >> 1 & 0x7); const auto i = eval(~a >> 1 & 0x7);
auto r = build<u16[8]>(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011); auto r = build<u16[8]>(0x1e1f, 0x1c1d, 0x1a1b, 0x1819, 0x1617, 0x1415, 0x1213, 0x1011);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt16(0x0203), i.value); set_vr(op.rt, insert(r, i, splat<u16>(0x0203)));
set_vr(op.rt, r);
} }
void CWD(spu_opcode_t op) void CWD(spu_opcode_t op)
@ -5161,8 +5155,7 @@ public:
const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7)); const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7));
const auto i = eval(~a >> 2 & 0x3); const auto i = eval(~a >> 2 & 0x3);
auto r = build<u32[4]>(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213); auto r = build<u32[4]>(0x1c1d1e1f, 0x18191a1b, 0x14151617, 0x10111213);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt32(0x010203), i.value); set_vr(op.rt, insert(r, i, splat<u32>(0x010203)));
set_vr(op.rt, r);
} }
void CDD(spu_opcode_t op) void CDD(spu_opcode_t op)
@ -5170,8 +5163,7 @@ public:
const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7)); const auto a = eval(extract(get_vr(op.ra), 3) + get_imm<u32>(op.i7));
const auto i = eval(~a >> 3 & 0x1); const auto i = eval(~a >> 3 & 0x1);
auto r = build<u64[2]>(0x18191a1b1c1d1e1f, 0x1011121314151617); auto r = build<u64[2]>(0x18191a1b1c1d1e1f, 0x1011121314151617);
r.value = m_ir->CreateInsertElement(r.value, m_ir->getInt64(0x01020304050607), i.value); set_vr(op.rt, insert(r, i, splat<u64>(0x01020304050607)));
set_vr(op.rt, r);
} }
void ROTQBII(spu_opcode_t op) void ROTQBII(spu_opcode_t op)
@ -5646,22 +5638,22 @@ public:
if (c0 && c1 && c1->getType() == get_type<u64>() && c1->getZExtValue() == 0x01020304050607) if (c0 && c1 && c1->getType() == get_type<u64>() && c1->getZExtValue() == 0x01020304050607)
{ {
vtype = get_type<u64[2]>(); vtype = get_type<u64[2]>();
_new = extract(get_vr<u64[2]>(op.ra), 1).value; _new = extract(get_vr<u64[2]>(op.ra), 1).eval(m_ir);
} }
else if (c0 && c1 && c1->getType() == get_type<u32>() && c1->getZExtValue() == 0x010203) else if (c0 && c1 && c1->getType() == get_type<u32>() && c1->getZExtValue() == 0x010203)
{ {
vtype = get_type<u32[4]>(); vtype = get_type<u32[4]>();
_new = extract(get_vr<u32[4]>(op.ra), 3).value; _new = extract(get_vr<u32[4]>(op.ra), 3).eval(m_ir);
} }
else if (c0 && c1 && c1->getType() == get_type<u16>() && c1->getZExtValue() == 0x0203) else if (c0 && c1 && c1->getType() == get_type<u16>() && c1->getZExtValue() == 0x0203)
{ {
vtype = get_type<u16[8]>(); vtype = get_type<u16[8]>();
_new = extract(get_vr<u16[8]>(op.ra), 6).value; _new = extract(get_vr<u16[8]>(op.ra), 6).eval(m_ir);
} }
else if (c0 && c1 && c1->getType() == get_type<u8>() && c1->getZExtValue() == 0x03) else if (c0 && c1 && c1->getType() == get_type<u8>() && c1->getZExtValue() == 0x03)
{ {
vtype = get_type<u8[16]>(); vtype = get_type<u8[16]>();
_new = extract(get_vr<u8[16]>(op.ra), 12).value; _new = extract(get_vr<u8[16]>(op.ra), 12).eval(m_ir);
} }
if (vtype && _new) if (vtype && _new)