mirror of https://git.suyu.dev/suyu/suyu
shader_decode: Implement HFMA2
This commit is contained in:
parent
d6f76307fe
commit
dd91650aaf
|
@ -648,6 +648,7 @@ union Instruction {
|
||||||
BitField<37, 2, HalfPrecision> precision;
|
BitField<37, 2, HalfPrecision> precision;
|
||||||
BitField<32, 1, u64> saturate;
|
BitField<32, 1, u64> saturate;
|
||||||
|
|
||||||
|
BitField<31, 1, u64> negate_b;
|
||||||
BitField<30, 1, u64> negate_c;
|
BitField<30, 1, u64> negate_c;
|
||||||
BitField<35, 2, HalfType> type_c;
|
BitField<35, 2, HalfType> type_c;
|
||||||
} rr;
|
} rr;
|
||||||
|
|
|
@ -2,6 +2,8 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
|
@ -9,6 +11,8 @@
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
using Tegra::Shader::HalfPrecision;
|
||||||
|
using Tegra::Shader::HalfType;
|
||||||
using Tegra::Shader::Instruction;
|
using Tegra::Shader::Instruction;
|
||||||
using Tegra::Shader::OpCode;
|
using Tegra::Shader::OpCode;
|
||||||
|
|
||||||
|
@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
|
||||||
const Instruction instr = {program_code[pc]};
|
const Instruction instr = {program_code[pc]};
|
||||||
const auto opcode = OpCode::Decode(instr);
|
const auto opcode = OpCode::Decode(instr);
|
||||||
|
|
||||||
UNIMPLEMENTED();
|
if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
|
||||||
|
UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
|
||||||
|
} else {
|
||||||
|
UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr auto identity = HalfType::H0_H1;
|
||||||
|
|
||||||
|
const HalfType type_a = instr.hfma2.type_a;
|
||||||
|
const Node op_a = GetRegister(instr.gpr8);
|
||||||
|
|
||||||
|
bool neg_b{}, neg_c{};
|
||||||
|
auto [saturate, type_b, op_b, type_c,
|
||||||
|
op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
|
||||||
|
switch (opcode->get().GetId()) {
|
||||||
|
case OpCode::Id::HFMA2_CR:
|
||||||
|
neg_b = instr.hfma2.negate_b;
|
||||||
|
neg_c = instr.hfma2.negate_c;
|
||||||
|
return {instr.hfma2.saturate, instr.hfma2.type_b,
|
||||||
|
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
|
||||||
|
GetRegister(instr.gpr39)};
|
||||||
|
case OpCode::Id::HFMA2_RC:
|
||||||
|
neg_b = instr.hfma2.negate_b;
|
||||||
|
neg_c = instr.hfma2.negate_c;
|
||||||
|
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
|
||||||
|
instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
|
||||||
|
case OpCode::Id::HFMA2_RR:
|
||||||
|
neg_b = instr.hfma2.rr.negate_b;
|
||||||
|
neg_c = instr.hfma2.rr.negate_c;
|
||||||
|
return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
|
||||||
|
instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
|
||||||
|
case OpCode::Id::HFMA2_IMM_R:
|
||||||
|
neg_c = instr.hfma2.negate_c;
|
||||||
|
return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
|
||||||
|
instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
|
||||||
|
default:
|
||||||
|
return {false, identity, Immediate(0), identity, Immediate(0)};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
|
||||||
|
|
||||||
|
op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
|
||||||
|
op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
|
||||||
|
|
||||||
|
MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
|
||||||
|
Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
|
||||||
|
value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
|
||||||
|
|
||||||
|
SetRegister(bb, instr.gpr0, value);
|
||||||
|
|
||||||
return pc;
|
return pc;
|
||||||
}
|
}
|
||||||
|
|
|
@ -762,9 +762,9 @@ private:
|
||||||
return GenerateBinaryInfix(operation, "/", type, type, type);
|
return GenerateBinaryInfix(operation, "/", type, type, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string FFma(Operation operation) {
|
template <Type type>
|
||||||
return GenerateTernary(operation, "fma", Type::Float, Type::Float, Type::Float,
|
std::string Fma(Operation operation) {
|
||||||
Type::Float);
|
return GenerateTernary(operation, "fma", type, type, type, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <Type type>
|
template <Type type>
|
||||||
|
@ -1231,7 +1231,7 @@ private:
|
||||||
&Add<Type::Float>,
|
&Add<Type::Float>,
|
||||||
&Mul<Type::Float>,
|
&Mul<Type::Float>,
|
||||||
&Div<Type::Float>,
|
&Div<Type::Float>,
|
||||||
&FFma,
|
&Fma<Type::Float>,
|
||||||
&Negate<Type::Float>,
|
&Negate<Type::Float>,
|
||||||
&Absolute<Type::Float>,
|
&Absolute<Type::Float>,
|
||||||
&FClamp,
|
&FClamp,
|
||||||
|
@ -1289,6 +1289,7 @@ private:
|
||||||
|
|
||||||
&Add<Type::HalfFloat>,
|
&Add<Type::HalfFloat>,
|
||||||
&Mul<Type::HalfFloat>,
|
&Mul<Type::HalfFloat>,
|
||||||
|
&Fma<Type::HalfFloat>,
|
||||||
&Absolute<Type::HalfFloat>,
|
&Absolute<Type::HalfFloat>,
|
||||||
&HNegate,
|
&HNegate,
|
||||||
&HMergeF32,
|
&HMergeF32,
|
||||||
|
|
|
@ -110,6 +110,7 @@ enum class OperationCode {
|
||||||
|
|
||||||
HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||||
HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
|
||||||
|
HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
|
||||||
HAbsolute, /// (f16vec2 a) -> f16vec2
|
HAbsolute, /// (f16vec2 a) -> f16vec2
|
||||||
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
|
HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
|
||||||
HMergeF32, /// (f16vec2 src) -> float
|
HMergeF32, /// (f16vec2 src) -> float
|
||||||
|
|
Loading…
Reference in New Issue