xenia-canary/src/xenia/gpu/spirv_shader_translator.cc

/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_shader_translator.h"
#include <algorithm>
#include <cfloat>
#include <cstring>
#include <memory>
#include "xenia/base/logging.h"
#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
namespace xe {
namespace gpu {
using namespace ucode;
constexpr uint32_t kMaxInterpolators = 16;
constexpr uint32_t kMaxTemporaryRegisters = 64;
using spv::GLSLstd450;
using spv::Id;
using spv::Op;
SpirvShaderTranslator::SpirvShaderTranslator() {
compiler_.AddPass(std::make_unique<spirv::ControlFlowSimplificationPass>());
compiler_.AddPass(std::make_unique<spirv::ControlFlowAnalysisPass>());
}
SpirvShaderTranslator::~SpirvShaderTranslator() = default;
void SpirvShaderTranslator::StartTranslation() {
// Create a new builder.
builder_ = std::make_unique<spv::Builder>(0xFFFFFFFF);
auto& b = *builder_;
// Import required modules.
glsl_std_450_instruction_set_ = b.import("GLSL.std.450");
// Configure environment.
b.setSource(spv::SourceLanguage::SourceLanguageUnknown, 0);
b.setMemoryModel(spv::AddressingModel::AddressingModelLogical,
spv::MemoryModel::MemoryModelGLSL450);
b.addCapability(spv::Capability::CapabilityShader);
b.addCapability(spv::Capability::CapabilityGenericPointer);
if (is_vertex_shader()) {
b.addCapability(spv::Capability::CapabilityClipDistance);
b.addCapability(spv::Capability::CapabilityCullDistance);
}
if (is_pixel_shader()) {
b.addCapability(spv::Capability::CapabilityDerivativeControl);
}
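// The shader body is emitted into a standalone translated_main() function;
// CompleteTranslation() later synthesizes the real entry point (main) that
// calls it.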
spv::Block* function_block = nullptr;
translated_main_ =
b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main",
{}, {}, &function_block);
bool_type_ = b.makeBoolType();
float_type_ = b.makeFloatType(32);
int_type_ = b.makeIntType(32);
uint_type_ = b.makeUintType(32);
vec2_float_type_ = b.makeVectorType(float_type_, 2);
vec3_float_type_ = b.makeVectorType(float_type_, 3);
vec4_float_type_ = b.makeVectorType(float_type_, 4);
vec4_uint_type_ = b.makeVectorType(uint_type_, 4);
vec4_bool_type_ = b.makeVectorType(bool_type_, 4);
vec4_float_one_ = b.makeCompositeConstant(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(1.f), b.makeFloatConstant(1.f),
b.makeFloatConstant(1.f), b.makeFloatConstant(1.f)}));
vec4_float_zero_ = b.makeCompositeConstant(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)}));
registers_type_ = b.makeArrayType(vec4_float_type_,
b.makeUintConstant(register_count()), 0);
registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction,
registers_type_, "r");
aL_ = b.createVariable(spv::StorageClass::StorageClassFunction,
vec4_uint_type_, "aL");
p0_ = b.createVariable(spv::StorageClass::StorageClassFunction, bool_type_,
"p0");
ps_ = b.createVariable(spv::StorageClass::StorageClassFunction, float_type_,
"ps");
pv_ = b.createVariable(spv::StorageClass::StorageClassFunction,
vec4_float_type_, "pv");
a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_,
"a0");
// Uniform constants.
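// 512 vec4 float constants, 32 packed loop constants and 8 dwords of bool
// constants (256 bits). Roughly equivalent GLSL, for reference only:
//   uniform consts_type {
//     vec4 float_consts[512];
//     uint loop_consts[32];
//     uint bool_consts[8];
//   } consts;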
Id float_consts_type =
b.makeArrayType(vec4_float_type_, b.makeUintConstant(512), 1);
Id loop_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(32), 1);
Id bool_consts_type = b.makeArrayType(uint_type_, b.makeUintConstant(8), 1);
Id consts_struct_type = b.makeStructType(
{float_consts_type, loop_consts_type, bool_consts_type}, "consts_type");
b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock);
// Constants member decorations.
b.addMemberDecoration(consts_struct_type, 0,
spv::Decoration::DecorationOffset, 0);
b.addMemberDecoration(consts_struct_type, 0,
spv::Decoration::DecorationArrayStride,
4 * sizeof(float));
b.addMemberName(consts_struct_type, 0, "float_consts");
b.addMemberDecoration(consts_struct_type, 1,
spv::Decoration::DecorationOffset,
512 * 4 * sizeof(float));
b.addMemberDecoration(consts_struct_type, 1,
spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
b.addMemberName(consts_struct_type, 1, "loop_consts");
b.addMemberDecoration(consts_struct_type, 2,
spv::Decoration::DecorationOffset,
512 * 4 * sizeof(float) + 32 * sizeof(uint32_t));
b.addMemberDecoration(consts_struct_type, 2,
spv::Decoration::DecorationArrayStride,
sizeof(uint32_t));
b.addMemberName(consts_struct_type, 2, "bool_consts");
consts_ = b.createVariable(spv::StorageClass::StorageClassUniform,
consts_struct_type, "consts");
b.addDecoration(consts_, spv::Decoration::DecorationDescriptorSet, 0);
if (is_vertex_shader()) {
b.addDecoration(consts_, spv::Decoration::DecorationBinding, 0);
} else if (is_pixel_shader()) {
b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1);
}
// Push constants, represented by SpirvPushConstants.
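// Member offsets come from offsetof() on the host-side SpirvPushConstants
// struct so the block layout stays in sync with the C++ definition.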
Id push_constants_type = b.makeStructType(
{vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type_},
"push_consts_type");
b.addDecoration(push_constants_type, spv::Decoration::DecorationBlock);
// float4 window_scale;
b.addMemberDecoration(
push_constants_type, 0, spv::Decoration::DecorationOffset,
static_cast<int>(offsetof(SpirvPushConstants, window_scale)));
b.addMemberName(push_constants_type, 0, "window_scale");
// float4 vtx_fmt;
b.addMemberDecoration(
push_constants_type, 1, spv::Decoration::DecorationOffset,
static_cast<int>(offsetof(SpirvPushConstants, vtx_fmt)));
b.addMemberName(push_constants_type, 1, "vtx_fmt");
// float4 alpha_test;
b.addMemberDecoration(
push_constants_type, 2, spv::Decoration::DecorationOffset,
static_cast<int>(offsetof(SpirvPushConstants, alpha_test)));
b.addMemberName(push_constants_type, 2, "alpha_test");
// uint ps_param_gen;
b.addMemberDecoration(
push_constants_type, 3, spv::Decoration::DecorationOffset,
static_cast<int>(offsetof(SpirvPushConstants, ps_param_gen)));
b.addMemberName(push_constants_type, 3, "ps_param_gen");
push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant,
push_constants_type, "push_consts");
// Texture bindings
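// Four arrays of 32 combined image+sampler handles (1D, 2D, 3D, cube), bound
// at descriptor set 1, bindings 0-3.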
Id tex_t[] = {b.makeSampledImageType(b.makeImageType(
float_type_, spv::Dim::Dim1D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown)),
b.makeSampledImageType(b.makeImageType(
float_type_, spv::Dim::Dim2D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown)),
b.makeSampledImageType(b.makeImageType(
float_type_, spv::Dim::Dim3D, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown)),
b.makeSampledImageType(b.makeImageType(
float_type_, spv::Dim::DimCube, false, false, false, 1,
spv::ImageFormat::ImageFormatUnknown))};
Id tex_a_t[] = {b.makeArrayType(tex_t[0], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[1], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[2], b.makeUintConstant(32), 0),
b.makeArrayType(tex_t[3], b.makeUintConstant(32), 0)};
for (int i = 0; i < 4; i++) {
tex_[i] = b.createVariable(spv::StorageClass::StorageClassUniformConstant,
tex_a_t[i],
xe::format_string("textures%dD", i + 1).c_str());
b.addDecoration(tex_[i], spv::Decoration::DecorationDescriptorSet, 1);
b.addDecoration(tex_[i], spv::Decoration::DecorationBinding, i);
}
// Interpolators.
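// An array of 16 vec4s at location 0: written as outputs by the vertex shader
// and read back as inputs by the pixel shader.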
Id interpolators_type = b.makeArrayType(
vec4_float_type_, b.makeUintConstant(kMaxInterpolators), 0);
if (is_vertex_shader()) {
// Vertex inputs/outputs.
for (const auto& binding : vertex_bindings()) {
for (const auto& attrib : binding.attributes) {
Id attrib_type = 0;
switch (attrib.fetch_instr.attributes.data_format) {
case VertexFormat::k_32:
case VertexFormat::k_32_FLOAT:
attrib_type = float_type_;
break;
case VertexFormat::k_16_16:
case VertexFormat::k_32_32:
case VertexFormat::k_16_16_FLOAT:
case VertexFormat::k_32_32_FLOAT:
attrib_type = vec2_float_type_;
break;
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
case VertexFormat::k_32_32_32_FLOAT:
attrib_type = vec3_float_type_;
break;
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_2_10_10_10:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_32_32_32_32:
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT:
attrib_type = vec4_float_type_;
break;
default:
assert_always();
}
auto attrib_var = b.createVariable(
spv::StorageClass::StorageClassInput, attrib_type,
xe::format_string("vf%d_%d", binding.fetch_constant,
attrib.fetch_instr.attributes.offset)
.c_str());
b.addDecoration(attrib_var, spv::Decoration::DecorationLocation,
attrib.attrib_index);
vertex_binding_map_[binding.fetch_constant]
[attrib.fetch_instr.attributes.offset] = attrib_var;
}
}
interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput,
interpolators_type, "interpolators");
b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0);
for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators);
i++) {
// Zero interpolators.
auto ptr = b.createAccessChain(spv::StorageClass::StorageClassOutput,
interpolators_,
std::vector<Id>({b.makeUintConstant(i)}));
b.createStore(vec4_float_zero_, ptr);
}
pos_ = b.createVariable(spv::StorageClass::StorageClassOutput,
vec4_float_type_, "gl_Position");
b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInPosition);
vertex_id_ = b.createVariable(spv::StorageClass::StorageClassInput,
int_type_, "gl_VertexId");
b.addDecoration(vertex_id_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInVertexId);
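// Seed r0.x with the vertex index (converted to float) so vertex fetch
// instructions can compare their fetch address against gl_VertexId.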
auto vertex_id = b.createLoad(vertex_id_);
vertex_id = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_id);
auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction,
registers_ptr_,
std::vector<Id>({b.makeUintConstant(0)}));
auto r0 = b.createLoad(r0_ptr);
r0 = b.createCompositeInsert(vertex_id, r0, vec4_float_type_,
std::vector<uint32_t>({0}));
b.createStore(r0, r0_ptr);
} else {
// Pixel inputs from vertex shader.
interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput,
interpolators_type, "interpolators");
b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0);
// Pixel fragment outputs (one per render target).
Id frag_outputs_type =
b.makeArrayType(vec4_float_type_, b.makeUintConstant(4), 0);
frag_outputs_ = b.createVariable(spv::StorageClass::StorageClassOutput,
frag_outputs_type, "oC");
b.addDecoration(frag_outputs_, spv::Decoration::DecorationLocation, 0);
frag_depth_ = b.createVariable(spv::StorageClass::StorageClassOutput,
float_type_, "gl_FragDepth");
b.addDecoration(frag_depth_, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInFragDepth);
// TODO(benvanik): frag depth, etc.
// Copy interpolators to r[0..16].
// TODO: Need physical addressing in order to do this.
// b.createNoResultOp(spv::Op::OpCopyMemorySized,
// {registers_ptr_, interpolators_,
// b.makeUintConstant(16 * 4 * sizeof(float))});
for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators);
i++) {
// For now, copy interpolators register-by-register :/
auto idx = b.makeUintConstant(i);
auto i_a = b.createAccessChain(spv::StorageClass::StorageClassInput,
interpolators_, std::vector<Id>({idx}));
auto r_a = b.createAccessChain(spv::StorageClass::StorageClassFunction,
registers_ptr_, std::vector<Id>({idx}));
b.createNoResultOp(spv::Op::OpCopyMemory, std::vector<Id>({r_a, i_a}));
}
// Setup ps_param_gen
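// ps_param_gen selects the register that receives the generated parameter
// vector (gl_FragCoord.xy in xy, gl_PointCoord in zw); a value of 0xFFFFFFFF
// disables it.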
auto ps_param_gen_idx_ptr = b.createAccessChain(
spv::StorageClass::StorageClassPushConstant, push_consts_,
std::vector<Id>({b.makeUintConstant(3)}));
auto ps_param_gen_idx = b.createLoad(ps_param_gen_idx_ptr);
auto frag_coord = b.createVariable(spv::StorageClass::StorageClassInput,
vec4_float_type_, "gl_FragCoord");
b.addDecoration(frag_coord, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInFragCoord);
auto point_coord = b.createVariable(spv::StorageClass::StorageClassInput,
vec2_float_type_, "gl_PointCoord");
b.addDecoration(point_coord, spv::Decoration::DecorationBuiltIn,
spv::BuiltIn::BuiltInPointCoord);
auto param = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_,
{frag_coord, point_coord, 0, 1, 4, 5});
/*
// TODO: gl_FrontFacing
auto param_x = b.createCompositeExtract(param, float_type_, 0);
auto param_x_inv = b.createBinOp(spv::Op::OpFMul, float_type_, param_x,
b.makeFloatConstant(-1.f));
param_x = b.createCompositeInsert(param_x_inv, param, vec4_float_type_, 0);
*/
auto cond = b.createBinOp(spv::Op::OpINotEqual, bool_type_,
ps_param_gen_idx, b.makeUintConstant(-1));
spv::Builder::If ifb(cond, b);
// FYI: We do this instead of r[ps_param_gen_idx] because that causes
// nvidia to move all registers into local memory (slow!)
for (uint32_t i = 0; i < std::min(register_count(), kMaxInterpolators);
i++) {
auto reg_ptr = b.createAccessChain(
spv::StorageClass::StorageClassFunction, registers_ptr_,
std::vector<Id>({b.makeUintConstant(i)}));
auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx,
b.makeUintConstant(i));
auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param,
b.createLoad(reg_ptr));
b.createStore(reg, reg_ptr);
}
ifb.makeEndIf();
}
}
std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
auto& b = *builder_;
assert_false(open_predicated_block_);
auto block = &b.makeNewBlock();
b.createBranch(block);
b.makeReturn(false);
// main() entry point.
auto mainFn = b.makeMain();
if (is_vertex_shader()) {
b.addEntryPoint(spv::ExecutionModel::ExecutionModelVertex, mainFn, "main");
} else {
b.addEntryPoint(spv::ExecutionModel::ExecutionModelFragment, mainFn,
"main");
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
}
// TODO(benvanik): transform feedback.
if (false) {
b.addCapability(spv::Capability::CapabilityTransformFeedback);
b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb);
}
b.createFunctionCall(translated_main_, std::vector<Id>({}));
if (is_vertex_shader()) {
// gl_Position transform
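// Roughly equivalent GLSL for the fix-up below:
//   if (vtx_fmt.w == 0.0) pos.w = 1.0 / pos.w;
//   if (vtx_fmt.xyz != vec3(0.0)) pos.xyz /= pos.w;
//   pos.xy *= window_scale.xy;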
auto vtx_fmt_ptr = b.createAccessChain(
spv::StorageClass::StorageClassPushConstant, push_consts_,
std::vector<Id>({b.makeUintConstant(1)}));
auto window_scale_ptr = b.createAccessChain(
spv::StorageClass::StorageClassPushConstant, push_consts_,
std::vector<Id>({b.makeUintConstant(0)}));
auto vtx_fmt = b.createLoad(vtx_fmt_ptr);
auto window_scale = b.createLoad(window_scale_ptr);
auto p = b.createLoad(pos_);
auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_, vtx_fmt,
vec4_float_zero_);
// pos.w = vtx_fmt.w == 0.0 ? 1.0 / pos.w : pos.w
auto c_w = b.createCompositeExtract(c, bool_type_, 3);
auto p_w = b.createCompositeExtract(p, float_type_, 3);
auto p_w_inv = b.createBinOp(spv::Op::OpFDiv, float_type_,
b.makeFloatConstant(1.f), p_w);
p_w = b.createTriOp(spv::Op::OpSelect, float_type_, c_w, p_w, p_w_inv);
// pos.xyz = vtx_fmt.xyz != 0.0 ? pos.xyz / pos.w : pos.xyz
auto p_all_w = b.smearScalar(spv::NoPrecision, p_w, vec4_float_type_);
auto p_inv = b.createBinOp(spv::Op::OpFDiv, vec4_float_type_, p, p_all_w);
p = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, p_inv, p);
// Reinsert w
p = b.createCompositeInsert(p_w, p, vec4_float_type_, 3);
// Apply window scaling
// pos.xy *= window_scale.xy
auto p_scaled =
b.createBinOp(spv::Op::OpFMul, vec4_float_type_, p, window_scale);
p = b.createOp(spv::Op::OpVectorShuffle, vec4_float_type_,
{p, p_scaled, 4, 5, 2, 3});
b.createStore(p, pos_);
} else {
// Alpha test
auto alpha_test_enabled = b.createCompositeExtract(
push_consts_, float_type_, std::vector<uint32_t>{2, 0});
auto alpha_test_func = b.createCompositeExtract(
push_consts_, float_type_, std::vector<uint32_t>{2, 1});
auto alpha_test_ref = b.createCompositeExtract(push_consts_, float_type_,
std::vector<uint32_t>{2, 2});
alpha_test_func =
b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func);
auto oC0_alpha = b.createCompositeExtract(frag_outputs_, float_type_,
std::vector<uint32_t>({0, 3}));
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_,
alpha_test_enabled, b.makeFloatConstant(1.f));
spv::Builder::If alpha_if(cond, b);
std::vector<spv::Block*> switch_segments;
b.makeSwitch(alpha_test_func, 8, std::vector<int>({0, 1, 2, 3, 4, 5, 6, 7}),
std::vector<int>({0, 1, 2, 3, 4, 5, 6, 7}), 7,
switch_segments);
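// Each entry is the negated comparison for the corresponding alpha test
// function (assuming the usual 0 = never ... 7 = always ordering), so a
// fragment is discarded when the negated test passes.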
const static spv::Op alpha_op_map[] = {
spv::Op::OpNop,
spv::Op::OpFOrdGreaterThanEqual,
spv::Op::OpFOrdNotEqual,
spv::Op::OpFOrdGreaterThan,
spv::Op::OpFOrdLessThanEqual,
spv::Op::OpFOrdEqual,
spv::Op::OpFOrdLessThan,
spv::Op::OpNop,
};
// if (alpha_func == 0) passes = false;
b.nextSwitchSegment(switch_segments, 0);
b.makeDiscard();
b.addSwitchBreak();
for (int i = 1; i < 7; i++) {
b.nextSwitchSegment(switch_segments, i);
auto cond =
b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, alpha_test_ref);
spv::Builder::If discard_if(cond, b);
b.makeDiscard();
discard_if.makeEndIf();
b.addSwitchBreak();
}
// if (alpha_func == 7) passes = true;
b.nextSwitchSegment(switch_segments, 7);
b.endSwitch(switch_segments);
alpha_if.makeEndIf();
}
b.makeReturn(false);
// Compile the spv IR
compiler_.Compile(b.getModule());
std::vector<uint32_t> spirv_words;
b.dump(spirv_words);
// Cleanup builder.
builder_.reset();
// Copy bytes out.
// TODO(benvanik): avoid copy?
std::vector<uint8_t> spirv_bytes;
spirv_bytes.resize(spirv_words.size() * 4);
std::memcpy(spirv_bytes.data(), spirv_words.data(), spirv_bytes.size());
return spirv_bytes;
}
void SpirvShaderTranslator::PostTranslation(Shader* shader) {
// Validation.
// TODO(DrChat): Only do this if a flag is set (this is pretty slow).
auto validation = validator_.Validate(
reinterpret_cast<const uint32_t*>(shader->translated_binary().data()),
shader->translated_binary().size() / 4);
if (validation->has_error()) {
XELOGE("SPIR-V Shader Validation failed! Error: %s",
validation->error_string());
}
// TODO(benvanik): only if needed? could be slowish.
auto disasm = disassembler_.Disassemble(
reinterpret_cast<const uint32_t*>(shader->translated_binary().data()),
shader->translated_binary().size() / 4);
if (disasm->has_error()) {
XELOGE("Failed to disassemble SPIRV - invalid?");
} else {
set_host_disassembly(shader, disasm->to_string());
}
}
void SpirvShaderTranslator::PreProcessControlFlowInstruction(
uint32_t cf_index, const ControlFlowInstruction& instr) {
auto& b = *builder_;
if (cf_blocks_.find(cf_index) == cf_blocks_.end()) {
CFBlock block;
block.block = &b.makeNewBlock();
cf_blocks_[cf_index] = block;
} else {
cf_blocks_[cf_index].block = &b.makeNewBlock();
}
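// A conditional jump target may be reached from more than one predecessor, so
// mark it as not dominated by the previous block; ProcessExecInstructionBegin
// skips emitting a selection merge for such blocks.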
if (instr.opcode() == ControlFlowOpcode::kCondJmp) {
auto cf_block = cf_blocks_.find(instr.cond_jmp.address());
if (cf_block == cf_blocks_.end()) {
CFBlock block;
block.prev_dominates = false;
cf_blocks_[instr.cond_jmp.address()] = block;
} else {
cf_block->second.prev_dominates = false;
}
} else if (instr.opcode() == ControlFlowOpcode::kLoopStart) {
// TODO
}
}
void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
auto& b = *builder_;
}
void SpirvShaderTranslator::ProcessControlFlowInstructionBegin(
uint32_t cf_index) {
auto& b = *builder_;
if (cf_index == 0) {
// Kind of cheaty, but emit a branch to the first block.
b.createBranch(cf_blocks_[cf_index].block);
}
}
void SpirvShaderTranslator::ProcessControlFlowInstructionEnd(
uint32_t cf_index) {
auto& b = *builder_;
}
void SpirvShaderTranslator::ProcessControlFlowNopInstruction() {
auto& b = *builder_;
// b.createNoResultOp(spv::Op::OpNop);
}
void SpirvShaderTranslator::ProcessExecInstructionBegin(
const ParsedExecInstruction& instr) {
auto& b = *builder_;
assert_false(open_predicated_block_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
// Head has the logic to check if the body should execute.
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
auto body = head;
switch (instr.type) {
case ParsedExecInstruction::Type::kUnconditional: {
// No need to do anything.
} break;
case ParsedExecInstruction::Type::kConditional: {
// Based off of bool_consts
std::vector<Id> offsets;
offsets.push_back(b.makeUintConstant(2)); // bool_consts
offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32));
auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform,
consts_, offsets);
v = b.createLoad(v);
// Bitfield extract the bool constant.
// FIXME: NVidia's compiler seems to be broken on this instruction?
/*
v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v,
b.makeUintConstant(instr.bool_constant_index % 32),
b.makeUintConstant(1));
auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v,
b.makeUintConstant(instr.condition ? 1 : 0));
*/
v = b.createBinOp(
spv::Op::OpBitwiseAnd, uint_type_, v,
b.makeUintConstant(1 << (instr.bool_constant_index % 32)));
auto cond = b.createBinOp(
instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual,
bool_type_, v, b.makeUintConstant(0));
// Conditional branch
assert_true(cf_blocks_.size() > instr.dword_index + 1);
body = &b.makeNewBlock();
auto next_block = cf_blocks_[instr.dword_index + 1];
if (next_block.prev_dominates) {
b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone);
}
b.createConditionalBranch(cond, body, next_block.block);
} break;
case ParsedExecInstruction::Type::kPredicated: {
// Branch based on p0.
assert_true(cf_blocks_.size() > instr.dword_index + 1);
body = &b.makeNewBlock();
auto cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.condition));
auto next_block = cf_blocks_[instr.dword_index + 1];
if (next_block.prev_dominates) {
b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone);
}
b.createConditionalBranch(cond, body, next_block.block);
} break;
}
b.setBuildPoint(body);
}
void SpirvShaderTranslator::ProcessExecInstructionEnd(
const ParsedExecInstruction& instr) {
auto& b = *builder_;
if (open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (instr.is_end) {
b.makeReturn(false);
} else {
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
}
void SpirvShaderTranslator::ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
// TODO: Emit a spv LoopMerge
// (need to know the continue target and merge target beforehand though)
EmitUnimplementedTranslationError();
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
void SpirvShaderTranslator::ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
EmitUnimplementedTranslationError();
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
void SpirvShaderTranslator::ProcessCallInstruction(
const ParsedCallInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
// Unused instruction(?)
assert_always();
EmitUnimplementedTranslationError();
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
void SpirvShaderTranslator::ProcessReturnInstruction(
const ParsedReturnInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
// Unused instruction(?)
assert_always();
EmitUnimplementedTranslationError();
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
// CF jump
void SpirvShaderTranslator::ProcessJumpInstruction(
const ParsedJumpInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
switch (instr.type) {
case ParsedJumpInstruction::Type::kUnconditional: {
b.createBranch(cf_blocks_[instr.target_address].block);
} break;
case ParsedJumpInstruction::Type::kConditional: {
assert_true(cf_blocks_.size() > instr.dword_index + 1);
// Based off of bool_consts
std::vector<Id> offsets;
offsets.push_back(b.makeUintConstant(2)); // bool_consts
offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32));
auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform,
consts_, offsets);
v = b.createLoad(v);
// FIXME: NVidia's compiler seems to be broken on this instruction?
/*
// Bitfield extract the bool constant.
v = b.createTriOp(spv::Op::OpBitFieldUExtract, uint_type_, v,
b.makeUintConstant(instr.bool_constant_index % 32),
b.makeUintConstant(1));
// Conditional branch
auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, v,
b.makeUintConstant(instr.condition ? 1 : 0));
*/
v = b.createBinOp(
spv::Op::OpBitwiseAnd, uint_type_, v,
b.makeUintConstant(1 << (instr.bool_constant_index % 32)));
auto cond = b.createBinOp(
instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual,
bool_type_, v, b.makeUintConstant(0));
b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block,
cf_blocks_[instr.dword_index + 1].block);
} break;
case ParsedJumpInstruction::Type::kPredicated: {
assert_true(cf_blocks_.size() > instr.dword_index + 1);
auto cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.condition));
b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block,
cf_blocks_[instr.dword_index + 1].block);
} break;
}
}
void SpirvShaderTranslator::ProcessAllocInstruction(
const ParsedAllocInstruction& instr) {
auto& b = *builder_;
auto head = cf_blocks_[instr.dword_index].block;
b.setBuildPoint(head);
switch (instr.type) {
case AllocType::kNone: {
// ?
} break;
case AllocType::kVsPosition: {
assert_true(is_vertex_shader());
} break;
// Also PS Colors
case AllocType::kVsInterpolators: {
} break;
default:
break;
}
assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
}
void SpirvShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) {
auto& b = *builder_;
assert_true(is_vertex_shader());
assert_not_zero(vertex_id_);
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
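// Open a new predicated block; consecutive predicated instructions that share
// the same condition reuse it until predicated_block_end_.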
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
// Operand 0 is the index
// Operand 1 is the binding
// TODO: Indexed fetch
auto vertex_id = LoadFromOperand(instr.operands[0]);
vertex_id = b.createCompositeExtract(vertex_id, float_type_, 0);
vertex_id = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_id);
auto shader_vertex_id = b.createLoad(vertex_id_);
auto cond =
b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id);
cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_);
// Skip loading if it's an indexed fetch.
auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
[instr.attributes.offset];
assert_not_zero(vertex_ptr);
auto vertex = b.createLoad(vertex_ptr);
switch (instr.attributes.data_format) {
case VertexFormat::k_8_8_8_8:
case VertexFormat::k_16_16:
case VertexFormat::k_16_16_16_16:
case VertexFormat::k_16_16_16_16_FLOAT:
case VertexFormat::k_32:
case VertexFormat::k_32_32:
case VertexFormat::k_32_32_32_32:
case VertexFormat::k_32_FLOAT:
case VertexFormat::k_32_32_FLOAT:
case VertexFormat::k_32_32_32_FLOAT:
case VertexFormat::k_32_32_32_32_FLOAT:
// These are handled, for now.
break;
case VertexFormat::k_10_11_11: {
// No conversion needed. Natively supported.
} break;
case VertexFormat::k_11_11_10: {
// This needs to be converted.
} break;
}
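// Build a (0, ..., 1) fallback with the same component count; it is selected
// below when the requested vertex index does not match gl_VertexId.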
auto vertex_components = b.getNumComponents(vertex);
Id alt_vertex = 0;
switch (vertex_components) {
case 1:
alt_vertex = b.makeFloatConstant(0.f);
break;
case 2:
alt_vertex = b.makeCompositeConstant(
vec2_float_type_, std::vector<Id>({b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
case 3:
alt_vertex = b.makeCompositeConstant(
vec3_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
case 4:
alt_vertex = b.makeCompositeConstant(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f),
b.makeFloatConstant(0.f),
b.makeFloatConstant(1.f)}));
break;
default:
assert_unhandled_case(vertex_components);
}
vertex = b.createTriOp(spv::Op::OpSelect, b.getTypeId(vertex), cond, vertex,
alt_vertex);
StoreToResult(vertex, instr.result);
}
void SpirvShaderTranslator::ProcessTextureFetchInstruction(
const ParsedTextureFetchInstruction& instr) {
auto& b = *builder_;
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
// Operand 0 is the offset
// Operand 1 is the sampler index
Id dest = 0;
Id src = LoadFromOperand(instr.operands[0]);
assert_not_zero(src);
uint32_t dim_idx = 0;
switch (instr.dimension) {
case TextureDimension::k1D: {
dim_idx = 0;
} break;
case TextureDimension::k2D: {
dim_idx = 1;
} break;
case TextureDimension::k3D: {
dim_idx = 2;
} break;
case TextureDimension::kCube: {
dim_idx = 3;
} break;
default:
assert_unhandled_case(instr.dimension);
}
switch (instr.opcode) {
case FetchOpcode::kTextureFetch: {
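// Index into the sampled-image array for this dimension; the fetch constant
// (storage_index) selects the element to sample from.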
auto texture_index = b.makeUintConstant(instr.operands[1].storage_index);
auto texture_ptr =
b.createAccessChain(spv::StorageClass::StorageClassUniformConstant,
tex_[dim_idx], std::vector<Id>({texture_index}));
auto texture = b.createLoad(texture_ptr);
spv::Builder::TextureParameters params = {0};
params.coords = src;
params.sampler = texture;
dest = b.createTextureCall(spv::NoPrecision, vec4_float_type_, false,
false, false, false, false, params);
} break;
default:
// TODO: the rest of these
assert_always();
break;
}
if (dest) {
b.createStore(dest, pv_);
StoreToResult(dest, instr.result);
}
}
void SpirvShaderTranslator::ProcessAluInstruction(
const ParsedAluInstruction& instr) {
auto& b = *builder_;
switch (instr.type) {
case ParsedAluInstruction::Type::kNop:
b.createNoResultOp(spv::Op::OpNop);
break;
case ParsedAluInstruction::Type::kVector:
ProcessVectorAluInstruction(instr);
break;
case ParsedAluInstruction::Type::kScalar:
ProcessScalarAluInstruction(instr);
break;
}
}
void SpirvShaderTranslator::ProcessVectorAluInstruction(
const ParsedAluInstruction& instr) {
auto& b = *builder_;
// TODO: If we have identical operands, reuse previous one.
Id sources[3] = {0};
Id dest = 0;
for (size_t i = 0; i < instr.operand_count; i++) {
sources[i] = LoadFromOperand(instr.operands[i]);
}
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
bool close_predicated_block = false;
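// setp_* opcodes update p0, so any open predicated block must be closed after
// they execute to keep later predication consistent.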
switch (instr.vector_opcode) {
case AluVectorOpcode::kAdd: {
dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0],
sources[1]);
} break;
case AluVectorOpcode::kCndEq: {
// dest = src0 == 0.0 ? src1 : src2;
auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0],
vec4_float_zero_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1],
sources[2]);
} break;
case AluVectorOpcode::kCndGe: {
// dest = src0 >= 0.0 ? src1 : src2;
auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_,
sources[0], vec4_float_zero_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1],
sources[2]);
} break;
case AluVectorOpcode::kCndGt: {
// dest = src0 > 0.0 ? src1 : src2;
auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_,
sources[0], vec4_float_zero_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, sources[1],
sources[2]);
} break;
case AluVectorOpcode::kCube: {
// TODO:
} break;
case AluVectorOpcode::kDst: {
auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1);
auto src1_y = b.createCompositeExtract(sources[1], float_type_, 1);
auto dst_y = b.createBinOp(spv::Op::OpFMul, float_type_, src0_y, src1_y);
auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2);
auto src1_w = b.createCompositeExtract(sources[1], float_type_, 3);
dest = b.createCompositeConstruct(
vec4_float_type_,
std::vector<Id>({b.makeFloatConstant(1.f), dst_y, src0_z, src1_w}));
} break;
case AluVectorOpcode::kDp2Add: {
auto src0_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_,
{sources[0], sources[0], 0, 1});
auto src1_xy = b.createOp(spv::Op::OpVectorShuffle, vec2_float_type_,
{sources[1], sources[1], 0, 1});
auto src2_x = b.createCompositeExtract(sources[2], float_type_, 0);
dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xy, src1_xy);
dest = b.createBinOp(spv::Op::OpFAdd, float_type_, dest, src2_x);
dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
} break;
case AluVectorOpcode::kDp3: {
auto src0_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_,
{sources[0], sources[0], 0, 1, 2});
auto src1_xyz = b.createOp(spv::Op::OpVectorShuffle, vec3_float_type_,
{sources[1], sources[1], 0, 1, 2});
dest = b.createBinOp(spv::Op::OpDot, float_type_, src0_xyz, src1_xyz);
dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
} break;
case AluVectorOpcode::kDp4: {
dest = b.createBinOp(spv::Op::OpDot, float_type_, sources[0], sources[1]);
dest = b.smearScalar(spv::NoPrecision, dest, vec4_float_type_);
} break;
case AluVectorOpcode::kFloor: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
spv::GLSLstd450::kFloor,
{sources[0]});
} break;
case AluVectorOpcode::kFrc: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
spv::GLSLstd450::kFract,
{sources[0]});
} break;
case AluVectorOpcode::kKillEq: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_,
sources[0], sources[1]);
cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = vec4_float_zero_;
} break;
case AluVectorOpcode::kKillGe: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual,
vec4_bool_type_, sources[0], sources[1]);
cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = vec4_float_zero_;
} break;
case AluVectorOpcode::kKillGt: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_,
sources[0], sources[1]);
cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = vec4_float_zero_;
} break;
case AluVectorOpcode::kKillNe: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_,
sources[0], sources[1]);
cond = b.createUnaryOp(spv::Op::OpAny, bool_type_, cond);
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = vec4_float_zero_;
} break;
case AluVectorOpcode::kMad: {
dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0],
sources[1]);
dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]);
} break;
case AluVectorOpcode::kMax4: {
auto src0_x = b.createCompositeExtract(sources[0], float_type_, 0);
auto src0_y = b.createCompositeExtract(sources[0], float_type_, 1);
auto src0_z = b.createCompositeExtract(sources[0], float_type_, 2);
auto src0_w = b.createCompositeExtract(sources[0], float_type_, 3);
auto max_xy = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
{src0_x, src0_y});
auto max_zw = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
{src0_z, src0_w});
auto max_xyzw = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kFMax,
{max_xy, max_zw});
// FIXME: Docs say this only updates pv.x?
dest = b.smearScalar(spv::NoPrecision, max_xyzw, vec4_float_type_);
} break;
case AluVectorOpcode::kMaxA: {
// a0 = clamp(floor(src0.w + 0.5), -256, 255)
auto addr = b.createCompositeExtract(sources[0], float_type_, 3);
addr = b.createBinOp(spv::Op::OpFAdd, float_type_, addr,
b.makeFloatConstant(0.5f));
addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr);
addr = CreateGlslStd450InstructionCall(
spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp,
{addr, b.makeIntConstant(-256), b.makeIntConstant(255)});
b.createStore(addr, a0_);
// dest = src0 >= src1 ? src0 : src1
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
spv::GLSLstd450::kFMax,
{sources[0], sources[1]});
} break;
case AluVectorOpcode::kMax: {
if (sources[0] == sources[1]) {
// mov dst, src
dest = sources[0];
break;
}
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
spv::GLSLstd450::kFMax,
{sources[0], sources[1]});
} break;
case AluVectorOpcode::kMin: {
if (sources[0] == sources[1]) {
// mov dst, src
dest = sources[0];
break;
}
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
spv::GLSLstd450::kFMin,
{sources[0], sources[1]});
} break;
case AluVectorOpcode::kMul: {
dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0],
sources[1]);
} break;
case AluVectorOpcode::kSetpEqPush: {
auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0],
vec4_float_zero_);
auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1],
vec4_float_zero_);
auto c_and =
b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
// p0
b.createStore(c_and_w, p0_);
close_predicated_block = true;
// dest
auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
b.makeFloatConstant(1.f));
auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
vec4_float_zero_, s0);
} break;
case AluVectorOpcode::kSetpGePush: {
auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0],
vec4_float_zero_);
auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_,
sources[1], vec4_float_zero_);
auto c_and =
b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
// p0
b.createStore(c_and_w, p0_);
close_predicated_block = true;
// dest
auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
b.makeFloatConstant(1.f));
auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
vec4_float_zero_, s0);
} break;
case AluVectorOpcode::kSetpGtPush: {
auto c0 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0],
vec4_float_zero_);
auto c1 = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_,
sources[1], vec4_float_zero_);
auto c_and =
b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
// p0
b.createStore(c_and_w, p0_);
close_predicated_block = true;
// dest
auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
b.makeFloatConstant(1.f));
auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
vec4_float_zero_, s0);
} break;
case AluVectorOpcode::kSetpNePush: {
auto c0 = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_,
sources[0], vec4_float_zero_);
auto c1 = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[1],
vec4_float_zero_);
auto c_and =
b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);
// p0
b.createStore(c_and_w, p0_);
close_predicated_block = true;
// dest
auto s0_x = b.createCompositeExtract(sources[0], float_type_, 0);
s0_x = b.createBinOp(spv::Op::OpFAdd, float_type_, s0_x,
b.makeFloatConstant(1.f));
auto s0 = b.smearScalar(spv::NoPrecision, s0_x, vec4_float_type_);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c_and_x,
vec4_float_zero_, s0);
} break;
case AluVectorOpcode::kSeq: {
// foreach(el) src0 == src1 ? 1.0 : 0.0
auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0],
sources[1]);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
vec4_float_one_, vec4_float_zero_);
} break;
case AluVectorOpcode::kSge: {
// foreach(el) src0 >= src1 ? 1.0 : 0.0
auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_bool_type_,
sources[0], sources[1]);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
vec4_float_one_, vec4_float_zero_);
} break;
case AluVectorOpcode::kSgt: {
// foreach(el) src0 > src1 ? 1.0 : 0.0
auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_bool_type_,
sources[0], sources[1]);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
vec4_float_one_, vec4_float_zero_);
} break;
case AluVectorOpcode::kSne: {
// foreach(el) src0 != src1 ? 1.0 : 0.0
auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_bool_type_,
sources[0], sources[1]);
dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c,
vec4_float_one_, vec4_float_zero_);
} break;
case AluVectorOpcode::kTrunc: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, vec4_float_type_,
GLSLstd450::kTrunc, {sources[0]});
} break;
default:
assert_unhandled_case(instr.vector_opcode);
break;
}
assert_not_zero(dest);
if (dest) {
b.createStore(dest, pv_);
StoreToResult(dest, instr.result);
}
if (close_predicated_block && open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
}
void SpirvShaderTranslator::ProcessScalarAluInstruction(
const ParsedAluInstruction& instr) {
auto& b = *builder_;
// TODO: If we have identical operands, reuse previous one.
Id sources[3] = {0};
Id dest = 0;
for (size_t i = 0, x = 0; i < instr.operand_count; i++) {
auto src = LoadFromOperand(instr.operands[i]);
// Pull components out of the vector operands and use them as sources.
for (size_t j = 0; j < instr.operands[i].component_count; j++) {
uint32_t component = 0;
switch (instr.operands[i].components[j]) {
case SwizzleSource::kX:
component = 0;
break;
case SwizzleSource::kY:
component = 1;
break;
case SwizzleSource::kZ:
component = 2;
break;
case SwizzleSource::kW:
component = 3;
break;
case SwizzleSource::k0:
case SwizzleSource::k1:
// Don't believe this can happen.
assert_always();
break;
default:
assert_always();
break;
}
sources[x++] = b.createCompositeExtract(src, float_type_, component);
}
}
// Close the open predicated block if this instr isn't predicated or the
// conditions do not match.
if (open_predicated_block_ &&
(!instr.is_predicated ||
instr.predicate_condition != predicated_block_cond_)) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
if (!open_predicated_block_ && instr.is_predicated) {
Id pred_cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.predicate_condition));
auto block = &b.makeNewBlock();
open_predicated_block_ = true;
predicated_block_cond_ = instr.predicate_condition;
predicated_block_end_ = &b.makeNewBlock();
b.createSelectionMerge(predicated_block_end_,
spv::SelectionControlMaskNone);
b.createConditionalBranch(pred_cond, block, predicated_block_end_);
b.setBuildPoint(block);
}
bool close_predicated_block = false;
switch (instr.scalar_opcode) {
case AluScalarOpcode::kAdds:
case AluScalarOpcode::kAddsc0:
case AluScalarOpcode::kAddsc1: {
// dest = src0 + src1
dest =
b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], sources[1]);
} break;
case AluScalarOpcode::kAddsPrev: {
// dest = src0 + ps
dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0],
b.createLoad(ps_));
} break;
case AluScalarOpcode::kCos: {
// dest = cos(src0)
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kCos, {sources[0]});
} break;
case AluScalarOpcode::kExp: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kExp2, {sources[0]});
} break;
case AluScalarOpcode::kFloors: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kFloor, {sources[0]});
} break;
case AluScalarOpcode::kFrcs: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kFract, {sources[0]});
} break;
case AluScalarOpcode::kKillsEq: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = b.makeFloatConstant(0.f);
} break;
case AluScalarOpcode::kKillsGe: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_,
sources[0], b.makeFloatConstant(0.f));
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = b.makeFloatConstant(0.f);
} break;
case AluScalarOpcode::kKillsGt: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_,
sources[0], b.makeFloatConstant(0.f));
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = b.makeFloatConstant(0.f);
} break;
case AluScalarOpcode::kKillsNe: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = b.makeFloatConstant(0.f);
} break;
case AluScalarOpcode::kKillsOne: {
auto continue_block = &b.makeNewBlock();
auto kill_block = &b.makeNewBlock();
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(1.f));
b.createConditionalBranch(cond, kill_block, continue_block);
b.setBuildPoint(kill_block);
b.createNoResultOp(spv::Op::OpKill);
b.setBuildPoint(continue_block);
dest = b.makeFloatConstant(0.f);
} break;
case AluScalarOpcode::kLogc: {
auto t = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]});
// FIXME: We don't check to see if t == -INF, we just check for INF
auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, t);
dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
b.makeFloatConstant(-FLT_MAX), t);
} break;
case AluScalarOpcode::kLog: {
dest = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kLog2, {sources[0]});
} break;
case AluScalarOpcode::kMaxAsf: {
auto addr =
b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, sources[0]);
addr = CreateGlslStd450InstructionCall(
spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp,
{addr, b.makeIntConstant(-256), b.makeIntConstant(255)});
b.createStore(addr, a0_);
// dest = src0 >= src1 ? src0 : src1
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
spv::GLSLstd450::kFMax,
{sources[0], sources[1]});
} break;
case AluScalarOpcode::kMaxAs: {
// a0 = clamp(floor(src0 + 0.5), -256, 255)
auto addr = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0],
b.makeFloatConstant(0.5f));
addr = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, addr);
addr = CreateGlslStd450InstructionCall(
spv::NoPrecision, int_type_, spv::GLSLstd450::kSClamp,
{addr, b.makeIntConstant(-256), b.makeIntConstant(255)});
b.createStore(addr, a0_);
// dest = src0 >= src1 ? src0 : src1
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
spv::GLSLstd450::kFMax,
{sources[0], sources[1]});
} break;
case AluScalarOpcode::kMaxs: {
// dest = max(src0, src1)
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kFMax,
{sources[0], sources[1]});
} break;
case AluScalarOpcode::kMins: {
// dest = min(src0, src1)
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kFMin,
{sources[0], sources[1]});
} break;
case AluScalarOpcode::kMuls:
case AluScalarOpcode::kMulsc0:
case AluScalarOpcode::kMulsc1: {
// dest = src0 * src1
dest =
b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], sources[1]);
} break;
case AluScalarOpcode::kMulsPrev: {
// dest = src0 * ps
dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0],
b.createLoad(ps_));
} break;
case AluScalarOpcode::kMulsPrev2: {
// TODO: Uh... see GLSL translator for impl.
} break;
case AluScalarOpcode::kRcpc: {
dest = b.createBinOp(spv::Op::OpFDiv, float_type_,
b.makeFloatConstant(1.f), sources[0]);
dest = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp,
{dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)});
} break;
case AluScalarOpcode::kRcpf: {
dest = b.createBinOp(spv::Op::OpFDiv, float_type_,
b.makeFloatConstant(1.f), sources[0]);
auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
b.makeFloatConstant(0.f), dest);
} break;
case AluScalarOpcode::kRcp: {
// dest = src0 != 0.0 ? 1.0 / src0 : 0.0;
auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
auto d = b.createBinOp(spv::Op::OpFDiv, float_type_,
b.makeFloatConstant(1.f), sources[0]);
dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
b.makeFloatConstant(0.f), d);
} break;
case AluScalarOpcode::kRsqc: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
spv::GLSLstd450::kInverseSqrt,
{sources[0]});
dest = CreateGlslStd450InstructionCall(
spv::NoPrecision, float_type_, spv::GLSLstd450::kFClamp,
{dest, b.makeFloatConstant(-FLT_MAX), b.makeFloatConstant(FLT_MAX)});
} break;
case AluScalarOpcode::kRsqf: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
spv::GLSLstd450::kInverseSqrt,
{sources[0]});
auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
b.makeFloatConstant(0.f), dest);
} break;
case AluScalarOpcode::kRsq: {
// dest = src0 != 0.0 ? inversesqrt(src0) : 0.0;
auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
spv::GLSLstd450::kInverseSqrt,
{sources[0]});
dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
b.makeFloatConstant(0.f), d);
} break;
case AluScalarOpcode::kSeqs: {
// dest = src0 == 0.0 ? 1.0 : 0.0;
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(1.f), b.makeFloatConstant(0.f));
} break;
case AluScalarOpcode::kSges: {
// dest = src0 >= 0.0 ? 1.0 : 0.0;
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_,
sources[0], b.makeFloatConstant(0.f));
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(1.f), b.makeFloatConstant(0.f));
} break;
case AluScalarOpcode::kSgts: {
// dest = src0 > 0.0 ? 1.0 : 0.0;
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_,
sources[0], b.makeFloatConstant(0.f));
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(1.f), b.makeFloatConstant(0.f));
} break;
case AluScalarOpcode::kSnes: {
// dest = src0 != 0.0 ? 1.0 : 0.0;
auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(1.f), b.makeFloatConstant(0.f));
} break;
case AluScalarOpcode::kSetpClr: {
b.createStore(b.makeBoolConstant(false), p0_);
close_predicated_block = true;
dest = b.makeFloatConstant(FLT_MAX);
} break;
case AluScalarOpcode::kSetpEq: {
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
// p0 = cond
b.createStore(cond, p0_);
close_predicated_block = true;
// dest = cond ? 0.f : 1.f;
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(0.f), b.makeFloatConstant(1.f));
} break;
case AluScalarOpcode::kSetpGe: {
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_,
sources[0], b.makeFloatConstant(0.f));
// p0 = cond
b.createStore(cond, p0_);
close_predicated_block = true;
// dest = cond ? 0.f : 1.f;
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(0.f), b.makeFloatConstant(1.f));
} break;
case AluScalarOpcode::kSetpGt: {
auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_,
sources[0], b.makeFloatConstant(0.f));
// p0 = cond
b.createStore(cond, p0_);
close_predicated_block = true;
// dest = cond ? 0.f : 1.f;
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(0.f), b.makeFloatConstant(1.f));
} break;
case AluScalarOpcode::kSetpInv: {
// p0 = src0 == 1.0
auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(1.f));
b.createStore(cond, p0_);
close_predicated_block = true;
// if (!cond) dest = src0 == 0.0 ? 1.0 : src0;
auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_,
sources[0], b.makeFloatConstant(0.f));
auto dst_false = b.createTriOp(spv::Op::OpSelect, float_type_, dst_cond,
b.makeFloatConstant(1.f), sources[0]);
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(0.f), dst_false);
} break;
case AluScalarOpcode::kSetpNe: {
auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
// p0 = cond
b.createStore(cond, p0_);
close_predicated_block = true;
// dest = cond ? 0.f : 1.f;
dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond,
b.makeFloatConstant(0.f), b.makeFloatConstant(1.f));
} break;
case AluScalarOpcode::kSetpPop: {
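      // p0 = (src0 - 1.0) <= 0.0; dest = max(src0 - 1.0, 0.0);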
auto src = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0],
b.makeFloatConstant(1.f));
auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_, src,
b.makeFloatConstant(0.f));
b.createStore(c, p0_);
close_predicated_block = true;
      dest = CreateGlslStd450InstructionCall(
          spv::NoPrecision, float_type_, GLSLstd450::kFMax,
          {src, b.makeFloatConstant(0.f)});
} break;
case AluScalarOpcode::kSetpRstr: {
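      // p0 = src0 == 0.0; dest = src0;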
auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
b.makeFloatConstant(0.f));
b.createStore(c, p0_);
close_predicated_block = true;
dest = sources[0];
} break;
case AluScalarOpcode::kSin: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kSin, {sources[0]});
} break;
case AluScalarOpcode::kSqrt: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kSqrt, {sources[0]});
} break;
case AluScalarOpcode::kSubs:
case AluScalarOpcode::kSubsc0:
case AluScalarOpcode::kSubsc1: {
dest =
b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], sources[1]);
} break;
case AluScalarOpcode::kSubsPrev: {
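      // dest = src0 - ps (the previous scalar result);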
dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0],
b.createLoad(ps_));
} break;
case AluScalarOpcode::kTruncs: {
dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
GLSLstd450::kTrunc, {sources[0]});
} break;
default:
assert_unhandled_case(instr.scalar_opcode);
break;
}
assert_not_zero(dest);
if (dest) {
b.createStore(dest, ps_);
StoreToResult(dest, instr.result);
}
if (close_predicated_block && open_predicated_block_) {
b.createBranch(predicated_block_end_);
b.setBuildPoint(predicated_block_end_);
open_predicated_block_ = false;
predicated_block_cond_ = false;
predicated_block_end_ = nullptr;
}
}
Id SpirvShaderTranslator::CreateGlslStd450InstructionCall(
spv::Decoration precision, Id result_type, GLSLstd450 instruction_ordinal,
std::vector<Id> args) {
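  // Note: the precision decoration is not currently applied to the result.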
return builder_->createBuiltinCall(result_type, glsl_std_450_instruction_set_,
static_cast<int>(instruction_ordinal),
args);
}
Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
auto& b = *builder_;
Id storage_pointer = 0;
Id storage_type = vec4_float_type_;
spv::StorageClass storage_class;
Id storage_index = 0; // Storage index at lowest level
std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage
  // Out of the 512 constant registers, pixel shaders get the last 256.
uint32_t storage_base = 0;
if (op.storage_source == InstructionStorageSource::kConstantFloat) {
storage_base = is_pixel_shader() ? 256 : 0;
}
switch (op.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: {
storage_index = b.makeUintConstant(storage_base + op.storage_index);
} break;
case InstructionStorageAddressingMode::kAddressAbsolute: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(storage_base + op.storage_index));
} break;
    case InstructionStorageAddressingMode::kAddressRelative: {
      // storage_index + aL.x (the current loop index).
      auto loop_index =
          b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
      storage_index =
          b.createBinOp(spv::Op::OpIAdd, uint_type_, loop_index,
                        b.makeUintConstant(storage_base + op.storage_index));
    } break;
default:
assert_always();
break;
}
switch (op.storage_source) {
case InstructionStorageSource::kRegister:
storage_pointer = registers_ptr_;
storage_class = spv::StorageClass::StorageClassFunction;
storage_type = vec4_float_type_;
storage_offsets.push_back(storage_index);
break;
case InstructionStorageSource::kConstantFloat:
storage_pointer = consts_;
storage_class = spv::StorageClass::StorageClassUniform;
storage_type = vec4_float_type_;
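      // The first offset selects the float constants member of the uniform
      // block, the second selects the constant register within it.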
storage_offsets.push_back(b.makeUintConstant(0));
storage_offsets.push_back(storage_index);
break;
case InstructionStorageSource::kVertexFetchConstant:
case InstructionStorageSource::kTextureFetchConstant:
// Should not reach this.
assert_always();
break;
default:
assert_always();
break;
}
if (!storage_pointer) {
return b.createUndefined(vec4_float_type_);
}
storage_pointer =
b.createAccessChain(storage_class, storage_pointer, storage_offsets);
auto storage_value = b.createLoad(storage_pointer);
assert_true(b.getTypeId(storage_value) == vec4_float_type_);
if (op.is_absolute_value) {
storage_value = CreateGlslStd450InstructionCall(
spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value});
}
if (op.is_negated) {
storage_value =
b.createUnaryOp(spv::Op::OpFNegate, storage_type, storage_value);
}
// swizzle
if (!op.is_standard_swizzle()) {
std::vector<uint32_t> operands;
operands.push_back(storage_value);
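    // A (0.0, 1.0) vec2 is appended as the second shuffle vector so that
    // the constant swizzle sources k0/k1 can be selected as indices 4/5.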
operands.push_back(b.makeCompositeConstant(
vec2_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)})));
    // Components start from the left; the last specified component is
    // repeated to fill the remainder, e.g. count = 1 -> xxxx, count = 2 -> xyyy.
for (int i = 0; i < 4; i++) {
auto swiz = op.components[i];
if (i > op.component_count - 1) {
swiz = op.components[op.component_count - 1];
}
switch (swiz) {
case SwizzleSource::kX:
operands.push_back(0);
break;
case SwizzleSource::kY:
operands.push_back(1);
break;
case SwizzleSource::kZ:
operands.push_back(2);
break;
case SwizzleSource::kW:
operands.push_back(3);
break;
case SwizzleSource::k0:
operands.push_back(4);
break;
case SwizzleSource::k1:
operands.push_back(5);
break;
}
}
storage_value =
b.createOp(spv::Op::OpVectorShuffle, storage_type, operands);
}
return storage_value;
}
void SpirvShaderTranslator::StoreToResult(Id source_value_id,
const InstructionResult& result) {
auto& b = *builder_;
if (result.storage_target == InstructionStorageTarget::kNone) {
// No-op?
return;
}
if (!result.has_any_writes()) {
return;
}
Id storage_pointer = 0;
Id storage_type = vec4_float_type_;
spv::StorageClass storage_class;
Id storage_index = 0; // Storage index at lowest level
std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage
switch (result.storage_addressing_mode) {
case InstructionStorageAddressingMode::kStatic: {
storage_index = b.makeUintConstant(result.storage_index);
} break;
case InstructionStorageAddressingMode::kAddressAbsolute: {
// storage_index + a0
storage_index =
b.createBinOp(spv::Op::OpIAdd, uint_type_, b.createLoad(a0_),
b.makeUintConstant(result.storage_index));
} break;
    case InstructionStorageAddressingMode::kAddressRelative: {
      // storage_index + aL.x (the current loop index).
      auto loop_index =
          b.createCompositeExtract(b.createLoad(aL_), uint_type_, 0);
      storage_index =
          b.createBinOp(spv::Op::OpIAdd, uint_type_, loop_index,
                        b.makeUintConstant(result.storage_index));
    } break;
default:
assert_always();
return;
}
  bool storage_array = false;
switch (result.storage_target) {
case InstructionStorageTarget::kRegister:
storage_pointer = registers_ptr_;
storage_class = spv::StorageClass::StorageClassFunction;
storage_type = vec4_float_type_;
storage_offsets.push_back(storage_index);
storage_array = true;
break;
case InstructionStorageTarget::kInterpolant:
assert_true(is_vertex_shader());
storage_pointer = interpolators_;
storage_class = spv::StorageClass::StorageClassOutput;
storage_type = vec4_float_type_;
storage_offsets.push_back(storage_index);
storage_array = true;
break;
case InstructionStorageTarget::kPosition:
assert_true(is_vertex_shader());
assert_not_zero(pos_);
storage_pointer = pos_;
storage_class = spv::StorageClass::StorageClassOutput;
storage_type = vec4_float_type_;
storage_offsets.push_back(0);
storage_array = false;
break;
case InstructionStorageTarget::kPointSize:
assert_true(is_vertex_shader());
// TODO(benvanik): result.storage_index
break;
case InstructionStorageTarget::kColorTarget:
assert_true(is_pixel_shader());
assert_not_zero(frag_outputs_);
storage_pointer = frag_outputs_;
storage_class = spv::StorageClass::StorageClassOutput;
storage_type = vec4_float_type_;
storage_offsets.push_back(storage_index);
storage_array = true;
break;
case InstructionStorageTarget::kDepth:
assert_true(is_pixel_shader());
storage_pointer = frag_depth_;
storage_class = spv::StorageClass::StorageClassOutput;
storage_type = float_type_;
storage_offsets.push_back(0);
storage_array = false;
break;
case InstructionStorageTarget::kNone:
assert_unhandled_case(result.storage_target);
break;
}
if (!storage_pointer) {
// assert_always();
return;
}
if (storage_array) {
storage_pointer =
b.createAccessChain(storage_class, storage_pointer, storage_offsets);
}
// Only load from storage if we need it later.
Id storage_value = 0;
if (!result.has_all_writes()) {
storage_value = b.createLoad(storage_pointer);
}
// Clamp the input value.
if (result.is_clamped) {
source_value_id = CreateGlslStd450InstructionCall(
spv::NoPrecision, b.getTypeId(source_value_id),
spv::GLSLstd450::kFClamp,
{source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)});
}
// Convert to the appropriate type, if needed.
if (b.getTypeId(source_value_id) != storage_type) {
std::vector<Id> constituents;
auto n_el = b.getNumComponents(source_value_id);
auto n_dst = b.getNumTypeComponents(storage_type);
assert_true(n_el < n_dst);
if (n_el == 1) {
// Smear scalar.
for (int i = 0; i < n_dst; i++) {
constituents.push_back(source_value_id);
}
} else {
// FIXME: This may not work as intended.
constituents.push_back(source_value_id);
for (int i = n_el; i < n_dst; i++) {
// Pad with zeroes.
constituents.push_back(b.makeFloatConstant(0.f));
}
}
source_value_id =
b.createConstructor(spv::NoPrecision, constituents, storage_type);
}
// swizzle
if (!result.is_standard_swizzle()) {
std::vector<uint32_t> operands;
operands.push_back(source_value_id);
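    // A (0.0, 1.0) vec2 is appended as the second shuffle vector so that
    // the constant swizzle sources k0/k1 can be selected as indices 4/5.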
operands.push_back(b.makeCompositeConstant(
vec2_float_type_,
std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)})));
    // Remap each destination component according to the result swizzle;
    // components not covered by the write mask are placeholders.
for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) {
auto swiz = result.components[i];
if (!result.write_mask[i]) {
        // Not written; placeholder only (replaced by the write-mask merge).
operands.push_back(0);
continue;
}
switch (swiz) {
case SwizzleSource::kX:
operands.push_back(0);
break;
case SwizzleSource::kY:
operands.push_back(1);
break;
case SwizzleSource::kZ:
operands.push_back(2);
break;
case SwizzleSource::kW:
operands.push_back(3);
break;
case SwizzleSource::k0:
operands.push_back(4);
break;
case SwizzleSource::k1:
operands.push_back(5);
break;
}
}
source_value_id =
b.createOp(spv::Op::OpVectorShuffle, storage_type, operands);
}
// write mask
if (!result.has_all_writes()) {
std::vector<uint32_t> operands;
operands.push_back(source_value_id);
operands.push_back(storage_value);
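    // Take components from the new value where the write mask is set,
    // otherwise keep the corresponding component of the stored value.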
for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) {
operands.push_back(
result.write_mask[i] ? i : b.getNumComponents(source_value_id) + i);
}
source_value_id =
b.createOp(spv::Op::OpVectorShuffle, storage_type, operands);
}
// Perform store into the pointer.
assert_true(b.getNumComponents(source_value_id) ==
b.getNumTypeComponents(storage_type));
assert_true(b.getTypeId(source_value_id) ==
b.getDerefTypeId(storage_pointer));
b.createStore(source_value_id, storage_pointer);
}
} // namespace gpu
} // namespace xe