SPIR-V: Rewrite basic control-flow to use a while loop paired with a switch statement

This commit is contained in:
Dr. Chat 2016-09-05 16:57:02 -05:00
parent 52c75c8dbc
commit 300d1c57ba
7 changed files with 308 additions and 75 deletions

View File

@ -396,7 +396,7 @@ void GlslShaderTranslator::ProcessLabel(uint32_t cf_index) {
} }
} }
void GlslShaderTranslator::ProcessControlFlowNopInstruction() { void GlslShaderTranslator::ProcessControlFlowNopInstruction(uint32_t cf_index) {
EmitSource("// cnop\n"); EmitSource("// cnop\n");
} }

View File

@ -24,6 +24,7 @@ class GlslShaderTranslator : public ShaderTranslator {
public: public:
enum class Dialect { enum class Dialect {
kGL45, kGL45,
kVulkan,
}; };
GlslShaderTranslator(Dialect dialect); GlslShaderTranslator(Dialect dialect);
@ -39,7 +40,7 @@ class GlslShaderTranslator : public ShaderTranslator {
std::vector<uint8_t> CompleteTranslation() override; std::vector<uint8_t> CompleteTranslation() override;
void ProcessLabel(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override;
void ProcessControlFlowNopInstruction() override; void ProcessControlFlowNopInstruction(uint32_t cf_index) override;
void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;

View File

@ -368,7 +368,8 @@ bool ShaderTranslator::TranslateBlocks() {
// This is what freedreno does. // This is what freedreno does.
uint32_t max_cf_dword_index = static_cast<uint32_t>(ucode_dword_count_); uint32_t max_cf_dword_index = static_cast<uint32_t>(ucode_dword_count_);
std::set<uint32_t> label_addresses; std::set<uint32_t> label_addresses;
for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) { std::vector<ControlFlowInstruction> cf_instructions;
for (uint32_t i = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a; ControlFlowInstruction cf_a;
ControlFlowInstruction cf_b; ControlFlowInstruction cf_b;
UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b); UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b);
@ -383,12 +384,12 @@ bool ShaderTranslator::TranslateBlocks() {
AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_a, &label_addresses);
AddControlFlowTargetLabel(cf_b, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses);
PreProcessControlFlowInstruction(cf_index, cf_a); cf_instructions.push_back(cf_a);
++cf_index; cf_instructions.push_back(cf_b);
PreProcessControlFlowInstruction(cf_index, cf_b);
++cf_index;
} }
PreProcessControlFlowInstructions(cf_instructions);
// Translate all instructions. // Translate all instructions.
for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) { for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) {
ControlFlowInstruction cf_a; ControlFlowInstruction cf_a;
@ -491,7 +492,7 @@ void ShaderTranslator::TranslateControlFlowNop(
const ControlFlowInstruction& cf) { const ControlFlowInstruction& cf) {
ucode_disasm_buffer_.Append(" cnop\n"); ucode_disasm_buffer_.Append(" cnop\n");
ProcessControlFlowNopInstruction(); ProcessControlFlowNopInstruction(cf_index_);
} }
void ShaderTranslator::TranslateControlFlowExec( void ShaderTranslator::TranslateControlFlowExec(
@ -1065,7 +1066,9 @@ void ParseAluInstructionOperand(const AluInstruction& op, int i,
uint32_t b = ((swizzle >> 0) + 0) & 0x3; uint32_t b = ((swizzle >> 0) + 0) & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[0] = GetSwizzleFromComponentIndex(a);
out_op->components[1] = GetSwizzleFromComponentIndex(b); out_op->components[1] = GetSwizzleFromComponentIndex(b);
} else { } else if (swizzle_component_count == 3) {
assert_always();
} else if (swizzle_component_count == 4) {
for (int j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) { for (int j = 0; j < swizzle_component_count; ++j, swizzle >>= 2) {
out_op->components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3); out_op->components[j] = GetSwizzleFromComponentIndex((swizzle + j) & 0x3);
} }
@ -1316,8 +1319,8 @@ void ShaderTranslator::ParseAluScalarInstruction(
&i.operands[0]); &i.operands[0]);
} else { } else {
uint32_t src3_swizzle = op.src_swizzle(3); uint32_t src3_swizzle = op.src_swizzle(3);
uint32_t swiz_a = ((src3_swizzle >> 6) - 1) & 0x3; uint32_t swiz_a = ((src3_swizzle >> 6) + 3) & 0x3;
uint32_t swiz_b = (src3_swizzle & 0x3); uint32_t swiz_b = ((src3_swizzle >> 0) + 0) & 0x3;
uint32_t reg2 = (static_cast<int>(op.scalar_opcode()) & 1) | uint32_t reg2 = (static_cast<int>(op.scalar_opcode()) & 1) |
(src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1); (src3_swizzle & 0x3C) | (op.src_is_temp(3) << 1);
int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0; int const_slot = (op.src_is_temp(1) || op.src_is_temp(2)) ? 1 : 0;

View File

@ -82,8 +82,8 @@ class ShaderTranslator {
} }
// Pre-process a control-flow instruction before anything else. // Pre-process a control-flow instruction before anything else.
virtual void PreProcessControlFlowInstruction( virtual void PreProcessControlFlowInstructions(
uint32_t cf_index, const ucode::ControlFlowInstruction& instr) {} std::vector<ucode::ControlFlowInstruction> instrs) {}
// Handles translation for control flow label addresses. // Handles translation for control flow label addresses.
// This is triggered once for each label required (due to control flow // This is triggered once for each label required (due to control flow
@ -91,7 +91,7 @@ class ShaderTranslator {
virtual void ProcessLabel(uint32_t cf_index) {} virtual void ProcessLabel(uint32_t cf_index) {}
// Handles translation for control flow nop instructions. // Handles translation for control flow nop instructions.
virtual void ProcessControlFlowNopInstruction() {} virtual void ProcessControlFlowNopInstruction(uint32_t cf_index) {}
// Handles the start of a control flow instruction at the given address. // Handles the start of a control flow instruction at the given address.
virtual void ProcessControlFlowInstructionBegin(uint32_t cf_index) {} virtual void ProcessControlFlowInstructionBegin(uint32_t cf_index) {}
// Handles the end of a control flow instruction that began at the given // Handles the end of a control flow instruction that began at the given

View File

@ -103,6 +103,8 @@ void SpirvShaderTranslator::StartTranslation() {
"ps"); "ps");
pv_ = b.createVariable(spv::StorageClass::StorageClassFunction, pv_ = b.createVariable(spv::StorageClass::StorageClassFunction,
vec4_float_type_, "pv"); vec4_float_type_, "pv");
pc_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_,
"pc");
a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_, a0_ = b.createVariable(spv::StorageClass::StorageClassFunction, int_type_,
"a0"); "a0");
@ -219,6 +221,7 @@ void SpirvShaderTranslator::StartTranslation() {
for (const auto& binding : vertex_bindings()) { for (const auto& binding : vertex_bindings()) {
for (const auto& attrib : binding.attributes) { for (const auto& attrib : binding.attributes) {
Id attrib_type = 0; Id attrib_type = 0;
bool is_signed = attrib.fetch_instr.attributes.is_signed;
switch (attrib.fetch_instr.attributes.data_format) { switch (attrib.fetch_instr.attributes.data_format) {
case VertexFormat::k_32: case VertexFormat::k_32:
case VertexFormat::k_32_FLOAT: case VertexFormat::k_32_FLOAT:
@ -230,8 +233,6 @@ void SpirvShaderTranslator::StartTranslation() {
case VertexFormat::k_32_32_FLOAT: case VertexFormat::k_32_32_FLOAT:
attrib_type = vec2_float_type_; attrib_type = vec2_float_type_;
break; break;
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
case VertexFormat::k_32_32_32_FLOAT: case VertexFormat::k_32_32_32_FLOAT:
attrib_type = vec3_float_type_; attrib_type = vec3_float_type_;
break; break;
@ -243,6 +244,11 @@ void SpirvShaderTranslator::StartTranslation() {
case VertexFormat::k_32_32_32_32_FLOAT: case VertexFormat::k_32_32_32_32_FLOAT:
attrib_type = vec4_float_type_; attrib_type = vec4_float_type_;
break; break;
case VertexFormat::k_10_11_11:
case VertexFormat::k_11_11_10:
// Manually converted.
attrib_type = is_signed ? int_type_ : uint_type_;
break;
default: default:
assert_always(); assert_always();
} }
@ -387,15 +393,44 @@ void SpirvShaderTranslator::StartTranslation() {
ifb.makeEndIf(); ifb.makeEndIf();
} }
b.createStore(b.makeIntConstant(0x0), pc_);
loop_head_block_ = &b.makeNewBlock();
auto block = &b.makeNewBlock();
loop_body_block_ = &b.makeNewBlock();
loop_cont_block_ = &b.makeNewBlock();
loop_exit_block_ = &b.makeNewBlock();
b.createBranch(loop_head_block_);
// Setup continue block
b.setBuildPoint(loop_cont_block_);
b.createBranch(loop_head_block_);
// While loop header block
b.setBuildPoint(loop_head_block_);
b.createLoopMerge(loop_exit_block_, loop_cont_block_,
spv::LoopControlMask::LoopControlDontUnrollMask);
b.createBranch(block);
// Condition block
b.setBuildPoint(block);
// while (pc != 0xFFFF)
auto c = b.createBinOp(spv::Op::OpINotEqual, bool_type_, b.createLoad(pc_),
b.makeIntConstant(0xFFFF));
b.createConditionalBranch(c, loop_body_block_, loop_exit_block_);
b.setBuildPoint(loop_body_block_);
} }
std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() { std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
auto& b = *builder_; auto& b = *builder_;
assert_false(open_predicated_block_); assert_false(open_predicated_block_);
auto block = &b.makeNewBlock(); b.setBuildPoint(loop_exit_block_);
b.createBranch(block);
b.makeReturn(false); b.makeReturn(false);
exec_cond_ = false;
exec_skip_block_ = nullptr;
// main() entry point. // main() entry point.
auto mainFn = b.makeMain(); auto mainFn = b.makeMain();
@ -411,6 +446,9 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
mainFn, "main"); mainFn, "main");
b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft); b.addExecutionMode(mainFn, spv::ExecutionModeOriginUpperLeft);
// FIXME(DrChat): We need to declare the DepthReplacing execution mode if
// we write depth, and we must unconditionally write depth if declared!
for (auto id : interface_ids_) { for (auto id : interface_ids_) {
entry->addIdOperand(id); entry->addIdOperand(id);
} }
@ -527,12 +565,17 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
b.makeReturn(false); b.makeReturn(false);
// Compile the spv IR // Compile the spv IR
compiler_.Compile(b.getModule()); // compiler_.Compile(b.getModule());
std::vector<uint32_t> spirv_words; std::vector<uint32_t> spirv_words;
b.dump(spirv_words); b.dump(spirv_words);
// Cleanup builder. // Cleanup builder.
cf_blocks_.clear();
loop_head_block_ = nullptr;
loop_body_block_ = nullptr;
loop_cont_block_ = nullptr;
loop_exit_block_ = nullptr;
builder_.reset(); builder_.reset();
interface_ids_.clear(); interface_ids_.clear();
@ -568,30 +611,68 @@ void SpirvShaderTranslator::PostTranslation(Shader* shader) {
} }
} }
void SpirvShaderTranslator::PreProcessControlFlowInstruction( void SpirvShaderTranslator::PreProcessControlFlowInstructions(
uint32_t cf_index, const ControlFlowInstruction& instr) { std::vector<ucode::ControlFlowInstruction> instrs) {
auto& b = *builder_; auto& b = *builder_;
if (cf_blocks_.find(cf_index) == cf_blocks_.end()) { auto default_block = &b.makeNewBlock();
CFBlock block; switch_break_block_ = &b.makeNewBlock();
block.block = &b.makeNewBlock();
cf_blocks_[cf_index] = block; b.setBuildPoint(default_block);
} else { b.createStore(b.makeIntConstant(0xFFFF), pc_);
cf_blocks_[cf_index].block = &b.makeNewBlock(); b.createBranch(switch_break_block_);
b.setBuildPoint(switch_break_block_);
b.createBranch(loop_cont_block_);
// Now setup the switch.
default_block->addPredecessor(loop_body_block_);
b.setBuildPoint(loop_body_block_);
cf_blocks_.resize(instrs.size());
for (size_t i = 0; i < cf_blocks_.size(); i++) {
cf_blocks_[i].block = &b.makeNewBlock();
cf_blocks_[i].labelled = false;
} }
if (instr.opcode() == ControlFlowOpcode::kCondJmp) { std::vector<uint32_t> operands;
auto cf_block = cf_blocks_.find(instr.cond_jmp.address()); operands.push_back(b.createLoad(pc_)); // Selector
if (cf_block == cf_blocks_.end()) { operands.push_back(default_block->getId()); // Default
CFBlock block;
block.prev_dominates = false; // Always have a case for block 0.
cf_blocks_[instr.cond_jmp.address()] = block; operands.push_back(0);
} else { operands.push_back(cf_blocks_[0].block->getId());
cf_block->second.prev_dominates = false; cf_blocks_[0].block->addPredecessor(loop_body_block_);
cf_blocks_[0].labelled = true;
for (size_t i = 0; i < instrs.size(); i++) {
auto& instr = instrs[i];
if (instr.opcode() == ucode::ControlFlowOpcode::kCondJmp) {
uint32_t address = instr.cond_jmp.address();
cf_blocks_[address].labelled = true;
operands.push_back(address);
operands.push_back(cf_blocks_[address].block->getId());
cf_blocks_[address].block->addPredecessor(loop_body_block_);
} else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopStart) {
uint32_t address = instr.loop_start.address();
cf_blocks_[address].labelled = true;
operands.push_back(address);
operands.push_back(cf_blocks_[address].block->getId());
cf_blocks_[address].block->addPredecessor(loop_body_block_);
} else if (instr.opcode() == ucode::ControlFlowOpcode::kLoopEnd) {
uint32_t address = instr.loop_end.address();
cf_blocks_[address].labelled = true;
operands.push_back(address);
operands.push_back(cf_blocks_[address].block->getId());
cf_blocks_[address].block->addPredecessor(loop_body_block_);
} }
} else if (instr.opcode() == ControlFlowOpcode::kLoopStart) {
// TODO
} }
b.createSelectionMerge(switch_break_block_, 0);
b.createNoResultOp(spv::Op::OpSwitch, operands);
} }
void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
@ -601,11 +682,6 @@ void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) {
void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( void SpirvShaderTranslator::ProcessControlFlowInstructionBegin(
uint32_t cf_index) { uint32_t cf_index) {
auto& b = *builder_; auto& b = *builder_;
if (cf_index == 0) {
// Kind of cheaty, but emit a branch to the first block.
b.createBranch(cf_blocks_[cf_index].block);
}
} }
void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( void SpirvShaderTranslator::ProcessControlFlowInstructionEnd(
@ -613,10 +689,18 @@ void SpirvShaderTranslator::ProcessControlFlowInstructionEnd(
auto& b = *builder_; auto& b = *builder_;
} }
void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { void SpirvShaderTranslator::ProcessControlFlowNopInstruction(
uint32_t cf_index) {
auto& b = *builder_; auto& b = *builder_;
// b.createNoResultOp(spv::Op::OpNop); auto head = cf_blocks_[cf_index].block;
b.setBuildPoint(head);
b.createNoResultOp(spv::Op::OpNop);
if (cf_blocks_.size() > cf_index + 1) {
b.createBranch(cf_blocks_[cf_index + 1].block);
} else {
b.makeReturn(false);
}
} }
void SpirvShaderTranslator::ProcessExecInstructionBegin( void SpirvShaderTranslator::ProcessExecInstructionBegin(
@ -635,6 +719,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin(
switch (instr.type) { switch (instr.type) {
case ParsedExecInstruction::Type::kUnconditional: { case ParsedExecInstruction::Type::kUnconditional: {
// No need to do anything. // No need to do anything.
exec_cond_ = false;
} break; } break;
case ParsedExecInstruction::Type::kConditional: { case ParsedExecInstruction::Type::kConditional: {
// Based off of bool_consts // Based off of bool_consts
@ -665,27 +750,34 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin(
// Conditional branch // Conditional branch
assert_true(cf_blocks_.size() > instr.dword_index + 1); assert_true(cf_blocks_.size() > instr.dword_index + 1);
body = &b.makeNewBlock(); body = &b.makeNewBlock();
exec_cond_ = true;
exec_skip_block_ = &b.makeNewBlock();
auto next_block = cf_blocks_[instr.dword_index + 1]; b.createSelectionMerge(
if (next_block.prev_dominates) { exec_skip_block_,
b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); spv::SelectionControlMask::SelectionControlMaskNone);
} b.createConditionalBranch(cond, body, exec_skip_block_);
b.createConditionalBranch(cond, body, next_block.block);
b.setBuildPoint(exec_skip_block_);
b.createBranch(cf_blocks_[instr.dword_index + 1].block);
} break; } break;
case ParsedExecInstruction::Type::kPredicated: { case ParsedExecInstruction::Type::kPredicated: {
// Branch based on p0. // Branch based on p0.
assert_true(cf_blocks_.size() > instr.dword_index + 1); assert_true(cf_blocks_.size() > instr.dword_index + 1);
body = &b.makeNewBlock(); body = &b.makeNewBlock();
exec_cond_ = true;
exec_skip_block_ = &b.makeNewBlock();
auto cond = auto cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.condition)); b.makeBoolConstant(instr.condition));
b.createSelectionMerge(
exec_skip_block_,
spv::SelectionControlMask::SelectionControlMaskNone);
b.createConditionalBranch(cond, body, exec_skip_block_);
auto next_block = cf_blocks_[instr.dword_index + 1]; b.setBuildPoint(exec_skip_block_);
if (next_block.prev_dominates) { b.createBranch(cf_blocks_[instr.dword_index + 1].block);
b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone);
}
b.createConditionalBranch(cond, body, next_block.block);
} break; } break;
} }
b.setBuildPoint(body); b.setBuildPoint(body);
@ -705,6 +797,8 @@ void SpirvShaderTranslator::ProcessExecInstructionEnd(
if (instr.is_end) { if (instr.is_end) {
b.makeReturn(false); b.makeReturn(false);
} else if (exec_cond_) {
b.createBranch(exec_skip_block_);
} else { } else {
assert_true(cf_blocks_.size() > instr.dword_index + 1); assert_true(cf_blocks_.size() > instr.dword_index + 1);
b.createBranch(cf_blocks_[instr.dword_index + 1].block); b.createBranch(cf_blocks_[instr.dword_index + 1].block);
@ -779,7 +873,8 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
b.setBuildPoint(head); b.setBuildPoint(head);
switch (instr.type) { switch (instr.type) {
case ParsedJumpInstruction::Type::kUnconditional: { case ParsedJumpInstruction::Type::kUnconditional: {
b.createBranch(cf_blocks_[instr.target_address].block); b.createStore(b.makeIntConstant(instr.target_address), pc_);
b.createBranch(switch_break_block_);
} break; } break;
case ParsedJumpInstruction::Type::kConditional: { case ParsedJumpInstruction::Type::kConditional: {
assert_true(cf_blocks_.size() > instr.dword_index + 1); assert_true(cf_blocks_.size() > instr.dword_index + 1);
@ -810,8 +905,11 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual, instr.condition ? spv::Op::OpINotEqual : spv::Op::OpIEqual,
bool_type_, v, b.makeUintConstant(0)); bool_type_, v, b.makeUintConstant(0));
b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block, auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond,
cf_blocks_[instr.dword_index + 1].block); b.makeIntConstant(instr.target_address),
b.makeIntConstant(instr.dword_index + 1));
b.createStore(next_pc, pc_);
b.createBranch(switch_break_block_);
} break; } break;
case ParsedJumpInstruction::Type::kPredicated: { case ParsedJumpInstruction::Type::kPredicated: {
assert_true(cf_blocks_.size() > instr.dword_index + 1); assert_true(cf_blocks_.size() > instr.dword_index + 1);
@ -819,8 +917,12 @@ void SpirvShaderTranslator::ProcessJumpInstruction(
auto cond = auto cond =
b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_), b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, b.createLoad(p0_),
b.makeBoolConstant(instr.condition)); b.makeBoolConstant(instr.condition));
b.createConditionalBranch(cond, cf_blocks_[instr.target_address].block,
cf_blocks_[instr.dword_index + 1].block); auto next_pc = b.createTriOp(spv::Op::OpSelect, int_type_, cond,
b.makeIntConstant(instr.target_address),
b.makeIntConstant(instr.dword_index + 1));
b.createStore(next_pc, pc_);
b.createBranch(switch_break_block_);
} break; } break;
} }
} }
@ -854,6 +956,43 @@ void SpirvShaderTranslator::ProcessAllocInstruction(
b.createBranch(cf_blocks_[instr.dword_index + 1].block); b.createBranch(cf_blocks_[instr.dword_index + 1].block);
} }
// Emits SPIR-V that isolates |count| bits of |base| starting at bit |offset|,
// using a left shift followed by a right shift.
//
//   result_type  Currently unused: the emitted ops are typed with the type of
//                |base| (queried from the builder), not with this argument.
//   base         Id of the value to extract from. The shift amounts are
//                computed against a width of 32, so this assumes a 32-bit
//                integer value.
//   is_signed    Selects an arithmetic right shift (sign-extending the field)
//                instead of a logical one (zero-extending it).
//   offset       Index of the lowest bit of the field.
//   count        Width of the field in bits.
//
// Returns the Id of the second shift's result.
spv::Id SpirvShaderTranslator::BitfieldExtract(spv::Id result_type,
                                               spv::Id base, bool is_signed,
                                               uint32_t offset,
                                               uint32_t count) {
  auto& b = *builder_;
  const spv::Id value_type = b.getTypeId(base);

  // Step 1: push the field up so its highest bit sits in bit 31. Everything
  // that was above the field falls off the top.
  const spv::Id left_amount = b.makeUintConstant(32 - (offset + count));
  const spv::Id aligned = b.createBinOp(spv::Op::OpShiftLeftLogical,
                                        value_type, base, left_amount);

  // Step 2: bring the field back down to bit 0. The kind of right shift
  // decides whether the vacated upper bits are filled with the sign bit or
  // with zeros.
  spv::Op right_shift_op = spv::Op::OpShiftRightLogical;
  if (is_signed) {
    right_shift_op = spv::Op::OpShiftRightArithmetic;
  }
  const spv::Id right_amount = b.makeUintConstant(32 - count);
  return b.createBinOp(right_shift_op, value_type, aligned, right_amount);
}
// Emits SPIR-V that rescales a float holding an integer field value into a
// normalized float.
//
//   var          Id of the float value to normalize.
//   result_type  Type Id used for the emitted divide/select results.
//   bits         Width of the source integer field. Expected to be in
//                [1, 32]; the scale factors below are only well-defined in
//                that range.
//   is_signed    true:  result = (var == -2^(bits-1)) ? -1.0
//                                                     : var / (2^(bits-1) - 1)
//                false: result = var / (2^bits - 1)
//
// Returns the Id of the normalized value.
spv::Id SpirvShaderTranslator::ConvertNormVar(spv::Id var, spv::Id result_type,
                                              uint32_t bits, bool is_signed) {
  auto& b = *builder_;

  // The scale factors are computed in 64-bit unsigned arithmetic. Shifting a
  // plain `int` 1 by 31 or 32 overflows, which would yield a wrong-signed or
  // undefined divisor for full-width fields.
  const uint64_t one = 1;

  if (is_signed) {
    const float most_negative = -static_cast<float>(one << (bits - 1));
    const float most_positive =
        static_cast<float>((one << (bits - 1)) - 1);

    // The most negative two's-complement value has no positive counterpart,
    // so dividing it by the positive maximum would land below -1. Pin that
    // single value to exactly -1.
    auto is_most_negative =
        b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, var,
                      b.makeFloatConstant(most_negative));
    auto scaled = b.createBinOp(spv::Op::OpFDiv, result_type, var,
                                b.makeFloatConstant(most_positive));
    var = b.createTriOp(spv::Op::OpSelect, result_type, is_most_negative,
                        b.makeFloatConstant(-1.f), scaled);
  } else {
    const float max_value = static_cast<float>((one << bits) - 1);
    var = b.createBinOp(spv::Op::OpFDiv, result_type, var,
                        b.makeFloatConstant(max_value));
  }

  return var;
}
void SpirvShaderTranslator::ProcessVertexFetchInstruction( void SpirvShaderTranslator::ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) { const ParsedVertexFetchInstruction& instr) {
auto& b = *builder_; auto& b = *builder_;
@ -894,6 +1033,9 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx); vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx);
auto shader_vertex_idx = b.createLoad(vertex_idx_); auto shader_vertex_idx = b.createLoad(vertex_idx_);
auto vertex_components =
GetVertexFormatComponentCount(instr.attributes.data_format);
// Skip loading if it's an indexed fetch. // Skip loading if it's an indexed fetch.
auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
[instr.attributes.offset]; [instr.attributes.offset];
@ -902,7 +1044,6 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_idx, auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_idx,
shader_vertex_idx); shader_vertex_idx);
auto vertex_components = b.getNumComponents(vertex);
Id alt_vertex = 0; Id alt_vertex = 0;
switch (vertex_components) { switch (vertex_components) {
case 1: case 1:
@ -949,11 +1090,79 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
break; break;
case VertexFormat::k_10_11_11: { case VertexFormat::k_10_11_11: {
// No conversion needed. Natively supported. // This needs to be converted.
bool is_signed = instr.attributes.is_signed;
auto op =
is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract;
auto comp_type = is_signed ? int_type_ : uint_type_;
assert_true(comp_type == b.getTypeId(vertex));
spv::Id components[3] = {0};
/*
components[2] = b.createTriOp(
op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(10));
components[1] =
b.createTriOp(op, comp_type, vertex, b.makeUintConstant(10),
b.makeUintConstant(11));
components[0] =
b.createTriOp(op, comp_type, vertex, b.makeUintConstant(21),
b.makeUintConstant(11));
*/
// Workaround until NVIDIA fixes their compiler :|
components[0] = BitfieldExtract(comp_type, vertex, is_signed, 00, 10);
components[1] = BitfieldExtract(comp_type, vertex, is_signed, 10, 11);
components[2] = BitfieldExtract(comp_type, vertex, is_signed, 21, 11);
op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF;
for (int i = 0; i < 3; i++) {
components[i] = b.createUnaryOp(op, float_type_, components[i]);
}
components[0] = ConvertNormVar(components[0], float_type_, 11, is_signed);
components[1] = ConvertNormVar(components[1], float_type_, 11, is_signed);
components[2] = ConvertNormVar(components[2], float_type_, 10, is_signed);
vertex = b.createCompositeConstruct(
vec3_float_type_,
std::vector<Id>({components[0], components[1], components[2]}));
} break; } break;
case VertexFormat::k_11_11_10: { case VertexFormat::k_11_11_10: {
// This needs to be converted. // This needs to be converted.
bool is_signed = instr.attributes.is_signed;
auto op =
is_signed ? spv::Op::OpBitFieldSExtract : spv::Op::OpBitFieldUExtract;
auto comp_type = is_signed ? int_type_ : uint_type_;
spv::Id components[3] = {0};
/*
components[2] = b.createTriOp(
op, comp_type, vertex, b.makeUintConstant(0), b.makeUintConstant(11));
components[1] =
b.createTriOp(op, comp_type, vertex, b.makeUintConstant(11),
b.makeUintConstant(11));
components[0] =
b.createTriOp(op, comp_type, vertex, b.makeUintConstant(22),
b.makeUintConstant(10));
*/
// Workaround until NVIDIA fixes their compiler :|
components[0] = BitfieldExtract(comp_type, vertex, is_signed, 00, 11);
components[1] = BitfieldExtract(comp_type, vertex, is_signed, 11, 11);
components[2] = BitfieldExtract(comp_type, vertex, is_signed, 22, 10);
op = is_signed ? spv::Op::OpConvertSToF : spv::Op::OpConvertUToF;
for (int i = 0; i < 3; i++) {
components[i] = b.createUnaryOp(op, float_type_, components[i]);
}
components[0] = ConvertNormVar(components[0], float_type_, 11, is_signed);
components[1] = ConvertNormVar(components[1], float_type_, 11, is_signed);
components[2] = ConvertNormVar(components[2], float_type_, 10, is_signed);
vertex = b.createCompositeConstruct(
vec3_float_type_,
std::vector<Id>({components[0], components[1], components[2]}));
} break; } break;
} }

View File

@ -56,12 +56,12 @@ class SpirvShaderTranslator : public ShaderTranslator {
std::vector<uint8_t> CompleteTranslation() override; std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation(Shader* shader) override; void PostTranslation(Shader* shader) override;
void PreProcessControlFlowInstruction( void PreProcessControlFlowInstructions(
uint32_t cf_index, const ucode::ControlFlowInstruction& instr) override; std::vector<ucode::ControlFlowInstruction> instrs) override;
void ProcessLabel(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override;
void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
void ProcessControlFlowNopInstruction() override; void ProcessControlFlowNopInstruction(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessLoopStartInstruction( void ProcessLoopStartInstruction(
@ -84,6 +84,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
void ProcessVectorAluInstruction(const ParsedAluInstruction& instr); void ProcessVectorAluInstruction(const ParsedAluInstruction& instr);
void ProcessScalarAluInstruction(const ParsedAluInstruction& instr); void ProcessScalarAluInstruction(const ParsedAluInstruction& instr);
spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
uint32_t offset, uint32_t count);
spv::Id ConvertNormVar(spv::Id var, spv::Id result_type, uint32_t bits,
bool is_signed);
// Creates a call to the given GLSL intrinsic. // Creates a call to the given GLSL intrinsic.
spv::Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( spv::Id SpirvShaderTranslator::CreateGlslStd450InstructionCall(
spv::Decoration precision, spv::Id result_type, spv::Decoration precision, spv::Id result_type,
@ -107,6 +112,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
bool predicated_block_cond_ = false; bool predicated_block_cond_ = false;
spv::Block* predicated_block_end_ = nullptr; spv::Block* predicated_block_end_ = nullptr;
// Exec block conditional?
bool exec_cond_ = false;
spv::Block* exec_skip_block_ = nullptr;
// TODO(benvanik): replace with something better, make reusable, etc. // TODO(benvanik): replace with something better, make reusable, etc.
std::unique_ptr<spv::Builder> builder_; std::unique_ptr<spv::Builder> builder_;
spv::Id glsl_std_450_instruction_set_ = 0; spv::Id glsl_std_450_instruction_set_ = 0;
@ -132,6 +141,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id registers_ptr_ = 0, registers_type_ = 0; spv::Id registers_ptr_ = 0, registers_type_ = 0;
spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0; spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0;
spv::Id ps_ = 0, pv_ = 0; // IDs of previous results spv::Id ps_ = 0, pv_ = 0; // IDs of previous results
spv::Id pc_ = 0; // Program counter
spv::Id pos_ = 0; spv::Id pos_ = 0;
spv::Id push_consts_ = 0; spv::Id push_consts_ = 0;
spv::Id interpolators_ = 0; spv::Id interpolators_ = 0;
@ -148,9 +158,14 @@ class SpirvShaderTranslator : public ShaderTranslator {
struct CFBlock { struct CFBlock {
spv::Block* block = nullptr; spv::Block* block = nullptr;
bool prev_dominates = true; bool labelled = false;
}; };
std::map<uint32_t, CFBlock> cf_blocks_; std::vector<CFBlock> cf_blocks_;
spv::Block* switch_break_block_ = nullptr;
spv::Block* loop_head_block_ = nullptr;
spv::Block* loop_body_block_ = nullptr;
spv::Block* loop_cont_block_ = nullptr;
spv::Block* loop_exit_block_ = nullptr;
}; };
} // namespace gpu } // namespace gpu

View File

@ -896,19 +896,18 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
break; break;
case VertexFormat::k_2_10_10_10: case VertexFormat::k_2_10_10_10:
vertex_attrib_descr.format = is_signed vertex_attrib_descr.format = is_signed
? VK_FORMAT_A2R10G10B10_SNORM_PACK32 ? VK_FORMAT_A2B10G10R10_SNORM_PACK32
: VK_FORMAT_A2R10G10B10_UNORM_PACK32; : VK_FORMAT_A2B10G10R10_UNORM_PACK32;
break; break;
case VertexFormat::k_10_11_11: case VertexFormat::k_10_11_11:
assert_true(is_signed); // Converted in-shader.
vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; vertex_attrib_descr.format =
is_signed ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT;
break; break;
case VertexFormat::k_11_11_10: case VertexFormat::k_11_11_10:
// Converted in-shader. // Converted in-shader.
// TODO(DrChat) vertex_attrib_descr.format =
assert_always(); is_signed ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT;
// vertex_attrib_descr.format = VK_FORMAT_R32_UINT;
vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32;
break; break;
case VertexFormat::k_16_16: case VertexFormat::k_16_16:
vertex_attrib_descr.format = vertex_attrib_descr.format =
@ -927,14 +926,20 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
vertex_attrib_descr.format = VK_FORMAT_R16G16B16A16_SFLOAT; vertex_attrib_descr.format = VK_FORMAT_R16G16B16A16_SFLOAT;
break; break;
case VertexFormat::k_32: case VertexFormat::k_32:
// FIXME: Is this a NORM format?
assert_always();
vertex_attrib_descr.format = vertex_attrib_descr.format =
is_signed ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT; is_signed ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT;
break; break;
case VertexFormat::k_32_32: case VertexFormat::k_32_32:
// FIXME: Is this a NORM format?
assert_always();
vertex_attrib_descr.format = vertex_attrib_descr.format =
is_signed ? VK_FORMAT_R32G32_SINT : VK_FORMAT_R32G32_UINT; is_signed ? VK_FORMAT_R32G32_SINT : VK_FORMAT_R32G32_UINT;
break; break;
case VertexFormat::k_32_32_32_32: case VertexFormat::k_32_32_32_32:
// FIXME: Is this a NORM format?
assert_always();
vertex_attrib_descr.format = vertex_attrib_descr.format =
is_signed ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT; is_signed ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT;
break; break;