From 4aff1c19a718e6c53528a03bc709ee977dfc2f58 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sat, 20 Feb 2016 18:44:37 -0600 Subject: [PATCH] (WIP) SPIR-V Shader Translator --- src/xenia/gpu/shader_translator.cc | 40 +- src/xenia/gpu/shader_translator.h | 3 + src/xenia/gpu/spirv_shader_translator.cc | 909 +++++++++++++++++++++-- src/xenia/gpu/spirv_shader_translator.h | 24 + 4 files changed, 918 insertions(+), 58 deletions(-) diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index f117619cd..a89be80f5 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -14,6 +14,7 @@ #include #include +#include "xenia/base/logging.h" #include "xenia/base/math.h" namespace xe { @@ -353,7 +354,7 @@ bool ShaderTranslator::TranslateBlocks() { // This is what freedreno does. uint32_t max_cf_dword_index = static_cast(ucode_dword_count_); std::set label_addresses; - for (uint32_t i = 0; i < max_cf_dword_index; i += 3) { + for (uint32_t i = 0, cf_index = 0; i < max_cf_dword_index; i += 3) { ControlFlowInstruction cf_a; ControlFlowInstruction cf_b; UnpackControlFlowInstructions(ucode_dwords_ + i, &cf_a, &cf_b); @@ -367,6 +368,11 @@ bool ShaderTranslator::TranslateBlocks() { } AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses); + + PreProcessControlFlowInstruction(cf_index); + ++cf_index; + PreProcessControlFlowInstruction(cf_index); + ++cf_index; } // Translate all instructions. 
@@ -666,11 +672,11 @@ void ShaderTranslator::TranslateExecInstructions( static_cast(ucode_dwords_[instr_offset * 3] & 0x1F); if (fetch_opcode == FetchOpcode::kVertexFetch) { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateVertexFetchInstruction(op); } else { auto& op = *reinterpret_cast( - ucode_dwords_ + instr_offset * 3); + ucode_dwords_ + instr_offset * 3); TranslateTextureFetchInstruction(op); } } else { @@ -1114,9 +1120,15 @@ void ShaderTranslator::ParseAluVectorInstruction( i.result.storage_target = InstructionStorageTarget::kPointSize; break; default: - assert_true(dest_num < 16); - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + if (dest_num < 16) { + i.result.storage_target = InstructionStorageTarget::kInterpolant; + i.result.storage_index = dest_num; + } else { + // Unimplemented. + // assert_always(); + i.result.storage_target = InstructionStorageTarget::kNone; + i.result.storage_index = 0; + } break; } } else if (is_pixel_shader()) { @@ -1236,9 +1248,19 @@ void ShaderTranslator::ParseAluScalarInstruction( i.result.storage_target = InstructionStorageTarget::kPointSize; break; default: - assert_true(dest_num < 16); - i.result.storage_target = InstructionStorageTarget::kInterpolant; - i.result.storage_index = dest_num; + if (dest_num < 16) { + i.result.storage_target = InstructionStorageTarget::kInterpolant; + i.result.storage_index = dest_num; + } else { + // Unimplemented. 
+ // assert_always(); + XELOGE( + "ShaderTranslator::ParseAluScalarInstruction: Unsupported write " + "to export %d", + dest_num); + i.result.storage_target = InstructionStorageTarget::kNone; + i.result.storage_index = 0; + } break; } } else if (is_pixel_shader()) { diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 21bae4a53..d1b27a997 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -78,6 +78,9 @@ class ShaderTranslator { shader->host_disassembly_ = std::move(value); } + // Pre-process a control-flow instruction before anything else. + virtual void PreProcessControlFlowInstruction(uint32_t cf_index) {} + // Handles translation for control flow label addresses. // This is triggered once for each label required (due to control flow // operations) before any of the instructions within the target exec. diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 52848cedd..2ea0ff89c 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -15,6 +15,7 @@ namespace xe { namespace gpu { +using namespace ucode; using spv::GLSLstd450; using spv::Id; @@ -62,10 +63,155 @@ void SpirvShaderTranslator::StartTranslation() { b.addExecutionMode(mainFn, spv::ExecutionMode::ExecutionModeXfb); } - auto float_1_0 = b.makeFloatConstant(2.0f); - auto acos = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationInvariant, b.makeFloatType(32), - GLSLstd450::kAcos, {float_1_0}); + bool_type_ = b.makeBoolType(); + float_type_ = b.makeFloatType(32); + Id uint_type = b.makeUintType(32); + vec2_float_type_ = b.makeVectorType(float_type_, 2); + vec3_float_type_ = b.makeVectorType(float_type_, 3); + vec4_float_type_ = b.makeVectorType(float_type_, 4); + vec4_uint_type_ = b.makeVectorType(uint_type, 4); + vec4_bool_type_ = b.makeVectorType(bool_type_, 4); + + vec4_float_one_ = b.makeCompositeConstant( + vec4_float_type_, + 
std::vector<Id>({b.makeFloatConstant(1.f), b.makeFloatConstant(1.f), + b.makeFloatConstant(1.f), b.makeFloatConstant(1.f)})); + vec4_float_zero_ = b.makeCompositeConstant( + vec4_float_type_, + std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(0.f), + b.makeFloatConstant(0.f), b.makeFloatConstant(0.f)})); + + registers_type_ = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(64), 0); + registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + registers_type_, "r"); + + aL_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + vec4_uint_type_, "aL"); + + p0_ = b.createVariable(spv::StorageClass::StorageClassPrivate, bool_type_, + "p0"); + ps_ = b.createVariable(spv::StorageClass::StorageClassPrivate, float_type_, + "ps"); + pv_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + vec4_float_type_, "pv"); + a0_ = b.createVariable(spv::StorageClass::StorageClassPrivate, + b.makeUintType(32), "a0"); + + // Uniform constants. + Id float_consts_type = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(256), 1); + Id loop_consts_type = + b.makeArrayType(b.makeUintType(32), b.makeUintConstant(32), 1); + Id bool_consts_type = + b.makeArrayType(b.makeUintType(32), b.makeUintConstant(8), 1); + + Id consts_struct_type = b.makeStructType( + {float_consts_type, loop_consts_type, bool_consts_type}, "consts_type"); + b.addDecoration(consts_struct_type, spv::Decoration::DecorationBlock); + + // Constants member decorations + b.addMemberDecoration(consts_struct_type, 0, + spv::Decoration::DecorationOffset, 0); + b.addMemberDecoration(consts_struct_type, 0, + spv::Decoration::DecorationArrayStride, + 4 * sizeof(float)); + + b.addMemberDecoration(consts_struct_type, 1, + spv::Decoration::DecorationOffset, + 256 * 4 * sizeof(float)); + b.addMemberDecoration(consts_struct_type, 1, + spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + + b.addMemberDecoration(consts_struct_type, 2, + spv::Decoration::DecorationOffset, + 256 * 4 * 
sizeof(float) + 32 * sizeof(uint32_t)); + b.addMemberDecoration(consts_struct_type, 2, + spv::Decoration::DecorationArrayStride, + sizeof(uint32_t)); + + consts_ = b.createVariable(spv::StorageClass::StorageClassUniform, + consts_struct_type, "consts"); + + b.addDecoration(consts_, spv::Decoration::DecorationDescriptorSet, 0); + if (is_vertex_shader()) { + b.addDecoration(consts_, spv::Decoration::DecorationBinding, 0); + } else if (is_pixel_shader()) { + b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1); + } + + // Interpolators + Id interpolators_type = + b.makeArrayType(vec4_float_type_, b.makeUintConstant(16), 0); + if (is_vertex_shader()) { + // Vertex inputs/outputs + for (const auto& binding : vertex_bindings()) { + for (const auto& attrib : binding.attributes) { + Id attrib_type = 0; + switch (attrib.fetch_instr.attributes.data_format) { + case VertexFormat::k_32: + case VertexFormat::k_32_FLOAT: + attrib_type = float_type_; + break; + case VertexFormat::k_16_16: + case VertexFormat::k_32_32: + case VertexFormat::k_16_16_FLOAT: + case VertexFormat::k_32_32_FLOAT: + attrib_type = vec2_float_type_; + break; + case VertexFormat::k_10_11_11: + case VertexFormat::k_11_11_10: + case VertexFormat::k_32_32_32_FLOAT: + attrib_type = vec3_float_type_; + break; + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_2_10_10_10: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + attrib_type = vec4_float_type_; + break; + default: + assert_always(); + } + + auto attrib_var = b.createVariable( + spv::StorageClass::StorageClassInput, attrib_type, + xe::format_string("vf%d_%d", binding.fetch_constant, + attrib.fetch_instr.attributes.offset) + .c_str()); + b.addDecoration(attrib_var, spv::Decoration::DecorationLocation, + attrib.attrib_index); + + vertex_binding_map_[binding.fetch_constant][attrib.fetch_instr + .attributes.offset] = + attrib_var; + } + } + 
+ interpolators_ = b.createVariable(spv::StorageClass::StorageClassOutput, + interpolators_type, "interpolators"); + b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); + b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + + pos_ = b.createVariable(spv::StorageClass::StorageClassOutput, + vec4_float_type_, "gl_Position"); + b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn, + spv::BuiltIn::BuiltInPosition); + } else { + // Pixel inputs/outputs + interpolators_ = b.createVariable(spv::StorageClass::StorageClassInput, + interpolators_type, "interpolators"); + b.addDecoration(interpolators_, spv::Decoration::DecorationNoPerspective); + b.addDecoration(interpolators_, spv::Decoration::DecorationLocation, 0); + + // Copy interpolators to r[0..16] + b.createNoResultOp(spv::Op::OpCopyMemorySized, + {registers_ptr_, interpolators_, + b.makeUintConstant(16 * 4 * sizeof(float))}); + } } std::vector SpirvShaderTranslator::CompleteTranslation() { @@ -99,79 +245,232 @@ void SpirvShaderTranslator::PostTranslation(Shader* shader) { set_host_disassembly(shader, disasm->to_string()); } +void SpirvShaderTranslator::PreProcessControlFlowInstruction( + uint32_t cf_index) { + auto& b = *builder_; + + cf_blocks_[cf_index] = &b.makeNewBlock(); +} + void SpirvShaderTranslator::ProcessLabel(uint32_t cf_index) { auto& b = *builder_; EmitUnimplementedTranslationError(); } +void SpirvShaderTranslator::ProcessControlFlowInstructionBegin( + uint32_t cf_index) { + auto& b = *builder_; + + if (cf_index == 0) { + // Kind of cheaty, but emit a branch to the first block. 
+ b.createBranch(cf_blocks_[cf_index]); + } +} + +void SpirvShaderTranslator::ProcessControlFlowInstructionEnd( + uint32_t cf_index) { + auto& b = *builder_; +} + void SpirvShaderTranslator::ProcessControlFlowNopInstruction() { auto& b = *builder_; - EmitUnimplementedTranslationError(); + b.createNoResultOp(spv::Op::OpNop); } void SpirvShaderTranslator::ProcessExecInstructionBegin( const ParsedExecInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // Head has the logic to check if the body should execute. + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + auto body = head; + switch (instr.type) { + case ParsedExecInstruction::Type::kUnconditional: { + // No need to do anything. + } break; + case ParsedExecInstruction::Type::kConditional: { + // Based off of bool_consts + std::vector<Id> offsets; + offsets.push_back(b.makeUintConstant(2)); // bool_consts + offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); + auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, + consts_, offsets); + v = b.createLoad(v); + + // Bitfield extract the bool constant. + v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + b.makeUintConstant(instr.bool_constant_index % 32), + b.makeUintConstant(1)); + + // Conditional branch + assert_true(cf_blocks_.size() > instr.dword_index + 1); + body = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + } break; + case ParsedExecInstruction::Type::kPredicated: { + // Branch based on p0. 
+ assert_true(cf_blocks_.size() > instr.dword_index + 1); + body = &b.makeNewBlock(); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, body, cf_blocks_[instr.dword_index + 1]); + } break; + } + b.setBuildPoint(body); } void SpirvShaderTranslator::ProcessExecInstructionEnd( const ParsedExecInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + if (instr.is_end) { + b.makeReturn(false); + } else { + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); + } } void SpirvShaderTranslator::ProcessLoopStartInstruction( const ParsedLoopStartInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + + // TODO: Emit a spv LoopMerge + // (need to know the continue target and merge target beforehand though) + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessLoopEndInstruction( const ParsedLoopEndInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessCallInstruction( const ParsedCallInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessReturnInstruction( const ParsedReturnInstruction& instr) { auto& b = *builder_; + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + EmitUnimplementedTranslationError(); + + 
assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } +// CF jump void SpirvShaderTranslator::ProcessJumpInstruction( const ParsedJumpInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + switch (instr.type) { + case ParsedJumpInstruction::Type::kUnconditional: { + b.createBranch(cf_blocks_[instr.target_address]); + } break; + case ParsedJumpInstruction::Type::kConditional: { + // Based off of bool_consts + std::vector<Id> offsets; + offsets.push_back(b.makeUintConstant(2)); // bool_consts + offsets.push_back(b.makeUintConstant(instr.bool_constant_index / 32)); + auto v = b.createAccessChain(spv::StorageClass::StorageClassUniform, + consts_, offsets); + v = b.createLoad(v); + + // Bitfield extract the bool constant. + v = b.createTriOp(spv::Op::OpBitFieldUExtract, b.makeUintType(32), v, + b.makeUintConstant(instr.bool_constant_index % 32), + b.makeUintConstant(1)); + + // Conditional branch + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, v, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address], + cf_blocks_[instr.dword_index]); + } break; + case ParsedJumpInstruction::Type::kPredicated: { + assert_true(cf_blocks_.size() > instr.dword_index + 1); + auto cond = b.createBinOp(spv::Op::OpLogicalEqual, bool_type_, p0_, + b.makeBoolConstant(instr.condition)); + b.createConditionalBranch(cond, cf_blocks_[instr.target_address], + cf_blocks_[instr.dword_index]); + } break; + } } void SpirvShaderTranslator::ProcessAllocInstruction( const ParsedAllocInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + auto head = cf_blocks_[instr.dword_index]; + b.setBuildPoint(head); + + switch (instr.type) { + case AllocType::kNone: { + // ? 
+ } break; + case AllocType::kVsPosition: { + assert_true(is_vertex_shader()); + } break; + // Also PS Colors + case AllocType::kVsInterpolators: { + } break; + default: + break; + } + + assert_true(cf_blocks_.size() > instr.dword_index + 1); + b.createBranch(cf_blocks_[instr.dword_index + 1]); } void SpirvShaderTranslator::ProcessVertexFetchInstruction( const ParsedVertexFetchInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // Operand 0 is the index + // Operand 1 is the binding + // TODO: Indexed fetch + auto vertex_ptr = + vertex_binding_map_[instr.operands[1].storage_index][instr.attributes + .offset]; + assert_not_zero(vertex_ptr); + + auto vertex = b.createLoad(vertex_ptr); + StoreToResult(vertex, instr.result); } void SpirvShaderTranslator::ProcessTextureFetchInstruction( @@ -201,18 +500,327 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - EmitUnimplementedTranslationError(); + // TODO: instr.is_predicated + + Id sources[3] = {0}; + Id dest = 0; + for (size_t i = 0; i < instr.operand_count; i++) { + sources[i] = LoadFromOperand(instr.operands[i]); + } + + switch (instr.vector_opcode) { + case AluVectorOpcode::kAdd: { + dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, sources[0], + sources[1]); + } break; + + case AluVectorOpcode::kCube: { + // TODO: + } break; + + case AluVectorOpcode::kFloor: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFloor, {sources[0]}); + } break; + + case AluVectorOpcode::kFrc: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFract, {sources[0]}); + } break; + + case AluVectorOpcode::kMad: { + dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], + sources[1]); + dest = b.createBinOp(spv::Op::OpFAdd, vec4_float_type_, dest, sources[2]); + } break; + + case 
AluVectorOpcode::kMax: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFMax, {sources[0], sources[1]}); + } break; + + case AluVectorOpcode::kMin: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kFMin, {sources[0], sources[1]}); + } break; + + case AluVectorOpcode::kMul: { + dest = b.createBinOp(spv::Op::OpFMul, vec4_float_type_, sources[0], + sources[1]); + } break; + + case AluVectorOpcode::kSeq: { + // foreach(el) src0 == src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdEqual, vec4_bool_type_, sources[0], + sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSge: { + // foreach(el) src0 >= src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSgt: { + // foreach(el) src0 > src1 ? 1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdGreaterThan, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kSne: { + // foreach(el) src0 != src1 ? 
1.0 : 0.0 + auto c = b.createBinOp(spv::Op::OpFOrdNotEqual, vec4_float_type_, + sources[0], sources[1]); + dest = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, c, + vec4_float_one_, vec4_float_zero_); + } break; + + case AluVectorOpcode::kTrunc: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + GLSLstd450::kTrunc, {sources[0]}); + } break; + + default: + break; + } + + if (dest) { + b.createStore(dest, pv_); + StoreToResult(dest, instr.result); + } } void SpirvShaderTranslator::ProcessScalarAluInstruction( const ParsedAluInstruction& instr) { auto& b = *builder_; - spv::Id value_id = LoadFromOperand(instr.operands[0]); + // TODO: instr.is_predicated - StoreToResult(value_id, instr.result); + Id sources[3] = {0}; + Id dest = 0; + for (size_t i = 0, x = 0; i < instr.operand_count; i++) { + auto src = LoadFromOperand(instr.operands[i]); - EmitUnimplementedTranslationError(); + // Pull components out of the vector operands and use them as sources. + for (size_t j = 0; j < instr.operands[i].component_count; j++) { + uint32_t component = 0; + switch (instr.operands[i].components[j]) { + case SwizzleSource::kX: + component = 0; + break; + case SwizzleSource::kY: + component = 1; + break; + case SwizzleSource::kZ: + component = 2; + break; + case SwizzleSource::kW: + component = 3; + break; + case SwizzleSource::k0: + case SwizzleSource::k1: + // Don't believe this can happen. 
+ assert_always(); + break; + default: + assert_always(); + break; + } + + sources[x++] = b.createCompositeExtract(src, float_type_, component); + } + } + + switch (instr.scalar_opcode) { + case AluScalarOpcode::kAdds: + case AluScalarOpcode::kAddsc0: + case AluScalarOpcode::kAddsc1: { + // dest = src0 + src1 + dest = + b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kAddsPrev: { + // dest = src0 + ps + dest = b.createBinOp(spv::Op::OpFAdd, float_type_, sources[0], ps_); + } break; + + case AluScalarOpcode::kCos: { + // dest = cos(src0) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kCos, + {sources[0]}); + } break; + + case AluScalarOpcode::kMaxs: { + // dest = max(src0, src1) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMax, + {sources[0], sources[1]}); + } break; + + case AluScalarOpcode::kMins: { + // dest = min(src0, src1) + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kFMin, + {sources[0], sources[1]}); + } break; + + case AluScalarOpcode::kMuls: + case AluScalarOpcode::kMulsc0: + case AluScalarOpcode::kMulsc1: { + // dest = src0 * src1 + dest = + b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kMulsPrev: { + // dest = src0 * ps + dest = b.createBinOp(spv::Op::OpFMul, float_type_, sources[0], ps_); + } break; + + case AluScalarOpcode::kMulsPrev2: { + // TODO: Uh... see GLSL translator for impl. + } break; + + case AluScalarOpcode::kRcpc: { + // TODO: dest = src0 != 0.0 ? 1.0 / src0 : FLT_MAX; + } break; + + case AluScalarOpcode::kRcp: + case AluScalarOpcode::kRcpf: { + // dest = src0 != 0.0 ? 
1.0 / src0 : 0.0; + auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], + b.makeFloatConstant(0.f)); + auto d = b.createBinOp(spv::Op::OpFDiv, float_type_, + b.makeFloatConstant(1.f), sources[0]); + dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + } break; + + case AluScalarOpcode::kRsq: { + // dest = src0 != 0.0 ? inversesqrt(src0) : 0.0; + auto c = b.createBinOp(spv::Op::OpFOrdEqual, float_type_, sources[0], + b.makeFloatConstant(0.f)); + auto d = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, vec4_float_type_, + spv::GLSLstd450::kInverseSqrt, {sources[0]}); + dest = b.createBinOp(spv::Op::OpSelect, c, b.makeFloatConstant(0.f), d); + } break; + + case AluScalarOpcode::kSeqs: { + // TODO: dest = src0 == 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSges: { + // TODO: dest = src0 >= 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSgts: { + // TODO: dest = src0 > 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSnes: { + // TODO: dest = src0 != 0.0 ? 1.0 : 0.0; + } break; + + case AluScalarOpcode::kSetpEq: { + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpGe: { + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThanEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpGt: { + auto cond = b.createBinOp(spv::Op::OpFOrdGreaterThan, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 
0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSetpInv: { + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0], + b.makeFloatConstant(1.f)); + auto pred = + b.createTriOp(spv::Op::OpSelect, bool_type_, cond, + b.makeBoolConstant(true), b.makeBoolConstant(false)); + b.createStore(pred, p0_); + + // if (!cond) dest = src0 == 0.0 ? 1.0 : src0; + auto dst_cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, + sources[0], b.makeFloatConstant(0.f)); + auto dst_false = b.createTriOp(spv::Op::OpSelect, float_type_, dst_cond, + b.makeFloatConstant(1.f), sources[0]); + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), dst_false); + } break; + + case AluScalarOpcode::kSetpNe: { + auto cond = b.createBinOp(spv::Op::OpFOrdNotEqual, bool_type_, sources[0], + b.makeFloatConstant(0.f)); + + // p0 = cond + b.createStore(cond, p0_); + + // dest = cond ? 
0.f : 1.f; + dest = b.createTriOp(spv::Op::OpSelect, float_type_, cond, + b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)); + } break; + + case AluScalarOpcode::kSin: { + dest = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, float_type_, GLSLstd450::kSin, + {sources[0]}); + } break; + + case AluScalarOpcode::kSubs: + case AluScalarOpcode::kSubsc0: + case AluScalarOpcode::kSubsc1: { + dest = + b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], sources[1]); + } break; + + case AluScalarOpcode::kSubsPrev: { + dest = b.createBinOp(spv::Op::OpFSub, float_type_, sources[0], ps_); + } break; + + default: + break; + } + + if (dest) { + b.createStore(dest, ps_); + StoreToResult(dest, instr.result); + } } Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( @@ -223,50 +831,129 @@ Id SpirvShaderTranslator::CreateGlslStd450InstructionCall( args); } -spv::Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { +Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) { auto& b = *builder_; - spv::Id current_type_id = b.makeFloatType(32); - spv::Id current_value_id = b.createUndefined(current_type_id); + Id storage_pointer = 0; + Id storage_type = vec4_float_type_; + spv::StorageClass storage_class; + Id storage_index = 0; // Storage index at lowest level + std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage + + switch (op.storage_addressing_mode) { + case InstructionStorageAddressingMode::kStatic: { + storage_index = b.makeUintConstant(op.storage_index); + } break; + case InstructionStorageAddressingMode::kAddressAbsolute: { + // storage_index + a0 + storage_index = + b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.makeUintConstant(op.storage_index)); + } break; + case InstructionStorageAddressingMode::kAddressRelative: { + // TODO: Based on loop index + // storage_index + aL.x + storage_index = b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), + 
b.makeUintConstant(0), + b.makeUintConstant(op.storage_index)); + } break; + default: + assert_always(); + break; + } - // storage_addressing_mode switch (op.storage_source) { case InstructionStorageSource::kRegister: - // TODO(benvanik): op.storage_index + storage_pointer = registers_ptr_; + storage_class = spv::StorageClass::StorageClassPrivate; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); break; case InstructionStorageSource::kConstantFloat: - // TODO(benvanik): op.storage_index - break; - case InstructionStorageSource::kConstantInt: - // TODO(benvanik): op.storage_index - break; - case InstructionStorageSource::kConstantBool: - // TODO(benvanik): op.storage_index + storage_pointer = consts_; + storage_class = spv::StorageClass::StorageClassUniform; + storage_type = vec4_float_type_; + storage_offsets.push_back(b.makeUintConstant(0)); + storage_offsets.push_back(storage_index); break; case InstructionStorageSource::kVertexFetchConstant: - // TODO(benvanik): op.storage_index - break; case InstructionStorageSource::kTextureFetchConstant: - // TODO(benvanik): op.storage_index + // Should not reach this. 
+ assert_always(); + break; + default: + assert_always(); break; } + if (!storage_pointer) { + return b.createUndefined(vec4_float_type_); + } + + storage_pointer = + b.createAccessChain(storage_class, storage_pointer, storage_offsets); + auto storage_value = b.createLoad(storage_pointer); + assert_true(b.getTypeId(storage_value) == vec4_float_type_); + if (op.is_absolute_value) { - current_value_id = CreateGlslStd450InstructionCall( - spv::Decoration::DecorationRelaxedPrecision, current_type_id, - GLSLstd450::kFAbs, {current_value_id}); + storage_value = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, storage_type, GLSLstd450::kFAbs, + {storage_value}); } if (op.is_negated) { - current_value_id = - b.createUnaryOp(spv::Op::OpFNegate, current_type_id, current_value_id); + storage_value = + b.createUnaryOp(spv::Op::OpFNegate, storage_type, storage_value); } // swizzle + if (!op.is_standard_swizzle()) { + std::vector<Id> operands; + operands.push_back(storage_value); + operands.push_back(b.makeCompositeConstant( + vec2_float_type_, + std::vector<Id>({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); - return current_value_id; + // Components start from left and are duplicated rightwards + // e.g. count = 1, xxxx / count = 2, xyyy ... 
+ for (int i = 0; i < 4; i++) { + auto swiz = op.components[i]; + if (i > op.component_count - 1) { + swiz = op.components[op.component_count - 1]; + } + + uint32_t swiz_id = 0; + switch (swiz) { + case SwizzleSource::kX: + swiz_id = 0; + break; + case SwizzleSource::kY: + swiz_id = 1; + break; + case SwizzleSource::kZ: + swiz_id = 2; + break; + case SwizzleSource::kW: + swiz_id = 3; + break; + case SwizzleSource::k0: + swiz_id = 4; + break; + case SwizzleSource::k1: + swiz_id = 5; + break; + } + + operands.push_back(swiz_id); + } + + storage_value = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); + } + + return storage_value; } -void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, +void SpirvShaderTranslator::StoreToResult(Id source_value_id, const InstructionResult& result) { auto& b = *builder_; @@ -275,25 +962,67 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, return; } - spv::Id storage_pointer = 0; - // storage_addressing_mode + Id storage_pointer = 0; + Id storage_type = vec4_float_type_; + spv::StorageClass storage_class; + Id storage_index = 0; // Storage index at lowest level + std::vector<Id> storage_offsets; // Offsets in nested arrays -> storage + + switch (result.storage_addressing_mode) { + case InstructionStorageAddressingMode::kStatic: { + storage_index = b.makeUintConstant(result.storage_index); + } break; + case InstructionStorageAddressingMode::kAddressAbsolute: { + // storage_index + a0 + storage_index = + b.createBinOp(spv::Op::OpIAdd, b.makeUintType(32), b.createLoad(a0_), + b.makeUintConstant(result.storage_index)); + } break; + case InstructionStorageAddressingMode::kAddressRelative: { + // storage_index + aL.x + // TODO + } break; + default: + assert_always(); + return; + } + + bool storage_array; switch (result.storage_target) { case InstructionStorageTarget::kRegister: - // TODO(benvanik): result.storage_index + storage_pointer = registers_ptr_; + storage_class = 
spv::StorageClass::StorageClassPrivate; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); + storage_array = true; break; case InstructionStorageTarget::kInterpolant: - // TODO(benvanik): result.storage_index + assert_true(is_vertex_shader()); + storage_pointer = interpolators_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = vec4_float_type_; + storage_offsets.push_back(storage_index); + storage_array = true; break; case InstructionStorageTarget::kPosition: - // TODO(benvanik): result.storage_index + assert_true(is_vertex_shader()); + assert_not_zero(pos_); + storage_pointer = pos_; + storage_class = spv::StorageClass::StorageClassOutput; + storage_type = vec4_float_type_; + storage_offsets.push_back(0); + storage_array = false; break; case InstructionStorageTarget::kPointSize: + assert_true(is_vertex_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kColorTarget: + assert_true(is_pixel_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kDepth: + assert_true(is_pixel_shader()); // TODO(benvanik): result.storage_index break; case InstructionStorageTarget::kNone: @@ -301,25 +1030,107 @@ void SpirvShaderTranslator::StoreToResult(spv::Id source_value_id, break; } - spv::Id current_value_id = source_value_id; - spv::Id current_type_id = b.getTypeId(source_value_id); + if (!storage_pointer) { + // assert_always(); + return; + } + + if (storage_array) { + storage_pointer = + b.createAccessChain(storage_class, storage_pointer, storage_offsets); + } + auto storage_value = b.createLoad(storage_pointer); + + // Convert to the appropriate type, if needed. 
+ if (b.getTypeId(source_value_id) != storage_type) { + std::vector constituents; + auto n_el = b.getNumComponents(source_value_id); + auto n_dst = b.getNumTypeComponents(storage_type); + assert_true(n_el < n_dst); + + constituents.push_back(source_value_id); + for (int i = n_el; i < n_dst; i++) { + // Pad with zeroes. + constituents.push_back(b.makeFloatConstant(0.f)); + } + + source_value_id = b.createConstructor(spv::Decoration::DecorationInvariant, + constituents, storage_type); + } // Clamp the input value. if (result.is_clamped) { - // + source_value_id = CreateGlslStd450InstructionCall( + spv::Decoration::DecorationInvariant, b.getTypeId(source_value_id), + spv::GLSLstd450::kFClamp, + {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)}); + } + + // swizzle + // TODO: 0.0 and 1.0 swizzles + if (!result.is_standard_swizzle()) { + std::vector operands; + operands.push_back(source_value_id); + operands.push_back(b.makeCompositeConstant( + vec2_float_type_, + std::vector({b.makeFloatConstant(0.f), b.makeFloatConstant(1.f)}))); + + // Components start from left and are duplicated rightwards + // e.g. count = 1, xxxx / count = 2, xyyy ... + for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { + auto swiz = result.components[i]; + if (!result.write_mask[i]) { + // Undefined / don't care.
+ operands.push_back(0); + continue; + } + + uint32_t swiz_id = 0; + switch (swiz) { + case SwizzleSource::kX: + operands.push_back(0); + break; + case SwizzleSource::kY: + operands.push_back(1); + break; + case SwizzleSource::kZ: + operands.push_back(2); + break; + case SwizzleSource::kW: + operands.push_back(3); + break; + case SwizzleSource::k0: + operands.push_back(4); + break; + case SwizzleSource::k1: + operands.push_back(5); + break; + } + } + + source_value_id = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); } // write mask + if (!result.has_all_writes()) { + std::vector operands; + operands.push_back(source_value_id); + operands.push_back(storage_value); - // swizzle + for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) { + operands.push_back( + result.write_mask[i] ? i : b.getNumComponents(source_value_id) + i); + } - // Convert to the appropriate type, if needed. - spv::Id desired_type_id = b.makeFloatType(32); - if (current_value_id != desired_type_id) { - EmitTranslationError("Type conversion on storage not yet implemented"); + source_value_id = + b.createOp(spv::Op::OpVectorShuffle, storage_type, operands); } // Perform store into the pointer. 
+ assert_true(b.getNumComponents(source_value_id) == + b.getNumTypeComponents(storage_type)); + b.createStore(source_value_id, storage_pointer); } } // namespace gpu diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 2b233103b..fbd3af8cd 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -32,7 +32,10 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector CompleteTranslation() override; void PostTranslation(Shader* shader) override; + void PreProcessControlFlowInstruction(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override; + void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; + void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; void ProcessControlFlowNopInstruction() override; void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override; void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override; @@ -75,6 +78,27 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(benvanik): replace with something better, make reusable, etc. std::unique_ptr builder_; spv::Id glsl_std_450_instruction_set_ = 0; + + // Types + spv::Id float_type_ = 0, bool_type_ = 0; + spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; + spv::Id vec4_uint_type_ = 0; + spv::Id vec4_bool_type_ = 0; + + // Constants + spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; + + // Array of AMD registers + // These values are all pointers. + spv::Id registers_ptr_ = 0, registers_type_ = 0; + spv::Id consts_ = 0, a0_ = 0, aL_ = 0, p0_ = 0; + spv::Id ps_ = 0, pv_ = 0; // IDs of previous results + spv::Id pos_ = 0; + spv::Id interpolators_ = 0; + + // Map of {binding -> {offset -> spv input}} + std::map> vertex_binding_map_; + std::map cf_blocks_; }; } // namespace gpu