diff --git a/src/xenia/gpu/premake5.lua b/src/xenia/gpu/premake5.lua
index 1f6a1eea6..1c7870edc 100644
--- a/src/xenia/gpu/premake5.lua
+++ b/src/xenia/gpu/premake5.lua
@@ -22,6 +22,8 @@ project("xenia-gpu")
     project_root.."/third_party/gflags/src",
   })
   local_platform_files()
+  local_platform_files("spirv")
+  local_platform_files("spirv/passes")
 
 group("src")
 project("xenia-gpu-shader-compiler")
diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h
index 476369e53..95abe4dfa 100644
--- a/src/xenia/gpu/shader.h
+++ b/src/xenia/gpu/shader.h
@@ -99,6 +99,17 @@ struct InstructionResult {
   bool has_all_writes() const {
     return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
   }
+  // Returns the number of components (0-4) enabled in write_mask.
+  uint32_t num_writes() const {
+    uint32_t total = 0;
+    for (int i = 0; i < 4; ++i) {
+      if (write_mask[i]) {
+        ++total;
+      }
+    }
+
+    return total;
+  }
   // Returns true if any non-constant components are written.
   bool stores_non_constants() const {
     for (int i = 0; i < 4; ++i) {
diff --git a/src/xenia/gpu/spirv/compiler.cc b/src/xenia/gpu/spirv/compiler.cc
new file mode 100644
index 000000000..d31b36996
--- /dev/null
+++ b/src/xenia/gpu/spirv/compiler.cc
@@ -0,0 +1,42 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/compiler.h"
+
+#include <utility>
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+Compiler::Compiler() {}
+
+// Takes ownership of |pass| and appends it to the end of the pass list.
+void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) {
+  compiler_passes_.push_back(std::move(pass));
+}
+
+// Runs the registered passes over |module| in insertion order, stopping at
+// the first pass whose Run() returns false. Returns true if none failed.
+bool Compiler::Compile(spv::Module* module) {
+  for (auto& pass : compiler_passes_) {
+    if (!pass->Run(module)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Drops every registered pass.
+void Compiler::Reset() { compiler_passes_.clear(); }
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/compiler.h b/src/xenia/gpu/spirv/compiler.h
new file mode 100644
index 000000000..fd27969ee
--- /dev/null
+++ b/src/xenia/gpu/spirv/compiler.h
@@ -0,0 +1,44 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_COMPILER_H_
+#define XENIA_GPU_SPIRV_COMPILER_H_
+
+#include <memory>
+#include <vector>
+
+#include "xenia/base/arena.h"
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+#include "third_party/glslang-spirv/SpvBuilder.h"
+#include "third_party/spirv/GLSL.std.450.hpp11"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the
+// drivers.
+class Compiler {
+ public:
+  Compiler();
+
+  void AddPass(std::unique_ptr<CompilerPass> pass);  // Takes ownership.
+  void Reset();                                      // Removes all passes.
+  bool Compile(spv::Module* module);  // False as soon as one pass fails.
+
+ private:
+  std::vector<std::unique_ptr<CompilerPass>> compiler_passes_;
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_COMPILER_H_
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/compiler_pass.h b/src/xenia/gpu/spirv/compiler_pass.h
new file mode 100644
index 000000000..0d81aeeee
--- /dev/null
+++ b/src/xenia/gpu/spirv/compiler_pass.h
@@ -0,0 +1,43 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_
+#define XENIA_GPU_SPIRV_COMPILER_PASS_H_
+
+#include "xenia/base/arena.h"
+
+#include "third_party/glslang-spirv/SpvBuilder.h"
+#include "third_party/spirv/GLSL.std.450.hpp11"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Interface for a single pass over a SPIR-V module. Compiler::Compile() calls
+// Run() on each registered pass in turn.
+class CompilerPass {
+ public:
+  CompilerPass() = default;
+  virtual ~CompilerPass() = default;
+
+  // Runs the pass over |module|. Returning false makes Compiler::Compile()
+  // stop and return false.
+  virtual bool Run(spv::Module* module) = 0;
+
+ private:
+  // NOTE(review): private and not used by this class, so derived passes
+  // cannot reach it - confirm whether this was meant to be protected.
+  xe::Arena ir_arena_;
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_COMPILER_PASS_H_
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp
new file mode 100644
index 000000000..4d719f769
--- /dev/null
+++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp
@@ -0,0 +1,33 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+ControlFlowAnalysisPass::ControlFlowAnalysisPass() {}
+
+// Placeholder: the analysis is not implemented yet, so this performs no work
+// on the module's functions and always returns true.
+bool ControlFlowAnalysisPass::Run(spv::Module* module) {
+  for (auto function : module->getFunctions()) {
+    // TODO: For each OpBranchConditional, see if we can find a point where
+    // control flow converges and then append an OpSelectionMerge.
+    // Potential problems: while loops constructed from branch instructions
+    (void)function;  // Unused until the analysis above is written.
+  }
+
+  return true;
+}
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h
new file mode 100644
index 000000000..6b279e251
--- /dev/null
+++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h
@@ -0,0 +1,34 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
+#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
+
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Control-flow analysis pass. Runs through control-flow and adds merge opcodes
+// where necessary.
+class ControlFlowAnalysisPass : public CompilerPass {
+ public:
+  ControlFlowAnalysisPass();
+
+  bool Run(spv::Module* module) override;
+
+ private:
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc
new file mode 100644
index 000000000..7b01aa5aa
--- /dev/null
+++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc
@@ -0,0 +1,54 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+ControlFlowSimplificationPass::ControlFlowSimplificationPass() {}
+
+// Folds each reachable block into its single predecessor when that
+// predecessor ends in an unconditional OpBranch and has the block as its only
+// successor, then marks the folded block unreachable. Always returns true.
+bool ControlFlowSimplificationPass::Run(spv::Module* module) {
+  for (auto function : module->getFunctions()) {
+    // Walk the blocks back to front. Reverse iterators keep the walk inside
+    // the container: forming begin() - 1 as a stop sentinel, or end() - 1 on
+    // an empty block list, is undefined behavior.
+    const auto& blocks = function->getBlocks();
+    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
+      auto block = *it;
+      if (block->isUnreachable() || block->getPredecessors().size() != 1) {
+        continue;
+      }
+
+      auto prev_block = block->getPredecessors()[0];
+      auto last_instr =
+          prev_block->getInstruction(prev_block->getInstructionCount() - 1);
+      if (last_instr->getOpCode() != spv::Op::OpBranch) {
+        continue;
+      }
+
+      if (prev_block->getSuccessors().size() == 1 &&
+          prev_block->getSuccessors()[0] == block) {
+        // We're dominated by this block. Merge into it.
+        prev_block->merge(block);
+        block->setUnreachable();
+      }
+    }
+  }
+
+  return true;
+}
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
\ No newline at end of file
diff --git a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h
new file mode 100644
index 000000000..f851d24f1
--- /dev/null
+++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h
@@ -0,0 +1,34 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
+#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
+
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Control-flow simplification pass. Combines adjacent blocks and marks
+// any unreachable blocks.
+class ControlFlowSimplificationPass : public CompilerPass { + public: + ControlFlowSimplificationPass(); + + bool Run(spv::Module* module) override; + + private: +}; + +} // namespace spirv +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_ \ No newline at end of file diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 855df73f7..86bddcd80 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -12,16 +12,24 @@ #include #include "xenia/base/logging.h" +#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h" +#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h" namespace xe { namespace gpu { using namespace ucode; +constexpr int kMaxInterpolators = 16; +constexpr int kMaxTemporaryRegisters = 64; + using spv::GLSLstd450; using spv::Id; using spv::Op; -SpirvShaderTranslator::SpirvShaderTranslator() = default; +SpirvShaderTranslator::SpirvShaderTranslator() { + compiler_.AddPass(std::make_unique()); + compiler_.AddPass(std::make_unique()); +} SpirvShaderTranslator::~SpirvShaderTranslator() = default; @@ -331,11 +339,19 @@ void SpirvShaderTranslator::StartTranslation() { ps_param_gen_idx, b.makeUintConstant(-1)); spv::Builder::If ifb(cond, b); - // Index is specified - auto reg_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction, - registers_ptr_, - std::vector({ps_param_gen_idx})); - b.createStore(param, reg_ptr); + // FYI: We do this instead of r[ps_param_gen_idx] because that causes + // nvidia to move all registers into local memory (slow!) 
+ for (uint32_t i = 0; i < kMaxInterpolators; i++) { + auto reg_ptr = b.createAccessChain( + spv::StorageClass::StorageClassFunction, registers_ptr_, + std::vector({b.makeUintConstant(i)})); + + auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx, + b.makeUintConstant(i)); + auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param, + b.createLoad(reg_ptr)); + b.createStore(reg, reg_ptr); + } ifb.makeEndIf(); } @@ -406,28 +422,64 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { b.createStore(p, pos_); } else { // Alpha test - auto alpha_test_x = b.createCompositeExtract(push_consts_, float_type_, - std::vector{2, 0}); - auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x, - b.makeFloatConstant(1.f)); + auto alpha_test_enabled = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 0}); + auto alpha_test_func = b.createCompositeExtract( + push_consts_, float_type_, std::vector{2, 1}); + auto alpha_test_ref = b.createCompositeExtract(push_consts_, float_type_, + std::vector{2, 2}); + alpha_test_func = + b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func); + auto oC0_alpha = b.createCompositeExtract(frag_outputs_, float_type_, + std::vector({0, 3})); + auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, + alpha_test_enabled, b.makeFloatConstant(1.f)); spv::Builder::If alpha_if(cond, b); - // TODO(DrChat): Apply alpha test. 
+ std::vector switch_segments; + b.makeSwitch(alpha_test_func, 8, std::vector({0, 1, 2, 3, 4, 5, 6, 7}), + std::vector({0, 1, 2, 3, 4, 5, 6, 7}), 7, + switch_segments); + + const static spv::Op alpha_op_map[] = { + spv::Op::OpNop, + spv::Op::OpFOrdGreaterThanEqual, + spv::Op::OpFOrdNotEqual, + spv::Op::OpFOrdGreaterThan, + spv::Op::OpFOrdLessThanEqual, + spv::Op::OpFOrdEqual, + spv::Op::OpFOrdLessThan, + spv::Op::OpNop, + }; + // if (alpha_func == 0) passes = false; - // if (alpha_func == 1 && oC[0].a < alpha_ref) passes = true; - // if (alpha_func == 2 && oC[0].a == alpha_ref) passes = true; - // if (alpha_func == 3 && oC[0].a <= alpha_ref) passes = true; - // if (alpha_func == 4 && oC[0].a > alpha_ref) passes = true; - // if (alpha_func == 5 && oC[0].a != alpha_ref) passes = true; - // if (alpha_func == 6 && oC[0].a >= alpha_ref) passes = true; + b.nextSwitchSegment(switch_segments, 0); + b.makeDiscard(); + b.addSwitchBreak(); + + for (int i = 1; i < 7; i++) { + b.nextSwitchSegment(switch_segments, i); + auto cond = + b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, alpha_test_ref); + spv::Builder::If discard_if(cond, b); + b.makeDiscard(); + discard_if.makeEndIf(); + b.addSwitchBreak(); + } + // if (alpha_func == 7) passes = true; + b.nextSwitchSegment(switch_segments, 7); + b.endSwitch(switch_segments); alpha_if.makeEndIf(); } b.makeReturn(false); + // Compile the spv IR + compiler_.Compile(b.getModule()); + std::vector spirv_words; b.dump(spirv_words); @@ -555,8 +607,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( auto next_block = cf_blocks_[instr.dword_index + 1]; if (next_block.prev_dominates) { - b.createNoResultOp(spv::Op::OpSelectionMerge, - {next_block.block->getId(), 0}); + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); } b.createConditionalBranch(cond, body, next_block.block); } break; @@ -570,8 +621,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin( auto next_block = cf_blocks_[instr.dword_index 
+ 1]; if (next_block.prev_dominates) { - b.createNoResultOp(spv::Op::OpSelectionMerge, - {next_block.block->getId(), 0}); + b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone); } b.createConditionalBranch(cond, body, next_block.block); @@ -756,8 +806,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -771,6 +821,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( auto shader_vertex_id = b.createLoad(vertex_id_); auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id); + cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_); // Skip loading if it's an indexed fetch. auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index] @@ -778,6 +829,30 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction( assert_not_zero(vertex_ptr); auto vertex = b.createLoad(vertex_ptr); + switch (instr.attributes.data_format) { + case VertexFormat::k_8_8_8_8: + case VertexFormat::k_16_16: + case VertexFormat::k_16_16_16_16: + case VertexFormat::k_16_16_16_16_FLOAT: + case VertexFormat::k_32: + case VertexFormat::k_32_32: + case VertexFormat::k_32_32_32_32: + case VertexFormat::k_32_FLOAT: + case VertexFormat::k_32_32_FLOAT: + case VertexFormat::k_32_32_32_FLOAT: + case VertexFormat::k_32_32_32_32_FLOAT: + // These are handled, for now. + break; + + case VertexFormat::k_10_11_11: { + // No conversion needed. Natively supported. + } break; + + case VertexFormat::k_11_11_10: { + // This needs to be converted. 
+ } break; + } + auto vertex_components = b.getNumComponents(vertex); Id alt_vertex = 0; switch (vertex_components) { @@ -836,8 +911,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -940,8 +1015,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } @@ -1170,6 +1245,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1194,6 +1270,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1218,6 +1295,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); 
auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1242,6 +1320,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction( auto c_and = b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1); auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0); + c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_); auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3); // p0 @@ -1376,8 +1455,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction( predicated_block_cond_ = instr.predicate_condition; predicated_block_end_ = &b.makeNewBlock(); - b.createNoResultOp(spv::Op::OpSelectionMerge, - {predicated_block_end_->getId(), 0}); + b.createSelectionMerge(predicated_block_end_, + spv::SelectionControlMaskNone); b.createConditionalBranch(pred_cond, block, predicated_block_end_); b.setBuildPoint(block); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 39d3899c1..b6a761a24 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -17,6 +17,7 @@ #include "third_party/glslang-spirv/SpvBuilder.h" #include "third_party/spirv/GLSL.std.450.hpp11" #include "xenia/gpu/shader_translator.h" +#include "xenia/gpu/spirv/compiler.h" #include "xenia/ui/spirv/spirv_disassembler.h" #include "xenia/ui/spirv/spirv_validator.h" @@ -97,6 +98,7 @@ class SpirvShaderTranslator : public ShaderTranslator { xe::ui::spirv::SpirvDisassembler disassembler_; xe::ui::spirv::SpirvValidator validator_; + xe::gpu::spirv::Compiler compiler_; // True if there's an open predicated block bool open_predicated_block_ = false;