SPIR-V: WIP shader compiler / optimizations / alpha test implementation

2016-05-15 12:01:38 -05:00 · 2016-05-15 12:01:38 -05:00 · 2bb52ef86b
parent c06a7cdf81
commit 2bb52ef86b
11 changed files with 383 additions and 29 deletions
--- a/src/xenia/gpu/premake5.lua
+++ b/src/xenia/gpu/premake5.lua
@ -22,6 +22,8 @@ project("xenia-gpu")
    project_root.."/third_party/gflags/src",
  })
  local_platform_files()
+  local_platform_files("spirv")
+  local_platform_files("spirv/passes")

 group("src")
 project("xenia-gpu-shader-compiler")
--- a/src/xenia/gpu/shader.h
+++ b/src/xenia/gpu/shader.h
@ -99,6 +99,17 @@ struct InstructionResult {
  bool has_all_writes() const {
    return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
  }
+  // Returns the number of components written, i.e. how many of the four
+  // write_mask entries are set.
+  uint32_t num_writes() const {
+    uint32_t written_count = 0;
+    for (int component = 0; component < 4; ++component) {
+      // Each set mask entry contributes exactly one written component.
+      written_count += write_mask[component] ? 1 : 0;
+    }
+
+    return written_count;
+  }
  // Returns true if any non-constant components are written.
  bool stores_non_constants() const {
    for (int i = 0; i < 4; ++i) {
--- a/src/xenia/gpu/spirv/compiler.cc
+++ b/src/xenia/gpu/spirv/compiler.cc
@ -0,0 +1,36 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/compiler.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+Compiler::Compiler() = default;
+
+// Takes ownership of |pass| and appends it to the end of the pass list.
+void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) {
+  compiler_passes_.emplace_back(std::move(pass));
+}
+
+// Runs every registered pass over |module| in the order the passes were added.
+// Stops at the first pass that reports failure and returns false; returns true
+// when no pass fails (including when no passes are registered).
+bool Compiler::Compile(spv::Module* module) {
+  for (const auto& current_pass : compiler_passes_) {
+    const bool pass_ok = current_pass->Run(module);
+    if (!pass_ok) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Drops (and destroys) every pass that has been added so far.
+void Compiler::Reset() {
+  compiler_passes_.clear();
+}
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
--- a/src/xenia/gpu/spirv/compiler.h
+++ b/src/xenia/gpu/spirv/compiler.h
@ -0,0 +1,41 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_COMPILER_H_
+#define XENIA_GPU_SPIRV_COMPILER_H_
+
+#include "xenia/base/arena.h"
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+#include "third_party/glslang-spirv/SpvBuilder.h"
+#include "third_party/spirv/GLSL.std.450.hpp11"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the
+// drivers.
+//
+// Holds an ordered list of CompilerPass objects and applies them to a module.
+class Compiler {
+ public:
+  Compiler();
+
+  // Takes ownership of |pass| and appends it to the end of the pass list.
+  void AddPass(std::unique_ptr<CompilerPass> pass);
+  // Removes (and destroys) all passes added so far.
+  void Reset();
+  // Runs the passes on |module| in the order they were added. Returns false as
+  // soon as any pass's Run() returns false, and true otherwise.
+  bool Compile(spv::Module* module);
+
+ private:
+  // Passes in execution order; owned by this compiler.
+  std::vector<std::unique_ptr<CompilerPass>> compiler_passes_;
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_COMPILER_H_
--- a/src/xenia/gpu/spirv/compiler_pass.h
+++ b/src/xenia/gpu/spirv/compiler_pass.h
@ -0,0 +1,37 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_
+#define XENIA_GPU_SPIRV_COMPILER_PASS_H_
+
+#include "xenia/base/arena.h"
+
+#include "third_party/glslang-spirv/SpvBuilder.h"
+#include "third_party/spirv/GLSL.std.450.hpp11"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Interface for a single step that Compiler applies to a SPIR-V module.
+class CompilerPass {
+ public:
+  CompilerPass() = default;
+  // Virtual so passes can be destroyed through a CompilerPass pointer (the
+  // Compiler stores them as std::unique_ptr<CompilerPass>).
+  virtual ~CompilerPass() {}
+
+  // Applies the pass to |module|. Compiler::Compile stops running further
+  // passes and returns false when this returns false.
+  virtual bool Run(spv::Module* module) = 0;
+
+ private:
+  // NOTE(review): private and not referenced by any pass in this change;
+  // presumably reserved for pass-local allocations - confirm intended access.
+  xe::Arena ir_arena_;
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_COMPILER_PASS_H_
--- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp
+++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.cpp
@ -0,0 +1,30 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+ControlFlowAnalysisPass::ControlFlowAnalysisPass() = default;
+
+// Placeholder implementation: visits each function in |module| but does not
+// modify anything yet, and always reports success.
+bool ControlFlowAnalysisPass::Run(spv::Module* module) {
+  for (auto fn : module->getFunctions()) {
+    // Planned work: for each OpBranchConditional, see if we can find a point
+    // where control flow converges and then append an OpSelectionMerge.
+    // Potential problems: while loops constructed from branch instructions.
+    (void)fn;  // Nothing is done with the function yet.
+  }
+
+  return true;
+}
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
--- a/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h
+++ b/src/xenia/gpu/spirv/passes/control_flow_analysis_pass.h
@ -0,0 +1,34 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
+#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
+
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Control-flow analysis pass. Runs through control-flow and adds merge opcodes
+// where necessary.
+class ControlFlowAnalysisPass : public CompilerPass {
+ public:
+  ControlFlowAnalysisPass();
+
+  // CompilerPass entry point. The current implementation only iterates over
+  // the module's functions without changing them and always returns true.
+  bool Run(spv::Module* module) override;
+
+ private:
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
--- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc
+++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.cc
@ -0,0 +1,48 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// The pass keeps no state of its own, so default construction is sufficient.
+ControlFlowSimplificationPass::ControlFlowSimplificationPass() = default;
+
+// Merges every block whose only predecessor ends in an unconditional OpBranch
+// and has that block as its single successor into the predecessor, then marks
+// the merged block unreachable. Always returns true.
+bool ControlFlowSimplificationPass::Run(spv::Module* module) {
+  for (auto function : module->getFunctions()) {
+    // Walk through the blocks in the function, last to first, and merge any
+    // blocks which are unconditionally dominated. Reverse iterators are used
+    // so the walk never forms `begin() - 1` (or `end() - 1` on an empty block
+    // list), both of which are undefined behavior.
+    const auto& blocks = function->getBlocks();
+    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
+      auto block = *it;
+      if (block->isUnreachable() || block->getPredecessors().size() != 1) {
+        continue;
+      }
+
+      auto prev_block = block->getPredecessors()[0];
+      auto last_instr =
+          prev_block->getInstruction(prev_block->getInstructionCount() - 1);
+      if (last_instr->getOpCode() != spv::Op::OpBranch) {
+        continue;
+      }
+
+      if (prev_block->getSuccessors().size() == 1 &&
+          prev_block->getSuccessors()[0] == block) {
+        // We're dominated by this block. Merge into it.
+        prev_block->merge(block);
+        block->setUnreachable();
+      }
+    }
+  }
+
+  return true;
+}
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
--- a/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h
+++ b/src/xenia/gpu/spirv/passes/control_flow_simplification_pass.h
@ -0,0 +1,34 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2016 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
+#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
+
+#include "xenia/gpu/spirv/compiler_pass.h"
+
+namespace xe {
+namespace gpu {
+namespace spirv {
+
+// Control-flow simplification pass. Combines adjacent blocks and marks
+// any unreachable blocks.
+class ControlFlowSimplificationPass : public CompilerPass {
+ public:
+  ControlFlowSimplificationPass();
+
+  // CompilerPass entry point. Merges a block into its sole predecessor when
+  // that predecessor ends in an OpBranch and has the block as its only
+  // successor, then marks the merged block unreachable. Always returns true.
+  bool Run(spv::Module* module) override;
+
+ private:
+};
+
+}  // namespace spirv
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@ -12,16 +12,24 @@
 #include <cstring>

 #include "xenia/base/logging.h"
+#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
+#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"

 namespace xe {
 namespace gpu {
 using namespace ucode;

+constexpr int kMaxInterpolators = 16;
+constexpr int kMaxTemporaryRegisters = 64;
+
 using spv::GLSLstd450;
 using spv::Id;
 using spv::Op;

-SpirvShaderTranslator::SpirvShaderTranslator() = default;
+// Registers the SPIR-V post-processing passes. Compiler::Compile runs passes
+// in the order they were added, so simplification runs before analysis.
+SpirvShaderTranslator::SpirvShaderTranslator() {
+  compiler_.AddPass(std::make_unique<spirv::ControlFlowSimplificationPass>());
+  compiler_.AddPass(std::make_unique<spirv::ControlFlowAnalysisPass>());
+}

 SpirvShaderTranslator::~SpirvShaderTranslator() = default;

@ -331,11 +339,19 @@ void SpirvShaderTranslator::StartTranslation() {
                              ps_param_gen_idx, b.makeUintConstant(-1));
    spv::Builder::If ifb(cond, b);

-    // Index is specified
-    auto reg_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction,
-                                       registers_ptr_,
-                                       std::vector<Id>({ps_param_gen_idx}));
-    b.createStore(param, reg_ptr);
+    // FYI: We do this instead of r[ps_param_gen_idx] because that causes
+    // nvidia to move all registers into local memory (slow!)
+    for (uint32_t i = 0; i < kMaxInterpolators; i++) {
+      auto reg_ptr = b.createAccessChain(
+          spv::StorageClass::StorageClassFunction, registers_ptr_,
+          std::vector<Id>({b.makeUintConstant(i)}));
+
+      auto cond = b.createBinOp(spv::Op::OpIEqual, bool_type_, ps_param_gen_idx,
+                                b.makeUintConstant(i));
+      auto reg = b.createTriOp(spv::Op::OpSelect, vec4_float_type_, cond, param,
+                               b.createLoad(reg_ptr));
+      b.createStore(reg, reg_ptr);
+    }

    ifb.makeEndIf();
  }
@ -406,28 +422,64 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
    b.createStore(p, pos_);
  } else {
    // Alpha test
-    auto alpha_test_x = b.createCompositeExtract(push_consts_, float_type_,
-                                                 std::vector<uint32_t>{2, 0});
-    auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, alpha_test_x,
-                              b.makeFloatConstant(1.f));
+    auto alpha_test_enabled = b.createCompositeExtract(
+        push_consts_, float_type_, std::vector<uint32_t>{2, 0});
+    auto alpha_test_func = b.createCompositeExtract(
+        push_consts_, float_type_, std::vector<uint32_t>{2, 1});
+    auto alpha_test_ref = b.createCompositeExtract(push_consts_, float_type_,
+                                                   std::vector<uint32_t>{2, 2});
+    alpha_test_func =
+        b.createUnaryOp(spv::Op::OpConvertFToU, uint_type_, alpha_test_func);
+    auto oC0_alpha = b.createCompositeExtract(frag_outputs_, float_type_,
+                                              std::vector<uint32_t>({0, 3}));

+    auto cond = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_,
+                              alpha_test_enabled, b.makeFloatConstant(1.f));
    spv::Builder::If alpha_if(cond, b);

-    // TODO(DrChat): Apply alpha test.
+    std::vector<spv::Block*> switch_segments;
+    b.makeSwitch(alpha_test_func, 8, std::vector<int>({0, 1, 2, 3, 4, 5, 6, 7}),
+                 std::vector<int>({0, 1, 2, 3, 4, 5, 6, 7}), 7,
+                 switch_segments);
+
+    const static spv::Op alpha_op_map[] = {
+        spv::Op::OpNop,
+        spv::Op::OpFOrdGreaterThanEqual,
+        spv::Op::OpFOrdNotEqual,
+        spv::Op::OpFOrdGreaterThan,
+        spv::Op::OpFOrdLessThanEqual,
+        spv::Op::OpFOrdEqual,
+        spv::Op::OpFOrdLessThan,
+        spv::Op::OpNop,
+    };
+
    // if (alpha_func == 0) passes = false;
-    // if (alpha_func == 1 && oC[0].a <  alpha_ref) passes = true;
-    // if (alpha_func == 2 && oC[0].a == alpha_ref) passes = true;
-    // if (alpha_func == 3 && oC[0].a <= alpha_ref) passes = true;
-    // if (alpha_func == 4 && oC[0].a >  alpha_ref) passes = true;
-    // if (alpha_func == 5 && oC[0].a != alpha_ref) passes = true;
-    // if (alpha_func == 6 && oC[0].a >= alpha_ref) passes = true;
+    b.nextSwitchSegment(switch_segments, 0);
+    b.makeDiscard();
+    b.addSwitchBreak();
+
+    for (int i = 1; i < 7; i++) {
+      b.nextSwitchSegment(switch_segments, i);
+      auto cond =
+          b.createBinOp(alpha_op_map[i], bool_type_, oC0_alpha, alpha_test_ref);
+      spv::Builder::If discard_if(cond, b);
+      b.makeDiscard();
+      discard_if.makeEndIf();
+      b.addSwitchBreak();
+    }
+
    // if (alpha_func == 7) passes = true;
+    b.nextSwitchSegment(switch_segments, 7);
+    b.endSwitch(switch_segments);

    alpha_if.makeEndIf();
  }

  b.makeReturn(false);

+  // Compile the spv IR
+  compiler_.Compile(b.getModule());
+
  std::vector<uint32_t> spirv_words;
  b.dump(spirv_words);

@ -555,8 +607,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin(

      auto next_block = cf_blocks_[instr.dword_index + 1];
      if (next_block.prev_dominates) {
-        b.createNoResultOp(spv::Op::OpSelectionMerge,
-                           {next_block.block->getId(), 0});
+        b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone);
      }
      b.createConditionalBranch(cond, body, next_block.block);
    } break;
@ -570,8 +621,7 @@ void SpirvShaderTranslator::ProcessExecInstructionBegin(

      auto next_block = cf_blocks_[instr.dword_index + 1];
      if (next_block.prev_dominates) {
-        b.createNoResultOp(spv::Op::OpSelectionMerge,
-                           {next_block.block->getId(), 0});
+        b.createSelectionMerge(next_block.block, spv::SelectionControlMaskNone);
      }
      b.createConditionalBranch(cond, body, next_block.block);

@ -756,8 +806,8 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
    predicated_block_cond_ = instr.predicate_condition;
    predicated_block_end_ = &b.makeNewBlock();

-    b.createNoResultOp(spv::Op::OpSelectionMerge,
-                       {predicated_block_end_->getId(), 0});
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
    b.setBuildPoint(block);
  }
@ -771,6 +821,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
  auto shader_vertex_id = b.createLoad(vertex_id_);
  auto cond =
      b.createBinOp(spv::Op::OpIEqual, bool_type_, vertex_id, shader_vertex_id);
+  cond = b.smearScalar(spv::NoPrecision, cond, vec4_bool_type_);

  // Skip loading if it's an indexed fetch.
  auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
@ -778,6 +829,30 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
  assert_not_zero(vertex_ptr);
  auto vertex = b.createLoad(vertex_ptr);

+  switch (instr.attributes.data_format) {
+    case VertexFormat::k_8_8_8_8:
+    case VertexFormat::k_16_16:
+    case VertexFormat::k_16_16_16_16:
+    case VertexFormat::k_16_16_16_16_FLOAT:
+    case VertexFormat::k_32:
+    case VertexFormat::k_32_32:
+    case VertexFormat::k_32_32_32_32:
+    case VertexFormat::k_32_FLOAT:
+    case VertexFormat::k_32_32_FLOAT:
+    case VertexFormat::k_32_32_32_FLOAT:
+    case VertexFormat::k_32_32_32_32_FLOAT:
+      // These are handled, for now.
+      break;
+
+    case VertexFormat::k_10_11_11: {
+      // No conversion needed. Natively supported.
+    } break;
+
+    case VertexFormat::k_11_11_10: {
+      // This needs to be converted.
+    } break;
+  }
+
  auto vertex_components = b.getNumComponents(vertex);
  Id alt_vertex = 0;
  switch (vertex_components) {
@ -836,8 +911,8 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
    predicated_block_cond_ = instr.predicate_condition;
    predicated_block_end_ = &b.makeNewBlock();

-    b.createNoResultOp(spv::Op::OpSelectionMerge,
-                       {predicated_block_end_->getId(), 0});
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
    b.setBuildPoint(block);
  }
@ -940,8 +1015,8 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
    predicated_block_cond_ = instr.predicate_condition;
    predicated_block_end_ = &b.makeNewBlock();

-    b.createNoResultOp(spv::Op::OpSelectionMerge,
-                       {predicated_block_end_->getId(), 0});
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
    b.setBuildPoint(block);
  }
@ -1170,6 +1245,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
      auto c_and =
          b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
      auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
      auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);

      // p0
@ -1194,6 +1270,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
      auto c_and =
          b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
      auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
      auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);

      // p0
@ -1218,6 +1295,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
      auto c_and =
          b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
      auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
      auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);

      // p0
@ -1242,6 +1320,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
      auto c_and =
          b.createBinOp(spv::Op::OpLogicalAnd, vec4_bool_type_, c0, c1);
      auto c_and_x = b.createCompositeExtract(c_and, bool_type_, 0);
+      c_and_x = b.smearScalar(spv::NoPrecision, c_and_x, vec4_bool_type_);
      auto c_and_w = b.createCompositeExtract(c_and, bool_type_, 3);

      // p0
@ -1376,8 +1455,8 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
    predicated_block_cond_ = instr.predicate_condition;
    predicated_block_end_ = &b.makeNewBlock();

-    b.createNoResultOp(spv::Op::OpSelectionMerge,
-                       {predicated_block_end_->getId(), 0});
+    b.createSelectionMerge(predicated_block_end_,
+                           spv::SelectionControlMaskNone);
    b.createConditionalBranch(pred_cond, block, predicated_block_end_);
    b.setBuildPoint(block);
  }
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@ -17,6 +17,7 @@
 #include "third_party/glslang-spirv/SpvBuilder.h"
 #include "third_party/spirv/GLSL.std.450.hpp11"
 #include "xenia/gpu/shader_translator.h"
+#include "xenia/gpu/spirv/compiler.h"
 #include "xenia/ui/spirv/spirv_disassembler.h"
 #include "xenia/ui/spirv/spirv_validator.h"

@ -97,6 +98,7 @@ class SpirvShaderTranslator : public ShaderTranslator {

  xe::ui::spirv::SpirvDisassembler disassembler_;
  xe::ui::spirv::SpirvValidator validator_;
+  xe::gpu::spirv::Compiler compiler_;

  // True if there's an open predicated block
  bool open_predicated_block_ = false;