SPIR-V: Use gl_VertexIndex rather than gl_VertexID

Rewrite bits of LoadFromOperand/StoreToResult
2016-07-29 11:28:19 -05:00 · 2016-07-29 11:28:19 -05:00 · a547c79b51
parent 72d83591e5
commit a547c79b51
2 changed files with 87 additions and 48 deletions
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@ -278,22 +278,23 @@ void SpirvShaderTranslator::StartTranslation() {
    b.addDecoration(pos_, spv::Decoration::DecorationBuiltIn,
                    spv::BuiltIn::BuiltInPosition);

-    vertex_id_ = b.createVariable(spv::StorageClass::StorageClassInput,
-                                  int_type_, "gl_VertexId");
-    b.addDecoration(vertex_id_, spv::Decoration::DecorationBuiltIn,
-                    spv::BuiltIn::BuiltInVertexId);
+    vertex_idx_ = b.createVariable(spv::StorageClass::StorageClassInput,
+                                   int_type_, "gl_VertexIndex");
+    b.addDecoration(vertex_idx_, spv::Decoration::DecorationBuiltIn,
+                    spv::BuiltIn::BuiltInVertexIndex);

    interface_ids_.push_back(interpolators_);
    interface_ids_.push_back(pos_);
-    interface_ids_.push_back(vertex_id_);
+    interface_ids_.push_back(vertex_idx_);

-    auto vertex_id = b.createLoad(vertex_id_);
-    vertex_id = b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_id);
+    auto vertex_idx = b.createLoad(vertex_idx_);
+    vertex_idx =
+        b.createUnaryOp(spv::Op::OpConvertSToF, float_type_, vertex_idx);
    auto r0_ptr = b.createAccessChain(spv::StorageClass::StorageClassFunction,
                                      registers_ptr_,
                                      std::vector<Id>({b.makeUintConstant(0)}));
    auto r0 = b.createLoad(r0_ptr);
-    r0 = b.createCompositeInsert(vertex_id, r0, vec4_float_type_,
+    r0 = b.createCompositeInsert(vertex_idx, r0, vec4_float_type_,
                                 std::vector<uint32_t>({0}));
    b.createStore(r0, r0_ptr);
  } else {
@ -857,7 +858,7 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
    const ParsedVertexFetchInstruction& instr) {
  auto& b = *builder_;
  assert_true(is_vertex_shader());
-  assert_not_zero(vertex_id_);
+  assert_not_zero(vertex_idx_);

  // Close the open predicated block if this instr isn't predicated or the
  // conditions do not match.
@ -889,10 +890,9 @@ void SpirvShaderTranslator::ProcessVertexFetchInstruction(
  // Operand 0 is the index
  // Operand 1 is the binding
  // TODO: Indexed fetch
-  auto vertex_id = LoadFromOperand(instr.operands[0]);
-  vertex_id = b.createCompositeExtract(vertex_id, float_type_, 0);
-  vertex_id = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_id);
-  auto shader_vertex_id = b.createLoad(vertex_id_);
+  auto vertex_idx = LoadFromOperand(instr.operands[0]);
+  vertex_idx = b.createUnaryOp(spv::Op::OpConvertFToS, int_type_, vertex_idx);
+  auto shader_vertex_idx = b.createLoad(vertex_idx_);

  // Skip loading if it's an indexed fetch.
  auto vertex_ptr = vertex_binding_map_[instr.operands[1].storage_index]
@ -1716,6 +1716,7 @@ void SpirvShaderTranslator::ProcessVectorAluInstruction(
  }

  assert_not_zero(dest);
+  assert_true(b.getTypeId(dest) == vec4_float_type_);
  if (dest) {
    b.createStore(dest, pv_);
    StoreToResult(dest, instr.result);
@ -1741,8 +1742,12 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
    auto src = LoadFromOperand(instr.operands[i]);

    // Pull components out of the vector operands and use them as sources.
-    for (int j = 0; j < instr.operands[i].component_count; j++) {
-      sources[x++] = b.createCompositeExtract(src, float_type_, j);
+    if (instr.operands[i].component_count > 1) {
+      for (int j = 0; j < instr.operands[i].component_count; j++) {
+        sources[x++] = b.createCompositeExtract(src, float_type_, j);
+      }
+    } else {
+      sources[x++] = src;
    }
  }

@ -2006,15 +2011,17 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
      dest = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
                                             spv::GLSLstd450::kInverseSqrt,
                                             {sources[0]});
-      auto c = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
+      auto c1 = b.createUnaryOp(spv::Op::OpIsInf, bool_type_, dest);
+      auto c2 = b.createUnaryOp(spv::Op::OpIsNan, bool_type_, dest);
+      auto c = b.createBinOp(spv::Op::OpLogicalOr, bool_type_, c1, c2);
      dest = b.createTriOp(spv::Op::OpSelect, float_type_, c,
                           b.makeFloatConstant(0.f), dest);
    } break;

    case AluScalarOpcode::kRsq: {
-      // dest = src0 != 0.0 ? inversesqrt(src0) : 0.0;
-      auto c = b.createBinOp(spv::Op::OpFOrdEqual, bool_type_, sources[0],
-                             b.makeFloatConstant(0.f));
+      // dest = src0 > 0.0 ? inversesqrt(src0) : 0.0;
+      auto c = b.createBinOp(spv::Op::OpFOrdLessThanEqual, bool_type_,
+                             sources[0], b.makeFloatConstant(0.f));
      auto d = CreateGlslStd450InstructionCall(spv::NoPrecision, float_type_,
                                               spv::GLSLstd450::kInverseSqrt,
                                               {sources[0]});
@ -2179,6 +2186,7 @@ void SpirvShaderTranslator::ProcessScalarAluInstruction(
  }

  assert_not_zero(dest);
+  assert_true(b.getTypeId(dest) == float_type_);
  if (dest) {
    b.createStore(dest, ps_);
    StoreToResult(dest, instr.result);
@ -2272,6 +2280,35 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
  auto storage_value = b.createLoad(storage_pointer);
  assert_true(b.getTypeId(storage_value) == vec4_float_type_);

+  if (op.component_count == 1) {
+    // Don't bother handling constant 0/1 fetches, as they're invalid in scalar
+    // opcodes.
+    uint32_t index = 0;
+    switch (op.components[0]) {
+      case SwizzleSource::kX:
+        index = 0;
+        break;
+      case SwizzleSource::kY:
+        index = 1;
+        break;
+      case SwizzleSource::kZ:
+        index = 2;
+        break;
+      case SwizzleSource::kW:
+        index = 3;
+        break;
+      case SwizzleSource::k0:
+        assert_always();
+        break;
+      case SwizzleSource::k1:
+        assert_always();
+        break;
+    }
+
+    storage_value = b.createCompositeExtract(storage_value, float_type_, index);
+    storage_type = float_type_;
+  }
+
  if (op.is_absolute_value) {
    storage_value = CreateGlslStd450InstructionCall(
        spv::NoPrecision, storage_type, GLSLstd450::kFAbs, {storage_value});
@ -2282,7 +2319,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
  }

  // swizzle
-  if (!op.is_standard_swizzle()) {
+  if (op.component_count > 1 && !op.is_standard_swizzle()) {
    std::vector<uint32_t> operands;
    operands.push_back(storage_value);
    operands.push_back(b.makeCompositeConstant(
@ -2427,42 +2464,25 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
        b.createAccessChain(storage_class, storage_pointer, storage_offsets);
  }

+  bool source_is_scalar = b.isScalar(source_value_id);
+  bool storage_is_scalar = b.isScalarType(b.getDerefTypeId(storage_pointer));
+  spv::Id source_type = b.getTypeId(source_value_id);
+
  // Only load from storage if we need it later.
  Id storage_value = 0;
-  if (!result.has_all_writes()) {
+  if ((source_is_scalar && !storage_is_scalar) || !result.has_all_writes()) {
    storage_value = b.createLoad(storage_pointer);
  }

  // Clamp the input value.
  if (result.is_clamped) {
    source_value_id = CreateGlslStd450InstructionCall(
-        spv::NoPrecision, b.getTypeId(source_value_id),
-        spv::GLSLstd450::kFClamp,
+        spv::NoPrecision, source_type, spv::GLSLstd450::kFClamp,
        {source_value_id, b.makeFloatConstant(0.0), b.makeFloatConstant(1.0)});
  }

-  // Convert to the appropriate type, if needed.
-  if (b.getTypeId(source_value_id) != storage_type) {
-    std::vector<Id> constituents;
-    auto n_el = b.getNumComponents(source_value_id);
-    auto n_dst = b.getNumTypeComponents(storage_type);
-
-    if (n_el != 1) {
-      std::vector<uint32_t> channels;
-      for (int i = 0; i < n_dst; i++) {
-        channels.push_back(i % n_el);
-      }
-
-      source_value_id = b.createRvalueSwizzle(spv::NoPrecision, storage_type,
-                                              source_value_id, channels);
-    } else {
-      source_value_id =
-          b.smearScalar(spv::NoPrecision, source_value_id, storage_type);
-    }
-  }
-
  // swizzle
-  if (!result.is_standard_swizzle()) {
+  if (!result.is_standard_swizzle() && !source_is_scalar) {
    std::vector<uint32_t> operands;
    operands.push_back(source_value_id);
    operands.push_back(b.makeCompositeConstant(
@ -2471,6 +2491,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,

    // Components start from left and are duplicated rightwards
    // e.g. count = 1, xxxx / count = 2, xyyy ...
+    uint32_t source_components = b.getNumComponents(source_value_id);
    for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) {
      if (!result.write_mask[i]) {
        // Undefined / don't care.
@ -2493,10 +2514,10 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
          operands.push_back(3);
          break;
        case SwizzleSource::k0:
-          operands.push_back(4);
+          operands.push_back(source_components + 0);
          break;
        case SwizzleSource::k1:
-          operands.push_back(5);
+          operands.push_back(source_components + 1);
          break;
      }
    }
@ -2506,7 +2527,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
  }

  // write mask
-  if (!result.has_all_writes()) {
+  if (!result.has_all_writes() && !source_is_scalar) {
    std::vector<uint32_t> operands;
    operands.push_back(source_value_id);
    operands.push_back(storage_value);
@ -2518,6 +2539,24 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,

    source_value_id =
        b.createOp(spv::Op::OpVectorShuffle, storage_type, operands);
+  } else if (source_is_scalar && !storage_is_scalar) {
+    assert_true(result.num_writes() >= 1);
+
+    if (result.has_all_writes()) {
+      source_value_id =
+          b.smearScalar(spv::NoPrecision, source_value_id, storage_type);
+    } else {
+      // Find first enabled component
+      uint32_t index = 0;
+      for (uint32_t i = 0; i < 4; i++) {
+        if (result.write_mask[i]) {
+          index = i;
+          break;
+        }
+      }
+      source_value_id = b.createCompositeInsert(source_value_id, storage_value,
+                                                storage_type, index);
+    }
  }

  // Perform store into the pointer.
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@ -135,7 +135,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
  spv::Id pos_ = 0;
  spv::Id push_consts_ = 0;
  spv::Id interpolators_ = 0;
-  spv::Id vertex_id_ = 0;
+  spv::Id vertex_idx_ = 0;
  spv::Id frag_outputs_ = 0, frag_depth_ = 0;
  spv::Id samplers_ = 0;
  spv::Id tex_[4] = {0};  // Images {1D, 2D, 3D, Cube}