From b3faba50a323244a091d85779d46448889d4bf63 Mon Sep 17 00:00:00 2001
From: Ben Vanik <ben.vanik@gmail.com>
Date: Sun, 21 Feb 2016 10:38:58 -0800
Subject: [PATCH] Pushing constants.

---
 src/xenia/gpu/spirv_shader_translator.cc | 36 +++++-----
 src/xenia/gpu/spirv_shader_translator.h  | 22 ++++++
 src/xenia/gpu/vulkan/pipeline_cache.cc   | 85 ++++++++++++++++++------
 src/xenia/gpu/vulkan/pipeline_cache.h    |  2 +
 4 files changed, 111 insertions(+), 34 deletions(-)

diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc
index f6f2ba229..ec19f76c0 100644
--- a/src/xenia/gpu/spirv_shader_translator.cc
+++ b/src/xenia/gpu/spirv_shader_translator.cc
@@ -133,24 +133,30 @@ void SpirvShaderTranslator::StartTranslation() {
     b.addDecoration(consts_, spv::Decoration::DecorationBinding, 1);
   }
 
-  // Push constants.
-  Id push_constants_type =
-      b.makeStructType({vec4_float_type_, vec4_float_type_, vec4_float_type_},
-                       "push_consts_type");
-
-  b.addMemberDecoration(push_constants_type, 0,
-                        spv::Decoration::DecorationOffset, 0);
+  // Push constants, represented by SpirvPushConstants.
+  Id push_constants_type = b.makeStructType(
+      {vec4_float_type_, vec4_float_type_, vec4_float_type_, uint_type},
+      "push_consts_type");
+  // float4 window_scale;
+  b.addMemberDecoration(
+      push_constants_type, 0, spv::Decoration::DecorationOffset,
+      static_cast<int>(offsetof(SpirvPushConstants, window_scale)));
   b.addMemberName(push_constants_type, 0, "window_scale");
-
-  b.addMemberDecoration(push_constants_type, 1,
-                        spv::Decoration::DecorationOffset, 4 * sizeof(float));
+  // float4 vtx_fmt;
+  b.addMemberDecoration(
+      push_constants_type, 1, spv::Decoration::DecorationOffset,
+      static_cast<int>(offsetof(SpirvPushConstants, vtx_fmt)));
   b.addMemberName(push_constants_type, 1, "vtx_fmt");
-
-  b.addMemberDecoration(push_constants_type, 2,
-                        spv::Decoration::DecorationOffset,
-                        2 * 4 * sizeof(float));
+  // float4 alpha_test;
+  b.addMemberDecoration(
+      push_constants_type, 2, spv::Decoration::DecorationOffset,
+      static_cast<int>(offsetof(SpirvPushConstants, alpha_test)));
   b.addMemberName(push_constants_type, 2, "alpha_test");
-
+  // uint ps_param_gen;
+  b.addMemberDecoration(
+      push_constants_type, 3, spv::Decoration::DecorationOffset,
+      static_cast<int>(offsetof(SpirvPushConstants, ps_param_gen)));
+  b.addMemberName(push_constants_type, 3, "ps_param_gen");
   push_consts_ = b.createVariable(spv::StorageClass::StorageClassPushConstant,
                                   push_constants_type, "push_consts");
 
diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h
index 8138bbdc9..1ec006d50 100644
--- a/src/xenia/gpu/spirv_shader_translator.h
+++ b/src/xenia/gpu/spirv_shader_translator.h
@@ -22,6 +22,28 @@
 namespace xe {
 namespace gpu {
 
+// Push constants embedded within the command buffer.
+// The total size of this struct must be <= 128 bytes (as that's the commonly
+// supported size).
+struct SpirvPushConstants {
+  // Vertex shader range (see kSpirvPushConstantVertexRange*):
+  float window_scale[4];  // sx,sy, ?, ?
+  float vtx_fmt[4];  // xy, xy, z, w0 format flags (each 1.0f or 0.0f)
+
+  // Fragment shader range (see kSpirvPushConstantFragmentRange*):
+  float alpha_test[4];  // alpha test enable, func, ref, ?
+  uint32_t ps_param_gen;  // SQ_CONTEXT_MISC bits 8..15, or -1 when disabled
+};
+static_assert(sizeof(SpirvPushConstants) <= 128,
+              "Push constants must fit <= 128b");
+constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0;
+constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2;
+constexpr uint32_t kSpirvPushConstantFragmentRangeOffset =
+    kSpirvPushConstantVertexRangeSize;
+constexpr uint32_t kSpirvPushConstantFragmentRangeSize =
+    (sizeof(float) * 4) + sizeof(uint32_t);
+constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants);
+
 class SpirvShaderTranslator : public ShaderTranslator {
  public:
   SpirvShaderTranslator();
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc
index e86220f61..a8938e407 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.cc
+++ b/src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -59,13 +59,12 @@ PipelineCache::PipelineCache(
 
   // Push constants used for draw parameters.
   // We need to keep these under 128b across all stages.
-  VkPushConstantRange push_constant_ranges[2];
-  push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
+  // TODO(benvanik): split between the stages?
+  VkPushConstantRange push_constant_ranges[1];
+  push_constant_ranges[0].stageFlags =
+      VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
   push_constant_ranges[0].offset = 0;
-  push_constant_ranges[0].size = sizeof(float) * 16;
-  push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
-  push_constant_ranges[1].offset = sizeof(float) * 16;
-  push_constant_ranges[1].size = sizeof(int);
+  push_constant_ranges[0].size = kSpirvPushConstantsSize;
 
   // Shared pipeline layout.
   VkPipelineLayoutCreateInfo pipeline_layout_info;
@@ -511,26 +510,74 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
 
   // TODO(benvanik): push constants.
 
-  bool push_constants_dirty = full_update;
+  bool push_constants_dirty = full_update || viewport_state_dirty;
   push_constants_dirty |=
       SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
   push_constants_dirty |=
       SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
+  push_constants_dirty |=
+      SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL);
+  push_constants_dirty |=
+      SetShadowRegister(&regs.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF);
+  if (push_constants_dirty) {
+    xenos::xe_gpu_program_cntl_t program_cntl;
+    program_cntl.dword_0 = regs.sq_program_cntl;
 
-  xenos::xe_gpu_program_cntl_t program_cntl;
-  program_cntl.dword_0 = regs.sq_program_cntl;
+    // Normal vertex shaders only, for now.
+    // TODO(benvanik): transform feedback/memexport.
+    // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h
+    // 0 = normal
+    // 2 = point size
+    assert_true(program_cntl.vs_export_mode == 0 ||
+                program_cntl.vs_export_mode == 2);
 
-  // Populate a register in the pixel shader with frag coord.
-  int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF;
-  // draw_batcher_.set_ps_param_gen(program_cntl.param_gen ? ps_param_gen : -1);
+    SpirvPushConstants push_constants = {};  // Unwritten lanes stay zero.
+
-  // Normal vertex shaders only, for now.
-  // TODO(benvanik): transform feedback/memexport.
-  // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h
-  // 0 = normal
-  // 2 = point size
-  assert_true(program_cntl.vs_export_mode == 0 ||
-              program_cntl.vs_export_mode == 2);
+    // Done in VS, no need to flush state.
+    if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
+      push_constants.window_scale[0] = 1.0f;
+      push_constants.window_scale[1] = 1.0f;
+    } else {
+      push_constants.window_scale[0] = 1.0f / 2560.0f;
+      push_constants.window_scale[1] = -1.0f / 2560.0f;
+    }
+
+    // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
+    // VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
+    //            = false: multiply the X, Y coordinates by 1/W0.
+    // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
+    //           = false: multiply the Z coordinate by 1/W0.
+    // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
+    //                    get 1/W0.
+    float vtx_xy_fmt = (regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f;
+    float vtx_z_fmt = (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f;
+    float vtx_w0_fmt = (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f;
+    push_constants.vtx_fmt[0] = vtx_xy_fmt;
+    push_constants.vtx_fmt[1] = vtx_xy_fmt;
+    push_constants.vtx_fmt[2] = vtx_z_fmt;
+    push_constants.vtx_fmt[3] = vtx_w0_fmt;
+
+    // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
+    // Deprecated in Vulkan, implemented in shader.
+    // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
+    // ALPHATESTENABLE
+    push_constants.alpha_test[0] =
+        (regs.rb_colorcontrol & 0x8) != 0 ? 1.0f : 0.0f;
+    // ALPHAFUNC
+    push_constants.alpha_test[1] =
+        static_cast<float>(regs.rb_colorcontrol & 0x7);
+    // ALPHAREF
+    push_constants.alpha_test[2] = regs.rb_alpha_ref;
+
+    // PS register to fill with frag coord, or -1 (0xFFFFFFFF) when disabled.
+    int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF;
+    push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1;
+
+    vkCmdPushConstants(
+        command_buffer, pipeline_layout_,
+        VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0,
+        kSpirvPushConstantsSize, &push_constants);
+  }
 
   return true;
 }
diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h
index 7d35fc496..3e623f14e 100644
--- a/src/xenia/gpu/vulkan/pipeline_cache.h
+++ b/src/xenia/gpu/vulkan/pipeline_cache.h
@@ -262,6 +262,8 @@ class PipelineCache {
 
     uint32_t sq_program_cntl;
     uint32_t sq_context_misc;
+    uint32_t rb_colorcontrol;
+    float rb_alpha_ref;
 
     SetDynamicStateRegisters() { Reset(); }
     void Reset() { std::memset(this, 0, sizeof(*this)); }