Merge remote-tracking branch 'GliniakRepo/patchingSystem' into canary_experimental

2022-06-14 17:50:25 +02:00 · 2022-06-14 17:50:25 +02:00 · e8aaddf4d5
parent 90d67ac11c 91f43a374d
commit e8aaddf4d5
16 changed files with 389 additions and 209 deletions
--- a/src/xenia/app/premake5.lua
+++ b/src/xenia/app/premake5.lua
@ -25,6 +25,7 @@ project("xenia-app")
    "xenia-hid-nop",
    "xenia-hid-sdl",
    "xenia-kernel",
+    "xenia-patcher",
    "xenia-ui",
    "xenia-ui-spirv",
    "xenia-ui-vulkan",
--- a/src/xenia/gpu/d3d12/premake5.lua
+++ b/src/xenia/gpu/d3d12/premake5.lua
@ -36,6 +36,7 @@ project("xenia-gpu-d3d12-trace-viewer")
    "xenia-hid",
    "xenia-hid-nop",
    "xenia-kernel",
+    "xenia-patcher",
    "xenia-ui",
    "xenia-ui-d3d12",
    "xenia-vfs",
--- a/src/xenia/gpu/dxbc_shader_translator.cc
+++ b/src/xenia/gpu/dxbc_shader_translator.cc
@ -212,63 +212,124 @@ void DxbcShaderTranslator::PopSystemTemp(uint32_t count) {
  system_temp_count_current_ -= std::min(count, system_temp_count_current_);
 }

-void DxbcShaderTranslator::ConvertPWLGamma(
-    bool to_gamma, int32_t source_temp, uint32_t source_temp_component,
-    uint32_t target_temp, uint32_t target_temp_component, uint32_t piece_temp,
-    uint32_t piece_temp_component, uint32_t accumulator_temp,
-    uint32_t accumulator_temp_component) {
-  assert_true(source_temp != target_temp ||
-              source_temp_component != target_temp_component ||
-              ((target_temp != accumulator_temp ||
-                target_temp_component != accumulator_temp_component) &&
-               (target_temp != piece_temp ||
-                target_temp_component != piece_temp_component)));
-  assert_true(piece_temp != source_temp ||
-              piece_temp_component != source_temp_component);
-  assert_true(accumulator_temp != source_temp ||
-              accumulator_temp_component != source_temp_component);
-  assert_true(piece_temp != accumulator_temp ||
-              piece_temp_component != accumulator_temp_component);
+void DxbcShaderTranslator::PWLGammaToLinear(
+    uint32_t target_temp, uint32_t target_temp_component, uint32_t source_temp,
+    uint32_t source_temp_component, bool source_pre_saturated, uint32_t temp1,
+    uint32_t temp1_component, uint32_t temp2, uint32_t temp2_component) {
+  // The source is needed only once to begin building the result, so it can be
+  // the same as the destination.
+  assert_true(temp1 != target_temp || temp1_component != target_temp_component);
+  assert_true(temp1 != source_temp || temp1_component != source_temp_component);
+  assert_true(temp2 != target_temp || temp2_component != target_temp_component);
+  assert_true(temp2 != source_temp || temp2_component != source_temp_component);
+  assert_true(temp1 != temp2 || temp1_component != temp2_component);
+  dxbc::Dest target_dest(
+      dxbc::Dest::R(target_temp, UINT32_C(1) << target_temp_component));
+  dxbc::Src target_src(dxbc::Src::R(target_temp).Select(target_temp_component));
  dxbc::Src source_src(dxbc::Src::R(source_temp).Select(source_temp_component));
-  dxbc::Dest piece_dest(dxbc::Dest::R(piece_temp, 1 << piece_temp_component));
-  dxbc::Src piece_src(dxbc::Src::R(piece_temp).Select(piece_temp_component));
-  dxbc::Dest accumulator_dest(
-      dxbc::Dest::R(accumulator_temp, 1 << accumulator_temp_component));
-  dxbc::Src accumulator_src(
-      dxbc::Src::R(accumulator_temp).Select(accumulator_temp_component));
-  // For each piece:
-  // 1) Calculate how far we are on it. Multiply by 1/width, subtract
-  //    start/width and saturate.
-  // 2) Add the contribution of the piece - multiply the position on the piece
-  //    by its slope*width and accumulate.
-  // Piece 1.
-  a_.OpMul(piece_dest, source_src,
-           dxbc::Src::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.25f)), true);
-  a_.OpMul(accumulator_dest, piece_src,
-           dxbc::Src::LF(to_gamma ? (4.0f * 0.0625f) : (0.25f * 0.25f)));
-  // Piece 2.
-  a_.OpMAd(piece_dest, source_src,
-           dxbc::Src::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.125f)),
-           dxbc::Src::LF(to_gamma ? (-0.0625f / 0.0625f) : (-0.25f / 0.125f)),
-           true);
-  a_.OpMAd(accumulator_dest, piece_src,
-           dxbc::Src::LF(to_gamma ? (2.0f * 0.0625f) : (0.5f * 0.125f)),
-           accumulator_src);
-  // Piece 3.
-  a_.OpMAd(piece_dest, source_src,
-           dxbc::Src::LF(to_gamma ? (1.0f / 0.375f) : (1.0f / 0.375f)),
-           dxbc::Src::LF(to_gamma ? (-0.125f / 0.375f) : (-0.375f / 0.375f)),
-           true);
-  a_.OpMAd(accumulator_dest, piece_src,
-           dxbc::Src::LF(to_gamma ? (1.0f * 0.375f) : (1.0f * 0.375f)),
-           accumulator_src);
-  // Piece 4.
-  a_.OpMAd(piece_dest, source_src,
-           dxbc::Src::LF(to_gamma ? (1.0f / 0.5f) : (1.0f / 0.25f)),
-           dxbc::Src::LF(to_gamma ? (-0.5f / 0.5f) : (-0.75f / 0.25f)), true);
-  a_.OpMAd(dxbc::Dest::R(target_temp, 1 << target_temp_component), piece_src,
-           dxbc::Src::LF(to_gamma ? (0.5f * 0.5f) : (2.0f * 0.25f)),
-           accumulator_src);
+  dxbc::Dest temp1_dest(dxbc::Dest::R(temp1, UINT32_C(1) << temp1_component));
+  dxbc::Src temp1_src(dxbc::Src::R(temp1).Select(temp1_component));
+  dxbc::Dest temp2_dest(dxbc::Dest::R(temp2, UINT32_C(1) << temp2_component));
+  dxbc::Src temp2_src(dxbc::Src::R(temp2).Select(temp2_component));
+
+  // Get the scale (into temp1) and the offset (into temp2) for the piece.
+  // Using `source >= threshold` comparisons because the input might have not
+  // been saturated yet, and thus it may be NaN - since it will be saturated to
+  // 0 later, the 0...64/255 case should be selected for it.
+  a_.OpGE(temp2_dest, source_src, dxbc::Src::LF(96.0f / 255.0f));
+  a_.OpIf(true, temp2_src);
+  // [96/255 ... 1
+  a_.OpGE(temp2_dest, source_src, dxbc::Src::LF(192.0f / 255.0f));
+  a_.OpMovC(temp1_dest, temp2_src, dxbc::Src::LF(8.0f / 1024.0f),
+            dxbc::Src::LF(4.0f / 1024.0f));
+  a_.OpMovC(temp2_dest, temp2_src, dxbc::Src::LF(-1024.0f),
+            dxbc::Src::LF(-256.0f));
+  a_.OpElse();
+  // 0 ... 96/255)
+  a_.OpGE(temp2_dest, source_src, dxbc::Src::LF(64.0f / 255.0f));
+  a_.OpMovC(temp1_dest, temp2_src, dxbc::Src::LF(2.0f / 1024.0f),
+            dxbc::Src::LF(1.0f / 1024.0f));
+  a_.OpMovC(temp2_dest, temp2_src, dxbc::Src::LF(-64.0f), dxbc::Src::LF(0.0f));
+  a_.OpEndIf();
+
+  if (!source_pre_saturated) {
+    // Saturate the input, and flush NaN to 0.
+    a_.OpMov(target_dest, source_src, true);
+  }
+  // linear = gamma * (255 * 1024) * scale + offset
+  // As both 1024 and the scale are powers of 2, and 1024 * scale is not smaller
+  // than 1, it's not important if it's (gamma * 255) * 1024 * scale,
+  // (gamma * 255 * 1024) * scale, gamma * 255 * (1024 * scale), or
+  // gamma * (255 * 1024 * scale) - or the option chosen here, as long as
+  // 1024 is applied before the scale since the scale is < 1 (specifically at
+  // least 1/1024), and it may make very small values denormal.
+  a_.OpMul(target_dest, source_pre_saturated ? source_src : target_src,
+           dxbc::Src::LF(255.0f * 1024.0f));
+  a_.OpMAd(target_dest, target_src, temp1_src, temp2_src);
+  // linear += trunc(linear * scale)
+  a_.OpMul(temp1_dest, target_src, temp1_src);
+  a_.OpRoundZ(temp1_dest, temp1_src);
+  a_.OpAdd(target_dest, target_src, temp1_src);
+  // linear *= 1/1023
+  a_.OpMul(target_dest, target_src, dxbc::Src::LF(1.0f / 1023.0f));
+}
+
+void DxbcShaderTranslator::PreSaturatedLinearToPWLGamma(
+    uint32_t target_temp, uint32_t target_temp_component, uint32_t source_temp,
+    uint32_t source_temp_component, uint32_t temp_or_target,
+    uint32_t temp_or_target_component, uint32_t temp_non_target,
+    uint32_t temp_non_target_component) {
+  // The source may be the same as the target, but in this case it can't also be
+  // used as a temporary variable.
+  assert_true(target_temp != source_temp ||
+              target_temp_component != source_temp_component ||
+              target_temp != temp_or_target ||
+              target_temp_component != temp_or_target_component);
+  assert_true(temp_or_target != source_temp ||
+              temp_or_target_component != source_temp_component);
+  assert_true(temp_non_target != target_temp ||
+              temp_non_target_component != target_temp_component);
+  assert_true(temp_non_target != source_temp ||
+              temp_non_target_component != source_temp_component);
+  assert_true(temp_or_target != temp_non_target ||
+              temp_or_target_component != temp_non_target_component);
+  dxbc::Dest target_dest(
+      dxbc::Dest::R(target_temp, UINT32_C(1) << target_temp_component));
+  dxbc::Src target_src(dxbc::Src::R(target_temp).Select(target_temp_component));
+  dxbc::Src source_src(dxbc::Src::R(source_temp).Select(source_temp_component));
+  dxbc::Dest temp_or_target_dest(
+      dxbc::Dest::R(temp_or_target, UINT32_C(1) << temp_or_target_component));
+  dxbc::Src temp_or_target_src(
+      dxbc::Src::R(temp_or_target).Select(temp_or_target_component));
+  dxbc::Dest temp_non_target_dest(
+      dxbc::Dest::R(temp_non_target, UINT32_C(1) << temp_non_target_component));
+  dxbc::Src temp_non_target_src(
+      dxbc::Src::R(temp_non_target).Select(temp_non_target_component));
+
+  // Get the scale (into temp_or_target) and the offset (into temp_non_target)
+  // for the piece.
+  a_.OpGE(temp_non_target_dest, source_src, dxbc::Src::LF(128.0f / 1023.0f));
+  a_.OpIf(true, temp_non_target_src);
+  // [128/1023 ... 1
+  a_.OpGE(temp_non_target_dest, source_src, dxbc::Src::LF(512.0f / 1023.0f));
+  a_.OpMovC(temp_or_target_dest, temp_non_target_src,
+            dxbc::Src::LF(1023.0f / 8.0f), dxbc::Src::LF(1023.0f / 4.0f));
+  a_.OpMovC(temp_non_target_dest, temp_non_target_src,
+            dxbc::Src::LF(128.0f / 255.0f), dxbc::Src::LF(64.0f / 255.0f));
+  a_.OpElse();
+  // 0 ... 128/1023)
+  a_.OpGE(temp_non_target_dest, source_src, dxbc::Src::LF(64.0f / 1023.0f));
+  a_.OpMovC(temp_or_target_dest, temp_non_target_src,
+            dxbc::Src::LF(1023.0f / 2.0f), dxbc::Src::LF(1023.0f));
+  a_.OpMovC(temp_non_target_dest, temp_non_target_src,
+            dxbc::Src::LF(32.0f / 255.0f), dxbc::Src::LF(0.0f));
+  a_.OpEndIf();
+
+  // gamma = trunc(linear * scale) * (1.0 / 255.0) + offset
+  a_.OpMul(target_dest, source_src, temp_or_target_src);
+  a_.OpRoundZ(target_dest, target_src);
+  a_.OpMAd(target_dest, target_src, dxbc::Src::LF(1.0f / 255.0f),
+           temp_non_target_src);
 }

 void DxbcShaderTranslator::RemapAndConvertVertexIndices(
--- a/src/xenia/gpu/dxbc_shader_translator.h
+++ b/src/xenia/gpu/dxbc_shader_translator.h
@ -664,15 +664,23 @@ class DxbcShaderTranslator : public ShaderTranslator {
  // Frees the last allocated internal r# registers for later reuse.
  void PopSystemTemp(uint32_t count = 1);

-  // Converts one scalar to or from PWL gamma, using 1 temporary scalar.
-  // The target may be the same as any of the source, the piece temporary or the
-  // accumulator, but not two or three of these.
-  // The piece and the accumulator can't be the same as source or as each other.
-  void ConvertPWLGamma(bool to_gamma, int32_t source_temp,
-                       uint32_t source_temp_component, uint32_t target_temp,
-                       uint32_t target_temp_component, uint32_t piece_temp,
-                       uint32_t piece_temp_component, uint32_t accumulator_temp,
-                       uint32_t accumulator_temp_component);
+  // Converts one scalar from piecewise linear gamma to linear. The target may
+  // be the same as the source, the temporary variables must be different. If
+  // the source is not pre-saturated, saturation will be done internally.
+  void PWLGammaToLinear(uint32_t target_temp, uint32_t target_temp_component,
+                        uint32_t source_temp, uint32_t source_temp_component,
+                        bool source_pre_saturated, uint32_t temp1,
+                        uint32_t temp1_component, uint32_t temp2,
+                        uint32_t temp2_component);
+  // Converts one scalar, which must be saturated before calling this function,
+  // from linear to piecewise linear gamma. The target may be the same as either
+  // the source or as temp_or_target, but not as both (and temp_or_target may
+  // not be the same as the source). temp_non_target must be different.
+  void PreSaturatedLinearToPWLGamma(
+      uint32_t target_temp, uint32_t target_temp_component,
+      uint32_t source_temp, uint32_t source_temp_component,
+      uint32_t temp_or_target, uint32_t temp_or_target_component,
+      uint32_t temp_non_target, uint32_t temp_non_target_component);

  bool IsSampleRate() const {
    assert_true(is_pixel_shader());
--- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc
+++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc
@ -2103,8 +2103,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
          a_.OpIf(false, dxbc::Src::R(gamma_temp, dxbc::Src::kXXXX));
        }
        // Convert from piecewise linear.
-        ConvertPWLGamma(false, system_temp_result_, i, system_temp_result_, i,
-                        gamma_temp, 0, gamma_temp, 1);
+        PWLGammaToLinear(system_temp_result_, i, system_temp_result_, i, false,
+                         gamma_temp, 0, gamma_temp, 1);
        if (gamma_render_target_as_srgb_) {
          a_.OpElse();
          // Convert from sRGB.
--- a/src/xenia/gpu/dxbc_shader_translator_om.cc
+++ b/src/xenia/gpu/dxbc_shader_translator_om.cc
@ -1384,8 +1384,8 @@ void DxbcShaderTranslator::ROV_UnpackColor(
             dxbc::Src::LF(1.0f / 255.0f));
    if (i) {
      for (uint32_t j = 0; j < 3; ++j) {
-        ConvertPWLGamma(false, color_temp, j, color_temp, j, temp1,
-                        temp1_component, temp2, temp2_component);
+        PWLGammaToLinear(color_temp, j, color_temp, j, true, temp1,
+                         temp1_component, temp2, temp2_component);
      }
    }
    a_.OpBreak();
@ -1537,8 +1537,9 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
          : xenos::ColorRenderTargetFormat::k_8_8_8_8)));
    for (uint32_t j = 0; j < 4; ++j) {
      if (i && j < 3) {
-        ConvertPWLGamma(true, color_temp, j, temp1, temp1_component, temp1,
-                        temp1_component, temp2, temp2_component);
+        PreSaturatedLinearToPWLGamma(temp1, temp1_component, color_temp, j,
+                                     temp1, temp1_component, temp2,
+                                     temp2_component);
        // Denormalize and add 0.5 for rounding.
        a_.OpMAd(temp1_dest, temp1_src, dxbc::Src::LF(255.0f),
                 dxbc::Src::LF(0.5f));
@ -1863,10 +1864,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
    if (!(shader_writes_color_targets & (1 << i))) {
      continue;
    }
+    uint32_t system_temp_color = system_temps_color_[i];
    // Apply the exponent bias after alpha to coverage because it needs the
-    // unbiased alpha from the shader
-    a_.OpMul(dxbc::Dest::R(system_temps_color_[i]),
-             dxbc::Src::R(system_temps_color_[i]),
+    // unbiased alpha from the shader.
+    a_.OpMul(dxbc::Dest::R(system_temp_color), dxbc::Src::R(system_temp_color),
             LoadSystemConstant(
                 SystemConstants::Index::kColorExpBias,
                 offsetof(SystemConstants, color_exp_bias) + sizeof(float) * i,
@ -1878,14 +1879,17 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
      a_.OpAnd(dxbc::Dest::R(gamma_temp, 0b0001), LoadFlagsSystemConstant(),
               dxbc::Src::LU(kSysFlag_ConvertColor0ToGamma << i));
      a_.OpIf(true, dxbc::Src::R(gamma_temp, dxbc::Src::kXXXX));
+      // Saturate before the gamma conversion.
+      a_.OpMov(dxbc::Dest::R(system_temp_color, 0b0111),
+               dxbc::Src::R(system_temp_color), true);
      for (uint32_t j = 0; j < 3; ++j) {
-        ConvertPWLGamma(true, system_temps_color_[i], j, system_temps_color_[i],
-                        j, gamma_temp, 0, gamma_temp, 1);
+        PreSaturatedLinearToPWLGamma(system_temp_color, j, system_temp_color, j,
+                                     gamma_temp, 0, gamma_temp, 1);
      }
      a_.OpEndIf();
    }
    // Copy the color from a readable temp register to an output register.
-    a_.OpMov(dxbc::Dest::O(i), dxbc::Src::R(system_temps_color_[i]));
+    a_.OpMov(dxbc::Dest::O(i), dxbc::Src::R(system_temp_color));
  }
  // Release gamma_temp.
  PopSystemTemp();
--- a/src/xenia/gpu/texture_cache.h
+++ b/src/xenia/gpu/texture_cache.h
@ -98,8 +98,11 @@ class TextureCache {

  // "ActiveTexture" means as of the latest RequestTextures call.

-  // Returns the post-swizzle signedness of a currently bound texture (must be
-  // called after RequestTextures).
+  uint8_t GetActiveTextureHostSwizzle(uint32_t fetch_constant_index) const {
+    const TextureBinding* binding =
+        GetValidTextureBinding(fetch_constant_index);
+    return binding ? binding->host_swizzle : xenos::XE_GPU_TEXTURE_SWIZZLE_0000;
+  }
  uint8_t GetActiveTextureSwizzledSigns(uint32_t fetch_constant_index) const {
    const TextureBinding* binding =
        GetValidTextureBinding(fetch_constant_index);
--- a/src/xenia/gpu/vulkan/premake5.lua
+++ b/src/xenia/gpu/vulkan/premake5.lua
@ -41,6 +41,7 @@ project("xenia-gpu-vulkan-trace-viewer")
    "xenia-hid",
    "xenia-hid-nop",
    "xenia-kernel",
+    "xenia-patcher",
    "xenia-ui",
    "xenia-ui-spirv",
    "xenia-ui-vulkan",
--- a/src/xenia/gpu/xenos.cc
+++ b/src/xenia/gpu/xenos.cc
@ -17,6 +17,91 @@ namespace xe {
 namespace gpu {
 namespace xenos {

+// Based on X360GammaToLinear and X360LinearToGamma from the Source Engine, with
+// additional logic from Direct3D 9 code in game executable disassembly, located
+// via the floating-point constants involved.
+// https://github.com/ValveSoftware/source-sdk-2013/blob/master/mp/src/mathlib/color_conversion.cpp#L329
+// These are provided here in part as a reference for shader translators.
+
+float PWLGammaToLinear(float gamma) {
+  // Not found in game executables, so just using the logic similar to that in
+  // the Source Engine.
+  gamma = xe::saturate_unsigned(gamma);
+  float scale, offset;
+  // While the compiled code for linear to gamma conversion uses `vcmpgtfp
+  // constant, value` comparison (constant > value, or value < constant), it's
+  // preferable to use `value >= constant` condition for the higher pieces, as
+  // it will never pass for NaN, and in case of NaN, the 0...64/255 case will be
+  // selected regardless of whether it's saturated before or after the
+  // comparisons (always pre-saturating here, but shader translators may choose
+  // to saturate later for convenience), as saturation will flush NaN to 0.
+  if (gamma >= 96.0f / 255.0f) {
+    if (gamma >= 192.0f / 255.0f) {
+      scale = 8.0f / 1024.0f;
+      offset = -1024.0f;
+    } else {
+      scale = 4.0f / 1024.0f;
+      offset = -256.0f;
+    }
+  } else {
+    if (gamma >= 64.0f / 255.0f) {
+      scale = 2.0f / 1024.0f;
+      offset = -64.0f;
+    } else {
+      scale = 1.0f / 1024.0f;
+      offset = 0.0f;
+      // No `floor` term in this case in the Source Engine, but for the largest
+      // value, 1.0, `floor(255.0f * (1.0f / 1024.0f))` is 0 anyway.
+    }
+  }
+  // Though in the Source Engine, the 1/1024 multiplication is done for the
+  // truncated part specifically, pre-baking it into the scale is lossless -
+  // both 1024 and `scale` are powers of 2.
+  float linear = gamma * ((255.0f * 1024.0f) * scale) + offset;
+  // For consistency with linear to gamma, and because it's more logical here
+  // (0 rather than 1 at -epsilon), using `trunc` instead of `floor`.
+  linear += std::trunc(linear * scale);
+  linear *= 1.0f / 1023.0f;
+  // Clamping is not necessary (1 * (255 * 8) - 1024 + 7 is exactly 1023).
+  return linear;
+}
+
+float LinearToPWLGamma(float linear) {
+  linear = xe::saturate_unsigned(linear);
+  float scale, offset;
+  // While the compiled code uses `vcmpgtfp constant, value` comparison
+  // (constant > value, or value < constant), it's preferable to use `value >=
+  // constant` condition for the higher pieces, as it will never pass for NaN,
+  // and in case of NaN, the 0...64/1023 case will be selected regardless of
+  // whether it's saturated before or after the comparisons (always
+  // pre-saturating here, but shader translators may choose to saturate later
+  // for convenience), as saturation will flush NaN to 0.
+  if (linear >= 128.0f / 1023.0f) {
+    if (linear >= 512.0f / 1023.0f) {
+      scale = 1023.0f / 8.0f;
+      offset = 128.0f / 255.0f;
+    } else {
+      scale = 1023.0f / 4.0f;
+      offset = 64.0f / 255.0f;
+    }
+  } else {
+    if (linear >= 64.0f / 1023.0f) {
+      scale = 1023.0f / 2.0f;
+      offset = 32.0f / 255.0f;
+    } else {
+      scale = 1023.0f;
+      offset = 0.0f;
+    }
+  }
+  // The truncation isn't in X360LinearToGamma in the Source Engine, but is
+  // there in Direct3D 9 disassembly (the `vrfiz` instructions).
+  // It also prevents conversion of 1.0 to 1.0034313725490196078431372549016
+  // that's handled via clamping in the Source Engine.
+  // 127.875 (1023 / 8) is truncated to 127, which, after scaling, becomes
+  // 127 / 255, and when 128 / 255 is added, the result is 1.
+  return std::trunc(linear * scale) * (1.0f / 255.0f) + offset;
+}
+
 // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp

 float Float7e3To32(uint32_t f10) {
--- a/src/xenia/gpu/xenos.h
+++ b/src/xenia/gpu/xenos.h
@ -327,6 +327,9 @@ enum class DepthRenderTargetFormat : uint32_t {

 const char* GetDepthRenderTargetFormatName(DepthRenderTargetFormat format);

+float PWLGammaToLinear(float gamma);
+float LinearToPWLGamma(float linear);
+
 // Converts Xenos floating-point 7e3 color value in bits 0:9 (not clamping) to
 // an IEEE-754 32-bit floating-point number.
 float Float7e3To32(uint32_t f10);
--- a/src/xenia/kernel/user_module.cc
+++ b/src/xenia/kernel/user_module.cc
@ -400,7 +400,7 @@ void UserModule::Dump() {
  // XEX header.
  sb.AppendFormat("Module {}:\n", path_);

-  sb.AppendFormat("Module Hash: {:016X}\n", hash_);
+  sb.AppendFormat("Module Hash: {:016X}\n", hash_.value_or(UINT64_MAX));

  sb.AppendFormat("    Module Flags: {:08X}\n", (uint32_t)header->module_flags);

@ -802,11 +802,44 @@ void UserModule::Dump() {
 }

 void UserModule::CalculateHash() {
-  uint8_t* base_adr = memory()->TranslateVirtual(xex_module()->base_address());
+  // Hashes the guest memory range spanning from the first page descriptor
+  // marked as code to the end of the last one with XXH3 (64-bit) and stores
+  // the digest in hash_. On any failure below, hash_ is left untouched.
+  const BaseHeap* module_heap =
+      kernel_state_->memory()->LookupHeap(xex_module()->base_address());
+
+  if (!module_heap) {
+    XELOGE("Invalid heap for xex module! Address: {:08X}",
+           xex_module()->base_address());
+    return;
+  }
+
+  const uint32_t page_size = module_heap->page_size();
+  auto security_info = xex_module()->xex_security_info();
+
+  // Returns the index of the first (from_bottom == true) or the last
+  // (from_bottom == false) page descriptor whose section type is code, or
+  // UINT32_MAX if no descriptor is marked as code.
+  auto find_code_section_page = [&security_info](bool from_bottom) {
+    const uint32_t descriptor_count = security_info->page_descriptor_count;
+    for (uint32_t i = 0; i < descriptor_count; i++) {
+      // The top-down walk must start at the last valid index (count - 1);
+      // starting at `count` reads past the array and never visits index 0.
+      const uint32_t page_index = from_bottom ? i : descriptor_count - 1 - i;
+      xex2_page_descriptor page_descriptor;
+      page_descriptor.value =
+          xe::byte_swap(security_info->page_descriptors[page_index].value);
+      if (page_descriptor.info != XEX_SECTION_CODE) {
+        continue;
+      }
+      return page_index;
+    }
+    return UINT32_MAX;
+  };
+
+  const uint32_t first_code_page = find_code_section_page(true);
+  const uint32_t last_code_page = find_code_section_page(false);
+  // Without this check the UINT32_MAX sentinel would wrap around in the
+  // address arithmetic below and produce a bogus hashing range.
+  if (first_code_page == UINT32_MAX || last_code_page == UINT32_MAX) {
+    XELOGE("No code section found in xex module! Address: {:08X}",
+           xex_module()->base_address());
+    return;
+  }
+
+  const uint32_t start_address =
+      xex_module()->base_address() + (first_code_page * page_size);
+  // The end is exclusive: one page past the start of the last code page.
+  const uint32_t end_address =
+      xex_module()->base_address() + ((last_code_page + 1) * page_size);
+
+  uint8_t* base_code_adr = memory()->TranslateVirtual(start_address);

  XXH3_state_t hash_state;
  XXH3_64bits_reset(&hash_state);
-  XXH3_64bits_update(&hash_state, base_adr, xex_module()->image_size());
+  XXH3_64bits_update(&hash_state, base_code_adr, end_address - start_address);
  hash_ = XXH3_64bits_digest(&hash_state);
 }
 }  // namespace kernel
--- a/src/xenia/kernel/user_module.h
+++ b/src/xenia/kernel/user_module.h
@ -38,7 +38,7 @@ class UserModule : public XModule {

  const std::string& path() const override { return path_; }
  const std::string& name() const override { return name_; }
-  uint64_t hash() const { return hash_; }
+  std::optional<uint64_t> hash() const { return hash_; }

  enum ModuleFormat {
    kModuleFormatUndefined = 0,
@ -106,7 +106,7 @@ class UserModule : public XModule {

  std::string name_;
  std::string path_;
-  uint64_t hash_ = -1;
+  std::optional<uint64_t> hash_ = std::nullopt;

  uint32_t guest_xex_header_ = 0;
  ModuleFormat module_format_ = kModuleFormatUndefined;
--- a/src/xenia/patcher/patch_db.cc
+++ b/src/xenia/patcher/patch_db.cc
@ -34,15 +34,14 @@ void PatchDB::LoadPatches() {
  }

  const std::filesystem::path patches_directory = patches_root_ / "patches";
-  const std::vector<xe::filesystem::FileInfo>& patch_files =
+  const std::vector<xe::filesystem::FileInfo> patch_files =
      filesystem::ListFiles(patches_directory);
-  const std::regex file_name_regex_match = std::regex(patch_filename_regex);

  for (const xe::filesystem::FileInfo& patch_file : patch_files) {
    // Skip files that doesn't have only title_id as name and .patch as
    // extension
    if (!std::regex_match(path_to_utf8(patch_file.name),
-                          file_name_regex_match)) {
+                          patch_filename_regex_)) {
      XELOGE("PatchDB: Skipped loading file {} due to incorrect filename",
             path_to_utf8(patch_file.name));
      continue;
@ -51,30 +50,30 @@ void PatchDB::LoadPatches() {
    const PatchFileEntry loaded_title_patches =
        ReadPatchFile(patch_file.path / patch_file.name);
    if (loaded_title_patches.title_id != -1) {
-      loaded_patches.push_back(loaded_title_patches);
+      loaded_patches_.push_back(loaded_title_patches);
    }
  }
-  XELOGI("PatchDB: Loaded patches for {} titles", loaded_patches.size());
+  XELOGI("PatchDB: Loaded patches for {} titles", loaded_patches_.size());
 }

 PatchFileEntry PatchDB::ReadPatchFile(const std::filesystem::path& file_path) {
-  PatchFileEntry patchFile;
+  PatchFileEntry patch_file;
  std::shared_ptr<cpptoml::table> patch_toml_fields;

  try {
    patch_toml_fields = cpptoml::parse_file(path_to_utf8(file_path));
  } catch (...) {
    XELOGE("PatchDB: Cannot load patch file: {}", path_to_utf8(file_path));
-    patchFile.title_id = -1;
-    return patchFile;
+    patch_file.title_id = -1;
+    return patch_file;
  };

  auto title_name = patch_toml_fields->get_as<std::string>("title_name");
  auto title_id = patch_toml_fields->get_as<std::string>("title_id");

-  patchFile.title_id = strtoul((*title_id).c_str(), NULL, 16);
-  patchFile.title_name = *title_name;
-  ReadHash(patchFile, patch_toml_fields);
+  patch_file.title_id = strtoul((*title_id).c_str(), NULL, 16);
+  patch_file.title_name = *title_name;
+  ReadHashes(patch_file, patch_toml_fields);

  auto patch_table = patch_toml_fields->get_table_array("patch");

@ -92,7 +91,7 @@ PatchFileEntry PatchDB::ReadPatchFile(const std::filesystem::path& file_path) {
    patch.is_enabled = is_enabled;

    // Iterate through all available data sizes
-    for (const auto& patch_data_type : patch_data_types_size) {
+    for (const auto& patch_data_type : patch_data_types_size_) {
      bool success =
          ReadPatchData(patch.patch_data, patch_data_type, patch_table_entry);

@ -101,9 +100,9 @@ PatchFileEntry PatchDB::ReadPatchFile(const std::filesystem::path& file_path) {
        break;
      }
    }
-    patchFile.patch_info.push_back(patch);
+    patch_file.patch_info.push_back(patch);
  }
-  return patchFile;
+  return patch_file;
 }

 bool PatchDB::ReadPatchData(
@ -120,77 +119,83 @@ bool PatchDB::ReadPatchData(
    size_t alloc_size = (size_t)data_type.second.size;

    switch (data_type.second.type) {
-      case PatchDataType::be8: {
+      case PatchDataType::kBE8: {
        uint16_t value = *patch_data_table->get_as<uint8_t>("value");
        patch_data.push_back({address, PatchDataValue(alloc_size, value)});
        break;
      }
-      case PatchDataType::be16: {
+      case PatchDataType::kBE16: {
        uint16_t value = *patch_data_table->get_as<uint16_t>("value");
        patch_data.push_back(
            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
        break;
      }
-      case PatchDataType::be32: {
+      case PatchDataType::kBE32: {
        uint32_t value = *patch_data_table->get_as<uint32_t>("value");
        patch_data.push_back(
            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
        break;
      }
-      case PatchDataType::f64: {
+      case PatchDataType::kBE64: {
+        uint64_t value = *patch_data_table->get_as<uint64_t>("value");
+        patch_data.push_back(
+            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
+        break;
+      }
+      case PatchDataType::kF64: {
        double val = *patch_data_table->get_as<double>("value");
        uint64_t value = *reinterpret_cast<uint64_t*>(&val);
        patch_data.push_back(
            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
        break;
      }
-      case PatchDataType::f32: {
+      case PatchDataType::kF32: {
        float value = float(*patch_data_table->get_as<double>("value"));
        patch_data.push_back(
            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
        break;
      }
-      case PatchDataType::string: {
+      case PatchDataType::kString: {
        std::string value = *patch_data_table->get_as<std::string>("value");
        patch_data.push_back({address, PatchDataValue(value)});
        break;
      }
-      case PatchDataType::u16string: {
+      case PatchDataType::kU16String: {
        std::u16string value =
            xe::to_utf16(*patch_data_table->get_as<std::string>("value"));
        patch_data.push_back({address, PatchDataValue(value)});
        break;
      }
-      case PatchDataType::byte_array: {
+      case PatchDataType::kByteArray: {
        std::vector<uint8_t> data;
        const std::string value =
            *patch_data_table->get_as<std::string>("value");

        bool success = string_util::hex_string_to_array(data, value);
        if (!success) {
+          XELOGW("PatchDB: Cannot convert hex string to byte array! Skipping",
+                 address);
          return false;
        }
-
-        patch_data.push_back({address, PatchDataValue(value.size() / 2, data)});
+        patch_data.push_back({address, PatchDataValue(data)});
        break;
      }
      default: {
-        uint64_t value = *patch_data_table->get_as<uint64_t>("value");
-        patch_data.push_back(
-            {address, PatchDataValue(alloc_size, xe::byte_swap(value))});
-        break;
+        XELOGW("PatchDB: Unknown patch data type for address {:08X}! Skipping",
+               address);
+        return false;
      }
    }
  }
  return true;
 }

-std::vector<PatchFileEntry> PatchDB::GetTitlePatches(uint32_t title_id,
-                                                     const uint64_t hash) {
+std::vector<PatchFileEntry> PatchDB::GetTitlePatches(
+    const uint32_t title_id, const std::optional<uint64_t> hash) {
  std::vector<PatchFileEntry> title_patches;

  std::copy_if(
-      loaded_patches.cbegin(), loaded_patches.cend(),
+      loaded_patches_.cbegin(), loaded_patches_.cend(),
      std::back_inserter(title_patches), [=](const PatchFileEntry entry) {
        bool hash_exist = std::find(entry.hashes.cbegin(), entry.hashes.cend(),
                                    hash) != entry.hashes.cend();
@ -202,17 +207,17 @@ std::vector<PatchFileEntry> PatchDB::GetTitlePatches(uint32_t title_id,
  return title_patches;
 }

-void PatchDB::ReadHash(PatchFileEntry& patchEntry,
-                       std::shared_ptr<cpptoml::table> patch_toml_fields) {
+// Collects the values stored under the "hash" key of a patch file into
+// patch_entry.hashes. The key is queried both as an array of strings and as a
+// single string; every value found is parsed as a base-16 number.
+// NOTE(review): title_hashes is dereferenced without an emptiness check -
+// confirm cpptoml yields an empty range when "hash" is not an array.
+void PatchDB::ReadHashes(PatchFileEntry& patch_entry,
+                         std::shared_ptr<cpptoml::table> patch_toml_fields) {
  auto title_hashes = patch_toml_fields->get_array_of<std::string>("hash");

  for (const auto& hash : *title_hashes) {
-    patchEntry.hashes.push_back(strtoull(hash.c_str(), NULL, 16));
+    patch_entry.hashes.push_back(strtoull(hash.c_str(), nullptr, 16));
  }

  auto single_hash = patch_toml_fields->get_as<std::string>("hash");
  if (single_hash) {
-    patchEntry.hashes.push_back(strtoull((*single_hash).c_str(), NULL, 16));
+    patch_entry.hashes.push_back(strtoull(single_hash->c_str(), nullptr, 16));
  }
 }

--- a/src/xenia/patcher/patch_db.h
+++ b/src/xenia/patcher/patch_db.h
@ -10,7 +10,10 @@
 #ifndef XENIA_PATCH_DB_H_
 #define XENIA_PATCH_DB_H_

+#include <cstring>
 #include <map>
+#include <optional>
+#include <regex>

 #include "third_party/cpptoml/include/cpptoml.h"

@ -18,62 +21,37 @@ namespace xe {
 namespace patcher {

 struct PatchDataValue {
-  const size_t alloc_size_;
-  const uint8_t* patch_data_ptr_;
+  // Byte buffer holding one patch value. Patcher::ApplyPatch copies
+  // `alloc_size` bytes from `patch_data` to the patched address.
+  //
+  // Number of bytes in `patch_data`.
+  const size_t alloc_size;
+  // The bytes themselves, stored exactly as they will be written.
+  std::vector<uint8_t> patch_data;

-  PatchDataValue(const size_t alloc_size, const uint8_t value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, &value, alloc_size);
+  // Stores the first `size` bytes of the object representation of `value`.
+  // Never reads past `value`: if `size` exceeds sizeof(T) the extra bytes
+  // stay zero. Nothing is copied when `size` is 0, because memcpy must not
+  // be handed the (possibly null) data() of an empty vector.
+  template <typename T>
+  PatchDataValue(const size_t size, const T value)
+      : alloc_size(size), patch_data(size) {
+    const size_t copy_size = size < sizeof(T) ? size : sizeof(T);
+    if (copy_size != 0) {
+      std::memcpy(patch_data.data(), &value, copy_size);
+    }
  };

-  PatchDataValue(const size_t alloc_size, const uint16_t value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, &value, alloc_size);
+  // Stores a copy of the given bytes.
+  PatchDataValue(const std::vector<uint8_t> value)
+      : alloc_size(value.size()), patch_data(value) {
  };

-  PatchDataValue(const size_t alloc_size, const uint32_t value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, &value, alloc_size);
+  // Stores the characters of `value`; no terminating NUL is included.
+  PatchDataValue(const std::string value)
+      : alloc_size(value.size()), patch_data(value.begin(), value.end()) {
  };

-  PatchDataValue(const size_t alloc_size, const uint64_t value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, &value, alloc_size);
-  };
-
-  PatchDataValue(const size_t alloc_size, const float value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, &value, alloc_size);
-  };
-
-  PatchDataValue(const size_t alloc_size, const std::vector<uint8_t> value)
-      : alloc_size_(alloc_size) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, value.data(), alloc_size);
-  };
-
-  PatchDataValue(const std::string value) : alloc_size_(value.size()) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, value.c_str(), alloc_size_);
-  };
-
-  PatchDataValue(const std::u16string value) : alloc_size_(value.size() * 2) {
-    patch_data_ptr_ = new uint8_t[alloc_size_];
-    memcpy((void*)patch_data_ptr_, value.c_str(), alloc_size_);
+  // Stores the UTF-16 code units of `value` as raw bytes, in the byte order
+  // they have in `value`; no terminating NUL is included.
+  PatchDataValue(const std::u16string value)
+      : alloc_size(value.size() * sizeof(char16_t)),
+        patch_data(value.size() * sizeof(char16_t)) {
+    if (alloc_size != 0) {
+      std::memcpy(patch_data.data(), value.data(), alloc_size);
+    }
  };
 };

 struct PatchDataEntry {
-  const uint32_t memory_address_;
-  const PatchDataValue new_data_;
+  // One patch write: a target address plus the bytes to store there.
+  //
+  // Address handed to Memory::TranslateVirtual / LookupHeap by
+  // Patcher::ApplyPatch.
+  const uint32_t address;
+  // Bytes written at `address`.
+  const PatchDataValue data;

-  PatchDataEntry(const uint32_t memory_address, const PatchDataValue new_data)
-      : memory_address_(memory_address), new_data_(new_data){};
+  // Copies both arguments into the const members.
+  PatchDataEntry(const uint32_t memory_address, const PatchDataValue patch_data)
+      : address(memory_address), data(patch_data){};
 };

 struct PatchInfoEntry {
@ -93,15 +71,15 @@ struct PatchFileEntry {
 };

 enum class PatchDataType {
-  be8,
-  be16,
-  be32,
-  be64,
-  f32,
-  f64,
-  string,
-  u16string,
-  byte_array
+  // Kinds of patch value. PatchDB::patch_data_types_size_ maps the string keys
+  // "be8", "be16", "be32", "be64", "f32", "f64", "string", "u16string" and
+  // "array" to these enumerators (paired with a byte size; 0 for the
+  // string and array kinds).
+  kBE8,
+  kBE16,
+  kBE32,
+  kBE64,
+  kF32,
+  kF64,
+  kString,
+  kU16String,
+  kByteArray
 };

 struct PatchData {
@ -123,29 +101,29 @@ class PatchDB {
                     const std::pair<std::string, PatchData> data_type,
                     const std::shared_ptr<cpptoml::table>& patch_table);

-  std::vector<PatchFileEntry> GetTitlePatches(uint32_t title_id,
-                                              const uint64_t hash);
-  std::vector<PatchFileEntry>& GetAllPatches() { return loaded_patches; }
+  std::vector<PatchFileEntry> GetTitlePatches(
+      const uint32_t title_id, const std::optional<uint64_t> hash);
+  std::vector<PatchFileEntry>& GetAllPatches() { return loaded_patches_; }

 private:
-  void ReadHash(PatchFileEntry& patchEntry,
-                std::shared_ptr<cpptoml::table> patch_toml_fields);
+  void ReadHashes(PatchFileEntry& patch_entry,
+                  std::shared_ptr<cpptoml::table> patch_toml_fields);

-  inline static const std::string patch_filename_regex =
-      "^[A-Fa-f0-9]{8}.*\\.patch\\.toml$";
+  inline static const std::regex patch_filename_regex_ =
+      std::regex("^[A-Fa-f0-9]{8}.*\\.patch\\.toml$");

-  const std::map<std::string, PatchData> patch_data_types_size = {
-      {"string", PatchData(0, PatchDataType::string)},
-      {"u16string", PatchData(0, PatchDataType::u16string)},
-      {"array", PatchData(0, PatchDataType::byte_array)},
-      {"f64", PatchData(sizeof(uint64_t), PatchDataType::f64)},
-      {"f32", PatchData(sizeof(uint32_t), PatchDataType::f32)},
-      {"be64", PatchData(sizeof(uint64_t), PatchDataType::be64)},
-      {"be32", PatchData(sizeof(uint32_t), PatchDataType::be32)},
-      {"be16", PatchData(sizeof(uint16_t), PatchDataType::be16)},
-      {"be8", PatchData(sizeof(uint8_t), PatchDataType::be8)}};
+  const std::map<std::string, PatchData> patch_data_types_size_ = {
+      {"string", PatchData(0, PatchDataType::kString)},
+      {"u16string", PatchData(0, PatchDataType::kU16String)},
+      {"array", PatchData(0, PatchDataType::kByteArray)},
+      {"f64", PatchData(sizeof(uint64_t), PatchDataType::kF64)},
+      {"f32", PatchData(sizeof(uint32_t), PatchDataType::kF32)},
+      {"be64", PatchData(sizeof(uint64_t), PatchDataType::kBE64)},
+      {"be32", PatchData(sizeof(uint32_t), PatchDataType::kBE32)},
+      {"be16", PatchData(sizeof(uint16_t), PatchDataType::kBE16)},
+      {"be8", PatchData(sizeof(uint8_t), PatchDataType::kBE8)}};

-  std::vector<PatchFileEntry> loaded_patches;
+  std::vector<PatchFileEntry> loaded_patches_;
  std::filesystem::path patches_root_;
 };
 }  // namespace patcher
--- a/src/xenia/patcher/patcher.cc
+++ b/src/xenia/patcher/patcher.cc
@ -6,23 +6,22 @@
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */
-#include "xenia/patcher/patcher.h"
+#include <cstring>

 #include "xenia/base/logging.h"
+#include "xenia/patcher/patcher.h"

 namespace xe {
 namespace patcher {

 Patcher::Patcher(const std::filesystem::path patches_root) {
  is_any_patch_applied_ = false;
-  patch_db = new PatchDB(patches_root);
+  // Allocated with `new`; patch_db_ keeps the raw pointer, so releasing it is
+  // up to the class (it is not done in this constructor).
+  patch_db_ = new PatchDB(patches_root);
 }

-Patcher::~Patcher() {}
-
 void Patcher::ApplyPatchesForTitle(Memory* memory, const uint32_t title_id,
-                                   const uint64_t hash) {
-  const auto title_patches = patch_db->GetTitlePatches(title_id, hash);
+                                   const std::optional<uint64_t> hash) {
+  const auto title_patches = patch_db_->GetTitlePatches(title_id, hash);

  for (const PatchFileEntry& patchFile : title_patches) {
    for (const PatchInfoEntry& patchEntry : patchFile.patch_info) {
@ -39,25 +38,24 @@ void Patcher::ApplyPatchesForTitle(Memory* memory, const uint32_t title_id,
 void Patcher::ApplyPatch(Memory* memory, const PatchInfoEntry* patch) {
  for (const PatchDataEntry& patch_data_entry : patch->patch_data) {
    uint32_t old_address_protect = 0;
-    auto address = memory->TranslateVirtual(patch_data_entry.memory_address_);
-    auto heap = memory->LookupHeap(patch_data_entry.memory_address_);
+    uint8_t* address = memory->TranslateVirtual(patch_data_entry.address);
+    xe::BaseHeap* heap = memory->LookupHeap(patch_data_entry.address);
    if (!heap) {
      continue;
    }

-    heap->QueryProtect(patch_data_entry.memory_address_, &old_address_protect);
+    heap->QueryProtect(patch_data_entry.address, &old_address_protect);

-    heap->Protect(patch_data_entry.memory_address_,
-                  (uint32_t)patch_data_entry.new_data_.alloc_size_,
+    heap->Protect(patch_data_entry.address,
+                  (uint32_t)patch_data_entry.data.alloc_size,
                  kMemoryProtectRead | kMemoryProtectWrite);

-
-    memcpy(address, patch_data_entry.new_data_.patch_data_ptr_,
-           patch_data_entry.new_data_.alloc_size_);
+    std::memcpy(address, patch_data_entry.data.patch_data.data(),
+                patch_data_entry.data.alloc_size);

    // Restore previous protection
-    heap->Protect(patch_data_entry.memory_address_,
-                  (uint32_t)patch_data_entry.new_data_.alloc_size_,
+    heap->Protect(patch_data_entry.address,
+                  (uint32_t)patch_data_entry.data.alloc_size,
                  old_address_protect);

    is_any_patch_applied_ = true;
--- a/src/xenia/patcher/patcher.h
+++ b/src/xenia/patcher/patcher.h
@ -19,16 +19,15 @@ namespace patcher {
 class Patcher {
 public:
  Patcher(const std::filesystem::path patches_root);
-  ~Patcher();
+  // patch_db_ is allocated with `new` in the constructor. Release it here and
+  // forbid copying so the allocation is deleted exactly once.
+  // NOTE(review): requires PatchDB to be a complete type in this header -
+  // confirm patch_db.h is included above.
+  ~Patcher() { delete patch_db_; }
+  Patcher(const Patcher&) = delete;
+  Patcher& operator=(const Patcher&) = delete;

  void ApplyPatch(Memory* memory, const PatchInfoEntry* patch);
  void ApplyPatchesForTitle(Memory* memory, const uint32_t title_id,
-                            const uint64_t hash);
+                            const std::optional<uint64_t> hash);

  bool IsAnyPatchApplied() { return is_any_patch_applied_; }

 private:
-  PatchDB* patch_db;
+  // Owning pointer to the patch database created by the constructor.
+  PatchDB* patch_db_;
  bool is_any_patch_applied_;
 };