Merge branch 'master' into vulkan

2021-06-14 17:31:55 +03:00 · 2021-06-14 17:31:55 +03:00 · eb8bd732d3
parent 322921233c 357aa1cdd3
commit eb8bd732d3
9 changed files with 262 additions and 123 deletions
--- a/src/xenia/apu/conversion.h
+++ b/src/xenia/apu/conversion.h
@ -0,0 +1,119 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2021 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_APU_CONVERSION_H_
+#define XENIA_APU_CONVERSION_H_
+
+#include <cstdint>
+
+#include "xenia/base/byte_order.h"
+#include "xenia/base/platform.h"
+
+namespace xe {
+namespace apu {
+namespace conversion {
+
+#if XE_ARCH_AMD64
+inline void sequential_6_BE_to_interleaved_6_LE(float* output,
+                                                const float* input,
+                                                size_t ch_sample_count) {
+  // Planar big-endian 6-channel input -> interleaved little-endian frames.
+  // The floats are only moved and byte-swapped as raw 32-bit words, never
+  // interpreted as floating-point values.
+  const uint32_t* src = reinterpret_cast<const uint32_t*>(input);
+  uint32_t* dst = reinterpret_cast<uint32_t*>(output);
+  // Reverses the byte order inside each of the four 32-bit lanes.
+  const __m128i swap_mask =
+      _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+  for (size_t i = 0; i < ch_sample_count; ++i) {
+    uint32_t* frame = &dst[i * 6];
+    // Gather everything for this frame before writing any of it.
+    const __m128i first_four = _mm_set_epi32(
+        src[3 * ch_sample_count + i], src[2 * ch_sample_count + i],
+        src[1 * ch_sample_count + i], src[0 * ch_sample_count + i]);
+    const uint32_t fifth = src[4 * ch_sample_count + i];
+    const uint32_t sixth = src[5 * ch_sample_count + i];
+    // Channels 0-3 are swapped as one vector, channels 4 and 5 individually.
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(frame),
+                     _mm_shuffle_epi8(first_four, swap_mask));
+    frame[4] = xe::byte_swap(fifth);
+    frame[5] = xe::byte_swap(sixth);
+  }
+}
+
+inline void sequential_6_BE_to_interleaved_2_LE(float* output,
+                                                const float* input,
+                                                size_t ch_sample_count) {
+  // Downmixes planar big-endian 6-channel input to interleaved little-endian
+  // stereo. Four samples are processed per iteration, so the per-channel
+  // sample count must be a multiple of 4.
+  assert_true(ch_sample_count % 4 == 0);
+  // Reverses the byte order inside each of the four 32-bit lanes.
+  const __m128i byte_swap_shuffle =
+      _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+  const __m128 half = _mm_set1_ps(0.5f);
+  // 1 / (1 + 1 + 0.5): rescales the sum of the mixing weights back to unity.
+  const __m128 two_fifths = _mm_set1_ps(1.0f / 2.5f);
+
+  // put center on left and right, discard low frequency
+  for (size_t sample = 0; sample < ch_sample_count; sample += 4) {
+    // load 4 samples from each channel that is mixed (plane 3 is not read)
+    __m128 fl = _mm_loadu_ps(&input[0 * ch_sample_count + sample]);
+    __m128 fr = _mm_loadu_ps(&input[1 * ch_sample_count + sample]);
+    __m128 fc = _mm_loadu_ps(&input[2 * ch_sample_count + sample]);
+    __m128 bl = _mm_loadu_ps(&input[4 * ch_sample_count + sample]);
+    __m128 br = _mm_loadu_ps(&input[5 * ch_sample_count + sample]);
+    // byte swap
+    fl = _mm_castsi128_ps(
+        _mm_shuffle_epi8(_mm_castps_si128(fl), byte_swap_shuffle));
+    fr = _mm_castsi128_ps(
+        _mm_shuffle_epi8(_mm_castps_si128(fr), byte_swap_shuffle));
+    fc = _mm_castsi128_ps(
+        _mm_shuffle_epi8(_mm_castps_si128(fc), byte_swap_shuffle));
+    bl = _mm_castsi128_ps(
+        _mm_shuffle_epi8(_mm_castps_si128(bl), byte_swap_shuffle));
+    br = _mm_castsi128_ps(
+        _mm_shuffle_epi8(_mm_castps_si128(br), byte_swap_shuffle));
+
+    __m128 center_halved = _mm_mul_ps(fc, half);
+    __m128 left = _mm_add_ps(_mm_add_ps(fl, bl), center_halved);
+    __m128 right = _mm_add_ps(_mm_add_ps(fr, br), center_halved);
+    left = _mm_mul_ps(left, two_fifths);
+    right = _mm_mul_ps(right, two_fifths);
+    // Interleave as L0 R0 L1 R1 and then L2 R2 L3 R3.
+    _mm_storeu_ps(&output[sample * 2], _mm_unpacklo_ps(left, right));
+    _mm_storeu_ps(&output[(sample + 2) * 2], _mm_unpackhi_ps(left, right));
+  }
+}
+#else
+inline void sequential_6_BE_to_interleaved_6_LE(float* output,
+                                                const float* input,
+                                                size_t ch_sample_count) {
+  // Portable path: fill the output linearly, taking one byte-swapped value
+  // from each of the six channel planes per frame.
+  const size_t plane_end = 6 * ch_sample_count;
+  size_t out_index = 0;
+  for (size_t frame = 0; frame < ch_sample_count; ++frame) {
+    for (size_t in_index = frame; in_index < plane_end;
+         in_index += ch_sample_count) {
+      output[out_index++] = xe::byte_swap(input[in_index]);
+    }
+  }
+}
+inline void sequential_6_BE_to_interleaved_2_LE(float* output,
+                                                const float* input,
+                                                size_t ch_sample_count) {
+  // Downmixes planar big-endian 6-channel input to interleaved stereo.
+  // Default 5.1 channel mapping is fl, fr, fc, lf, bl, br
+  // https://docs.microsoft.com/en-us/windows/win32/xaudio2/xaudio2-default-channel-mapping
+  for (size_t sample = 0; sample < ch_sample_count; sample++) {
+    // put center on left and right, discard low frequency
+    float fl = xe::byte_swap(input[0 * ch_sample_count + sample]);
+    float fr = xe::byte_swap(input[1 * ch_sample_count + sample]);
+    float fc = xe::byte_swap(input[2 * ch_sample_count + sample]);
+    // Plane 4 is back left and plane 5 is back right, per the mapping above.
+    float bl = xe::byte_swap(input[4 * ch_sample_count + sample]);
+    float br = xe::byte_swap(input[5 * ch_sample_count + sample]);
+    float center_halved = fc * 0.5f;
+    // 1 / 2.5 rescales the sum of the mixing weights (1 + 1 + 0.5) to unity.
+    output[sample * 2] = (fl + bl + center_halved) * (1.0f / 2.5f);
+    output[sample * 2 + 1] = (fr + br + center_halved) * (1.0f / 2.5f);
+  }
+}
+#endif
+
+}  // namespace conversion
+}  // namespace apu
+}  // namespace xe
+
+#endif
--- a/src/xenia/apu/sdl/sdl_audio_driver.cc
+++ b/src/xenia/apu/sdl/sdl_audio_driver.cc
@ -10,9 +10,13 @@
 #include "xenia/apu/sdl/sdl_audio_driver.h"

 #include <array>
+#include <cstring>

 #include "xenia/apu/apu_flags.h"
+#include "xenia/apu/conversion.h"
+#include "xenia/base/assert.h"
 #include "xenia/base/logging.h"
+#include "xenia/base/profiling.h"
 #include "xenia/helper/sdl/sdl_helper.h"

 namespace xe {
@ -46,41 +50,37 @@ bool SDLAudioDriver::Initialize() {
  }
  sdl_initialized_ = true;

-  SDL_AudioCallback audio_callback = [](void* userdata, Uint8* stream,
-                                        int len) -> void {
-    assert_true(len == frame_size_);
-    const auto driver = static_cast<SDLAudioDriver*>(userdata);
-
-    std::unique_lock<std::mutex> guard(driver->frames_mutex_);
-    if (driver->frames_queued_.empty()) {
-      memset(stream, 0, len);
-    } else {
-      auto buffer = driver->frames_queued_.front();
-      driver->frames_queued_.pop();
-      if (cvars::mute) {
-        memset(stream, 0, len);
-      } else {
-        memcpy(stream, buffer, len);
-      }
-      driver->frames_unused_.push(buffer);
-
-      auto ret = driver->semaphore_->Release(1, nullptr);
-      assert_true(ret);
+  SDL_AudioSpec desired_spec = {};
+  SDL_AudioSpec obtained_spec;
+  desired_spec.freq = frame_frequency_;
+  desired_spec.format = AUDIO_F32;
+  desired_spec.channels = frame_channels_;
+  desired_spec.samples = channel_samples_;
+  desired_spec.callback = SDLCallback;
+  desired_spec.userdata = this;
+  // Allow the hardware to decide between 5.1 and stereo
+  int allowed_change = SDL_AUDIO_ALLOW_CHANNELS_CHANGE;
+  for (int i = 0; i < 2; i++) {
+    sdl_device_id_ = SDL_OpenAudioDevice(nullptr, 0, &desired_spec,
+                                         &obtained_spec, allowed_change);
+    if (sdl_device_id_ <= 0) {
+      XELOGE("SDL_OpenAudioDevice() failed.");
+      return false;
    }
-  };
-
-  SDL_AudioSpec wanted_spec = {};
-  wanted_spec.freq = frame_frequency_;
-  wanted_spec.format = AUDIO_F32;
-  wanted_spec.channels = frame_channels_;
-  wanted_spec.samples = channel_samples_;
-  wanted_spec.callback = audio_callback;
-  wanted_spec.userdata = this;
-  sdl_device_id_ = SDL_OpenAudioDevice(nullptr, 0, &wanted_spec, nullptr, 0);
+    if (obtained_spec.channels == 2 || obtained_spec.channels == 6) {
+      break;
+    }
+    // If the system is 4 or 7.1, let SDL convert
+    allowed_change = 0;
+    SDL_CloseAudioDevice(sdl_device_id_);
+    sdl_device_id_ = -1;
+  }
  if (sdl_device_id_ <= 0) {
-    XELOGE("SDL_OpenAudioDevice() failed.");
+    XELOGE("Failed to get a compatible SDL Audio Device.");
    return false;
  }
+  sdl_device_channels_ = obtained_spec.channels;
+
  SDL_PauseAudioDevice(sdl_device_id_, 0);

  return true;
@ -99,13 +99,7 @@ void SDLAudioDriver::SubmitFrame(uint32_t frame_ptr) {
    }
  }

-  // interleave the data
-  for (size_t index = 0, o = 0; index < channel_samples_; ++index) {
-    for (size_t channel = 0, table = 0; channel < frame_channels_;
-         ++channel, table += channel_samples_) {
-      output_frame[o++] = xe::byte_swap(input_frame[table + index]);
-    }
-  }
+  std::memcpy(output_frame, input_frame, frame_samples_ * sizeof(float));

  {
    std::unique_lock<std::mutex> guard(frames_mutex_);
@ -133,6 +127,45 @@ void SDLAudioDriver::Shutdown() {
  };
 }

+// SDL audio callback: fills `stream` (`len` bytes) with the next queued guest
+// frame converted to the channel layout of the opened device, or with silence
+// when nothing is queued, audio is muted, or the layout is unsupported.
+// `userdata` is the SDLAudioDriver that opened the device.
+void SDLAudioDriver::SDLCallback(void* userdata, Uint8* stream, int len) {
+  SCOPE_profile_cpu_f("apu");
+  if (!userdata || !stream) {
+    XELOGE("SDLAudioDriver::sdl_callback called with nullptr.");
+    return;
+  }
+  const auto driver = static_cast<SDLAudioDriver*>(userdata);
+  // Compare as size_t explicitly rather than mixing signed and unsigned.
+  assert_true(len >= 0 &&
+              size_t(len) == sizeof(float) * channel_samples_ *
+                                 driver->sdl_device_channels_);
+
+  std::unique_lock<std::mutex> guard(driver->frames_mutex_);
+  if (driver->frames_queued_.empty()) {
+    std::memset(stream, 0, len);
+  } else {
+    auto buffer = driver->frames_queued_.front();
+    driver->frames_queued_.pop();
+    if (cvars::mute) {
+      std::memset(stream, 0, len);
+    } else {
+      switch (driver->sdl_device_channels_) {
+        case 2:
+          conversion::sequential_6_BE_to_interleaved_2_LE(
+              reinterpret_cast<float*>(stream), buffer, channel_samples_);
+          break;
+        case 6:
+          conversion::sequential_6_BE_to_interleaved_6_LE(
+              reinterpret_cast<float*>(stream), buffer, channel_samples_);
+          break;
+        default:
+          // The callback has to fully initialize the buffer, so output
+          // silence instead of leaving whatever was in memory.
+          std::memset(stream, 0, len);
+          assert_unhandled_case(driver->sdl_device_channels_);
+          break;
+      }
+    }
+    // Hand the frame back for reuse and signal that a slot was freed.
+    driver->frames_unused_.push(buffer);
+
+    auto ret = driver->semaphore_->Release(1, nullptr);
+    assert_true(ret);
+  }
+}
 }  // namespace sdl
 }  // namespace apu
 }  // namespace xe
--- a/src/xenia/apu/sdl/sdl_audio_driver.h
+++ b/src/xenia/apu/sdl/sdl_audio_driver.h
@ -32,10 +32,13 @@ class SDLAudioDriver : public AudioDriver {
  void Shutdown();

 protected:
+  static void SDLCallback(void* userdata, Uint8* stream, int len);
+
  xe::threading::Semaphore* semaphore_ = nullptr;

  SDL_AudioDeviceID sdl_device_id_ = -1;
  bool sdl_initialized_ = false;
+  uint8_t sdl_device_channels_ = 0;

  static const uint32_t frame_frequency_ = 48000;
  static const uint32_t frame_channels_ = 6;
--- a/src/xenia/apu/xaudio2/xaudio2_audio_driver.cc
+++ b/src/xenia/apu/xaudio2/xaudio2_audio_driver.cc
@ -13,6 +13,7 @@
 #include "xenia/base/platform_win.h"

 #include "xenia/apu/apu_flags.h"
+#include "xenia/apu/conversion.h"
 #include "xenia/base/clock.h"
 #include "xenia/base/logging.h"

@ -208,12 +209,8 @@ void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) {
  auto interleave_channels = frame_channels_;

  // interleave the data
-  for (uint32_t index = 0, o = 0; index < channel_samples_; ++index) {
-    for (uint32_t channel = 0, table = 0; channel < interleave_channels;
-         ++channel, table += channel_samples_) {
-      output_frame[o++] = xe::byte_swap(input_frame[table + index]);
-    }
-  }
+  conversion::sequential_6_BE_to_interleaved_6_LE(output_frame, input_frame,
+                                                  channel_samples_);

  api::XAUDIO2_BUFFER buffer;
  buffer.Flags = 0;
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@ -1928,6 +1928,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
  if (host_render_targets_used) {
    bound_depth_and_color_render_target_bits =
        render_target_cache_->GetLastUpdateBoundRenderTargets(
+            render_target_cache_->gamma_render_target_as_srgb(),
            bound_depth_and_color_render_target_formats);
  } else {
    bound_depth_and_color_render_target_bits = 0;
--- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc
+++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc
@ -1858,9 +1858,10 @@ DXGI_FORMAT D3D12RenderTargetCache::GetColorDrawDXGIFormat(
    xenos::ColorRenderTargetFormat format) const {
  switch (format) {
    case xenos::ColorRenderTargetFormat::k_8_8_8_8:
-    case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
-      // sRGB is handled in a different way, not via the RenderTargetKey format.
      return DXGI_FORMAT_R8G8B8A8_UNORM;
+    case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
+      return gamma_render_target_as_srgb_ ? DXGI_FORMAT_R8G8B8A8_UNORM_SRGB
+                                          : DXGI_FORMAT_R8G8B8A8_UNORM;
    case xenos::ColorRenderTargetFormat::k_16_16:
      return DXGI_FORMAT_R16G16_SNORM;
    case xenos::ColorRenderTargetFormat::k_16_16_16_16:
@ -1954,20 +1955,6 @@ DXGI_FORMAT D3D12RenderTargetCache::GetDepthSRVStencilDXGIFormat(
  }
 }

-xenos::ColorRenderTargetFormat
-D3D12RenderTargetCache::GetHostRelevantColorFormat(
-    xenos::ColorRenderTargetFormat format) const {
-  switch (format) {
-    case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
-      // Currently handled in the shader (with incorrect blending), but even if
-      // handling is changed (to true sRGB), it will still be able to alias it
-      // with R8G8B8A8_UNORM.
-      return xenos::ColorRenderTargetFormat::k_8_8_8_8;
-    default:
-      return format;
-  }
-}
-
 RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget(
    RenderTargetKey key) {
  ID3D12Device* device =
@ -1990,7 +1977,7 @@ RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget(
  assert_true(resource_desc.Format != DXGI_FORMAT_UNKNOWN);
  if (resource_desc.Format == DXGI_FORMAT_UNKNOWN) {
    XELOGE("D3D12RenderTargetCache: Unknown {} render target format {}",
-           key.is_depth ? "depth" : "color", key.host_relevant_format);
+           key.is_depth ? "depth" : "color", key.resource_format);
    return nullptr;
  }
  if (key.msaa_samples == xenos::MsaaSamples::k2X && !msaa_2x_supported()) {
@ -2228,16 +2215,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
  bool dest_is_color = (mode.output == TransferOutput::kColor);

  xenos::ColorRenderTargetFormat dest_color_format =
-      xenos::ColorRenderTargetFormat(key.dest_host_relevant_format);
+      xenos::ColorRenderTargetFormat(key.dest_resource_format);
  xenos::DepthRenderTargetFormat dest_depth_format =
-      xenos::DepthRenderTargetFormat(key.dest_host_relevant_format);
+      xenos::DepthRenderTargetFormat(key.dest_resource_format);
  bool dest_is_64bpp =
      dest_is_color && xenos::IsColorRenderTargetFormat64bpp(dest_color_format);

  xenos::ColorRenderTargetFormat source_color_format =
-      xenos::ColorRenderTargetFormat(key.source_host_relevant_format);
+      xenos::ColorRenderTargetFormat(key.source_resource_format);
  xenos::DepthRenderTargetFormat source_depth_format =
-      xenos::DepthRenderTargetFormat(key.source_host_relevant_format);
+      xenos::DepthRenderTargetFormat(key.source_resource_format);
  // If not source_is_color, it's depth / stencil - 40-sample columns are
  // swapped as opposed to color destination.
  bool source_is_color = (rs & kTransferUsedRootParameterColorSRVBit) != 0;
@ -4920,8 +4907,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
      uint32_t rt_sort_index = 0;
      TransferShaderKey new_transfer_shader_key;
      new_transfer_shader_key.dest_msaa_samples = dest_rt_key.msaa_samples;
-      new_transfer_shader_key.dest_host_relevant_format =
-          dest_rt_key.host_relevant_format;
+      new_transfer_shader_key.dest_resource_format =
+          dest_rt_key.resource_format;
      uint32_t stencil_clear_rectangle_count = 0;
      for (uint32_t j = 0; j <= uint32_t(need_stencil_bit_draws); ++j) {
        // j == 0 - color or depth.
@ -4958,8 +4945,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
          RenderTargetKey source_rt_key = source_d3d12_rt.key();
          new_transfer_shader_key.source_msaa_samples =
              source_rt_key.msaa_samples;
-          new_transfer_shader_key.source_host_relevant_format =
-              source_rt_key.host_relevant_format;
+          new_transfer_shader_key.source_resource_format =
+              source_rt_key.resource_format;
          bool host_depth_source_is_copy =
              host_depth_source_d3d12_rt == &dest_d3d12_rt;
          new_transfer_shader_key.host_depth_source_is_copy =
@ -6492,7 +6479,7 @@ void D3D12RenderTargetCache::DumpRenderTargets(uint32_t dump_base,
    any_sources_32bpp_64bpp[size_t(rt_key.Is64bpp())] = true;
    DumpPipelineKey pipeline_key;
    pipeline_key.msaa_samples = rt_key.msaa_samples;
-    pipeline_key.host_relevant_format = rt_key.host_relevant_format;
+    pipeline_key.resource_format = rt_key.resource_format;
    pipeline_key.is_depth = rt_key.is_depth;
    dump_invocations_.emplace_back(rectangle, pipeline_key);
  }
--- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h
+++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h
@ -224,9 +224,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
    return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
  }

-  xenos::ColorRenderTargetFormat GetHostRelevantColorFormat(
-      xenos::ColorRenderTargetFormat format) const override;
-
  RenderTarget* CreateRenderTarget(RenderTargetKey key) override;

  bool IsHostDepthEncodingDifferent(
@ -418,14 +415,14 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
  union TransferShaderKey {
    struct {
      xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits;
-      uint32_t dest_host_relevant_format : xenos::kRenderTargetFormatBits;
+      uint32_t dest_resource_format : xenos::kRenderTargetFormatBits;
      xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits;
      // Always 1x when host_depth_source_is_copy is true not to create the same
      // pipeline for different MSAA sample counts as it doesn't matter in this
      // case.
      xenos::MsaaSamples host_depth_source_msaa_samples
          : xenos::kMsaaSamplesBits;
-      uint32_t source_host_relevant_format : xenos::kRenderTargetFormatBits;
+      uint32_t source_resource_format : xenos::kRenderTargetFormatBits;
      // If host depth is also fetched, whether it's pre-copied to the EDRAM
      // buffer (but since it's just a scratch buffer, with tiles laid out
      // linearly with the same pitch as in the original render target; also no
@ -557,7 +554,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
  union DumpPipelineKey {
    struct {
      xenos::MsaaSamples msaa_samples : 2;
-      uint32_t host_relevant_format : 4;
+      uint32_t resource_format : 4;
      // Last bit because this affects the root signature - after sorting, only
      // change it at most once. Depth buffers have an additional stencil SRV.
      uint32_t is_depth : 1;
@ -580,11 +577,11 @@ class D3D12RenderTargetCache final : public RenderTargetCache {

    xenos::ColorRenderTargetFormat GetColorFormat() const {
      assert_false(is_depth);
-      return xenos::ColorRenderTargetFormat(host_relevant_format);
+      return xenos::ColorRenderTargetFormat(resource_format);
    }
    xenos::DepthRenderTargetFormat GetDepthFormat() const {
      assert_true(is_depth);
-      return xenos::DepthRenderTargetFormat(host_relevant_format);
+      return xenos::DepthRenderTargetFormat(resource_format);
    }
  };

--- a/src/xenia/gpu/render_target_cache.cc
+++ b/src/xenia/gpu/render_target_cache.cc
@ -424,7 +424,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
  uint32_t depth_and_color_rts_used_bits = 0;
  // depth_and_color_rts_used_bits -> EDRAM base.
  uint32_t edram_bases[1 + xenos::kMaxColorRenderTargets];
-  uint32_t host_relevant_formats[1 + xenos::kMaxColorRenderTargets];
+  uint32_t resource_formats[1 + xenos::kMaxColorRenderTargets];
  uint32_t rts_are_64bpp = 0;
  uint32_t color_rts_are_gamma = 0;
  if (is_rasterization_done) {
@ -438,7 +438,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
          std::min(rb_depth_info.depth_base, xenos::kEdramTileCount);
      // With pixel shader interlock, always the same addressing disregarding
      // the format.
-      host_relevant_formats[0] =
+      resource_formats[0] =
          interlock_barrier_only ? 0 : uint32_t(rb_depth_info.depth_format);
    }
    if (regs.Get<reg::RB_MODECONTROL>().edram_mode ==
@ -468,20 +468,19 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
          if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
            color_rts_are_gamma |= uint32_t(1) << rt_index;
          }
-          xenos::ColorRenderTargetFormat color_host_relevant_format;
+          xenos::ColorRenderTargetFormat color_resource_format;
          if (interlock_barrier_only) {
            // Only changes in mapping between coordinates and addresses are
            // interesting (along with access overlap between draw calls), thus
            // only pixel size is relevant.
-            color_host_relevant_format =
+            color_resource_format =
                is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
                         : xenos::ColorRenderTargetFormat::k_8_8_8_8;
          } else {
-            color_host_relevant_format = GetHostRelevantColorFormat(
+            color_resource_format = GetColorResourceFormat(
                xenos::GetStorageColorFormat(color_format));
          }
-          host_relevant_formats[rt_bit_index] =
-              uint32_t(color_host_relevant_format);
+          resource_formats[rt_bit_index] = uint32_t(color_resource_format);
        }
      }
    }
@ -659,7 +658,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
    rt_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
    rt_key.msaa_samples = msaa_samples;
    rt_key.is_depth = rt_bit_index == 0;
-    rt_key.host_relevant_format = host_relevant_formats[rt_bit_index];
+    rt_key.resource_format = resource_formats[rt_bit_index];
    if (!interlock_barrier_only) {
      RenderTarget* render_target = GetOrCreateRenderTarget(rt_key);
      if (!render_target) {
@ -801,10 +800,11 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
 }

 uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
-    uint32_t* depth_and_color_formats_out) const {
+    bool distinguish_gamma_formats,
+    uint32_t* depth_and_color_resource_formats_out) const {
  if (GetPath() != Path::kHostRenderTargets) {
-    if (depth_and_color_formats_out) {
-      std::memset(depth_and_color_formats_out, 0,
+    if (depth_and_color_resource_formats_out) {
+      std::memset(depth_and_color_resource_formats_out, 0,
                  sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets));
    }
    return 0;
@ -814,15 +814,19 @@ uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
    const RenderTarget* render_target =
        last_update_accumulated_render_targets_[i];
    if (!render_target) {
-      if (depth_and_color_formats_out) {
-        depth_and_color_formats_out[i] = 0;
+      if (depth_and_color_resource_formats_out) {
+        depth_and_color_resource_formats_out[i] = 0;
      }
      continue;
    }
    rts_used |= uint32_t(1) << i;
-    if (depth_and_color_formats_out) {
-      depth_and_color_formats_out[i] =
-          render_target->key().host_relevant_format;
+    if (depth_and_color_resource_formats_out) {
+      depth_and_color_resource_formats_out[i] =
+          (distinguish_gamma_formats && i &&
+           (last_update_accumulated_color_targets_are_gamma_ &
+            (uint32_t(1) << (i - 1))))
+              ? uint32_t(xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA)
+              : render_target->key().resource_format;
    }
  }
  return rts_used;
@ -1083,7 +1087,7 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
    depth_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
    depth_render_target_key.msaa_samples = msaa_samples;
    depth_render_target_key.is_depth = 1;
-    depth_render_target_key.host_relevant_format =
+    depth_render_target_key.resource_format =
        resolve_info.depth_edram_info.format;
    depth_render_target = GetOrCreateRenderTarget(depth_render_target_key);
    if (!depth_render_target) {
@ -1098,9 +1102,8 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
    color_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
    color_render_target_key.msaa_samples = msaa_samples;
    color_render_target_key.is_depth = 0;
-    color_render_target_key.host_relevant_format =
-        uint32_t(GetHostRelevantColorFormat(xenos::ColorRenderTargetFormat(
-            resolve_info.color_edram_info.format)));
+    color_render_target_key.resource_format = uint32_t(GetColorResourceFormat(
+        xenos::ColorRenderTargetFormat(resolve_info.color_edram_info.format)));
    color_render_target = GetOrCreateRenderTarget(color_render_target_key);
    if (!color_render_target) {
      color_render_target_key = RenderTargetKey();
@ -1161,8 +1164,8 @@ RenderTargetCache::PrepareFullEdram1280xRenderTargetForSnapshotRestoration(
  }
  RenderTargetKey render_target_key;
  render_target_key.pitch_tiles_at_32bpp = kPitchTilesAt32bpp;
-  render_target_key.host_relevant_format = uint32_t(
-      GetHostRelevantColorFormat(xenos::GetStorageColorFormat(color_format)));
+  render_target_key.resource_format =
+      uint32_t(GetColorResourceFormat(color_format));
  RenderTarget* render_target = GetOrCreateRenderTarget(render_target_key);
  if (!render_target) {
    return nullptr;
@ -1214,14 +1217,14 @@ RenderTargetCache::RenderTarget* RenderTargetCache::GetOrCreateRenderTarget(
          "Created a {}x{} {}xMSAA {} render target with guest format {} at "
          "EDRAM base {}",
          width, height, uint32_t(1) << uint32_t(key.msaa_samples),
-          key.is_depth ? "depth" : "color", key.host_relevant_format,
+          key.is_depth ? "depth" : "color", key.resource_format,
          key.base_tiles);
    } else {
      XELOGE(
          "Failed to create a {}x{} {}xMSAA {} render target with guest format "
          "{} at EDRAM base {}",
          width, height, uint32_t(1) << uint32_t(key.msaa_samples),
-          key.is_depth ? "depth" : "color", key.host_relevant_format,
+          key.is_depth ? "depth" : "color", key.resource_format,
          key.base_tiles);
    }
    // Insert even if failed to create, not to try to create again.
@ -1339,8 +1342,7 @@ void RenderTargetCache::ChangeOwnership(
                                         nullptr, resolve_clear_cutout)) {
          RenderTargetKey transfer_host_depth_source =
              host_depth_encoding_different
-                  ? it->second
-                        .host_depth_render_targets[dest.host_relevant_format]
+                  ? it->second.host_depth_render_targets[dest.resource_format]
                  : RenderTargetKey();
          if (transfer_host_depth_source == transfer_source) {
            // Same render target, don't provide a separate host depth source.
@ -1385,7 +1387,7 @@ void RenderTargetCache::ChangeOwnership(
    // Claim the current range.
    it->second.render_target = dest;
    if (host_depth_encoding_different) {
-      it->second.host_depth_render_targets[dest.host_relevant_format] = dest;
+      it->second.host_depth_render_targets[dest.resource_format] = dest;
    }
    // Check if can merge with the next range after claiming.
    std::map<uint32_t, OwnershipRange>::iterator it_next;
--- a/src/xenia/gpu/render_target_cache.h
+++ b/src/xenia/gpu/render_target_cache.h
@ -181,8 +181,10 @@ class RenderTargetCache {

  // Returns bits where 0 is whether a depth render target is currently bound on
  // the host and 1... are whether the same applies to color render targets, and
-  // "host-relevant" formats of each.
+  // formats (resource formats, but if needed, with gamma taken into account) of
+  // each.
  uint32_t GetLastUpdateBoundRenderTargets(
+      bool distinguish_gamma_formats,
      uint32_t* depth_and_color_formats_out = nullptr) const;

 protected:
@ -223,11 +225,8 @@ class RenderTargetCache {
      uint32_t pitch_tiles_at_32bpp : 8;                          // 19
      xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits;  // 21
      uint32_t is_depth : 1;                                      // 22
-      // Not always the original format - blending precision ignored, formats
-      // handled through the same render targets on the host are normalized, and
-      // with pixel shader interlock, replaced with some single 32bpp or 64bpp
-      // format because it's only needed for addressing.
-      uint32_t host_relevant_format : xenos::kRenderTargetFormatBits;  // 26
+      // Ignoring the blending precision and sRGB.
+      uint32_t resource_format : xenos::kRenderTargetFormatBits;  // 26
    };
    uint32_t key = 0;
    struct Hasher {
@ -250,11 +249,11 @@ class RenderTargetCache {

    xenos::ColorRenderTargetFormat GetColorFormat() const {
      assert_false(is_depth);
-      return xenos::ColorRenderTargetFormat(host_relevant_format);
+      return xenos::ColorRenderTargetFormat(resource_format);
    }
    xenos::DepthRenderTargetFormat GetDepthFormat() const {
      assert_true(is_depth);
-      return xenos::DepthRenderTargetFormat(host_relevant_format);
+      return xenos::DepthRenderTargetFormat(resource_format);
    }
    bool Is64bpp() const {
      if (is_depth) {
@ -436,15 +435,6 @@ class RenderTargetCache {
  uint32_t GetRenderTargetHeight(uint32_t pitch_tiles_at_32bpp,
                                 xenos::MsaaSamples msaa_samples) const;

-  // Normalizes the format if it's fine to use the same render target textures
-  // for the provided and the returned guest formats.
-  // xenos::GetStorageColorFormat is supposed to be done before calling, so
-  // redoing what it does in the implementations is not needed.
-  virtual xenos::ColorRenderTargetFormat GetHostRelevantColorFormat(
-      xenos::ColorRenderTargetFormat format) const {
-    return format;
-  }
-
  virtual RenderTarget* CreateRenderTarget(RenderTargetKey key) = 0;

  // Whether depth buffer is encoded differently on the host, thus after
@ -567,7 +557,7 @@ class RenderTargetCache {
        return false;
      }
      if (host_depth_encoding_different && !key.is_depth &&
-          host_depth_render_targets[key.host_relevant_format] != key) {
+          host_depth_render_targets[key.resource_format] != key) {
        // Depth encoding is the same, but different addressing is needed.
        return false;
      }
@ -582,6 +572,16 @@ class RenderTargetCache {
    }
  };

+  // Maps a guest color format to the format identifying the host resource.
+  static constexpr xenos::ColorRenderTargetFormat GetColorResourceFormat(
+      xenos::ColorRenderTargetFormat format) {
+    // On the host, sRGB (when used at all) is a property of the view or of
+    // global state, so gamma 8_8_8_8 targets use the linear resource format
+    // and can share data with linear targets directly without transfers.
+    return format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
+               ? xenos::ColorRenderTargetFormat::k_8_8_8_8
+               : xenos::GetStorageColorFormat(format);
+  }
+
  RenderTarget* GetOrCreateRenderTarget(RenderTargetKey key);

  // Checks if changing ownership of the range to the specified render target