Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2021-06-14 17:31:55 +03:00
commit eb8bd732d3
9 changed files with 262 additions and 123 deletions

119
src/xenia/apu/conversion.h Normal file
View File

@ -0,0 +1,119 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_APU_CONVERSION_H_
#define XENIA_APU_CONVERSION_H_
#include <cstdint>
#include "xenia/base/byte_order.h"
#include "xenia/base/platform.h"
namespace xe {
namespace apu {
namespace conversion {
#if XE_ARCH_AMD64
// Converts 6 planar (one channel after another) big-endian 32-bit sample
// streams into a single interleaved little-endian stream.
//   output          - receives ch_sample_count frames of 6 samples each.
//   input           - 6 consecutive planes of ch_sample_count samples each.
//   ch_sample_count - number of samples per channel.
// The samples are handled as raw 32-bit words, only their byte order changes.
inline void sequential_6_BE_to_interleaved_6_LE(float* output,
                                                const float* input,
                                                size_t ch_sample_count) {
  const uint32_t* src = reinterpret_cast<const uint32_t*>(input);
  uint32_t* dst = reinterpret_cast<uint32_t*>(output);
  // Reverses the byte order within each of the four 32-bit lanes.
  const __m128i swap_mask =
      _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
  for (size_t i = 0; i < ch_sample_count; ++i) {
    // Gather one sample of every channel before anything is written.
    // _mm_set_epi32 takes the highest lane first, so channel 0 ends up in the
    // lowest lane.
    const __m128i first_four = _mm_set_epi32(
        src[3 * ch_sample_count + i], src[2 * ch_sample_count + i],
        src[1 * ch_sample_count + i], src[0 * ch_sample_count + i]);
    const uint32_t fifth = src[4 * ch_sample_count + i];
    const uint32_t sixth = src[5 * ch_sample_count + i];
    uint32_t* frame = &dst[i * 6];
    // Channels 0-3 are swapped and stored with a single unaligned store,
    // channels 4 and 5 go through the scalar byte swap.
    _mm_storeu_si128(reinterpret_cast<__m128i*>(frame),
                     _mm_shuffle_epi8(first_four, swap_mask));
    frame[4] = xe::byte_swap(fifth);
    frame[5] = xe::byte_swap(sixth);
  }
}
inline void sequential_6_BE_to_interleaved_2_LE(float* output,
const float* input,
size_t ch_sample_count) {
assert_true(ch_sample_count % 4 == 0);
const uint32_t* in = reinterpret_cast<const uint32_t*>(input);
uint32_t* out = reinterpret_cast<uint32_t*>(output);
const __m128i byte_swap_shuffle =
_mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
const __m128 half = _mm_set1_ps(0.5f);
const __m128 two_fifths = _mm_set1_ps(1.0f / 2.5f);
// put center on left and right, discard low frequency
for (size_t sample = 0; sample < ch_sample_count; sample += 4) {
// load 4 samples from 6 channels each
__m128 fl = _mm_loadu_ps(&input[0 * ch_sample_count + sample]);
__m128 fr = _mm_loadu_ps(&input[1 * ch_sample_count + sample]);
__m128 fc = _mm_loadu_ps(&input[2 * ch_sample_count + sample]);
__m128 bl = _mm_loadu_ps(&input[4 * ch_sample_count + sample]);
__m128 br = _mm_loadu_ps(&input[5 * ch_sample_count + sample]);
// byte swap
fl = _mm_castsi128_ps(
_mm_shuffle_epi8(_mm_castps_si128(fl), byte_swap_shuffle));
fr = _mm_castsi128_ps(
_mm_shuffle_epi8(_mm_castps_si128(fr), byte_swap_shuffle));
fc = _mm_castsi128_ps(
_mm_shuffle_epi8(_mm_castps_si128(fc), byte_swap_shuffle));
bl = _mm_castsi128_ps(
_mm_shuffle_epi8(_mm_castps_si128(bl), byte_swap_shuffle));
br = _mm_castsi128_ps(
_mm_shuffle_epi8(_mm_castps_si128(br), byte_swap_shuffle));
__m128 center_halved = _mm_mul_ps(fc, half);
__m128 left = _mm_add_ps(_mm_add_ps(fl, bl), center_halved);
__m128 right = _mm_add_ps(_mm_add_ps(fr, br), center_halved);
left = _mm_mul_ps(left, two_fifths);
right = _mm_mul_ps(right, two_fifths);
_mm_storeu_ps(&output[sample * 2], _mm_unpacklo_ps(left, right));
_mm_storeu_ps(&output[(sample + 2) * 2], _mm_unpackhi_ps(left, right));
}
}
#else
// Portable fallback: converts 6 planar (one channel after another) big-endian
// sample streams into a single interleaved stream in host byte order.
//   output          - receives ch_sample_count frames of 6 samples each.
//   input           - 6 consecutive planes of ch_sample_count samples each.
//   ch_sample_count - number of samples per channel.
inline void sequential_6_BE_to_interleaved_6_LE(float* output,
                                                const float* input,
                                                size_t ch_sample_count) {
  // Running write position - the output is filled strictly in order.
  size_t out_index = 0;
  for (size_t frame = 0; frame < ch_sample_count; ++frame) {
    for (size_t plane = 0; plane < 6; ++plane) {
      const size_t in_index = plane * ch_sample_count + frame;
      output[out_index] = xe::byte_swap(input[in_index]);
      ++out_index;
    }
  }
}
// Portable fallback: downmixes 6 planar big-endian float channels to
// interleaved stereo in host byte order.
//   output          - receives ch_sample_count (left, right) pairs.
//   input           - 6 consecutive planes of ch_sample_count samples each.
//   ch_sample_count - number of samples per channel.
// The center is put on both sides at half volume, the low frequency channel
// is discarded, and the sum is scaled by 1 / 2.5.
inline void sequential_6_BE_to_interleaved_2_LE(float* output,
                                                const float* input,
                                                size_t ch_sample_count) {
  // Default 5.1 channel mapping is fl, fr, fc, lf, bl, br
  // https://docs.microsoft.com/en-us/windows/win32/xaudio2/xaudio2-default-channel-mapping
  for (size_t sample = 0; sample < ch_sample_count; sample++) {
    // put center on left and right, discard low frequency
    float fl = xe::byte_swap(input[0 * ch_sample_count + sample]);
    float fr = xe::byte_swap(input[1 * ch_sample_count + sample]);
    float fc = xe::byte_swap(input[2 * ch_sample_count + sample]);
    // Plane 4 is back left and plane 5 is back right, per the mapping above,
    // so that each rear channel is mixed into its own side.
    float bl = xe::byte_swap(input[4 * ch_sample_count + sample]);
    float br = xe::byte_swap(input[5 * ch_sample_count + sample]);
    float center_halved = fc * 0.5f;
    output[sample * 2] = (fl + bl + center_halved) * (1.0f / 2.5f);
    output[sample * 2 + 1] = (fr + br + center_halved) * (1.0f / 2.5f);
  }
}
#endif
} // namespace conversion
} // namespace apu
} // namespace xe
#endif

View File

@ -10,9 +10,13 @@
#include "xenia/apu/sdl/sdl_audio_driver.h" #include "xenia/apu/sdl/sdl_audio_driver.h"
#include <array> #include <array>
#include <cstring>
#include "xenia/apu/apu_flags.h" #include "xenia/apu/apu_flags.h"
#include "xenia/apu/conversion.h"
#include "xenia/base/assert.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/helper/sdl/sdl_helper.h" #include "xenia/helper/sdl/sdl_helper.h"
namespace xe { namespace xe {
@ -46,41 +50,37 @@ bool SDLAudioDriver::Initialize() {
} }
sdl_initialized_ = true; sdl_initialized_ = true;
SDL_AudioCallback audio_callback = [](void* userdata, Uint8* stream, SDL_AudioSpec desired_spec = {};
int len) -> void { SDL_AudioSpec obtained_spec;
assert_true(len == frame_size_); desired_spec.freq = frame_frequency_;
const auto driver = static_cast<SDLAudioDriver*>(userdata); desired_spec.format = AUDIO_F32;
desired_spec.channels = frame_channels_;
std::unique_lock<std::mutex> guard(driver->frames_mutex_); desired_spec.samples = channel_samples_;
if (driver->frames_queued_.empty()) { desired_spec.callback = SDLCallback;
memset(stream, 0, len); desired_spec.userdata = this;
} else { // Allow the hardware to decide between 5.1 and stereo
auto buffer = driver->frames_queued_.front(); int allowed_change = SDL_AUDIO_ALLOW_CHANNELS_CHANGE;
driver->frames_queued_.pop(); for (int i = 0; i < 2; i++) {
if (cvars::mute) { sdl_device_id_ = SDL_OpenAudioDevice(nullptr, 0, &desired_spec,
memset(stream, 0, len); &obtained_spec, allowed_change);
} else {
memcpy(stream, buffer, len);
}
driver->frames_unused_.push(buffer);
auto ret = driver->semaphore_->Release(1, nullptr);
assert_true(ret);
}
};
SDL_AudioSpec wanted_spec = {};
wanted_spec.freq = frame_frequency_;
wanted_spec.format = AUDIO_F32;
wanted_spec.channels = frame_channels_;
wanted_spec.samples = channel_samples_;
wanted_spec.callback = audio_callback;
wanted_spec.userdata = this;
sdl_device_id_ = SDL_OpenAudioDevice(nullptr, 0, &wanted_spec, nullptr, 0);
if (sdl_device_id_ <= 0) { if (sdl_device_id_ <= 0) {
XELOGE("SDL_OpenAudioDevice() failed."); XELOGE("SDL_OpenAudioDevice() failed.");
return false; return false;
} }
if (obtained_spec.channels == 2 || obtained_spec.channels == 6) {
break;
}
// If the system is 4 or 7.1, let SDL convert
allowed_change = 0;
SDL_CloseAudioDevice(sdl_device_id_);
sdl_device_id_ = -1;
}
if (sdl_device_id_ <= 0) {
XELOGE("Failed to get a compatible SDL Audio Device.");
return false;
}
sdl_device_channels_ = obtained_spec.channels;
SDL_PauseAudioDevice(sdl_device_id_, 0); SDL_PauseAudioDevice(sdl_device_id_, 0);
return true; return true;
@ -99,13 +99,7 @@ void SDLAudioDriver::SubmitFrame(uint32_t frame_ptr) {
} }
} }
// interleave the data std::memcpy(output_frame, input_frame, frame_samples_ * sizeof(float));
for (size_t index = 0, o = 0; index < channel_samples_; ++index) {
for (size_t channel = 0, table = 0; channel < frame_channels_;
++channel, table += channel_samples_) {
output_frame[o++] = xe::byte_swap(input_frame[table + index]);
}
}
{ {
std::unique_lock<std::mutex> guard(frames_mutex_); std::unique_lock<std::mutex> guard(frames_mutex_);
@ -133,6 +127,45 @@ void SDLAudioDriver::Shutdown() {
}; };
} }
// Audio callback registered with SDL through SDL_AudioSpec::callback.
//   userdata - the SDLAudioDriver that opened the device.
//   stream   - buffer to fill with audio for the device.
//   len      - size of `stream` in bytes.
// Writes the oldest queued frame, converted to the channel count of the
// opened device, into `stream`. Every path that has a valid `stream` fills it
// completely, because SDL 2 does not initialize the buffer before invoking
// the callback and expects silence to be written when there is nothing to
// play.
void SDLAudioDriver::SDLCallback(void* userdata, Uint8* stream, int len) {
  SCOPE_profile_cpu_f("apu");
  if (!userdata || !stream) {
    XELOGE("SDLAudioDriver::sdl_callback called with nullptr.");
    if (stream) {
      // No driver to take a frame from - output silence rather than leaving
      // the buffer uninitialized.
      std::memset(stream, 0, len);
    }
    return;
  }
  const auto driver = static_cast<SDLAudioDriver*>(userdata);
  assert_true(len ==
              sizeof(float) * channel_samples_ * driver->sdl_device_channels_);

  // Held for the rest of the callback, including the conversion.
  std::unique_lock<std::mutex> guard(driver->frames_mutex_);
  if (driver->frames_queued_.empty()) {
    // Nothing submitted yet - silence.
    std::memset(stream, 0, len);
  } else {
    auto buffer = driver->frames_queued_.front();
    driver->frames_queued_.pop();
    if (cvars::mute) {
      // The frame is still consumed while muted, only its audio is dropped.
      std::memset(stream, 0, len);
    } else {
      switch (driver->sdl_device_channels_) {
        case 2:
          conversion::sequential_6_BE_to_interleaved_2_LE(
              reinterpret_cast<float*>(stream), buffer, channel_samples_);
          break;
        case 6:
          conversion::sequential_6_BE_to_interleaved_6_LE(
              reinterpret_cast<float*>(stream), buffer, channel_samples_);
          break;
        default:
          assert_unhandled_case(driver->sdl_device_channels_);
          // No conversion exists for this layout - output silence instead of
          // whatever the buffer happened to contain.
          std::memset(stream, 0, len);
          break;
      }
    }
    // Hand the buffer back through frames_unused_ and release the semaphore
    // once for the consumed frame.
    driver->frames_unused_.push(buffer);
    auto ret = driver->semaphore_->Release(1, nullptr);
    assert_true(ret);
  }
}
} // namespace sdl } // namespace sdl
} // namespace apu } // namespace apu
} // namespace xe } // namespace xe

View File

@ -32,10 +32,13 @@ class SDLAudioDriver : public AudioDriver {
void Shutdown(); void Shutdown();
protected: protected:
static void SDLCallback(void* userdata, Uint8* stream, int len);
xe::threading::Semaphore* semaphore_ = nullptr; xe::threading::Semaphore* semaphore_ = nullptr;
SDL_AudioDeviceID sdl_device_id_ = -1; SDL_AudioDeviceID sdl_device_id_ = -1;
bool sdl_initialized_ = false; bool sdl_initialized_ = false;
uint8_t sdl_device_channels_ = 0;
static const uint32_t frame_frequency_ = 48000; static const uint32_t frame_frequency_ = 48000;
static const uint32_t frame_channels_ = 6; static const uint32_t frame_channels_ = 6;

View File

@ -13,6 +13,7 @@
#include "xenia/base/platform_win.h" #include "xenia/base/platform_win.h"
#include "xenia/apu/apu_flags.h" #include "xenia/apu/apu_flags.h"
#include "xenia/apu/conversion.h"
#include "xenia/base/clock.h" #include "xenia/base/clock.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
@ -208,12 +209,8 @@ void XAudio2AudioDriver::SubmitFrame(uint32_t frame_ptr) {
auto interleave_channels = frame_channels_; auto interleave_channels = frame_channels_;
// interleave the data // interleave the data
for (uint32_t index = 0, o = 0; index < channel_samples_; ++index) { conversion::sequential_6_BE_to_interleaved_6_LE(output_frame, input_frame,
for (uint32_t channel = 0, table = 0; channel < interleave_channels; channel_samples_);
++channel, table += channel_samples_) {
output_frame[o++] = xe::byte_swap(input_frame[table + index]);
}
}
api::XAUDIO2_BUFFER buffer; api::XAUDIO2_BUFFER buffer;
buffer.Flags = 0; buffer.Flags = 0;

View File

@ -1928,6 +1928,7 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
if (host_render_targets_used) { if (host_render_targets_used) {
bound_depth_and_color_render_target_bits = bound_depth_and_color_render_target_bits =
render_target_cache_->GetLastUpdateBoundRenderTargets( render_target_cache_->GetLastUpdateBoundRenderTargets(
render_target_cache_->gamma_render_target_as_srgb(),
bound_depth_and_color_render_target_formats); bound_depth_and_color_render_target_formats);
} else { } else {
bound_depth_and_color_render_target_bits = 0; bound_depth_and_color_render_target_bits = 0;

View File

@ -1858,9 +1858,10 @@ DXGI_FORMAT D3D12RenderTargetCache::GetColorDrawDXGIFormat(
xenos::ColorRenderTargetFormat format) const { xenos::ColorRenderTargetFormat format) const {
switch (format) { switch (format) {
case xenos::ColorRenderTargetFormat::k_8_8_8_8: case xenos::ColorRenderTargetFormat::k_8_8_8_8:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
// sRGB is handled in a different way, not via the RenderTargetKey format.
return DXGI_FORMAT_R8G8B8A8_UNORM; return DXGI_FORMAT_R8G8B8A8_UNORM;
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
return gamma_render_target_as_srgb_ ? DXGI_FORMAT_R8G8B8A8_UNORM_SRGB
: DXGI_FORMAT_R8G8B8A8_UNORM;
case xenos::ColorRenderTargetFormat::k_16_16: case xenos::ColorRenderTargetFormat::k_16_16:
return DXGI_FORMAT_R16G16_SNORM; return DXGI_FORMAT_R16G16_SNORM;
case xenos::ColorRenderTargetFormat::k_16_16_16_16: case xenos::ColorRenderTargetFormat::k_16_16_16_16:
@ -1954,20 +1955,6 @@ DXGI_FORMAT D3D12RenderTargetCache::GetDepthSRVStencilDXGIFormat(
} }
} }
xenos::ColorRenderTargetFormat
D3D12RenderTargetCache::GetHostRelevantColorFormat(
xenos::ColorRenderTargetFormat format) const {
switch (format) {
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
// Currently handled in the shader (with incorrect blending), but even if
// handling is changed (to true sRGB), it will still be able to alias it
// with R8G8B8A8_UNORM.
return xenos::ColorRenderTargetFormat::k_8_8_8_8;
default:
return format;
}
}
RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget( RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget(
RenderTargetKey key) { RenderTargetKey key) {
ID3D12Device* device = ID3D12Device* device =
@ -1990,7 +1977,7 @@ RenderTargetCache::RenderTarget* D3D12RenderTargetCache::CreateRenderTarget(
assert_true(resource_desc.Format != DXGI_FORMAT_UNKNOWN); assert_true(resource_desc.Format != DXGI_FORMAT_UNKNOWN);
if (resource_desc.Format == DXGI_FORMAT_UNKNOWN) { if (resource_desc.Format == DXGI_FORMAT_UNKNOWN) {
XELOGE("D3D12RenderTargetCache: Unknown {} render target format {}", XELOGE("D3D12RenderTargetCache: Unknown {} render target format {}",
key.is_depth ? "depth" : "color", key.host_relevant_format); key.is_depth ? "depth" : "color", key.resource_format);
return nullptr; return nullptr;
} }
if (key.msaa_samples == xenos::MsaaSamples::k2X && !msaa_2x_supported()) { if (key.msaa_samples == xenos::MsaaSamples::k2X && !msaa_2x_supported()) {
@ -2228,16 +2215,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
bool dest_is_color = (mode.output == TransferOutput::kColor); bool dest_is_color = (mode.output == TransferOutput::kColor);
xenos::ColorRenderTargetFormat dest_color_format = xenos::ColorRenderTargetFormat dest_color_format =
xenos::ColorRenderTargetFormat(key.dest_host_relevant_format); xenos::ColorRenderTargetFormat(key.dest_resource_format);
xenos::DepthRenderTargetFormat dest_depth_format = xenos::DepthRenderTargetFormat dest_depth_format =
xenos::DepthRenderTargetFormat(key.dest_host_relevant_format); xenos::DepthRenderTargetFormat(key.dest_resource_format);
bool dest_is_64bpp = bool dest_is_64bpp =
dest_is_color && xenos::IsColorRenderTargetFormat64bpp(dest_color_format); dest_is_color && xenos::IsColorRenderTargetFormat64bpp(dest_color_format);
xenos::ColorRenderTargetFormat source_color_format = xenos::ColorRenderTargetFormat source_color_format =
xenos::ColorRenderTargetFormat(key.source_host_relevant_format); xenos::ColorRenderTargetFormat(key.source_resource_format);
xenos::DepthRenderTargetFormat source_depth_format = xenos::DepthRenderTargetFormat source_depth_format =
xenos::DepthRenderTargetFormat(key.source_host_relevant_format); xenos::DepthRenderTargetFormat(key.source_resource_format);
// If not source_is_color, it's depth / stencil - 40-sample columns are // If not source_is_color, it's depth / stencil - 40-sample columns are
// swapped as opposed to color destination. // swapped as opposed to color destination.
bool source_is_color = (rs & kTransferUsedRootParameterColorSRVBit) != 0; bool source_is_color = (rs & kTransferUsedRootParameterColorSRVBit) != 0;
@ -4920,8 +4907,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
uint32_t rt_sort_index = 0; uint32_t rt_sort_index = 0;
TransferShaderKey new_transfer_shader_key; TransferShaderKey new_transfer_shader_key;
new_transfer_shader_key.dest_msaa_samples = dest_rt_key.msaa_samples; new_transfer_shader_key.dest_msaa_samples = dest_rt_key.msaa_samples;
new_transfer_shader_key.dest_host_relevant_format = new_transfer_shader_key.dest_resource_format =
dest_rt_key.host_relevant_format; dest_rt_key.resource_format;
uint32_t stencil_clear_rectangle_count = 0; uint32_t stencil_clear_rectangle_count = 0;
for (uint32_t j = 0; j <= uint32_t(need_stencil_bit_draws); ++j) { for (uint32_t j = 0; j <= uint32_t(need_stencil_bit_draws); ++j) {
// j == 0 - color or depth. // j == 0 - color or depth.
@ -4958,8 +4945,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
RenderTargetKey source_rt_key = source_d3d12_rt.key(); RenderTargetKey source_rt_key = source_d3d12_rt.key();
new_transfer_shader_key.source_msaa_samples = new_transfer_shader_key.source_msaa_samples =
source_rt_key.msaa_samples; source_rt_key.msaa_samples;
new_transfer_shader_key.source_host_relevant_format = new_transfer_shader_key.source_resource_format =
source_rt_key.host_relevant_format; source_rt_key.resource_format;
bool host_depth_source_is_copy = bool host_depth_source_is_copy =
host_depth_source_d3d12_rt == &dest_d3d12_rt; host_depth_source_d3d12_rt == &dest_d3d12_rt;
new_transfer_shader_key.host_depth_source_is_copy = new_transfer_shader_key.host_depth_source_is_copy =
@ -6492,7 +6479,7 @@ void D3D12RenderTargetCache::DumpRenderTargets(uint32_t dump_base,
any_sources_32bpp_64bpp[size_t(rt_key.Is64bpp())] = true; any_sources_32bpp_64bpp[size_t(rt_key.Is64bpp())] = true;
DumpPipelineKey pipeline_key; DumpPipelineKey pipeline_key;
pipeline_key.msaa_samples = rt_key.msaa_samples; pipeline_key.msaa_samples = rt_key.msaa_samples;
pipeline_key.host_relevant_format = rt_key.host_relevant_format; pipeline_key.resource_format = rt_key.resource_format;
pipeline_key.is_depth = rt_key.is_depth; pipeline_key.is_depth = rt_key.is_depth;
dump_invocations_.emplace_back(rectangle, pipeline_key); dump_invocations_.emplace_back(rectangle, pipeline_key);
} }

View File

@ -224,9 +224,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
} }
xenos::ColorRenderTargetFormat GetHostRelevantColorFormat(
xenos::ColorRenderTargetFormat format) const override;
RenderTarget* CreateRenderTarget(RenderTargetKey key) override; RenderTarget* CreateRenderTarget(RenderTargetKey key) override;
bool IsHostDepthEncodingDifferent( bool IsHostDepthEncodingDifferent(
@ -418,14 +415,14 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
union TransferShaderKey { union TransferShaderKey {
struct { struct {
xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits; xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits;
uint32_t dest_host_relevant_format : xenos::kRenderTargetFormatBits; uint32_t dest_resource_format : xenos::kRenderTargetFormatBits;
xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits; xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits;
// Always 1x when host_depth_source_is_copy is true not to create the same // Always 1x when host_depth_source_is_copy is true not to create the same
// pipeline for different MSAA sample counts as it doesn't matter in this // pipeline for different MSAA sample counts as it doesn't matter in this
// case. // case.
xenos::MsaaSamples host_depth_source_msaa_samples xenos::MsaaSamples host_depth_source_msaa_samples
: xenos::kMsaaSamplesBits; : xenos::kMsaaSamplesBits;
uint32_t source_host_relevant_format : xenos::kRenderTargetFormatBits; uint32_t source_resource_format : xenos::kRenderTargetFormatBits;
// If host depth is also fetched, whether it's pre-copied to the EDRAM // If host depth is also fetched, whether it's pre-copied to the EDRAM
// buffer (but since it's just a scratch buffer, with tiles laid out // buffer (but since it's just a scratch buffer, with tiles laid out
// linearly with the same pitch as in the original render target; also no // linearly with the same pitch as in the original render target; also no
@ -557,7 +554,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
union DumpPipelineKey { union DumpPipelineKey {
struct { struct {
xenos::MsaaSamples msaa_samples : 2; xenos::MsaaSamples msaa_samples : 2;
uint32_t host_relevant_format : 4; uint32_t resource_format : 4;
// Last bit because this affects the root signature - after sorting, only // Last bit because this affects the root signature - after sorting, only
// change it at most once. Depth buffers have an additional stencil SRV. // change it at most once. Depth buffers have an additional stencil SRV.
uint32_t is_depth : 1; uint32_t is_depth : 1;
@ -580,11 +577,11 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
xenos::ColorRenderTargetFormat GetColorFormat() const { xenos::ColorRenderTargetFormat GetColorFormat() const {
assert_false(is_depth); assert_false(is_depth);
return xenos::ColorRenderTargetFormat(host_relevant_format); return xenos::ColorRenderTargetFormat(resource_format);
} }
xenos::DepthRenderTargetFormat GetDepthFormat() const { xenos::DepthRenderTargetFormat GetDepthFormat() const {
assert_true(is_depth); assert_true(is_depth);
return xenos::DepthRenderTargetFormat(host_relevant_format); return xenos::DepthRenderTargetFormat(resource_format);
} }
}; };

View File

@ -424,7 +424,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t depth_and_color_rts_used_bits = 0; uint32_t depth_and_color_rts_used_bits = 0;
// depth_and_color_rts_used_bits -> EDRAM base. // depth_and_color_rts_used_bits -> EDRAM base.
uint32_t edram_bases[1 + xenos::kMaxColorRenderTargets]; uint32_t edram_bases[1 + xenos::kMaxColorRenderTargets];
uint32_t host_relevant_formats[1 + xenos::kMaxColorRenderTargets]; uint32_t resource_formats[1 + xenos::kMaxColorRenderTargets];
uint32_t rts_are_64bpp = 0; uint32_t rts_are_64bpp = 0;
uint32_t color_rts_are_gamma = 0; uint32_t color_rts_are_gamma = 0;
if (is_rasterization_done) { if (is_rasterization_done) {
@ -438,7 +438,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
std::min(rb_depth_info.depth_base, xenos::kEdramTileCount); std::min(rb_depth_info.depth_base, xenos::kEdramTileCount);
// With pixel shader interlock, always the same addressing disregarding // With pixel shader interlock, always the same addressing disregarding
// the format. // the format.
host_relevant_formats[0] = resource_formats[0] =
interlock_barrier_only ? 0 : uint32_t(rb_depth_info.depth_format); interlock_barrier_only ? 0 : uint32_t(rb_depth_info.depth_format);
} }
if (regs.Get<reg::RB_MODECONTROL>().edram_mode == if (regs.Get<reg::RB_MODECONTROL>().edram_mode ==
@ -468,20 +468,19 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { if (color_format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
color_rts_are_gamma |= uint32_t(1) << rt_index; color_rts_are_gamma |= uint32_t(1) << rt_index;
} }
xenos::ColorRenderTargetFormat color_host_relevant_format; xenos::ColorRenderTargetFormat color_resource_format;
if (interlock_barrier_only) { if (interlock_barrier_only) {
// Only changes in mapping between coordinates and addresses are // Only changes in mapping between coordinates and addresses are
// interesting (along with access overlap between draw calls), thus // interesting (along with access overlap between draw calls), thus
// only pixel size is relevant. // only pixel size is relevant.
color_host_relevant_format = color_resource_format =
is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16 is_64bpp ? xenos::ColorRenderTargetFormat::k_16_16_16_16
: xenos::ColorRenderTargetFormat::k_8_8_8_8; : xenos::ColorRenderTargetFormat::k_8_8_8_8;
} else { } else {
color_host_relevant_format = GetHostRelevantColorFormat( color_resource_format = GetColorResourceFormat(
xenos::GetStorageColorFormat(color_format)); xenos::GetStorageColorFormat(color_format));
} }
host_relevant_formats[rt_bit_index] = resource_formats[rt_bit_index] = uint32_t(color_resource_format);
uint32_t(color_host_relevant_format);
} }
} }
} }
@ -659,7 +658,7 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
rt_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; rt_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
rt_key.msaa_samples = msaa_samples; rt_key.msaa_samples = msaa_samples;
rt_key.is_depth = rt_bit_index == 0; rt_key.is_depth = rt_bit_index == 0;
rt_key.host_relevant_format = host_relevant_formats[rt_bit_index]; rt_key.resource_format = resource_formats[rt_bit_index];
if (!interlock_barrier_only) { if (!interlock_barrier_only) {
RenderTarget* render_target = GetOrCreateRenderTarget(rt_key); RenderTarget* render_target = GetOrCreateRenderTarget(rt_key);
if (!render_target) { if (!render_target) {
@ -801,10 +800,11 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
} }
uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets( uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
uint32_t* depth_and_color_formats_out) const { bool distinguish_gamma_formats,
uint32_t* depth_and_color_resource_formats_out) const {
if (GetPath() != Path::kHostRenderTargets) { if (GetPath() != Path::kHostRenderTargets) {
if (depth_and_color_formats_out) { if (depth_and_color_resource_formats_out) {
std::memset(depth_and_color_formats_out, 0, std::memset(depth_and_color_resource_formats_out, 0,
sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets)); sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets));
} }
return 0; return 0;
@ -814,15 +814,19 @@ uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
const RenderTarget* render_target = const RenderTarget* render_target =
last_update_accumulated_render_targets_[i]; last_update_accumulated_render_targets_[i];
if (!render_target) { if (!render_target) {
if (depth_and_color_formats_out) { if (depth_and_color_resource_formats_out) {
depth_and_color_formats_out[i] = 0; depth_and_color_resource_formats_out[i] = 0;
} }
continue; continue;
} }
rts_used |= uint32_t(1) << i; rts_used |= uint32_t(1) << i;
if (depth_and_color_formats_out) { if (depth_and_color_resource_formats_out) {
depth_and_color_formats_out[i] = depth_and_color_resource_formats_out[i] =
render_target->key().host_relevant_format; (distinguish_gamma_formats && i &&
(last_update_accumulated_color_targets_are_gamma_ &
(uint32_t(1) << (i - 1))))
? uint32_t(xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA)
: render_target->key().resource_format;
} }
} }
return rts_used; return rts_used;
@ -1083,7 +1087,7 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
depth_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; depth_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
depth_render_target_key.msaa_samples = msaa_samples; depth_render_target_key.msaa_samples = msaa_samples;
depth_render_target_key.is_depth = 1; depth_render_target_key.is_depth = 1;
depth_render_target_key.host_relevant_format = depth_render_target_key.resource_format =
resolve_info.depth_edram_info.format; resolve_info.depth_edram_info.format;
depth_render_target = GetOrCreateRenderTarget(depth_render_target_key); depth_render_target = GetOrCreateRenderTarget(depth_render_target_key);
if (!depth_render_target) { if (!depth_render_target) {
@ -1098,9 +1102,8 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
color_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp; color_render_target_key.pitch_tiles_at_32bpp = pitch_tiles_at_32bpp;
color_render_target_key.msaa_samples = msaa_samples; color_render_target_key.msaa_samples = msaa_samples;
color_render_target_key.is_depth = 0; color_render_target_key.is_depth = 0;
color_render_target_key.host_relevant_format = color_render_target_key.resource_format = uint32_t(GetColorResourceFormat(
uint32_t(GetHostRelevantColorFormat(xenos::ColorRenderTargetFormat( xenos::ColorRenderTargetFormat(resolve_info.color_edram_info.format)));
resolve_info.color_edram_info.format)));
color_render_target = GetOrCreateRenderTarget(color_render_target_key); color_render_target = GetOrCreateRenderTarget(color_render_target_key);
if (!color_render_target) { if (!color_render_target) {
color_render_target_key = RenderTargetKey(); color_render_target_key = RenderTargetKey();
@ -1161,8 +1164,8 @@ RenderTargetCache::PrepareFullEdram1280xRenderTargetForSnapshotRestoration(
} }
RenderTargetKey render_target_key; RenderTargetKey render_target_key;
render_target_key.pitch_tiles_at_32bpp = kPitchTilesAt32bpp; render_target_key.pitch_tiles_at_32bpp = kPitchTilesAt32bpp;
render_target_key.host_relevant_format = uint32_t( render_target_key.resource_format =
GetHostRelevantColorFormat(xenos::GetStorageColorFormat(color_format))); uint32_t(GetColorResourceFormat(color_format));
RenderTarget* render_target = GetOrCreateRenderTarget(render_target_key); RenderTarget* render_target = GetOrCreateRenderTarget(render_target_key);
if (!render_target) { if (!render_target) {
return nullptr; return nullptr;
@ -1214,14 +1217,14 @@ RenderTargetCache::RenderTarget* RenderTargetCache::GetOrCreateRenderTarget(
"Created a {}x{} {}xMSAA {} render target with guest format {} at " "Created a {}x{} {}xMSAA {} render target with guest format {} at "
"EDRAM base {}", "EDRAM base {}",
width, height, uint32_t(1) << uint32_t(key.msaa_samples), width, height, uint32_t(1) << uint32_t(key.msaa_samples),
key.is_depth ? "depth" : "color", key.host_relevant_format, key.is_depth ? "depth" : "color", key.resource_format,
key.base_tiles); key.base_tiles);
} else { } else {
XELOGE( XELOGE(
"Failed to create a {}x{} {}xMSAA {} render target with guest format " "Failed to create a {}x{} {}xMSAA {} render target with guest format "
"{} at EDRAM base {}", "{} at EDRAM base {}",
width, height, uint32_t(1) << uint32_t(key.msaa_samples), width, height, uint32_t(1) << uint32_t(key.msaa_samples),
key.is_depth ? "depth" : "color", key.host_relevant_format, key.is_depth ? "depth" : "color", key.resource_format,
key.base_tiles); key.base_tiles);
} }
// Insert even if failed to create, not to try to create again. // Insert even if failed to create, not to try to create again.
@ -1339,8 +1342,7 @@ void RenderTargetCache::ChangeOwnership(
nullptr, resolve_clear_cutout)) { nullptr, resolve_clear_cutout)) {
RenderTargetKey transfer_host_depth_source = RenderTargetKey transfer_host_depth_source =
host_depth_encoding_different host_depth_encoding_different
? it->second ? it->second.host_depth_render_targets[dest.resource_format]
.host_depth_render_targets[dest.host_relevant_format]
: RenderTargetKey(); : RenderTargetKey();
if (transfer_host_depth_source == transfer_source) { if (transfer_host_depth_source == transfer_source) {
// Same render target, don't provide a separate host depth source. // Same render target, don't provide a separate host depth source.
@ -1385,7 +1387,7 @@ void RenderTargetCache::ChangeOwnership(
// Claim the current range. // Claim the current range.
it->second.render_target = dest; it->second.render_target = dest;
if (host_depth_encoding_different) { if (host_depth_encoding_different) {
it->second.host_depth_render_targets[dest.host_relevant_format] = dest; it->second.host_depth_render_targets[dest.resource_format] = dest;
} }
// Check if can merge with the next range after claiming. // Check if can merge with the next range after claiming.
std::map<uint32_t, OwnershipRange>::iterator it_next; std::map<uint32_t, OwnershipRange>::iterator it_next;

View File

@ -181,8 +181,10 @@ class RenderTargetCache {
// Returns bits where 0 is whether a depth render target is currently bound on // Returns bits where 0 is whether a depth render target is currently bound on
// the host and 1... are whether the same applies to color render targets, and // the host and 1... are whether the same applies to color render targets, and
// "host-relevant" formats of each. // formats (resource formats, but if needed, with gamma taken into account) of
// each.
uint32_t GetLastUpdateBoundRenderTargets( uint32_t GetLastUpdateBoundRenderTargets(
bool distinguish_gamma_formats,
uint32_t* depth_and_color_formats_out = nullptr) const; uint32_t* depth_and_color_formats_out = nullptr) const;
protected: protected:
@ -223,11 +225,8 @@ class RenderTargetCache {
uint32_t pitch_tiles_at_32bpp : 8; // 19 uint32_t pitch_tiles_at_32bpp : 8; // 19
xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 21 xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 21
uint32_t is_depth : 1; // 22 uint32_t is_depth : 1; // 22
// Not always the original format - blending precision ignored, formats // Ignoring the blending precision and sRGB.
// handled through the same render targets on the host are normalized, and uint32_t resource_format : xenos::kRenderTargetFormatBits; // 26
// with pixel shader interlock, replaced with some single 32bpp or 64bpp
// format because it's only needed for addressing.
uint32_t host_relevant_format : xenos::kRenderTargetFormatBits; // 26
}; };
uint32_t key = 0; uint32_t key = 0;
struct Hasher { struct Hasher {
@ -250,11 +249,11 @@ class RenderTargetCache {
xenos::ColorRenderTargetFormat GetColorFormat() const { xenos::ColorRenderTargetFormat GetColorFormat() const {
assert_false(is_depth); assert_false(is_depth);
return xenos::ColorRenderTargetFormat(host_relevant_format); return xenos::ColorRenderTargetFormat(resource_format);
} }
xenos::DepthRenderTargetFormat GetDepthFormat() const { xenos::DepthRenderTargetFormat GetDepthFormat() const {
assert_true(is_depth); assert_true(is_depth);
return xenos::DepthRenderTargetFormat(host_relevant_format); return xenos::DepthRenderTargetFormat(resource_format);
} }
bool Is64bpp() const { bool Is64bpp() const {
if (is_depth) { if (is_depth) {
@ -436,15 +435,6 @@ class RenderTargetCache {
uint32_t GetRenderTargetHeight(uint32_t pitch_tiles_at_32bpp, uint32_t GetRenderTargetHeight(uint32_t pitch_tiles_at_32bpp,
xenos::MsaaSamples msaa_samples) const; xenos::MsaaSamples msaa_samples) const;
// Normalizes the format if it's fine to use the same render target textures
// for the provided and the returned guest formats.
// xenos::GetStorageColorFormat is supposed to be done before calling, so
// redoing what it does in the implementations is not needed.
virtual xenos::ColorRenderTargetFormat GetHostRelevantColorFormat(
xenos::ColorRenderTargetFormat format) const {
return format;
}
virtual RenderTarget* CreateRenderTarget(RenderTargetKey key) = 0; virtual RenderTarget* CreateRenderTarget(RenderTargetKey key) = 0;
// Whether depth buffer is encoded differently on the host, thus after // Whether depth buffer is encoded differently on the host, thus after
@ -567,7 +557,7 @@ class RenderTargetCache {
return false; return false;
} }
if (host_depth_encoding_different && !key.is_depth && if (host_depth_encoding_different && !key.is_depth &&
host_depth_render_targets[key.host_relevant_format] != key) { host_depth_render_targets[key.resource_format] != key) {
// Depth encoding is the same, but different addressing is needed. // Depth encoding is the same, but different addressing is needed.
return false; return false;
} }
@ -582,6 +572,16 @@ class RenderTargetCache {
} }
}; };
// Maps a guest color render target format to the format used for the render
// target key's resource_format.
// sRGB, if used on the host, is a view property or global state - linear and
// sRGB host render targets can share data directly without transfers, so the
// gamma format maps to plain k_8_8_8_8. Every other format is passed through
// xenos::GetStorageColorFormat.
static constexpr xenos::ColorRenderTargetFormat GetColorResourceFormat(
    xenos::ColorRenderTargetFormat format) {
  return format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
             ? xenos::ColorRenderTargetFormat::k_8_8_8_8
             : xenos::GetStorageColorFormat(format);
}
RenderTarget* GetOrCreateRenderTarget(RenderTargetKey key); RenderTarget* GetOrCreateRenderTarget(RenderTargetKey key);
// Checks if changing ownership of the range to the specified render target // Checks if changing ownership of the range to the specified render target