Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental
This commit is contained in:
commit
d262214c1b
|
@ -3643,15 +3643,14 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
|
||||||
// flow.
|
// flow.
|
||||||
reg::RB_COLOR_INFO color_infos[4];
|
reg::RB_COLOR_INFO color_infos[4];
|
||||||
float rt_clamp[4][4];
|
float rt_clamp[4][4];
|
||||||
|
// Two UINT32_MAX if no components actually existing in the RT are written.
|
||||||
uint32_t rt_keep_masks[4][2];
|
uint32_t rt_keep_masks[4][2];
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||||
color_infos[i] = color_info;
|
color_infos[i] = color_info;
|
||||||
if (edram_rov_used) {
|
if (edram_rov_used) {
|
||||||
// Get the mask for keeping previous color's components unmodified,
|
RenderTargetCache::GetPSIColorFormatInfo(
|
||||||
// or two UINT32_MAX if no colors actually existing in the RT are written.
|
|
||||||
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
|
||||||
color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
|
color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
|
||||||
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
||||||
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
||||||
|
@ -4021,11 +4020,10 @@ XE_NOINLINE void D3D12CommandProcessor::UpdateSystemConstantValues_Impl(
|
||||||
rt_base_dwords_scaled);
|
rt_base_dwords_scaled);
|
||||||
system_constants_.edram_rt_base_dwords_scaled[i] =
|
system_constants_.edram_rt_base_dwords_scaled[i] =
|
||||||
rt_base_dwords_scaled;
|
rt_base_dwords_scaled;
|
||||||
uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags(
|
uint32_t format_flags =
|
||||||
color_info.color_format);
|
RenderTargetCache::AddPSIColorFormatFlags(color_info.color_format);
|
||||||
update_dirty_uint32_cmp(system_constants_.edram_rt_format_flags[i],
|
update_dirty_uint32_cmp(system_constants_.edram_rt_format_flags[i],
|
||||||
format_flags);
|
format_flags);
|
||||||
|
|
||||||
system_constants_.edram_rt_format_flags[i] = format_flags;
|
system_constants_.edram_rt_format_flags[i] = format_flags;
|
||||||
// Can't do float comparisons here because NaNs would result in always
|
// Can't do float comparisons here because NaNs would result in always
|
||||||
// setting the dirty flag.
|
// setting the dirty flag.
|
||||||
|
|
|
@ -267,19 +267,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
};
|
};
|
||||||
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
||||||
|
|
||||||
// Appended to the format in the format constant.
|
|
||||||
enum : uint32_t {
|
|
||||||
// Starting from bit 4 because the format itself needs 4 bits.
|
|
||||||
kRTFormatFlag_64bpp_Shift = 4,
|
|
||||||
// Requires clamping of blending sources and factors.
|
|
||||||
kRTFormatFlag_FixedPointColor_Shift,
|
|
||||||
kRTFormatFlag_FixedPointAlpha_Shift,
|
|
||||||
|
|
||||||
kRTFormatFlag_64bpp = 1u << kRTFormatFlag_64bpp_Shift,
|
|
||||||
kRTFormatFlag_FixedPointColor = 1u << kRTFormatFlag_FixedPointColor_Shift,
|
|
||||||
kRTFormatFlag_FixedPointAlpha = 1u << kRTFormatFlag_FixedPointAlpha_Shift,
|
|
||||||
};
|
|
||||||
|
|
||||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||||
// - SystemConstants::Index enum.
|
// - SystemConstants::Index enum.
|
||||||
// - system_constant_rdef_.
|
// - system_constant_rdef_.
|
||||||
|
@ -383,7 +370,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
|
|
||||||
uint32_t edram_rt_base_dwords_scaled[4];
|
uint32_t edram_rt_base_dwords_scaled[4];
|
||||||
|
|
||||||
// RT format combined with kRTFormatFlags.
|
// RT format combined with RenderTargetCache::kPSIColorFormatFlag values
|
||||||
|
// (pass via RenderTargetCache::AddPSIColorFormatFlags).
|
||||||
uint32_t edram_rt_format_flags[4];
|
uint32_t edram_rt_format_flags[4];
|
||||||
|
|
||||||
// Format info - values to clamp the color to before blending or storing.
|
// Format info - values to clamp the color to before blending or storing.
|
||||||
|
@ -524,40 +512,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kEdram,
|
kEdram,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Returns the format with internal flags for passing via the
|
|
||||||
// edram_rt_format_flags system constant.
|
|
||||||
static constexpr uint32_t ROV_AddColorFormatFlags(
|
|
||||||
xenos::ColorRenderTargetFormat format) {
|
|
||||||
uint32_t format_flags = uint32_t(format);
|
|
||||||
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16 ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
|
|
||||||
format_flags |= kRTFormatFlag_64bpp;
|
|
||||||
}
|
|
||||||
if (format == xenos::ColorRenderTargetFormat::k_8_8_8_8 ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_2_10_10_10 ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_16_16 ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_16_16_16_16 ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10) {
|
|
||||||
format_flags |=
|
|
||||||
kRTFormatFlag_FixedPointColor | kRTFormatFlag_FixedPointAlpha;
|
|
||||||
} else if (format == xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT ||
|
|
||||||
format == xenos::ColorRenderTargetFormat::
|
|
||||||
k_2_10_10_10_FLOAT_AS_16_16_16_16) {
|
|
||||||
format_flags |= kRTFormatFlag_FixedPointAlpha;
|
|
||||||
}
|
|
||||||
return format_flags;
|
|
||||||
}
|
|
||||||
// Returns the bits that need to be added to the RT flags constant - needs to
|
|
||||||
// be done externally, not in SetColorFormatConstants, because the flags
|
|
||||||
// contain other state.
|
|
||||||
static void ROV_GetColorFormatSystemConstants(
|
|
||||||
xenos::ColorRenderTargetFormat format, uint32_t write_mask,
|
|
||||||
float& clamp_rgb_low, float& clamp_alpha_low, float& clamp_rgb_high,
|
|
||||||
float& clamp_alpha_high, uint32_t& keep_mask_low,
|
|
||||||
uint32_t& keep_mask_high);
|
|
||||||
|
|
||||||
uint64_t GetDefaultVertexShaderModification(
|
uint64_t GetDefaultVertexShaderModification(
|
||||||
uint32_t dynamic_addressable_register_count,
|
uint32_t dynamic_addressable_register_count,
|
||||||
Shader::HostVertexShaderType host_vertex_shader_type =
|
Shader::HostVertexShaderType host_vertex_shader_type =
|
||||||
|
@ -772,6 +726,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// Whether it's possible and worth skipping running the translated shader for
|
// Whether it's possible and worth skipping running the translated shader for
|
||||||
// 2x2 quads.
|
// 2x2 quads.
|
||||||
bool ROV_IsDepthStencilEarly() const {
|
bool ROV_IsDepthStencilEarly() const {
|
||||||
|
assert_true(edram_rov_used_);
|
||||||
return !is_depth_only_pixel_shader_ && !current_shader().writes_depth() &&
|
return !is_depth_only_pixel_shader_ && !current_shader().writes_depth() &&
|
||||||
!current_shader().is_valid_memexport_used();
|
!current_shader().is_valid_memexport_used();
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,139 +14,13 @@
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/gpu/draw_util.h"
|
#include "xenia/gpu/draw_util.h"
|
||||||
|
#include "xenia/gpu/render_target_cache.h"
|
||||||
#include "xenia/gpu/texture_cache.h"
|
#include "xenia/gpu/texture_cache.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
using namespace ucode;
|
using namespace ucode;
|
||||||
|
|
||||||
void DxbcShaderTranslator::ROV_GetColorFormatSystemConstants(
|
|
||||||
xenos::ColorRenderTargetFormat format, uint32_t write_mask,
|
|
||||||
float& clamp_rgb_low, float& clamp_alpha_low, float& clamp_rgb_high,
|
|
||||||
float& clamp_alpha_high, uint32_t& keep_mask_low,
|
|
||||||
uint32_t& keep_mask_high) {
|
|
||||||
keep_mask_low = keep_mask_high = 0;
|
|
||||||
switch (format) {
|
|
||||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
|
|
||||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
|
|
||||||
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
|
||||||
clamp_rgb_high = clamp_alpha_high = 1.0f;
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
|
||||||
if (!(write_mask & (1 << i))) {
|
|
||||||
keep_mask_low |= uint32_t(0xFF) << (i * 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
|
|
||||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
|
|
||||||
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
|
||||||
clamp_rgb_high = clamp_alpha_high = 1.0f;
|
|
||||||
for (uint32_t i = 0; i < 3; ++i) {
|
|
||||||
if (!(write_mask & (1 << i))) {
|
|
||||||
keep_mask_low |= uint32_t(0x3FF) << (i * 10);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b1000)) {
|
|
||||||
keep_mask_low |= uint32_t(3) << 30;
|
|
||||||
}
|
|
||||||
} break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
|
||||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: {
|
|
||||||
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
|
||||||
clamp_rgb_high = 31.875f;
|
|
||||||
clamp_alpha_high = 1.0f;
|
|
||||||
for (uint32_t i = 0; i < 3; ++i) {
|
|
||||||
if (!(write_mask & (1 << i))) {
|
|
||||||
keep_mask_low |= uint32_t(0x3FF) << (i * 10);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b1000)) {
|
|
||||||
keep_mask_low |= uint32_t(3) << 30;
|
|
||||||
}
|
|
||||||
} break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_16_16:
|
|
||||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
|
|
||||||
// Alpha clamping affects blending source, so it's non-zero for alpha for
|
|
||||||
// k_16_16 (the render target is fixed-point). There's one deviation from
|
|
||||||
// how Direct3D 11.3 functional specification defines SNorm conversion
|
|
||||||
// (NaN should be 0, not the lowest negative number), but NaN handling in
|
|
||||||
// output shouldn't be very important.
|
|
||||||
clamp_rgb_low = clamp_alpha_low = -32.0f;
|
|
||||||
clamp_rgb_high = clamp_alpha_high = 32.0f;
|
|
||||||
if (!(write_mask & 0b0001)) {
|
|
||||||
keep_mask_low |= 0xFFFFu;
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b0010)) {
|
|
||||||
keep_mask_low |= 0xFFFF0000u;
|
|
||||||
}
|
|
||||||
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16) {
|
|
||||||
if (!(write_mask & 0b0100)) {
|
|
||||||
keep_mask_high |= 0xFFFFu;
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b1000)) {
|
|
||||||
keep_mask_high |= 0xFFFF0000u;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
write_mask &= 0b0011;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
|
||||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
|
||||||
// No NaNs on the Xbox 360 GPU, though can't use the extended range with
|
|
||||||
// f32tof16.
|
|
||||||
clamp_rgb_low = clamp_alpha_low = -65504.0f;
|
|
||||||
clamp_rgb_high = clamp_alpha_high = 65504.0f;
|
|
||||||
if (!(write_mask & 0b0001)) {
|
|
||||||
keep_mask_low |= 0xFFFFu;
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b0010)) {
|
|
||||||
keep_mask_low |= 0xFFFF0000u;
|
|
||||||
}
|
|
||||||
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT) {
|
|
||||||
if (!(write_mask & 0b0100)) {
|
|
||||||
keep_mask_high |= 0xFFFFu;
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b1000)) {
|
|
||||||
keep_mask_high |= 0xFFFF0000u;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
write_mask &= 0b0011;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
|
||||||
// No clamping - let min/max always pick the original value.
|
|
||||||
clamp_rgb_low = clamp_alpha_low = clamp_rgb_high = clamp_alpha_high =
|
|
||||||
std::nanf("");
|
|
||||||
write_mask &= 0b0001;
|
|
||||||
if (!(write_mask & 0b0001)) {
|
|
||||||
keep_mask_low = ~uint32_t(0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
|
|
||||||
// No clamping - let min/max always pick the original value.
|
|
||||||
clamp_rgb_low = clamp_alpha_low = clamp_rgb_high = clamp_alpha_high =
|
|
||||||
std::nanf("");
|
|
||||||
write_mask &= 0b0011;
|
|
||||||
if (!(write_mask & 0b0001)) {
|
|
||||||
keep_mask_low = ~uint32_t(0);
|
|
||||||
}
|
|
||||||
if (!(write_mask & 0b0010)) {
|
|
||||||
keep_mask_high = ~uint32_t(0);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert_unhandled_case(format);
|
|
||||||
// Disable invalid render targets.
|
|
||||||
write_mask = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Special case handled in the shaders for empty write mask to completely skip
|
|
||||||
// a disabled render target: all keep bits are set.
|
|
||||||
if (!write_mask) {
|
|
||||||
keep_mask_low = keep_mask_high = ~uint32_t(0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
||||||
bool any_color_targets_written = current_shader().writes_color_targets() != 0;
|
bool any_color_targets_written = current_shader().writes_color_targets() != 0;
|
||||||
|
|
||||||
|
@ -484,8 +358,8 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
||||||
{
|
{
|
||||||
// Copy the 4x AA coverage to system_temp_rov_params_.x, making top-right
|
// Copy the 4x AA coverage to system_temp_rov_params_.x, making top-right
|
||||||
// the sample [2] and bottom-left the sample [1] (the opposite of Direct3D
|
// the sample [2] and bottom-left the sample [1] (the opposite of Direct3D
|
||||||
// 12), because on the Xbox 360, 2x MSAA doubles the storage width, 4x MSAA
|
// 12), because on the Xbox 360, 2x MSAA doubles the storage height, 4x MSAA
|
||||||
// doubles the storage height.
|
// doubles the storage width.
|
||||||
// Flip samples in bits 0:1 to bits 29:30.
|
// Flip samples in bits 0:1 to bits 29:30.
|
||||||
a_.OpBFRev(dxbc::Dest::R(system_temp_rov_params_, 0b0001),
|
a_.OpBFRev(dxbc::Dest::R(system_temp_rov_params_, 0b0001),
|
||||||
dxbc::Src::VCoverage());
|
dxbc::Src::VCoverage());
|
||||||
|
@ -1304,7 +1178,7 @@ void DxbcShaderTranslator::ROV_UnpackColor(
|
||||||
// k_8_8_8_8_GAMMA
|
// k_8_8_8_8_GAMMA
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
|
i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
|
||||||
: xenos::ColorRenderTargetFormat::k_8_8_8_8)));
|
: xenos::ColorRenderTargetFormat::k_8_8_8_8)));
|
||||||
// Unpack the components.
|
// Unpack the components.
|
||||||
|
@ -1328,9 +1202,9 @@ void DxbcShaderTranslator::ROV_UnpackColor(
|
||||||
// k_2_10_10_10
|
// k_2_10_10_10
|
||||||
// k_2_10_10_10_AS_10_10_10_10
|
// k_2_10_10_10_AS_10_10_10_10
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
a_.OpCase(dxbc::Src::LU(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
ROV_AddColorFormatFlags(xenos::ColorRenderTargetFormat::k_2_10_10_10)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10)));
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10)));
|
||||||
{
|
{
|
||||||
// Unpack the components.
|
// Unpack the components.
|
||||||
|
@ -1350,9 +1224,9 @@ void DxbcShaderTranslator::ROV_UnpackColor(
|
||||||
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
||||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT)));
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16)));
|
||||||
{
|
{
|
||||||
// Unpack the alpha.
|
// Unpack the alpha.
|
||||||
|
@ -1381,7 +1255,7 @@ void DxbcShaderTranslator::ROV_UnpackColor(
|
||||||
// k_16_16_16_16 (64bpp)
|
// k_16_16_16_16 (64bpp)
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
||||||
: xenos::ColorRenderTargetFormat::k_16_16)));
|
: xenos::ColorRenderTargetFormat::k_16_16)));
|
||||||
dxbc::Dest color_components_dest(
|
dxbc::Dest color_components_dest(
|
||||||
|
@ -1404,7 +1278,7 @@ void DxbcShaderTranslator::ROV_UnpackColor(
|
||||||
// k_16_16_16_16_FLOAT (64bpp)
|
// k_16_16_16_16_FLOAT (64bpp)
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT
|
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT
|
||||||
: xenos::ColorRenderTargetFormat::k_16_16_FLOAT)));
|
: xenos::ColorRenderTargetFormat::k_16_16_FLOAT)));
|
||||||
dxbc::Dest color_components_dest(
|
dxbc::Dest color_components_dest(
|
||||||
|
@ -1465,7 +1339,7 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
||||||
// k_8_8_8_8_GAMMA
|
// k_8_8_8_8_GAMMA
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
|
i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA
|
||||||
: xenos::ColorRenderTargetFormat::k_8_8_8_8)));
|
: xenos::ColorRenderTargetFormat::k_8_8_8_8)));
|
||||||
for (uint32_t j = 0; j < 4; ++j) {
|
for (uint32_t j = 0; j < 4; ++j) {
|
||||||
|
@ -1496,9 +1370,9 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
||||||
// k_2_10_10_10
|
// k_2_10_10_10
|
||||||
// k_2_10_10_10_AS_10_10_10_10
|
// k_2_10_10_10_AS_10_10_10_10
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
a_.OpCase(dxbc::Src::LU(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
ROV_AddColorFormatFlags(xenos::ColorRenderTargetFormat::k_2_10_10_10)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10)));
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10)));
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
// Denormalize and convert to fixed-point.
|
// Denormalize and convert to fixed-point.
|
||||||
|
@ -1518,9 +1392,9 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
||||||
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
// k_2_10_10_10_FLOAT_AS_16_16_16_16
|
||||||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT)));
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16)));
|
xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16)));
|
||||||
{
|
{
|
||||||
// Convert red directly to the destination, which may be the same as the
|
// Convert red directly to the destination, which may be the same as the
|
||||||
|
@ -1550,7 +1424,7 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
||||||
// k_16_16_16_16 (64bpp)
|
// k_16_16_16_16 (64bpp)
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16
|
||||||
: xenos::ColorRenderTargetFormat::k_16_16)));
|
: xenos::ColorRenderTargetFormat::k_16_16)));
|
||||||
for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) {
|
for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) {
|
||||||
|
@ -1582,7 +1456,7 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor(
|
||||||
// k_16_16_16_16_FLOAT (64bpp)
|
// k_16_16_16_16_FLOAT (64bpp)
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags(
|
a_.OpCase(dxbc::Src::LU(RenderTargetCache::AddPSIColorFormatFlags(
|
||||||
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT
|
i ? xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT
|
||||||
: xenos::ColorRenderTargetFormat::k_16_16_FLOAT)));
|
: xenos::ColorRenderTargetFormat::k_16_16_FLOAT)));
|
||||||
for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) {
|
for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) {
|
||||||
|
@ -2230,7 +2104,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Load whether the render target is 64bpp to system_temp_rov_params_.y to
|
// Load whether the render target is 64bpp to system_temp_rov_params_.y to
|
||||||
// get the needed relative sample address.
|
// get the needed relative sample address.
|
||||||
a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||||
rt_format_flags_src, dxbc::Src::LU(kRTFormatFlag_64bpp));
|
rt_format_flags_src,
|
||||||
|
dxbc::Src::LU(RenderTargetCache::kPSIColorFormatFlag_64bpp));
|
||||||
// Choose the relative sample address for the render target to
|
// Choose the relative sample address for the render target to
|
||||||
// system_temp_rov_params_.y.
|
// system_temp_rov_params_.y.
|
||||||
a_.OpMovC(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
a_.OpMovC(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||||
|
@ -2287,7 +2162,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the blending source color is fixed-point for clamping if it is.
|
// Get if the blending source color is fixed-point for clamping if it is.
|
||||||
// temp.x = whether color is fixed-point.
|
// temp.x = whether color is fixed-point.
|
||||||
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointColor));
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointColor));
|
||||||
// Check if the blending source color is fixed-point and needs clamping.
|
// Check if the blending source color is fixed-point and needs clamping.
|
||||||
// temp.x = free.
|
// temp.x = free.
|
||||||
a_.OpIf(true, temp_x_src);
|
a_.OpIf(true, temp_x_src);
|
||||||
|
@ -2306,7 +2182,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the blending source alpha is fixed-point for clamping if it is.
|
// Get if the blending source alpha is fixed-point for clamping if it is.
|
||||||
// temp.x = whether alpha is fixed-point.
|
// temp.x = whether alpha is fixed-point.
|
||||||
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha));
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointAlpha));
|
||||||
// Check if the blending source alpha is fixed-point and needs clamping.
|
// Check if the blending source alpha is fixed-point and needs clamping.
|
||||||
// temp.x = free.
|
// temp.x = free.
|
||||||
a_.OpIf(true, temp_x_src);
|
a_.OpIf(true, temp_x_src);
|
||||||
|
@ -2387,7 +2264,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the format is 64bpp to temp.w.
|
// Get if the format is 64bpp to temp.w.
|
||||||
// temp.w = whether the render target is 64bpp.
|
// temp.w = whether the render target is 64bpp.
|
||||||
a_.OpAnd(temp_w_dest, rt_format_flags_src,
|
a_.OpAnd(temp_w_dest, rt_format_flags_src,
|
||||||
dxbc::Src::LU(kRTFormatFlag_64bpp));
|
dxbc::Src::LU(RenderTargetCache::kPSIColorFormatFlag_64bpp));
|
||||||
// Check if the format is 64bpp.
|
// Check if the format is 64bpp.
|
||||||
// temp.w = free.
|
// temp.w = free.
|
||||||
a_.OpIf(true, temp_w_src);
|
a_.OpIf(true, temp_w_src);
|
||||||
|
@ -2478,8 +2355,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the render target color is fixed-point and the source
|
// Get if the render target color is fixed-point and the source
|
||||||
// color factor needs clamping to temp.x.
|
// color factor needs clamping to temp.x.
|
||||||
// temp.x = whether color is fixed-point.
|
// temp.x = whether color is fixed-point.
|
||||||
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
a_.OpAnd(
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointColor));
|
temp_x_dest, rt_format_flags_src,
|
||||||
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointColor));
|
||||||
// Check if the source color factor needs clamping.
|
// Check if the source color factor needs clamping.
|
||||||
a_.OpIf(true, temp_x_src);
|
a_.OpIf(true, temp_x_src);
|
||||||
{
|
{
|
||||||
|
@ -2558,8 +2437,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the render target color is fixed-point and the
|
// Get if the render target color is fixed-point and the
|
||||||
// destination color factor needs clamping to temp.x.
|
// destination color factor needs clamping to temp.x.
|
||||||
// temp.x = whether color is fixed-point.
|
// temp.x = whether color is fixed-point.
|
||||||
a_.OpAnd(temp_x_dest, rt_format_flags_src,
|
a_.OpAnd(
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointColor));
|
temp_x_dest, rt_format_flags_src,
|
||||||
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointColor));
|
||||||
// Check if the destination color factor needs clamping.
|
// Check if the destination color factor needs clamping.
|
||||||
a_.OpIf(true, temp_x_src);
|
a_.OpIf(true, temp_x_src);
|
||||||
{
|
{
|
||||||
|
@ -2701,8 +2582,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the render target alpha is fixed-point and the source
|
// Get if the render target alpha is fixed-point and the source
|
||||||
// alpha factor needs clamping to temp.y.
|
// alpha factor needs clamping to temp.y.
|
||||||
// temp.y = whether alpha is fixed-point.
|
// temp.y = whether alpha is fixed-point.
|
||||||
a_.OpAnd(temp_y_dest, rt_format_flags_src,
|
a_.OpAnd(
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha));
|
temp_y_dest, rt_format_flags_src,
|
||||||
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointAlpha));
|
||||||
// Check if the source alpha factor needs clamping.
|
// Check if the source alpha factor needs clamping.
|
||||||
a_.OpIf(true, temp_y_src);
|
a_.OpIf(true, temp_y_src);
|
||||||
{
|
{
|
||||||
|
@ -2769,9 +2652,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// destination alpha factor needs clamping.
|
// destination alpha factor needs clamping.
|
||||||
// alpha_is_fixed_temp.x = whether alpha is fixed-point.
|
// alpha_is_fixed_temp.x = whether alpha is fixed-point.
|
||||||
uint32_t alpha_is_fixed_temp = PushSystemTemp();
|
uint32_t alpha_is_fixed_temp = PushSystemTemp();
|
||||||
a_.OpAnd(dxbc::Dest::R(alpha_is_fixed_temp, 0b0001),
|
a_.OpAnd(
|
||||||
|
dxbc::Dest::R(alpha_is_fixed_temp, 0b0001),
|
||||||
rt_format_flags_src,
|
rt_format_flags_src,
|
||||||
dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha));
|
dxbc::Src::LU(
|
||||||
|
RenderTargetCache::kPSIColorFormatFlag_FixedPointAlpha));
|
||||||
// Check if the destination alpha factor needs clamping.
|
// Check if the destination alpha factor needs clamping.
|
||||||
a_.OpIf(true,
|
a_.OpIf(true,
|
||||||
dxbc::Src::R(alpha_is_fixed_temp, dxbc::Src::kXXXX));
|
dxbc::Src::R(alpha_is_fixed_temp, dxbc::Src::kXXXX));
|
||||||
|
@ -2925,7 +2810,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Get if the format is 64bpp to temp.z.
|
// Get if the format is 64bpp to temp.z.
|
||||||
// temp.z = whether the render target is 64bpp.
|
// temp.z = whether the render target is 64bpp.
|
||||||
a_.OpAnd(temp_z_dest, rt_format_flags_src,
|
a_.OpAnd(temp_z_dest, rt_format_flags_src,
|
||||||
dxbc::Src::LU(kRTFormatFlag_64bpp));
|
dxbc::Src::LU(RenderTargetCache::kPSIColorFormatFlag_64bpp));
|
||||||
// Check if the format is 64bpp.
|
// Check if the format is 64bpp.
|
||||||
// temp.z = free.
|
// temp.z = free.
|
||||||
a_.OpIf(true, temp_z_src);
|
a_.OpIf(true, temp_z_src);
|
||||||
|
@ -2954,16 +2839,29 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Close the sample covered check.
|
// Close the sample covered check.
|
||||||
a_.OpEndIf();
|
a_.OpEndIf();
|
||||||
|
|
||||||
// Go to the next sample (samples are at +0, +(80*scale_x), +1,
|
// Go to the next sample (samples are at +0, +(80*scale_x), +dwpp,
|
||||||
// +(80*scale_x+1), so need to do +(80*scale_x), -(80*scale_x-1),
|
// +(80*scale_x+dwpp), so need to do +(80*scale_x), -(80*scale_x-dwpp),
|
||||||
// +(80*scale_x) and -(80*scale_x+1) after each sample).
|
// +(80*scale_x) and -(80*scale_x+dwpp) after each sample).
|
||||||
// Though no need to do this for the last sample as for the next render
|
// Though no need to do this for the last sample as for the next render
|
||||||
// target, the address will be recalculated.
|
// target, the address will be recalculated.
|
||||||
if (j < 3) {
|
if (j < 3) {
|
||||||
|
if (j & 1) {
|
||||||
|
// temp.z = whether the render target is 64bpp.
|
||||||
|
a_.OpAnd(temp_z_dest, rt_format_flags_src,
|
||||||
|
dxbc::Src::LU(RenderTargetCache::kPSIColorFormatFlag_64bpp));
|
||||||
|
// temp.z = offset from the current sample to the next.
|
||||||
|
a_.OpMovC(temp_z_dest, temp_z_src,
|
||||||
|
dxbc::Src::LI(-int32_t(tile_width) + 2 * (2 - int32_t(j))),
|
||||||
|
dxbc::Src::LI(-int32_t(tile_width) + (2 - int32_t(j))));
|
||||||
|
// temp.z = free.
|
||||||
a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||||
dxbc::Src::LI((j & 1) ? -int32_t(tile_width) + 2 - j
|
temp_z_src);
|
||||||
: int32_t(tile_width)));
|
} else {
|
||||||
|
a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||||
|
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||||
|
dxbc::Src::LU(tile_width));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2987,6 +2885,17 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
|
|
||||||
if (current_shader().writes_color_target(0) &&
|
if (current_shader().writes_color_target(0) &&
|
||||||
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
|
!IsForceEarlyDepthStencilGlobalFlagEnabled()) {
|
||||||
|
if (edram_rov_used_) {
|
||||||
|
// Check if the render target 0 was written to on the execution path.
|
||||||
|
uint32_t rt_0_written_temp = PushSystemTemp();
|
||||||
|
a_.OpAnd(dxbc::Dest::R(rt_0_written_temp, 0b0001),
|
||||||
|
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX),
|
||||||
|
dxbc::Src::LU(1 << 8));
|
||||||
|
a_.OpIf(true, dxbc::Src::R(rt_0_written_temp, dxbc::Src::kXXXX));
|
||||||
|
// Release rt_0_written_temp.
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
|
|
||||||
// Alpha test.
|
// Alpha test.
|
||||||
// X - mask, then masked result (SGPR for loading, VGPR for masking).
|
// X - mask, then masked result (SGPR for loading, VGPR for masking).
|
||||||
// Y - operation result (SGPR for mask operations, VGPR for alpha
|
// Y - operation result (SGPR for mask operations, VGPR for alpha
|
||||||
|
@ -3057,11 +2966,16 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
a_.OpEndIf();
|
a_.OpEndIf();
|
||||||
// Release alpha_test_temp.
|
// Release alpha_test_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
|
||||||
|
|
||||||
// Discard samples with alpha to coverage.
|
// Discard samples with alpha to coverage.
|
||||||
CompletePixelShader_AlphaToMask();
|
CompletePixelShader_AlphaToMask();
|
||||||
|
|
||||||
|
if (edram_rov_used_) {
|
||||||
|
// Close the render target 0 written check.
|
||||||
|
a_.OpEndIf();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Write the values to the render targets. Not applying the exponent bias yet
|
// Write the values to the render targets. Not applying the exponent bias yet
|
||||||
// because the original 0 to 1 alpha value is needed for alpha to coverage,
|
// because the original 0 to 1 alpha value is needed for alpha to coverage,
|
||||||
// which is done differently for ROV and RTV/DSV.
|
// which is done differently for ROV and RTV/DSV.
|
||||||
|
|
|
@ -207,6 +207,134 @@ DEFINE_bool(
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
|
||||||
|
void RenderTargetCache::GetPSIColorFormatInfo(
|
||||||
|
xenos::ColorRenderTargetFormat format, uint32_t write_mask,
|
||||||
|
float& clamp_rgb_low, float& clamp_alpha_low, float& clamp_rgb_high,
|
||||||
|
float& clamp_alpha_high, uint32_t& keep_mask_low,
|
||||||
|
uint32_t& keep_mask_high) {
|
||||||
|
keep_mask_low = keep_mask_high = 0;
|
||||||
|
switch (format) {
|
||||||
|
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
|
||||||
|
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
|
||||||
|
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
||||||
|
clamp_rgb_high = clamp_alpha_high = 1.0f;
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (!(write_mask & (1 << i))) {
|
||||||
|
keep_mask_low |= uint32_t(0xFF) << (i * 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
|
||||||
|
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
|
||||||
|
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
||||||
|
clamp_rgb_high = clamp_alpha_high = 1.0f;
|
||||||
|
for (uint32_t i = 0; i < 3; ++i) {
|
||||||
|
if (!(write_mask & (1 << i))) {
|
||||||
|
keep_mask_low |= uint32_t(0x3FF) << (i * 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b1000)) {
|
||||||
|
keep_mask_low |= uint32_t(3) << 30;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||||
|
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16: {
|
||||||
|
clamp_rgb_low = clamp_alpha_low = 0.0f;
|
||||||
|
clamp_rgb_high = 31.875f;
|
||||||
|
clamp_alpha_high = 1.0f;
|
||||||
|
for (uint32_t i = 0; i < 3; ++i) {
|
||||||
|
if (!(write_mask & (1 << i))) {
|
||||||
|
keep_mask_low |= uint32_t(0x3FF) << (i * 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b1000)) {
|
||||||
|
keep_mask_low |= uint32_t(3) << 30;
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_16_16:
|
||||||
|
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
|
||||||
|
// Alpha clamping affects blending source, so it's non-zero for alpha for
|
||||||
|
// k_16_16 (the render target is fixed-point). There's one deviation from
|
||||||
|
// how Direct3D 11.3 functional specification defines SNorm conversion
|
||||||
|
// (NaN should be 0, not the lowest negative number), and that needs to be
|
||||||
|
// handled separately.
|
||||||
|
clamp_rgb_low = clamp_alpha_low = -32.0f;
|
||||||
|
clamp_rgb_high = clamp_alpha_high = 32.0f;
|
||||||
|
if (!(write_mask & 0b0001)) {
|
||||||
|
keep_mask_low |= 0xFFFFu;
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b0010)) {
|
||||||
|
keep_mask_low |= 0xFFFF0000u;
|
||||||
|
}
|
||||||
|
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16) {
|
||||||
|
if (!(write_mask & 0b0100)) {
|
||||||
|
keep_mask_high |= 0xFFFFu;
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b1000)) {
|
||||||
|
keep_mask_high |= 0xFFFF0000u;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
write_mask &= 0b0011;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||||
|
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||||
|
// No NaNs on the Xbox 360 GPU, though can't use the extended range with
|
||||||
|
// Direct3D and Vulkan conversions.
|
||||||
|
// TODO(Triang3l): Use the extended-range encoding in all implementations.
|
||||||
|
clamp_rgb_low = clamp_alpha_low = -65504.0f;
|
||||||
|
clamp_rgb_high = clamp_alpha_high = 65504.0f;
|
||||||
|
if (!(write_mask & 0b0001)) {
|
||||||
|
keep_mask_low |= 0xFFFFu;
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b0010)) {
|
||||||
|
keep_mask_low |= 0xFFFF0000u;
|
||||||
|
}
|
||||||
|
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT) {
|
||||||
|
if (!(write_mask & 0b0100)) {
|
||||||
|
keep_mask_high |= 0xFFFFu;
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b1000)) {
|
||||||
|
keep_mask_high |= 0xFFFF0000u;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
write_mask &= 0b0011;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
||||||
|
// No clamping - let min/max always pick the original value.
|
||||||
|
clamp_rgb_low = clamp_alpha_low = clamp_rgb_high = clamp_alpha_high =
|
||||||
|
std::nanf("");
|
||||||
|
write_mask &= 0b0001;
|
||||||
|
if (!(write_mask & 0b0001)) {
|
||||||
|
keep_mask_low = ~uint32_t(0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
|
||||||
|
// No clamping - let min/max always pick the original value.
|
||||||
|
clamp_rgb_low = clamp_alpha_low = clamp_rgb_high = clamp_alpha_high =
|
||||||
|
std::nanf("");
|
||||||
|
write_mask &= 0b0011;
|
||||||
|
if (!(write_mask & 0b0001)) {
|
||||||
|
keep_mask_low = ~uint32_t(0);
|
||||||
|
}
|
||||||
|
if (!(write_mask & 0b0010)) {
|
||||||
|
keep_mask_high = ~uint32_t(0);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(format);
|
||||||
|
// Disable invalid render targets.
|
||||||
|
write_mask = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Special case handled in the shaders for empty write mask to completely skip
|
||||||
|
// a disabled render target: all keep bits are set.
|
||||||
|
if (!write_mask) {
|
||||||
|
keep_mask_low = keep_mask_high = ~uint32_t(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t RenderTargetCache::Transfer::GetRangeRectangles(
|
uint32_t RenderTargetCache::Transfer::GetRangeRectangles(
|
||||||
uint32_t start_tiles, uint32_t end_tiles, uint32_t base_tiles,
|
uint32_t start_tiles, uint32_t end_tiles, uint32_t base_tiles,
|
||||||
uint32_t pitch_tiles, xenos::MsaaSamples msaa_samples, bool is_64bpp,
|
uint32_t pitch_tiles, xenos::MsaaSamples msaa_samples, bool is_64bpp,
|
||||||
|
|
|
@ -113,6 +113,54 @@ class RenderTargetCache {
|
||||||
kSrgbToLinearExponent);
|
kSrgbToLinearExponent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pixel shader interlock implementation helpers.
|
||||||
|
|
||||||
|
// Appended to the format in the format constant via bitwise OR.
|
||||||
|
enum : uint32_t {
|
||||||
|
kPSIColorFormatFlag_64bpp_Shift = xenos::kColorRenderTargetFormatBits,
|
||||||
|
// Requires clamping of blending sources and factors.
|
||||||
|
kPSIColorFormatFlag_FixedPointColor_Shift,
|
||||||
|
kPSIColorFormatFlag_FixedPointAlpha_Shift,
|
||||||
|
|
||||||
|
kPSIColorFormatFlag_64bpp = uint32_t(1) << kPSIColorFormatFlag_64bpp_Shift,
|
||||||
|
kPSIColorFormatFlag_FixedPointColor =
|
||||||
|
uint32_t(1) << kPSIColorFormatFlag_FixedPointColor_Shift,
|
||||||
|
kPSIColorFormatFlag_FixedPointAlpha =
|
||||||
|
uint32_t(1) << kPSIColorFormatFlag_FixedPointAlpha_Shift,
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr uint32_t AddPSIColorFormatFlags(
|
||||||
|
xenos::ColorRenderTargetFormat format) {
|
||||||
|
uint32_t format_flags = uint32_t(format);
|
||||||
|
if (format == xenos::ColorRenderTargetFormat::k_16_16_16_16 ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
|
||||||
|
format_flags |= kPSIColorFormatFlag_64bpp;
|
||||||
|
}
|
||||||
|
if (format == xenos::ColorRenderTargetFormat::k_8_8_8_8 ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_2_10_10_10 ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_16_16 ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_16_16_16_16 ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10) {
|
||||||
|
format_flags |= kPSIColorFormatFlag_FixedPointColor |
|
||||||
|
kPSIColorFormatFlag_FixedPointAlpha;
|
||||||
|
} else if (format == xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT ||
|
||||||
|
format == xenos::ColorRenderTargetFormat::
|
||||||
|
k_2_10_10_10_FLOAT_AS_16_16_16_16) {
|
||||||
|
format_flags |= kPSIColorFormatFlag_FixedPointAlpha;
|
||||||
|
}
|
||||||
|
return format_flags;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void GetPSIColorFormatInfo(xenos::ColorRenderTargetFormat format,
|
||||||
|
uint32_t write_mask, float& clamp_rgb_low,
|
||||||
|
float& clamp_alpha_low,
|
||||||
|
float& clamp_rgb_high,
|
||||||
|
float& clamp_alpha_high,
|
||||||
|
uint32_t& keep_mask_low,
|
||||||
|
uint32_t& keep_mask_high);
|
||||||
|
|
||||||
virtual ~RenderTargetCache();
|
virtual ~RenderTargetCache();
|
||||||
|
|
||||||
virtual Path GetPath() const = 0;
|
virtual Path GetPath() const = 0;
|
||||||
|
|
|
@ -54,8 +54,10 @@ DEFINE_string(
|
||||||
"GPU");
|
"GPU");
|
||||||
DEFINE_bool(shader_output_bindless_resources, false,
|
DEFINE_bool(shader_output_bindless_resources, false,
|
||||||
"Output host shader with bindless resources used.", "GPU");
|
"Output host shader with bindless resources used.", "GPU");
|
||||||
DEFINE_bool(shader_output_dxbc_rov, false,
|
DEFINE_bool(
|
||||||
"Output ROV-based output-merger code in DXBC pixel shaders.",
|
shader_output_pixel_shader_interlock, false,
|
||||||
|
"Output host shader with a render backend implementation based on pixel "
|
||||||
|
"shader interlock.",
|
||||||
"GPU");
|
"GPU");
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -124,12 +126,15 @@ int shader_compiler_main(const std::vector<std::string>& args) {
|
||||||
SpirvShaderTranslator::Features spirv_features(true);
|
SpirvShaderTranslator::Features spirv_features(true);
|
||||||
if (cvars::shader_output_type == "spirv" ||
|
if (cvars::shader_output_type == "spirv" ||
|
||||||
cvars::shader_output_type == "spirvtext") {
|
cvars::shader_output_type == "spirvtext") {
|
||||||
translator = std::make_unique<SpirvShaderTranslator>(spirv_features);
|
translator = std::make_unique<SpirvShaderTranslator>(
|
||||||
|
spirv_features, true, true,
|
||||||
|
cvars::shader_output_pixel_shader_interlock);
|
||||||
} else if (cvars::shader_output_type == "dxbc" ||
|
} else if (cvars::shader_output_type == "dxbc" ||
|
||||||
cvars::shader_output_type == "dxbctext") {
|
cvars::shader_output_type == "dxbctext") {
|
||||||
translator = std::make_unique<DxbcShaderTranslator>(
|
translator = std::make_unique<DxbcShaderTranslator>(
|
||||||
ui::GraphicsProvider::GpuVendorID(0),
|
ui::GraphicsProvider::GpuVendorID(0),
|
||||||
cvars::shader_output_bindless_resources, cvars::shader_output_dxbc_rov);
|
cvars::shader_output_bindless_resources,
|
||||||
|
cvars::shader_output_pixel_shader_interlock);
|
||||||
} else {
|
} else {
|
||||||
// Just output microcode disassembly generated during microcode information
|
// Just output microcode disassembly generated during microcode information
|
||||||
// gathering.
|
// gathering.
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/string_buffer.h"
|
||||||
#include "xenia/gpu/spirv_shader.h"
|
#include "xenia/gpu/spirv_shader.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
@ -31,6 +32,8 @@ SpirvShaderTranslator::Features::Features(bool all)
|
||||||
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
|
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
|
||||||
clip_distance(all),
|
clip_distance(all),
|
||||||
cull_distance(all),
|
cull_distance(all),
|
||||||
|
demote_to_helper_invocation(all),
|
||||||
|
fragment_shader_sample_interlock(all),
|
||||||
full_draw_index_uint32(all),
|
full_draw_index_uint32(all),
|
||||||
image_view_format_swizzle(all),
|
image_view_format_swizzle(all),
|
||||||
signed_zero_inf_nan_preserve_float32(all),
|
signed_zero_inf_nan_preserve_float32(all),
|
||||||
|
@ -42,6 +45,14 @@ SpirvShaderTranslator::Features::Features(
|
||||||
provider.device_properties().limits.maxStorageBufferRange),
|
provider.device_properties().limits.maxStorageBufferRange),
|
||||||
clip_distance(provider.device_features().shaderClipDistance),
|
clip_distance(provider.device_features().shaderClipDistance),
|
||||||
cull_distance(provider.device_features().shaderCullDistance),
|
cull_distance(provider.device_features().shaderCullDistance),
|
||||||
|
demote_to_helper_invocation(
|
||||||
|
provider.device_extensions().ext_shader_demote_to_helper_invocation &&
|
||||||
|
provider.device_shader_demote_to_helper_invocation_features()
|
||||||
|
.shaderDemoteToHelperInvocation),
|
||||||
|
fragment_shader_sample_interlock(
|
||||||
|
provider.device_extensions().ext_fragment_shader_interlock &&
|
||||||
|
provider.device_fragment_shader_interlock_features()
|
||||||
|
.fragmentShaderSampleInterlock),
|
||||||
full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) {
|
full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) {
|
||||||
uint32_t device_version = provider.device_properties().apiVersion;
|
uint32_t device_version = provider.device_properties().apiVersion;
|
||||||
const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions =
|
const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions =
|
||||||
|
@ -78,9 +89,6 @@ SpirvShaderTranslator::Features::Features(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SpirvShaderTranslator::SpirvShaderTranslator(const Features& features)
|
|
||||||
: features_(features) {}
|
|
||||||
|
|
||||||
uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification(
|
uint64_t SpirvShaderTranslator::GetDefaultVertexShaderModification(
|
||||||
uint32_t dynamic_addressable_register_count,
|
uint32_t dynamic_addressable_register_count,
|
||||||
Shader::HostVertexShaderType host_vertex_shader_type) const {
|
Shader::HostVertexShaderType host_vertex_shader_type) const {
|
||||||
|
@ -99,6 +107,19 @@ uint64_t SpirvShaderTranslator::GetDefaultPixelShaderModification(
|
||||||
return shader_modification.value;
|
return shader_modification.value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<uint8_t> SpirvShaderTranslator::CreateDepthOnlyFragmentShader() {
|
||||||
|
is_depth_only_fragment_shader_ = true;
|
||||||
|
// TODO(Triang3l): Handle in a nicer way (is_depth_only_fragment_shader_ is a
|
||||||
|
// leftover from when a Shader object wasn't used during translation).
|
||||||
|
Shader shader(xenos::ShaderType::kPixel, 0, nullptr, 0);
|
||||||
|
StringBuffer instruction_disassembly_buffer;
|
||||||
|
shader.AnalyzeUcode(instruction_disassembly_buffer);
|
||||||
|
Shader::Translation& translation = *shader.GetOrCreateTranslation(0);
|
||||||
|
TranslateAnalyzedShader(translation);
|
||||||
|
is_depth_only_fragment_shader_ = false;
|
||||||
|
return translation.translated_binary();
|
||||||
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::Reset() {
|
void SpirvShaderTranslator::Reset() {
|
||||||
ShaderTranslator::Reset();
|
ShaderTranslator::Reset();
|
||||||
|
|
||||||
|
@ -109,6 +130,7 @@ void SpirvShaderTranslator::Reset() {
|
||||||
input_point_coordinates_ = spv::NoResult;
|
input_point_coordinates_ = spv::NoResult;
|
||||||
input_fragment_coordinates_ = spv::NoResult;
|
input_fragment_coordinates_ = spv::NoResult;
|
||||||
input_front_facing_ = spv::NoResult;
|
input_front_facing_ = spv::NoResult;
|
||||||
|
input_sample_mask_ = spv::NoResult;
|
||||||
std::fill(input_output_interpolators_.begin(),
|
std::fill(input_output_interpolators_.begin(),
|
||||||
input_output_interpolators_.end(), spv::NoResult);
|
input_output_interpolators_.end(), spv::NoResult);
|
||||||
output_point_coordinates_ = spv::NoResult;
|
output_point_coordinates_ = spv::NoResult;
|
||||||
|
@ -120,6 +142,8 @@ void SpirvShaderTranslator::Reset() {
|
||||||
main_interface_.clear();
|
main_interface_.clear();
|
||||||
var_main_registers_ = spv::NoResult;
|
var_main_registers_ = spv::NoResult;
|
||||||
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
|
var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult;
|
||||||
|
var_main_kill_pixel_ = spv::NoResult;
|
||||||
|
var_main_fsi_color_written_ = spv::NoResult;
|
||||||
|
|
||||||
main_switch_op_.reset();
|
main_switch_op_.reset();
|
||||||
main_switch_next_pc_phi_operands_.clear();
|
main_switch_next_pc_phi_operands_.clear();
|
||||||
|
@ -217,6 +241,10 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
size_t offset;
|
size_t offset;
|
||||||
spv::Id type;
|
spv::Id type;
|
||||||
};
|
};
|
||||||
|
spv::Id type_float4_array_4 = builder_->makeArrayType(
|
||||||
|
type_float4_, builder_->makeUintConstant(4), sizeof(float) * 4);
|
||||||
|
builder_->addDecoration(type_float4_array_4, spv::DecorationArrayStride,
|
||||||
|
sizeof(float) * 4);
|
||||||
spv::Id type_uint4_array_2 = builder_->makeArrayType(
|
spv::Id type_uint4_array_2 = builder_->makeArrayType(
|
||||||
type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4);
|
type_uint4_, builder_->makeUintConstant(2), sizeof(uint32_t) * 4);
|
||||||
builder_->addDecoration(type_uint4_array_2, spv::DecorationArrayStride,
|
builder_->addDecoration(type_uint4_array_2, spv::DecorationArrayStride,
|
||||||
|
@ -250,8 +278,37 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
type_uint4_array_4},
|
type_uint4_array_4},
|
||||||
{"alpha_test_reference", offsetof(SystemConstants, alpha_test_reference),
|
{"alpha_test_reference", offsetof(SystemConstants, alpha_test_reference),
|
||||||
type_float_},
|
type_float_},
|
||||||
|
{"edram_32bpp_tile_pitch_dwords_scaled",
|
||||||
|
offsetof(SystemConstants, edram_32bpp_tile_pitch_dwords_scaled),
|
||||||
|
type_uint_},
|
||||||
|
{"edram_depth_base_dwords_scaled",
|
||||||
|
offsetof(SystemConstants, edram_depth_base_dwords_scaled), type_uint_},
|
||||||
{"color_exp_bias", offsetof(SystemConstants, color_exp_bias),
|
{"color_exp_bias", offsetof(SystemConstants, color_exp_bias),
|
||||||
type_float4_},
|
type_float4_},
|
||||||
|
{"edram_poly_offset_front_scale",
|
||||||
|
offsetof(SystemConstants, edram_poly_offset_front_scale), type_float_},
|
||||||
|
{"edram_poly_offset_back_scale",
|
||||||
|
offsetof(SystemConstants, edram_poly_offset_back_scale), type_float_},
|
||||||
|
{"edram_poly_offset_front_offset",
|
||||||
|
offsetof(SystemConstants, edram_poly_offset_front_offset), type_float_},
|
||||||
|
{"edram_poly_offset_back_offset",
|
||||||
|
offsetof(SystemConstants, edram_poly_offset_back_offset), type_float_},
|
||||||
|
{"edram_stencil_front", offsetof(SystemConstants, edram_stencil_front),
|
||||||
|
type_uint2_},
|
||||||
|
{"edram_stencil_back", offsetof(SystemConstants, edram_stencil_back),
|
||||||
|
type_uint2_},
|
||||||
|
{"edram_rt_base_dwords_scaled",
|
||||||
|
offsetof(SystemConstants, edram_rt_base_dwords_scaled), type_uint4_},
|
||||||
|
{"edram_rt_format_flags",
|
||||||
|
offsetof(SystemConstants, edram_rt_format_flags), type_uint4_},
|
||||||
|
{"edram_rt_blend_factors_ops",
|
||||||
|
offsetof(SystemConstants, edram_rt_blend_factors_ops), type_uint4_},
|
||||||
|
{"edram_rt_keep_mask", offsetof(SystemConstants, edram_rt_keep_mask),
|
||||||
|
type_uint4_array_2},
|
||||||
|
{"edram_rt_clamp", offsetof(SystemConstants, edram_rt_clamp),
|
||||||
|
type_float4_array_4},
|
||||||
|
{"edram_blend_constant", offsetof(SystemConstants, edram_blend_constant),
|
||||||
|
type_float4_},
|
||||||
};
|
};
|
||||||
id_vector_temp_.clear();
|
id_vector_temp_.clear();
|
||||||
id_vector_temp_.reserve(xe::countof(system_constants));
|
id_vector_temp_.reserve(xe::countof(system_constants));
|
||||||
|
@ -281,6 +338,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
main_interface_.push_back(uniform_system_constants_);
|
main_interface_.push_back(uniform_system_constants_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
// Common uniform buffer - float constants.
|
// Common uniform buffer - float constants.
|
||||||
uint32_t float_constant_count =
|
uint32_t float_constant_count =
|
||||||
current_shader().constant_register_map().float_count;
|
current_shader().constant_register_map().float_count;
|
||||||
|
@ -289,11 +347,11 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
id_vector_temp_.push_back(builder_->makeArrayType(
|
id_vector_temp_.push_back(builder_->makeArrayType(
|
||||||
type_float4_, builder_->makeUintConstant(float_constant_count),
|
type_float4_, builder_->makeUintConstant(float_constant_count),
|
||||||
sizeof(float) * 4));
|
sizeof(float) * 4));
|
||||||
// Currently (as of October 24, 2020) makeArrayType only uses the stride to
|
// Currently (as of October 24, 2020) makeArrayType only uses the stride
|
||||||
// check if deduplication can be done - the array stride decoration needs to
|
// to check if deduplication can be done - the array stride decoration
|
||||||
// be applied explicitly.
|
// needs to be applied explicitly.
|
||||||
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
|
builder_->addDecoration(id_vector_temp_.back(),
|
||||||
sizeof(float) * 4);
|
spv::DecorationArrayStride, sizeof(float) * 4);
|
||||||
spv::Id type_float_constants =
|
spv::Id type_float_constants =
|
||||||
builder_->makeStructType(id_vector_temp_, "XeFloatConstants");
|
builder_->makeStructType(id_vector_temp_, "XeFloatConstants");
|
||||||
builder_->addMemberName(type_float_constants, 0, "float_constants");
|
builder_->addMemberName(type_float_constants, 0, "float_constants");
|
||||||
|
@ -346,7 +404,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
builder_->addDecoration(uniform_bool_loop_constants_,
|
builder_->addDecoration(uniform_bool_loop_constants_,
|
||||||
spv::DecorationDescriptorSet,
|
spv::DecorationDescriptorSet,
|
||||||
int(kDescriptorSetConstants));
|
int(kDescriptorSetConstants));
|
||||||
builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding,
|
builder_->addDecoration(uniform_bool_loop_constants_,
|
||||||
|
spv::DecorationBinding,
|
||||||
int(kConstantBufferBoolLoop));
|
int(kConstantBufferBoolLoop));
|
||||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||||
main_interface_.push_back(uniform_bool_loop_constants_);
|
main_interface_.push_back(uniform_bool_loop_constants_);
|
||||||
|
@ -363,8 +422,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
spv::Id type_fetch_constants =
|
spv::Id type_fetch_constants =
|
||||||
builder_->makeStructType(id_vector_temp_, "XeFetchConstants");
|
builder_->makeStructType(id_vector_temp_, "XeFetchConstants");
|
||||||
builder_->addMemberName(type_fetch_constants, 0, "fetch_constants");
|
builder_->addMemberName(type_fetch_constants, 0, "fetch_constants");
|
||||||
builder_->addMemberDecoration(type_fetch_constants, 0, spv::DecorationOffset,
|
builder_->addMemberDecoration(type_fetch_constants, 0,
|
||||||
0);
|
spv::DecorationOffset, 0);
|
||||||
builder_->addDecoration(type_fetch_constants, spv::DecorationBlock);
|
builder_->addDecoration(type_fetch_constants, spv::DecorationBlock);
|
||||||
uniform_fetch_constants_ = builder_->createVariable(
|
uniform_fetch_constants_ = builder_->createVariable(
|
||||||
spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants,
|
spv::NoPrecision, spv::StorageClassUniform, type_fetch_constants,
|
||||||
|
@ -388,6 +447,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
spv::Id type_shared_memory =
|
spv::Id type_shared_memory =
|
||||||
builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
|
builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
|
||||||
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
|
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
|
||||||
|
builder_->addMemberDecoration(type_shared_memory, 0,
|
||||||
|
spv::DecorationRestrict);
|
||||||
// TODO(Triang3l): Make writable when memexport is implemented.
|
// TODO(Triang3l): Make writable when memexport is implemented.
|
||||||
builder_->addMemberDecoration(type_shared_memory, 0,
|
builder_->addMemberDecoration(type_shared_memory, 0,
|
||||||
spv::DecorationNonWritable);
|
spv::DecorationNonWritable);
|
||||||
|
@ -409,12 +470,14 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer
|
features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer
|
||||||
: spv::StorageClassUniform,
|
: spv::StorageClassUniform,
|
||||||
type_shared_memory, "xe_shared_memory");
|
type_shared_memory, "xe_shared_memory");
|
||||||
builder_->addDecoration(buffers_shared_memory_, spv::DecorationDescriptorSet,
|
builder_->addDecoration(buffers_shared_memory_,
|
||||||
|
spv::DecorationDescriptorSet,
|
||||||
int(kDescriptorSetSharedMemoryAndEdram));
|
int(kDescriptorSetSharedMemoryAndEdram));
|
||||||
builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0);
|
builder_->addDecoration(buffers_shared_memory_, spv::DecorationBinding, 0);
|
||||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||||
main_interface_.push_back(buffers_shared_memory_);
|
main_interface_.push_back(buffers_shared_memory_);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
StartVertexOrTessEvalShaderBeforeMain();
|
StartVertexOrTessEvalShaderBeforeMain();
|
||||||
|
@ -438,6 +501,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
uniform_system_constants_, id_vector_temp_),
|
uniform_system_constants_, id_vector_temp_),
|
||||||
spv::NoPrecision);
|
spv::NoPrecision);
|
||||||
|
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
// Begin ucode translation. Initialize everything, even without defined
|
// Begin ucode translation. Initialize everything, even without defined
|
||||||
// defaults, for safety.
|
// defaults, for safety.
|
||||||
var_main_predicate_ = builder_->createVariable(
|
var_main_predicate_ = builder_->createVariable(
|
||||||
|
@ -474,6 +538,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction,
|
builder_->createVariable(spv::NoPrecision, spv::StorageClassFunction,
|
||||||
type_register_array, "xe_var_registers");
|
type_register_array, "xe_var_registers");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Write the execution model-specific prologue with access to variables in the
|
// Write the execution model-specific prologue with access to variables in the
|
||||||
// main function.
|
// main function.
|
||||||
|
@ -483,6 +548,10 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
StartFragmentShaderInMain();
|
StartFragmentShaderInMain();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_depth_only_fragment_shader_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Open the main loop.
|
// Open the main loop.
|
||||||
spv::Block& main_loop_pre_header = *builder_->getBuildPoint();
|
spv::Block& main_loop_pre_header = *builder_->getBuildPoint();
|
||||||
main_loop_header_ = &builder_->makeNewBlock();
|
main_loop_header_ = &builder_->makeNewBlock();
|
||||||
|
@ -551,6 +620,7 @@ void SpirvShaderTranslator::StartTranslation() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
// Close flow control within the last switch case.
|
// Close flow control within the last switch case.
|
||||||
CloseExecConditionals();
|
CloseExecConditionals();
|
||||||
bool has_main_switch = !current_shader().label_addresses().empty();
|
bool has_main_switch = !current_shader().label_addresses().empty();
|
||||||
|
@ -566,23 +636,24 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
builder_->setBuildPoint(main_switch_header_);
|
builder_->setBuildPoint(main_switch_header_);
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(main_switch_op_));
|
builder_->getBuildPoint()->addInstruction(std::move(main_switch_op_));
|
||||||
// Build the main switch merge, breaking out of the loop after falling
|
// Build the main switch merge, breaking out of the loop after falling
|
||||||
// through the end or breaking from exece (only continuing if a jump - from
|
// through the end or breaking from exece (only continuing if a jump -
|
||||||
// a guest loop or from jmp/call - was made).
|
// from a guest loop or from jmp/call - was made).
|
||||||
function_main_->addBlock(main_switch_merge_);
|
function_main_->addBlock(main_switch_merge_);
|
||||||
builder_->setBuildPoint(main_switch_merge_);
|
builder_->setBuildPoint(main_switch_merge_);
|
||||||
builder_->createBranch(main_loop_merge_);
|
builder_->createBranch(main_loop_merge_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Main loop continuation - choose the program counter based on the path
|
// Main loop continuation - choose the program counter based on the path
|
||||||
// taken (-1 if not from a jump as a safe fallback, which would result in not
|
// taken (-1 if not from a jump as a safe fallback, which would result in
|
||||||
// hitting any switch case and reaching the final break in the body).
|
// not hitting any switch case and reaching the final break in the body).
|
||||||
function_main_->addBlock(main_loop_continue_);
|
function_main_->addBlock(main_loop_continue_);
|
||||||
builder_->setBuildPoint(main_loop_continue_);
|
builder_->setBuildPoint(main_loop_continue_);
|
||||||
if (has_main_switch) {
|
if (has_main_switch) {
|
||||||
// OpPhi, if added, must be the first in the block.
|
// OpPhi, if added, must be the first in the block.
|
||||||
// If labels were added, but not jumps (for example, due to the call
|
// If labels were added, but not jumps (for example, due to the call
|
||||||
// instruction not being implemented as of October 18, 2020), send an
|
// instruction not being implemented as of October 18, 2020), send an
|
||||||
// impossible program counter value (-1) to the OpPhi at the next iteration.
|
// impossible program counter value (-1) to the OpPhi at the next
|
||||||
|
// iteration.
|
||||||
if (main_switch_next_pc_phi_operands_.empty()) {
|
if (main_switch_next_pc_phi_operands_.empty()) {
|
||||||
main_switch_next_pc_phi_operands_.push_back(
|
main_switch_next_pc_phi_operands_.push_back(
|
||||||
builder_->makeIntConstant(-1));
|
builder_->makeIntConstant(-1));
|
||||||
|
@ -590,18 +661,21 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
std::unique_ptr<spv::Instruction> main_loop_pc_next_op =
|
std::unique_ptr<spv::Instruction> main_loop_pc_next_op =
|
||||||
std::make_unique<spv::Instruction>(
|
std::make_unique<spv::Instruction>(
|
||||||
main_loop_pc_next_, type_int_,
|
main_loop_pc_next_, type_int_,
|
||||||
main_switch_next_pc_phi_operands_.size() >= 2 ? spv::OpPhi
|
main_switch_next_pc_phi_operands_.size() >= 2
|
||||||
|
? spv::OpPhi
|
||||||
: spv::OpCopyObject);
|
: spv::OpCopyObject);
|
||||||
for (spv::Id operand : main_switch_next_pc_phi_operands_) {
|
for (spv::Id operand : main_switch_next_pc_phi_operands_) {
|
||||||
main_loop_pc_next_op->addIdOperand(operand);
|
main_loop_pc_next_op->addIdOperand(operand);
|
||||||
}
|
}
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(main_loop_pc_next_op));
|
builder_->getBuildPoint()->addInstruction(
|
||||||
|
std::move(main_loop_pc_next_op));
|
||||||
}
|
}
|
||||||
builder_->createBranch(main_loop_header_);
|
builder_->createBranch(main_loop_header_);
|
||||||
|
|
||||||
// Add the main loop merge block and go back to the function.
|
// Add the main loop merge block and go back to the function.
|
||||||
function_main_->addBlock(main_loop_merge_);
|
function_main_->addBlock(main_loop_merge_);
|
||||||
builder_->setBuildPoint(main_loop_merge_);
|
builder_->setBuildPoint(main_loop_merge_);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_vertex_shader()) {
|
if (is_vertex_shader()) {
|
||||||
CompleteVertexOrTessEvalShaderInMain();
|
CompleteVertexOrTessEvalShaderInMain();
|
||||||
|
@ -622,6 +696,20 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
builder_->addExecutionMode(function_main_,
|
builder_->addExecutionMode(function_main_,
|
||||||
spv::ExecutionModeEarlyFragmentTests);
|
spv::ExecutionModeEarlyFragmentTests);
|
||||||
}
|
}
|
||||||
|
if (edram_fragment_shader_interlock_) {
|
||||||
|
// Accessing per-sample values, so interlocking just when there's common
|
||||||
|
// coverage is enough if the device exposes that.
|
||||||
|
if (features_.fragment_shader_sample_interlock) {
|
||||||
|
builder_->addCapability(
|
||||||
|
spv::CapabilityFragmentShaderSampleInterlockEXT);
|
||||||
|
builder_->addExecutionMode(function_main_,
|
||||||
|
spv::ExecutionModeSampleInterlockOrderedEXT);
|
||||||
|
} else {
|
||||||
|
builder_->addCapability(spv::CapabilityFragmentShaderPixelInterlockEXT);
|
||||||
|
builder_->addExecutionMode(function_main_,
|
||||||
|
spv::ExecutionModePixelInterlockOrderedEXT);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
assert_true(is_vertex_shader());
|
assert_true(is_vertex_shader());
|
||||||
execution_model = IsSpirvTessEvalShader()
|
execution_model = IsSpirvTessEvalShader()
|
||||||
|
@ -649,8 +737,10 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
entry_point->addIdOperand(interface_id);
|
entry_point->addIdOperand(interface_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
// Specify the binding indices for samplers when the number of textures is
|
// Specify the binding indices for samplers when the number of textures is
|
||||||
// known, as samplers are located after images in the texture descriptor set.
|
// known, as samplers are located after images in the texture descriptor
|
||||||
|
// set.
|
||||||
size_t texture_binding_count = texture_bindings_.size();
|
size_t texture_binding_count = texture_bindings_.size();
|
||||||
size_t sampler_binding_count = sampler_bindings_.size();
|
size_t sampler_binding_count = sampler_bindings_.size();
|
||||||
for (size_t i = 0; i < sampler_binding_count; ++i) {
|
for (size_t i = 0; i < sampler_binding_count; ++i) {
|
||||||
|
@ -658,6 +748,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
||||||
spv::DecorationBinding,
|
spv::DecorationBinding,
|
||||||
int(texture_binding_count + i));
|
int(texture_binding_count + i));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Avoid copy?
|
// TODO(Triang3l): Avoid copy?
|
||||||
std::vector<unsigned int> module_uints;
|
std::vector<unsigned int> module_uints;
|
||||||
|
@ -1682,13 +1773,48 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
|
||||||
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
Modification shader_modification = GetSpirvShaderModification();
|
Modification shader_modification = GetSpirvShaderModification();
|
||||||
|
|
||||||
|
if (edram_fragment_shader_interlock_) {
|
||||||
|
builder_->addExtension("SPV_EXT_fragment_shader_interlock");
|
||||||
|
|
||||||
|
// EDRAM buffer uint[].
|
||||||
|
id_vector_temp_.clear();
|
||||||
|
id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
|
||||||
|
// Storage buffers have std430 packing, no padding to 4-component vectors.
|
||||||
|
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
|
||||||
|
sizeof(uint32_t));
|
||||||
|
spv::Id type_edram = builder_->makeStructType(id_vector_temp_, "XeEdram");
|
||||||
|
builder_->addMemberName(type_edram, 0, "edram");
|
||||||
|
builder_->addMemberDecoration(type_edram, 0, spv::DecorationCoherent);
|
||||||
|
builder_->addMemberDecoration(type_edram, 0, spv::DecorationRestrict);
|
||||||
|
builder_->addMemberDecoration(type_edram, 0, spv::DecorationOffset, 0);
|
||||||
|
builder_->addDecoration(type_edram, features_.spirv_version >= spv::Spv_1_3
|
||||||
|
? spv::DecorationBlock
|
||||||
|
: spv::DecorationBufferBlock);
|
||||||
|
buffer_edram_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision,
|
||||||
|
features_.spirv_version >= spv::Spv_1_3 ? spv::StorageClassStorageBuffer
|
||||||
|
: spv::StorageClassUniform,
|
||||||
|
type_edram, "xe_edram");
|
||||||
|
builder_->addDecoration(buffer_edram_, spv::DecorationDescriptorSet,
|
||||||
|
int(kDescriptorSetSharedMemoryAndEdram));
|
||||||
|
builder_->addDecoration(buffer_edram_, spv::DecorationBinding, 1);
|
||||||
|
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||||
|
main_interface_.push_back(buffer_edram_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool param_gen_needed = !is_depth_only_fragment_shader_ &&
|
||||||
|
GetPsParamGenInterpolator() != UINT32_MAX;
|
||||||
|
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
uint32_t input_location = 0;
|
uint32_t input_location = 0;
|
||||||
|
|
||||||
// Interpolator inputs.
|
// Interpolator inputs.
|
||||||
{
|
{
|
||||||
uint32_t interpolators_remaining = GetModificationInterpolatorMask();
|
uint32_t interpolators_remaining = GetModificationInterpolatorMask();
|
||||||
uint32_t interpolator_index;
|
uint32_t interpolator_index;
|
||||||
while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) {
|
while (
|
||||||
|
xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) {
|
||||||
interpolators_remaining &= ~(UINT32_C(1) << interpolator_index);
|
interpolators_remaining &= ~(UINT32_C(1) << interpolator_index);
|
||||||
spv::Id interpolator = builder_->createVariable(
|
spv::Id interpolator = builder_->createVariable(
|
||||||
spv::NoPrecision, spv::StorageClassInput, type_float4_,
|
spv::NoPrecision, spv::StorageClassInput, type_float4_,
|
||||||
|
@ -1705,26 +1831,25 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX;
|
|
||||||
|
|
||||||
// Point coordinate input.
|
// Point coordinate input.
|
||||||
if (shader_modification.pixel.param_gen_point) {
|
if (shader_modification.pixel.param_gen_point) {
|
||||||
if (param_gen_needed) {
|
if (param_gen_needed) {
|
||||||
input_point_coordinates_ =
|
input_point_coordinates_ =
|
||||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
|
builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
|
||||||
type_float2_, "xe_in_point_coordinates");
|
type_float2_, "xe_in_point_coordinates");
|
||||||
builder_->addDecoration(input_point_coordinates_, spv::DecorationLocation,
|
builder_->addDecoration(input_point_coordinates_,
|
||||||
int(input_location));
|
spv::DecorationLocation, int(input_location));
|
||||||
main_interface_.push_back(input_point_coordinates_);
|
main_interface_.push_back(input_point_coordinates_);
|
||||||
}
|
}
|
||||||
++input_location;
|
++input_location;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Fragment coordinates.
|
// Fragment coordinates.
|
||||||
// TODO(Triang3l): More conditions - fragment shader interlock render backend,
|
// TODO(Triang3l): More conditions - alpha to coverage (if RT 0 is written,
|
||||||
// alpha to coverage (if RT 0 is written, and there's no early depth /
|
// and there's no early depth / stencil), depth writing in the fragment shader
|
||||||
// stencil), depth writing in the fragment shader (per-sample if supported).
|
// (per-sample if supported).
|
||||||
if (param_gen_needed) {
|
if (edram_fragment_shader_interlock_ || param_gen_needed) {
|
||||||
input_fragment_coordinates_ = builder_->createVariable(
|
input_fragment_coordinates_ = builder_->createVariable(
|
||||||
spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord");
|
spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord");
|
||||||
builder_->addDecoration(input_fragment_coordinates_, spv::DecorationBuiltIn,
|
builder_->addDecoration(input_fragment_coordinates_, spv::DecorationBuiltIn,
|
||||||
|
@ -1733,9 +1858,9 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Is front facing.
|
// Is front facing.
|
||||||
// TODO(Triang3l): Needed for stencil in the fragment shader interlock render
|
if (edram_fragment_shader_interlock_ ||
|
||||||
// backend.
|
(param_gen_needed &&
|
||||||
if (param_gen_needed && !GetSpirvShaderModification().pixel.param_gen_point) {
|
!GetSpirvShaderModification().pixel.param_gen_point)) {
|
||||||
input_front_facing_ = builder_->createVariable(
|
input_front_facing_ = builder_->createVariable(
|
||||||
spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing");
|
spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing");
|
||||||
builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn,
|
builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn,
|
||||||
|
@ -1743,33 +1868,165 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||||
main_interface_.push_back(input_front_facing_);
|
main_interface_.push_back(input_front_facing_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Framebuffer attachment outputs.
|
// Sample mask input.
|
||||||
std::fill(output_fragment_data_.begin(), output_fragment_data_.end(),
|
if (edram_fragment_shader_interlock_) {
|
||||||
spv::NoResult);
|
// SampleMask depends on SampleRateShading in some SPIR-V revisions.
|
||||||
static const char* const kFragmentDataNames[] = {
|
builder_->addCapability(spv::CapabilitySampleRateShading);
|
||||||
|
input_sample_mask_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassInput,
|
||||||
|
builder_->makeArrayType(type_int_, builder_->makeUintConstant(1), 0),
|
||||||
|
"gl_SampleMaskIn");
|
||||||
|
builder_->addDecoration(input_sample_mask_, spv::DecorationFlat);
|
||||||
|
builder_->addDecoration(input_sample_mask_, spv::DecorationBuiltIn,
|
||||||
|
spv::BuiltInSampleMask);
|
||||||
|
main_interface_.push_back(input_sample_mask_);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
|
// Framebuffer color attachment outputs.
|
||||||
|
if (!edram_fragment_shader_interlock_) {
|
||||||
|
std::fill(output_or_var_fragment_data_.begin(),
|
||||||
|
output_or_var_fragment_data_.end(), spv::NoResult);
|
||||||
|
static const char* const kFragmentDataOutputNames[] = {
|
||||||
"xe_out_fragment_data_0",
|
"xe_out_fragment_data_0",
|
||||||
"xe_out_fragment_data_1",
|
"xe_out_fragment_data_1",
|
||||||
"xe_out_fragment_data_2",
|
"xe_out_fragment_data_2",
|
||||||
"xe_out_fragment_data_3",
|
"xe_out_fragment_data_3",
|
||||||
};
|
};
|
||||||
uint32_t color_targets_remaining = current_shader().writes_color_targets();
|
uint32_t color_targets_remaining =
|
||||||
|
current_shader().writes_color_targets();
|
||||||
uint32_t color_target_index;
|
uint32_t color_target_index;
|
||||||
while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
|
while (
|
||||||
|
xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
|
||||||
color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
|
color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
|
||||||
spv::Id output_fragment_data_rt = builder_->createVariable(
|
spv::Id output_fragment_data_rt = builder_->createVariable(
|
||||||
spv::NoPrecision, spv::StorageClassOutput, type_float4_,
|
spv::NoPrecision, spv::StorageClassOutput, type_float4_,
|
||||||
kFragmentDataNames[color_target_index]);
|
kFragmentDataOutputNames[color_target_index]);
|
||||||
output_fragment_data_[color_target_index] = output_fragment_data_rt;
|
output_or_var_fragment_data_[color_target_index] =
|
||||||
builder_->addDecoration(output_fragment_data_rt, spv::DecorationLocation,
|
output_fragment_data_rt;
|
||||||
|
builder_->addDecoration(output_fragment_data_rt,
|
||||||
|
spv::DecorationLocation,
|
||||||
int(color_target_index));
|
int(color_target_index));
|
||||||
// Make invariant as pixel shaders may be used for various precise
|
// Make invariant as pixel shaders may be used for various precise
|
||||||
// computations.
|
// computations.
|
||||||
builder_->addDecoration(output_fragment_data_rt, spv::DecorationInvariant);
|
builder_->addDecoration(output_fragment_data_rt,
|
||||||
|
spv::DecorationInvariant);
|
||||||
main_interface_.push_back(output_fragment_data_rt);
|
main_interface_.push_back(output_fragment_data_rt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
|
// Set up pixel killing from within the translated shader without affecting
|
||||||
|
// the control flow (unlike with OpKill), similarly to how pixel killing works
|
||||||
|
// on the Xenos, and also keeping a single critical section exit and return
|
||||||
|
// for safety across different Vulkan implementations with fragment shader
|
||||||
|
// interlock.
|
||||||
|
if (current_shader().kills_pixels()) {
|
||||||
|
if (features_.demote_to_helper_invocation) {
|
||||||
|
// TODO(Triang3l): Promoted to SPIR-V 1.6 - don't add the extension there.
|
||||||
|
builder_->addExtension("SPV_EXT_demote_to_helper_invocation");
|
||||||
|
builder_->addCapability(spv::CapabilityDemoteToHelperInvocationEXT);
|
||||||
|
} else {
|
||||||
|
var_main_kill_pixel_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_bool_,
|
||||||
|
"xe_var_kill_pixel", builder_->makeBoolConstant(false));
|
||||||
|
}
|
||||||
|
// For killing with fragment shader interlock when demotion is supported,
|
||||||
|
// using OpIsHelperInvocationEXT to avoid allocating a variable in addition
|
||||||
|
// to the execution mask GPUs naturally have.
|
||||||
|
}
|
||||||
|
|
||||||
|
if (edram_fragment_shader_interlock_) {
|
||||||
|
// Initialize color output variables with fragment shader interlock.
|
||||||
|
std::fill(output_or_var_fragment_data_.begin(),
|
||||||
|
output_or_var_fragment_data_.end(), spv::NoResult);
|
||||||
|
var_main_fsi_color_written_ = spv::NoResult;
|
||||||
|
uint32_t color_targets_written = current_shader().writes_color_targets();
|
||||||
|
if (color_targets_written) {
|
||||||
|
static const char* const kFragmentDataVariableNames[] = {
|
||||||
|
"xe_var_fragment_data_0",
|
||||||
|
"xe_var_fragment_data_1",
|
||||||
|
"xe_var_fragment_data_2",
|
||||||
|
"xe_var_fragment_data_3",
|
||||||
|
};
|
||||||
|
uint32_t color_targets_remaining = color_targets_written;
|
||||||
|
uint32_t color_target_index;
|
||||||
|
while (
|
||||||
|
xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
|
||||||
|
color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
|
||||||
|
output_or_var_fragment_data_[color_target_index] =
|
||||||
|
builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_float4_,
|
||||||
|
kFragmentDataVariableNames[color_target_index],
|
||||||
|
const_float4_0_);
|
||||||
|
}
|
||||||
|
var_main_fsi_color_written_ = builder_->createVariable(
|
||||||
|
spv::NoPrecision, spv::StorageClassFunction, type_uint_,
|
||||||
|
"xe_var_fsi_color_written", const_uint_0_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (edram_fragment_shader_interlock_ && FSI_IsDepthStencilEarly()) {
|
||||||
|
spv::Id msaa_samples = LoadMsaaSamplesFromFlags();
|
||||||
|
FSI_LoadSampleMask(msaa_samples);
|
||||||
|
FSI_LoadEdramOffsets(msaa_samples);
|
||||||
|
builder_->createNoResultOp(spv::OpBeginInvocationInterlockEXT);
|
||||||
|
FSI_DepthStencilTest(msaa_samples, false);
|
||||||
|
if (!is_depth_only_fragment_shader_) {
|
||||||
|
// Skip the rest of the shader if the whole quad (due to derivatives) has
|
||||||
|
// failed the depth / stencil test, and there are no depth and stencil
|
||||||
|
// values to conditionally write after running the shader to check if
|
||||||
|
// samples don't additionally need to be discarded.
|
||||||
|
spv::Id quad_needs_execution = builder_->createBinOp(
|
||||||
|
spv::OpINotEqual, type_bool_, main_fsi_sample_mask_, const_uint_0_);
|
||||||
|
// TODO(Triang3l): Use GroupNonUniformQuad operations where supported.
|
||||||
|
// If none of the pixels in the quad passed the depth / stencil test, the
|
||||||
|
// value of (any samples covered ? 1.0f : 0.0f) for the current pixel will
|
||||||
|
// be 0.0f, and since it will be 0.0f in other pixels too, the derivatives
|
||||||
|
// will be zero as well.
|
||||||
|
builder_->addCapability(spv::CapabilityDerivativeControl);
|
||||||
|
// Query the horizontally adjacent pixel.
|
||||||
|
quad_needs_execution = builder_->createBinOp(
|
||||||
|
spv::OpLogicalOr, type_bool_, quad_needs_execution,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpFOrdNotEqual, type_bool_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpDPdxFine, type_float_,
|
||||||
|
builder_->createTriOp(spv::OpSelect, type_float_,
|
||||||
|
quad_needs_execution, const_float_1_,
|
||||||
|
const_float_0_)),
|
||||||
|
const_float_0_));
|
||||||
|
// Query the vertically adjacent pair of pixels.
|
||||||
|
quad_needs_execution = builder_->createBinOp(
|
||||||
|
spv::OpLogicalOr, type_bool_, quad_needs_execution,
|
||||||
|
builder_->createBinOp(
|
||||||
|
spv::OpFOrdNotEqual, type_bool_,
|
||||||
|
builder_->createUnaryOp(
|
||||||
|
spv::OpDPdyCoarse, type_float_,
|
||||||
|
builder_->createTriOp(spv::OpSelect, type_float_,
|
||||||
|
quad_needs_execution, const_float_1_,
|
||||||
|
const_float_0_)),
|
||||||
|
const_float_0_));
|
||||||
|
spv::Block& main_fsi_early_depth_stencil_execute_quad =
|
||||||
|
builder_->makeNewBlock();
|
||||||
|
main_fsi_early_depth_stencil_execute_quad_merge_ =
|
||||||
|
&builder_->makeNewBlock();
|
||||||
|
SpirvCreateSelectionMerge(
|
||||||
|
main_fsi_early_depth_stencil_execute_quad_merge_->getId(),
|
||||||
|
spv::SelectionControlDontFlattenMask);
|
||||||
|
builder_->createConditionalBranch(
|
||||||
|
quad_needs_execution, &main_fsi_early_depth_stencil_execute_quad,
|
||||||
|
main_fsi_early_depth_stencil_execute_quad_merge_);
|
||||||
|
builder_->setBuildPoint(&main_fsi_early_depth_stencil_execute_quad);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_depth_only_fragment_shader_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t param_gen_interpolator = GetPsParamGenInterpolator();
|
uint32_t param_gen_interpolator = GetPsParamGenInterpolator();
|
||||||
|
|
||||||
// Zero general-purpose registers to prevent crashes when the game
|
// Zero general-purpose registers to prevent crashes when the game
|
||||||
|
@ -1928,14 +2185,16 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
||||||
var_main_registers_, id_vector_temp_));
|
var_main_registers_, id_vector_temp_));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!edram_fragment_shader_interlock_) {
|
||||||
// Initialize the colors for safety.
|
// Initialize the colors for safety.
|
||||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||||
spv::Id output_fragment_data_rt = output_fragment_data_[i];
|
spv::Id output_fragment_data_rt = output_or_var_fragment_data_[i];
|
||||||
if (output_fragment_data_rt != spv::NoResult) {
|
if (output_fragment_data_rt != spv::NoResult) {
|
||||||
builder_->createStore(const_float4_0_, output_fragment_data_rt);
|
builder_->createStore(const_float4_0_, output_fragment_data_rt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SpirvShaderTranslator::UpdateExecConditionals(
|
void SpirvShaderTranslator::UpdateExecConditionals(
|
||||||
ParsedExecInstruction::Type type, uint32_t bool_constant_index,
|
ParsedExecInstruction::Type type, uint32_t bool_constant_index,
|
||||||
|
@ -2299,11 +2558,18 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
||||||
assert_true(is_pixel_shader());
|
assert_true(is_pixel_shader());
|
||||||
assert_not_zero(used_write_mask);
|
assert_not_zero(used_write_mask);
|
||||||
assert_true(current_shader().writes_color_target(result.storage_index));
|
assert_true(current_shader().writes_color_target(result.storage_index));
|
||||||
target_pointer = output_fragment_data_[result.storage_index];
|
target_pointer = output_or_var_fragment_data_[result.storage_index];
|
||||||
// May be spv::NoResult if the color output is explicitly removed due to
|
if (edram_fragment_shader_interlock_) {
|
||||||
// an empty write mask without independent blending.
|
assert_true(var_main_fsi_color_written_ != spv::NoResult);
|
||||||
// TODO(Triang3l): Store the alpha of the first output in this case for
|
builder_->createStore(
|
||||||
// alpha test and alpha to coverage.
|
builder_->createBinOp(
|
||||||
|
spv::OpBitwiseOr, type_uint_,
|
||||||
|
builder_->createLoad(var_main_fsi_color_written_,
|
||||||
|
spv::NoPrecision),
|
||||||
|
builder_->makeUintConstant(uint32_t(1)
|
||||||
|
<< result.storage_index)),
|
||||||
|
var_main_fsi_color_written_);
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
// TODO(Triang3l): All storage targets.
|
// TODO(Triang3l): All storage targets.
|
||||||
|
|
|
@ -96,6 +96,9 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_WNotReciprocal_Shift,
|
kSysFlag_WNotReciprocal_Shift,
|
||||||
kSysFlag_PrimitivePolygonal_Shift,
|
kSysFlag_PrimitivePolygonal_Shift,
|
||||||
kSysFlag_PrimitiveLine_Shift,
|
kSysFlag_PrimitiveLine_Shift,
|
||||||
|
kSysFlag_MsaaSamples_Shift,
|
||||||
|
kSysFlag_DepthFloat24_Shift =
|
||||||
|
kSysFlag_MsaaSamples_Shift + xenos::kMsaaSamplesBits,
|
||||||
kSysFlag_AlphaPassIfLess_Shift,
|
kSysFlag_AlphaPassIfLess_Shift,
|
||||||
kSysFlag_AlphaPassIfEqual_Shift,
|
kSysFlag_AlphaPassIfEqual_Shift,
|
||||||
kSysFlag_AlphaPassIfGreater_Shift,
|
kSysFlag_AlphaPassIfGreater_Shift,
|
||||||
|
@ -104,6 +107,26 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_ConvertColor2ToGamma_Shift,
|
kSysFlag_ConvertColor2ToGamma_Shift,
|
||||||
kSysFlag_ConvertColor3ToGamma_Shift,
|
kSysFlag_ConvertColor3ToGamma_Shift,
|
||||||
|
|
||||||
|
kSysFlag_FSIDepthStencil_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfLess_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfEqual_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfGreater_Shift,
|
||||||
|
// 1 to write new depth to the depth buffer, 0 to keep the old one if the
|
||||||
|
// depth test passes.
|
||||||
|
kSysFlag_FSIDepthWrite_Shift,
|
||||||
|
kSysFlag_FSIStencilTest_Shift,
|
||||||
|
// If the depth / stencil test has failed, but resulted in a stencil value
|
||||||
|
// that is different than the one currently in the depth buffer, write it
|
||||||
|
// anyway and don't run the rest of the shader (to check if the sample may
|
||||||
|
// be discarded some way) - use when alpha test and alpha to coverage are
|
||||||
|
// disabled. Ignored by the shader if not applicable to it (like if it has
|
||||||
|
// kill instructions or writes the depth output).
|
||||||
|
// TODO(Triang3l): Investigate replacement with an alpha-to-mask flag,
|
||||||
|
// checking `(flags & (alpha test | alpha to mask)) == (always | disabled)`,
|
||||||
|
// taking into account the potential relation with occlusion queries (but
|
||||||
|
// should be safe at least temporarily).
|
||||||
|
kSysFlag_FSIDepthStencilEarlyWrite_Shift,
|
||||||
|
|
||||||
kSysFlag_Count,
|
kSysFlag_Count,
|
||||||
|
|
||||||
// For HostVertexShaderType kVertex, if fullDrawIndexUint32 is not
|
// For HostVertexShaderType kVertex, if fullDrawIndexUint32 is not
|
||||||
|
@ -127,6 +150,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
|
||||||
kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
|
kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
|
||||||
kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
|
kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
|
||||||
|
kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift,
|
||||||
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
|
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
|
||||||
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
|
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
|
||||||
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
|
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
|
||||||
|
@ -134,6 +158,14 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
|
kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
|
||||||
kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
|
kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
|
||||||
kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
|
kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
|
||||||
|
kSysFlag_FSIDepthStencil = 1u << kSysFlag_FSIDepthStencil_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfLess = 1u << kSysFlag_FSIDepthPassIfLess_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfEqual = 1u << kSysFlag_FSIDepthPassIfEqual_Shift,
|
||||||
|
kSysFlag_FSIDepthPassIfGreater = 1u << kSysFlag_FSIDepthPassIfGreater_Shift,
|
||||||
|
kSysFlag_FSIDepthWrite = 1u << kSysFlag_FSIDepthWrite_Shift,
|
||||||
|
kSysFlag_FSIStencilTest = 1u << kSysFlag_FSIStencilTest_Shift,
|
||||||
|
kSysFlag_FSIDepthStencilEarlyWrite =
|
||||||
|
1u << kSysFlag_FSIDepthStencilEarlyWrite_Shift,
|
||||||
};
|
};
|
||||||
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
|
||||||
|
|
||||||
|
@ -171,9 +203,55 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
uint32_t texture_swizzles[16];
|
uint32_t texture_swizzles[16];
|
||||||
|
|
||||||
float alpha_test_reference;
|
float alpha_test_reference;
|
||||||
float padding_alpha_test_reference[3];
|
uint32_t edram_32bpp_tile_pitch_dwords_scaled;
|
||||||
|
uint32_t edram_depth_base_dwords_scaled;
|
||||||
|
float padding_edram_depth_base_dwords_scaled;
|
||||||
|
|
||||||
float color_exp_bias[4];
|
float color_exp_bias[4];
|
||||||
|
|
||||||
|
float edram_poly_offset_front_scale;
|
||||||
|
float edram_poly_offset_back_scale;
|
||||||
|
float edram_poly_offset_front_offset;
|
||||||
|
float edram_poly_offset_back_offset;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
uint32_t edram_stencil_front_reference_masks;
|
||||||
|
uint32_t edram_stencil_front_func_ops;
|
||||||
|
|
||||||
|
uint32_t edram_stencil_back_reference_masks;
|
||||||
|
uint32_t edram_stencil_back_func_ops;
|
||||||
|
};
|
||||||
|
struct {
|
||||||
|
uint32_t edram_stencil_front[2];
|
||||||
|
uint32_t edram_stencil_back[2];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t edram_rt_base_dwords_scaled[4];
|
||||||
|
|
||||||
|
// RT format combined with RenderTargetCache::kPSIColorFormatFlag values
|
||||||
|
// (pass via RenderTargetCache::AddPSIColorFormatFlags).
|
||||||
|
uint32_t edram_rt_format_flags[4];
|
||||||
|
|
||||||
|
// Render target blending options - RB_BLENDCONTROL, with only the relevant
|
||||||
|
// options (factors and operations - AND 0x1FFF1FFF). If 0x00010001
|
||||||
|
// (1 * src + 0 * dst), blending is disabled for the render target.
|
||||||
|
uint32_t edram_rt_blend_factors_ops[4];
|
||||||
|
|
||||||
|
// Format info - mask to apply to the old packed RT data, and to apply as
|
||||||
|
// inverted to the new packed data, before storing (more or less the inverse
|
||||||
|
// of the write mask packed like render target channels). This can be used
|
||||||
|
// to bypass unpacking if blending is not used. If 0 and not blending,
|
||||||
|
// reading the old data from the EDRAM buffer is not required.
|
||||||
|
uint32_t edram_rt_keep_mask[4][2];
|
||||||
|
|
||||||
|
// Format info - values to clamp the color to before blending or storing.
|
||||||
|
// Low color, low alpha, high color, high alpha.
|
||||||
|
float edram_rt_clamp[4][4];
|
||||||
|
|
||||||
|
// The constant blend factor for the respective modes.
|
||||||
|
float edram_blend_constant[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ConstantBuffer : uint32_t {
|
enum ConstantBuffer : uint32_t {
|
||||||
|
@ -248,12 +326,22 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
uint32_t max_storage_buffer_range;
|
uint32_t max_storage_buffer_range;
|
||||||
bool clip_distance;
|
bool clip_distance;
|
||||||
bool cull_distance;
|
bool cull_distance;
|
||||||
|
bool demote_to_helper_invocation;
|
||||||
|
bool fragment_shader_sample_interlock;
|
||||||
bool full_draw_index_uint32;
|
bool full_draw_index_uint32;
|
||||||
bool image_view_format_swizzle;
|
bool image_view_format_swizzle;
|
||||||
bool signed_zero_inf_nan_preserve_float32;
|
bool signed_zero_inf_nan_preserve_float32;
|
||||||
bool denorm_flush_to_zero_float32;
|
bool denorm_flush_to_zero_float32;
|
||||||
};
|
};
|
||||||
SpirvShaderTranslator(const Features& features);
|
|
||||||
|
// Constructs the translator by storing the feature set and the three
// configuration flags (native 2x MSAA with attachments, native 2x MSAA without
// attachments, EDRAM fragment shader interlock) in the corresponding members.
// The constructor body is empty, no other work is done here.
SpirvShaderTranslator(const Features& features,
|
||||||
|
bool native_2x_msaa_with_attachments,
|
||||||
|
bool native_2x_msaa_no_attachments,
|
||||||
|
bool edram_fragment_shader_interlock)
|
||||||
|
: features_(features),
|
||||||
|
native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments),
|
||||||
|
native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments),
|
||||||
|
edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {}
|
||||||
|
|
||||||
uint64_t GetDefaultVertexShaderModification(
|
uint64_t GetDefaultVertexShaderModification(
|
||||||
uint32_t dynamic_addressable_register_count,
|
uint32_t dynamic_addressable_register_count,
|
||||||
|
@ -277,6 +365,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
features_.max_storage_buffer_range);
|
features_.max_storage_buffer_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a special fragment shader without color outputs - this resets the
|
||||||
|
// state of the translator.
|
||||||
|
std::vector<uint8_t> CreateDepthOnlyFragmentShader();
|
||||||
|
|
||||||
// Common functions useful not only for the translator, but also for EDRAM
|
// Common functions useful not only for the translator, but also for EDRAM
|
||||||
// emulation via conventional render targets.
|
// emulation via conventional render targets.
|
||||||
|
|
||||||
|
@ -385,10 +477,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsExecutionModeEarlyFragmentTests() const {
|
bool IsExecutionModeEarlyFragmentTests() const {
|
||||||
// TODO(Triang3l): Not applicable to fragment shader interlock.
|
|
||||||
return is_pixel_shader() &&
|
return is_pixel_shader() &&
|
||||||
GetSpirvShaderModification().pixel.depth_stencil_mode ==
|
GetSpirvShaderModification().pixel.depth_stencil_mode ==
|
||||||
Modification::DepthStencilMode::kEarlyHint &&
|
Modification::DepthStencilMode::kEarlyHint &&
|
||||||
|
!edram_fragment_shader_interlock_ &&
|
||||||
current_shader().implicit_early_z_write_allowed();
|
current_shader().implicit_early_z_write_allowed();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -528,7 +620,72 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id image_unsigned, spv::Id image_signed,
|
spv::Id image_unsigned, spv::Id image_signed,
|
||||||
spv::Id sampler, spv::Id is_all_signed);
|
spv::Id sampler, spv::Id is_all_signed);
|
||||||
|
|
||||||
|
spv::Id LoadMsaaSamplesFromFlags();
|
||||||
|
// Whether it's possible and worth skipping running the translated shader for
|
||||||
|
// 2x2 quads.
|
||||||
|
// Returns true only when all of the following hold: the depth-only fragment
// shader is not being written, the current shader does not write depth, and
// the current shader does not use valid memexport.
bool FSI_IsDepthStencilEarly() const {
|
||||||
|
// Expected to be called only when the fragment shader interlock path is used.
assert_true(edram_fragment_shader_interlock_);
|
||||||
|
// The depth-only flag is checked first, so current_shader() is not queried
// when the depth-only fragment shader is being written.
return !is_depth_only_fragment_shader_ &&
|
||||||
|
!current_shader().writes_depth() &&
|
||||||
|
!current_shader().is_valid_memexport_used();
|
||||||
|
}
|
||||||
|
void FSI_LoadSampleMask(spv::Id msaa_samples);
|
||||||
|
void FSI_LoadEdramOffsets(spv::Id msaa_samples);
|
||||||
|
// The address must be a signed int. Whether the render target is 64bpp, if
|
||||||
|
// present at all, must be a bool (if it's NoResult, 32bpp will be assumed).
|
||||||
|
spv::Id FSI_AddSampleOffset(spv::Id sample_0_address, uint32_t sample_index,
|
||||||
|
spv::Id is_64bpp = spv::NoResult);
|
||||||
|
// Updates main_fsi_sample_mask_. Must be called outside non-uniform control
|
||||||
|
// flow because of taking derivatives of the fragment depth.
|
||||||
|
void FSI_DepthStencilTest(spv::Id msaa_samples,
|
||||||
|
bool sample_mask_potentially_narrowed_previouly);
|
||||||
|
// Returns the first and the second 32 bits as two uints.
|
||||||
|
std::array<spv::Id, 2> FSI_ClampAndPackColor(spv::Id color_float4,
|
||||||
|
spv::Id format_with_flags);
|
||||||
|
std::array<spv::Id, 4> FSI_UnpackColor(std::array<spv::Id, 2> color_packed,
|
||||||
|
spv::Id format_with_flags);
|
||||||
|
// The bounds must have the same number of components as the color or alpha.
|
||||||
|
spv::Id FSI_FlushNaNClampAndInBlending(spv::Id color_or_alpha,
|
||||||
|
spv::Id is_fixed_point,
|
||||||
|
spv::Id min_value, spv::Id max_value);
|
||||||
|
spv::Id FSI_ApplyColorBlendFactor(spv::Id value, spv::Id is_fixed_point,
|
||||||
|
spv::Id clamp_min_value,
|
||||||
|
spv::Id clamp_max_value, spv::Id factor,
|
||||||
|
spv::Id source_color, spv::Id source_alpha,
|
||||||
|
spv::Id dest_color, spv::Id dest_alpha,
|
||||||
|
spv::Id constant_color,
|
||||||
|
spv::Id constant_alpha);
|
||||||
|
spv::Id FSI_ApplyAlphaBlendFactor(spv::Id value, spv::Id is_fixed_point,
|
||||||
|
spv::Id clamp_min_value,
|
||||||
|
spv::Id clamp_max_value, spv::Id factor,
|
||||||
|
spv::Id source_alpha, spv::Id dest_alpha,
|
||||||
|
spv::Id constant_alpha);
|
||||||
|
// If source_color_clamped, dest_color, constant_color_clamped are
|
||||||
|
// spv::NoResult, will blend the alpha. Otherwise, will blend the color.
|
||||||
|
// The result will be unclamped (color packing is supposed to clamp it).
|
||||||
|
spv::Id FSI_BlendColorOrAlphaWithUnclampedResult(
|
||||||
|
spv::Id is_fixed_point, spv::Id clamp_min_value, spv::Id clamp_max_value,
|
||||||
|
spv::Id source_color_clamped, spv::Id source_alpha_clamped,
|
||||||
|
spv::Id dest_color, spv::Id dest_alpha, spv::Id constant_color_clamped,
|
||||||
|
spv::Id constant_alpha_clamped, spv::Id equation, spv::Id source_factor,
|
||||||
|
spv::Id dest_factor);
|
||||||
|
|
||||||
Features features_;
|
Features features_;
|
||||||
|
bool native_2x_msaa_with_attachments_;
|
||||||
|
bool native_2x_msaa_no_attachments_;
|
||||||
|
|
||||||
|
// For safety with different drivers (even though fragment shader interlock in
|
||||||
|
// SPIR-V only has one control flow requirement - that both begin and end must
|
||||||
|
// be dynamically executed exactly once in this order), adhering to the more
|
||||||
|
// strict control flow limitations of OpenGL (GLSL) fragment shader interlock,
|
||||||
|
// that begin and end are called only on the outermost level of the control
|
||||||
|
// flow of the main function, and that there are no returns before either
|
||||||
|
// (there's a single return from the shader).
|
||||||
|
bool edram_fragment_shader_interlock_;
|
||||||
|
|
||||||
|
// Is currently writing the empty depth-only pixel shader, such as for depth
|
||||||
|
// and stencil testing with fragment shader interlock.
|
||||||
|
bool is_depth_only_fragment_shader_ = false;
|
||||||
|
|
||||||
std::unique_ptr<spv::Builder> builder_;
|
std::unique_ptr<spv::Builder> builder_;
|
||||||
|
|
||||||
|
@ -621,7 +778,23 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
kSystemConstantTextureSwizzledSigns,
|
kSystemConstantTextureSwizzledSigns,
|
||||||
kSystemConstantTextureSwizzles,
|
kSystemConstantTextureSwizzles,
|
||||||
kSystemConstantAlphaTestReference,
|
kSystemConstantAlphaTestReference,
|
||||||
|
kSystemConstantEdram32bppTilePitchDwordsScaled,
|
||||||
|
kSystemConstantEdramDepthBaseDwordsScaled,
|
||||||
kSystemConstantColorExpBias,
|
kSystemConstantColorExpBias,
|
||||||
|
kSystemConstantEdramPolyOffsetFrontScale,
|
||||||
|
kSystemConstantEdramPolyOffsetBackScale,
|
||||||
|
kSystemConstantEdramPolyOffsetFrontOffset,
|
||||||
|
kSystemConstantEdramPolyOffsetBackOffset,
|
||||||
|
kSystemConstantEdramStencilFront,
|
||||||
|
kSystemConstantEdramStencilBack,
|
||||||
|
kSystemConstantEdramRTBaseDwordsScaled,
|
||||||
|
kSystemConstantEdramRTFormatFlags,
|
||||||
|
kSystemConstantEdramRTBlendFactorsOps,
|
||||||
|
// Accessed as float4[2], not float2[4], due to std140 array stride
|
||||||
|
// alignment.
|
||||||
|
kSystemConstantEdramRTKeepMask,
|
||||||
|
kSystemConstantEdramRTClamp,
|
||||||
|
kSystemConstantEdramBlendConstant,
|
||||||
};
|
};
|
||||||
spv::Id uniform_system_constants_;
|
spv::Id uniform_system_constants_;
|
||||||
spv::Id uniform_float_constants_;
|
spv::Id uniform_float_constants_;
|
||||||
|
@ -629,6 +802,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id uniform_fetch_constants_;
|
spv::Id uniform_fetch_constants_;
|
||||||
|
|
||||||
spv::Id buffers_shared_memory_;
|
spv::Id buffers_shared_memory_;
|
||||||
|
spv::Id buffer_edram_;
|
||||||
|
|
||||||
// Not using combined images and samplers because
|
// Not using combined images and samplers because
|
||||||
// maxPerStageDescriptorSamplers is often lower than
|
// maxPerStageDescriptorSamplers is often lower than
|
||||||
|
@ -647,6 +821,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id input_fragment_coordinates_;
|
spv::Id input_fragment_coordinates_;
|
||||||
// PS, only when needed - bool.
|
// PS, only when needed - bool.
|
||||||
spv::Id input_front_facing_;
|
spv::Id input_front_facing_;
|
||||||
|
// PS, only when needed - int[1].
|
||||||
|
spv::Id input_sample_mask_;
|
||||||
|
|
||||||
// VS output or PS input, only the ones that are needed (spv::NoResult for the
|
// VS output or PS input, only the ones that are needed (spv::NoResult for the
|
||||||
// unneeded interpolators), indexed by the guest interpolator index - float4.
|
// unneeded interpolators), indexed by the guest interpolator index - float4.
|
||||||
|
@ -671,7 +847,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
};
|
};
|
||||||
spv::Id output_per_vertex_;
|
spv::Id output_per_vertex_;
|
||||||
|
|
||||||
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_fragment_data_;
|
// With fragment shader interlock, variables in the main function.
|
||||||
|
// Otherwise, framebuffer color attachment outputs.
|
||||||
|
std::array<spv::Id, xenos::kMaxColorRenderTargets>
|
||||||
|
output_or_var_fragment_data_;
|
||||||
|
|
||||||
std::vector<spv::Id> main_interface_;
|
std::vector<spv::Id> main_interface_;
|
||||||
spv::Function* function_main_;
|
spv::Function* function_main_;
|
||||||
|
@ -698,6 +877,40 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
||||||
spv::Id var_main_registers_;
|
spv::Id var_main_registers_;
|
||||||
// VS only - float3 (special exports).
|
// VS only - float3 (special exports).
|
||||||
spv::Id var_main_point_size_edge_flag_kill_vertex_;
|
spv::Id var_main_point_size_edge_flag_kill_vertex_;
|
||||||
|
// PS, only when needed - bool.
|
||||||
|
spv::Id var_main_kill_pixel_;
|
||||||
|
// PS, only when writing to color render targets with fragment shader
|
||||||
|
// interlock - uint.
|
||||||
|
// Whether color buffers have been written to. If not written on the taken
|
||||||
|
// execution path, don't export according to Direct3D 9 register documentation
|
||||||
|
// (some games rely on this behavior).
|
||||||
|
spv::Id var_main_fsi_color_written_;
|
||||||
|
// Loaded by FSI_LoadSampleMask.
|
||||||
|
// Can be modified on the outermost control flow level in the main function.
|
||||||
|
// 0:3 - Per-sample coverage at the current stage of the shader's execution.
|
||||||
|
// Affected by things like gl_SampleMaskIn, early or late depth /
|
||||||
|
// stencil (always resets bits for failing, no matter if need to defer
|
||||||
|
// writing), alpha to coverage.
|
||||||
|
// 4:7 - Depth write deferred mask - when early depth / stencil resulted in a
|
||||||
|
// different value for the sample (like different stencil if the test
|
||||||
|
// failed), but can't write it before running the shader because it's
|
||||||
|
// not known if the sample will be discarded by the shader, alphatest or
|
||||||
|
// AtoC.
|
||||||
|
// Early depth / stencil rejection of the pixel is possible when both 0:3 and
|
||||||
|
// 4:7 are zero.
|
||||||
|
spv::Id main_fsi_sample_mask_;
|
||||||
|
// Loaded by FSI_LoadEdramOffsets.
|
||||||
|
// Including the depth render target base.
|
||||||
|
spv::Id main_fsi_address_depth_;
|
||||||
|
// Not including the render target base.
|
||||||
|
spv::Id main_fsi_offset_32bpp_;
|
||||||
|
spv::Id main_fsi_offset_64bpp_;
|
||||||
|
// Loaded by FSI_DepthStencilTest for early depth / stencil, the depth /
|
||||||
|
// stencil values to write at the end of the shader if specified in
|
||||||
|
// main_fsi_sample_mask_ and if the samples were not discarded later after the
|
||||||
|
// early test.
|
||||||
|
std::array<spv::Id, 4> main_fsi_late_write_depth_stencil_;
|
||||||
|
spv::Block* main_fsi_early_depth_stencil_execute_quad_merge_;
|
||||||
spv::Block* main_loop_header_;
|
spv::Block* main_loop_header_;
|
||||||
spv::Block* main_loop_continue_;
|
spv::Block* main_loop_continue_;
|
||||||
spv::Block* main_loop_merge_;
|
spv::Block* main_loop_merge_;
|
||||||
|
|
|
@ -123,7 +123,7 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||||
: spv::NoType;
|
: spv::NoType;
|
||||||
|
|
||||||
// In case the paired scalar instruction (if processed first) terminates the
|
// In case the paired scalar instruction (if processed first) terminates the
|
||||||
// block (like via OpKill).
|
// block.
|
||||||
EnsureBuildPointAvailable();
|
EnsureBuildPointAvailable();
|
||||||
|
|
||||||
// Lookup table for variants of instructions with similar structure.
|
// Lookup table for variants of instructions with similar structure.
|
||||||
|
@ -838,9 +838,15 @@ spv::Id SpirvShaderTranslator::ProcessVectorAluOperation(
|
||||||
SpirvCreateSelectionMerge(merge_block.getId());
|
SpirvCreateSelectionMerge(merge_block.getId());
|
||||||
builder_->createConditionalBranch(condition, &kill_block, &merge_block);
|
builder_->createConditionalBranch(condition, &kill_block, &merge_block);
|
||||||
builder_->setBuildPoint(&kill_block);
|
builder_->setBuildPoint(&kill_block);
|
||||||
// TODO(Triang3l): Demote to helper invocation to keep derivatives if
|
// Kill without influencing the control flow in the translated shader.
|
||||||
// needed (and return 1 if killed in this case).
|
if (var_main_kill_pixel_ != spv::NoResult) {
|
||||||
builder_->createNoResultOp(spv::OpKill);
|
builder_->createStore(builder_->makeBoolConstant(true),
|
||||||
|
var_main_kill_pixel_);
|
||||||
|
}
|
||||||
|
if (features_.demote_to_helper_invocation) {
|
||||||
|
builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT);
|
||||||
|
}
|
||||||
|
builder_->createBranch(&merge_block);
|
||||||
builder_->setBuildPoint(&merge_block);
|
builder_->setBuildPoint(&merge_block);
|
||||||
return const_float_0_;
|
return const_float_0_;
|
||||||
}
|
}
|
||||||
|
@ -938,7 +944,7 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
|
||||||
}
|
}
|
||||||
|
|
||||||
// In case the paired vector instruction (if processed first) terminates the
|
// In case the paired vector instruction (if processed first) terminates the
|
||||||
// block (like via OpKill).
|
// block.
|
||||||
EnsureBuildPointAvailable();
|
EnsureBuildPointAvailable();
|
||||||
|
|
||||||
// Lookup table for variants of instructions with similar structure.
|
// Lookup table for variants of instructions with similar structure.
|
||||||
|
@ -1393,9 +1399,15 @@ spv::Id SpirvShaderTranslator::ProcessScalarAluOperation(
|
||||||
SpirvCreateSelectionMerge(merge_block.getId());
|
SpirvCreateSelectionMerge(merge_block.getId());
|
||||||
builder_->createConditionalBranch(condition, &kill_block, &merge_block);
|
builder_->createConditionalBranch(condition, &kill_block, &merge_block);
|
||||||
builder_->setBuildPoint(&kill_block);
|
builder_->setBuildPoint(&kill_block);
|
||||||
// TODO(Triang3l): Demote to helper invocation to keep derivatives if
|
// Kill without influencing the control flow in the translated shader.
|
||||||
// needed (and return 1 if killed in this case).
|
if (var_main_kill_pixel_ != spv::NoResult) {
|
||||||
builder_->createNoResultOp(spv::OpKill);
|
builder_->createStore(builder_->makeBoolConstant(true),
|
||||||
|
var_main_kill_pixel_);
|
||||||
|
}
|
||||||
|
if (features_.demote_to_helper_invocation) {
|
||||||
|
builder_->createNoResultOp(spv::OpDemoteToHelperInvocationEXT);
|
||||||
|
}
|
||||||
|
builder_->createBranch(&merge_block);
|
||||||
builder_->setBuildPoint(&merge_block);
|
builder_->setBuildPoint(&merge_block);
|
||||||
return const_float_0_;
|
return const_float_0_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1898,30 +1898,14 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
builder_->setBuildPoint(&block_dimension_stacked_start);
|
builder_->setBuildPoint(&block_dimension_stacked_start);
|
||||||
if (use_computed_lod) {
|
if (use_computed_lod) {
|
||||||
// Extract 2D gradients for stacked textures which are 2D arrays.
|
// Extract 2D gradients for stacked textures which are 2D arrays.
|
||||||
{
|
uint_vector_temp_.clear();
|
||||||
std::unique_ptr<spv::Instruction> shuffle_op =
|
uint_vector_temp_.reserve(2);
|
||||||
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
|
uint_vector_temp_.push_back(0);
|
||||||
type_float2_,
|
uint_vector_temp_.push_back(1);
|
||||||
spv::OpVectorShuffle);
|
texture_parameters.gradX = builder_->createRvalueSwizzle(
|
||||||
shuffle_op->addIdOperand(gradients_h);
|
spv::NoPrecision, type_float2_, gradients_h, uint_vector_temp_);
|
||||||
shuffle_op->addIdOperand(gradients_h);
|
texture_parameters.gradY = builder_->createRvalueSwizzle(
|
||||||
shuffle_op->addImmediateOperand(0);
|
spv::NoPrecision, type_float2_, gradients_v, uint_vector_temp_);
|
||||||
shuffle_op->addImmediateOperand(1);
|
|
||||||
texture_parameters.gradX = shuffle_op->getResultId();
|
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(shuffle_op));
|
|
||||||
}
|
|
||||||
{
|
|
||||||
std::unique_ptr<spv::Instruction> shuffle_op =
|
|
||||||
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
|
|
||||||
type_float2_,
|
|
||||||
spv::OpVectorShuffle);
|
|
||||||
shuffle_op->addIdOperand(gradients_v);
|
|
||||||
shuffle_op->addIdOperand(gradients_v);
|
|
||||||
shuffle_op->addImmediateOperand(0);
|
|
||||||
shuffle_op->addImmediateOperand(1);
|
|
||||||
texture_parameters.gradY = shuffle_op->getResultId();
|
|
||||||
builder_->getBuildPoint()->addInstruction(std::move(shuffle_op));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Check if linear filtering is needed.
|
// Check if linear filtering is needed.
|
||||||
bool vol_mag_filter_is_fetch_const =
|
bool vol_mag_filter_is_fetch_const =
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -68,9 +68,6 @@ const VkDescriptorPoolSize
|
||||||
{VK_DESCRIPTOR_TYPE_SAMPLER, kLinkedTypeDescriptorPoolSetCount},
|
{VK_DESCRIPTOR_TYPE_SAMPLER, kLinkedTypeDescriptorPoolSetCount},
|
||||||
};
|
};
|
||||||
|
|
||||||
// No specific reason for 32768 descriptors, just the "too much" amount from
|
|
||||||
// Direct3D 12 PIX warnings. 2x descriptors for textures because of unsigned and
|
|
||||||
// signed bindings.
|
|
||||||
VulkanCommandProcessor::VulkanCommandProcessor(
|
VulkanCommandProcessor::VulkanCommandProcessor(
|
||||||
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
|
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
|
||||||
: CommandProcessor(graphics_system, kernel_state),
|
: CommandProcessor(graphics_system, kernel_state),
|
||||||
|
@ -107,6 +104,32 @@ void VulkanCommandProcessor::TracePlaybackWroteMemory(uint32_t base_ptr,
|
||||||
|
|
||||||
void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {}
|
void VulkanCommandProcessor::RestoreEdramSnapshot(const void* snapshot) {}
|
||||||
|
|
||||||
|
// Builds the text for the window title. The result always starts with
// "Vulkan" and ends with the development status notice. When a render target
// cache exists, a tag for its path (" - FBO" or " - FSI") and, if above 1x1,
// the draw resolution scale are inserted in between.
std::string VulkanCommandProcessor::GetWindowTitleText() const {
|
||||||
|
std::ostringstream title;
|
||||||
|
title << "Vulkan";
|
||||||
|
// The path and scale details are only added once the render target cache
// exists.
if (render_target_cache_) {
|
||||||
|
switch (render_target_cache_->GetPath()) {
|
||||||
|
case RenderTargetCache::Path::kHostRenderTargets:
|
||||||
|
title << " - FBO";
|
||||||
|
break;
|
||||||
|
case RenderTargetCache::Path::kPixelShaderInterlock:
|
||||||
|
title << " - FSI";
|
||||||
|
break;
|
||||||
|
// Any other path value adds no tag.
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Without a texture cache, both scale factors fall back to 1.
uint32_t draw_resolution_scale_x =
|
||||||
|
texture_cache_ ? texture_cache_->draw_resolution_scale_x() : 1;
|
||||||
|
uint32_t draw_resolution_scale_y =
|
||||||
|
texture_cache_ ? texture_cache_->draw_resolution_scale_y() : 1;
|
||||||
|
// The scale is shown as "<x>x<y>" only when it is greater than 1 along
// either axis.
if (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) {
|
||||||
|
title << ' ' << draw_resolution_scale_x << 'x' << draw_resolution_scale_y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
title << " - HEAVILY INCOMPLETE, early development";
|
||||||
|
return title.str();
|
||||||
|
}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::SetupContext() {
|
bool VulkanCommandProcessor::SetupContext() {
|
||||||
if (!CommandProcessor::SetupContext()) {
|
if (!CommandProcessor::SetupContext()) {
|
||||||
XELOGE("Failed to initialize base command processor context");
|
XELOGE("Failed to initialize base command processor context");
|
||||||
|
@ -147,7 +170,7 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
size_t(16384)),
|
size_t(16384)),
|
||||||
size_t(uniform_buffer_alignment)));
|
size_t(uniform_buffer_alignment)));
|
||||||
|
|
||||||
// Descriptor set layouts.
|
// Descriptor set layouts that don't depend on the setup of other subsystems.
|
||||||
VkShaderStageFlags guest_shader_stages =
|
VkShaderStageFlags guest_shader_stages =
|
||||||
guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT;
|
guest_shader_vertex_stages_ | VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
// Empty.
|
// Empty.
|
||||||
|
@ -164,37 +187,6 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
XELOGE("Failed to create an empty Vulkan descriptor set layout");
|
XELOGE("Failed to create an empty Vulkan descriptor set layout");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Shared memory and EDRAM.
|
|
||||||
uint32_t shared_memory_binding_count_log2 =
|
|
||||||
SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2(
|
|
||||||
provider.device_properties().limits.maxStorageBufferRange);
|
|
||||||
uint32_t shared_memory_binding_count = UINT32_C(1)
|
|
||||||
<< shared_memory_binding_count_log2;
|
|
||||||
VkDescriptorSetLayoutBinding
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[1];
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[0].binding = 0;
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorType =
|
|
||||||
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[0].descriptorCount =
|
|
||||||
shared_memory_binding_count;
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[0].stageFlags =
|
|
||||||
guest_shader_stages;
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram[0].pImmutableSamplers =
|
|
||||||
nullptr;
|
|
||||||
// TODO(Triang3l): EDRAM storage image binding for the fragment shader
|
|
||||||
// interlocks case.
|
|
||||||
descriptor_set_layout_create_info.bindingCount = uint32_t(
|
|
||||||
xe::countof(descriptor_set_layout_bindings_shared_memory_and_edram));
|
|
||||||
descriptor_set_layout_create_info.pBindings =
|
|
||||||
descriptor_set_layout_bindings_shared_memory_and_edram;
|
|
||||||
if (dfn.vkCreateDescriptorSetLayout(
|
|
||||||
device, &descriptor_set_layout_create_info, nullptr,
|
|
||||||
&descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) {
|
|
||||||
XELOGE(
|
|
||||||
"Failed to create a Vulkan descriptor set layout for the shared memory "
|
|
||||||
"and the EDRAM");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Guest draw constants.
|
// Guest draw constants.
|
||||||
VkDescriptorSetLayoutBinding descriptor_set_layout_bindings_constants
|
VkDescriptorSetLayoutBinding descriptor_set_layout_bindings_constants
|
||||||
[SpirvShaderTranslator::kConstantBufferCount] = {};
|
[SpirvShaderTranslator::kConstantBufferCount] = {};
|
||||||
|
@ -290,16 +282,70 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t shared_memory_binding_count_log2 =
|
||||||
|
SpirvShaderTranslator::GetSharedMemoryStorageBufferCountLog2(
|
||||||
|
provider.device_properties().limits.maxStorageBufferRange);
|
||||||
|
uint32_t shared_memory_binding_count = UINT32_C(1)
|
||||||
|
<< shared_memory_binding_count_log2;
|
||||||
|
|
||||||
// Requires the transient descriptor set layouts.
|
// Requires the transient descriptor set layouts.
|
||||||
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
|
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
|
||||||
// supports resolution scaling.
|
// supports resolution scaling.
|
||||||
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
||||||
*register_file_, *memory_, trace_writer_, 1, 1, *this);
|
*register_file_, *memory_, trace_writer_, 1, 1, *this);
|
||||||
if (!render_target_cache_->Initialize()) {
|
if (!render_target_cache_->Initialize(shared_memory_binding_count)) {
|
||||||
XELOGE("Failed to initialize the render target cache");
|
XELOGE("Failed to initialize the render target cache");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shared memory and EDRAM descriptor set layout.
|
||||||
|
bool edram_fragment_shader_interlock =
|
||||||
|
render_target_cache_->GetPath() ==
|
||||||
|
RenderTargetCache::Path::kPixelShaderInterlock;
|
||||||
|
VkDescriptorSetLayoutBinding
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[2];
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[0].binding = 0;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[0].descriptorType =
|
||||||
|
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[0].descriptorCount =
|
||||||
|
shared_memory_binding_count;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[0].stageFlags =
|
||||||
|
guest_shader_stages;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[0].pImmutableSamplers =
|
||||||
|
nullptr;
|
||||||
|
VkDescriptorSetLayoutCreateInfo
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.sType =
|
||||||
|
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.pNext = nullptr;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.flags = 0;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.pBindings =
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings;
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
// EDRAM.
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[1].binding = 1;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[1].descriptorType =
|
||||||
|
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[1].descriptorCount =
|
||||||
|
1;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[1].stageFlags =
|
||||||
|
VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_bindings[1]
|
||||||
|
.pImmutableSamplers = nullptr;
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.bindingCount = 2;
|
||||||
|
} else {
|
||||||
|
shared_memory_and_edram_descriptor_set_layout_create_info.bindingCount = 1;
|
||||||
|
}
|
||||||
|
if (dfn.vkCreateDescriptorSetLayout(
|
||||||
|
device, &shared_memory_and_edram_descriptor_set_layout_create_info,
|
||||||
|
nullptr,
|
||||||
|
&descriptor_set_layout_shared_memory_and_edram_) != VK_SUCCESS) {
|
||||||
|
XELOGE(
|
||||||
|
"Failed to create a Vulkan descriptor set layout for the shared memory "
|
||||||
|
"and the EDRAM");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
pipeline_cache_ = std::make_unique<VulkanPipelineCache>(
|
pipeline_cache_ = std::make_unique<VulkanPipelineCache>(
|
||||||
*this, *register_file_, *render_target_cache_,
|
*this, *register_file_, *render_target_cache_,
|
||||||
guest_shader_vertex_stages_);
|
guest_shader_vertex_stages_);
|
||||||
|
@ -321,9 +367,8 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
// Shared memory and EDRAM common bindings.
|
// Shared memory and EDRAM common bindings.
|
||||||
VkDescriptorPoolSize descriptor_pool_sizes[1];
|
VkDescriptorPoolSize descriptor_pool_sizes[1];
|
||||||
descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
descriptor_pool_sizes[0].descriptorCount = shared_memory_binding_count;
|
descriptor_pool_sizes[0].descriptorCount =
|
||||||
// TODO(Triang3l): EDRAM storage image binding for the fragment shader
|
shared_memory_binding_count + uint32_t(edram_fragment_shader_interlock);
|
||||||
// interlocks case.
|
|
||||||
VkDescriptorPoolCreateInfo descriptor_pool_create_info;
|
VkDescriptorPoolCreateInfo descriptor_pool_create_info;
|
||||||
descriptor_pool_create_info.sType =
|
descriptor_pool_create_info.sType =
|
||||||
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
|
||||||
|
@ -370,20 +415,45 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
shared_memory_binding_range * i;
|
shared_memory_binding_range * i;
|
||||||
shared_memory_descriptor_buffer_info.range = shared_memory_binding_range;
|
shared_memory_descriptor_buffer_info.range = shared_memory_binding_range;
|
||||||
}
|
}
|
||||||
VkWriteDescriptorSet write_descriptor_sets[1];
|
VkWriteDescriptorSet write_descriptor_sets[2];
|
||||||
write_descriptor_sets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
VkWriteDescriptorSet& write_descriptor_set_shared_memory =
|
||||||
write_descriptor_sets[0].pNext = nullptr;
|
write_descriptor_sets[0];
|
||||||
write_descriptor_sets[0].dstSet = shared_memory_and_edram_descriptor_set_;
|
write_descriptor_set_shared_memory.sType =
|
||||||
write_descriptor_sets[0].dstBinding = 0;
|
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||||
write_descriptor_sets[0].dstArrayElement = 0;
|
write_descriptor_set_shared_memory.pNext = nullptr;
|
||||||
write_descriptor_sets[0].descriptorCount = shared_memory_binding_count;
|
write_descriptor_set_shared_memory.dstSet =
|
||||||
write_descriptor_sets[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
shared_memory_and_edram_descriptor_set_;
|
||||||
write_descriptor_sets[0].pImageInfo = nullptr;
|
write_descriptor_set_shared_memory.dstBinding = 0;
|
||||||
write_descriptor_sets[0].pBufferInfo = shared_memory_descriptor_buffers_info;
|
write_descriptor_set_shared_memory.dstArrayElement = 0;
|
||||||
write_descriptor_sets[0].pTexelBufferView = nullptr;
|
write_descriptor_set_shared_memory.descriptorCount =
|
||||||
// TODO(Triang3l): EDRAM storage image binding for the fragment shader
|
shared_memory_binding_count;
|
||||||
// interlocks case.
|
write_descriptor_set_shared_memory.descriptorType =
|
||||||
dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr);
|
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
|
write_descriptor_set_shared_memory.pImageInfo = nullptr;
|
||||||
|
write_descriptor_set_shared_memory.pBufferInfo =
|
||||||
|
shared_memory_descriptor_buffers_info;
|
||||||
|
write_descriptor_set_shared_memory.pTexelBufferView = nullptr;
|
||||||
|
VkDescriptorBufferInfo edram_descriptor_buffer_info;
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
edram_descriptor_buffer_info.buffer = render_target_cache_->edram_buffer();
|
||||||
|
edram_descriptor_buffer_info.offset = 0;
|
||||||
|
edram_descriptor_buffer_info.range = VK_WHOLE_SIZE;
|
||||||
|
VkWriteDescriptorSet& write_descriptor_set_edram = write_descriptor_sets[1];
|
||||||
|
write_descriptor_set_edram.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||||
|
write_descriptor_set_edram.pNext = nullptr;
|
||||||
|
write_descriptor_set_edram.dstSet = shared_memory_and_edram_descriptor_set_;
|
||||||
|
write_descriptor_set_edram.dstBinding = 1;
|
||||||
|
write_descriptor_set_edram.dstArrayElement = 0;
|
||||||
|
write_descriptor_set_edram.descriptorCount = 1;
|
||||||
|
write_descriptor_set_edram.descriptorType =
|
||||||
|
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||||
|
write_descriptor_set_edram.pImageInfo = nullptr;
|
||||||
|
write_descriptor_set_edram.pBufferInfo = &edram_descriptor_buffer_info;
|
||||||
|
write_descriptor_set_edram.pTexelBufferView = nullptr;
|
||||||
|
}
|
||||||
|
dfn.vkUpdateDescriptorSets(device,
|
||||||
|
1 + uint32_t(edram_fragment_shader_interlock),
|
||||||
|
write_descriptor_sets, 0, nullptr);
|
||||||
|
|
||||||
// Swap objects.
|
// Swap objects.
|
||||||
|
|
||||||
|
@ -1042,6 +1112,9 @@ void VulkanCommandProcessor::ShutdownContext() {
|
||||||
}
|
}
|
||||||
descriptor_set_layouts_textures_.clear();
|
descriptor_set_layouts_textures_.clear();
|
||||||
|
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(
|
||||||
|
dfn.vkDestroyDescriptorSetLayout, device,
|
||||||
|
descriptor_set_layout_shared_memory_and_edram_);
|
||||||
for (VkDescriptorSetLayout& descriptor_set_layout_single_transient :
|
for (VkDescriptorSetLayout& descriptor_set_layout_single_transient :
|
||||||
descriptor_set_layouts_single_transient_) {
|
descriptor_set_layouts_single_transient_) {
|
||||||
ui::vulkan::util::DestroyAndNullHandle(
|
ui::vulkan::util::DestroyAndNullHandle(
|
||||||
|
@ -1051,9 +1124,6 @@ void VulkanCommandProcessor::ShutdownContext() {
|
||||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
|
||||||
device,
|
device,
|
||||||
descriptor_set_layout_constants_);
|
descriptor_set_layout_constants_);
|
||||||
ui::vulkan::util::DestroyAndNullHandle(
|
|
||||||
dfn.vkDestroyDescriptorSetLayout, device,
|
|
||||||
descriptor_set_layout_shared_memory_and_edram_);
|
|
||||||
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
|
||||||
device, descriptor_set_layout_empty_);
|
device, descriptor_set_layout_empty_);
|
||||||
|
|
||||||
|
@ -2415,7 +2485,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
// Update system constants before uploading them.
|
// Update system constants before uploading them.
|
||||||
UpdateSystemConstantValues(primitive_polygonal, primitive_processing_result,
|
UpdateSystemConstantValues(primitive_polygonal, primitive_processing_result,
|
||||||
shader_32bit_index_dma, viewport_info,
|
shader_32bit_index_dma, viewport_info,
|
||||||
used_texture_mask);
|
used_texture_mask, normalized_depth_control,
|
||||||
|
normalized_color_mask);
|
||||||
|
|
||||||
// Update uniform buffers and descriptor sets after binding the pipeline with
|
// Update uniform buffers and descriptor sets after binding the pipeline with
|
||||||
// the new layout.
|
// the new layout.
|
||||||
|
@ -2475,6 +2546,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
||||||
// After all commands that may dispatch, copy or insert barriers, submit the
|
// After all commands that may dispatch, copy or insert barriers, submit the
|
||||||
// barriers (may end the render pass), and (re)enter the render pass before
|
// barriers (may end the render pass), and (re)enter the render pass before
|
||||||
// drawing.
|
// drawing.
|
||||||
|
// TODO(Triang3l): Handle disabled variableMultisampleRate by restarting the
|
||||||
|
// render pass with no attachments if the sample count becomes different.
|
||||||
SubmitBarriersAndEnterRenderTargetCacheRenderPass(
|
SubmitBarriersAndEnterRenderTargetCacheRenderPass(
|
||||||
render_target_cache_->last_update_render_pass(),
|
render_target_cache_->last_update_render_pass(),
|
||||||
render_target_cache_->last_update_framebuffer());
|
render_target_cache_->last_update_framebuffer());
|
||||||
|
@ -3194,9 +3267,9 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
scissor_rect.extent.height = scissor.extent[1];
|
scissor_rect.extent.height = scissor.extent[1];
|
||||||
SetScissor(scissor_rect);
|
SetScissor(scissor_rect);
|
||||||
|
|
||||||
|
if (render_target_cache_->GetPath() ==
|
||||||
|
RenderTargetCache::Path::kHostRenderTargets) {
|
||||||
// Depth bias.
|
// Depth bias.
|
||||||
// TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB
|
|
||||||
// implementation.
|
|
||||||
float depth_bias_constant_factor, depth_bias_slope_factor;
|
float depth_bias_constant_factor, depth_bias_slope_factor;
|
||||||
draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal,
|
draw_util::GetPreferredFacePolygonOffset(regs, primitive_polygonal,
|
||||||
depth_bias_slope_factor,
|
depth_bias_slope_factor,
|
||||||
|
@ -3207,9 +3280,9 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
? draw_util::kD3D10PolygonOffsetFactorUnorm24
|
? draw_util::kD3D10PolygonOffsetFactorUnorm24
|
||||||
: draw_util::kD3D10PolygonOffsetFactorFloat24;
|
: draw_util::kD3D10PolygonOffsetFactorFloat24;
|
||||||
// With non-square resolution scaling, make sure the worst-case impact is
|
// With non-square resolution scaling, make sure the worst-case impact is
|
||||||
// reverted (slope only along the scaled axis), thus max. More bias is better
|
// reverted (slope only along the scaled axis), thus max. More bias is
|
||||||
// than less bias, because less bias means Z fighting with the background is
|
// better than less bias, because less bias means Z fighting with the
|
||||||
// more likely.
|
// background is more likely.
|
||||||
depth_bias_slope_factor *=
|
depth_bias_slope_factor *=
|
||||||
xenos::kPolygonOffsetScaleSubpixelUnit *
|
xenos::kPolygonOffsetScaleSubpixelUnit *
|
||||||
float(std::max(render_target_cache_->draw_resolution_scale_x(),
|
float(std::max(render_target_cache_->draw_resolution_scale_x(),
|
||||||
|
@ -3249,12 +3322,12 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
|
|
||||||
// Stencil masks and references.
|
// Stencil masks and references.
|
||||||
// Due to pretty complex conditions involving registers not directly related
|
// Due to pretty complex conditions involving registers not directly related
|
||||||
// to stencil (primitive type, culling), changing the values only when stencil
|
// to stencil (primitive type, culling), changing the values only when
|
||||||
// is actually needed. However, due to the way dynamic state needs to be set
|
// stencil is actually needed. However, due to the way dynamic state needs
|
||||||
// in Vulkan, which doesn't take into account whether the state actually has
|
// to be set in Vulkan, which doesn't take into account whether the state
|
||||||
// effect on drawing, and because the masks and the references are always
|
// actually has effect on drawing, and because the masks and the references
|
||||||
// dynamic in Xenia guest pipelines, they must be set in the command buffer
|
// are always dynamic in Xenia guest pipelines, they must be set in the
|
||||||
// before any draw.
|
// command buffer before any draw.
|
||||||
if (normalized_depth_control.stencil_enable) {
|
if (normalized_depth_control.stencil_enable) {
|
||||||
Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg;
|
Register stencil_ref_mask_front_reg, stencil_ref_mask_back_reg;
|
||||||
if (primitive_polygonal && normalized_depth_control.backface_enable) {
|
if (primitive_polygonal && normalized_depth_control.backface_enable) {
|
||||||
|
@ -3288,13 +3361,15 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
stencil_ref_mask_front.stencilmask;
|
stencil_ref_mask_front.stencilmask;
|
||||||
dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask;
|
dynamic_stencil_compare_mask_front_ = stencil_ref_mask_front.stencilmask;
|
||||||
dynamic_stencil_compare_mask_back_update_needed_ |=
|
dynamic_stencil_compare_mask_back_update_needed_ |=
|
||||||
dynamic_stencil_compare_mask_back_ != stencil_ref_mask_back.stencilmask;
|
dynamic_stencil_compare_mask_back_ !=
|
||||||
|
stencil_ref_mask_back.stencilmask;
|
||||||
dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask;
|
dynamic_stencil_compare_mask_back_ = stencil_ref_mask_back.stencilmask;
|
||||||
// Write mask.
|
// Write mask.
|
||||||
dynamic_stencil_write_mask_front_update_needed_ |=
|
dynamic_stencil_write_mask_front_update_needed_ |=
|
||||||
dynamic_stencil_write_mask_front_ !=
|
dynamic_stencil_write_mask_front_ !=
|
||||||
stencil_ref_mask_front.stencilwritemask;
|
stencil_ref_mask_front.stencilwritemask;
|
||||||
dynamic_stencil_write_mask_front_ = stencil_ref_mask_front.stencilwritemask;
|
dynamic_stencil_write_mask_front_ =
|
||||||
|
stencil_ref_mask_front.stencilwritemask;
|
||||||
dynamic_stencil_write_mask_back_update_needed_ |=
|
dynamic_stencil_write_mask_back_update_needed_ |=
|
||||||
dynamic_stencil_write_mask_back_ !=
|
dynamic_stencil_write_mask_back_ !=
|
||||||
stencil_ref_mask_back.stencilwritemask;
|
stencil_ref_mask_back.stencilwritemask;
|
||||||
|
@ -3307,14 +3382,15 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref;
|
dynamic_stencil_reference_back_ != stencil_ref_mask_back.stencilref;
|
||||||
dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref;
|
dynamic_stencil_reference_back_ = stencil_ref_mask_back.stencilref;
|
||||||
}
|
}
|
||||||
// Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on the
|
// Using VK_STENCIL_FACE_FRONT_AND_BACK for higher safety when running on
|
||||||
// Vulkan portability subset without separateStencilMaskRef.
|
// the Vulkan portability subset without separateStencilMaskRef.
|
||||||
if (dynamic_stencil_compare_mask_front_update_needed_ ||
|
if (dynamic_stencil_compare_mask_front_update_needed_ ||
|
||||||
dynamic_stencil_compare_mask_back_update_needed_) {
|
dynamic_stencil_compare_mask_back_update_needed_) {
|
||||||
if (dynamic_stencil_compare_mask_front_ ==
|
if (dynamic_stencil_compare_mask_front_ ==
|
||||||
dynamic_stencil_compare_mask_back_) {
|
dynamic_stencil_compare_mask_back_) {
|
||||||
deferred_command_buffer_.CmdVkSetStencilCompareMask(
|
deferred_command_buffer_.CmdVkSetStencilCompareMask(
|
||||||
VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_compare_mask_front_);
|
VK_STENCIL_FACE_FRONT_AND_BACK,
|
||||||
|
dynamic_stencil_compare_mask_front_);
|
||||||
} else {
|
} else {
|
||||||
if (dynamic_stencil_compare_mask_front_update_needed_) {
|
if (dynamic_stencil_compare_mask_front_update_needed_) {
|
||||||
deferred_command_buffer_.CmdVkSetStencilCompareMask(
|
deferred_command_buffer_.CmdVkSetStencilCompareMask(
|
||||||
|
@ -3330,7 +3406,8 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
}
|
}
|
||||||
if (dynamic_stencil_write_mask_front_update_needed_ ||
|
if (dynamic_stencil_write_mask_front_update_needed_ ||
|
||||||
dynamic_stencil_write_mask_back_update_needed_) {
|
dynamic_stencil_write_mask_back_update_needed_) {
|
||||||
if (dynamic_stencil_write_mask_front_ == dynamic_stencil_write_mask_back_) {
|
if (dynamic_stencil_write_mask_front_ ==
|
||||||
|
dynamic_stencil_write_mask_back_) {
|
||||||
deferred_command_buffer_.CmdVkSetStencilWriteMask(
|
deferred_command_buffer_.CmdVkSetStencilWriteMask(
|
||||||
VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_);
|
VK_STENCIL_FACE_FRONT_AND_BACK, dynamic_stencil_write_mask_front_);
|
||||||
} else {
|
} else {
|
||||||
|
@ -3364,6 +3441,7 @@ void VulkanCommandProcessor::UpdateDynamicState(
|
||||||
dynamic_stencil_reference_front_update_needed_ = false;
|
dynamic_stencil_reference_front_update_needed_ = false;
|
||||||
dynamic_stencil_reference_back_update_needed_ = false;
|
dynamic_stencil_reference_back_update_needed_ = false;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): VK_EXT_extended_dynamic_state and
|
// TODO(Triang3l): VK_EXT_extended_dynamic_state and
|
||||||
// VK_EXT_extended_dynamic_state2.
|
// VK_EXT_extended_dynamic_state2.
|
||||||
|
@ -3373,23 +3451,67 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
bool primitive_polygonal,
|
bool primitive_polygonal,
|
||||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||||
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
||||||
uint32_t used_texture_mask) {
|
uint32_t used_texture_mask, reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||||
|
uint32_t normalized_color_mask) {
|
||||||
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
const RegisterFile& regs = *register_file_;
|
const RegisterFile& regs = *register_file_;
|
||||||
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
|
||||||
|
auto pa_su_sc_mode_cntl = regs.Get<reg::PA_SU_SC_MODE_CNTL>();
|
||||||
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32;
|
||||||
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
auto rb_colorcontrol = regs.Get<reg::RB_COLORCONTROL>();
|
||||||
|
auto rb_depth_info = regs.Get<reg::RB_DEPTH_INFO>();
|
||||||
|
auto rb_stencilrefmask = regs.Get<reg::RB_STENCILREFMASK>();
|
||||||
|
auto rb_stencilrefmask_bf =
|
||||||
|
regs.Get<reg::RB_STENCILREFMASK>(XE_GPU_REG_RB_STENCILREFMASK_BF);
|
||||||
|
auto rb_surface_info = regs.Get<reg::RB_SURFACE_INFO>();
|
||||||
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
|
||||||
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
|
||||||
|
|
||||||
// Get the color info register values for each render target.
|
bool edram_fragment_shader_interlock =
|
||||||
|
render_target_cache_->GetPath() ==
|
||||||
|
RenderTargetCache::Path::kPixelShaderInterlock;
|
||||||
|
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
|
||||||
|
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
|
||||||
|
|
||||||
|
// Get the color info register values for each render target. Also, for FSI,
|
||||||
|
// exclude components that don't exist in the format from the write mask.
|
||||||
|
// Don't exclude fully overlapping render targets, however - two render
|
||||||
|
// targets with the same base address are used in the lighting pass of
|
||||||
|
// 4D5307E6, for example, with the needed one picked with dynamic control
|
||||||
|
// flow.
|
||||||
reg::RB_COLOR_INFO color_infos[xenos::kMaxColorRenderTargets];
|
reg::RB_COLOR_INFO color_infos[xenos::kMaxColorRenderTargets];
|
||||||
|
float rt_clamp[4][4];
|
||||||
|
// Two UINT32_MAX if no components actually existing in the RT are written.
|
||||||
|
uint32_t rt_keep_masks[4][2];
|
||||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||||
color_infos[i] = regs.Get<reg::RB_COLOR_INFO>(
|
auto color_info = regs.Get<reg::RB_COLOR_INFO>(
|
||||||
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
reg::RB_COLOR_INFO::rt_register_indices[i]);
|
||||||
|
color_infos[i] = color_info;
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
RenderTargetCache::GetPSIColorFormatInfo(
|
||||||
|
color_info.color_format, (normalized_color_mask >> (i * 4)) & 0b1111,
|
||||||
|
rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3],
|
||||||
|
rt_keep_masks[i][0], rt_keep_masks[i][1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disable depth and stencil if it aliases a color render target (for
|
||||||
|
// instance, during the XBLA logo in 58410954, though depth writing is already
|
||||||
|
// disabled there).
|
||||||
|
bool depth_stencil_enabled = normalized_depth_control.stencil_enable ||
|
||||||
|
normalized_depth_control.z_enable;
|
||||||
|
if (edram_fragment_shader_interlock && depth_stencil_enabled) {
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (rb_depth_info.depth_base == color_infos[i].color_base &&
|
||||||
|
(rt_keep_masks[i][0] != UINT32_MAX ||
|
||||||
|
rt_keep_masks[i][1] != UINT32_MAX)) {
|
||||||
|
depth_stencil_enabled = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool dirty = false;
|
bool dirty = false;
|
||||||
|
@ -3433,6 +3555,13 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
if (draw_util::IsPrimitiveLine(regs)) {
|
if (draw_util::IsPrimitiveLine(regs)) {
|
||||||
flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine;
|
flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine;
|
||||||
}
|
}
|
||||||
|
// MSAA sample count.
|
||||||
|
flags |= uint32_t(rb_surface_info.msaa_samples)
|
||||||
|
<< SpirvShaderTranslator::kSysFlag_MsaaSamples_Shift;
|
||||||
|
// Depth format.
|
||||||
|
if (rb_depth_info.depth_format == xenos::DepthRenderTargetFormat::kD24FS8) {
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_DepthFloat24;
|
||||||
|
}
|
||||||
// Alpha test.
|
// Alpha test.
|
||||||
xenos::CompareFunction alpha_test_function =
|
xenos::CompareFunction alpha_test_function =
|
||||||
rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
|
rb_colorcontrol.alpha_test_enable ? rb_colorcontrol.alpha_func
|
||||||
|
@ -3447,6 +3576,30 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
flags |= SpirvShaderTranslator::kSysFlag_ConvertColor0ToGamma << i;
|
flags |= SpirvShaderTranslator::kSysFlag_ConvertColor0ToGamma << i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (edram_fragment_shader_interlock && depth_stencil_enabled) {
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_FSIDepthStencil;
|
||||||
|
if (normalized_depth_control.z_enable) {
|
||||||
|
flags |= uint32_t(normalized_depth_control.zfunc)
|
||||||
|
<< SpirvShaderTranslator::kSysFlag_FSIDepthPassIfLess_Shift;
|
||||||
|
if (normalized_depth_control.z_write_enable) {
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_FSIDepthWrite;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// In case stencil is used without depth testing - always pass, and
|
||||||
|
// don't modify the stored depth.
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_FSIDepthPassIfLess |
|
||||||
|
SpirvShaderTranslator::kSysFlag_FSIDepthPassIfEqual |
|
||||||
|
SpirvShaderTranslator::kSysFlag_FSIDepthPassIfGreater;
|
||||||
|
}
|
||||||
|
if (normalized_depth_control.stencil_enable) {
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_FSIStencilTest;
|
||||||
|
}
|
||||||
|
// Hint - if not applicable to the shader, will not have effect.
|
||||||
|
if (alpha_test_function == xenos::CompareFunction::kAlways &&
|
||||||
|
!rb_colorcontrol.alpha_to_mask_enable) {
|
||||||
|
flags |= SpirvShaderTranslator::kSysFlag_FSIDepthStencilEarlyWrite;
|
||||||
|
}
|
||||||
|
}
|
||||||
dirty |= system_constants_.flags != flags;
|
dirty |= system_constants_.flags != flags;
|
||||||
system_constants_.flags = flags;
|
system_constants_.flags = flags;
|
||||||
|
|
||||||
|
@ -3506,10 +3659,10 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
// to radius conversion to avoid multiplying the per-vertex diameter by an
|
// to radius conversion to avoid multiplying the per-vertex diameter by an
|
||||||
// additional constant in the shader.
|
// additional constant in the shader.
|
||||||
float point_screen_diameter_to_ndc_radius_x =
|
float point_screen_diameter_to_ndc_radius_x =
|
||||||
(/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_x())) /
|
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) /
|
||||||
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
std::max(viewport_info.xy_extent[0], uint32_t(1));
|
||||||
float point_screen_diameter_to_ndc_radius_y =
|
float point_screen_diameter_to_ndc_radius_y =
|
||||||
(/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_y())) /
|
(/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) /
|
||||||
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
std::max(viewport_info.xy_extent[1], uint32_t(1));
|
||||||
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] !=
|
||||||
point_screen_diameter_to_ndc_radius_x;
|
point_screen_diameter_to_ndc_radius_x;
|
||||||
|
@ -3574,7 +3727,25 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
dirty |= system_constants_.alpha_test_reference != rb_alpha_ref;
|
dirty |= system_constants_.alpha_test_reference != rb_alpha_ref;
|
||||||
system_constants_.alpha_test_reference = rb_alpha_ref;
|
system_constants_.alpha_test_reference = rb_alpha_ref;
|
||||||
|
|
||||||
// Color exponent bias.
|
uint32_t edram_tile_dwords_scaled =
|
||||||
|
xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
|
||||||
|
(draw_resolution_scale_x * draw_resolution_scale_y);
|
||||||
|
|
||||||
|
// EDRAM pitch for FSI render target writing.
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
// Align, then multiply by 32bpp tile size in dwords.
|
||||||
|
uint32_t edram_32bpp_tile_pitch_dwords_scaled =
|
||||||
|
((rb_surface_info.surface_pitch *
|
||||||
|
(rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X ? 2 : 1)) +
|
||||||
|
(xenos::kEdramTileWidthSamples - 1)) /
|
||||||
|
xenos::kEdramTileWidthSamples * edram_tile_dwords_scaled;
|
||||||
|
dirty |= system_constants_.edram_32bpp_tile_pitch_dwords_scaled !=
|
||||||
|
edram_32bpp_tile_pitch_dwords_scaled;
|
||||||
|
system_constants_.edram_32bpp_tile_pitch_dwords_scaled =
|
||||||
|
edram_32bpp_tile_pitch_dwords_scaled;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Color exponent bias and FSI render target writing.
|
||||||
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) {
|
||||||
reg::RB_COLOR_INFO color_info = color_infos[i];
|
reg::RB_COLOR_INFO color_info = color_infos[i];
|
||||||
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
||||||
|
@ -3595,6 +3766,148 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
|
||||||
UINT32_C(0x3F800000) + (color_exp_bias << 23);
|
UINT32_C(0x3F800000) + (color_exp_bias << 23);
|
||||||
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
|
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale;
|
||||||
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
|
system_constants_.color_exp_bias[i] = color_exp_bias_scale;
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
dirty |=
|
||||||
|
system_constants_.edram_rt_keep_mask[i][0] != rt_keep_masks[i][0];
|
||||||
|
system_constants_.edram_rt_keep_mask[i][0] = rt_keep_masks[i][0];
|
||||||
|
dirty |=
|
||||||
|
system_constants_.edram_rt_keep_mask[i][1] != rt_keep_masks[i][1];
|
||||||
|
system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1];
|
||||||
|
if (rt_keep_masks[i][0] != UINT32_MAX ||
|
||||||
|
rt_keep_masks[i][1] != UINT32_MAX) {
|
||||||
|
uint32_t rt_base_dwords_scaled =
|
||||||
|
color_info.color_base * edram_tile_dwords_scaled;
|
||||||
|
dirty |= system_constants_.edram_rt_base_dwords_scaled[i] !=
|
||||||
|
rt_base_dwords_scaled;
|
||||||
|
system_constants_.edram_rt_base_dwords_scaled[i] =
|
||||||
|
rt_base_dwords_scaled;
|
||||||
|
uint32_t format_flags =
|
||||||
|
RenderTargetCache::AddPSIColorFormatFlags(color_info.color_format);
|
||||||
|
dirty |= system_constants_.edram_rt_format_flags[i] != format_flags;
|
||||||
|
system_constants_.edram_rt_format_flags[i] = format_flags;
|
||||||
|
uint32_t blend_factors_ops =
|
||||||
|
regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF;
|
||||||
|
dirty |= system_constants_.edram_rt_blend_factors_ops[i] !=
|
||||||
|
blend_factors_ops;
|
||||||
|
system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops;
|
||||||
|
// Can't do float comparisons here because NaNs would result in always
|
||||||
|
// setting the dirty flag.
|
||||||
|
dirty |= std::memcmp(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
||||||
|
4 * sizeof(float)) != 0;
|
||||||
|
std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i],
|
||||||
|
4 * sizeof(float));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
uint32_t depth_base_dwords_scaled =
|
||||||
|
rb_depth_info.depth_base * edram_tile_dwords_scaled;
|
||||||
|
dirty |= system_constants_.edram_depth_base_dwords_scaled !=
|
||||||
|
depth_base_dwords_scaled;
|
||||||
|
system_constants_.edram_depth_base_dwords_scaled = depth_base_dwords_scaled;
|
||||||
|
|
||||||
|
// For non-polygons, front polygon offset is used, and it's enabled if
|
||||||
|
// POLY_OFFSET_PARA_ENABLED is set, for polygons, separate front and back
|
||||||
|
// are used.
|
||||||
|
float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f;
|
||||||
|
float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f;
|
||||||
|
if (primitive_polygonal) {
|
||||||
|
if (pa_su_sc_mode_cntl.poly_offset_front_enable) {
|
||||||
|
poly_offset_front_scale =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||||
|
poly_offset_front_offset =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||||
|
}
|
||||||
|
if (pa_su_sc_mode_cntl.poly_offset_back_enable) {
|
||||||
|
poly_offset_back_scale =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32;
|
||||||
|
poly_offset_back_offset =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (pa_su_sc_mode_cntl.poly_offset_para_enable) {
|
||||||
|
poly_offset_front_scale =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32;
|
||||||
|
poly_offset_front_offset =
|
||||||
|
regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32;
|
||||||
|
poly_offset_back_scale = poly_offset_front_scale;
|
||||||
|
poly_offset_back_offset = poly_offset_front_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// With non-square resolution scaling, make sure the worst-case impact is
|
||||||
|
// reverted (slope only along the scaled axis), thus max. More bias is
|
||||||
|
// better than less bias, because less bias means Z fighting with the
|
||||||
|
// background is more likely.
|
||||||
|
float poly_offset_scale_factor =
|
||||||
|
xenos::kPolygonOffsetScaleSubpixelUnit *
|
||||||
|
std::max(draw_resolution_scale_x, draw_resolution_scale_y);
|
||||||
|
poly_offset_front_scale *= poly_offset_scale_factor;
|
||||||
|
poly_offset_back_scale *= poly_offset_scale_factor;
|
||||||
|
dirty |= system_constants_.edram_poly_offset_front_scale !=
|
||||||
|
poly_offset_front_scale;
|
||||||
|
system_constants_.edram_poly_offset_front_scale = poly_offset_front_scale;
|
||||||
|
dirty |= system_constants_.edram_poly_offset_front_offset !=
|
||||||
|
poly_offset_front_offset;
|
||||||
|
system_constants_.edram_poly_offset_front_offset = poly_offset_front_offset;
|
||||||
|
dirty |= system_constants_.edram_poly_offset_back_scale !=
|
||||||
|
poly_offset_back_scale;
|
||||||
|
system_constants_.edram_poly_offset_back_scale = poly_offset_back_scale;
|
||||||
|
dirty |= system_constants_.edram_poly_offset_back_offset !=
|
||||||
|
poly_offset_back_offset;
|
||||||
|
system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset;
|
||||||
|
|
||||||
|
if (depth_stencil_enabled && normalized_depth_control.stencil_enable) {
|
||||||
|
uint32_t stencil_front_reference_masks =
|
||||||
|
rb_stencilrefmask.value & 0xFFFFFF;
|
||||||
|
dirty |= system_constants_.edram_stencil_front_reference_masks !=
|
||||||
|
stencil_front_reference_masks;
|
||||||
|
system_constants_.edram_stencil_front_reference_masks =
|
||||||
|
stencil_front_reference_masks;
|
||||||
|
uint32_t stencil_func_ops =
|
||||||
|
(normalized_depth_control.value >> 8) & ((1 << 12) - 1);
|
||||||
|
dirty |=
|
||||||
|
system_constants_.edram_stencil_front_func_ops != stencil_func_ops;
|
||||||
|
system_constants_.edram_stencil_front_func_ops = stencil_func_ops;
|
||||||
|
|
||||||
|
if (primitive_polygonal && normalized_depth_control.backface_enable) {
|
||||||
|
uint32_t stencil_back_reference_masks =
|
||||||
|
rb_stencilrefmask_bf.value & 0xFFFFFF;
|
||||||
|
dirty |= system_constants_.edram_stencil_back_reference_masks !=
|
||||||
|
stencil_back_reference_masks;
|
||||||
|
system_constants_.edram_stencil_back_reference_masks =
|
||||||
|
stencil_back_reference_masks;
|
||||||
|
uint32_t stencil_func_ops_bf =
|
||||||
|
(normalized_depth_control.value >> 20) & ((1 << 12) - 1);
|
||||||
|
dirty |= system_constants_.edram_stencil_back_func_ops !=
|
||||||
|
stencil_func_ops_bf;
|
||||||
|
system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf;
|
||||||
|
} else {
|
||||||
|
dirty |= std::memcmp(system_constants_.edram_stencil_back,
|
||||||
|
system_constants_.edram_stencil_front,
|
||||||
|
2 * sizeof(uint32_t)) != 0;
|
||||||
|
std::memcpy(system_constants_.edram_stencil_back,
|
||||||
|
system_constants_.edram_stencil_front,
|
||||||
|
2 * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dirty |= system_constants_.edram_blend_constant[0] !=
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||||
|
system_constants_.edram_blend_constant[0] =
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_RED].f32;
|
||||||
|
dirty |= system_constants_.edram_blend_constant[1] !=
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||||
|
system_constants_.edram_blend_constant[1] =
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_GREEN].f32;
|
||||||
|
dirty |= system_constants_.edram_blend_constant[2] !=
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||||
|
system_constants_.edram_blend_constant[2] =
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_BLUE].f32;
|
||||||
|
dirty |= system_constants_.edram_blend_constant[3] !=
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||||
|
system_constants_.edram_blend_constant[3] =
|
||||||
|
regs[XE_GPU_REG_RB_BLEND_ALPHA].f32;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dirty) {
|
if (dirty) {
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -259,6 +260,9 @@ class VulkanCommandProcessor final : public CommandProcessor {
|
||||||
void SetViewport(const VkViewport& viewport);
|
void SetViewport(const VkViewport& viewport);
|
||||||
void SetScissor(const VkRect2D& scissor);
|
void SetScissor(const VkRect2D& scissor);
|
||||||
|
|
||||||
|
// Returns the text to display in the GPU backend name in the window title.
|
||||||
|
std::string GetWindowTitleText() const;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
@ -443,7 +447,8 @@ class VulkanCommandProcessor final : public CommandProcessor {
|
||||||
bool primitive_polygonal,
|
bool primitive_polygonal,
|
||||||
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
|
||||||
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
bool shader_32bit_index_dma, const draw_util::ViewportInfo& viewport_info,
|
||||||
uint32_t used_texture_mask);
|
uint32_t used_texture_mask, reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||||
|
uint32_t normalized_color_mask);
|
||||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||||
const VulkanShader* pixel_shader);
|
const VulkanShader* pixel_shader);
|
||||||
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet
|
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet
|
||||||
|
@ -520,12 +525,12 @@ class VulkanCommandProcessor final : public CommandProcessor {
|
||||||
|
|
||||||
// Descriptor set layouts used by different shaders.
|
// Descriptor set layouts used by different shaders.
|
||||||
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
|
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
|
|
||||||
VK_NULL_HANDLE;
|
|
||||||
VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE;
|
VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE;
|
||||||
std::array<VkDescriptorSetLayout,
|
std::array<VkDescriptorSetLayout,
|
||||||
size_t(SingleTransientDescriptorLayout::kCount)>
|
size_t(SingleTransientDescriptorLayout::kCount)>
|
||||||
descriptor_set_layouts_single_transient_{};
|
descriptor_set_layouts_single_transient_{};
|
||||||
|
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
|
||||||
|
VK_NULL_HANDLE;
|
||||||
|
|
||||||
// Descriptor set layouts are referenced by pipeline_layouts_.
|
// Descriptor set layouts are referenced by pipeline_layouts_.
|
||||||
std::unordered_map<TextureDescriptorSetLayoutKey, VkDescriptorSetLayout,
|
std::unordered_map<TextureDescriptorSetLayoutKey, VkDescriptorSetLayout,
|
||||||
|
@ -655,6 +660,9 @@ class VulkanCommandProcessor final : public CommandProcessor {
|
||||||
// declared as dynamic in the pipeline) invalidates such dynamic state.
|
// declared as dynamic in the pipeline) invalidates such dynamic state.
|
||||||
VkViewport dynamic_viewport_;
|
VkViewport dynamic_viewport_;
|
||||||
VkRect2D dynamic_scissor_;
|
VkRect2D dynamic_scissor_;
|
||||||
|
// Dynamic fixed-function depth bias, blend constants, stencil state are
|
||||||
|
// applicable only to the render target implementations where they are
|
||||||
|
// actually involved.
|
||||||
float dynamic_depth_bias_constant_factor_;
|
float dynamic_depth_bias_constant_factor_;
|
||||||
float dynamic_depth_bias_slope_factor_;
|
float dynamic_depth_bias_slope_factor_;
|
||||||
float dynamic_blend_constants_[4];
|
float dynamic_blend_constants_[4];
|
||||||
|
|
|
@ -21,6 +21,15 @@ VulkanGraphicsSystem::VulkanGraphicsSystem() {}
|
||||||
|
|
||||||
VulkanGraphicsSystem::~VulkanGraphicsSystem() {}
|
VulkanGraphicsSystem::~VulkanGraphicsSystem() {}
|
||||||
|
|
||||||
|
// Returns the name string for this graphics system. When a command processor
// is available, the text is taken from
// VulkanCommandProcessor::GetWindowTitleText(); otherwise a fixed fallback
// string is returned.
std::string VulkanGraphicsSystem::name() const {
|
||||||
|
auto vulkan_command_processor =
|
||||||
|
static_cast<VulkanCommandProcessor*>(command_processor());
|
||||||
|
// Only query the command processor if one is present.
if (vulkan_command_processor != nullptr) {
|
||||||
|
return vulkan_command_processor->GetWindowTitleText();
|
||||||
|
}
|
||||||
|
// Fallback used when no command processor is present.
return "Vulkan - HEAVILY INCOMPLETE, early development";
|
||||||
|
}
|
||||||
|
|
||||||
X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor,
|
X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor,
|
||||||
kernel::KernelState* kernel_state,
|
kernel::KernelState* kernel_state,
|
||||||
ui::WindowedAppContext* app_context,
|
ui::WindowedAppContext* app_context,
|
||||||
|
|
|
@ -26,9 +26,7 @@ class VulkanGraphicsSystem : public GraphicsSystem {
|
||||||
|
|
||||||
static bool IsAvailable() { return true; }
|
static bool IsAvailable() { return true; }
|
||||||
|
|
||||||
std::string name() const override {
|
std::string name() const override;
|
||||||
return "Vulkan - HEAVILY INCOMPLETE, early development";
|
|
||||||
}
|
|
||||||
|
|
||||||
X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state,
|
X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state,
|
||||||
ui::WindowedAppContext* app_context,
|
ui::WindowedAppContext* app_context,
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "third_party/fmt/include/fmt/format.h"
|
#include "third_party/fmt/include/fmt/format.h"
|
||||||
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
||||||
|
@ -53,8 +54,32 @@ bool VulkanPipelineCache::Initialize() {
|
||||||
const ui::vulkan::VulkanProvider& provider =
|
const ui::vulkan::VulkanProvider& provider =
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider();
|
||||||
|
|
||||||
|
bool edram_fragment_shader_interlock =
|
||||||
|
render_target_cache_.GetPath() ==
|
||||||
|
RenderTargetCache::Path::kPixelShaderInterlock;
|
||||||
|
|
||||||
shader_translator_ = std::make_unique<SpirvShaderTranslator>(
|
shader_translator_ = std::make_unique<SpirvShaderTranslator>(
|
||||||
SpirvShaderTranslator::Features(provider));
|
SpirvShaderTranslator::Features(provider),
|
||||||
|
render_target_cache_.msaa_2x_attachments_supported(),
|
||||||
|
render_target_cache_.msaa_2x_no_attachments_supported(),
|
||||||
|
edram_fragment_shader_interlock);
|
||||||
|
|
||||||
|
if (edram_fragment_shader_interlock) {
|
||||||
|
std::vector<uint8_t> depth_only_fragment_shader_code =
|
||||||
|
shader_translator_->CreateDepthOnlyFragmentShader();
|
||||||
|
depth_only_fragment_shader_ = ui::vulkan::util::CreateShaderModule(
|
||||||
|
provider,
|
||||||
|
reinterpret_cast<const uint32_t*>(
|
||||||
|
depth_only_fragment_shader_code.data()),
|
||||||
|
depth_only_fragment_shader_code.size());
|
||||||
|
if (depth_only_fragment_shader_ == VK_NULL_HANDLE) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanPipelineCache: Failed to create the depth/stencil-only "
|
||||||
|
"fragment shader for the fragment shader interlock render backend "
|
||||||
|
"implementation");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -75,6 +100,8 @@ void VulkanPipelineCache::Shutdown() {
|
||||||
pipelines_.clear();
|
pipelines_.clear();
|
||||||
|
|
||||||
// Destroy all internal shaders.
|
// Destroy all internal shaders.
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device,
|
||||||
|
depth_only_fragment_shader_);
|
||||||
for (const auto& geometry_shader_pair : geometry_shaders_) {
|
for (const auto& geometry_shader_pair : geometry_shaders_) {
|
||||||
if (geometry_shader_pair.second != VK_NULL_HANDLE) {
|
if (geometry_shader_pair.second != VK_NULL_HANDLE) {
|
||||||
dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr);
|
dfn.vkDestroyShaderModule(device, geometry_shader_pair.second, nullptr);
|
||||||
|
@ -179,6 +206,8 @@ VulkanPipelineCache::GetCurrentPixelShaderModification(
|
||||||
modification.pixel.param_gen_point = 0;
|
modification.pixel.param_gen_point = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (render_target_cache_.GetPath() ==
|
||||||
|
RenderTargetCache::Path::kHostRenderTargets) {
|
||||||
using DepthStencilMode =
|
using DepthStencilMode =
|
||||||
SpirvShaderTranslator::Modification::DepthStencilMode;
|
SpirvShaderTranslator::Modification::DepthStencilMode;
|
||||||
if (shader.implicit_early_z_write_allowed() &&
|
if (shader.implicit_early_z_write_allowed() &&
|
||||||
|
@ -189,6 +218,7 @@ VulkanPipelineCache::GetCurrentPixelShaderModification(
|
||||||
} else {
|
} else {
|
||||||
modification.pixel.depth_stencil_mode = DepthStencilMode::kNoModifiers;
|
modification.pixel.depth_stencil_mode = DepthStencilMode::kNoModifiers;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return modification;
|
return modification;
|
||||||
}
|
}
|
||||||
|
@ -303,7 +333,11 @@ bool VulkanPipelineCache::ConfigurePipeline(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VkRenderPass render_pass =
|
VkRenderPass render_pass =
|
||||||
render_target_cache_.GetRenderPass(render_pass_key);
|
render_target_cache_.GetPath() ==
|
||||||
|
RenderTargetCache::Path::kPixelShaderInterlock
|
||||||
|
? render_target_cache_.GetFragmentShaderInterlockRenderPass()
|
||||||
|
: render_target_cache_.GetHostRenderTargetsRenderPass(
|
||||||
|
render_pass_key);
|
||||||
if (render_pass == VK_NULL_HANDLE) {
|
if (render_pass == VK_NULL_HANDLE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -603,9 +637,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
description_out.polygon_mode = PipelinePolygonMode::kFill;
|
description_out.polygon_mode = PipelinePolygonMode::kFill;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): Skip depth / stencil and color state for the fragment
|
if (render_target_cache_.GetPath() ==
|
||||||
// shader interlock RB implementation.
|
RenderTargetCache::Path::kHostRenderTargets) {
|
||||||
|
|
||||||
if (render_pass_key.depth_and_color_used & 1) {
|
if (render_pass_key.depth_and_color_used & 1) {
|
||||||
if (normalized_depth_control.z_enable) {
|
if (normalized_depth_control.z_enable) {
|
||||||
description_out.depth_write_enable =
|
description_out.depth_write_enable =
|
||||||
|
@ -646,8 +679,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Color blending and write masks (filled only for the attachments present in
|
// Color blending and write masks (filled only for the attachments present
|
||||||
// the render pass object).
|
// in the render pass object).
|
||||||
uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1;
|
uint32_t render_pass_color_rts = render_pass_key.depth_and_color_used >> 1;
|
||||||
if (device_features.independentBlend) {
|
if (device_features.independentBlend) {
|
||||||
uint32_t render_pass_color_rts_remaining = render_pass_color_rts;
|
uint32_t render_pass_color_rts_remaining = render_pass_color_rts;
|
||||||
|
@ -665,8 +698,10 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
// Take the blend control for the first render target that the guest wants
|
// Take the blend control for the first render target that the guest wants
|
||||||
// to write to (consider it the most important) and use it for all render
|
// to write to (consider it the most important) and use it for all render
|
||||||
// targets, if any.
|
// targets, if any.
|
||||||
// TODO(Triang3l): Implement an option for independent blending via multiple
|
// TODO(Triang3l): Implement an option for independent blending via
|
||||||
// draw calls with different pipelines maybe? Though independent blending
|
// replaying the render pass for each set of render targets with unique
|
||||||
|
// blending parameters, with depth / stencil saved before the first and
|
||||||
|
// restored before each of the rest maybe? Though independent blending
|
||||||
// support is pretty wide, with a quite prominent exception of Adreno 4xx
|
// support is pretty wide, with a quite prominent exception of Adreno 4xx
|
||||||
// apparently.
|
// apparently.
|
||||||
uint32_t render_pass_color_rts_remaining = render_pass_color_rts;
|
uint32_t render_pass_color_rts_remaining = render_pass_color_rts;
|
||||||
|
@ -678,15 +713,16 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
PipelineRenderTarget& render_pass_first_color_rt =
|
PipelineRenderTarget& render_pass_first_color_rt =
|
||||||
description_out.render_targets[render_pass_first_color_rt_index];
|
description_out.render_targets[render_pass_first_color_rt_index];
|
||||||
uint32_t common_blend_rt_index;
|
uint32_t common_blend_rt_index;
|
||||||
if (xe::bit_scan_forward(normalized_color_mask, &common_blend_rt_index)) {
|
if (xe::bit_scan_forward(normalized_color_mask,
|
||||||
|
&common_blend_rt_index)) {
|
||||||
common_blend_rt_index >>= 2;
|
common_blend_rt_index >>= 2;
|
||||||
// If a common write mask will be used for multiple render targets, use
|
// If a common write mask will be used for multiple render targets,
|
||||||
// the original RB_COLOR_MASK instead of the normalized color mask as
|
// use the original RB_COLOR_MASK instead of the normalized color mask
|
||||||
// the normalized color mask has non-existent components forced to
|
// as the normalized color mask has non-existent components forced to
|
||||||
// written (don't need reading to be preserved), while the number of
|
// written (don't need reading to be preserved), while the number of
|
||||||
// components may vary between render targets. The attachments in the
|
// components may vary between render targets. The attachments in the
|
||||||
// pass that must not be written to at all will be excluded via a shader
|
// pass that must not be written to at all will be excluded via a
|
||||||
// modification.
|
// shader modification.
|
||||||
WritePipelineRenderTargetDescription(
|
WritePipelineRenderTargetDescription(
|
||||||
regs.Get<reg::RB_BLENDCONTROL>(
|
regs.Get<reg::RB_BLENDCONTROL>(
|
||||||
reg::RB_BLENDCONTROL::rt_register_indices
|
reg::RB_BLENDCONTROL::rt_register_indices
|
||||||
|
@ -700,7 +736,8 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
render_pass_first_color_rt);
|
render_pass_first_color_rt);
|
||||||
} else {
|
} else {
|
||||||
// No render targets are written to, though the render pass still may
|
// No render targets are written to, though the render pass still may
|
||||||
// contain color attachments - set them to not written and not blending.
|
// contain color attachments - set them to not written and not
|
||||||
|
// blending.
|
||||||
render_pass_first_color_rt.src_color_blend_factor =
|
render_pass_first_color_rt.src_color_blend_factor =
|
||||||
PipelineBlendFactor::kOne;
|
PipelineBlendFactor::kOne;
|
||||||
render_pass_first_color_rt.dst_color_blend_factor =
|
render_pass_first_color_rt.dst_color_blend_factor =
|
||||||
|
@ -723,6 +760,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -1929,6 +1967,10 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider();
|
||||||
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
|
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
|
||||||
|
|
||||||
|
bool edram_fragment_shader_interlock =
|
||||||
|
render_target_cache_.GetPath() ==
|
||||||
|
RenderTargetCache::Path::kPixelShaderInterlock;
|
||||||
|
|
||||||
std::array<VkPipelineShaderStageCreateInfo, 3> shader_stages;
|
std::array<VkPipelineShaderStageCreateInfo, 3> shader_stages;
|
||||||
uint32_t shader_stage_count = 0;
|
uint32_t shader_stage_count = 0;
|
||||||
|
|
||||||
|
@ -1962,12 +2004,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
shader_stage_geometry.pName = "main";
|
shader_stage_geometry.pName = "main";
|
||||||
shader_stage_geometry.pSpecializationInfo = nullptr;
|
shader_stage_geometry.pSpecializationInfo = nullptr;
|
||||||
}
|
}
|
||||||
// Pixel shader.
|
// Fragment shader.
|
||||||
if (creation_arguments.pixel_shader) {
|
|
||||||
assert_true(creation_arguments.pixel_shader->is_translated());
|
|
||||||
if (!creation_arguments.pixel_shader->is_valid()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
VkPipelineShaderStageCreateInfo& shader_stage_fragment =
|
VkPipelineShaderStageCreateInfo& shader_stage_fragment =
|
||||||
shader_stages[shader_stage_count++];
|
shader_stages[shader_stage_count++];
|
||||||
shader_stage_fragment.sType =
|
shader_stage_fragment.sType =
|
||||||
|
@ -1975,11 +2012,24 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
shader_stage_fragment.pNext = nullptr;
|
shader_stage_fragment.pNext = nullptr;
|
||||||
shader_stage_fragment.flags = 0;
|
shader_stage_fragment.flags = 0;
|
||||||
shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
|
shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||||
|
shader_stage_fragment.module = VK_NULL_HANDLE;
|
||||||
|
shader_stage_fragment.pName = "main";
|
||||||
|
shader_stage_fragment.pSpecializationInfo = nullptr;
|
||||||
|
if (creation_arguments.pixel_shader) {
|
||||||
|
assert_true(creation_arguments.pixel_shader->is_translated());
|
||||||
|
if (!creation_arguments.pixel_shader->is_valid()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
shader_stage_fragment.module =
|
shader_stage_fragment.module =
|
||||||
creation_arguments.pixel_shader->shader_module();
|
creation_arguments.pixel_shader->shader_module();
|
||||||
assert_true(shader_stage_fragment.module != VK_NULL_HANDLE);
|
assert_true(shader_stage_fragment.module != VK_NULL_HANDLE);
|
||||||
shader_stage_fragment.pName = "main";
|
} else {
|
||||||
shader_stage_fragment.pSpecializationInfo = nullptr;
|
if (edram_fragment_shader_interlock) {
|
||||||
|
shader_stage_fragment.module = depth_only_fragment_shader_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (shader_stage_fragment.module == VK_NULL_HANDLE) {
|
||||||
|
--shader_stage_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkPipelineVertexInputStateCreateInfo vertex_input_state = {};
|
VkPipelineVertexInputStateCreateInfo vertex_input_state = {};
|
||||||
|
@ -2087,10 +2137,10 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
// formula, though Z has no effect on anything if a depth attachment is not
|
// formula, though Z has no effect on anything if a depth attachment is not
|
||||||
// used (the guest shader can't access Z), enabling only when there's a
|
// used (the guest shader can't access Z), enabling only when there's a
|
||||||
// depth / stencil attachment for correctness.
|
// depth / stencil attachment for correctness.
|
||||||
// TODO(Triang3l): Disable the depth bias for the fragment shader interlock RB
|
|
||||||
// implementation.
|
|
||||||
rasterization_state.depthBiasEnable =
|
rasterization_state.depthBiasEnable =
|
||||||
(description.render_pass_key.depth_and_color_used & 0b1) ? VK_TRUE
|
(!edram_fragment_shader_interlock &&
|
||||||
|
(description.render_pass_key.depth_and_color_used & 0b1))
|
||||||
|
? VK_TRUE
|
||||||
: VK_FALSE;
|
: VK_FALSE;
|
||||||
// TODO(Triang3l): Wide lines.
|
// TODO(Triang3l): Wide lines.
|
||||||
rasterization_state.lineWidth = 1.0f;
|
rasterization_state.lineWidth = 1.0f;
|
||||||
|
@ -2101,6 +2151,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
|
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
|
||||||
if (description.render_pass_key.msaa_samples == xenos::MsaaSamples::k2X &&
|
if (description.render_pass_key.msaa_samples == xenos::MsaaSamples::k2X &&
|
||||||
!render_target_cache_.IsMsaa2xSupported(
|
!render_target_cache_.IsMsaa2xSupported(
|
||||||
|
!edram_fragment_shader_interlock &&
|
||||||
description.render_pass_key.depth_and_color_used != 0)) {
|
description.render_pass_key.depth_and_color_used != 0)) {
|
||||||
// Using sample 0 as 0 and 3 as 1 for 2x instead (not exactly the same
|
// Using sample 0 as 0 and 3 as 1 for 2x instead (not exactly the same
|
||||||
// sample locations, but still top-left and bottom-right - however, this can
|
// sample locations, but still top-left and bottom-right - however, this can
|
||||||
|
@ -2119,13 +2170,15 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
depth_stencil_state.sType =
|
depth_stencil_state.sType =
|
||||||
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
|
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
|
||||||
depth_stencil_state.pNext = nullptr;
|
depth_stencil_state.pNext = nullptr;
|
||||||
|
if (!edram_fragment_shader_interlock) {
|
||||||
if (description.depth_write_enable ||
|
if (description.depth_write_enable ||
|
||||||
description.depth_compare_op != xenos::CompareFunction::kAlways) {
|
description.depth_compare_op != xenos::CompareFunction::kAlways) {
|
||||||
depth_stencil_state.depthTestEnable = VK_TRUE;
|
depth_stencil_state.depthTestEnable = VK_TRUE;
|
||||||
depth_stencil_state.depthWriteEnable =
|
depth_stencil_state.depthWriteEnable =
|
||||||
description.depth_write_enable ? VK_TRUE : VK_FALSE;
|
description.depth_write_enable ? VK_TRUE : VK_FALSE;
|
||||||
depth_stencil_state.depthCompareOp = VkCompareOp(
|
depth_stencil_state.depthCompareOp =
|
||||||
uint32_t(VK_COMPARE_OP_NEVER) + uint32_t(description.depth_compare_op));
|
VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) +
|
||||||
|
uint32_t(description.depth_compare_op));
|
||||||
}
|
}
|
||||||
if (description.stencil_test_enable) {
|
if (description.stencil_test_enable) {
|
||||||
depth_stencil_state.stencilTestEnable = VK_TRUE;
|
depth_stencil_state.stencilTestEnable = VK_TRUE;
|
||||||
|
@ -2154,9 +2207,14 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) +
|
VkCompareOp(uint32_t(VK_COMPARE_OP_NEVER) +
|
||||||
uint32_t(description.stencil_back_compare_op));
|
uint32_t(description.stencil_back_compare_op));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
VkPipelineColorBlendStateCreateInfo color_blend_state = {};
|
||||||
|
color_blend_state.sType =
|
||||||
|
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
||||||
VkPipelineColorBlendAttachmentState
|
VkPipelineColorBlendAttachmentState
|
||||||
color_blend_attachments[xenos::kMaxColorRenderTargets] = {};
|
color_blend_attachments[xenos::kMaxColorRenderTargets] = {};
|
||||||
|
if (!edram_fragment_shader_interlock) {
|
||||||
uint32_t color_rts_used =
|
uint32_t color_rts_used =
|
||||||
description.render_pass_key.depth_and_color_used >> 1;
|
description.render_pass_key.depth_and_color_used >> 1;
|
||||||
{
|
{
|
||||||
|
@ -2223,9 +2281,6 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
VkPipelineColorBlendStateCreateInfo color_blend_state = {};
|
|
||||||
color_blend_state.sType =
|
|
||||||
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
|
||||||
color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used);
|
color_blend_state.attachmentCount = 32 - xe::lzcnt(color_rts_used);
|
||||||
color_blend_state.pAttachments = color_blend_attachments;
|
color_blend_state.pAttachments = color_blend_attachments;
|
||||||
if (color_rts_used && !device_features.independentBlend) {
|
if (color_rts_used && !device_features.independentBlend) {
|
||||||
|
@ -2241,6 +2296,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
color_blend_attachments[first_color_rt_index];
|
color_blend_attachments[first_color_rt_index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::array<VkDynamicState, 7> dynamic_states;
|
std::array<VkDynamicState, 7> dynamic_states;
|
||||||
VkPipelineDynamicStateCreateInfo dynamic_state;
|
VkPipelineDynamicStateCreateInfo dynamic_state;
|
||||||
|
@ -2255,6 +2311,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
// invalidated (again, even if it has no effect).
|
// invalidated (again, even if it has no effect).
|
||||||
dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
|
dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
|
||||||
dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
|
dynamic_states[dynamic_state.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
|
||||||
|
if (!edram_fragment_shader_interlock) {
|
||||||
dynamic_states[dynamic_state.dynamicStateCount++] =
|
dynamic_states[dynamic_state.dynamicStateCount++] =
|
||||||
VK_DYNAMIC_STATE_DEPTH_BIAS;
|
VK_DYNAMIC_STATE_DEPTH_BIAS;
|
||||||
dynamic_states[dynamic_state.dynamicStateCount++] =
|
dynamic_states[dynamic_state.dynamicStateCount++] =
|
||||||
|
@ -2265,6 +2322,7 @@ bool VulkanPipelineCache::EnsurePipelineCreated(
|
||||||
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
|
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
|
||||||
dynamic_states[dynamic_state.dynamicStateCount++] =
|
dynamic_states[dynamic_state.dynamicStateCount++] =
|
||||||
VK_DYNAMIC_STATE_STENCIL_REFERENCE;
|
VK_DYNAMIC_STATE_STENCIL_REFERENCE;
|
||||||
|
}
|
||||||
|
|
||||||
VkGraphicsPipelineCreateInfo pipeline_create_info;
|
VkGraphicsPipelineCreateInfo pipeline_create_info;
|
||||||
pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
||||||
|
|
|
@ -314,6 +314,10 @@ class VulkanPipelineCache {
|
||||||
GeometryShaderKey::Hasher>
|
GeometryShaderKey::Hasher>
|
||||||
geometry_shaders_;
|
geometry_shaders_;
|
||||||
|
|
||||||
|
// Empty depth-only pixel shader for writing to depth buffer using fragment
|
||||||
|
// shader interlock when no Xenos pixel shader provided.
|
||||||
|
VkShaderModule depth_only_fragment_shader_ = VK_NULL_HANDLE;
|
||||||
|
|
||||||
std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
|
std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
|
||||||
pipelines_;
|
pipelines_;
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
|
||||||
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
#include "third_party/glslang/SPIRV/SpvBuilder.h"
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/cvar.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/gpu/draw_util.h"
|
#include "xenia/gpu/draw_util.h"
|
||||||
|
@ -33,6 +34,27 @@
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/ui/vulkan/vulkan_util.h"
|
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||||
|
|
||||||
|
// Configuration variable naming the render target emulation path to use on
// Vulkan. The default value is the empty string; the recognized values
// ("fbo", "fsi", anything else) are explained in the description text below.
DEFINE_string(
|
||||||
|
render_target_path_vulkan, "",
|
||||||
|
"Render target emulation path to use on Vulkan.\n"
|
||||||
|
"Use: [any, fbo, fsi]\n"
|
||||||
|
" fbo:\n"
|
||||||
|
" Host framebuffers and fixed-function blending and depth / stencil "
|
||||||
|
"testing, copying between render targets when needed.\n"
|
||||||
|
" Lower accuracy (limited pixel format support).\n"
|
||||||
|
" Performance limited primarily by render target layout changes requiring "
|
||||||
|
"copying, but generally higher.\n"
|
||||||
|
" fsi:\n"
|
||||||
|
" Manual pixel packing, blending and depth / stencil testing, with free "
|
||||||
|
"render target layout changes.\n"
|
||||||
|
" Requires a GPU supporting fragment shader interlock.\n"
|
||||||
|
" Highest accuracy (all pixel formats handled in software).\n"
|
||||||
|
" Performance limited primarily by overdraw.\n"
|
||||||
|
" Any other value:\n"
|
||||||
|
" Choose what is considered the most optimal for the system (currently "
|
||||||
|
"always FB because the FSI path is much slower now).",
|
||||||
|
"GPU");
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace vulkan {
|
namespace vulkan {
|
||||||
|
@ -43,6 +65,10 @@ namespace shaders {
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_2xmsaa_cs.h"
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/host_depth_store_4xmsaa_cs.h"
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/passthrough_position_xy_vs.h"
|
||||||
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_clear_32bpp_cs.h"
|
||||||
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_clear_32bpp_scaled_cs.h"
|
||||||
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_clear_64bpp_cs.h"
|
||||||
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_clear_64bpp_scaled_cs.h"
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_cs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_cs.h"
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_scaled_cs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_1x2xmsaa_scaled_cs.h"
|
||||||
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_cs.h"
|
#include "xenia/gpu/shaders/bytecode/vulkan_spirv/resolve_fast_32bpp_4xmsaa_cs.h"
|
||||||
|
@ -180,13 +206,61 @@ VulkanRenderTargetCache::VulkanRenderTargetCache(
|
||||||
|
|
||||||
VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); }
|
VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(true); }
|
||||||
|
|
||||||
bool VulkanRenderTargetCache::Initialize() {
|
bool VulkanRenderTargetCache::Initialize(uint32_t shared_memory_binding_count) {
|
||||||
const ui::vulkan::VulkanProvider& provider =
|
const ui::vulkan::VulkanProvider& provider =
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider();
|
||||||
const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn();
|
const ui::vulkan::VulkanProvider::InstanceFunctions& ifn = provider.ifn();
|
||||||
VkPhysicalDevice physical_device = provider.physical_device();
|
VkPhysicalDevice physical_device = provider.physical_device();
|
||||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||||
VkDevice device = provider.device();
|
VkDevice device = provider.device();
|
||||||
|
const VkPhysicalDeviceLimits& device_limits =
|
||||||
|
provider.device_properties().limits;
|
||||||
|
|
||||||
|
if (cvars::render_target_path_vulkan == "fsi") {
|
||||||
|
path_ = Path::kPixelShaderInterlock;
|
||||||
|
} else {
|
||||||
|
path_ = Path::kHostRenderTargets;
|
||||||
|
}
|
||||||
|
// Fragment shader interlock is a feature implemented by pretty advanced GPUs,
|
||||||
|
// closer to Direct3D 11 / OpenGL ES 3.2 level mainly, not Direct3D 10 /
|
||||||
|
// OpenGL ES 3.1. Thus, it's fine to demand a wide range of other optional
|
||||||
|
// features for the fragment shader interlock backend to work.
|
||||||
|
if (path_ == Path::kPixelShaderInterlock) {
|
||||||
|
const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT&
|
||||||
|
device_fragment_shader_interlock_features =
|
||||||
|
provider.device_fragment_shader_interlock_features();
|
||||||
|
const VkPhysicalDeviceFeatures& device_features =
|
||||||
|
provider.device_features();
|
||||||
|
// Interlocking between fragments with common sample coverage is enough, but
|
||||||
|
// interlocking more is acceptable too (fragmentShaderShadingRateInterlock
|
||||||
|
// would be okay too, but it's unlikely that an implementation would
|
||||||
|
// advertise only it and not any other ones, as it's a very specific feature
|
||||||
|
// interacting with another optional feature that is variable shading rate,
|
||||||
|
// so there's no need to overcomplicate the checks and the shader execution
|
||||||
|
// mode setting).
|
||||||
|
// Sample-rate shading is required by certain SPIR-V revisions to access the
|
||||||
|
// sample mask fragment shader input.
|
||||||
|
// Standard sample locations are needed for calculating the depth at the
|
||||||
|
// samples.
|
||||||
|
// It's unlikely that a device exposing fragment shader interlock won't have
|
||||||
|
// a large enough storage buffer range and a sufficient SSBO slot count for
|
||||||
|
// all the shared memory buffers and the EDRAM buffer - and in a conflict
|
||||||
|
// between, for instance, the ability to vfetch and memexport in fragment
|
||||||
|
// shaders, and the usage of fragment shader interlock, prefer the former
|
||||||
|
// for simplicity.
|
||||||
|
if (!provider.device_extensions().ext_fragment_shader_interlock ||
|
||||||
|
!(device_fragment_shader_interlock_features
|
||||||
|
.fragmentShaderSampleInterlock ||
|
||||||
|
device_fragment_shader_interlock_features
|
||||||
|
.fragmentShaderPixelInterlock) ||
|
||||||
|
!device_features.fragmentStoresAndAtomics ||
|
||||||
|
!device_features.sampleRateShading ||
|
||||||
|
!device_limits.standardSampleLocations ||
|
||||||
|
shared_memory_binding_count >=
|
||||||
|
device_limits.maxDescriptorSetStorageBuffers) {
|
||||||
|
path_ = Path::kHostRenderTargets;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Format support.
|
// Format support.
|
||||||
constexpr VkFormatFeatureFlags kUsedDepthFormatFeatures =
|
constexpr VkFormatFeatureFlags kUsedDepthFormatFeatures =
|
||||||
|
@ -199,6 +273,30 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
(depth_unorm24_properties.optimalTilingFeatures &
|
(depth_unorm24_properties.optimalTilingFeatures &
|
||||||
kUsedDepthFormatFeatures) == kUsedDepthFormatFeatures;
|
kUsedDepthFormatFeatures) == kUsedDepthFormatFeatures;
|
||||||
|
|
||||||
|
// 2x MSAA support.
|
||||||
|
// TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in
|
||||||
|
// transfers.
|
||||||
|
if (cvars::native_2x_msaa) {
|
||||||
|
// Multisampled integer sampled images are optional in Vulkan and in Xenia.
|
||||||
|
msaa_2x_attachments_supported_ =
|
||||||
|
(device_limits.framebufferColorSampleCounts &
|
||||||
|
device_limits.framebufferDepthSampleCounts &
|
||||||
|
device_limits.framebufferStencilSampleCounts &
|
||||||
|
device_limits.sampledImageColorSampleCounts &
|
||||||
|
device_limits.sampledImageDepthSampleCounts &
|
||||||
|
device_limits.sampledImageStencilSampleCounts &
|
||||||
|
VK_SAMPLE_COUNT_2_BIT) &&
|
||||||
|
(device_limits.sampledImageIntegerSampleCounts &
|
||||||
|
(VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) !=
|
||||||
|
VK_SAMPLE_COUNT_4_BIT;
|
||||||
|
msaa_2x_no_attachments_supported_ =
|
||||||
|
(device_limits.framebufferNoAttachmentsSampleCounts &
|
||||||
|
VK_SAMPLE_COUNT_2_BIT) != 0;
|
||||||
|
} else {
|
||||||
|
msaa_2x_attachments_supported_ = false;
|
||||||
|
msaa_2x_no_attachments_supported_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
// Descriptor set layouts.
|
// Descriptor set layouts.
|
||||||
VkDescriptorSetLayoutBinding descriptor_set_layout_bindings[2];
|
VkDescriptorSetLayoutBinding descriptor_set_layout_bindings[2];
|
||||||
descriptor_set_layout_bindings[0].binding = 0;
|
descriptor_set_layout_bindings[0].binding = 0;
|
||||||
|
@ -429,32 +527,10 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
|
|
||||||
// TODO(Triang3l): All paths (FSI).
|
// TODO(Triang3l): All paths (FSI).
|
||||||
|
|
||||||
depth_float24_round_ = cvars::depth_float24_round;
|
if (path_ == Path::kHostRenderTargets) {
|
||||||
|
// Host render targets.
|
||||||
|
|
||||||
// TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in
|
depth_float24_round_ = cvars::depth_float24_round;
|
||||||
// transfers.
|
|
||||||
if (cvars::native_2x_msaa) {
|
|
||||||
const VkPhysicalDeviceLimits& device_limits =
|
|
||||||
provider.device_properties().limits;
|
|
||||||
// Multisampled integer sampled images are optional in Vulkan and in Xenia.
|
|
||||||
msaa_2x_attachments_supported_ =
|
|
||||||
(device_limits.framebufferColorSampleCounts &
|
|
||||||
device_limits.framebufferDepthSampleCounts &
|
|
||||||
device_limits.framebufferStencilSampleCounts &
|
|
||||||
device_limits.sampledImageColorSampleCounts &
|
|
||||||
device_limits.sampledImageDepthSampleCounts &
|
|
||||||
device_limits.sampledImageStencilSampleCounts &
|
|
||||||
VK_SAMPLE_COUNT_2_BIT) &&
|
|
||||||
(device_limits.sampledImageIntegerSampleCounts &
|
|
||||||
(VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT)) !=
|
|
||||||
VK_SAMPLE_COUNT_4_BIT;
|
|
||||||
msaa_2x_no_attachments_supported_ =
|
|
||||||
(device_limits.framebufferNoAttachmentsSampleCounts &
|
|
||||||
VK_SAMPLE_COUNT_2_BIT) != 0;
|
|
||||||
} else {
|
|
||||||
msaa_2x_attachments_supported_ = false;
|
|
||||||
msaa_2x_no_attachments_supported_ = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Host depth storing pipeline layout.
|
// Host depth storing pipeline layout.
|
||||||
VkDescriptorSetLayout host_depth_store_descriptor_set_layouts[] = {
|
VkDescriptorSetLayout host_depth_store_descriptor_set_layouts[] = {
|
||||||
|
@ -464,7 +540,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
descriptor_set_layout_sampled_image_x2_,
|
descriptor_set_layout_sampled_image_x2_,
|
||||||
};
|
};
|
||||||
VkPushConstantRange host_depth_store_push_constant_range;
|
VkPushConstantRange host_depth_store_push_constant_range;
|
||||||
host_depth_store_push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
host_depth_store_push_constant_range.stageFlags =
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
host_depth_store_push_constant_range.offset = 0;
|
host_depth_store_push_constant_range.offset = 0;
|
||||||
host_depth_store_push_constant_range.size = sizeof(HostDepthStoreConstants);
|
host_depth_store_push_constant_range.size = sizeof(HostDepthStoreConstants);
|
||||||
VkPipelineLayoutCreateInfo host_depth_store_pipeline_layout_create_info;
|
VkPipelineLayoutCreateInfo host_depth_store_pipeline_layout_create_info;
|
||||||
|
@ -505,8 +582,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
host_depth_store_shader.first, host_depth_store_shader.second);
|
host_depth_store_shader.first, host_depth_store_shader.second);
|
||||||
if (host_depth_store_pipeline == VK_NULL_HANDLE) {
|
if (host_depth_store_pipeline == VK_NULL_HANDLE) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"VulkanRenderTargetCache: Failed to create the {}-sample host depth "
|
"VulkanRenderTargetCache: Failed to create the {}-sample host "
|
||||||
"storing pipeline",
|
"depth storing pipeline",
|
||||||
uint32_t(1) << i);
|
uint32_t(1) << i);
|
||||||
Shutdown();
|
Shutdown();
|
||||||
return false;
|
return false;
|
||||||
|
@ -529,8 +606,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
sizeof(shaders::passthrough_position_xy_vs));
|
sizeof(shaders::passthrough_position_xy_vs));
|
||||||
if (transfer_passthrough_vertex_shader_ == VK_NULL_HANDLE) {
|
if (transfer_passthrough_vertex_shader_ == VK_NULL_HANDLE) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"VulkanRenderTargetCache: Failed to create the render target ownership "
|
"VulkanRenderTargetCache: Failed to create the render target "
|
||||||
"transfer vertex shader");
|
"ownership transfer vertex shader");
|
||||||
Shutdown();
|
Shutdown();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -558,8 +635,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
uint32_t transfer_pipeline_layout_descriptor_sets_remaining =
|
uint32_t transfer_pipeline_layout_descriptor_sets_remaining =
|
||||||
transfer_pipeline_layout_info.used_descriptor_sets;
|
transfer_pipeline_layout_info.used_descriptor_sets;
|
||||||
uint32_t transfer_pipeline_layout_descriptor_set_index;
|
uint32_t transfer_pipeline_layout_descriptor_set_index;
|
||||||
while (
|
while (xe::bit_scan_forward(
|
||||||
xe::bit_scan_forward(transfer_pipeline_layout_descriptor_sets_remaining,
|
transfer_pipeline_layout_descriptor_sets_remaining,
|
||||||
&transfer_pipeline_layout_descriptor_set_index)) {
|
&transfer_pipeline_layout_descriptor_set_index)) {
|
||||||
transfer_pipeline_layout_descriptor_sets_remaining &=
|
transfer_pipeline_layout_descriptor_sets_remaining &=
|
||||||
~(uint32_t(1) << transfer_pipeline_layout_descriptor_set_index);
|
~(uint32_t(1) << transfer_pipeline_layout_descriptor_set_index);
|
||||||
|
@ -590,7 +667,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
}
|
}
|
||||||
transfer_pipeline_layout_push_constant_range.size = uint32_t(
|
transfer_pipeline_layout_push_constant_range.size = uint32_t(
|
||||||
sizeof(uint32_t) *
|
sizeof(uint32_t) *
|
||||||
xe::bit_count(transfer_pipeline_layout_info.used_push_constant_dwords));
|
xe::bit_count(
|
||||||
|
transfer_pipeline_layout_info.used_push_constant_dwords));
|
||||||
transfer_pipeline_layout_create_info.pushConstantRangeCount =
|
transfer_pipeline_layout_create_info.pushConstantRangeCount =
|
||||||
transfer_pipeline_layout_info.used_push_constant_dwords ? 1 : 0;
|
transfer_pipeline_layout_info.used_push_constant_dwords ? 1 : 0;
|
||||||
if (dfn.vkCreatePipelineLayout(
|
if (dfn.vkCreatePipelineLayout(
|
||||||
|
@ -631,8 +709,8 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
dump_pipeline_layout_create_info.pPushConstantRanges =
|
dump_pipeline_layout_create_info.pPushConstantRanges =
|
||||||
&dump_pipeline_layout_push_constant_range;
|
&dump_pipeline_layout_push_constant_range;
|
||||||
if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info,
|
if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info,
|
||||||
nullptr,
|
nullptr, &dump_pipeline_layout_color_) !=
|
||||||
&dump_pipeline_layout_color_) != VK_SUCCESS) {
|
VK_SUCCESS) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"VulkanRenderTargetCache: Failed to create the color render target "
|
"VulkanRenderTargetCache: Failed to create the color render target "
|
||||||
"dumping pipeline layout");
|
"dumping pipeline layout");
|
||||||
|
@ -642,14 +720,162 @@ bool VulkanRenderTargetCache::Initialize() {
|
||||||
dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] =
|
dump_pipeline_layout_descriptor_set_layouts[kDumpDescriptorSetSource] =
|
||||||
descriptor_set_layout_sampled_image_x2_;
|
descriptor_set_layout_sampled_image_x2_;
|
||||||
if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info,
|
if (dfn.vkCreatePipelineLayout(device, &dump_pipeline_layout_create_info,
|
||||||
nullptr,
|
nullptr, &dump_pipeline_layout_depth_) !=
|
||||||
&dump_pipeline_layout_depth_) != VK_SUCCESS) {
|
VK_SUCCESS) {
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"VulkanRenderTargetCache: Failed to create the depth render target "
|
"VulkanRenderTargetCache: Failed to create the depth render target "
|
||||||
"dumping pipeline layout");
|
"dumping pipeline layout");
|
||||||
Shutdown();
|
Shutdown();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
} else if (path_ == Path::kPixelShaderInterlock) {
|
||||||
|
// Pixel (fragment) shader interlock.
|
||||||
|
|
||||||
|
// Blending is done in linear space directly in shaders.
|
||||||
|
gamma_render_target_as_srgb_ = false;
|
||||||
|
|
||||||
|
// Always true float24 depth rounded to the nearest even.
|
||||||
|
depth_float24_round_ = true;
|
||||||
|
|
||||||
|
// The pipeline layout and the pipelines for clearing the EDRAM buffer in
|
||||||
|
// resolves.
|
||||||
|
VkPushConstantRange resolve_fsi_clear_push_constant_range;
|
||||||
|
resolve_fsi_clear_push_constant_range.stageFlags =
|
||||||
|
VK_SHADER_STAGE_COMPUTE_BIT;
|
||||||
|
resolve_fsi_clear_push_constant_range.offset = 0;
|
||||||
|
resolve_fsi_clear_push_constant_range.size =
|
||||||
|
sizeof(draw_util::ResolveClearShaderConstants);
|
||||||
|
VkPipelineLayoutCreateInfo resolve_fsi_clear_pipeline_layout_create_info;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.sType =
|
||||||
|
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.pNext = nullptr;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.flags = 0;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.setLayoutCount = 1;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.pSetLayouts =
|
||||||
|
&descriptor_set_layout_storage_buffer_;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.pushConstantRangeCount = 1;
|
||||||
|
resolve_fsi_clear_pipeline_layout_create_info.pPushConstantRanges =
|
||||||
|
&resolve_fsi_clear_push_constant_range;
|
||||||
|
if (dfn.vkCreatePipelineLayout(
|
||||||
|
device, &resolve_fsi_clear_pipeline_layout_create_info, nullptr,
|
||||||
|
&resolve_fsi_clear_pipeline_layout_) != VK_SUCCESS) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanRenderTargetCache: Failed to create the resolve EDRAM buffer "
|
||||||
|
"clear pipeline layout");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
resolve_fsi_clear_32bpp_pipeline_ = ui::vulkan::util::CreateComputePipeline(
|
||||||
|
provider, resolve_fsi_clear_pipeline_layout_,
|
||||||
|
draw_resolution_scaled ? shaders::resolve_clear_32bpp_scaled_cs
|
||||||
|
: shaders::resolve_clear_32bpp_cs,
|
||||||
|
draw_resolution_scaled ? sizeof(shaders::resolve_clear_32bpp_scaled_cs)
|
||||||
|
: sizeof(shaders::resolve_clear_32bpp_cs));
|
||||||
|
if (resolve_fsi_clear_32bpp_pipeline_ == VK_NULL_HANDLE) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanRenderTargetCache: Failed to create the 32bpp resolve EDRAM "
|
||||||
|
"buffer clear pipeline");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
resolve_fsi_clear_64bpp_pipeline_ = ui::vulkan::util::CreateComputePipeline(
|
||||||
|
provider, resolve_fsi_clear_pipeline_layout_,
|
||||||
|
draw_resolution_scaled ? shaders::resolve_clear_64bpp_scaled_cs
|
||||||
|
: shaders::resolve_clear_64bpp_cs,
|
||||||
|
draw_resolution_scaled ? sizeof(shaders::resolve_clear_64bpp_scaled_cs)
|
||||||
|
: sizeof(shaders::resolve_clear_64bpp_cs));
|
||||||
|
if (resolve_fsi_clear_32bpp_pipeline_ == VK_NULL_HANDLE) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanRenderTargetCache: Failed to create the 64bpp resolve EDRAM "
|
||||||
|
"buffer clear pipeline");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common render pass.
|
||||||
|
VkSubpassDescription fsi_subpass = {};
|
||||||
|
fsi_subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
|
||||||
|
// Fragment shader interlock provides synchronization and ordering within a
|
||||||
|
// subpass, create an external by-region dependency to maintain interlocking
|
||||||
|
// between passes. Framebuffer-global dependencies will be made with
|
||||||
|
// explicit barriers when the addressing of the EDRAM buffer relative to
|
||||||
|
// the fragment coordinates is changed.
|
||||||
|
VkSubpassDependency fsi_subpass_dependencies[2];
|
||||||
|
fsi_subpass_dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
|
||||||
|
fsi_subpass_dependencies[0].dstSubpass = 0;
|
||||||
|
fsi_subpass_dependencies[0].srcStageMask =
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||||
|
fsi_subpass_dependencies[0].dstStageMask =
|
||||||
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||||
|
fsi_subpass_dependencies[0].srcAccessMask =
|
||||||
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
|
fsi_subpass_dependencies[0].dstAccessMask =
|
||||||
|
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
|
||||||
|
fsi_subpass_dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
|
||||||
|
fsi_subpass_dependencies[1] = fsi_subpass_dependencies[0];
|
||||||
|
std::swap(fsi_subpass_dependencies[1].srcSubpass,
|
||||||
|
fsi_subpass_dependencies[1].dstSubpass);
|
||||||
|
VkRenderPassCreateInfo fsi_render_pass_create_info;
|
||||||
|
fsi_render_pass_create_info.sType =
|
||||||
|
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
|
||||||
|
fsi_render_pass_create_info.pNext = nullptr;
|
||||||
|
fsi_render_pass_create_info.flags = 0;
|
||||||
|
fsi_render_pass_create_info.attachmentCount = 0;
|
||||||
|
fsi_render_pass_create_info.pAttachments = nullptr;
|
||||||
|
fsi_render_pass_create_info.subpassCount = 1;
|
||||||
|
fsi_render_pass_create_info.pSubpasses = &fsi_subpass;
|
||||||
|
fsi_render_pass_create_info.dependencyCount =
|
||||||
|
uint32_t(xe::countof(fsi_subpass_dependencies));
|
||||||
|
fsi_render_pass_create_info.pDependencies = fsi_subpass_dependencies;
|
||||||
|
if (dfn.vkCreateRenderPass(device, &fsi_render_pass_create_info, nullptr,
|
||||||
|
&fsi_render_pass_) != VK_SUCCESS) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanRenderTargetCache: Failed to create the fragment shader "
|
||||||
|
"interlock render backend render pass");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Common framebuffer.
|
||||||
|
VkFramebufferCreateInfo fsi_framebuffer_create_info;
|
||||||
|
fsi_framebuffer_create_info.sType =
|
||||||
|
VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
|
||||||
|
fsi_framebuffer_create_info.pNext = nullptr;
|
||||||
|
fsi_framebuffer_create_info.flags = 0;
|
||||||
|
fsi_framebuffer_create_info.renderPass = fsi_render_pass_;
|
||||||
|
fsi_framebuffer_create_info.attachmentCount = 0;
|
||||||
|
fsi_framebuffer_create_info.pAttachments = nullptr;
|
||||||
|
fsi_framebuffer_create_info.width = std::min(
|
||||||
|
xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_x(),
|
||||||
|
device_limits.maxFramebufferWidth);
|
||||||
|
fsi_framebuffer_create_info.height = std::min(
|
||||||
|
xenos::kTexture2DCubeMaxWidthHeight * draw_resolution_scale_y(),
|
||||||
|
device_limits.maxFramebufferHeight);
|
||||||
|
fsi_framebuffer_create_info.layers = 1;
|
||||||
|
if (dfn.vkCreateFramebuffer(device, &fsi_framebuffer_create_info, nullptr,
|
||||||
|
&fsi_framebuffer_.framebuffer) != VK_SUCCESS) {
|
||||||
|
XELOGE(
|
||||||
|
"VulkanRenderTargetCache: Failed to create the fragment shader "
|
||||||
|
"interlock render backend framebuffer");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
fsi_framebuffer_.host_extent.width = fsi_framebuffer_create_info.width;
|
||||||
|
fsi_framebuffer_.host_extent.height = fsi_framebuffer_create_info.height;
|
||||||
|
} else {
|
||||||
|
assert_unhandled_case(path_);
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the last update structures, to keep the defaults consistent between
|
||||||
|
// paths regardless of whether the update for the path actually modifies them.
|
||||||
|
last_update_render_pass_key_ = RenderPassKey();
|
||||||
|
last_update_render_pass_ = VK_NULL_HANDLE;
|
||||||
|
last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
|
||||||
|
std::memset(last_update_framebuffer_attachments_, 0,
|
||||||
|
sizeof(last_update_framebuffer_attachments_));
|
||||||
|
last_update_framebuffer_ = VK_NULL_HANDLE;
|
||||||
|
|
||||||
InitializeCommon();
|
InitializeCommon();
|
||||||
return true;
|
return true;
|
||||||
|
@ -667,6 +893,18 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) {
|
||||||
// already too late.
|
// already too late.
|
||||||
DestroyAllRenderTargets(true);
|
DestroyAllRenderTargets(true);
|
||||||
|
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device,
|
||||||
|
resolve_fsi_clear_64bpp_pipeline_);
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device,
|
||||||
|
resolve_fsi_clear_32bpp_pipeline_);
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device,
|
||||||
|
resolve_fsi_clear_pipeline_layout_);
|
||||||
|
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device,
|
||||||
|
fsi_framebuffer_.framebuffer);
|
||||||
|
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyRenderPass, device,
|
||||||
|
fsi_render_pass_);
|
||||||
|
|
||||||
for (const auto& dump_pipeline_pair : dump_pipelines_) {
|
for (const auto& dump_pipeline_pair : dump_pipelines_) {
|
||||||
// May be null to prevent recreation attempts.
|
// May be null to prevent recreation attempts.
|
||||||
if (dump_pipeline_pair.second != VK_NULL_HANDLE) {
|
if (dump_pipeline_pair.second != VK_NULL_HANDLE) {
|
||||||
|
@ -951,8 +1189,8 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
bool clear_depth = resolve_info.IsClearingDepth();
|
bool clear_depth = resolve_info.IsClearingDepth();
|
||||||
bool clear_color = resolve_info.IsClearingColor();
|
bool clear_color = resolve_info.IsClearingColor();
|
||||||
if (clear_depth || clear_color) {
|
if (clear_depth || clear_color) {
|
||||||
// TODO(Triang3l): Fragment shader interlock path EDRAM buffer clearing.
|
switch (GetPath()) {
|
||||||
if (GetPath() == Path::kHostRenderTargets) {
|
case Path::kHostRenderTargets: {
|
||||||
Transfer::Rectangle clear_rectangle;
|
Transfer::Rectangle clear_rectangle;
|
||||||
RenderTarget* clear_render_targets[2];
|
RenderTarget* clear_render_targets[2];
|
||||||
// If PrepareHostRenderTargetsResolveClear returns false, may be just an
|
// If PrepareHostRenderTargetsResolveClear returns false, may be just an
|
||||||
|
@ -970,6 +1208,62 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory,
|
||||||
&clear_rectangle);
|
&clear_rectangle);
|
||||||
}
|
}
|
||||||
cleared = true;
|
cleared = true;
|
||||||
|
} break;
|
||||||
|
case Path::kPixelShaderInterlock: {
|
||||||
|
UseEdramBuffer(EdramBufferUsage::kComputeWrite);
|
||||||
|
// Should be safe to only commit once (if was accessed as unordered or
|
||||||
|
// with fragment shader interlock previously - if there was nothing to
|
||||||
|
// copy, only to clear, for some reason, for instance), overlap of the
|
||||||
|
// depth and the color ranges is highly unlikely.
|
||||||
|
CommitEdramBufferShaderWrites();
|
||||||
|
command_buffer.CmdVkBindDescriptorSets(
|
||||||
|
VK_PIPELINE_BIND_POINT_COMPUTE, resolve_fsi_clear_pipeline_layout_,
|
||||||
|
0, 1, &edram_storage_buffer_descriptor_set_, 0, nullptr);
|
||||||
|
std::pair<uint32_t, uint32_t> clear_group_count =
|
||||||
|
resolve_info.GetClearShaderGroupCount(draw_resolution_scale_x(),
|
||||||
|
draw_resolution_scale_y());
|
||||||
|
assert_true(clear_group_count.first && clear_group_count.second);
|
||||||
|
if (clear_depth) {
|
||||||
|
command_processor_.BindExternalComputePipeline(
|
||||||
|
resolve_fsi_clear_32bpp_pipeline_);
|
||||||
|
draw_util::ResolveClearShaderConstants depth_clear_constants;
|
||||||
|
resolve_info.GetDepthClearShaderConstants(depth_clear_constants);
|
||||||
|
command_buffer.CmdVkPushConstants(
|
||||||
|
resolve_fsi_clear_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
0, sizeof(depth_clear_constants), &depth_clear_constants);
|
||||||
|
command_processor_.SubmitBarriers(true);
|
||||||
|
command_buffer.CmdVkDispatch(clear_group_count.first,
|
||||||
|
clear_group_count.second, 1);
|
||||||
|
}
|
||||||
|
if (clear_color) {
|
||||||
|
command_processor_.BindExternalComputePipeline(
|
||||||
|
resolve_info.color_edram_info.format_is_64bpp
|
||||||
|
? resolve_fsi_clear_64bpp_pipeline_
|
||||||
|
: resolve_fsi_clear_32bpp_pipeline_);
|
||||||
|
draw_util::ResolveClearShaderConstants color_clear_constants;
|
||||||
|
resolve_info.GetColorClearShaderConstants(color_clear_constants);
|
||||||
|
if (clear_depth) {
|
||||||
|
// Non-RT-specific constants have already been set.
|
||||||
|
command_buffer.CmdVkPushConstants(
|
||||||
|
resolve_fsi_clear_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
uint32_t(offsetof(draw_util::ResolveClearShaderConstants,
|
||||||
|
rt_specific)),
|
||||||
|
sizeof(color_clear_constants.rt_specific),
|
||||||
|
&color_clear_constants.rt_specific);
|
||||||
|
} else {
|
||||||
|
command_buffer.CmdVkPushConstants(
|
||||||
|
resolve_fsi_clear_pipeline_layout_, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||||
|
0, sizeof(color_clear_constants), &color_clear_constants);
|
||||||
|
}
|
||||||
|
command_processor_.SubmitBarriers(true);
|
||||||
|
command_buffer.CmdVkDispatch(clear_group_count.first,
|
||||||
|
clear_group_count.second, 1);
|
||||||
|
}
|
||||||
|
MarkEdramBufferModified();
|
||||||
|
cleared = true;
|
||||||
|
} break;
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(GetPath());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
cleared = true;
|
cleared = true;
|
||||||
|
@ -987,8 +1281,15 @@ bool VulkanRenderTargetCache::Update(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Triang3l): All paths (FSI).
|
auto rb_surface_info = register_file().Get<reg::RB_SURFACE_INFO>();
|
||||||
|
|
||||||
|
RenderPassKey render_pass_key;
|
||||||
|
// Needed even with the fragment shader interlock render backend for passing
|
||||||
|
// the sample count to the pipeline cache.
|
||||||
|
render_pass_key.msaa_samples = rb_surface_info.msaa_samples;
|
||||||
|
|
||||||
|
switch (GetPath()) {
|
||||||
|
case Path::kHostRenderTargets: {
|
||||||
RenderTarget* const* depth_and_color_render_targets =
|
RenderTarget* const* depth_and_color_render_targets =
|
||||||
last_update_accumulated_render_targets();
|
last_update_accumulated_render_targets();
|
||||||
|
|
||||||
|
@ -996,14 +1297,11 @@ bool VulkanRenderTargetCache::Update(
|
||||||
depth_and_color_render_targets,
|
depth_and_color_render_targets,
|
||||||
last_update_transfers());
|
last_update_transfers());
|
||||||
|
|
||||||
auto rb_surface_info = register_file().Get<reg::RB_SURFACE_INFO>();
|
|
||||||
uint32_t render_targets_are_srgb =
|
uint32_t render_targets_are_srgb =
|
||||||
gamma_render_target_as_srgb_
|
gamma_render_target_as_srgb_
|
||||||
? last_update_accumulated_color_targets_are_gamma()
|
? last_update_accumulated_color_targets_are_gamma()
|
||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
RenderPassKey render_pass_key;
|
|
||||||
render_pass_key.msaa_samples = rb_surface_info.msaa_samples;
|
|
||||||
if (depth_and_color_render_targets[0]) {
|
if (depth_and_color_render_targets[0]) {
|
||||||
render_pass_key.depth_and_color_used |= 1 << 0;
|
render_pass_key.depth_and_color_used |= 1 << 0;
|
||||||
render_pass_key.depth_format =
|
render_pass_key.depth_format =
|
||||||
|
@ -1043,7 +1341,7 @@ bool VulkanRenderTargetCache::Update(
|
||||||
? last_update_render_pass_
|
? last_update_render_pass_
|
||||||
: VK_NULL_HANDLE;
|
: VK_NULL_HANDLE;
|
||||||
if (render_pass == VK_NULL_HANDLE) {
|
if (render_pass == VK_NULL_HANDLE) {
|
||||||
render_pass = GetRenderPass(render_pass_key);
|
render_pass = GetHostRenderTargetsRenderPass(render_pass_key);
|
||||||
if (render_pass == VK_NULL_HANDLE) {
|
if (render_pass == VK_NULL_HANDLE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1052,12 +1350,13 @@ bool VulkanRenderTargetCache::Update(
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t pitch_tiles_at_32bpp =
|
uint32_t pitch_tiles_at_32bpp =
|
||||||
((rb_surface_info.surface_pitch
|
((rb_surface_info.surface_pitch << uint32_t(
|
||||||
<< uint32_t(rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X)) +
|
rb_surface_info.msaa_samples >= xenos::MsaaSamples::k4X)) +
|
||||||
(xenos::kEdramTileWidthSamples - 1)) /
|
(xenos::kEdramTileWidthSamples - 1)) /
|
||||||
xenos::kEdramTileWidthSamples;
|
xenos::kEdramTileWidthSamples;
|
||||||
if (framebuffer) {
|
if (framebuffer) {
|
||||||
if (last_update_framebuffer_pitch_tiles_at_32bpp_ != pitch_tiles_at_32bpp ||
|
if (last_update_framebuffer_pitch_tiles_at_32bpp_ !=
|
||||||
|
pitch_tiles_at_32bpp ||
|
||||||
std::memcmp(last_update_framebuffer_attachments_,
|
std::memcmp(last_update_framebuffer_attachments_,
|
||||||
depth_and_color_render_targets,
|
depth_and_color_render_targets,
|
||||||
sizeof(last_update_framebuffer_attachments_))) {
|
sizeof(last_update_framebuffer_attachments_))) {
|
||||||
|
@ -1065,7 +1364,8 @@ bool VulkanRenderTargetCache::Update(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!framebuffer) {
|
if (!framebuffer) {
|
||||||
framebuffer = GetFramebuffer(render_pass_key, pitch_tiles_at_32bpp,
|
framebuffer = GetHostRenderTargetsFramebuffer(
|
||||||
|
render_pass_key, pitch_tiles_at_32bpp,
|
||||||
depth_and_color_render_targets);
|
depth_and_color_render_targets);
|
||||||
if (!framebuffer) {
|
if (!framebuffer) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -1101,14 +1401,41 @@ bool VulkanRenderTargetCache::Update(
|
||||||
vulkan_rt.current_stage_mask(), rt_dst_stage_mask,
|
vulkan_rt.current_stage_mask(), rt_dst_stage_mask,
|
||||||
vulkan_rt.current_access_mask(), rt_dst_access_mask,
|
vulkan_rt.current_access_mask(), rt_dst_access_mask,
|
||||||
vulkan_rt.current_layout(), rt_new_layout);
|
vulkan_rt.current_layout(), rt_new_layout);
|
||||||
vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask, rt_new_layout);
|
vulkan_rt.SetUsage(rt_dst_stage_mask, rt_dst_access_mask,
|
||||||
|
rt_new_layout);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
|
||||||
|
case Path::kPixelShaderInterlock: {
|
||||||
|
// For FSI, only the barrier is needed - already scheduled if required.
|
||||||
|
// But the buffer will be used for FSI drawing now.
|
||||||
|
UseEdramBuffer(EdramBufferUsage::kFragmentReadWrite);
|
||||||
|
// Commit preceding unordered (but not FSI) writes like clears as they
|
||||||
|
// aren't synchronized with FSI accesses.
|
||||||
|
CommitEdramBufferShaderWrites(
|
||||||
|
EdramBufferModificationStatus::kViaUnordered);
|
||||||
|
// TODO(Triang3l): Check if this draw call modifies color or depth /
|
||||||
|
// stencil, at least coarsely, to prevent useless barriers.
|
||||||
|
MarkEdramBufferModified(
|
||||||
|
EdramBufferModificationStatus::kViaFragmentShaderInterlock);
|
||||||
|
last_update_render_pass_key_ = render_pass_key;
|
||||||
|
last_update_render_pass_ = fsi_render_pass_;
|
||||||
|
last_update_framebuffer_ = &fsi_framebuffer_;
|
||||||
|
} break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
assert_unhandled_case(GetPath());
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
|
VkRenderPass VulkanRenderTargetCache::GetHostRenderTargetsRenderPass(
|
||||||
auto it = render_passes_.find(key.key);
|
RenderPassKey key) {
|
||||||
|
assert_true(GetPath() == Path::kHostRenderTargets);
|
||||||
|
|
||||||
|
auto it = render_passes_.find(key);
|
||||||
if (it != render_passes_.end()) {
|
if (it != render_passes_.end()) {
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
@ -1244,10 +1571,10 @@ VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
|
||||||
if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr,
|
if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr,
|
||||||
&render_pass) != VK_SUCCESS) {
|
&render_pass) != VK_SUCCESS) {
|
||||||
XELOGE("VulkanRenderTargetCache: Failed to create a render pass");
|
XELOGE("VulkanRenderTargetCache: Failed to create a render pass");
|
||||||
render_passes_.emplace(key.key, VK_NULL_HANDLE);
|
render_passes_.emplace(key, VK_NULL_HANDLE);
|
||||||
return VK_NULL_HANDLE;
|
return VK_NULL_HANDLE;
|
||||||
}
|
}
|
||||||
render_passes_.emplace(key.key, render_pass);
|
render_passes_.emplace(key, render_pass);
|
||||||
return render_pass;
|
return render_pass;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1353,15 +1680,17 @@ VulkanRenderTargetCache::VulkanRenderTarget::~VulkanRenderTarget() {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const {
|
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetWidth() const {
|
||||||
const ui::vulkan::VulkanProvider& provider =
|
const VkPhysicalDeviceLimits& device_limits =
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider().device_properties().limits;
|
||||||
return provider.device_properties().limits.maxFramebufferWidth;
|
return std::min(device_limits.maxFramebufferWidth,
|
||||||
|
device_limits.maxImageDimension2D);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const {
|
uint32_t VulkanRenderTargetCache::GetMaxRenderTargetHeight() const {
|
||||||
const ui::vulkan::VulkanProvider& provider =
|
const VkPhysicalDeviceLimits& device_limits =
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider().device_properties().limits;
|
||||||
return provider.device_properties().limits.maxFramebufferHeight;
|
return std::min(device_limits.maxFramebufferHeight,
|
||||||
|
device_limits.maxImageDimension2D);
|
||||||
}
|
}
|
||||||
|
|
||||||
RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget(
|
RenderTargetCache::RenderTarget* VulkanRenderTargetCache::CreateRenderTarget(
|
||||||
|
@ -1615,6 +1944,12 @@ bool VulkanRenderTargetCache::IsHostDepthEncodingDifferent(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VulkanRenderTargetCache::RequestPixelShaderInterlockBarrier() {
|
||||||
|
if (edram_buffer_usage_ == EdramBufferUsage::kFragmentReadWrite) {
|
||||||
|
CommitEdramBufferShaderWrites();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void VulkanRenderTargetCache::GetEdramBufferUsageMasks(
|
void VulkanRenderTargetCache::GetEdramBufferUsageMasks(
|
||||||
EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out,
|
EdramBufferUsage usage, VkPipelineStageFlags& stage_mask_out,
|
||||||
VkAccessFlags& access_mask_out) {
|
VkAccessFlags& access_mask_out) {
|
||||||
|
@ -1715,7 +2050,7 @@ void VulkanRenderTargetCache::CommitEdramBufferShaderWrites(
|
||||||
}
|
}
|
||||||
|
|
||||||
const VulkanRenderTargetCache::Framebuffer*
|
const VulkanRenderTargetCache::Framebuffer*
|
||||||
VulkanRenderTargetCache::GetFramebuffer(
|
VulkanRenderTargetCache::GetHostRenderTargetsFramebuffer(
|
||||||
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
||||||
const RenderTarget* const* depth_and_color_render_targets) {
|
const RenderTarget* const* depth_and_color_render_targets) {
|
||||||
FramebufferKey key;
|
FramebufferKey key;
|
||||||
|
@ -1749,8 +2084,10 @@ VulkanRenderTargetCache::GetFramebuffer(
|
||||||
command_processor_.GetVulkanProvider();
|
command_processor_.GetVulkanProvider();
|
||||||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||||
VkDevice device = provider.device();
|
VkDevice device = provider.device();
|
||||||
|
const VkPhysicalDeviceLimits& device_limits =
|
||||||
|
provider.device_properties().limits;
|
||||||
|
|
||||||
VkRenderPass render_pass = GetRenderPass(render_pass_key);
|
VkRenderPass render_pass = GetHostRenderTargetsRenderPass(render_pass_key);
|
||||||
if (render_pass == VK_NULL_HANDLE) {
|
if (render_pass == VK_NULL_HANDLE) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -1789,12 +2126,19 @@ VulkanRenderTargetCache::GetFramebuffer(
|
||||||
render_pass_key.msaa_samples);
|
render_pass_key.msaa_samples);
|
||||||
} else {
|
} else {
|
||||||
assert_zero(render_pass_key.depth_and_color_used);
|
assert_zero(render_pass_key.depth_and_color_used);
|
||||||
host_extent.width = 0;
|
// Still needed for occlusion queries.
|
||||||
host_extent.height = 0;
|
host_extent.width = xenos::kTexture2DCubeMaxWidthHeight;
|
||||||
|
host_extent.height = xenos::kTexture2DCubeMaxWidthHeight;
|
||||||
}
|
}
|
||||||
// Vulkan requires width and height greater than 0.
|
// Limiting to the device limit for the case of no attachments, for which
|
||||||
framebuffer_create_info.width = std::max(host_extent.width, uint32_t(1));
|
// there's no limit imposed by the sizes of the attachments that have been
|
||||||
framebuffer_create_info.height = std::max(host_extent.height, uint32_t(1));
|
// created successfully.
|
||||||
|
host_extent.width = std::min(host_extent.width * draw_resolution_scale_x(),
|
||||||
|
device_limits.maxFramebufferWidth);
|
||||||
|
host_extent.height = std::min(host_extent.height * draw_resolution_scale_y(),
|
||||||
|
device_limits.maxFramebufferHeight);
|
||||||
|
framebuffer_create_info.width = host_extent.width;
|
||||||
|
framebuffer_create_info.height = host_extent.height;
|
||||||
framebuffer_create_info.layers = 1;
|
framebuffer_create_info.layers = 1;
|
||||||
VkFramebuffer framebuffer;
|
VkFramebuffer framebuffer;
|
||||||
if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr,
|
if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr,
|
||||||
|
@ -4070,7 +4414,8 @@ VkPipeline const* VulkanRenderTargetCache::GetTransferPipelines(
|
||||||
: nullptr;
|
: nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkRenderPass render_pass = GetRenderPass(key.render_pass_key);
|
VkRenderPass render_pass =
|
||||||
|
GetHostRenderTargetsRenderPass(key.render_pass_key);
|
||||||
VkShaderModule fragment_shader_module = GetTransferShader(key.shader_key);
|
VkShaderModule fragment_shader_module = GetTransferShader(key.shader_key);
|
||||||
if (render_pass == VK_NULL_HANDLE ||
|
if (render_pass == VK_NULL_HANDLE ||
|
||||||
fragment_shader_module == VK_NULL_HANDLE) {
|
fragment_shader_module == VK_NULL_HANDLE) {
|
||||||
|
@ -4643,7 +4988,8 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
|
||||||
dest_rt_key.GetColorFormat();
|
dest_rt_key.GetColorFormat();
|
||||||
transfer_render_pass_key.color_rts_use_transfer_formats = 1;
|
transfer_render_pass_key.color_rts_use_transfer_formats = 1;
|
||||||
}
|
}
|
||||||
VkRenderPass transfer_render_pass = GetRenderPass(transfer_render_pass_key);
|
VkRenderPass transfer_render_pass =
|
||||||
|
GetHostRenderTargetsRenderPass(transfer_render_pass_key);
|
||||||
if (transfer_render_pass == VK_NULL_HANDLE) {
|
if (transfer_render_pass == VK_NULL_HANDLE) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -4651,7 +4997,7 @@ void VulkanRenderTargetCache::PerformTransfersAndResolveClears(
|
||||||
transfer_framebuffer_render_targets[1 + xenos::kMaxColorRenderTargets] =
|
transfer_framebuffer_render_targets[1 + xenos::kMaxColorRenderTargets] =
|
||||||
{};
|
{};
|
||||||
transfer_framebuffer_render_targets[dest_rt_key.is_depth ? 0 : 1] = dest_rt;
|
transfer_framebuffer_render_targets[dest_rt_key.is_depth ? 0 : 1] = dest_rt;
|
||||||
const Framebuffer* transfer_framebuffer = GetFramebuffer(
|
const Framebuffer* transfer_framebuffer = GetHostRenderTargetsFramebuffer(
|
||||||
transfer_render_pass_key, dest_rt_key.pitch_tiles_at_32bpp,
|
transfer_render_pass_key, dest_rt_key.pitch_tiles_at_32bpp,
|
||||||
transfer_framebuffer_render_targets);
|
transfer_framebuffer_render_targets);
|
||||||
if (!transfer_framebuffer) {
|
if (!transfer_framebuffer) {
|
||||||
|
|
|
@ -43,6 +43,10 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
// true 4x MSAA passes (framebuffers because render target cache render
|
// true 4x MSAA passes (framebuffers because render target cache render
|
||||||
// targets are different for 2x and 4x guest MSAA, pipelines because the
|
// targets are different for 2x and 4x guest MSAA, pipelines because the
|
||||||
// sample mask will have 2 samples excluded for 2x-as-4x).
|
// sample mask will have 2 samples excluded for 2x-as-4x).
|
||||||
|
// This has effect only on the attachments, but even in cases when there
|
||||||
|
// are no attachments, it can be used to pass the sample count between
|
||||||
|
// subsystems, for instance, to specify the desired number of samples to
|
||||||
|
// use when there are no attachments in pipelines.
|
||||||
xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2
|
xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2
|
||||||
// << 0 is depth, << 1...4 is color.
|
// << 0 is depth, << 1...4 is color.
|
||||||
uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7
|
uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7
|
||||||
|
@ -81,8 +85,9 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
static_assert_size(RenderPassKey, sizeof(uint32_t));
|
static_assert_size(RenderPassKey, sizeof(uint32_t));
|
||||||
|
|
||||||
struct Framebuffer {
|
struct Framebuffer {
|
||||||
VkFramebuffer framebuffer;
|
VkFramebuffer framebuffer = VK_NULL_HANDLE;
|
||||||
VkExtent2D host_extent;
|
VkExtent2D host_extent{};
|
||||||
|
Framebuffer() = default;
|
||||||
Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent)
|
Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent)
|
||||||
: framebuffer(framebuffer), host_extent(host_extent) {}
|
: framebuffer(framebuffer), host_extent(host_extent) {}
|
||||||
};
|
};
|
||||||
|
@ -96,15 +101,16 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
|
|
||||||
// Transient descriptor set layouts must be initialized in the command
|
// Transient descriptor set layouts must be initialized in the command
|
||||||
// processor.
|
// processor.
|
||||||
bool Initialize();
|
bool Initialize(uint32_t shared_memory_binding_count);
|
||||||
void Shutdown(bool from_destructor = false);
|
void Shutdown(bool from_destructor = false);
|
||||||
void ClearCache() override;
|
void ClearCache() override;
|
||||||
|
|
||||||
void CompletedSubmissionUpdated();
|
void CompletedSubmissionUpdated();
|
||||||
void EndSubmission();
|
void EndSubmission();
|
||||||
|
|
||||||
// TODO(Triang3l): Fragment shader interlock.
|
Path GetPath() const override { return path_; }
|
||||||
Path GetPath() const override { return Path::kHostRenderTargets; }
|
|
||||||
|
VkBuffer edram_buffer() const { return edram_buffer_; }
|
||||||
|
|
||||||
// Performs the resolve to a shared memory area according to the current
|
// Performs the resolve to a shared memory area according to the current
|
||||||
// register values, and also clears the render targets if needed. Must be in a
|
// register values, and also clears the render targets if needed. Must be in a
|
||||||
|
@ -161,7 +167,11 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
// Returns the render pass object, or VK_NULL_HANDLE if failed to create.
|
// Returns the render pass object, or VK_NULL_HANDLE if failed to create.
|
||||||
// A render pass managed by the render target cache may be ended and resumed
|
// A render pass managed by the render target cache may be ended and resumed
|
||||||
// at any time (to allow for things like copying and texture loading).
|
// at any time (to allow for things like copying and texture loading).
|
||||||
VkRenderPass GetRenderPass(RenderPassKey key);
|
VkRenderPass GetHostRenderTargetsRenderPass(RenderPassKey key);
|
||||||
|
VkRenderPass GetFragmentShaderInterlockRenderPass() const {
|
||||||
|
assert_true(GetPath() == Path::kPixelShaderInterlock);
|
||||||
|
return fsi_render_pass_;
|
||||||
|
}
|
||||||
|
|
||||||
VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const;
|
VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const;
|
||||||
VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const;
|
VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const;
|
||||||
|
@ -178,6 +188,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
bool IsHostDepthEncodingDifferent(
|
bool IsHostDepthEncodingDifferent(
|
||||||
xenos::DepthRenderTargetFormat format) const override;
|
xenos::DepthRenderTargetFormat format) const override;
|
||||||
|
|
||||||
|
void RequestPixelShaderInterlockBarrier() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class EdramBufferUsage {
|
enum class EdramBufferUsage {
|
||||||
// There's no need for combined fragment and compute usages.
|
// There's no need for combined fragment and compute usages.
|
||||||
|
@ -251,6 +263,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
VulkanCommandProcessor& command_processor_;
|
VulkanCommandProcessor& command_processor_;
|
||||||
TraceWriter& trace_writer_;
|
TraceWriter& trace_writer_;
|
||||||
|
|
||||||
|
Path path_ = Path::kHostRenderTargets;
|
||||||
|
|
||||||
// Accessible in fragment and compute shaders.
|
// Accessible in fragment and compute shaders.
|
||||||
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
|
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
|
VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
|
||||||
|
@ -276,9 +290,18 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
|
std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
|
||||||
resolve_copy_pipelines_{};
|
resolve_copy_pipelines_{};
|
||||||
|
|
||||||
// RenderPassKey::key -> VkRenderPass.
|
// On the fragment shader interlock path, the render pass key is used purely
|
||||||
// VK_NULL_HANDLE if failed to create.
|
// for passing parameters to pipeline setup - there's always only one render
|
||||||
std::unordered_map<uint32_t, VkRenderPass> render_passes_;
|
// pass.
|
||||||
|
RenderPassKey last_update_render_pass_key_;
|
||||||
|
VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE;
|
||||||
|
// The pitch is not used on the fragment shader interlock path.
|
||||||
|
uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
|
||||||
|
// The attachments are not used on the fragment shader interlock path.
|
||||||
|
const RenderTarget* const*
|
||||||
|
last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] =
|
||||||
|
{};
|
||||||
|
const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE;
|
||||||
|
|
||||||
// For host render targets.
|
// For host render targets.
|
||||||
|
|
||||||
|
@ -809,7 +832,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Returns the framebuffer object, or nullptr if failed to create.
|
// Returns the framebuffer object, or nullptr if failed to create.
|
||||||
const Framebuffer* GetFramebuffer(
|
const Framebuffer* GetHostRenderTargetsFramebuffer(
|
||||||
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
||||||
const RenderTarget* const* depth_and_color_render_targets);
|
const RenderTarget* const* depth_and_color_render_targets);
|
||||||
|
|
||||||
|
@ -845,17 +868,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
bool msaa_2x_attachments_supported_ = false;
|
bool msaa_2x_attachments_supported_ = false;
|
||||||
bool msaa_2x_no_attachments_supported_ = false;
|
bool msaa_2x_no_attachments_supported_ = false;
|
||||||
|
|
||||||
|
// VK_NULL_HANDLE if failed to create.
|
||||||
|
std::unordered_map<RenderPassKey, VkRenderPass, RenderPassKey::Hasher>
|
||||||
|
render_passes_;
|
||||||
|
|
||||||
std::unordered_map<FramebufferKey, Framebuffer, FramebufferKey::Hasher>
|
std::unordered_map<FramebufferKey, Framebuffer, FramebufferKey::Hasher>
|
||||||
framebuffers_;
|
framebuffers_;
|
||||||
|
|
||||||
RenderPassKey last_update_render_pass_key_;
|
|
||||||
VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE;
|
|
||||||
uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
|
|
||||||
const RenderTarget* const*
|
|
||||||
last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] =
|
|
||||||
{};
|
|
||||||
const Framebuffer* last_update_framebuffer_ = VK_NULL_HANDLE;
|
|
||||||
|
|
||||||
// Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and
|
// Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and
|
||||||
// unused stencil from the transfer descriptor set), HostDepthStoreConstants
|
// unused stencil from the transfer descriptor set), HostDepthStoreConstants
|
||||||
// passed via push constants.
|
// passed via push constants.
|
||||||
|
@ -895,6 +914,15 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
||||||
// Temporary storage for DumpRenderTargets.
|
// Temporary storage for DumpRenderTargets.
|
||||||
std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
|
std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
|
||||||
std::vector<DumpInvocation> dump_invocations_;
|
std::vector<DumpInvocation> dump_invocations_;
|
||||||
|
|
||||||
|
// For pixel (fragment) shader interlock.
|
||||||
|
|
||||||
|
VkRenderPass fsi_render_pass_ = VK_NULL_HANDLE;
|
||||||
|
Framebuffer fsi_framebuffer_;
|
||||||
|
|
||||||
|
VkPipelineLayout resolve_fsi_clear_pipeline_layout_ = VK_NULL_HANDLE;
|
||||||
|
VkPipeline resolve_fsi_clear_32bpp_pipeline_ = VK_NULL_HANDLE;
|
||||||
|
VkPipeline resolve_fsi_clear_64bpp_pipeline_ = VK_NULL_HANDLE;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
// VK_KHR_get_physical_device_properties2 functions used in Xenia.
|
// VK_KHR_get_physical_device_properties2 functions used in Xenia.
|
||||||
// Promoted to Vulkan 1.1 core.
|
// Promoted to Vulkan 1.1 core.
|
||||||
|
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceFeatures2KHR,
|
||||||
|
vkGetPhysicalDeviceFeatures2)
|
||||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR,
|
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR,
|
||||||
vkGetPhysicalDeviceMemoryProperties2)
|
vkGetPhysicalDeviceMemoryProperties2)
|
||||||
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR,
|
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR,
|
||||||
|
|
|
@ -696,6 +696,7 @@ bool VulkanProvider::Initialize() {
|
||||||
device_extensions_.khr_shader_float_controls = true;
|
device_extensions_.khr_shader_float_controls = true;
|
||||||
device_extensions_.khr_spirv_1_4 = true;
|
device_extensions_.khr_spirv_1_4 = true;
|
||||||
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
|
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
|
||||||
|
device_extensions_.ext_shader_demote_to_helper_invocation = true;
|
||||||
device_extensions_.khr_maintenance4 = true;
|
device_extensions_.khr_maintenance4 = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -709,6 +710,8 @@ bool VulkanProvider::Initialize() {
|
||||||
{"VK_EXT_fragment_shader_interlock",
|
{"VK_EXT_fragment_shader_interlock",
|
||||||
offsetof(DeviceExtensions, ext_fragment_shader_interlock)},
|
offsetof(DeviceExtensions, ext_fragment_shader_interlock)},
|
||||||
{"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)},
|
{"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)},
|
||||||
|
{"VK_EXT_shader_demote_to_helper_invocation",
|
||||||
|
offsetof(DeviceExtensions, ext_shader_demote_to_helper_invocation)},
|
||||||
{"VK_EXT_shader_stencil_export",
|
{"VK_EXT_shader_stencil_export",
|
||||||
offsetof(DeviceExtensions, ext_shader_stencil_export)},
|
offsetof(DeviceExtensions, ext_shader_stencil_export)},
|
||||||
{"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)},
|
{"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)},
|
||||||
|
@ -816,6 +819,16 @@ bool VulkanProvider::Initialize() {
|
||||||
// Get additional device properties.
|
// Get additional device properties.
|
||||||
std::memset(&device_float_controls_properties_, 0,
|
std::memset(&device_float_controls_properties_, 0,
|
||||||
sizeof(device_float_controls_properties_));
|
sizeof(device_float_controls_properties_));
|
||||||
|
device_float_controls_properties_.sType =
|
||||||
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
|
||||||
|
std::memset(&device_fragment_shader_interlock_features_, 0,
|
||||||
|
sizeof(device_fragment_shader_interlock_features_));
|
||||||
|
device_fragment_shader_interlock_features_.sType =
|
||||||
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT;
|
||||||
|
std::memset(&device_shader_demote_to_helper_invocation_features_, 0,
|
||||||
|
sizeof(device_shader_demote_to_helper_invocation_features_));
|
||||||
|
device_shader_demote_to_helper_invocation_features_.sType =
|
||||||
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT;
|
||||||
if (instance_extensions_.khr_get_physical_device_properties2) {
|
if (instance_extensions_.khr_get_physical_device_properties2) {
|
||||||
VkPhysicalDeviceProperties2KHR device_properties_2;
|
VkPhysicalDeviceProperties2KHR device_properties_2;
|
||||||
device_properties_2.sType =
|
device_properties_2.sType =
|
||||||
|
@ -824,8 +837,6 @@ bool VulkanProvider::Initialize() {
|
||||||
VkPhysicalDeviceProperties2KHR* device_properties_2_last =
|
VkPhysicalDeviceProperties2KHR* device_properties_2_last =
|
||||||
&device_properties_2;
|
&device_properties_2;
|
||||||
if (device_extensions_.khr_shader_float_controls) {
|
if (device_extensions_.khr_shader_float_controls) {
|
||||||
device_float_controls_properties_.sType =
|
|
||||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
|
|
||||||
device_float_controls_properties_.pNext = nullptr;
|
device_float_controls_properties_.pNext = nullptr;
|
||||||
device_properties_2_last->pNext = &device_float_controls_properties_;
|
device_properties_2_last->pNext = &device_float_controls_properties_;
|
||||||
device_properties_2_last =
|
device_properties_2_last =
|
||||||
|
@ -836,6 +847,28 @@ bool VulkanProvider::Initialize() {
|
||||||
ifn_.vkGetPhysicalDeviceProperties2KHR(physical_device_,
|
ifn_.vkGetPhysicalDeviceProperties2KHR(physical_device_,
|
||||||
&device_properties_2);
|
&device_properties_2);
|
||||||
}
|
}
|
||||||
|
VkPhysicalDeviceFeatures2KHR device_features_2;
|
||||||
|
device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
|
||||||
|
device_features_2.pNext = nullptr;
|
||||||
|
VkPhysicalDeviceFeatures2KHR* device_features_2_last = &device_features_2;
|
||||||
|
if (device_extensions_.ext_fragment_shader_interlock) {
|
||||||
|
device_fragment_shader_interlock_features_.pNext = nullptr;
|
||||||
|
device_features_2_last->pNext =
|
||||||
|
&device_fragment_shader_interlock_features_;
|
||||||
|
device_features_2_last = reinterpret_cast<VkPhysicalDeviceFeatures2KHR*>(
|
||||||
|
&device_fragment_shader_interlock_features_);
|
||||||
|
}
|
||||||
|
if (device_extensions_.ext_shader_demote_to_helper_invocation) {
|
||||||
|
device_shader_demote_to_helper_invocation_features_.pNext = nullptr;
|
||||||
|
device_features_2_last->pNext =
|
||||||
|
&device_shader_demote_to_helper_invocation_features_;
|
||||||
|
device_features_2_last = reinterpret_cast<VkPhysicalDeviceFeatures2KHR*>(
|
||||||
|
&device_shader_demote_to_helper_invocation_features_);
|
||||||
|
}
|
||||||
|
if (device_features_2_last != &device_features_2) {
|
||||||
|
ifn_.vkGetPhysicalDeviceFeatures2KHR(physical_device_,
|
||||||
|
&device_features_2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the device.
|
// Create the device.
|
||||||
|
@ -888,6 +921,21 @@ bool VulkanProvider::Initialize() {
|
||||||
device_create_info_last = reinterpret_cast<VkDeviceCreateInfo*>(
|
device_create_info_last = reinterpret_cast<VkDeviceCreateInfo*>(
|
||||||
&device_portability_subset_features_);
|
&device_portability_subset_features_);
|
||||||
}
|
}
|
||||||
|
if (device_extensions_.ext_fragment_shader_interlock) {
|
||||||
|
// TODO(Triang3l): Enable only needed fragment shader interlock features.
|
||||||
|
device_fragment_shader_interlock_features_.pNext = nullptr;
|
||||||
|
device_create_info_last->pNext =
|
||||||
|
&device_fragment_shader_interlock_features_;
|
||||||
|
device_create_info_last = reinterpret_cast<VkDeviceCreateInfo*>(
|
||||||
|
&device_fragment_shader_interlock_features_);
|
||||||
|
}
|
||||||
|
if (device_extensions_.ext_shader_demote_to_helper_invocation) {
|
||||||
|
device_shader_demote_to_helper_invocation_features_.pNext = nullptr;
|
||||||
|
device_create_info_last->pNext =
|
||||||
|
&device_shader_demote_to_helper_invocation_features_;
|
||||||
|
device_create_info_last = reinterpret_cast<VkDeviceCreateInfo*>(
|
||||||
|
&device_shader_demote_to_helper_invocation_features_);
|
||||||
|
}
|
||||||
if (ifn_.vkCreateDevice(physical_device_, &device_create_info, nullptr,
|
if (ifn_.vkCreateDevice(physical_device_, &device_create_info, nullptr,
|
||||||
&device_) != VK_SUCCESS) {
|
&device_) != VK_SUCCESS) {
|
||||||
XELOGE("Failed to create a Vulkan device");
|
XELOGE("Failed to create a Vulkan device");
|
||||||
|
@ -995,8 +1043,30 @@ bool VulkanProvider::Initialize() {
|
||||||
XELOGVK("Vulkan device extensions:");
|
XELOGVK("Vulkan device extensions:");
|
||||||
XELOGVK("* VK_EXT_fragment_shader_interlock: {}",
|
XELOGVK("* VK_EXT_fragment_shader_interlock: {}",
|
||||||
device_extensions_.ext_fragment_shader_interlock ? "yes" : "no");
|
device_extensions_.ext_fragment_shader_interlock ? "yes" : "no");
|
||||||
|
if (device_extensions_.ext_fragment_shader_interlock) {
|
||||||
|
XELOGVK(
|
||||||
|
" * Sample interlock: {}",
|
||||||
|
device_fragment_shader_interlock_features_.fragmentShaderSampleInterlock
|
||||||
|
? "yes"
|
||||||
|
: "no");
|
||||||
|
XELOGVK(
|
||||||
|
" * Pixel interlock: {}",
|
||||||
|
device_fragment_shader_interlock_features_.fragmentShaderPixelInterlock
|
||||||
|
? "yes"
|
||||||
|
: "no");
|
||||||
|
}
|
||||||
XELOGVK("* VK_EXT_memory_budget: {}",
|
XELOGVK("* VK_EXT_memory_budget: {}",
|
||||||
device_extensions_.ext_memory_budget ? "yes" : "no");
|
device_extensions_.ext_memory_budget ? "yes" : "no");
|
||||||
|
XELOGVK(
|
||||||
|
"* VK_EXT_shader_demote_to_helper_invocation: {}",
|
||||||
|
device_extensions_.ext_shader_demote_to_helper_invocation ? "yes" : "no");
|
||||||
|
if (device_extensions_.ext_shader_demote_to_helper_invocation) {
|
||||||
|
XELOGVK(" * Demote to helper invocation: {}",
|
||||||
|
device_shader_demote_to_helper_invocation_features_
|
||||||
|
.shaderDemoteToHelperInvocation
|
||||||
|
? "yes"
|
||||||
|
: "no");
|
||||||
|
}
|
||||||
XELOGVK("* VK_EXT_shader_stencil_export: {}",
|
XELOGVK("* VK_EXT_shader_stencil_export: {}",
|
||||||
device_extensions_.ext_shader_stencil_export ? "yes" : "no");
|
device_extensions_.ext_shader_stencil_export ? "yes" : "no");
|
||||||
XELOGVK("* VK_KHR_bind_memory2: {}",
|
XELOGVK("* VK_KHR_bind_memory2: {}",
|
||||||
|
|
|
@ -133,6 +133,8 @@ class VulkanProvider : public GraphicsProvider {
|
||||||
struct DeviceExtensions {
|
struct DeviceExtensions {
|
||||||
bool ext_fragment_shader_interlock;
|
bool ext_fragment_shader_interlock;
|
||||||
bool ext_memory_budget;
|
bool ext_memory_budget;
|
||||||
|
// Core since 1.3.0.
|
||||||
|
bool ext_shader_demote_to_helper_invocation;
|
||||||
bool ext_shader_stencil_export;
|
bool ext_shader_stencil_export;
|
||||||
// Core since 1.1.0.
|
// Core since 1.1.0.
|
||||||
bool khr_bind_memory2;
|
bool khr_bind_memory2;
|
||||||
|
@ -198,6 +200,14 @@ class VulkanProvider : public GraphicsProvider {
|
||||||
device_float_controls_properties() const {
|
device_float_controls_properties() const {
|
||||||
return device_float_controls_properties_;
|
return device_float_controls_properties_;
|
||||||
}
|
}
|
||||||
|
const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT&
|
||||||
|
device_fragment_shader_interlock_features() const {
|
||||||
|
return device_fragment_shader_interlock_features_;
|
||||||
|
}
|
||||||
|
const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT&
|
||||||
|
device_shader_demote_to_helper_invocation_features() const {
|
||||||
|
return device_shader_demote_to_helper_invocation_features_;
|
||||||
|
}
|
||||||
|
|
||||||
struct Queue {
|
struct Queue {
|
||||||
VkQueue queue = VK_NULL_HANDLE;
|
VkQueue queue = VK_NULL_HANDLE;
|
||||||
|
@ -320,6 +330,10 @@ class VulkanProvider : public GraphicsProvider {
|
||||||
uint32_t queue_family_graphics_compute_;
|
uint32_t queue_family_graphics_compute_;
|
||||||
uint32_t queue_family_sparse_binding_;
|
uint32_t queue_family_sparse_binding_;
|
||||||
VkPhysicalDeviceFloatControlsPropertiesKHR device_float_controls_properties_;
|
VkPhysicalDeviceFloatControlsPropertiesKHR device_float_controls_properties_;
|
||||||
|
VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT
|
||||||
|
device_fragment_shader_interlock_features_;
|
||||||
|
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT
|
||||||
|
device_shader_demote_to_helper_invocation_features_;
|
||||||
|
|
||||||
VkDevice device_ = VK_NULL_HANDLE;
|
VkDevice device_ = VK_NULL_HANDLE;
|
||||||
DeviceFunctions dfn_ = {};
|
DeviceFunctions dfn_ = {};
|
||||||
|
|
|
@ -191,9 +191,10 @@
|
||||||
this.translationComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
this.translationComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
|
||||||
this.translationComboBox.FormattingEnabled = true;
|
this.translationComboBox.FormattingEnabled = true;
|
||||||
this.translationComboBox.Items.AddRange(new object[] {
|
this.translationComboBox.Items.AddRange(new object[] {
|
||||||
"DXBC (RTV/DSV RB)",
|
"DXBC (render target RB)",
|
||||||
"DXBC (ROV RB)",
|
"DXBC (rasterizer-ordered view RB)",
|
||||||
"SPIR-V"});
|
"SPIR-V (framebuffer RB)",
|
||||||
|
"SPIR-V (fragment shader interlock RB)"});
|
||||||
this.translationComboBox.Location = new System.Drawing.Point(1224, 0);
|
this.translationComboBox.Location = new System.Drawing.Point(1224, 0);
|
||||||
this.translationComboBox.Margin = new System.Windows.Forms.Padding(3, 0, 3, 0);
|
this.translationComboBox.Margin = new System.Windows.Forms.Padding(3, 0, 3, 0);
|
||||||
this.translationComboBox.Name = "translationComboBox";
|
this.translationComboBox.Name = "translationComboBox";
|
||||||
|
|
|
@ -235,6 +235,7 @@ namespace shader_playground {
|
||||||
outputType = "dxbctext";
|
outputType = "dxbctext";
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
|
case 3:
|
||||||
outputType = "spirvtext";
|
outputType = "spirvtext";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -269,8 +270,9 @@ namespace shader_playground {
|
||||||
"--vertex_shader_output_type=" + vertexShaderType,
|
"--vertex_shader_output_type=" + vertexShaderType,
|
||||||
"--dxbc_source_map=true",
|
"--dxbc_source_map=true",
|
||||||
};
|
};
|
||||||
if (translationComboBox.SelectedIndex == 1) {
|
if (translationComboBox.SelectedIndex == 1 ||
|
||||||
startArguments.Add("--shader_output_dxbc_rov=true");
|
translationComboBox.SelectedIndex == 3) {
|
||||||
|
startArguments.Add("--shader_output_pixel_shader_interlock=true");
|
||||||
}
|
}
|
||||||
|
|
||||||
startInfo = new ProcessStartInfo(compilerPath_);
|
startInfo = new ProcessStartInfo(compilerPath_);
|
||||||
|
|
Loading…
Reference in New Issue