diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index b59b637b6..b260f78fa 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -228,6 +228,8 @@ void SpirvShaderTranslator::StartTranslation() { offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), type_uint4_array_4}, + {"color_exp_bias", offsetof(SystemConstants, color_exp_bias), + type_float4_}, }; id_vector_temp_.clear(); id_vector_temp_.reserve(xe::countof(system_constants)); @@ -403,6 +405,14 @@ void SpirvShaderTranslator::StartTranslation() { spv::NoPrecision, type_void_, "main", main_param_types, main_precisions, &function_main_entry); + // Load the flags system constant once so later code in main can reuse it. + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); + main_system_constant_flags_ = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision); + // Begin ucode translation. Initialize everything, even without defined // defaults, for safety. var_main_predicate_ = builder_->createVariable( @@ -580,6 +590,8 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { if (is_vertex_shader()) { CompleteVertexOrTessEvalShaderInMain(); + } else if (is_pixel_shader()) { + CompleteFragmentShaderInMain(); } // End the main function. 
@@ -1115,13 +1127,6 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { } void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { - id_vector_temp_.clear(); - id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags)); - spv::Id system_constant_flags = builder_->createLoad( - builder_->createAccessChain(spv::StorageClassUniform, - uniform_system_constants_, id_vector_temp_), - spv::NoPrecision); - id_vector_temp_.clear(); id_vector_temp_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); @@ -1136,7 +1141,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_w_not_reciprocal = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_WNotReciprocal))), const_uint_0_); @@ -1160,7 +1165,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_xy_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_XYDividedByW))), const_uint_0_); @@ -1180,7 +1185,7 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { spv::Id is_z_divided_by_w = builder_->createBinOp( spv::OpINotEqual, type_bool_, builder_->createBinOp( - spv::OpBitwiseAnd, type_uint_, system_constant_flags, + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, builder_->makeUintConstant( static_cast(kSysFlag_ZDividedByW))), const_uint_0_); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 075279848..aa0265afb 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -61,12 +61,20 @@ class 
SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, + kSysFlag_ConvertColor0ToGamma_Shift, + kSysFlag_ConvertColor1ToGamma_Shift, + kSysFlag_ConvertColor2ToGamma_Shift, + kSysFlag_ConvertColor3ToGamma_Shift, kSysFlag_Count, kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, + kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift, + kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift, + kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift, + kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift, }; static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants"); @@ -94,6 +102,8 @@ class SpirvShaderTranslator : public ShaderTranslator { // apply to the result directly in the shader code. In each uint32_t, // swizzles for 2 texture fetch constants (in bits 0:11 and 12:23). uint32_t texture_swizzles[16]; + + float color_exp_bias[4]; }; // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for @@ -308,6 +318,7 @@ class SpirvShaderTranslator : public ShaderTranslator { void StartFragmentShaderBeforeMain(); void StartFragmentShaderInMain(); + void CompleteFragmentShaderInMain(); // Updates the current flow control condition (to be called in the beginning // of exec and in jumps), closing the previous conditionals if needed. 
@@ -509,6 +520,7 @@ class SpirvShaderTranslator : public ShaderTranslator { kSystemConstantNdcOffset, kSystemConstantTextureSwizzledSigns, kSystemConstantTextureSwizzles, + kSystemConstantColorExpBias, }; spv::Id uniform_system_constants_; spv::Id uniform_float_constants_; @@ -545,6 +557,7 @@ class SpirvShaderTranslator : public ShaderTranslator { std::vector main_interface_; spv::Function* function_main_; + spv::Id main_system_constant_flags_; // bool. spv::Id var_main_predicate_; // uint4. diff --git a/src/xenia/gpu/spirv_shader_translator_rb.cc b/src/xenia/gpu/spirv_shader_translator_rb.cc index 8282016b5..829b3f576 100644 --- a/src/xenia/gpu/spirv_shader_translator_rb.cc +++ b/src/xenia/gpu/spirv_shader_translator_rb.cc @@ -11,9 +11,11 @@ #include #include +#include #include "third_party/glslang/SPIRV/GLSL.std.450.h" #include "xenia/base/assert.h" +#include "xenia/base/math.h" namespace xe { namespace gpu { @@ -423,5 +425,101 @@ spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, return f32; } +// Emits the fragment shader epilogue: scales each color target written by the +// shader by its exponent bias, passes its RGB through LinearToPWLGamma if the +// target's gamma flag is set, and stores the result to the output variable. +void SpirvShaderTranslator::CompleteFragmentShaderInMain() { + uint32_t color_targets_remaining = current_shader().writes_color_targets(); + uint32_t color_target_index; + while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) { + color_targets_remaining &= ~(UINT32_C(1) << color_target_index); + spv::Id color_variable = output_fragment_data_[color_target_index]; + spv::Id color = builder_->createLoad(color_variable, spv::NoPrecision); + + // Apply the exponent bias after the alpha test and alpha to coverage + // because they need the unbiased alpha from the shader. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(2); + id_vector_temp_.push_back( + builder_->makeIntConstant(kSystemConstantColorExpBias)); + id_vector_temp_.push_back( + builder_->makeIntConstant(int32_t(color_target_index))); + color = builder_->createBinOp( + spv::OpVectorTimesScalar, type_float4_, color, + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassUniform, + uniform_system_constants_, id_vector_temp_), + spv::NoPrecision)); + builder_->addDecoration(color, spv::DecorationNoContraction); + + // Convert to gamma space - this is incorrect, since it must be done after + // blending on the Xbox 360, but this is just one of many blending issues in + // the host render target path. + // TODO(Triang3l): Gamma as sRGB check. + spv::Id color_rgb; + { + std::unique_ptr color_rgb_shuffle_op = + std::make_unique( + builder_->getUniqueId(), type_float3_, spv::OpVectorShuffle); + color_rgb_shuffle_op->addIdOperand(color); + color_rgb_shuffle_op->addIdOperand(color); + color_rgb_shuffle_op->addImmediateOperand(0); + color_rgb_shuffle_op->addImmediateOperand(1); + color_rgb_shuffle_op->addImmediateOperand(2); + color_rgb = color_rgb_shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(color_rgb_shuffle_op)); + } + spv::Id is_gamma = builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma + << color_target_index)), + const_uint_0_); + spv::Block& block_gamma_head = *builder_->getBuildPoint(); + spv::Block& block_gamma = builder_->makeNewBlock(); + spv::Block& block_gamma_merge = builder_->makeNewBlock(); + SpirvCreateSelectionMerge(block_gamma_merge.getId()); + builder_->createConditionalBranch(is_gamma, &block_gamma, + &block_gamma_merge); + builder_->setBuildPoint(&block_gamma); + spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false); 
+ builder_->createBranch(&block_gamma_merge); + builder_->setBuildPoint(&block_gamma_merge); + // Merge the two paths: the converted RGB arrives from the gamma block and + // the unconverted RGB from the block that held the conditional branch. + { + std::unique_ptr gamma_phi_op = + std::make_unique(builder_->getUniqueId(), + type_float3_, spv::OpPhi); + gamma_phi_op->addIdOperand(color_rgb_gamma); + gamma_phi_op->addIdOperand(block_gamma.getId()); + gamma_phi_op->addIdOperand(color_rgb); + gamma_phi_op->addIdOperand(block_gamma_head.getId()); + color_rgb = gamma_phi_op->getResultId(); + builder_->getBuildPoint()->addInstruction(std::move(gamma_phi_op)); + } + // Rebuild RGBA: components 0-2 come from the merged RGB, and index 3 + 3 + // selects component 3 (alpha) of the second operand, the biased color. + { + std::unique_ptr color_rgba_shuffle_op = + std::make_unique( + builder_->getUniqueId(), type_float4_, spv::OpVectorShuffle); + color_rgba_shuffle_op->addIdOperand(color_rgb); + color_rgba_shuffle_op->addIdOperand(color); + color_rgba_shuffle_op->addImmediateOperand(0); + color_rgba_shuffle_op->addImmediateOperand(1); + color_rgba_shuffle_op->addImmediateOperand(2); + color_rgba_shuffle_op->addImmediateOperand(3 + 3); + color = color_rgba_shuffle_op->getResultId(); + builder_->getBuildPoint()->addInstruction( + std::move(color_rgba_shuffle_op)); + } + + builder_->createStore(color, color_variable); + } +} + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 72c8f0efa..4ac850ce0 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -3267,6 +3267,13 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( auto pa_cl_vte_cntl = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); + // Get the color info register values for each render target. + reg::RB_COLOR_INFO color_infos[xenos::kMaxColorRenderTargets]; + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + color_infos[i] = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); + } + bool dirty = false; // Flags. 
@@ -3288,6 +3295,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal; } + // Gamma writing. + // TODO(Triang3l): Gamma as sRGB check. + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + if (color_infos[i].color_format == + xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { + flags |= SpirvShaderTranslator::kSysFlag_ConvertColor0ToGamma << i; + } + } dirty |= system_constants_.flags != flags; system_constants_.flags = flags; @@ -3356,6 +3371,32 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } } + // Color exponent bias. + for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { + reg::RB_COLOR_INFO color_info = color_infos[i]; + // Exponent bias is in bits 20:25 of RB_COLOR_INFO. + int32_t color_exp_bias = color_info.color_exp_bias; + if (render_target_cache_->GetPath() == + RenderTargetCache::Path::kHostRenderTargets && + (color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16 && + !render_target_cache_->IsFixedRG16TruncatedToMinus1To1() || + color_info.color_format == + xenos::ColorRenderTargetFormat::k_16_16_16_16 && + !render_target_cache_->IsFixedRGBA16TruncatedToMinus1To1())) { + // Remap from -32...32 to -1...1 by dividing the output values by 32, + // losing blending correctness, but getting the full range. 
+ color_exp_bias -= 5; + } + // Build 2^color_exp_bias directly as float bits: 0x3F800000 is 1.0f, and + // adding the bias at bit 23 adjusts the exponent field. The shift is done + // on uint32_t: left-shifting a negative int is undefined before C++20. + float color_exp_bias_scale; + *reinterpret_cast(&color_exp_bias_scale) = + UINT32_C(0x3F800000) + (uint32_t(color_exp_bias) << 23); + dirty |= system_constants_.color_exp_bias[i] != color_exp_bias_scale; + system_constants_.color_exp_bias[i] = color_exp_bias_scale; + } + if (dirty) { current_graphics_descriptor_set_values_up_to_date_ &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc index 4d8545fd0..02547eaaa 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.cc @@ -784,11 +784,10 @@ bool VulkanRenderTargetCache::Resolve(const Memory& memory, bool draw_resolution_scaled = IsDrawResolutionScaled(); draw_util::ResolveInfo resolve_info; - // TODO(Triang3l): Truncation of fixed16 (but not fixed16 as float16) range to - // -1 to 1. if (!draw_util::GetResolveInfo( register_file(), memory, trace_writer_, draw_resolution_scale_x(), - draw_resolution_scale_y(), false, false, resolve_info)) { + draw_resolution_scale_y(), IsFixedRG16TruncatedToMinus1To1(), + IsFixedRGBA16TruncatedToMinus1To1(), resolve_info)) { return false; } diff --git a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h index 2857fde1f..18113bf9f 100644 --- a/src/xenia/gpu/vulkan/vulkan_render_target_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_render_target_cache.h @@ -128,6 +128,20 @@ class VulkanRenderTargetCache final : public RenderTargetCache { return last_update_framebuffer_; } + // Using R16G16[B16A16]_SNORM, which are -1...1, not the needed -32...32. + // Persistent data doesn't depend on this, so can be overridden by per-game + // configuration. + bool IsFixedRG16TruncatedToMinus1To1() const { + // TODO(Triang3l): Not float16 condition. 
+ return GetPath() == Path::kHostRenderTargets && + !cvars::snorm16_render_target_full_range; + } + bool IsFixedRGBA16TruncatedToMinus1To1() const { + // TODO(Triang3l): Not float16 condition. + return GetPath() == Path::kHostRenderTargets && + !cvars::snorm16_render_target_full_range; + } + bool depth_float24_round() const { return depth_float24_round_; } bool msaa_2x_attachments_supported() const {