From 6688b13773e0515a8a4d81c9ed51001f211c51df Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 26 Jun 2022 15:01:27 +0300 Subject: [PATCH] [Vulkan] PsParamGen --- src/xenia/gpu/spirv_shader_translator.cc | 166 +++++++++++++++++- src/xenia/gpu/spirv_shader_translator.h | 17 ++ .../gpu/vulkan/vulkan_command_processor.cc | 16 +- .../gpu/vulkan/vulkan_command_processor.h | 3 +- src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc | 16 +- 5 files changed, 212 insertions(+), 6 deletions(-) diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 146af6823..a6830c20f 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -106,6 +106,9 @@ void SpirvShaderTranslator::Reset() { uniform_float_constants_ = spv::NoResult; + input_fragment_coord_ = spv::NoResult; + input_front_facing_ = spv::NoResult; + sampler_bindings_.clear(); texture_bindings_.clear(); @@ -1011,6 +1014,17 @@ spv::Id SpirvShaderTranslator::SpirvSmearScalarResultOrConstant( is_spec_constant); } +uint32_t SpirvShaderTranslator::GetPsParamGenInterpolator() const { + assert_true(is_pixel_shader()); + Modification modification = GetSpirvShaderModification(); + // param_gen_interpolator is already 4 bits, no need for an interpolator count + // safety check. + return (modification.pixel.param_gen_enable && + modification.pixel.param_gen_interpolator < register_count()) + ? modification.pixel.param_gen_interpolator + : UINT32_MAX; +} + void SpirvShaderTranslator::EnsureBuildPointAvailable() { if (!builder_->getBuildPoint()->isTerminated()) { return; @@ -1261,6 +1275,31 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { main_interface_.push_back(interpolator); } + bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; + + // Fragment coordinates. 
+ // TODO(Triang3l): More conditions - fragment shader interlock render backend, + // alpha to coverage (if RT 0 is written, and there's no early depth / + // stencil), depth writing in the fragment shader (per-sample if supported). + if (param_gen_needed) { + input_fragment_coord_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord"); + builder_->addDecoration(input_fragment_coord_, spv::DecorationBuiltIn, + spv::BuiltInFragCoord); + main_interface_.push_back(input_fragment_coord_); + } + + // Is front facing. + // TODO(Triang3l): Needed for stencil in the fragment shader interlock render + // backend. + if (param_gen_needed && !GetSpirvShaderModification().pixel.param_gen_point) { + input_front_facing_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassInput, type_bool_, "gl_FrontFacing"); + builder_->addDecoration(input_front_facing_, spv::DecorationBuiltIn, + spv::BuiltInFrontFacing); + main_interface_.push_back(input_front_facing_); + } + // Framebuffer attachment outputs. std::fill(output_fragment_data_.begin(), output_fragment_data_.end(), spv::NoResult); @@ -1288,12 +1327,16 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { } void SpirvShaderTranslator::StartFragmentShaderInMain() { + uint32_t param_gen_interpolator = GetPsParamGenInterpolator(); + // Copy the interpolators to general-purpose registers. // TODO(Triang3l): Centroid. - // TODO(Triang3l): ps_param_gen. uint32_t interpolator_count = std::min(xenos::kMaxInterpolators, register_count()); for (uint32_t i = 0; i < interpolator_count; ++i) { + if (i == param_gen_interpolator) { + continue; + } id_vector_temp_.clear(); // Register array element. id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); @@ -1303,6 +1346,127 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { var_main_registers_, id_vector_temp_)); } + // Pixel parameters. 
+ if (param_gen_interpolator != UINT32_MAX) { + Modification modification = GetSpirvShaderModification(); + // Rounding the position down, and taking the absolute value, so in case the + // host GPU for some reason has quads used for derivative calculation at odd + // locations, the left and top edges will have correct derivative magnitude + // and LODs. + // Assuming that if PsParamGen is needed at all, param_gen_point is always + // set for point primitives, and is always disabled for other primitive + // types. + // OpFNegate requires sign bit flipping even for 0.0 (in this case, the + // first column or row of pixels) only since SPIR-V 1.5 revision 2 (not the + // base 1.5). + // TODO(Triang3l): When SPIR-V 1.6 is used in Xenia, see if OpFNegate can be + // used there, should be cheaper because it may be implemented as a hardware + // instruction modifier, though it respects the rule for subnormal numbers - + // see the actual hardware instructions in both OpBitwiseXor and OpFNegate + // cases. + spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); + // TODO(Triang3l): Resolution scale inversion. + // X - pixel X .0 in the magnitude, is back-facing in the sign bit. 
+ assert_true(input_fragment_coord_ != spv::NoResult); + id_vector_temp_.clear(); + id_vector_temp_.push_back(const_int_0_); + spv::Id param_gen_x = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_x); + param_gen_x = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (!modification.pixel.param_gen_point) { + assert_true(input_front_facing_ != spv::NoResult); + param_gen_x = builder_->createTriOp( + spv::OpSelect, type_float_, + builder_->createBinOp( + spv::OpLogicalOr, type_bool_, + builder_->createBinOp( + spv::OpIEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitivePolygonal)), + const_uint_0_), + builder_->createLoad(input_front_facing_, spv::NoPrecision)), + param_gen_x, + builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, + param_gen_x), + const_sign_bit))); + } + // Y - pixel Y .0 in the magnitude, is point in the sign bit. 
+ id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(1)); + spv::Id param_gen_y = builder_->createLoad( + builder_->createAccessChain(spv::StorageClassInput, + input_fragment_coord_, id_vector_temp_), + spv::NoPrecision); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450Floor, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back(param_gen_y); + param_gen_y = builder_->createBuiltinCall( + type_float_, ext_inst_glsl_std_450_, GLSLstd450FAbs, id_vector_temp_); + if (modification.pixel.param_gen_point) { + param_gen_y = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createBinOp( + spv::OpBitwiseXor, type_uint_, + builder_->createUnaryOp(spv::OpBitcast, type_uint_, param_gen_y), + const_sign_bit)); + } + // Z - point S in the magnitude, is line in the sign bit. + spv::Id param_gen_z; + if (modification.pixel.param_gen_point) { + // TODO(Triang3l): Point coordinates. + param_gen_z = const_float_0_; + } else { + param_gen_z = builder_->createUnaryOp( + spv::OpBitcast, type_float_, + builder_->createTriOp( + spv::OpSelect, type_uint_, + builder_->createBinOp( + spv::OpINotEqual, type_bool_, + builder_->createBinOp( + spv::OpBitwiseAnd, type_uint_, + main_system_constant_flags_, + builder_->makeUintConstant(kSysFlag_PrimitiveLine)), + const_uint_0_), + const_sign_bit, const_uint_0_)); + } + // W - point T in the magnitude. + // TODO(Triang3l): Point coordinates. + spv::Id param_gen_w = const_float_0_; + // Store the pixel parameters. 
+ id_vector_temp_.clear(); + id_vector_temp_.reserve(4); + id_vector_temp_.push_back(param_gen_x); + id_vector_temp_.push_back(param_gen_y); + id_vector_temp_.push_back(param_gen_z); + id_vector_temp_.push_back(param_gen_w); + spv::Id param_gen = + builder_->createCompositeConstruct(type_float4_, id_vector_temp_); + id_vector_temp_.clear(); + id_vector_temp_.push_back( + builder_->makeIntConstant(int(param_gen_interpolator))); + builder_->createStore(param_gen, builder_->createAccessChain( + spv::StorageClassFunction, + var_main_registers_, id_vector_temp_)); + } + // Initialize the colors for safety. for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { spv::Id output_fragment_data_rt = output_fragment_data_[i]; diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 76caab044..0a94300a0 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -46,6 +46,12 @@ class SpirvShaderTranslator : public ShaderTranslator { struct PixelShaderModification { // Dynamically indexable register count from SQ_PROGRAM_CNTL. uint32_t dynamic_addressable_register_count : 8; + uint32_t param_gen_enable : 1; + uint32_t param_gen_interpolator : 4; + // If param_gen_enable is set, this must be set for point primitives, and + // must not be set for other primitive types - enables the point sprite + // coordinates input, and also affects the flag bits in PsParamGen. 
+ uint32_t param_gen_point : 1; } pixel; uint64_t value = 0; @@ -56,6 +62,8 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal_Shift, + kSysFlag_PrimitivePolygonal_Shift, + kSysFlag_PrimitiveLine_Shift, kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma_Shift, @@ -66,6 +74,8 @@ class SpirvShaderTranslator : public ShaderTranslator { kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, + kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift, + kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift, kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift, kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift, kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift, @@ -301,6 +311,9 @@ class SpirvShaderTranslator : public ShaderTranslator { GetSpirvShaderModification().vertex.host_vertex_shader_type); } + // Returns UINT32_MAX if PsParamGen doesn't need to be written. + uint32_t GetPsParamGenInterpolator() const; + // Must be called before emitting any SPIR-V operations that must be in a // block in translator callbacks to ensure that if the last instruction added // was something like OpBranch - in this case, an unreachable block is @@ -535,6 +548,10 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id input_vertex_index_; // VS as TES only - int. spv::Id input_primitive_id_; + // PS, only when needed - float4. + spv::Id input_fragment_coord_; + // PS, only when needed - bool. + spv::Id input_front_facing_; // In vertex or tessellation evaluation shaders - outputs, always // xenos::kMaxInterpolators. 
diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 4ac850ce0..29eebba8e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2336,7 +2336,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, normalized_depth_control); // Update system constants before uploading them. - UpdateSystemConstantValues(primitive_processing_result.host_index_endian, + UpdateSystemConstantValues(primitive_polygonal, + primitive_processing_result.host_index_endian, viewport_info, used_texture_mask); // Update uniform buffers and descriptor sets after binding the pipeline with @@ -3257,14 +3258,15 @@ void VulkanCommandProcessor::UpdateDynamicState( } void VulkanCommandProcessor::UpdateSystemConstantValues( - xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, - uint32_t used_texture_mask) { + bool primitive_polygonal, xenos::Endian index_endian, + const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask) { #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES const RegisterFile& regs = *register_file_; auto pa_cl_vte_cntl = regs.Get(); + auto vgt_draw_initiator = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); // Get the color info register values for each render target. @@ -3295,6 +3297,14 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= SpirvShaderTranslator::kSysFlag_WNotReciprocal; } + // Whether the primitive is polygonal, and gl_FrontFacing matters. + if (primitive_polygonal) { + flags |= SpirvShaderTranslator::kSysFlag_PrimitivePolygonal; + } + // Primitive type. + if (draw_util::IsPrimitiveLine(regs)) { + flags |= SpirvShaderTranslator::kSysFlag_PrimitiveLine; + } // Gamma writing. // TODO(Triang3l): Gamma as sRGB check. 
for (uint32_t i = 0; i < xenos::kMaxColorRenderTargets; ++i) { diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 164e7b253..dece6e02a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -449,7 +449,8 @@ class VulkanCommandProcessor : public CommandProcessor { void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal, reg::RB_DEPTHCONTROL normalized_depth_control); - void UpdateSystemConstantValues(xenos::Endian index_endian, + void UpdateSystemConstantValues(bool primitive_polygonal, + xenos::Endian index_endian, const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask); bool UpdateBindings(const VulkanShader* vertex_shader, diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index 5e9fec78d..d3049a561 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -134,13 +134,27 @@ VulkanPipelineCache::GetCurrentPixelShaderModification( assert_true(shader.type() == xenos::ShaderType::kPixel); assert_true(shader.is_ucode_analyzed()); const auto& regs = register_file_; - auto sq_program_cntl = regs.Get(); + SpirvShaderTranslator::Modification modification( shader_translator_->GetDefaultPixelShaderModification( shader.GetDynamicAddressableRegisterCount( sq_program_cntl.ps_num_reg))); + if (sq_program_cntl.param_gen) { + auto sq_context_misc = regs.Get(); + if (sq_context_misc.param_gen_pos < + std::min(std::max(modification.pixel.dynamic_addressable_register_count, + shader.register_static_address_bound()), + xenos::kMaxInterpolators)) { + modification.pixel.param_gen_enable = 1; + modification.pixel.param_gen_interpolator = sq_context_misc.param_gen_pos; + auto vgt_draw_initiator = regs.Get(); + modification.pixel.param_gen_point = uint32_t( + vgt_draw_initiator.prim_type == 
xenos::PrimitiveType::kPointList); + } + } + return modification; }