diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index b92a61d77..b6f72ff9b 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -3160,8 +3160,6 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( const RegisterFile& regs = *register_file_; auto pa_cl_clip_cntl = regs.Get(); auto pa_cl_vte_cntl = regs.Get(); - auto pa_su_point_minmax = regs.Get(); - auto pa_su_point_size = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; auto rb_colorcontrol = regs.Get(); @@ -3365,43 +3363,47 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } // Point size. - float point_vertex_diameter_min = - float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); - float point_vertex_diameter_max = - float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); - float point_constant_diameter_x = - float(pa_su_point_size.width) * (2.0f / 16.0f); - float point_constant_diameter_y = - float(pa_su_point_size.height) * (2.0f / 16.0f); - dirty |= - system_constants_.point_vertex_diameter_min != point_vertex_diameter_min; - dirty |= - system_constants_.point_vertex_diameter_max != point_vertex_diameter_max; - dirty |= - system_constants_.point_constant_diameter[0] != point_constant_diameter_x; - dirty |= - system_constants_.point_constant_diameter[1] != point_constant_diameter_y; - system_constants_.point_vertex_diameter_min = point_vertex_diameter_min; - system_constants_.point_vertex_diameter_max = point_vertex_diameter_max; - system_constants_.point_constant_diameter[0] = point_constant_diameter_x; - system_constants_.point_constant_diameter[1] = point_constant_diameter_y; - // 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter to - // radius conversion to avoid multiplying the per-vertex diameter by an - // additional constant in the shader. - float point_screen_diameter_to_ndc_radius_x = - (/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) / - std::max(viewport_info.xy_extent[0], uint32_t(1)); - float point_screen_diameter_to_ndc_radius_y = - (/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) / - std::max(viewport_info.xy_extent[1], uint32_t(1)); - dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] != - point_screen_diameter_to_ndc_radius_x; - dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] != - point_screen_diameter_to_ndc_radius_y; - system_constants_.point_screen_diameter_to_ndc_radius[0] = - point_screen_diameter_to_ndc_radius_x; - system_constants_.point_screen_diameter_to_ndc_radius[1] = - point_screen_diameter_to_ndc_radius_y; + if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { + auto pa_su_point_minmax = regs.Get(); + auto pa_su_point_size = regs.Get(); + float point_vertex_diameter_min = + float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); + float point_vertex_diameter_max = + float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); + float point_constant_diameter_x = + float(pa_su_point_size.width) * (2.0f / 16.0f); + float point_constant_diameter_y = + float(pa_su_point_size.height) * (2.0f / 16.0f); + dirty |= system_constants_.point_vertex_diameter_min != + point_vertex_diameter_min; + dirty |= system_constants_.point_vertex_diameter_max != + point_vertex_diameter_max; + dirty |= system_constants_.point_constant_diameter[0] != + point_constant_diameter_x; + dirty |= system_constants_.point_constant_diameter[1] != + point_constant_diameter_y; + system_constants_.point_vertex_diameter_min = point_vertex_diameter_min; + system_constants_.point_vertex_diameter_max = point_vertex_diameter_max; + system_constants_.point_constant_diameter[0] = point_constant_diameter_x; + system_constants_.point_constant_diameter[1] = point_constant_diameter_y; + // 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter + // to radius conversion to avoid multiplying the per-vertex diameter by an + // additional constant in the shader. + float point_screen_diameter_to_ndc_radius_x = + (/* 0.5f * 2.0f * */ float(draw_resolution_scale_x)) / + std::max(viewport_info.xy_extent[0], uint32_t(1)); + float point_screen_diameter_to_ndc_radius_y = + (/* 0.5f * 2.0f * */ float(draw_resolution_scale_y)) / + std::max(viewport_info.xy_extent[1], uint32_t(1)); + dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] != + point_screen_diameter_to_ndc_radius_x; + dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] != + point_screen_diameter_to_ndc_radius_y; + system_constants_.point_screen_diameter_to_ndc_radius[0] = + point_screen_diameter_to_ndc_radius_x; + system_constants_.point_screen_diameter_to_ndc_radius[1] = + point_screen_diameter_to_ndc_radius_y; + } // Texture signedness / gamma. bool gamma_render_target_as_srgb = diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index f7dc2c1f3..452ac1450 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -106,16 +106,19 @@ void SpirvShaderTranslator::Reset() { uniform_float_constants_ = spv::NoResult; - input_fragment_coord_ = spv::NoResult; + input_point_coordinates_ = spv::NoResult; + input_fragment_coordinates_ = spv::NoResult; input_front_facing_ = spv::NoResult; std::fill(input_output_interpolators_.begin(), input_output_interpolators_.end(), spv::NoResult); + output_point_size_ = spv::NoResult; sampler_bindings_.clear(); texture_bindings_.clear(); main_interface_.clear(); var_main_registers_ = spv::NoResult; + var_main_point_size_edge_flag_kill_vertex_ = spv::NoResult; main_switch_op_.reset(); main_switch_next_pc_phi_operands_.clear(); @@ -230,7 +233,16 @@ void SpirvShaderTranslator::StartTranslation() { {"vertex_base_index", offsetof(SystemConstants, vertex_base_index), type_int_}, {"ndc_scale", offsetof(SystemConstants, ndc_scale), type_float3_}, + {"point_vertex_diameter_min", + offsetof(SystemConstants, point_vertex_diameter_min), type_float_}, {"ndc_offset", offsetof(SystemConstants, ndc_offset), type_float3_}, + {"point_vertex_diameter_max", + offsetof(SystemConstants, point_vertex_diameter_max), type_float_}, + {"point_constant_diameter", + offsetof(SystemConstants, point_constant_diameter), type_float2_}, + {"point_screen_diameter_to_ndc_radius", + offsetof(SystemConstants, point_screen_diameter_to_ndc_radius), + type_float2_}, {"texture_swizzled_signs", offsetof(SystemConstants, texture_swizzled_signs), type_uint4_array_2}, {"texture_swizzles", offsetof(SystemConstants, texture_swizzles), @@ -1063,9 +1075,10 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { main_interface_.push_back(input_vertex_index_); } + uint32_t output_location = 0; + // Create the interpolator outputs. { - uint32_t interpolator_location = 0; uint32_t interpolators_remaining = GetModificationInterpolatorMask(); uint32_t interpolator_index; while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) { @@ -1075,13 +1088,29 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { fmt::format("xe_out_interpolator_{}", interpolator_index).c_str()); input_output_interpolators_[interpolator_index] = interpolator; builder_->addDecoration(interpolator, spv::DecorationLocation, - int(interpolator_location)); + int(output_location)); builder_->addDecoration(interpolator, spv::DecorationInvariant); main_interface_.push_back(interpolator); - ++interpolator_location; + ++output_location; } } + Modification shader_modification = GetSpirvShaderModification(); + + // Create the point size output. Not using gl_PointSize from gl_PerVertex not + // to rely on the shaderTessellationAndGeometryPointSize feature, and also + // because the value written to gl_PointSize must be greater than zero. + if (shader_modification.vertex.output_point_size) { + output_point_size_ = + builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float_, "xe_out_point_size"); + builder_->addDecoration(output_point_size_, spv::DecorationLocation, + int(output_location)); + builder_->addDecoration(output_point_size_, spv::DecorationInvariant); + main_interface_.push_back(output_point_size_); + ++output_location; + } + // Create the gl_PerVertex output for used system outputs. std::vector struct_per_vertex_members; struct_per_vertex_members.reserve(kOutputPerVertexMemberCount); @@ -1103,9 +1132,23 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { } void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { - var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( - spv::NoPrecision, spv::StorageClassFunction, type_float3_, - "xe_var_point_size_edge_flag_kill_vertex"); + // The edge flag isn't used for any purpose by the translator. + if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b101) { + id_vector_temp_.clear(); + id_vector_temp_.reserve(3); + // Set the point size to a negative value to tell the point sprite expansion + // that it should use the default point size if the vertex shader does not + // override it. + id_vector_temp_.push_back(builder_->makeFloatConstant(-1.0f)); + // The edge flag is ignored. + id_vector_temp_.push_back(const_float_0_); + // Don't kill by default (zero bits 0:30). + id_vector_temp_.push_back(const_float_0_); + var_main_point_size_edge_flag_kill_vertex_ = builder_->createVariable( + spv::NoPrecision, spv::StorageClassFunction, type_float3_, + "xe_var_point_size_edge_flag_kill_vertex", + builder_->makeCompositeConstant(type_float3_, id_vector_temp_)); + } // Zero general-purpose registers to prevent crashes when the game // references them after only initializing them conditionally. @@ -1352,13 +1395,35 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { std::move(composite_construct_op)); } builder_->createStore(position, position_ptr); + + // Write the point size. + if (output_point_size_ != spv::NoResult) { + spv::Id point_size; + if (current_shader().writes_point_size_edge_flag_kill_vertex() & 0b001) { + assert_true(var_main_point_size_edge_flag_kill_vertex_ != spv::NoResult); + id_vector_temp_.clear(); + // X vector component. + id_vector_temp_.push_back(const_int_0_); + point_size = builder_->createLoad( + builder_->createAccessChain( + spv::StorageClassFunction, + var_main_point_size_edge_flag_kill_vertex_, id_vector_temp_), + spv::NoPrecision); + } else { + // Not statically overridden - write a negative value. + point_size = builder_->makeFloatConstant(-1.0f); + } + builder_->createStore(point_size, output_point_size_); + } } void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { - // Interpolator inputs. Modification shader_modification = GetSpirvShaderModification(); + + uint32_t input_location = 0; + + // Interpolator inputs. { - uint32_t interpolator_location = 0; uint32_t interpolators_remaining = GetModificationInterpolatorMask(); uint32_t interpolator_index; while (xe::bit_scan_forward(interpolators_remaining, &interpolator_index)) { @@ -1368,28 +1433,41 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { fmt::format("xe_in_interpolator_{}", interpolator_index).c_str()); input_output_interpolators_[interpolator_index] = interpolator; builder_->addDecoration(interpolator, spv::DecorationLocation, - int(interpolator_location)); + int(input_location)); if (shader_modification.pixel.interpolators_centroid & (UINT32_C(1) << interpolator_index)) { builder_->addDecoration(interpolator, spv::DecorationCentroid); } main_interface_.push_back(interpolator); - ++interpolator_location; + ++input_location; } } bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; + // Point coordinate input. + if (shader_modification.pixel.param_gen_point) { + if (param_gen_needed) { + input_point_coordinates_ = + builder_->createVariable(spv::NoPrecision, spv::StorageClassInput, + type_float2_, "xe_in_point_coordinates"); + builder_->addDecoration(input_point_coordinates_, spv::DecorationLocation, + int(input_location)); + main_interface_.push_back(input_point_coordinates_); + } + ++input_location; + } + // Fragment coordinates. // TODO(Triang3l): More conditions - fragment shader interlock render backend, // alpha to coverage (if RT 0 is written, and there's no early depth / // stencil), depth writing in the fragment shader (per-sample if supported). if (param_gen_needed) { - input_fragment_coord_ = builder_->createVariable( + input_fragment_coordinates_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassInput, type_float4_, "gl_FragCoord"); - builder_->addDecoration(input_fragment_coord_, spv::DecorationBuiltIn, + builder_->addDecoration(input_fragment_coordinates_, spv::DecorationBuiltIn, spv::BuiltInFragCoord); - main_interface_.push_back(input_fragment_coord_); + main_interface_.push_back(input_fragment_coordinates_); } // Is front facing. @@ -1473,13 +1551,14 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31); // TODO(Triang3l): Resolution scale inversion. // X - pixel X .0 in the magnitude, is back-facing in the sign bit. - assert_true(input_fragment_coord_ != spv::NoResult); + assert_true(input_fragment_coordinates_ != spv::NoResult); id_vector_temp_.clear(); id_vector_temp_.push_back(const_int_0_); - spv::Id param_gen_x = builder_->createLoad( - builder_->createAccessChain(spv::StorageClassInput, - input_fragment_coord_, id_vector_temp_), - spv::NoPrecision); + spv::Id param_gen_x = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassInput, + input_fragment_coordinates_, id_vector_temp_), + spv::NoPrecision); id_vector_temp_.clear(); id_vector_temp_.push_back(param_gen_x); param_gen_x = builder_->createBuiltinCall( @@ -1514,10 +1593,11 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { // Y - pixel Y .0 in the magnitude, is point in the sign bit. id_vector_temp_.clear(); id_vector_temp_.push_back(builder_->makeIntConstant(1)); - spv::Id param_gen_y = builder_->createLoad( - builder_->createAccessChain(spv::StorageClassInput, - input_fragment_coord_, id_vector_temp_), - spv::NoPrecision); + spv::Id param_gen_y = + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassInput, + input_fragment_coordinates_, id_vector_temp_), + spv::NoPrecision); id_vector_temp_.clear(); id_vector_temp_.push_back(param_gen_y); param_gen_y = builder_->createBuiltinCall( @@ -1535,10 +1615,16 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { const_sign_bit)); } // Z - point S in the magnitude, is line in the sign bit. - spv::Id param_gen_z; + // W - point T in the magnitude. + spv::Id param_gen_z, param_gen_w; if (modification.pixel.param_gen_point) { - // TODO(Triang3l): Point coordinates. - param_gen_z = const_float_0_; + assert_true(input_point_coordinates_ != spv::NoResult); + spv::Id param_gen_point_coordinates = + builder_->createLoad(input_point_coordinates_, spv::NoPrecision); + param_gen_z = builder_->createCompositeExtract( + param_gen_point_coordinates, type_float_, 0); + param_gen_w = builder_->createCompositeExtract( + param_gen_point_coordinates, type_float_, 1); } else { param_gen_z = builder_->createUnaryOp( spv::OpBitcast, type_float_, @@ -1552,10 +1638,8 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { builder_->makeUintConstant(kSysFlag_PrimitiveLine)), const_uint_0_), const_sign_bit, const_uint_0_)); + param_gen_w = const_float_0_; } - // W - point T in the magnitude. - // TODO(Triang3l): Point coordinates. - spv::Id param_gen_w = const_float_0_; // Store the pixel parameters. id_vector_temp_.clear(); id_vector_temp_.reserve(4); @@ -1927,15 +2011,20 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, target_pointer = input_output_interpolators_[result.storage_index]; // Unused interpolators are spv::NoResult in input_output_interpolators_. } break; - case InstructionStorageTarget::kPosition: + case InstructionStorageTarget::kPosition: { assert_true(is_vertex_shader()); id_vector_temp_util_.clear(); id_vector_temp_util_.push_back( builder_->makeIntConstant(kOutputPerVertexMemberPosition)); target_pointer = builder_->createAccessChain( spv::StorageClassOutput, output_per_vertex_, id_vector_temp_util_); - break; - case InstructionStorageTarget::kColor: + } break; + case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: { + assert_true(is_vertex_shader()); + assert_zero(used_write_mask & 0b1000); + target_pointer = var_main_point_size_edge_flag_kill_vertex_; + } break; + case InstructionStorageTarget::kColor: { assert_true(is_pixel_shader()); assert_not_zero(used_write_mask); assert_true(current_shader().writes_color_target(result.storage_index)); @@ -1944,7 +2033,7 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, // an empty write mask without independent blending. // TODO(Triang3l): Store the alpha of the first output in this case for // alpha test and alpha to coverage. - break; + } break; default: // TODO(Triang3l): All storage targets. break; @@ -2179,6 +2268,57 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, } } } + + if (result.storage_target == + InstructionStorageTarget::kPointSizeEdgeFlagKillVertex && + used_write_mask & 0b001) { + // Make the point size non-negative as negative is used to indicate that the + // default size must be used, and also clamp it to the bounds the way the + // R400 (Adreno 200, to be more precise) hardware clamps it (functionally + // like a signed 32-bit integer, -NaN and -Infinity...-0 to the minimum, + // +NaN to the maximum). + spv::Id point_size = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createCompositeExtract(value_to_store, type_float_, 0)); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back( + builder_->makeIntConstant(kSystemConstantPointVertexDiameterMin)); + spv::Id point_vertex_diameter_min = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_util_), + spv::NoPrecision)); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(2); + id_vector_temp_util_.push_back(point_vertex_diameter_min); + id_vector_temp_util_.push_back(point_size); + point_size = + builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_, + GLSLstd450SMax, id_vector_temp_util_); + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back( + builder_->makeIntConstant(kSystemConstantPointVertexDiameterMax)); + spv::Id point_vertex_diameter_max = builder_->createUnaryOp( + spv::OpBitcast, type_int_, + builder_->createLoad( + builder_->createAccessChain(spv::StorageClassUniform, + uniform_system_constants_, + id_vector_temp_util_), + spv::NoPrecision)); + id_vector_temp_util_.clear(); + id_vector_temp_util_.reserve(2); + id_vector_temp_util_.push_back(point_vertex_diameter_max); + id_vector_temp_util_.push_back(point_size); + point_size = + builder_->createBuiltinCall(type_int_, ext_inst_glsl_std_450_, + GLSLstd450SMin, id_vector_temp_util_); + value_to_store = builder_->createCompositeInsert( + builder_->createUnaryOp(spv::OpBitcast, type_float_, point_size), + value_to_store, type_float3_, 0); + } + builder_->createStore(value_to_store, target_pointer); } diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index 69d05d95c..733bbf2ff 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -34,7 +34,7 @@ class SpirvShaderTranslator : public ShaderTranslator { // TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid // prototyping stage (easier to do small granular updates with an // incremental counter). - static constexpr uint32_t kVersion = 5; + static constexpr uint32_t kVersion = 6; enum class DepthStencilMode : uint32_t { kNoModifiers, @@ -50,6 +50,7 @@ class SpirvShaderTranslator : public ShaderTranslator { // Interpolators written by the vertex shader and needed by the pixel // shader. uint32_t interpolator_mask : xenos::kMaxInterpolators; + uint32_t output_point_size : 1; // Dynamically indexable register count from SQ_PROGRAM_CNTL. uint32_t dynamic_addressable_register_count : 8; // Pipeline stage and input configuration. @@ -145,10 +146,15 @@ class SpirvShaderTranslator : public ShaderTranslator { int32_t vertex_base_index; float ndc_scale[3]; - uint32_t padding_ndc_scale; + float point_vertex_diameter_min; float ndc_offset[3]; - uint32_t padding_ndc_offset; + float point_vertex_diameter_max; + + float point_constant_diameter[2]; + // Diameter in guest screen coordinates > radius (0.5 * diameter) in the NDC + // for the host viewport. + float point_screen_diameter_to_ndc_radius[2]; // Each byte contains post-swizzle TextureSign values for each of the needed // components of each of the 32 used texture fetch constants. @@ -603,7 +609,11 @@ class SpirvShaderTranslator : public ShaderTranslator { kSystemConstantVertexIndexEndian, kSystemConstantVertexBaseIndex, kSystemConstantNdcScale, + kSystemConstantPointVertexDiameterMin, kSystemConstantNdcOffset, + kSystemConstantPointVertexDiameterMax, + kSystemConstantPointConstantDiameter, + kSystemConstantPointScreenDiameterToNdcRadius, kSystemConstantTextureSwizzledSigns, kSystemConstantTextureSwizzles, kSystemConstantAlphaTestReference, @@ -627,8 +637,10 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id input_vertex_index_; // VS as TES only - int. spv::Id input_primitive_id_; + // PS, only when needed - float2. + spv::Id input_point_coordinates_; // PS, only when needed - float4. - spv::Id input_fragment_coord_; + spv::Id input_fragment_coordinates_; // PS, only when needed - bool. spv::Id input_front_facing_; @@ -643,6 +655,9 @@ class SpirvShaderTranslator : public ShaderTranslator { // all). std::array input_output_interpolators_; + // VS, only when needed - float. + spv::Id output_point_size_; + enum OutputPerVertexMember : unsigned int { kOutputPerVertexMemberPosition, kOutputPerVertexMemberCount, diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 3c4422561..80affe639 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -3482,6 +3482,49 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( system_constants_.ndc_offset[i] = viewport_info.ndc_offset[i]; } + // Point size. + if (vgt_draw_initiator.prim_type == xenos::PrimitiveType::kPointList) { + auto pa_su_point_minmax = regs.Get(); + auto pa_su_point_size = regs.Get(); + float point_vertex_diameter_min = + float(pa_su_point_minmax.min_size) * (2.0f / 16.0f); + float point_vertex_diameter_max = + float(pa_su_point_minmax.max_size) * (2.0f / 16.0f); + float point_constant_diameter_x = + float(pa_su_point_size.width) * (2.0f / 16.0f); + float point_constant_diameter_y = + float(pa_su_point_size.height) * (2.0f / 16.0f); + dirty |= system_constants_.point_vertex_diameter_min != + point_vertex_diameter_min; + dirty |= system_constants_.point_vertex_diameter_max != + point_vertex_diameter_max; + dirty |= system_constants_.point_constant_diameter[0] != + point_constant_diameter_x; + dirty |= system_constants_.point_constant_diameter[1] != + point_constant_diameter_y; + system_constants_.point_vertex_diameter_min = point_vertex_diameter_min; + system_constants_.point_vertex_diameter_max = point_vertex_diameter_max; + system_constants_.point_constant_diameter[0] = point_constant_diameter_x; + system_constants_.point_constant_diameter[1] = point_constant_diameter_y; + // 2 because 1 in the NDC is half of the viewport's axis, 0.5 for diameter + // to radius conversion to avoid multiplying the per-vertex diameter by an + // additional constant in the shader. + float point_screen_diameter_to_ndc_radius_x = + (/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_x())) / + std::max(viewport_info.xy_extent[0], uint32_t(1)); + float point_screen_diameter_to_ndc_radius_y = + (/* 0.5f * 2.0f * */ float(texture_cache_->draw_resolution_scale_y())) / + std::max(viewport_info.xy_extent[1], uint32_t(1)); + dirty |= system_constants_.point_screen_diameter_to_ndc_radius[0] != + point_screen_diameter_to_ndc_radius_x; + dirty |= system_constants_.point_screen_diameter_to_ndc_radius[1] != + point_screen_diameter_to_ndc_radius_y; + system_constants_.point_screen_diameter_to_ndc_radius[0] = + point_screen_diameter_to_ndc_radius_x; + system_constants_.point_screen_diameter_to_ndc_radius[1] = + point_screen_diameter_to_ndc_radius_y; + } + // Texture signedness / gamma. { uint32_t textures_remaining = used_texture_mask; diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc index df7156b08..7cf30e250 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc @@ -133,6 +133,11 @@ VulkanPipelineCache::GetCurrentVertexShaderModification( modification.vertex.interpolator_mask = interpolator_mask; + modification.vertex.output_point_size = + uint32_t((shader.writes_point_size_edge_flag_kill_vertex() & 0b001) && + regs.Get().prim_type == + xenos::PrimitiveType::kPointList); + return modification; } @@ -284,6 +289,8 @@ bool VulkanPipelineCache::ConfigurePipeline( if (GetGeometryShaderKey( description.geometry_shader, SpirvShaderTranslator::Modification(vertex_shader->modification()), + SpirvShaderTranslator::Modification( + pixel_shader ? pixel_shader->modification() : 0), geometry_shader_key)) { geometry_shader = GetGeometryShader(geometry_shader_key); if (geometry_shader == VK_NULL_HANDLE) { @@ -496,6 +503,7 @@ bool VulkanPipelineCache::GetCurrentStateDescription( PipelinePrimitiveTopology primitive_topology; switch (primitive_processing_result.host_primitive_type) { case xenos::PrimitiveType::kPointList: + geometry_shader = PipelineGeometryShader::kPointList; primitive_topology = PipelinePrimitiveTopology::kPointList; break; case xenos::PrimitiveType::kLineList: @@ -815,6 +823,7 @@ bool VulkanPipelineCache::ArePipelineRequirementsMet( bool VulkanPipelineCache::GetGeometryShaderKey( PipelineGeometryShader geometry_shader_type, SpirvShaderTranslator::Modification vertex_shader_modification, + SpirvShaderTranslator::Modification pixel_shader_modification, GeometryShaderKey& key_out) { if (geometry_shader_type == PipelineGeometryShader::kNone) { return false; @@ -831,10 +840,8 @@ bool VulkanPipelineCache::GetGeometryShaderKey( /* vertex_shader_modification.vertex.user_clip_plane_cull */ 0; key.has_vertex_kill_and = /* vertex_shader_modification.vertex.vertex_kill_and */ 0; - key.has_point_size = - /* vertex_shader_modification.vertex.output_point_size */ 0; - key.has_point_coordinates = - /* pixel_shader_modification.pixel.param_gen_point */ 0; + key.has_point_size = vertex_shader_modification.vertex.output_point_size; + key.has_point_coordinates = pixel_shader_modification.pixel.param_gen_point; key_out = key; return true; } @@ -853,6 +860,13 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { spv::ExecutionMode output_primitive_execution_mode = spv::ExecutionMode(0); uint32_t output_max_vertices = 0; switch (key.type) { + case PipelineGeometryShader::kPointList: + // Point to a strip of 2 triangles. + input_primitive_execution_mode = spv::ExecutionModeInputPoints; + input_primitive_vertex_count = 1; + output_primitive_execution_mode = spv::ExecutionModeOutputTriangleStrip; + output_max_vertices = 4; + break; case PipelineGeometryShader::kRectangleList: // Triangle to a strip of 2 triangles. input_primitive_execution_mode = spv::ExecutionModeTriangles; @@ -901,6 +915,7 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { spv::Id type_bool4 = builder.makeVectorType(type_bool, 4); spv::Id type_int = builder.makeIntType(32); spv::Id type_float = builder.makeFloatType(32); + spv::Id type_float2 = builder.makeVectorType(type_float, 2); spv::Id type_float4 = builder.makeVectorType(type_float, 4); spv::Id type_clip_distances = clip_distance_count @@ -912,9 +927,54 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { ? builder.makeArrayType( type_float, builder.makeUintConstant(cull_distance_count), 0) : spv::NoType; - spv::Id type_point_coordinates = key.has_point_coordinates - ? builder.makeVectorType(type_float, 2) - : spv::NoType; + + // System constants. + // For points: + // - float2 point_constant_diameter + // - float2 point_screen_diameter_to_ndc_radius + enum PointConstant : uint32_t { + kPointConstantConstantDiameter, + kPointConstantScreenDiameterToNdcRadius, + kPointConstantCount, + }; + spv::Id type_system_constants = spv::NoType; + if (key.type == PipelineGeometryShader::kPointList) { + id_vector_temp.clear(); + id_vector_temp.resize(kPointConstantCount); + id_vector_temp[kPointConstantConstantDiameter] = type_float2; + id_vector_temp[kPointConstantScreenDiameterToNdcRadius] = type_float2; + type_system_constants = + builder.makeStructType(id_vector_temp, "XeSystemConstants"); + builder.addMemberName(type_system_constants, kPointConstantConstantDiameter, + "point_constant_diameter"); + builder.addMemberDecoration( + type_system_constants, kPointConstantConstantDiameter, + spv::DecorationOffset, + int(offsetof(SpirvShaderTranslator::SystemConstants, + point_constant_diameter))); + builder.addMemberName(type_system_constants, + kPointConstantScreenDiameterToNdcRadius, + "point_screen_diameter_to_ndc_radius"); + builder.addMemberDecoration( + type_system_constants, kPointConstantScreenDiameterToNdcRadius, + spv::DecorationOffset, + int(offsetof(SpirvShaderTranslator::SystemConstants, + point_screen_diameter_to_ndc_radius))); + } + spv::Id uniform_system_constants = spv::NoResult; + if (type_system_constants != spv::NoType) { + builder.addDecoration(type_system_constants, spv::DecorationBlock); + uniform_system_constants = builder.createVariable( + spv::NoPrecision, spv::StorageClassUniform, type_system_constants, + "xe_uniform_system_constants"); + builder.addDecoration(uniform_system_constants, + spv::DecorationDescriptorSet, + int(SpirvShaderTranslator::kDescriptorSetConstants)); + builder.addDecoration(uniform_system_constants, spv::DecorationBinding, + int(SpirvShaderTranslator::kConstantBufferSystem)); + // Generating SPIR-V 1.0, no need to add bindings to the entry point's + // interface until SPIR-V 1.4. + } // Inputs and outputs - matching glslang order, in gl_PerVertex gl_in[], // user-defined outputs, user-defined inputs, out gl_PerVertex. @@ -977,6 +1037,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { type_array_in_gl_per_vertex, "gl_in"); main_interface.push_back(in_gl_per_vertex); + uint32_t output_location = 0; + // Interpolators outputs. std::array out_interpolators; for (uint32_t i = 0; i < key.interpolator_count; ++i) { @@ -984,23 +1046,28 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { spv::NoPrecision, spv::StorageClassOutput, type_float4, fmt::format("xe_out_interpolator_{}", i).c_str()); out_interpolators[i] = out_interpolator; - builder.addDecoration(out_interpolator, spv::DecorationLocation, i); + builder.addDecoration(out_interpolator, spv::DecorationLocation, + int(output_location)); builder.addDecoration(out_interpolator, spv::DecorationInvariant); main_interface.push_back(out_interpolator); + ++output_location; } // Point coordinate output. spv::Id out_point_coordinates = spv::NoResult; if (key.has_point_coordinates) { - out_point_coordinates = builder.createVariable( - spv::NoPrecision, spv::StorageClassOutput, type_point_coordinates, - "xe_out_point_coordinates"); + out_point_coordinates = + builder.createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_float2, "xe_out_point_coordinates"); builder.addDecoration(out_point_coordinates, spv::DecorationLocation, - key.interpolator_count); + int(output_location)); builder.addDecoration(out_point_coordinates, spv::DecorationInvariant); main_interface.push_back(out_point_coordinates); + ++output_location; } + uint32_t input_location = 0; + // Interpolator inputs. std::array in_interpolators; for (uint32_t i = 0; i < key.interpolator_count; ++i) { @@ -1010,8 +1077,10 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { 0), fmt::format("xe_in_interpolator_{}", i).c_str()); in_interpolators[i] = in_interpolator; - builder.addDecoration(in_interpolator, spv::DecorationLocation, i); + builder.addDecoration(in_interpolator, spv::DecorationLocation, + int(input_location)); main_interface.push_back(in_interpolator); + ++input_location; } // Point size input. @@ -1023,8 +1092,9 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { 0), "xe_in_point_size"); builder.addDecoration(in_point_size, spv::DecorationLocation, - key.interpolator_count); + int(input_location)); main_interface.push_back(in_point_size); + ++input_location; } // out gl_PerVertex. @@ -1198,6 +1268,231 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { } switch (key.type) { + case PipelineGeometryShader::kPointList: { + // Expand the point sprite, with left-to-right, top-to-bottom UVs. + + spv::Id const_int_0 = builder.makeIntConstant(0); + spv::Id const_int_1 = builder.makeIntConstant(1); + spv::Id const_float_0 = builder.makeFloatConstant(0.0f); + + // Load the point diameter in guest pixels. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back( + builder.makeIntConstant(int32_t(kPointConstantConstantDiameter))); + id_vector_temp.push_back(const_int_0); + spv::Id point_guest_diameter_x = builder.createLoad( + builder.createAccessChain(spv::StorageClassUniform, + uniform_system_constants, id_vector_temp), + spv::NoPrecision); + id_vector_temp.back() = const_int_1; + spv::Id point_guest_diameter_y = builder.createLoad( + builder.createAccessChain(spv::StorageClassUniform, + uniform_system_constants, id_vector_temp), + spv::NoPrecision); + if (key.has_point_size) { + // The vertex shader's header writes -1.0 to point_size by default, so + // any non-negative value means that it was overwritten by the + // translated vertex shader, and needs to be used instead of the + // constant size. The per-vertex diameter is already clamped in the + // vertex shader (combined with making it non-negative). + id_vector_temp.clear(); + // 0 is the input primitive vertex index. + id_vector_temp.push_back(const_int_0); + spv::Id point_vertex_diameter = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_point_size, + id_vector_temp), + spv::NoPrecision); + spv::Id point_vertex_diameter_written = + builder.createBinOp(spv::OpFOrdGreaterThanEqual, type_bool, + point_vertex_diameter, const_float_0); + point_guest_diameter_x = builder.createTriOp( + spv::OpSelect, type_float, point_vertex_diameter_written, + point_vertex_diameter, point_guest_diameter_x); + point_guest_diameter_y = builder.createTriOp( + spv::OpSelect, type_float, point_vertex_diameter_written, + point_vertex_diameter, point_guest_diameter_y); + } + + // 4D5307F1 has zero-size snowflakes, drop them quicker, and also drop + // points with a constant size of zero since point lists may also be used + // as just "compute" with memexport. + spv::Id point_size_not_zero = builder.createBinOp( + spv::OpLogicalAnd, type_bool, + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + point_guest_diameter_x, const_float_0), + builder.createBinOp(spv::OpFOrdGreaterThan, type_bool, + point_guest_diameter_y, const_float_0)); + spv::Block& point_size_zero_predecessor = *builder.getBuildPoint(); + spv::Block& point_size_zero_then_block = builder.makeNewBlock(); + spv::Block& point_size_zero_merge_block = builder.makeNewBlock(); + { + std::unique_ptr selection_merge_op( + std::make_unique(spv::OpSelectionMerge)); + selection_merge_op->addIdOperand(point_size_zero_merge_block.getId()); + selection_merge_op->addImmediateOperand( + spv::SelectionControlDontFlattenMask); + point_size_zero_predecessor.addInstruction( + std::move(selection_merge_op)); + } + { + std::unique_ptr branch_conditional_op( + std::make_unique(spv::OpBranchConditional)); + branch_conditional_op->addIdOperand(point_size_not_zero); + branch_conditional_op->addIdOperand( + point_size_zero_merge_block.getId()); + branch_conditional_op->addIdOperand(point_size_zero_then_block.getId()); + branch_conditional_op->addImmediateOperand(2); + branch_conditional_op->addImmediateOperand(1); + point_size_zero_predecessor.addInstruction( + std::move(branch_conditional_op)); + } + point_size_zero_then_block.addPredecessor(&point_size_zero_predecessor); + point_size_zero_merge_block.addPredecessor(&point_size_zero_predecessor); + builder.setBuildPoint(&point_size_zero_then_block); + builder.createNoResultOp(spv::OpReturn); + builder.setBuildPoint(&point_size_zero_merge_block); + + // Transform the diameter in the guest screen coordinates to radius in the + // normalized device coordinates, and then to the clip space by + // multiplying by W. + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back(builder.makeIntConstant( + int32_t(kPointConstantScreenDiameterToNdcRadius))); + id_vector_temp.push_back(const_int_0); + spv::Id point_radius_x = builder.createBinOp( + spv::OpFMul, type_float, point_guest_diameter_x, + builder.createLoad(builder.createAccessChain(spv::StorageClassUniform, + uniform_system_constants, + id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(point_radius_x, spv::DecorationNoContraction); + id_vector_temp.back() = const_int_1; + spv::Id point_radius_y = builder.createBinOp( + spv::OpFMul, type_float, point_guest_diameter_y, + builder.createLoad(builder.createAccessChain(spv::StorageClassUniform, + uniform_system_constants, + id_vector_temp), + spv::NoPrecision)); + builder.addDecoration(point_radius_y, spv::DecorationNoContraction); + id_vector_temp.clear(); + id_vector_temp.reserve(2); + // 0 is the input primitive vertex index. + id_vector_temp.push_back(const_int_0); + id_vector_temp.push_back(const_member_in_gl_per_vertex_position); + spv::Id point_position = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + spv::Id point_w = + builder.createCompositeExtract(point_position, type_float, 3); + point_radius_x = + builder.createBinOp(spv::OpFMul, type_float, point_radius_x, point_w); + builder.addDecoration(point_radius_x, spv::DecorationNoContraction); + point_radius_y = + builder.createBinOp(spv::OpFMul, type_float, point_radius_y, point_w); + builder.addDecoration(point_radius_y, spv::DecorationNoContraction); + + // Load the inputs for the guest point. + // Interpolators. + std::array point_interpolators; + id_vector_temp.clear(); + // 0 is the input primitive vertex index. + id_vector_temp.push_back(const_int_0); + for (uint32_t i = 0; i < key.interpolator_count; ++i) { + point_interpolators[i] = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, + in_interpolators[i], id_vector_temp), + spv::NoPrecision); + } + // Positions. + spv::Id point_x = + builder.createCompositeExtract(point_position, type_float, 0); + spv::Id point_y = + builder.createCompositeExtract(point_position, type_float, 1); + std::array point_edge_x, point_edge_y; + for (uint32_t i = 0; i < 2; ++i) { + spv::Op point_radius_add_op = i ? spv::OpFAdd : spv::OpFSub; + point_edge_x[i] = builder.createBinOp(point_radius_add_op, type_float, + point_x, point_radius_x); + builder.addDecoration(point_edge_x[i], spv::DecorationNoContraction); + point_edge_y[i] = builder.createBinOp(point_radius_add_op, type_float, + point_y, point_radius_y); + builder.addDecoration(point_edge_y[i], spv::DecorationNoContraction); + }; + spv::Id point_z = + builder.createCompositeExtract(point_position, type_float, 2); + // Clip distances. + spv::Id point_clip_distances = spv::NoResult; + if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + // 0 is the input primitive vertex index. + id_vector_temp.push_back(const_int_0); + id_vector_temp.push_back(const_member_in_gl_per_vertex_clip_distance); + point_clip_distances = builder.createLoad( + builder.createAccessChain(spv::StorageClassInput, in_gl_per_vertex, + id_vector_temp), + spv::NoPrecision); + } + + for (uint32_t i = 0; i < 4; ++i) { + // Same interpolators for the entire sprite. + for (uint32_t j = 0; j < key.interpolator_count; ++j) { + builder.createStore(point_interpolators[j], out_interpolators[j]); + } + // Top-left, bottom-left, top-right, bottom-right order (chosen + // arbitrarily, simply based on counterclockwise meaning front with + // frontFace = VkFrontFace(0), but faceness is ignored for non-polygon + // primitive types). + uint32_t point_vertex_x = i >> 1; + uint32_t point_vertex_y = i & 1; + // Point coordinates. + if (key.has_point_coordinates) { + id_vector_temp.clear(); + id_vector_temp.reserve(2); + id_vector_temp.push_back( + builder.makeFloatConstant(float(point_vertex_x))); + id_vector_temp.push_back( + builder.makeFloatConstant(float(point_vertex_y))); + builder.createStore( + builder.makeCompositeConstant(type_float2, id_vector_temp), + out_point_coordinates); + } + // Position. + id_vector_temp.clear(); + id_vector_temp.reserve(4); + id_vector_temp.push_back(point_edge_x[point_vertex_x]); + id_vector_temp.push_back(point_edge_y[point_vertex_y]); + id_vector_temp.push_back(point_z); + id_vector_temp.push_back(point_w); + spv::Id point_vertex_position = + builder.createCompositeConstruct(type_float4, id_vector_temp); + id_vector_temp.clear(); + id_vector_temp.push_back(const_member_out_gl_per_vertex_position); + builder.createStore( + point_vertex_position, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + // Clip distances. + // TODO(Triang3l): Handle ps_ucp_mode properly, clip expanded points if + // needed. + if (clip_distance_count) { + id_vector_temp.clear(); + id_vector_temp.push_back( + const_member_out_gl_per_vertex_clip_distance); + builder.createStore( + point_clip_distances, + builder.createAccessChain(spv::StorageClassOutput, + out_gl_per_vertex, id_vector_temp)); + } + // Emit the vertex. + builder.createNoResultOp(spv::OpEmitVertex); + } + builder.createNoResultOp(spv::OpEndPrimitive); + } break; + case PipelineGeometryShader::kRectangleList: { // Construct a strip with the fourth vertex generated by mirroring a // vertex across the longest edge (the diagonal). @@ -1308,8 +1603,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { id_vector_temp.reserve(2); id_vector_temp.push_back(const_float_0); id_vector_temp.push_back(const_float_0); - const_point_coordinates_zero = builder.makeCompositeConstant( - type_point_coordinates, id_vector_temp); + const_point_coordinates_zero = + builder.makeCompositeConstant(type_float2, id_vector_temp); } // Emit the triangle in the strip that consists of the original vertices. @@ -1491,8 +1786,8 @@ VkShaderModule VulkanPipelineCache::GetGeometryShader(GeometryShaderKey key) { id_vector_temp.reserve(2); id_vector_temp.push_back(const_float_0); id_vector_temp.push_back(const_float_0); - const_point_coordinates_zero = builder.makeCompositeConstant( - type_point_coordinates, id_vector_temp); + const_point_coordinates_zero = + builder.makeCompositeConstant(type_float2, id_vector_temp); } // Build the triangle strip from the original quad vertices in the diff --git a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h index e967a1415..6e0c73ab0 100644 --- a/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h +++ b/src/xenia/gpu/vulkan/vulkan_pipeline_cache.h @@ -92,6 +92,7 @@ class VulkanPipelineCache { private: enum class PipelineGeometryShader : uint32_t { kNone, + kPointList, kRectangleList, kQuadList, }; @@ -267,6 +268,7 @@ class VulkanPipelineCache { static bool GetGeometryShaderKey( PipelineGeometryShader geometry_shader_type, SpirvShaderTranslator::Modification vertex_shader_modification, + SpirvShaderTranslator::Modification pixel_shader_modification, GeometryShaderKey& key_out); VkShaderModule GetGeometryShader(GeometryShaderKey key);