From 13363161398a6f3b692e602897a962ea93b29908 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Thu, 20 Feb 2020 08:55:40 +0300 Subject: [PATCH] [D3D12] Vertex kill and multipass vertex exports --- .../gpu/d3d12/d3d12_command_processor.cc | 148 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 12 +- src/xenia/gpu/d3d12/primitive_converter.cc | 3 +- .../shaders/dxbc/primitive_point_list_gs.cso | Bin 7320 -> 7544 bytes .../shaders/dxbc/primitive_point_list_gs.h | 629 ++++--- .../shaders/dxbc/primitive_point_list_gs.txt | 31 +- .../shaders/dxbc/primitive_quad_list_gs.cso | Bin 4252 -> 4944 bytes .../shaders/dxbc/primitive_quad_list_gs.h | 677 +++---- .../shaders/dxbc/primitive_quad_list_gs.txt | 28 +- .../dxbc/primitive_rectangle_list_gs.cso | Bin 10164 -> 10736 bytes .../dxbc/primitive_rectangle_list_gs.h | 1638 +++++++++-------- .../dxbc/primitive_rectangle_list_gs.txt | 62 +- .../shaders/primitive_point_list.gs.hlsl | 38 +- .../d3d12/shaders/primitive_quad_list.gs.hlsl | 25 +- .../shaders/primitive_rectangle_list.gs.hlsl | 88 +- src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli | 15 +- src/xenia/gpu/dxbc_shader_translator.cc | 137 +- src/xenia/gpu/dxbc_shader_translator.h | 16 +- src/xenia/gpu/shader.h | 11 +- src/xenia/gpu/shader_translator.cc | 6 +- src/xenia/gpu/shader_translator_disasm.cc | 2 +- src/xenia/gpu/spirv_shader_translator.cc | 2 +- src/xenia/gpu/xenos.h | 2 + 23 files changed, 1959 insertions(+), 1611 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index c903202dc..7af8173e6 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1313,10 +1313,14 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); + auto sq_program_cntl = regs.Get(); auto pa_su_sc_mode_cntl = regs.Get(); 
- if (!memexport_used_vertex && primitive_two_faced && - pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) { - // Both sides are culled - can't be expressed in the pipeline state. + if (!memexport_used_vertex && + (sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass || + (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && + pa_su_sc_mode_cntl.cull_back))) { + // All faces are culled - can't be expressed in the pipeline state. return true; } @@ -2440,6 +2444,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( if (viewport_scale_z < 0.0f) { flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; } + // Primitive killing condition. + if (pa_cl_clip_cntl.vtx_kill_or) { + flags |= DxbcShaderTranslator::kSysFlag_KillIfAnyVertexKilled; + } // Alpha test. if (rb_colorcontrol.alpha_test_enable) { flags |= uint32_t(rb_colorcontrol.alpha_func) @@ -2534,6 +2542,18 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } } + // Half pixel offset. + bool half_pixel_offset = + cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center; + // Like in OpenGL - VPOS giving pixel centers, or like in Direct3D 9 - VPOS + // giving the top-left corner. + // TODO(Triang3l): Check if ps_param_gen should give center positions in + // OpenGL mode on the Xbox 360. + float param_gen_half_pixel_offset = half_pixel_offset ? 0.0f : 0.5f; + dirty |= + system_constants_.pixel_half_pixel_offset != param_gen_half_pixel_offset; + system_constants_.pixel_half_pixel_offset = param_gen_half_pixel_offset; + // Conversion to Direct3D 12 normalized device coordinates. // See viewport configuration in UpdateFixedFunctionState for explanations. // X and Y scale/offset is to convert unnormalized coordinates generated by @@ -2545,72 +2565,72 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // different register (and if there's such register at all). 
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; - // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be - // written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't - // set in this case in draws in games. - bool gl_clip_space_def = - !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; - float ndc_scale_x, ndc_scale_y, ndc_scale_z; - if (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && - pa_su_sc_mode_cntl.cull_back) { - // Kill all primitives if both faces are culled, but the vertex shader still - // needs to do memexport (not NaN because of comparison for setting the - // dirty flag). - ndc_scale_x = ndc_scale_y = ndc_scale_z = 0; + // Kill all primitives if multipass or both faces are culled, but still need + // to do memexport. + if (sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass || + (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && + pa_su_sc_mode_cntl.cull_back)) { + dirty |= !std::isnan(system_constants_.ndc_scale[0]); + dirty |= !std::isnan(system_constants_.ndc_scale[1]); + dirty |= !std::isnan(system_constants_.ndc_scale[2]); + dirty |= !std::isnan(system_constants_.ndc_offset[0]); + dirty |= !std::isnan(system_constants_.ndc_offset[1]); + dirty |= !std::isnan(system_constants_.ndc_offset[2]); + float nan_value = std::nanf(""); + system_constants_.ndc_scale[0] = nan_value; + system_constants_.ndc_scale[1] = nan_value; + system_constants_.ndc_scale[2] = nan_value; + system_constants_.ndc_offset[0] = nan_value; + system_constants_.ndc_offset[1] = nan_value; + system_constants_.ndc_offset[2] = nan_value; } else { - if (pa_cl_vte_cntl.vport_x_scale_ena) { - ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; - } else { - ndc_scale_x = 1.0f / 1280.0f; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - ndc_scale_y = viewport_scale_y >= 0.0f ? 
-1.0f : 1.0f; - } else { - ndc_scale_y = -1.0f / 1280.0f; - } - ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; - } - float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; - float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; - float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; - // Like in OpenGL - VPOS giving pixel centers. - // TODO(Triang3l): Check if ps_param_gen should give center positions in - // OpenGL mode on the Xbox 360. - float pixel_half_pixel_offset = 0.5f; - if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) { - // Signs are hopefully correct here, tested in GTA IV on both clearing - // (without a viewport) and drawing things near the edges of the screen. - if (pa_cl_vte_cntl.vport_x_scale_ena) { - if (viewport_scale_x != 0.0f) { - ndc_offset_x += 0.5f / viewport_scale_x; + // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to + // be written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF + // isn't set in this case in draws in games. + bool gl_clip_space_def = + !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; + float ndc_scale_x = pa_cl_vte_cntl.vport_x_scale_ena + ? (viewport_scale_x >= 0.0f ? 1.0f : -1.0f) + : (1.0f / 1280.0f); + float ndc_scale_y = pa_cl_vte_cntl.vport_y_scale_ena + ? (viewport_scale_y >= 0.0f ? -1.0f : 1.0f) + : (-1.0f / 1280.0f); + float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; + float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; + float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; + float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; + if (half_pixel_offset) { + // Signs are hopefully correct here, tested in GTA IV on both clearing + // (without a viewport) and drawing things near the edges of the screen. 
+ if (pa_cl_vte_cntl.vport_x_scale_ena) { + if (viewport_scale_x != 0.0f) { + ndc_offset_x += 0.5f / viewport_scale_x; + } + } else { + ndc_offset_x += 1.0f / 2560.0f; } - } else { - ndc_offset_x += 1.0f / 2560.0f; - } - if (pa_cl_vte_cntl.vport_y_scale_ena) { - if (viewport_scale_y != 0.0f) { - ndc_offset_y += 0.5f / viewport_scale_y; + if (pa_cl_vte_cntl.vport_y_scale_ena) { + if (viewport_scale_y != 0.0f) { + ndc_offset_y += 0.5f / viewport_scale_y; + } + } else { + ndc_offset_y -= 1.0f / 2560.0f; } - } else { - ndc_offset_y -= 1.0f / 2560.0f; } - // Like in Direct3D 9 - VPOS giving the top-left corner. - pixel_half_pixel_offset = 0.0f; + dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; + dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; + dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; + dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; + dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; + dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; + system_constants_.ndc_scale[0] = ndc_scale_x; + system_constants_.ndc_scale[1] = ndc_scale_y; + system_constants_.ndc_scale[2] = ndc_scale_z; + system_constants_.ndc_offset[0] = ndc_offset_x; + system_constants_.ndc_offset[1] = ndc_offset_y; + system_constants_.ndc_offset[2] = ndc_offset_z; } - dirty |= system_constants_.ndc_scale[0] != ndc_scale_x; - dirty |= system_constants_.ndc_scale[1] != ndc_scale_y; - dirty |= system_constants_.ndc_scale[2] != ndc_scale_z; - dirty |= system_constants_.ndc_offset[0] != ndc_offset_x; - dirty |= system_constants_.ndc_offset[1] != ndc_offset_y; - dirty |= system_constants_.ndc_offset[2] != ndc_offset_z; - dirty |= system_constants_.pixel_half_pixel_offset != pixel_half_pixel_offset; - system_constants_.ndc_scale[0] = ndc_scale_x; - system_constants_.ndc_scale[1] = ndc_scale_y; - system_constants_.ndc_scale[2] = ndc_scale_z; - system_constants_.ndc_offset[0] = ndc_offset_x; - system_constants_.ndc_offset[1] = ndc_offset_y; - 
system_constants_.ndc_offset[2] = ndc_offset_z; - system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; // Point size. float point_size_x = float(pa_su_point_size.width) * 0.125f; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 98d488f4c..ba3314007 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -218,13 +218,11 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, auto sq_program_cntl = regs.Get(); - // Normal vertex shaders only, for now. - assert_true(sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kPosition1Vector || - sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kPosition2VectorsSprite || - sq_program_cntl.vs_export_mode == - xenos::VertexShaderExportMode::kMultipass); + // Edge flags are not supported yet (because polygon primitives are not). + assert_true(sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdge && + sq_program_cntl.vs_export_mode != + xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill); assert_false(sq_program_cntl.gen_index_vtx); if (!vertex_shader->is_translated() && diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 72bcfa545..db47ba804 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -25,7 +25,8 @@ DEFINE_bool(d3d12_convert_quads_to_triangles, false, "Convert quad lists to triangle lists on the CPU instead of using " "a geometry shader. 
Not recommended for playing, for debugging " "primarily (because PIX fails to display vertices when a geometry " - "shader is used).", + "shader is used), and this way quads can't be discarded correctly " + "when the game uses vertex kill functionality.", "D3D12"); namespace xe { diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/primitive_point_list_gs.cso index 7bacd7af630aff1202605f644f35e2020e5cd57d..a9004fb55bc186553212dbc6d3a95f43a1684d12 100644 GIT binary patch delta 848 zcmZ8f&r2Io5S~po*&j*PWZjrm()uHZf*P!$(8R7{W3jiAB9z{Y-h@DVsF;gLuHMQE zv=&KwI2{AI6h$yY@%+O$R8p4RXnXYcBT;{8e__2_eCe|6`Aun2`wzn z4m^Uq*tZMgV3CC(!Xj~NVvzY>*CURWNf_IT-$B6!e+V~v(9*|*WBh^c<=0KiA}DmhdbGtc3_$=Wm!cu zo1KaxdT6T=q#CuY-^}Am)*ntqNp}bb^w`)|Jx)@xHmS)NP}Z2~^;dC1#ehu1eKnM{ z)&C^v@g82`dGhRwG#IuwU~g!w@RVQ7MQ%a!#h+Y>zXccHXKrcUH!G^|x{7y`gJr7J zw);!!;?SMo)7oY~<*5BbD}SVkx_82t)HKgkr=-qjhl%{l7_QS2je-}xFLd?k4?L2( T9-3qP|1&~f``8&3)nfA>ve8p# delta 617 zcmZ`%Jxjw-6un8K_9IDSV(foPhNY5Qb;(F1d>uJ>y5Bpe7cIkwBCn3Q_z*5iBBth)qx|ENmhO7JRfn8HD=F}6+R7e)nm^z?$aV8h28J;;M7bHWw>o~3x){q zK~we$urd9GQamo*jw{572P8}?5aiL&W?lS@ff2?5c{_y1$g$0}FA<3W_Bo>X gch(QT+?+R3V`2Si(=_M!L&Lh*Et&KCFOCBn&h+S>3ZOmiX+RTj0d{#{?TnvsElVU7R;11pd=0pbNfe1VC9Ap|5K zFwv-pR|Cis1#&omm|@~ZaY+WC07x4MFat4&&Cd@cCO#BrWSE@DEW!ffO%`N?s0En> z0wA>@HdyUsM@Eq1i!jB7P{o`el|TSe3}QnRF9a)gWPzG|5vrIMq!I`~ia~6MVn!yA z$s1vc4VfU$5Co|N0+3=5n;FR9oSX<$EIL_{6>PHT& zj7JZ8@ggE+1-**i)t?aX=AlJIb983+y`%&@cyM6e%zN{_nc4T=zP)|csouKZ-0DB< zzJ8ZFI2}D(e82te%`X7}XDa{}*)`JVq$3012`$`R8C1I$?$B(WR&CPE;KUOd6p^%< z;Mvf-n4xflLM&`q6CA`@!R(P@aZ`k_WRiqI_Cxq6fQK4>q2L)NNf>57gue!G+YGxv zVT75fY9K)iW4K~)8y^U!^(EXVc$Io)VZ*RX%9&SjV0iO4RFosEX;Ra~l2w$QoxQE& z!$bd*@zjWY$pf@0$4`IX+7;FwQBD6Ox-R-_V8>FBq=R>aCVr0P z;vBDlEvMt6K>?Bt=*51}9?siZ%<&SWNR{6YCUv+jAE?AHXF@nCLR*KVyrPRLdr8^) zxx#u)H#l9G8_gouP`9CM5^}Ps4wPfuqqY@j;AbaGIlj}_@MYqfNom*J zJT2|I*~^o(rK!6SVEgD2p7B3=Wioi0&5NAxF8r28&&I2Tg;))GHb0Iu&KA6T06obw ze&Q`>V)==UN%D8B@VW96(g*t2z5a3Gm9exy6Ujkv7@ 
zEJ>3QOSNQqT_Jclf)7I2AQFvWsss31!gVcF9)@t7NHl_}4q%0ma(@I@Sa1xRNHl_} zRw6~q_oGUFx>u!*Q}E90|K z5KoSYlj1d9CPvL7BfRW)bOvG{s5F7QMMA}9HYfjQ9D7v&ljn-lb~6z`)Q}5!r|9ak dEBeJA%h&Kr>C4GV= xe_stream) { - XeVertex xe_out; - xe_out.interpolators = xe_in[0].interpolators; - xe_out.point_params.z = xe_in[0].point_params.z; - xe_out.clip_space_zw = xe_in[0].clip_space_zw; - xe_out.position.zw = xe_in[0].position.zw; - xe_out.clip_distance_0123 = xe_in[0].clip_distance_0123; - xe_out.clip_distance_45 = xe_in[0].clip_distance_45; +void main(point XeVertexPreGS xe_in[1], + inout TriangleStream xe_stream) { + if (xe_in[0].cull_distance < 0.0f || any(isnan(xe_in[0].post_gs.position))) { + return; + } + + XeVertexPostGS xe_out; + xe_out.interpolators = xe_in[0].post_gs.interpolators; + xe_out.point_params.z = xe_in[0].post_gs.point_params.z; + xe_out.clip_space_zw = xe_in[0].post_gs.clip_space_zw; + xe_out.position.zw = xe_in[0].post_gs.position.zw; + xe_out.clip_distance_0123 = xe_in[0].post_gs.clip_distance_0123; + xe_out.clip_distance_45 = xe_in[0].post_gs.clip_distance_45; // Shader header writes -1.0f to point_size by default, so any positive value // means that it was overwritten by the translated vertex shader. - float2 point_size = (xe_in[0].point_params.z > 0.0f ? xe_in[0].point_params.zz - : xe_point_size); + float2 point_size = + (xe_in[0].post_gs.point_params.z > 0.0f ? 
xe_in[0].post_gs.point_params.zz + : xe_point_size); point_size = clamp(point_size, xe_point_size_min_max.xx, xe_point_size_min_max.yy) * - xe_point_screen_to_ndc * xe_in[0].position.w; + xe_point_screen_to_ndc * xe_in[0].post_gs.position.w; xe_out.point_params.xy = float2(0.0, 1.0); - xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size; + xe_out.position.xy = + xe_in[0].post_gs.position.xy + float2(-1.0, 1.0) * point_size; xe_stream.Append(xe_out); xe_out.point_params.xy = float2(1.0, 1.0); - xe_out.position.xy = xe_in[0].position.xy + point_size; + xe_out.position.xy = xe_in[0].post_gs.position.xy + point_size; xe_stream.Append(xe_out); xe_out.point_params.xy = float2(0.0, 0.0); - xe_out.position.xy = xe_in[0].position.xy - point_size; + xe_out.position.xy = xe_in[0].post_gs.position.xy - point_size; xe_stream.Append(xe_out); xe_out.point_params.xy = float2(1.0, 0.0); - xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size; + xe_out.position.xy = + xe_in[0].post_gs.position.xy + float2(1.0, -1.0) * point_size; xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl index 882e29ef0..b7d4422ac 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl @@ -1,10 +1,25 @@ #include "xenos_draw.hlsli" [maxvertexcount(4)] -void main(lineadj XeVertex xe_in[4], inout TriangleStream xe_stream) { - xe_stream.Append(xe_in[0]); - xe_stream.Append(xe_in[1]); - xe_stream.Append(xe_in[3]); - xe_stream.Append(xe_in[2]); +void main(lineadj XeVertexPreGS xe_in[4], + inout TriangleStream xe_stream) { + // Must kill the whole quad if need to kill. 
+ if (max(max(xe_in[0].cull_distance, xe_in[1].cull_distance), + max(xe_in[2].cull_distance, xe_in[3].cull_distance)) < 0.0f || + any(isnan(xe_in[0].post_gs.position)) || + any(isnan(xe_in[1].post_gs.position)) || + any(isnan(xe_in[2].post_gs.position)) || + any(isnan(xe_in[3].post_gs.position))) { + return; + } + XeVertexPostGS xe_out; + xe_out = xe_in[0].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[1].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[3].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[2].post_gs; + xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl index e62d20207..8411e54c2 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl @@ -1,17 +1,34 @@ #include "xenos_draw.hlsli" [maxvertexcount(6)] -void main(triangle XeVertex xe_in[3], - inout TriangleStream xe_stream) { - xe_stream.Append(xe_in[0]); - xe_stream.Append(xe_in[1]); - xe_stream.Append(xe_in[2]); +void main(triangle XeVertexPreGS xe_in[3], + inout TriangleStream xe_stream) { + if (max(max(xe_in[0].cull_distance, xe_in[1].cull_distance), + xe_in[2].cull_distance) < 0.0f || + any(isnan(xe_in[0].post_gs.position)) || + any(isnan(xe_in[1].post_gs.position)) || + any(isnan(xe_in[2].post_gs.position))) { + return; + } + + XeVertexPostGS xe_out; + + xe_out = xe_in[0].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[1].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[2].post_gs; + xe_stream.Append(xe_out); xe_stream.RestartStrip(); + // Find the diagonal (the edge that is longer than both the other two) and // mirror the other vertex across it. 
- float3 edge_01 = xe_in[1].position.xyz - xe_in[0].position.xyz; - float3 edge_02 = xe_in[2].position.xyz - xe_in[0].position.xyz; - float3 edge_12 = xe_in[2].position.xyz - xe_in[1].position.xyz; + float3 edge_01 = + xe_in[1].post_gs.position.xyz - xe_in[0].post_gs.position.xyz; + float3 edge_02 = + xe_in[2].post_gs.position.xyz - xe_in[0].post_gs.position.xyz; + float3 edge_12 = + xe_in[2].post_gs.position.xyz - xe_in[1].post_gs.position.xyz; float3 edge_squares = float3( dot(edge_01, edge_01), dot(edge_02, edge_02), dot(edge_12, edge_12)); float3 v3_signs; @@ -29,8 +46,10 @@ void main(triangle XeVertex xe_in[3], // | // | 2: 1,-1 // | - | 3: [ 1, 1 ] // 1 ------[3] - xe_stream.Append(xe_in[2]); - xe_stream.Append(xe_in[1]); + xe_out = xe_in[2].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[1].post_gs; + xe_stream.Append(xe_out); v3_signs = float3(-1.0f, 1.0f, 1.0f); } else if (edge_squares.y > edge_squares.x && edge_squares.y > edge_squares.z) { @@ -41,8 +60,10 @@ void main(triangle XeVertex xe_in[3], // | \\ | 2: 1, 1 // | - | 3: [-1, 1 ] // [3] ----- 2 - xe_stream.Append(xe_in[0]); - xe_stream.Append(xe_in[2]); + xe_out = xe_in[0].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[2].post_gs; + xe_stream.Append(xe_out); v3_signs = float3(1.0f, -1.0f, 1.0f); } else { // 01 is the diagonal. Not seen in any game so far. 
@@ -52,31 +73,32 @@ void main(triangle XeVertex xe_in[3], // | \\ | 2: 1,-1 // | - | 3: [-1, 1 ] // [3] ----- 1 - xe_stream.Append(xe_in[1]); - xe_stream.Append(xe_in[0]); + xe_out = xe_in[1].post_gs; + xe_stream.Append(xe_out); + xe_out = xe_in[0].post_gs; + xe_stream.Append(xe_out); v3_signs = float3(1.0f, 1.0f, -1.0f); } - XeVertex xe_out; [unroll] for (int i = 0; i < 16; ++i) { - xe_out.interpolators[i] = v3_signs.x * xe_in[0].interpolators[i] + - v3_signs.y * xe_in[1].interpolators[i] + - v3_signs.z * xe_in[2].interpolators[i]; + xe_out.interpolators[i] = v3_signs.x * xe_in[0].post_gs.interpolators[i] + + v3_signs.y * xe_in[1].post_gs.interpolators[i] + + v3_signs.z * xe_in[2].post_gs.interpolators[i]; } - xe_out.point_params = v3_signs.x * xe_in[0].point_params + - v3_signs.y * xe_in[1].point_params + - v3_signs.z * xe_in[2].point_params; - xe_out.clip_space_zw = v3_signs.x * xe_in[0].clip_space_zw + - v3_signs.y * xe_in[1].clip_space_zw + - v3_signs.z * xe_in[2].clip_space_zw; - xe_out.position = v3_signs.x * xe_in[0].position + - v3_signs.y * xe_in[1].position + - v3_signs.z * xe_in[2].position; - xe_out.clip_distance_0123 = v3_signs.x * xe_in[0].clip_distance_0123 + - v3_signs.y * xe_in[1].clip_distance_0123 + - v3_signs.z * xe_in[2].clip_distance_0123; - xe_out.clip_distance_45 = v3_signs.x * xe_in[0].clip_distance_45 + - v3_signs.y * xe_in[1].clip_distance_45 + - v3_signs.z * xe_in[2].clip_distance_45; + xe_out.point_params = v3_signs.x * xe_in[0].post_gs.point_params + + v3_signs.y * xe_in[1].post_gs.point_params + + v3_signs.z * xe_in[2].post_gs.point_params; + xe_out.clip_space_zw = v3_signs.x * xe_in[0].post_gs.clip_space_zw + + v3_signs.y * xe_in[1].post_gs.clip_space_zw + + v3_signs.z * xe_in[2].post_gs.clip_space_zw; + xe_out.position = v3_signs.x * xe_in[0].post_gs.position + + v3_signs.y * xe_in[1].post_gs.position + + v3_signs.z * xe_in[2].post_gs.position; + xe_out.clip_distance_0123 = v3_signs.x * xe_in[0].post_gs.clip_distance_0123 + + 
v3_signs.y * xe_in[1].post_gs.clip_distance_0123 + + v3_signs.z * xe_in[2].post_gs.clip_distance_0123; + xe_out.clip_distance_45 = v3_signs.x * xe_in[0].post_gs.clip_distance_45 + + v3_signs.y * xe_in[1].post_gs.clip_distance_45 + + v3_signs.z * xe_in[2].post_gs.clip_distance_45; xe_stream.Append(xe_out); xe_stream.RestartStrip(); } diff --git a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli index 6d6c7b44e..f618d7372 100644 --- a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli +++ b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli @@ -51,15 +51,26 @@ cbuffer xe_system_cbuffer : register(b0) { float4 xe_edram_blend_constant; }; -struct XeVertex { +struct XeVertexPostGS { float4 interpolators[16] : TEXCOORD0; float3 point_params : TEXCOORD16; float2 clip_space_zw : TEXCOORD17; - float4 position : SV_Position; + // Precise needed to preserve NaN - guest primitives may be converted to more + // than 1 triangle, so need to kill them entirely manually in GS if any vertex + // is NaN. + precise float4 position : SV_Position; float4 clip_distance_0123 : SV_ClipDistance0; float2 clip_distance_45 : SV_ClipDistance1; }; +struct XeVertexPreGS { + XeVertexPostGS post_gs; + // Guest primitives may be converted to more than 1 triangle, so need to kill + // them entirely manually in GS - must kill if all guest primitive vertices + // have negative cull distance. 
+ float cull_distance : SV_CullDistance; +}; + #define XeSysFlag_SharedMemoryIsUAV_Shift 0u #define XeSysFlag_SharedMemoryIsUAV (1u << XeSysFlag_SharedMemoryIsUAV_Shift) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index e8b27e9ce..2c0aa1956 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -818,25 +818,6 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { ++stat_.mov_instruction_count; } - // Zero the point coordinate (will be set in the geometry shader if needed) - // and set the point size to a negative value to tell the geometry shader that - // it should use the global point size - the vertex shader may overwrite it - // later. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0111, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kVSOutPointParameters)); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(0); - shader_code_.push_back(0); - // -1.0f - shader_code_.push_back(0xBF800000u); - shader_code_.push_back(0); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; - if (IsDxbcVertexShader()) { // Write the vertex index to GPR 0. StartVertexShader_LoadVertexIndex(); @@ -1333,6 +1314,13 @@ void DxbcShaderTranslator::StartTranslation() { // epilogue. if (IsDxbcVertexOrDomainShader()) { system_temp_position_ = PushSystemTemp(0b1111); + system_temp_point_size_edge_flag_kill_vertex_ = PushSystemTemp(0b0100); + // Set the point size to a negative value to tell the geometry shader that + // it should use the global point size if the vertex shader does not + // override it. 
+ DxbcOpMov( + DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_, 0b0001), + DxbcSrc::LF(-1.0f)); } else if (IsDxbcPixelShader()) { if (edram_rov_used_) { // Will be initialized unconditionally. @@ -1615,7 +1603,9 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { PopSystemTemp(2); // Apply scale for drawing without a viewport, and also remap from OpenGL - // Z clip space to Direct3D if needed. + // Z clip space to Direct3D if needed. Also, if the vertex shader is + // multipass, the NDC scale constant can be used to set position to NaN to + // kill all primitives. system_constants_used_ |= 1ull << kSysConst_NDCScale_Index; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); @@ -1714,6 +1704,50 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { ++stat_.instruction_count; ++stat_.mov_instruction_count; + // Initialize SV_CullDistance. + DxbcOpMov( + DxbcDest::O(uint32_t(InOutRegister::kVSOutClipDistance45AndCullDistance), + 0b0100), + DxbcSrc::LF(0.0f)); + // Kill the primitive if needed - check if the shader wants to kill. + // TODO(Triang3l): Find if the condition is actually the flag being non-zero. + uint32_t kill_temp = PushSystemTemp(); + DxbcOpNE( + DxbcDest::R(kill_temp, 0b0001), + DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, DxbcSrc::kZZZZ), + DxbcSrc::LF(0.0f)); + DxbcOpIf(true, DxbcSrc::R(kill_temp, DxbcSrc::kXXXX)); + { + // Extract the killing condition (the flag bit itself, not its shift). + system_constants_used_ |= 1ull << kSysConst_Flags_Index; + DxbcOpAnd(DxbcDest::R(kill_temp, 0b0001), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled)); + DxbcOpIf(true, DxbcSrc::R(kill_temp, DxbcSrc::kXXXX)); + // Release kill_temp. + PopSystemTemp(); + { + // Kill the primitive if any vertex is killed - write NaN to position.
+ DxbcOpMov(DxbcDest::R(system_temp_position_, 0b1000), + DxbcSrc::LF(std::nanf(""))); + } + DxbcOpElse(); + { + // Kill the primitive if all vertices are killed - set SV_CullDistance to + // negative. + DxbcOpMov( + DxbcDest::O( + uint32_t(InOutRegister::kVSOutClipDistance45AndCullDistance), + 0b0100), + DxbcSrc::LF(-1.0f)); + } + DxbcOpEndIf(); + } + DxbcOpEndIf(); + // Write the position to the output. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); @@ -1725,6 +1759,14 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { shader_code_.push_back(system_temp_position_); ++stat_.instruction_count; ++stat_.mov_instruction_count; + + // Zero the point coordinate (will be set in the geometry shader if needed) + // and write the point size. + DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSOutPointParameters), 0b0011), + DxbcSrc::LF(0.0f)); + DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSOutPointParameters), 0b0100), + DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, + DxbcSrc::kXXXX)); } void DxbcShaderTranslator::CompleteShaderCode() { @@ -1818,8 +1860,9 @@ void DxbcShaderTranslator::CompleteShaderCode() { } if (IsDxbcVertexOrDomainShader()) { - // Release system_temp_position_. - PopSystemTemp(); + // Release system_temp_position_ and + // system_temp_point_size_edge_flag_kill_vertex_. + PopSystemTemp(2); } else if (IsDxbcPixelShader()) { // Release system_temps_color_. for (int32_t i = 3; i >= 0; --i) { @@ -2413,8 +2456,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped); // Scalar targets get only one component. 
- if (result.storage_target == InstructionStorageTarget::kPointSize || - result.storage_target == InstructionStorageTarget::kDepth) { + if (result.storage_target == InstructionStorageTarget::kDepth) { if (!result.write_mask[0]) { return; } @@ -2424,14 +2466,6 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, } // Both r[imm32] and imm32 operands are 2 tokens long. switch (result.storage_target) { - case InstructionStorageTarget::kPointSize: - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5) | saturate_bit); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0100, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kVSOutPointParameters)); - break; case InstructionStorageTarget::kDepth: assert_true(writes_depth()); if (writes_depth()) { @@ -2581,6 +2615,18 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, shader_code_.push_back(system_temp_position_); break; + case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) | + saturate_bit); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1)); + shader_code_.push_back(system_temp_point_size_edge_flag_kill_vertex_); + break; + case InstructionStorageTarget::kExportAddress: ++stat_.instruction_count; ++stat_.mov_instruction_count; @@ -4161,8 +4207,8 @@ void DxbcShaderTranslator::WriteOutputSignature() { if (IsDxbcVertexOrDomainShader()) { // Interpolators, point parameters (coordinates, size), clip space ZW, - // screen position, 6 clip distances in 2 vectors. - shader_object_.push_back(kInterpolatorCount + 5); + // screen position, 6 clip distances in 2 vectors, cull distance. 
+ shader_object_.push_back(kInterpolatorCount + 6); // Unknown. shader_object_.push_back(8); @@ -4209,7 +4255,7 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.push_back(uint32_t(InOutRegister::kVSOutPosition)); shader_object_.push_back(0b1111); - // Clip distances. + // Clip and cull distances. for (uint32_t i = 0; i < 2; ++i) { shader_object_.push_back(0); shader_object_.push_back(i); @@ -4220,6 +4266,14 @@ void DxbcShaderTranslator::WriteOutputSignature() { i); shader_object_.push_back(i ? (0b0011 | (0b1100 << 8)) : 0b1111); } + shader_object_.push_back(0); + shader_object_.push_back(0); + // D3D_NAME_CULL_DISTANCE. + shader_object_.push_back(3); + shader_object_.push_back(3); + shader_object_.push_back( + uint32_t(InOutRegister::kVSOutClipDistance45AndCullDistance)); + shader_object_.push_back(0b0100 | (0b1011 << 8)); // Write the semantic names. new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) * @@ -4239,6 +4293,9 @@ void DxbcShaderTranslator::WriteOutputSignature() { name_position_dwords += signature_size_dwords; } new_offset += AppendString(shader_object_, "SV_ClipDistance"); + shader_object_[name_position_dwords] = new_offset; + name_position_dwords += signature_size_dwords; + new_offset += AppendString(shader_object_, "SV_CullDistance"); } else { assert_true(IsDxbcPixelShader()); if (edram_rov_used_) { @@ -4612,6 +4669,16 @@ void DxbcShaderTranslator::WriteShaderCode() { ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_CLIP_DISTANCE)); ++stat_.dcl_count; } + // Cull distance output. 
+ shader_object_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT_SIV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); + shader_object_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0100, 1)); + shader_object_.push_back( + uint32_t(InOutRegister::kVSOutClipDistance45AndCullDistance)); + shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_CULL_DISTANCE)); + ++stat_.dcl_count; } else if (IsDxbcPixelShader()) { // Interpolator input. if (!is_depth_only_pixel_shader_) { diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 87891fdd0..e7b02f674 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -80,6 +80,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5_Shift, kSysFlag_ReverseZ_Shift, + kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfGreater_Shift, @@ -121,6 +122,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_UserClipPlane4 = 1u << kSysFlag_UserClipPlane4_Shift, kSysFlag_UserClipPlane5 = 1u << kSysFlag_UserClipPlane5_Shift, kSysFlag_ReverseZ = 1u << kSysFlag_ReverseZ_Shift, + kSysFlag_KillIfAnyVertexKilled = 1u << kSysFlag_KillIfAnyVertexKilled_Shift, kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift, @@ -874,6 +876,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kMov = 54, kMovC = 55, kMul = 56, + kNE = 57, kNot = 59, kOr = 60, kRet = 62, @@ -1179,6 +1182,11 @@ class DxbcShaderTranslator : public ShaderTranslator { DxbcEmitAluOp(DxbcOpcode::kMul, 0b00, dest, src0, src1, saturate); ++stat_.float_instruction_count; } + void DxbcOpNE(const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1) { + 
DxbcEmitAluOp(DxbcOpcode::kNE, 0b00, dest, src0, src1); + ++stat_.float_instruction_count; + } void DxbcOpNot(const DxbcDest& dest, const DxbcSrc& src) { DxbcEmitAluOp(DxbcOpcode::kNot, 0b1, dest, src); ++stat_.uint_instruction_count; @@ -1488,11 +1496,13 @@ class DxbcShaderTranslator : public ShaderTranslator { kVSOutPointParameters = kVSOutInterpolators + kInterpolatorCount, kVSOutClipSpaceZW, kVSOutPosition, + // Clip and cull distances must be tightly packed in Direct3D! kVSOutClipDistance0123, - kVSOutClipDistance45, + kVSOutClipDistance45AndCullDistance, // TODO(Triang3l): Use SV_CullDistance instead for // PA_CL_CLIP_CNTL::UCP_CULL_ONLY_ENA, but can't have more than 8 clip and - // cull distances in total. + // cull distances in total. Currently only using SV_CullDistance for vertex + // kill. kPSInInterpolators = 0, kPSInPointParameters = kPSInInterpolators + kInterpolatorCount, @@ -1989,6 +1999,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // Position in vertex shaders (because viewport and W transformations can be // applied in the end of the shader). uint32_t system_temp_position_; + // Special exports in vertex shaders. + uint32_t system_temp_point_size_edge_flag_kill_vertex_; // ROV only - 4 persistent VGPRs when writing to color targets, 2 VGPRs when // not: // X - Bit masks: diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 3ec54fa8c..8d7509d8e 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -29,8 +29,15 @@ enum class InstructionStorageTarget { kInterpolant, // Result is stored to the position export (gl_Position). kPosition, - // Result is stored to the point size export (gl_PointSize). - kPointSize, + // Result is stored to the vertex shader misc export register. + // See R6xx/R7xx registers for details (USE_VTX_POINT_SIZE, USE_VTX_EDGE_FLAG, + // USE_VTX_KILL_FLAG). + // X - PSIZE (gl_PointSize). + // Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point + // drawing. 
+ // Z - KILLVERTEX flag (used in Banjo-Kazooie: Nuts & Bolts for grass), set + // for killing primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition. + kPointSizeEdgeFlagKillVertex, // Result is stored as memexport destination address // (see xenos::xe_gpu_memexport_stream_t). kExportAddress, diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index d2c6bb77b..b2a219d6e 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -1285,7 +1285,8 @@ void ShaderTranslator::ParseAluVectorOperation(const AluInstruction& op, i.vector_result.storage_target = InstructionStorageTarget::kPosition; break; case 63: - i.vector_result.storage_target = InstructionStorageTarget::kPointSize; + i.vector_result.storage_target = + InstructionStorageTarget::kPointSizeEdgeFlagKillVertex; break; default: if (dest_num < 16) { @@ -1453,7 +1454,8 @@ void ShaderTranslator::ParseAluScalarOperation(const AluInstruction& op, i.scalar_result.storage_target = InstructionStorageTarget::kPosition; break; case 63: - i.scalar_result.storage_target = InstructionStorageTarget::kPointSize; + i.scalar_result.storage_target = + InstructionStorageTarget::kPointSizeEdgeFlagKillVertex; break; default: if (dest_num < 16) { diff --git a/src/xenia/gpu/shader_translator_disasm.cc b/src/xenia/gpu/shader_translator_disasm.cc index 3667fe3b2..ddd5e433e 100644 --- a/src/xenia/gpu/shader_translator_disasm.cc +++ b/src/xenia/gpu/shader_translator_disasm.cc @@ -35,7 +35,7 @@ void DisassembleResultOperand(const InstructionResult& result, case InstructionStorageTarget::kPosition: out->Append("oPos"); break; - case InstructionStorageTarget::kPointSize: + case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: out->Append("oPts"); break; case InstructionStorageTarget::kExportAddress: diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index c1d151942..ea54f736c 100644 --- 
a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -3301,7 +3301,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id, storage_offsets.push_back(0); storage_array = false; break; - case InstructionStorageTarget::kPointSize: + case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: assert_true(is_vertex_shader()); storage_pointer = point_size_; storage_class = spv::StorageClass::StorageClassOutput; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index fa67e3c69..0cda866ba 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -479,6 +479,8 @@ enum class VertexShaderExportMode : uint32_t { kPosition2VectorsKill = 4, kPosition2VectorsSpriteKill = 5, kPosition2VectorsEdgeKill = 6, + // Vertex shader outputs are ignored (kill all primitives) - see + // SX_MISC::MULTIPASS on R6xx/R7xx. kMultipass = 7, };