diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 97035eb94..7c2c17379 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -706,6 +706,12 @@ void DxbcShaderTranslator::StartPixelShader() { a_.OpEndIf(); // ZW - UV within a point sprite in the absolute value, at centroid if // requested for the interpolator. + // TODO(Triang3l): Are centroid point coordinates possible in the hardware + // at all? ps_param_gen is not a triangle-IJ-interpolated value + // apparently, rather, it replaces the value in the shader input. + // TODO(Triang3l): Saturate to avoid negative point coordinates (the sign + // bit is used for the primitive type indicator) in case of extrapolation + // when the center is not covered with MSAA. dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100)); dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V( uint32_t(InOutRegister::kPSInPointParameters), 0b01000000)); @@ -723,6 +729,7 @@ void DxbcShaderTranslator::StartPixelShader() { // At centroid. a_.OpEvalCentroid(point_coord_r_zw_dest, point_coord_v_xxxy_src); a_.OpEndIf(); + // TODO(Triang3l): Point / line primitive type flags to the sign bits. // Write ps_param_gen to the specified GPR. dxbc::Src param_gen_src(dxbc::Src::R(param_gen_temp)); if (uses_register_dynamic_addressing) { @@ -980,12 +987,12 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { dxbc::Src::R(system_temp_position_)); // Assuming SV_CullDistance was zeroed earlier in this function. - // Kill the primitive if needed - check if the shader wants to kill. - // TODO(Triang3l): Find if the condition is actually the flag being non-zero. - a_.OpNE(temp_x_dest, - dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_, - dxbc::Src::kZZZZ), - dxbc::Src::LF(0.0f)); + // Kill the primitive if needed - check if the shader wants to kill (bits + // 0:30 of the vertex kill register are not zero). 
+ a_.OpAnd(temp_x_dest, + dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_, + dxbc::Src::kZZZZ), + dxbc::Src::LU(UINT32_C(0x7FFFFFFF))); a_.OpIf(true, temp_x_src); { // Extract the killing condition. diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 7e313f09b..f6425c277 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -130,22 +130,70 @@ union alignas(uint32_t) SQ_CONTEXT_MISC { uint32_t sc_output_screen_xy : 1; // +1 xenos::SampleControl sc_sample_cntl : 2; // +2 uint32_t : 4; // +4 - // Pixel shader interpolator (according to the XNA microcode compiler) index - // to write pixel parameters to. So far have been able to find the following - // usage: + // Pixel shader interpolator (according to the XNA microcode compiler - + // limited to the interpolator count, 16, not the total register count of + // 64) index to write pixel parameters to. + // See https://portal.unifiedpatents.com/ptab/case/IPR2015-00325 Exhibit + // 2039 R400 Sequencer Specification 2.11 (a significantly early version of + // the specification, however) section 19.2 "Sprites/ XY screen coordinates/ + // FB information" for additional details. // * |XY| - position on screen (vPos - the XNA microcode compiler translates // ps_3_0 vPos directly to this, so at least in Direct3D 9 pixel center // mode, this contains 0, 1, 2, not 0.5, 1.5, 2.5). flto also said in the // Freedreno IRC that it's .0 even in OpenGL: // https://dri.freedesktop.org/~cbrill/dri-log/?channel=freedreno&date=2020-04-19 - // (on Android, according to LG P705 GL_OES_get_program_binary - // disassembly, gl_FragCoord.xy is |r0.xy| * c221.xy + c222.zw - haven't - // been able to dump the constant values by exploiting a huge uniform - // array, but flto says c222.zw contains tile offset plus 0.5). 
- // * Sign bit of X - is front face (vFace), non-negative for front face, - // negative for back face (used with `rcpc` in shaders to take signedness - // of 0 into account in `cndge`). - // * |ZW| - UV within a point sprite (sign meaning is unknown so far). + // According to the actual usage, in the final version of the hardware, + // the screen coordinates are passed to the shader directly as floats + // (contrary to what's written in the early 2.11 version of the sequencer + // specification from IPR2015-00325, where the coordinates are specified + // to be 2^23-biased, essentially packed as integers in the low mantissa + // bits of 2^23). + // * On Android, according to LG P705 - checked on the driver V@6.0 AU@ + // (CL@3050818) - GL_OES_get_program_binary disassembly, gl_FragCoord.xy + // is |r0.xy| * c221.xy + c222.zw. We haven't yet been able to + // dump the actual constant values by exploiting a huge uniform array, + // but flto says c222.zw contains tile offset plus 0.5. It also appears + // that the multiplication by c221.xy is done to flip the direction of + // the Y axis in gl_FragCoord (c221.y is probably -1). According to the + // tests performed with triangles and point sprites, the hardware uses + // the top-left rasterization rule just like Direct3D (tie-breaking + // sample coverage towards gl_FragCoord.-x+y, while Direct3D tie-breaks + // towards VPOS.-x-y), and the R400 / Z430 doesn't seem to have the + // equivalent of R5xx's SC_EDGERULE register for configuring this. + // Also, both OpenGL and apparently Direct3D 9 define the point sprite V + // coordinate to be 0 in the top, and 1 in the bottom (but OpenGL + // gl_FragCoord.y is towards the top, while Direct3D 9's VPOS.y is + // towards the bottom), gl_PointCoord.y is |PsParamGen.w| directly, and + // the R400 / Z430 doesn't appear to have an equivalent of R6xx's + // SPI_INTERP_CONTROL_0::PNT_SPRITE_TOP_1 for toggling the direction. 
+ // So, it looks like the internal screen coordinates in the official + // OpenGL ES 2.0 driver are still top-to-bottom like in Direct3D, but + // gl_FragCoord.y is flipped in the shader code so it's bottom-to-top + // as OpenGL specifies. + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/point-sprites + // * |ZW| - UV within a point sprite, [0, 1]. In OpenGL ES 2.0, this is + // interpreted directly as gl_PointCoord, with the directions matching the + // OpenGL ES 2.0 specification - 0 in the top (towards +gl_FragCoord.y in + // OpenGL ES bottom-to-top screen coordinates - but towards -PsParamGen.y + // likely, see the explanation of gl_FragCoord.xy above), 1 in the bottom + // (towards -gl_FragCoord.y, or +PsParamGen.y likely). The point sprite + // coordinates are exposed differently on the Xbox 360 and the PC + // Direct3D 9 - the Xbox 360 passes the whole PsParamGen register via the + // SPRITETEXCOORD input semantic directly (unlike on the PC, where point + // sprite coordinates are written to XY of TEXCOORD0), and shaders should + // take abs(SPRITETEXCOORD.zw) explicitly. + // https://shawnhargreaves.com/blog/point-sprites-on-xbox.html + // 4D5307F1 has snowflake point sprites with an asymmetric texture. + // * Sign bit of X - is front face (according to the disassembly of vFace + // and gl_FrontFacing usage), non-negative for front face, negative for + // back face (used with `rcpc` in shaders to take signedness of 0 into + // account in `cndge`). + // * Sign bit of Y - is the primitive type a point (according to the + // IPR2015-00325 sequencer specification), negative for a point, + // non-negative for other primitive types. + // * Sign bit of Z - is the primitive type a line (according to the + // IPR2015-00325 sequencer specification), negative for a line, + // non-negative for other primitive types. 
uint32_t param_gen_pos : 8; // +8 uint32_t perfcounter_ref : 1; // +16 uint32_t yeild_optimize : 1; // +17 sic @@ -309,7 +357,7 @@ static_assert_size(VGT_HOS_CNTL, sizeof(uint32_t)); union alignas(uint32_t) PA_SU_POINT_MINMAX { uint32_t value; struct { - // Radius, 12.4 fixed point. + // For per-vertex size specification, radius (1/2 size), 12.4 fixed point. uint32_t min_size : 16; // +0 uint32_t max_size : 16; // +16 }; @@ -548,6 +596,10 @@ union alignas(uint32_t) RB_COLORCONTROL { uint32_t alpha_to_mask_enable : 1; // +4 // Everything in between was added on Adreno. uint32_t : 19; // +5 + // TODO(Triang3l): Redo these tests and possibly flip these vertically in + // the comment and in the actual implementation. It appears that + // gl_FragCoord.y is mirrored as opposed to the actual screen coordinates in + // the rasterizer (see the SQ_CONTEXT_MISC::param_gen_pos comment here). // According to tests on an Adreno 200 device (LG Optimus L7), done by // drawing 0.5x0.5 rectangles in different corners of four pixels in a quad // to a multisampled GLSurfaceView, the coverage mask is the following for 4 diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index e4ce497d1..3b0875e25 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -1810,10 +1810,24 @@ enum class ExportRegister : uint32_t { // See R6xx/R7xx registers for details (USE_VTX_POINT_SIZE, USE_VTX_EDGE_FLAG, // USE_VTX_KILL_FLAG). // X - PSIZE (gl_PointSize). + // According to tests and GL_AMD_program_binary_Z400 disassembly on an + // Adreno 200 device: + // * This is the full width and height of the point sprite (not half - + // gl_PointSize goes directly to oPts.x). 
+ // * Clamped to PA_SU_POINT_MINMAX as a signed integer in rasterization: + // * -NaN - min + // * -Infinity - min + // * -Normal - min + // * -0 (0x80000000 - the smallest signed integer) - min + // * +0 - min + // * +Infinity - max + // * +NaN - max // Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point // drawing. // Z - KILLVERTEX flag (used in 4D5307ED for grass), set for killing - // primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition. + // primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition if bits 0:30 + // of this export value (the sign bit is ignored according to the + // IPR2015-00325 sequencer specification) are not zero. kVSPointSizeEdgeFlagKillVertex = 63, kPSColor0 = 0,