[GPU] More accurate vertex kill + PsParamGen/point documentation
This commit is contained in:
parent
5ec0c92601
commit
b2b1d7b518
|
@ -706,6 +706,12 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
a_.OpEndIf();
|
||||
// ZW - UV within a point sprite in the absolute value, at centroid if
|
||||
// requested for the interpolator.
|
||||
// TODO(Triang3l): Are centroid point coordinates possible in the hardware
|
||||
// at all? ps_param_gen is not a triangle-IJ-interpolated value
|
||||
// apparently, rather, it replaces the value in the shader input.
|
||||
// TODO(Triang3l): Saturate to avoid negative point coordinates (the sign
|
||||
// bit is used for the primitive type indicator) in case of extrapolation
|
||||
// when the center is not covered with MSAA.
|
||||
dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100));
|
||||
dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V(
|
||||
uint32_t(InOutRegister::kPSInPointParameters), 0b01000000));
|
||||
|
@ -723,6 +729,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// At centroid.
|
||||
a_.OpEvalCentroid(point_coord_r_zw_dest, point_coord_v_xxxy_src);
|
||||
a_.OpEndIf();
|
||||
// TODO(Triang3l): Point / line primitive type flags to the sign bits.
|
||||
// Write ps_param_gen to the specified GPR.
|
||||
dxbc::Src param_gen_src(dxbc::Src::R(param_gen_temp));
|
||||
if (uses_register_dynamic_addressing) {
|
||||
|
@ -980,12 +987,12 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
dxbc::Src::R(system_temp_position_));
|
||||
|
||||
// Assuming SV_CullDistance was zeroed earlier in this function.
|
||||
// Kill the primitive if needed - check if the shader wants to kill.
|
||||
// TODO(Triang3l): Find if the condition is actually the flag being non-zero.
|
||||
a_.OpNE(temp_x_dest,
|
||||
dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_,
|
||||
dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LF(0.0f));
|
||||
// Kill the primitive if needed - check if the shader wants to kill (bits
|
||||
// 0:30 of the vertex kill register are not zero).
|
||||
a_.OpAnd(temp_x_dest,
|
||||
dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_,
|
||||
dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LU(UINT32_C(0x7FFFFFFF)));
|
||||
a_.OpIf(true, temp_x_src);
|
||||
{
|
||||
// Extract the killing condition.
|
||||
|
|
|
@ -130,22 +130,70 @@ union alignas(uint32_t) SQ_CONTEXT_MISC {
|
|||
uint32_t sc_output_screen_xy : 1; // +1
|
||||
xenos::SampleControl sc_sample_cntl : 2; // +2
|
||||
uint32_t : 4; // +4
|
||||
// Pixel shader interpolator (according to the XNA microcode compiler) index
|
||||
// to write pixel parameters to. So far have been able to find the following
|
||||
// usage:
|
||||
// Pixel shader interpolator (according to the XNA microcode compiler -
|
||||
// limited to the interpolator count, 16, not the total register count of
|
||||
// 64) index to write pixel parameters to.
|
||||
// See https://portal.unifiedpatents.com/ptab/case/IPR2015-00325 Exhibit
|
||||
// 2039 R400 Sequencer Specification 2.11 (a significantly early version of
|
||||
// the specification, however) section 19.2 "Sprites/ XY screen coordinates/
|
||||
// FB information" for additional details.
|
||||
// * |XY| - position on screen (vPos - the XNA microcode compiler translates
|
||||
// ps_3_0 vPos directly to this, so at least in Direct3D 9 pixel center
|
||||
// mode, this contains 0, 1, 2, not 0.5, 1.5, 2.5). flto also said in the
|
||||
// Freedreno IRC that it's .0 even in OpenGL:
|
||||
// https://dri.freedesktop.org/~cbrill/dri-log/?channel=freedreno&date=2020-04-19
|
||||
// (on Android, according to LG P705 GL_OES_get_program_binary
|
||||
// disassembly, gl_FragCoord.xy is |r0.xy| * c221.xy + c222.zw - haven't
|
||||
// been able to dump the constant values by exploiting a huge uniform
|
||||
// array, but flto says c222.zw contains tile offset plus 0.5).
|
||||
// * Sign bit of X - is front face (vFace), non-negative for front face,
|
||||
// negative for back face (used with `rcpc` in shaders to take signedness
|
||||
// of 0 into account in `cndge`).
|
||||
// * |ZW| - UV within a point sprite (sign meaning is unknown so far).
|
||||
// According to the actual usage, in the final version of the hardware,
|
||||
// the screen coordinates are passed to the shader directly as floats
|
||||
// (contrary to what's written in the early 2.11 version of the sequencer
|
||||
// specification from IPR2015-00325, where the coordinates are specified
|
||||
// to be 2^23-biased, essentially packed as integers in the low mantissa
|
||||
// bits of 2^23).
|
||||
// * On Android, according to LG P705 - checked on the driver V@6.0 AU@
|
||||
// (CL@3050818) - GL_OES_get_program_binary disassembly, gl_FragCoord.xy
|
||||
// is |r0.xy| * c221.xy + c222.zw. Though we haven't yet been able to
|
||||
// dump the actual constant values by exploiting a huge uniform array,
|
||||
// but flto says c222.zw contains tile offset plus 0.5. It also appears
|
||||
// that the multiplication by c221.xy is done to flip the direction of
|
||||
// the Y axis in gl_FragCoord (c221.y is probably -1). According to the
|
||||
// tests performed with triangles and point sprites, the hardware uses
|
||||
// the top-left rasterization rule just like Direct3D (tie-breaking
|
||||
// sample coverage towards gl_FragCoord.-x+y, while Direct3D tie-breaks
|
||||
// towards VPOS.-x-y), and the R400 / Z430 doesn't seem to have the
|
||||
// equivalent of R5xx's SC_EDGERULE register for configuring this).
|
||||
// Also, both OpenGL and apparently Direct3D 9 define the point sprite V
|
||||
// coordinate to be 0 in the top, and 1 in the bottom (but OpenGL
|
||||
// gl_FragCoord.y is towards the top, while Direct3D 9's VPOS.y is
|
||||
// towards the bottom), gl_PointCoord.y is |PsParamGen.w| directly, and
|
||||
// the R400 / Z430 doesn't appear to have an equivalent of R6xx's
|
||||
// SPI_INTERP_CONTROL_0::PNT_SPRITE_TOP_1 for toggling the direction.
|
||||
// So, it looks like the internal screen coordinates in the official
|
||||
// OpenGL ES 2.0 driver are still top-to-bottom like in Direct3D, but
|
||||
// gl_FragCoord.y is flipped in the shader code so it's bottom-to-top
|
||||
// as OpenGL specifies.
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3d9/point-sprites
|
||||
// * |ZW| - UV within a point sprite, [0, 1]. In OpenGL ES 2.0, this is
|
||||
// interpreted directly as gl_PointCoord, with the directions matching the
|
||||
// OpenGL ES 2.0 specification - 0 in the top (towards +gl_FragCoord.y in
|
||||
// OpenGL ES bottom-to-top screen coordinates - but towards -PsParamGen.y
|
||||
// likely, see the explanation of gl_FragCoord.xy above), 1 in the bottom
|
||||
// (towards -gl_FragCoord.y, or +PsParamGen.y likely). The point sprite
|
||||
// coordinates are exposed differently on the Xbox 360 and the PC
|
||||
// Direct3D 9 - the Xbox 360 passes the whole PsParamGen register via the
|
||||
// SPRITETEXCOORD input semantic directly (unlike on the PC, where point
|
||||
// sprite coordinates are written to XY of TEXCOORD0), and shaders should
|
||||
// take abs(SPRITETEXCOORD.zw) explicitly.
|
||||
// https://shawnhargreaves.com/blog/point-sprites-on-xbox.html
|
||||
// 4D5307F1 has snowflake point sprites with an asymmetric texture.
|
||||
// * Sign bit of X - is front face (according to the disassembly of vFace
|
||||
// and gl_FrontFacing usage), non-negative for front face, negative for
|
||||
// back face (used with `rcpc` in shaders to take signedness of 0 into
|
||||
// account in `cndge`).
|
||||
// * Sign bit of Y - is the primitive type a point (according to the
|
||||
// IPR2015-00325 sequencer specification), negative for a point,
|
||||
// non-negative for other primitive types.
|
||||
// * Sign bit of Z - is the primitive type a line (according to the
|
||||
// IPR2015-00325 sequencer specification), negative for a line,
|
||||
// non-negative for other primitive types.
|
||||
uint32_t param_gen_pos : 8; // +8
|
||||
uint32_t perfcounter_ref : 1; // +16
|
||||
uint32_t yeild_optimize : 1; // +17 sic
|
||||
|
@ -309,7 +357,7 @@ static_assert_size(VGT_HOS_CNTL, sizeof(uint32_t));
|
|||
union alignas(uint32_t) PA_SU_POINT_MINMAX {
|
||||
uint32_t value;
|
||||
struct {
|
||||
// Radius, 12.4 fixed point.
|
||||
// For per-vertex size specification, radius (1/2 size), 12.4 fixed point.
|
||||
uint32_t min_size : 16; // +0
|
||||
uint32_t max_size : 16; // +16
|
||||
};
|
||||
|
@ -548,6 +596,10 @@ union alignas(uint32_t) RB_COLORCONTROL {
|
|||
uint32_t alpha_to_mask_enable : 1; // +4
|
||||
// Everything in between was added on Adreno.
|
||||
uint32_t : 19; // +5
|
||||
// TODO(Triang3l): Redo these tests and possibly flip these vertically in
|
||||
// the comment and in the actual implementation. It appears that
|
||||
// gl_FragCoord.y is mirrored as opposed to the actual screen coordinates in
|
||||
// the rasterizer (see the SQ_CONTEXT_MISC::param_gen_pos comment here).
|
||||
// According to tests on an Adreno 200 device (LG Optimus L7), done by
|
||||
// drawing 0.5x0.5 rectangles in different corners of four pixels in a quad
|
||||
// to a multisampled GLSurfaceView, the coverage mask is the following for 4
|
||||
|
|
|
@ -1810,10 +1810,24 @@ enum class ExportRegister : uint32_t {
|
|||
// See R6xx/R7xx registers for details (USE_VTX_POINT_SIZE, USE_VTX_EDGE_FLAG,
|
||||
// USE_VTX_KILL_FLAG).
|
||||
// X - PSIZE (gl_PointSize).
|
||||
// According to tests and GL_AMD_program_binary_Z400 disassembly on an
|
||||
// Adreno 200 device:
|
||||
// * This is the full width and height of the point sprite (not half -
|
||||
// gl_PointSize goes directly to oPts.x).
|
||||
// * Clamped to PA_SU_POINT_MINMAX as a signed integer in rasterization:
|
||||
// * -NaN - min
|
||||
// * -Infinity - min
|
||||
// * -Normal - min
|
||||
// * -0 (0x80000000 - the smallest signed integer) - min
|
||||
// * +0 - min
|
||||
// * +Infinity - max
|
||||
// * +NaN - max
|
||||
// Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
|
||||
// drawing.
|
||||
// Z - KILLVERTEX flag (used in 4D5307ED for grass), set for killing
|
||||
// primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
|
||||
// primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition if bits 0:30
|
||||
// of this export value (the sign bit is ignored according to the
|
||||
// IPR2015-00325 sequencer specification) are not zero.
|
||||
kVSPointSizeEdgeFlagKillVertex = 63,
|
||||
|
||||
kPSColor0 = 0,
|
||||
|
|
Loading…
Reference in New Issue