[GPU] Cleanup definitions of some registers

VS/PS_NUM_REG is 6-bit on Adreno 200, and games have not been seen using
bit 7 to indicate that no GPRs are used. It's not clear why Freedreno
configures it this way.

Some texture fetch fields were deprecated or moved during the development
of the Xenos; reflect that in the comments.

Add definitions of the registers configuring the conversion of vertex
positions to fixed-point. Although there isn't much that can be done with
it when emulating using PC GPU APIs, there are some places in Xenia that
wrongly (though sometimes deliberately, for results closer to the behavior
of the host GPU) assume that the conversion works like in Direct3D 10+,
however the Xenos supports only up to 4 subpixel bits rather than 8. The
effects of this difference are largely negligible, though.

Also add more detailed info about register references and differences from
other ATI/AMD GPUs for potential future contributors.
This commit is contained in:
Triang3l 2025-08-06 13:21:19 +03:00
parent 9ae3a72500
commit a06be03f1b
9 changed files with 177 additions and 102 deletions

View File

@ -2106,8 +2106,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
ID3D12Device* device = GetD3D12Provider().GetDevice();
const RegisterFile& regs = *register_file_;
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode == xenos::ModeControl::kCopy) {
xenos::EdramMode edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode == xenos::EdramMode::kCopy) {
// Special copy handling.
return IssueCopy();
}
@ -2134,9 +2134,9 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
D3D12Shader* pixel_shader = nullptr;
if (is_rasterization_done) {
// See xenos::ModeControl for explanation why the pixel shader is only used
// See xenos::EdramMode for explanation why the pixel shader is only used
// when it's kColorDepth here.
if (edram_mode == xenos::ModeControl::kColorDepth) {
if (edram_mode == xenos::EdramMode::kColorDepth) {
pixel_shader = static_cast<D3D12Shader*>(active_pixel_shader());
if (pixel_shader) {
pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);

View File

@ -253,7 +253,8 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) {
// so it's safe to add both - adding it will neither move the 16p8 clamping
// bounds -32768 and 32767+255/256 into the 0...8192 screen space range, nor
// cause 24p8 overflow.
if (!regs.Get<reg::PA_SU_VTX_CNTL>().pix_center) {
if (regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ==
xenos::PixelCenter::kD3DZero) {
max_y_24p8 += 128;
}
if (pa_su_sc_mode_cntl.vtx_window_offset_enable) {
@ -329,7 +330,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y,
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
viewport_bottom += float(window_y_offset);
}
if (!regs.Get<reg::PA_SU_VTX_CNTL>().pix_center) {
if (regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ==
xenos::PixelCenter::kD3DZero) {
viewport_bottom += 0.5f;
}
// Then apply the floating-point viewport offset.

View File

@ -40,12 +40,12 @@ namespace draw_util {
bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
bool primitive_polygonal) {
// TODO(Triang3l): Investigate ModeControl::kIgnore better, with respect to
// TODO(Triang3l): Investigate EdramMode::kNoOperation better, with respect to
// sample counting. Let's assume sample counting is a part of depth / stencil,
// thus disabled too.
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::ModeControl::kColorDepth &&
edram_mode != xenos::ModeControl::kDepth) {
xenos::EdramMode edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::EdramMode::kColorDepth &&
edram_mode != xenos::EdramMode::kDepthOnly) {
return false;
}
if (regs.Get<reg::SQ_PROGRAM_CNTL>().vs_export_mode ==
@ -64,9 +64,9 @@ bool IsRasterizationPotentiallyDone(const RegisterFile& regs,
}
reg::RB_DEPTHCONTROL GetNormalizedDepthControl(const RegisterFile& regs) {
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::ModeControl::kColorDepth &&
edram_mode != xenos::ModeControl::kDepth) {
xenos::EdramMode edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode != xenos::EdramMode::kColorDepth &&
edram_mode != xenos::EdramMode::kDepthOnly) {
// Both depth and stencil disabled (EDRAM depth and stencil ignored).
reg::RB_DEPTHCONTROL disabled;
disabled.value = 0;
@ -124,10 +124,10 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader,
assert_true(shader.type() == xenos::ShaderType::kPixel);
assert_true(shader.is_ucode_analyzed());
// See xenos::ModeControl for explanation why the pixel shader is only used
// when it's kColorDepth here.
// See xenos::EdramMode for explanation why the pixel shader is only used when
// it's kColorDepth here.
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
xenos::ModeControl::kColorDepth) {
xenos::EdramMode::kColorDepth) {
return false;
}
@ -340,7 +340,8 @@ void GetHostViewportInfo(const RegisterFile& regs,
offset_add_xy[0] += float(pa_sc_window_offset.window_x_offset);
offset_add_xy[1] += float(pa_sc_window_offset.window_y_offset);
}
if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) {
if (cvars::half_pixel_offset &&
pa_su_vtx_cntl.pix_center == xenos::PixelCenter::kD3DZero) {
offset_add_xy[0] += 0.5f;
offset_add_xy[1] += 0.5f;
}
@ -607,7 +608,7 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
uint32_t pixel_shader_writes_color_targets) {
if (regs.Get<reg::RB_MODECONTROL>().edram_mode !=
xenos::ModeControl::kColorDepth) {
xenos::EdramMode::kColorDepth) {
return 0;
}
uint32_t normalized_color_mask = 0;
@ -838,7 +839,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
memory.TranslatePhysical(fetch.address * sizeof(uint32_t)));
// Most vertices have a negative half-pixel offset applied, which we reverse.
float half_pixel_offset =
regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ? 0.0f : 0.5f;
regs.Get<reg::PA_SU_VTX_CNTL>().pix_center == xenos::PixelCenter::kD3DZero
? 0.5f
: 0.0f;
int32_t vertices_fixed[6];
for (size_t i = 0; i < xe::countof(vertices_fixed); ++i) {
vertices_fixed[i] = ui::FloatToD3D11Fixed16p8(
@ -1097,7 +1100,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
bool fill_half_pixel_offset =
(draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) &&
cvars::resolve_resolution_scale_fill_half_pixel_offset &&
cvars::half_pixel_offset && !regs.Get<reg::PA_SU_VTX_CNTL>().pix_center;
cvars::half_pixel_offset &&
regs.Get<reg::PA_SU_VTX_CNTL>().pix_center ==
xenos::PixelCenter::kD3DZero;
int32_t exp_bias = is_depth ? 0 : rb_copy_dest_info.copy_dest_exp_bias;
ResolveEdramInfo depth_edram_info;
depth_edram_info.packed = 0;

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Copyright 2025 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -10,8 +10,25 @@
// This is a partial file designed to be included by other files when
// constructing various tables.
// Almost all of these values are taken directly from:
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h
// Most 3D registers are the same as in the Qualcomm Adreno 200 (AMD Z430,
// another R400 architecture family chip):
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/10/yamato_offset.h
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/10/yamato_offset.h
//
// The addresses in this file are specified in dwords, similarly to how they are
// defined in yamato_offset.h. AMD, however, generally uses byte addresses in
// their documentation and open source drivers, so when looking up similar
// registers there, multiply by 4 and convert to hexadecimal.
//
// Display controller register addresses mostly match the M56 ones:
// https://www.x.org/docs/AMD/old/RRG-216M56-03oOEM.pdf
//
// 3D registers on the later chips such as the R600 are very different, but some
// of them are still partially or fully the same, and also share the address.
// However, on the R600, the 3D register space (at 0x8000 bytes, or 0x2000
// dwords, on the Xenos) is split into config registers (at 0x8000 bytes) and
// context registers (at 0x28000 bytes), but the lower bits of the address may
// still be the same.
//#define XE_GPU_REGISTER(index, type, name)

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2017 Ben Vanik. All rights reserved. *
* Copyright 2025 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -16,10 +16,29 @@
#include "xenia/base/assert.h"
#include "xenia/gpu/xenos.h"
// Most registers can be found from:
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h
// Some registers were added on Adreno specifically and are not referenced in
// game .pdb files and never set by games.
// Most 3D registers are the same as in the Qualcomm Adreno 200 (AMD Z430,
// another R400 architecture family chip):
//
// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/10/yamato_registers.h
// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/10/yamato_registers.h
//
// https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/freedreno/registers/adreno/a2xx.xml
//
// The Adreno 200, however, has various differences in its registers (primarily
// in the render backend, but not limited to that). Before adding the
// definitions from the Adreno 200, see the actual values of those registers set
// by games, and/or test them on the Xenos hardware.
//
// Other useful sources are the register references for later ATI/AMD 3D chips,
// most importantly the R600 - while it has a massive amount of differences,
// it's the closest relative of the R400 architecture that is available on the
// PC. Documentation for newer AMD GPUs, such as Evergreen, Northern Islands,
// and even GCN, can also provide details in some cases. ATI's earlier
// architecture, R3xx/R5xx, has very differently structured registers (although
// some are still very similar), but can provide some historical context.
//
// Display controller register addresses mostly match the M56 ones:
// https://www.x.org/docs/AMD/old/RRG-216M56-03oOEM.pdf
// All unused bits are intentionally declared as named fields for stable
// comparisons when register values are constructed or modified by Xenia itself.
@ -108,17 +127,20 @@ static_assert_size(WAIT_UNTIL, sizeof(uint32_t));
union alignas(uint32_t) SQ_PROGRAM_CNTL {
uint32_t value;
struct {
// Note from a2xx.xml:
// Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG,
// but high bit is set to indicate "0 registers used".
// (Register count = (num_reg & 0x80) ? 0 : (num_reg + 1))
uint32_t vs_num_reg : 8; // +0
uint32_t ps_num_reg : 8; // +8
// GPR counts minus 1.
// Ignore the Freedreno a2xx.xml note about the bit 7 for zero registers:
// the fields are 6-bit, not 8-bit, in yamato_registers.h, and games never
// set the bits 7:6.
uint32_t vs_num_reg : 6; // +0, value minus 1
uint32_t _pad_6 : 2; // +6
uint32_t ps_num_reg : 6; // +8, value minus 1
uint32_t _pad_14 : 2; // +14
uint32_t vs_resource : 1; // +16
uint32_t ps_resource : 1; // +17
uint32_t param_gen : 1; // +18
uint32_t gen_index_pix : 1; // +19
uint32_t vs_export_count : 4; // +20
// Interpolator output count minus 1.
uint32_t vs_export_count : 4; // +20, value minus 1
xenos::VertexShaderExportMode vs_export_mode : 3; // +24
uint32_t ps_export_mode : 4; // +27
uint32_t gen_index_vtx : 1; // +31
@ -211,7 +233,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC {
// non-negative for other primitive types.
uint32_t param_gen_pos : 8; // +8
uint32_t perfcounter_ref : 1; // +16
uint32_t yeild_optimize : 1; // +17 sic
uint32_t yield_optimize : 1; // +17
uint32_t tx_cache_sel : 1; // +18
uint32_t _pad_19 : 13; // +19
};
@ -237,7 +259,7 @@ union alignas(uint32_t) SQ_VS_CONST {
uint32_t base : 9; // +0
uint32_t _pad_9 : 3; // +9
// Vec4 count minus one.
uint32_t size : 9; // +12
uint32_t size : 9; // +12, value minus 1
uint32_t _pad_21 : 11; // +21
};
static constexpr Register register_index = XE_GPU_REG_SQ_VS_CONST;
@ -251,7 +273,7 @@ union alignas(uint32_t) SQ_PS_CONST {
uint32_t base : 9; // +0
uint32_t _pad_9 : 3; // +9
// Vec4 count minus one.
uint32_t size : 9; // +12
uint32_t size : 9; // +12, value minus 1
uint32_t _pad_21 : 11; // +21
};
static constexpr Register register_index = XE_GPU_REG_SQ_PS_CONST;
@ -288,10 +310,15 @@ union alignas(uint32_t) VGT_DMA_SIZE {
union alignas(uint32_t) VGT_DRAW_INITIATOR {
uint32_t value;
// Different than on A2xx and R6xx/R7xx.
// Has differences from the Adreno 200.
struct {
xenos::PrimitiveType prim_type : 6; // +0
xenos::SourceSelect source_select : 2; // +6
// Adreno 200 replaced this with FACENESS_CULL_SELECT possibly due to the
// removal of tessellation, but on the Xenos this is MAJOR_MODE like on the
// R600, it's set to the explicit mode mainly for tessellated draws in games
// (because VGT_OUTPUT_PATH_CNTL where tessellation is enabled is ignored in
// the implicit major mode).
xenos::MajorMode major_mode : 2; // +8
uint32_t _pad_10 : 1; // +10
xenos::IndexFormat index_size : 1; // +11
@ -466,9 +493,9 @@ static_assert_size(PA_SU_SC_MODE_CNTL, sizeof(uint32_t));
union alignas(uint32_t) PA_SU_VTX_CNTL {
uint32_t value;
struct {
uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL).
uint32_t round_mode : 2; // +1
uint32_t quant_mode : 3; // +3
xenos::PixelCenter pix_center : 1; // +0
xenos::VertexRounding round_mode : 2; // +1
xenos::VertexQuantization quant_mode : 3; // +3
uint32_t _pad_6 : 26; // +6
};
static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL;
@ -507,6 +534,8 @@ static_assert_size(PA_SC_VIZ_QUERY, sizeof(uint32_t));
union alignas(uint32_t) PA_CL_CLIP_CNTL {
uint32_t value;
struct {
// Like on the Adreno 200, but with user clip planes from R3xx (used in
// 4D5307E6 for the hanging lamp on Last Resort).
uint32_t ucp_ena_0 : 1; // +0
uint32_t ucp_ena_1 : 1; // +1
uint32_t ucp_ena_2 : 1; // +2
@ -631,8 +660,8 @@ static_assert_size(PA_SC_WINDOW_SCISSOR_BR, sizeof(uint32_t));
union alignas(uint32_t) RB_MODECONTROL {
uint32_t value;
struct {
xenos::ModeControl edram_mode : 3; // +0
uint32_t _pad_3 : 29; // +3
xenos::EdramMode edram_mode : 3; // +0
uint32_t _pad_3 : 29; // +3
};
static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL;
};
@ -773,7 +802,7 @@ union alignas(uint32_t) RB_DEPTHCONTROL {
uint32_t stencil_enable : 1; // +0
uint32_t z_enable : 1; // +1
uint32_t z_write_enable : 1; // +2
// EARLY_Z_ENABLE was added on Adreno.
// EARLY_Z_ENABLE was added on Adreno, never set by Xbox 360 games.
uint32_t _pad_3 : 1; // +3
xenos::CompareFunction zfunc : 3; // +4
uint32_t backface_enable : 1; // +7

View File

@ -940,9 +940,7 @@ class Shader {
if (!uses_register_dynamic_addressing()) {
return 0;
}
return std::max((program_cntl_num_reg & 0x80)
? uint32_t(0)
: (program_cntl_num_reg + uint32_t(1)),
return std::max(program_cntl_num_reg + uint32_t(1),
register_static_address_bound());
}

View File

@ -1187,29 +1187,29 @@ void TraceViewer::DrawStateUI() {
}
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
static_cast<EdramMode>(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7);
const char* mode_name = "Unknown";
switch (enable_mode) {
case ModeControl::kIgnore:
case EdramMode::kNoOperation:
ImGui::Text("Ignored Command %d", player_->current_command_index());
break;
case ModeControl::kColorDepth:
case ModeControl::kDepth: {
case EdramMode::kColorDepth:
case EdramMode::kDepthOnly: {
static const char* kPrimNames[] = {
"<none>", "point list", "line list", "line strip",
"triangle list", "triangle fan", "triangle strip", "unknown 0x7",
"rectangle list", "unknown 0x9", "unknown 0xA", "unknown 0xB",
"line loop", "quad list", "quad strip", "unknown 0xF",
};
ImGui::Text("%s Command %d: %s, %d indices",
enable_mode == ModeControl::kColorDepth ? "Color-Depth"
: "Depth-only",
player_->current_command_index(),
kPrimNames[int(draw_info.prim_type)], draw_info.index_count);
ImGui::Text(
"%s Command %d: %s, %d indices",
enable_mode == EdramMode::kColorDepth ? "Color-Depth" : "Depth-only",
player_->current_command_index(),
kPrimNames[int(draw_info.prim_type)], draw_info.index_count);
break;
}
case ModeControl::kCopy: {
case EdramMode::kCopy: {
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE];
ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(),
copy_dest_base);
@ -1371,7 +1371,7 @@ void TraceViewer::DrawStateUI() {
static_cast<xenos::MsaaSamples>((rb_surface_info >> 16) & 0x3);
if (ImGui::CollapsingHeader("Color Targets")) {
if (enable_mode != ModeControl::kDepth) {
if (enable_mode != EdramMode::kDepthOnly) {
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL];

View File

@ -2159,8 +2159,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
const RegisterFile& regs = *register_file_;
xenos::ModeControl edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode == xenos::ModeControl::kCopy) {
xenos::EdramMode edram_mode = regs.Get<reg::RB_MODECONTROL>().edram_mode;
if (edram_mode == xenos::EdramMode::kCopy) {
// Special copy handling.
return IssueCopy();
}
@ -2192,9 +2192,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal);
VulkanShader* pixel_shader = nullptr;
if (is_rasterization_done) {
// See xenos::ModeControl for explanation why the pixel shader is only used
// See xenos::EdramMode for explanation why the pixel shader is only used
// when it's kColorDepth here.
if (edram_mode == xenos::ModeControl::kColorDepth) {
if (edram_mode == xenos::EdramMode::kColorDepth) {
pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
if (pixel_shader) {
pipeline_cache_->AnalyzeShaderUcode(*pixel_shader);

View File

@ -766,7 +766,12 @@ enum class SignedRepeatingFractionMode : uint32_t {
kNoZero,
};
// instr_arbitrary_filter_t
// Arbitrary filter is still present in the Code Aurora Forum release of the
// Adreno 200 programming interface, but is deprecated according to the
// IPR2015-00325 R400 Document Library Folder History:
// "Change 124923 on 2003/10/03 by jhoule@jhoule_doc_lt
// [...]
// Deprecated the ARBITRARY_FILTER fields from TFetch instr+const."
enum class ArbitraryFilter : uint32_t {
k2x4Sym = 0,
k2x4Asym = 1,
@ -777,13 +782,7 @@ enum class ArbitraryFilter : uint32_t {
kUseFetchConst = 7,
};
// While instructions contain 6-bit register index fields (allowing literal
// indices, or literal index offsets, depending on the addressing mode, of up to
// 63), the maximum total register count for a vertex and a pixel shader
// combined is 128, and the boundary between vertex and pixel shaders can be
// moved via SQ_PROGRAM_CNTL::VS/PS_NUM_REG, according to the IPR2015-00325
// specification (section 8 "Register file allocation").
constexpr uint32_t kMaxShaderTempRegistersLog2 = 7;
constexpr uint32_t kMaxShaderTempRegistersLog2 = 6;
constexpr uint32_t kMaxShaderTempRegisters = UINT32_C(1)
<< kMaxShaderTempRegistersLog2;
@ -859,34 +858,59 @@ enum class PolygonType : uint32_t {
kTriangles = 2,
};
enum class ModeControl : uint32_t {
kIgnore = 0,
// Pixel center convention, stored in the 1-bit PA_SU_VTX_CNTL::pix_center
// field. Host-side code that reads the field adds a 0.5 offset to positions
// only for kD3DZero.
enum class PixelCenter : uint32_t {
// Pixel center at vertex positions .0, like in Direct3D 9.
// Commonly used in Xbox 360 games.
kD3DZero = 0,
// Pixel center at vertex positions .5, like in OpenGL.
// Used in 415607E6.
kOGLHalf = 1,
};
// Rounding mode stored in the 2-bit PA_SU_VTX_CNTL::round_mode field.
// NOTE(review): presumably applied when vertex positions are converted to
// fixed-point - confirm against the hardware register reference.
enum class VertexRounding : uint32_t {
kTruncate = 0, // OpenGL.
kRound = 1,
kRoundToEven = 2, // Direct3D. Common in Xbox 360 games.
kRoundToOdd = 3,
};
// Quantization mode stored in the 3-bit PA_SU_VTX_CNTL::quant_mode field.
// NOTE(review): the enumerator names presumably give the fixed-point step as
// a fraction of a pixel (k_1_16th being 4 subpixel bits) - confirm against the
// hardware register reference.
enum class VertexQuantization : uint32_t {
k_1_16th = 0,
k_1_8th = 1,
k_1_4th = 2,
k_1_2 = 3,
k_1 = 4,
// 1/256th was added in R600. On the Xbox 360, games normally use 1/16th.
};
enum class EdramMode : uint32_t {
kNoOperation = 0,
kColorDepth = 4,
// TODO(Triang3l): Verify whether kDepth means the pixel shader is ignored
// TODO(Triang3l): Verify whether kDepthOnly means the pixel shader is ignored
// completely even if it writes depth, exports to memory or kills pixels.
// Hints suggesting that it should be completely ignored (which is desirable
// on real hardware to avoid scheduling the pixel shader at all and waiting
// for it especially since the Xbox 360 doesn't have early per-sample depth /
// stencil, only early hi-Z / hi-stencil, and other registers possibly
// toggling pixel shader execution are yet to be found):
// - Most of depth pre-pass draws in 415607E6 use the kDepth more with a
// - Most of depth pre-pass draws in 415607E6 use the kDepthOnly mode with a
// `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though.
// However, when alphatested surfaces are drawn, kColorDepth is explicitly
// used with the same shader performing the texture fetch.
// - 5454082B has some kDepth draws with alphatest enabled, but the shader is
// `oC0 = r0`, which makes no sense (alphatest based on an interpolant from
// the vertex shader) as no texture alpha cutout is involved.
// - 5454082B also has kDepth draws with pretty complex shaders clearly for
// use only in the color pass - even fetching and filtering a shadowmap.
// - 5454082B has some kDepthOnly draws with alphatest enabled, but the shader
// is `oC0 = r0`, which makes no sense (alphatest based on an interpolant
// from the vertex shader) as no texture alpha cutout is involved.
// - 5454082B also has kDepthOnly draws with pretty complex shaders clearly
// for use only in the color pass - even fetching and filtering a shadowmap.
// For now, based on these, let's assume the pixel shader is never used with
// kDepth.
kDepth = 5,
// kDepthOnly.
kDepthOnly = 5,
kCopy = 6,
};
// Xenos copies EDRAM contents to a tiled 2D or 3D texture (resolves - from
// "MSAA resolve", but this name is also used for single-sampled copying) by
// drawing primitives with the EDRAM mode ModeControl::kCopy. Pixels covered by
// drawing primitives with the EDRAM mode EdramMode::kCopy. Pixels covered by
// the drawn geometry are copied. It's likely that only rectangular regions can
// be resolved.
//
@ -1095,9 +1119,9 @@ union alignas(uint32_t) xe_gpu_vertex_fetch_t {
FetchConstantType type : 2; // +0
uint32_t address : 30; // +2 address in dwords
Endian endian : 2; // +0
uint32_t size : 24; // +2 size in words
uint32_t unk1 : 6; // +26
Endian endian : 2; // +0
uint32_t size : 24; // +2 size in words
uint32_t _pad_1_26 : 6; // +26
};
};
static_assert_size(xe_gpu_vertex_fetch_t, sizeof(uint32_t) * 2);
@ -1168,21 +1192,21 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
};
struct {
FetchConstantType type : 2; // +0 dword_0
// Likely before the swizzle, seems logical from R5xx (SIGNED_COMP0/1/2/3
// set the signedness of components 0/1/2/3, while SEL_ALPHA/RED/GREEN/BLUE
// specify "swizzling for each channel at the input of the pixel shader",
// which can be texture components 0/1/2/3 or constant 0/1) and R6xx
// (signedness is FORMAT_COMP_X/Y/Z/W, while the swizzle is DST_SEL_X/Y/Z/W,
// which is named in resources the same as DST_SEL in fetch clauses).
TextureSign sign_x : 2; // +2
TextureSign sign_y : 2; // +4
TextureSign sign_z : 2; // +6
TextureSign sign_w : 2; // +8
ClampMode clamp_x : 3; // +10
ClampMode clamp_y : 3; // +13
ClampMode clamp_z : 3; // +16
SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19
uint32_t dim_tbd : 2; // +20
// The signedness applies to the data components (before the swizzle, which
// is the destination selection).
// Signed repeating fraction formats always use the kZeroClampMinusOne mode,
// according to the IPR2015-00325 R400 Document Library Folder History:
// "Change 133990 on 2003/11/25 by jhoule@jhoule_doc_lt
// v1.80 - Indicated that NO_ZERO srf mode is unsupported for Xenos (will
// currently only work in the VC path)"
TextureSign sign_x : 2; // +2
TextureSign sign_y : 2; // +4
TextureSign sign_z : 2; // +6
TextureSign sign_w : 2; // +8
ClampMode clamp_x : 3; // +10
ClampMode clamp_y : 3; // +13
ClampMode clamp_z : 3; // +16
uint32_t _pad_0_19 : 3; // +19
// Base row pitch in pixels (not blocks) >> 5. For linear textures, this is
// provided by Direct3D 9 in a way that every row of blocks ends up aligned
// to kTextureLinearRowAlignmentBytes (the GPU requires 256-byte alignment
@ -1209,7 +1233,7 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t {
union { // dword_2
struct {
uint32_t width : 24;
uint32_t _pad_88 : 8;
uint32_t _pad_size_1d : 8;
} size_1d;
struct {
uint32_t width : 13;