diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 93589cf7b..27a3141ec 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -2106,8 +2106,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, ID3D12Device* device = GetD3D12Provider().GetDevice(); const RegisterFile& regs = *register_file_; - xenos::ModeControl edram_mode = regs.Get().edram_mode; - if (edram_mode == xenos::ModeControl::kCopy) { + xenos::EdramMode edram_mode = regs.Get().edram_mode; + if (edram_mode == xenos::EdramMode::kCopy) { // Special copy handling. return IssueCopy(); } @@ -2134,9 +2134,9 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal); D3D12Shader* pixel_shader = nullptr; if (is_rasterization_done) { - // See xenos::ModeControl for explanation why the pixel shader is only used + // See xenos::EdramMode for explanation why the pixel shader is only used // when it's kColorDepth here. - if (edram_mode == xenos::ModeControl::kColorDepth) { + if (edram_mode == xenos::EdramMode::kColorDepth) { pixel_shader = static_cast(active_pixel_shader()); if (pixel_shader) { pipeline_cache_->AnalyzeShaderUcode(*pixel_shader); diff --git a/src/xenia/gpu/draw_extent_estimator.cc b/src/xenia/gpu/draw_extent_estimator.cc index 20c6086ee..480794aeb 100644 --- a/src/xenia/gpu/draw_extent_estimator.cc +++ b/src/xenia/gpu/draw_extent_estimator.cc @@ -253,7 +253,8 @@ uint32_t DrawExtentEstimator::EstimateVertexMaxY(const Shader& vertex_shader) { // so it's safe to add both - adding it will neither move the 16p8 clamping // bounds -32768 and 32767+255/256 into the 0...8192 screen space range, nor // cause 24p8 overflow. 
- if (!regs.Get().pix_center) { + if (regs.Get().pix_center == + xenos::PixelCenter::kD3DZero) { max_y_24p8 += 128; } if (pa_su_sc_mode_cntl.vtx_window_offset_enable) { @@ -329,7 +330,8 @@ uint32_t DrawExtentEstimator::EstimateMaxY(bool try_to_estimate_vertex_max_y, if (regs.Get().vtx_window_offset_enable) { viewport_bottom += float(window_y_offset); } - if (!regs.Get().pix_center) { + if (regs.Get().pix_center == + xenos::PixelCenter::kD3DZero) { viewport_bottom += 0.5f; } // Then apply the floating-point viewport offset. diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index b9f70ef1d..82e9216e7 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -40,12 +40,12 @@ namespace draw_util { bool IsRasterizationPotentiallyDone(const RegisterFile& regs, bool primitive_polygonal) { - // TODO(Triang3l): Investigate ModeControl::kIgnore better, with respect to + // TODO(Triang3l): Investigate EdramMode::kNoOperation better, with respect to // sample counting. Let's assume sample counting is a part of depth / stencil, // thus disabled too. - xenos::ModeControl edram_mode = regs.Get().edram_mode; - if (edram_mode != xenos::ModeControl::kColorDepth && - edram_mode != xenos::ModeControl::kDepth) { + xenos::EdramMode edram_mode = regs.Get().edram_mode; + if (edram_mode != xenos::EdramMode::kColorDepth && + edram_mode != xenos::EdramMode::kDepthOnly) { return false; } if (regs.Get().vs_export_mode == @@ -64,9 +64,9 @@ bool IsRasterizationPotentiallyDone(const RegisterFile& regs, } reg::RB_DEPTHCONTROL GetNormalizedDepthControl(const RegisterFile& regs) { - xenos::ModeControl edram_mode = regs.Get().edram_mode; - if (edram_mode != xenos::ModeControl::kColorDepth && - edram_mode != xenos::ModeControl::kDepth) { + xenos::EdramMode edram_mode = regs.Get().edram_mode; + if (edram_mode != xenos::EdramMode::kColorDepth && + edram_mode != xenos::EdramMode::kDepthOnly) { // Both depth and stencil disabled (EDRAM depth and stencil ignored). 
reg::RB_DEPTHCONTROL disabled; disabled.value = 0; @@ -124,10 +124,10 @@ bool IsPixelShaderNeededWithRasterization(const Shader& shader, assert_true(shader.type() == xenos::ShaderType::kPixel); assert_true(shader.is_ucode_analyzed()); - // See xenos::ModeControl for explanation why the pixel shader is only used - // when it's kColorDepth here. + // See xenos::EdramMode for explanation why the pixel shader is only used when + // it's kColorDepth here. if (regs.Get().edram_mode != - xenos::ModeControl::kColorDepth) { + xenos::EdramMode::kColorDepth) { return false; } @@ -340,7 +340,8 @@ void GetHostViewportInfo(const RegisterFile& regs, offset_add_xy[0] += float(pa_sc_window_offset.window_x_offset); offset_add_xy[1] += float(pa_sc_window_offset.window_y_offset); } - if (cvars::half_pixel_offset && !pa_su_vtx_cntl.pix_center) { + if (cvars::half_pixel_offset && + pa_su_vtx_cntl.pix_center == xenos::PixelCenter::kD3DZero) { offset_add_xy[0] += 0.5f; offset_add_xy[1] += 0.5f; } @@ -607,7 +608,7 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out, uint32_t GetNormalizedColorMask(const RegisterFile& regs, uint32_t pixel_shader_writes_color_targets) { if (regs.Get().edram_mode != - xenos::ModeControl::kColorDepth) { + xenos::EdramMode::kColorDepth) { return 0; } uint32_t normalized_color_mask = 0; @@ -838,7 +839,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, memory.TranslatePhysical(fetch.address * sizeof(uint32_t))); // Most vertices have a negative half-pixel offset applied, which we reverse. float half_pixel_offset = - regs.Get().pix_center ? 0.0f : 0.5f; + regs.Get().pix_center == xenos::PixelCenter::kD3DZero + ? 
0.5f + : 0.0f; int32_t vertices_fixed[6]; for (size_t i = 0; i < xe::countof(vertices_fixed); ++i) { vertices_fixed[i] = ui::FloatToD3D11Fixed16p8( @@ -1097,7 +1100,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, bool fill_half_pixel_offset = (draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) && cvars::resolve_resolution_scale_fill_half_pixel_offset && - cvars::half_pixel_offset && !regs.Get().pix_center; + cvars::half_pixel_offset && + regs.Get().pix_center == + xenos::PixelCenter::kD3DZero; int32_t exp_bias = is_depth ? 0 : rb_copy_dest_info.copy_dest_exp_bias; ResolveEdramInfo depth_edram_info; depth_edram_info.packed = 0; diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index aa22558eb..cae9ba23a 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * + * Copyright 2025 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -10,8 +10,25 @@ // This is a partial file designed to be included by other files when // constructing various tables. 
-// Almost all of these values are taken directly from: -// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h +// Most 3D registers are the same as in the Qualcomm Adreno 200 (AMD Z430, +// another R400 architecture family chip): +// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/10/yamato_offset.h +// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/10/yamato_offset.h +// +// The addresses in this file are specified in dwords, similarly to how they are +// defined in yamato_offset.h. AMD, however, generally uses byte addresses in +// their documentation and open source drivers, so when looking up similar +// registers there, multiply by 4 and convert to hexadecimal. +// +// Display controller register addresses mostly match the M56 ones: +// https://www.x.org/docs/AMD/old/RRG-216M56-03oOEM.pdf +// +// 3D registers on the later chips such as the R600 are very different, but some +// of them are still partially or fully the same, and also share the address. +// However, on the R600, the 3D register space (at 0x8000 bytes, or 0x2000 +// dwords, on the Xenos) is split into config registers (at 0x8000 bytes) and +// context registers (at 0x28000 bytes), but the lower bits of the address may +// still be the same. //#define XE_GPU_REGISTER(index, type, name) diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index c8ad6a286..4aec14f99 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2017 Ben Vanik. All rights reserved. * + * Copyright 2025 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
 * ****************************************************************************** */ @@ -16,10 +16,29 @@ #include "xenia/base/assert.h" #include "xenia/gpu/xenos.h" -// Most registers can be found from: -// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h -// Some registers were added on Adreno specifically and are not referenced in -// game .pdb files and never set by games. +// Most 3D registers are the same as in the Qualcomm Adreno 200 (AMD Z430, +// another R400 architecture family chip): +// +// https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/10/yamato_registers.h +// https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/10/yamato_registers.h +// +// https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/freedreno/registers/adreno/a2xx.xml +// +// The Adreno 200, however, has various differences in its registers (primarily +// in the render backend, but not limited to that). Before adding the +// definitions from the Adreno 200, see the actual values of those registers set +// by games, and/or test them on the Xenos hardware. +// +// Other useful sources are the register references for later ATI/AMD 3D chips, +// most importantly the R600 - while it has a massive amount of differences, +// it's the closest relative of the R400 architecture that is available on the +// PC. Documentation for newer AMD GPUs, such as Evergreen, Northern Islands, +// and even GCN can also provide details in some cases. ATI's earlier +// architecture, R3xx/R5xx, has very differently structured registers (although +// some are still very similar), but can provide some historical context. 
+// +// Display controller register addresses mostly match the M56 ones: +// https://www.x.org/docs/AMD/old/RRG-216M56-03oOEM.pdf // All unused bits are intentionally declared as named fields for stable // comparisons when register values are constructed or modified by Xenia itself. @@ -108,17 +127,20 @@ static_assert_size(WAIT_UNTIL, sizeof(uint32_t)); union alignas(uint32_t) SQ_PROGRAM_CNTL { uint32_t value; struct { - // Note from a2xx.xml: - // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, - // but high bit is set to indicate "0 registers used". - // (Register count = (num_reg & 0x80) ? 0 : (num_reg + 1)) - uint32_t vs_num_reg : 8; // +0 - uint32_t ps_num_reg : 8; // +8 + // GPR counts minus 1. + // Ignore the Freedreno a2xx.xml note about the bit 7 for zero registers, + // the fields are 6-bit, not 8-bit, in yamato_registers.h, and games never + // set the bits 7:6. + uint32_t vs_num_reg : 6; // +0, value minus 1 + uint32_t _pad_6 : 2; // +6 + uint32_t ps_num_reg : 6; // +8, value minus 1 + uint32_t _pad_14 : 2; // +14 uint32_t vs_resource : 1; // +16 uint32_t ps_resource : 1; // +17 uint32_t param_gen : 1; // +18 uint32_t gen_index_pix : 1; // +19 - uint32_t vs_export_count : 4; // +20 + // Interpolator output count minus 1. + uint32_t vs_export_count : 4; // +20, value minus 1 xenos::VertexShaderExportMode vs_export_mode : 3; // +24 uint32_t ps_export_mode : 4; // +27 uint32_t gen_index_vtx : 1; // +31 @@ -211,7 +233,7 @@ union alignas(uint32_t) SQ_CONTEXT_MISC { // non-negative for other primitive types. uint32_t param_gen_pos : 8; // +8 uint32_t perfcounter_ref : 1; // +16 - uint32_t yeild_optimize : 1; // +17 sic + uint32_t yield_optimize : 1; // +17 uint32_t tx_cache_sel : 1; // +18 uint32_t _pad_19 : 13; // +19 }; @@ -237,7 +259,7 @@ union alignas(uint32_t) SQ_VS_CONST { uint32_t base : 9; // +0 uint32_t _pad_9 : 3; // +9 // Vec4 count minus one. 
- uint32_t size : 9; // +12 + uint32_t size : 9; // +12, value minus 1 uint32_t _pad_21 : 11; // +21 }; static constexpr Register register_index = XE_GPU_REG_SQ_VS_CONST; @@ -251,7 +273,7 @@ union alignas(uint32_t) SQ_PS_CONST { uint32_t base : 9; // +0 uint32_t _pad_9 : 3; // +9 // Vec4 count minus one. - uint32_t size : 9; // +12 + uint32_t size : 9; // +12, value minus 1 uint32_t _pad_21 : 11; // +21 }; static constexpr Register register_index = XE_GPU_REG_SQ_PS_CONST; @@ -288,10 +310,15 @@ union alignas(uint32_t) VGT_DMA_SIZE { union alignas(uint32_t) VGT_DRAW_INITIATOR { uint32_t value; - // Different than on A2xx and R6xx/R7xx. + // Has differences from the Adreno 200. struct { xenos::PrimitiveType prim_type : 6; // +0 xenos::SourceSelect source_select : 2; // +6 + // Adreno 200 replaced this with FACENESS_CULL_SELECT possibly due to the + // removal of tessellation, but on the Xenos this is MAJOR_MODE like on the + // R600, it's set to the explicit mode mainly for tessellated draws in games + // (because VGT_OUTPUT_PATH_CNTL where tessellation is enabled is ignored in + // the implicit major mode). xenos::MajorMode major_mode : 2; // +8 uint32_t _pad_10 : 1; // +10 xenos::IndexFormat index_size : 1; // +11 @@ -466,9 +493,9 @@ static_assert_size(PA_SU_SC_MODE_CNTL, sizeof(uint32_t)); union alignas(uint32_t) PA_SU_VTX_CNTL { uint32_t value; struct { - uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL). 
- uint32_t round_mode : 2; // +1 - uint32_t quant_mode : 3; // +3 + xenos::PixelCenter pix_center : 1; // +0 + xenos::VertexRounding round_mode : 2; // +1 + xenos::VertexQuantization quant_mode : 3; // +3 uint32_t _pad_6 : 26; // +6 }; static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL; @@ -507,6 +534,8 @@ static_assert_size(PA_SC_VIZ_QUERY, sizeof(uint32_t)); union alignas(uint32_t) PA_CL_CLIP_CNTL { uint32_t value; struct { + // Like on the Adreno 200, but with user clip planes from R3xx (used in + // 4D5307E6 for the hanging lamp on Last Resort). uint32_t ucp_ena_0 : 1; // +0 uint32_t ucp_ena_1 : 1; // +1 uint32_t ucp_ena_2 : 1; // +2 @@ -631,8 +660,8 @@ static_assert_size(PA_SC_WINDOW_SCISSOR_BR, sizeof(uint32_t)); union alignas(uint32_t) RB_MODECONTROL { uint32_t value; struct { - xenos::ModeControl edram_mode : 3; // +0 - uint32_t _pad_3 : 29; // +3 + xenos::EdramMode edram_mode : 3; // +0 + uint32_t _pad_3 : 29; // +3 }; static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL; }; @@ -773,7 +802,7 @@ union alignas(uint32_t) RB_DEPTHCONTROL { uint32_t stencil_enable : 1; // +0 uint32_t z_enable : 1; // +1 uint32_t z_write_enable : 1; // +2 - // EARLY_Z_ENABLE was added on Adreno. + // EARLY_Z_ENABLE was added on Adreno, never set by Xbox 360 games. uint32_t _pad_3 : 1; // +3 xenos::CompareFunction zfunc : 3; // +4 uint32_t backface_enable : 1; // +7 diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 6ac4f568e..3ba205630 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -940,9 +940,7 @@ class Shader { if (!uses_register_dynamic_addressing()) { return 0; } - return std::max((program_cntl_num_reg & 0x80) - ? 
uint32_t(0) - : (program_cntl_num_reg + uint32_t(1)), + return std::max(program_cntl_num_reg + uint32_t(1), register_static_address_bound()); } diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 754942411..d63ebcf7d 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -1187,29 +1187,29 @@ void TraceViewer::DrawStateUI() { } auto enable_mode = - static_cast(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7); + static_cast(regs[XE_GPU_REG_RB_MODECONTROL] & 0x7); const char* mode_name = "Unknown"; switch (enable_mode) { - case ModeControl::kIgnore: + case EdramMode::kNoOperation: ImGui::Text("Ignored Command %d", player_->current_command_index()); break; - case ModeControl::kColorDepth: - case ModeControl::kDepth: { + case EdramMode::kColorDepth: + case EdramMode::kDepthOnly: { static const char* kPrimNames[] = { "", "point list", "line list", "line strip", "triangle list", "triangle fan", "triangle strip", "unknown 0x7", "rectangle list", "unknown 0x9", "unknown 0xA", "unknown 0xB", "line loop", "quad list", "quad strip", "unknown 0xF", }; - ImGui::Text("%s Command %d: %s, %d indices", - enable_mode == ModeControl::kColorDepth ? "Color-Depth" - : "Depth-only", - player_->current_command_index(), - kPrimNames[int(draw_info.prim_type)], draw_info.index_count); + ImGui::Text( + "%s Command %d: %s, %d indices", + enable_mode == EdramMode::kColorDepth ? 
"Color-Depth" : "Depth-only", + player_->current_command_index(), + kPrimNames[int(draw_info.prim_type)], draw_info.index_count); break; } - case ModeControl::kCopy: { + case EdramMode::kCopy: { uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE]; ImGui::Text("Copy Command %d (to %.8X)", player_->current_command_index(), copy_dest_base); @@ -1371,7 +1371,7 @@ void TraceViewer::DrawStateUI() { static_cast((rb_surface_info >> 16) & 0x3); if (ImGui::CollapsingHeader("Color Targets")) { - if (enable_mode != ModeControl::kDepth) { + if (enable_mode != EdramMode::kDepthOnly) { // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; uint32_t color_control = regs[XE_GPU_REG_RB_COLORCONTROL]; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index b2af47f30..ee1461d5e 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -2159,8 +2159,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, const RegisterFile& regs = *register_file_; - xenos::ModeControl edram_mode = regs.Get().edram_mode; - if (edram_mode == xenos::ModeControl::kCopy) { + xenos::EdramMode edram_mode = regs.Get().edram_mode; + if (edram_mode == xenos::EdramMode::kCopy) { // Special copy handling. return IssueCopy(); } @@ -2192,9 +2192,9 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, draw_util::IsRasterizationPotentiallyDone(regs, primitive_polygonal); VulkanShader* pixel_shader = nullptr; if (is_rasterization_done) { - // See xenos::ModeControl for explanation why the pixel shader is only used + // See xenos::EdramMode for explanation why the pixel shader is only used // when it's kColorDepth here. 
- if (edram_mode == xenos::ModeControl::kColorDepth) { + if (edram_mode == xenos::EdramMode::kColorDepth) { pixel_shader = static_cast(active_pixel_shader()); if (pixel_shader) { pipeline_cache_->AnalyzeShaderUcode(*pixel_shader); diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 5517d73c0..88396f5a7 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -766,7 +766,12 @@ enum class SignedRepeatingFractionMode : uint32_t { kNoZero, }; -// instr_arbitrary_filter_t +// Arbitrary filter is still present in the Code Aurora Forum release of the +// Adreno 200 programming interface, but is deprecated according to the +// IPR2015-00325 R400 Document Library Folder History: +// "Change 124923 on 2003/10/03 by jhoule@jhoule_doc_lt +// [...] +// Deprecated the ARBITRARY_FILTER fields from TFetch instr+const." enum class ArbitraryFilter : uint32_t { k2x4Sym = 0, k2x4Asym = 1, @@ -777,13 +782,7 @@ enum class ArbitraryFilter : uint32_t { kUseFetchConst = 7, }; -// While instructions contain 6-bit register index fields (allowing literal -// indices, or literal index offsets, depending on the addressing mode, of up to -// 63), the maximum total register count for a vertex and a pixel shader -// combined is 128, and the boundary between vertex and pixel shaders can be -// moved via SQ_PROGRAM_CNTL::VS/PS_NUM_REG, according to the IPR2015-00325 -// specification (section 8 "Register file allocation"). -constexpr uint32_t kMaxShaderTempRegistersLog2 = 7; +constexpr uint32_t kMaxShaderTempRegistersLog2 = 6; constexpr uint32_t kMaxShaderTempRegisters = UINT32_C(1) << kMaxShaderTempRegistersLog2; @@ -859,34 +858,59 @@ enum class PolygonType : uint32_t { kTriangles = 2, }; -enum class ModeControl : uint32_t { - kIgnore = 0, +enum class PixelCenter : uint32_t { + // Pixel center at vertex positions .0, like in Direct3D 9. + // Commonly used in Xbox 360 games. + kD3DZero = 0, + // Pixel center at vertex positions .5, like in OpenGL. + // Used in 415607E6. 
+ kOGLHalf = 1, +}; + +enum class VertexRounding : uint32_t { + kTruncate = 0, // OpenGL. + kRound = 1, + kRoundToEven = 2, // Direct3D. Common in Xbox 360 games. + kRoundToOdd = 3, +}; + +enum class VertexQuantization : uint32_t { + k_1_16th = 0, + k_1_8th = 1, + k_1_4th = 2, + k_1_2 = 3, + k_1 = 4, + // 1/256th was added in R600. On the Xbox 360, games normally use 1/16th. +}; + +enum class EdramMode : uint32_t { + kNoOperation = 0, kColorDepth = 4, - // TODO(Triang3l): Verify whether kDepth means the pixel shader is ignored + // TODO(Triang3l): Verify whether kDepthOnly means the pixel shader is ignored // completely even if it writes depth, exports to memory or kills pixels. // Hints suggesting that it should be completely ignored (which is desirable // on real hardware to avoid scheduling the pixel shader at all and waiting // for it especially since the Xbox 360 doesn't have early per-sample depth / // stencil, only early hi-Z / hi-stencil, and other registers possibly // toggling pixel shader execution are yet to be found): - // - Most of depth pre-pass draws in 415607E6 use the kDepth more with a + // - Most of depth pre-pass draws in 415607E6 use the kDepthOnly mode with a // `oC0 = tfetch2D(tf0, r0.xy) * r1` shader, some use `oC0 = r0` though. // However, when alphatested surfaces are drawn, kColorDepth is explicitly // used with the same shader performing the texture fetch. - // - 5454082B has some kDepth draws with alphatest enabled, but the shader is - // `oC0 = r0`, which makes no sense (alphatest based on an interpolant from - // the vertex shader) as no texture alpha cutout is involved. - // - 5454082B also has kDepth draws with pretty complex shaders clearly for - // use only in the color pass - even fetching and filtering a shadowmap. 
+ // - 5454082B has some kDepthOnly draws with alphatest enabled, but the shader + // is `oC0 = r0`, which makes no sense (alphatest based on an interpolant + // from the vertex shader) as no texture alpha cutout is involved. + // - 5454082B also has kDepthOnly draws with pretty complex shaders clearly + // for use only in the color pass - even fetching and filtering a shadowmap. // For now, based on these, let's assume the pixel shader is never used with - // kDepth. - kDepth = 5, + // kDepthOnly. + kDepthOnly = 5, kCopy = 6, }; // Xenos copies EDRAM contents to a tiled 2D or 3D texture (resolves - from // "MSAA resolve", but this name is also used for single-sampled copying) by -// drawing primitives with the EDRAM mode ModeControl::kCopy. Pixels covered by +// drawing primitives with the EDRAM mode EdramMode::kCopy. Pixels covered by // the drawn geometry are copied. It's likely that only rectangular regions can // be resolved. // @@ -1095,9 +1119,9 @@ union alignas(uint32_t) xe_gpu_vertex_fetch_t { FetchConstantType type : 2; // +0 uint32_t address : 30; // +2 address in dwords - Endian endian : 2; // +0 - uint32_t size : 24; // +2 size in words - uint32_t unk1 : 6; // +26 + Endian endian : 2; // +0 + uint32_t size : 24; // +2 size in words + uint32_t _pad_1_26 : 6; // +26 }; }; static_assert_size(xe_gpu_vertex_fetch_t, sizeof(uint32_t) * 2); @@ -1168,21 +1192,21 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t { }; struct { FetchConstantType type : 2; // +0 dword_0 - // Likely before the swizzle, seems logical from R5xx (SIGNED_COMP0/1/2/3 - // set the signedness of components 0/1/2/3, while SEL_ALPHA/RED/GREEN/BLUE - // specify "swizzling for each channel at the input of the pixel shader", - // which can be texture components 0/1/2/3 or constant 0/1) and R6xx - // (signedness is FORMAT_COMP_X/Y/Z/W, while the swizzle is DST_SEL_X/Y/Z/W, - // which is named in resources the same as DST_SEL in fetch clauses). 
- TextureSign sign_x : 2; // +2 - TextureSign sign_y : 2; // +4 - TextureSign sign_z : 2; // +6 - TextureSign sign_w : 2; // +8 - ClampMode clamp_x : 3; // +10 - ClampMode clamp_y : 3; // +13 - ClampMode clamp_z : 3; // +16 - SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19 - uint32_t dim_tbd : 2; // +20 + // The signedness applies to the data components (before the swizzle, which + // is the destination selection). + // Signed repeating fraction formats always use the kZeroClampMinusOne mode, + // according to the IPR2015-00325 R400 Document Library Folder History: + // "Change 133990 on 2003/11/25 by jhoule@jhoule_doc_lt + // v1.80 - Indicated that NO_ZERO srf mode is unsupported for Xenos (will + // currently only work in the VC path)" + TextureSign sign_x : 2; // +2 + TextureSign sign_y : 2; // +4 + TextureSign sign_z : 2; // +6 + TextureSign sign_w : 2; // +8 + ClampMode clamp_x : 3; // +10 + ClampMode clamp_y : 3; // +13 + ClampMode clamp_z : 3; // +16 + uint32_t _pad_0_19 : 3; // +19 // Base row pitch in pixels (not blocks) >> 5. For linear textures, this is // provided by Direct3D 9 in a way that every row of blocks ends up aligned // to kTextureLinearRowAlignmentBytes (the GPU requires 256-byte alignment @@ -1209,7 +1233,7 @@ union alignas(uint32_t) xe_gpu_texture_fetch_t { union { // dword_2 struct { uint32_t width : 24; - uint32_t _pad_88 : 8; + uint32_t _pad_size_1d : 8; } size_1d; struct { uint32_t width : 13;