From 3481f739a17cc82c6bca844d8b86584c382eae9f Mon Sep 17 00:00:00 2001 From: Atvaark Date: Wed, 2 Oct 2019 23:34:43 +0200 Subject: [PATCH 1/4] [Docs] Fix project name in building.md --- docs/building.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/building.md b/docs/building.md index 933567e6c..366db280b 100644 --- a/docs/building.md +++ b/docs/building.md @@ -72,7 +72,7 @@ xb genspirv #### Debugging -VS behaves oddly with the debug paths. Open the xenia project properties +VS behaves oddly with the debug paths. Open the 'xenia-app' project properties and set the 'Command' to `$(SolutionDir)$(TargetPath)` and the 'Working Directory' to `$(SolutionDir)..\..`. You can specify flags and the file to run in the 'Command Arguments' field (or use `--flagfile=flags.txt`). From f83269cf8ca75ea2fff899012eecf12b29d71d5d Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 19 Oct 2019 23:30:53 +0300 Subject: [PATCH 2/4] [GPU] Refactor: Register structs in D3D12CommandProcessor and some other places --- src/xenia/gpu/command_processor.cc | 1 - .../gpu/d3d12/d3d12_command_processor.cc | 465 +++++++++--------- src/xenia/gpu/d3d12/pipeline_cache.cc | 36 +- src/xenia/gpu/d3d12/pipeline_cache.h | 2 +- src/xenia/gpu/d3d12/shared_memory.cc | 15 +- src/xenia/gpu/d3d12/shared_memory.h | 1 - src/xenia/gpu/dxbc_shader_translator.cc | 281 ++++++----- src/xenia/gpu/dxbc_shader_translator.h | 3 + src/xenia/gpu/register_file.h | 22 +- src/xenia/gpu/registers.cc | 51 ++ src/xenia/gpu/registers.h | 192 +++++++- src/xenia/gpu/shader_translator.cc | 8 +- src/xenia/gpu/shader_translator.h | 5 +- src/xenia/gpu/spirv_shader_translator.cc | 1 + src/xenia/gpu/vulkan/pipeline_cache.cc | 47 +- src/xenia/gpu/vulkan/pipeline_cache.h | 7 +- src/xenia/gpu/xenos.h | 77 ++- 17 files changed, 716 insertions(+), 498 deletions(-) create mode 100644 src/xenia/gpu/registers.cc diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index ff7b00521..a715d0e5e 
100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -20,7 +20,6 @@ #include "xenia/base/ring_buffer.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/graphics_system.h" -#include "xenia/gpu/registers.h" #include "xenia/gpu/sampler_info.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/xenos.h" diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index c0f57485c..20e1495b9 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1142,8 +1142,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - auto enable_mode = static_cast( - regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + xenos::ModeControl enable_mode = regs.Get().edram_mode; if (enable_mode == xenos::ModeControl::kIgnore) { // Ignored. return true; @@ -1153,7 +1152,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } - if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { + if (regs.Get().surface_pitch == 0) { // Doesn't actually draw. // TODO(Triang3l): Do something so memexport still works in this case maybe? // Unlikely that zero would even really be legal though. 
@@ -1164,7 +1163,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool tessellated; if (uint32_t(primitive_type) >= uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { - tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1; + tessellated = regs.Get().path_select == + xenos::VGTOutputPath::kTessellationEnable; } else { tessellated = false; } @@ -1202,8 +1202,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); + auto pa_su_sc_mode_cntl = regs.Get(); if (!memexport_used_vertex && primitive_two_faced && - (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) { + pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) { // Both sides are culled - can't be expressed in the pipeline state. return true; } @@ -1223,9 +1224,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // tessellation factors (as floats) instead of control point indices. bool adaptive_tessellation; if (tessellated) { - TessellationMode tessellation_mode = - TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3); - adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive; + xenos::TessellationMode tessellation_mode = + regs.Get().tess_mode; + adaptive_tessellation = + tessellation_mode == xenos::TessellationMode::kAdaptive; if (adaptive_tessellation && (!indexed || index_buffer_info->format != IndexFormat::kInt32)) { return false; @@ -1235,7 +1237,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // passed to vertex shader registers, especially if patches are drawn with // an index buffer. 
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 - if (tessellation_mode != TessellationMode::kAdaptive) { + if (tessellation_mode != xenos::TessellationMode::kAdaptive) { XELOGE( "Tessellation mode %u is not implemented yet, only adaptive is " "partially available now - report the game to Xenia developers!", @@ -1309,20 +1311,16 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_shader->GetUsedTextureMask(), pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); - // Check if early depth/stencil can be enabled explicitly by RB_DEPTHCONTROL - // or implicitly when alpha test and alpha to coverage are disabled. - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - bool early_z = false; - if (pixel_shader == nullptr) { + // Check if early depth/stencil can be enabled. + bool early_z; + if (pixel_shader) { + auto rb_colorcontrol = regs.Get(); + early_z = pixel_shader->implicit_early_z_allowed() && + (!rb_colorcontrol.alpha_test_enable || + rb_colorcontrol.alpha_func == CompareFunction::kAlways) && + !rb_colorcontrol.alpha_to_mask_enable; + } else { early_z = true; - } else if (!pixel_shader->writes_depth()) { - if (rb_depthcontrol & 0x8) { - early_z = true; - } else if (pixel_shader->implicit_early_z_allowed()) { - early_z = (!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7) && - !(rb_colorcontrol & 0x10); - } } // Create the pipeline if needed and bind it. 
@@ -1366,22 +1364,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, (1ull << (vfetch_index & 63))) { continue; } - uint32_t vfetch_constant_index = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2; - if ((regs[vfetch_constant_index].u32 & 0x3) != 3) { + const auto& vfetch_constant = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + if (vfetch_constant.type != 3) { XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!", - vfetch_index, regs[vfetch_constant_index].u32, - regs[vfetch_constant_index + 1].u32); + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); return false; } - if (!shared_memory_->RequestRange( - regs[vfetch_constant_index].u32 & 0x1FFFFFFC, - regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) { + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { XELOGE( "Failed to request vertex buffer at 0x%.8X (size %u) in the shared " "memory", - regs[vfetch_constant_index].u32 & 0x1FFFFFFC, - regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); + vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); @@ -1400,31 +1395,29 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, const std::vector& memexport_stream_constants_vertex = vertex_shader->memexport_stream_constants(); for (uint32_t constant_index : memexport_stream_constants_vertex) { - const xenos::xe_gpu_memexport_stream_t* memexport_stream = - reinterpret_cast( - &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]); - if (memexport_stream->index_count == 0) { + const auto& memexport_stream = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4); + if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = - GetSupportedMemExportFormatSize(memexport_stream->format); + GetSupportedMemExportFormatSize(memexport_stream.format);
if (memexport_format_size == 0) { - XELOGE( - "Unsupported memexport format %s", - FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) - ->name); + XELOGE("Unsupported memexport format %s", + FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format))) + ->name); return false; } - uint32_t memexport_base_address = memexport_stream->base_address; uint32_t memexport_size_dwords = - memexport_stream->index_count * memexport_format_size; + memexport_stream.index_count * memexport_format_size; // Try to reduce the number of shared memory operations when writing // different elements into the same buffer through different exports // (happens in Halo 3). bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; - if (memexport_range.base_address_dwords == memexport_base_address) { + if (memexport_range.base_address_dwords == + memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; @@ -1435,7 +1428,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; - memexport_range.base_address_dwords = memexport_base_address; + memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } @@ -1444,28 +1437,26 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, const std::vector& memexport_stream_constants_pixel = pixel_shader->memexport_stream_constants(); for (uint32_t constant_index : memexport_stream_constants_pixel) { - const xenos::xe_gpu_memexport_stream_t* memexport_stream = - reinterpret_cast( - &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]); - if (memexport_stream->index_count == 0) { + const auto& memexport_stream = regs.Get( +
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4); + if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = - GetSupportedMemExportFormatSize(memexport_stream->format); + GetSupportedMemExportFormatSize(memexport_stream.format); if (memexport_format_size == 0) { - XELOGE( - "Unsupported memexport format %s", - FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) - ->name); + XELOGE("Unsupported memexport format %s", + FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format))) + ->name); return false; } - uint32_t memexport_base_address = memexport_stream->base_address; uint32_t memexport_size_dwords = - memexport_stream->index_count * memexport_format_size; + memexport_stream.index_count * memexport_format_size; bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; - if (memexport_range.base_address_dwords == memexport_base_address) { + if (memexport_range.base_address_dwords == + memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; @@ -1475,7 +1466,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; - memexport_range.base_address_dwords = memexport_base_address; + memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } @@ -1850,15 +1841,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - 
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; - int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; - if (window_offset_x & 0x4000) { - window_offset_x |= 0x8000; - } - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } + auto pa_sc_window_offset = regs.Get(); // Supersampling replacing multisampling due to difficulties of emulating // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also @@ -1868,8 +1851,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { pixel_size_x = 1; pixel_size_y = 1; } else { - MsaaSamples msaa_samples = - MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3); + MsaaSamples msaa_samples = regs.Get().msaa_samples; pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; } @@ -1889,30 +1871,30 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // box. If it's not, the position is in screen space. Since we can only use // the NDC in PC APIs, we use a viewport of the largest possible size, and // divide the position by it in translated shaders. - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + auto pa_cl_vte_cntl = regs.Get(); float viewport_scale_x = - (pa_cl_vte_cntl & (1 << 0)) + pa_cl_vte_cntl.vport_x_scale_ena ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) : 1280.0f; float viewport_scale_y = - (pa_cl_vte_cntl & (1 << 2)) + pa_cl_vte_cntl.vport_y_scale_ena ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) : 1280.0f; - float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; - float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : std::abs(viewport_scale_x); - float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : std::abs(viewport_scale_y); - float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { - viewport_offset_x += float(window_offset_x); - viewport_offset_y += float(window_offset_y); + if (regs.Get().vtx_window_offset_enable) { + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); } D3D12_VIEWPORT viewport; viewport.TopLeftX = @@ -1941,21 +1923,22 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } // Scissor. - uint32_t pa_sc_window_scissor_tl = - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t pa_sc_window_scissor_br = - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); D3D12_RECT scissor; - scissor.left = pa_sc_window_scissor_tl & 0x7FFF; - scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF; - scissor.right = pa_sc_window_scissor_br & 0x7FFF; - scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF; - if (!(pa_sc_window_scissor_tl & (1u << 31))) { - // !WINDOW_OFFSET_DISABLE. 
- scissor.left = std::max(scissor.left + window_offset_x, LONG(0)); - scissor.top = std::max(scissor.top + window_offset_y, LONG(0)); - scissor.right = std::max(scissor.right + window_offset_x, LONG(0)); - scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0)); + scissor.left = pa_sc_window_scissor_tl.tl_x; + scissor.top = pa_sc_window_scissor_tl.tl_y; + scissor.right = pa_sc_window_scissor_br.br_x; + scissor.bottom = pa_sc_window_scissor_br.br_y; + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.left = + std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); + scissor.top = + std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.right = + std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); + scissor.bottom = + std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -1992,13 +1975,17 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // Stencil reference value. Per-face reference not supported by Direct3D 12, // choose the back face one only if drawing only back faces. 
- uint32_t stencil_ref; - if (primitive_two_faced && (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & 0x80) && - (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 1) { - stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32 & 0xFF; + uint32_t stencil_ref_mask_reg; + auto pa_su_sc_mode_cntl = regs.Get(); + if (primitive_two_faced && + regs.Get().backface_enable && + pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) { + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; } else { - stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF; + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK; } + uint32_t stencil_ref = + regs.Get(stencil_ref_mask_reg).stencilref; ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; if (ff_stencil_ref_update_needed_) { ff_stencil_ref_ = stencil_ref; @@ -2019,64 +2006,55 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - uint32_t pa_su_point_minmax = regs[XE_GPU_REG_PA_SU_POINT_MINMAX].u32; - uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32; - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; - uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; + auto pa_cl_clip_cntl = regs.Get(); + auto pa_cl_vte_cntl = regs.Get(); + auto pa_su_point_minmax = regs.Get(); + auto pa_su_point_size = regs.Get(); + auto pa_su_sc_mode_cntl = regs.Get(); + auto pa_su_vtx_cntl = regs.Get(); float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - uint32_t rb_stencilrefmask_bf = 
regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; - uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + auto rb_colorcontrol = regs.Get(); + auto rb_depth_info = regs.Get(); + auto rb_depthcontrol = regs.Get(); + auto rb_stencilrefmask = regs.Get(); + auto rb_stencilrefmask_bf = + regs.Get(XE_GPU_REG_RB_STENCILREFMASK_BF); + auto rb_surface_info = regs.Get(); + auto sq_context_misc = regs.Get(); + auto sq_program_cntl = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); // Get the color info register values for each render target, and also put // some safety measures for the ROV path - disable fully aliased render // targets. Also, for ROV, exclude components that don't exist in the format // from the write mask. - uint32_t color_infos[4]; - ColorRenderTargetFormat color_formats[4]; + reg::RB_COLOR_INFO color_infos[4]; float rt_clamp[4][4]; uint32_t rt_keep_masks[4][2]; for (uint32_t i = 0; i < 4; ++i) { - uint32_t color_info; - switch (i) { - case 1: - color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - break; - case 2: - color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - break; - case 3: - color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - break; - default: - color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - } + static const uint32_t kColorInfoRegs[] = { + XE_GPU_REG_RB_COLOR_INFO, + XE_GPU_REG_RB_COLOR1_INFO, + XE_GPU_REG_RB_COLOR2_INFO, + XE_GPU_REG_RB_COLOR3_INFO, + }; + auto color_info = regs.Get(kColorInfoRegs[i]); color_infos[i] = color_info; - color_formats[i] = ColorRenderTargetFormat((color_info >> 16) & 0xF); if (IsROVUsedForEDRAM()) { // Get the mask for keeping previous color's components unmodified, // or two UINT32_MAX if no colors actually existing in the RT are written. 
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants( - color_formats[i], (color_mask >> (i * 4)) & 0b1111, rt_clamp[i][0], - rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], rt_keep_masks[i][0], - rt_keep_masks[i][1]); + color_info.color_format, (color_mask >> (i * 4)) & 0b1111, + rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], + rt_keep_masks[i][0], rt_keep_masks[i][1]); // Disable the render target if it has the same EDRAM base as another one // (with a smaller index - assume it's more important). if (rt_keep_masks[i][0] == UINT32_MAX && rt_keep_masks[i][1] == UINT32_MAX) { - uint32_t edram_base = color_info & 0xFFF; for (uint32_t j = 0; j < i; ++j) { - if (edram_base == (color_infos[j] & 0xFFF) && + if (color_info.color_base == color_infos[j].color_base && (rt_keep_masks[j][0] != UINT32_MAX || rt_keep_masks[j][1] != UINT32_MAX)) { rt_keep_masks[i][0] = UINT32_MAX; @@ -2091,20 +2069,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Disable depth and stencil if it aliases a color render target (for // instance, during the XBLA logo in Banjo-Kazooie, though depth writing is // already disabled there). - if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { - uint32_t edram_base_depth = rb_depth_info & 0xFFF; + bool depth_stencil_enabled = + rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; + if (IsROVUsedForEDRAM() && depth_stencil_enabled) { for (uint32_t i = 0; i < 4; ++i) { - if (edram_base_depth == (color_infos[i] & 0xFFF) && + if (rb_depth_info.depth_base == color_infos[i].color_base && (rt_keep_masks[i][0] != UINT32_MAX || rt_keep_masks[i][1] != UINT32_MAX)) { - rb_depthcontrol &= ~(uint32_t(0x1 | 0x2)); + depth_stencil_enabled = false; break; } } } // Get viewport Z scale - needed for flags and ROV output. - float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; @@ -2126,18 +2105,18 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // = false: multiply the Z coordinate by 1/W0. // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal // to get 1/W0. - if (pa_cl_vte_cntl & (1 << 8)) { + if (pa_cl_vte_cntl.vtx_xy_fmt) { flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW; } - if (pa_cl_vte_cntl & (1 << 9)) { + if (pa_cl_vte_cntl.vtx_z_fmt) { flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW; } - if (pa_cl_vte_cntl & (1 << 10)) { + if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal; } // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. - if (!(pa_cl_clip_cntl & (1 << 16))) { - flags |= (pa_cl_clip_cntl & 0b111111) + if (!pa_cl_clip_cntl.clip_disable) { + flags |= (pa_cl_clip_cntl.value & 0b111111) << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; } // Reversed depth. @@ -2145,8 +2124,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; } // Alpha test. - if (rb_colorcontrol & 0x8) { - flags |= (rb_colorcontrol & 0x7) + if (rb_colorcontrol.alpha_test_enable) { + flags |= uint32_t(rb_colorcontrol.alpha_func.value()) << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; } else { flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | @@ -2154,25 +2133,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; } // Alpha to coverage. - if (rb_colorcontrol & 0x10) { + if (rb_colorcontrol.alpha_to_mask_enable) { flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage; } // Gamma writing. 
for (uint32_t i = 0; i < 4; ++i) { - if (color_formats[i] == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { + if (color_infos[i].color_format == + ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i; } } - if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { + if (IsROVUsedForEDRAM() && depth_stencil_enabled) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil; - if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == - DepthRenderTargetFormat::kD24FS8) { + if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; } - if (rb_depthcontrol & 0x2) { - flags |= ((rb_depthcontrol >> 4) & 0x7) + if (rb_depthcontrol.z_enable) { + flags |= uint32_t(rb_depthcontrol.zfunc.value()) << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; - if (rb_depthcontrol & 0x4) { + if (rb_depthcontrol.z_write_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; } } else { @@ -2182,7 +2161,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual | DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater; } - if (rb_depthcontrol & 0x1) { + if (rb_depthcontrol.stencil_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest; } if (early_z) { @@ -2223,9 +2202,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( index_endian_and_edge_factors; // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. - if (!(pa_cl_clip_cntl & (1 << 16))) { + if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { - if (!(pa_cl_clip_cntl & (1 << i))) { + if (!(pa_cl_clip_cntl.value & (1 << i))) { continue; } const float* ucp = &regs[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32; @@ -2249,45 +2228,49 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // different register (and if there's such register at all).
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; + // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be + // written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't + // set in this case in draws in games. bool gl_clip_space_def = - !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); + !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; float ndc_scale_x, ndc_scale_y, ndc_scale_z; - if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) { + if (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && + pa_su_sc_mode_cntl.cull_back) { // Kill all primitives if both faces are culled, but the vertex shader still // needs to do memexport (not NaN because of comparison for setting the // dirty flag). ndc_scale_x = ndc_scale_y = ndc_scale_z = 0; } else { - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; } else { ndc_scale_x = 1.0f / 1280.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; } else { ndc_scale_y = -1.0f / 1280.0f; } ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; } - float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; - float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; + float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; + float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; // Like in OpenGL - VPOS giving pixel centers. // TODO(Triang3l): Check if ps_param_gen should give center positions in // OpenGL mode on the Xbox 360. 
float pixel_half_pixel_offset = 0.5f; - if (cvars::d3d12_half_pixel_offset && !(pa_su_vtx_cntl & (1 << 0))) { + if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) { // Signs are hopefully correct here, tested in GTA IV on both clearing // (without a viewport) and drawing things near the edges of the screen. - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { if (viewport_scale_x != 0.0f) { ndc_offset_x += 0.5f / viewport_scale_x; } } else { ndc_offset_x += 1.0f / 2560.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { if (viewport_scale_y != 0.0f) { ndc_offset_y += 0.5f / viewport_scale_y; } @@ -2313,10 +2296,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; // Point size. - float point_size_x = float(pa_su_point_size >> 16) * 0.125f; - float point_size_y = float(pa_su_point_size & 0xFFFF) * 0.125f; - float point_size_min = float(pa_su_point_minmax & 0xFFFF) * 0.125f; - float point_size_max = float(pa_su_point_minmax >> 16) * 0.125f; + float point_size_x = float(pa_su_point_size.width) * 0.125f; + float point_size_y = float(pa_su_point_size.height) * 0.125f; + float point_size_min = float(pa_su_point_minmax.min_size) * 0.125f; + float point_size_max = float(pa_su_point_minmax.max_size) * 0.125f; dirty |= system_constants_.point_size[0] != point_size_x; dirty |= system_constants_.point_size[1] != point_size_y; dirty |= system_constants_.point_size_min_max[0] != point_size_min; @@ -2326,13 +2309,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[1] = point_size_max; float point_screen_to_ndc_x, point_screen_to_ndc_y; - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { point_screen_to_ndc_x = (viewport_scale_x != 0.0f) ? 
(0.5f / viewport_scale_x) : 0.0f; } else { point_screen_to_ndc_x = 1.0f / 2560.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { point_screen_to_ndc_y = (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; } else { @@ -2345,15 +2328,16 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Pixel position register. uint32_t pixel_pos_reg = - (sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; + sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX; dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg; system_constants_.pixel_pos_reg = pixel_pos_reg; // Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for // EDRAM address calculation with MSAA (with ROV). - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t sample_count_log2_x = msaa_samples >= MsaaSamples::k4X ? 1 : 0; - uint32_t sample_count_log2_y = msaa_samples >= MsaaSamples::k2X ? 1 : 0; + uint32_t sample_count_log2_x = + rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 1 : 0; + uint32_t sample_count_log2_y = + rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 1 : 0; dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x; dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y; system_constants_.sample_count_log2[0] = sample_count_log2_x; @@ -2365,43 +2349,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // EDRAM pitch for ROV writing. if (IsROVUsedForEDRAM()) { - uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) * - (msaa_samples >= MsaaSamples::k4X ? 2 : 1)) + - 79) / - 80; + uint32_t edram_pitch_tiles = + ((std::min(rb_surface_info.surface_pitch.value(), 2560u) * + (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 
2 : 1)) + + 79) / + 80; dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles; system_constants_.edram_pitch_tiles = edram_pitch_tiles; } // Color exponent bias and output index mapping or ROV render target writing. - bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0; for (uint32_t i = 0; i < 4; ++i) { - uint32_t color_info = color_infos[i]; - uint32_t blend_factors_ops; - if (colorcontrol_blend_enable) { - switch (i) { - case 1: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32; - break; - case 2: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32; - break; - case 3: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32; - break; - default: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32; - break; - } - blend_factors_ops &= 0x1FFF1FFF; - } else { - blend_factors_ops = 0x00010001; - } + reg::RB_COLOR_INFO color_info = color_infos[i]; // Exponent bias is in bits 20:25 of RB_COLOR_INFO. - int32_t color_exp_bias = int32_t(color_info << 6) >> 26; - ColorRenderTargetFormat color_format = color_formats[i]; - if (color_format == ColorRenderTargetFormat::k_16_16 || - color_format == ColorRenderTargetFormat::k_16_16_16_16) { + int32_t color_exp_bias = color_info.color_exp_bias; + if (color_info.color_format == ColorRenderTargetFormat::k_16_16 || + color_info.color_format == ColorRenderTargetFormat::k_16_16_16_16) { // On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have // -32...32 range and expect shaders to give -32...32 values, but they're // emulated using normalized RG16/RGBA16 when not using the ROV, so the @@ -2427,7 +2390,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1]; if (rt_keep_masks[i][0] != UINT32_MAX || rt_keep_masks[i][1] != UINT32_MAX) { - uint32_t rt_base_dwords_scaled = (color_info & 0xFFF) * 1280; + uint32_t rt_base_dwords_scaled = color_info.color_base * 1280; if 
(texture_cache_->IsResolutionScale2X()) { rt_base_dwords_scaled <<= 2; } @@ -2435,8 +2398,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( rt_base_dwords_scaled; system_constants_.edram_rt_base_dwords_scaled[i] = rt_base_dwords_scaled; - uint32_t format_flags = - DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format); + uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags( + color_info.color_format); dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; // Can't do float comparisons here because NaNs would result in always @@ -2445,6 +2408,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( 4 * sizeof(float)) != 0; std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)); + static const uint32_t kBlendControlRegs[] = { + XE_GPU_REG_RB_BLENDCONTROL_0, + XE_GPU_REG_RB_BLENDCONTROL_1, + XE_GPU_REG_RB_BLENDCONTROL_2, + XE_GPU_REG_RB_BLENDCONTROL_3, + }; + uint32_t blend_factors_ops = + regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -2465,7 +2436,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( resolution_square_scale; system_constants_.edram_resolution_square_scale = resolution_square_scale; - uint32_t depth_base_dwords = (rb_depth_info & 0xFFF) * 1280; + uint32_t depth_base_dwords = rb_depth_info.depth_base * 1280; dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; @@ -2474,7 +2445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float depth_range_scale = std::abs(viewport_scale_z); dirty |= system_constants_.edram_depth_range_scale != depth_range_scale; system_constants_.edram_depth_range_scale = depth_range_scale; - float depth_range_offset = (pa_cl_vte_cntl & (1 << 5)) + 
float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; if (viewport_scale_z < 0.0f) { @@ -2490,20 +2461,20 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f; float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f; if (primitive_two_faced) { - if (pa_su_sc_mode_cntl & (1 << 11)) { + if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_front_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; } - if (pa_su_sc_mode_cntl & (1 << 12)) { + if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; poly_offset_back_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; } } else { - if (pa_su_sc_mode_cntl & (1 << 13)) { + if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_front_offset = @@ -2533,39 +2504,43 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( poly_offset_back_offset; system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset; - if (rb_depthcontrol & 0x1) { - uint32_t stencil_value; - - stencil_value = rb_stencilrefmask & 0xFF; - dirty |= system_constants_.edram_stencil_front_reference != stencil_value; - system_constants_.edram_stencil_front_reference = stencil_value; - stencil_value = (rb_stencilrefmask >> 8) & 0xFF; - dirty |= system_constants_.edram_stencil_front_read_mask != stencil_value; - system_constants_.edram_stencil_front_read_mask = stencil_value; - stencil_value = (rb_stencilrefmask >> 16) & 0xFF; + if (depth_stencil_enabled && rb_depthcontrol.stencil_enable) { + dirty |= system_constants_.edram_stencil_front_reference != + rb_stencilrefmask.stencilref; + system_constants_.edram_stencil_front_reference = + 
rb_stencilrefmask.stencilref; + dirty |= system_constants_.edram_stencil_front_read_mask != + rb_stencilrefmask.stencilmask; + system_constants_.edram_stencil_front_read_mask = + rb_stencilrefmask.stencilmask; + dirty |= system_constants_.edram_stencil_front_write_mask != + rb_stencilrefmask.stencilwritemask; + system_constants_.edram_stencil_front_write_mask = + rb_stencilrefmask.stencilwritemask; + uint32_t stencil_func_ops = + (rb_depthcontrol.value >> 8) & ((1 << 12) - 1); dirty |= - system_constants_.edram_stencil_front_write_mask != stencil_value; - system_constants_.edram_stencil_front_write_mask = stencil_value; - stencil_value = (rb_depthcontrol >> 8) & ((1 << 12) - 1); - dirty |= system_constants_.edram_stencil_front_func_ops != stencil_value; - system_constants_.edram_stencil_front_func_ops = stencil_value; + system_constants_.edram_stencil_front_func_ops != stencil_func_ops; + system_constants_.edram_stencil_front_func_ops = stencil_func_ops; - if (primitive_two_faced && (rb_depthcontrol & 0x80)) { - stencil_value = rb_stencilrefmask_bf & 0xFF; - dirty |= - system_constants_.edram_stencil_back_reference != stencil_value; - system_constants_.edram_stencil_back_reference = stencil_value; - stencil_value = (rb_stencilrefmask_bf >> 8) & 0xFF; - dirty |= - system_constants_.edram_stencil_back_read_mask != stencil_value; - system_constants_.edram_stencil_back_read_mask = stencil_value; - stencil_value = (rb_stencilrefmask_bf >> 16) & 0xFF; - dirty |= - system_constants_.edram_stencil_back_write_mask != stencil_value; - system_constants_.edram_stencil_back_write_mask = stencil_value; - stencil_value = (rb_depthcontrol >> 20) & ((1 << 12) - 1); - dirty |= system_constants_.edram_stencil_back_func_ops != stencil_value; - system_constants_.edram_stencil_back_func_ops = stencil_value; + if (primitive_two_faced && rb_depthcontrol.backface_enable) { + dirty |= system_constants_.edram_stencil_back_reference != + rb_stencilrefmask_bf.stencilref; + 
system_constants_.edram_stencil_back_reference = + rb_stencilrefmask_bf.stencilref; + dirty |= system_constants_.edram_stencil_back_read_mask != + rb_stencilrefmask_bf.stencilmask; + system_constants_.edram_stencil_back_read_mask = + rb_stencilrefmask_bf.stencilmask; + dirty |= system_constants_.edram_stencil_back_write_mask != + rb_stencilrefmask_bf.stencilwritemask; + system_constants_.edram_stencil_back_write_mask = + rb_stencilrefmask_bf.stencilwritemask; + uint32_t stencil_func_ops_bf = + (rb_depthcontrol.value >> 20) & ((1 << 12) - 1); + dirty |= system_constants_.edram_stencil_back_func_ops != + stencil_func_ops_bf; + system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; } else { dirty |= std::memcmp(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 89286be18..67d20b3da 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -207,8 +207,17 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - xenos::xe_gpu_program_cntl_t sq_program_cntl; - sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>(); + + // Normal vertex shaders only, for now.
+ assert_true(sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition1Vector || + sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition2VectorsSprite || + sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass); + assert_false(sq_program_cntl.gen_index_vtx); + if (!vertex_shader->is_translated() && !TranslateShader(vertex_shader, sq_program_cntl, tessellated, primitive_type)) { @@ -294,8 +303,7 @@ bool PipelineCache::ConfigurePipeline( } bool PipelineCache::TranslateShader(D3D12Shader* shader, - xenos::xe_gpu_program_cntl_t cntl, - bool tessellated, + reg::SQ_PROGRAM_CNTL cntl, bool tessellated, PrimitiveType primitive_type) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. @@ -385,12 +393,12 @@ bool PipelineCache::GetCurrentStateDescription( // Primitive topology type, tessellation mode and geometry shader. if (tessellated) { - switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { - case TessellationMode::kContinuous: + switch (regs.Get<reg::VGT_HOS_CNTL>().tess_mode) { + case xenos::TessellationMode::kContinuous: description_out.tessellation_mode = PipelineTessellationMode::kContinuous; break; - case TessellationMode::kAdaptive: + case xenos::TessellationMode::kAdaptive: description_out.tessellation_mode = cvars::d3d12_tessellation_adaptive ? PipelineTessellationMode::kAdaptive @@ -559,20 +567,10 @@ bool PipelineCache::GetCurrentStateDescription( // CLIP_DISABLE description_out.depth_clip = (regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0; - // TODO(DrChat): This seem to differ. Need to examine this. - // https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11 - // ZCLIP_NEAR_DISABLE - // description_out.depth_clip = (PA_CL_CLIP_CNTL & (1 << 26)) == 0; - // RASTERIZER_DISABLE - // Disable rendering in command processor if PA_CL_CLIP_CNTL & (1 << 22)?
if (edram_rov_used_) { description_out.rov_msaa = ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; - } - - if (!edram_rov_used_) { - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - + } else { // Depth/stencil. No stencil, always passing depth test and no depth writing // means depth disabled. if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { @@ -711,7 +709,7 @@ bool PipelineCache::GetCurrentStateDescription( rt.format = RenderTargetCache::GetBaseColorFormat( ColorRenderTargetFormat((color_info >> 16) & 0xF)); rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; - if (!(rb_colorcontrol & 0x20) && rt.write_mask) { + if (rt.write_mask) { rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 179b851d0..ba5a1a4b6 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -171,7 +171,7 @@ class PipelineCache { PipelineRenderTarget render_targets[4]; }; - bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, + bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl, bool tessellated, PrimitiveType primitive_type); bool GetCurrentStateDescription( diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index e199832dd..11d2c6e69 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -34,7 +34,6 @@ namespace d3d12 { constexpr uint32_t SharedMemory::kBufferSizeLog2; constexpr uint32_t SharedMemory::kBufferSize; -constexpr uint32_t SharedMemory::kAddressMask; constexpr uint32_t SharedMemory::kHeapSizeLog2; constexpr uint32_t SharedMemory::kHeapSize; constexpr uint32_t SharedMemory::kWatchBucketSizeLog2; @@ -198,10 +197,9 @@ void SharedMemory::UnregisterGlobalWatch(GlobalWatchHandle handle) { 
SharedMemory::WatchHandle SharedMemory::WatchMemoryRange( uint32_t start, uint32_t length, WatchCallback callback, void* callback_context, void* callback_data, uint64_t callback_argument) { - if (length == 0) { + if (length == 0 || start >= kBufferSize) { return nullptr; } - start &= kAddressMask; length = std::min(length, kBufferSize - start); uint32_t watch_page_first = start >> page_size_log2_; uint32_t watch_page_last = (start + length - 1) >> page_size_log2_; @@ -278,9 +276,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { // Some texture is empty, for example - safe to draw in this case. return true; } - start &= kAddressMask; - if ((kBufferSize - start) < length) { - // Exceeds the physical address space. + if (start > kBufferSize || (kBufferSize - start) < length) { return false; } @@ -343,9 +339,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { // Some texture is empty, for example - safe to draw in this case. return true; } - start &= kAddressMask; - if ((kBufferSize - start) < length) { - // Exceeds the physical address space. + if (start > kBufferSize || (kBufferSize - start) < length) { return false; } uint32_t last = start + length - 1; @@ -433,8 +427,7 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last, } void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { - start &= kAddressMask; - if (length == 0) { + if (length == 0 || start >= kBufferSize) { return; } length = std::min(length, kBufferSize - start); diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index dc1869fbd..1b2d03834 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -138,7 +138,6 @@ class SharedMemory { // The 512 MB tiled buffer. 
static constexpr uint32_t kBufferSizeLog2 = 29; static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; - static constexpr uint32_t kAddressMask = kBufferSize - 1; ID3D12Resource* buffer_ = nullptr; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0; D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 5e785bf6e..f1de01c3d 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -416,9 +416,13 @@ void DxbcShaderTranslator::ConvertPWLGamma( } void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { + if (register_count() < 1) { + return; + } + // Vertex index is in an input bound to SV_VertexID, byte swapped according to - // xe_vertex_index_endian_and_edge_factors system constant and written to GPR - // 0 (which is always present because register_count includes +1). + // xe_vertex_index_endian_and_edge_factors system constant and written to + // GPR 0. // xe_vertex_index_endian_and_edge_factors & 0b11 is: // - 00 for no swap. @@ -756,157 +760,161 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Write the vertex index to GPR 0. StartVertexShader_LoadVertexIndex(); } else if (IsDxbcDomainShader()) { - uint32_t temp_register_operand_length = - uses_register_dynamic_addressing() ? 3 : 2; - - // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for - // triangle patches), and also set the domain in STAT. - uint32_t domain_location_mask, domain_location_swizzle; - if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { - domain_location_mask = 0b0111; - // ZYX swizzle with r1.y == 0, according to the water shader in - // Banjo-Kazooie: Nuts & Bolts. - domain_location_swizzle = 0b00000110; - stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; - } else { - // TODO(Triang3l): Support line patches. 
- assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); - // According to the ground shader in Viva Pinata, though it's impossible - // (as of December 12th, 2018) to test there since it possibly requires - // memexport for ground control points (the memory region with them is - // filled with zeros). - domain_location_mask = 0b0110; - domain_location_swizzle = 0b00000100; - stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; - } - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( - 2 + temp_register_operand_length)); - if (uses_register_dynamic_addressing()) { - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2)); - shader_code_.push_back(0); - } else { - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1)); - } - shader_code_.push_back(0); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, domain_location_swizzle, 0)); - ++stat_.instruction_count; - if (uses_register_dynamic_addressing()) { - ++stat_.array_instruction_count; - } else { - ++stat_.mov_instruction_count; - } - assert_true(register_count() >= 2); + if (register_count() != 0) { + uint32_t temp_register_operand_length = + uses_register_dynamic_addressing() ? 3 : 2; - // Copy the primitive index to r0.x (for quad patches) or r1.x (for - // triangle patches) as a float. - // When using indexable temps, copy through a r# because x# are apparently - // only accessible via mov. - // TODO(Triang3l): Investigate what should be written for primitives (or - // even control points) for non-adaptive tessellation modes (they may - // possibly have an index buffer). - // TODO(Triang3l): Support line patches. - uint32_t primitive_id_gpr_index = - patch_primitive_type() == PrimitiveType::kTrianglePatch ? 
1 : 0; - - if (register_count() > primitive_id_gpr_index) { - uint32_t primitive_id_temp = uses_register_dynamic_addressing() - ? PushSystemTemp() - : primitive_id_gpr_index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(primitive_id_temp); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0)); - ++stat_.instruction_count; - ++stat_.conversion_instruction_count; - if (uses_register_dynamic_addressing()) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2)); - shader_code_.push_back(0); - shader_code_.push_back(primitive_id_gpr_index); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(primitive_id_temp); - ++stat_.instruction_count; - ++stat_.array_instruction_count; - // Release primitive_id_temp. - PopSystemTemp(); + // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for + // triangle patches), and also set the domain in STAT. + uint32_t domain_location_mask, domain_location_swizzle; + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { + domain_location_mask = 0b0111; + // ZYX swizzle with r1.y == 0, according to the water shader in + // Banjo-Kazooie: Nuts & Bolts. + domain_location_swizzle = 0b00000110; + stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; + } else { + // TODO(Triang3l): Support line patches. 
+ assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); + // According to the ground shader in Viva Pinata, though it's impossible + // (as of December 12th, 2018) to test there since it possibly requires + // memexport for ground control points (the memory region with them is + // filled with zeros). + domain_location_mask = 0b0110; + domain_location_swizzle = 0b00000100; + stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; } - } - - if (register_count() >= 2) { - // Write the swizzle of the barycentric/UV coordinates to r1.x (for quad - // patches) or r1.y (for triangle patches). It appears that the - // tessellator offloads the reordering of coordinates for edges to game - // shaders. - // - // In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge - // factors), the shader multiplies the first control point's position by - // r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before - // doing that it swizzles r0.xyz the following way depending on the value - // in r1.y: - // - ZXY for 1.0. - // - YZX for 2.0. - // - XZY for 4.0. - // - YXZ for 5.0. - // - ZYX for 6.0. - // Possibly, the logic here is that the value itself is the amount of - // rotation of the swizzle to the right, and 1 << 2 is set when the - // swizzle needs to be flipped before rotating. - // - // In Viva Pinata (quad patches with per-edge factors - not possible to - // test however as of December 12th, 2018), if we assume that r0.y is V - // and r0.z is U, the factors each control point value is multiplied by - // are the following: - // - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle). - // - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ). - // - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX). - // - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY). 
- // According to the control point order at - // https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt - // the first is located at (0,0), the second at (0,1), the third at (1,0) - // and the fourth at (1,1). So, swizzle index 0 appears to be the correct - // one. But, this hasn't been tested yet. - // - // Direct3D 12 appears to be passing the coordinates in a consistent - // order, so we can just use ZYX for triangle patches. - // - // TODO(Triang3l): Support line patches. - uint32_t domain_location_swizzle_mask = - patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010 - : 0b0001; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( - 3 + temp_register_operand_length)); + 2 + temp_register_operand_length)); if (uses_register_dynamic_addressing()) { - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, - domain_location_swizzle_mask, 2)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2)); shader_code_.push_back(0); } else { shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1)); + D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1)); } - shader_code_.push_back(1); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back(0); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, + domain_location_swizzle, 0)); ++stat_.instruction_count; if (uses_register_dynamic_addressing()) { ++stat_.array_instruction_count; } else { ++stat_.mov_instruction_count; } + + // Copy the primitive index to r0.x (for quad patches) or r1.x (for + // triangle patches) as a float. + // When using indexable temps, copy through a r# because x# are apparently + // only accessible via mov. 
+ // TODO(Triang3l): Investigate what should be written for primitives (or + // even control points) for non-adaptive tessellation modes (they may + // possibly have an index buffer). + // TODO(Triang3l): Support line patches. + uint32_t primitive_id_gpr_index = + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0; + + if (register_count() > primitive_id_gpr_index) { + uint32_t primitive_id_temp = uses_register_dynamic_addressing() + ? PushSystemTemp() + : primitive_id_gpr_index; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(primitive_id_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0)); + ++stat_.instruction_count; + ++stat_.conversion_instruction_count; + if (uses_register_dynamic_addressing()) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2)); + shader_code_.push_back(0); + shader_code_.push_back(primitive_id_gpr_index); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(primitive_id_temp); + ++stat_.instruction_count; + ++stat_.array_instruction_count; + // Release primitive_id_temp. + PopSystemTemp(); + } + } + + if (register_count() >= 2) { + // Write the swizzle of the barycentric/UV coordinates to r1.x (for quad + // patches) or r1.y (for triangle patches). It appears that the + // tessellator offloads the reordering of coordinates for edges to game + // shaders. 
+ // + // In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge + // factors), the shader multiplies the first control point's position by + // r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before + // doing that it swizzles r0.xyz the following way depending on the + // value in r1.y: + // - ZXY for 1.0. + // - YZX for 2.0. + // - XZY for 4.0. + // - YXZ for 5.0. + // - ZYX for 6.0. + // Possibly, the logic here is that the value itself is the amount of + // rotation of the swizzle to the right, and 1 << 2 is set when the + // swizzle needs to be flipped before rotating. + // + // In Viva Pinata (quad patches with per-edge factors - not possible to + // test however as of December 12th, 2018), if we assume that r0.y is V + // and r0.z is U, the factors each control point value is multiplied by + // are the following: + // - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle). + // - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ). + // - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX). + // - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY). + // According to the control point order at + // https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt + // the first is located at (0,0), the second at (0,1), the third at + // (1,0) and the fourth at (1,1). So, swizzle index 0 appears to be the + // correct one. But, this hasn't been tested yet. + // + // Direct3D 12 appears to be passing the coordinates in a consistent + // order, so we can just use ZYX for triangle patches. + // + // TODO(Triang3l): Support line patches. + uint32_t domain_location_swizzle_mask = + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 
0b0010 + : 0b0001; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( + 3 + temp_register_operand_length)); + if (uses_register_dynamic_addressing()) { + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, + domain_location_swizzle_mask, 2)); + shader_code_.push_back(0); + } else { + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1)); + } + shader_code_.push_back(1); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0); + ++stat_.instruction_count; + if (uses_register_dynamic_addressing()) { + ++stat_.array_instruction_count; + } else { + ++stat_.mov_instruction_count; + } + } } } } @@ -4796,6 +4804,7 @@ void DxbcShaderTranslator::WriteShaderCode() { // General-purpose registers if using dynamic indexing (x0). if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) { + assert_true(register_count() != 0); shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 73451422f..28db647bc 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -503,6 +503,9 @@ class DxbcShaderTranslator : public ShaderTranslator { kVSOutPosition, kVSOutClipDistance0123, kVSOutClipDistance45, + // TODO(Triang3l): Use SV_CullDistance instead for + // PA_CL_CLIP_CNTL::UCP_CULL_ONLY_ENA, but can't have more than 8 clip and + // cull distances in total. 
kPSInInterpolators = 0, kPSInPointParameters = kPSInInterpolators + kInterpolatorCount, diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index 524cea772..e54e2db72 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -13,15 +13,11 @@ #include #include +#include "xenia/gpu/registers.h" + namespace xe { namespace gpu { -enum Register { -#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index, -#include "xenia/gpu/register_table.inc" -#undef XE_GPU_REGISTER -}; - struct RegisterInfo { enum class Type { kDword, @@ -44,8 +40,20 @@ class RegisterFile { }; RegisterValue values[kRegisterCount]; - RegisterValue& operator[](int reg) { return values[reg]; } + RegisterValue& operator[](uint32_t reg) { return values[reg]; } RegisterValue& operator[](Register reg) { return values[reg]; } + template + T& Get(uint32_t reg) { + return *reinterpret_cast(&values[reg]); + } + template + T& Get(Register reg) { + return *reinterpret_cast(&values[reg]); + } + template + T& Get() { + return *reinterpret_cast(&values[T::register_index]); + } }; } // namespace gpu diff --git a/src/xenia/gpu/registers.cc b/src/xenia/gpu/registers.cc new file mode 100644 index 000000000..4215e3352 --- /dev/null +++ b/src/xenia/gpu/registers.cc @@ -0,0 +1,51 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2019 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/registers.h" + +namespace xe { +namespace gpu { +namespace reg { + +constexpr uint32_t COHER_STATUS_HOST::register_index; +constexpr uint32_t WAIT_UNTIL::register_index; + +constexpr uint32_t SQ_PROGRAM_CNTL::register_index; +constexpr uint32_t SQ_CONTEXT_MISC::register_index; + +constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index; +constexpr uint32_t VGT_HOS_CNTL::register_index; + +constexpr uint32_t PA_SU_POINT_MINMAX::register_index; +constexpr uint32_t PA_SU_POINT_SIZE::register_index; +constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index; +constexpr uint32_t PA_SU_VTX_CNTL::register_index; +constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index; +constexpr uint32_t PA_SC_VIZ_QUERY::register_index; +constexpr uint32_t PA_CL_CLIP_CNTL::register_index; +constexpr uint32_t PA_CL_VTE_CNTL::register_index; +constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index; +constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index; +constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index; + +constexpr uint32_t RB_MODECONTROL::register_index; +constexpr uint32_t RB_SURFACE_INFO::register_index; +constexpr uint32_t RB_COLORCONTROL::register_index; +constexpr uint32_t RB_COLOR_INFO::register_index; +constexpr uint32_t RB_COLOR_MASK::register_index; +constexpr uint32_t RB_DEPTHCONTROL::register_index; +constexpr uint32_t RB_STENCILREFMASK::register_index; +constexpr uint32_t RB_DEPTH_INFO::register_index; +constexpr uint32_t RB_COPY_CONTROL::register_index; +constexpr uint32_t RB_COPY_DEST_INFO::register_index; +constexpr uint32_t RB_COPY_DEST_PITCH::register_index; + +} // namespace reg +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 215363b33..a0fc9e279 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -20,15 +20,22 @@ // 
https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h namespace xe { namespace gpu { + +enum Register { +#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index, +#include "xenia/gpu/register_table.inc" +#undef XE_GPU_REGISTER +}; + namespace reg { -/************************************************** +/******************************************************************************* ___ ___ _ _ _____ ___ ___ _ / __/ _ \| \| |_ _| _ \/ _ \| | | (_| (_) | .` | | | | / (_) | |__ \___\___/|_|\_| |_| |_|_\\___/|____| -***************************************************/ +*******************************************************************************/ union COHER_STATUS_HOST { xe::bf matching_contexts; @@ -49,6 +56,7 @@ union COHER_STATUS_HOST { xe::bf status; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST; }; union WAIT_UNTIL { @@ -69,9 +77,82 @@ union WAIT_UNTIL { xe::bf cmdfifo_entries; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL; }; -/************************************************** +/******************************************************************************* + ___ ___ ___ _ _ ___ _ _ ___ ___ ___ + / __| __/ _ \| | | | __| \| |/ __| __| _ \ + \__ \ _| (_) | |_| | _|| .` | (__| _|| / + |___/___\__\_\\___/|___|_|\_|\___|___|_|_\ + +*******************************************************************************/ + +union SQ_PROGRAM_CNTL { + // Note from a2xx.xml: + // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but + // high bit is set to indicate "0 registers used". 
+ xe::bf vs_num_reg; + xe::bf ps_num_reg; + xe::bf vs_resource; + xe::bf ps_resource; + xe::bf param_gen; + xe::bf gen_index_pix; + xe::bf vs_export_count; + xe::bf vs_export_mode; + xe::bf ps_export_mode; + xe::bf gen_index_vtx; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; +}; + +union SQ_CONTEXT_MISC { + xe::bf inst_pred_optimize; + xe::bf sc_output_screen_xy; + xe::bf sc_sample_cntl; + xe::bf param_gen_pos; + xe::bf perfcounter_ref; + xe::bf yeild_optimize; // sic + xe::bf tx_cache_sel; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC; +}; + +/******************************************************************************* + __ _____ ___ _____ _____ __ + \ \ / / __| _ \_ _| __\ \/ / + \ V /| _|| / | | | _| > < + \_/ |___|_|_\ |_| |___/_/\_\ + + ___ ___ ___ _ _ ___ ___ ___ _ _ _ ___ + / __| _ \/ _ \| | | | _ \ __| _ \ /_\ | \| | \ + | (_ | / (_) | |_| | _/ _|| / / _ \| .` | |) | + \___|_|_\\___/ \___/|_| |___|_|_\ /_/ \_\_|\_|___/ + + _____ ___ ___ ___ ___ _ _ _ _____ ___ ___ + |_ _| __/ __/ __| __| | | | /_\_ _/ _ \| _ \ + | | | _|\__ \__ \ _|| |__| |__ / _ \| || (_) | / + |_| |___|___/___/___|____|____/_/ \_\_| \___/|_|_\ + +*******************************************************************************/ + +union VGT_OUTPUT_PATH_CNTL { + xe::bf path_select; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; +}; + +union VGT_HOS_CNTL { + xe::bf tess_mode; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL; +}; + +/******************************************************************************* ___ ___ ___ __ __ ___ _____ _____ _____ | _ \ _ \_ _| \/ |_ _|_ _|_ _\ \ / / __| | _/ /| || |\/| || | | | | | \ V /| _| @@ -82,7 +163,25 @@ union WAIT_UNTIL { / _ \\__ \__ \ _|| |\/| | _ \ |__| _|| / /_/ \_\___/___/___|_| |_|___/____|___|_|_\ -***************************************************/ 
+*******************************************************************************/ + +union PA_SU_POINT_MINMAX { + // Radius, 12.4 fixed point. + xe::bf min_size; + xe::bf max_size; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; +}; + +union PA_SU_POINT_SIZE { + // 1/2 width or height, 12.4 fixed point. + xe::bf height; + xe::bf width; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE; +}; // Setup Unit / Scanline Converter mode cntl union PA_SU_SC_MODE_CNTL { @@ -110,6 +209,7 @@ union PA_SU_SC_MODE_CNTL { xe::bf wait_rb_idle_first_tri_new_state; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; // Setup Unit Vertex Control @@ -119,6 +219,7 @@ union PA_SU_VTX_CNTL { xe::bf quant_mode; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; union PA_SC_MPASS_PS_CNTL { @@ -126,6 +227,7 @@ union PA_SC_MPASS_PS_CNTL { xe::bf mpass_ps_ena; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; }; // Scanline converter viz query @@ -135,11 +237,10 @@ union PA_SC_VIZ_QUERY { xe::bf kill_pix_post_early_z; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; // Clipper clip control -// TODO(DrChat): This seem to differ. Need to examine this. 
-// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11 union PA_CL_CLIP_CNTL { xe::bf ucp_ena_0; xe::bf ucp_ena_1; @@ -160,6 +261,7 @@ union PA_CL_CLIP_CNTL { xe::bf w_nan_retain; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; }; // Viewport transform engine control @@ -177,6 +279,7 @@ union PA_CL_VTE_CNTL { xe::bf perfcounter_ref; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; union PA_SC_WINDOW_OFFSET { @@ -184,6 +287,7 @@ union PA_SC_WINDOW_OFFSET { xe::bf window_y_offset; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; union PA_SC_WINDOW_SCISSOR_TL { @@ -192,6 +296,7 @@ union PA_SC_WINDOW_SCISSOR_TL { xe::bf window_offset_disable; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; }; union PA_SC_WINDOW_SCISSOR_BR { @@ -199,20 +304,22 @@ union PA_SC_WINDOW_SCISSOR_BR { xe::bf br_y; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; -/************************************************** +/******************************************************************************* ___ ___ | _ \ _ ) | / _ \ |_|_\___/ -***************************************************/ +*******************************************************************************/ union RB_MODECONTROL { xe::bf edram_mode; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL; }; union RB_SURFACE_INFO { @@ -221,27 +328,83 @@ union RB_SURFACE_INFO { xe::bf hiz_pitch; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO; }; union RB_COLORCONTROL { - xe::bf alpha_func; + xe::bf alpha_func; xe::bf alpha_test_enable; xe::bf alpha_to_mask_enable; - + // Everything in between was added on Adreno, not in game PDBs and never set. 
xe::bf alpha_to_mask_offset0; xe::bf alpha_to_mask_offset1; xe::bf alpha_to_mask_offset2; xe::bf alpha_to_mask_offset3; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL; }; union RB_COLOR_INFO { xe::bf color_base; xe::bf color_format; - xe::bf color_exp_bias; + xe::bf color_exp_bias; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO; + // RB_COLOR[1-3]_INFO also use this format. +}; + +union RB_COLOR_MASK { + xe::bf write_red0; + xe::bf write_green0; + xe::bf write_blue0; + xe::bf write_alpha0; + xe::bf write_red1; + xe::bf write_green1; + xe::bf write_blue1; + xe::bf write_alpha1; + xe::bf write_red2; + xe::bf write_green2; + xe::bf write_blue2; + xe::bf write_alpha2; + xe::bf write_red3; + xe::bf write_green3; + xe::bf write_blue3; + xe::bf write_alpha3; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK; +}; + +union RB_DEPTHCONTROL { + xe::bf stencil_enable; + xe::bf z_enable; + xe::bf z_write_enable; + // EARLY_Z_ENABLE was added on Adreno. + xe::bf zfunc; + xe::bf backface_enable; + xe::bf stencilfunc; + xe::bf stencilfail; + xe::bf stencilzpass; + xe::bf stencilzfail; + xe::bf stencilfunc_bf; + xe::bf stencilfail_bf; + xe::bf stencilzpass_bf; + xe::bf stencilzfail_bf; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL; +}; + +union RB_STENCILREFMASK { + xe::bf stencilref; + xe::bf stencilmask; + xe::bf stencilwritemask; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK; + // RB_STENCILREFMASK_BF also uses this format. 
}; union RB_DEPTH_INFO { @@ -249,6 +412,7 @@ union RB_DEPTH_INFO { xe::bf depth_format; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO; }; union RB_COPY_CONTROL { @@ -260,6 +424,7 @@ union RB_COPY_CONTROL { xe::bf copy_command; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL; }; union RB_COPY_DEST_INFO { @@ -268,10 +433,11 @@ union RB_COPY_DEST_INFO { xe::bf copy_dest_slice; xe::bf copy_dest_format; xe::bf copy_dest_number; - xe::bf copy_dest_exp_bias; + xe::bf copy_dest_exp_bias; xe::bf copy_dest_swap; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; union RB_COPY_DEST_PITCH { @@ -279,9 +445,11 @@ union RB_COPY_DEST_PITCH { xe::bf copy_dest_height; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; } // namespace reg + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 8680577ae..940db871b 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -108,10 +108,12 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { } bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, - xenos::xe_gpu_program_cntl_t cntl) { + reg::SQ_PROGRAM_CNTL cntl) { Reset(); - register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 - : cntl.ps_regs + 1; + uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex + ? cntl.vs_num_reg.value() + : cntl.ps_num_reg.value(); + register_count_ = (cntl_num_reg & 0x80) ? 
0 : (cntl_num_reg + 1); return TranslateInternal(shader, patch_type); } diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 7a12abc10..537606eb3 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -17,6 +17,7 @@ #include "xenia/base/math.h" #include "xenia/base/string_buffer.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/shader.h" #include "xenia/gpu/ucode.h" #include "xenia/gpu/xenos.h" @@ -33,7 +34,7 @@ class ShaderTranslator { bool GatherAllBindingInformation(Shader* shader); bool Translate(Shader* shader, PrimitiveType patch_type, - xenos::xe_gpu_program_cntl_t cntl); + reg::SQ_PROGRAM_CNTL cntl); bool Translate(Shader* shader, PrimitiveType patch_type); protected: @@ -232,7 +233,7 @@ class ShaderTranslator { PrimitiveType patch_primitive_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; - xenos::xe_gpu_program_cntl_t program_cntl_; + reg::SQ_PROGRAM_CNTL program_cntl_; uint32_t register_count_; // Accumulated translation errors. 
diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 9f9f6da5c..c1d151942 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -93,6 +93,7 @@ void SpirvShaderTranslator::StartTranslation() { b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", {}, {}, &function_block); + assert_not_zero(register_count()); registers_type_ = b.makeArrayType(vec4_float_type_, b.makeUintConstant(register_count()), 0); registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 25d822daf..4258061f1 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -364,7 +364,7 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, } bool PipelineCache::TranslateShader(VulkanShader* shader, - xenos::xe_gpu_program_cntl_t cntl) { + reg::SQ_PROGRAM_CNTL cntl) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. 
if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) { @@ -808,8 +808,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, } bool push_constants_dirty = full_update || viewport_state_dirty; - push_constants_dirty |= - SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + push_constants_dirty |= SetShadowRegister(®s.sq_program_cntl.value, + XE_GPU_REG_SQ_PROGRAM_CNTL); push_constants_dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); push_constants_dirty |= @@ -827,25 +827,14 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants_dirty |= SetShadowRegister(®s.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE); if (push_constants_dirty) { - xenos::xe_gpu_program_cntl_t program_cntl; - program_cntl.dword_0 = regs.sq_program_cntl; - // Normal vertex shaders only, for now. - // TODO(benvanik): transform feedback/memexport. - // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h - // Draw calls skipped if they have unsupported export modes. - // 0 = positionOnly - // 1 = unused - // 2 = sprite - // 3 = edge - // 4 = kill - // 5 = spriteKill - // 6 = edgeKill - // 7 = multipass - assert_true(program_cntl.vs_export_mode == 0 || - program_cntl.vs_export_mode == 2 || - program_cntl.vs_export_mode == 7); - assert_false(program_cntl.gen_index_vtx); + assert_true(regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition1Vector || + regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition2VectorsSprite || + regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass); + assert_false(regs.sq_program_cntl.gen_index_vtx); SpirvPushConstants push_constants = {}; @@ -909,7 +898,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, // Whether to populate a register in the pixel shader with frag coord. 
int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; - push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1; + push_constants.ps_param_gen = + regs.sq_program_cntl.param_gen ? ps_param_gen : -1; vkCmdPushConstants(command_buffer, pipeline_layout_, VK_SHADER_STAGE_VERTEX_BIT | @@ -1061,7 +1051,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl.value, + XE_GPU_REG_SQ_PROGRAM_CNTL); dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.primitive_type != primitive_type; @@ -1073,17 +1064,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( return UpdateStatus::kCompatible; } - xenos::xe_gpu_program_cntl_t sq_program_cntl; - sq_program_cntl.dword_0 = regs.sq_program_cntl; - if (!vertex_shader->is_translated() && - !TranslateShader(vertex_shader, sq_program_cntl)) { + !TranslateShader(vertex_shader, regs.sq_program_cntl)) { XELOGE("Failed to translate the vertex shader!"); return UpdateStatus::kError; } if (pixel_shader && !pixel_shader->is_translated() && - !TranslateShader(pixel_shader, sq_program_cntl)) { + !TranslateShader(pixel_shader, regs.sq_program_cntl)) { XELOGE("Failed to translate the pixel shader!"); return UpdateStatus::kError; } @@ -1513,7 +1501,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { auto& state_info = update_color_blend_state_info_; bool dirty = false; - dirty |= SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); @@ -1568,7 +1555,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { for (int i = 0; i < 4; 
++i) { uint32_t blend_control = regs.rb_blendcontrol[i]; auto& attachment_state = attachment_states[i]; - attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20); + attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001; // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND attachment_state.srcColorBlendFactor = kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 26db40605..80035d25f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -79,7 +79,7 @@ class PipelineCache { // state. VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); - bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl); + bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl); void DumpShaderDisasmAMD(VkPipeline pipeline); void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); @@ -170,7 +170,7 @@ class PipelineCache { struct UpdateShaderStagesRegisters { PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; + reg::SQ_PROGRAM_CNTL sq_program_cntl; VulkanShader* vertex_shader; VulkanShader* pixel_shader; @@ -256,7 +256,6 @@ class PipelineCache { VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; struct UpdateColorBlendStateRegisters { - uint32_t rb_colorcontrol; uint32_t rb_color_mask; uint32_t rb_blendcontrol[4]; uint32_t rb_modecontrol; @@ -290,7 +289,7 @@ class PipelineCache { float rb_blend_rgba[4]; uint32_t rb_stencilrefmask; - uint32_t sq_program_cntl; + reg::SQ_PROGRAM_CNTL sq_program_cntl; uint32_t sq_context_misc; uint32_t rb_colorcontrol; uint32_t rb_color_info; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index a43b807e0..2175e5e5d 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -80,12 +80,6 @@ inline bool IsPrimitiveTwoFaced(bool tessellated, PrimitiveType type) { return false; 
} -enum class TessellationMode : uint32_t { - kDiscrete = 0, - kContinuous = 1, - kAdaptive = 2, -}; - enum class Dimension : uint32_t { k1D = 0, k2D = 1, @@ -334,6 +328,28 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) { } } +enum class CompareFunction : uint32_t { + kNever = 0b000, + kLess = 0b001, + kEqual = 0b010, + kLessEqual = 0b011, + kGreater = 0b100, + kNotEqual = 0b101, + kGreaterEqual = 0b110, + kAlways = 0b111, +}; + +enum class StencilOp : uint32_t { + kKeep = 0, + kZero = 1, + kReplace = 2, + kIncrementClamp = 3, + kDecrementClamp = 4, + kInvert = 5, + kIncrementWrap = 6, + kDecrementWrap = 7, +}; + // adreno_rb_blend_factor enum class BlendFactor : uint32_t { kZero = 0, @@ -375,6 +391,35 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// a2xx_sq_ps_vtx_mode +enum class VertexShaderExportMode : uint32_t { + kPosition1Vector = 0, + kPosition2VectorsSprite = 2, + kPosition2VectorsEdge = 3, + kPosition2VectorsKill = 4, + kPosition2VectorsSpriteKill = 5, + kPosition2VectorsEdgeKill = 6, + kMultipass = 7, +}; + +enum class SampleControl : uint32_t { + kCentroidsOnly = 0, + kCentersOnly = 1, + kCentroidsAndCenters = 2, +}; + +enum class VGTOutputPath : uint32_t { + kVertexReuse = 0, + kTessellationEnable = 1, + kPassthru = 2, +}; + +enum class TessellationMode : uint32_t { + kDiscrete = 0, + kContinuous = 1, + kAdaptive = 2, +}; + enum class ModeControl : uint32_t { kIgnore = 0, kColorDepth = 4, @@ -471,26 +516,6 @@ inline uint32_t GpuToCpu(uint32_t p) { return p; } inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; } -// XE_GPU_REG_SQ_PROGRAM_CNTL -typedef union { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t vs_regs : 6; - uint32_t unk_0 : 2; - uint32_t ps_regs : 6; - uint32_t unk_1 : 2; - uint32_t vs_resource : 1; - uint32_t ps_resource : 1; - uint32_t param_gen : 1; - uint32_t gen_index_pix : 1; - uint32_t vs_export_count : 4; - uint32_t vs_export_mode : 3; - uint32_t ps_export_depth : 1; - uint32_t 
ps_export_count : 3; - uint32_t gen_index_vtx : 1; - }); - XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; }); -} xe_gpu_program_cntl_t; - // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ From a9ed73bdd122207fcaa35c9bf422c38aaf8fbcbd Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 20 Oct 2019 19:40:37 +0300 Subject: [PATCH 3/4] [GPU] Remove most hardcoded register/instruction layouts from common and D3D12 code --- src/xenia/base/bit_field.h | 51 -- src/xenia/gpu/command_processor.cc | 13 +- src/xenia/gpu/command_processor.h | 2 +- .../gpu/d3d12/d3d12_command_processor.cc | 29 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 185 +++--- src/xenia/gpu/d3d12/pipeline_cache.h | 20 +- src/xenia/gpu/d3d12/primitive_converter.cc | 2 +- src/xenia/gpu/d3d12/render_target_cache.cc | 260 ++++---- src/xenia/gpu/d3d12/texture_cache.cc | 62 +- src/xenia/gpu/register_table.inc | 10 +- src/xenia/gpu/registers.cc | 69 ++- src/xenia/gpu/registers.h | 556 ++++++++++-------- src/xenia/gpu/sampler_info.cc | 16 +- src/xenia/gpu/shader_translator.cc | 5 +- src/xenia/gpu/texture_conversion.cc | 2 +- src/xenia/gpu/texture_info.cc | 4 +- src/xenia/gpu/texture_info.h | 71 --- src/xenia/gpu/trace_viewer.cc | 12 +- src/xenia/gpu/ucode.h | 236 +++----- src/xenia/gpu/vulkan/buffer_cache.cc | 5 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 16 +- src/xenia/gpu/vulkan/pipeline_cache.h | 8 +- .../gpu/vulkan/vulkan_command_processor.cc | 16 +- src/xenia/gpu/xenos.h | 188 ++++-- 24 files changed, 896 insertions(+), 942 deletions(-) delete mode 100644 src/xenia/base/bit_field.h diff --git a/src/xenia/base/bit_field.h b/src/xenia/base/bit_field.h deleted file mode 100644 index 98a8bfbf8..000000000 --- a/src/xenia/base/bit_field.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - 
****************************************************************************** - * Copyright 2017 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_BASE_BIT_FIELD_H_ -#define XENIA_BASE_BIT_FIELD_H_ - -#include -#include -#include - -namespace xe { - -// Bitfield, where position starts at the LSB. -template -struct bf { - // For enum values, we strip them down to an underlying type. - typedef - typename std::conditional::value, std::underlying_type, - std::remove_reference>::type::type - value_type; - - bf() = default; - inline operator T() const { return value(); } - - inline T value() const { - auto value = (storage & mask()) >> position; - if (std::is_signed::value) { - // If the value is signed, sign-extend it. - value_type sign_mask = value_type(1) << (n_bits - 1); - value = (sign_mask ^ value) - sign_mask; - } - - return static_cast(value); - } - - inline value_type mask() const { - return ((value_type(1) << n_bits) - 1) << position; - } - - value_type storage; -}; - -} // namespace xe - -#endif // XENIA_BASE_BIT_FIELD_H_ diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index a715d0e5e..acb991fb5 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -350,20 +350,20 @@ void CommandProcessor::MakeCoherent() { // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 RegisterFile* regs = register_file_; - auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + auto& status_host = regs->Get(); auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; - if (!(status_host & 0x80000000ul)) { + if (!status_host.status) { return; } const char* action = "N/A"; - if 
((status_host & 0x03000000) == 0x03000000) { + if (status_host.vc_action_ena && status_host.tc_action_ena) { action = "VC | TC"; - } else if (status_host & 0x02000000) { + } else if (status_host.tc_action_ena) { action = "TC"; - } else if (status_host & 0x01000000) { + } else if (status_host.vc_action_ena) { action = "VC"; } @@ -372,8 +372,7 @@ void CommandProcessor::MakeCoherent() { base_host + size_host, size_host, action); // Mark coherent. - status_host &= ~0x80000000ul; - regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; + status_host.status = 0; } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 3b86844ed..dad797b05 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -151,7 +151,7 @@ class CommandProcessor { protected: struct IndexBufferInfo { IndexFormat format = IndexFormat::kInt16; - Endian endianness = Endian::kUnspecified; + Endian endianness = Endian::kNone; uint32_t count = 0; uint32_t guest_base = 0; size_t length = 0; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 20e1495b9..ce0a136c8 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1345,7 +1345,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Update system constants before uploading them. UpdateSystemConstantValues( memexport_used, primitive_two_faced, line_loop_closing_index, - indexed ? index_buffer_info->endianness : Endian::kUnspecified, + indexed ? index_buffer_info->endianness : Endian::kNone, adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0, early_z, GetCurrentColorMask(pixel_shader), pipeline_render_targets); @@ -1975,7 +1975,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // Stencil reference value. 
Per-face reference not supported by Direct3D 12, // choose the back face one only if drawing only back faces. - uint32_t stencil_ref_mask_reg; + Register stencil_ref_mask_reg; auto pa_su_sc_mode_cntl = regs.Get(); if (primitive_two_faced && regs.Get().backface_enable && @@ -2032,13 +2032,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float rt_clamp[4][4]; uint32_t rt_keep_masks[4][2]; for (uint32_t i = 0; i < 4; ++i) { - static const uint32_t kColorInfoRegs[] = { - XE_GPU_REG_RB_COLOR_INFO, - XE_GPU_REG_RB_COLOR1_INFO, - XE_GPU_REG_RB_COLOR2_INFO, - XE_GPU_REG_RB_COLOR3_INFO, - }; - auto color_info = regs.Get(kColorInfoRegs[i]); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); color_infos[i] = color_info; if (IsROVUsedForEDRAM()) { @@ -2125,7 +2120,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } // Alpha test. if (rb_colorcontrol.alpha_test_enable) { - flags |= uint32_t(rb_colorcontrol.alpha_func.value()) + flags |= uint32_t(rb_colorcontrol.alpha_func) << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; } else { flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | @@ -2149,7 +2144,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; } if (rb_depthcontrol.z_enable) { - flags |= uint32_t(rb_depthcontrol.zfunc.value()) + flags |= uint32_t(rb_depthcontrol.zfunc) << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; if (rb_depthcontrol.z_write_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; @@ -2350,7 +2345,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // EDRAM pitch for ROV writing. if (IsROVUsedForEDRAM()) { uint32_t edram_pitch_tiles = - ((std::min(rb_surface_info.surface_pitch.value(), 2560u) * + ((std::min(rb_surface_info.surface_pitch, 2560u) * (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 
2 : 1)) + 79) / 80; @@ -2408,14 +2403,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( 4 * sizeof(float)) != 0; std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)); - static const uint32_t kBlendControlRegs[] = { - XE_GPU_REG_RB_BLENDCONTROL_0, - XE_GPU_REG_RB_BLENDCONTROL_1, - XE_GPU_REG_RB_BLENDCONTROL_2, - XE_GPU_REG_RB_BLENDCONTROL_3, - }; uint32_t blend_factors_ops = - regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -2537,7 +2526,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_stencil_back_write_mask = rb_stencilrefmask_bf.stencilwritemask; uint32_t stencil_func_ops_bf = - (rb_depthcontrol.value >> 8) & ((1 << 12) - 1); + (rb_depthcontrol.value >> 20) & ((1 << 12) - 1); dirty |= system_constants_.edram_stencil_back_func_ops != stencil_func_ops_bf; system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 67d20b3da..1a9a4bf0b 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -363,7 +363,7 @@ bool PipelineCache::GetCurrentStateDescription( const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineDescription& description_out) { auto& regs = *register_file_; - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + auto pa_su_sc_mode_cntl = regs.Get(); bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); // Initialize all unused fields to zero for comparison/hashing. @@ -381,7 +381,7 @@ bool PipelineCache::GetCurrentStateDescription( description_out.pixel_shader = pixel_shader; // Index buffer strip cut value. 
- if (pa_su_sc_mode_cntl & (1 << 21)) { + if (pa_su_sc_mode_cntl.multi_prim_ib_ena) { // Not using 0xFFFF with 32-bit indices because in index buffers it will be // 0xFFFF0000 anyway due to endianness. description_out.strip_cut_index = index_format == IndexFormat::kInt32 @@ -479,53 +479,60 @@ bool PipelineCache::GetCurrentStateDescription( // Xenos fill mode 1). // Here we also assume that only one side is culled - if two sides are culled, // the D3D12 command processor will drop such draw early. - uint32_t cull_mode = primitive_two_faced ? (pa_su_sc_mode_cntl & 0x3) : 0; + bool cull_front, cull_back; + if (primitive_two_faced) { + cull_front = pa_su_sc_mode_cntl.cull_front != 0; + cull_back = pa_su_sc_mode_cntl.cull_back != 0; + } else { + cull_front = false; + cull_back = false; + } float poly_offset = 0.0f, poly_offset_scale = 0.0f; if (primitive_two_faced) { - description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0; - if (cull_mode == 1) { + description_out.front_counter_clockwise = pa_su_sc_mode_cntl.face == 0; + if (cull_front) { description_out.cull_mode = PipelineCullMode::kFront; - } else if (cull_mode == 2) { + } else if (cull_back) { description_out.cull_mode = PipelineCullMode::kBack; } else { description_out.cull_mode = PipelineCullMode::kNone; } // With ROV, the depth bias is applied in the pixel shader because // per-sample depth is needed for MSAA. - if (cull_mode != 1) { + if (!cull_front) { // Front faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { + // Direct3D 12, unfortunately, doesn't support point fill mode. 
+ if (pa_su_sc_mode_cntl.polymode_front_ptype != + xenos::PolygonType::kTriangles) { description_out.fill_mode_wireframe = 1; } - if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) { + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; } } - if (cull_mode != 2) { + if (!cull_back) { // Back faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { + if (pa_su_sc_mode_cntl.polymode_back_ptype != + xenos::PolygonType::kTriangles) { description_out.fill_mode_wireframe = 1; } // Prefer front depth bias because in general, front faces are the ones // that are rendered (except for shadow volumes). - if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) && + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_back_enable && poly_offset == 0.0f && poly_offset_scale == 0.0f) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; } } - if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) { - // Fill mode is disabled. + if (pa_su_sc_mode_cntl.poly_mode == xenos::PolygonModeEnable::kDisabled) { description_out.fill_mode_wireframe = 0; } } else { // Filled front faces only. // Use front depth bias if POLY_OFFSET_PARA_ENABLED // (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives). 
- if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) { + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; } @@ -543,8 +550,8 @@ bool PipelineCache::GetCurrentStateDescription( // of Duty 4 (vehicledamage map explosion decals) and Red Dead Redemption // (shadows - 2^17 is not enough, 2^18 hasn't been tested, but 2^19 // eliminates the acne). - if (((register_file_->values[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 0x1) == - uint32_t(DepthRenderTargetFormat::kD24FS8)) { + if (regs.Get().depth_format == + DepthRenderTargetFormat::kD24FS8) { poly_offset *= float(1 << 19); } else { poly_offset *= float(1 << 23); @@ -564,48 +571,49 @@ bool PipelineCache::GetCurrentStateDescription( primitive_type == PrimitiveType::kQuadPatch)) { description_out.fill_mode_wireframe = 1; } - // CLIP_DISABLE - description_out.depth_clip = - (regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0; + description_out.depth_clip = !regs.Get().clip_disable; if (edram_rov_used_) { description_out.rov_msaa = - ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; + regs.Get().msaa_samples != MsaaSamples::k1X; } else { // Depth/stencil. No stencil, always passing depth test and no depth writing // means depth disabled. 
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - if (rb_depthcontrol & 0x2) { - description_out.depth_func = (rb_depthcontrol >> 4) & 0x7; - description_out.depth_write = (rb_depthcontrol & 0x4) != 0; + auto rb_depthcontrol = regs.Get(); + if (rb_depthcontrol.z_enable) { + description_out.depth_func = rb_depthcontrol.zfunc; + description_out.depth_write = rb_depthcontrol.z_write_enable; } else { - description_out.depth_func = 0b111; + description_out.depth_func = CompareFunction::kAlways; } - if (rb_depthcontrol & 0x1) { + if (rb_depthcontrol.stencil_enable) { description_out.stencil_enable = 1; bool stencil_backface_enable = - primitive_two_faced && (rb_depthcontrol & 0x80); - uint32_t stencil_masks; + primitive_two_faced && rb_depthcontrol.backface_enable; // Per-face masks not supported by Direct3D 12, choose the back face // ones only if drawing only back faces. - if (stencil_backface_enable && cull_mode == 1) { - stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; + Register stencil_ref_mask_reg; + if (stencil_backface_enable && cull_front) { + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; } else { - stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK; } - description_out.stencil_read_mask = (stencil_masks >> 8) & 0xFF; - description_out.stencil_write_mask = (stencil_masks >> 16) & 0xFF; - description_out.stencil_front_fail_op = (rb_depthcontrol >> 11) & 0x7; + auto stencil_ref_mask = + regs.Get(stencil_ref_mask_reg); + description_out.stencil_read_mask = stencil_ref_mask.stencilmask; + description_out.stencil_write_mask = stencil_ref_mask.stencilwritemask; + description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail; description_out.stencil_front_depth_fail_op = - (rb_depthcontrol >> 17) & 0x7; - description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7; - description_out.stencil_front_func 
= (rb_depthcontrol >> 8) & 0x7; + rb_depthcontrol.stencilzfail; + description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass; + description_out.stencil_front_func = rb_depthcontrol.stencilfunc; if (stencil_backface_enable) { - description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7; + description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf; description_out.stencil_back_depth_fail_op = - (rb_depthcontrol >> 29) & 0x7; - description_out.stencil_back_pass_op = (rb_depthcontrol >> 26) & 0x7; - description_out.stencil_back_func = (rb_depthcontrol >> 20) & 0x7; + rb_depthcontrol.stencilzfail_bf; + description_out.stencil_back_pass_op = + rb_depthcontrol.stencilzpass_bf; + description_out.stencil_back_func = rb_depthcontrol.stencilfunc_bf; } else { description_out.stencil_back_fail_op = description_out.stencil_front_fail_op; @@ -618,13 +626,13 @@ bool PipelineCache::GetCurrentStateDescription( } } // If not binding the DSV, ignore the format in the hash. - if (description_out.depth_func != 0b111 || description_out.depth_write || - description_out.stencil_enable) { - description_out.depth_format = DepthRenderTargetFormat( - (regs[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 1); + if (description_out.depth_func != CompareFunction::kAlways || + description_out.depth_write || description_out.stencil_enable) { + description_out.depth_format = + regs.Get().depth_format; } } else { - description_out.depth_func = 0b111; + description_out.depth_func = CompareFunction::kAlways; } if (early_z) { description_out.force_early_z = 1; @@ -684,38 +692,25 @@ bool PipelineCache::GetCurrentStateDescription( if (render_targets[i].format == DXGI_FORMAT_UNKNOWN) { break; } - uint32_t guest_rt_index = render_targets[i].guest_render_target; - uint32_t color_info, blendcontrol; - switch (guest_rt_index) { - case 1: - color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32; - break; - case 2: - color_info = 
regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32; - break; - case 3: - color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32; - break; - default: - color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32; - break; - } PipelineRenderTarget& rt = description_out.render_targets[i]; rt.used = 1; - rt.format = RenderTargetCache::GetBaseColorFormat( - ColorRenderTargetFormat((color_info >> 16) & 0xF)); + uint32_t guest_rt_index = render_targets[i].guest_render_target; + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[guest_rt_index]); + rt.format = + RenderTargetCache::GetBaseColorFormat(color_info.color_format); rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; if (rt.write_mask) { - rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; - rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; - rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); - rt.src_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 16) & 0x1F]; - rt.dest_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 24) & 0x1F]; - rt.blend_op_alpha = BlendOp((blendcontrol >> 21) & 0x7); + auto blendcontrol = regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices[guest_rt_index]); + rt.src_blend = kBlendFactorMap[uint32_t(blendcontrol.color_srcblend)]; + rt.dest_blend = kBlendFactorMap[uint32_t(blendcontrol.color_destblend)]; + rt.blend_op = blendcontrol.color_comb_fcn; + rt.src_blend_alpha = + kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_srcblend)]; + rt.dest_blend_alpha = + kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_destblend)]; + rt.blend_op_alpha = blendcontrol.alpha_comb_fcn; } else { rt.src_blend = PipelineBlendFactor::kOne; rt.dest_blend = PipelineBlendFactor::kZero; @@ -941,15 +936,17 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( if (!edram_rov_used_) { // Depth/stencil. 
- if (description.depth_func != 0b111 || description.depth_write) { + if (description.depth_func != CompareFunction::kAlways || + description.depth_write) { state_desc.DepthStencilState.DepthEnable = TRUE; state_desc.DepthStencilState.DepthWriteMask = description.depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; // Comparison functions are the same in Direct3D 12 but plus one (minus // one, bit 0 for less, bit 1 for equal, bit 2 for greater). - state_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC( - uint32_t(D3D12_COMPARISON_FUNC_NEVER) + description.depth_func); + state_desc.DepthStencilState.DepthFunc = + D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + + uint32_t(description.depth_func)); } if (description.stencil_enable) { state_desc.DepthStencilState.StencilEnable = TRUE; @@ -958,26 +955,30 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( state_desc.DepthStencilState.StencilWriteMask = description.stencil_write_mask; // Stencil operations are the same in Direct3D 12 too but plus one. 
- state_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_fail_op); + state_desc.DepthStencilState.FrontFace.StencilFailOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_fail_op)); state_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + - description.stencil_front_depth_fail_op); - state_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_pass_op); + uint32_t(description.stencil_front_depth_fail_op)); + state_desc.DepthStencilState.FrontFace.StencilPassOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_pass_op)); state_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + - description.stencil_front_func); - state_desc.DepthStencilState.BackFace.StencilFailOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_fail_op); + uint32_t(description.stencil_front_func)); + state_desc.DepthStencilState.BackFace.StencilFailOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_fail_op)); state_desc.DepthStencilState.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + - description.stencil_back_depth_fail_op); - state_desc.DepthStencilState.BackFace.StencilPassOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_pass_op); + uint32_t(description.stencil_back_depth_fail_op)); + state_desc.DepthStencilState.BackFace.StencilPassOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_pass_op)); state_desc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + - description.stencil_back_func); + 
uint32_t(description.stencil_back_func)); } if (state_desc.DepthStencilState.DepthEnable || state_desc.DepthStencilState.StencilEnable) { diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index ba5a1a4b6..8ac86c9b0 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -152,21 +152,21 @@ class PipelineCache { uint32_t depth_clip : 1; // 15 uint32_t rov_msaa : 1; // 16 DepthRenderTargetFormat depth_format : 1; // 17 - uint32_t depth_func : 3; // 20 + CompareFunction depth_func : 3; // 20 uint32_t depth_write : 1; // 21 uint32_t stencil_enable : 1; // 22 uint32_t stencil_read_mask : 8; // 30 uint32_t force_early_z : 1; // 31 - uint32_t stencil_write_mask : 8; // 8 - uint32_t stencil_front_fail_op : 3; // 11 - uint32_t stencil_front_depth_fail_op : 3; // 14 - uint32_t stencil_front_pass_op : 3; // 17 - uint32_t stencil_front_func : 3; // 20 - uint32_t stencil_back_fail_op : 3; // 23 - uint32_t stencil_back_depth_fail_op : 3; // 26 - uint32_t stencil_back_pass_op : 3; // 29 - uint32_t stencil_back_func : 3; // 32 + uint32_t stencil_write_mask : 8; // 8 + StencilOp stencil_front_fail_op : 3; // 11 + StencilOp stencil_front_depth_fail_op : 3; // 14 + StencilOp stencil_front_pass_op : 3; // 17 + CompareFunction stencil_front_func : 3; // 20 + StencilOp stencil_back_fail_op : 3; // 23 + StencilOp stencil_back_depth_fail_op : 3; // 26 + StencilOp stencil_back_pass_op : 3; // 29 + CompareFunction stencil_back_func : 3; // 32 PipelineRenderTarget render_targets[4]; }; diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 95f2fc2f6..9ddeca74f 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -192,7 +192,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) { bool index_32bit = index_format == 
IndexFormat::kInt32; auto& regs = *register_file_; - bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0; + bool reset = regs.Get().multi_prim_ib_ena; // Swap the reset index because we will be comparing unswapped values to it. uint32_t reset_index = xenos::GpuSwap( regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index a006fe113..8d32c5be7 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -541,16 +541,17 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { bool rov_used = command_processor_->IsROVUsedForEDRAM(); - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); + auto rb_surface_info = regs.Get(); + uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u); if (surface_pitch == 0) { // TODO(Triang3l): Do something if a memexport-only draw has 0 surface // pitch (never seen in any game so far, not sure if even legal). return false; } - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; - uint32_t msaa_samples_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; + uint32_t msaa_samples_x = + rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1; + uint32_t msaa_samples_y = + rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 2 : 1; // Extract color/depth info in an unified way. 
bool enabled[5]; @@ -558,26 +559,27 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { uint32_t formats[5]; bool formats_are_64bpp[5]; uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader); - uint32_t rb_color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32}; for (uint32_t i = 0; i < 4; ++i) { enabled[i] = (color_mask & (0xF << (i * 4))) != 0; - edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u); - formats[i] = uint32_t(GetBaseColorFormat( - ColorRenderTargetFormat((rb_color_info[i] >> 16) & 0xF))); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); + edram_bases[i] = std::min(color_info.color_base, 2048u); + formats[i] = uint32_t(GetBaseColorFormat(color_info.color_format)); formats_are_64bpp[i] = IsColorFormat64bpp(ColorRenderTargetFormat(formats[i])); } - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + auto rb_depthcontrol = regs.Get(); + auto rb_depth_info = regs.Get(); // 0x1 = stencil test, 0x2 = depth test. - enabled[4] = (rb_depthcontrol & (0x1 | 0x2)) != 0; - edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u); - formats[4] = (rb_depth_info >> 16) & 0x1; + enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; + edram_bases[4] = std::min(rb_depth_info.depth_base, 2048u); + formats[4] = uint32_t(rb_depth_info.depth_format); formats_are_64bpp[4] = false; // Don't mark depth regions as dirty if not writing the depth. - bool depth_readonly = (rb_depthcontrol & (0x1 | 0x4)) == 0; + // TODO(Triang3l): Make a common function for checking if stencil writing is + // really done? 
+ bool depth_readonly = + !rb_depthcontrol.stencil_enable && !rb_depthcontrol.z_write_enable; bool full_update = false; @@ -590,7 +592,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // in the beginning of the frame or after resolves by setting the current // pitch to 0. if (current_surface_pitch_ != surface_pitch || - current_msaa_samples_ != msaa_samples) { + current_msaa_samples_ != rb_surface_info.msaa_samples) { full_update = true; } @@ -632,26 +634,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // Get EDRAM usage of the current draw so dirty regions can be calculated. // See D3D12CommandProcessor::UpdateFixedFunctionState for more info. - int16_t window_offset_y = - (regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32 >> 16) & 0x7FFF; - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2)) + int32_t window_offset_y = + regs.Get().window_y_offset; + auto pa_cl_vte_cntl = regs.Get(); + float viewport_scale_y = pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1280.0f; - float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : std::abs(viewport_scale_y); - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { + if (regs.Get().vtx_window_offset_enable) { viewport_offset_y += float(window_offset_y); } uint32_t viewport_bottom = uint32_t(std::max( 0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y)))); - uint32_t scissor_bottom = - (regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32 >> 16) & 0x7FFF; - if (!(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32 & (1u << 31))) { + uint32_t scissor_bottom = regs.Get().br_y; + if (!regs.Get().window_offset_disable) { scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0); } uint32_t dirty_bottom = @@ -769,7 +767,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { ClearBindings(); current_surface_pitch_ = surface_pitch; - current_msaa_samples_ = msaa_samples; + current_msaa_samples_ = rb_surface_info.msaa_samples; if (!rov_used) { current_edram_max_rows_ = edram_max_rows; } @@ -801,8 +799,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { #endif } XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u", - full_update ? "Full" : "Partial", surface_pitch, msaa_samples, - render_targets_to_attach); + full_update ? "Full" : "Partial", surface_pitch, + rb_surface_info.msaa_samples, render_targets_to_attach); #if 0 auto device = @@ -891,7 +889,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { if (!rov_used) { // Sample positions when loading depth must match sample positions when // drawing. - command_processor_->SetSamplePositions(msaa_samples); + command_processor_->SetSamplePositions(rb_surface_info.msaa_samples); // Load the contents of the new render targets from the EDRAM buffer (will // change the state of the render targets to copy destination). 
@@ -1007,18 +1005,14 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, auto& regs = *register_file_; // Get the render target properties. - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); + auto rb_surface_info = regs.Get(); + uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u); if (surface_pitch == 0) { return true; } - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; // Depth info is always needed because color resolve may also clear depth. - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t depth_edram_base = rb_depth_info & 0xFFF; - uint32_t depth_format = (rb_depth_info >> 16) & 0x1; - uint32_t surface_index = rb_copy_control & 0x7; + auto rb_depth_info = regs.Get(); + uint32_t surface_index = regs.Get().copy_src_select; if (surface_index > 4) { assert_always(); return false; @@ -1027,43 +1021,28 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, uint32_t surface_edram_base; uint32_t surface_format; if (surface_is_depth) { - surface_edram_base = depth_edram_base; - surface_format = depth_format; + surface_edram_base = rb_depth_info.depth_base; + surface_format = uint32_t(rb_depth_info.depth_format); } else { - uint32_t rb_color_info; - switch (surface_index) { - case 1: - rb_color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - break; - case 2: - rb_color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - break; - case 3: - rb_color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - break; - default: - rb_color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - break; - } - surface_edram_base = rb_color_info & 0xFFF; - surface_format = uint32_t(GetBaseColorFormat( - ColorRenderTargetFormat((rb_color_info >> 16) & 0xF))); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[surface_index]); + surface_edram_base = 
color_info.color_base; + surface_format = uint32_t(GetBaseColorFormat(color_info.color_format)); } // Get the resolve region since both copying and clearing need it. // HACK: Vertices to use are always in vf0. - auto fetch_group = reinterpret_cast( &regs.values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); - const auto& fetch = fetch_group->vertex_fetch_0; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); assert_true(fetch.type == 3); - assert_true(fetch.endian == 2); + assert_true(fetch.endian == Endian::k8in32); assert_true(fetch.size == 6); const uint8_t* src_vertex_address = memory->TranslatePhysical(fetch.address << 2); float vertices[6]; // Most vertices have a negative half pixel offset applied, which we reverse. float vertex_offset = - (regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1) ? 0.0f : 0.5f; + regs.Get().pix_center ? 0.0f : 0.5f; for (uint32_t i = 0; i < 6; ++i) { vertices[i] = xenos::GpuSwap(xe::load(src_vertex_address + i * sizeof(float)), @@ -1097,39 +1076,34 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, // vertices (-640,0)->(640,720), however, the destination texture pointer is // adjusted properly to the right half of the texture, and the source render // target has a pitch of 800).
+ auto pa_sc_window_offset = regs.Get(); D3D12_RECT rect; rect.left = LONG(std::min(std::min(vertices[0], vertices[2]), vertices[4])); rect.right = LONG(std::max(std::max(vertices[0], vertices[2]), vertices[4])); rect.top = LONG(std::min(std::min(vertices[1], vertices[3]), vertices[5])); rect.bottom = LONG(std::max(std::max(vertices[1], vertices[3]), vertices[5])); + if (regs.Get().vtx_window_offset_enable) { + rect.left += pa_sc_window_offset.window_x_offset; + rect.right += pa_sc_window_offset.window_x_offset; + rect.top += pa_sc_window_offset.window_y_offset; + rect.bottom += pa_sc_window_offset.window_y_offset; + } D3D12_RECT scissor; - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - scissor.left = LONG(window_scissor_tl & 0x7FFF); - scissor.right = LONG(window_scissor_br & 0x7FFF); - scissor.top = LONG((window_scissor_tl >> 16) & 0x7FFF); - scissor.bottom = LONG((window_scissor_br >> 16) & 0x7FFF); - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { - uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; - int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; - if (window_offset_x & 0x4000) { - window_offset_x |= 0x8000; - } - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } - rect.left += window_offset_x; - rect.right += window_offset_x; - rect.top += window_offset_y; - rect.bottom += window_offset_y; - if (!(window_scissor_tl & (1u << 31))) { - scissor.left = std::max(LONG(scissor.left + window_offset_x), LONG(0)); - scissor.right = std::max(LONG(scissor.right + window_offset_x), LONG(0)); - scissor.top = std::max(LONG(scissor.top + window_offset_y), LONG(0)); - scissor.bottom = - std::max(LONG(scissor.bottom + window_offset_y), LONG(0)); - } + auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); + 
scissor.left = pa_sc_window_scissor_tl.tl_x; + scissor.right = pa_sc_window_scissor_br.br_x; + scissor.top = pa_sc_window_scissor_tl.tl_y; + scissor.bottom = pa_sc_window_scissor_br.br_y; + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } rect.left = std::max(rect.left, scissor.left); rect.right = std::min(rect.right, scissor.right); @@ -1140,9 +1114,9 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, "Resolve: (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format %u) " "at %u", rect.left, rect.top, rect.right, rect.bottom, surface_index, - surface_pitch, 1 << uint32_t(msaa_samples), - msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format, - surface_edram_base); + surface_pitch, 1 << uint32_t(rb_surface_info.msaa_samples), + rb_surface_info.msaa_samples != MsaaSamples::k1X ? "s" : "", + surface_format, surface_edram_base); if (rect.left >= rect.right || rect.top >= rect.bottom) { // Nothing to copy. @@ -1157,18 +1131,20 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, // GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed // clamping to the source render target size. - bool result = - ResolveCopy(shared_memory, texture_cache, surface_edram_base, - surface_pitch, msaa_samples, surface_is_depth, surface_format, - rect, written_address_out, written_length_out); + bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base, + surface_pitch, rb_surface_info.msaa_samples, + surface_is_depth, surface_format, rect, + written_address_out, written_length_out); // Clear the color RT if needed. 
if (!surface_is_depth) { - result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples, - false, surface_format, rect); + result &= + ResolveClear(surface_edram_base, surface_pitch, + rb_surface_info.msaa_samples, false, surface_format, rect); } // Clear the depth RT if needed (may be cleared alongside color). - result &= ResolveClear(depth_edram_base, surface_pitch, msaa_samples, true, - depth_format, rect); + result &= ResolveClear(rb_depth_info.depth_base, surface_pitch, + rb_surface_info.msaa_samples, true, + uint32_t(rb_depth_info.depth_format), rect); return result; } @@ -1183,19 +1159,18 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, auto& regs = *register_file_; - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; - xenos::CopyCommand copy_command = - xenos::CopyCommand((rb_copy_control >> 20) & 0x3); - if (copy_command != xenos::CopyCommand::kRaw && - copy_command != xenos::CopyCommand::kConvert) { + auto rb_copy_control = regs.Get(); + if (rb_copy_control.copy_command != xenos::CopyCommand::kRaw && + rb_copy_control.copy_command != xenos::CopyCommand::kConvert) { // TODO(Triang3l): Handle kConstantOne and kNull. + assert_always(); return false; } auto command_list = command_processor_->GetDeferredCommandList(); // Get format info. - uint32_t rb_copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32; + auto rb_copy_dest_info = regs.Get(); TextureFormat src_texture_format; bool src_64bpp; if (is_depth) { @@ -1222,14 +1197,15 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // The destination format is specified as k_8_8_8_8 when resolving depth, but // no format conversion is done for depth, so ignore it. TextureFormat dest_format = - is_depth ? src_texture_format - : GetBaseFormat(TextureFormat((rb_copy_dest_info >> 7) & 0x3F)); + is_depth + ? 
src_texture_format + : GetBaseFormat(TextureFormat(rb_copy_dest_info.copy_dest_format)); const FormatInfo* dest_format_info = FormatInfo::Get(dest_format); // Get the destination region and clamp the source region to it. - uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; - uint32_t dest_pitch = rb_copy_dest_pitch & 0x3FFF; - uint32_t dest_height = (rb_copy_dest_pitch >> 16) & 0x3FFF; + auto rb_copy_dest_pitch = regs.Get(); + uint32_t dest_pitch = rb_copy_dest_pitch.copy_dest_pitch; + uint32_t dest_height = rb_copy_dest_pitch.copy_dest_height; if (dest_pitch == 0 || dest_height == 0) { // Nothing to copy. return true; @@ -1263,8 +1239,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF; // An example of a 3D resolve destination is the color grading LUT (used // starting from the developer/publisher intro) in Dead Space 3. - bool dest_3d = (rb_copy_dest_info & (1 << 3)) != 0; - if (dest_3d) { + if (rb_copy_dest_info.copy_dest_array) { dest_address += texture_util::GetTiledOffset3D( int(rect.left & ~LONG(31)), int(rect.top & ~LONG(31)), 0, dest_pitch, dest_height, xe::log2_floor(dest_format_info->bits_per_pixel >> 3)); @@ -1279,21 +1254,20 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // resolve to 8bpp or 16bpp textures at very odd locations. return false; } - uint32_t dest_z = dest_3d ? ((rb_copy_dest_info >> 4) & 0x7) : 0; + uint32_t dest_z = + rb_copy_dest_info.copy_dest_array ? rb_copy_dest_info.copy_dest_slice : 0; // See what samples we need and what we should do with them. 
- xenos::CopySampleSelect sample_select = - xenos::CopySampleSelect((rb_copy_control >> 4) & 0x7); + xenos::CopySampleSelect sample_select = rb_copy_control.copy_sample_select; if (is_depth && sample_select > xenos::CopySampleSelect::k3) { assert_always(); return false; } - Endian128 dest_endian = Endian128(rb_copy_dest_info & 0x7); int32_t dest_exp_bias; if (is_depth) { dest_exp_bias = 0; } else { - dest_exp_bias = int32_t((rb_copy_dest_info >> 16) << 26) >> 26; + dest_exp_bias = rb_copy_dest_info.copy_dest_exp_bias; if (ColorRenderTargetFormat(src_format) == ColorRenderTargetFormat::k_16_16 || ColorRenderTargetFormat(src_format) == @@ -1309,14 +1283,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, } } } - bool dest_swap = !is_depth && ((rb_copy_dest_info >> 24) & 0x1); + bool dest_swap = !is_depth && rb_copy_dest_info.copy_dest_swap; XELOGGPU( "Resolve: Copying samples %u to 0x%.8X (%ux%u, %cD), destination Z %u, " "destination format %s, exponent bias %d, red and blue %sswapped", uint32_t(sample_select), dest_address, dest_pitch, dest_height, - dest_3d ? '3' : '2', dest_z, dest_format_info->name, dest_exp_bias, - dest_swap ? "" : "not "); + rb_copy_dest_info.copy_dest_array ? '3' : '2', dest_z, + dest_format_info->name, dest_exp_bias, dest_swap ? "" : "not "); // There are 2 paths for resolving in this function - they don't necessarily // have to map directly to kRaw and kConvert CopyCommands. 
@@ -1344,7 +1318,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, resolution_scale_2x_ && cvars::d3d12_resolution_scale_resolve_edge_clamp && cvars::d3d12_half_pixel_offset && - !(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1); + !regs.Get().pix_center; if (sample_select <= xenos::CopySampleSelect::k3 && src_texture_format == dest_format && dest_exp_bias == 0) { // ************************************************************************* @@ -1363,7 +1337,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, uint32_t dest_size; uint32_t dest_modified_start = dest_address; uint32_t dest_modified_length; - if (dest_3d) { + if (rb_copy_dest_info.copy_dest_array) { // Depth granularity is 4 (though TiledAddress chaining is possible with 8 // granularity). dest_size = texture_util::GetGuestMipSliceStorageSize( @@ -1442,8 +1416,10 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, assert_true(dest_pitch <= 8192); root_constants.tile_sample_dest_info = ((dest_pitch + 31) >> 5) | - (dest_3d ? (((dest_height + 31) >> 5) << 9) : 0) | - (uint32_t(sample_select) << 18) | (uint32_t(dest_endian) << 20); + (rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9) + : 0) | + (uint32_t(sample_select) << 18) | + (uint32_t(rb_copy_dest_info.copy_dest_endian) << 20); if (dest_swap) { root_constants.tile_sample_dest_info |= (1 << 23) | (src_format << 24); } @@ -1797,10 +1773,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // dest_address already adjusted, so offsets are & 31. 
texture_cache->TileResolvedTexture( - dest_format, dest_address, dest_pitch, dest_height, dest_3d, - uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width, - copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size, - resolve_target->footprint, &written_address_out, &written_length_out); + dest_format, dest_address, dest_pitch, dest_height, + rb_copy_dest_info.copy_dest_array != 0, uint32_t(rect.left) & 31, + uint32_t(rect.top) & 31, dest_z, copy_width, copy_height, + rb_copy_dest_info.copy_dest_endian, copy_buffer, + resolve_target->copy_buffer_size, resolve_target->footprint, + &written_address_out, &written_length_out); // Done with the copy buffer. @@ -1817,9 +1795,15 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, auto& regs = *register_file_; // Check if clearing is enabled. - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; - if (!(rb_copy_control & (is_depth ? (1 << 9) : (1 << 8)))) { - return true; + auto rb_copy_control = regs.Get(); + if (is_depth) { + if (!rb_copy_control.depth_clear_enable) { + return true; + } + } else { + if (!rb_copy_control.color_clear_enable) { + return true; + } } XELOGGPU("Resolve: Clearing the %s render target", @@ -1886,7 +1870,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, } else if (is_64bpp) { // TODO(Triang3l): Check which 32-bit portion is in which register. 
root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_); } else { Register reg = diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 1fb339b9a..0fb518f9e 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -848,15 +848,13 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, continue; } TextureBinding& binding = texture_bindings_[index]; - uint32_t r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6; - auto group = - reinterpret_cast(&regs.values[r]); + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6); TextureKey old_key = binding.key; bool old_has_unsigned = binding.has_unsigned; bool old_has_signed = binding.has_signed; - BindingInfoFromFetchConstant(group->texture_fetch, binding.key, - &binding.swizzle, &binding.has_unsigned, - &binding.has_signed); + BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzle, + &binding.has_unsigned, &binding.has_signed); texture_keys_in_sync_ |= index_bit; if (binding.key.IsInvalid()) { binding.texture = nullptr; @@ -1142,18 +1140,15 @@ void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv, TextureCache::SamplerParameters TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { auto& regs = *register_file_; - uint32_t r = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast(&regs.values[r]); - auto& fetch = group->texture_fetch; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); SamplerParameters parameters; - parameters.clamp_x = ClampMode(fetch.clamp_x); - parameters.clamp_y = 
ClampMode(fetch.clamp_y); - parameters.clamp_z = ClampMode(fetch.clamp_z); - parameters.border_color = BorderColor(fetch.border_color); + parameters.clamp_x = fetch.clamp_x; + parameters.clamp_y = fetch.clamp_y; + parameters.clamp_z = fetch.clamp_z; + parameters.border_color = fetch.border_color; uint32_t mip_min_level = fetch.mip_min_level; uint32_t mip_max_level = fetch.mip_max_level; @@ -1171,7 +1166,7 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters( parameters.lod_bias = fetch.lod_bias; AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst - ? AnisoFilter(fetch.aniso_filter) + ? fetch.aniso_filter : binding.aniso_filter; aniso_filter = std::min(aniso_filter, AnisoFilter::kMax_16_1); parameters.aniso_filter = aniso_filter; @@ -1182,17 +1177,17 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters( } else { TextureFilter mag_filter = binding.mag_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.mag_filter) + ? fetch.mag_filter : binding.mag_filter; parameters.mag_linear = mag_filter == TextureFilter::kLinear; TextureFilter min_filter = binding.min_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.min_filter) + ? fetch.min_filter : binding.min_filter; parameters.min_linear = min_filter == TextureFilter::kLinear; TextureFilter mip_filter = binding.mip_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.mip_filter) + ? fetch.mip_filter : binding.mip_filter; parameters.mip_linear = mip_filter == TextureFilter::kLinear; // TODO(Triang3l): Investigate mip_filter TextureFilter::kBaseMap. 
@@ -1586,13 +1581,12 @@ void TextureCache::CreateScaledResolveBufferRawUAV( bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle, TextureFormat& format_out) { - auto group = reinterpret_cast( - &register_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); - auto& fetch = group->texture_fetch; + auto& regs = *register_file_; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); TextureKey key; uint32_t swizzle; - BindingInfoFromFetchConstant(group->texture_fetch, key, &swizzle, nullptr, - nullptr); + BindingInfoFromFetchConstant(fetch, key, &swizzle, nullptr, nullptr); if (key.base_page == 0 || key.dimension != Dimension::k2D) { return false; } @@ -1733,7 +1727,7 @@ void TextureCache::BindingInfoFromFetchConstant( return; } - TextureFormat format = GetBaseFormat(TextureFormat(fetch.format)); + TextureFormat format = GetBaseFormat(fetch.format); key_out.base_page = base_page; key_out.mip_page = mip_page; @@ -1745,7 +1739,7 @@ void TextureCache::BindingInfoFromFetchConstant( key_out.tiled = fetch.tiled; key_out.packed_mips = fetch.packed_mips; key_out.format = format; - key_out.endianness = Endian(fetch.endianness); + key_out.endianness = fetch.endianness; if (swizzle_out != nullptr) { uint32_t swizzle = fetch.swizzle; @@ -1783,16 +1777,16 @@ void TextureCache::BindingInfoFromFetchConstant( } if (has_unsigned_out != nullptr) { - *has_unsigned_out = TextureSign(fetch.sign_x) != TextureSign::kSigned || - TextureSign(fetch.sign_y) != TextureSign::kSigned || - TextureSign(fetch.sign_z) != TextureSign::kSigned || - TextureSign(fetch.sign_w) != TextureSign::kSigned; + *has_unsigned_out = fetch.sign_x != TextureSign::kSigned || + fetch.sign_y != TextureSign::kSigned || + fetch.sign_z != TextureSign::kSigned || + fetch.sign_w != TextureSign::kSigned; } if (has_signed_out != nullptr) { - *has_signed_out = TextureSign(fetch.sign_x) == TextureSign::kSigned || - TextureSign(fetch.sign_y) == TextureSign::kSigned || - 
TextureSign(fetch.sign_z) == TextureSign::kSigned || - TextureSign(fetch.sign_w) == TextureSign::kSigned; + *has_signed_out = fetch.sign_x == TextureSign::kSigned || + fetch.sign_y == TextureSign::kSigned || + fetch.sign_z == TextureSign::kSigned || + fetch.sign_w == TextureSign::kSigned; } } diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index 3df06bfd9..c305b5a03 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -134,7 +134,7 @@ XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1) XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR) XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL) -XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL_0) +XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0) XE_GPU_REGISTER(0x2202, kDword, RB_COLORCONTROL) XE_GPU_REGISTER(0x2203, kDword, RB_HIZCONTROL) XE_GPU_REGISTER(0x2204, kDword, PA_CL_CLIP_CNTL) @@ -142,9 +142,9 @@ XE_GPU_REGISTER(0x2205, kDword, PA_SU_SC_MODE_CNTL) XE_GPU_REGISTER(0x2206, kDword, PA_CL_VTE_CNTL) XE_GPU_REGISTER(0x2207, kDword, VGT_CURRENT_BIN_ID_MIN) XE_GPU_REGISTER(0x2208, kDword, RB_MODECONTROL) -XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL_1) -XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL_2) -XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL_3) +XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL1) +XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL2) +XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL3) XE_GPU_REGISTER(0x2280, kDword, PA_SU_POINT_SIZE) XE_GPU_REGISTER(0x2281, kDword, PA_SU_POINT_MINMAX) @@ -199,7 +199,7 @@ XE_GPU_REGISTER(0x231B, kDword, RB_COPY_DEST_INFO) XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR) XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR) XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR) -XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LOW) +XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LO) XE_GPU_REGISTER(0x2320, kDword, RB_COPY_FUNC) XE_GPU_REGISTER(0x2321, kDword, RB_COPY_REF) XE_GPU_REGISTER(0x2322, kDword, RB_COPY_MASK) diff 
--git a/src/xenia/gpu/registers.cc b/src/xenia/gpu/registers.cc index 4215e3352..df7aaa48a 100644 --- a/src/xenia/gpu/registers.cc +++ b/src/xenia/gpu/registers.cc @@ -13,38 +13,51 @@ namespace xe { namespace gpu { namespace reg { -constexpr uint32_t COHER_STATUS_HOST::register_index; -constexpr uint32_t WAIT_UNTIL::register_index; +constexpr Register COHER_STATUS_HOST::register_index; +constexpr Register WAIT_UNTIL::register_index; -constexpr uint32_t SQ_PROGRAM_CNTL::register_index; -constexpr uint32_t SQ_CONTEXT_MISC::register_index; +constexpr Register SQ_PROGRAM_CNTL::register_index; +constexpr Register SQ_CONTEXT_MISC::register_index; -constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index; -constexpr uint32_t VGT_HOS_CNTL::register_index; +constexpr Register VGT_OUTPUT_PATH_CNTL::register_index; +constexpr Register VGT_HOS_CNTL::register_index; -constexpr uint32_t PA_SU_POINT_MINMAX::register_index; -constexpr uint32_t PA_SU_POINT_SIZE::register_index; -constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index; -constexpr uint32_t PA_SU_VTX_CNTL::register_index; -constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index; -constexpr uint32_t PA_SC_VIZ_QUERY::register_index; -constexpr uint32_t PA_CL_CLIP_CNTL::register_index; -constexpr uint32_t PA_CL_VTE_CNTL::register_index; -constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index; -constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index; -constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index; +constexpr Register PA_SU_POINT_MINMAX::register_index; +constexpr Register PA_SU_POINT_SIZE::register_index; +constexpr Register PA_SU_SC_MODE_CNTL::register_index; +constexpr Register PA_SU_VTX_CNTL::register_index; +constexpr Register PA_SC_MPASS_PS_CNTL::register_index; +constexpr Register PA_SC_VIZ_QUERY::register_index; +constexpr Register PA_CL_CLIP_CNTL::register_index; +constexpr Register PA_CL_VTE_CNTL::register_index; +constexpr Register PA_SC_WINDOW_OFFSET::register_index; +constexpr Register 
PA_SC_WINDOW_SCISSOR_TL::register_index; +constexpr Register PA_SC_WINDOW_SCISSOR_BR::register_index; -constexpr uint32_t RB_MODECONTROL::register_index; -constexpr uint32_t RB_SURFACE_INFO::register_index; -constexpr uint32_t RB_COLORCONTROL::register_index; -constexpr uint32_t RB_COLOR_INFO::register_index; -constexpr uint32_t RB_COLOR_MASK::register_index; -constexpr uint32_t RB_DEPTHCONTROL::register_index; -constexpr uint32_t RB_STENCILREFMASK::register_index; -constexpr uint32_t RB_DEPTH_INFO::register_index; -constexpr uint32_t RB_COPY_CONTROL::register_index; -constexpr uint32_t RB_COPY_DEST_INFO::register_index; -constexpr uint32_t RB_COPY_DEST_PITCH::register_index; +constexpr Register RB_MODECONTROL::register_index; +constexpr Register RB_SURFACE_INFO::register_index; +constexpr Register RB_COLORCONTROL::register_index; +constexpr Register RB_COLOR_INFO::register_index; +const Register RB_COLOR_INFO::rt_register_indices[4] = { + XE_GPU_REG_RB_COLOR_INFO, + XE_GPU_REG_RB_COLOR1_INFO, + XE_GPU_REG_RB_COLOR2_INFO, + XE_GPU_REG_RB_COLOR3_INFO, +}; +constexpr Register RB_COLOR_MASK::register_index; +constexpr Register RB_BLENDCONTROL::register_index; +const Register RB_BLENDCONTROL::rt_register_indices[4] = { + XE_GPU_REG_RB_BLENDCONTROL0, + XE_GPU_REG_RB_BLENDCONTROL1, + XE_GPU_REG_RB_BLENDCONTROL2, + XE_GPU_REG_RB_BLENDCONTROL3, +}; +constexpr Register RB_DEPTHCONTROL::register_index; +constexpr Register RB_STENCILREFMASK::register_index; +constexpr Register RB_DEPTH_INFO::register_index; +constexpr Register RB_COPY_CONTROL::register_index; +constexpr Register RB_COPY_DEST_INFO::register_index; +constexpr Register RB_COPY_DEST_PITCH::register_index; } // namespace reg } // namespace gpu diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index a0fc9e279..5b6fdc54b 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -13,11 +13,12 @@ #include #include -#include "xenia/base/bit_field.h" #include "xenia/gpu/xenos.h" // Most 
registers can be found from: // https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h +// Some registers were added on Adreno specifically and are not referenced in +// game .pdb files and never set by games. namespace xe { namespace gpu { @@ -38,46 +39,49 @@ namespace reg { *******************************************************************************/ union COHER_STATUS_HOST { - xe::bf matching_contexts; - xe::bf rb_copy_dest_base_ena; - xe::bf dest_base_0_ena; - xe::bf dest_base_1_ena; - xe::bf dest_base_2_ena; - xe::bf dest_base_3_ena; - xe::bf dest_base_4_ena; - xe::bf dest_base_5_ena; - xe::bf dest_base_6_ena; - xe::bf dest_base_7_ena; - - xe::bf vc_action_ena; - xe::bf tc_action_ena; - xe::bf pglb_action_ena; - - xe::bf status; - + struct { + uint32_t matching_contexts : 8; // +0 + uint32_t rb_copy_dest_base_ena : 1; // +8 + uint32_t dest_base_0_ena : 1; // +9 + uint32_t dest_base_1_ena : 1; // +10 + uint32_t dest_base_2_ena : 1; // +11 + uint32_t dest_base_3_ena : 1; // +12 + uint32_t dest_base_4_ena : 1; // +13 + uint32_t dest_base_5_ena : 1; // +14 + uint32_t dest_base_6_ena : 1; // +15 + uint32_t dest_base_7_ena : 1; // +16 + uint32_t : 7; // +17 + uint32_t vc_action_ena : 1; // +24 + uint32_t tc_action_ena : 1; // +25 + uint32_t pglb_action_ena : 1; // +26 + uint32_t : 4; // +27 + uint32_t status : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST; + static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST; }; union WAIT_UNTIL { - xe::bf wait_re_vsync; - xe::bf wait_fe_vsync; - xe::bf wait_vsync; - xe::bf wait_dsply_id0; - xe::bf wait_dsply_id1; - xe::bf wait_dsply_id2; - - xe::bf wait_cmdfifo; - - xe::bf wait_2d_idle; - xe::bf wait_3d_idle; - xe::bf wait_2d_idleclean; - xe::bf wait_3d_idleclean; - - xe::bf cmdfifo_entries; - + struct { + uint32_t : 1; // +0 + uint32_t wait_re_vsync : 1; // +1 + uint32_t wait_fe_vsync : 1; 
// +2 + uint32_t wait_vsync : 1; // +3 + uint32_t wait_dsply_id0 : 1; // +4 + uint32_t wait_dsply_id1 : 1; // +5 + uint32_t wait_dsply_id2 : 1; // +6 + uint32_t : 3; // +7 + uint32_t wait_cmdfifo : 1; // +10 + uint32_t : 3; // +11 + uint32_t wait_2d_idle : 1; // +14 + uint32_t wait_3d_idle : 1; // +15 + uint32_t wait_2d_idleclean : 1; // +16 + uint32_t wait_3d_idleclean : 1; // +17 + uint32_t : 2; // +18 + uint32_t cmdfifo_entries : 4; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL; + static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL; }; /******************************************************************************* @@ -89,35 +93,38 @@ union WAIT_UNTIL { *******************************************************************************/ union SQ_PROGRAM_CNTL { - // Note from a2xx.xml: - // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but - // high bit is set to indicate "0 registers used". - xe::bf vs_num_reg; - xe::bf ps_num_reg; - xe::bf vs_resource; - xe::bf ps_resource; - xe::bf param_gen; - xe::bf gen_index_pix; - xe::bf vs_export_count; - xe::bf vs_export_mode; - xe::bf ps_export_mode; - xe::bf gen_index_vtx; - + struct { + // Note from a2xx.xml: + // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, + // but high bit is set to indicate "0 registers used". 
+ uint32_t vs_num_reg : 8; // +0 + uint32_t ps_num_reg : 8; // +8 + uint32_t vs_resource : 1; // +16 + uint32_t ps_resource : 1; // +17 + uint32_t param_gen : 1; // +18 + uint32_t gen_index_pix : 1; // +19 + uint32_t vs_export_count : 4; // +20 + xenos::VertexShaderExportMode vs_export_mode : 3; // +24 + uint32_t ps_export_mode : 4; // +27 + uint32_t gen_index_vtx : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; + static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; }; union SQ_CONTEXT_MISC { - xe::bf inst_pred_optimize; - xe::bf sc_output_screen_xy; - xe::bf sc_sample_cntl; - xe::bf param_gen_pos; - xe::bf perfcounter_ref; - xe::bf yeild_optimize; // sic - xe::bf tx_cache_sel; - + struct { + uint32_t inst_pred_optimize : 1; // +0 + uint32_t sc_output_screen_xy : 1; // +1 + xenos::SampleControl sc_sample_cntl : 2; // +2 + uint32_t : 4; // +4 + uint32_t param_gen_pos : 8; // +8 + uint32_t perfcounter_ref : 1; // +16 + uint32_t yeild_optimize : 1; // +17 sic + uint32_t tx_cache_sel : 1; // +18 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC; + static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC; }; /******************************************************************************* @@ -139,17 +146,19 @@ union SQ_CONTEXT_MISC { *******************************************************************************/ union VGT_OUTPUT_PATH_CNTL { - xe::bf path_select; - + struct { + xenos::VGTOutputPath path_select : 2; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; + static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; }; union VGT_HOS_CNTL { - xe::bf tess_mode; - + struct { + xenos::TessellationMode tess_mode : 2; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL; + static constexpr Register register_index = 
XE_GPU_REG_VGT_HOS_CNTL; }; /******************************************************************************* @@ -166,145 +175,162 @@ union VGT_HOS_CNTL { *******************************************************************************/ union PA_SU_POINT_MINMAX { - // Radius, 12.4 fixed point. - xe::bf min_size; - xe::bf max_size; - + struct { + // Radius, 12.4 fixed point. + uint32_t min_size : 16; // +0 + uint32_t max_size : 16; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; + static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; }; union PA_SU_POINT_SIZE { - // 1/2 width or height, 12.4 fixed point. - xe::bf height; - xe::bf width; - + struct { + // 1/2 width or height, 12.4 fixed point. + uint32_t height : 16; // +0 + uint32_t width : 16; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE; + static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE; }; // Setup Unit / Scanline Converter mode cntl union PA_SU_SC_MODE_CNTL { - xe::bf cull_front; - xe::bf cull_back; - xe::bf face; - xe::bf poly_mode; - xe::bf polymode_front_ptype; - xe::bf polymode_back_ptype; - xe::bf poly_offset_front_enable; - xe::bf poly_offset_back_enable; - xe::bf poly_offset_para_enable; - - xe::bf msaa_enable; - xe::bf vtx_window_offset_enable; - - xe::bf line_stipple_enable; - xe::bf provoking_vtx_last; - xe::bf persp_corr_dis; - xe::bf multi_prim_ib_ena; - - xe::bf quad_order_enable; - - xe::bf wait_rb_idle_all_tri; - xe::bf wait_rb_idle_first_tri_new_state; - + struct { + uint32_t cull_front : 1; // +0 + uint32_t cull_back : 1; // +1 + // 0 - front is CCW, 1 - front is CW. 
+ uint32_t face : 1; // +2 + xenos::PolygonModeEnable poly_mode : 2; // +3 + xenos::PolygonType polymode_front_ptype : 3; // +5 + xenos::PolygonType polymode_back_ptype : 3; // +8 + uint32_t poly_offset_front_enable : 1; // +11 + uint32_t poly_offset_back_enable : 1; // +12 + uint32_t poly_offset_para_enable : 1; // +13 + uint32_t : 1; // +14 + uint32_t msaa_enable : 1; // +15 + uint32_t vtx_window_offset_enable : 1; // +16 + // LINE_STIPPLE_ENABLE was added on Adreno. + uint32_t : 2; // +17 + uint32_t provoking_vtx_last : 1; // +19 + uint32_t persp_corr_dis : 1; // +20 + uint32_t multi_prim_ib_ena : 1; // +21 + uint32_t : 1; // +22 + uint32_t quad_order_enable : 1; // +23 + // WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on + // Adreno. + // TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset. + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; // Setup Unit Vertex Control union PA_SU_VTX_CNTL { - xe::bf pix_center; // 1 = half pixel offset - xe::bf round_mode; - xe::bf quant_mode; - + struct { + uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL). 
+ uint32_t round_mode : 2; // +1 + uint32_t quant_mode : 3; // +3 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; union PA_SC_MPASS_PS_CNTL { - xe::bf mpass_pix_vec_per_pass; - xe::bf mpass_ps_ena; - + struct { + uint32_t mpass_pix_vec_per_pass : 20; // +0 + uint32_t : 11; // +20 + uint32_t mpass_ps_ena : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; }; // Scanline converter viz query union PA_SC_VIZ_QUERY { - xe::bf viz_query_ena; - xe::bf viz_query_id; - xe::bf kill_pix_post_early_z; - + struct { + uint32_t viz_query_ena : 1; // +0 + uint32_t viz_query_id : 6; // +1 + uint32_t kill_pix_post_early_z : 1; // +7 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; + static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; // Clipper clip control union PA_CL_CLIP_CNTL { - xe::bf ucp_ena_0; - xe::bf ucp_ena_1; - xe::bf ucp_ena_2; - xe::bf ucp_ena_3; - xe::bf ucp_ena_4; - xe::bf ucp_ena_5; - - xe::bf ps_ucp_mode; - xe::bf clip_disable; - xe::bf ucp_cull_only_ena; - xe::bf boundary_edge_flag_ena; - xe::bf dx_clip_space_def; - xe::bf dis_clip_err_detect; - xe::bf vtx_kill_or; - xe::bf xy_nan_retain; - xe::bf z_nan_retain; - xe::bf w_nan_retain; - + struct { + uint32_t ucp_ena_0 : 1; // +0 + uint32_t ucp_ena_1 : 1; // +1 + uint32_t ucp_ena_2 : 1; // +2 + uint32_t ucp_ena_3 : 1; // +3 + uint32_t ucp_ena_4 : 1; // +4 + uint32_t ucp_ena_5 : 1; // +5 + uint32_t : 8; // +6 + uint32_t ps_ucp_mode : 2; // +14 + uint32_t clip_disable : 1; // +16 + uint32_t ucp_cull_only_ena : 1; // +17 + uint32_t boundary_edge_flag_ena : 1; // +18 + uint32_t dx_clip_space_def : 1; // +19 + uint32_t dis_clip_err_detect : 1; // +20 + uint32_t vtx_kill_or : 1; // +21 + uint32_t 
xy_nan_retain : 1; // +22 + uint32_t z_nan_retain : 1; // +23 + uint32_t w_nan_retain : 1; // +24 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; }; // Viewport transform engine control union PA_CL_VTE_CNTL { - xe::bf vport_x_scale_ena; - xe::bf vport_x_offset_ena; - xe::bf vport_y_scale_ena; - xe::bf vport_y_offset_ena; - xe::bf vport_z_scale_ena; - xe::bf vport_z_offset_ena; - - xe::bf vtx_xy_fmt; - xe::bf vtx_z_fmt; - xe::bf vtx_w0_fmt; - xe::bf perfcounter_ref; - + struct { + uint32_t vport_x_scale_ena : 1; // +0 + uint32_t vport_x_offset_ena : 1; // +1 + uint32_t vport_y_scale_ena : 1; // +2 + uint32_t vport_y_offset_ena : 1; // +3 + uint32_t vport_z_scale_ena : 1; // +4 + uint32_t vport_z_offset_ena : 1; // +5 + uint32_t : 2; // +6 + uint32_t vtx_xy_fmt : 1; // +8 + uint32_t vtx_z_fmt : 1; // +9 + uint32_t vtx_w0_fmt : 1; // +10 + uint32_t perfcounter_ref : 1; // +11 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; union PA_SC_WINDOW_OFFSET { - xe::bf window_x_offset; - xe::bf window_y_offset; - + struct { + int32_t window_x_offset : 15; // +0 + uint32_t : 1; // +15 + int32_t window_y_offset : 15; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; union PA_SC_WINDOW_SCISSOR_TL { - xe::bf tl_x; - xe::bf tl_y; - xe::bf window_offset_disable; - + struct { + uint32_t tl_x : 14; // +0 + uint32_t : 2; // +14 + uint32_t tl_y : 14; // +16 + uint32_t : 1; // +30 + uint32_t window_offset_disable : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; }; union 
PA_SC_WINDOW_SCISSOR_BR { - xe::bf br_x; - xe::bf br_y; - + struct { + uint32_t br_x : 14; // +0 + uint32_t : 2; // +14 + uint32_t br_y : 14; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; /******************************************************************************* @@ -316,136 +342,174 @@ union PA_SC_WINDOW_SCISSOR_BR { *******************************************************************************/ union RB_MODECONTROL { - xe::bf edram_mode; - + struct { + xenos::ModeControl edram_mode : 3; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL; }; union RB_SURFACE_INFO { - xe::bf surface_pitch; - xe::bf msaa_samples; - xe::bf hiz_pitch; - + struct { + uint32_t surface_pitch : 14; // +0 + uint32_t : 2; // +14 + MsaaSamples msaa_samples : 2; // +16 + uint32_t hiz_pitch : 14; // +18 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO; }; union RB_COLORCONTROL { - xe::bf alpha_func; - xe::bf alpha_test_enable; - xe::bf alpha_to_mask_enable; - // Everything in between was added on Adreno, not in game PDBs and never set. - xe::bf alpha_to_mask_offset0; - xe::bf alpha_to_mask_offset1; - xe::bf alpha_to_mask_offset2; - xe::bf alpha_to_mask_offset3; - + struct { + CompareFunction alpha_func : 3; // +0 + uint32_t alpha_test_enable : 1; // +3 + uint32_t alpha_to_mask_enable : 1; // +4 + // Everything in between was added on Adreno. 
+ uint32_t : 19; // +5 + uint32_t alpha_to_mask_offset0 : 2; // +24 + uint32_t alpha_to_mask_offset1 : 2; // +26 + uint32_t alpha_to_mask_offset2 : 2; // +28 + uint32_t alpha_to_mask_offset3 : 2; // +30 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL; }; union RB_COLOR_INFO { - xe::bf color_base; - xe::bf color_format; - xe::bf color_exp_bias; - + struct { + uint32_t color_base : 12; // +0 + uint32_t : 4; // +12 + ColorRenderTargetFormat color_format : 4; // +16 + int32_t color_exp_bias : 6; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO; // RB_COLOR[1-3]_INFO also use this format. + static const Register rt_register_indices[4]; }; union RB_COLOR_MASK { - xe::bf write_red0; - xe::bf write_green0; - xe::bf write_blue0; - xe::bf write_alpha0; - xe::bf write_red1; - xe::bf write_green1; - xe::bf write_blue1; - xe::bf write_alpha1; - xe::bf write_red2; - xe::bf write_green2; - xe::bf write_blue2; - xe::bf write_alpha2; - xe::bf write_red3; - xe::bf write_green3; - xe::bf write_blue3; - xe::bf write_alpha3; - + struct { + uint32_t write_red0 : 1; // +0 + uint32_t write_green0 : 1; // +1 + uint32_t write_blue0 : 1; // +2 + uint32_t write_alpha0 : 1; // +3 + uint32_t write_red1 : 1; // +4 + uint32_t write_green1 : 1; // +5 + uint32_t write_blue1 : 1; // +6 + uint32_t write_alpha1 : 1; // +7 + uint32_t write_red2 : 1; // +8 + uint32_t write_green2 : 1; // +9 + uint32_t write_blue2 : 1; // +10 + uint32_t write_alpha2 : 1; // +11 + uint32_t write_red3 : 1; // +12 + uint32_t write_green3 : 1; // +13 + uint32_t write_blue3 : 1; // +14 + uint32_t write_alpha3 : 1; // +15 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK; + static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK; +}; + 
+union RB_BLENDCONTROL { + struct { + BlendFactor color_srcblend : 5; // +0 + BlendOp color_comb_fcn : 3; // +5 + BlendFactor color_destblend : 5; // +8 + uint32_t : 3; // +13 + BlendFactor alpha_srcblend : 5; // +16 + BlendOp alpha_comb_fcn : 3; // +21 + BlendFactor alpha_destblend : 5; // +24 + // BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno. + }; + uint32_t value; + // RB_BLENDCONTROL[0-3] use this format. + static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0; + static const Register rt_register_indices[4]; }; union RB_DEPTHCONTROL { - xe::bf stencil_enable; - xe::bf z_enable; - xe::bf z_write_enable; - // EARLY_Z_ENABLE was added on Adreno. - xe::bf zfunc; - xe::bf backface_enable; - xe::bf stencilfunc; - xe::bf stencilfail; - xe::bf stencilzpass; - xe::bf stencilzfail; - xe::bf stencilfunc_bf; - xe::bf stencilfail_bf; - xe::bf stencilzpass_bf; - xe::bf stencilzfail_bf; - + struct { + uint32_t stencil_enable : 1; // +0 + uint32_t z_enable : 1; // +1 + uint32_t z_write_enable : 1; // +2 + // EARLY_Z_ENABLE was added on Adreno. 
+ uint32_t : 1; // +3 + CompareFunction zfunc : 3; // +4 + uint32_t backface_enable : 1; // +7 + CompareFunction stencilfunc : 3; // +8 + StencilOp stencilfail : 3; // +11 + StencilOp stencilzpass : 3; // +14 + StencilOp stencilzfail : 3; // +17 + CompareFunction stencilfunc_bf : 3; // +20 + StencilOp stencilfail_bf : 3; // +23 + StencilOp stencilzpass_bf : 3; // +26 + StencilOp stencilzfail_bf : 3; // +29 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL; }; union RB_STENCILREFMASK { - xe::bf stencilref; - xe::bf stencilmask; - xe::bf stencilwritemask; - + struct { + uint32_t stencilref : 8; // +0 + uint32_t stencilmask : 8; // +8 + uint32_t stencilwritemask : 8; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK; + static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK; // RB_STENCILREFMASK_BF also uses this format. }; union RB_DEPTH_INFO { - xe::bf depth_base; - xe::bf depth_format; - + struct { + uint32_t depth_base : 12; // +0 + uint32_t : 4; // +12 + DepthRenderTargetFormat depth_format : 1; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO; }; union RB_COPY_CONTROL { - xe::bf copy_src_select; - xe::bf copy_sample_select; - xe::bf color_clear_enable; - xe::bf depth_clear_enable; - - xe::bf copy_command; - + struct { + uint32_t copy_src_select : 3; // +0 Depth is 4. 
+ uint32_t : 1; // +3 + xenos::CopySampleSelect copy_sample_select : 3; // +4 + uint32_t : 1; // +7 + uint32_t color_clear_enable : 1; // +8 + uint32_t depth_clear_enable : 1; // +9 + uint32_t : 10; // +10 + xenos::CopyCommand copy_command : 2; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL; }; union RB_COPY_DEST_INFO { - xe::bf copy_dest_endian; - xe::bf copy_dest_array; - xe::bf copy_dest_slice; - xe::bf copy_dest_format; - xe::bf copy_dest_number; - xe::bf copy_dest_exp_bias; - xe::bf copy_dest_swap; - + struct { + Endian128 copy_dest_endian : 3; // +0 + uint32_t copy_dest_array : 1; // +3 + uint32_t copy_dest_slice : 3; // +4 + ColorFormat copy_dest_format : 6; // +7 + uint32_t copy_dest_number : 3; // +13 + int32_t copy_dest_exp_bias : 6; // +16 + uint32_t : 2; // +22 + uint32_t copy_dest_swap : 1; // +24 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; union RB_COPY_DEST_PITCH { - xe::bf copy_dest_pitch; - xe::bf copy_dest_height; - + struct { + uint32_t copy_dest_pitch : 14; // +0 + uint32_t : 2; // +14 + uint32_t copy_dest_height : 14; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; } // namespace reg diff --git a/src/xenia/gpu/sampler_info.cc b/src/xenia/gpu/sampler_info.cc index c6fcf0985..9881fe74d 100644 --- a/src/xenia/gpu/sampler_info.cc +++ b/src/xenia/gpu/sampler_info.cc @@ -24,25 +24,25 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, out_info->min_filter = fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.min_filter) + ? 
fetch.min_filter : fetch_instr.attributes.min_filter; out_info->mag_filter = fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.mag_filter) + ? fetch.mag_filter : fetch_instr.attributes.mag_filter; out_info->mip_filter = fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.mip_filter) + ? fetch.mip_filter : fetch_instr.attributes.mip_filter; - out_info->clamp_u = static_cast(fetch.clamp_x); - out_info->clamp_v = static_cast(fetch.clamp_y); - out_info->clamp_w = static_cast(fetch.clamp_z); + out_info->clamp_u = fetch.clamp_x; + out_info->clamp_v = fetch.clamp_y; + out_info->clamp_w = fetch.clamp_z; out_info->aniso_filter = fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst - ? static_cast(fetch.aniso_filter) + ? fetch.aniso_filter : fetch_instr.attributes.aniso_filter; - out_info->border_color = static_cast(fetch.border_color); + out_info->border_color = fetch.border_color; out_info->lod_bias = (fetch.lod_bias) / 32.f; out_info->mip_min_level = fetch.mip_min_level; out_info->mip_max_level = fetch.mip_max_level; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 940db871b..0a10ef5a2 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -110,9 +110,8 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, reg::SQ_PROGRAM_CNTL cntl) { Reset(); - uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex - ? cntl.vs_num_reg.value() - : cntl.ps_num_reg.value(); + uint32_t cntl_num_reg = + shader->type() == ShaderType::kVertex ? cntl.vs_num_reg : cntl.ps_num_reg; register_count_ = (cntl_num_reg & 0x80) ? 
0 : (cntl_num_reg + 1); return TranslateInternal(shader, patch_type); diff --git a/src/xenia/gpu/texture_conversion.cc b/src/xenia/gpu/texture_conversion.cc index 6137dc906..fbbf5b148 100644 --- a/src/xenia/gpu/texture_conversion.cc +++ b/src/xenia/gpu/texture_conversion.cc @@ -40,7 +40,7 @@ void CopySwapBlock(Endian endian, void* output, const void* input, xe::copy_and_swap_16_in_32_unaligned(output, input, length); break; default: - case Endian::kUnspecified: + case Endian::kNone: std::memcpy(output, input, length); break; } diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc index 85e88e45c..6fe0a4daa 100644 --- a/src/xenia/gpu/texture_info.cc +++ b/src/xenia/gpu/texture_info.cc @@ -33,8 +33,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch, auto& info = *out_info; - info.format = static_cast(fetch.format); - info.endianness = static_cast(fetch.endianness); + info.format = fetch.format; + info.endianness = fetch.endianness; info.dimension = static_cast(fetch.dimension); info.width = info.height = info.depth = 0; diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h index c50fd0a8d..08d96a532 100644 --- a/src/xenia/gpu/texture_info.h +++ b/src/xenia/gpu/texture_info.h @@ -19,77 +19,6 @@ namespace xe { namespace gpu { -// a2xx_sq_surfaceformat + -// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas -enum class TextureFormat : uint32_t { - k_1_REVERSE = 0, - k_1 = 1, - k_8 = 2, - k_1_5_5_5 = 3, - k_5_6_5 = 4, - k_6_5_5 = 5, - k_8_8_8_8 = 6, - k_2_10_10_10 = 7, - k_8_A = 8, - k_8_B = 9, - k_8_8 = 10, - k_Cr_Y1_Cb_Y0_REP = 11, - k_Y1_Cr_Y0_Cb_REP = 12, - k_16_16_EDRAM = 13, - k_8_8_8_8_A = 14, - k_4_4_4_4 = 15, - k_10_11_11 = 16, - k_11_11_10 = 17, - k_DXT1 = 18, - k_DXT2_3 = 19, - k_DXT4_5 = 20, - k_16_16_16_16_EDRAM = 21, - k_24_8 = 22, - k_24_8_FLOAT = 23, - k_16 = 24, - k_16_16 = 25, - k_16_16_16_16 = 26, - k_16_EXPAND = 27, - k_16_16_EXPAND = 28, - 
k_16_16_16_16_EXPAND = 29, - k_16_FLOAT = 30, - k_16_16_FLOAT = 31, - k_16_16_16_16_FLOAT = 32, - k_32 = 33, - k_32_32 = 34, - k_32_32_32_32 = 35, - k_32_FLOAT = 36, - k_32_32_FLOAT = 37, - k_32_32_32_32_FLOAT = 38, - k_32_AS_8 = 39, - k_32_AS_8_8 = 40, - k_16_MPEG = 41, - k_16_16_MPEG = 42, - k_8_INTERLACED = 43, - k_32_AS_8_INTERLACED = 44, - k_32_AS_8_8_INTERLACED = 45, - k_16_INTERLACED = 46, - k_16_MPEG_INTERLACED = 47, - k_16_16_MPEG_INTERLACED = 48, - k_DXN = 49, - k_8_8_8_8_AS_16_16_16_16 = 50, - k_DXT1_AS_16_16_16_16 = 51, - k_DXT2_3_AS_16_16_16_16 = 52, - k_DXT4_5_AS_16_16_16_16 = 53, - k_2_10_10_10_AS_16_16_16_16 = 54, - k_10_11_11_AS_16_16_16_16 = 55, - k_11_11_10_AS_16_16_16_16 = 56, - k_32_32_32_FLOAT = 57, - k_DXT3A = 58, - k_DXT5A = 59, - k_CTX1 = 60, - k_DXT3A_AS_1_1_1_1 = 61, - k_8_8_8_8_GAMMA_EDRAM = 62, - k_2_10_10_10_FLOAT_EDRAM = 63, - - kUnknown = 0xFFFFFFFFu, -}; - inline TextureFormat GetBaseFormat(TextureFormat texture_format) { // These formats are used for resampling textures / gamma control. 
switch (texture_format) { diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 4a634be52..d1fed5b72 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -824,7 +824,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, #define LOADEL(type, wo) \ GpuSwap(xe::load(vstart + \ (attrib.fetch_instr.attributes.offset + wo) * 4), \ - Endian(fetch->endian)) + fetch->endian) switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); @@ -1334,10 +1334,10 @@ void TraceViewer::DrawStateUI() { regs[XE_GPU_REG_RB_COLOR3_INFO].u32, }; uint32_t rb_blendcontrol[4] = { - regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, }; ImGui::Columns(2); for (int i = 0; i < xe::countof(color_info); ++i) { @@ -1713,7 +1713,7 @@ void TraceViewer::DrawStateUI() { fetch = &group->vertex_fetch_2; break; } - assert_true(fetch->endian == 2); + assert_true(fetch->endian == Endian::k8in32); char tree_root_id[32]; sprintf(tree_root_id, "#vertices_root_%d", vertex_binding.fetch_constant); diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 79cb07fab..72b8734d5 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -146,12 +146,8 @@ enum class AllocType : uint32_t { // Instruction data for ControlFlowOpcode::kExec and kExecEnd. struct ControlFlowExecInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. 
uint32_t address() const { return address_; } // Number of instructions being executed. @@ -176,19 +172,15 @@ struct ControlFlowExecInstruction { uint32_t : 7; uint32_t clean_ : 1; uint32_t : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowExecInstruction, 8); // Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd. struct ControlFlowCondExecInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. uint32_t address() const { return address_; } // Number of instructions being executed. @@ -214,20 +206,16 @@ struct ControlFlowCondExecInstruction { uint32_t vc_lo_ : 2; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondExecInstruction, 8); // Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd, // kCondExecPredClean, kCondExecPredCleanEnd. struct ControlFlowCondExecPredInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. uint32_t address() const { return address_; } // Number of instructions being executed. 
@@ -254,19 +242,15 @@ struct ControlFlowCondExecPredInstruction { uint32_t : 7; uint32_t clean_ : 1; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondExecPredInstruction, 8); // Instruction data for ControlFlowOpcode::kLoopStart. struct ControlFlowLoopStartInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address to jump to when skipping the loop. uint32_t address() const { return address_; } // Whether to reuse the current aL instead of reset it to loop start. @@ -285,19 +269,15 @@ struct ControlFlowLoopStartInstruction { // Word 1: (16 bits) uint32_t : 11; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowLoopStartInstruction, 8); // Instruction data for ControlFlowOpcode::kLoopEnd. struct ControlFlowLoopEndInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address of the start of the loop body. uint32_t address() const { return address_; } // Integer constant register that holds the loop parameters. 
@@ -319,19 +299,15 @@ struct ControlFlowLoopEndInstruction { // Word 1: (16 bits) uint32_t : 10; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowLoopEndInstruction, 8); // Instruction data for ControlFlowOpcode::kCondCall. struct ControlFlowCondCallInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address. uint32_t address() const { return address_; } // Unconditional call - ignores condition/predication. @@ -354,19 +330,15 @@ struct ControlFlowCondCallInstruction { uint32_t : 2; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondCallInstruction, 8); // Instruction data for ControlFlowOpcode::kReturn. struct ControlFlowReturnInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } private: // Word 0: (32 bits) @@ -381,12 +353,8 @@ static_assert_size(ControlFlowReturnInstruction, 8); // Instruction data for ControlFlowOpcode::kCondJmp. struct ControlFlowCondJmpInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address. 
uint32_t address() const { return address_; } // Unconditional jump - ignores condition/predication. @@ -410,20 +378,18 @@ struct ControlFlowCondJmpInstruction { uint32_t direction_ : 1; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondJmpInstruction, 8); // Instruction data for ControlFlowOpcode::kAlloc. struct ControlFlowAllocInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } + ControlFlowOpcode opcode() const { return opcode_; } // The total number of the given type allocated by this instruction. uint32_t size() const { return size_; } // Unconditional jump - ignores condition/predication. - AllocType alloc_type() const { return static_cast(alloc_type_); } + AllocType alloc_type() const { return alloc_type_; } private: // Word 0: (32 bits) @@ -433,16 +399,14 @@ struct ControlFlowAllocInstruction { // Word 1: (16 bits) uint32_t : 8; uint32_t is_unserialized_ : 1; - uint32_t alloc_type_ : 2; + AllocType alloc_type_ : 2; uint32_t : 1; - uint32_t opcode_ : 4; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowAllocInstruction, 8); XEPACKEDUNION(ControlFlowInstruction, { - ControlFlowOpcode opcode() const { - return static_cast(opcode_value); - } + ControlFlowOpcode opcode() const { return opcode_value; } ControlFlowExecInstruction exec; // kExec* ControlFlowCondExecInstruction cond_exec; // kCondExec* @@ -457,7 +421,7 @@ XEPACKEDUNION(ControlFlowInstruction, { XEPACKEDSTRUCTANONYMOUS({ uint32_t unused_0 : 32; uint32_t unused_1 : 12; - uint32_t opcode_value : 4; + ControlFlowOpcode opcode_value : 4; }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; @@ -478,7 +442,7 @@ inline void UnpackControlFlowInstructions(const uint32_t* dwords, out_b->dword_1 = dword_2 >> 16; } -enum class FetchOpcode { +enum class FetchOpcode : uint32_t { kVertexFetch = 0, kTextureFetch 
= 1, kGetTextureBorderColorFrac = 16, @@ -492,9 +456,7 @@ enum class FetchOpcode { }; struct VertexFetchInstruction { - FetchOpcode opcode() const { - return static_cast(data_.opcode_value); - } + FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). bool is_predicated() const { return data_.is_predicated; } @@ -538,13 +500,9 @@ struct VertexFetchInstruction { uint32_t prefetch_count() const { return data_.prefetch_count; } bool is_mini_fetch() const { return data_.is_mini_fetch == 1; } - VertexFormat data_format() const { - return static_cast(data_.format); - } + VertexFormat data_format() const { return data_.format; } // [-32, 31] - int exp_adjust() const { - return ((static_cast(data_.exp_adjust) << 26) >> 26); - } + int exp_adjust() const { return data_.exp_adjust; } bool is_signed() const { return data_.fomat_comp_all == 1; } bool is_normalized() const { return data_.num_format_all == 0; } bool is_index_rounded() const { return data_.is_index_rounded == 1; } @@ -562,7 +520,7 @@ struct VertexFetchInstruction { private: XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t opcode_value : 5; + FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; uint32_t dst_reg : 6; @@ -579,9 +537,9 @@ struct VertexFetchInstruction { uint32_t num_format_all : 1; uint32_t signed_rf_mode_all : 1; uint32_t is_index_rounded : 1; - uint32_t format : 6; + VertexFormat format : 6; uint32_t reserved2 : 2; - uint32_t exp_adjust : 6; + int32_t exp_adjust : 6; uint32_t is_mini_fetch : 1; uint32_t is_predicated : 1; }); @@ -595,9 +553,7 @@ struct VertexFetchInstruction { }; struct TextureFetchInstruction { - FetchOpcode opcode() const { - return static_cast(data_.opcode_value); - } + FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). 
bool is_predicated() const { return data_.is_predicated; } @@ -613,59 +569,49 @@ struct TextureFetchInstruction { uint32_t src_swizzle() const { return data_.src_swiz; } bool is_src_relative() const { return data_.src_reg_am; } - TextureDimension dimension() const { - return static_cast(data_.dimension); - } + TextureDimension dimension() const { return data_.dimension; } bool fetch_valid_only() const { return data_.fetch_valid_only == 1; } bool unnormalized_coordinates() const { return data_.tx_coord_denorm == 1; } - bool has_mag_filter() const { return data_.mag_filter != 0x3; } - TextureFilter mag_filter() const { - return static_cast(data_.mag_filter); + bool has_mag_filter() const { + return data_.mag_filter != TextureFilter::kUseFetchConst; } - bool has_min_filter() const { return data_.min_filter != 0x3; } - TextureFilter min_filter() const { - return static_cast(data_.min_filter); + TextureFilter mag_filter() const { return data_.mag_filter; } + bool has_min_filter() const { + return data_.min_filter != TextureFilter::kUseFetchConst; } - bool has_mip_filter() const { return data_.mip_filter != 0x3; } - TextureFilter mip_filter() const { - return static_cast(data_.mip_filter); + TextureFilter min_filter() const { return data_.min_filter; } + bool has_mip_filter() const { + return data_.mip_filter != TextureFilter::kUseFetchConst; } - bool has_aniso_filter() const { return data_.aniso_filter != 0x7; } - AnisoFilter aniso_filter() const { - return static_cast(data_.aniso_filter); + TextureFilter mip_filter() const { return data_.mip_filter; } + bool has_aniso_filter() const { + return data_.aniso_filter != AnisoFilter::kUseFetchConst; } - bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; } - TextureFilter vol_mag_filter() const { - return static_cast(data_.vol_mag_filter); + AnisoFilter aniso_filter() const { return data_.aniso_filter; } + bool has_vol_mag_filter() const { + return data_.vol_mag_filter != TextureFilter::kUseFetchConst; } - 
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; } - TextureFilter vol_min_filter() const { - return static_cast(data_.vol_min_filter); + TextureFilter vol_mag_filter() const { return data_.vol_mag_filter; } + bool has_vol_min_filter() const { + return data_.vol_min_filter != TextureFilter::kUseFetchConst; } + TextureFilter vol_min_filter() const { return data_.vol_min_filter; } bool use_computed_lod() const { return data_.use_comp_lod == 1; } bool use_register_lod() const { return data_.use_reg_lod == 1; } bool use_register_gradients() const { return data_.use_reg_gradients == 1; } - SampleLocation sample_location() const { - return static_cast(data_.sample_location); - } + SampleLocation sample_location() const { return data_.sample_location; } float lod_bias() const { // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx - return ((static_cast(data_.lod_bias) << 25) >> 25) / 16.0f; - } - float offset_x() const { - return ((static_cast(data_.offset_x) << 27) >> 27) / 2.0f; - } - float offset_y() const { - return ((static_cast(data_.offset_y) << 27) >> 27) / 2.0f; - } - float offset_z() const { - return ((static_cast(data_.offset_z) << 27) >> 27) / 2.0f; + return data_.lod_bias * (1.0f / 16.0f); } + float offset_x() const { return data_.offset_x * 0.5f; } + float offset_y() const { return data_.offset_y * 0.5f; } + float offset_z() const { return data_.offset_z * 0.5f; } private: XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t opcode_value : 5; + FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; uint32_t dst_reg : 6; @@ -676,14 +622,14 @@ struct TextureFetchInstruction { uint32_t src_swiz : 6; // xyz }); XEPACKEDSTRUCTANONYMOUS({ - uint32_t dst_swiz : 12; // xyzw - uint32_t mag_filter : 2; // instr_tex_filter_t - uint32_t min_filter : 2; // instr_tex_filter_t - uint32_t mip_filter : 2; // instr_tex_filter_t - uint32_t aniso_filter : 3; // 
instr_aniso_filter_t - uint32_t arbitrary_filter : 3; // instr_arbitrary_filter_t - uint32_t vol_mag_filter : 2; // instr_tex_filter_t - uint32_t vol_min_filter : 2; // instr_tex_filter_t + uint32_t dst_swiz : 12; // xyzw + TextureFilter mag_filter : 2; + TextureFilter min_filter : 2; + TextureFilter mip_filter : 2; + AnisoFilter aniso_filter : 3; + xenos::ArbitraryFilter arbitrary_filter : 3; + TextureFilter vol_mag_filter : 2; + TextureFilter vol_min_filter : 2; uint32_t use_comp_lod : 1; uint32_t use_reg_lod : 1; uint32_t unk : 1; @@ -691,13 +637,13 @@ struct TextureFetchInstruction { }); XEPACKEDSTRUCTANONYMOUS({ uint32_t use_reg_gradients : 1; - uint32_t sample_location : 1; - uint32_t lod_bias : 7; + SampleLocation sample_location : 1; + int32_t lod_bias : 7; uint32_t unused : 5; - uint32_t dimension : 2; - uint32_t offset_x : 5; - uint32_t offset_y : 5; - uint32_t offset_z : 5; + TextureDimension dimension : 2; + int32_t offset_x : 5; + int32_t offset_y : 5; + int32_t offset_z : 5; uint32_t pred_condition : 1; }); }); @@ -722,7 +668,7 @@ static_assert_size(TextureFetchInstruction, 12); // when write masks are disabled or the instruction that would write them // fails its predication check. 
-enum class AluScalarOpcode { +enum class AluScalarOpcode : uint32_t { // Floating-Point Add // adds dest, src0.ab // dest.xyzw = src0.a + src0.b; @@ -1049,7 +995,7 @@ enum class AluScalarOpcode { kRetainPrev = 50, }; -enum class AluVectorOpcode { +enum class AluVectorOpcode : uint32_t { // Per-Component Floating-Point Add // add dest, src0, src1 // dest.x = src0.x + src1.x; @@ -1373,9 +1319,7 @@ struct AluInstruction { return vector_write_mask() || is_export() || AluVectorOpcodeHasSideEffects(vector_opcode()); } - AluVectorOpcode vector_opcode() const { - return static_cast(data_.vector_opc); - } + AluVectorOpcode vector_opcode() const { return data_.vector_opc; } uint32_t vector_write_mask() const { return data_.vector_write_mask; } uint32_t vector_dest() const { return data_.vector_dest; } bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; } @@ -1385,9 +1329,7 @@ struct AluInstruction { return scalar_opcode() != AluScalarOpcode::kRetainPrev || (!is_export() && scalar_write_mask() != 0); } - AluScalarOpcode scalar_opcode() const { - return static_cast(data_.scalar_opc); - } + AluScalarOpcode scalar_opcode() const { return data_.scalar_opc; } uint32_t scalar_write_mask() const { return data_.scalar_write_mask; } uint32_t scalar_dest() const { return data_.scalar_dest; } bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; } @@ -1459,7 +1401,7 @@ struct AluInstruction { uint32_t scalar_write_mask : 4; uint32_t vector_clamp : 1; uint32_t scalar_clamp : 1; - uint32_t scalar_opc : 6; // instr_scalar_opc_t + AluScalarOpcode scalar_opc : 6; }); XEPACKEDSTRUCTANONYMOUS({ uint32_t src3_swiz : 8; @@ -1478,7 +1420,7 @@ struct AluInstruction { uint32_t src3_reg : 8; uint32_t src2_reg : 8; uint32_t src1_reg : 8; - uint32_t vector_opc : 5; // instr_vector_opc_t + AluVectorOpcode vector_opc : 5; uint32_t src3_sel : 1; uint32_t src2_sel : 1; uint32_t src1_sel : 1; diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc 
b/src/xenia/gpu/vulkan/buffer_cache.cc index cd1fb7ba8..ae30a5dd8 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -654,9 +654,8 @@ VkDescriptorSet BufferCache::PrepareVertexSet( // trace_writer_.WriteMemoryRead(physical_address, source_length); // Upload (or get a cached copy of) the buffer. - auto buffer_ref = - UploadVertexBuffer(command_buffer, physical_address, source_length, - static_cast(fetch->endian), fence); + auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address, + source_length, fetch->endian, fence); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. XELOGW("Failed to upload vertex buffer!"); diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 4258061f1..24ff9cbe6 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -815,13 +815,13 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants_dirty |= SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); push_constants_dirty |= - SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); push_constants_dirty |= - SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); push_constants_dirty |= - SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); push_constants_dirty |= - SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + SetShadowRegister(&regs.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); push_constants_dirty |= + SetShadowRegister(&regs.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); push_constants_dirty |= + SetShadowRegister(&regs.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); push_constants_dirty |= + SetShadowRegister(&regs.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); push_constants_dirty |= SetShadowRegister(&regs.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); push_constants_dirty |= @@ -1503,13 +1503,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { bool dirty = false; dirty |=
SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= - SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); dirty |= - SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); dirty |= - SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= - SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0); dirty |= + SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1); dirty |= + SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2); dirty |= + SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3); dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); XXH64_update(&hash_state_, &regs, sizeof(regs)); if (!dirty) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 80035d25f..0bbd03503 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -292,10 +292,10 @@ class PipelineCache { reg::SQ_PROGRAM_CNTL sq_program_cntl; uint32_t sq_context_misc; uint32_t rb_colorcontrol; - uint32_t rb_color_info; - uint32_t rb_color1_info; - uint32_t rb_color2_info; - uint32_t rb_color3_info; + reg::RB_COLOR_INFO rb_color_info; + reg::RB_COLOR_INFO rb_color1_info; + reg::RB_COLOR_INFO rb_color2_info; + reg::RB_COLOR_INFO rb_color3_info; float rb_alpha_ref; uint32_t pa_su_point_size; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9b54dfe3e..50a3094a4 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -962,7 +962,7 @@ bool VulkanCommandProcessor::IssueCopy() { break; } assert_true(fetch->type == 3); - assert_true(fetch->endian == 2); + assert_true(fetch->endian == Endian::k8in32); assert_true(fetch->size == 6); const uint8_t* vertex_addr =
memory_->TranslatePhysical(fetch->address << 2); trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); @@ -974,7 +974,7 @@ bool VulkanCommandProcessor::IssueCopy() { float dest_points[6]; for (int i = 0; i < 6; i++) { dest_points[i] = - GpuSwap(xe::load(vertex_addr + i * 4), Endian(fetch->endian)) + + GpuSwap(xe::load(vertex_addr + i * 4), fetch->endian) + vtx_offset; } @@ -1000,10 +1000,10 @@ bool VulkanCommandProcessor::IssueCopy() { if (is_color_source) { // Source from a color target. reg::RB_COLOR_INFO color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, - regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + regs.Get(), + regs.Get(XE_GPU_REG_RB_COLOR1_INFO), + regs.Get(XE_GPU_REG_RB_COLOR2_INFO), + regs.Get(XE_GPU_REG_RB_COLOR3_INFO), }; color_edram_base = color_info[copy_src_select].color_base; color_format = color_info[copy_src_select].color_format; @@ -1023,7 +1023,7 @@ bool VulkanCommandProcessor::IssueCopy() { Endian resolve_endian = Endian::k8in32; if (copy_regs->copy_dest_info.copy_dest_endian <= Endian128::k16in32) { resolve_endian = - static_cast(copy_regs->copy_dest_info.copy_dest_endian.value()); + static_cast(copy_regs->copy_dest_info.copy_dest_endian); } // Demand a resolve texture from the texture cache. @@ -1289,7 +1289,7 @@ bool VulkanCommandProcessor::IssueCopy() { // Perform any requested clears. 
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; assert_true(copy_color_clear == copy_color_clear_low); if (color_clear_enabled) { diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 2175e5e5d..47a5659a4 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -161,14 +161,14 @@ enum class SampleLocation : uint32_t { }; enum class Endian : uint32_t { - kUnspecified = 0, + kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3, }; enum class Endian128 : uint32_t { - kUnspecified = 0, + kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3, @@ -225,6 +225,77 @@ enum class DepthRenderTargetFormat : uint32_t { kD24FS8 = 1, }; +// a2xx_sq_surfaceformat + +// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas +enum class TextureFormat : uint32_t { + k_1_REVERSE = 0, + k_1 = 1, + k_8 = 2, + k_1_5_5_5 = 3, + k_5_6_5 = 4, + k_6_5_5 = 5, + k_8_8_8_8 = 6, + k_2_10_10_10 = 7, + k_8_A = 8, + k_8_B = 9, + k_8_8 = 10, + k_Cr_Y1_Cb_Y0_REP = 11, + k_Y1_Cr_Y0_Cb_REP = 12, + k_16_16_EDRAM = 13, + k_8_8_8_8_A = 14, + k_4_4_4_4 = 15, + k_10_11_11 = 16, + k_11_11_10 = 17, + k_DXT1 = 18, + k_DXT2_3 = 19, + k_DXT4_5 = 20, + k_16_16_16_16_EDRAM = 21, + k_24_8 = 22, + k_24_8_FLOAT = 23, + k_16 = 24, + k_16_16 = 25, + k_16_16_16_16 = 26, + k_16_EXPAND = 27, + k_16_16_EXPAND = 28, + k_16_16_16_16_EXPAND = 29, + k_16_FLOAT = 30, + k_16_16_FLOAT = 31, + k_16_16_16_16_FLOAT = 32, + k_32 = 33, + k_32_32 = 34, + k_32_32_32_32 = 35, + k_32_FLOAT = 36, + k_32_32_FLOAT = 37, + k_32_32_32_32_FLOAT = 38, + k_32_AS_8 = 39, + k_32_AS_8_8 = 40, + k_16_MPEG = 41, + k_16_16_MPEG = 42, + k_8_INTERLACED = 43, + k_32_AS_8_INTERLACED = 44, + k_32_AS_8_8_INTERLACED = 45, + k_16_INTERLACED = 46, + k_16_MPEG_INTERLACED = 47, + 
k_16_16_MPEG_INTERLACED = 48, + k_DXN = 49, + k_8_8_8_8_AS_16_16_16_16 = 50, + k_DXT1_AS_16_16_16_16 = 51, + k_DXT2_3_AS_16_16_16_16 = 52, + k_DXT4_5_AS_16_16_16_16 = 53, + k_2_10_10_10_AS_16_16_16_16 = 54, + k_10_11_11_AS_16_16_16_16 = 55, + k_11_11_10_AS_16_16_16_16 = 56, + k_32_32_32_FLOAT = 57, + k_DXT3A = 58, + k_DXT5A = 59, + k_CTX1 = 60, + k_DXT3A_AS_1_1_1_1 = 61, + k_8_8_8_8_GAMMA_EDRAM = 62, + k_2_10_10_10_FLOAT_EDRAM = 63, + + kUnknown = 0xFFFFFFFFu, +}; + // Subset of a2xx_sq_surfaceformat - formats that RTs can be resolved to. enum class ColorFormat : uint32_t { k_8 = 2, @@ -367,11 +438,7 @@ enum class BlendFactor : uint32_t { kConstantAlpha = 14, kOneMinusConstantAlpha = 15, kSrcAlphaSaturate = 16, - // SRC1 likely not used on the Xbox 360 - only available in Direct3D 9Ex. - kSrc1Color = 20, - kOneMinusSrc1Color = 21, - kSrc1Alpha = 22, - kOneMinusSrc1Alpha = 23, + // SRC1 added on Adreno. }; enum class BlendOp : uint32_t { @@ -391,6 +458,17 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// instr_arbitrary_filter_t +enum class ArbitraryFilter : uint32_t { + k2x4Sym = 0, + k2x4Asym = 1, + k4x2Sym = 2, + k4x2Asym = 3, + k4x4Sym = 4, + k4x4Asym = 5, + kUseFetchConst = 7, +}; + // a2xx_sq_ps_vtx_mode enum class VertexShaderExportMode : uint32_t { kPosition1Vector = 0, @@ -420,6 +498,17 @@ enum class TessellationMode : uint32_t { kAdaptive = 2, }; +enum class PolygonModeEnable : uint32_t { + kDisabled = 0, // Render triangles. + kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type. +}; + +enum class PolygonType : uint32_t { + kPoints = 0, + kLines = 1, + kTriangles = 2, +}; + enum class ModeControl : uint32_t { kIgnore = 0, kColorDepth = 4, @@ -471,7 +560,7 @@ typedef enum { inline uint16_t GpuSwap(uint16_t value, Endian endianness) { switch (endianness) { - case Endian::kUnspecified: + case Endian::kNone: // No swap. 
return value; case Endian::k8in16: @@ -486,7 +575,7 @@ inline uint16_t GpuSwap(uint16_t value, Endian endianness) { inline uint32_t GpuSwap(uint32_t value, Endian endianness) { switch (endianness) { default: - case Endian::kUnspecified: + case Endian::kNone: // No swap. return value; case Endian::k8in16: @@ -520,11 +609,11 @@ inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; } XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ uint32_t type : 2; // +0 - uint32_t address : 30; // +2 + uint32_t address : 30; // +2 address in dwords - uint32_t endian : 2; // +0 - uint32_t size : 24; // +2 size in words - uint32_t unk1 : 6; // +26 + Endian endian : 2; // +0 + uint32_t size : 24; // +2 size in words + uint32_t unk1 : 6; // +26 }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; @@ -535,34 +624,36 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_texture_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t type : 2; // +0 dword_0 - uint32_t sign_x : 2; // +2 - uint32_t sign_y : 2; // +4 - uint32_t sign_z : 2; // +6 - uint32_t sign_w : 2; // +8 - uint32_t clamp_x : 3; // +10 - uint32_t clamp_y : 3; // +13 - uint32_t clamp_z : 3; // +16 - uint32_t unused_0 : 3; // +19 - uint32_t pitch : 9; // +22 byte_pitch >> 5 - uint32_t tiled : 1; // +31 + uint32_t type : 2; // +0 dword_0 + TextureSign sign_x : 2; // +2 + TextureSign sign_y : 2; // +4 + TextureSign sign_z : 2; // +6 + TextureSign sign_w : 2; // +8 + ClampMode clamp_x : 3; // +10 + ClampMode clamp_y : 3; // +13 + ClampMode clamp_z : 3; // +16 + uint32_t signed_rf_mode_all : 1; // +19 + // TODO(Triang3l): 1 or 2 dim_tbd bits? 
+ uint32_t unk_0 : 2; // +20 + uint32_t pitch : 9; // +22 byte_pitch >> 5 + uint32_t tiled : 1; // +31 - uint32_t format : 6; // +0 dword_1 - uint32_t endianness : 2; // +6 - uint32_t request_size : 2; // +8 - uint32_t stacked : 1; // +10 - uint32_t clamp_policy : 1; // +11 d3d/opengl - uint32_t base_address : 20; // +12 + TextureFormat format : 6; // +0 dword_1 + Endian endianness : 2; // +6 + uint32_t request_size : 2; // +8 + uint32_t stacked : 1; // +10 + uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl + uint32_t base_address : 20; // +12 base address >> 12 union { // dword_2 struct { uint32_t width : 24; - uint32_t unused : 8; + uint32_t : 8; } size_1d; struct { uint32_t width : 13; uint32_t height : 13; - uint32_t unused : 6; + uint32_t : 6; } size_2d; struct { uint32_t width : 13; @@ -576,15 +667,16 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { } size_3d; }; - uint32_t num_format : 1; // +0 dword_3 frac/int - uint32_t swizzle : 12; // +1 xyzw, 3b each (XE_GPU_SWIZZLE) - int32_t exp_adjust : 6; // +13 - uint32_t mag_filter : 2; // +19 - uint32_t min_filter : 2; // +21 - uint32_t mip_filter : 2; // +23 - uint32_t aniso_filter : 3; // +25 - uint32_t unused_3 : 3; // +28 - uint32_t border_size : 1; // +31 + uint32_t num_format : 1; // +0 dword_3 frac/int + // xyzw, 3b each (XE_GPU_SWIZZLE) + uint32_t swizzle : 12; // +1 + int32_t exp_adjust : 6; // +13 + TextureFilter mag_filter : 2; // +19 + TextureFilter min_filter : 2; // +21 + TextureFilter mip_filter : 2; // +23 + AnisoFilter aniso_filter : 3; // +25 + xenos::ArbitraryFilter arbitrary_filter : 3; // +28 + uint32_t border_size : 1; // +31 uint32_t vol_mag_filter : 1; // +0 dword_4 uint32_t vol_min_filter : 1; // +1 @@ -596,13 +688,13 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { int32_t grad_exp_adjust_h : 5; // +22 int32_t grad_exp_adjust_v : 5; // +27 - uint32_t border_color : 2; // +0 dword_5 - uint32_t force_bcw_max : 1; // +2 - uint32_t tri_clamp : 2; // +3 - int32_t aniso_bias : 4; // +5 - uint32_t 
dimension : 2; // +9 - uint32_t packed_mips : 1; // +11 - uint32_t mip_address : 20; // +12 + BorderColor border_color : 2; // +0 dword_5 + uint32_t force_bc_w_to_max : 1; // +2 + uint32_t tri_clamp : 2; // +3 + int32_t aniso_bias : 4; // +5 + uint32_t dimension : 2; // +9 + uint32_t packed_mips : 1; // +11 + uint32_t mip_address : 20; // +12 mip address >> 12 }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; From 1005651855fd9f28c3c9279e510665ff3797d629 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 21 Oct 2019 21:16:29 +0300 Subject: [PATCH 4/4] [D3D12] Fix DXN swizzle and cleanup texture swizzles --- src/xenia/gpu/d3d12/texture_cache.cc | 925 +++++++++++++++++++-------- src/xenia/gpu/d3d12/texture_cache.h | 9 +- 2 files changed, 669 insertions(+), 265 deletions(-) diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 0fb518f9e..99f40e26f 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -97,313 +97,739 @@ constexpr uint32_t TextureCache::kScaledResolveBufferSize; constexpr uint32_t TextureCache::kScaledResolveHeapSizeLog2; constexpr uint32_t TextureCache::kScaledResolveHeapSize; +// Assuming all single-component textures have its only component replicated. +// For DXT3A and DXT5A, this is according to: +// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf +// Halo 3 also expects replicated components in k_8 sprites. 
const TextureCache::HostFormat TextureCache::host_formats_[64] = { // k_1_REVERSE - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_1 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_8 - {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, - DXGI_FORMAT_R8_SNORM, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R8_UNORM, ResolveTileMode::k8bpp, true}, + {DXGI_FORMAT_R8_TYPELESS, + DXGI_FORMAT_R8_UNORM, + LoadMode::k8bpb, + DXGI_FORMAT_R8_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8_UNORM, + ResolveTileMode::k8bpp, + {0, 0, 0, 0}}, // k_1_5_5_5 - {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - ResolveTileMode::k16bppRGBA, false}, + // Red and blue swapped in the load shader for simplicity. 
+ {DXGI_FORMAT_B5G5R5A1_UNORM, + DXGI_FORMAT_B5G5R5A1_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + ResolveTileMode::k16bppRGBA, + {0, 1, 2, 3}}, // k_5_6_5 - {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_B5G6R5_UNORM, ResolveTileMode::k16bpp, - false}, + // Red and blue swapped in the load shader for simplicity. + {DXGI_FORMAT_B5G6R5_UNORM, + DXGI_FORMAT_B5G6R5_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_B5G6R5_UNORM, + ResolveTileMode::k16bpp, + {0, 1, 2, 3}}, // k_6_5_5 - // Green bits in blue, blue bits in green - RBGA swizzle must be used. - {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_B5G6R5_UNORM, ResolveTileMode::k16bpp, - false}, + // On the host, green bits in blue, blue bits in green. 
+ {DXGI_FORMAT_B5G6R5_UNORM, + DXGI_FORMAT_B5G6R5_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_B5G6R5_UNORM, + ResolveTileMode::k16bpp, + {0, 2, 1, 3}}, // k_8_8_8_8 - {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - ResolveTileMode::k32bpp, false}, + {DXGI_FORMAT_R8G8B8A8_TYPELESS, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_R8G8B8A8_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_2_10_10_10 - {DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R10G10B10A2_UNORM, - ResolveTileMode::k32bpp, false}, + {DXGI_FORMAT_R10G10B10A2_TYPELESS, + DXGI_FORMAT_R10G10B10A2_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R10G10B10A2_UNORM, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_8_A - {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, LoadMode::k8bpb, - DXGI_FORMAT_R8_SNORM, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R8_UNORM, ResolveTileMode::k8bpp, true}, + {DXGI_FORMAT_R8_TYPELESS, + DXGI_FORMAT_R8_UNORM, + LoadMode::k8bpb, + DXGI_FORMAT_R8_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8_UNORM, + ResolveTileMode::k8bpp, + {0, 0, 0, 0}}, // k_8_B - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + 
DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_8_8 - {DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_R8G8_SNORM, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R8G8_UNORM, ResolveTileMode::k16bpp, - false}, + {DXGI_FORMAT_R8G8_TYPELESS, + DXGI_FORMAT_R8G8_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_R8G8_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8_UNORM, + ResolveTileMode::k16bpp, + {0, 1, 2, 3}}, // k_Cr_Y1_Cb_Y0_REP - {DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM, LoadMode::k32bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + // Red and blue probably must be swapped, similar to k_Y1_Cr_Y0_Cb_REP. + {DXGI_FORMAT_G8R8_G8B8_UNORM, + DXGI_FORMAT_G8R8_G8B8_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {2, 1, 0, 3}}, // k_Y1_Cr_Y0_Cb_REP - // Used for videos in NBA 2K9. Red and blue must be flipped. + // Used for videos in NBA 2K9. Red and blue must be swapped. // TODO(Triang3l): D3DFMT_G8R8_G8B8 is DXGI_FORMAT_R8G8_B8G8_UNORM * 255.0f, // watch out for num_format int, division in shaders, etc., in NBA 2K9 it // works as is. Also need to decompress if the size is uneven, but should be // a very rare case. 
- {DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM, LoadMode::k32bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_R8G8_B8G8_UNORM, + DXGI_FORMAT_R8G8_B8G8_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {2, 1, 0, 3}}, // k_16_16_EDRAM // Not usable as a texture, also has -32...32 range. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_8_8_8_8_A - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_4_4_4_4 - {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - ResolveTileMode::k16bppRGBA, false}, + // Red and blue swapped in the load shader for simplicity. 
+ {DXGI_FORMAT_B4G4R4A4_UNORM, + DXGI_FORMAT_B4G4R4A4_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + ResolveTileMode::k16bppRGBA, + {0, 1, 2, 3}}, // k_10_11_11 - {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR11G11B10ToRGBA16SNorm, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR11G11B10AsRGBA16, - false}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_UNORM, + LoadMode::kR11G11B10ToRGBA16, + DXGI_FORMAT_R16G16B16A16_SNORM, + LoadMode::kR11G11B10ToRGBA16SNorm, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_UNORM, + ResolveTileMode::kR11G11B10AsRGBA16, + {0, 1, 2, 3}}, // k_11_11_10 - {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR10G11B11ToRGBA16SNorm, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR10G11B11AsRGBA16, - false}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_UNORM, + LoadMode::kR10G11B11ToRGBA16, + DXGI_FORMAT_R16G16B16A16_SNORM, + LoadMode::kR10G11B11ToRGBA16SNorm, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_UNORM, + ResolveTileMode::kR10G11B11AsRGBA16, + {0, 1, 2, 3}}, // k_DXT1 - {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT1ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + {DXGI_FORMAT_BC1_UNORM, + DXGI_FORMAT_BC1_UNORM, + LoadMode::k64bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT1ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT2_3 - {DXGI_FORMAT_BC2_UNORM, 
DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT3ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + {DXGI_FORMAT_BC2_UNORM, + DXGI_FORMAT_BC2_UNORM, + LoadMode::k128bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT3ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT4_5 - {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT5ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + {DXGI_FORMAT_BC3_UNORM, + DXGI_FORMAT_BC3_UNORM, + LoadMode::k128bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT5ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_16_16_16_16_EDRAM // Not usable as a texture, also has -32...32 range. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // R32_FLOAT for depth because shaders would require an additional SRV to // sample stencil, which we don't provide. 
// k_24_8 - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthUnorm, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R32_FLOAT, + LoadMode::kDepthUnorm, + DXGI_FORMAT_R32_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_24_8_FLOAT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::kDepthFloat, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R32_FLOAT, + LoadMode::kDepthFloat, + DXGI_FORMAT_R32_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_16 - {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, LoadMode::k16bpb, - DXGI_FORMAT_R16_SNORM, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16_UNORM, ResolveTileMode::k16bpp, true}, + {DXGI_FORMAT_R16_TYPELESS, + DXGI_FORMAT_R16_UNORM, + LoadMode::k16bpb, + DXGI_FORMAT_R16_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16_UNORM, + ResolveTileMode::k16bpp, + {0, 0, 0, 0}}, // k_16_16 // The resolve format being unorm is correct (with snorm distortion effects // in Halo 3 cause stretching of one corner of the screen). 
- {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, - DXGI_FORMAT_R16G16_SNORM, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16G16_UNORM, ResolveTileMode::k32bpp, - false}, + {DXGI_FORMAT_R16G16_TYPELESS, + DXGI_FORMAT_R16G16_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_R16G16_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16_UNORM, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_16_16_16_16 // The resolve format being unorm is correct (with snorm distortion effects // in Halo 3 cause stretching of one corner of the screen). - {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_SNORM, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_UNORM, - ResolveTileMode::k64bpp, false}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_UNORM, + LoadMode::k64bpb, + DXGI_FORMAT_R16G16B16A16_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_UNORM, + ResolveTileMode::k64bpp, + {0, 1, 2, 3}}, // k_16_EXPAND - {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, - DXGI_FORMAT_R16_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16_FLOAT, ResolveTileMode::k16bpp, true}, + {DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_R16_FLOAT, + LoadMode::k16bpb, + DXGI_FORMAT_R16_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16_FLOAT, + ResolveTileMode::k16bpp, + {0, 0, 0, 0}}, // k_16_16_EXPAND - {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, - DXGI_FORMAT_R16G16_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16G16_FLOAT, ResolveTileMode::k32bpp, - false}, + {DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R16G16_FLOAT, + LoadMode::k32bpb, + DXGI_FORMAT_R16G16_FLOAT, + LoadMode::kUnknown, + 
DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16_FLOAT, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_16_16_16_16_EXPAND - {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_FLOAT, - ResolveTileMode::k64bpp, false}, + {DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + LoadMode::k64bpb, + DXGI_FORMAT_R16G16B16A16_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_FLOAT, + ResolveTileMode::k64bpp, + {0, 1, 2, 3}}, // k_16_FLOAT - {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, - DXGI_FORMAT_R16_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16_FLOAT, ResolveTileMode::k16bpp, true}, + {DXGI_FORMAT_R16_FLOAT, + DXGI_FORMAT_R16_FLOAT, + LoadMode::k16bpb, + DXGI_FORMAT_R16_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16_FLOAT, + ResolveTileMode::k16bpp, + {0, 0, 0, 0}}, // k_16_16_FLOAT - {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, - DXGI_FORMAT_R16G16_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R16G16_FLOAT, ResolveTileMode::k32bpp, - false}, + {DXGI_FORMAT_R16G16_FLOAT, + DXGI_FORMAT_R16G16_FLOAT, + LoadMode::k32bpb, + DXGI_FORMAT_R16G16_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16_FLOAT, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_16_16_16_16_FLOAT - {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, - LoadMode::k64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R16G16B16A16_FLOAT, - ResolveTileMode::k64bpp, false}, + {DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_R16G16B16A16_FLOAT, + LoadMode::k64bpb, + DXGI_FORMAT_R16G16B16A16_FLOAT, + 
LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_FLOAT, + ResolveTileMode::k64bpp, + {0, 1, 2, 3}}, // k_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_32_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_32_32_32_32 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_32_FLOAT - {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, LoadMode::k32bpb, - DXGI_FORMAT_R32_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R32_FLOAT, ResolveTileMode::k32bpp, true}, + {DXGI_FORMAT_R32_FLOAT, + DXGI_FORMAT_R32_FLOAT, + LoadMode::k32bpb, + DXGI_FORMAT_R32_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R32_FLOAT, + ResolveTileMode::k32bpp, + {0, 0, 0, 0}}, // k_32_32_FLOAT - {DXGI_FORMAT_R32G32_FLOAT, 
DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, - DXGI_FORMAT_R32G32_FLOAT, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_R32G32_FLOAT, ResolveTileMode::k64bpp, - false}, + {DXGI_FORMAT_R32G32_FLOAT, + DXGI_FORMAT_R32G32_FLOAT, + LoadMode::k64bpb, + DXGI_FORMAT_R32G32_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R32G32_FLOAT, + ResolveTileMode::k64bpp, + {0, 1, 2, 3}}, // k_32_32_32_32_FLOAT - {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, - LoadMode::k128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R32G32B32A32_FLOAT, - ResolveTileMode::k128bpp, false}, + {DXGI_FORMAT_R32G32B32A32_FLOAT, + DXGI_FORMAT_R32G32B32A32_FLOAT, + LoadMode::k128bpb, + DXGI_FORMAT_R32G32B32A32_FLOAT, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R32G32B32A32_FLOAT, + ResolveTileMode::k128bpp, + {0, 1, 2, 3}}, // k_32_AS_8 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_32_AS_8_8 - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_16_MPEG - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, 
LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_16_16_MPEG - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_32_AS_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_32_AS_8_8_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + 
LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_16_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_16_16_MPEG_INTERLACED - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXN - {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8_UNORM, - LoadMode::kDXNToRG8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + // Appears to be luminance-alpha, like ATI 3Dc and LATC in OpenGL. Call of + // Duty 4 reads this with XW swizzle in the shader. 
+ {DXGI_FORMAT_BC5_UNORM, + DXGI_FORMAT_BC5_UNORM, + LoadMode::k128bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8_UNORM, + LoadMode::kDXNToRG8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 1}}, // k_8_8_8_8_AS_16_16_16_16 - {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - ResolveTileMode::k32bpp, false}, + {DXGI_FORMAT_R8G8B8A8_TYPELESS, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_R8G8B8A8_SNORM, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_DXT1_AS_16_16_16_16 - {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT1ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + {DXGI_FORMAT_BC1_UNORM, + DXGI_FORMAT_BC1_UNORM, + LoadMode::k64bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT1ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT2_3_AS_16_16_16_16 - {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT3ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + {DXGI_FORMAT_BC2_UNORM, + DXGI_FORMAT_BC2_UNORM, + LoadMode::k128bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT3ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT4_5_AS_16_16_16_16 - {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8G8B8A8_UNORM, - LoadMode::kDXT5ToRGBA8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - false}, + 
{DXGI_FORMAT_BC3_UNORM, + DXGI_FORMAT_BC3_UNORM, + LoadMode::k128bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8G8B8A8_UNORM, + LoadMode::kDXT5ToRGBA8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_2_10_10_10_AS_16_16_16_16 - {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, - LoadMode::k32bpb, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R10G10B10A2_UNORM, - ResolveTileMode::k32bpp, false}, + {DXGI_FORMAT_R10G10B10A2_UNORM, + DXGI_FORMAT_R10G10B10A2_UNORM, + LoadMode::k32bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R10G10B10A2_UNORM, + ResolveTileMode::k32bpp, + {0, 1, 2, 3}}, // k_10_11_11_AS_16_16_16_16 - {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR11G11B10ToRGBA16SNorm, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR11G11B10AsRGBA16, - false}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_UNORM, + LoadMode::kR11G11B10ToRGBA16, + DXGI_FORMAT_R16G16B16A16_SNORM, + LoadMode::kR11G11B10ToRGBA16SNorm, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_UNORM, + ResolveTileMode::kR11G11B10AsRGBA16, + {0, 1, 2, 3}}, // k_11_11_10_AS_16_16_16_16 - {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM, - LoadMode::kR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM, - LoadMode::kR10G11B11ToRGBA16SNorm, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_R16G16B16A16_UNORM, ResolveTileMode::kR10G11B11AsRGBA16, - false}, + {DXGI_FORMAT_R16G16B16A16_TYPELESS, + DXGI_FORMAT_R16G16B16A16_UNORM, + LoadMode::kR10G11B11ToRGBA16, + DXGI_FORMAT_R16G16B16A16_SNORM, + LoadMode::kR10G11B11ToRGBA16SNorm, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R16G16B16A16_UNORM, + 
ResolveTileMode::kR10G11B11AsRGBA16, + {0, 1, 2, 3}}, // k_32_32_32_FLOAT - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT3A - {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, LoadMode::kDXT3A, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, true}, + // R8_UNORM has the same size as BC2, but doesn't have the 4x4 size + // alignment requirement. + {DXGI_FORMAT_R8_UNORM, + DXGI_FORMAT_R8_UNORM, + LoadMode::kDXT3A, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_DXT5A - {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, LoadMode::k64bpb, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_R8_UNORM, - LoadMode::kDXT5AToR8, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, - true}, + {DXGI_FORMAT_BC4_UNORM, + DXGI_FORMAT_BC4_UNORM, + LoadMode::k64bpb, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_R8_UNORM, + LoadMode::kDXT5AToR8, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 0, 0, 0}}, // k_CTX1 - {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, LoadMode::kCTX1, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_R8G8_UNORM, + DXGI_FORMAT_R8G8_UNORM, + LoadMode::kCTX1, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_DXT3A_AS_1_1_1_1 - 
{DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM, - LoadMode::kDXT3AAs1111, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_B4G4R4A4_UNORM, + DXGI_FORMAT_B4G4R4A4_UNORM, + LoadMode::kDXT3AAs1111, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_8_8_8_8_GAMMA_EDRAM // Not usable as a texture. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, // k_2_10_10_10_FLOAT_EDRAM // Not usable as a texture. - {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, - DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, - LoadMode::kUnknown, DXGI_FORMAT_UNKNOWN, ResolveTileMode::kUnknown, false}, + {DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + LoadMode::kUnknown, + DXGI_FORMAT_UNKNOWN, + ResolveTileMode::kUnknown, + {0, 1, 2, 3}}, }; const char* const TextureCache::dimension_names_[4] = {"1D", "2D", "3D", @@ -1742,36 +2168,19 @@ void TextureCache::BindingInfoFromFetchConstant( key_out.endianness = fetch.endianness; if (swizzle_out != nullptr) { - uint32_t swizzle = fetch.swizzle; - // 0b000 or 0b111 mask for 0 or 1 swizzles. - uint32_t swizzle_constant = swizzle & 0b100100100100; - swizzle_constant |= (swizzle_constant >> 1) | (swizzle_constant >> 2); - // 0b000 or 0b111 mask for RGBA swizzles. 
- uint32_t swizzle_not_constant = swizzle_constant ^ 0b111111111111; - // Get rid of 6 and 7 values (to prevent device losses if the game has - // something broken) the quick and dirty way - by changing them to 4 and 5. - swizzle &= 0b101101101101 | swizzle_not_constant; - // Remap the swizzle according to the texture format. k_1_5_5_5, k_5_6_5 and - // k_4_4_4_4 already have red and blue swapped in the load shader for - // simplicity. - if (format == TextureFormat::k_6_5_5) { - // Green bits of the texture used for blue, and blue bits used for green. - // Swap 001 and 010 (XOR 011 if either 001 or 010). - uint32_t swizzle_green_or_blue = - (swizzle & 0b001001001001) ^ ((swizzle >> 1) & 0b001001001001); - swizzle ^= (swizzle_green_or_blue | (swizzle_green_or_blue << 1)) & - swizzle_not_constant; - } else if (format == TextureFormat::k_Cr_Y1_Cb_Y0_REP || - format == TextureFormat::k_Y1_Cr_Y0_Cb_REP) { - // Swap red and blue. - swizzle ^= ((~swizzle & 0b001001001001) << 1) & swizzle_not_constant; - } else if (host_formats_[uint32_t(format)].replicate_component) { - // Replicate the only component of single-component textures, which are - // emulated with red formats (including DXT3A, which uses R8 rather than - // DXT3 because the resulting size is the same, but there's no 4x4 - // alignment requirement). If not 0.0 or 1.0 (if the high bit isn't set), - // make 0 (red). - swizzle &= swizzle_constant; + uint32_t swizzle = 0; + for (uint32_t i = 0; i < 4; ++i) { + uint32_t swizzle_component = (fetch.swizzle >> (i * 3)) & 0b111; + if (swizzle_component >= 4) { + // Get rid of 6 and 7 values (to prevent device losses if the game has + // something broken) the quick and dirty way - by changing them to 4 (0) + // and 5 (1). 
+ swizzle_component &= 0b101; + } else { + swizzle_component = + host_formats_[uint32_t(format)].swizzle[swizzle_component]; + } + swizzle |= swizzle_component << (i * 3); } *swizzle_out = swizzle; } diff --git a/src/xenia/gpu/d3d12/texture_cache.h b/src/xenia/gpu/d3d12/texture_cache.h index 244fceff1..9690a50fb 100644 --- a/src/xenia/gpu/d3d12/texture_cache.h +++ b/src/xenia/gpu/d3d12/texture_cache.h @@ -266,13 +266,8 @@ class TextureCache { DXGI_FORMAT dxgi_format_resolve_tile; ResolveTileMode resolve_tile_mode; - // Whether the red component must be replicated in the SRV swizzle, for - // single-component formats. At least for DXT3A/DXT5A, this is according to - // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf - // k_8 is also used with RGBA swizzle, but assumes replicated components, in - // Halo 3 sprites, thus it appears that all single-component formats should - // have RRRR swizzle. - bool replicate_component; + // Mapping of Xenos swizzle components to DXGI format components. + uint8_t swizzle[4]; }; union TextureKey {