diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index ff7b00521..a715d0e5e 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -20,7 +20,6 @@ #include "xenia/base/ring_buffer.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/graphics_system.h" -#include "xenia/gpu/registers.h" #include "xenia/gpu/sampler_info.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/xenos.h" diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index c0f57485c..20e1495b9 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1142,8 +1142,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - auto enable_mode = static_cast( - regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7); + xenos::ModeControl enable_mode = regs.Get().edram_mode; if (enable_mode == xenos::ModeControl::kIgnore) { // Ignored. return true; @@ -1153,7 +1152,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, return IssueCopy(); } - if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) { + if (regs.Get().surface_pitch == 0) { // Doesn't actually draw. // TODO(Triang3l): Do something so memexport still works in this case maybe? // Unlikely that zero would even really be legal though. 
@@ -1164,7 +1163,8 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool tessellated; if (uint32_t(primitive_type) >= uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { - tessellated = (regs[XE_GPU_REG_VGT_OUTPUT_PATH_CNTL].u32 & 0x3) == 0x1; + tessellated = regs.Get().path_select == + xenos::VGTOutputPath::kTessellationEnable; } else { tessellated = false; } @@ -1202,8 +1202,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, bool memexport_used = memexport_used_vertex || memexport_used_pixel; bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); + auto pa_su_sc_mode_cntl = regs.Get(); if (!memexport_used_vertex && primitive_two_faced && - (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 0x3) { + pa_su_sc_mode_cntl.cull_front && pa_su_sc_mode_cntl.cull_back) { // Both sides are culled - can't be expressed in the pipeline state. return true; } @@ -1223,9 +1224,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // tessellation factors (as floats) instead of control point indices. bool adaptive_tessellation; if (tessellated) { - TessellationMode tessellation_mode = - TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3); - adaptive_tessellation = tessellation_mode == TessellationMode::kAdaptive; + xenos::TessellationMode tessellation_mode = + regs.Get().tess_mode; + adaptive_tessellation = + tessellation_mode == xenos::TessellationMode::kAdaptive; if (adaptive_tessellation && (!indexed || index_buffer_info->format != IndexFormat::kInt32)) { return false; @@ -1235,7 +1237,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // passed to vertex shader registers, especially if patches are drawn with // an index buffer. 
// https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 - if (tessellation_mode != TessellationMode::kAdaptive) { + if (tessellation_mode != xenos::TessellationMode::kAdaptive) { XELOGE( "Tessellation mode %u is not implemented yet, only adaptive is " "partially available now - report the game to Xenia developers!", @@ -1309,20 +1311,16 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, vertex_shader->GetUsedTextureMask(), pixel_shader != nullptr ? pixel_shader->GetUsedTextureMask() : 0); - // Check if early depth/stencil can be enabled explicitly by RB_DEPTHCONTROL - // or implicitly when alpha test and alpha to coverage are disabled. - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - bool early_z = false; - if (pixel_shader == nullptr) { + // Check if early depth/stencil can be enabled. + bool early_z; + if (pixel_shader) { + auto rb_colorcontrol = regs.Get(); + early_z = pixel_shader->implicit_early_z_allowed() && + (!rb_colorcontrol.alpha_test_enable || + rb_colorcontrol.alpha_func == CompareFunction::kAlways) && + !rb_colorcontrol.alpha_to_mask_enable; + } else { early_z = true; - } else if (!pixel_shader->writes_depth()) { - if (rb_depthcontrol & 0x8) { - early_z = true; - } else if (pixel_shader->implicit_early_z_allowed()) { - early_z = (!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7) && - !(rb_colorcontrol & 0x10); - } } // Create the pipeline if needed and bind it. 
@@ -1366,22 +1364,19 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, (1ull << (vfetch_index & 63))) { continue; } - uint32_t vfetch_constant_index = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2; - if ((regs[vfetch_constant_index].u32 & 0x3) != 3) { + const auto& vfetch_constant = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + if (vfetch_constant.type != 3) { XELOGW("Vertex fetch type is not 3 (fetch constant %u is %.8X %.8X)!", - vfetch_index, regs[vfetch_constant_index].u32, - regs[vfetch_constant_index + 1].u32); + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); return false; } - if (!shared_memory_->RequestRange( - regs[vfetch_constant_index].u32 & 0x1FFFFFFC, - regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC)) { + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { XELOGE( "Failed to request vertex buffer at 0x%.8X (size %u) in the shared " "memory", - regs[vfetch_constant_index].u32 & 0x1FFFFFFC, - regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC); + vfetch_constant.address << 2, vfetch_constant.size << 2); return false; } vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63); @@ -1400,31 +1395,29 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, const std::vector& memexport_stream_constants_vertex = vertex_shader->memexport_stream_constants(); for (uint32_t constant_index : memexport_stream_constants_vertex) { - const xenos::xe_gpu_memexport_stream_t* memexport_stream = - reinterpret_cast( - ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4]); - if (memexport_stream->index_count == 0) { + const auto& memexport_stream = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_000_X + constant_index * 4); + if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = - GetSupportedMemExportFormatSize(memexport_stream->format); + GetSupportedMemExportFormatSize(memexport_stream.format); 
if (memexport_format_size == 0) { - XELOGE( - "Unsupported memexport format %s", - FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) - ->name); + XELOGE("Unsupported memexport format %s", + FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format))) + ->name); return false; } - uint32_t memexport_base_address = memexport_stream->base_address; uint32_t memexport_size_dwords = - memexport_stream->index_count * memexport_format_size; + memexport_stream.index_count * memexport_format_size; // Try to reduce the number of shared memory operations when writing // different elements into the same buffer through different exports // (happens in Halo 3). bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; - if (memexport_range.base_address_dwords == memexport_base_address) { + if (memexport_range.base_address_dwords == + memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; @@ -1435,7 +1428,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; - memexport_range.base_address_dwords = memexport_base_address; + memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } @@ -1444,28 +1437,26 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, const std::vector& memexport_stream_constants_pixel = pixel_shader->memexport_stream_constants(); for (uint32_t constant_index : memexport_stream_constants_pixel) { - const xenos::xe_gpu_memexport_stream_t* memexport_stream = - reinterpret_cast( - ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4]); - if (memexport_stream->index_count == 0) { + const auto& memexport_stream = regs.Get( + 
XE_GPU_REG_SHADER_CONSTANT_256_X + constant_index * 4); + if (memexport_stream.index_count == 0) { continue; } uint32_t memexport_format_size = - GetSupportedMemExportFormatSize(memexport_stream->format); + GetSupportedMemExportFormatSize(memexport_stream.format); if (memexport_format_size == 0) { - XELOGE( - "Unsupported memexport format %s", - FormatInfo::Get(TextureFormat(uint32_t(memexport_stream->format))) - ->name); + XELOGE("Unsupported memexport format %s", + FormatInfo::Get(TextureFormat(uint32_t(memexport_stream.format))) + ->name); return false; } - uint32_t memexport_base_address = memexport_stream->base_address; uint32_t memexport_size_dwords = - memexport_stream->index_count * memexport_format_size; + memexport_stream.index_count * memexport_format_size; bool memexport_range_reused = false; for (uint32_t i = 0; i < memexport_range_count; ++i) { MemExportRange& memexport_range = memexport_ranges[i]; - if (memexport_range.base_address_dwords == memexport_base_address) { + if (memexport_range.base_address_dwords == + memexport_stream.base_address) { memexport_range.size_dwords = std::max(memexport_range.size_dwords, memexport_size_dwords); memexport_range_reused = true; @@ -1475,7 +1466,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, if (!memexport_range_reused) { MemExportRange& memexport_range = memexport_ranges[memexport_range_count++]; - memexport_range.base_address_dwords = memexport_base_address; + memexport_range.base_address_dwords = memexport_stream.base_address; memexport_range.size_dwords = memexport_size_dwords; } } @@ -1850,15 +1841,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - 
int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; - int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; - if (window_offset_x & 0x4000) { - window_offset_x |= 0x8000; - } - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } + auto pa_sc_window_offset = regs.Get(); // Supersampling replacing multisampling due to difficulties of emulating // EDRAM with multisampling with RTV/DSV (with ROV, there's MSAA), and also @@ -1868,8 +1851,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { pixel_size_x = 1; pixel_size_y = 1; } else { - MsaaSamples msaa_samples = - MsaaSamples((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3); + MsaaSamples msaa_samples = regs.Get().msaa_samples; pixel_size_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; pixel_size_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; } @@ -1889,30 +1871,30 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // box. If it's not, the position is in screen space. Since we can only use // the NDC in PC APIs, we use a viewport of the largest possible size, and // divide the position by it in translated shaders. - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + auto pa_cl_vte_cntl = regs.Get(); float viewport_scale_x = - (pa_cl_vte_cntl & (1 << 0)) + pa_cl_vte_cntl.vport_x_scale_ena ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) : 1280.0f; float viewport_scale_y = - (pa_cl_vte_cntl & (1 << 2)) + pa_cl_vte_cntl.vport_y_scale_ena ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) : 1280.0f; - float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; - float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) + float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : std::abs(viewport_scale_x); - float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : std::abs(viewport_scale_y); - float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) + float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { - viewport_offset_x += float(window_offset_x); - viewport_offset_y += float(window_offset_y); + if (regs.Get().vtx_window_offset_enable) { + viewport_offset_x += float(pa_sc_window_offset.window_x_offset); + viewport_offset_y += float(pa_sc_window_offset.window_y_offset); } D3D12_VIEWPORT viewport; viewport.TopLeftX = @@ -1941,21 +1923,22 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { } // Scissor. - uint32_t pa_sc_window_scissor_tl = - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t pa_sc_window_scissor_br = - regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; + auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); D3D12_RECT scissor; - scissor.left = pa_sc_window_scissor_tl & 0x7FFF; - scissor.top = (pa_sc_window_scissor_tl >> 16) & 0x7FFF; - scissor.right = pa_sc_window_scissor_br & 0x7FFF; - scissor.bottom = (pa_sc_window_scissor_br >> 16) & 0x7FFF; - if (!(pa_sc_window_scissor_tl & (1u << 31))) { - // !WINDOW_OFFSET_DISABLE. 
- scissor.left = std::max(scissor.left + window_offset_x, LONG(0)); - scissor.top = std::max(scissor.top + window_offset_y, LONG(0)); - scissor.right = std::max(scissor.right + window_offset_x, LONG(0)); - scissor.bottom = std::max(scissor.bottom + window_offset_y, LONG(0)); + scissor.left = pa_sc_window_scissor_tl.tl_x; + scissor.top = pa_sc_window_scissor_tl.tl_y; + scissor.right = pa_sc_window_scissor_br.br_x; + scissor.bottom = pa_sc_window_scissor_br.br_y; + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.left = + std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0)); + scissor.top = + std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0)); + scissor.right = + std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0)); + scissor.bottom = + std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0)); } scissor.left *= pixel_size_x; scissor.top *= pixel_size_y; @@ -1992,13 +1975,17 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // Stencil reference value. Per-face reference not supported by Direct3D 12, // choose the back face one only if drawing only back faces. 
- uint32_t stencil_ref; - if (primitive_two_faced && (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & 0x80) && - (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & 0x3) == 1) { - stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32 & 0xFF; + uint32_t stencil_ref_mask_reg; + auto pa_su_sc_mode_cntl = regs.Get(); + if (primitive_two_faced && + regs.Get().backface_enable && + pa_su_sc_mode_cntl.cull_front && !pa_su_sc_mode_cntl.cull_back) { + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; } else { - stencil_ref = regs[XE_GPU_REG_RB_STENCILREFMASK].u32 & 0xFF; + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK; } + uint32_t stencil_ref = + regs.Get(stencil_ref_mask_reg).stencilref; ff_stencil_ref_update_needed_ |= ff_stencil_ref_ != stencil_ref; if (ff_stencil_ref_update_needed_) { ff_stencil_ref_ = stencil_ref; @@ -2019,64 +2006,55 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES - uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - uint32_t pa_su_point_minmax = regs[XE_GPU_REG_PA_SU_POINT_MINMAX].u32; - uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32; - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; - uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; + auto pa_cl_clip_cntl = regs.Get(); + auto pa_cl_vte_cntl = regs.Get(); + auto pa_su_point_minmax = regs.Get(); + auto pa_su_point_size = regs.Get(); + auto pa_su_sc_mode_cntl = regs.Get(); + auto pa_su_vtx_cntl = regs.Get(); float rb_alpha_ref = regs[XE_GPU_REG_RB_ALPHA_REF].f32; - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_stencilrefmask = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; - uint32_t rb_stencilrefmask_bf = 
regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; - uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + auto rb_colorcontrol = regs.Get(); + auto rb_depth_info = regs.Get(); + auto rb_depthcontrol = regs.Get(); + auto rb_stencilrefmask = regs.Get(); + auto rb_stencilrefmask_bf = + regs.Get(XE_GPU_REG_RB_STENCILREFMASK_BF); + auto rb_surface_info = regs.Get(); + auto sq_context_misc = regs.Get(); + auto sq_program_cntl = regs.Get(); int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32); // Get the color info register values for each render target, and also put // some safety measures for the ROV path - disable fully aliased render // targets. Also, for ROV, exclude components that don't exist in the format // from the write mask. - uint32_t color_infos[4]; - ColorRenderTargetFormat color_formats[4]; + reg::RB_COLOR_INFO color_infos[4]; float rt_clamp[4][4]; uint32_t rt_keep_masks[4][2]; for (uint32_t i = 0; i < 4; ++i) { - uint32_t color_info; - switch (i) { - case 1: - color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - break; - case 2: - color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - break; - case 3: - color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - break; - default: - color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - } + static const uint32_t kColorInfoRegs[] = { + XE_GPU_REG_RB_COLOR_INFO, + XE_GPU_REG_RB_COLOR1_INFO, + XE_GPU_REG_RB_COLOR2_INFO, + XE_GPU_REG_RB_COLOR3_INFO, + }; + auto color_info = regs.Get(kColorInfoRegs[i]); color_infos[i] = color_info; - color_formats[i] = ColorRenderTargetFormat((color_info >> 16) & 0xF); if (IsROVUsedForEDRAM()) { // Get the mask for keeping previous color's components unmodified, // or two UINT32_MAX if no colors actually existing in the RT are written. 
DxbcShaderTranslator::ROV_GetColorFormatSystemConstants( - color_formats[i], (color_mask >> (i * 4)) & 0b1111, rt_clamp[i][0], - rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], rt_keep_masks[i][0], - rt_keep_masks[i][1]); + color_info.color_format, (color_mask >> (i * 4)) & 0b1111, + rt_clamp[i][0], rt_clamp[i][1], rt_clamp[i][2], rt_clamp[i][3], + rt_keep_masks[i][0], rt_keep_masks[i][1]); // Disable the render target if it has the same EDRAM base as another one // (with a smaller index - assume it's more important). if (rt_keep_masks[i][0] == UINT32_MAX && rt_keep_masks[i][1] == UINT32_MAX) { - uint32_t edram_base = color_info & 0xFFF; for (uint32_t j = 0; j < i; ++j) { - if (edram_base == (color_infos[j] & 0xFFF) && + if (color_info.color_base == color_infos[j].color_base && (rt_keep_masks[j][0] != UINT32_MAX || rt_keep_masks[j][1] != UINT32_MAX)) { rt_keep_masks[i][0] = UINT32_MAX; @@ -2091,20 +2069,21 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Disable depth and stencil if it aliases a color render target (for // instance, during the XBLA logo in Banjo-Kazooie, though depth writing is // already disabled there). - if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { - uint32_t edram_base_depth = rb_depth_info & 0xFFF; + bool depth_stencil_enabled = + rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; + if (IsROVUsedForEDRAM() && depth_stencil_enabled) { for (uint32_t i = 0; i < 4; ++i) { - if (edram_base_depth == (color_infos[i] & 0xFFF) && + if (rb_depth_info.depth_base == color_infos[i].color_base && (rt_keep_masks[i][0] != UINT32_MAX || rt_keep_masks[i][1] != UINT32_MAX)) { - rb_depthcontrol &= ~(uint32_t(0x1 | 0x2)); + depth_stencil_enabled = false; break; } } } // Get viewport Z scale - needed for flags and ROV output. - float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) + float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; @@ -2126,18 +2105,18 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // = false: multiply the Z coordinate by 1/W0. // 10: VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal // to get 1/W0. - if (pa_cl_vte_cntl & (1 << 8)) { + if (pa_cl_vte_cntl.vtx_xy_fmt) { flags |= DxbcShaderTranslator::kSysFlag_XYDividedByW; } - if (pa_cl_vte_cntl & (1 << 9)) { + if (pa_cl_vte_cntl.vtx_z_fmt) { flags |= DxbcShaderTranslator::kSysFlag_ZDividedByW; } - if (pa_cl_vte_cntl & (1 << 10)) { + if (pa_cl_vte_cntl.vtx_w0_fmt) { flags |= DxbcShaderTranslator::kSysFlag_WNotReciprocal; } // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. - if (!(pa_cl_clip_cntl & (1 << 16))) { - flags |= (pa_cl_clip_cntl & 0b111111) + if (!pa_cl_clip_cntl.clip_disable) { + flags |= (pa_cl_clip_cntl.value & 0b111111) << DxbcShaderTranslator::kSysFlag_UserClipPlane0_Shift; } // Reversed depth. @@ -2145,8 +2124,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_ReverseZ; } // Alpha test. - if (rb_colorcontrol & 0x8) { - flags |= (rb_colorcontrol & 0x7) + if (rb_colorcontrol.alpha_test_enable) { + flags |= uint32_t(rb_colorcontrol.alpha_func.value()) << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; } else { flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | @@ -2154,25 +2133,25 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; } // Alpha to coverage. - if (rb_colorcontrol & 0x10) { + if (rb_colorcontrol.alpha_to_mask_enable) { flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage; } // Gamma writing. 
for (uint32_t i = 0; i < 4; ++i) { - if (color_formats[i] == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { + if (color_infos[i].color_format == + ColorRenderTargetFormat::k_8_8_8_8_GAMMA) { flags |= DxbcShaderTranslator::kSysFlag_Color0Gamma << i; } } - if (IsROVUsedForEDRAM() && (rb_depthcontrol & (0x1 | 0x2))) { + if (IsROVUsedForEDRAM() && depth_stencil_enabled) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthStencil; - if (DepthRenderTargetFormat((rb_depth_info >> 16) & 0x1) == - DepthRenderTargetFormat::kD24FS8) { + if (rb_depth_info.depth_format == DepthRenderTargetFormat::kD24FS8) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; } - if (rb_depthcontrol & 0x2) { - flags |= ((rb_depthcontrol >> 4) & 0x7) + if (rb_depthcontrol.z_enable) { + flags |= uint32_t(rb_depthcontrol.zfunc.value()) << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; - if (rb_depthcontrol & 0x4) { + if (rb_depthcontrol.z_write_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; } } else { @@ -2182,7 +2161,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_ROVDepthPassIfEqual | DxbcShaderTranslator::kSysFlag_ROVDepthPassIfGreater; } - if (rb_depthcontrol & 0x1) { + if (rb_depthcontrol.stencil_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVStencilTest; } if (early_z) { @@ -2223,9 +2202,9 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( index_endian_and_edge_factors; // User clip planes (UCP_ENA_#), when not CLIP_DISABLE. - if (!(pa_cl_clip_cntl & (1 << 16))) { + if (!pa_cl_clip_cntl.clip_disable) { for (uint32_t i = 0; i < 6; ++i) { - if (!(pa_cl_clip_cntl & (1 << i))) { + if (!(pa_cl_clip_cntl.value & (1 << i))) { continue; } const float* ucp = ®s[XE_GPU_REG_PA_CL_UCP_0_X + i * 4].f32; @@ -2249,45 +2228,49 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // different register (and if there's such register at all). 
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; + // When VPORT_Z_SCALE_ENA is disabled, Z/W is directly what is expected to be + // written to the depth buffer, and for some reason DX_CLIP_SPACE_DEF isn't + // set in this case in draws in games. bool gl_clip_space_def = - !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); + !pa_cl_clip_cntl.dx_clip_space_def && pa_cl_vte_cntl.vport_z_scale_ena; float ndc_scale_x, ndc_scale_y, ndc_scale_z; - if (primitive_two_faced && (pa_su_sc_mode_cntl & 0x3) == 0x3) { + if (primitive_two_faced && pa_su_sc_mode_cntl.cull_front && + pa_su_sc_mode_cntl.cull_back) { // Kill all primitives if both faces are culled, but the vertex shader still // needs to do memexport (not NaN because of comparison for setting the // dirty flag). ndc_scale_x = ndc_scale_y = ndc_scale_z = 0; } else { - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; } else { ndc_scale_x = 1.0f / 1280.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; } else { ndc_scale_y = -1.0f / 1280.0f; } ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; } - float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; - float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; + float ndc_offset_x = pa_cl_vte_cntl.vport_x_offset_ena ? 0.0f : -1.0f; + float ndc_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 0.0f : 1.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; // Like in OpenGL - VPOS giving pixel centers. // TODO(Triang3l): Check if ps_param_gen should give center positions in // OpenGL mode on the Xbox 360. 
float pixel_half_pixel_offset = 0.5f; - if (cvars::d3d12_half_pixel_offset && !(pa_su_vtx_cntl & (1 << 0))) { + if (cvars::d3d12_half_pixel_offset && !pa_su_vtx_cntl.pix_center) { // Signs are hopefully correct here, tested in GTA IV on both clearing // (without a viewport) and drawing things near the edges of the screen. - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { if (viewport_scale_x != 0.0f) { ndc_offset_x += 0.5f / viewport_scale_x; } } else { ndc_offset_x += 1.0f / 2560.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { if (viewport_scale_y != 0.0f) { ndc_offset_y += 0.5f / viewport_scale_y; } @@ -2313,10 +2296,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; // Point size. - float point_size_x = float(pa_su_point_size >> 16) * 0.125f; - float point_size_y = float(pa_su_point_size & 0xFFFF) * 0.125f; - float point_size_min = float(pa_su_point_minmax & 0xFFFF) * 0.125f; - float point_size_max = float(pa_su_point_minmax >> 16) * 0.125f; + float point_size_x = float(pa_su_point_size.width) * 0.125f; + float point_size_y = float(pa_su_point_size.height) * 0.125f; + float point_size_min = float(pa_su_point_minmax.min_size) * 0.125f; + float point_size_max = float(pa_su_point_minmax.max_size) * 0.125f; dirty |= system_constants_.point_size[0] != point_size_x; dirty |= system_constants_.point_size[1] != point_size_y; dirty |= system_constants_.point_size_min_max[0] != point_size_min; @@ -2326,13 +2309,13 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.point_size_min_max[0] = point_size_min; system_constants_.point_size_min_max[1] = point_size_max; float point_screen_to_ndc_x, point_screen_to_ndc_y; - if (pa_cl_vte_cntl & (1 << 0)) { + if (pa_cl_vte_cntl.vport_x_scale_ena) { point_screen_to_ndc_x = (viewport_scale_x != 0.0f) ? 
(0.5f / viewport_scale_x) : 0.0f; } else { point_screen_to_ndc_x = 1.0f / 2560.0f; } - if (pa_cl_vte_cntl & (1 << 2)) { + if (pa_cl_vte_cntl.vport_y_scale_ena) { point_screen_to_ndc_y = (viewport_scale_y != 0.0f) ? (-0.5f / viewport_scale_y) : 0.0f; } else { @@ -2345,15 +2328,16 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Pixel position register. uint32_t pixel_pos_reg = - (sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; + sq_program_cntl.param_gen ? sq_context_misc.param_gen_pos : UINT_MAX; dirty |= system_constants_.pixel_pos_reg != pixel_pos_reg; system_constants_.pixel_pos_reg = pixel_pos_reg; // Log2 of sample count, for scaling VPOS with SSAA (without ROV) and for // EDRAM address calculation with MSAA (with ROV). - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t sample_count_log2_x = msaa_samples >= MsaaSamples::k4X ? 1 : 0; - uint32_t sample_count_log2_y = msaa_samples >= MsaaSamples::k2X ? 1 : 0; + uint32_t sample_count_log2_x = + rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 1 : 0; + uint32_t sample_count_log2_y = + rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 1 : 0; dirty |= system_constants_.sample_count_log2[0] != sample_count_log2_x; dirty |= system_constants_.sample_count_log2[1] != sample_count_log2_y; system_constants_.sample_count_log2[0] = sample_count_log2_x; @@ -2365,43 +2349,22 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // EDRAM pitch for ROV writing. if (IsROVUsedForEDRAM()) { - uint32_t edram_pitch_tiles = ((std::min(rb_surface_info & 0x3FFFu, 2560u) * - (msaa_samples >= MsaaSamples::k4X ? 2 : 1)) + - 79) / - 80; + uint32_t edram_pitch_tiles = + ((std::min(rb_surface_info.surface_pitch.value(), 2560u) * + (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 
2 : 1)) + + 79) / + 80; dirty |= system_constants_.edram_pitch_tiles != edram_pitch_tiles; system_constants_.edram_pitch_tiles = edram_pitch_tiles; } // Color exponent bias and output index mapping or ROV render target writing. - bool colorcontrol_blend_enable = (rb_colorcontrol & 0x20) == 0; for (uint32_t i = 0; i < 4; ++i) { - uint32_t color_info = color_infos[i]; - uint32_t blend_factors_ops; - if (colorcontrol_blend_enable) { - switch (i) { - case 1: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32; - break; - case 2: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32; - break; - case 3: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32; - break; - default: - blend_factors_ops = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32; - break; - } - blend_factors_ops &= 0x1FFF1FFF; - } else { - blend_factors_ops = 0x00010001; - } + reg::RB_COLOR_INFO color_info = color_infos[i]; // Exponent bias is in bits 20:25 of RB_COLOR_INFO. - int32_t color_exp_bias = int32_t(color_info << 6) >> 26; - ColorRenderTargetFormat color_format = color_formats[i]; - if (color_format == ColorRenderTargetFormat::k_16_16 || - color_format == ColorRenderTargetFormat::k_16_16_16_16) { + int32_t color_exp_bias = color_info.color_exp_bias; + if (color_info.color_format == ColorRenderTargetFormat::k_16_16 || + color_info.color_format == ColorRenderTargetFormat::k_16_16_16_16) { // On the Xbox 360, k_16_16_EDRAM and k_16_16_16_16_EDRAM internally have // -32...32 range and expect shaders to give -32...32 values, but they're // emulated using normalized RG16/RGBA16 when not using the ROV, so the @@ -2427,7 +2390,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_rt_keep_mask[i][1] = rt_keep_masks[i][1]; if (rt_keep_masks[i][0] != UINT32_MAX || rt_keep_masks[i][1] != UINT32_MAX) { - uint32_t rt_base_dwords_scaled = (color_info & 0xFFF) * 1280; + uint32_t rt_base_dwords_scaled = color_info.color_base * 1280; if 
(texture_cache_->IsResolutionScale2X()) { rt_base_dwords_scaled <<= 2; } @@ -2435,8 +2398,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( rt_base_dwords_scaled; system_constants_.edram_rt_base_dwords_scaled[i] = rt_base_dwords_scaled; - uint32_t format_flags = - DxbcShaderTranslator::ROV_AddColorFormatFlags(color_format); + uint32_t format_flags = DxbcShaderTranslator::ROV_AddColorFormatFlags( + color_info.color_format); dirty |= system_constants_.edram_rt_format_flags[i] != format_flags; system_constants_.edram_rt_format_flags[i] = format_flags; // Can't do float comparisons here because NaNs would result in always @@ -2445,6 +2408,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( 4 * sizeof(float)) != 0; std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)); + static const uint32_t kBlendControlRegs[] = { + XE_GPU_REG_RB_BLENDCONTROL_0, + XE_GPU_REG_RB_BLENDCONTROL_1, + XE_GPU_REG_RB_BLENDCONTROL_2, + XE_GPU_REG_RB_BLENDCONTROL_3, + }; + uint32_t blend_factors_ops = + regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -2465,7 +2436,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( resolution_square_scale; system_constants_.edram_resolution_square_scale = resolution_square_scale; - uint32_t depth_base_dwords = (rb_depth_info & 0xFFF) * 1280; + uint32_t depth_base_dwords = rb_depth_info.depth_base * 1280; dirty |= system_constants_.edram_depth_base_dwords != depth_base_dwords; system_constants_.edram_depth_base_dwords = depth_base_dwords; @@ -2474,7 +2445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float depth_range_scale = std::abs(viewport_scale_z); dirty |= system_constants_.edram_depth_range_scale != depth_range_scale; system_constants_.edram_depth_range_scale = depth_range_scale; - float depth_range_offset = (pa_cl_vte_cntl & (1 << 5)) + 
float depth_range_offset = pa_cl_vte_cntl.vport_z_offset_ena ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; if (viewport_scale_z < 0.0f) { @@ -2490,20 +2461,20 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float poly_offset_front_scale = 0.0f, poly_offset_front_offset = 0.0f; float poly_offset_back_scale = 0.0f, poly_offset_back_offset = 0.0f; if (primitive_two_faced) { - if (pa_su_sc_mode_cntl & (1 << 11)) { + if (pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset_front_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_front_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; } - if (pa_su_sc_mode_cntl & (1 << 12)) { + if (pa_su_sc_mode_cntl.poly_offset_back_enable) { poly_offset_back_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; poly_offset_back_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; } } else { - if (pa_su_sc_mode_cntl & (1 << 13)) { + if (pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset_front_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; poly_offset_front_offset = @@ -2533,39 +2504,43 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( poly_offset_back_offset; system_constants_.edram_poly_offset_back_offset = poly_offset_back_offset; - if (rb_depthcontrol & 0x1) { - uint32_t stencil_value; - - stencil_value = rb_stencilrefmask & 0xFF; - dirty |= system_constants_.edram_stencil_front_reference != stencil_value; - system_constants_.edram_stencil_front_reference = stencil_value; - stencil_value = (rb_stencilrefmask >> 8) & 0xFF; - dirty |= system_constants_.edram_stencil_front_read_mask != stencil_value; - system_constants_.edram_stencil_front_read_mask = stencil_value; - stencil_value = (rb_stencilrefmask >> 16) & 0xFF; + if (depth_stencil_enabled && rb_depthcontrol.stencil_enable) { + dirty |= system_constants_.edram_stencil_front_reference != + rb_stencilrefmask.stencilref; + system_constants_.edram_stencil_front_reference = + 
rb_stencilrefmask.stencilref; + dirty |= system_constants_.edram_stencil_front_read_mask != + rb_stencilrefmask.stencilmask; + system_constants_.edram_stencil_front_read_mask = + rb_stencilrefmask.stencilmask; + dirty |= system_constants_.edram_stencil_front_write_mask != + rb_stencilrefmask.stencilwritemask; + system_constants_.edram_stencil_front_write_mask = + rb_stencilrefmask.stencilwritemask; + uint32_t stencil_func_ops = + (rb_depthcontrol.value >> 8) & ((1 << 12) - 1); dirty |= - system_constants_.edram_stencil_front_write_mask != stencil_value; - system_constants_.edram_stencil_front_write_mask = stencil_value; - stencil_value = (rb_depthcontrol >> 8) & ((1 << 12) - 1); - dirty |= system_constants_.edram_stencil_front_func_ops != stencil_value; - system_constants_.edram_stencil_front_func_ops = stencil_value; + system_constants_.edram_stencil_front_func_ops != stencil_func_ops; + system_constants_.edram_stencil_front_func_ops = stencil_func_ops; - if (primitive_two_faced && (rb_depthcontrol & 0x80)) { - stencil_value = rb_stencilrefmask_bf & 0xFF; - dirty |= - system_constants_.edram_stencil_back_reference != stencil_value; - system_constants_.edram_stencil_back_reference = stencil_value; - stencil_value = (rb_stencilrefmask_bf >> 8) & 0xFF; - dirty |= - system_constants_.edram_stencil_back_read_mask != stencil_value; - system_constants_.edram_stencil_back_read_mask = stencil_value; - stencil_value = (rb_stencilrefmask_bf >> 16) & 0xFF; - dirty |= - system_constants_.edram_stencil_back_write_mask != stencil_value; - system_constants_.edram_stencil_back_write_mask = stencil_value; - stencil_value = (rb_depthcontrol >> 20) & ((1 << 12) - 1); - dirty |= system_constants_.edram_stencil_back_func_ops != stencil_value; - system_constants_.edram_stencil_back_func_ops = stencil_value; + if (primitive_two_faced && rb_depthcontrol.backface_enable) { + dirty |= system_constants_.edram_stencil_back_reference != + rb_stencilrefmask_bf.stencilref; + 
system_constants_.edram_stencil_back_reference = + rb_stencilrefmask_bf.stencilref; + dirty |= system_constants_.edram_stencil_back_read_mask != + rb_stencilrefmask_bf.stencilmask; + system_constants_.edram_stencil_back_read_mask = + rb_stencilrefmask_bf.stencilmask; + dirty |= system_constants_.edram_stencil_back_write_mask != + rb_stencilrefmask_bf.stencilwritemask; + system_constants_.edram_stencil_back_write_mask = + rb_stencilrefmask_bf.stencilwritemask; + uint32_t stencil_func_ops_bf = + (rb_depthcontrol.value >> 20) & ((1 << 12) - 1); + dirty |= system_constants_.edram_stencil_back_func_ops != + stencil_func_ops_bf; + system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; } else { dirty |= std::memcmp(system_constants_.edram_stencil_back, system_constants_.edram_stencil_front, diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 89286be18..67d20b3da 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -207,8 +207,17 @@ bool PipelineCache::EnsureShadersTranslated(D3D12Shader* vertex_shader, assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 || regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); - xenos::xe_gpu_program_cntl_t sq_program_cntl; - sq_program_cntl.dword_0 = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; + auto sq_program_cntl = regs.Get(); + + // Normal vertex shaders only, for now. 
+ assert_true(sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition1Vector || + sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition2VectorsSprite || + sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass); + assert_false(sq_program_cntl.gen_index_vtx); + if (!vertex_shader->is_translated() && !TranslateShader(vertex_shader, sq_program_cntl, tessellated, primitive_type)) { @@ -294,8 +303,7 @@ bool PipelineCache::ConfigurePipeline( } bool PipelineCache::TranslateShader(D3D12Shader* shader, - xenos::xe_gpu_program_cntl_t cntl, - bool tessellated, + reg::SQ_PROGRAM_CNTL cntl, bool tessellated, PrimitiveType primitive_type) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. @@ -385,12 +393,12 @@ bool PipelineCache::GetCurrentStateDescription( // Primitive topology type, tessellation mode and geometry shader. if (tessellated) { - switch (TessellationMode(regs[XE_GPU_REG_VGT_HOS_CNTL].u32 & 0x3)) { - case TessellationMode::kContinuous: + switch (regs.Get().tess_mode) { + case xenos::TessellationMode::kContinuous: description_out.tessellation_mode = PipelineTessellationMode::kContinuous; break; - case TessellationMode::kAdaptive: + case xenos::TessellationMode::kAdaptive: description_out.tessellation_mode = cvars::d3d12_tessellation_adaptive ? PipelineTessellationMode::kAdaptive @@ -559,20 +567,10 @@ bool PipelineCache::GetCurrentStateDescription( // CLIP_DISABLE description_out.depth_clip = (regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0; - // TODO(DrChat): This seem to differ. Need to examine this. - // https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11 - // ZCLIP_NEAR_DISABLE - // description_out.depth_clip = (PA_CL_CLIP_CNTL & (1 << 26)) == 0; - // RASTERIZER_DISABLE - // Disable rendering in command processor if PA_CL_CLIP_CNTL & (1 << 22)? 
if (edram_rov_used_) { description_out.rov_msaa = ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; - } - - if (!edram_rov_used_) { - uint32_t rb_colorcontrol = regs[XE_GPU_REG_RB_COLORCONTROL].u32; - + } else { // Depth/stencil. No stencil, always passing depth test and no depth writing // means depth disabled. if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { @@ -711,7 +709,7 @@ bool PipelineCache::GetCurrentStateDescription( rt.format = RenderTargetCache::GetBaseColorFormat( ColorRenderTargetFormat((color_info >> 16) & 0xF)); rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; - if (!(rb_colorcontrol & 0x20) && rt.write_mask) { + if (rt.write_mask) { rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 179b851d0..ba5a1a4b6 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -171,7 +171,7 @@ class PipelineCache { PipelineRenderTarget render_targets[4]; }; - bool TranslateShader(D3D12Shader* shader, xenos::xe_gpu_program_cntl_t cntl, + bool TranslateShader(D3D12Shader* shader, reg::SQ_PROGRAM_CNTL cntl, bool tessellated, PrimitiveType primitive_type); bool GetCurrentStateDescription( diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index e199832dd..11d2c6e69 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -34,7 +34,6 @@ namespace d3d12 { constexpr uint32_t SharedMemory::kBufferSizeLog2; constexpr uint32_t SharedMemory::kBufferSize; -constexpr uint32_t SharedMemory::kAddressMask; constexpr uint32_t SharedMemory::kHeapSizeLog2; constexpr uint32_t SharedMemory::kHeapSize; constexpr uint32_t SharedMemory::kWatchBucketSizeLog2; @@ -198,10 +197,9 @@ void SharedMemory::UnregisterGlobalWatch(GlobalWatchHandle handle) { 
SharedMemory::WatchHandle SharedMemory::WatchMemoryRange( uint32_t start, uint32_t length, WatchCallback callback, void* callback_context, void* callback_data, uint64_t callback_argument) { - if (length == 0) { + if (length == 0 || start >= kBufferSize) { return nullptr; } - start &= kAddressMask; length = std::min(length, kBufferSize - start); uint32_t watch_page_first = start >> page_size_log2_; uint32_t watch_page_last = (start + length - 1) >> page_size_log2_; @@ -278,9 +276,7 @@ bool SharedMemory::MakeTilesResident(uint32_t start, uint32_t length) { // Some texture is empty, for example - safe to draw in this case. return true; } - start &= kAddressMask; - if ((kBufferSize - start) < length) { - // Exceeds the physical address space. + if (start > kBufferSize || (kBufferSize - start) < length) { return false; } @@ -343,9 +339,7 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { // Some texture is empty, for example - safe to draw in this case. return true; } - start &= kAddressMask; - if ((kBufferSize - start) < length) { - // Exceeds the physical address space. + if (start > kBufferSize || (kBufferSize - start) < length) { return false; } uint32_t last = start + length - 1; @@ -433,8 +427,7 @@ void SharedMemory::FireWatches(uint32_t page_first, uint32_t page_last, } void SharedMemory::RangeWrittenByGPU(uint32_t start, uint32_t length) { - start &= kAddressMask; - if (length == 0) { + if (length == 0 || start >= kBufferSize) { return; } length = std::min(length, kBufferSize - start); diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index dc1869fbd..1b2d03834 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -138,7 +138,6 @@ class SharedMemory { // The 512 MB tiled buffer. 
static constexpr uint32_t kBufferSizeLog2 = 29; static constexpr uint32_t kBufferSize = 1 << kBufferSizeLog2; - static constexpr uint32_t kAddressMask = kBufferSize - 1; ID3D12Resource* buffer_ = nullptr; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address_ = 0; D3D12_RESOURCE_STATES buffer_state_ = D3D12_RESOURCE_STATE_COPY_DEST; diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 5e785bf6e..f1de01c3d 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -416,9 +416,13 @@ void DxbcShaderTranslator::ConvertPWLGamma( } void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { + if (register_count() < 1) { + return; + } + // Vertex index is in an input bound to SV_VertexID, byte swapped according to - // xe_vertex_index_endian_and_edge_factors system constant and written to GPR - // 0 (which is always present because register_count includes +1). + // xe_vertex_index_endian_and_edge_factors system constant and written to + // GPR 0. // xe_vertex_index_endian_and_edge_factors & 0b11 is: // - 00 for no swap. @@ -756,157 +760,161 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Write the vertex index to GPR 0. StartVertexShader_LoadVertexIndex(); } else if (IsDxbcDomainShader()) { - uint32_t temp_register_operand_length = - uses_register_dynamic_addressing() ? 3 : 2; - - // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for - // triangle patches), and also set the domain in STAT. - uint32_t domain_location_mask, domain_location_swizzle; - if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { - domain_location_mask = 0b0111; - // ZYX swizzle with r1.y == 0, according to the water shader in - // Banjo-Kazooie: Nuts & Bolts. - domain_location_swizzle = 0b00000110; - stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; - } else { - // TODO(Triang3l): Support line patches. 
- assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); - // According to the ground shader in Viva Pinata, though it's impossible - // (as of December 12th, 2018) to test there since it possibly requires - // memexport for ground control points (the memory region with them is - // filled with zeros). - domain_location_mask = 0b0110; - domain_location_swizzle = 0b00000100; - stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; - } - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( - 2 + temp_register_operand_length)); - if (uses_register_dynamic_addressing()) { - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2)); - shader_code_.push_back(0); - } else { - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1)); - } - shader_code_.push_back(0); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, domain_location_swizzle, 0)); - ++stat_.instruction_count; - if (uses_register_dynamic_addressing()) { - ++stat_.array_instruction_count; - } else { - ++stat_.mov_instruction_count; - } - assert_true(register_count() >= 2); + if (register_count() != 0) { + uint32_t temp_register_operand_length = + uses_register_dynamic_addressing() ? 3 : 2; - // Copy the primitive index to r0.x (for quad patches) or r1.x (for - // triangle patches) as a float. - // When using indexable temps, copy through a r# because x# are apparently - // only accessible via mov. - // TODO(Triang3l): Investigate what should be written for primitives (or - // even control points) for non-adaptive tessellation modes (they may - // possibly have an index buffer). - // TODO(Triang3l): Support line patches. - uint32_t primitive_id_gpr_index = - patch_primitive_type() == PrimitiveType::kTrianglePatch ? 
1 : 0; - - if (register_count() > primitive_id_gpr_index) { - uint32_t primitive_id_temp = uses_register_dynamic_addressing() - ? PushSystemTemp() - : primitive_id_gpr_index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(primitive_id_temp); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0)); - ++stat_.instruction_count; - ++stat_.conversion_instruction_count; - if (uses_register_dynamic_addressing()) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2)); - shader_code_.push_back(0); - shader_code_.push_back(primitive_id_gpr_index); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(primitive_id_temp); - ++stat_.instruction_count; - ++stat_.array_instruction_count; - // Release primitive_id_temp. - PopSystemTemp(); + // Copy the domain location to r0.yz (for quad patches) or r0.xyz (for + // triangle patches), and also set the domain in STAT. + uint32_t domain_location_mask, domain_location_swizzle; + if (patch_primitive_type() == PrimitiveType::kTrianglePatch) { + domain_location_mask = 0b0111; + // ZYX swizzle with r1.y == 0, according to the water shader in + // Banjo-Kazooie: Nuts & Bolts. + domain_location_swizzle = 0b00000110; + stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_TRI; + } else { + // TODO(Triang3l): Support line patches. 
+ assert_true(patch_primitive_type() == PrimitiveType::kQuadPatch); + // According to the ground shader in Viva Pinata, though it's impossible + // (as of December 12th, 2018) to test there since it possibly requires + // memexport for ground control points (the memory region with them is + // filled with zeros). + domain_location_mask = 0b0110; + domain_location_swizzle = 0b00000100; + stat_.tessellator_domain = D3D11_SB_TESSELLATOR_DOMAIN_QUAD; } - } - - if (register_count() >= 2) { - // Write the swizzle of the barycentric/UV coordinates to r1.x (for quad - // patches) or r1.y (for triangle patches). It appears that the - // tessellator offloads the reordering of coordinates for edges to game - // shaders. - // - // In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge - // factors), the shader multiplies the first control point's position by - // r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before - // doing that it swizzles r0.xyz the following way depending on the value - // in r1.y: - // - ZXY for 1.0. - // - YZX for 2.0. - // - XZY for 4.0. - // - YXZ for 5.0. - // - ZYX for 6.0. - // Possibly, the logic here is that the value itself is the amount of - // rotation of the swizzle to the right, and 1 << 2 is set when the - // swizzle needs to be flipped before rotating. - // - // In Viva Pinata (quad patches with per-edge factors - not possible to - // test however as of December 12th, 2018), if we assume that r0.y is V - // and r0.z is U, the factors each control point value is multiplied by - // are the following: - // - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle). - // - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ). - // - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX). - // - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY). 
- // According to the control point order at - // https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt - // the first is located at (0,0), the second at (0,1), the third at (1,0) - // and the fourth at (1,1). So, swizzle index 0 appears to be the correct - // one. But, this hasn't been tested yet. - // - // Direct3D 12 appears to be passing the coordinates in a consistent - // order, so we can just use ZYX for triangle patches. - // - // TODO(Triang3l): Support line patches. - uint32_t domain_location_swizzle_mask = - patch_primitive_type() == PrimitiveType::kTrianglePatch ? 0b0010 - : 0b0001; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( - 3 + temp_register_operand_length)); + 2 + temp_register_operand_length)); if (uses_register_dynamic_addressing()) { - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, - domain_location_swizzle_mask, 2)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, domain_location_mask, 2)); shader_code_.push_back(0); } else { shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1)); + D3D10_SB_OPERAND_TYPE_TEMP, domain_location_mask, 1)); } - shader_code_.push_back(1); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back(0); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, + domain_location_swizzle, 0)); ++stat_.instruction_count; if (uses_register_dynamic_addressing()) { ++stat_.array_instruction_count; } else { ++stat_.mov_instruction_count; } + + // Copy the primitive index to r0.x (for quad patches) or r1.x (for + // triangle patches) as a float. + // When using indexable temps, copy through a r# because x# are apparently + // only accessible via mov. 
+ // TODO(Triang3l): Investigate what should be written for primitives (or + // even control points) for non-adaptive tessellation modes (they may + // possibly have an index buffer). + // TODO(Triang3l): Support line patches. + uint32_t primitive_id_gpr_index = + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 1 : 0; + + if (register_count() > primitive_id_gpr_index) { + uint32_t primitive_id_temp = uses_register_dynamic_addressing() + ? PushSystemTemp() + : primitive_id_gpr_index; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(primitive_id_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, 0)); + ++stat_.instruction_count; + ++stat_.conversion_instruction_count; + if (uses_register_dynamic_addressing()) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, 0b0001, 2)); + shader_code_.push_back(0); + shader_code_.push_back(primitive_id_gpr_index); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(primitive_id_temp); + ++stat_.instruction_count; + ++stat_.array_instruction_count; + // Release primitive_id_temp. + PopSystemTemp(); + } + } + + if (register_count() >= 2) { + // Write the swizzle of the barycentric/UV coordinates to r1.x (for quad + // patches) or r1.y (for triangle patches). It appears that the + // tessellator offloads the reordering of coordinates for edges to game + // shaders. 
+ // + // In Banjo-Kazooie: Nuts & Bolts (triangle patches with per-edge + // factors), the shader multiplies the first control point's position by + // r0.z, the second CP's by r0.y, and the third CP's by r0.x. But before + // doing that it swizzles r0.xyz the following way depending on the + // value in r1.y: + // - ZXY for 1.0. + // - YZX for 2.0. + // - XZY for 4.0. + // - YXZ for 5.0. + // - ZYX for 6.0. + // Possibly, the logic here is that the value itself is the amount of + // rotation of the swizzle to the right, and 1 << 2 is set when the + // swizzle needs to be flipped before rotating. + // + // In Viva Pinata (quad patches with per-edge factors - not possible to + // test however as of December 12th, 2018), if we assume that r0.y is V + // and r0.z is U, the factors each control point value is multiplied by + // are the following: + // - (1-v)*(1-u), v*(1-u), (1-v)*u, v*u for 0.0 (base swizzle). + // - v*(1-u), (1-v)*(1-u), v*u, (1-v)*u for 1.0 (YXWZ). + // - v*u, (1-v)*u, v*(1-u), (1-v)*(1-u) for 2.0 (WZYX). + // - (1-v)*u, v*u, (1-v)*(1-u), v*(1-u) for 3.0 (ZWXY). + // According to the control point order at + // https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_vertex_shader_tessellator.txt + // the first is located at (0,0), the second at (0,1), the third at + // (1,0) and the fourth at (1,1). So, swizzle index 0 appears to be the + // correct one. But, this hasn't been tested yet. + // + // Direct3D 12 appears to be passing the coordinates in a consistent + // order, so we can just use ZYX for triangle patches. + // + // TODO(Triang3l): Support line patches. + uint32_t domain_location_swizzle_mask = + patch_primitive_type() == PrimitiveType::kTrianglePatch ? 
0b0010 + : 0b0001; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( + 3 + temp_register_operand_length)); + if (uses_register_dynamic_addressing()) { + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, + domain_location_swizzle_mask, 2)); + shader_code_.push_back(0); + } else { + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, domain_location_swizzle_mask, 1)); + } + shader_code_.push_back(1); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0); + ++stat_.instruction_count; + if (uses_register_dynamic_addressing()) { + ++stat_.array_instruction_count; + } else { + ++stat_.mov_instruction_count; + } + } } } } @@ -4796,6 +4804,7 @@ void DxbcShaderTranslator::WriteShaderCode() { // General-purpose registers if using dynamic indexing (x0). if (!is_depth_only_pixel_shader_ && uses_register_dynamic_addressing()) { + assert_true(register_count() != 0); shader_object_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4)); diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 73451422f..28db647bc 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -503,6 +503,9 @@ class DxbcShaderTranslator : public ShaderTranslator { kVSOutPosition, kVSOutClipDistance0123, kVSOutClipDistance45, + // TODO(Triang3l): Use SV_CullDistance instead for + // PA_CL_CLIP_CNTL::UCP_CULL_ONLY_ENA, but can't have more than 8 clip and + // cull distances in total. 
kPSInInterpolators = 0, kPSInPointParameters = kPSInInterpolators + kInterpolatorCount, diff --git a/src/xenia/gpu/register_file.h b/src/xenia/gpu/register_file.h index 524cea772..e54e2db72 100644 --- a/src/xenia/gpu/register_file.h +++ b/src/xenia/gpu/register_file.h @@ -13,15 +13,11 @@ #include #include +#include "xenia/gpu/registers.h" + namespace xe { namespace gpu { -enum Register { -#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index, -#include "xenia/gpu/register_table.inc" -#undef XE_GPU_REGISTER -}; - struct RegisterInfo { enum class Type { kDword, @@ -44,8 +40,20 @@ class RegisterFile { }; RegisterValue values[kRegisterCount]; - RegisterValue& operator[](int reg) { return values[reg]; } + RegisterValue& operator[](uint32_t reg) { return values[reg]; } RegisterValue& operator[](Register reg) { return values[reg]; } + template + T& Get(uint32_t reg) { + return *reinterpret_cast(&values[reg]); + } + template + T& Get(Register reg) { + return *reinterpret_cast(&values[reg]); + } + template + T& Get() { + return *reinterpret_cast(&values[T::register_index]); + } }; } // namespace gpu diff --git a/src/xenia/gpu/registers.cc b/src/xenia/gpu/registers.cc new file mode 100644 index 000000000..4215e3352 --- /dev/null +++ b/src/xenia/gpu/registers.cc @@ -0,0 +1,51 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2019 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include "xenia/gpu/registers.h" + +namespace xe { +namespace gpu { +namespace reg { + +constexpr uint32_t COHER_STATUS_HOST::register_index; +constexpr uint32_t WAIT_UNTIL::register_index; + +constexpr uint32_t SQ_PROGRAM_CNTL::register_index; +constexpr uint32_t SQ_CONTEXT_MISC::register_index; + +constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index; +constexpr uint32_t VGT_HOS_CNTL::register_index; + +constexpr uint32_t PA_SU_POINT_MINMAX::register_index; +constexpr uint32_t PA_SU_POINT_SIZE::register_index; +constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index; +constexpr uint32_t PA_SU_VTX_CNTL::register_index; +constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index; +constexpr uint32_t PA_SC_VIZ_QUERY::register_index; +constexpr uint32_t PA_CL_CLIP_CNTL::register_index; +constexpr uint32_t PA_CL_VTE_CNTL::register_index; +constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index; +constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index; +constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index; + +constexpr uint32_t RB_MODECONTROL::register_index; +constexpr uint32_t RB_SURFACE_INFO::register_index; +constexpr uint32_t RB_COLORCONTROL::register_index; +constexpr uint32_t RB_COLOR_INFO::register_index; +constexpr uint32_t RB_COLOR_MASK::register_index; +constexpr uint32_t RB_DEPTHCONTROL::register_index; +constexpr uint32_t RB_STENCILREFMASK::register_index; +constexpr uint32_t RB_DEPTH_INFO::register_index; +constexpr uint32_t RB_COPY_CONTROL::register_index; +constexpr uint32_t RB_COPY_DEST_INFO::register_index; +constexpr uint32_t RB_COPY_DEST_PITCH::register_index; + +} // namespace reg +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 215363b33..a0fc9e279 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -20,15 +20,22 @@ // 
https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h namespace xe { namespace gpu { + +enum Register { +#define XE_GPU_REGISTER(index, type, name) XE_GPU_REG_##name = index, +#include "xenia/gpu/register_table.inc" +#undef XE_GPU_REGISTER +}; + namespace reg { -/************************************************** +/******************************************************************************* ___ ___ _ _ _____ ___ ___ _ / __/ _ \| \| |_ _| _ \/ _ \| | | (_| (_) | .` | | | | / (_) | |__ \___\___/|_|\_| |_| |_|_\\___/|____| -***************************************************/ +*******************************************************************************/ union COHER_STATUS_HOST { xe::bf matching_contexts; @@ -49,6 +56,7 @@ union COHER_STATUS_HOST { xe::bf status; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST; }; union WAIT_UNTIL { @@ -69,9 +77,82 @@ union WAIT_UNTIL { xe::bf cmdfifo_entries; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL; }; -/************************************************** +/******************************************************************************* + ___ ___ ___ _ _ ___ _ _ ___ ___ ___ + / __| __/ _ \| | | | __| \| |/ __| __| _ \ + \__ \ _| (_) | |_| | _|| .` | (__| _|| / + |___/___\__\_\\___/|___|_|\_|\___|___|_|_\ + +*******************************************************************************/ + +union SQ_PROGRAM_CNTL { + // Note from a2xx.xml: + // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but + // high bit is set to indicate "0 registers used". 
+ xe::bf vs_num_reg; + xe::bf ps_num_reg; + xe::bf vs_resource; + xe::bf ps_resource; + xe::bf param_gen; + xe::bf gen_index_pix; + xe::bf vs_export_count; + xe::bf vs_export_mode; + xe::bf ps_export_mode; + xe::bf gen_index_vtx; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; +}; + +union SQ_CONTEXT_MISC { + xe::bf inst_pred_optimize; + xe::bf sc_output_screen_xy; + xe::bf sc_sample_cntl; + xe::bf param_gen_pos; + xe::bf perfcounter_ref; + xe::bf yeild_optimize; // sic + xe::bf tx_cache_sel; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC; +}; + +/******************************************************************************* + __ _____ ___ _____ _____ __ + \ \ / / __| _ \_ _| __\ \/ / + \ V /| _|| / | | | _| > < + \_/ |___|_|_\ |_| |___/_/\_\ + + ___ ___ ___ _ _ ___ ___ ___ _ _ _ ___ + / __| _ \/ _ \| | | | _ \ __| _ \ /_\ | \| | \ + | (_ | / (_) | |_| | _/ _|| / / _ \| .` | |) | + \___|_|_\\___/ \___/|_| |___|_|_\ /_/ \_\_|\_|___/ + + _____ ___ ___ ___ ___ _ _ _ _____ ___ ___ + |_ _| __/ __/ __| __| | | | /_\_ _/ _ \| _ \ + | | | _|\__ \__ \ _|| |__| |__ / _ \| || (_) | / + |_| |___|___/___/___|____|____/_/ \_\_| \___/|_|_\ + +*******************************************************************************/ + +union VGT_OUTPUT_PATH_CNTL { + xe::bf path_select; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; +}; + +union VGT_HOS_CNTL { + xe::bf tess_mode; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL; +}; + +/******************************************************************************* ___ ___ ___ __ __ ___ _____ _____ _____ | _ \ _ \_ _| \/ |_ _|_ _|_ _\ \ / / __| | _/ /| || |\/| || | | | | | \ V /| _| @@ -82,7 +163,25 @@ union WAIT_UNTIL { / _ \\__ \__ \ _|| |\/| | _ \ |__| _|| / /_/ \_\___/___/___|_| |_|___/____|___|_|_\ -***************************************************/ 
+*******************************************************************************/ + +union PA_SU_POINT_MINMAX { + // Radius, 12.4 fixed point. + xe::bf min_size; + xe::bf max_size; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; +}; + +union PA_SU_POINT_SIZE { + // 1/2 width or height, 12.4 fixed point. + xe::bf height; + xe::bf width; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE; +}; // Setup Unit / Scanline Converter mode cntl union PA_SU_SC_MODE_CNTL { @@ -110,6 +209,7 @@ union PA_SU_SC_MODE_CNTL { xe::bf wait_rb_idle_first_tri_new_state; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; // Setup Unit Vertex Control @@ -119,6 +219,7 @@ union PA_SU_VTX_CNTL { xe::bf quant_mode; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; union PA_SC_MPASS_PS_CNTL { @@ -126,6 +227,7 @@ union PA_SC_MPASS_PS_CNTL { xe::bf mpass_ps_ena; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; }; // Scanline converter viz query @@ -135,11 +237,10 @@ union PA_SC_VIZ_QUERY { xe::bf kill_pix_post_early_z; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; // Clipper clip control -// TODO(DrChat): This seem to differ. Need to examine this. 
-// https://github.com/decaf-emu/decaf-emu/blob/c017a9ff8128852fb9a5da19466778a171cea6e1/src/libdecaf/src/gpu/latte_registers_pa.h#L11 union PA_CL_CLIP_CNTL { xe::bf ucp_ena_0; xe::bf ucp_ena_1; @@ -160,6 +261,7 @@ union PA_CL_CLIP_CNTL { xe::bf w_nan_retain; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; }; // Viewport transform engine control @@ -177,6 +279,7 @@ union PA_CL_VTE_CNTL { xe::bf perfcounter_ref; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; union PA_SC_WINDOW_OFFSET { @@ -184,6 +287,7 @@ union PA_SC_WINDOW_OFFSET { xe::bf window_y_offset; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; union PA_SC_WINDOW_SCISSOR_TL { @@ -192,6 +296,7 @@ union PA_SC_WINDOW_SCISSOR_TL { xe::bf window_offset_disable; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; }; union PA_SC_WINDOW_SCISSOR_BR { @@ -199,20 +304,22 @@ union PA_SC_WINDOW_SCISSOR_BR { xe::bf br_y; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; -/************************************************** +/******************************************************************************* ___ ___ | _ \ _ ) | / _ \ |_|_\___/ -***************************************************/ +*******************************************************************************/ union RB_MODECONTROL { xe::bf edram_mode; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL; }; union RB_SURFACE_INFO { @@ -221,27 +328,83 @@ union RB_SURFACE_INFO { xe::bf hiz_pitch; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO; }; union RB_COLORCONTROL { - xe::bf alpha_func; + xe::bf alpha_func; xe::bf alpha_test_enable; xe::bf alpha_to_mask_enable; - + // Everything in between was added on Adreno, not in game PDBs and never set. 
xe::bf alpha_to_mask_offset0; xe::bf alpha_to_mask_offset1; xe::bf alpha_to_mask_offset2; xe::bf alpha_to_mask_offset3; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL; }; union RB_COLOR_INFO { xe::bf color_base; xe::bf color_format; - xe::bf color_exp_bias; + xe::bf color_exp_bias; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO; + // RB_COLOR[1-3]_INFO also use this format. +}; + +union RB_COLOR_MASK { + xe::bf write_red0; + xe::bf write_green0; + xe::bf write_blue0; + xe::bf write_alpha0; + xe::bf write_red1; + xe::bf write_green1; + xe::bf write_blue1; + xe::bf write_alpha1; + xe::bf write_red2; + xe::bf write_green2; + xe::bf write_blue2; + xe::bf write_alpha2; + xe::bf write_red3; + xe::bf write_green3; + xe::bf write_blue3; + xe::bf write_alpha3; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK; +}; + +union RB_DEPTHCONTROL { + xe::bf stencil_enable; + xe::bf z_enable; + xe::bf z_write_enable; + // EARLY_Z_ENABLE was added on Adreno. + xe::bf zfunc; + xe::bf backface_enable; + xe::bf stencilfunc; + xe::bf stencilfail; + xe::bf stencilzpass; + xe::bf stencilzfail; + xe::bf stencilfunc_bf; + xe::bf stencilfail_bf; + xe::bf stencilzpass_bf; + xe::bf stencilzfail_bf; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL; +}; + +union RB_STENCILREFMASK { + xe::bf stencilref; + xe::bf stencilmask; + xe::bf stencilwritemask; + + uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK; + // RB_STENCILREFMASK_BF also uses this format. 
}; union RB_DEPTH_INFO { @@ -249,6 +412,7 @@ union RB_DEPTH_INFO { xe::bf depth_format; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO; }; union RB_COPY_CONTROL { @@ -260,6 +424,7 @@ union RB_COPY_CONTROL { xe::bf copy_command; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL; }; union RB_COPY_DEST_INFO { @@ -268,10 +433,11 @@ union RB_COPY_DEST_INFO { xe::bf copy_dest_slice; xe::bf copy_dest_format; xe::bf copy_dest_number; - xe::bf copy_dest_exp_bias; + xe::bf copy_dest_exp_bias; xe::bf copy_dest_swap; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; union RB_COPY_DEST_PITCH { @@ -279,9 +445,11 @@ union RB_COPY_DEST_PITCH { xe::bf copy_dest_height; uint32_t value; + static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; } // namespace reg + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 8680577ae..940db871b 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -108,10 +108,12 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { } bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, - xenos::xe_gpu_program_cntl_t cntl) { + reg::SQ_PROGRAM_CNTL cntl) { Reset(); - register_count_ = shader->type() == ShaderType::kVertex ? cntl.vs_regs + 1 - : cntl.ps_regs + 1; + uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex + ? cntl.vs_num_reg.value() + : cntl.ps_num_reg.value(); + register_count_ = (cntl_num_reg & 0x80) ? 
0 : (cntl_num_reg + 1); return TranslateInternal(shader, patch_type); } diff --git a/src/xenia/gpu/shader_translator.h b/src/xenia/gpu/shader_translator.h index 7a12abc10..537606eb3 100644 --- a/src/xenia/gpu/shader_translator.h +++ b/src/xenia/gpu/shader_translator.h @@ -17,6 +17,7 @@ #include "xenia/base/math.h" #include "xenia/base/string_buffer.h" +#include "xenia/gpu/registers.h" #include "xenia/gpu/shader.h" #include "xenia/gpu/ucode.h" #include "xenia/gpu/xenos.h" @@ -33,7 +34,7 @@ class ShaderTranslator { bool GatherAllBindingInformation(Shader* shader); bool Translate(Shader* shader, PrimitiveType patch_type, - xenos::xe_gpu_program_cntl_t cntl); + reg::SQ_PROGRAM_CNTL cntl); bool Translate(Shader* shader, PrimitiveType patch_type); protected: @@ -232,7 +233,7 @@ class ShaderTranslator { PrimitiveType patch_primitive_type_; const uint32_t* ucode_dwords_; size_t ucode_dword_count_; - xenos::xe_gpu_program_cntl_t program_cntl_; + reg::SQ_PROGRAM_CNTL program_cntl_; uint32_t register_count_; // Accumulated translation errors. 
diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index 9f9f6da5c..c1d151942 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -93,6 +93,7 @@ void SpirvShaderTranslator::StartTranslation() { b.makeFunctionEntry(spv::NoPrecision, b.makeVoidType(), "translated_main", {}, {}, &function_block); + assert_not_zero(register_count()); registers_type_ = b.makeArrayType(vec4_float_type_, b.makeUintConstant(register_count()), 0); registers_ptr_ = b.createVariable(spv::StorageClass::StorageClassFunction, diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 25d822daf..4258061f1 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -364,7 +364,7 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state, } bool PipelineCache::TranslateShader(VulkanShader* shader, - xenos::xe_gpu_program_cntl_t cntl) { + reg::SQ_PROGRAM_CNTL cntl) { // Perform translation. // If this fails the shader will be marked as invalid and ignored later. 
if (!shader_translator_->Translate(shader, PrimitiveType::kNone, cntl)) { @@ -808,8 +808,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, } bool push_constants_dirty = full_update || viewport_state_dirty; - push_constants_dirty |= - SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + push_constants_dirty |= SetShadowRegister(®s.sq_program_cntl.value, + XE_GPU_REG_SQ_PROGRAM_CNTL); push_constants_dirty |= SetShadowRegister(®s.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); push_constants_dirty |= @@ -827,25 +827,14 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants_dirty |= SetShadowRegister(®s.pa_su_point_size, XE_GPU_REG_PA_SU_POINT_SIZE); if (push_constants_dirty) { - xenos::xe_gpu_program_cntl_t program_cntl; - program_cntl.dword_0 = regs.sq_program_cntl; - // Normal vertex shaders only, for now. - // TODO(benvanik): transform feedback/memexport. - // https://github.com/freedreno/freedreno/blob/master/includes/a2xx.xml.h - // Draw calls skipped if they have unsupported export modes. - // 0 = positionOnly - // 1 = unused - // 2 = sprite - // 3 = edge - // 4 = kill - // 5 = spriteKill - // 6 = edgeKill - // 7 = multipass - assert_true(program_cntl.vs_export_mode == 0 || - program_cntl.vs_export_mode == 2 || - program_cntl.vs_export_mode == 7); - assert_false(program_cntl.gen_index_vtx); + assert_true(regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition1Vector || + regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kPosition2VectorsSprite || + regs.sq_program_cntl.vs_export_mode == + xenos::VertexShaderExportMode::kMultipass); + assert_false(regs.sq_program_cntl.gen_index_vtx); SpirvPushConstants push_constants = {}; @@ -909,7 +898,8 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, // Whether to populate a register in the pixel shader with frag coord. 
int ps_param_gen = (regs.sq_context_misc >> 8) & 0xFF; - push_constants.ps_param_gen = program_cntl.param_gen ? ps_param_gen : -1; + push_constants.ps_param_gen = + regs.sq_program_cntl.param_gen ? ps_param_gen : -1; vkCmdPushConstants(command_buffer, pipeline_layout_, VK_SHADER_STAGE_VERTEX_BIT | @@ -1061,7 +1051,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( bool dirty = false; dirty |= SetShadowRegister(®s.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(®s.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(®s.sq_program_cntl.value, + XE_GPU_REG_SQ_PROGRAM_CNTL); dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.primitive_type != primitive_type; @@ -1073,17 +1064,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( return UpdateStatus::kCompatible; } - xenos::xe_gpu_program_cntl_t sq_program_cntl; - sq_program_cntl.dword_0 = regs.sq_program_cntl; - if (!vertex_shader->is_translated() && - !TranslateShader(vertex_shader, sq_program_cntl)) { + !TranslateShader(vertex_shader, regs.sq_program_cntl)) { XELOGE("Failed to translate the vertex shader!"); return UpdateStatus::kError; } if (pixel_shader && !pixel_shader->is_translated() && - !TranslateShader(pixel_shader, sq_program_cntl)) { + !TranslateShader(pixel_shader, regs.sq_program_cntl)) { XELOGE("Failed to translate the pixel shader!"); return UpdateStatus::kError; } @@ -1513,7 +1501,6 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { auto& state_info = update_color_blend_state_info_; bool dirty = false; - dirty |= SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); dirty |= SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); @@ -1568,7 +1555,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { for (int i = 0; i < 4; 
++i) { uint32_t blend_control = regs.rb_blendcontrol[i]; auto& attachment_state = attachment_states[i]; - attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20); + attachment_state.blendEnable = (blend_control & 0x1FFF1FFF) != 0x00010001; // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND attachment_state.srcColorBlendFactor = kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 26db40605..80035d25f 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -79,7 +79,7 @@ class PipelineCache { // state. VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); - bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl); + bool TranslateShader(VulkanShader* shader, reg::SQ_PROGRAM_CNTL cntl); void DumpShaderDisasmAMD(VkPipeline pipeline); void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info); @@ -170,7 +170,7 @@ class PipelineCache { struct UpdateShaderStagesRegisters { PrimitiveType primitive_type; uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; + reg::SQ_PROGRAM_CNTL sq_program_cntl; VulkanShader* vertex_shader; VulkanShader* pixel_shader; @@ -256,7 +256,6 @@ class PipelineCache { VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; struct UpdateColorBlendStateRegisters { - uint32_t rb_colorcontrol; uint32_t rb_color_mask; uint32_t rb_blendcontrol[4]; uint32_t rb_modecontrol; @@ -290,7 +289,7 @@ class PipelineCache { float rb_blend_rgba[4]; uint32_t rb_stencilrefmask; - uint32_t sq_program_cntl; + reg::SQ_PROGRAM_CNTL sq_program_cntl; uint32_t sq_context_misc; uint32_t rb_colorcontrol; uint32_t rb_color_info; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index a43b807e0..2175e5e5d 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -80,12 +80,6 @@ inline bool IsPrimitiveTwoFaced(bool tessellated, PrimitiveType type) { return false; 
} -enum class TessellationMode : uint32_t { - kDiscrete = 0, - kContinuous = 1, - kAdaptive = 2, -}; - enum class Dimension : uint32_t { k1D = 0, k2D = 1, @@ -334,6 +328,28 @@ inline int GetVertexFormatSizeInWords(VertexFormat format) { } } +enum class CompareFunction : uint32_t { + kNever = 0b000, + kLess = 0b001, + kEqual = 0b010, + kLessEqual = 0b011, + kGreater = 0b100, + kNotEqual = 0b101, + kGreaterEqual = 0b110, + kAlways = 0b111, +}; + +enum class StencilOp : uint32_t { + kKeep = 0, + kZero = 1, + kReplace = 2, + kIncrementClamp = 3, + kDecrementClamp = 4, + kInvert = 5, + kIncrementWrap = 6, + kDecrementWrap = 7, +}; + // adreno_rb_blend_factor enum class BlendFactor : uint32_t { kZero = 0, @@ -375,6 +391,35 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// a2xx_sq_ps_vtx_mode +enum class VertexShaderExportMode : uint32_t { + kPosition1Vector = 0, + kPosition2VectorsSprite = 2, + kPosition2VectorsEdge = 3, + kPosition2VectorsKill = 4, + kPosition2VectorsSpriteKill = 5, + kPosition2VectorsEdgeKill = 6, + kMultipass = 7, +}; + +enum class SampleControl : uint32_t { + kCentroidsOnly = 0, + kCentersOnly = 1, + kCentroidsAndCenters = 2, +}; + +enum class VGTOutputPath : uint32_t { + kVertexReuse = 0, + kTessellationEnable = 1, + kPassthru = 2, +}; + +enum class TessellationMode : uint32_t { + kDiscrete = 0, + kContinuous = 1, + kAdaptive = 2, +}; + enum class ModeControl : uint32_t { kIgnore = 0, kColorDepth = 4, @@ -471,26 +516,6 @@ inline uint32_t GpuToCpu(uint32_t p) { return p; } inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; } -// XE_GPU_REG_SQ_PROGRAM_CNTL -typedef union { - XEPACKEDSTRUCTANONYMOUS({ - uint32_t vs_regs : 6; - uint32_t unk_0 : 2; - uint32_t ps_regs : 6; - uint32_t unk_1 : 2; - uint32_t vs_resource : 1; - uint32_t ps_resource : 1; - uint32_t param_gen : 1; - uint32_t gen_index_pix : 1; - uint32_t vs_export_count : 4; - uint32_t vs_export_mode : 3; - uint32_t ps_export_depth : 1; - uint32_t 
ps_export_count : 3; - uint32_t gen_index_vtx : 1; - }); - XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; }); -} xe_gpu_program_cntl_t; - // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDSTRUCTANONYMOUS({