From a9ed73bdd122207fcaa35c9bf422c38aaf8fbcbd Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 20 Oct 2019 19:40:37 +0300 Subject: [PATCH] [GPU] Remove most hardcoded register/instruction layouts from common and D3D12 code --- src/xenia/base/bit_field.h | 51 -- src/xenia/gpu/command_processor.cc | 13 +- src/xenia/gpu/command_processor.h | 2 +- .../gpu/d3d12/d3d12_command_processor.cc | 29 +- src/xenia/gpu/d3d12/pipeline_cache.cc | 185 +++--- src/xenia/gpu/d3d12/pipeline_cache.h | 20 +- src/xenia/gpu/d3d12/primitive_converter.cc | 2 +- src/xenia/gpu/d3d12/render_target_cache.cc | 260 ++++---- src/xenia/gpu/d3d12/texture_cache.cc | 62 +- src/xenia/gpu/register_table.inc | 10 +- src/xenia/gpu/registers.cc | 69 ++- src/xenia/gpu/registers.h | 556 ++++++++++-------- src/xenia/gpu/sampler_info.cc | 16 +- src/xenia/gpu/shader_translator.cc | 5 +- src/xenia/gpu/texture_conversion.cc | 2 +- src/xenia/gpu/texture_info.cc | 4 +- src/xenia/gpu/texture_info.h | 71 --- src/xenia/gpu/trace_viewer.cc | 12 +- src/xenia/gpu/ucode.h | 236 +++----- src/xenia/gpu/vulkan/buffer_cache.cc | 5 +- src/xenia/gpu/vulkan/pipeline_cache.cc | 16 +- src/xenia/gpu/vulkan/pipeline_cache.h | 8 +- .../gpu/vulkan/vulkan_command_processor.cc | 16 +- src/xenia/gpu/xenos.h | 188 ++++-- 24 files changed, 896 insertions(+), 942 deletions(-) delete mode 100644 src/xenia/base/bit_field.h diff --git a/src/xenia/base/bit_field.h b/src/xenia/base/bit_field.h deleted file mode 100644 index 98a8bfbf8..000000000 --- a/src/xenia/base/bit_field.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2017 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#ifndef XENIA_BASE_BIT_FIELD_H_ -#define XENIA_BASE_BIT_FIELD_H_ - -#include -#include -#include - -namespace xe { - -// Bitfield, where position starts at the LSB. -template -struct bf { - // For enum values, we strip them down to an underlying type. - typedef - typename std::conditional::value, std::underlying_type, - std::remove_reference>::type::type - value_type; - - bf() = default; - inline operator T() const { return value(); } - - inline T value() const { - auto value = (storage & mask()) >> position; - if (std::is_signed::value) { - // If the value is signed, sign-extend it. - value_type sign_mask = value_type(1) << (n_bits - 1); - value = (sign_mask ^ value) - sign_mask; - } - - return static_cast(value); - } - - inline value_type mask() const { - return ((value_type(1) << n_bits) - 1) << position; - } - - value_type storage; -}; - -} // namespace xe - -#endif // XENIA_BASE_BIT_FIELD_H_ diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index a715d0e5e..acb991fb5 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -350,20 +350,20 @@ void CommandProcessor::MakeCoherent() { // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 RegisterFile* regs = register_file_; - auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + auto& status_host = regs->Get(); auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; - if (!(status_host & 0x80000000ul)) { + if (!status_host.status) { return; } const char* action = "N/A"; - if ((status_host & 0x03000000) == 0x03000000) { + if (status_host.vc_action_ena && status_host.tc_action_ena) { action = "VC | TC"; - } else if (status_host & 0x02000000) { + } else if (status_host.tc_action_ena) { action = 
"TC"; - } else if (status_host & 0x01000000) { + } else if (status_host.vc_action_ena) { action = "VC"; } @@ -372,8 +372,7 @@ void CommandProcessor::MakeCoherent() { base_host + size_host, size_host, action); // Mark coherent. - status_host &= ~0x80000000ul; - regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; + status_host.status = 0; } void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 3b86844ed..dad797b05 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -151,7 +151,7 @@ class CommandProcessor { protected: struct IndexBufferInfo { IndexFormat format = IndexFormat::kInt16; - Endian endianness = Endian::kUnspecified; + Endian endianness = Endian::kNone; uint32_t count = 0; uint32_t guest_base = 0; size_t length = 0; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 20e1495b9..ce0a136c8 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1345,7 +1345,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Update system constants before uploading them. UpdateSystemConstantValues( memexport_used, primitive_two_faced, line_loop_closing_index, - indexed ? index_buffer_info->endianness : Endian::kUnspecified, + indexed ? index_buffer_info->endianness : Endian::kNone, adaptive_tessellation ? (index_buffer_info->guest_base & 0x1FFFFFFC) : 0, early_z, GetCurrentColorMask(pixel_shader), pipeline_render_targets); @@ -1975,7 +1975,7 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) { // Stencil reference value. Per-face reference not supported by Direct3D 12, // choose the back face one only if drawing only back faces. 
- uint32_t stencil_ref_mask_reg; + Register stencil_ref_mask_reg; auto pa_su_sc_mode_cntl = regs.Get(); if (primitive_two_faced && regs.Get().backface_enable && @@ -2032,13 +2032,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( float rt_clamp[4][4]; uint32_t rt_keep_masks[4][2]; for (uint32_t i = 0; i < 4; ++i) { - static const uint32_t kColorInfoRegs[] = { - XE_GPU_REG_RB_COLOR_INFO, - XE_GPU_REG_RB_COLOR1_INFO, - XE_GPU_REG_RB_COLOR2_INFO, - XE_GPU_REG_RB_COLOR3_INFO, - }; - auto color_info = regs.Get(kColorInfoRegs[i]); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); color_infos[i] = color_info; if (IsROVUsedForEDRAM()) { @@ -2125,7 +2120,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( } // Alpha test. if (rb_colorcontrol.alpha_test_enable) { - flags |= uint32_t(rb_colorcontrol.alpha_func.value()) + flags |= uint32_t(rb_colorcontrol.alpha_func) << DxbcShaderTranslator::kSysFlag_AlphaPassIfLess_Shift; } else { flags |= DxbcShaderTranslator::kSysFlag_AlphaPassIfLess | @@ -2149,7 +2144,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( flags |= DxbcShaderTranslator::kSysFlag_ROVDepthFloat24; } if (rb_depthcontrol.z_enable) { - flags |= uint32_t(rb_depthcontrol.zfunc.value()) + flags |= uint32_t(rb_depthcontrol.zfunc) << DxbcShaderTranslator::kSysFlag_ROVDepthPassIfLess_Shift; if (rb_depthcontrol.z_write_enable) { flags |= DxbcShaderTranslator::kSysFlag_ROVDepthWrite; @@ -2350,7 +2345,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // EDRAM pitch for ROV writing. if (IsROVUsedForEDRAM()) { uint32_t edram_pitch_tiles = - ((std::min(rb_surface_info.surface_pitch.value(), 2560u) * + ((std::min(rb_surface_info.surface_pitch, 2560u) * (rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 
2 : 1)) + 79) / 80; @@ -2408,14 +2403,8 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( 4 * sizeof(float)) != 0; std::memcpy(system_constants_.edram_rt_clamp[i], rt_clamp[i], 4 * sizeof(float)); - static const uint32_t kBlendControlRegs[] = { - XE_GPU_REG_RB_BLENDCONTROL_0, - XE_GPU_REG_RB_BLENDCONTROL_1, - XE_GPU_REG_RB_BLENDCONTROL_2, - XE_GPU_REG_RB_BLENDCONTROL_3, - }; uint32_t blend_factors_ops = - regs[kBlendControlRegs[i]].u32 & 0x1FFF1FFF; + regs[reg::RB_BLENDCONTROL::rt_register_indices[i]].u32 & 0x1FFF1FFF; dirty |= system_constants_.edram_rt_blend_factors_ops[i] != blend_factors_ops; system_constants_.edram_rt_blend_factors_ops[i] = blend_factors_ops; @@ -2537,7 +2526,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.edram_stencil_back_write_mask = rb_stencilrefmask_bf.stencilwritemask; uint32_t stencil_func_ops_bf = - (rb_depthcontrol.value >> 8) & ((1 << 12) - 1); + (rb_depthcontrol.value >> 20) & ((1 << 12) - 1); dirty |= system_constants_.edram_stencil_back_func_ops != stencil_func_ops_bf; system_constants_.edram_stencil_back_func_ops = stencil_func_ops_bf; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 67d20b3da..1a9a4bf0b 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -363,7 +363,7 @@ bool PipelineCache::GetCurrentStateDescription( const RenderTargetCache::PipelineRenderTarget render_targets[5], PipelineDescription& description_out) { auto& regs = *register_file_; - uint32_t pa_su_sc_mode_cntl = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + auto pa_su_sc_mode_cntl = regs.Get(); bool primitive_two_faced = IsPrimitiveTwoFaced(tessellated, primitive_type); // Initialize all unused fields to zero for comparison/hashing. @@ -381,7 +381,7 @@ bool PipelineCache::GetCurrentStateDescription( description_out.pixel_shader = pixel_shader; // Index buffer strip cut value. 
- if (pa_su_sc_mode_cntl & (1 << 21)) { + if (pa_su_sc_mode_cntl.multi_prim_ib_ena) { // Not using 0xFFFF with 32-bit indices because in index buffers it will be // 0xFFFF0000 anyway due to endianness. description_out.strip_cut_index = index_format == IndexFormat::kInt32 @@ -479,53 +479,60 @@ bool PipelineCache::GetCurrentStateDescription( // Xenos fill mode 1). // Here we also assume that only one side is culled - if two sides are culled, // the D3D12 command processor will drop such draw early. - uint32_t cull_mode = primitive_two_faced ? (pa_su_sc_mode_cntl & 0x3) : 0; + bool cull_front, cull_back; + if (primitive_two_faced) { + cull_front = pa_su_sc_mode_cntl.cull_front != 0; + cull_back = pa_su_sc_mode_cntl.cull_back != 0; + } else { + cull_front = false; + cull_back = false; + } float poly_offset = 0.0f, poly_offset_scale = 0.0f; if (primitive_two_faced) { - description_out.front_counter_clockwise = (pa_su_sc_mode_cntl & 0x4) == 0; - if (cull_mode == 1) { + description_out.front_counter_clockwise = pa_su_sc_mode_cntl.face == 0; + if (cull_front) { description_out.cull_mode = PipelineCullMode::kFront; - } else if (cull_mode == 2) { + } else if (cull_back) { description_out.cull_mode = PipelineCullMode::kBack; } else { description_out.cull_mode = PipelineCullMode::kNone; } // With ROV, the depth bias is applied in the pixel shader because // per-sample depth is needed for MSAA. - if (cull_mode != 1) { + if (!cull_front) { // Front faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 5) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { + // Direct3D 12, unfortunately, doesn't support point fill mode. 
+ if (pa_su_sc_mode_cntl.polymode_front_ptype != + xenos::PolygonType::kTriangles) { description_out.fill_mode_wireframe = 1; } - if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 11))) { + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_front_enable) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; } } - if (cull_mode != 2) { + if (!cull_back) { // Back faces aren't culled. - uint32_t fill_mode = (pa_su_sc_mode_cntl >> 8) & 0x7; - if (fill_mode == 0 || fill_mode == 1) { + if (pa_su_sc_mode_cntl.polymode_back_ptype != + xenos::PolygonType::kTriangles) { description_out.fill_mode_wireframe = 1; } // Prefer front depth bias because in general, front faces are the ones // that are rendered (except for shadow volumes). - if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 12)) && + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_back_enable && poly_offset == 0.0f && poly_offset_scale == 0.0f) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_BACK_SCALE].f32; } } - if (((pa_su_sc_mode_cntl >> 3) & 0x3) == 0) { - // Fill mode is disabled. + if (pa_su_sc_mode_cntl.poly_mode == xenos::PolygonModeEnable::kDisabled) { description_out.fill_mode_wireframe = 0; } } else { // Filled front faces only. // Use front depth bias if POLY_OFFSET_PARA_ENABLED // (POLY_OFFSET_FRONT_ENABLED is for two-sided primitives). 
- if (!edram_rov_used_ && (pa_su_sc_mode_cntl & (1 << 13))) { + if (!edram_rov_used_ && pa_su_sc_mode_cntl.poly_offset_para_enable) { poly_offset = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_OFFSET].f32; poly_offset_scale = regs[XE_GPU_REG_PA_SU_POLY_OFFSET_FRONT_SCALE].f32; } @@ -543,8 +550,8 @@ bool PipelineCache::GetCurrentStateDescription( // of Duty 4 (vehicledamage map explosion decals) and Red Dead Redemption // (shadows - 2^17 is not enough, 2^18 hasn't been tested, but 2^19 // eliminates the acne). - if (((register_file_->values[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 0x1) == - uint32_t(DepthRenderTargetFormat::kD24FS8)) { + if (regs.Get().depth_format == + DepthRenderTargetFormat::kD24FS8) { poly_offset *= float(1 << 19); } else { poly_offset *= float(1 << 23); @@ -564,48 +571,49 @@ bool PipelineCache::GetCurrentStateDescription( primitive_type == PrimitiveType::kQuadPatch)) { description_out.fill_mode_wireframe = 1; } - // CLIP_DISABLE - description_out.depth_clip = - (regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32 & (1 << 16)) == 0; + description_out.depth_clip = !regs.Get().clip_disable; if (edram_rov_used_) { description_out.rov_msaa = - ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 >> 16) & 0x3) != 0; + regs.Get().msaa_samples != MsaaSamples::k1X; } else { // Depth/stencil. No stencil, always passing depth test and no depth writing // means depth disabled. 
if (render_targets[4].format != DXGI_FORMAT_UNKNOWN) { - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - if (rb_depthcontrol & 0x2) { - description_out.depth_func = (rb_depthcontrol >> 4) & 0x7; - description_out.depth_write = (rb_depthcontrol & 0x4) != 0; + auto rb_depthcontrol = regs.Get(); + if (rb_depthcontrol.z_enable) { + description_out.depth_func = rb_depthcontrol.zfunc; + description_out.depth_write = rb_depthcontrol.z_write_enable; } else { - description_out.depth_func = 0b111; + description_out.depth_func = CompareFunction::kAlways; } - if (rb_depthcontrol & 0x1) { + if (rb_depthcontrol.stencil_enable) { description_out.stencil_enable = 1; bool stencil_backface_enable = - primitive_two_faced && (rb_depthcontrol & 0x80); - uint32_t stencil_masks; + primitive_two_faced && rb_depthcontrol.backface_enable; // Per-face masks not supported by Direct3D 12, choose the back face // ones only if drawing only back faces. - if (stencil_backface_enable && cull_mode == 1) { - stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK_BF].u32; + Register stencil_ref_mask_reg; + if (stencil_backface_enable && cull_front) { + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK_BF; } else { - stencil_masks = regs[XE_GPU_REG_RB_STENCILREFMASK].u32; + stencil_ref_mask_reg = XE_GPU_REG_RB_STENCILREFMASK; } - description_out.stencil_read_mask = (stencil_masks >> 8) & 0xFF; - description_out.stencil_write_mask = (stencil_masks >> 16) & 0xFF; - description_out.stencil_front_fail_op = (rb_depthcontrol >> 11) & 0x7; + auto stencil_ref_mask = + regs.Get(stencil_ref_mask_reg); + description_out.stencil_read_mask = stencil_ref_mask.stencilmask; + description_out.stencil_write_mask = stencil_ref_mask.stencilwritemask; + description_out.stencil_front_fail_op = rb_depthcontrol.stencilfail; description_out.stencil_front_depth_fail_op = - (rb_depthcontrol >> 17) & 0x7; - description_out.stencil_front_pass_op = (rb_depthcontrol >> 14) & 0x7; - description_out.stencil_front_func 
= (rb_depthcontrol >> 8) & 0x7; + rb_depthcontrol.stencilzfail; + description_out.stencil_front_pass_op = rb_depthcontrol.stencilzpass; + description_out.stencil_front_func = rb_depthcontrol.stencilfunc; if (stencil_backface_enable) { - description_out.stencil_back_fail_op = (rb_depthcontrol >> 23) & 0x7; + description_out.stencil_back_fail_op = rb_depthcontrol.stencilfail_bf; description_out.stencil_back_depth_fail_op = - (rb_depthcontrol >> 29) & 0x7; - description_out.stencil_back_pass_op = (rb_depthcontrol >> 26) & 0x7; - description_out.stencil_back_func = (rb_depthcontrol >> 20) & 0x7; + rb_depthcontrol.stencilzfail_bf; + description_out.stencil_back_pass_op = + rb_depthcontrol.stencilzpass_bf; + description_out.stencil_back_func = rb_depthcontrol.stencilfunc_bf; } else { description_out.stencil_back_fail_op = description_out.stencil_front_fail_op; @@ -618,13 +626,13 @@ bool PipelineCache::GetCurrentStateDescription( } } // If not binding the DSV, ignore the format in the hash. - if (description_out.depth_func != 0b111 || description_out.depth_write || - description_out.stencil_enable) { - description_out.depth_format = DepthRenderTargetFormat( - (regs[XE_GPU_REG_RB_DEPTH_INFO].u32 >> 16) & 1); + if (description_out.depth_func != CompareFunction::kAlways || + description_out.depth_write || description_out.stencil_enable) { + description_out.depth_format = + regs.Get().depth_format; } } else { - description_out.depth_func = 0b111; + description_out.depth_func = CompareFunction::kAlways; } if (early_z) { description_out.force_early_z = 1; @@ -684,38 +692,25 @@ bool PipelineCache::GetCurrentStateDescription( if (render_targets[i].format == DXGI_FORMAT_UNKNOWN) { break; } - uint32_t guest_rt_index = render_targets[i].guest_render_target; - uint32_t color_info, blendcontrol; - switch (guest_rt_index) { - case 1: - color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32; - break; - case 2: - color_info = 
regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32; - break; - case 3: - color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32; - break; - default: - color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - blendcontrol = regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32; - break; - } PipelineRenderTarget& rt = description_out.render_targets[i]; rt.used = 1; - rt.format = RenderTargetCache::GetBaseColorFormat( - ColorRenderTargetFormat((color_info >> 16) & 0xF)); + uint32_t guest_rt_index = render_targets[i].guest_render_target; + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[guest_rt_index]); + rt.format = + RenderTargetCache::GetBaseColorFormat(color_info.color_format); rt.write_mask = (color_mask >> (guest_rt_index * 4)) & 0xF; if (rt.write_mask) { - rt.src_blend = kBlendFactorMap[blendcontrol & 0x1F]; - rt.dest_blend = kBlendFactorMap[(blendcontrol >> 8) & 0x1F]; - rt.blend_op = BlendOp((blendcontrol >> 5) & 0x7); - rt.src_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 16) & 0x1F]; - rt.dest_blend_alpha = kBlendFactorAlphaMap[(blendcontrol >> 24) & 0x1F]; - rt.blend_op_alpha = BlendOp((blendcontrol >> 21) & 0x7); + auto blendcontrol = regs.Get( + reg::RB_BLENDCONTROL::rt_register_indices[guest_rt_index]); + rt.src_blend = kBlendFactorMap[uint32_t(blendcontrol.color_srcblend)]; + rt.dest_blend = kBlendFactorMap[uint32_t(blendcontrol.color_destblend)]; + rt.blend_op = blendcontrol.color_comb_fcn; + rt.src_blend_alpha = + kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_srcblend)]; + rt.dest_blend_alpha = + kBlendFactorAlphaMap[uint32_t(blendcontrol.alpha_destblend)]; + rt.blend_op_alpha = blendcontrol.alpha_comb_fcn; } else { rt.src_blend = PipelineBlendFactor::kOne; rt.dest_blend = PipelineBlendFactor::kZero; @@ -941,15 +936,17 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( if (!edram_rov_used_) { // Depth/stencil. 
- if (description.depth_func != 0b111 || description.depth_write) { + if (description.depth_func != CompareFunction::kAlways || + description.depth_write) { state_desc.DepthStencilState.DepthEnable = TRUE; state_desc.DepthStencilState.DepthWriteMask = description.depth_write ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; // Comparison functions are the same in Direct3D 12 but plus one (minus // one, bit 0 for less, bit 1 for equal, bit 2 for greater). - state_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC( - uint32_t(D3D12_COMPARISON_FUNC_NEVER) + description.depth_func); + state_desc.DepthStencilState.DepthFunc = + D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + + uint32_t(description.depth_func)); } if (description.stencil_enable) { state_desc.DepthStencilState.StencilEnable = TRUE; @@ -958,26 +955,30 @@ ID3D12PipelineState* PipelineCache::CreatePipelineState( state_desc.DepthStencilState.StencilWriteMask = description.stencil_write_mask; // Stencil operations are the same in Direct3D 12 too but plus one. 
- state_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_fail_op); + state_desc.DepthStencilState.FrontFace.StencilFailOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_fail_op)); state_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + - description.stencil_front_depth_fail_op); - state_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_front_pass_op); + uint32_t(description.stencil_front_depth_fail_op)); + state_desc.DepthStencilState.FrontFace.StencilPassOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_front_pass_op)); state_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + - description.stencil_front_func); - state_desc.DepthStencilState.BackFace.StencilFailOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_fail_op); + uint32_t(description.stencil_front_func)); + state_desc.DepthStencilState.BackFace.StencilFailOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_fail_op)); state_desc.DepthStencilState.BackFace.StencilDepthFailOp = D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + - description.stencil_back_depth_fail_op); - state_desc.DepthStencilState.BackFace.StencilPassOp = D3D12_STENCIL_OP( - uint32_t(D3D12_STENCIL_OP_KEEP) + description.stencil_back_pass_op); + uint32_t(description.stencil_back_depth_fail_op)); + state_desc.DepthStencilState.BackFace.StencilPassOp = + D3D12_STENCIL_OP(uint32_t(D3D12_STENCIL_OP_KEEP) + + uint32_t(description.stencil_back_pass_op)); state_desc.DepthStencilState.BackFace.StencilFunc = D3D12_COMPARISON_FUNC(uint32_t(D3D12_COMPARISON_FUNC_NEVER) + - description.stencil_back_func); + 
uint32_t(description.stencil_back_func)); } if (state_desc.DepthStencilState.DepthEnable || state_desc.DepthStencilState.StencilEnable) { diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index ba5a1a4b6..8ac86c9b0 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -152,21 +152,21 @@ class PipelineCache { uint32_t depth_clip : 1; // 15 uint32_t rov_msaa : 1; // 16 DepthRenderTargetFormat depth_format : 1; // 17 - uint32_t depth_func : 3; // 20 + CompareFunction depth_func : 3; // 20 uint32_t depth_write : 1; // 21 uint32_t stencil_enable : 1; // 22 uint32_t stencil_read_mask : 8; // 30 uint32_t force_early_z : 1; // 31 - uint32_t stencil_write_mask : 8; // 8 - uint32_t stencil_front_fail_op : 3; // 11 - uint32_t stencil_front_depth_fail_op : 3; // 14 - uint32_t stencil_front_pass_op : 3; // 17 - uint32_t stencil_front_func : 3; // 20 - uint32_t stencil_back_fail_op : 3; // 23 - uint32_t stencil_back_depth_fail_op : 3; // 26 - uint32_t stencil_back_pass_op : 3; // 29 - uint32_t stencil_back_func : 3; // 32 + uint32_t stencil_write_mask : 8; // 8 + StencilOp stencil_front_fail_op : 3; // 11 + StencilOp stencil_front_depth_fail_op : 3; // 14 + StencilOp stencil_front_pass_op : 3; // 17 + CompareFunction stencil_front_func : 3; // 20 + StencilOp stencil_back_fail_op : 3; // 23 + StencilOp stencil_back_depth_fail_op : 3; // 26 + StencilOp stencil_back_pass_op : 3; // 29 + CompareFunction stencil_back_func : 3; // 32 PipelineRenderTarget render_targets[4]; }; diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index 95f2fc2f6..9ddeca74f 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -192,7 +192,7 @@ PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives( D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) { bool index_32bit = index_format == 
IndexFormat::kInt32; auto& regs = *register_file_; - bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0; + bool reset = regs.Get().multi_prim_ib_ena; // Swap the reset index because we will be comparing unswapped values to it. uint32_t reset_index = xenos::GpuSwap( regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness); diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index a006fe113..8d32c5be7 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -541,16 +541,17 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { bool rov_used = command_processor_->IsROVUsedForEDRAM(); - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); + auto rb_surface_info = regs.Get(); + uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u); if (surface_pitch == 0) { // TODO(Triang3l): Do something if a memexport-only draw has 0 surface // pitch (never seen in any game so far, not sure if even legal). return false; } - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t msaa_samples_x = msaa_samples >= MsaaSamples::k4X ? 2 : 1; - uint32_t msaa_samples_y = msaa_samples >= MsaaSamples::k2X ? 2 : 1; + uint32_t msaa_samples_x = + rb_surface_info.msaa_samples >= MsaaSamples::k4X ? 2 : 1; + uint32_t msaa_samples_y = + rb_surface_info.msaa_samples >= MsaaSamples::k2X ? 2 : 1; // Extract color/depth info in an unified way. 
bool enabled[5]; @@ -558,26 +559,27 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { uint32_t formats[5]; bool formats_are_64bpp[5]; uint32_t color_mask = command_processor_->GetCurrentColorMask(pixel_shader); - uint32_t rb_color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, regs[XE_GPU_REG_RB_COLOR3_INFO].u32}; for (uint32_t i = 0; i < 4; ++i) { enabled[i] = (color_mask & (0xF << (i * 4))) != 0; - edram_bases[i] = std::min(rb_color_info[i] & 0xFFF, 2048u); - formats[i] = uint32_t(GetBaseColorFormat( - ColorRenderTargetFormat((rb_color_info[i] >> 16) & 0xF))); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[i]); + edram_bases[i] = std::min(color_info.color_base, 2048u); + formats[i] = uint32_t(GetBaseColorFormat(color_info.color_format)); formats_are_64bpp[i] = IsColorFormat64bpp(ColorRenderTargetFormat(formats[i])); } - uint32_t rb_depthcontrol = regs[XE_GPU_REG_RB_DEPTHCONTROL].u32; - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; + auto rb_depthcontrol = regs.Get(); + auto rb_depth_info = regs.Get(); // 0x1 = stencil test, 0x2 = depth test. - enabled[4] = (rb_depthcontrol & (0x1 | 0x2)) != 0; - edram_bases[4] = std::min(rb_depth_info & 0xFFF, 2048u); - formats[4] = (rb_depth_info >> 16) & 0x1; + enabled[4] = rb_depthcontrol.stencil_enable || rb_depthcontrol.z_enable; + edram_bases[4] = std::min(rb_depth_info.depth_base, 2048u); + formats[4] = uint32_t(rb_depth_info.depth_format); formats_are_64bpp[4] = false; // Don't mark depth regions as dirty if not writing the depth. - bool depth_readonly = (rb_depthcontrol & (0x1 | 0x4)) == 0; + // TODO(Triang3l): Make a common function for checking if stencil writing is + // really done? 
+ bool depth_readonly = + !rb_depthcontrol.stencil_enable && !rb_depthcontrol.z_write_enable; bool full_update = false; @@ -590,7 +592,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // in the beginning of the frame or after resolves by setting the current // pitch to 0. if (current_surface_pitch_ != surface_pitch || - current_msaa_samples_ != msaa_samples) { + current_msaa_samples_ != rb_surface_info.msaa_samples) { full_update = true; } @@ -632,26 +634,22 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { // Get EDRAM usage of the current draw so dirty regions can be calculated. // See D3D12CommandProcessor::UpdateFixedFunctionState for more info. - int16_t window_offset_y = - (regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32 >> 16) & 0x7FFF; - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } - uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2)) + int32_t window_offset_y = + regs.Get().window_y_offset; + auto pa_cl_vte_cntl = regs.Get(); + float viewport_scale_y = pa_cl_vte_cntl.vport_y_scale_ena ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1280.0f; - float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) + float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena ? 
regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : std::abs(viewport_scale_y); - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { + if (regs.Get().vtx_window_offset_enable) { viewport_offset_y += float(window_offset_y); } uint32_t viewport_bottom = uint32_t(std::max( 0.0f, std::ceil(viewport_offset_y + std::abs(viewport_scale_y)))); - uint32_t scissor_bottom = - (regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32 >> 16) & 0x7FFF; - if (!(regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32 & (1u << 31))) { + uint32_t scissor_bottom = regs.Get().br_y; + if (!regs.Get().window_offset_disable) { scissor_bottom = std::max(int32_t(scissor_bottom) + window_offset_y, 0); } uint32_t dirty_bottom = @@ -769,7 +767,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { ClearBindings(); current_surface_pitch_ = surface_pitch; - current_msaa_samples_ = msaa_samples; + current_msaa_samples_ = rb_surface_info.msaa_samples; if (!rov_used) { current_edram_max_rows_ = edram_max_rows; } @@ -801,8 +799,8 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { #endif } XELOGGPU("RT Cache: %s update - pitch %u, samples %u, RTs to attach %u", - full_update ? "Full" : "Partial", surface_pitch, msaa_samples, - render_targets_to_attach); + full_update ? "Full" : "Partial", surface_pitch, + rb_surface_info.msaa_samples, render_targets_to_attach); #if 0 auto device = @@ -891,7 +889,7 @@ bool RenderTargetCache::UpdateRenderTargets(const D3D12Shader* pixel_shader) { if (!rov_used) { // Sample positions when loading depth must match sample positions when // drawing. - command_processor_->SetSamplePositions(msaa_samples); + command_processor_->SetSamplePositions(rb_surface_info.msaa_samples); // Load the contents of the new render targets from the EDRAM buffer (will // change the state of the render targets to copy destination). 
@@ -1007,18 +1005,14 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, auto& regs = *register_file_; // Get the render target properties. - uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u); + auto rb_surface_info = regs.Get(); + uint32_t surface_pitch = std::min(rb_surface_info.surface_pitch, 2560u); if (surface_pitch == 0) { return true; } - MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3); - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; // Depth info is always needed because color resolve may also clear depth. - uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32; - uint32_t depth_edram_base = rb_depth_info & 0xFFF; - uint32_t depth_format = (rb_depth_info >> 16) & 0x1; - uint32_t surface_index = rb_copy_control & 0x7; + auto rb_depth_info = regs.Get(); + uint32_t surface_index = regs.Get().copy_src_select; if (surface_index > 4) { assert_always(); return false; @@ -1027,43 +1021,28 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, uint32_t surface_edram_base; uint32_t surface_format; if (surface_is_depth) { - surface_edram_base = depth_edram_base; - surface_format = depth_format; + surface_edram_base = rb_depth_info.depth_base; + surface_format = uint32_t(rb_depth_info.depth_format); } else { - uint32_t rb_color_info; - switch (surface_index) { - case 1: - rb_color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; - break; - case 2: - rb_color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; - break; - case 3: - rb_color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; - break; - default: - rb_color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; - break; - } - surface_edram_base = rb_color_info & 0xFFF; - surface_format = uint32_t(GetBaseColorFormat( - ColorRenderTargetFormat((rb_color_info >> 16) & 0xF))); + auto color_info = regs.Get( + reg::RB_COLOR_INFO::rt_register_indices[surface_index]); + surface_edram_base = 
color_info.color_base; + surface_format = uint32_t(GetBaseColorFormat(color_info.color_format)); } // Get the resolve region since both copying and clearing need it. // HACK: Vertices to use are always in vf0. - auto fetch_group = reinterpret_cast( - &regs.values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); - const auto& fetch = fetch_group->vertex_fetch_0; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); assert_true(fetch.type == 3); - assert_true(fetch.endian == 2); + assert_true(fetch.endian == Endian::k8in32); assert_true(fetch.size == 6); const uint8_t* src_vertex_address = memory->TranslatePhysical(fetch.address << 2); float vertices[6]; // Most vertices have a negative half pixel offset applied, which we reverse. float vertex_offset = - (regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1) ? 0.0f : 0.5f; + regs.Get().pix_center ? 0.0f : 0.5f; for (uint32_t i = 0; i < 6; ++i) { vertices[i] = xenos::GpuSwap(xe::load(src_vertex_address + i * sizeof(float)), @@ -1097,39 +1076,34 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, // vertices (-640,0)->(640,720), however, the destination texture pointer is // adjusted properly to the right half of the texture, and the source render // target has a pitch of 800).
+ auto pa_sc_window_offset = regs.Get(); D3D12_RECT rect; rect.left = LONG(std::min(std::min(vertices[0], vertices[2]), vertices[4])); rect.right = LONG(std::max(std::max(vertices[0], vertices[2]), vertices[4])); rect.top = LONG(std::min(std::min(vertices[1], vertices[3]), vertices[5])); rect.bottom = LONG(std::max(std::max(vertices[1], vertices[3]), vertices[5])); + if (regs.Get().vtx_window_offset_enable) { + rect.left += pa_sc_window_offset.window_x_offset; + rect.right += pa_sc_window_offset.window_x_offset; + rect.top += pa_sc_window_offset.window_y_offset; + rect.bottom += pa_sc_window_offset.window_y_offset; + } D3D12_RECT scissor; - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - scissor.left = LONG(window_scissor_tl & 0x7FFF); - scissor.right = LONG(window_scissor_br & 0x7FFF); - scissor.top = LONG((window_scissor_tl >> 16) & 0x7FFF); - scissor.bottom = LONG((window_scissor_br >> 16) & 0x7FFF); - if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { - uint32_t pa_sc_window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - int16_t window_offset_x = pa_sc_window_offset & 0x7FFF; - int16_t window_offset_y = (pa_sc_window_offset >> 16) & 0x7FFF; - if (window_offset_x & 0x4000) { - window_offset_x |= 0x8000; - } - if (window_offset_y & 0x4000) { - window_offset_y |= 0x8000; - } - rect.left += window_offset_x; - rect.right += window_offset_x; - rect.top += window_offset_y; - rect.bottom += window_offset_y; - if (!(window_scissor_tl & (1u << 31))) { - scissor.left = std::max(LONG(scissor.left + window_offset_x), LONG(0)); - scissor.right = std::max(LONG(scissor.right + window_offset_x), LONG(0)); - scissor.top = std::max(LONG(scissor.top + window_offset_y), LONG(0)); - scissor.bottom = - std::max(LONG(scissor.bottom + window_offset_y), LONG(0)); - } + auto pa_sc_window_scissor_tl = regs.Get(); + auto pa_sc_window_scissor_br = regs.Get(); + 
scissor.left = pa_sc_window_scissor_tl.tl_x; + scissor.right = pa_sc_window_scissor_br.br_x; + scissor.top = pa_sc_window_scissor_tl.tl_y; + scissor.bottom = pa_sc_window_scissor_br.br_y; + if (!pa_sc_window_scissor_tl.window_offset_disable) { + scissor.left = std::max( + LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.right = std::max( + LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0)); + scissor.top = std::max( + LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0)); + scissor.bottom = std::max( + LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0)); } rect.left = std::max(rect.left, scissor.left); rect.right = std::min(rect.right, scissor.right); @@ -1140,9 +1114,9 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, "Resolve: (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format %u) " "at %u", rect.left, rect.top, rect.right, rect.bottom, surface_index, - surface_pitch, 1 << uint32_t(msaa_samples), - msaa_samples != MsaaSamples::k1X ? "s" : "", surface_format, - surface_edram_base); + surface_pitch, 1 << uint32_t(rb_surface_info.msaa_samples), + rb_surface_info.msaa_samples != MsaaSamples::k1X ? "s" : "", + surface_format, surface_edram_base); if (rect.left >= rect.right || rect.top >= rect.bottom) { // Nothing to copy. @@ -1157,18 +1131,20 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, // GetEDRAMLayout in ResolveCopy and ResolveClear will perform the needed // clamping to the source render target size. - bool result = - ResolveCopy(shared_memory, texture_cache, surface_edram_base, - surface_pitch, msaa_samples, surface_is_depth, surface_format, - rect, written_address_out, written_length_out); + bool result = ResolveCopy(shared_memory, texture_cache, surface_edram_base, + surface_pitch, rb_surface_info.msaa_samples, + surface_is_depth, surface_format, rect, + written_address_out, written_length_out); // Clear the color RT if needed. 
if (!surface_is_depth) { - result &= ResolveClear(surface_edram_base, surface_pitch, msaa_samples, - false, surface_format, rect); + result &= + ResolveClear(surface_edram_base, surface_pitch, + rb_surface_info.msaa_samples, false, surface_format, rect); } // Clear the depth RT if needed (may be cleared alongside color). - result &= ResolveClear(depth_edram_base, surface_pitch, msaa_samples, true, - depth_format, rect); + result &= ResolveClear(rb_depth_info.depth_base, surface_pitch, + rb_surface_info.msaa_samples, true, + uint32_t(rb_depth_info.depth_format), rect); return result; } @@ -1183,19 +1159,18 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, auto& regs = *register_file_; - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; - xenos::CopyCommand copy_command = - xenos::CopyCommand((rb_copy_control >> 20) & 0x3); - if (copy_command != xenos::CopyCommand::kRaw && - copy_command != xenos::CopyCommand::kConvert) { + auto rb_copy_control = regs.Get(); + if (rb_copy_control.copy_command != xenos::CopyCommand::kRaw && + rb_copy_control.copy_command != xenos::CopyCommand::kConvert) { // TODO(Triang3l): Handle kConstantOne and kNull. + assert_always(); return false; } auto command_list = command_processor_->GetDeferredCommandList(); // Get format info. - uint32_t rb_copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32; + auto rb_copy_dest_info = regs.Get(); TextureFormat src_texture_format; bool src_64bpp; if (is_depth) { @@ -1222,14 +1197,15 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // The destination format is specified as k_8_8_8_8 when resolving depth, but // no format conversion is done for depth, so ignore it. TextureFormat dest_format = - is_depth ? src_texture_format - : GetBaseFormat(TextureFormat((rb_copy_dest_info >> 7) & 0x3F)); + is_depth + ? 
src_texture_format + : GetBaseFormat(TextureFormat(rb_copy_dest_info.copy_dest_format)); const FormatInfo* dest_format_info = FormatInfo::Get(dest_format); // Get the destination region and clamp the source region to it. - uint32_t rb_copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32; - uint32_t dest_pitch = rb_copy_dest_pitch & 0x3FFF; - uint32_t dest_height = (rb_copy_dest_pitch >> 16) & 0x3FFF; + auto rb_copy_dest_pitch = regs.Get(); + uint32_t dest_pitch = rb_copy_dest_pitch.copy_dest_pitch; + uint32_t dest_height = rb_copy_dest_pitch.copy_dest_height; if (dest_pitch == 0 || dest_height == 0) { // Nothing to copy. return true; @@ -1263,8 +1239,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF; // An example of a 3D resolve destination is the color grading LUT (used // starting from the developer/publisher intro) in Dead Space 3. - bool dest_3d = (rb_copy_dest_info & (1 << 3)) != 0; - if (dest_3d) { + if (rb_copy_dest_info.copy_dest_array) { dest_address += texture_util::GetTiledOffset3D( int(rect.left & ~LONG(31)), int(rect.top & ~LONG(31)), 0, dest_pitch, dest_height, xe::log2_floor(dest_format_info->bits_per_pixel >> 3)); @@ -1279,21 +1254,20 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, // resolve to 8bpp or 16bpp textures at very odd locations. return false; } - uint32_t dest_z = dest_3d ? ((rb_copy_dest_info >> 4) & 0x7) : 0; + uint32_t dest_z = + rb_copy_dest_info.copy_dest_array ? rb_copy_dest_info.copy_dest_slice : 0; // See what samples we need and what we should do with them. 
- xenos::CopySampleSelect sample_select = - xenos::CopySampleSelect((rb_copy_control >> 4) & 0x7); + xenos::CopySampleSelect sample_select = rb_copy_control.copy_sample_select; if (is_depth && sample_select > xenos::CopySampleSelect::k3) { assert_always(); return false; } - Endian128 dest_endian = Endian128(rb_copy_dest_info & 0x7); int32_t dest_exp_bias; if (is_depth) { dest_exp_bias = 0; } else { - dest_exp_bias = int32_t((rb_copy_dest_info >> 16) << 26) >> 26; + dest_exp_bias = rb_copy_dest_info.copy_dest_exp_bias; if (ColorRenderTargetFormat(src_format) == ColorRenderTargetFormat::k_16_16 || ColorRenderTargetFormat(src_format) == @@ -1309,14 +1283,14 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, } } } - bool dest_swap = !is_depth && ((rb_copy_dest_info >> 24) & 0x1); + bool dest_swap = !is_depth && rb_copy_dest_info.copy_dest_swap; XELOGGPU( "Resolve: Copying samples %u to 0x%.8X (%ux%u, %cD), destination Z %u, " "destination format %s, exponent bias %d, red and blue %sswapped", uint32_t(sample_select), dest_address, dest_pitch, dest_height, - dest_3d ? '3' : '2', dest_z, dest_format_info->name, dest_exp_bias, - dest_swap ? "" : "not "); + rb_copy_dest_info.copy_dest_array ? '3' : '2', dest_z, + dest_format_info->name, dest_exp_bias, dest_swap ? "" : "not "); // There are 2 paths for resolving in this function - they don't necessarily // have to map directly to kRaw and kConvert CopyCommands. 
@@ -1344,7 +1318,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, resolution_scale_2x_ && cvars::d3d12_resolution_scale_resolve_edge_clamp && cvars::d3d12_half_pixel_offset && - !(regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32 & 0x1); + !regs.Get().pix_center; if (sample_select <= xenos::CopySampleSelect::k3 && src_texture_format == dest_format && dest_exp_bias == 0) { // ************************************************************************* @@ -1363,7 +1337,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, uint32_t dest_size; uint32_t dest_modified_start = dest_address; uint32_t dest_modified_length; - if (dest_3d) { + if (rb_copy_dest_info.copy_dest_array) { // Depth granularity is 4 (though TiledAddress chaining is possible with 8 // granularity). dest_size = texture_util::GetGuestMipSliceStorageSize( @@ -1442,8 +1416,10 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, assert_true(dest_pitch <= 8192); root_constants.tile_sample_dest_info = ((dest_pitch + 31) >> 5) | - (dest_3d ? (((dest_height + 31) >> 5) << 9) : 0) | - (uint32_t(sample_select) << 18) | (uint32_t(dest_endian) << 20); + (rb_copy_dest_info.copy_dest_array ? (((dest_height + 31) >> 5) << 9) + : 0) | + (uint32_t(sample_select) << 18) | + (uint32_t(rb_copy_dest_info.copy_dest_endian) << 20); if (dest_swap) { root_constants.tile_sample_dest_info |= (1 << 23) | (src_format << 24); } @@ -1797,10 +1773,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, copy_buffer_state = D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; // dest_address already adjusted, so offsets are & 31. 
texture_cache->TileResolvedTexture( - dest_format, dest_address, dest_pitch, dest_height, dest_3d, - uint32_t(rect.left) & 31, uint32_t(rect.top) & 31, dest_z, copy_width, - copy_height, dest_endian, copy_buffer, resolve_target->copy_buffer_size, - resolve_target->footprint, &written_address_out, &written_length_out); + dest_format, dest_address, dest_pitch, dest_height, + rb_copy_dest_info.copy_dest_array != 0, uint32_t(rect.left) & 31, + uint32_t(rect.top) & 31, dest_z, copy_width, copy_height, + rb_copy_dest_info.copy_dest_endian, copy_buffer, + resolve_target->copy_buffer_size, resolve_target->footprint, + &written_address_out, &written_length_out); // Done with the copy buffer. @@ -1817,9 +1795,15 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, auto& regs = *register_file_; // Check if clearing is enabled. - uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32; - if (!(rb_copy_control & (is_depth ? (1 << 9) : (1 << 8)))) { - return true; + auto rb_copy_control = regs.Get(); + if (is_depth) { + if (!rb_copy_control.depth_clear_enable) { + return true; + } + } else { + if (!rb_copy_control.color_clear_enable) { + return true; + } } XELOGGPU("Resolve: Clearing the %s render target", @@ -1886,7 +1870,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base, } else if (is_64bpp) { // TODO(Triang3l): Check which 32-bit portion is in which register. 
root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_); } else { Register reg = diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 1fb339b9a..0fb518f9e 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -848,15 +848,13 @@ void TextureCache::RequestTextures(uint32_t used_vertex_texture_mask, continue; } TextureBinding& binding = texture_bindings_[index]; - uint32_t r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6; - auto group = - reinterpret_cast(&regs.values[r]); + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + index * 6); TextureKey old_key = binding.key; bool old_has_unsigned = binding.has_unsigned; bool old_has_signed = binding.has_signed; - BindingInfoFromFetchConstant(group->texture_fetch, binding.key, - &binding.swizzle, &binding.has_unsigned, - &binding.has_signed); + BindingInfoFromFetchConstant(fetch, binding.key, &binding.swizzle, + &binding.has_unsigned, &binding.has_signed); texture_keys_in_sync_ |= index_bit; if (binding.key.IsInvalid()) { binding.texture = nullptr; @@ -1142,18 +1140,15 @@ void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv, TextureCache::SamplerParameters TextureCache::GetSamplerParameters( const D3D12Shader::SamplerBinding& binding) const { auto& regs = *register_file_; - uint32_t r = - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6; - auto group = - reinterpret_cast(&regs.values[r]); - auto& fetch = group->texture_fetch; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + binding.fetch_constant * 6); SamplerParameters parameters; - parameters.clamp_x = ClampMode(fetch.clamp_x); - parameters.clamp_y =
ClampMode(fetch.clamp_y); - parameters.clamp_z = ClampMode(fetch.clamp_z); - parameters.border_color = BorderColor(fetch.border_color); + parameters.clamp_x = fetch.clamp_x; + parameters.clamp_y = fetch.clamp_y; + parameters.clamp_z = fetch.clamp_z; + parameters.border_color = fetch.border_color; uint32_t mip_min_level = fetch.mip_min_level; uint32_t mip_max_level = fetch.mip_max_level; @@ -1171,7 +1166,7 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters( parameters.lod_bias = fetch.lod_bias; AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst - ? AnisoFilter(fetch.aniso_filter) + ? fetch.aniso_filter : binding.aniso_filter; aniso_filter = std::min(aniso_filter, AnisoFilter::kMax_16_1); parameters.aniso_filter = aniso_filter; @@ -1182,17 +1177,17 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters( } else { TextureFilter mag_filter = binding.mag_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.mag_filter) + ? fetch.mag_filter : binding.mag_filter; parameters.mag_linear = mag_filter == TextureFilter::kLinear; TextureFilter min_filter = binding.min_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.min_filter) + ? fetch.min_filter : binding.min_filter; parameters.min_linear = min_filter == TextureFilter::kLinear; TextureFilter mip_filter = binding.mip_filter == TextureFilter::kUseFetchConst - ? TextureFilter(fetch.mip_filter) + ? fetch.mip_filter : binding.mip_filter; parameters.mip_linear = mip_filter == TextureFilter::kLinear; // TODO(Triang3l): Investigate mip_filter TextureFilter::kBaseMap. 
@@ -1586,13 +1581,12 @@ void TextureCache::CreateScaledResolveBufferRawUAV( bool TextureCache::RequestSwapTexture(D3D12_CPU_DESCRIPTOR_HANDLE handle, TextureFormat& format_out) { - auto group = reinterpret_cast( - &register_file_->values[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0]); - auto& fetch = group->texture_fetch; + auto& regs = *register_file_; + const auto& fetch = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0); TextureKey key; uint32_t swizzle; - BindingInfoFromFetchConstant(group->texture_fetch, key, &swizzle, nullptr, - nullptr); + BindingInfoFromFetchConstant(fetch, key, &swizzle, nullptr, nullptr); if (key.base_page == 0 || key.dimension != Dimension::k2D) { return false; } @@ -1733,7 +1727,7 @@ void TextureCache::BindingInfoFromFetchConstant( return; } - TextureFormat format = GetBaseFormat(TextureFormat(fetch.format)); + TextureFormat format = GetBaseFormat(fetch.format); key_out.base_page = base_page; key_out.mip_page = mip_page; @@ -1745,7 +1739,7 @@ void TextureCache::BindingInfoFromFetchConstant( key_out.tiled = fetch.tiled; key_out.packed_mips = fetch.packed_mips; key_out.format = format; - key_out.endianness = Endian(fetch.endianness); + key_out.endianness = fetch.endianness; if (swizzle_out != nullptr) { uint32_t swizzle = fetch.swizzle; @@ -1783,16 +1777,16 @@ void TextureCache::BindingInfoFromFetchConstant( } if (has_unsigned_out != nullptr) { - *has_unsigned_out = TextureSign(fetch.sign_x) != TextureSign::kSigned || - TextureSign(fetch.sign_y) != TextureSign::kSigned || - TextureSign(fetch.sign_z) != TextureSign::kSigned || - TextureSign(fetch.sign_w) != TextureSign::kSigned; + *has_unsigned_out = fetch.sign_x != TextureSign::kSigned || + fetch.sign_y != TextureSign::kSigned || + fetch.sign_z != TextureSign::kSigned || + fetch.sign_w != TextureSign::kSigned; } if (has_signed_out != nullptr) { - *has_signed_out = TextureSign(fetch.sign_x) == TextureSign::kSigned || - TextureSign(fetch.sign_y) == TextureSign::kSigned || -
TextureSign(fetch.sign_z) == TextureSign::kSigned || - TextureSign(fetch.sign_w) == TextureSign::kSigned; + *has_signed_out = fetch.sign_x == TextureSign::kSigned || + fetch.sign_y == TextureSign::kSigned || + fetch.sign_z == TextureSign::kSigned || + fetch.sign_w == TextureSign::kSigned; } } diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index 3df06bfd9..c305b5a03 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -134,7 +134,7 @@ XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1) XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR) XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL) -XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL_0) +XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0) XE_GPU_REGISTER(0x2202, kDword, RB_COLORCONTROL) XE_GPU_REGISTER(0x2203, kDword, RB_HIZCONTROL) XE_GPU_REGISTER(0x2204, kDword, PA_CL_CLIP_CNTL) @@ -142,9 +142,9 @@ XE_GPU_REGISTER(0x2205, kDword, PA_SU_SC_MODE_CNTL) XE_GPU_REGISTER(0x2206, kDword, PA_CL_VTE_CNTL) XE_GPU_REGISTER(0x2207, kDword, VGT_CURRENT_BIN_ID_MIN) XE_GPU_REGISTER(0x2208, kDword, RB_MODECONTROL) -XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL_1) -XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL_2) -XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL_3) +XE_GPU_REGISTER(0x2209, kDword, RB_BLENDCONTROL1) +XE_GPU_REGISTER(0x220A, kDword, RB_BLENDCONTROL2) +XE_GPU_REGISTER(0x220B, kDword, RB_BLENDCONTROL3) XE_GPU_REGISTER(0x2280, kDword, PA_SU_POINT_SIZE) XE_GPU_REGISTER(0x2281, kDword, PA_SU_POINT_MINMAX) @@ -199,7 +199,7 @@ XE_GPU_REGISTER(0x231B, kDword, RB_COPY_DEST_INFO) XE_GPU_REGISTER(0x231C, kDword, RB_HIZ_CLEAR) XE_GPU_REGISTER(0x231D, kDword, RB_DEPTH_CLEAR) XE_GPU_REGISTER(0x231E, kDword, RB_COLOR_CLEAR) -XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LOW) +XE_GPU_REGISTER(0x231F, kDword, RB_COLOR_CLEAR_LO) XE_GPU_REGISTER(0x2320, kDword, RB_COPY_FUNC) XE_GPU_REGISTER(0x2321, kDword, RB_COPY_REF) XE_GPU_REGISTER(0x2322, kDword, RB_COPY_MASK) diff 
--git a/src/xenia/gpu/registers.cc b/src/xenia/gpu/registers.cc index 4215e3352..df7aaa48a 100644 --- a/src/xenia/gpu/registers.cc +++ b/src/xenia/gpu/registers.cc @@ -13,38 +13,51 @@ namespace xe { namespace gpu { namespace reg { -constexpr uint32_t COHER_STATUS_HOST::register_index; -constexpr uint32_t WAIT_UNTIL::register_index; +constexpr Register COHER_STATUS_HOST::register_index; +constexpr Register WAIT_UNTIL::register_index; -constexpr uint32_t SQ_PROGRAM_CNTL::register_index; -constexpr uint32_t SQ_CONTEXT_MISC::register_index; +constexpr Register SQ_PROGRAM_CNTL::register_index; +constexpr Register SQ_CONTEXT_MISC::register_index; -constexpr uint32_t VGT_OUTPUT_PATH_CNTL::register_index; -constexpr uint32_t VGT_HOS_CNTL::register_index; +constexpr Register VGT_OUTPUT_PATH_CNTL::register_index; +constexpr Register VGT_HOS_CNTL::register_index; -constexpr uint32_t PA_SU_POINT_MINMAX::register_index; -constexpr uint32_t PA_SU_POINT_SIZE::register_index; -constexpr uint32_t PA_SU_SC_MODE_CNTL::register_index; -constexpr uint32_t PA_SU_VTX_CNTL::register_index; -constexpr uint32_t PA_SC_MPASS_PS_CNTL::register_index; -constexpr uint32_t PA_SC_VIZ_QUERY::register_index; -constexpr uint32_t PA_CL_CLIP_CNTL::register_index; -constexpr uint32_t PA_CL_VTE_CNTL::register_index; -constexpr uint32_t PA_SC_WINDOW_OFFSET::register_index; -constexpr uint32_t PA_SC_WINDOW_SCISSOR_TL::register_index; -constexpr uint32_t PA_SC_WINDOW_SCISSOR_BR::register_index; +constexpr Register PA_SU_POINT_MINMAX::register_index; +constexpr Register PA_SU_POINT_SIZE::register_index; +constexpr Register PA_SU_SC_MODE_CNTL::register_index; +constexpr Register PA_SU_VTX_CNTL::register_index; +constexpr Register PA_SC_MPASS_PS_CNTL::register_index; +constexpr Register PA_SC_VIZ_QUERY::register_index; +constexpr Register PA_CL_CLIP_CNTL::register_index; +constexpr Register PA_CL_VTE_CNTL::register_index; +constexpr Register PA_SC_WINDOW_OFFSET::register_index; +constexpr Register 
PA_SC_WINDOW_SCISSOR_TL::register_index; +constexpr Register PA_SC_WINDOW_SCISSOR_BR::register_index; -constexpr uint32_t RB_MODECONTROL::register_index; -constexpr uint32_t RB_SURFACE_INFO::register_index; -constexpr uint32_t RB_COLORCONTROL::register_index; -constexpr uint32_t RB_COLOR_INFO::register_index; -constexpr uint32_t RB_COLOR_MASK::register_index; -constexpr uint32_t RB_DEPTHCONTROL::register_index; -constexpr uint32_t RB_STENCILREFMASK::register_index; -constexpr uint32_t RB_DEPTH_INFO::register_index; -constexpr uint32_t RB_COPY_CONTROL::register_index; -constexpr uint32_t RB_COPY_DEST_INFO::register_index; -constexpr uint32_t RB_COPY_DEST_PITCH::register_index; +constexpr Register RB_MODECONTROL::register_index; +constexpr Register RB_SURFACE_INFO::register_index; +constexpr Register RB_COLORCONTROL::register_index; +constexpr Register RB_COLOR_INFO::register_index; +const Register RB_COLOR_INFO::rt_register_indices[4] = { + XE_GPU_REG_RB_COLOR_INFO, + XE_GPU_REG_RB_COLOR1_INFO, + XE_GPU_REG_RB_COLOR2_INFO, + XE_GPU_REG_RB_COLOR3_INFO, +}; +constexpr Register RB_COLOR_MASK::register_index; +constexpr Register RB_BLENDCONTROL::register_index; +const Register RB_BLENDCONTROL::rt_register_indices[4] = { + XE_GPU_REG_RB_BLENDCONTROL0, + XE_GPU_REG_RB_BLENDCONTROL1, + XE_GPU_REG_RB_BLENDCONTROL2, + XE_GPU_REG_RB_BLENDCONTROL3, +}; +constexpr Register RB_DEPTHCONTROL::register_index; +constexpr Register RB_STENCILREFMASK::register_index; +constexpr Register RB_DEPTH_INFO::register_index; +constexpr Register RB_COPY_CONTROL::register_index; +constexpr Register RB_COPY_DEST_INFO::register_index; +constexpr Register RB_COPY_DEST_PITCH::register_index; } // namespace reg } // namespace gpu diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index a0fc9e279..5b6fdc54b 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -13,11 +13,12 @@ #include #include -#include "xenia/base/bit_field.h" #include "xenia/gpu/xenos.h" // Most 
registers can be found from: // https://github.com/UDOOboard/Kernel_Unico/blob/master/drivers/mxc/amd-gpu/include/reg/yamato/14/yamato_registers.h +// Some registers were added on Adreno specifically and are not referenced in +// game .pdb files and never set by games. namespace xe { namespace gpu { @@ -38,46 +39,49 @@ namespace reg { *******************************************************************************/ union COHER_STATUS_HOST { - xe::bf matching_contexts; - xe::bf rb_copy_dest_base_ena; - xe::bf dest_base_0_ena; - xe::bf dest_base_1_ena; - xe::bf dest_base_2_ena; - xe::bf dest_base_3_ena; - xe::bf dest_base_4_ena; - xe::bf dest_base_5_ena; - xe::bf dest_base_6_ena; - xe::bf dest_base_7_ena; - - xe::bf vc_action_ena; - xe::bf tc_action_ena; - xe::bf pglb_action_ena; - - xe::bf status; - + struct { + uint32_t matching_contexts : 8; // +0 + uint32_t rb_copy_dest_base_ena : 1; // +8 + uint32_t dest_base_0_ena : 1; // +9 + uint32_t dest_base_1_ena : 1; // +10 + uint32_t dest_base_2_ena : 1; // +11 + uint32_t dest_base_3_ena : 1; // +12 + uint32_t dest_base_4_ena : 1; // +13 + uint32_t dest_base_5_ena : 1; // +14 + uint32_t dest_base_6_ena : 1; // +15 + uint32_t dest_base_7_ena : 1; // +16 + uint32_t : 7; // +17 + uint32_t vc_action_ena : 1; // +24 + uint32_t tc_action_ena : 1; // +25 + uint32_t pglb_action_ena : 1; // +26 + uint32_t : 4; // +27 + uint32_t status : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_COHER_STATUS_HOST; + static constexpr Register register_index = XE_GPU_REG_COHER_STATUS_HOST; }; union WAIT_UNTIL { - xe::bf wait_re_vsync; - xe::bf wait_fe_vsync; - xe::bf wait_vsync; - xe::bf wait_dsply_id0; - xe::bf wait_dsply_id1; - xe::bf wait_dsply_id2; - - xe::bf wait_cmdfifo; - - xe::bf wait_2d_idle; - xe::bf wait_3d_idle; - xe::bf wait_2d_idleclean; - xe::bf wait_3d_idleclean; - - xe::bf cmdfifo_entries; - + struct { + uint32_t : 1; // +0 + uint32_t wait_re_vsync : 1; // +1 + uint32_t wait_fe_vsync : 1; 
// +2 + uint32_t wait_vsync : 1; // +3 + uint32_t wait_dsply_id0 : 1; // +4 + uint32_t wait_dsply_id1 : 1; // +5 + uint32_t wait_dsply_id2 : 1; // +6 + uint32_t : 3; // +7 + uint32_t wait_cmdfifo : 1; // +10 + uint32_t : 3; // +11 + uint32_t wait_2d_idle : 1; // +14 + uint32_t wait_3d_idle : 1; // +15 + uint32_t wait_2d_idleclean : 1; // +16 + uint32_t wait_3d_idleclean : 1; // +17 + uint32_t : 2; // +18 + uint32_t cmdfifo_entries : 4; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_WAIT_UNTIL; + static constexpr Register register_index = XE_GPU_REG_WAIT_UNTIL; }; /******************************************************************************* @@ -89,35 +93,38 @@ union WAIT_UNTIL { *******************************************************************************/ union SQ_PROGRAM_CNTL { - // Note from a2xx.xml: - // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, but - // high bit is set to indicate "0 registers used". - xe::bf vs_num_reg; - xe::bf ps_num_reg; - xe::bf vs_resource; - xe::bf ps_resource; - xe::bf param_gen; - xe::bf gen_index_pix; - xe::bf vs_export_count; - xe::bf vs_export_mode; - xe::bf ps_export_mode; - xe::bf gen_index_vtx; - + struct { + // Note from a2xx.xml: + // Only 0x3F worth of valid register values for VS_NUM_REG and PS_NUM_REG, + // but high bit is set to indicate "0 registers used". 
+ uint32_t vs_num_reg : 8; // +0 + uint32_t ps_num_reg : 8; // +8 + uint32_t vs_resource : 1; // +16 + uint32_t ps_resource : 1; // +17 + uint32_t param_gen : 1; // +18 + uint32_t gen_index_pix : 1; // +19 + uint32_t vs_export_count : 4; // +20 + xenos::VertexShaderExportMode vs_export_mode : 3; // +24 + uint32_t ps_export_mode : 4; // +27 + uint32_t gen_index_vtx : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; + static constexpr Register register_index = XE_GPU_REG_SQ_PROGRAM_CNTL; }; union SQ_CONTEXT_MISC { - xe::bf inst_pred_optimize; - xe::bf sc_output_screen_xy; - xe::bf sc_sample_cntl; - xe::bf param_gen_pos; - xe::bf perfcounter_ref; - xe::bf yeild_optimize; // sic - xe::bf tx_cache_sel; - + struct { + uint32_t inst_pred_optimize : 1; // +0 + uint32_t sc_output_screen_xy : 1; // +1 + xenos::SampleControl sc_sample_cntl : 2; // +2 + uint32_t : 4; // +4 + uint32_t param_gen_pos : 8; // +8 + uint32_t perfcounter_ref : 1; // +16 + uint32_t yeild_optimize : 1; // +17 sic + uint32_t tx_cache_sel : 1; // +18 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_SQ_CONTEXT_MISC; + static constexpr Register register_index = XE_GPU_REG_SQ_CONTEXT_MISC; }; /******************************************************************************* @@ -139,17 +146,19 @@ union SQ_CONTEXT_MISC { *******************************************************************************/ union VGT_OUTPUT_PATH_CNTL { - xe::bf path_select; - + struct { + xenos::VGTOutputPath path_select : 2; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; + static constexpr Register register_index = XE_GPU_REG_VGT_OUTPUT_PATH_CNTL; }; union VGT_HOS_CNTL { - xe::bf tess_mode; - + struct { + xenos::TessellationMode tess_mode : 2; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_VGT_HOS_CNTL; + static constexpr Register register_index = 
XE_GPU_REG_VGT_HOS_CNTL; }; /******************************************************************************* @@ -166,145 +175,162 @@ union VGT_HOS_CNTL { *******************************************************************************/ union PA_SU_POINT_MINMAX { - // Radius, 12.4 fixed point. - xe::bf min_size; - xe::bf max_size; - + struct { + // Radius, 12.4 fixed point. + uint32_t min_size : 16; // +0 + uint32_t max_size : 16; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; + static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_MINMAX; }; union PA_SU_POINT_SIZE { - // 1/2 width or height, 12.4 fixed point. - xe::bf height; - xe::bf width; - + struct { + // 1/2 width or height, 12.4 fixed point. + uint32_t height : 16; // +0 + uint32_t width : 16; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_POINT_SIZE; + static constexpr Register register_index = XE_GPU_REG_PA_SU_POINT_SIZE; }; // Setup Unit / Scanline Converter mode cntl union PA_SU_SC_MODE_CNTL { - xe::bf cull_front; - xe::bf cull_back; - xe::bf face; - xe::bf poly_mode; - xe::bf polymode_front_ptype; - xe::bf polymode_back_ptype; - xe::bf poly_offset_front_enable; - xe::bf poly_offset_back_enable; - xe::bf poly_offset_para_enable; - - xe::bf msaa_enable; - xe::bf vtx_window_offset_enable; - - xe::bf line_stipple_enable; - xe::bf provoking_vtx_last; - xe::bf persp_corr_dis; - xe::bf multi_prim_ib_ena; - - xe::bf quad_order_enable; - - xe::bf wait_rb_idle_all_tri; - xe::bf wait_rb_idle_first_tri_new_state; - + struct { + uint32_t cull_front : 1; // +0 + uint32_t cull_back : 1; // +1 + // 0 - front is CCW, 1 - front is CW. 
+ uint32_t face : 1; // +2 + xenos::PolygonModeEnable poly_mode : 2; // +3 + xenos::PolygonType polymode_front_ptype : 3; // +5 + xenos::PolygonType polymode_back_ptype : 3; // +8 + uint32_t poly_offset_front_enable : 1; // +11 + uint32_t poly_offset_back_enable : 1; // +12 + uint32_t poly_offset_para_enable : 1; // +13 + uint32_t : 1; // +14 + uint32_t msaa_enable : 1; // +15 + uint32_t vtx_window_offset_enable : 1; // +16 + // LINE_STIPPLE_ENABLE was added on Adreno. + uint32_t : 2; // +17 + uint32_t provoking_vtx_last : 1; // +19 + uint32_t persp_corr_dis : 1; // +20 + uint32_t multi_prim_ib_ena : 1; // +21 + uint32_t : 1; // +22 + uint32_t quad_order_enable : 1; // +23 + // WAIT_RB_IDLE_ALL_TRI and WAIT_RB_IDLE_FIRST_TRI_NEW_STATE were added on + // Adreno. + // TODO(Triang3l): Find SC_ONE_QUAD_PER_CLOCK offset. + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SU_SC_MODE_CNTL; }; // Setup Unit Vertex Control union PA_SU_VTX_CNTL { - xe::bf pix_center; // 1 = half pixel offset - xe::bf round_mode; - xe::bf quant_mode; - + struct { + uint32_t pix_center : 1; // +0 1 = half pixel offset (OpenGL). 
+ uint32_t round_mode : 2; // +1 + uint32_t quant_mode : 3; // +3 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SU_VTX_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SU_VTX_CNTL; }; union PA_SC_MPASS_PS_CNTL { - xe::bf mpass_pix_vec_per_pass; - xe::bf mpass_ps_ena; - + struct { + uint32_t mpass_pix_vec_per_pass : 20; // +0 + uint32_t : 11; // +20 + uint32_t mpass_ps_ena : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_SC_MPASS_PS_CNTL; }; // Scanline converter viz query union PA_SC_VIZ_QUERY { - xe::bf viz_query_ena; - xe::bf viz_query_id; - xe::bf kill_pix_post_early_z; - + struct { + uint32_t viz_query_ena : 1; // +0 + uint32_t viz_query_id : 6; // +1 + uint32_t kill_pix_post_early_z : 1; // +7 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; + static constexpr Register register_index = XE_GPU_REG_PA_SC_VIZ_QUERY; }; // Clipper clip control union PA_CL_CLIP_CNTL { - xe::bf ucp_ena_0; - xe::bf ucp_ena_1; - xe::bf ucp_ena_2; - xe::bf ucp_ena_3; - xe::bf ucp_ena_4; - xe::bf ucp_ena_5; - - xe::bf ps_ucp_mode; - xe::bf clip_disable; - xe::bf ucp_cull_only_ena; - xe::bf boundary_edge_flag_ena; - xe::bf dx_clip_space_def; - xe::bf dis_clip_err_detect; - xe::bf vtx_kill_or; - xe::bf xy_nan_retain; - xe::bf z_nan_retain; - xe::bf w_nan_retain; - + struct { + uint32_t ucp_ena_0 : 1; // +0 + uint32_t ucp_ena_1 : 1; // +1 + uint32_t ucp_ena_2 : 1; // +2 + uint32_t ucp_ena_3 : 1; // +3 + uint32_t ucp_ena_4 : 1; // +4 + uint32_t ucp_ena_5 : 1; // +5 + uint32_t : 8; // +6 + uint32_t ps_ucp_mode : 2; // +14 + uint32_t clip_disable : 1; // +16 + uint32_t ucp_cull_only_ena : 1; // +17 + uint32_t boundary_edge_flag_ena : 1; // +18 + uint32_t dx_clip_space_def : 1; // +19 + uint32_t dis_clip_err_detect : 1; // +20 + uint32_t vtx_kill_or : 1; // +21 + uint32_t 
xy_nan_retain : 1; // +22 + uint32_t z_nan_retain : 1; // +23 + uint32_t w_nan_retain : 1; // +24 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_CL_CLIP_CNTL; }; // Viewport transform engine control union PA_CL_VTE_CNTL { - xe::bf vport_x_scale_ena; - xe::bf vport_x_offset_ena; - xe::bf vport_y_scale_ena; - xe::bf vport_y_offset_ena; - xe::bf vport_z_scale_ena; - xe::bf vport_z_offset_ena; - - xe::bf vtx_xy_fmt; - xe::bf vtx_z_fmt; - xe::bf vtx_w0_fmt; - xe::bf perfcounter_ref; - + struct { + uint32_t vport_x_scale_ena : 1; // +0 + uint32_t vport_x_offset_ena : 1; // +1 + uint32_t vport_y_scale_ena : 1; // +2 + uint32_t vport_y_offset_ena : 1; // +3 + uint32_t vport_z_scale_ena : 1; // +4 + uint32_t vport_z_offset_ena : 1; // +5 + uint32_t : 2; // +6 + uint32_t vtx_xy_fmt : 1; // +8 + uint32_t vtx_z_fmt : 1; // +9 + uint32_t vtx_w0_fmt : 1; // +10 + uint32_t perfcounter_ref : 1; // +11 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_CL_VTE_CNTL; + static constexpr Register register_index = XE_GPU_REG_PA_CL_VTE_CNTL; }; union PA_SC_WINDOW_OFFSET { - xe::bf window_x_offset; - xe::bf window_y_offset; - + struct { + int32_t window_x_offset : 15; // +0 + uint32_t : 1; // +15 + int32_t window_y_offset : 15; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_OFFSET; }; union PA_SC_WINDOW_SCISSOR_TL { - xe::bf tl_x; - xe::bf tl_y; - xe::bf window_offset_disable; - + struct { + uint32_t tl_x : 14; // +0 + uint32_t : 2; // +14 + uint32_t tl_y : 14; // +16 + uint32_t : 1; // +30 + uint32_t window_offset_disable : 1; // +31 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL; }; union 
PA_SC_WINDOW_SCISSOR_BR { - xe::bf br_x; - xe::bf br_y; - + struct { + uint32_t br_x : 14; // +0 + uint32_t : 2; // +14 + uint32_t br_y : 14; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; + static constexpr Register register_index = XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR; }; /******************************************************************************* @@ -316,136 +342,174 @@ union PA_SC_WINDOW_SCISSOR_BR { *******************************************************************************/ union RB_MODECONTROL { - xe::bf edram_mode; - + struct { + xenos::ModeControl edram_mode : 3; // +0 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_MODECONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_MODECONTROL; }; union RB_SURFACE_INFO { - xe::bf surface_pitch; - xe::bf msaa_samples; - xe::bf hiz_pitch; - + struct { + uint32_t surface_pitch : 14; // +0 + uint32_t : 2; // +14 + MsaaSamples msaa_samples : 2; // +16 + uint32_t hiz_pitch : 14; // +18 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_SURFACE_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_SURFACE_INFO; }; union RB_COLORCONTROL { - xe::bf alpha_func; - xe::bf alpha_test_enable; - xe::bf alpha_to_mask_enable; - // Everything in between was added on Adreno, not in game PDBs and never set. - xe::bf alpha_to_mask_offset0; - xe::bf alpha_to_mask_offset1; - xe::bf alpha_to_mask_offset2; - xe::bf alpha_to_mask_offset3; - + struct { + CompareFunction alpha_func : 3; // +0 + uint32_t alpha_test_enable : 1; // +3 + uint32_t alpha_to_mask_enable : 1; // +4 + // Everything in between was added on Adreno. 
+ uint32_t : 19; // +5 + uint32_t alpha_to_mask_offset0 : 2; // +24 + uint32_t alpha_to_mask_offset1 : 2; // +26 + uint32_t alpha_to_mask_offset2 : 2; // +28 + uint32_t alpha_to_mask_offset3 : 2; // +30 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLORCONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_COLORCONTROL; }; union RB_COLOR_INFO { - xe::bf color_base; - xe::bf color_format; - xe::bf color_exp_bias; - + struct { + uint32_t color_base : 12; // +0 + uint32_t : 4; // +12 + ColorRenderTargetFormat color_format : 4; // +16 + int32_t color_exp_bias : 6; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_COLOR_INFO; // RB_COLOR[1-3]_INFO also use this format. + static const Register rt_register_indices[4]; }; union RB_COLOR_MASK { - xe::bf write_red0; - xe::bf write_green0; - xe::bf write_blue0; - xe::bf write_alpha0; - xe::bf write_red1; - xe::bf write_green1; - xe::bf write_blue1; - xe::bf write_alpha1; - xe::bf write_red2; - xe::bf write_green2; - xe::bf write_blue2; - xe::bf write_alpha2; - xe::bf write_red3; - xe::bf write_green3; - xe::bf write_blue3; - xe::bf write_alpha3; - + struct { + uint32_t write_red0 : 1; // +0 + uint32_t write_green0 : 1; // +1 + uint32_t write_blue0 : 1; // +2 + uint32_t write_alpha0 : 1; // +3 + uint32_t write_red1 : 1; // +4 + uint32_t write_green1 : 1; // +5 + uint32_t write_blue1 : 1; // +6 + uint32_t write_alpha1 : 1; // +7 + uint32_t write_red2 : 1; // +8 + uint32_t write_green2 : 1; // +9 + uint32_t write_blue2 : 1; // +10 + uint32_t write_alpha2 : 1; // +11 + uint32_t write_red3 : 1; // +12 + uint32_t write_green3 : 1; // +13 + uint32_t write_blue3 : 1; // +14 + uint32_t write_alpha3 : 1; // +15 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COLOR_MASK; + static constexpr Register register_index = XE_GPU_REG_RB_COLOR_MASK; +}; + 
+union RB_BLENDCONTROL { + struct { + BlendFactor color_srcblend : 5; // +0 + BlendOp color_comb_fcn : 3; // +5 + BlendFactor color_destblend : 5; // +8 + uint32_t : 3; // +13 + BlendFactor alpha_srcblend : 5; // +16 + BlendOp alpha_comb_fcn : 3; // +21 + BlendFactor alpha_destblend : 5; // +24 + // BLEND_FORCE_ENABLE and BLEND_FORCE were added on Adreno. + }; + uint32_t value; + // RB_BLENDCONTROL[0-3] use this format. + static constexpr Register register_index = XE_GPU_REG_RB_BLENDCONTROL0; + static const Register rt_register_indices[4]; }; union RB_DEPTHCONTROL { - xe::bf stencil_enable; - xe::bf z_enable; - xe::bf z_write_enable; - // EARLY_Z_ENABLE was added on Adreno. - xe::bf zfunc; - xe::bf backface_enable; - xe::bf stencilfunc; - xe::bf stencilfail; - xe::bf stencilzpass; - xe::bf stencilzfail; - xe::bf stencilfunc_bf; - xe::bf stencilfail_bf; - xe::bf stencilzpass_bf; - xe::bf stencilzfail_bf; - + struct { + uint32_t stencil_enable : 1; // +0 + uint32_t z_enable : 1; // +1 + uint32_t z_write_enable : 1; // +2 + // EARLY_Z_ENABLE was added on Adreno. 
+ uint32_t : 1; // +3 + CompareFunction zfunc : 3; // +4 + uint32_t backface_enable : 1; // +7 + CompareFunction stencilfunc : 3; // +8 + StencilOp stencilfail : 3; // +11 + StencilOp stencilzpass : 3; // +14 + StencilOp stencilzfail : 3; // +17 + CompareFunction stencilfunc_bf : 3; // +20 + StencilOp stencilfail_bf : 3; // +23 + StencilOp stencilzpass_bf : 3; // +26 + StencilOp stencilzfail_bf : 3; // +29 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTHCONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_DEPTHCONTROL; }; union RB_STENCILREFMASK { - xe::bf stencilref; - xe::bf stencilmask; - xe::bf stencilwritemask; - + struct { + uint32_t stencilref : 8; // +0 + uint32_t stencilmask : 8; // +8 + uint32_t stencilwritemask : 8; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_STENCILREFMASK; + static constexpr Register register_index = XE_GPU_REG_RB_STENCILREFMASK; // RB_STENCILREFMASK_BF also uses this format. }; union RB_DEPTH_INFO { - xe::bf depth_base; - xe::bf depth_format; - + struct { + uint32_t depth_base : 12; // +0 + uint32_t : 4; // +12 + DepthRenderTargetFormat depth_format : 1; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_DEPTH_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_DEPTH_INFO; }; union RB_COPY_CONTROL { - xe::bf copy_src_select; - xe::bf copy_sample_select; - xe::bf color_clear_enable; - xe::bf depth_clear_enable; - - xe::bf copy_command; - + struct { + uint32_t copy_src_select : 3; // +0 Depth is 4. 
+ uint32_t : 1; // +3 + xenos::CopySampleSelect copy_sample_select : 3; // +4 + uint32_t : 1; // +7 + uint32_t color_clear_enable : 1; // +8 + uint32_t depth_clear_enable : 1; // +9 + uint32_t : 10; // +10 + xenos::CopyCommand copy_command : 2; // +20 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_CONTROL; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_CONTROL; }; union RB_COPY_DEST_INFO { - xe::bf copy_dest_endian; - xe::bf copy_dest_array; - xe::bf copy_dest_slice; - xe::bf copy_dest_format; - xe::bf copy_dest_number; - xe::bf copy_dest_exp_bias; - xe::bf copy_dest_swap; - + struct { + Endian128 copy_dest_endian : 3; // +0 + uint32_t copy_dest_array : 1; // +3 + uint32_t copy_dest_slice : 3; // +4 + ColorFormat copy_dest_format : 6; // +7 + uint32_t copy_dest_number : 3; // +13 + int32_t copy_dest_exp_bias : 6; // +16 + uint32_t : 2; // +22 + uint32_t copy_dest_swap : 1; // +24 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_INFO; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_INFO; }; union RB_COPY_DEST_PITCH { - xe::bf copy_dest_pitch; - xe::bf copy_dest_height; - + struct { + uint32_t copy_dest_pitch : 14; // +0 + uint32_t : 2; // +14 + uint32_t copy_dest_height : 14; // +16 + }; uint32_t value; - static constexpr uint32_t register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; + static constexpr Register register_index = XE_GPU_REG_RB_COPY_DEST_PITCH; }; } // namespace reg diff --git a/src/xenia/gpu/sampler_info.cc b/src/xenia/gpu/sampler_info.cc index c6fcf0985..9881fe74d 100644 --- a/src/xenia/gpu/sampler_info.cc +++ b/src/xenia/gpu/sampler_info.cc @@ -24,25 +24,25 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch, out_info->min_filter = fetch_instr.attributes.min_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.min_filter) + ? 
fetch.min_filter : fetch_instr.attributes.min_filter; out_info->mag_filter = fetch_instr.attributes.mag_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.mag_filter) + ? fetch.mag_filter : fetch_instr.attributes.mag_filter; out_info->mip_filter = fetch_instr.attributes.mip_filter == TextureFilter::kUseFetchConst - ? static_cast(fetch.mip_filter) + ? fetch.mip_filter : fetch_instr.attributes.mip_filter; - out_info->clamp_u = static_cast(fetch.clamp_x); - out_info->clamp_v = static_cast(fetch.clamp_y); - out_info->clamp_w = static_cast(fetch.clamp_z); + out_info->clamp_u = fetch.clamp_x; + out_info->clamp_v = fetch.clamp_y; + out_info->clamp_w = fetch.clamp_z; out_info->aniso_filter = fetch_instr.attributes.aniso_filter == AnisoFilter::kUseFetchConst - ? static_cast(fetch.aniso_filter) + ? fetch.aniso_filter : fetch_instr.attributes.aniso_filter; - out_info->border_color = static_cast(fetch.border_color); + out_info->border_color = fetch.border_color; out_info->lod_bias = (fetch.lod_bias) / 32.f; out_info->mip_min_level = fetch.mip_min_level; out_info->mip_max_level = fetch.mip_max_level; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 940db871b..0a10ef5a2 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -110,9 +110,8 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) { bool ShaderTranslator::Translate(Shader* shader, PrimitiveType patch_type, reg::SQ_PROGRAM_CNTL cntl) { Reset(); - uint32_t cntl_num_reg = shader->type() == ShaderType::kVertex - ? cntl.vs_num_reg.value() - : cntl.ps_num_reg.value(); + uint32_t cntl_num_reg = + shader->type() == ShaderType::kVertex ? cntl.vs_num_reg : cntl.ps_num_reg; register_count_ = (cntl_num_reg & 0x80) ? 
0 : (cntl_num_reg + 1); return TranslateInternal(shader, patch_type); diff --git a/src/xenia/gpu/texture_conversion.cc b/src/xenia/gpu/texture_conversion.cc index 6137dc906..fbbf5b148 100644 --- a/src/xenia/gpu/texture_conversion.cc +++ b/src/xenia/gpu/texture_conversion.cc @@ -40,7 +40,7 @@ void CopySwapBlock(Endian endian, void* output, const void* input, xe::copy_and_swap_16_in_32_unaligned(output, input, length); break; default: - case Endian::kUnspecified: + case Endian::kNone: std::memcpy(output, input, length); break; } diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc index 85e88e45c..6fe0a4daa 100644 --- a/src/xenia/gpu/texture_info.cc +++ b/src/xenia/gpu/texture_info.cc @@ -33,8 +33,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch, auto& info = *out_info; - info.format = static_cast(fetch.format); - info.endianness = static_cast(fetch.endianness); + info.format = fetch.format; + info.endianness = fetch.endianness; info.dimension = static_cast(fetch.dimension); info.width = info.height = info.depth = 0; diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h index c50fd0a8d..08d96a532 100644 --- a/src/xenia/gpu/texture_info.h +++ b/src/xenia/gpu/texture_info.h @@ -19,77 +19,6 @@ namespace xe { namespace gpu { -// a2xx_sq_surfaceformat + -// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas -enum class TextureFormat : uint32_t { - k_1_REVERSE = 0, - k_1 = 1, - k_8 = 2, - k_1_5_5_5 = 3, - k_5_6_5 = 4, - k_6_5_5 = 5, - k_8_8_8_8 = 6, - k_2_10_10_10 = 7, - k_8_A = 8, - k_8_B = 9, - k_8_8 = 10, - k_Cr_Y1_Cb_Y0_REP = 11, - k_Y1_Cr_Y0_Cb_REP = 12, - k_16_16_EDRAM = 13, - k_8_8_8_8_A = 14, - k_4_4_4_4 = 15, - k_10_11_11 = 16, - k_11_11_10 = 17, - k_DXT1 = 18, - k_DXT2_3 = 19, - k_DXT4_5 = 20, - k_16_16_16_16_EDRAM = 21, - k_24_8 = 22, - k_24_8_FLOAT = 23, - k_16 = 24, - k_16_16 = 25, - k_16_16_16_16 = 26, - k_16_EXPAND = 27, - k_16_16_EXPAND = 28, - 
k_16_16_16_16_EXPAND = 29, - k_16_FLOAT = 30, - k_16_16_FLOAT = 31, - k_16_16_16_16_FLOAT = 32, - k_32 = 33, - k_32_32 = 34, - k_32_32_32_32 = 35, - k_32_FLOAT = 36, - k_32_32_FLOAT = 37, - k_32_32_32_32_FLOAT = 38, - k_32_AS_8 = 39, - k_32_AS_8_8 = 40, - k_16_MPEG = 41, - k_16_16_MPEG = 42, - k_8_INTERLACED = 43, - k_32_AS_8_INTERLACED = 44, - k_32_AS_8_8_INTERLACED = 45, - k_16_INTERLACED = 46, - k_16_MPEG_INTERLACED = 47, - k_16_16_MPEG_INTERLACED = 48, - k_DXN = 49, - k_8_8_8_8_AS_16_16_16_16 = 50, - k_DXT1_AS_16_16_16_16 = 51, - k_DXT2_3_AS_16_16_16_16 = 52, - k_DXT4_5_AS_16_16_16_16 = 53, - k_2_10_10_10_AS_16_16_16_16 = 54, - k_10_11_11_AS_16_16_16_16 = 55, - k_11_11_10_AS_16_16_16_16 = 56, - k_32_32_32_FLOAT = 57, - k_DXT3A = 58, - k_DXT5A = 59, - k_CTX1 = 60, - k_DXT3A_AS_1_1_1_1 = 61, - k_8_8_8_8_GAMMA_EDRAM = 62, - k_2_10_10_10_FLOAT_EDRAM = 63, - - kUnknown = 0xFFFFFFFFu, -}; - inline TextureFormat GetBaseFormat(TextureFormat texture_format) { // These formats are used for resampling textures / gamma control. 
switch (texture_format) { diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 4a634be52..d1fed5b72 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -824,7 +824,7 @@ void TraceViewer::DrawVertexFetcher(Shader* shader, #define LOADEL(type, wo) \ GpuSwap(xe::load(vstart + \ (attrib.fetch_instr.attributes.offset + wo) * 4), \ - Endian(fetch->endian)) + fetch->endian) switch (attrib.fetch_instr.attributes.data_format) { case VertexFormat::k_32: ImGui::Text("%.8X", LOADEL(uint32_t, 0)); @@ -1334,10 +1334,10 @@ void TraceViewer::DrawStateUI() { regs[XE_GPU_REG_RB_COLOR3_INFO].u32, }; uint32_t rb_blendcontrol[4] = { - regs[XE_GPU_REG_RB_BLENDCONTROL_0].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_1].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_2].u32, - regs[XE_GPU_REG_RB_BLENDCONTROL_3].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL0].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL1].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL2].u32, + regs[XE_GPU_REG_RB_BLENDCONTROL3].u32, }; ImGui::Columns(2); for (int i = 0; i < xe::countof(color_info); ++i) { @@ -1713,7 +1713,7 @@ void TraceViewer::DrawStateUI() { fetch = &group->vertex_fetch_2; break; } - assert_true(fetch->endian == 2); + assert_true(fetch->endian == Endian::k8in32); char tree_root_id[32]; sprintf(tree_root_id, "#vertices_root_%d", vertex_binding.fetch_constant); diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 79cb07fab..72b8734d5 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -146,12 +146,8 @@ enum class AllocType : uint32_t { // Instruction data for ControlFlowOpcode::kExec and kExecEnd. struct ControlFlowExecInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. 
uint32_t address() const { return address_; } // Number of instructions being executed. @@ -176,19 +172,15 @@ struct ControlFlowExecInstruction { uint32_t : 7; uint32_t clean_ : 1; uint32_t : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowExecInstruction, 8); // Instruction data for ControlFlowOpcode::kCondExec and kCondExecEnd. struct ControlFlowCondExecInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. uint32_t address() const { return address_; } // Number of instructions being executed. @@ -214,20 +206,16 @@ struct ControlFlowCondExecInstruction { uint32_t vc_lo_ : 2; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondExecInstruction, 8); // Instruction data for ControlFlowOpcode::kCondExecPred, kCondExecPredEnd, // kCondExecPredClean, kCondExecPredCleanEnd. struct ControlFlowCondExecPredInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Address of the instructions to execute. uint32_t address() const { return address_; } // Number of instructions being executed. 
@@ -254,19 +242,15 @@ struct ControlFlowCondExecPredInstruction { uint32_t : 7; uint32_t clean_ : 1; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondExecPredInstruction, 8); // Instruction data for ControlFlowOpcode::kLoopStart. struct ControlFlowLoopStartInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address to jump to when skipping the loop. uint32_t address() const { return address_; } // Whether to reuse the current aL instead of reset it to loop start. @@ -285,19 +269,15 @@ struct ControlFlowLoopStartInstruction { // Word 1: (16 bits) uint32_t : 11; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowLoopStartInstruction, 8); // Instruction data for ControlFlowOpcode::kLoopEnd. struct ControlFlowLoopEndInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address of the start of the loop body. uint32_t address() const { return address_; } // Integer constant register that holds the loop parameters. 
@@ -319,19 +299,15 @@ struct ControlFlowLoopEndInstruction { // Word 1: (16 bits) uint32_t : 10; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowLoopEndInstruction, 8); // Instruction data for ControlFlowOpcode::kCondCall. struct ControlFlowCondCallInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address. uint32_t address() const { return address_; } // Unconditional call - ignores condition/predication. @@ -354,19 +330,15 @@ struct ControlFlowCondCallInstruction { uint32_t : 2; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondCallInstruction, 8); // Instruction data for ControlFlowOpcode::kReturn. struct ControlFlowReturnInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } private: // Word 0: (32 bits) @@ -381,12 +353,8 @@ static_assert_size(ControlFlowReturnInstruction, 8); // Instruction data for ControlFlowOpcode::kCondJmp. struct ControlFlowCondJmpInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } - AddressingMode addressing_mode() const { - return static_cast(address_mode_); - } + ControlFlowOpcode opcode() const { return opcode_; } + AddressingMode addressing_mode() const { return address_mode_; } // Target address. 
uint32_t address() const { return address_; } // Unconditional jump - ignores condition/predication. @@ -410,20 +378,18 @@ struct ControlFlowCondJmpInstruction { uint32_t direction_ : 1; uint32_t bool_address_ : 8; uint32_t condition_ : 1; - uint32_t address_mode_ : 1; - uint32_t opcode_ : 4; + AddressingMode address_mode_ : 1; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowCondJmpInstruction, 8); // Instruction data for ControlFlowOpcode::kAlloc. struct ControlFlowAllocInstruction { - ControlFlowOpcode opcode() const { - return static_cast(opcode_); - } + ControlFlowOpcode opcode() const { return opcode_; } // The total number of the given type allocated by this instruction. uint32_t size() const { return size_; } // Unconditional jump - ignores condition/predication. - AllocType alloc_type() const { return static_cast(alloc_type_); } + AllocType alloc_type() const { return alloc_type_; } private: // Word 0: (32 bits) @@ -433,16 +399,14 @@ struct ControlFlowAllocInstruction { // Word 1: (16 bits) uint32_t : 8; uint32_t is_unserialized_ : 1; - uint32_t alloc_type_ : 2; + AllocType alloc_type_ : 2; uint32_t : 1; - uint32_t opcode_ : 4; + ControlFlowOpcode opcode_ : 4; }; static_assert_size(ControlFlowAllocInstruction, 8); XEPACKEDUNION(ControlFlowInstruction, { - ControlFlowOpcode opcode() const { - return static_cast(opcode_value); - } + ControlFlowOpcode opcode() const { return opcode_value; } ControlFlowExecInstruction exec; // kExec* ControlFlowCondExecInstruction cond_exec; // kCondExec* @@ -457,7 +421,7 @@ XEPACKEDUNION(ControlFlowInstruction, { XEPACKEDSTRUCTANONYMOUS({ uint32_t unused_0 : 32; uint32_t unused_1 : 12; - uint32_t opcode_value : 4; + ControlFlowOpcode opcode_value : 4; }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; @@ -478,7 +442,7 @@ inline void UnpackControlFlowInstructions(const uint32_t* dwords, out_b->dword_1 = dword_2 >> 16; } -enum class FetchOpcode { +enum class FetchOpcode : uint32_t { kVertexFetch = 0, kTextureFetch 
= 1, kGetTextureBorderColorFrac = 16, @@ -492,9 +456,7 @@ enum class FetchOpcode { }; struct VertexFetchInstruction { - FetchOpcode opcode() const { - return static_cast(data_.opcode_value); - } + FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). bool is_predicated() const { return data_.is_predicated; } @@ -538,13 +500,9 @@ struct VertexFetchInstruction { uint32_t prefetch_count() const { return data_.prefetch_count; } bool is_mini_fetch() const { return data_.is_mini_fetch == 1; } - VertexFormat data_format() const { - return static_cast(data_.format); - } + VertexFormat data_format() const { return data_.format; } // [-32, 31] - int exp_adjust() const { - return ((static_cast(data_.exp_adjust) << 26) >> 26); - } + int exp_adjust() const { return data_.exp_adjust; } bool is_signed() const { return data_.fomat_comp_all == 1; } bool is_normalized() const { return data_.num_format_all == 0; } bool is_index_rounded() const { return data_.is_index_rounded == 1; } @@ -562,7 +520,7 @@ struct VertexFetchInstruction { private: XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t opcode_value : 5; + FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; uint32_t dst_reg : 6; @@ -579,9 +537,9 @@ struct VertexFetchInstruction { uint32_t num_format_all : 1; uint32_t signed_rf_mode_all : 1; uint32_t is_index_rounded : 1; - uint32_t format : 6; + VertexFormat format : 6; uint32_t reserved2 : 2; - uint32_t exp_adjust : 6; + int32_t exp_adjust : 6; uint32_t is_mini_fetch : 1; uint32_t is_predicated : 1; }); @@ -595,9 +553,7 @@ struct VertexFetchInstruction { }; struct TextureFetchInstruction { - FetchOpcode opcode() const { - return static_cast(data_.opcode_value); - } + FetchOpcode opcode() const { return data_.opcode_value; } // Whether the jump is predicated (or conditional). 
bool is_predicated() const { return data_.is_predicated; } @@ -613,59 +569,49 @@ struct TextureFetchInstruction { uint32_t src_swizzle() const { return data_.src_swiz; } bool is_src_relative() const { return data_.src_reg_am; } - TextureDimension dimension() const { - return static_cast(data_.dimension); - } + TextureDimension dimension() const { return data_.dimension; } bool fetch_valid_only() const { return data_.fetch_valid_only == 1; } bool unnormalized_coordinates() const { return data_.tx_coord_denorm == 1; } - bool has_mag_filter() const { return data_.mag_filter != 0x3; } - TextureFilter mag_filter() const { - return static_cast(data_.mag_filter); + bool has_mag_filter() const { + return data_.mag_filter != TextureFilter::kUseFetchConst; } - bool has_min_filter() const { return data_.min_filter != 0x3; } - TextureFilter min_filter() const { - return static_cast(data_.min_filter); + TextureFilter mag_filter() const { return data_.mag_filter; } + bool has_min_filter() const { + return data_.min_filter != TextureFilter::kUseFetchConst; } - bool has_mip_filter() const { return data_.mip_filter != 0x3; } - TextureFilter mip_filter() const { - return static_cast(data_.mip_filter); + TextureFilter min_filter() const { return data_.min_filter; } + bool has_mip_filter() const { + return data_.mip_filter != TextureFilter::kUseFetchConst; } - bool has_aniso_filter() const { return data_.aniso_filter != 0x7; } - AnisoFilter aniso_filter() const { - return static_cast(data_.aniso_filter); + TextureFilter mip_filter() const { return data_.mip_filter; } + bool has_aniso_filter() const { + return data_.aniso_filter != AnisoFilter::kUseFetchConst; } - bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; } - TextureFilter vol_mag_filter() const { - return static_cast(data_.vol_mag_filter); + AnisoFilter aniso_filter() const { return data_.aniso_filter; } + bool has_vol_mag_filter() const { + return data_.vol_mag_filter != TextureFilter::kUseFetchConst; } - 
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; } - TextureFilter vol_min_filter() const { - return static_cast(data_.vol_min_filter); + TextureFilter vol_mag_filter() const { return data_.vol_mag_filter; } + bool has_vol_min_filter() const { + return data_.vol_min_filter != TextureFilter::kUseFetchConst; } + TextureFilter vol_min_filter() const { return data_.vol_min_filter; } bool use_computed_lod() const { return data_.use_comp_lod == 1; } bool use_register_lod() const { return data_.use_reg_lod == 1; } bool use_register_gradients() const { return data_.use_reg_gradients == 1; } - SampleLocation sample_location() const { - return static_cast(data_.sample_location); - } + SampleLocation sample_location() const { return data_.sample_location; } float lod_bias() const { // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx - return ((static_cast(data_.lod_bias) << 25) >> 25) / 16.0f; - } - float offset_x() const { - return ((static_cast(data_.offset_x) << 27) >> 27) / 2.0f; - } - float offset_y() const { - return ((static_cast(data_.offset_y) << 27) >> 27) / 2.0f; - } - float offset_z() const { - return ((static_cast(data_.offset_z) << 27) >> 27) / 2.0f; + return data_.lod_bias * (1.0f / 16.0f); } + float offset_x() const { return data_.offset_x * 0.5f; } + float offset_y() const { return data_.offset_y * 0.5f; } + float offset_z() const { return data_.offset_z * 0.5f; } private: XEPACKEDSTRUCT(Data, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t opcode_value : 5; + FetchOpcode opcode_value : 5; uint32_t src_reg : 6; uint32_t src_reg_am : 1; uint32_t dst_reg : 6; @@ -676,14 +622,14 @@ struct TextureFetchInstruction { uint32_t src_swiz : 6; // xyz }); XEPACKEDSTRUCTANONYMOUS({ - uint32_t dst_swiz : 12; // xyzw - uint32_t mag_filter : 2; // instr_tex_filter_t - uint32_t min_filter : 2; // instr_tex_filter_t - uint32_t mip_filter : 2; // instr_tex_filter_t - uint32_t aniso_filter : 3; // 
instr_aniso_filter_t - uint32_t arbitrary_filter : 3; // instr_arbitrary_filter_t - uint32_t vol_mag_filter : 2; // instr_tex_filter_t - uint32_t vol_min_filter : 2; // instr_tex_filter_t + uint32_t dst_swiz : 12; // xyzw + TextureFilter mag_filter : 2; + TextureFilter min_filter : 2; + TextureFilter mip_filter : 2; + AnisoFilter aniso_filter : 3; + xenos::ArbitraryFilter arbitrary_filter : 3; + TextureFilter vol_mag_filter : 2; + TextureFilter vol_min_filter : 2; uint32_t use_comp_lod : 1; uint32_t use_reg_lod : 1; uint32_t unk : 1; @@ -691,13 +637,13 @@ struct TextureFetchInstruction { }); XEPACKEDSTRUCTANONYMOUS({ uint32_t use_reg_gradients : 1; - uint32_t sample_location : 1; - uint32_t lod_bias : 7; + SampleLocation sample_location : 1; + int32_t lod_bias : 7; uint32_t unused : 5; - uint32_t dimension : 2; - uint32_t offset_x : 5; - uint32_t offset_y : 5; - uint32_t offset_z : 5; + TextureDimension dimension : 2; + int32_t offset_x : 5; + int32_t offset_y : 5; + int32_t offset_z : 5; uint32_t pred_condition : 1; }); }); @@ -722,7 +668,7 @@ static_assert_size(TextureFetchInstruction, 12); // when write masks are disabled or the instruction that would write them // fails its predication check. 
-enum class AluScalarOpcode { +enum class AluScalarOpcode : uint32_t { // Floating-Point Add // adds dest, src0.ab // dest.xyzw = src0.a + src0.b; @@ -1049,7 +995,7 @@ enum class AluScalarOpcode { kRetainPrev = 50, }; -enum class AluVectorOpcode { +enum class AluVectorOpcode : uint32_t { // Per-Component Floating-Point Add // add dest, src0, src1 // dest.x = src0.x + src1.x; @@ -1373,9 +1319,7 @@ struct AluInstruction { return vector_write_mask() || is_export() || AluVectorOpcodeHasSideEffects(vector_opcode()); } - AluVectorOpcode vector_opcode() const { - return static_cast(data_.vector_opc); - } + AluVectorOpcode vector_opcode() const { return data_.vector_opc; } uint32_t vector_write_mask() const { return data_.vector_write_mask; } uint32_t vector_dest() const { return data_.vector_dest; } bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; } @@ -1385,9 +1329,7 @@ struct AluInstruction { return scalar_opcode() != AluScalarOpcode::kRetainPrev || (!is_export() && scalar_write_mask() != 0); } - AluScalarOpcode scalar_opcode() const { - return static_cast(data_.scalar_opc); - } + AluScalarOpcode scalar_opcode() const { return data_.scalar_opc; } uint32_t scalar_write_mask() const { return data_.scalar_write_mask; } uint32_t scalar_dest() const { return data_.scalar_dest; } bool is_scalar_dest_relative() const { return data_.scalar_dest_rel == 1; } @@ -1459,7 +1401,7 @@ struct AluInstruction { uint32_t scalar_write_mask : 4; uint32_t vector_clamp : 1; uint32_t scalar_clamp : 1; - uint32_t scalar_opc : 6; // instr_scalar_opc_t + AluScalarOpcode scalar_opc : 6; }); XEPACKEDSTRUCTANONYMOUS({ uint32_t src3_swiz : 8; @@ -1478,7 +1420,7 @@ struct AluInstruction { uint32_t src3_reg : 8; uint32_t src2_reg : 8; uint32_t src1_reg : 8; - uint32_t vector_opc : 5; // instr_vector_opc_t + AluVectorOpcode vector_opc : 5; uint32_t src3_sel : 1; uint32_t src2_sel : 1; uint32_t src1_sel : 1; diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc 
b/src/xenia/gpu/vulkan/buffer_cache.cc index cd1fb7ba8..ae30a5dd8 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -654,9 +654,8 @@ VkDescriptorSet BufferCache::PrepareVertexSet( // trace_writer_.WriteMemoryRead(physical_address, source_length); // Upload (or get a cached copy of) the buffer. - auto buffer_ref = - UploadVertexBuffer(command_buffer, physical_address, source_length, - static_cast(fetch->endian), fence); + auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address, + source_length, fetch->endian, fence); if (buffer_ref.second == VK_WHOLE_SIZE) { // Failed to upload buffer. XELOGW("Failed to upload vertex buffer!"); diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index 4258061f1..24ff9cbe6 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -815,13 +815,13 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, push_constants_dirty |= SetShadowRegister(®s.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); push_constants_dirty |= - SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + SetShadowRegister(®s.rb_color_info.value, XE_GPU_REG_RB_COLOR_INFO); push_constants_dirty |= - SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + SetShadowRegister(®s.rb_color1_info.value, XE_GPU_REG_RB_COLOR1_INFO); push_constants_dirty |= - SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + SetShadowRegister(®s.rb_color2_info.value, XE_GPU_REG_RB_COLOR2_INFO); push_constants_dirty |= - SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + SetShadowRegister(®s.rb_color3_info.value, XE_GPU_REG_RB_COLOR3_INFO); push_constants_dirty |= SetShadowRegister(®s.rb_alpha_ref, XE_GPU_REG_RB_ALPHA_REF); push_constants_dirty |= @@ -1503,13 +1503,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { bool dirty = false; dirty |= 
SetShadowRegister(®s.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= - SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); + SetShadowRegister(®s.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL0); dirty |= - SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1); + SetShadowRegister(®s.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL1); dirty |= - SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); + SetShadowRegister(®s.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL2); dirty |= - SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); + SetShadowRegister(®s.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL3); dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); XXH64_update(&hash_state_, ®s, sizeof(regs)); if (!dirty) { diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index 80035d25f..0bbd03503 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -292,10 +292,10 @@ class PipelineCache { reg::SQ_PROGRAM_CNTL sq_program_cntl; uint32_t sq_context_misc; uint32_t rb_colorcontrol; - uint32_t rb_color_info; - uint32_t rb_color1_info; - uint32_t rb_color2_info; - uint32_t rb_color3_info; + reg::RB_COLOR_INFO rb_color_info; + reg::RB_COLOR_INFO rb_color1_info; + reg::RB_COLOR_INFO rb_color2_info; + reg::RB_COLOR_INFO rb_color3_info; float rb_alpha_ref; uint32_t pa_su_point_size; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 9b54dfe3e..50a3094a4 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -962,7 +962,7 @@ bool VulkanCommandProcessor::IssueCopy() { break; } assert_true(fetch->type == 3); - assert_true(fetch->endian == 2); + assert_true(fetch->endian == Endian::k8in32); assert_true(fetch->size == 6); const uint8_t* vertex_addr = 
memory_->TranslatePhysical(fetch->address << 2); trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4); @@ -974,7 +974,7 @@ bool VulkanCommandProcessor::IssueCopy() { float dest_points[6]; for (int i = 0; i < 6; i++) { dest_points[i] = - GpuSwap(xe::load(vertex_addr + i * 4), Endian(fetch->endian)) + + GpuSwap(xe::load(vertex_addr + i * 4), fetch->endian) + vtx_offset; } @@ -1000,10 +1000,10 @@ bool VulkanCommandProcessor::IssueCopy() { if (is_color_source) { // Source from a color target. reg::RB_COLOR_INFO color_info[4] = { - regs[XE_GPU_REG_RB_COLOR_INFO].u32, - regs[XE_GPU_REG_RB_COLOR1_INFO].u32, - regs[XE_GPU_REG_RB_COLOR2_INFO].u32, - regs[XE_GPU_REG_RB_COLOR3_INFO].u32, + regs.Get(), + regs.Get(XE_GPU_REG_RB_COLOR1_INFO), + regs.Get(XE_GPU_REG_RB_COLOR2_INFO), + regs.Get(XE_GPU_REG_RB_COLOR3_INFO), }; color_edram_base = color_info[copy_src_select].color_base; color_format = color_info[copy_src_select].color_format; @@ -1023,7 +1023,7 @@ bool VulkanCommandProcessor::IssueCopy() { Endian resolve_endian = Endian::k8in32; if (copy_regs->copy_dest_info.copy_dest_endian <= Endian128::k16in32) { resolve_endian = - static_cast(copy_regs->copy_dest_info.copy_dest_endian.value()); + static_cast(copy_regs->copy_dest_info.copy_dest_endian); } // Demand a resolve texture from the texture cache. @@ -1289,7 +1289,7 @@ bool VulkanCommandProcessor::IssueCopy() { // Perform any requested clears. 
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32; uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32; - uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32; + uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LO].u32; assert_true(copy_color_clear == copy_color_clear_low); if (color_clear_enabled) { diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 2175e5e5d..47a5659a4 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -161,14 +161,14 @@ enum class SampleLocation : uint32_t { }; enum class Endian : uint32_t { - kUnspecified = 0, + kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3, }; enum class Endian128 : uint32_t { - kUnspecified = 0, + kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3, @@ -225,6 +225,77 @@ enum class DepthRenderTargetFormat : uint32_t { kD24FS8 = 1, }; +// a2xx_sq_surfaceformat + +// https://github.com/indirivacua/RAGE-Console-Texture-Editor/blob/master/Console.Xbox360.Graphics.pas +enum class TextureFormat : uint32_t { + k_1_REVERSE = 0, + k_1 = 1, + k_8 = 2, + k_1_5_5_5 = 3, + k_5_6_5 = 4, + k_6_5_5 = 5, + k_8_8_8_8 = 6, + k_2_10_10_10 = 7, + k_8_A = 8, + k_8_B = 9, + k_8_8 = 10, + k_Cr_Y1_Cb_Y0_REP = 11, + k_Y1_Cr_Y0_Cb_REP = 12, + k_16_16_EDRAM = 13, + k_8_8_8_8_A = 14, + k_4_4_4_4 = 15, + k_10_11_11 = 16, + k_11_11_10 = 17, + k_DXT1 = 18, + k_DXT2_3 = 19, + k_DXT4_5 = 20, + k_16_16_16_16_EDRAM = 21, + k_24_8 = 22, + k_24_8_FLOAT = 23, + k_16 = 24, + k_16_16 = 25, + k_16_16_16_16 = 26, + k_16_EXPAND = 27, + k_16_16_EXPAND = 28, + k_16_16_16_16_EXPAND = 29, + k_16_FLOAT = 30, + k_16_16_FLOAT = 31, + k_16_16_16_16_FLOAT = 32, + k_32 = 33, + k_32_32 = 34, + k_32_32_32_32 = 35, + k_32_FLOAT = 36, + k_32_32_FLOAT = 37, + k_32_32_32_32_FLOAT = 38, + k_32_AS_8 = 39, + k_32_AS_8_8 = 40, + k_16_MPEG = 41, + k_16_16_MPEG = 42, + k_8_INTERLACED = 43, + k_32_AS_8_INTERLACED = 44, + k_32_AS_8_8_INTERLACED = 45, + k_16_INTERLACED = 46, + k_16_MPEG_INTERLACED = 47, + 
k_16_16_MPEG_INTERLACED = 48, + k_DXN = 49, + k_8_8_8_8_AS_16_16_16_16 = 50, + k_DXT1_AS_16_16_16_16 = 51, + k_DXT2_3_AS_16_16_16_16 = 52, + k_DXT4_5_AS_16_16_16_16 = 53, + k_2_10_10_10_AS_16_16_16_16 = 54, + k_10_11_11_AS_16_16_16_16 = 55, + k_11_11_10_AS_16_16_16_16 = 56, + k_32_32_32_FLOAT = 57, + k_DXT3A = 58, + k_DXT5A = 59, + k_CTX1 = 60, + k_DXT3A_AS_1_1_1_1 = 61, + k_8_8_8_8_GAMMA_EDRAM = 62, + k_2_10_10_10_FLOAT_EDRAM = 63, + + kUnknown = 0xFFFFFFFFu, +}; + // Subset of a2xx_sq_surfaceformat - formats that RTs can be resolved to. enum class ColorFormat : uint32_t { k_8 = 2, @@ -367,11 +438,7 @@ enum class BlendFactor : uint32_t { kConstantAlpha = 14, kOneMinusConstantAlpha = 15, kSrcAlphaSaturate = 16, - // SRC1 likely not used on the Xbox 360 - only available in Direct3D 9Ex. - kSrc1Color = 20, - kOneMinusSrc1Color = 21, - kSrc1Alpha = 22, - kOneMinusSrc1Alpha = 23, + // SRC1 added on Adreno. }; enum class BlendOp : uint32_t { @@ -391,6 +458,17 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// instr_arbitrary_filter_t +enum class ArbitraryFilter : uint32_t { + k2x4Sym = 0, + k2x4Asym = 1, + k4x2Sym = 2, + k4x2Asym = 3, + k4x4Sym = 4, + k4x4Asym = 5, + kUseFetchConst = 7, +}; + // a2xx_sq_ps_vtx_mode enum class VertexShaderExportMode : uint32_t { kPosition1Vector = 0, @@ -420,6 +498,17 @@ enum class TessellationMode : uint32_t { kAdaptive = 2, }; +enum class PolygonModeEnable : uint32_t { + kDisabled = 0, // Render triangles. + kDualMode = 1, // Send 2 sets of 3 polygons with the specified polygon type. +}; + +enum class PolygonType : uint32_t { + kPoints = 0, + kLines = 1, + kTriangles = 2, +}; + enum class ModeControl : uint32_t { kIgnore = 0, kColorDepth = 4, @@ -471,7 +560,7 @@ typedef enum { inline uint16_t GpuSwap(uint16_t value, Endian endianness) { switch (endianness) { - case Endian::kUnspecified: + case Endian::kNone: // No swap. 
return value; case Endian::k8in16: @@ -486,7 +575,7 @@ inline uint16_t GpuSwap(uint16_t value, Endian endianness) { inline uint32_t GpuSwap(uint32_t value, Endian endianness) { switch (endianness) { default: - case Endian::kUnspecified: + case Endian::kNone: // No swap. return value; case Endian::k8in16: @@ -520,11 +609,11 @@ inline uint32_t CpuToGpu(uint32_t p) { return p & 0x1FFFFFFF; } XEPACKEDUNION(xe_gpu_vertex_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ uint32_t type : 2; // +0 - uint32_t address : 30; // +2 + uint32_t address : 30; // +2 address in dwords - uint32_t endian : 2; // +0 - uint32_t size : 24; // +2 size in words - uint32_t unk1 : 6; // +26 + Endian endian : 2; // +0 + uint32_t size : 24; // +2 size in words + uint32_t unk1 : 6; // +26 }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0; @@ -535,34 +624,36 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_texture_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t type : 2; // +0 dword_0 - uint32_t sign_x : 2; // +2 - uint32_t sign_y : 2; // +4 - uint32_t sign_z : 2; // +6 - uint32_t sign_w : 2; // +8 - uint32_t clamp_x : 3; // +10 - uint32_t clamp_y : 3; // +13 - uint32_t clamp_z : 3; // +16 - uint32_t unused_0 : 3; // +19 - uint32_t pitch : 9; // +22 byte_pitch >> 5 - uint32_t tiled : 1; // +31 + uint32_t type : 2; // +0 dword_0 + TextureSign sign_x : 2; // +2 + TextureSign sign_y : 2; // +4 + TextureSign sign_z : 2; // +6 + TextureSign sign_w : 2; // +8 + ClampMode clamp_x : 3; // +10 + ClampMode clamp_y : 3; // +13 + ClampMode clamp_z : 3; // +16 + uint32_t signed_rf_mode_all : 1; // +19 + // TODO(Triang3l): 1 or 2 dim_tbd bits? 
+ uint32_t unk_0 : 2; // +20 + uint32_t pitch : 9; // +22 byte_pitch >> 5 + uint32_t tiled : 1; // +31 - uint32_t format : 6; // +0 dword_1 - uint32_t endianness : 2; // +6 - uint32_t request_size : 2; // +8 - uint32_t stacked : 1; // +10 - uint32_t clamp_policy : 1; // +11 d3d/opengl - uint32_t base_address : 20; // +12 + TextureFormat format : 6; // +0 dword_1 + Endian endianness : 2; // +6 + uint32_t request_size : 2; // +8 + uint32_t stacked : 1; // +10 + uint32_t nearest_clamp_policy : 1; // +11 d3d/opengl + uint32_t base_address : 20; // +12 base address >> 12 union { // dword_2 struct { uint32_t width : 24; - uint32_t unused : 8; + uint32_t : 8; } size_1d; struct { uint32_t width : 13; uint32_t height : 13; - uint32_t unused : 6; + uint32_t : 6; } size_2d; struct { uint32_t width : 13; @@ -576,15 +667,16 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { } size_3d; }; - uint32_t num_format : 1; // +0 dword_3 frac/int - uint32_t swizzle : 12; // +1 xyzw, 3b each (XE_GPU_SWIZZLE) - int32_t exp_adjust : 6; // +13 - uint32_t mag_filter : 2; // +19 - uint32_t min_filter : 2; // +21 - uint32_t mip_filter : 2; // +23 - uint32_t aniso_filter : 3; // +25 - uint32_t unused_3 : 3; // +28 - uint32_t border_size : 1; // +31 + uint32_t num_format : 1; // +0 dword_3 frac/int + // xyzw, 3b each (XE_GPU_SWIZZLE) + uint32_t swizzle : 12; // +1 + int32_t exp_adjust : 6; // +13 + TextureFilter mag_filter : 2; // +19 + TextureFilter min_filter : 2; // +21 + TextureFilter mip_filter : 2; // +23 + AnisoFilter aniso_filter : 3; // +25 + xenos::ArbitraryFilter arbitrary_filter : 3; // +28 + uint32_t border_size : 1; // +31 uint32_t vol_mag_filter : 1; // +0 dword_4 uint32_t vol_min_filter : 1; // +1 @@ -596,13 +688,13 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { int32_t grad_exp_adjust_h : 5; // +22 int32_t grad_exp_adjust_v : 5; // +27 - uint32_t border_color : 2; // +0 dword_5 - uint32_t force_bcw_max : 1; // +2 - uint32_t tri_clamp : 2; // +3 - int32_t aniso_bias : 4; // +5 - uint32_t 
dimension : 2; // +9 - uint32_t packed_mips : 1; // +11 - uint32_t mip_address : 20; // +12 + BorderColor border_color : 2; // +0 dword_5 + uint32_t force_bc_w_to_max : 1; // +2 + uint32_t tri_clamp : 2; // +3 + int32_t aniso_bias : 4; // +5 + uint32_t dimension : 2; // +9 + uint32_t packed_mips : 1; // +11 + uint32_t mip_address : 20; // +12 mip address >> 12 }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0;