diff --git a/appveyor.yml b/appveyor.yml index f338cff..a0627e4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.1.10.build-{build} +version: 0.1.11.build-{build} image: Visual Studio 2019 environment: matrix: diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 6d60788..6ea50c0 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD) set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000") endif() -project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.10) +project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.11) include(src_script.cmake) diff --git a/source/emulator/include/Emulator/Graphics/HardwareContext.h b/source/emulator/include/Emulator/Graphics/HardwareContext.h index a7b6781..befcee0 100644 --- a/source/emulator/include/Emulator/Graphics/HardwareContext.h +++ b/source/emulator/include/Emulator/Graphics/HardwareContext.h @@ -29,20 +29,26 @@ struct ColorView { uint32_t base_array_slice_index = 0; uint32_t last_array_slice_index = 0; + uint32_t current_mip_level = 0; }; struct ColorInfo { - bool fmask_compression_enable = false; - uint32_t fmask_compression_mode = 0; - bool cmask_fast_clear_enable = false; - bool dcc_compression_enable = false; - bool neo_mode = false; - uint32_t cmask_tile_mode = 0; - uint32_t cmask_tile_mode_neo = 0; - uint32_t format = 0; - uint32_t channel_type = 0; - uint32_t channel_order = 0; + bool fmask_compression_enable = false; + // uint32_t fmask_compression_mode = 0; + bool fmask_data_compression_disable = false; + bool fmask_one_frag_mode = false; + bool cmask_fast_clear_enable = false; + bool dcc_compression_enable = false; + bool neo_mode = false; + bool blend_clamp = false; + bool blend_bypass = false; + bool round_mode = false; + uint32_t cmask_tile_mode = 0; + uint32_t cmask_tile_mode_neo = 0; + uint32_t format = 0; + uint32_t channel_type = 0; + uint32_t channel_order = 0; }; struct ColorAttrib @@ -54,14 +60,33 @@ struct ColorAttrib uint32_t num_fragments = 0; }; -struct ColorDcc +struct ColorAttrib2 { - uint32_t max_uncompressed_block_size = 0; - uint32_t max_compressed_block_size = 0; - uint32_t min_compressed_block_size = 0; - uint32_t color_transform = 0; - bool enable_overwrite_combiner = false; - bool force_independent_blocks = false; + uint32_t height = 0; + uint32_t width = 0; + uint32_t num_mip_levels = 0; +}; + +struct ColorAttrib3 +{ + uint32_t depth = 0; + uint32_t tile_mode = 0; + uint32_t dimension = 0; + bool cmask_pipe_aligned = false; + bool dcc_pipe_aligned = false; +}; + +struct ColorDccControl +{ + uint32_t max_uncompressed_block_size = 0; + uint32_t max_compressed_block_size = 0; + uint32_t min_compressed_block_size = 0; + uint32_t color_transform = 0; + bool dcc_clear_key_enable = false; + bool overwrite_combiner_disable = false; + bool independent_64b_blocks = false; + bool independent_128b_blocks = false; + bool data_write_on_dcc_clear_to_reg = false; }; struct ColorCmask @@ -113,7 +138,9 @@ struct RenderTarget ColorView view; ColorInfo info; ColorAttrib attrib; - ColorDcc dcc; + ColorAttrib2 attrib2; + ColorAttrib3 attrib3; + ColorDccControl dcc; ColorCmask cmask; ColorCmaskSlice cmask_slice; ColorFmask fmask; @@ -124,23 +151,29 @@ struct RenderTarget ColorSize size; }; -struct DepthRenderTargetZInfo +struct DepthZInfo { - uint32_t format = 0; - uint32_t tile_mode_index = 0; - uint32_t num_samples = 0; - bool tile_surface_enable = false; - bool expclear_enabled = false; - uint32_t zrange_precision = 0; + uint32_t format = 0; + uint32_t tile_mode_index = 0; + uint32_t num_samples = 0; + uint32_t zrange_precision = 0; + bool tile_surface_enable = false; + bool expclear_enabled = false; + bool embedded_sample_locations = false; + bool partially_resident = false; + uint8_t num_mip_levels = 0; + uint8_t plane_compression = 0; }; -struct DepthRenderTargetStencilInfo +struct DepthStencilInfo { - uint32_t format = 0; - uint32_t tile_mode_index = 0; - uint32_t tile_split = 0; - bool expclear_enabled = false; - bool tile_stencil_disable = false; + uint32_t format = 0; + uint32_t tile_mode_index = 0; + uint32_t tile_split = 0; + bool expclear_enabled = false; + bool tile_stencil_disable = false; + bool texture_compatible_stencil = false; + bool partially_resident = false; }; struct DepthRenderTargetDepthInfo @@ -154,10 +187,19 @@ struct DepthRenderTargetDepthInfo uint32_t num_banks = 0; }; -struct DepthRenderTargetDepthView +struct DepthDepthView { - uint32_t slice_start = 0; - uint32_t slice_max = 0; + uint32_t slice_start = 0; + uint32_t slice_max = 0; + uint8_t current_mip_level = 0; + bool depth_write_disable = false; + bool stencil_write_disable = false; +}; + +struct DepthDepthSizeXY +{ + uint16_t x_max = 0; + uint16_t y_max = 0; }; struct DepthRenderTargetHTileSurface @@ -173,10 +215,11 @@ struct DepthRenderTargetHTileSurface struct DepthRenderTarget { - DepthRenderTargetZInfo z_info; - DepthRenderTargetStencilInfo stencil_info; + DepthZInfo z_info; + DepthStencilInfo stencil_info; + DepthDepthView depth_view; + DepthDepthSizeXY size; DepthRenderTargetDepthInfo depth_info; - DepthRenderTargetDepthView depth_view; DepthRenderTargetHTileSurface htile_surface; uint64_t z_read_base_addr = 0; @@ -220,14 +263,16 @@ struct ClipControl struct DepthControl { - bool stencil_enable = false; - bool z_enable = false; - bool z_write_enable = false; - bool depth_bounds_enable = false; - uint8_t zfunc = 0; - bool backface_enable = false; - uint8_t stencilfunc = 0; - uint8_t stencilfunc_bf = 0; + bool stencil_enable = false; + bool z_enable = false; + bool z_write_enable = false; + bool depth_bounds_enable = false; + uint8_t zfunc = 0; + bool backface_enable = false; + uint8_t stencilfunc = 0; + uint8_t stencilfunc_bf = 0; + bool color_writes_on_depth_fail_enable = false; + bool color_writes_on_depth_pass_disable = false; }; struct StencilControl @@ -374,29 +419,66 @@ struct ScreenViewport float guard_band_vert_discard = 0.0f; }; +struct VsShaderResource1 +{ + uint8_t vgprs = 0; + uint8_t sgprs = 0; + uint8_t priority = 0; + uint8_t float_mode = 0; + bool dx10_clamp = false; + bool ieee_mode = false; + uint8_t vgpr_component_count = 0; + bool cu_group_enable = false; + bool require_forward_progress = false; + bool fp16_overflow = false; +}; + +struct VsShaderResource2 +{ + bool scratch_en = false; + uint8_t user_sgpr = 0; + bool offchip_lds = false; + bool streamout_enabled = false; + uint8_t shared_vgprs = 0; +}; + struct VsStageRegisters { - uint32_t m_spiShaderPgmLoVs = 0; - uint32_t m_spiShaderPgmHiVs = 0; - uint32_t m_spiShaderPgmRsrc1Vs = 0; - uint32_t m_spiShaderPgmRsrc2Vs = 0; + uint64_t data_addr = 0; + VsShaderResource1 rsrc1; + VsShaderResource2 rsrc2; +}; - [[nodiscard]] uint64_t GetGpuAddress() const; - [[nodiscard]] bool GetStreamoutEnabled() const; - [[nodiscard]] uint32_t GetSgprCount() const; - [[nodiscard]] uint32_t GetInputComponentsCount() const; - [[nodiscard]] uint32_t GetUnknown1() const; - [[nodiscard]] uint32_t GetUnknown2() const; +struct PsShaderResource1 +{ + uint8_t vgprs = 0; + uint8_t sgprs = 0; + uint8_t priority = 0; + uint8_t float_mode = 0; + bool dx10_clamp = false; + bool debug_mode = false; + bool ieee_mode = false; + bool cu_group_disable = false; + bool require_forward_progress = false; + bool fp16_overflow = false; +}; + +struct PsShaderResource2 +{ + bool scratch_en = false; + uint8_t user_sgpr = 0; + bool wave_cnt_en = false; + uint8_t extra_lds_size = 0; + bool raster_ordered_shading = false; + uint8_t shared_vgprs = 0; }; struct PsStageRegisters { - uint64_t data_addr = 0; - uint8_t vgprs = 0; - uint8_t sgprs = 0; - uint8_t scratch_en = 0; - uint8_t user_sgpr = 0; - uint8_t wave_cnt_en = 0; + uint64_t data_addr = 0; + PsShaderResource1 rsrc1; + PsShaderResource2 rsrc2; + uint64_t chksum = 0; }; struct CsStageRegisters @@ -419,6 +501,45 @@ struct CsStageRegisters uint8_t lds_size = 0; }; +struct EsStageRegisters +{ + uint64_t data_addr = 0; +}; + +struct GsShaderResource1 +{ + uint8_t vgprs = 0; + uint8_t sgprs = 0; + uint8_t priority = 0; + uint8_t float_mode = 0; + bool dx10_clamp = false; + bool debug_mode = false; + bool ieee_mode = false; + bool cu_group_enable = false; + bool require_forward_progress = false; + bool lds_configuration = false; + uint8_t gs_vgpr_component_count = 0; + bool fp16_overflow = false; +}; + +struct GsShaderResource2 +{ + bool scratch_en = false; + uint8_t user_sgpr = 0; + uint8_t es_vgpr_component_count = 0; + bool offchip_lds = false; + uint8_t lds_size = 0; + uint8_t shared_vgprs = 0; +}; + +struct GsStageRegisters +{ + uint64_t data_addr = 0; + GsShaderResource1 rsrc1; + GsShaderResource2 rsrc2; + uint64_t chksum = 0; +}; + struct ShaderRegisters { uint32_t m_spiVsOutConfig = 0; @@ -472,9 +593,12 @@ struct UserSgprInfo struct VertexShaderInfo { VsStageRegisters vs_regs; + EsStageRegisters es_regs; + GsStageRegisters gs_regs; uint32_t vs_shader_modifier = 0; uint32_t vs_embedded_id = 0; UserSgprInfo vs_user_sgpr; + UserSgprInfo gs_user_sgpr; bool vs_embedded = false; }; @@ -493,6 +617,19 @@ struct ComputeShaderInfo UserSgprInfo cs_user_sgpr; }; +struct GeControl +{ + uint16_t primitive_group_size = 0; + uint16_t vertex_group_size = 0; +}; + +struct GeUserVgprEn +{ + bool vgpr1 = false; + bool vgpr2 = false; + bool vgpr3 = false; +}; + class Context { public: @@ -509,7 +646,9 @@ public: void SetColorView(uint32_t slot, const ColorView& view) { m_render_targets[slot].view = view; } void SetColorInfo(uint32_t slot, const ColorInfo& info) { m_render_targets[slot].info = info; } void SetColorAttrib(uint32_t slot, const ColorAttrib& attrib) { m_render_targets[slot].attrib = attrib; } - void SetColorDcc(uint32_t slot, const ColorDcc& dcc) { m_render_targets[slot].dcc = dcc; } + void SetColorAttrib2(uint32_t slot, const ColorAttrib2& attrib2) { m_render_targets[slot].attrib2 = attrib2; } + void SetColorAttrib3(uint32_t slot, const ColorAttrib3& attrib3) { m_render_targets[slot].attrib3 = attrib3; } + void SetColorDccControl(uint32_t slot, const ColorDccControl& dcc) { m_render_targets[slot].dcc = dcc; } void SetColorCmask(uint32_t slot, const ColorCmask& cmask) { m_render_targets[slot].cmask = cmask; } void SetColorCmaskSlice(uint32_t slot, const ColorCmaskSlice& cmask_slice) { m_render_targets[slot].cmask_slice = cmask_slice; } void SetColorFmask(uint32_t slot, const ColorFmask& fmask) { m_render_targets[slot].fmask = fmask; } @@ -531,16 +670,27 @@ public: void SetDepthRenderTarget(const DepthRenderTarget& target) { m_depth_render_target = target; } [[nodiscard]] const DepthRenderTarget& GetDepthRenderTarget() const { return m_depth_render_target; } - void SetDepthRenderTargetZInfo(const DepthRenderTargetZInfo& info) { m_depth_render_target.z_info = info; } - [[nodiscard]] const DepthRenderTargetZInfo& GetDepthRenderTargetZInfo() const { return m_depth_render_target.z_info; } - void SetDepthRenderTargetStencilInfo(const DepthRenderTargetStencilInfo& info) { m_depth_render_target.stencil_info = info; } - [[nodiscard]] const DepthRenderTargetStencilInfo& GetDepthRenderTargetStencilInfo() const { return m_depth_render_target.stencil_info; } + void SetDepthZInfo(const DepthZInfo& info) { m_depth_render_target.z_info = info; } + [[nodiscard]] const DepthZInfo& GetDepthZInfo() const { return m_depth_render_target.z_info; } + void SetDepthStencilInfo(const DepthStencilInfo& info) { m_depth_render_target.stencil_info = info; } + [[nodiscard]] const DepthStencilInfo& GetDepthStencilInfo() const { return m_depth_render_target.stencil_info; } + void SetDepthZReadBase(uint64_t addr) { m_depth_render_target.z_read_base_addr = addr; } + void SetDepthStencilReadBase(uint64_t addr) { m_depth_render_target.stencil_read_base_addr = addr; } + void SetDepthZWriteBase(uint64_t addr) { m_depth_render_target.z_write_base_addr = addr; } + void SetDepthStencilWriteBase(uint64_t addr) { m_depth_render_target.stencil_write_base_addr = addr; } + void SetDepthHTileDataBase(uint64_t addr) { m_depth_render_target.htile_data_base_addr = addr; } + void SetDepthDepthView(const DepthDepthView& view) { m_depth_render_target.depth_view = view; } + [[nodiscard]] const DepthDepthView& GetDepthDepthView() const { return m_depth_render_target.depth_view; } + void SetDepthDepthSizeXY(const DepthDepthSizeXY& size) { m_depth_render_target.size = size; } + [[nodiscard]] const DepthDepthSizeXY& GetDepthDepthSizeXY() const { return m_depth_render_target.size; } void SetViewportZ(uint32_t viewport_id, float zmin, float zmax) { m_screen_viewport.viewports[viewport_id].zmin = zmin; m_screen_viewport.viewports[viewport_id].zmax = zmax; } + void SetViewportZMin(uint32_t viewport_id, float zmin) { m_screen_viewport.viewports[viewport_id].zmin = zmin; } + void SetViewportZMax(uint32_t viewport_id, float zmax) { m_screen_viewport.viewports[viewport_id].zmax = zmax; } void SetViewportScaleOffset(uint32_t viewport_id, float xscale, float xoffset, float yscale, float yoffset, float zscale, float zoffset) { m_screen_viewport.viewports[viewport_id].xscale = xscale; @@ -550,6 +700,31 @@ public: m_screen_viewport.viewports[viewport_id].zscale = zscale; m_screen_viewport.viewports[viewport_id].zoffset = zoffset; } + void SetViewportXScale(uint32_t viewport_id, float xscale) { m_screen_viewport.viewports[viewport_id].xscale = xscale; } + void SetViewportXOffset(uint32_t viewport_id, float xoffset) { m_screen_viewport.viewports[viewport_id].xoffset = xoffset; } + void SetViewportYScale(uint32_t viewport_id, float yscale) { m_screen_viewport.viewports[viewport_id].yscale = yscale; } + void SetViewportYOffset(uint32_t viewport_id, float yoffset) { m_screen_viewport.viewports[viewport_id].yoffset = yoffset; } + void SetViewportZScale(uint32_t viewport_id, float zscale) { m_screen_viewport.viewports[viewport_id].zscale = zscale; } + void SetViewportZOffset(uint32_t viewport_id, float zoffset) { m_screen_viewport.viewports[viewport_id].zoffset = zoffset; } + void SetViewportScissor(uint32_t viewport_id, int left, int top, int right, int bottom, bool window_offset_enable) + { + m_screen_viewport.viewports[viewport_id].viewport_scissor_left = left; + m_screen_viewport.viewports[viewport_id].viewport_scissor_top = top; + m_screen_viewport.viewports[viewport_id].viewport_scissor_right = right; + m_screen_viewport.viewports[viewport_id].viewport_scissor_bottom = bottom; + m_screen_viewport.viewports[viewport_id].viewport_scissor_window_offset_enable = window_offset_enable; + } + void SetViewportScissorTL(uint32_t viewport_id, int left, int top, bool window_offset_enable) + { + m_screen_viewport.viewports[viewport_id].viewport_scissor_left = left; + m_screen_viewport.viewports[viewport_id].viewport_scissor_top = top; + m_screen_viewport.viewports[viewport_id].viewport_scissor_window_offset_enable = window_offset_enable; + } + void SetViewportScissorBR(uint32_t viewport_id, int right, int bottom) + { + m_screen_viewport.viewports[viewport_id].viewport_scissor_right = right; + m_screen_viewport.viewports[viewport_id].viewport_scissor_bottom = bottom; + } void SetViewportTransformControl(uint32_t control) { m_screen_viewport.transform_control = control; } void SetScreenScissor(int left, int top, int right, int bottom) { @@ -688,8 +863,16 @@ public: void SetPrimitiveType(uint32_t prim_type) { m_prim_type = prim_type; } [[nodiscard]] uint32_t GetPrimType() const { return m_prim_type; } + [[nodiscard]] const GeControl& GetGeControl() const { return m_ge_cntl; } + void SetGeControl(const GeControl& control) { m_ge_cntl = control; } + [[nodiscard]] const GeUserVgprEn& GetGeUserVgprEn() const { return m_ge_user_vgpr_en; } + void SetGeUserVgprEn(const GeUserVgprEn& en) { m_ge_user_vgpr_en = en; } + private: uint32_t m_prim_type = 0; + + GeControl m_ge_cntl; + GeUserVgprEn m_ge_user_vgpr_en; }; class Shader @@ -702,49 +885,75 @@ public: void Reset() { *this = Shader(); } - void SetVsShader(const VsStageRegisters& vs_regs, uint32_t shader_modifier) + void SetVsShaderModifier(uint32_t shader_modifier) { m_vs.vs_shader_modifier = shader_modifier; } + void SetVsShaderBase(uint64_t addr) { - m_vs.vs_regs = vs_regs; - m_vs.vs_shader_modifier = shader_modifier; - m_vs.vs_embedded = false; + m_vs.vs_regs.data_addr = addr; + m_vs.vs_embedded = false; + } + void SetVsShaderResource1(const VsShaderResource1& rsrc1) + { + m_vs.vs_regs.rsrc1 = rsrc1; + m_vs.vs_embedded = false; + } + void SetVsShaderResource2(const VsShaderResource2& rsrc2) + { + m_vs.vs_regs.rsrc2 = rsrc2; + m_vs.vs_embedded = false; } - void UpdateVsShader(const VsStageRegisters& vs_regs, uint32_t shader_modifier) - { - m_vs.vs_regs.m_spiShaderPgmLoVs = vs_regs.m_spiShaderPgmLoVs; - m_vs.vs_regs.m_spiShaderPgmHiVs = vs_regs.m_spiShaderPgmHiVs; - m_vs.vs_regs.m_spiShaderPgmRsrc1Vs = vs_regs.m_spiShaderPgmRsrc1Vs; - m_vs.vs_regs.m_spiShaderPgmRsrc2Vs = vs_regs.m_spiShaderPgmRsrc2Vs; - m_vs.vs_shader_modifier = shader_modifier; - m_vs.vs_embedded = false; - } void SetVsEmbedded(uint32_t id, uint32_t shader_modifier) { m_vs.vs_embedded_id = id; m_vs.vs_shader_modifier = shader_modifier; m_vs.vs_embedded = true; } - - void SetPsShader(const PsStageRegisters& ps_regs) + void SetEsShaderBase(uint64_t addr) { - m_ps.ps_regs = ps_regs; - m_ps.ps_embedded = false; + m_vs.es_regs.data_addr = addr; + m_vs.vs_embedded = false; } - void UpdatePsShader(const PsStageRegisters& ps_regs) + void SetGsShaderResource1(const GsShaderResource1& rsrc1) { - m_ps.ps_regs.data_addr = ps_regs.data_addr; - m_ps.ps_regs.vgprs = ps_regs.vgprs; - m_ps.ps_regs.sgprs = ps_regs.sgprs; - m_ps.ps_regs.scratch_en = ps_regs.scratch_en; - m_ps.ps_regs.user_sgpr = ps_regs.user_sgpr; - m_ps.ps_regs.wave_cnt_en = ps_regs.wave_cnt_en; - m_ps.ps_embedded = false; + m_vs.gs_regs.rsrc1 = rsrc1; + m_vs.vs_embedded = false; + } + void SetGsShaderResource2(const GsShaderResource2& rsrc2) + { + m_vs.gs_regs.rsrc2 = rsrc2; + m_vs.vs_embedded = false; + } + void SetGsShaderChksum(uint32_t value) + { + m_vs.gs_regs.chksum <<= 32u; + m_vs.gs_regs.chksum |= value; + } + + void SetPsShaderBase(uint64_t addr) + { + m_ps.ps_regs.data_addr = addr; + m_ps.ps_embedded = false; + } + void SetPsShaderResource1(const PsShaderResource1& rsrc1) + { + m_ps.ps_regs.rsrc1 = rsrc1; + m_ps.ps_embedded = false; + } + void SetPsShaderResource2(const PsShaderResource2& rsrc2) + { + m_ps.ps_regs.rsrc2 = rsrc2; + m_ps.ps_embedded = false; } void SetPsEmbedded(uint32_t id) { m_ps.ps_embedded_id = id; m_ps.ps_embedded = true; } + void SetPsShaderChksum(uint32_t value) + { + m_ps.ps_regs.chksum <<= 32u; + m_ps.ps_regs.chksum |= value; + } void SetCsShader(const CsStageRegisters& cs_regs, uint32_t shader_modifier) { @@ -770,6 +979,12 @@ public: m_cs.cs_user_sgpr.type[id] = type; m_cs.cs_user_sgpr.count = ((id + 1) > m_cs.cs_user_sgpr.count ? (id + 1) : m_cs.cs_user_sgpr.count); } + void SetGsUserSgpr(uint32_t id, uint32_t value, UserSgprType type) + { + m_vs.gs_user_sgpr.value[id] = value; + m_vs.gs_user_sgpr.type[id] = type; + m_vs.gs_user_sgpr.count = ((id + 1) > m_vs.gs_user_sgpr.count ? (id + 1) : m_vs.gs_user_sgpr.count); + } [[nodiscard]] const PixelShaderInfo& GetPs() const { return m_ps; } [[nodiscard]] const VertexShaderInfo& GetVs() const { return m_vs; } @@ -781,36 +996,6 @@ private: ComputeShaderInfo m_cs; }; -inline uint64_t VsStageRegisters::GetGpuAddress() const -{ - return (static_cast(m_spiShaderPgmLoVs) << 8u) | (static_cast(m_spiShaderPgmHiVs) << 40u); -} - -inline bool VsStageRegisters::GetStreamoutEnabled() const -{ - return (m_spiShaderPgmRsrc2Vs & 0x00001000u) != 0u; -} - -inline uint32_t VsStageRegisters::GetSgprCount() const -{ - return (m_spiShaderPgmRsrc1Vs >> 6u) & 0xfu; -} - -inline uint32_t VsStageRegisters::GetInputComponentsCount() const -{ - return (m_spiShaderPgmRsrc1Vs >> 24u) & 0x3u; -} - -inline uint32_t VsStageRegisters::GetUnknown1() const -{ - return m_spiShaderPgmRsrc1Vs & 0xfcfffc3fu; -} - -inline uint32_t VsStageRegisters::GetUnknown2() const -{ - return m_spiShaderPgmRsrc2Vs & 0xFFFFEFFFu; -} - } // namespace Kyty::Libs::Graphics::HW #endif // KYTY_EMU_ENABLED diff --git a/source/emulator/include/Emulator/Graphics/Objects/DepthStencilBuffer.h b/source/emulator/include/Emulator/Graphics/Objects/DepthStencilBuffer.h index 881e741..4a04aff 100644 --- a/source/emulator/include/Emulator/Graphics/Objects/DepthStencilBuffer.h +++ b/source/emulator/include/Emulator/Graphics/Objects/DepthStencilBuffer.h @@ -18,14 +18,16 @@ public: static constexpr int PARAM_HEIGHT = 2; static constexpr int PARAM_HTILE = 3; static constexpr int PARAM_NEO = 4; + static constexpr int PARAM_USAGE = 5; - DepthStencilBufferObject(uint64_t vk_format, uint32_t width, uint32_t height, bool htile, bool neo) + DepthStencilBufferObject(uint64_t vk_format, uint32_t width, uint32_t height, bool htile, bool neo, bool sampled) { params[PARAM_FORMAT] = vk_format; params[PARAM_WIDTH] = width; params[PARAM_HEIGHT] = height; params[PARAM_HTILE] = htile ? 1 : 0; params[PARAM_NEO] = neo ? 1 : 0; + params[PARAM_USAGE] = sampled ? 1 : 0; check_hash = false; type = Graphics::GpuMemoryObjectType::DepthStencilBuffer; } diff --git a/source/emulator/include/Emulator/Graphics/Pm4.h b/source/emulator/include/Emulator/Graphics/Pm4.h index 23bd33c..f54ac59 100644 --- a/source/emulator/include/Emulator/Graphics/Pm4.h +++ b/source/emulator/include/Emulator/Graphics/Pm4.h @@ -120,19 +120,35 @@ constexpr uint32_t DB_RENDER_CONTROL_COPY_SAMPLE_MASK = 0xF; constexpr uint32_t DB_COUNT_CONTROL = 0x1; -constexpr uint32_t DB_DEPTH_VIEW = 0x2; -constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_SHIFT = 0; -constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_MASK = 0x7FF; -constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_SHIFT = 13; -constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_MASK = 0x7FF; +constexpr uint32_t DB_DEPTH_VIEW = 0x2; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_SHIFT = 0; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_MASK = 0x7FF; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_HI_SHIFT = 11; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_START_HI_MASK = 0x3; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_SHIFT = 13; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_MASK = 0x7FF; +constexpr uint32_t DB_DEPTH_VIEW_Z_READ_ONLY_SHIFT = 24; +constexpr uint32_t DB_DEPTH_VIEW_Z_READ_ONLY_MASK = 0x1; +constexpr uint32_t DB_DEPTH_VIEW_STENCIL_READ_ONLY_SHIFT = 25; +constexpr uint32_t DB_DEPTH_VIEW_STENCIL_READ_ONLY_MASK = 0x1; +constexpr uint32_t DB_DEPTH_VIEW_MIPID_SHIFT = 26; +constexpr uint32_t DB_DEPTH_VIEW_MIPID_MASK = 0xF; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_HI_SHIFT = 30; +constexpr uint32_t DB_DEPTH_VIEW_SLICE_MAX_HI_MASK = 0x3; constexpr uint32_t DB_RENDER_OVERRIDE = 0x3; constexpr uint32_t DB_RENDER_OVERRIDE2 = 0x4; constexpr uint32_t DB_HTILE_DATA_BASE = 0x5; constexpr uint32_t PS_SHADER_SAMPLE_EXCLUSION_MASK = 0x6; -constexpr uint32_t DB_DEPTH_SIZE_XY = 0x7; -constexpr uint32_t DB_DEPTH_BOUNDS_MIN = 0x8; -constexpr uint32_t DB_DEPTH_BOUNDS_MAX = 0x9; + +constexpr uint32_t DB_DEPTH_SIZE_XY = 0x7; +constexpr uint32_t DB_DEPTH_SIZE_XY_X_MAX_SHIFT = 0; +constexpr uint32_t DB_DEPTH_SIZE_XY_X_MAX_MASK = 0x3FFF; +constexpr uint32_t DB_DEPTH_SIZE_XY_Y_MAX_SHIFT = 16; +constexpr uint32_t DB_DEPTH_SIZE_XY_Y_MAX_MASK = 0x3FFF; + +constexpr uint32_t DB_DEPTH_BOUNDS_MIN = 0x8; +constexpr uint32_t DB_DEPTH_BOUNDS_MAX = 0x9; constexpr uint32_t DB_STENCIL_CLEAR = 0xA; constexpr uint32_t DB_STENCIL_CLEAR_CLEAR_SHIFT = 0; @@ -172,23 +188,41 @@ constexpr uint32_t DB_DEPTH_INFO_MACRO_TILE_ASPECT_MASK = 0x3; constexpr uint32_t DB_DEPTH_INFO_NUM_BANKS_SHIFT = 19; constexpr uint32_t DB_DEPTH_INFO_NUM_BANKS_MASK = 0x3; -constexpr uint32_t DB_Z_INFO = 0x10; -constexpr uint32_t DB_Z_INFO_FORMAT_SHIFT = 0; -constexpr uint32_t DB_Z_INFO_FORMAT_MASK = 0x3; -constexpr uint32_t DB_Z_INFO_NUM_SAMPLES_SHIFT = 2; -constexpr uint32_t DB_Z_INFO_NUM_SAMPLES_MASK = 0x3; -constexpr uint32_t DB_Z_INFO_TILE_MODE_INDEX_SHIFT = 20; -constexpr uint32_t DB_Z_INFO_TILE_MODE_INDEX_MASK = 0x7; -constexpr uint32_t DB_Z_INFO_TILE_SURFACE_ENABLE_SHIFT = 29; -constexpr uint32_t DB_Z_INFO_TILE_SURFACE_ENABLE_MASK = 0x1; -constexpr uint32_t DB_Z_INFO_ZRANGE_PRECISION_SHIFT = 31; -constexpr uint32_t DB_Z_INFO_ZRANGE_PRECISION_MASK = 0x1; +constexpr uint32_t DB_Z_INFO = 0x10; +constexpr uint32_t DB_Z_INFO_FORMAT_SHIFT = 0; +constexpr uint32_t DB_Z_INFO_FORMAT_MASK = 0x3; +constexpr uint32_t DB_Z_INFO_NUM_SAMPLES_SHIFT = 2; +constexpr uint32_t DB_Z_INFO_NUM_SAMPLES_MASK = 0x3; +constexpr uint32_t DB_Z_INFO_ITERATE_FLUSH_SHIFT = 11; +constexpr uint32_t DB_Z_INFO_ITERATE_FLUSH_MASK = 0x1; +constexpr uint32_t DB_Z_INFO_PARTIALLY_RESIDENT_SHIFT = 12; +constexpr uint32_t DB_Z_INFO_PARTIALLY_RESIDENT_MASK = 0x1; +constexpr uint32_t DB_Z_INFO_MAXMIP_SHIFT = 16; +constexpr uint32_t DB_Z_INFO_MAXMIP_MASK = 0xF; +constexpr uint32_t DB_Z_INFO_TILE_MODE_INDEX_SHIFT = 20; +constexpr uint32_t DB_Z_INFO_TILE_MODE_INDEX_MASK = 0x7; +constexpr uint32_t DB_Z_INFO_DECOMPRESS_ON_N_ZPLANES_SHIFT = 23; +constexpr uint32_t DB_Z_INFO_DECOMPRESS_ON_N_ZPLANES_MASK = 0xF; +constexpr uint32_t DB_Z_INFO_ALLOW_EXPCLEAR_SHIFT = 27; +constexpr uint32_t DB_Z_INFO_ALLOW_EXPCLEAR_MASK = 0x1; +constexpr uint32_t DB_Z_INFO_TILE_SURFACE_ENABLE_SHIFT = 29; +constexpr uint32_t DB_Z_INFO_TILE_SURFACE_ENABLE_MASK = 0x1; +constexpr uint32_t DB_Z_INFO_ZRANGE_PRECISION_SHIFT = 31; +constexpr uint32_t DB_Z_INFO_ZRANGE_PRECISION_MASK = 0x1; constexpr uint32_t DB_STENCIL_INFO = 0x11; constexpr uint32_t DB_STENCIL_INFO_FORMAT_SHIFT = 0; constexpr uint32_t DB_STENCIL_INFO_FORMAT_MASK = 0x1; +constexpr uint32_t DB_STENCIL_INFO_ITERATE_FLUSH_SHIFT = 11; +constexpr uint32_t DB_STENCIL_INFO_ITERATE_FLUSH_MASK = 0x1; +constexpr uint32_t DB_STENCIL_INFO_PARTIALLY_RESIDENT_SHIFT = 12; +constexpr uint32_t DB_STENCIL_INFO_PARTIALLY_RESIDENT_MASK = 0x1; +constexpr uint32_t DB_STENCIL_INFO_RESERVED_FIELD_1_SHIFT = 13; +constexpr uint32_t DB_STENCIL_INFO_RESERVED_FIELD_1_MASK = 0x7; constexpr uint32_t DB_STENCIL_INFO_TILE_MODE_INDEX_SHIFT = 20; constexpr uint32_t DB_STENCIL_INFO_TILE_MODE_INDEX_MASK = 0x7; +constexpr uint32_t DB_STENCIL_INFO_ALLOW_EXPCLEAR_SHIFT = 27; +constexpr uint32_t DB_STENCIL_INFO_ALLOW_EXPCLEAR_MASK = 0x1; constexpr uint32_t DB_STENCIL_INFO_TILE_STENCIL_DISABLE_SHIFT = 29; constexpr uint32_t DB_STENCIL_INFO_TILE_STENCIL_DISABLE_MASK = 0x1; @@ -245,21 +279,37 @@ constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_X_MASK = 0x7FFF; constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_Y_SHIFT = 16; constexpr uint32_t PA_SC_GENERIC_SCISSOR_BR_BR_Y_MASK = 0x7FFF; -constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL = 0x94; -constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR = 0x95; -constexpr uint32_t PA_SC_VPORT_ZMIN_0 = 0xB4; -constexpr uint32_t PA_SC_VPORT_ZMAX_0 = 0xB5; -constexpr uint32_t PA_SC_RIGHT_VERT_GRID = 0xE8; -constexpr uint32_t PA_SC_LEFT_VERT_GRID = 0xE9; -constexpr uint32_t PA_SC_HORIZ_GRID = 0xEA; -constexpr uint32_t PA_SC_FOV_WINDOW_LR = 0xEB; -constexpr uint32_t PA_SC_FOV_WINDOW_TB = 0xEC; -constexpr uint32_t CB_RMI_GL2_CACHE_CONTROL = 0x104; -constexpr uint32_t CB_BLEND_RED = 0x105; -constexpr uint32_t CB_BLEND_GREEN = 0x106; -constexpr uint32_t CB_BLEND_BLUE = 0x107; -constexpr uint32_t CB_BLEND_ALPHA = 0x108; -constexpr uint32_t CB_DCC_CONTROL = 0x109; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL = 0x94; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_TL_X_SHIFT = 0; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_TL_X_MASK = 0x7FFF; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_TL_Y_SHIFT = 16; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_TL_Y_MASK = 0x7FFF; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_WINDOW_OFFSET_DISABLE_SHIFT = 31; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_TL_WINDOW_OFFSET_DISABLE_MASK = 0x1; + +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR = 0x95; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR_BR_X_SHIFT = 0; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR_BR_X_MASK = 0x7FFF; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR_BR_Y_SHIFT = 16; +constexpr uint32_t PA_SC_VPORT_SCISSOR_0_BR_BR_Y_MASK = 0x7FFF; + +constexpr uint32_t PA_SC_VPORT_SCISSOR_15_TL = 0xB2; +constexpr uint32_t PA_SC_VPORT_SCISSOR_15_BR = 0xB3; +constexpr uint32_t PA_SC_VPORT_ZMIN_0 = 0xB4; +constexpr uint32_t PA_SC_VPORT_ZMAX_0 = 0xB5; +constexpr uint32_t PA_SC_VPORT_ZMIN_15 = 0xD2; +constexpr uint32_t PA_SC_VPORT_ZMAX_15 = 0xD3; +constexpr uint32_t PA_SC_RIGHT_VERT_GRID = 0xE8; +constexpr uint32_t PA_SC_LEFT_VERT_GRID = 0xE9; +constexpr uint32_t PA_SC_HORIZ_GRID = 0xEA; +constexpr uint32_t PA_SC_FOV_WINDOW_LR = 0xEB; +constexpr uint32_t PA_SC_FOV_WINDOW_TB = 0xEC; +constexpr uint32_t CB_RMI_GL2_CACHE_CONTROL = 0x104; +constexpr uint32_t CB_BLEND_RED = 0x105; +constexpr uint32_t CB_BLEND_GREEN = 0x106; +constexpr uint32_t CB_BLEND_BLUE = 0x107; +constexpr uint32_t CB_BLEND_ALPHA = 0x108; +constexpr uint32_t CB_DCC_CONTROL = 0x109; constexpr uint32_t DB_STENCIL_CONTROL = 0x10B; constexpr uint32_t DB_STENCIL_CONTROL_STENCILFAIL_SHIFT = 0; @@ -295,29 +345,35 @@ constexpr uint32_t DB_STENCILREFMASK_BF_STENCILWRITEMASK_BF_MASK = 0xFF; constexpr uint32_t DB_STENCILREFMASK_BF_STENCILOPVAL_BF_SHIFT = 24; constexpr uint32_t DB_STENCILREFMASK_BF_STENCILOPVAL_BF_MASK = 0xFF; -constexpr uint32_t PA_CL_VPORT_XSCALE = 0x10F; -constexpr uint32_t PA_CL_VPORT_XOFFSET = 0x110; -constexpr uint32_t PA_CL_VPORT_YSCALE = 0x111; -constexpr uint32_t PA_CL_VPORT_YOFFSET = 0x112; -constexpr uint32_t PA_CL_VPORT_ZSCALE = 0x113; -constexpr uint32_t PA_CL_VPORT_ZOFFSET = 0x114; -constexpr uint32_t PA_CL_UCP_0_X = 0x16F; -constexpr uint32_t PA_CL_UCP_0_Y = 0x170; -constexpr uint32_t PA_CL_UCP_0_Z = 0x171; -constexpr uint32_t PA_CL_UCP_0_W = 0x172; -constexpr uint32_t SPI_PS_INPUT_CNTL_0 = 0x191; -constexpr uint32_t SPI_PS_INPUT_CNTL_31 = 0x1B0; -constexpr uint32_t SPI_VS_OUT_CONFIG = 0x1B1; -constexpr uint32_t SPI_PS_INPUT_ENA = 0x1B3; -constexpr uint32_t SPI_PS_INPUT_ADDR = 0x1B4; -constexpr uint32_t SPI_INTERP_CONTROL_0 = 0x1B5; -constexpr uint32_t SPI_PS_IN_CONTROL = 0x1B6; -constexpr uint32_t SPI_BARYC_CNTL = 0x1B8; -constexpr uint32_t SPI_TMPRING_SIZE = 0x1BA; -constexpr uint32_t SPI_SHADER_IDX_FORMAT = 0x1C2; -constexpr uint32_t SPI_SHADER_POS_FORMAT = 0x1C3; -constexpr uint32_t SPI_SHADER_Z_FORMAT = 0x1C4; -constexpr uint32_t SPI_SHADER_COL_FORMAT = 0x1C5; +constexpr uint32_t PA_CL_VPORT_XSCALE = 0x10F; +constexpr uint32_t PA_CL_VPORT_XOFFSET = 0x110; +constexpr uint32_t PA_CL_VPORT_YSCALE = 0x111; +constexpr uint32_t PA_CL_VPORT_YOFFSET = 0x112; +constexpr uint32_t PA_CL_VPORT_ZSCALE = 0x113; +constexpr uint32_t PA_CL_VPORT_ZOFFSET = 0x114; +constexpr uint32_t PA_CL_VPORT_XSCALE_15 = 0x169; +constexpr uint32_t PA_CL_VPORT_XOFFSET_15 = 0x16A; +constexpr uint32_t PA_CL_VPORT_YSCALE_15 = 0x16B; +constexpr uint32_t PA_CL_VPORT_YOFFSET_15 = 0x16C; +constexpr uint32_t PA_CL_VPORT_ZSCALE_15 = 0x16D; +constexpr uint32_t PA_CL_VPORT_ZOFFSET_15 = 0x16E; +constexpr uint32_t PA_CL_UCP_0_X = 0x16F; +constexpr uint32_t PA_CL_UCP_0_Y = 0x170; +constexpr uint32_t PA_CL_UCP_0_Z = 0x171; +constexpr uint32_t PA_CL_UCP_0_W = 0x172; +constexpr uint32_t SPI_PS_INPUT_CNTL_0 = 0x191; +constexpr uint32_t SPI_PS_INPUT_CNTL_31 = 0x1B0; +constexpr uint32_t SPI_VS_OUT_CONFIG = 0x1B1; +constexpr uint32_t SPI_PS_INPUT_ENA = 0x1B3; +constexpr uint32_t SPI_PS_INPUT_ADDR = 0x1B4; +constexpr uint32_t SPI_INTERP_CONTROL_0 = 0x1B5; +constexpr uint32_t SPI_PS_IN_CONTROL = 0x1B6; +constexpr uint32_t SPI_BARYC_CNTL = 0x1B8; +constexpr uint32_t SPI_TMPRING_SIZE = 0x1BA; +constexpr uint32_t SPI_SHADER_IDX_FORMAT = 0x1C2; +constexpr uint32_t SPI_SHADER_POS_FORMAT = 0x1C3; +constexpr uint32_t SPI_SHADER_Z_FORMAT = 0x1C4; +constexpr uint32_t SPI_SHADER_COL_FORMAT = 0x1C5; constexpr uint32_t CB_BLEND0_CONTROL = 0x1E0; constexpr uint32_t CB_BLEND0_CONTROL_COLOR_SRCBLEND_SHIFT = 0; @@ -339,23 +395,27 @@ constexpr uint32_t CB_BLEND0_CONTROL_ENABLE_MASK = 0x1; constexpr uint32_t GE_MAX_OUTPUT_PER_SUBGROUP = 0x1FF; -constexpr uint32_t DB_DEPTH_CONTROL = 0x200; -constexpr uint32_t DB_DEPTH_CONTROL_STENCIL_ENABLE_SHIFT = 0; -constexpr uint32_t DB_DEPTH_CONTROL_STENCIL_ENABLE_MASK = 0x1; -constexpr uint32_t DB_DEPTH_CONTROL_Z_ENABLE_SHIFT = 1; -constexpr uint32_t DB_DEPTH_CONTROL_Z_ENABLE_MASK = 0x1; -constexpr uint32_t DB_DEPTH_CONTROL_Z_WRITE_ENABLE_SHIFT = 2; -constexpr uint32_t DB_DEPTH_CONTROL_Z_WRITE_ENABLE_MASK = 0x1; -constexpr uint32_t DB_DEPTH_CONTROL_DEPTH_BOUNDS_ENABLE_SHIFT = 3; -constexpr uint32_t DB_DEPTH_CONTROL_DEPTH_BOUNDS_ENABLE_MASK = 0x1; -constexpr uint32_t DB_DEPTH_CONTROL_ZFUNC_SHIFT = 4; -constexpr uint32_t DB_DEPTH_CONTROL_ZFUNC_MASK = 0x7; -constexpr uint32_t DB_DEPTH_CONTROL_BACKFACE_ENABLE_SHIFT = 7; -constexpr uint32_t DB_DEPTH_CONTROL_BACKFACE_ENABLE_MASK = 0x1; -constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_SHIFT = 8; -constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_MASK = 0x7; -constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_BF_SHIFT = 20; -constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_BF_MASK = 0x7; +constexpr uint32_t DB_DEPTH_CONTROL = 0x200; +constexpr uint32_t DB_DEPTH_CONTROL_STENCIL_ENABLE_SHIFT = 0; +constexpr uint32_t DB_DEPTH_CONTROL_STENCIL_ENABLE_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_Z_ENABLE_SHIFT = 1; +constexpr uint32_t DB_DEPTH_CONTROL_Z_ENABLE_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_Z_WRITE_ENABLE_SHIFT = 2; +constexpr uint32_t DB_DEPTH_CONTROL_Z_WRITE_ENABLE_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_DEPTH_BOUNDS_ENABLE_SHIFT = 3; +constexpr uint32_t DB_DEPTH_CONTROL_DEPTH_BOUNDS_ENABLE_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_ZFUNC_SHIFT = 4; +constexpr uint32_t DB_DEPTH_CONTROL_ZFUNC_MASK = 0x7; +constexpr uint32_t DB_DEPTH_CONTROL_BACKFACE_ENABLE_SHIFT = 7; +constexpr uint32_t DB_DEPTH_CONTROL_BACKFACE_ENABLE_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_SHIFT = 8; +constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_MASK = 0x7; +constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_BF_SHIFT = 20; +constexpr uint32_t DB_DEPTH_CONTROL_STENCILFUNC_BF_MASK = 0x7; +constexpr uint32_t DB_DEPTH_CONTROL_ENABLE_COLOR_WRITES_ON_DEPTH_FAIL_SHIFT = 30; +constexpr uint32_t DB_DEPTH_CONTROL_ENABLE_COLOR_WRITES_ON_DEPTH_FAIL_MASK = 0x1; +constexpr uint32_t DB_DEPTH_CONTROL_DISABLE_COLOR_WRITES_ON_DEPTH_PASS_SHIFT = 31; +constexpr uint32_t DB_DEPTH_CONTROL_DISABLE_COLOR_WRITES_ON_DEPTH_PASS_MASK = 0x1; constexpr uint32_t DB_EQAA = 0x201; constexpr uint32_t DB_EQAA_MAX_ANCHOR_SAMPLES_SHIFT = 0; @@ -539,21 +599,124 @@ constexpr uint32_t PA_SC_BINNER_CNTL_1 = 0x312; constexpr uint32_t PA_SC_CONSERVATIVE_RASTERIZATION_CNTL = 0x313; constexpr uint32_t PA_SC_NGG_MODE_CNTL = 0x314; constexpr uint32_t CB_COLOR0_BASE = 0x318; -constexpr uint32_t CB_COLOR0_VIEW = 0x31B; -constexpr uint32_t CB_COLOR0_INFO = 0x31C; -constexpr uint32_t CB_COLOR0_ATTRIB = 0x31D; -constexpr uint32_t CB_COLOR0_DCC_CONTROL = 0x31E; -constexpr uint32_t CB_COLOR0_CMASK = 0x31F; -constexpr uint32_t CB_COLOR0_FMASK = 0x321; -constexpr uint32_t CB_COLOR0_CLEAR_WORD0 = 0x323; -constexpr uint32_t CB_COLOR0_CLEAR_WORD1 = 0x324; -constexpr uint32_t CB_COLOR0_DCC_BASE = 0x325; -constexpr uint32_t CB_COLOR0_BASE_EXT = 0x390; -constexpr uint32_t CB_COLOR0_CMASK_BASE_EXT = 0x398; -constexpr uint32_t CB_COLOR0_FMASK_BASE_EXT = 0x3A0; -constexpr uint32_t CB_COLOR0_DCC_BASE_EXT = 0x3A8; -constexpr uint32_t CB_COLOR0_ATTRIB2 = 0x3B0; -constexpr uint32_t CB_COLOR0_ATTRIB3 = 0x3B8; + +constexpr uint32_t CB_COLOR0_VIEW = 0x31B; +constexpr uint32_t CB_COLOR0_VIEW_SLICE_START_SHIFT = 0; +constexpr uint32_t CB_COLOR0_VIEW_SLICE_START_MASK = 0x1FFF; +constexpr uint32_t CB_COLOR0_VIEW_SLICE_MAX_SHIFT = 13; +constexpr uint32_t CB_COLOR0_VIEW_SLICE_MAX_MASK = 0x1FFF; +constexpr uint32_t CB_COLOR0_VIEW_MIP_LEVEL_SHIFT = 26; +constexpr uint32_t CB_COLOR0_VIEW_MIP_LEVEL_MASK = 0xF; + +constexpr uint32_t CB_COLOR0_INFO = 0x31C; +constexpr uint32_t CB_COLOR0_INFO_FORMAT_SHIFT = 2; +constexpr uint32_t CB_COLOR0_INFO_FORMAT_MASK = 0x1F; +constexpr uint32_t CB_COLOR0_INFO_NUMBER_TYPE_SHIFT = 8; +constexpr uint32_t CB_COLOR0_INFO_NUMBER_TYPE_MASK = 0x7; +constexpr uint32_t CB_COLOR0_INFO_COMP_SWAP_SHIFT = 11; +constexpr uint32_t CB_COLOR0_INFO_COMP_SWAP_MASK = 0x3; +constexpr uint32_t CB_COLOR0_INFO_FAST_CLEAR_SHIFT = 13; +constexpr uint32_t CB_COLOR0_INFO_FAST_CLEAR_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_COMPRESSION_SHIFT = 14; +constexpr uint32_t CB_COLOR0_INFO_COMPRESSION_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_BLEND_CLAMP_SHIFT = 15; +constexpr uint32_t CB_COLOR0_INFO_BLEND_CLAMP_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_BLEND_BYPASS_SHIFT = 16; +constexpr uint32_t CB_COLOR0_INFO_BLEND_BYPASS_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_ROUND_MODE_SHIFT = 18; +constexpr uint32_t CB_COLOR0_INFO_ROUND_MODE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_CMASK_IS_LINEAR_SHIFT = 19; +constexpr uint32_t CB_COLOR0_INFO_CMASK_IS_LINEAR_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_FMASK_COMPRESSION_DISABLE_SHIFT = 26; +constexpr uint32_t CB_COLOR0_INFO_FMASK_COMPRESSION_DISABLE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_FMASK_COMPRESS_1FRAG_ONLY_SHIFT = 27; +constexpr uint32_t CB_COLOR0_INFO_FMASK_COMPRESS_1FRAG_ONLY_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_DCC_ENABLE_SHIFT = 28; +constexpr uint32_t CB_COLOR0_INFO_DCC_ENABLE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_INFO_CMASK_ADDR_TYPE_SHIFT = 29; +constexpr uint32_t CB_COLOR0_INFO_CMASK_ADDR_TYPE_MASK = 0x3; +constexpr uint32_t CB_COLOR0_INFO_ALT_TILE_MODE_SHIFT = 31; +constexpr uint32_t CB_COLOR0_INFO_ALT_TILE_MODE_MASK = 0x1; + +constexpr uint32_t CB_COLOR0_ATTRIB = 0x31D; +constexpr uint32_t CB_COLOR0_ATTRIB_TILE_MODE_INDEX_SHIFT = 0; +constexpr uint32_t CB_COLOR0_ATTRIB_TILE_MODE_INDEX_MASK = 0x1F; +constexpr uint32_t CB_COLOR0_ATTRIB_FMASK_TILE_MODE_INDEX_SHIFT = 5; +constexpr uint32_t CB_COLOR0_ATTRIB_FMASK_TILE_MODE_INDEX_MASK = 0x1F; +constexpr uint32_t CB_COLOR0_ATTRIB_NUM_SAMPLES_SHIFT = 12; +constexpr uint32_t CB_COLOR0_ATTRIB_NUM_SAMPLES_MASK = 0x7; +constexpr uint32_t CB_COLOR0_ATTRIB_NUM_FRAGMENTS_SHIFT = 15; +constexpr uint32_t CB_COLOR0_ATTRIB_NUM_FRAGMENTS_MASK = 0x3; +constexpr uint32_t CB_COLOR0_ATTRIB_FORCE_DST_ALPHA_1_SHIFT = 17; +constexpr uint32_t CB_COLOR0_ATTRIB_FORCE_DST_ALPHA_1_MASK = 0x1; + +constexpr uint32_t CB_COLOR0_DCC_CONTROL = 0x31E; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_OVERWRITE_COMBINER_DISABLE_SHIFT = 0; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_OVERWRITE_COMBINER_DISABLE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_KEY_CLEAR_ENABLE_SHIFT = 1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_KEY_CLEAR_ENABLE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MAX_UNCOMPRESSED_BLOCK_SIZE_SHIFT = 2; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MAX_UNCOMPRESSED_BLOCK_SIZE_MASK = 0x3; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MIN_COMPRESSED_BLOCK_SIZE_SHIFT = 4; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MIN_COMPRESSED_BLOCK_SIZE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MAX_COMPRESSED_BLOCK_SIZE_SHIFT = 5; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_MAX_COMPRESSED_BLOCK_SIZE_MASK = 0x3; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_COLOR_TRANSFORM_SHIFT = 7; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_COLOR_TRANSFORM_MASK = 0x3; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_INDEPENDENT_64B_BLOCKS_SHIFT = 9; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_INDEPENDENT_64B_BLOCKS_MASK = 0x1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_ENABLE_CONSTANT_ENCODE_REG_WRITE_SHIFT = 19; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_ENABLE_CONSTANT_ENCODE_REG_WRITE_MASK = 0x1; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_INDEPENDENT_128B_BLOCKS_SHIFT = 20; +constexpr uint32_t CB_COLOR0_DCC_CONTROL_INDEPENDENT_128B_BLOCKS_MASK = 0x1; + +constexpr uint32_t CB_COLOR0_CMASK = 0x31F; +constexpr uint32_t CB_COLOR0_FMASK = 0x321; +constexpr uint32_t CB_COLOR0_CLEAR_WORD0 = 0x323; +constexpr uint32_t CB_COLOR0_CLEAR_WORD1 = 0x324; +constexpr uint32_t CB_COLOR0_DCC_BASE = 0x325; +constexpr uint32_t CB_COLOR7_BASE = 0x381; +constexpr uint32_t CB_COLOR7_VIEW = 0x384; +constexpr uint32_t CB_COLOR7_INFO = 0x385; +constexpr uint32_t CB_COLOR7_ATTRIB = 0x386; +constexpr uint32_t CB_COLOR7_DCC_CONTROL = 0x387; +constexpr uint32_t CB_COLOR7_CMASK = 0x388; +constexpr uint32_t CB_COLOR7_FMASK = 0x38A; +constexpr uint32_t CB_COLOR7_CLEAR_WORD0 = 0x38C; +constexpr uint32_t CB_COLOR7_CLEAR_WORD1 = 0x38D; +constexpr uint32_t CB_COLOR7_DCC_BASE = 0x38E; +constexpr uint32_t CB_COLOR0_BASE_EXT = 0x390; +constexpr uint32_t CB_COLOR7_BASE_EXT = 0x397; +constexpr uint32_t CB_COLOR0_CMASK_BASE_EXT = 0x398; +constexpr uint32_t CB_COLOR7_CMASK_BASE_EXT = 0x39F; +constexpr uint32_t CB_COLOR0_FMASK_BASE_EXT = 0x3A0; +constexpr uint32_t CB_COLOR7_FMASK_BASE_EXT = 0x3A7; +constexpr uint32_t CB_COLOR0_DCC_BASE_EXT = 0x3A8; +constexpr uint32_t CB_COLOR7_DCC_BASE_EXT = 0x3AF; + +constexpr uint32_t CB_COLOR0_ATTRIB2 = 0x3B0; +constexpr uint32_t CB_COLOR0_ATTRIB2_MIP0_HEIGHT_SHIFT = 0; +constexpr uint32_t CB_COLOR0_ATTRIB2_MIP0_HEIGHT_MASK = 0x3FFF; +constexpr uint32_t CB_COLOR0_ATTRIB2_MIP0_WIDTH_SHIFT = 14; +constexpr uint32_t CB_COLOR0_ATTRIB2_MIP0_WIDTH_MASK = 0x3FFF; +constexpr uint32_t CB_COLOR0_ATTRIB2_MAX_MIP_SHIFT = 28; +constexpr uint32_t CB_COLOR0_ATTRIB2_MAX_MIP_MASK = 0xF; + +constexpr uint32_t CB_COLOR7_ATTRIB2 = 0x3B7; + +constexpr uint32_t CB_COLOR0_ATTRIB3 = 0x3B8; +constexpr uint32_t CB_COLOR0_ATTRIB3_MIP0_DEPTH_SHIFT = 0; +constexpr uint32_t CB_COLOR0_ATTRIB3_MIP0_DEPTH_MASK = 0x1FFF; +constexpr uint32_t CB_COLOR0_ATTRIB3_COLOR_SW_MODE_SHIFT = 14; +constexpr uint32_t CB_COLOR0_ATTRIB3_COLOR_SW_MODE_MASK = 0x1F; +constexpr uint32_t CB_COLOR0_ATTRIB3_RESOURCE_TYPE_SHIFT = 24; +constexpr uint32_t CB_COLOR0_ATTRIB3_RESOURCE_TYPE_MASK = 0x3; +constexpr uint32_t CB_COLOR0_ATTRIB3_CMASK_PIPE_ALIGNED_SHIFT = 26; +constexpr uint32_t CB_COLOR0_ATTRIB3_CMASK_PIPE_ALIGNED_MASK = 0x1; +constexpr uint32_t CB_COLOR0_ATTRIB3_DCC_PIPE_ALIGNED_SHIFT = 30; +constexpr uint32_t CB_COLOR0_ATTRIB3_DCC_PIPE_ALIGNED_MASK = 0x1; + +constexpr uint32_t CB_COLOR7_ATTRIB3 = 0x3BF; /* Fake codes. Don't exist on real HW */ @@ -572,23 +735,86 @@ constexpr uint32_t SPI_SHADER_PGM_RSRC3_PS = 0x7; constexpr uint32_t SPI_SHADER_PGM_LO_PS = 0x8; constexpr uint32_t SPI_SHADER_PGM_HI_PS = 0x9; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS = 0xA; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT = 0; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK = 0x3F; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_SGPRS_SHIFT = 6; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_SGPRS_MASK = 0xF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS = 0xA; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK = 0x3F; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_SGPRS_SHIFT = 6; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_SGPRS_MASK = 0xF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_PRIORITY_SHIFT = 10; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_PRIORITY_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FLOAT_MODE_SHIFT = 12; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FLOAT_MODE_MASK = 0xFF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_DX10_CLAMP_SHIFT = 21; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_DX10_CLAMP_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_DEBUG_MODE_SHIFT = 22; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_DEBUG_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_IEEE_MODE_SHIFT = 23; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_IEEE_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_CU_GROUP_DISABLE_SHIFT = 24; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_CU_GROUP_DISABLE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FWD_PROGRESS_SHIFT = 26; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FWD_PROGRESS_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FP16_OVFL_SHIFT = 29; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_PS_FP16_OVFL_MASK = 0x1; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS = 0xB; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_SHIFT = 0; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_MASK = 0x1; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_SHIFT = 1; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MASK = 0x1F; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_SHIFT = 7; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_MASK = 0x0; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS = 0xB; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_SHIFT = 1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MASK = 0x1F; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_SHIFT = 7; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_EXTRA_LDS_SIZE_SHIFT = 8; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_EXTRA_LDS_SIZE_MASK = 0xFF; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_LOAD_INTRAWAVE_COLLISION_SHIFT = 26; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_LOAD_INTRAWAVE_COLLISION_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MSB_SHIFT = 27; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MSB_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SHARED_VGPR_CNT_SHIFT = 28; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_PS_SHARED_VGPR_CNT_MASK = 0xF; + +constexpr uint32_t SPI_SHADER_USER_DATA_PS_0 = 0xC; +constexpr uint32_t SPI_SHADER_USER_DATA_PS_15 = 0x1B; +constexpr uint32_t SPI_SHADER_USER_ACCUM_PS_0 = 0x32; +constexpr uint32_t SPI_SHADER_PGM_LO_VS = 0x48; +constexpr uint32_t SPI_SHADER_PGM_HI_VS = 0x49; + +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS = 0x4A; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_VGPRS_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_VGPRS_MASK = 0x3F; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_SGPRS_SHIFT = 6; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_SGPRS_MASK = 0xF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_PRIORITY_SHIFT = 10; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_PRIORITY_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FLOAT_MODE_SHIFT = 12; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FLOAT_MODE_MASK = 0xFF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_DX10_CLAMP_SHIFT = 21; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_DX10_CLAMP_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_IEEE_MODE_SHIFT = 23; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_IEEE_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_VGPR_COMP_CNT_SHIFT = 24; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_VGPR_COMP_CNT_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_CU_GROUP_ENABLE_SHIFT = 26; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_CU_GROUP_ENABLE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FWD_PROGRESS_SHIFT = 28; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FWD_PROGRESS_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FP16_OVFL_SHIFT = 31; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_VS_FP16_OVFL_MASK = 0x1; + +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS = 0x4B; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SCRATCH_EN_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SCRATCH_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_USER_SGPR_SHIFT = 1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_USER_SGPR_MASK = 0x1F; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_OC_LDS_EN_SHIFT = 7; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_OC_LDS_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SO_EN_SHIFT = 12; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SO_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_USER_SGPR_MSB_SHIFT = 27; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_USER_SGPR_MSB_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SHARED_VGPR_CNT_SHIFT = 28; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_VS_SHARED_VGPR_CNT_MASK = 0xF; -constexpr uint32_t SPI_SHADER_USER_DATA_PS_0 = 0xC; -constexpr uint32_t SPI_SHADER_USER_DATA_PS_15 = 0x1B; -constexpr uint32_t SPI_SHADER_USER_ACCUM_PS_0 = 0x32; constexpr uint32_t SPI_SHADER_USER_DATA_VS_0 = 0x4C; constexpr uint32_t SPI_SHADER_USER_DATA_VS_15 = 0x5B; constexpr uint32_t SPI_SHADER_PGM_CHKSUM_GS = 0x80; @@ -598,9 +824,51 @@ constexpr uint32_t SPI_SHADER_USER_DATA_ADDR_HI_GS = 0x83; constexpr uint32_t SPI_SHADER_PGM_RSRC3_GS = 0x87; constexpr uint32_t SPI_SHADER_PGM_LO_GS = 0x88; constexpr uint32_t SPI_SHADER_PGM_HI_GS = 0x89; -constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS = 0x8A; -constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS = 0x8B; + +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS = 0x8A; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_VGPRS_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_VGPRS_MASK = 0x3F; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_SGPRS_SHIFT = 6; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_SGPRS_MASK = 0xF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_PRIORITY_SHIFT = 10; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_PRIORITY_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FLOAT_MODE_SHIFT = 12; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FLOAT_MODE_MASK = 0xFF; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_DX10_CLAMP_SHIFT = 21; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_DX10_CLAMP_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_DEBUG_MODE_SHIFT = 22; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_DEBUG_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_IEEE_MODE_SHIFT = 23; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_IEEE_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_CU_GROUP_ENABLE_SHIFT = 24; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_CU_GROUP_ENABLE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FWD_PROGRESS_SHIFT = 26; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FWD_PROGRESS_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_WGP_MODE_SHIFT = 27; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_WGP_MODE_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_GS_VGPR_COMP_CNT_SHIFT = 29; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_GS_VGPR_COMP_CNT_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FP16_OVFL_SHIFT = 31; +constexpr uint32_t SPI_SHADER_PGM_RSRC1_GS_FP16_OVFL_MASK = 0x1; + +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS = 0x8B; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_SCRATCH_EN_SHIFT = 0; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_SCRATCH_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_USER_SGPR_SHIFT = 1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_USER_SGPR_MASK = 0x1F; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_ES_VGPR_COMP_CNT_SHIFT = 16; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_ES_VGPR_COMP_CNT_MASK = 0x3; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_OC_LDS_EN_SHIFT = 18; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_OC_LDS_EN_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_LDS_SIZE_SHIFT = 19; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_LDS_SIZE_MASK = 0xFF; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_USER_SGPR_MSB_SHIFT = 27; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_USER_SGPR_MSB_MASK = 0x1; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_SHARED_VGPR_CNT_SHIFT = 28; +constexpr uint32_t SPI_SHADER_PGM_RSRC2_GS_SHARED_VGPR_CNT_MASK = 0xF; + constexpr uint32_t SPI_SHADER_USER_DATA_GS_0 = 0x8C; +constexpr uint32_t SPI_SHADER_USER_DATA_GS_15 = 0x9B; constexpr uint32_t SPI_SHADER_USER_ACCUM_ESGS_0 = 0xB2; constexpr uint32_t SPI_SHADER_PGM_LO_ES = 0xC8; constexpr uint32_t SPI_SHADER_PGM_HI_ES = 0xC9; @@ -670,24 +938,41 @@ constexpr uint32_t SH_NUM = 0x2FF + 1; /* User config registers */ -constexpr uint32_t VGT_PRIMITIVE_TYPE = 0x242; +constexpr uint32_t VGT_PRIMITIVE_TYPE = 0x242; +constexpr uint32_t VGT_PRIMITIVE_TYPE_PRIM_TYPE_SHIFT = 0; +constexpr uint32_t VGT_PRIMITIVE_TYPE_PRIM_TYPE_MASK = 0x3F; + constexpr uint32_t VGT_OBJECT_ID = 0x248; constexpr uint32_t GE_INDX_OFFSET = 0x24A; constexpr uint32_t GE_MULTI_PRIM_IB_RESET_EN = 0x24B; constexpr uint32_t VGT_HS_OFFCHIP_PARAM = 0x24F; constexpr uint32_t VGT_TF_MEMORY_BASE = 0x250; -constexpr uint32_t GE_CNTL = 0x25B; -constexpr uint32_t GE_USER_VGPR1 = 0x25C; -constexpr uint32_t GE_USER_VGPR2 = 0x25D; -constexpr uint32_t GE_USER_VGPR3 = 0x25E; -constexpr uint32_t GE_STEREO_CNTL = 0x25F; -constexpr uint32_t GE_USER_VGPR_EN = 0x262; -constexpr uint32_t TA_CS_BC_BASE_ADDR = 0x380; -constexpr uint32_t TA_CS_BC_BASE_ADDR_HI = 0x381; -constexpr uint32_t TEXTURE_GRADIENT_FACTORS = 0x382; -constexpr uint32_t GDS_OA_CNTL = 0x41D; -constexpr uint32_t GDS_OA_COUNTER = 0x41E; -constexpr uint32_t GDS_OA_ADDRESS = 0x41F; + +constexpr uint32_t GE_CNTL = 0x25B; +constexpr uint32_t GE_CNTL_PRIM_GRP_SIZE_SHIFT = 0; +constexpr uint32_t GE_CNTL_PRIM_GRP_SIZE_MASK = 0x1FF; +constexpr uint32_t GE_CNTL_VERT_GRP_SIZE_SHIFT = 9; +constexpr uint32_t GE_CNTL_VERT_GRP_SIZE_MASK = 0x1FF; + +constexpr uint32_t GE_USER_VGPR1 = 0x25C; +constexpr uint32_t GE_USER_VGPR2 = 0x25D; +constexpr uint32_t GE_USER_VGPR3 = 0x25E; +constexpr uint32_t GE_STEREO_CNTL = 0x25F; + +constexpr uint32_t GE_USER_VGPR_EN = 0x262; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR1_SHIFT = 0; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR1_MASK = 0x1; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR2_SHIFT = 1; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR2_MASK = 0x1; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR3_SHIFT = 2; +constexpr uint32_t GE_USER_VGPR_EN_EN_USER_VGPR3_MASK = 0x1; + +constexpr uint32_t TA_CS_BC_BASE_ADDR = 0x380; +constexpr uint32_t TA_CS_BC_BASE_ADDR_HI = 0x381; +constexpr uint32_t TEXTURE_GRADIENT_FACTORS = 0x382; +constexpr uint32_t GDS_OA_CNTL = 0x41D; +constexpr uint32_t GDS_OA_COUNTER = 0x41E; +constexpr uint32_t GDS_OA_ADDRESS = 0x41F; /* Fake codes. Don't exist on real HW */ diff --git a/source/emulator/include/Emulator/Graphics/Shader.h b/source/emulator/include/Emulator/Graphics/Shader.h index 3928bc3..2f622ab 100644 --- a/source/emulator/include/Emulator/Graphics/Shader.h +++ b/source/emulator/include/Emulator/Graphics/Shader.h @@ -823,6 +823,7 @@ Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInp Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info); Vector ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info); bool ShaderIsDisabled(uint64_t addr); +bool ShaderIsDisabled2(uint64_t addr, uint64_t chksum); void ShaderDisable(uint64_t id); void ShaderInjectDebugPrintf(uint64_t id, const ShaderDebugPrintf& cmd); diff --git a/source/emulator/src/Graphics/Graphics.cpp b/source/emulator/src/Graphics/Graphics.cpp index d439c35..1ae2694 100644 --- a/source/emulator/src/Graphics/Graphics.cpp +++ b/source/emulator/src/Graphics/Graphics.cpp @@ -1556,13 +1556,34 @@ int KYTY_SYSV_ABI GraphicsCreateShader(Shader** dst, void* header, const volatil h->code = code; - dbg_dump_shader(h); - EXIT_NOT_IMPLEMENTED(h->file_header != 0x34333231); EXIT_NOT_IMPLEMENTED(h->version != 0x00000018); + auto base = reinterpret_cast(code); + + if (h->type == 2 && h->num_sh_registers >= 2 && h->sh_registers[0].offset == Pm4::SPI_SHADER_PGM_LO_ES && + h->sh_registers[1].offset == Pm4::SPI_SHADER_PGM_HI_ES) + { + h->sh_registers[0].offset = Pm4::SPI_SHADER_PGM_LO_ES; + h->sh_registers[0].value = ((base >> 8u) & 0xffffffffu); + h->sh_registers[1].offset = Pm4::SPI_SHADER_PGM_HI_ES; + h->sh_registers[1].value = ((base >> 40u) & 0x000000ffu); + } else if (h->type == 1 && h->num_sh_registers >= 2 && h->sh_registers[0].offset == Pm4::SPI_SHADER_PGM_LO_PS && + h->sh_registers[1].offset == Pm4::SPI_SHADER_PGM_HI_PS) + { + h->sh_registers[0].offset = Pm4::SPI_SHADER_PGM_LO_PS; + h->sh_registers[0].value = ((base >> 8u) & 0xffffffffu); + h->sh_registers[1].offset = Pm4::SPI_SHADER_PGM_HI_PS; + h->sh_registers[1].value = ((base >> 40u) & 0x000000ffu); + } else + { + EXIT("invalid shader\n"); + } + *dst = h; + dbg_dump_shader(h); + return OK; } @@ -1774,6 +1795,10 @@ uint32_t* KYTY_SYSV_ABI GraphicsCbSetShRegisterRangeDirect(CommandBuffer* buf, u buf->DbgDump(); + auto* marker = buf->AllocateDW(2); + marker[0] = KYTY_PM4(2, Pm4::IT_NOP, Pm4::R_ZERO); + marker[1] = 0x6875000d; + auto* cmd = buf->AllocateDW(num_values + 2); EXIT_NOT_IMPLEMENTED(cmd == nullptr); @@ -1815,23 +1840,22 @@ uint32_t* KYTY_SYSV_ABI GraphicsCbReleaseMem(CommandBuffer* buf, uint8_t action, EXIT_NOT_IMPLEMENTED(data_sel != 2); EXIT_NOT_IMPLEMENTED(gds_offset != 0); EXIT_NOT_IMPLEMENTED(gds_size != 1); + EXIT_NOT_IMPLEMENTED(interrupt != 0); + EXIT_NOT_IMPLEMENTED(interrupt_ctx_id != 0); buf->DbgDump(); - auto* cmd = buf->AllocateDW(10); + auto* cmd = buf->AllocateDW(7); EXIT_NOT_IMPLEMENTED(cmd == nullptr); - cmd[0] = KYTY_PM4(10, Pm4::IT_NOP, Pm4::R_RELEASE_MEM); - cmd[1] = action; + cmd[0] = KYTY_PM4(7, Pm4::IT_NOP, Pm4::R_RELEASE_MEM); + cmd[1] = action | (static_cast(cache_policy) << 8u); cmd[2] = gcr_cntl; - cmd[3] = cache_policy; - cmd[4] = static_cast(reinterpret_cast(address) & 0xffffffffu); - cmd[5] = static_cast((reinterpret_cast(address) >> 32u) & 0xffffffffu); - cmd[6] = static_cast(data & 0xffffffffu); - cmd[7] = static_cast((data >> 32u) & 0xffffffffu); - cmd[8] = interrupt; - cmd[9] = interrupt_ctx_id; + cmd[3] = static_cast(reinterpret_cast(address) & 0xffffffffu); + cmd[4] = static_cast((reinterpret_cast(address) >> 32u) & 0xffffffffu); + cmd[5] = static_cast(data & 0xffffffffu); + cmd[6] = static_cast((data >> 32u) & 0xffffffffu); return cmd; } @@ -2086,26 +2110,23 @@ uint32_t* KYTY_SYSV_ABI GraphicsDcbWriteData(CommandBuffer* buf, uint8_t dst, ui printf("\t write_confirm = 0x%02" PRIx8 "\n", write_confirm); EXIT_NOT_IMPLEMENTED(buf == nullptr); - EXIT_NOT_IMPLEMENTED((8 + num_dwords - 2u) > 0x3fffu); + EXIT_NOT_IMPLEMENTED((4 + num_dwords - 2u) > 0x3fffu); EXIT_NOT_IMPLEMENTED(data == nullptr); EXIT_NOT_IMPLEMENTED(address_or_offset == 0); buf->DbgDump(); - auto* cmd = buf->AllocateDW(8 + num_dwords); + auto* cmd = buf->AllocateDW(4 + num_dwords); EXIT_NOT_IMPLEMENTED(cmd == nullptr); - cmd[0] = KYTY_PM4(8 + num_dwords, Pm4::IT_NOP, Pm4::R_WRITE_DATA); - cmd[1] = dst; - cmd[2] = cache_policy; - cmd[3] = address_or_offset & 0xffffffffu; - cmd[4] = (address_or_offset >> 32u) & 0xffffffffu; - cmd[5] = num_dwords; - cmd[6] = increment; - cmd[7] = write_confirm; + cmd[0] = KYTY_PM4(4 + num_dwords, Pm4::IT_NOP, Pm4::R_WRITE_DATA); + cmd[1] = dst | (static_cast(cache_policy) << 8u) | (static_cast(increment) << 16u) | + (static_cast(write_confirm) << 24u); + cmd[2] = address_or_offset & 0xffffffffu; + cmd[3] = (address_or_offset >> 32u) & 0xffffffffu; - memcpy(cmd + 8, data, static_cast(num_dwords) * 4); + memcpy(cmd + 4, data, static_cast(num_dwords) * 4); return cmd; } diff --git a/source/emulator/src/Graphics/GraphicsRender.cpp b/source/emulator/src/Graphics/GraphicsRender.cpp index 50586a9..c69ba8c 100644 --- a/source/emulator/src/Graphics/GraphicsRender.cpp +++ b/source/emulator/src/Graphics/GraphicsRender.cpp @@ -466,10 +466,27 @@ static void uc_print(const char* func, const HW::UserConfig& uc) { printf("%s\n", func); - printf("\t GetPrimType() = 0x%08" PRIx32 "\n", uc.GetPrimType()); + const auto& ge_cntl = uc.GetGeControl(); + const auto& user_en = uc.GetGeUserVgprEn(); + + printf("\t GetPrimType() = 0x%08" PRIx32 "\n", uc.GetPrimType()); + printf("\t primitive_group_size = 0x%04" PRIx16 "\n", ge_cntl.primitive_group_size); + printf("\t en_user_vgpr1 = %s\n", user_en.vgpr1 ? "true" : "false"); + printf("\t en_user_vgpr2 = %s\n", user_en.vgpr2 ? "true" : "false"); + printf("\t en_user_vgpr3 = %s\n", user_en.vgpr3 ? "true" : "false"); } -static void uc_check(const HW::UserConfig& /*uc*/) {} +static void uc_check(const HW::UserConfig& uc) +{ + const auto& ge_cntl = uc.GetGeControl(); + const auto& user_en = uc.GetGeUserVgprEn(); + + EXIT_NOT_IMPLEMENTED(ge_cntl.primitive_group_size != 0x0000); + EXIT_NOT_IMPLEMENTED(ge_cntl.vertex_group_size != 0x0000); + EXIT_NOT_IMPLEMENTED(user_en.vgpr1 != false); + EXIT_NOT_IMPLEMENTED(user_en.vgpr2 != false); + EXIT_NOT_IMPLEMENTED(user_en.vgpr3 != false); +} static void sh_print(const char* func, const HW::Shader& /*uc*/) { @@ -490,8 +507,13 @@ static Core::StringList rt_print(const char* func, const HW::RenderTarget& rt) dst.Add(String::FromPrintf("\t slice.slice_div64_minus1 = 0x%08" PRIx32 "\n", rt.slice.slice_div64_minus1)); dst.Add(String::FromPrintf("\t view.base_array_slice_index = 0x%08" PRIx32 "\n", rt.view.base_array_slice_index)); dst.Add(String::FromPrintf("\t view.last_array_slice_index = 0x%08" PRIx32 "\n", rt.view.last_array_slice_index)); + dst.Add(String::FromPrintf("\t view.current_mip_level = 0x%08" PRIx32 "\n", rt.view.current_mip_level)); dst.Add(String::FromPrintf("\t info.fmask_compression_enable = %s\n", rt.info.fmask_compression_enable ? "true" : "false")); - dst.Add(String::FromPrintf("\t info.fmask_compression_mode = 0x%08" PRIx32 "\n", rt.info.fmask_compression_mode)); + + // dst.Add(String::FromPrintf("\t info.fmask_compression_mode = 0x%08" PRIx32 "\n", rt.info.fmask_compression_mode)); + dst.Add(String::FromPrintf("\t info.fmask_data_compression_disable = %s\n", rt.info.fmask_data_compression_disable ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.fmask_one_frag_mode = %s\n", rt.info.fmask_one_frag_mode ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.cmask_fast_clear_enable = %s\n", rt.info.cmask_fast_clear_enable ? "true" : "false")); dst.Add(String::FromPrintf("\t info.dcc_compression_enable = %s\n", rt.info.dcc_compression_enable ? "true" : "false")); dst.Add(String::FromPrintf("\t info.neo_mode = %s\n", rt.info.neo_mode ? "true" : "false")); @@ -500,17 +522,31 @@ static Core::StringList rt_print(const char* func, const HW::RenderTarget& rt) dst.Add(String::FromPrintf("\t info.format = 0x%08" PRIx32 "\n", rt.info.format)); dst.Add(String::FromPrintf("\t info.channel_type = 0x%08" PRIx32 "\n", rt.info.channel_type)); dst.Add(String::FromPrintf("\t info.channel_order = 0x%08" PRIx32 "\n", rt.info.channel_order)); + dst.Add(String::FromPrintf("\t info.blend_bypa = %s\n", rt.info.blend_bypass ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.blend_clamp = %s\n", rt.info.blend_clamp ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.round_mode = %s\n", rt.info.round_mode ? "true" : "false")); dst.Add(String::FromPrintf("\t attrib.force_dest_alpha_to_one = %s\n", rt.attrib.force_dest_alpha_to_one ? "true" : "false")); dst.Add(String::FromPrintf("\t attrib.tile_mode = 0x%08" PRIx32 "\n", rt.attrib.tile_mode)); dst.Add(String::FromPrintf("\t attrib.fmask_tile_mode = 0x%08" PRIx32 "\n", rt.attrib.fmask_tile_mode)); dst.Add(String::FromPrintf("\t attrib.num_samples = 0x%08" PRIx32 "\n", rt.attrib.num_samples)); dst.Add(String::FromPrintf("\t attrib.num_fragments = 0x%08" PRIx32 "\n", rt.attrib.num_fragments)); + dst.Add(String::FromPrintf("\t attrib2.width = 0x%08" PRIx32 "\n", rt.attrib2.width)); + dst.Add(String::FromPrintf("\t attrib2.height = 0x%08" PRIx32 "\n", rt.attrib2.height)); + dst.Add(String::FromPrintf("\t attrib2.num_mip_levels = 0x%08" PRIx32 "\n", rt.attrib2.num_mip_levels)); + dst.Add(String::FromPrintf("\t attrib3.depth = 0x%08" PRIx32 "\n", rt.attrib3.depth)); + dst.Add(String::FromPrintf("\t attrib3.tile_mode = 0x%08" PRIx32 "\n", rt.attrib3.tile_mode)); + dst.Add(String::FromPrintf("\t attrib3.dimension = 0x%08" PRIx32 "\n", rt.attrib3.dimension)); + dst.Add(String::FromPrintf("\t attrib3.cmask_pipe_aligned = %s\n", rt.attrib3.cmask_pipe_aligned ? "true" : "false")); + dst.Add(String::FromPrintf("\t attrib3.dcc_pipe_aligned = %s\n", rt.attrib3.dcc_pipe_aligned ? "true" : "false")); dst.Add(String::FromPrintf("\t dcc.max_uncompressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_uncompressed_block_size)); dst.Add(String::FromPrintf("\t dcc.max_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_compressed_block_size)); dst.Add(String::FromPrintf("\t dcc.min_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.min_compressed_block_size)); dst.Add(String::FromPrintf("\t dcc.color_transform = 0x%08" PRIx32 "\n", rt.dcc.color_transform)); - dst.Add(String::FromPrintf("\t dcc.enable_overwrite_combiner = %s\n", rt.dcc.enable_overwrite_combiner ? "true" : "false")); - dst.Add(String::FromPrintf("\t dcc.force_independent_blocks = %s\n", rt.dcc.force_independent_blocks ? "true" : "false")); + dst.Add(String::FromPrintf("\t dcc.overwrite_combiner_disable = %s\n", rt.dcc.overwrite_combiner_disable ? "true" : "false")); + dst.Add(String::FromPrintf("\t dcc.independent_64b_blocks = %s\n", rt.dcc.independent_64b_blocks ? "true" : "false")); + dst.Add(String::FromPrintf("\t dcc.independent_128b_blocks = %s\n", rt.dcc.independent_128b_blocks ? "true" : "false")); + dst.Add(String::FromPrintf("\t data_write_on_dcc_clear_to_reg = %s\n", rt.dcc.data_write_on_dcc_clear_to_reg ? "true" : "false")); + dst.Add(String::FromPrintf("\t dcc.dcc_clear_key_enable = %s\n", rt.dcc.dcc_clear_key_enable ? "true" : "false")); dst.Add(String::FromPrintf("\t cmask.addr = 0x%016" PRIx64 "\n", rt.cmask.addr)); dst.Add(String::FromPrintf("\t cmask_slice.slice_minus1 = 0x%08" PRIx32 "\n", rt.cmask_slice.slice_minus1)); dst.Add(String::FromPrintf("\t fmask.addr = 0x%016" PRIx64 "\n", rt.fmask.addr)); @@ -536,13 +572,21 @@ static void rt_check(const HW::RenderTarget& rt) // EXIT_NOT_IMPLEMENTED(rt.slice_div64_minus1 != 0x000086ff); EXIT_NOT_IMPLEMENTED(rt.view.base_array_slice_index != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.view.last_array_slice_index != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.view.current_mip_level != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.info.fmask_compression_enable != false); - EXIT_NOT_IMPLEMENTED(rt.info.fmask_compression_mode != 0x00000000); + + // EXIT_NOT_IMPLEMENTED(rt.info.fmask_compression_mode != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.info.fmask_data_compression_disable != false); + EXIT_NOT_IMPLEMENTED(rt.info.fmask_one_frag_mode != false); + EXIT_NOT_IMPLEMENTED(rt.info.cmask_fast_clear_enable != false); EXIT_NOT_IMPLEMENTED(rt.info.dcc_compression_enable != false); EXIT_NOT_IMPLEMENTED(!render_to_texture && rt.info.neo_mode != Config::IsNeo()); EXIT_NOT_IMPLEMENTED(rt.info.cmask_tile_mode != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.info.cmask_tile_mode_neo != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.info.blend_bypass != false); + // EXIT_NOT_IMPLEMENTED(rt.info.blend_clamp != false); + EXIT_NOT_IMPLEMENTED(rt.info.round_mode != false); // EXIT_NOT_IMPLEMENTED(rt.format != 0x0000000a); // EXIT_NOT_IMPLEMENTED(rt.channel_type != 0x00000006); // EXIT_NOT_IMPLEMENTED(rt.channel_order != 0x00000001); @@ -551,12 +595,23 @@ static void rt_check(const HW::RenderTarget& rt) // EXIT_NOT_IMPLEMENTED(rt.fmask_tile_mode != 0x0000000a); EXIT_NOT_IMPLEMENTED(rt.attrib.num_samples != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.attrib.num_fragments != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.width != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.height != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.num_mip_levels != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.depth != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.tile_mode != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dimension != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.cmask_pipe_aligned != false); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dcc_pipe_aligned != false); // EXIT_NOT_IMPLEMENTED(rt.dcc_max_uncompressed_block_size != 0x00000002); // EXIT_NOT_IMPLEMENTED(rt.dcc.max_compressed_block_size != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.dcc.min_compressed_block_size != 0x00000000); // EXIT_NOT_IMPLEMENTED(rt.dcc.color_transform != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.dcc.enable_overwrite_combiner != false); + EXIT_NOT_IMPLEMENTED(rt.dcc.overwrite_combiner_disable != false); // EXIT_NOT_IMPLEMENTED(rt.dcc.force_independent_blocks != false); + // EXIT_NOT_IMPLEMENTED(rt.dcc.independent_128b_blocks != false); + // EXIT_NOT_IMPLEMENTED(rt.dcc.data_write_on_dcc_clear_to_reg != false); + EXIT_NOT_IMPLEMENTED(rt.dcc.dcc_clear_key_enable != false); EXIT_NOT_IMPLEMENTED(rt.cmask.addr != 0x0000000000000000); EXIT_NOT_IMPLEMENTED(rt.cmask_slice.slice_minus1 != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.fmask.addr != 0x0000000000000000); @@ -579,11 +634,17 @@ static void z_print(const char* func, const HW::DepthRenderTarget& z) printf("\t z_info.tile_surface_enable = %s\n", z.z_info.tile_surface_enable ? "true" : "false"); printf("\t z_info.expclear_enabled = %s\n", z.z_info.expclear_enabled ? "true" : "false"); printf("\t z_info.zrange_precision = 0x%08" PRIx32 "\n", z.z_info.zrange_precision); + printf("\t z_info.embedded_sample_locations = %s\n", z.z_info.embedded_sample_locations ? "true" : "false"); + printf("\t z_info.partially_resident = %s\n", z.z_info.partially_resident ? "true" : "false"); + printf("\t z_info.num_mip_levels = 0x%02" PRIx8 "\n", z.z_info.num_mip_levels); + printf("\t z_info.plane_compression = 0x%02" PRIx8 "\n", z.z_info.plane_compression); printf("\t stencil_info.format = 0x%08" PRIx32 "\n", z.stencil_info.format); printf("\t stencil_info.tile_stencil_disable = %s\n", z.stencil_info.tile_stencil_disable ? "true" : "false"); printf("\t stencil_info.expclear_enabled = %s\n", z.stencil_info.expclear_enabled ? "true" : "false"); printf("\t stencil_info.tile_mode_index = 0x%08" PRIx32 "\n", z.stencil_info.tile_mode_index); printf("\t stencil_info.tile_split = 0x%08" PRIx32 "\n", z.stencil_info.tile_split); + printf("\t stencil_info.texture_compatible_stencil = %s\n", z.stencil_info.texture_compatible_stencil ? "true" : "false"); + printf("\t stencil_info.partially_resident = %s\n", z.stencil_info.partially_resident ? "true" : "false"); printf("\t depth_info.addr5_swizzle_mask = 0x%08" PRIx32 "\n", z.depth_info.addr5_swizzle_mask); printf("\t depth_info.array_mode = 0x%08" PRIx32 "\n", z.depth_info.array_mode); printf("\t depth_info.pipe_config = 0x%08" PRIx32 "\n", z.depth_info.pipe_config); @@ -593,6 +654,9 @@ static void z_print(const char* func, const HW::DepthRenderTarget& z) printf("\t depth_info.num_banks = 0x%08" PRIx32 "\n", z.depth_info.num_banks); printf("\t depth_view.slice_start = 0x%08" PRIx32 "\n", z.depth_view.slice_start); printf("\t depth_view.slice_max = 0x%08" PRIx32 "\n", z.depth_view.slice_max); + printf("\t depth_view.current_mip_level = 0x%02" PRIx8 "\n", z.depth_view.current_mip_level); + printf("\t depth_view.depth_write_disable = %s\n", z.depth_view.depth_write_disable ? "true" : "false"); + printf("\t depth_view.stencil_write_disable = %s\n", z.depth_view.stencil_write_disable ? "true" : "false"); printf("\t htile_surface.linear = 0x%08" PRIx32 "\n", z.htile_surface.linear); printf("\t htile_surface.full_cache = 0x%08" PRIx32 "\n", z.htile_surface.full_cache); printf("\t htile_surface.htile_uses_preload_win = 0x%08" PRIx32 "\n", z.htile_surface.htile_uses_preload_win); @@ -610,6 +674,8 @@ static void z_print(const char* func, const HW::DepthRenderTarget& z) printf("\t htile_data_base_addr = 0x%016" PRIx64 "\n", z.htile_data_base_addr); printf("\t width = 0x%08" PRIx32 "\n", z.width); printf("\t height = 0x%08" PRIx32 "\n", z.height); + printf("\t size.x_max = 0x%04" PRIx16 "\n", z.size.x_max); + printf("\t size.y_max = 0x%04" PRIx16 "\n", z.size.y_max); } // NOLINTNEXTLINE(readability-function-cognitive-complexity) @@ -623,6 +689,10 @@ static void z_check(const HW::DepthRenderTarget& z) EXIT_NOT_IMPLEMENTED(z.z_info.tile_surface_enable != false); EXIT_NOT_IMPLEMENTED(z.z_info.expclear_enabled != false); EXIT_NOT_IMPLEMENTED(z.z_info.zrange_precision != 0); + EXIT_NOT_IMPLEMENTED(z.z_info.embedded_sample_locations != false); + EXIT_NOT_IMPLEMENTED(z.z_info.partially_resident != false); + EXIT_NOT_IMPLEMENTED(z.z_info.num_mip_levels != 0); + EXIT_NOT_IMPLEMENTED(z.z_info.plane_compression != 0); } else { EXIT_NOT_IMPLEMENTED(z.z_info.format != 0x00000003); @@ -631,22 +701,30 @@ static void z_check(const HW::DepthRenderTarget& z) // EXIT_NOT_IMPLEMENTED(z.z_info.tile_surface_enable != true); EXIT_NOT_IMPLEMENTED(z.z_info.expclear_enabled != false); EXIT_NOT_IMPLEMENTED(z.z_info.zrange_precision != 0x00000001); + EXIT_NOT_IMPLEMENTED(z.z_info.embedded_sample_locations != false); + EXIT_NOT_IMPLEMENTED(z.z_info.partially_resident != false); + EXIT_NOT_IMPLEMENTED(z.z_info.num_mip_levels != 0); + EXIT_NOT_IMPLEMENTED(z.z_info.plane_compression != 0); } if (z.stencil_info.format == 0) { - EXIT_NOT_IMPLEMENTED(z.stencil_info.format != 0); - // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_stencil_disable != false); + // EXIT_NOT_IMPLEMENTED(z.stencil_info.format != 0); + // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_stencil_disable != false); EXIT_NOT_IMPLEMENTED(z.stencil_info.expclear_enabled != false); // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_mode_index != 0); // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_split != 0); + // EXIT_NOT_IMPLEMENTED(z.stencil_info.texture_compatible_stencil != true); + EXIT_NOT_IMPLEMENTED(z.stencil_info.partially_resident != false); } else { - EXIT_NOT_IMPLEMENTED(z.stencil_info.format != 0x00000001); + // EXIT_NOT_IMPLEMENTED(z.stencil_info.format != 0x00000001); EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_stencil_disable != true); EXIT_NOT_IMPLEMENTED(z.stencil_info.expclear_enabled != false); // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_mode_index != (Config::IsNeo() ? 0x00000002 : 0)); // EXIT_NOT_IMPLEMENTED(z.stencil_info.tile_split != (Config::IsNeo() ? 0x00000002 : 0)); + // EXIT_NOT_IMPLEMENTED(z.stencil_info.texture_compatible_stencil != true); + EXIT_NOT_IMPLEMENTED(z.stencil_info.partially_resident != false); } if (z.z_info.format != 0 || z.stencil_info.format != 0) @@ -660,6 +738,9 @@ static void z_check(const HW::DepthRenderTarget& z) // EXIT_NOT_IMPLEMENTED(z.depth_info.num_banks != (Config::IsNeo() ? 0x00000002 : 3)); EXIT_NOT_IMPLEMENTED(z.depth_view.slice_start != 0x00000000); EXIT_NOT_IMPLEMENTED(z.depth_view.slice_max != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_view.current_mip_level != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_view.depth_write_disable != false); + EXIT_NOT_IMPLEMENTED(z.depth_view.stencil_write_disable != false); EXIT_NOT_IMPLEMENTED(z.htile_surface.linear != 0x00000000); EXIT_NOT_IMPLEMENTED(z.htile_surface.full_cache != 0x00000000); EXIT_NOT_IMPLEMENTED(z.htile_surface.htile_uses_preload_win != 0x00000000); @@ -677,6 +758,8 @@ static void z_check(const HW::DepthRenderTarget& z) // EXIT_NOT_IMPLEMENTED(z.htile_data_base_addr == 0); // EXIT_NOT_IMPLEMENTED(z.width != 0x00000780); // EXIT_NOT_IMPLEMENTED(z.height != 0x00000438); + EXIT_NOT_IMPLEMENTED(z.size.x_max != 0); + EXIT_NOT_IMPLEMENTED(z.size.y_max != 0); } } @@ -820,6 +903,8 @@ static void d_print(const char* func, const HW::DepthControl& c, const HW::Stenc printf("\t backface_enable = %s\n", c.backface_enable ? "true" : "false"); printf("\t stencilfunc = %" PRIu8 "\n", c.stencilfunc); printf("\t stencilfunc_bf = %" PRIu8 "\n", c.stencilfunc_bf); + printf("\t color_writes_on_depth_fail_enable = %s\n", c.color_writes_on_depth_fail_enable ? "true" : "false"); + printf("\t color_writes_on_depth_pass_disable = %s\n", c.color_writes_on_depth_pass_disable ? "true" : "false"); printf("\t stencil_fail = %" PRIu8 "\n", s.stencil_fail); printf("\t stencil_zpass = %" PRIu8 "\n", s.stencil_zpass); printf("\t stencil_zfail = %" PRIu8 "\n", s.stencil_zfail); @@ -846,6 +931,8 @@ static void d_check(const HW::DepthControl& c, const HW::StencilControl& s, cons EXIT_NOT_IMPLEMENTED(c.backface_enable != false); // EXIT_NOT_IMPLEMENTED(c.stencilfunc != 0); // EXIT_NOT_IMPLEMENTED(c.stencilfunc_bf != 0); + EXIT_NOT_IMPLEMENTED(c.color_writes_on_depth_fail_enable != false); + EXIT_NOT_IMPLEMENTED(c.color_writes_on_depth_pass_disable != false); // EXIT_NOT_IMPLEMENTED(s.stencil_fail != 0); // EXIT_NOT_IMPLEMENTED(s.stencil_zpass != 0); // EXIT_NOT_IMPLEMENTED(s.stencil_zfail != 0); @@ -3736,7 +3823,9 @@ static void FindRenderDepthInfo(uint64_t submit_id, CommandBuffer* /*buffer*/, c if (r->format != VK_FORMAT_UNDEFINED) { - DepthStencilBufferObject vulkan_buffer_info(r->format, r->width, r->height, htile, neo); + bool sampled = ((z.stencil_info.format == 0 && z.z_info.tile_mode_index != 0) || z.stencil_info.texture_compatible_stencil); + + DepthStencilBufferObject vulkan_buffer_info(r->format, r->width, r->height, htile, neo, sampled); EXIT_NOT_IMPLEMENTED(z.z_info.tile_mode_index != 0 && r->depth_tile_swizzle != 0); EXIT_NOT_IMPLEMENTED(r->stencil_tile_swizzle != 0); @@ -4321,6 +4410,26 @@ static void SetDynamicParams(VkCommandBuffer vk_buffer, VulkanPipeline* pipeline } } +static bool shader_is_disabled(HW::Shader* sh_ctx) +{ + if (const auto& vs = sh_ctx->GetVs(); + !vs.vs_embedded && ((vs.vs_regs.data_addr != 0 && ShaderIsDisabled(vs.vs_regs.data_addr)) || + (vs.vs_regs.data_addr == 0 && vs.gs_regs.data_addr == 0 && vs.es_regs.data_addr != 0 && + vs.gs_regs.chksum != 0 && ShaderIsDisabled2(vs.es_regs.data_addr, vs.gs_regs.chksum)))) + { + return true; + } + + if (const auto& ps = sh_ctx->GetPs(); + !ps.ps_embedded && ((ps.ps_regs.chksum == 0 && ShaderIsDisabled(ps.ps_regs.data_addr)) || + (ps.ps_regs.chksum != 0 && ShaderIsDisabled2(ps.ps_regs.data_addr, ps.ps_regs.chksum)))) + { + return true; + } + + return false; +} + void GraphicsRenderDrawIndex(uint64_t submit_id, CommandBuffer* buffer, HW::Context* ctx, HW::UserConfig* ucfg, HW::Shader* sh_ctx, uint32_t index_type_and_size, uint32_t index_count, const void* index_addr, uint32_t flags, uint32_t type) { @@ -4334,12 +4443,7 @@ void GraphicsRenderDrawIndex(uint64_t submit_id, CommandBuffer* buffer, HW::Cont Core::LockGuard lock(g_render_ctx->GetMutex()); - if (const auto& vs = sh_ctx->GetVs(); !vs.vs_embedded && ShaderIsDisabled(vs.vs_regs.GetGpuAddress())) - { - return; - } - - if (const auto& ps = sh_ctx->GetPs(); !ps.ps_embedded && ShaderIsDisabled(ps.ps_regs.data_addr)) + if (shader_is_disabled(sh_ctx)) { return; } @@ -4482,12 +4586,7 @@ void GraphicsRenderDrawIndexAuto(uint64_t submit_id, CommandBuffer* buffer, HW:: Core::LockGuard lock(g_render_ctx->GetMutex()); - if (const auto& vs = sh_ctx->GetVs(); !vs.vs_embedded && ShaderIsDisabled(vs.vs_regs.GetGpuAddress())) - { - return; - } - - if (const auto& ps = sh_ctx->GetPs(); !ps.ps_embedded && ShaderIsDisabled(ps.ps_regs.data_addr)) + if (shader_is_disabled(sh_ctx)) { return; } diff --git a/source/emulator/src/Graphics/GraphicsRun.cpp b/source/emulator/src/Graphics/GraphicsRun.cpp index 7407760..a9689b2 100644 --- a/source/emulator/src/Graphics/GraphicsRun.cpp +++ b/source/emulator/src/Graphics/GraphicsRun.cpp @@ -29,6 +29,10 @@ #define KYTY_HW_SH_PARSER_ARGS \ [[maybe_unused]] CommandProcessor *cp, uint32_t cmd_id, [[maybe_unused]] uint32_t cmd_offset, const uint32_t *buffer, \ [[maybe_unused]] uint32_t dw +#define KYTY_HW_CTX_INDIRECT_ARGS CommandProcessor *cp, [[maybe_unused]] uint32_t cmd_offset, uint32_t value +#define KYTY_HW_UC_INDIRECT_ARGS CommandProcessor *cp, [[maybe_unused]] uint32_t cmd_offset, uint32_t value +#define KYTY_HW_SH_INDIRECT_ARGS CommandProcessor *cp, [[maybe_unused]] uint32_t cmd_offset, uint32_t value + #define KYTY_HW_CTX_PARSER(f) static uint32_t f(KYTY_HW_CTX_PARSER_ARGS) #define KYTY_HW_UC_PARSER(f) static uint32_t f(KYTY_HW_UC_PARSER_ARGS) #define KYTY_HW_SH_PARSER(f) static uint32_t f(KYTY_HW_SH_PARSER_ARGS) @@ -111,8 +115,9 @@ public: void WriteConstRam(uint32_t offset, const uint32_t* src, uint32_t dw_num); void DumpConstRam(uint32_t* dst, uint32_t offset, uint32_t dw_num); - void WaitRegMem(uint32_t func, bool me, bool mem, const uint32_t* addr, uint32_t ref, uint32_t mask, uint32_t poll); - void WriteData(uint32_t* dst, const uint32_t* src, uint32_t dw_num, uint32_t write_control); + void WaitRegMem32(uint32_t func, const uint32_t* addr, uint32_t ref, uint32_t mask, uint32_t poll); + void WaitRegMem64(uint32_t func, const uint64_t* addr, uint64_t ref, uint64_t mask, uint32_t poll); + void WriteData(uint32_t* dst, const uint32_t* src, uint32_t dw_num, uint32_t write_control, bool custom); void Run(uint32_t* data, uint32_t num_dw); @@ -307,17 +312,23 @@ private: int m_done_num = 0; }; -using hw_ctx_parser_func_t = uint32_t (*)(KYTY_HW_CTX_PARSER_ARGS); -using hw_uc_parser_func_t = uint32_t (*)(KYTY_HW_UC_PARSER_ARGS); -using hw_sh_parser_func_t = uint32_t (*)(KYTY_HW_SH_PARSER_ARGS); -using cp_op_parser_func_t = uint32_t (*)(KYTY_CP_OP_PARSER_ARGS); +using hw_ctx_parser_func_t = uint32_t (*)(KYTY_HW_CTX_PARSER_ARGS); +using hw_uc_parser_func_t = uint32_t (*)(KYTY_HW_UC_PARSER_ARGS); +using hw_sh_parser_func_t = uint32_t (*)(KYTY_HW_SH_PARSER_ARGS); +using cp_op_parser_func_t = uint32_t (*)(KYTY_CP_OP_PARSER_ARGS); +using hw_ctx_indirect_func_t = void (*)(KYTY_HW_CTX_INDIRECT_ARGS); +using hw_uc_indirect_func_t = void (*)(KYTY_HW_UC_INDIRECT_ARGS); +using hw_sh_indirect_func_t = void (*)(KYTY_HW_SH_INDIRECT_ARGS); -static hw_ctx_parser_func_t g_hw_ctx_func[Pm4::CX_NUM] = {}; -static hw_sh_parser_func_t g_hw_sh_func[Pm4::SH_NUM] = {}; -static hw_sh_parser_func_t g_hw_uc_func[Pm4::UC_NUM] = {}; -static hw_sh_parser_func_t g_hw_sh_custom_func[Pm4::R_NUM] = {}; -static cp_op_parser_func_t g_cp_op_func[256] = {}; -static cp_op_parser_func_t g_cp_op_custom_func[Pm4::R_NUM] = {}; +static hw_ctx_parser_func_t g_hw_ctx_func[Pm4::CX_NUM] = {}; +static hw_ctx_indirect_func_t g_hw_ctx_indirect_func[Pm4::CX_NUM] = {}; +static hw_sh_parser_func_t g_hw_sh_func[Pm4::SH_NUM] = {}; +static hw_sh_indirect_func_t g_hw_sh_indirect_func[Pm4::SH_NUM] = {}; +static hw_uc_parser_func_t g_hw_uc_func[Pm4::UC_NUM] = {}; +static hw_uc_indirect_func_t g_hw_uc_indirect_func[Pm4::UC_NUM] = {}; +static hw_sh_parser_func_t g_hw_sh_custom_func[Pm4::R_NUM] = {}; +static cp_op_parser_func_t g_cp_op_func[256] = {}; +static cp_op_parser_func_t g_cp_op_custom_func[Pm4::R_NUM] = {}; static Gpu* g_gpu = nullptr; @@ -650,11 +661,9 @@ void CommandProcessor::DumpConstRam(uint32_t* dst, uint32_t offset, uint32_t dw_ GraphicsRenderMemoryFlush(reinterpret_cast(dst), static_cast(dw_num) * 4); } -void CommandProcessor::WaitRegMem(uint32_t func, bool me, bool mem, const uint32_t* addr, uint32_t ref, uint32_t mask, uint32_t poll) +void CommandProcessor::WaitRegMem32(uint32_t func, const uint32_t* addr, uint32_t ref, uint32_t mask, uint32_t poll) { EXIT_NOT_IMPLEMENTED(func != 3); - EXIT_NOT_IMPLEMENTED(!me); - EXIT_NOT_IMPLEMENTED(!mem); EXIT_NOT_IMPLEMENTED(poll != 10); BufferFlush(); @@ -665,11 +674,25 @@ void CommandProcessor::WaitRegMem(uint32_t func, bool me, bool mem, const uint32 } } -void CommandProcessor::WriteData(uint32_t* dst, const uint32_t* src, uint32_t dw_num, uint32_t write_control) +void CommandProcessor::WaitRegMem64(uint32_t func, const uint64_t* addr, uint64_t ref, uint64_t mask, uint32_t poll) +{ + EXIT_NOT_IMPLEMENTED(func != 3); + EXIT_NOT_IMPLEMENTED(poll != 10); + + BufferFlush(); + + while (((*addr) & mask) != ref) + { + Core::Thread::SleepMicro(10); + } +} + +void CommandProcessor::WriteData(uint32_t* dst, const uint32_t* src, uint32_t dw_num, uint32_t write_control, bool custom) { Core::LockGuard lock(m_mutex); - EXIT_NOT_IMPLEMENTED(write_control != 0x04100500); + EXIT_NOT_IMPLEMENTED(!custom && write_control != 0x04100500); + EXIT_NOT_IMPLEMENTED(custom && write_control != 0x01000004); GpuMemoryCheckAccessViolation(reinterpret_cast(dst), static_cast(dw_num) * 4); @@ -1152,7 +1175,7 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_ { GraphicsRenderWriteAtEndOfPipe32(m_sumbit_id, m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); } else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x05) || - (eop_event_type == 0x2f && event_index == 0x06)) && + (eop_event_type == 0x2f && event_index == 0x06) || (eop_event_type == 0x14 && event_index == 0x00)) && cache_action == 0x38 && source64 && !with_interrupt) { GraphicsRenderWriteAtEndOfPipeWithWriteBack64(m_sumbit_id, m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); @@ -1216,8 +1239,11 @@ void CommandProcessor::TriggerEvent(uint32_t event_type, uint32_t event_index) // FlushAndInvalidateCbPixelData MemoryBarrier(); } else if ((event_type == 0x0000002c) && event_index == 0x00000007) + { // NOLINT + // FlushAndInvalidateDbMeta + } else if ((event_type == 0x00000010) && event_index == 0x00000000) { - // FlushAndInvalidateDbMeta + // PsPartialFlush } else { EXIT("unknown event type: 0x%08" PRIx32 ", 0x%08" PRIx32 "\n", event_type, event_index); @@ -1528,16 +1554,32 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_color_info) HW::ColorInfo r; - r.fmask_compression_enable = (buffer[4] & 0x4000u) != 0; - r.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u; - r.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0; - r.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0; - r.neo_mode = (buffer[4] & 0x80000000u) != 0; - r.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u; - r.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u; - r.format = (buffer[4] >> 2u) & 0x1fu; - r.channel_type = (buffer[4] >> 8u) & 0x7u; - r.channel_order = (buffer[4] >> 11u) & 0x3u; + // r.fmask_compression_enable = (buffer[4] & 0x4000u) != 0; + // // r.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u; + // r.fmask_data_compression_disable = ((buffer[4] >> 26u) & 0x1u) != 0; + // r.fmask_one_frag_mode = ((buffer[4] >> 27u) & 0x1u) != 0; + // r.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0; + // r.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0; + // r.neo_mode = (buffer[4] & 0x80000000u) != 0; + // r.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u; + // r.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u; + // r.format = (buffer[4] >> 2u) & 0x1fu; + // r.channel_type = (buffer[4] >> 8u) & 0x7u; + // r.channel_order = (buffer[4] >> 11u) & 0x3u; + r.format = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, FORMAT); + r.channel_type = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, NUMBER_TYPE); + r.channel_order = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, COMP_SWAP); + r.cmask_fast_clear_enable = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, FAST_CLEAR) != 0; + r.fmask_compression_enable = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, COMPRESSION) != 0; + r.blend_clamp = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, BLEND_CLAMP) != 0; + r.blend_bypass = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, BLEND_BYPASS) != 0; + r.round_mode = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, ROUND_MODE) != 0; + r.cmask_tile_mode = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, CMASK_IS_LINEAR); + r.fmask_data_compression_disable = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, FMASK_COMPRESSION_DISABLE) != 0; + r.fmask_one_frag_mode = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, FMASK_COMPRESS_1FRAG_ONLY) != 0; + r.dcc_compression_enable = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, DCC_ENABLE) != 0; + r.cmask_tile_mode_neo = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, CMASK_ADDR_TYPE); + r.neo_mode = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, ALT_TILE_MODE) != 0; cp->GetCtx()->SetColorInfo(param, r); @@ -1561,14 +1603,16 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_control) HW::DepthControl r; - r.stencil_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCIL_ENABLE) != 0; - r.z_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, Z_ENABLE) != 0; - r.z_write_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, Z_WRITE_ENABLE) != 0; - r.depth_bounds_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, DEPTH_BOUNDS_ENABLE) != 0; - r.zfunc = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, ZFUNC); - r.backface_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, BACKFACE_ENABLE) != 0; - r.stencilfunc = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCILFUNC); - r.stencilfunc_bf = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCILFUNC_BF); + r.stencil_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCIL_ENABLE) != 0; + r.z_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, Z_ENABLE) != 0; + r.z_write_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, Z_WRITE_ENABLE) != 0; + r.depth_bounds_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, DEPTH_BOUNDS_ENABLE) != 0; + r.zfunc = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, ZFUNC); + r.backface_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, BACKFACE_ENABLE) != 0; + r.stencilfunc = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCILFUNC); + r.stencilfunc_bf = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, STENCILFUNC_BF); + r.color_writes_on_depth_fail_enable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, ENABLE_COLOR_WRITES_ON_DEPTH_FAIL) != 0; + r.color_writes_on_depth_pass_disable = KYTY_PM4_GET(buffer[0], DB_DEPTH_CONTROL, DISABLE_COLOR_WRITES_ON_DEPTH_PASS) != 0; cp->GetCtx()->SetDepthControl(r); @@ -1583,17 +1627,27 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_render_target) if (cmd_id == 0xC0016900) { - HW::DepthRenderTargetZInfo r; + HW::DepthZInfo r; - r.expclear_enabled = (buffer[0] & 0x08000000u) != 0; + // r.expclear_enabled = (buffer[0] & 0x08000000u) != 0; + // r.format = (buffer[0] >> Pm4::DB_Z_INFO_FORMAT_SHIFT) & Pm4::DB_Z_INFO_FORMAT_MASK; + // r.num_samples = (buffer[0] >> Pm4::DB_Z_INFO_NUM_SAMPLES_SHIFT) & Pm4::DB_Z_INFO_NUM_SAMPLES_MASK; + // r.tile_mode_index = (buffer[0] >> Pm4::DB_Z_INFO_TILE_MODE_INDEX_SHIFT) & Pm4::DB_Z_INFO_TILE_MODE_INDEX_MASK; + // r.tile_surface_enable = ((buffer[0] >> Pm4::DB_Z_INFO_TILE_SURFACE_ENABLE_SHIFT) & Pm4::DB_Z_INFO_TILE_SURFACE_ENABLE_MASK) !=0 + // r.zrange_precision = (buffer[0] >> Pm4::DB_Z_INFO_ZRANGE_PRECISION_SHIFT) & Pm4::DB_Z_INFO_ZRANGE_PRECISION_MASK; - r.format = (buffer[0] >> Pm4::DB_Z_INFO_FORMAT_SHIFT) & Pm4::DB_Z_INFO_FORMAT_MASK; - r.num_samples = (buffer[0] >> Pm4::DB_Z_INFO_NUM_SAMPLES_SHIFT) & Pm4::DB_Z_INFO_NUM_SAMPLES_MASK; - r.tile_mode_index = (buffer[0] >> Pm4::DB_Z_INFO_TILE_MODE_INDEX_SHIFT) & Pm4::DB_Z_INFO_TILE_MODE_INDEX_MASK; - r.tile_surface_enable = ((buffer[0] >> Pm4::DB_Z_INFO_TILE_SURFACE_ENABLE_SHIFT) & Pm4::DB_Z_INFO_TILE_SURFACE_ENABLE_MASK) != 0; - r.zrange_precision = (buffer[0] >> Pm4::DB_Z_INFO_ZRANGE_PRECISION_SHIFT) & Pm4::DB_Z_INFO_ZRANGE_PRECISION_MASK; + r.format = KYTY_PM4_GET(buffer[0], DB_Z_INFO, FORMAT); + r.num_samples = KYTY_PM4_GET(buffer[0], DB_Z_INFO, NUM_SAMPLES); + r.embedded_sample_locations = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ITERATE_FLUSH) != 0; + r.partially_resident = KYTY_PM4_GET(buffer[0], DB_Z_INFO, PARTIALLY_RESIDENT) != 0; + r.num_mip_levels = KYTY_PM4_GET(buffer[0], DB_Z_INFO, MAXMIP); + r.tile_mode_index = KYTY_PM4_GET(buffer[0], DB_Z_INFO, TILE_MODE_INDEX); + r.plane_compression = KYTY_PM4_GET(buffer[0], DB_Z_INFO, DECOMPRESS_ON_N_ZPLANES); + r.expclear_enabled = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ALLOW_EXPCLEAR) != 0; + r.tile_surface_enable = KYTY_PM4_GET(buffer[0], DB_Z_INFO, TILE_SURFACE_ENABLE) != 0; + r.zrange_precision = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ZRANGE_PRECISION); - cp->GetCtx()->SetDepthRenderTargetZInfo(r); + cp->GetCtx()->SetDepthZInfo(r); } else if (cmd_id == 0xC0086900) { if (dw >= 22 && buffer[8] == 0xC0016900 && buffer[9] == Pm4::DB_DEPTH_INFO && buffer[11] == 0xC0016900 && @@ -1604,26 +1658,48 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_render_target) HW::DepthRenderTarget z; - z.z_info.expclear_enabled = (buffer[0] & 0x08000000u) != 0; - z.z_info.format = (buffer[0] >> Pm4::DB_Z_INFO_FORMAT_SHIFT) & Pm4::DB_Z_INFO_FORMAT_MASK; - z.z_info.num_samples = (buffer[0] >> Pm4::DB_Z_INFO_NUM_SAMPLES_SHIFT) & Pm4::DB_Z_INFO_NUM_SAMPLES_MASK; - z.z_info.tile_mode_index = (buffer[0] >> Pm4::DB_Z_INFO_TILE_MODE_INDEX_SHIFT) & Pm4::DB_Z_INFO_TILE_MODE_INDEX_MASK; - z.z_info.tile_surface_enable = KYTY_PM4_GET(buffer[0], DB_Z_INFO, TILE_SURFACE_ENABLE) != 0; - z.z_info.zrange_precision = (buffer[0] >> Pm4::DB_Z_INFO_ZRANGE_PRECISION_SHIFT) & Pm4::DB_Z_INFO_ZRANGE_PRECISION_MASK; + // z.z_info.expclear_enabled = (buffer[0] & 0x08000000u) != 0; + // z.z_info.format = (buffer[0] >> Pm4::DB_Z_INFO_FORMAT_SHIFT) & Pm4::DB_Z_INFO_FORMAT_MASK; + // z.z_info.num_samples = (buffer[0] >> Pm4::DB_Z_INFO_NUM_SAMPLES_SHIFT) & Pm4::DB_Z_INFO_NUM_SAMPLES_MASK; + // z.z_info.tile_mode_index = (buffer[0] >> Pm4::DB_Z_INFO_TILE_MODE_INDEX_SHIFT) & + // Pm4::DB_Z_INFO_TILE_MODE_INDEX_MASK; z.z_info.tile_surface_enable = KYTY_PM4_GET(buffer[0], DB_Z_INFO, + // TILE_SURFACE_ENABLE) != 0; z.z_info.zrange_precision = (buffer[0] >> Pm4::DB_Z_INFO_ZRANGE_PRECISION_SHIFT) & + // Pm4::DB_Z_INFO_ZRANGE_PRECISION_MASK; - z.stencil_info.expclear_enabled = (buffer[1] & 0x08000000u) != 0; - z.stencil_info.tile_split = (buffer[1] >> 13u) & 0x7u; - z.stencil_info.format = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, FORMAT); - z.stencil_info.tile_mode_index = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_MODE_INDEX); - z.stencil_info.tile_stencil_disable = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_STENCIL_DISABLE); + z.z_info.format = KYTY_PM4_GET(buffer[0], DB_Z_INFO, FORMAT); + z.z_info.num_samples = KYTY_PM4_GET(buffer[0], DB_Z_INFO, NUM_SAMPLES); + z.z_info.embedded_sample_locations = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ITERATE_FLUSH) != 0; + z.z_info.partially_resident = KYTY_PM4_GET(buffer[0], DB_Z_INFO, PARTIALLY_RESIDENT) != 0; + z.z_info.num_mip_levels = KYTY_PM4_GET(buffer[0], DB_Z_INFO, MAXMIP); + z.z_info.tile_mode_index = KYTY_PM4_GET(buffer[0], DB_Z_INFO, TILE_MODE_INDEX); + z.z_info.plane_compression = KYTY_PM4_GET(buffer[0], DB_Z_INFO, DECOMPRESS_ON_N_ZPLANES); + z.z_info.expclear_enabled = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ALLOW_EXPCLEAR) != 0; + z.z_info.tile_surface_enable = KYTY_PM4_GET(buffer[0], DB_Z_INFO, TILE_SURFACE_ENABLE) != 0; + z.z_info.zrange_precision = KYTY_PM4_GET(buffer[0], DB_Z_INFO, ZRANGE_PRECISION); + + // z.stencil_info.expclear_enabled = (buffer[1] & 0x08000000u) != 0; + // z.stencil_info.tile_split = (buffer[1] >> 13u) & 0x7u; + // z.stencil_info.format = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, FORMAT); + // z.stencil_info.tile_mode_index = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_MODE_INDEX); + // z.stencil_info.tile_stencil_disable = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_STENCIL_DISABLE); + z.stencil_info.format = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, FORMAT); + z.stencil_info.texture_compatible_stencil = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, ITERATE_FLUSH) != 0; + z.stencil_info.partially_resident = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, PARTIALLY_RESIDENT) != 0; + z.stencil_info.tile_split = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, RESERVED_FIELD_1); + z.stencil_info.tile_mode_index = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_MODE_INDEX); + z.stencil_info.expclear_enabled = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, ALLOW_EXPCLEAR) != 0; + z.stencil_info.tile_stencil_disable = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_STENCIL_DISABLE) != 0; z.z_read_base_addr = static_cast(buffer[2]) << 8u; z.stencil_read_base_addr = static_cast(buffer[3]) << 8u; z.z_write_base_addr = static_cast(buffer[4]) << 8u; z.stencil_write_base_addr = static_cast(buffer[5]) << 8u; + // DB_DEPTH_SIZE z.pitch_div8_minus1 = (buffer[6] >> Pm4::DB_DEPTH_SIZE_PITCH_TILE_MAX_SHIFT) & Pm4::DB_DEPTH_SIZE_PITCH_TILE_MAX_MASK; z.height_div8_minus1 = (buffer[6] >> Pm4::DB_DEPTH_SIZE_HEIGHT_TILE_MAX_SHIFT) & Pm4::DB_DEPTH_SIZE_HEIGHT_TILE_MAX_MASK; + + // DB_DEPTH_SLICE z.slice_div64_minus1 = (buffer[7] >> Pm4::DB_DEPTH_SLICE_SLICE_TILE_MAX_SHIFT) & Pm4::DB_DEPTH_SLICE_SLICE_TILE_MAX_MASK; z.depth_info.addr5_swizzle_mask = KYTY_PM4_GET(buffer[10], DB_DEPTH_INFO, ADDR5_SWIZZLE_MASK); @@ -1634,8 +1710,16 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_depth_render_target) z.depth_info.macro_tile_aspect = KYTY_PM4_GET(buffer[10], DB_DEPTH_INFO, MACRO_TILE_ASPECT); z.depth_info.num_banks = (buffer[10] >> Pm4::DB_DEPTH_INFO_NUM_BANKS_SHIFT) & Pm4::DB_DEPTH_INFO_NUM_BANKS_MASK; - z.depth_view.slice_start = (buffer[13] >> Pm4::DB_DEPTH_VIEW_SLICE_START_SHIFT) & Pm4::DB_DEPTH_VIEW_SLICE_START_MASK; - z.depth_view.slice_max = (buffer[13] >> Pm4::DB_DEPTH_VIEW_SLICE_MAX_SHIFT) & Pm4::DB_DEPTH_VIEW_SLICE_MAX_MASK; + // z.depth_view.slice_start = (buffer[13] >> Pm4::DB_DEPTH_VIEW_SLICE_START_SHIFT) & Pm4::DB_DEPTH_VIEW_SLICE_START_MASK; + // z.depth_view.slice_max = (buffer[13] >> Pm4::DB_DEPTH_VIEW_SLICE_MAX_SHIFT) & Pm4::DB_DEPTH_VIEW_SLICE_MAX_MASK; + + z.depth_view.slice_start = + KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, SLICE_START) + (KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, SLICE_START_HI) << 11u); + z.depth_view.slice_max = + KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, SLICE_MAX) + (KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, SLICE_MAX_HI) << 11u); + z.depth_view.depth_write_disable = KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, Z_READ_ONLY) != 0; + z.depth_view.stencil_write_disable = KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, STENCIL_READ_ONLY) != 0; + z.depth_view.current_mip_level = KYTY_PM4_GET(buffer[13], DB_DEPTH_VIEW, MIPID); z.htile_data_base_addr = static_cast(buffer[16]) << 8u; @@ -1815,7 +1899,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_target) { EXIT_NOT_IMPLEMENTED(cmd_id != 0xC00E6900 && cmd_id != 0xC00B6900); - uint32_t param = (cmd_offset - Pm4::CB_COLOR0_BASE) / 15; + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_BASE) / 15; uint32_t count = 11; @@ -1827,55 +1911,96 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_target) HW::ColorView view; HW::ColorInfo info; HW::ColorAttrib attrib; - HW::ColorDcc dcc; + HW::ColorDccControl dcc; HW::ColorCmask cmask; HW::ColorCmaskSlice cmask_slice; HW::ColorFmask fmask; HW::ColorFmaskSlice fmask_slice; - base.addr = static_cast(buffer[0]) << 8u; - pitch.pitch_div8_minus1 = buffer[1] & 0x7ffu; - pitch.fmask_pitch_div8_minus1 = (buffer[1] >> 20u) & 0x7ffu; - slice.slice_div64_minus1 = buffer[2] & 0x3fffffu; - view.base_array_slice_index = buffer[3] & 0x7ffu; - view.last_array_slice_index = (buffer[3] >> 13u) & 0x7ffu; - info.fmask_compression_enable = (buffer[4] & 0x4000u) != 0; - info.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u; - info.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0; - info.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0; - info.neo_mode = (buffer[4] & 0x80000000u) != 0; - info.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u; - info.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u; - info.format = (buffer[4] >> 2u) & 0x1fu; - info.channel_type = (buffer[4] >> 8u) & 0x7u; - info.channel_order = (buffer[4] >> 11u) & 0x3u; - attrib.force_dest_alpha_to_one = (buffer[5] & 0x20000u) != 0; - attrib.tile_mode = buffer[5] & 0x1fu; - attrib.fmask_tile_mode = (buffer[5] >> 5u) & 0x1fu; - attrib.num_samples = (buffer[5] >> 12u) & 0x7u; - attrib.num_fragments = (buffer[5] >> 15u) & 0x3u; - dcc.max_uncompressed_block_size = (buffer[6] >> 2u) & 0x3u; - dcc.max_compressed_block_size = (buffer[6] >> 5u) & 0x3u; - dcc.min_compressed_block_size = (buffer[6] >> 4u) & 0x1u; - dcc.color_transform = (buffer[6] >> 7u) & 0x3u; - dcc.enable_overwrite_combiner = (buffer[6] & 0x1u) != 0; - dcc.force_independent_blocks = (buffer[6] & 0x200u) != 0; - cmask.addr = static_cast(buffer[7]) << 8u; - cmask_slice.slice_minus1 = buffer[8] & 0x3fffu; - fmask.addr = static_cast(buffer[9]) << 8u; - fmask_slice.slice_minus1 = buffer[10] & 0x3fffffu; + base.addr = static_cast(buffer[0]) << 8u; + pitch.pitch_div8_minus1 = buffer[1] & 0x7ffu; + pitch.fmask_pitch_div8_minus1 = (buffer[1] >> 20u) & 0x7ffu; + slice.slice_div64_minus1 = buffer[2] & 0x3fffffu; - ctx->SetColorBase(param, base); - ctx->SetColorPitch(param, pitch); - ctx->SetColorSlice(param, slice); - ctx->SetColorView(param, view); - ctx->SetColorInfo(param, info); - ctx->SetColorAttrib(param, attrib); - ctx->SetColorDcc(param, dcc); - ctx->SetColorCmask(param, cmask); - ctx->SetColorCmaskSlice(param, cmask_slice); - ctx->SetColorFmask(param, fmask); - ctx->SetColorFmaskSlice(param, fmask_slice); + // view.base_array_slice_index = buffer[3] & 0x7ffu; + // view.last_array_slice_index = (buffer[3] >> 13u) & 0x7ffu; + view.base_array_slice_index = KYTY_PM4_GET(buffer[3], CB_COLOR0_VIEW, SLICE_START); + view.last_array_slice_index = KYTY_PM4_GET(buffer[3], CB_COLOR0_VIEW, SLICE_MAX); + view.current_mip_level = KYTY_PM4_GET(buffer[3], CB_COLOR0_VIEW, MIP_LEVEL); + + // info.fmask_compression_enable = (buffer[4] & 0x4000u) != 0; + // + // // info.fmask_compression_mode = (buffer[4] >> 26u) & 0x3u; + // info.fmask_data_compression_disable = ((buffer[4] >> 26u) & 0x1u) != 0; + // info.fmask_one_frag_mode = ((buffer[4] >> 27u) & 0x1u) != 0; + // + // info.cmask_fast_clear_enable = (buffer[4] & 0x2000u) != 0; + // info.dcc_compression_enable = (buffer[4] & 0x10000000u) != 0; + // info.neo_mode = (buffer[4] & 0x80000000u) != 0; + // info.cmask_tile_mode = (buffer[4] >> 19u) & 0x1u; + // info.cmask_tile_mode_neo = (buffer[4] >> 29u) & 0x3u; + // info.format = (buffer[4] >> 2u) & 0x1fu; + // info.channel_type = (buffer[4] >> 8u) & 0x7u; + // info.channel_order = (buffer[4] >> 11u) & 0x3u; + info.format = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, FORMAT); + info.channel_type = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, NUMBER_TYPE); + info.channel_order = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, COMP_SWAP); + info.cmask_fast_clear_enable = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, FAST_CLEAR) != 0; + info.fmask_compression_enable = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, COMPRESSION) != 0; + info.blend_clamp = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, BLEND_CLAMP) != 0; + info.blend_bypass = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, BLEND_BYPASS) != 0; + info.round_mode = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, ROUND_MODE) != 0; + info.cmask_tile_mode = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, CMASK_IS_LINEAR); + info.fmask_data_compression_disable = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, FMASK_COMPRESSION_DISABLE) != 0; + info.fmask_one_frag_mode = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, FMASK_COMPRESS_1FRAG_ONLY) != 0; + info.dcc_compression_enable = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, DCC_ENABLE) != 0; + info.cmask_tile_mode_neo = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, CMASK_ADDR_TYPE); + info.neo_mode = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, ALT_TILE_MODE) != 0; + + // attrib.force_dest_alpha_to_one = (buffer[5] & 0x20000u) != 0; + // attrib.tile_mode = buffer[5] & 0x1fu; + // attrib.fmask_tile_mode = (buffer[5] >> 5u) & 0x1fu; + // attrib.num_samples = (buffer[5] >> 12u) & 0x7u; + // attrib.num_fragments = (buffer[5] >> 15u) & 0x3u; + attrib.force_dest_alpha_to_one = KYTY_PM4_GET(buffer[5], CB_COLOR0_ATTRIB, FORCE_DST_ALPHA_1) != 0; + attrib.tile_mode = KYTY_PM4_GET(buffer[5], CB_COLOR0_ATTRIB, TILE_MODE_INDEX); + attrib.fmask_tile_mode = KYTY_PM4_GET(buffer[5], CB_COLOR0_ATTRIB, FMASK_TILE_MODE_INDEX); + attrib.num_samples = KYTY_PM4_GET(buffer[5], CB_COLOR0_ATTRIB, NUM_SAMPLES); + attrib.num_fragments = KYTY_PM4_GET(buffer[5], CB_COLOR0_ATTRIB, NUM_FRAGMENTS); + + // dcc.max_uncompressed_block_size = (buffer[6] >> 2u) & 0x3u; + // dcc.max_compressed_block_size = (buffer[6] >> 5u) & 0x3u; + // dcc.min_compressed_block_size = (buffer[6] >> 4u) & 0x1u; + // dcc.color_transform = (buffer[6] >> 7u) & 0x3u; + // dcc.overwrite_combiner_disable = (buffer[6] & 0x1u) != 0; + // dcc.independent_64b_blocks = (buffer[6] & 0x200u) != 0; + dcc.overwrite_combiner_disable = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, OVERWRITE_COMBINER_DISABLE) != 0; + dcc.dcc_clear_key_enable = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, KEY_CLEAR_ENABLE) != 0; + dcc.max_uncompressed_block_size = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, MAX_UNCOMPRESSED_BLOCK_SIZE); + dcc.min_compressed_block_size = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, MIN_COMPRESSED_BLOCK_SIZE); + dcc.max_compressed_block_size = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, MAX_COMPRESSED_BLOCK_SIZE); + dcc.color_transform = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, COLOR_TRANSFORM); + dcc.independent_64b_blocks = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, INDEPENDENT_64B_BLOCKS) != 0; + dcc.data_write_on_dcc_clear_to_reg = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, ENABLE_CONSTANT_ENCODE_REG_WRITE) != 0; + dcc.independent_128b_blocks = KYTY_PM4_GET(buffer[6], CB_COLOR0_DCC_CONTROL, INDEPENDENT_128B_BLOCKS) != 0; + + cmask.addr = static_cast(buffer[7]) << 8u; + cmask_slice.slice_minus1 = buffer[8] & 0x3fffu; + + fmask.addr = static_cast(buffer[9]) << 8u; + fmask_slice.slice_minus1 = buffer[10] & 0x3fffffu; + + ctx->SetColorBase(slot, base); + ctx->SetColorPitch(slot, pitch); + ctx->SetColorSlice(slot, slice); + ctx->SetColorView(slot, view); + ctx->SetColorInfo(slot, info); + ctx->SetColorAttrib(slot, attrib); + ctx->SetColorDccControl(slot, dcc); + ctx->SetColorCmask(slot, cmask); + ctx->SetColorCmaskSlice(slot, cmask_slice); + ctx->SetColorFmask(slot, fmask); + ctx->SetColorFmaskSlice(slot, fmask_slice); if (cmd_id == 0xC00E6900) { @@ -1889,9 +2014,9 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_target) clear_word1.word1 = buffer[12]; dcc_addr.addr = static_cast(buffer[13]) << 8u; - ctx->SetColorClearWord0(param, clear_word0); - ctx->SetColorClearWord1(param, clear_word1); - ctx->SetColorDccAddr(param, dcc_addr); + ctx->SetColorClearWord0(slot, clear_word0); + ctx->SetColorClearWord1(slot, clear_word1); + ctx->SetColorDccAddr(slot, dcc_addr); } if (dw >= count + 2 && buffer[count] == 0xC0001000) @@ -1901,7 +2026,7 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_target) size.width = (buffer[count + 1] >> 0u) & 0xffffu; size.height = (buffer[count + 1] >> 16u) & 0xffffu; - ctx->SetColorSize(param, size); + ctx->SetColorSize(slot, size); count += 2; } @@ -1994,17 +2119,24 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_stencil_info) EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0016900); EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::DB_STENCIL_INFO); - HW::DepthRenderTargetStencilInfo r; + HW::DepthStencilInfo r; - r.expclear_enabled = (buffer[0] & 0x08000000u) != 0; - r.tile_split = (buffer[0] >> 13u) & 0x7u; + // r.expclear_enabled = (buffer[0] & 0x08000000u) != 0; + // r.tile_split = (buffer[0] >> 13u) & 0x7u; + // r.format = (buffer[0] >> Pm4::DB_STENCIL_INFO_FORMAT_SHIFT) & Pm4::DB_STENCIL_INFO_FORMAT_MASK; + // r.tile_mode_index = (buffer[0] >> Pm4::DB_STENCIL_INFO_TILE_MODE_INDEX_SHIFT) & Pm4::DB_STENCIL_INFO_TILE_MODE_INDEX_MASK; + // r.tile_stencil_disable = + // ((buffer[0] >> Pm4::DB_STENCIL_INFO_TILE_STENCIL_DISABLE_SHIFT) & Pm4::DB_STENCIL_INFO_TILE_STENCIL_DISABLE_MASK) != 0; - r.format = (buffer[0] >> Pm4::DB_STENCIL_INFO_FORMAT_SHIFT) & Pm4::DB_STENCIL_INFO_FORMAT_MASK; - r.tile_mode_index = (buffer[0] >> Pm4::DB_STENCIL_INFO_TILE_MODE_INDEX_SHIFT) & Pm4::DB_STENCIL_INFO_TILE_MODE_INDEX_MASK; - r.tile_stencil_disable = - ((buffer[0] >> Pm4::DB_STENCIL_INFO_TILE_STENCIL_DISABLE_SHIFT) & Pm4::DB_STENCIL_INFO_TILE_STENCIL_DISABLE_MASK) != 0; + r.format = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, FORMAT); + r.texture_compatible_stencil = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, ITERATE_FLUSH) != 0; + r.partially_resident = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, PARTIALLY_RESIDENT) != 0; + r.tile_split = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, RESERVED_FIELD_1); + r.tile_mode_index = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_MODE_INDEX); + r.expclear_enabled = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, ALLOW_EXPCLEAR) != 0; + r.tile_stencil_disable = KYTY_PM4_GET(buffer[1], DB_STENCIL_INFO, TILE_STENCIL_DISABLE) != 0; - cp->GetCtx()->SetDepthRenderTargetStencilInfo(r); + cp->GetCtx()->SetDepthStencilInfo(r); return 1; } @@ -2129,14 +2261,37 @@ KYTY_HW_SH_PARSER(hw_sh_set_ps_shader) { EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0261008); - HW::PsStageRegisters r {}; + // HW::PsStageRegisters r {}; + HW::PsShaderResource1 r1; + HW::PsShaderResource2 r2; - r.data_addr = (static_cast(buffer[0]) << 8u) | (static_cast(buffer[1]) << 40u); - r.vgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK; - r.sgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_MASK; - r.scratch_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_MASK; - r.user_sgpr = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MASK; - r.wave_cnt_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_MASK; + // r.data_addr = (static_cast(buffer[0]) << 8u) | (static_cast(buffer[1]) << 40u); + // r.vgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK; + // r.sgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_MASK; + // r.scratch_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_MASK; + // r.user_sgpr = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MASK; + // r.wave_cnt_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_MASK; + + uint64_t addr = (static_cast(buffer[0]) << 8u) | (static_cast(buffer[1] & 0xffu) << 40u); + + r1.vgprs = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, VGPRS); + r1.sgprs = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, SGPRS); + r1.priority = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, DX10_CLAMP) != 0; + r1.debug_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, DEBUG_MODE) != 0; + r1.ieee_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, IEEE_MODE) != 0; + r1.cu_group_disable = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, CU_GROUP_DISABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FWD_PROGRESS) != 0; + r1.fp16_overflow = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FP16_OVFL) != 0; + + r2.scratch_en = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, SCRATCH_EN); + r2.user_sgpr = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, USER_SGPR) + + (KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, USER_SGPR_MSB) << 5u); + r2.wave_cnt_en = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, WAVE_CNT_EN); + r2.extra_lds_size = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, EXTRA_LDS_SIZE); + r2.raster_ordered_shading = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, LOAD_INTRAWAVE_COLLISION); + r2.shared_vgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, SHARED_VGPR_CNT); cp->GetCtx()->SetShaderZFormat(buffer[4]); @@ -2167,7 +2322,10 @@ KYTY_HW_SH_PARSER(hw_sh_set_ps_shader) cp->GetCtx()->SetDepthShaderControl(db_shader_control); cp->GetCtx()->SetShaderMask(buffer[11]); - cp->GetShCtx()->SetPsShader(r); + // cp->GetShCtx()->SetPsShader(r); + cp->GetShCtx()->SetPsShaderBase(addr); + cp->GetShCtx()->SetPsShaderResource1(r1); + cp->GetShCtx()->SetPsShaderResource2(r2); return 39; } @@ -2207,18 +2365,46 @@ KYTY_HW_SH_PARSER(hw_sh_set_vs_shader) auto shader_modifier = buffer[0]; - HW::VsStageRegisters r {}; + // HW::VsStageRegisters r {}; - r.m_spiShaderPgmLoVs = buffer[1]; - r.m_spiShaderPgmHiVs = buffer[2]; - r.m_spiShaderPgmRsrc1Vs = buffer[3]; - r.m_spiShaderPgmRsrc2Vs = buffer[4]; + uint64_t addr = (static_cast(buffer[1]) << 8u) | (static_cast(buffer[2] & 0xffu) << 40u); + + HW::VsShaderResource1 r1; + HW::VsShaderResource2 r2; + + // r.m_spiShaderPgmLoVs = buffer[1]; + // r.m_spiShaderPgmHiVs = buffer[2]; + // r.m_spiShaderPgmRsrc1Vs = buffer[3]; + // r.m_spiShaderPgmRsrc2Vs = buffer[4]; + + r1.vgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, VGPRS); + r1.sgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, SGPRS); + r1.priority = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, DX10_CLAMP) != 0; + r1.ieee_mode = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, IEEE_MODE) != 0; + r1.vgpr_component_count = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT); + r1.cu_group_enable = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, CU_GROUP_ENABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FWD_PROGRESS) != 0; + r1.fp16_overflow = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FP16_OVFL) != 0; + + r2.scratch_en = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SCRATCH_EN) != 0; + r2.user_sgpr = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, USER_SGPR) + + (KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, USER_SGPR_MSB) << 5u); + r2.offchip_lds = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN) != 0; + r2.streamout_enabled = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SO_EN) != 0; + r2.shared_vgprs = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SHARED_VGPR_CNT); uint32_t m_spi_vs_out_config = buffer[5]; uint32_t m_spi_shader_pos_format = buffer[6]; uint32_t m_pa_cl_vs_out_cntl = buffer[7]; - cp->GetShCtx()->SetVsShader(r, shader_modifier); + // cp->GetShCtx()->SetVsShader(r, shader_modifier); + cp->GetShCtx()->SetVsShaderBase(addr); + cp->GetShCtx()->SetVsShaderModifier(shader_modifier); + cp->GetShCtx()->SetVsShaderResource1(r1); + cp->GetShCtx()->SetVsShaderResource2(r2); + cp->GetCtx()->SetVsOutConfig(m_spi_vs_out_config); cp->GetCtx()->SetShaderPosFormat(m_spi_shader_pos_format); cp->GetCtx()->SetClVsOutCntl(m_pa_cl_vs_out_cntl); @@ -2243,22 +2429,57 @@ KYTY_HW_SH_PARSER(hw_sh_set_vs_user_sgpr) return reg_num; } +KYTY_HW_SH_PARSER(hw_sh_set_gs_user_sgpr) +{ + EXIT_NOT_IMPLEMENTED(!(cmd_offset >= Pm4::SPI_SHADER_USER_DATA_GS_0 && cmd_offset <= Pm4::SPI_SHADER_USER_DATA_GS_15)); + + uint32_t slot = (cmd_offset - Pm4::SPI_SHADER_USER_DATA_GS_0) / 1; + + auto reg_num = (cmd_id >> 16u) & 0x3fffu; + + for (uint32_t i = 0; i < reg_num; i++) + { + cp->GetShCtx()->SetGsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker()); + } + cp->SetUserDataMarker(HW::UserSgprType::Unknown); + + return reg_num; +} + KYTY_HW_SH_PARSER(hw_sh_update_ps_shader) { EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0261040); - HW::PsStageRegisters r {}; + HW::PsShaderResource1 r1; + HW::PsShaderResource2 r2; - r.data_addr = (static_cast(buffer[0]) << 8u) | (static_cast(buffer[1]) << 40u); - r.vgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_VGPRS_MASK; - r.sgprs = (buffer[2] >> Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC1_PS_SGPRS_MASK; - r.scratch_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_SCRATCH_EN_MASK; - r.user_sgpr = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_USER_SGPR_MASK; - r.wave_cnt_en = (buffer[3] >> Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_SHIFT) & Pm4::SPI_SHADER_PGM_RSRC2_PS_WAVE_CNT_EN_MASK; + uint64_t addr = (static_cast(buffer[0]) << 8u) | (static_cast(buffer[1] & 0xffu) << 40u); + + r1.vgprs = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, VGPRS); + r1.sgprs = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, SGPRS); + r1.priority = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, DX10_CLAMP) != 0; + r1.debug_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, DEBUG_MODE) != 0; + r1.ieee_mode = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, IEEE_MODE) != 0; + r1.cu_group_disable = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, CU_GROUP_DISABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FWD_PROGRESS) != 0; + r1.fp16_overflow = KYTY_PM4_GET(buffer[2], SPI_SHADER_PGM_RSRC1_PS, FP16_OVFL) != 0; + + r2.scratch_en = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, SCRATCH_EN); + r2.user_sgpr = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, USER_SGPR) + + (KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, USER_SGPR_MSB) << 5u); + r2.wave_cnt_en = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, WAVE_CNT_EN); + r2.extra_lds_size = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, EXTRA_LDS_SIZE); + r2.raster_ordered_shading = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, LOAD_INTRAWAVE_COLLISION); + r2.shared_vgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC2_PS, SHARED_VGPR_CNT); cp->GetCtx()->SetShaderZFormat(buffer[4]); - cp->GetShCtx()->UpdatePsShader(r); + // cp->GetShCtx()->UpdatePsShader(r); + cp->GetShCtx()->SetPsShaderBase(addr); + cp->GetShCtx()->SetPsShaderResource1(r1); + cp->GetShCtx()->SetPsShaderResource2(r2); return 39; } @@ -2269,14 +2490,33 @@ KYTY_HW_SH_PARSER(hw_sh_update_vs_shader) auto shader_modifier = buffer[0]; - HW::VsStageRegisters r {}; + uint64_t addr = (static_cast(buffer[1]) << 8u) | (static_cast(buffer[2] & 0xffu) << 40u); - r.m_spiShaderPgmLoVs = buffer[1]; - r.m_spiShaderPgmHiVs = buffer[2]; - r.m_spiShaderPgmRsrc1Vs = buffer[3]; - r.m_spiShaderPgmRsrc2Vs = buffer[4]; + HW::VsShaderResource1 r1; + HW::VsShaderResource2 r2; - cp->GetShCtx()->UpdateVsShader(r, shader_modifier); + r1.vgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, VGPRS); + r1.sgprs = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, SGPRS); + r1.priority = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, DX10_CLAMP) != 0; + r1.ieee_mode = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, IEEE_MODE) != 0; + r1.vgpr_component_count = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, VGPR_COMP_CNT); + r1.cu_group_enable = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, CU_GROUP_ENABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FWD_PROGRESS) != 0; + r1.fp16_overflow = KYTY_PM4_GET(buffer[3], SPI_SHADER_PGM_RSRC1_VS, FP16_OVFL) != 0; + + r2.scratch_en = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SCRATCH_EN) != 0; + r2.user_sgpr = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, USER_SGPR) + + (KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, USER_SGPR_MSB) << 5u); + r2.offchip_lds = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, OC_LDS_EN) != 0; + r2.streamout_enabled = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SO_EN) != 0; + r2.shared_vgprs = KYTY_PM4_GET(buffer[4], SPI_SHADER_PGM_RSRC2_VS, SHARED_VGPR_CNT); + + cp->GetShCtx()->SetVsShaderBase(addr); + cp->GetShCtx()->SetVsShaderModifier(shader_modifier); + cp->GetShCtx()->SetVsShaderResource1(r1); + cp->GetShCtx()->SetVsShaderResource2(r2); return 28; } @@ -2286,7 +2526,9 @@ KYTY_HW_UC_PARSER(hw_uc_set_primitive_type) EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0017900); EXIT_NOT_IMPLEMENTED(cmd_offset != Pm4::VGT_PRIMITIVE_TYPE); - cp->GetUcfg()->SetPrimitiveType(buffer[0]); + uint32_t prim_type = KYTY_PM4_GET(buffer[0], VGT_PRIMITIVE_TYPE, PRIM_TYPE); + + cp->GetUcfg()->SetPrimitiveType(prim_type); return 1; } @@ -2296,15 +2538,18 @@ KYTY_CP_OP_PARSER(cp_op_acquire_mem) { KYTY_PROFILER_FUNCTION(); - EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0055800); + EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0055800 && cmd_id != 0xc0061050); - uint32_t stall_mode = buffer[0] >> 31u; - uint32_t cache_action = buffer[0] & 0x7fffffffu; - uint64_t size_lo = buffer[1]; - uint32_t size_hi = buffer[2]; - uint64_t base_lo = buffer[3]; - uint32_t base_hi = buffer[4]; - uint32_t poll = buffer[5]; + bool custom = (cmd_id == 0xc0061050); + + uint32_t stall_mode = buffer[0] >> 31u; + uint32_t cache_action = buffer[0] & 0x7fffffffu; + uint64_t size_lo = buffer[1]; + uint32_t size_hi = buffer[2]; + uint64_t base_lo = buffer[3]; + uint32_t base_hi = buffer[4]; + uint32_t poll = buffer[5]; + [[maybe_unused]] uint32_t gcr_cntl = (custom ? buffer[6] : 0); uint32_t target_mask = cache_action & 0x00007FC0u; uint32_t extended_action = cache_action & 0x2E000000u; @@ -2390,6 +2635,24 @@ KYTY_CP_OP_PARSER(cp_op_acquire_mem) cp->WriteBack(); } break; + + case 0x06007fc0: + { + // target_mask: 0x00007fc0 (all rt and depth) + // extended_action: 0x06000000 (Flush Cb & Db) + // action: 0x00 (none) + EXIT_NOT_IMPLEMENTED(gcr_cntl != 0x280); // kGl0ScalarInvalidate & kGl1Invalidate + EXIT_NOT_IMPLEMENTED(size_lo != 0); + EXIT_NOT_IMPLEMENTED(base_lo != 0); + + EXIT_IF(target_mask != 0x00007fc0); + EXIT_IF(extended_action != 0x06000000); + EXIT_IF(action != 0x00); + + cp->MemoryBarrier(); + } + break; + default: EXIT("unknown barrier: 0x%08" PRIx32 ", 0x%08" PRIx32 ", 0x%08" PRIx32 ", 0x%08" PRIx32 "\n", cache_action, target_mask, extended_action, action); @@ -2401,7 +2664,7 @@ KYTY_CP_OP_PARSER(cp_op_acquire_mem) cp->BufferWait(); } - return 6; + return (custom ? 7 : 6); } KYTY_CP_OP_PARSER(cp_op_dispatch_direct) @@ -2656,6 +2919,25 @@ KYTY_CP_OP_PARSER(cp_op_event_write_eos) return 4; } +KYTY_CP_OP_PARSER(cp_op_flip) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_NOT_IMPLEMENTED(cmd_id != 0xc004105c); + + CommandProcessor::FlipInfo f; + + f.handle = static_cast(buffer[0]); + f.index = static_cast(buffer[1]); + f.flip_mode = static_cast(buffer[2]); + f.flip_arg = static_cast(buffer[3] | (static_cast(buffer[4]) << 32u)); + + cp->SetFlip(f); + cp->Flip(); + + return 5; +} + KYTY_CP_OP_PARSER(cp_op_increment_ce_counter) { KYTY_PROFILER_FUNCTION(); @@ -2729,55 +3011,80 @@ KYTY_CP_OP_PARSER(cp_op_indirect_cx_regs) auto cmd_offset = indirect_buffer[0]; auto value = indirect_buffer[1]; - if (cmd_offset >= Pm4::SPI_PS_INPUT_CNTL_0 && cmd_offset <= Pm4::SPI_PS_INPUT_CNTL_31) + EXIT_NOT_IMPLEMENTED(cmd_offset >= Pm4::CX_NUM); + + auto pfunc = g_hw_ctx_indirect_func[cmd_offset & (Pm4::CX_NUM - 1)]; + + if (pfunc == nullptr) { - uint32_t slot = cmd_offset - Pm4::SPI_PS_INPUT_CNTL_0; - cp->GetCtx()->SetPsInputSettings(slot, value); - continue; + EXIT("unknown cx reg at %05" PRIx32 ": 0x%" PRIx32 "\n", num_dw - dw, cmd_offset); } - switch (cmd_offset) - { - case Pm4::SPI_VS_OUT_CONFIG: cp->GetCtx()->SetVsOutConfig(value); break; - case Pm4::SPI_SHADER_POS_FORMAT: cp->GetCtx()->SetShaderPosFormat(value); break; - case Pm4::SPI_SHADER_IDX_FORMAT: cp->GetCtx()->SetShaderIdxFormat(value); break; - case Pm4::PA_CL_VS_OUT_CNTL: cp->GetCtx()->SetClVsOutCntl(value); break; - case Pm4::GE_NGG_SUBGRP_CNTL: cp->GetCtx()->SetNggSubgrpCntl(value); break; - case Pm4::VGT_GS_INSTANCE_CNT: cp->GetCtx()->SetGsInstanceCnt(value); break; - case Pm4::VGT_GS_ONCHIP_CNTL: cp->GetCtx()->SetGsOnchipCntl(value); break; - case Pm4::GE_MAX_OUTPUT_PER_SUBGROUP: cp->GetCtx()->SetMaxOutputPerSubgroup(value); break; - case Pm4::VGT_ESGS_RING_ITEMSIZE: cp->GetCtx()->SetEsgsRingItemsize(value); break; - case Pm4::VGT_GS_MAX_VERT_OUT: cp->GetCtx()->SetGsMaxVertOut(value); break; - case Pm4::VGT_SHADER_STAGES_EN: cp->GetCtx()->SetShaderStages(value); break; - case Pm4::VGT_GS_OUT_PRIM_TYPE: cp->GetCtx()->SetGsOutPrimType(value); break; - case Pm4::SPI_SHADER_Z_FORMAT: cp->GetCtx()->SetShaderZFormat(value); break; - case Pm4::SPI_SHADER_COL_FORMAT: - for (uint32_t i = 0; i < 8; i++) - { - cp->GetCtx()->SetTargetOutputMode(i, (value >> (i * 4)) & 0xFu); - } - break; - case Pm4::SPI_PS_INPUT_ENA: cp->GetCtx()->SetPsInputEna(value); break; - case Pm4::SPI_PS_INPUT_ADDR: cp->GetCtx()->SetPsInputAddr(value); break; - case Pm4::SPI_PS_IN_CONTROL: cp->GetCtx()->SetPsInControl(value); break; - case Pm4::SPI_BARYC_CNTL: cp->GetCtx()->SetBarycCntl(value); break; - case Pm4::DB_SHADER_CONTROL: - { - HW::DepthShaderControl db_shader_control {}; - db_shader_control.other_bits = value & 0xFFFF9B8Eu; - db_shader_control.conservative_z_export_value = KYTY_PM4_GET(value, DB_SHADER_CONTROL, CONSERVATIVE_Z_EXPORT); - db_shader_control.shader_z_behavior = KYTY_PM4_GET(value, DB_SHADER_CONTROL, Z_ORDER); - db_shader_control.shader_kill_enable = KYTY_PM4_GET(value, DB_SHADER_CONTROL, KILL_ENABLE) != 0; - db_shader_control.shader_z_export_enable = KYTY_PM4_GET(value, DB_SHADER_CONTROL, Z_EXPORT_ENABLE) != 0; - db_shader_control.shader_execute_on_noop = KYTY_PM4_GET(value, DB_SHADER_CONTROL, EXEC_ON_NOOP) != 0; - cp->GetCtx()->SetDepthShaderControl(db_shader_control); - break; - } - case Pm4::CB_SHADER_MASK: cp->GetCtx()->SetShaderMask(value); break; - case Pm4::PA_SC_SHADER_CONTROL: cp->GetCtx()->SetScShaderControl(value); break; + pfunc(cp, cmd_offset, value); + } - default: EXIT("unknown cx reg at %05" PRIx32 ": 0x%" PRIx32 "\n", num_dw - dw, cmd_offset); + return 3; +} + +KYTY_CP_OP_PARSER(cp_op_indirect_sh_regs) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0021044); + + auto* indirect_buffer = reinterpret_cast(buffer[1] | (static_cast(buffer[2]) << 32u)); + uint32_t indirect_num_dw = buffer[0]; + + EXIT_NOT_IMPLEMENTED(indirect_buffer == nullptr); + EXIT_NOT_IMPLEMENTED(indirect_num_dw == 0); + + for (uint32_t i = 0; i < indirect_num_dw; i++, indirect_buffer += 2) + { + auto cmd_offset = indirect_buffer[0]; + auto value = indirect_buffer[1]; + + EXIT_NOT_IMPLEMENTED(cmd_offset >= Pm4::SH_NUM); + + auto pfunc = g_hw_sh_indirect_func[cmd_offset & (Pm4::SH_NUM - 1)]; + + if (pfunc == nullptr) + { + EXIT("unknown sh reg at %05" PRIx32 ": 0x%" PRIx32 "\n", num_dw - dw, cmd_offset); } + + pfunc(cp, cmd_offset, value); + } + + return 3; +} + +KYTY_CP_OP_PARSER(cp_op_indirect_uc_regs) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_NOT_IMPLEMENTED(cmd_id != 0xc002104c); + + auto* indirect_buffer = reinterpret_cast(buffer[1] | (static_cast(buffer[2]) << 32u)); + uint32_t indirect_num_dw = buffer[0]; + + EXIT_NOT_IMPLEMENTED(indirect_buffer == nullptr); + EXIT_NOT_IMPLEMENTED(indirect_num_dw == 0); + + for (uint32_t i = 0; i < indirect_num_dw; i++, indirect_buffer += 2) + { + auto cmd_offset = indirect_buffer[0]; + auto value = indirect_buffer[1]; + + EXIT_NOT_IMPLEMENTED(cmd_offset >= Pm4::UC_NUM); + + auto pfunc = g_hw_uc_indirect_func[cmd_offset & (Pm4::UC_NUM - 1)]; + + if (pfunc == nullptr) + { + EXIT("unknown uc reg at %05" PRIx32 ": 0x%" PRIx32 "\n", num_dw - dw, cmd_offset); + } + + pfunc(cp, cmd_offset, value); } return 3; @@ -2900,7 +3207,9 @@ KYTY_CP_OP_PARSER(cp_op_release_mem) { KYTY_PROFILER_FUNCTION(); - EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0054902); + EXIT_NOT_IMPLEMENTED(cmd_id != 0xC0054902 && cmd_id != 0xc0051060); + + bool custom = (cmd_id == 0xc0051060); uint32_t cache_policy = (buffer[0] >> 25u) & 0x3u; uint32_t cache_action = (buffer[0] >> 12u) & 0x3fu; @@ -2912,6 +3221,25 @@ KYTY_CP_OP_PARSER(cp_op_release_mem) auto* dst_gpu_addr = reinterpret_cast(buffer[2] | (static_cast(buffer[3]) << 32u)); uint64_t value = (buffer[4] | (static_cast(buffer[5]) << 32u)); + if (custom) + { + uint32_t gcr_cntl = buffer[1]; + + EXIT_NOT_IMPLEMENTED(gcr_cntl != 0x200); + EXIT_NOT_IMPLEMENTED((buffer[0] >> 8u) != 0x3); + + if (gcr_cntl == 0x200) + { + cache_action = 0x38; + } + + cache_policy = 0; + event_index = 0; + event_write_dest = 0; + event_write_source = 2; + interrupt_selector = 0; + } + cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr, value, interrupt_selector); @@ -2997,25 +3325,40 @@ KYTY_CP_OP_PARSER(cp_op_wait_flip_done) return 6; } -KYTY_CP_OP_PARSER(cp_op_wait_on_address) +KYTY_CP_OP_PARSER(cp_op_wait_reg_mem_32) { KYTY_PROFILER_FUNCTION(); EXIT_NOT_IMPLEMENTED(cmd_id != 0xC00C1028); auto* addr = reinterpret_cast(buffer[0] | (static_cast(buffer[1]) << 32u)); - bool me = true; - bool mem = true; auto mask = buffer[2]; auto func = buffer[3]; auto ref = buffer[4]; auto poll = 10; - cp->WaitRegMem(func, me, mem, addr, ref, mask, poll); + cp->WaitRegMem32(func, addr, ref, mask, poll); return 13; } +KYTY_CP_OP_PARSER(cp_op_wait_reg_mem_64) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_NOT_IMPLEMENTED(cmd_id != 0xc0071058); + + auto* addr = reinterpret_cast(buffer[0] | (static_cast(buffer[1]) << 32u)); + auto mask = buffer[2] | (static_cast(buffer[3]) << 32u); + auto ref = buffer[4] | (static_cast(buffer[5]) << 32u); + auto func = buffer[6]; + auto poll = buffer[7]; + + cp->WaitRegMem64(func, addr, ref, mask, poll); + + return 8; +} + KYTY_CP_OP_PARSER(cp_op_wait_on_ce_counter) { KYTY_PROFILER_FUNCTION(); @@ -3053,7 +3396,10 @@ KYTY_CP_OP_PARSER(cp_op_wait_reg_mem) auto mask = buffer[4]; auto poll = buffer[5]; - cp->WaitRegMem(func, me, mem, addr, ref, mask, poll); + EXIT_NOT_IMPLEMENTED(!me); + EXIT_NOT_IMPLEMENTED(!mem); + + cp->WaitRegMem32(func, addr, ref, mask, poll); return 6; } @@ -3078,16 +3424,666 @@ KYTY_CP_OP_PARSER(cp_op_write_data) { KYTY_PROFILER_FUNCTION(); + auto op = (cmd_id >> 8u) & 0xffu; + + EXIT_NOT_IMPLEMENTED(op != Pm4::IT_WRITE_DATA && op != Pm4::IT_NOP); + + bool custom = (op == Pm4::IT_NOP); + auto dw_num = (cmd_id >> 16u) & 0x3fffu; auto write_control = buffer[0]; auto* dst = reinterpret_cast(buffer[1] | (static_cast(buffer[2]) << 32u)); - cp->WriteData(dst, buffer + 3, dw_num - 2, write_control); + cp->WriteData(dst, buffer + 3, dw_num - 2, write_control, custom); return 1 + dw_num; } +static void graphics_init_jmp_tables_cx_indirect() +{ + for (auto& func: g_hw_ctx_indirect_func) + { + func = nullptr; + } + + for (auto cmd_offset = Pm4::SPI_PS_INPUT_CNTL_0; cmd_offset <= Pm4::SPI_PS_INPUT_CNTL_31; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = cmd_offset - Pm4::SPI_PS_INPUT_CNTL_0; + cp->GetCtx()->SetPsInputSettings(slot, value); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_BASE; cmd_offset <= Pm4::CB_COLOR7_BASE; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_BASE) / 15; + auto base = cp->GetCtx()->GetRenderTarget(slot).base; + base.addr &= 0xFFFFFF00000000FFull; + base.addr |= static_cast(value) << 8u; + cp->GetCtx()->SetColorBase(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_BASE_EXT; cmd_offset <= Pm4::CB_COLOR7_BASE_EXT; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_BASE_EXT); + auto base = cp->GetCtx()->GetRenderTarget(slot).base; + base.addr &= 0xFFFF00FFFFFFFFFFull; + base.addr |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetColorBase(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_VIEW; cmd_offset <= Pm4::CB_COLOR7_VIEW; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_VIEW) / 15; + HW::ColorView view; + view.base_array_slice_index = KYTY_PM4_GET(value, CB_COLOR0_VIEW, SLICE_START); + view.last_array_slice_index = KYTY_PM4_GET(value, CB_COLOR0_VIEW, SLICE_MAX); + view.current_mip_level = KYTY_PM4_GET(value, CB_COLOR0_VIEW, MIP_LEVEL); + cp->GetCtx()->SetColorView(slot, view); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_INFO; cmd_offset <= Pm4::CB_COLOR7_INFO; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_INFO) / 15; + HW::ColorInfo info; + info.format = KYTY_PM4_GET(value, CB_COLOR0_INFO, FORMAT); + info.channel_type = KYTY_PM4_GET(value, CB_COLOR0_INFO, NUMBER_TYPE); + info.channel_order = KYTY_PM4_GET(value, CB_COLOR0_INFO, COMP_SWAP); + info.cmask_fast_clear_enable = KYTY_PM4_GET(value, CB_COLOR0_INFO, FAST_CLEAR) != 0; + info.fmask_compression_enable = KYTY_PM4_GET(value, CB_COLOR0_INFO, COMPRESSION) != 0; + info.blend_clamp = KYTY_PM4_GET(value, CB_COLOR0_INFO, BLEND_CLAMP) != 0; + info.blend_bypass = KYTY_PM4_GET(value, CB_COLOR0_INFO, BLEND_BYPASS) != 0; + info.round_mode = KYTY_PM4_GET(value, CB_COLOR0_INFO, ROUND_MODE) != 0; + info.cmask_tile_mode = KYTY_PM4_GET(value, CB_COLOR0_INFO, CMASK_IS_LINEAR); + info.fmask_data_compression_disable = KYTY_PM4_GET(value, CB_COLOR0_INFO, FMASK_COMPRESSION_DISABLE) != 0; + info.fmask_one_frag_mode = KYTY_PM4_GET(value, CB_COLOR0_INFO, FMASK_COMPRESS_1FRAG_ONLY) != 0; + info.dcc_compression_enable = KYTY_PM4_GET(value, CB_COLOR0_INFO, DCC_ENABLE) != 0; + info.cmask_tile_mode_neo = KYTY_PM4_GET(value, CB_COLOR0_INFO, CMASK_ADDR_TYPE); + info.neo_mode = KYTY_PM4_GET(value, CB_COLOR0_INFO, ALT_TILE_MODE) != 0; + cp->GetCtx()->SetColorInfo(slot, info); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_ATTRIB; cmd_offset <= Pm4::CB_COLOR7_ATTRIB; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_ATTRIB) / 15; + HW::ColorAttrib attrib; + attrib.force_dest_alpha_to_one = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB, FORCE_DST_ALPHA_1) != 0; + attrib.tile_mode = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB, TILE_MODE_INDEX); + attrib.fmask_tile_mode = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB, FMASK_TILE_MODE_INDEX); + attrib.num_samples = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB, NUM_SAMPLES); + attrib.num_fragments = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB, NUM_FRAGMENTS); + cp->GetCtx()->SetColorAttrib(slot, attrib); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_DCC_CONTROL; cmd_offset <= Pm4::CB_COLOR7_DCC_CONTROL; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_DCC_CONTROL) / 15; + HW::ColorDccControl dcc; + dcc.overwrite_combiner_disable = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, OVERWRITE_COMBINER_DISABLE) != 0; + dcc.dcc_clear_key_enable = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, KEY_CLEAR_ENABLE) != 0; + dcc.max_uncompressed_block_size = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, MAX_UNCOMPRESSED_BLOCK_SIZE); + dcc.min_compressed_block_size = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, MIN_COMPRESSED_BLOCK_SIZE); + dcc.max_compressed_block_size = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, MAX_COMPRESSED_BLOCK_SIZE); + dcc.color_transform = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, COLOR_TRANSFORM); + dcc.independent_64b_blocks = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, INDEPENDENT_64B_BLOCKS) != 0; + dcc.data_write_on_dcc_clear_to_reg = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, ENABLE_CONSTANT_ENCODE_REG_WRITE) != 0; + dcc.independent_128b_blocks = KYTY_PM4_GET(value, CB_COLOR0_DCC_CONTROL, INDEPENDENT_128B_BLOCKS) != 0; + cp->GetCtx()->SetColorDccControl(slot, dcc); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_CMASK; cmd_offset <= Pm4::CB_COLOR7_CMASK; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_CMASK) / 15; + auto base = cp->GetCtx()->GetRenderTarget(slot).cmask; + base.addr &= 0xFFFFFF00000000FFull; + base.addr |= static_cast(value) << 8u; + cp->GetCtx()->SetColorCmask(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_CMASK_BASE_EXT; cmd_offset <= Pm4::CB_COLOR7_CMASK_BASE_EXT; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_CMASK_BASE_EXT); + auto base = cp->GetCtx()->GetRenderTarget(slot).cmask; + base.addr &= 0xFFFF00FFFFFFFFFFull; + base.addr |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetColorCmask(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_FMASK; cmd_offset <= Pm4::CB_COLOR7_FMASK; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_FMASK) / 15; + auto base = cp->GetCtx()->GetRenderTarget(slot).fmask; + base.addr &= 0xFFFFFF00000000FFull; + base.addr |= static_cast(value) << 8u; + cp->GetCtx()->SetColorFmask(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_FMASK_BASE_EXT; cmd_offset <= Pm4::CB_COLOR7_FMASK_BASE_EXT; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_FMASK_BASE_EXT); + auto base = cp->GetCtx()->GetRenderTarget(slot).fmask; + base.addr &= 0xFFFF00FFFFFFFFFFull; + base.addr |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetColorFmask(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_CLEAR_WORD0; cmd_offset <= Pm4::CB_COLOR7_CLEAR_WORD0; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::ColorClearWord0 clear_word0; + clear_word0.word0 = value; + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_CLEAR_WORD0) / 15; + cp->GetCtx()->SetColorClearWord0(slot, clear_word0); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_CLEAR_WORD1; cmd_offset <= Pm4::CB_COLOR7_CLEAR_WORD1; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::ColorClearWord1 clear_word1; + clear_word1.word1 = value; + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_CLEAR_WORD1) / 15; + cp->GetCtx()->SetColorClearWord1(slot, clear_word1); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_DCC_BASE; cmd_offset <= Pm4::CB_COLOR7_DCC_BASE; cmd_offset += 15) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_DCC_BASE) / 15; + auto base = cp->GetCtx()->GetRenderTarget(slot).dcc_addr; + base.addr &= 0xFFFFFF00000000FFull; + base.addr |= static_cast(value) << 8u; + cp->GetCtx()->SetColorDccAddr(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_DCC_BASE_EXT; cmd_offset <= Pm4::CB_COLOR7_DCC_BASE_EXT; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_DCC_BASE_EXT); + auto base = cp->GetCtx()->GetRenderTarget(slot).dcc_addr; + base.addr &= 0xFFFF00FFFFFFFFFFull; + base.addr |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetColorDccAddr(slot, base); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_ATTRIB2; cmd_offset <= Pm4::CB_COLOR7_ATTRIB2; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_ATTRIB2); + HW::ColorAttrib2 attrib2; + attrib2.height = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB2, MIP0_HEIGHT); + attrib2.width = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB2, MIP0_WIDTH); + attrib2.num_mip_levels = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB2, MAX_MIP); + cp->GetCtx()->SetColorAttrib2(slot, attrib2); + }; + } + + for (auto cmd_offset = Pm4::CB_COLOR0_ATTRIB3; cmd_offset <= Pm4::CB_COLOR7_ATTRIB3; cmd_offset++) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + uint32_t slot = (cmd_offset - Pm4::CB_COLOR0_ATTRIB3); + HW::ColorAttrib3 attrib3; + attrib3.depth = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB3, MIP0_DEPTH); + attrib3.tile_mode = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB3, COLOR_SW_MODE); + attrib3.dimension = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB3, RESOURCE_TYPE); + attrib3.cmask_pipe_aligned = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB3, CMASK_PIPE_ALIGNED); + attrib3.dcc_pipe_aligned = KYTY_PM4_GET(value, CB_COLOR0_ATTRIB3, DCC_PIPE_ALIGNED); + cp->GetCtx()->SetColorAttrib3(slot, attrib3); + }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_XSCALE; cmd_offset <= Pm4::PA_CL_VPORT_XSCALE_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportXScale((cmd_offset - Pm4::PA_CL_VPORT_XSCALE) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_XOFFSET; cmd_offset <= Pm4::PA_CL_VPORT_XOFFSET_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportXOffset((cmd_offset - Pm4::PA_CL_VPORT_XOFFSET) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_YSCALE; cmd_offset <= Pm4::PA_CL_VPORT_YSCALE_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportYScale((cmd_offset - Pm4::PA_CL_VPORT_YSCALE) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_YOFFSET; cmd_offset <= Pm4::PA_CL_VPORT_YOFFSET_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportYOffset((cmd_offset - Pm4::PA_CL_VPORT_YOFFSET) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_ZSCALE; cmd_offset <= Pm4::PA_CL_VPORT_ZSCALE_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportZScale((cmd_offset - Pm4::PA_CL_VPORT_ZSCALE) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_CL_VPORT_ZOFFSET; cmd_offset <= Pm4::PA_CL_VPORT_ZOFFSET_15; cmd_offset += 6) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportZOffset((cmd_offset - Pm4::PA_CL_VPORT_ZOFFSET) / 6, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_SC_VPORT_SCISSOR_0_TL; cmd_offset <= Pm4::PA_SC_VPORT_SCISSOR_15_TL; cmd_offset += 2) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + int left = static_cast(static_cast(KYTY_PM4_GET(value, PA_SC_VPORT_SCISSOR_0_TL, TL_X))); + int top = static_cast(static_cast(KYTY_PM4_GET(value, PA_SC_VPORT_SCISSOR_0_TL, TL_Y))); + bool window_offset_disable = KYTY_PM4_GET(value, PA_SC_VPORT_SCISSOR_0_TL, WINDOW_OFFSET_DISABLE) != 0; + cp->GetCtx()->SetViewportScissorTL((cmd_offset - Pm4::PA_SC_VPORT_SCISSOR_0_TL) / 2, left, top, !window_offset_disable); + }; + } + + for (auto cmd_offset = Pm4::PA_SC_VPORT_SCISSOR_0_BR; cmd_offset <= Pm4::PA_SC_VPORT_SCISSOR_15_BR; cmd_offset += 2) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + int right = static_cast(static_cast(KYTY_PM4_GET(value, PA_SC_VPORT_SCISSOR_0_BR, BR_X))); + int bottom = static_cast(static_cast(KYTY_PM4_GET(value, PA_SC_VPORT_SCISSOR_0_BR, BR_Y))); + cp->GetCtx()->SetViewportScissorBR((cmd_offset - Pm4::PA_SC_VPORT_SCISSOR_0_BR) / 2, right, bottom); + }; + } + + for (auto cmd_offset = Pm4::PA_SC_VPORT_ZMIN_0; cmd_offset <= Pm4::PA_SC_VPORT_ZMIN_15; cmd_offset += 2) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportZMin((cmd_offset - Pm4::PA_SC_VPORT_ZMIN_0) / 2, *reinterpret_cast(&value)); }; + } + + for (auto cmd_offset = Pm4::PA_SC_VPORT_ZMAX_0; cmd_offset <= Pm4::PA_SC_VPORT_ZMAX_15; cmd_offset += 2) + { + g_hw_ctx_indirect_func[cmd_offset] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetViewportZMax((cmd_offset - Pm4::PA_SC_VPORT_ZMAX_0) / 2, *reinterpret_cast(&value)); }; + } + + g_hw_ctx_indirect_func[Pm4::SPI_VS_OUT_CONFIG] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetVsOutConfig(value); }; + + g_hw_ctx_indirect_func[Pm4::SPI_SHADER_POS_FORMAT] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetShaderPosFormat(value); }; + g_hw_ctx_indirect_func[Pm4::SPI_SHADER_IDX_FORMAT] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetShaderIdxFormat(value); }; + g_hw_ctx_indirect_func[Pm4::PA_CL_VS_OUT_CNTL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetClVsOutCntl(value); }; + g_hw_ctx_indirect_func[Pm4::GE_NGG_SUBGRP_CNTL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetNggSubgrpCntl(value); }; + g_hw_ctx_indirect_func[Pm4::VGT_GS_INSTANCE_CNT] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetGsInstanceCnt(value); }; + g_hw_ctx_indirect_func[Pm4::VGT_GS_ONCHIP_CNTL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetGsOnchipCntl(value); }; + + g_hw_ctx_indirect_func[Pm4::GE_MAX_OUTPUT_PER_SUBGROUP] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetMaxOutputPerSubgroup(value); }; + + g_hw_ctx_indirect_func[Pm4::VGT_ESGS_RING_ITEMSIZE] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetEsgsRingItemsize(value); }; + g_hw_ctx_indirect_func[Pm4::VGT_GS_MAX_VERT_OUT] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetGsMaxVertOut(value); }; + g_hw_ctx_indirect_func[Pm4::VGT_SHADER_STAGES_EN] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetShaderStages(value); }; + g_hw_ctx_indirect_func[Pm4::VGT_GS_OUT_PRIM_TYPE] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetGsOutPrimType(value); }; + g_hw_ctx_indirect_func[Pm4::SPI_SHADER_Z_FORMAT] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetShaderZFormat(value); }; + + g_hw_ctx_indirect_func[Pm4::SPI_SHADER_COL_FORMAT] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + for (uint32_t i = 0; i < 8; i++) + { + cp->GetCtx()->SetTargetOutputMode(i, (value >> (i * 4)) & 0xFu); + } + }; + + g_hw_ctx_indirect_func[Pm4::SPI_PS_INPUT_ENA] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetPsInputEna(value); }; + g_hw_ctx_indirect_func[Pm4::SPI_PS_INPUT_ADDR] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetPsInputAddr(value); }; + g_hw_ctx_indirect_func[Pm4::SPI_PS_IN_CONTROL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetPsInControl(value); }; + g_hw_ctx_indirect_func[Pm4::SPI_BARYC_CNTL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetBarycCntl(value); }; + + g_hw_ctx_indirect_func[Pm4::DB_SHADER_CONTROL] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthShaderControl db_shader_control {}; + db_shader_control.other_bits = value & 0xFFFF9B8Eu; + db_shader_control.conservative_z_export_value = KYTY_PM4_GET(value, DB_SHADER_CONTROL, CONSERVATIVE_Z_EXPORT); + db_shader_control.shader_z_behavior = KYTY_PM4_GET(value, DB_SHADER_CONTROL, Z_ORDER); + db_shader_control.shader_kill_enable = KYTY_PM4_GET(value, DB_SHADER_CONTROL, KILL_ENABLE) != 0; + db_shader_control.shader_z_export_enable = KYTY_PM4_GET(value, DB_SHADER_CONTROL, Z_EXPORT_ENABLE) != 0; + db_shader_control.shader_execute_on_noop = KYTY_PM4_GET(value, DB_SHADER_CONTROL, EXEC_ON_NOOP) != 0; + cp->GetCtx()->SetDepthShaderControl(db_shader_control); + }; + + g_hw_ctx_indirect_func[Pm4::CB_SHADER_MASK] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetShaderMask(value); }; + g_hw_ctx_indirect_func[Pm4::PA_SC_SHADER_CONTROL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetScShaderControl(value); }; + g_hw_ctx_indirect_func[Pm4::CB_TARGET_MASK] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetRenderTargetMask(value); }; + + g_hw_ctx_indirect_func[Pm4::DB_Z_INFO] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthZInfo r; + r.format = KYTY_PM4_GET(value, DB_Z_INFO, FORMAT); + r.num_samples = KYTY_PM4_GET(value, DB_Z_INFO, NUM_SAMPLES); + r.embedded_sample_locations = KYTY_PM4_GET(value, DB_Z_INFO, ITERATE_FLUSH) != 0; + r.partially_resident = KYTY_PM4_GET(value, DB_Z_INFO, PARTIALLY_RESIDENT) != 0; + r.num_mip_levels = KYTY_PM4_GET(value, DB_Z_INFO, MAXMIP); + r.tile_mode_index = KYTY_PM4_GET(value, DB_Z_INFO, TILE_MODE_INDEX); + r.plane_compression = KYTY_PM4_GET(value, DB_Z_INFO, DECOMPRESS_ON_N_ZPLANES); + r.expclear_enabled = KYTY_PM4_GET(value, DB_Z_INFO, ALLOW_EXPCLEAR) != 0; + r.tile_surface_enable = KYTY_PM4_GET(value, DB_Z_INFO, TILE_SURFACE_ENABLE) != 0; + r.zrange_precision = KYTY_PM4_GET(value, DB_Z_INFO, ZRANGE_PRECISION); + cp->GetCtx()->SetDepthZInfo(r); + }; + + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_INFO] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthStencilInfo r; + r.format = KYTY_PM4_GET(value, DB_STENCIL_INFO, FORMAT); + r.texture_compatible_stencil = KYTY_PM4_GET(value, DB_STENCIL_INFO, ITERATE_FLUSH) != 0; + r.partially_resident = KYTY_PM4_GET(value, DB_STENCIL_INFO, PARTIALLY_RESIDENT) != 0; + r.tile_split = KYTY_PM4_GET(value, DB_STENCIL_INFO, RESERVED_FIELD_1); + r.tile_mode_index = KYTY_PM4_GET(value, DB_STENCIL_INFO, TILE_MODE_INDEX); + r.expclear_enabled = KYTY_PM4_GET(value, DB_STENCIL_INFO, ALLOW_EXPCLEAR) != 0; + r.tile_stencil_disable = KYTY_PM4_GET(value, DB_STENCIL_INFO, TILE_STENCIL_DISABLE) != 0; + cp->GetCtx()->SetDepthStencilInfo(r); + }; + + g_hw_ctx_indirect_func[Pm4::DB_Z_READ_BASE] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().z_read_base_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetCtx()->SetDepthZReadBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_Z_READ_BASE_HI] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().z_read_base_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetDepthZReadBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_READ_BASE] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().stencil_read_base_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetCtx()->SetDepthStencilReadBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_READ_BASE_HI] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().stencil_read_base_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetDepthStencilReadBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_Z_WRITE_BASE] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().z_write_base_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetCtx()->SetDepthZWriteBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_Z_WRITE_BASE_HI] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().z_write_base_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetDepthZWriteBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_WRITE_BASE] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().stencil_write_base_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetCtx()->SetDepthStencilWriteBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_WRITE_BASE_HI] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().stencil_write_base_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetDepthStencilWriteBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_HTILE_DATA_BASE] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().htile_data_base_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetCtx()->SetDepthHTileDataBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_HTILE_DATA_BASE_HI] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + auto base = cp->GetCtx()->GetDepthRenderTarget().htile_data_base_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetCtx()->SetDepthHTileDataBase(base); + }; + + g_hw_ctx_indirect_func[Pm4::DB_DEPTH_VIEW] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthDepthView r; + r.slice_start = KYTY_PM4_GET(value, DB_DEPTH_VIEW, SLICE_START) + (KYTY_PM4_GET(value, DB_DEPTH_VIEW, SLICE_START_HI) << 11u); + r.slice_max = KYTY_PM4_GET(value, DB_DEPTH_VIEW, SLICE_MAX) + (KYTY_PM4_GET(value, DB_DEPTH_VIEW, SLICE_MAX_HI) << 11u); + r.depth_write_disable = KYTY_PM4_GET(value, DB_DEPTH_VIEW, Z_READ_ONLY) != 0; + r.stencil_write_disable = KYTY_PM4_GET(value, DB_DEPTH_VIEW, STENCIL_READ_ONLY) != 0; + r.current_mip_level = KYTY_PM4_GET(value, DB_DEPTH_VIEW, MIPID); + cp->GetCtx()->SetDepthDepthView(r); + }; + + g_hw_ctx_indirect_func[Pm4::DB_DEPTH_SIZE_XY] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthDepthSizeXY r; + r.x_max = KYTY_PM4_GET(value, DB_DEPTH_SIZE_XY, X_MAX); + r.y_max = KYTY_PM4_GET(value, DB_DEPTH_SIZE_XY, Y_MAX); + cp->GetCtx()->SetDepthDepthSizeXY(r); + }; + + g_hw_ctx_indirect_func[Pm4::DB_DEPTH_CLEAR] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetDepthClearValue(*reinterpret_cast(&value)); }; + g_hw_ctx_indirect_func[Pm4::DB_STENCIL_CLEAR] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { cp->GetCtx()->SetStencilClearValue(KYTY_PM4_GET(value, DB_STENCIL_CLEAR, CLEAR)); }; + g_hw_ctx_indirect_func[Pm4::PA_CL_VTE_CNTL] = [](KYTY_HW_CTX_INDIRECT_ARGS) { cp->GetCtx()->SetViewportTransformControl(value); }; + + g_hw_ctx_indirect_func[Pm4::PA_SC_MODE_CNTL_0] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::ScanModeControl r; + r.msaa_enable = KYTY_PM4_GET(value, PA_SC_MODE_CNTL_0, MSAA_ENABLE) != 0; + r.vport_scissor_enable = KYTY_PM4_GET(value, PA_SC_MODE_CNTL_0, VPORT_SCISSOR_ENABLE) != 0; + r.line_stipple_enable = KYTY_PM4_GET(value, PA_SC_MODE_CNTL_0, LINE_STIPPLE_ENABLE) != 0; + cp->GetCtx()->SetScanModeControl(r); + }; + + g_hw_ctx_indirect_func[Pm4::DB_DEPTH_CONTROL] = [](KYTY_HW_CTX_INDIRECT_ARGS) + { + HW::DepthControl r; + r.stencil_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, STENCIL_ENABLE) != 0; + r.z_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, Z_ENABLE) != 0; + r.z_write_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, Z_WRITE_ENABLE) != 0; + r.depth_bounds_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, DEPTH_BOUNDS_ENABLE) != 0; + r.zfunc = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, ZFUNC); + r.backface_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, BACKFACE_ENABLE) != 0; + r.stencilfunc = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, STENCILFUNC); + r.stencilfunc_bf = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, STENCILFUNC_BF); + r.color_writes_on_depth_fail_enable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, ENABLE_COLOR_WRITES_ON_DEPTH_FAIL) != 0; + r.color_writes_on_depth_pass_disable = KYTY_PM4_GET(value, DB_DEPTH_CONTROL, DISABLE_COLOR_WRITES_ON_DEPTH_PASS) != 0; + cp->GetCtx()->SetDepthControl(r); + }; +} + +static void graphics_init_jmp_tables_sh_indirect() +{ + for (auto& func: g_hw_sh_indirect_func) + { + func = nullptr; + } + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_LO_ES] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + auto base = cp->GetShCtx()->GetVs().es_regs.data_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetShCtx()->SetEsShaderBase(base); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_HI_ES] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + auto base = cp->GetShCtx()->GetVs().es_regs.data_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetShCtx()->SetEsShaderBase(base); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_CHKSUM_GS] = [](KYTY_HW_SH_INDIRECT_ARGS) { cp->GetShCtx()->SetGsShaderChksum(value); }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_RSRC1_GS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + HW::GsShaderResource1 r1; + r1.vgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, VGPRS); + r1.sgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, SGPRS); + r1.priority = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, DX10_CLAMP) != 0; + r1.debug_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, DEBUG_MODE) != 0; + r1.ieee_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, IEEE_MODE) != 0; + r1.cu_group_enable = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, CU_GROUP_ENABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, FWD_PROGRESS) != 0; + r1.lds_configuration = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, WGP_MODE) != 0; + r1.gs_vgpr_component_count = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, GS_VGPR_COMP_CNT); + r1.fp16_overflow = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_GS, FP16_OVFL) != 0; + cp->GetShCtx()->SetGsShaderResource1(r1); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_RSRC2_GS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + HW::GsShaderResource2 r2; + r2.scratch_en = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, SCRATCH_EN) != 0; + r2.user_sgpr = + KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR) + (KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, USER_SGPR_MSB) << 5u); + r2.es_vgpr_component_count = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, ES_VGPR_COMP_CNT); + r2.offchip_lds = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, OC_LDS_EN) != 0; + r2.lds_size = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, LDS_SIZE); + r2.shared_vgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_GS, SHARED_VGPR_CNT); + cp->GetShCtx()->SetGsShaderResource2(r2); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_LO_PS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + auto base = cp->GetShCtx()->GetPs().ps_regs.data_addr; + base &= 0xFFFFFF00000000FFull; + base |= static_cast(value) << 8u; + cp->GetShCtx()->SetPsShaderBase(base); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_HI_PS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + auto base = cp->GetShCtx()->GetPs().ps_regs.data_addr; + base &= 0xFFFF00FFFFFFFFFFull; + base |= (static_cast(value) & 0xffu) << 40u; + cp->GetShCtx()->SetPsShaderBase(base); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_CHKSUM_PS] = [](KYTY_HW_SH_INDIRECT_ARGS) { cp->GetShCtx()->SetPsShaderChksum(value); }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_RSRC1_PS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + HW::PsShaderResource1 r1; + r1.vgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, VGPRS); + r1.sgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, SGPRS); + r1.priority = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, PRIORITY); + r1.float_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, FLOAT_MODE); + r1.dx10_clamp = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, DX10_CLAMP) != 0; + r1.debug_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, DEBUG_MODE) != 0; + r1.ieee_mode = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, IEEE_MODE) != 0; + r1.cu_group_disable = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, CU_GROUP_DISABLE) != 0; + r1.require_forward_progress = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, FWD_PROGRESS) != 0; + r1.fp16_overflow = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC1_PS, FP16_OVFL) != 0; + cp->GetShCtx()->SetPsShaderResource1(r1); + }; + + g_hw_sh_indirect_func[Pm4::SPI_SHADER_PGM_RSRC2_PS] = [](KYTY_HW_SH_INDIRECT_ARGS) + { + HW::PsShaderResource2 r2; + r2.scratch_en = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, SCRATCH_EN); + r2.user_sgpr = + KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, USER_SGPR) + (KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, USER_SGPR_MSB) << 5u); + r2.wave_cnt_en = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, WAVE_CNT_EN); + r2.extra_lds_size = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, EXTRA_LDS_SIZE); + r2.raster_ordered_shading = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, LOAD_INTRAWAVE_COLLISION); + r2.shared_vgprs = KYTY_PM4_GET(value, SPI_SHADER_PGM_RSRC2_PS, SHARED_VGPR_CNT); + cp->GetShCtx()->SetPsShaderResource2(r2); + }; +} + +static void graphics_init_jmp_tables_uc_indirect() +{ + for (auto& func: g_hw_uc_indirect_func) + { + func = nullptr; + } + + g_hw_uc_indirect_func[Pm4::GE_CNTL] = [](KYTY_HW_UC_INDIRECT_ARGS) + { + HW::GeControl r; + r.primitive_group_size = KYTY_PM4_GET(value, GE_CNTL, PRIM_GRP_SIZE); + r.vertex_group_size = KYTY_PM4_GET(value, GE_CNTL, VERT_GRP_SIZE); + cp->GetUcfg()->SetGeControl(r); + }; + + g_hw_uc_indirect_func[Pm4::GE_USER_VGPR_EN] = [](KYTY_HW_UC_INDIRECT_ARGS) + { + HW::GeUserVgprEn r; + r.vgpr1 = KYTY_PM4_GET(value, GE_USER_VGPR_EN, EN_USER_VGPR1) != 0; + r.vgpr2 = KYTY_PM4_GET(value, GE_USER_VGPR_EN, EN_USER_VGPR2) != 0; + r.vgpr3 = KYTY_PM4_GET(value, GE_USER_VGPR_EN, EN_USER_VGPR3) != 0; + cp->GetUcfg()->SetGeUserVgprEn(r); + }; + + g_hw_uc_indirect_func[Pm4::VGT_PRIMITIVE_TYPE] = [](KYTY_HW_UC_INDIRECT_ARGS) + { + uint32_t prim_type = KYTY_PM4_GET(value, VGT_PRIMITIVE_TYPE, PRIM_TYPE); + cp->GetUcfg()->SetPrimitiveType(prim_type); + }; +} + static void graphics_init_jmp_tables() { for (auto& func: g_hw_ctx_func) @@ -3145,6 +4141,7 @@ static void graphics_init_jmp_tables() g_hw_sh_func[Pm4::SPI_SHADER_USER_DATA_VS_0 + slot * 1] = hw_sh_set_vs_user_sgpr; g_hw_sh_func[Pm4::SPI_SHADER_USER_DATA_PS_0 + slot * 1] = hw_sh_set_ps_user_sgpr; g_hw_sh_func[Pm4::COMPUTE_USER_DATA_0 + slot * 1] = hw_sh_set_cs_user_sgpr; + g_hw_sh_func[Pm4::SPI_SHADER_USER_DATA_GS_0 + slot * 1] = hw_sh_set_gs_user_sgpr; } for (auto& func: g_hw_uc_func) @@ -3205,12 +4202,23 @@ static void graphics_init_jmp_tables() g_cp_op_custom_func[Pm4::R_DRAW_INDEX_AUTO] = cp_op_draw_index_auto; g_cp_op_custom_func[Pm4::R_DISPATCH_DIRECT] = cp_op_dispatch_direct; g_cp_op_custom_func[Pm4::R_DISPATCH_RESET] = cp_op_dispatch_reset; - g_cp_op_custom_func[Pm4::R_WAIT_MEM_32] = cp_op_wait_on_address; + g_cp_op_custom_func[Pm4::R_WAIT_MEM_32] = cp_op_wait_reg_mem_32; g_cp_op_custom_func[Pm4::R_DRAW_RESET] = cp_op_draw_reset; g_cp_op_custom_func[Pm4::R_WAIT_FLIP_DONE] = cp_op_wait_flip_done; g_cp_op_custom_func[Pm4::R_PUSH_MARKER] = cp_op_push_marker; g_cp_op_custom_func[Pm4::R_POP_MARKER] = cp_op_pop_marker; g_cp_op_custom_func[Pm4::R_CX_REGS_INDIRECT] = cp_op_indirect_cx_regs; + g_cp_op_custom_func[Pm4::R_SH_REGS_INDIRECT] = cp_op_indirect_sh_regs; + g_cp_op_custom_func[Pm4::R_UC_REGS_INDIRECT] = cp_op_indirect_uc_regs; + g_cp_op_custom_func[Pm4::R_ACQUIRE_MEM] = cp_op_acquire_mem; + g_cp_op_custom_func[Pm4::R_WRITE_DATA] = cp_op_write_data; + g_cp_op_custom_func[Pm4::R_WAIT_MEM_64] = cp_op_wait_reg_mem_64; + g_cp_op_custom_func[Pm4::R_FLIP] = cp_op_flip; + g_cp_op_custom_func[Pm4::R_RELEASE_MEM] = cp_op_release_mem; + + graphics_init_jmp_tables_cx_indirect(); + graphics_init_jmp_tables_sh_indirect(); + graphics_init_jmp_tables_uc_indirect(); } } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/src/Graphics/Objects/DepthStencilBuffer.cpp b/source/emulator/src/Graphics/Objects/DepthStencilBuffer.cpp index ae7d395..662a52f 100644 --- a/source/emulator/src/Graphics/Objects/DepthStencilBuffer.cpp +++ b/source/emulator/src/Graphics/Objects/DepthStencilBuffer.cpp @@ -31,7 +31,8 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint auto pixel_format = static_cast(params[DepthStencilBufferObject::PARAM_FORMAT]); auto width = params[DepthStencilBufferObject::PARAM_WIDTH]; auto height = params[DepthStencilBufferObject::PARAM_HEIGHT]; - bool htile = params[DepthStencilBufferObject::PARAM_HTILE] != 1; + bool htile = params[DepthStencilBufferObject::PARAM_HTILE] != 0; + bool sampled = params[DepthStencilBufferObject::PARAM_USAGE] == 1; EXIT_NOT_IMPLEMENTED(pixel_format == VK_FORMAT_UNDEFINED); EXIT_NOT_IMPLEMENTED(width == 0); @@ -50,7 +51,7 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint view = nullptr; } - vk_obj->compressed = htile; + vk_obj->compressed = !htile; VkImageCreateInfo image_info {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -66,7 +67,7 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.initialLayout = vk_obj->layout; image_info.usage = static_cast(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) | - static_cast(VK_IMAGE_USAGE_SAMPLED_BIT); + (sampled ? static_cast(VK_IMAGE_USAGE_SAMPLED_BIT) : static_cast(0)); image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.samples = VK_SAMPLE_COUNT_1_BIT; @@ -160,15 +161,10 @@ static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem) bool DepthStencilBufferObject::Equal(const uint64_t* other) const { return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_WIDTH] == other[PARAM_WIDTH] && - params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_HTILE] == other[PARAM_HTILE]); + params[PARAM_HEIGHT] == other[PARAM_HEIGHT] && params[PARAM_HTILE] == other[PARAM_HTILE] && + params[PARAM_NEO] == other[PARAM_NEO] && params[PARAM_USAGE] == other[PARAM_USAGE]); } -// bool DepthStencilBufferObject::Reuse(const uint64_t* other) const -//{ -// return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_WIDTH] <= other[PARAM_WIDTH] && -// params[PARAM_HEIGHT] <= other[PARAM_HEIGHT] && params[PARAM_HTILE] == other[PARAM_HTILE]); -//} - GpuObject::create_func_t DepthStencilBufferObject::GetCreateFunc() const { return create_func; diff --git a/source/emulator/src/Graphics/Shader.cpp b/source/emulator/src/Graphics/Shader.cpp index 13760df..190fdc9 100644 --- a/source/emulator/src/Graphics/Shader.cpp +++ b/source/emulator/src/Graphics/Shader.cpp @@ -2088,20 +2088,59 @@ KYTY_SHADER_PARSER(shader_parse) return ptr - src; } -static void vs_print(const char* func, const HW::VsStageRegisters& vs, const HW::ShaderRegisters& sh) +static void vs_print(const char* func, const HW::VertexShaderInfo& vs, const HW::ShaderRegisters& sh) { printf("%s\n", func); - printf("\t GetGpuAddress() = 0x%016" PRIx64 "\n", vs.GetGpuAddress()); - printf("\t GetStreamoutEnabled() = %s\n", vs.GetStreamoutEnabled() ? "true" : "false"); - printf("\t GetSgprCount() = 0x%08" PRIx32 "\n", vs.GetSgprCount()); - printf("\t GetInputComponentsCount() = 0x%08" PRIx32 "\n", vs.GetInputComponentsCount()); - printf("\t GetUnknown1() = 0x%08" PRIx32 "\n", vs.GetUnknown1()); - printf("\t GetUnknown2() = 0x%08" PRIx32 "\n", vs.GetUnknown2()); + printf("\t vs.data_addr = 0x%016" PRIx64 "\n", vs.vs_regs.data_addr); + printf("\t es.data_addr = 0x%016" PRIx64 "\n", vs.es_regs.data_addr); + printf("\t gs.data_addr = 0x%016" PRIx64 "\n", vs.gs_regs.data_addr); + + if (vs.vs_regs.data_addr != 0) + { + printf("\t vs.vgprs = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc1.vgprs); + printf("\t vs.sgprs = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc1.sgprs); + printf("\t vs.priority = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc1.priority); + printf("\t vs.float_mode = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc1.float_mode); + printf("\t vs.dx10_clamp = %s\n", vs.vs_regs.rsrc1.dx10_clamp ? "true" : "false"); + printf("\t vs.ieee_mode = %s\n", vs.vs_regs.rsrc1.ieee_mode ? "true" : "false"); + printf("\t vs.vgpr_component_count = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc1.vgpr_component_count); + printf("\t vs.cu_group_enable = %s\n", vs.vs_regs.rsrc1.cu_group_enable ? "true" : "false"); + printf("\t vs.require_forward_progress = %s\n", vs.vs_regs.rsrc1.require_forward_progress ? "true" : "false"); + printf("\t vs.fp16_overflow = %s\n", vs.vs_regs.rsrc1.fp16_overflow ? "true" : "false"); + printf("\t vs.scratch_en = %s\n", vs.vs_regs.rsrc2.scratch_en ? "true" : "false"); + printf("\t vs.user_sgpr = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc2.user_sgpr); + printf("\t vs.offchip_lds = %s\n", vs.vs_regs.rsrc2.offchip_lds ? "true" : "false"); + printf("\t vs.streamout_enabled = %s\n", vs.vs_regs.rsrc2.streamout_enabled ? "true" : "false"); + printf("\t vs.shared_vgprs = 0x%02" PRIx8 "\n", vs.vs_regs.rsrc2.shared_vgprs); + } + + if (vs.gs_regs.data_addr != 0 || vs.es_regs.data_addr != 0) + { + printf("\t chksum = 0x%016" PRIx64 "\n", vs.gs_regs.chksum); + printf("\t gs.vgprs = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc1.vgprs); + printf("\t gs.sgprs = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc1.sgprs); + printf("\t gs.priority = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc1.priority); + printf("\t gs.float_mode = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc1.float_mode); + printf("\t gs.dx10_clamp = %s\n", vs.gs_regs.rsrc1.dx10_clamp ? "true" : "false"); + printf("\t gs.ieee_mode = %s\n", vs.gs_regs.rsrc1.ieee_mode ? "true" : "false"); + printf("\t gs.debug_mode = %s\n", vs.gs_regs.rsrc1.debug_mode ? "true" : "false"); + printf("\t gs.lds_configuration = %s\n", vs.gs_regs.rsrc1.lds_configuration ? "true" : "false"); + printf("\t gs.cu_group_enable = %s\n", vs.gs_regs.rsrc1.cu_group_enable ? "true" : "false"); + printf("\t gs.require_forward_progress = %s\n", vs.gs_regs.rsrc1.require_forward_progress ? "true" : "false"); + printf("\t gs.fp16_overflow = %s\n", vs.gs_regs.rsrc1.fp16_overflow ? "true" : "false"); + printf("\t gs.gs_vgpr_component_count = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc1.gs_vgpr_component_count); + printf("\t gs.scratch_en = %s\n", vs.gs_regs.rsrc2.scratch_en ? "true" : "false"); + printf("\t gs.user_sgpr = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc2.user_sgpr); + printf("\t gs.offchip_lds = %s\n", vs.gs_regs.rsrc2.offchip_lds ? "true" : "false"); + printf("\t gs.shared_vgprs = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc2.shared_vgprs); + printf("\t gs.es_vgpr_component_count = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc2.es_vgpr_component_count); + printf("\t gs.lds_size = 0x%02" PRIx8 "\n", vs.gs_regs.rsrc2.lds_size); + } + printf("\t m_spiVsOutConfig = 0x%08" PRIx32 "\n", sh.m_spiVsOutConfig); printf("\t m_spiShaderPosFormat = 0x%08" PRIx32 "\n", sh.m_spiShaderPosFormat); printf("\t m_paClVsOutCntl = 0x%08" PRIx32 "\n", sh.m_paClVsOutCntl); - printf("\t m_spiShaderIdxFormat = 0x%08" PRIx32 "\n", sh.m_spiShaderIdxFormat); printf("\t m_geNggSubgrpCntl = 0x%08" PRIx32 "\n", sh.m_geNggSubgrpCntl); printf("\t m_vgtGsInstanceCnt = 0x%08" PRIx32 "\n", sh.m_vgtGsInstanceCnt); @@ -2118,21 +2157,30 @@ static void ps_print(const char* func, const HW::PsStageRegisters& ps, const HW: { printf("%s\n", func); - // printf("\t GetGpuAddress() = 0x%016" PRIx64 "\n", ps.GetGpuAddress()); - // printf("\t m_spiShaderPgmRsrc1Ps = 0x%08" PRIx32 "\n", ps.m_spiShaderPgmRsrc1Ps); - // printf("\t m_spiShaderPgmRsrc2Ps = 0x%08" PRIx32 "\n", ps.m_spiShaderPgmRsrc2Ps); - // printf("\t GetTargetOutputMode(0) = 0x%08" PRIx32 "\n", ps.GetTargetOutputMode(0)); printf("\t data_addr = 0x%016" PRIx64 "\n", ps.data_addr); + printf("\t chksum = 0x%016" PRIx64 "\n", ps.chksum); printf("\t conservative_z_export_value = 0x%08" PRIx32 "\n", sh.db_shader_control.conservative_z_export_value); printf("\t shader_z_behavior = 0x%08" PRIx32 "\n", sh.db_shader_control.shader_z_behavior); printf("\t shader_kill_enable = %s\n", sh.db_shader_control.shader_kill_enable ? "true" : "false"); printf("\t shader_z_export_enable = %s\n", sh.db_shader_control.shader_z_export_enable ? "true" : "false"); printf("\t shader_execute_on_noop = %s\n", sh.db_shader_control.shader_execute_on_noop ? "true" : "false"); - printf("\t vgprs = 0x%02" PRIx8 "\n", ps.vgprs); - printf("\t sgprs = 0x%02" PRIx8 "\n", ps.sgprs); - printf("\t scratch_en = 0x%02" PRIx8 "\n", ps.scratch_en); - printf("\t user_sgpr = 0x%02" PRIx8 "\n", ps.user_sgpr); - printf("\t wave_cnt_en = 0x%02" PRIx8 "\n", ps.wave_cnt_en); + printf("\t vgprs = 0x%02" PRIx8 "\n", ps.rsrc1.vgprs); + printf("\t sgprs = 0x%02" PRIx8 "\n", ps.rsrc1.sgprs); + printf("\t priority = 0x%02" PRIx8 "\n", ps.rsrc1.priority); + printf("\t float_mode = 0x%02" PRIx8 "\n", ps.rsrc1.float_mode); + printf("\t dx10_clamp = %s\n", ps.rsrc1.dx10_clamp ? "true" : "false"); + printf("\t debug_mode = %s\n", ps.rsrc1.debug_mode ? "true" : "false"); + printf("\t ieee_mode = %s\n", ps.rsrc1.ieee_mode ? "true" : "false"); + printf("\t cu_group_disable = %s\n", ps.rsrc1.cu_group_disable ? "true" : "false"); + printf("\t require_forward_progress = %s\n", ps.rsrc1.require_forward_progress ? "true" : "false"); + printf("\t fp16_overflow = %s\n", ps.rsrc1.fp16_overflow ? "true" : "false"); + printf("\t scratch_en = %s\n", ps.rsrc2.scratch_en ? "true" : "false"); + printf("\t user_sgpr = 0x%02" PRIx8 "\n", ps.rsrc2.user_sgpr); + printf("\t wave_cnt_en = %s\n", ps.rsrc2.wave_cnt_en ? "true" : "false"); + printf("\t extra_lds_size = 0x%02" PRIx8 "\n", ps.rsrc2.extra_lds_size); + printf("\t raster_ordered_shading = %s\n", ps.rsrc2.raster_ordered_shading ? "true" : "false"); + printf("\t shared_vgprs = 0x%02" PRIx8 "\n", ps.rsrc2.shared_vgprs); + printf("\t shader_z_format = 0x%08" PRIx32 "\n", sh.shader_z_format); printf("\t target_output_mode[0] = 0x%02" PRIx8 "\n", sh.target_output_mode[0]); printf("\t ps_input_ena = 0x%08" PRIx32 "\n", sh.ps_input_ena); @@ -2196,14 +2244,43 @@ static void bi_print(const char* func, const ShaderBinaryInfo& bi) printf("\t crc32 = 0x%08" PRIx32 "\n", bi.crc32); } -static void vs_check(const HW::VsStageRegisters& vs, const HW::ShaderRegisters& sh) +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +static void vs_check(const HW::VertexShaderInfo& vs, const HW::ShaderRegisters& sh) { - EXIT_NOT_IMPLEMENTED(vs.GetStreamoutEnabled() != false); - // EXIT_NOT_IMPLEMENTED(vs.GetSgprCount() != 0x00000000); - // EXIT_NOT_IMPLEMENTED(vs.GetInputComponentsCount() != 0x00000003); - // EXIT_NOT_IMPLEMENTED(vs.GetUnknown1() != 0x002c0000); - // EXIT_NOT_IMPLEMENTED(vs.GetUnknown2() != 0x00000000); - // EXIT_NOT_IMPLEMENTED(vs.m_spiVsOutConfig != 0x00000000); + if (vs.vs_regs.data_addr != 0) + { + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.priority != 0); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.float_mode != 192); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.dx10_clamp != true); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.ieee_mode != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.cu_group_enable != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.require_forward_progress != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc1.fp16_overflow != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc2.scratch_en != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc2.offchip_lds != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc2.streamout_enabled != false); + EXIT_NOT_IMPLEMENTED(vs.vs_regs.rsrc2.shared_vgprs != 0); + } + + if (vs.es_regs.data_addr != 0 || vs.gs_regs.data_addr != 0) + { + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.priority != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.float_mode != 192); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.dx10_clamp != true); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.debug_mode != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.ieee_mode != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.cu_group_enable != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.require_forward_progress != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.lds_configuration != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.gs_vgpr_component_count != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.fp16_overflow != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.scratch_en != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.offchip_lds != false); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.es_vgpr_component_count != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.lds_size != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.shared_vgprs != 0); + } + EXIT_NOT_IMPLEMENTED(sh.m_spiShaderPosFormat != 0x00000004); EXIT_NOT_IMPLEMENTED(sh.m_paClVsOutCntl != 0x00000000); @@ -2231,9 +2308,21 @@ static void ps_check(const HW::PsStageRegisters& ps, const HW::ShaderRegisters& // EXIT_NOT_IMPLEMENTED(ps.m_spiShaderPgmRsrc2Ps != 0x00000000); // EXIT_NOT_IMPLEMENTED(ps.vgprs != 0x00 && ps.vgprs != 0x01); // EXIT_NOT_IMPLEMENTED(ps.sgprs != 0x00 && ps.sgprs != 0x01); - EXIT_NOT_IMPLEMENTED(ps.scratch_en != 0); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.priority != 0); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.float_mode != 192); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.dx10_clamp != true); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.debug_mode != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.ieee_mode != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.cu_group_disable != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.require_forward_progress != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc1.fp16_overflow != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc2.scratch_en != false); // EXIT_NOT_IMPLEMENTED(ps.user_sgpr != 0 && ps.user_sgpr != 4 && ps.user_sgpr != 12); - EXIT_NOT_IMPLEMENTED(ps.wave_cnt_en != 0); + EXIT_NOT_IMPLEMENTED(ps.rsrc2.wave_cnt_en != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc2.extra_lds_size != 0); + EXIT_NOT_IMPLEMENTED(ps.rsrc2.raster_ordered_shading != false); + EXIT_NOT_IMPLEMENTED(ps.rsrc2.shared_vgprs != 0); + EXIT_NOT_IMPLEMENTED(sh.shader_z_format != 0x00000000); EXIT_NOT_IMPLEMENTED(sh.ps_input_ena != 0x00000002 && sh.ps_input_ena != 0x00000302); EXIT_NOT_IMPLEMENTED(sh.ps_input_addr != 0x00000002 && sh.ps_input_addr != 0x00000302); @@ -2756,7 +2845,7 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi return; } - const auto* src = reinterpret_cast(regs->vs_regs.GetGpuAddress()); + const auto* src = reinterpret_cast(regs->vs_regs.data_addr); auto usages = GetUsageSlots(src); @@ -3400,12 +3489,14 @@ ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegis code.SetVsEmbeddedId(regs->vs_embedded_id); } else { - const auto* src = reinterpret_cast(regs->vs_regs.GetGpuAddress()); + const auto* src = reinterpret_cast(regs->vs_regs.data_addr); EXIT_NOT_IMPLEMENTED(src == nullptr); - vs_print("ShaderParseVS()", regs->vs_regs, *sh); - vs_check(regs->vs_regs, *sh); + vs_print("ShaderParseVS()", *regs, *sh); + vs_check(*regs, *sh); + + EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count); const auto* header = GetBinaryInfo(src); @@ -3489,7 +3580,7 @@ ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs, const HW::ShaderRegist ps_print("ShaderParsePS()", regs->ps_regs, *sh); ps_check(regs->ps_regs, *sh); - EXIT_NOT_IMPLEMENTED(regs->ps_regs.user_sgpr > regs->ps_user_sgpr.count); + EXIT_NOT_IMPLEMENTED(regs->ps_regs.rsrc2.user_sgpr > regs->ps_user_sgpr.count); const auto* header = GetBinaryInfo(src); @@ -3853,7 +3944,7 @@ ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInput return ret; } - const auto* src = reinterpret_cast(regs->vs_regs.GetGpuAddress()); + const auto* src = reinterpret_cast(regs->vs_regs.data_addr); EXIT_NOT_IMPLEMENTED(src == nullptr); @@ -3999,6 +4090,15 @@ bool ShaderIsDisabled(uint64_t addr) return disabled; } +bool ShaderIsDisabled2(uint64_t addr, uint64_t chksum) +{ + bool disabled = (g_disabled_shaders != nullptr && g_disabled_shaders->Contains(chksum)); + + printf("Shader 0x%016" PRIx64 ": id = 0x%016" PRIx64 " - %s\n", addr, chksum, (disabled ? "disabled" : "enabled")); + + return disabled; +} + void ShaderDisable(uint64_t id) { if (g_disabled_shaders == nullptr)