From 90acd3d3642351bf1075ed754ca3feb9acb70e26 Mon Sep 17 00:00:00 2001 From: InoriRus Date: Tue, 16 Aug 2022 13:38:38 +0300 Subject: [PATCH] PS5 graphics (#42) --- appveyor.yml | 2 +- source/CMakeLists.txt | 2 +- source/emulator/include/Emulator/Config.h | 3 + .../include/Emulator/Graphics/Graphics.h | 6 +- .../Emulator/Graphics/HardwareContext.h | 4 +- .../Graphics/Objects/StorageTexture.h | 6 +- .../Emulator/Graphics/Objects/Texture.h | 8 +- .../include/Emulator/Graphics/Shader.h | 292 +- .../include/Emulator/Graphics/ShaderParse.h | 21 + .../include/Emulator/Graphics/ShaderSpirv.h | 10 +- .../emulator/include/Emulator/Graphics/Tile.h | 4 +- .../emulator/include/Emulator/Kernel/Memory.h | 4 + source/emulator/include/Emulator/Libs/Errno.h | 13 + source/emulator/include/Emulator/Libs/Libs.h | 5 +- .../include/Emulator/Loader/VirtualMemory.h | 1 + source/emulator/src/Config.cpp | 13 +- source/emulator/src/Graphics/Graphics.cpp | 166 +- .../emulator/src/Graphics/GraphicsRender.cpp | 562 ++- source/emulator/src/Graphics/GraphicsRun.cpp | 62 +- .../src/Graphics/Objects/StorageTexture.cpp | 51 +- .../emulator/src/Graphics/Objects/Texture.cpp | 135 +- source/emulator/src/Graphics/Shader.cpp | 2996 +++++--------- source/emulator/src/Graphics/ShaderParse.cpp | 3350 ++++++++++++++++ source/emulator/src/Graphics/ShaderSpirv.cpp | 3559 ++++++++++------- .../Graphics/Tables/TileTextureInfo_0_56.inc | 2373 +++++++++++ source/emulator/src/Graphics/Tile.cpp | 231 +- source/emulator/src/Graphics/VideoOut.cpp | 4 +- source/emulator/src/Kernel/Memory.cpp | 51 + source/emulator/src/Kyty.cpp | 26 + source/emulator/src/Libs/LibDbgAsan.cpp | 251 ++ .../emulator/src/Libs/LibGraphicsDriver.cpp | 1 + source/emulator/src/Libs/LibKernel.cpp | 23 +- source/emulator/src/Libs/Libs.cpp | 25 +- source/emulator/src/Loader/RuntimeLinker.cpp | 16 +- source/emulator/src/Loader/VirtualMemory.cpp | 19 +- source/include/Kyty/Core/MagicEnum.h | 19 + source/include/Kyty/Core/String8.h | 182 + 
.../forms/configuration_edit_dialog.ui | 37 +- source/launcher/include/Configuration.h | 24 +- .../include/ConfigurationEditDialog.h | 2 +- .../launcher/src/ConfigurationEditDialog.cpp | 88 +- source/launcher/src/MainDialog.cpp | 27 +- source/launcher/ts/launcher_ru_RU.ts | 32 +- source/lib/Core/src/String8.cpp | 1157 ++++++ source/unit_test/src/UnitTest.cpp | 2 + .../src/core/UnitTestCoreString8.cpp | 739 ++++ 46 files changed, 12642 insertions(+), 3962 deletions(-) create mode 100644 source/emulator/include/Emulator/Graphics/ShaderParse.h create mode 100644 source/emulator/src/Graphics/ShaderParse.cpp create mode 100644 source/emulator/src/Graphics/Tables/TileTextureInfo_0_56.inc create mode 100644 source/emulator/src/Libs/LibDbgAsan.cpp create mode 100644 source/include/Kyty/Core/String8.h create mode 100644 source/lib/Core/src/String8.cpp create mode 100644 source/unit_test/src/core/UnitTestCoreString8.cpp diff --git a/appveyor.yml b/appveyor.yml index a0627e4..c62ca9a 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.1.11.build-{build} +version: 0.1.12.build-{build} image: Visual Studio 2019 environment: matrix: diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 6ea50c0..8dd52ad 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD) set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000") endif() -project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.11) +project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.12) include(src_script.cmake) diff --git a/source/emulator/include/Emulator/Config.h b/source/emulator/include/Emulator/Config.h index f80f05b..de1e3e2 100644 --- a/source/emulator/include/Emulator/Config.h +++ b/source/emulator/include/Emulator/Config.h @@ -41,9 +41,12 @@ enum class ProfilerDirection void Load(const Scripts::ScriptVar& cfg); +void SetNextGen(bool mode); + uint32_t GetScreenWidth(); uint32_t 
GetScreenHeight(); bool IsNeo(); +bool IsNextGen(); bool VulkanValidationEnabled(); bool ShaderValidationEnabled(); diff --git a/source/emulator/include/Emulator/Graphics/Graphics.h b/source/emulator/include/Emulator/Graphics/Graphics.h index 20ccc23..7379f21 100644 --- a/source/emulator/include/Emulator/Graphics/Graphics.h +++ b/source/emulator/include/Emulator/Graphics/Graphics.h @@ -11,6 +11,9 @@ namespace Kyty::Libs::Graphics { +struct Shader; +struct ShaderRegister; + KYTY_SUBSYSTEM_DEFINE(Graphics); void GraphicsDbgDumpDcb(const char* type, uint32_t num_dw, uint32_t* cmd_buffer); @@ -69,9 +72,7 @@ int KYTY_SYSV_ABI GraphicsUnregisterResource(uint32_t resource_handle); namespace Gen5 { -struct Shader; struct CommandBuffer; -struct ShaderRegister; struct Label; int KYTY_SYSV_ABI GraphicsInit(uint32_t* state, uint32_t ver); @@ -97,6 +98,7 @@ uint32_t* KYTY_SYSV_ABI GraphicsCbReleaseMem(CommandBuffer* buf, uint8_t action, uint16_t gds_size, uint8_t interrupt, uint32_t interrupt_ctx_id); uint32_t* KYTY_SYSV_ABI GraphicsDcbResetQueue(CommandBuffer* buf, uint32_t op, uint32_t state); uint32_t* KYTY_SYSV_ABI GraphicsDcbWaitUntilSafeForRendering(CommandBuffer* buf, uint32_t video_out_handle, uint32_t display_buffer_index); +uint32_t* KYTY_SYSV_ABI GraphicsDcbSetShRegisterDirect(CommandBuffer* buf, ShaderRegister reg); uint32_t* KYTY_SYSV_ABI GraphicsDcbSetCxRegistersIndirect(CommandBuffer* buf, const volatile ShaderRegister* regs, uint32_t num_regs); uint32_t* KYTY_SYSV_ABI GraphicsDcbSetShRegistersIndirect(CommandBuffer* buf, const volatile ShaderRegister* regs, uint32_t num_regs); uint32_t* KYTY_SYSV_ABI GraphicsDcbSetUcRegistersIndirect(CommandBuffer* buf, const volatile ShaderRegister* regs, uint32_t num_regs); diff --git a/source/emulator/include/Emulator/Graphics/HardwareContext.h b/source/emulator/include/Emulator/Graphics/HardwareContext.h index befcee0..14aa177 100644 --- a/source/emulator/include/Emulator/Graphics/HardwareContext.h +++ 
b/source/emulator/include/Emulator/Graphics/HardwareContext.h @@ -353,7 +353,7 @@ struct ColorControl struct ScanModeControl { bool msaa_enable = false; - bool vport_scissor_enable = false; + bool vport_scissor_enable = true; bool line_stipple_enable = false; }; @@ -401,7 +401,7 @@ struct Viewport struct ScreenViewport { Viewport viewports[15]; - uint32_t transform_control = 0; + uint32_t transform_control = 1087; int screen_scissor_left = 0; int screen_scissor_top = 0; int screen_scissor_right = 0; diff --git a/source/emulator/include/Emulator/Graphics/Objects/StorageTexture.h b/source/emulator/include/Emulator/Graphics/Objects/StorageTexture.h index eafd314..cbfb382 100644 --- a/source/emulator/include/Emulator/Graphics/Objects/StorageTexture.h +++ b/source/emulator/include/Emulator/Graphics/Objects/StorageTexture.h @@ -13,7 +13,7 @@ namespace Kyty::Libs::Graphics { class StorageTextureObject: public GpuObject { public: - static constexpr int PARAM_DFMT_NFMT = 0; + static constexpr int PARAM_FORMAT = 0; static constexpr int PARAM_PITCH = 1; static constexpr int PARAM_WIDTH_HEIGHT = 2; static constexpr int PARAM_LEVELS = 3; @@ -21,10 +21,10 @@ public: static constexpr int PARAM_NEO = 5; static constexpr int PARAM_SWIZZLE = 6; - StorageTextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t base_level, + StorageTextureObject(uint8_t dfmt, uint8_t nfmt, uint16_t fmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t base_level, uint32_t levels, uint32_t tile, bool neo, uint32_t swizzle) { - params[PARAM_DFMT_NFMT] = (static_cast(dfmt) << 32u) | nfmt; + params[PARAM_FORMAT] = (static_cast(fmt) << 16u) | (static_cast(dfmt) << 8u) | nfmt; params[PARAM_PITCH] = pitch; params[PARAM_WIDTH_HEIGHT] = (static_cast(width) << 32u) | height; params[PARAM_LEVELS] = (static_cast(base_level) << 32u) | levels; diff --git a/source/emulator/include/Emulator/Graphics/Objects/Texture.h 
b/source/emulator/include/Emulator/Graphics/Objects/Texture.h index 7aebace..518f33e 100644 --- a/source/emulator/include/Emulator/Graphics/Objects/Texture.h +++ b/source/emulator/include/Emulator/Graphics/Objects/Texture.h @@ -13,7 +13,7 @@ namespace Kyty::Libs::Graphics { class TextureObject: public GpuObject { public: - static constexpr int PARAM_DFMT_NFMT = 0; + static constexpr int PARAM_FORMAT = 0; static constexpr int PARAM_PITCH = 1; static constexpr int PARAM_WIDTH_HEIGHT = 2; static constexpr int PARAM_LEVELS = 3; @@ -21,10 +21,10 @@ public: static constexpr int PARAM_NEO = 5; static constexpr int PARAM_SWIZZLE = 6; - TextureObject(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t base_level, uint32_t levels, - uint32_t tile, bool neo, uint32_t swizzle) + TextureObject(uint8_t dfmt, uint8_t nfmt, uint16_t fmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t base_level, + uint32_t levels, uint32_t tile, bool neo, uint32_t swizzle) { - params[PARAM_DFMT_NFMT] = (static_cast(dfmt) << 32u) | nfmt; + params[PARAM_FORMAT] = (static_cast(fmt) << 16u) | (static_cast(dfmt) << 8u) | nfmt; params[PARAM_PITCH] = pitch; params[PARAM_WIDTH_HEIGHT] = (static_cast(width) << 32u) | height; params[PARAM_LEVELS] = (static_cast(base_level) << 32u) | levels; diff --git a/source/emulator/include/Emulator/Graphics/Shader.h b/source/emulator/include/Emulator/Graphics/Shader.h index 2f622ab..6ef0f55 100644 --- a/source/emulator/include/Emulator/Graphics/Shader.h +++ b/source/emulator/include/Emulator/Graphics/Shader.h @@ -3,6 +3,7 @@ #include "Kyty/Core/Common.h" #include "Kyty/Core/String.h" +#include "Kyty/Core/String8.h" #include "Kyty/Core/Vector.h" #include "Emulator/Common.h" @@ -29,7 +30,7 @@ enum class ShaderType Compute }; -enum class ShaderInstructionType +enum class ShaderInstructionType : uint32_t { Unknown, @@ -57,6 +58,8 @@ enum class ShaderInstructionType SAndB64, SAndn2B64, SAndSaveexecB64, + SBfeU32, + SBfeU64, 
SBfmB32, SBranch, SBufferLoadDword, @@ -84,20 +87,30 @@ enum class ShaderInstructionType SCselectB32, SCselectB64, SEndpgm, + SInstPrefetch, + SLoadDword, + SLoadDwordx2, SLoadDwordx4, SLoadDwordx8, + SLoadDwordx16, + SLshl4AddU32, SLshlB32, SLshrB32, + SLshlB64, + SLshrB64, SMovB32, SMovB64, SMovkI32, SMulI32, SNandB64, SNorB64, + SOrB32, SOrB64, SOrn2B64, + SSendmsg, SSetpcB64, SSwappcB64, + SSubI32, SWaitcnt, SWqmB64, SXnorB64, @@ -207,6 +220,12 @@ enum class ShaderInstructionType VSubrevI32, VTruncF32, VXorB32, + + FetchX, + FetchXy, + FetchXyz, + FetchXyzw, + ZMax }; @@ -253,6 +272,7 @@ enum FormatByte : uint64_t Param3, // param3 Param4, // param4 Mrt0, // mrt_color0 + Prim, // prim Off, // off Compr, // compr Vm, // vm @@ -292,9 +312,12 @@ enum Format : uint64_t Param3Vsrc0Vsrc1Vsrc2Vsrc3 = FormatDefine({Param3, S0, S1, S2, S3}), Param4Vsrc0Vsrc1Vsrc2Vsrc3 = FormatDefine({Param4, S0, S1, S2, S3}), Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done = FormatDefine({Pos0, S0, S1, S2, S3, Done}), + PrimVsrc0OffOffOffDone = FormatDefine({Prim, S0, Off, Off, Off, Done}), Saddr = FormatDefine({S0A2}), + SdstSbaseSoffset = FormatDefine({D, S0A2, S1}), Sdst16SvSoffset = FormatDefine({DA16, S0A4, S1}), Sdst2Ssrc02 = FormatDefine({DA2, S0A2}), + Sdst2Ssrc02Ssrc1 = FormatDefine({DA2, S0A2, S1}), Sdst2Ssrc02Ssrc12 = FormatDefine({DA2, S0A2, S1A2}), Sdst2SvSoffset = FormatDefine({DA2, S0A4, S1}), Sdst4SbaseSoffset = FormatDefine({DA4, S0A2, S1}), @@ -332,6 +355,12 @@ enum Format : uint64_t } // namespace ShaderInstructionFormat +struct ShaderInstructionTypeFormat +{ + ShaderInstructionType type = ShaderInstructionType::Unknown; + ShaderInstructionFormat::Format format = ShaderInstructionFormat::Unknown; +}; + enum class ShaderOperandType { Unknown, @@ -346,7 +375,8 @@ enum class ShaderOperandType Scc, Vgpr, Sgpr, - M0 + M0, + Null }; union ShaderConstant @@ -387,6 +417,7 @@ struct ShaderInstruction class ShaderLabel { public: + ShaderLabel(uint32_t dst, uint32_t src): m_dst(dst), m_src(src) {} 
explicit ShaderLabel(const ShaderInstruction& inst): m_dst(inst.pc + 4 + inst.src[0].constant.i), m_src(inst.pc) {} ~ShaderLabel() = default; KYTY_CLASS_DEFAULT_COPY(ShaderLabel); @@ -394,7 +425,7 @@ public: [[nodiscard]] uint32_t GetDst() const { return m_dst; } [[nodiscard]] uint32_t GetSrc() const { return m_src; } - [[nodiscard]] String ToString() const { return String::FromPrintf("label_%04" PRIx32 "_%04" PRIx32, m_dst, m_src); } + [[nodiscard]] String8 ToString() const { return String8::FromPrintf("label_%04" PRIx32 "_%04" PRIx32, m_dst, m_src); } void Disable() { @@ -442,12 +473,14 @@ public: Vector& GetInstructions() { return m_instructions; } [[nodiscard]] const Vector& GetLabels() const { return m_labels; } Vector& GetLabels() { return m_labels; } + [[nodiscard]] const Vector& GetIndirectLabels() const { return m_indirect_labels; } + Vector& GetIndirectLabels() { return m_indirect_labels; } [[nodiscard]] const Vector& GetDebugPrintfs() const { return m_debug_printfs; } Vector& GetDebugPrintfs() { return m_debug_printfs; } - [[nodiscard]] String DbgDump() const; + [[nodiscard]] String8 DbgDump() const; - static String DbgInstructionToStr(const ShaderInstruction& inst); + static String8 DbgInstructionToStr(const ShaderInstruction& inst); [[nodiscard]] ShaderType GetType() const { return m_type; } void SetType(ShaderType type) { this->m_type = type; } @@ -484,6 +517,7 @@ private: uint32_t m_crc32 = 0; Vector m_instructions; Vector m_labels; + Vector m_indirect_labels; ShaderType m_type = ShaderType::Unknown; Vector m_debug_printfs; uint32_t m_vs_embedded_id = 0; @@ -516,15 +550,22 @@ struct ShaderBufferResource { uint32_t fields[4] = {0}; - void UpdateAddress(uint64_t gpu_addr) + void UpdateAddress44(uint64_t gpu_addr) { auto lo = static_cast(gpu_addr & 0xffffffffu); auto hi = static_cast(gpu_addr >> 32u); fields[0] = lo; - fields[1] = (fields[1] & 0xfffff000u) | (hi & 0xfffu); + fields[1] = (fields[1] & 0xfffff000u) | (hi & 0x00000fffu); + } + + void 
UpdateAddress48(uint64_t gpu_addr) + { + auto lo = static_cast(gpu_addr & 0xffffffffu); + auto hi = static_cast(gpu_addr >> 32u); + fields[0] = lo; + fields[1] = (fields[1] & 0xffff0000u) | (hi & 0x0000ffffu); } - [[nodiscard]] uint64_t Base() const { return (fields[0] | (static_cast(fields[1]) << 32u)) & 0xFFFFFFFFFFFu; } [[nodiscard]] uint16_t Stride() const { return (fields[1] >> 16u) & 0x3FFFu; } [[nodiscard]] bool SwizzleEnabled() const { return ((fields[1] >> 31u) & 0x1u) == 1; } [[nodiscard]] uint32_t NumRecords() const { return fields[2]; } @@ -535,11 +576,16 @@ struct ShaderBufferResource [[nodiscard]] uint32_t DstSelXY() const { return (fields[3] >> 0u) & 0x3Fu; } [[nodiscard]] uint32_t DstSelXYZ() const { return (fields[3] >> 0u) & 0x1FFu; } [[nodiscard]] uint32_t DstSelXYZW() const { return (fields[3] >> 0u) & 0xFFFu; } - [[nodiscard]] uint8_t Nfmt() const { return (fields[3] >> 12u) & 0x7u; } - [[nodiscard]] uint8_t Dfmt() const { return (fields[3] >> 15u) & 0xFu; } [[nodiscard]] bool AddTid() const { return ((fields[3] >> 23u) & 0x1u) == 1; } - [[nodiscard]] uint8_t MemoryType() const + [[nodiscard]] uint64_t Base48() const { return (fields[0] | (static_cast(fields[1]) << 32u)) & 0xFFFFFFFFFFFFu; } + [[nodiscard]] uint8_t Format() const { return (fields[3] >> 12u) & 0x7Fu; } + [[nodiscard]] uint8_t OutOfBounds() const { return (fields[3] >> 28u) & 0x3u; } + + [[nodiscard]] uint64_t Base44() const { return (fields[0] | (static_cast(fields[1]) << 32u)) & 0x0FFFFFFFFFFFu; } + [[nodiscard]] uint8_t Nfmt() const { return (fields[3] >> 12u) & 0x7u; } + [[nodiscard]] uint8_t Dfmt() const { return (fields[3] >> 15u) & 0xFu; } + [[nodiscard]] uint8_t MemoryType() const { return ((fields[1] >> 7u) & 0x60u) | ((fields[3] >> 25u) & 0x1cu) | ((fields[1] >> 14u) & 0x3u); } @@ -549,22 +595,23 @@ struct ShaderTextureResource { uint32_t fields[8] = {0}; - void UpdateAddress(uint64_t gpu_addr) + void UpdateAddress38(uint64_t gpu_addr) { auto lo = static_cast(gpu_addr & 
0xffffffffu); auto hi = static_cast(gpu_addr >> 32u); fields[0] = lo; - fields[1] = (fields[1] & 0xffffffc0u) | (hi & 0x3fu); + fields[1] = (fields[1] & 0xffffffc0u) | (hi & 0x0000003fu); + } + + void UpdateAddress40(uint64_t gpu_addr) + { + auto lo = static_cast(gpu_addr & 0xffffffffu); + auto hi = static_cast(gpu_addr >> 32u); + fields[0] = lo; + fields[1] = (fields[1] & 0xffffff00u) | (hi & 0x000000ffu); } - [[nodiscard]] uint64_t Base() const { return ((fields[0] | (static_cast(fields[1]) << 32u)) & 0x3FFFFFFFFFu) << 8u; } [[nodiscard]] uint16_t MinLod() const { return (fields[1] >> 8u) & 0xFFFu; } - [[nodiscard]] uint8_t Dfmt() const { return (fields[1] >> 20u) & 0x3Fu; } - [[nodiscard]] uint8_t Nfmt() const { return (fields[1] >> 26u) & 0xFu; } - [[nodiscard]] uint16_t Width() const { return (fields[2] >> 0u) & 0x3FFFu; } - [[nodiscard]] uint16_t Height() const { return (fields[2] >> 14u) & 0x3FFFu; } - [[nodiscard]] uint8_t PerfMod() const { return (fields[2] >> 28u) & 0x7u; } - [[nodiscard]] bool Interlaced() const { return ((fields[2] >> 31u) & 0x1u) == 1; } [[nodiscard]] uint8_t DstSelX() const { return (fields[3] >> 0u) & 0x7u; } [[nodiscard]] uint8_t DstSelY() const { return (fields[3] >> 3u) & 0x7u; } [[nodiscard]] uint8_t DstSelZ() const { return (fields[3] >> 6u) & 0x7u; } @@ -574,19 +621,49 @@ struct ShaderTextureResource [[nodiscard]] uint32_t DstSelXYZW() const { return (fields[3] >> 0u) & 0xFFFu; } [[nodiscard]] uint8_t BaseLevel() const { return (fields[3] >> 12u) & 0xFu; } [[nodiscard]] uint8_t LastLevel() const { return (fields[3] >> 16u) & 0xFu; } - [[nodiscard]] uint8_t TilingIdx() const { return (fields[3] >> 20u) & 0x1Fu; } - [[nodiscard]] bool Pow2Pad() const { return ((fields[3] >> 25u) & 0x1u) == 1; } + [[nodiscard]] uint8_t TileMode() const { return (fields[3] >> 20u) & 0x1Fu; } [[nodiscard]] uint8_t Type() const { return (fields[3] >> 28u) & 0xFu; } - [[nodiscard]] uint16_t Depth() const { return (fields[4] >> 0u) & 0x1FFFu; } + + 
[[nodiscard]] uint64_t Base40() const { return ((fields[0] | (static_cast(fields[1]) << 32u)) & 0xFFFFFFFFFFu) << 8u; } + [[nodiscard]] uint16_t Format() const { return (fields[1] >> 20u) & 0x1FFu; } + [[nodiscard]] uint16_t Width5() const { return ((fields[1] >> 30u) & 0x3u) | (((fields[2] >> 0u) & 0xFFFu) << 2u); } + [[nodiscard]] uint16_t Height5() const { return (fields[2] >> 14u) & 0x3FFFu; } + [[nodiscard]] uint8_t BCSwizzle() const { return (fields[3] >> 25u) & 0x7u; } + [[nodiscard]] uint16_t BaseArray5() const { return (fields[4] >> 16u) & 0x1FFFu; } + [[nodiscard]] uint8_t ArrayPitch() const { return (fields[5] >> 0u) & 0xFu; } + [[nodiscard]] uint8_t MaxMip() const { return (fields[5] >> 4u) & 0xFu; } + [[nodiscard]] uint16_t MinLodWarn5() const { return (fields[5] >> 8u) & 0xFFFu; } + [[nodiscard]] uint8_t PerfMod5() const { return (fields[5] >> 20u) & 0x7u; } + [[nodiscard]] bool CornerSample() const { return ((fields[5] >> 23u) & 0x1u) == 1; } + [[nodiscard]] bool MipStatsCntEn() const { return ((fields[5] >> 25u) & 0x1u) == 1; } + [[nodiscard]] bool PrtDefColor() const { return ((fields[5] >> 26u) & 0x1u) == 1; } + [[nodiscard]] uint8_t MipStatsCntId() const { return (fields[6] >> 0u) & 0xFFu; } + [[nodiscard]] bool MsaaDepth() const { return ((fields[6] >> 10u) & 0x1u) == 1; } + [[nodiscard]] uint8_t MaxUncompBlkSize() const { return (fields[6] >> 15u) & 0x3u; } + [[nodiscard]] uint8_t MaxCompBlkSize() const { return (fields[6] >> 17u) & 0x3u; } + [[nodiscard]] bool MetaPipeAligned() const { return ((fields[6] >> 19u) & 0x1u) == 1; } + [[nodiscard]] bool WriteCompress() const { return ((fields[6] >> 20u) & 0x1u) == 1; } + [[nodiscard]] bool MetaCompress() const { return ((fields[6] >> 21u) & 0x1u) == 1; } + [[nodiscard]] bool DccAlphaPos() const { return ((fields[6] >> 22u) & 0x1u) == 1; } + [[nodiscard]] bool DccColorTransf() const { return ((fields[6] >> 23u) & 0x1u) == 1; } + [[nodiscard]] uint64_t MetaAddr() const { return ((fields[6] >> 24u) & 
0xFFu) | (static_cast(fields[7]) << 8u); } + + [[nodiscard]] uint64_t Base38() const { return ((fields[0] | (static_cast(fields[1]) << 32u)) & 0x3FFFFFFFFFu) << 8u; } + [[nodiscard]] uint8_t Dfmt() const { return (fields[1] >> 20u) & 0x3Fu; } + [[nodiscard]] uint8_t Nfmt() const { return (fields[1] >> 26u) & 0xFu; } + [[nodiscard]] uint16_t Width4() const { return (fields[2] >> 0u) & 0x3FFFu; } + [[nodiscard]] uint16_t Height4() const { return (fields[2] >> 14u) & 0x3FFFu; } + [[nodiscard]] uint8_t PerfMod() const { return (fields[2] >> 28u) & 0x7u; } + [[nodiscard]] bool Interlaced() const { return ((fields[2] >> 31u) & 0x1u) == 1; } + [[nodiscard]] bool Pow2Pad() const { return ((fields[3] >> 25u) & 0x1u) == 1; } [[nodiscard]] uint16_t Pitch() const { return (fields[4] >> 13u) & 0x3FFFu; } [[nodiscard]] uint16_t BaseArray() const { return (fields[5] >> 0u) & 0x1FFFu; } [[nodiscard]] uint16_t LastArray() const { return (fields[5] >> 13u) & 0x1FFFu; } [[nodiscard]] uint16_t MinLodWarn() const { return (fields[6] >> 0u) & 0xFFFu; } - [[nodiscard]] uint8_t CounterBankId() const { return (fields[6] >> 12u) & 0xFFu; } [[nodiscard]] bool LodHdwCntEn() const { return ((fields[6] >> 20u) & 0x1u) == 1; } - - [[nodiscard]] uint8_t MemoryType() const + [[nodiscard]] uint8_t CounterBankId() const { return (fields[6] >> 12u) & 0xFFu; } + [[nodiscard]] uint8_t MemoryType() const { return ((fields[1] >> 6u) & 0x3u) | ((fields[1] >> 30u) << 2u) | ((fields[3] & 0x04000000u) == 0 ? 
0x60u : 0x10u); } @@ -605,7 +682,6 @@ struct ShaderSamplerResource [[nodiscard]] uint8_t DepthCompareFunc() const { return (fields[0] >> 12u) & 0x7u; } [[nodiscard]] bool ForceUnormCoords() const { return ((fields[0] >> 15u) & 0x1u) == 1; } [[nodiscard]] uint8_t AnisoThreshold() const { return (fields[0] >> 16u) & 0x7u; } - [[nodiscard]] bool McCoordTrunc() const { return ((fields[0] >> 19u) & 0x1u) == 1; } [[nodiscard]] bool ForceDegamma() const { return ((fields[0] >> 20u) & 0x1u) == 1; } [[nodiscard]] uint8_t AnisoBias() const { return (fields[0] >> 21u) & 0x3Fu; } [[nodiscard]] bool TruncCoord() const { return ((fields[0] >> 27u) & 0x1u) == 1; } @@ -623,6 +699,13 @@ struct ShaderSamplerResource [[nodiscard]] uint8_t MipFilter() const { return (fields[2] >> 26u) & 0x3u; } [[nodiscard]] uint16_t BorderColorPtr() const { return (fields[3] >> 0u) & 0xFFFu; } [[nodiscard]] uint8_t BorderColorType() const { return (fields[3] >> 30u) & 0x3u; } + + [[nodiscard]] bool SkipDegamma() const { return ((fields[0] >> 31u) & 0x1u) == 1; } + [[nodiscard]] bool PointPreclamp() const { return ((fields[3] >> 28u) & 0x1u) == 1; } + [[nodiscard]] bool AnisoOverride() const { return ((fields[3] >> 28u) & 0x1u) == 1; } + [[nodiscard]] bool BlendZeroPrt() const { return ((fields[3] >> 28u) & 0x1u) == 1; } + + [[nodiscard]] bool McCoordTrunc() const { return ((fields[0] >> 19u) & 0x1u) == 1; } }; struct ShaderGdsResource @@ -760,25 +843,24 @@ struct ShaderBindResources ShaderExtendedResources extended; }; -// struct ShaderBindParameters -//{ -// bool textures2d_without_sampler[ShaderTextureResources::RES_MAX] = {}; -// int textures2d_sampled_num = 0; -// int textures2d_storage_num = 0; -//}; - struct ShaderVertexInputInfo { static constexpr int RES_MAX = 16; ShaderBufferResource resources[RES_MAX]; ShaderVertexDestination resources_dst[RES_MAX]; - int resources_num = 0; - bool fetch = false; ShaderVertexInputBuffer buffers[RES_MAX]; - int buffers_num = 0; - int export_count = 0; 
ShaderBindResources bind; + int resources_num = 0; + int fetch_shader_reg = 0; + int fetch_attrib_reg = 0; + int fetch_buffer_reg = 0; + int buffers_num = 0; + int export_count = 0; + bool fetch_external = false; + bool fetch_embedded = false; + bool fetch_inline = false; + bool gs_prolog = false; }; struct ShaderComputeInputInfo @@ -802,23 +884,123 @@ struct ShaderPixelInputInfo ShaderBindResources bind; }; -void ShaderCalcBindingIndices(ShaderBindResources* bind); -void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderVertexInputInfo* info); -void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegisters* sh, const ShaderVertexInputInfo* vs_info, - ShaderPixelInputInfo* ps_info); -void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderComputeInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info); -void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info); -ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info); -ShaderId ShaderGetIdPS(const HW::PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info); -ShaderId ShaderGetIdCS(const HW::ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info); -ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh); -ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs, const HW::ShaderRegisters* sh); -ShaderCode ShaderParseCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegisters* sh); -// ShaderBindParameters ShaderGetBindParametersVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info); -// ShaderBindParameters ShaderGetBindParametersPS(const ShaderCode& code, const ShaderPixelInputInfo* input_info); -// ShaderBindParameters ShaderGetBindParametersCS(const ShaderCode& code, const ShaderComputeInputInfo* 
input_info); +struct ShaderSharp +{ + uint16_t offset_dw : 15; + uint16_t size : 1; +}; + +struct ShaderUserData +{ + uint16_t* direct_resource_offset; + ShaderSharp* sharp_resource_offset[4]; + uint16_t eud_size_dw; + uint16_t srt_size_dw; + uint16_t direct_resource_count; + uint16_t sharp_resource_count[4]; +}; + +struct ShaderRegisterRange +{ + uint16_t start; + uint16_t end; +}; + +struct ShaderDrawModifier +{ + uint32_t enbl_start_vertex_offset : 1; + uint32_t enbl_start_index_offset : 1; + uint32_t enbl_start_instance_offset : 1; + uint32_t enbl_draw_index : 1; + uint32_t enbl_user_vgprs : 1; + uint32_t render_target_slice_offset : 3; + uint32_t fuse_draws : 1; + uint32_t compiler_flags : 23; + uint32_t is_default : 1; + uint32_t reserved : 31; +}; + +struct ShaderRegister +{ + uint32_t offset; + uint32_t value; +}; + +struct ShaderSpecialRegs +{ + ShaderRegister ge_cntl; + ShaderRegister vgt_shader_stages_en; + uint32_t dispatch_modifier; + ShaderRegisterRange user_data_range; + ShaderDrawModifier draw_modifier; + ShaderRegister vgt_gs_out_prim_type; + ShaderRegister ge_user_vgpr_en; +}; + +struct ShaderSemantic +{ + uint32_t semantic : 8; + uint32_t hardware_mapping : 8; + uint32_t size_in_elements : 4; + uint32_t is_f16 : 2; + uint32_t is_flat_shaded : 1; + uint32_t is_linear : 1; + uint32_t is_custom : 1; + uint32_t static_vb_index : 1; + uint32_t static_attribute : 1; + uint32_t reserved : 1; + uint32_t default_value : 2; + uint32_t default_value_hi : 2; +}; + +struct Shader +{ + uint32_t file_header; + uint32_t version; + ShaderUserData* user_data; + const volatile void* code; + ShaderRegister* cx_registers; + ShaderRegister* sh_registers; + ShaderSpecialRegs* specials; + ShaderSemantic* input_semantics; + ShaderSemantic* output_semantics; + uint32_t header_size; + uint32_t shader_size; + uint32_t embedded_constant_buffer_size_dqw; + uint32_t target; + uint32_t num_input_semantics; + uint16_t scratch_size_dw_per_thread; + uint16_t num_output_semantics; 
+ uint16_t special_sizes_bytes; + uint8_t type; + uint8_t num_cx_registers; + uint8_t num_sh_registers; +}; + +struct ShaderMappedData +{ + ShaderUserData* user_data = nullptr; + ShaderSemantic* input_semantics = nullptr; + uint32_t num_input_semantics = 0; +}; + +void ShaderInit(); +void ShaderMapUserData(uint64_t addr, const ShaderMappedData& data); + +void ShaderCalcBindingIndices(ShaderBindResources* bind); +void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderVertexInputInfo* info); +void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegisters* sh, const ShaderVertexInputInfo* vs_info, + ShaderPixelInputInfo* ps_info); +void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderComputeInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderPixelInputInfo* info); +void ShaderDbgDumpInputInfo(const ShaderComputeInputInfo* info); +ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInputInfo* input_info); +ShaderId ShaderGetIdPS(const HW::PixelShaderInfo* regs, const ShaderPixelInputInfo* input_info); +ShaderId ShaderGetIdCS(const HW::ComputeShaderInfo* regs, const ShaderComputeInputInfo* input_info); +ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh); +ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs, const HW::ShaderRegisters* sh); +ShaderCode ShaderParseCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegisters* sh); Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInputInfo* input_info); Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInputInfo* input_info); Vector ShaderRecompileCS(const ShaderCode& code, const ShaderComputeInputInfo* input_info); diff --git a/source/emulator/include/Emulator/Graphics/ShaderParse.h b/source/emulator/include/Emulator/Graphics/ShaderParse.h new 
file mode 100644 index 0000000..b95de63 --- /dev/null +++ b/source/emulator/include/Emulator/Graphics/ShaderParse.h @@ -0,0 +1,21 @@ +#ifndef EMULATOR_INCLUDE_EMULATOR_GRAPHICS_SHADERPARSE_H_ +#define EMULATOR_INCLUDE_EMULATOR_GRAPHICS_SHADERPARSE_H_ + +#include "Kyty/Core/Common.h" +#include "Kyty/Core/String.h" + +#include "Emulator/Common.h" + +#ifdef KYTY_EMU_ENABLED + +namespace Kyty::Libs::Graphics { + +class ShaderCode; + +void ShaderParse(const uint32_t* src, ShaderCode* dst); + +} // namespace Kyty::Libs::Graphics + +#endif // KYTY_EMU_ENABLED + +#endif /* EMULATOR_INCLUDE_EMULATOR_GRAPHICS_SHADERPARSE_H_ */ diff --git a/source/emulator/include/Emulator/Graphics/ShaderSpirv.h b/source/emulator/include/Emulator/Graphics/ShaderSpirv.h index 84f758d..744a823 100644 --- a/source/emulator/include/Emulator/Graphics/ShaderSpirv.h +++ b/source/emulator/include/Emulator/Graphics/ShaderSpirv.h @@ -2,7 +2,7 @@ #define EMULATOR_INCLUDE_EMULATOR_GRAPHICS_SHADERSPIRV_H_ #include "Kyty/Core/Common.h" -#include "Kyty/Core/String.h" +#include "Kyty/Core/String8.h" #include "Emulator/Common.h" @@ -15,10 +15,10 @@ struct ShaderVertexInputInfo; struct ShaderPixelInputInfo; struct ShaderComputeInputInfo; -String SpirvGenerateSource(const ShaderCode& code, const ShaderVertexInputInfo* vs_input_info, const ShaderPixelInputInfo* ps_input_info, - const ShaderComputeInputInfo* cs_input_info); -String SpirvGetEmbeddedVs(uint32_t id); -String SpirvGetEmbeddedPs(uint32_t id); +String8 SpirvGenerateSource(const ShaderCode& code, const ShaderVertexInputInfo* vs_input_info, const ShaderPixelInputInfo* ps_input_info, + const ShaderComputeInputInfo* cs_input_info); +String8 SpirvGetEmbeddedVs(uint32_t id); +String8 SpirvGetEmbeddedPs(uint32_t id); } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/include/Emulator/Graphics/Tile.h b/source/emulator/include/Emulator/Graphics/Tile.h index a6ceacc..abe4ad4 100644 --- a/source/emulator/include/Emulator/Graphics/Tile.h +++ 
b/source/emulator/include/Emulator/Graphics/Tile.h @@ -43,10 +43,12 @@ void TileConvertTiledToLinear(void* dst, const void* src, TileMode mode, uint32_ uint32_t pitch, uint32_t levels, bool neo); bool TileGetDepthSize(uint32_t width, uint32_t height, uint32_t pitch, uint32_t z_format, uint32_t stencil_format, bool htile, bool neo, - TileSizeAlign* stencil_size, TileSizeAlign* htile_size, TileSizeAlign* depth_size); + bool next_gen, TileSizeAlign* stencil_size, TileSizeAlign* htile_size, TileSizeAlign* depth_size); void TileGetVideoOutSize(uint32_t width, uint32_t height, uint32_t pitch, bool tile, bool neo, TileSizeAlign* size); void TileGetTextureSize(uint32_t dfmt, uint32_t nfmt, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, uint32_t tile, bool neo, TileSizeAlign* total_size, TileSizeOffset* level_sizes, TilePaddedSize* padded_size); +void TileGetTextureSize2(uint32_t format, uint32_t width, uint32_t height, uint32_t pitch, uint32_t levels, uint32_t tile, + TileSizeAlign* total_size, TileSizeOffset* level_sizes, TilePaddedSize* padded_size); } // namespace Kyty::Libs::Graphics diff --git a/source/emulator/include/Emulator/Kernel/Memory.h b/source/emulator/include/Emulator/Kernel/Memory.h index d49dbf2..fe93116 100644 --- a/source/emulator/include/Emulator/Kernel/Memory.h +++ b/source/emulator/include/Emulator/Kernel/Memory.h @@ -12,6 +12,10 @@ namespace Kyty::Libs::LibKernel::Memory { KYTY_SUBSYSTEM_DEFINE(Memory); +using callback_func_t = void (*)(uintptr_t addr, size_t size); + +void RegisterCallbacks(callback_func_t alloc_func, callback_func_t free_func); + int KYTY_SYSV_ABI KernelMapNamedFlexibleMemory(void** addr_in_out, size_t len, int prot, int flags, const char* name); int KYTY_SYSV_ABI KernelMapFlexibleMemory(void** addr_in_out, size_t len, int prot, int flags); int KYTY_SYSV_ABI KernelMunmap(uint64_t vaddr, size_t len); diff --git a/source/emulator/include/Emulator/Libs/Errno.h b/source/emulator/include/Emulator/Libs/Errno.h index 
7388a49..a8d86c0 100644 --- a/source/emulator/include/Emulator/Libs/Errno.h +++ b/source/emulator/include/Emulator/Libs/Errno.h @@ -20,6 +20,19 @@ constexpr int OK = 0; return 0; \ }() +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define POSIX_N_CALL(func) \ + [&]() \ + { \ + auto result = func; \ + if (result < 0) \ + { \ + *Posix::GetErrorAddr() = LibKernel::KernelToPosix(static_cast(result)); \ + return static_cast(-1); \ + } \ + return result; \ + }() + // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define POSIX_PTHREAD_CALL(func) \ [&]() \ diff --git a/source/emulator/include/Emulator/Libs/Libs.h b/source/emulator/include/Emulator/Libs/Libs.h index fd71485..d2b7cfb 100644 --- a/source/emulator/include/Emulator/Libs/Libs.h +++ b/source/emulator/include/Emulator/Libs/Libs.h @@ -35,10 +35,12 @@ using n::g_module_version_major; \ using n::g_module_version_minor; // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define LIB_LOAD(name) name(s) +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define LIB_CHECK(ids, name) \ if (id == (ids)) \ { \ - name(s); \ + LIB_LOAD(name); \ return true; \ } // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -81,6 +83,7 @@ class SymbolDatabase; namespace Libs { bool Init(const String& id, Loader::SymbolDatabase* s); +void InitAll(Loader::SymbolDatabase* s); } // namespace Libs } // namespace Kyty diff --git a/source/emulator/include/Emulator/Loader/VirtualMemory.h b/source/emulator/include/Emulator/Loader/VirtualMemory.h index 381a01f..f7fe815 100644 --- a/source/emulator/include/Emulator/Loader/VirtualMemory.h +++ b/source/emulator/include/Emulator/Loader/VirtualMemory.h @@ -60,6 +60,7 @@ public: uint64_t access_violation_vaddr = 0; uint64_t exception_address = 0; uint64_t rbp = 0; + uint32_t exception_win_code = 0; }; using handler_func_t = void (*)(const ExceptionInfo*); diff --git a/source/emulator/src/Config.cpp b/source/emulator/src/Config.cpp index 119d38a..d2641f0 100644 --- a/source/emulator/src/Config.cpp +++ 
b/source/emulator/src/Config.cpp @@ -13,6 +13,7 @@ struct Config uint32_t screen_width = 1280; uint32_t screen_height = 720; bool neo = true; + bool next_gen = false; bool vulkan_validation_enabled = false; bool shader_validation_enabled = false; ShaderOptimizationType shader_optimization_type = ShaderOptimizationType::None; @@ -115,7 +116,7 @@ uint32_t GetScreenHeight() bool IsNeo() { - return g_config->neo; + return g_config->neo || g_config->next_gen; } bool VulkanValidationEnabled() @@ -193,6 +194,16 @@ String GetPipelineDumpFolder() return g_config->pipeline_dump_folder; } +void SetNextGen(bool mode) +{ + g_config->next_gen = mode; +} + +bool IsNextGen() +{ + return g_config->next_gen; +} + } // namespace Kyty::Config #endif // KYTY_EMU_ENABLED diff --git a/source/emulator/src/Graphics/Graphics.cpp b/source/emulator/src/Graphics/Graphics.cpp index 1ae2694..363947a 100644 --- a/source/emulator/src/Graphics/Graphics.cpp +++ b/source/emulator/src/Graphics/Graphics.cpp @@ -12,6 +12,7 @@ #include "Emulator/Graphics/Objects/IndexBuffer.h" #include "Emulator/Graphics/Objects/Label.h" #include "Emulator/Graphics/Pm4.h" +#include "Emulator/Graphics/Shader.h" #include "Emulator/Graphics/Tile.h" #include "Emulator/Graphics/VideoOut.h" #include "Emulator/Graphics/Window.h" @@ -40,6 +41,7 @@ KYTY_SUBSYSTEM_INIT(Graphics) LabelInit(); TileInit(); IndexBufferInit(); + ShaderInit(); } KYTY_SUBSYSTEM_UNEXPECTED_SHUTDOWN(Graphics) {} @@ -755,12 +757,6 @@ namespace Gen5 { LIB_NAME("Graphics5", "Graphics5"); -struct ShaderRegister -{ - uint32_t offset; - uint32_t value; -}; - struct RegisterDefaultInfo { uint32_t type; @@ -778,93 +774,6 @@ struct RegisterDefaults uint32_t count = 0; }; -struct ShaderSharp -{ - uint16_t offset_dw : 15; - uint16_t size : 1; -}; - -struct ShaderUserData -{ - uint16_t* direct_resource_offset; - ShaderSharp* sharp_resource_offset[4]; - uint16_t eud_size_dw; - uint16_t srt_size_dw; - uint16_t direct_resource_count; - uint16_t sharp_resource_count[4]; 
-}; - -struct ShaderRegisterRange -{ - uint16_t start; - uint16_t end; -}; - -struct ShaderDrawModifier -{ - uint32_t enbl_start_vertex_offset : 1; - uint32_t enbl_start_index_offset : 1; - uint32_t enbl_start_instance_offset : 1; - uint32_t enbl_draw_index : 1; - uint32_t enbl_user_vgprs : 1; - uint32_t render_target_slice_offset : 3; - uint32_t fuse_draws : 1; - uint32_t compiler_flags : 23; - uint32_t is_default : 1; - uint32_t reserved : 31; -}; - -struct ShaderSpecialRegs -{ - ShaderRegister ge_cntl; - ShaderRegister vgt_shader_stages_en; - uint32_t dispatch_modifier; - ShaderRegisterRange user_data_range; - ShaderDrawModifier draw_modifier; - ShaderRegister vgt_gs_out_prim_type; - ShaderRegister ge_user_vgpr_en; -}; - -struct ShaderSemantic -{ - uint32_t semantic : 8; - uint32_t hardware_mapping : 8; - uint32_t size_in_elements : 4; - uint32_t is_f16 : 2; - uint32_t is_flat_shaded : 1; - uint32_t is_linear : 1; - uint32_t is_custom : 1; - uint32_t static_vb_index : 1; - uint32_t static_attribute : 1; - uint32_t reserved : 1; - uint32_t default_value : 2; - uint32_t default_value_hi : 2; -}; - -struct Shader -{ - uint32_t file_header; - uint32_t version; - ShaderUserData* user_data; - const volatile void* code; - ShaderRegister* cx_registers; - ShaderRegister* sh_registers; - ShaderSpecialRegs* specials; - ShaderSemantic* input_semantics; - ShaderSemantic* output_semantics; - uint32_t header_size; - uint32_t shader_size; - uint32_t embedded_constant_buffer_size_dqw; - uint32_t target; - uint32_t num_input_semantics; - uint16_t scratch_size_dw_per_thread; - uint16_t num_output_semantics; - uint16_t special_sizes_bytes; - uint8_t type; - uint8_t num_cx_registers; - uint8_t num_sh_registers; -}; - struct CommandBuffer { using Callback = KYTY_SYSV_ABI bool (*)(CommandBuffer*, uint32_t, void*); @@ -1561,6 +1470,17 @@ int KYTY_SYSV_ABI GraphicsCreateShader(Shader** dst, void* header, const volatil auto base = reinterpret_cast(code); + printf("\t base = 0x%016" 
PRIx64 "\n", base); + + ShaderMappedData map; + map.user_data = h->user_data; + map.input_semantics = h->input_semantics; + map.num_input_semantics = h->num_input_semantics; + + ShaderMapUserData(base, map); + + EXIT_NOT_IMPLEMENTED((base & 0xFFFF0000000000FFull) != 0); + if (h->type == 2 && h->num_sh_registers >= 2 && h->sh_registers[0].offset == Pm4::SPI_SHADER_PGM_LO_ES && h->sh_registers[1].offset == Pm4::SPI_SHADER_PGM_HI_ES) { @@ -1716,6 +1636,7 @@ int KYTY_SYSV_ABI GraphicsCreatePrimState(ShaderRegister* cx_regs, ShaderRegiste return OK; } +// NOLINTNEXTLINE(readability-function-cognitive-complexity) int KYTY_SYSV_ABI GraphicsCreateInterpolantMapping(ShaderRegister* regs, const Shader* gs, const Shader* ps) { PRINT_NAME(); @@ -1733,20 +1654,42 @@ int KYTY_SYSV_ABI GraphicsCreateInterpolantMapping(ShaderRegister* regs, const S EXIT_NOT_IMPLEMENTED(sizeof(ShaderSemantic) != 4); EXIT_NOT_IMPLEMENTED(ps != nullptr && gs->num_output_semantics != ps->num_input_semantics); + EXIT_NOT_IMPLEMENTED(ps != nullptr && ps->num_output_semantics != 0); - auto* out32 = reinterpret_cast(gs->output_semantics); - auto* in32 = (ps != nullptr ? 
reinterpret_cast(ps->input_semantics) : nullptr); - - for (int i = 0; i < 32; i++) + for (uint16_t i = 0; i < 32; i++) { regs[i].offset = Pm4::SPI_PS_INPUT_CNTL_0 + i; regs[i].value = 0; if (i < static_cast(gs->num_output_semantics)) { - EXIT_NOT_IMPLEMENTED(out32[i] != 0x0000000f); - EXIT_NOT_IMPLEMENTED(in32 != nullptr && out32[i] != in32[i]); - regs[i].value = i; + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].semantic != 15 + i); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].hardware_mapping != i); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].size_in_elements != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].is_f16 != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].is_flat_shaded != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].is_linear != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].is_custom != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].static_vb_index != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].static_attribute != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].default_value != 0); + EXIT_NOT_IMPLEMENTED(gs->output_semantics[i].default_value_hi != 0); + bool flat = false; + if (ps != nullptr) + { + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].semantic != gs->output_semantics[i].semantic); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].hardware_mapping != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].size_in_elements != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].is_f16 != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].is_linear != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].is_custom != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].static_vb_index != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].static_attribute != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].default_value != 0); + EXIT_NOT_IMPLEMENTED(ps->input_semantics[i].default_value_hi != 0); + flat = ps->input_semantics[i].is_flat_shaded != 0; + } + regs[i].value = i | (flat ? 
0x400u : 0u); } } @@ -1837,7 +1780,7 @@ uint32_t* KYTY_SYSV_ABI GraphicsCbReleaseMem(CommandBuffer* buf, uint8_t action, EXIT_NOT_IMPLEMENTED(buf == nullptr); EXIT_NOT_IMPLEMENTED(dst != 1); - EXIT_NOT_IMPLEMENTED(data_sel != 2); + EXIT_NOT_IMPLEMENTED(data_sel != 2 && data_sel != 3); EXIT_NOT_IMPLEMENTED(gds_offset != 0); EXIT_NOT_IMPLEMENTED(gds_size != 1); EXIT_NOT_IMPLEMENTED(interrupt != 0); @@ -1851,7 +1794,7 @@ uint32_t* KYTY_SYSV_ABI GraphicsCbReleaseMem(CommandBuffer* buf, uint8_t action, cmd[0] = KYTY_PM4(7, Pm4::IT_NOP, Pm4::R_RELEASE_MEM); cmd[1] = action | (static_cast(cache_policy) << 8u); - cmd[2] = gcr_cntl; + cmd[2] = gcr_cntl | (static_cast(data_sel) << 16u); cmd[3] = static_cast(reinterpret_cast(address) & 0xffffffffu); cmd[4] = static_cast((reinterpret_cast(address) >> 32u) & 0xffffffffu); cmd[5] = static_cast(data & 0xffffffffu); @@ -1909,6 +1852,25 @@ uint32_t* KYTY_SYSV_ABI GraphicsDcbWaitUntilSafeForRendering(CommandBuffer* buf, return cmd; } +uint32_t* KYTY_SYSV_ABI GraphicsDcbSetShRegisterDirect(CommandBuffer* buf, ShaderRegister reg) +{ + PRINT_NAME(); + + EXIT_NOT_IMPLEMENTED(buf == nullptr); + + buf->DbgDump(); + + auto* cmd = buf->AllocateDW(3); + + EXIT_NOT_IMPLEMENTED(cmd == nullptr); + + cmd[0] = KYTY_PM4(3, Pm4::IT_SET_SH_REG, 0u); + cmd[1] = reg.offset; + cmd[2] = reg.value; + + return cmd; +} + uint32_t* KYTY_SYSV_ABI GraphicsDcbSetCxRegistersIndirect(CommandBuffer* buf, const volatile ShaderRegister* regs, uint32_t num_regs) { PRINT_NAME(); diff --git a/source/emulator/src/Graphics/GraphicsRender.cpp b/source/emulator/src/Graphics/GraphicsRender.cpp index c69ba8c..7cb422b 100644 --- a/source/emulator/src/Graphics/GraphicsRender.cpp +++ b/source/emulator/src/Graphics/GraphicsRender.cpp @@ -471,6 +471,7 @@ static void uc_print(const char* func, const HW::UserConfig& uc) printf("\t GetPrimType() = 0x%08" PRIx32 "\n", uc.GetPrimType()); printf("\t primitive_group_size = 0x%04" PRIx16 "\n", ge_cntl.primitive_group_size); + printf("\t 
vertex_group_size = 0x%04" PRIx16 "\n", ge_cntl.vertex_group_size); printf("\t en_user_vgpr1 = %s\n", user_en.vgpr1 ? "true" : "false"); printf("\t en_user_vgpr2 = %s\n", user_en.vgpr2 ? "true" : "false"); printf("\t en_user_vgpr3 = %s\n", user_en.vgpr3 ? "true" : "false"); @@ -481,8 +482,8 @@ static void uc_check(const HW::UserConfig& uc) const auto& ge_cntl = uc.GetGeControl(); const auto& user_en = uc.GetGeUserVgprEn(); - EXIT_NOT_IMPLEMENTED(ge_cntl.primitive_group_size != 0x0000); - EXIT_NOT_IMPLEMENTED(ge_cntl.vertex_group_size != 0x0000); + EXIT_NOT_IMPLEMENTED(ge_cntl.primitive_group_size != 0x0000 && ge_cntl.primitive_group_size != 0x0040); + EXIT_NOT_IMPLEMENTED(ge_cntl.vertex_group_size != 0x0000 && ge_cntl.vertex_group_size != 0x0040); EXIT_NOT_IMPLEMENTED(user_en.vgpr1 != false); EXIT_NOT_IMPLEMENTED(user_en.vgpr2 != false); EXIT_NOT_IMPLEMENTED(user_en.vgpr3 != false); @@ -565,11 +566,15 @@ static void rt_check(const HW::RenderTarget& rt) { if (rt.base.addr != 0) { - bool render_to_texture = (rt.attrib.tile_mode == 0x0d); - // EXIT_NOT_IMPLEMENTED(rt.base_addr == 0); - // EXIT_NOT_IMPLEMENTED(rt.pitch_div8_minus1 != 0x000000ef); - // EXIT_NOT_IMPLEMENTED(rt.fmask_pitch_div8_minus1 != 0x000000ef); - // EXIT_NOT_IMPLEMENTED(rt.slice_div64_minus1 != 0x000086ff); + bool ps5 = Config::IsNextGen(); + // bool render_to_texture = (rt.attrib.tile_mode == 0x0d); + // EXIT_NOT_IMPLEMENTED(rt.base_addr == 0); + if (ps5) + { + EXIT_NOT_IMPLEMENTED(rt.pitch.pitch_div8_minus1 != 0); + EXIT_NOT_IMPLEMENTED(rt.pitch.fmask_pitch_div8_minus1 != 0); + EXIT_NOT_IMPLEMENTED(rt.slice.slice_div64_minus1 != 0); + } EXIT_NOT_IMPLEMENTED(rt.view.base_array_slice_index != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.view.last_array_slice_index != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.view.current_mip_level != 0x00000000); @@ -581,7 +586,7 @@ static void rt_check(const HW::RenderTarget& rt) EXIT_NOT_IMPLEMENTED(rt.info.cmask_fast_clear_enable != false); 
EXIT_NOT_IMPLEMENTED(rt.info.dcc_compression_enable != false); - EXIT_NOT_IMPLEMENTED(!render_to_texture && rt.info.neo_mode != Config::IsNeo()); + EXIT_NOT_IMPLEMENTED(!(rt.attrib.tile_mode == 0x0d) && rt.info.neo_mode != Config::IsNeo()); EXIT_NOT_IMPLEMENTED(rt.info.cmask_tile_mode != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.info.cmask_tile_mode_neo != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.info.blend_bypass != false); @@ -595,14 +600,27 @@ static void rt_check(const HW::RenderTarget& rt) // EXIT_NOT_IMPLEMENTED(rt.fmask_tile_mode != 0x0000000a); EXIT_NOT_IMPLEMENTED(rt.attrib.num_samples != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.attrib.num_fragments != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib2.width != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib2.height != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib2.num_mip_levels != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib3.depth != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib3.tile_mode != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib3.dimension != 0x00000000); - EXIT_NOT_IMPLEMENTED(rt.attrib3.cmask_pipe_aligned != false); - EXIT_NOT_IMPLEMENTED(rt.attrib3.dcc_pipe_aligned != false); + if (ps5) + { + EXIT_NOT_IMPLEMENTED(rt.attrib2.width == 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.height == 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.num_mip_levels != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.depth != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.tile_mode != 0x0000001b); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dimension != 0x00000001); + EXIT_NOT_IMPLEMENTED(rt.attrib3.cmask_pipe_aligned != true); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dcc_pipe_aligned != true); + } else + { + EXIT_NOT_IMPLEMENTED(rt.attrib2.width != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.height != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib2.num_mip_levels != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.depth != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.tile_mode != 0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dimension != 
0x00000000); + EXIT_NOT_IMPLEMENTED(rt.attrib3.cmask_pipe_aligned != false); + EXIT_NOT_IMPLEMENTED(rt.attrib3.dcc_pipe_aligned != false); + } // EXIT_NOT_IMPLEMENTED(rt.dcc_max_uncompressed_block_size != 0x00000002); // EXIT_NOT_IMPLEMENTED(rt.dcc.max_compressed_block_size != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.dcc.min_compressed_block_size != 0x00000000); @@ -619,8 +637,11 @@ static void rt_check(const HW::RenderTarget& rt) EXIT_NOT_IMPLEMENTED(rt.clear_word0.word0 != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.clear_word1.word1 != 0x00000000); EXIT_NOT_IMPLEMENTED(rt.dcc_addr.addr != 0x0000000000000000); - // EXIT_NOT_IMPLEMENTED(rt.width != 0x00000780); - // EXIT_NOT_IMPLEMENTED(rt.height != 0x00000438); + if (ps5) + { + EXIT_NOT_IMPLEMENTED(rt.size.width != 0); + EXIT_NOT_IMPLEMENTED(rt.size.height != 0); + } } } @@ -729,25 +750,46 @@ static void z_check(const HW::DepthRenderTarget& z) if (z.z_info.format != 0 || z.stencil_info.format != 0) { - EXIT_NOT_IMPLEMENTED(z.depth_info.addr5_swizzle_mask != 0x00000001); - EXIT_NOT_IMPLEMENTED(z.depth_info.array_mode != 0x00000004); - EXIT_NOT_IMPLEMENTED(z.depth_info.pipe_config != (Config::IsNeo() ? 0x00000012 : 0x0c)); - EXIT_NOT_IMPLEMENTED(z.depth_info.bank_width != 0x00000000); - // EXIT_NOT_IMPLEMENTED(z.depth_info.bank_height != (Config::IsNeo() ? 0x00000001 : 2)); - // EXIT_NOT_IMPLEMENTED(z.depth_info.macro_tile_aspect != (Config::IsNeo() ? 0x00000000 : 2)); - // EXIT_NOT_IMPLEMENTED(z.depth_info.num_banks != (Config::IsNeo() ? 
0x00000002 : 3)); + bool ps5 = Config::IsNextGen(); + + if (ps5) + { + EXIT_NOT_IMPLEMENTED(z.depth_info.addr5_swizzle_mask != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.array_mode != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.pipe_config != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.bank_width != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.bank_height != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.macro_tile_aspect != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.depth_info.num_banks != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.linear != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.full_cache != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.htile_uses_preload_win != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.preload != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_width != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_height != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.dst_outside_zero_to_one != 0x00000000); + } else + { + EXIT_NOT_IMPLEMENTED(z.depth_info.addr5_swizzle_mask != 0x00000001); + EXIT_NOT_IMPLEMENTED(z.depth_info.array_mode != 0x00000004); + EXIT_NOT_IMPLEMENTED(z.depth_info.pipe_config != (Config::IsNeo() ? 0x00000012 : 0x0c)); + EXIT_NOT_IMPLEMENTED(z.depth_info.bank_width != 0x00000000); + // EXIT_NOT_IMPLEMENTED(z.depth_info.bank_height != (Config::IsNeo() ? 0x00000001 : 2)); + // EXIT_NOT_IMPLEMENTED(z.depth_info.macro_tile_aspect != (Config::IsNeo() ? 0x00000000 : 2)); + // EXIT_NOT_IMPLEMENTED(z.depth_info.num_banks != (Config::IsNeo() ? 
0x00000002 : 3)); + EXIT_NOT_IMPLEMENTED(z.htile_surface.linear != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.full_cache != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.htile_uses_preload_win != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.preload != 0x00000001); + EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_width != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_height != 0x00000000); + EXIT_NOT_IMPLEMENTED(z.htile_surface.dst_outside_zero_to_one != 0x00000000); + } EXIT_NOT_IMPLEMENTED(z.depth_view.slice_start != 0x00000000); EXIT_NOT_IMPLEMENTED(z.depth_view.slice_max != 0x00000000); EXIT_NOT_IMPLEMENTED(z.depth_view.current_mip_level != 0x00000000); EXIT_NOT_IMPLEMENTED(z.depth_view.depth_write_disable != false); EXIT_NOT_IMPLEMENTED(z.depth_view.stencil_write_disable != false); - EXIT_NOT_IMPLEMENTED(z.htile_surface.linear != 0x00000000); - EXIT_NOT_IMPLEMENTED(z.htile_surface.full_cache != 0x00000000); - EXIT_NOT_IMPLEMENTED(z.htile_surface.htile_uses_preload_win != 0x00000000); - EXIT_NOT_IMPLEMENTED(z.htile_surface.preload != 0x00000001); - EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_width != 0x00000000); - EXIT_NOT_IMPLEMENTED(z.htile_surface.prefetch_height != 0x00000000); - EXIT_NOT_IMPLEMENTED(z.htile_surface.dst_outside_zero_to_one != 0x00000000); EXIT_NOT_IMPLEMENTED(z.z_read_base_addr != z.z_write_base_addr); EXIT_NOT_IMPLEMENTED(z.stencil_read_base_addr != z.stencil_write_base_addr); EXIT_NOT_IMPLEMENTED(z.z_write_base_addr == 0); @@ -758,8 +800,13 @@ static void z_check(const HW::DepthRenderTarget& z) // EXIT_NOT_IMPLEMENTED(z.htile_data_base_addr == 0); // EXIT_NOT_IMPLEMENTED(z.width != 0x00000780); // EXIT_NOT_IMPLEMENTED(z.height != 0x00000438); - EXIT_NOT_IMPLEMENTED(z.size.x_max != 0); - EXIT_NOT_IMPLEMENTED(z.size.y_max != 0); + if (ps5) + { + EXIT_NOT_IMPLEMENTED(z.width != 0); + EXIT_NOT_IMPLEMENTED(z.height != 0); + EXIT_NOT_IMPLEMENTED(z.size.x_max == 0); + EXIT_NOT_IMPLEMENTED(z.size.y_max == 0); 
+ } } } @@ -1044,14 +1091,23 @@ static void vp_print(const char* func, const HW::ScreenViewport& vp, const HW::S static void vp_check(const HW::ScreenViewport& vp, const HW::ScanModeControl& smc) { + bool ps5 = Config::IsNextGen(); + EXIT_NOT_IMPLEMENTED(smc.msaa_enable); // EXIT_NOT_IMPLEMENTED(smc.vport_scissor_enable); EXIT_NOT_IMPLEMENTED(smc.line_stipple_enable); bool generic_scissor = (vp.generic_scissor_left != 0 || vp.generic_scissor_top != 0 || vp.generic_scissor_right != 0 || vp.generic_scissor_bottom != 0); + bool screen_scissor = + (vp.screen_scissor_left != 0 || vp.screen_scissor_top != 0 || vp.screen_scissor_right != 0 || vp.screen_scissor_bottom != 0); + bool viewport_scissor = (vp.viewports[0].viewport_scissor_left != 0 || vp.viewports[0].viewport_scissor_top != 0 || + vp.viewports[0].viewport_scissor_right != 0 || vp.viewports[0].viewport_scissor_bottom != 0); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].zmin != 0.000000); + EXIT_NOT_IMPLEMENTED(viewport_scissor && (generic_scissor || screen_scissor)); + EXIT_NOT_IMPLEMENTED(viewport_scissor && (!smc.vport_scissor_enable)); + + EXIT_NOT_IMPLEMENTED(vp.viewports[0].zmin > 0.000000); EXIT_NOT_IMPLEMENTED(vp.viewports[0].zmax != 1.000000); // EXIT_NOT_IMPLEMENTED(vp.viewports[0].xscale != 960.000000); // EXIT_NOT_IMPLEMENTED(vp.viewports[0].xoffset != 960.000000); @@ -1068,14 +1124,21 @@ static void vp_check(const HW::ScreenViewport& vp, const HW::ScanModeControl& sm // EXIT_NOT_IMPLEMENTED(vp.hw_offset_y != 32); // EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_horz_clip - 33.133327f) > 0.001f); // EXIT_NOT_IMPLEMENTED(fabsf(vp.guard_band_vert_clip - 59.629623f) > 0.001f); - EXIT_NOT_IMPLEMENTED(vp.guard_band_horz_discard != 1.000000); - EXIT_NOT_IMPLEMENTED(vp.guard_band_vert_discard != 1.000000); + if (ps5) + { + EXIT_NOT_IMPLEMENTED(vp.guard_band_horz_discard != 0.0f); + EXIT_NOT_IMPLEMENTED(vp.guard_band_vert_discard != 0.0f); + } else + { + EXIT_NOT_IMPLEMENTED(vp.guard_band_horz_discard != 1.000000); + 
EXIT_NOT_IMPLEMENTED(vp.guard_band_vert_discard != 1.000000); + } EXIT_NOT_IMPLEMENTED(vp.generic_scissor_window_offset_enable != false); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_left != 0); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_top != 0); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_right != 0); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_bottom != 0); - EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_window_offset_enable != false); + // EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_left != 0); + // EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_top != 0); + // EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_right != 0); + // EXIT_NOT_IMPLEMENTED(vp.viewports[0].viewport_scissor_bottom != 0); + EXIT_NOT_IMPLEMENTED(viewport_scissor && vp.viewports[0].viewport_scissor_window_offset_enable != true); } static void hw_check(const HW::Context& hw) @@ -1785,33 +1848,53 @@ uint64_t SamplerCache::GetSamplerId(const ShaderSamplerResource& r) return m_samplers_size; } -static void get_input_format(const ShaderBufferResource& res, VkFormat* format, uint32_t* size) +static void get_input_format(const ShaderBufferResource& res, VkFormat* format, uint32_t* size, bool ps5) { EXIT_IF(format == nullptr); EXIT_IF(size == nullptr); - auto nfmt = res.Nfmt(); - auto dfmt = res.Dfmt(); - if (nfmt == 7 && dfmt == 14) + if (ps5) { - *format = VK_FORMAT_R32G32B32A32_SFLOAT; - *size = 4; - } else if (nfmt == 7 && dfmt == 13) - { - *format = VK_FORMAT_R32G32B32_SFLOAT; - *size = 3; - } else if (nfmt == 7 && dfmt == 11) - { - *format = VK_FORMAT_R32G32_SFLOAT; - *size = 2; - } else if (nfmt == 0 && dfmt == 10) - { - *format = VK_FORMAT_R8G8B8A8_UNORM; - *size = 4; + auto fmt = res.Format(); + if (fmt == 74) + { + *format = VK_FORMAT_R32G32B32_SFLOAT; + *size = 3; + } else if (fmt == 64) + { + *format = VK_FORMAT_R32G32_SFLOAT; + *size = 2; + } else + { + EXIT("unknown format: fmt = %u\n", fmt); + *format = 
VK_FORMAT_UNDEFINED; + *size = 4; + } } else { - EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); - *format = VK_FORMAT_UNDEFINED; - *size = 4; + auto nfmt = res.Nfmt(); + auto dfmt = res.Dfmt(); + if (nfmt == 7 && dfmt == 14) + { + *format = VK_FORMAT_R32G32B32A32_SFLOAT; + *size = 4; + } else if (nfmt == 7 && dfmt == 13) + { + *format = VK_FORMAT_R32G32B32_SFLOAT; + *size = 3; + } else if (nfmt == 7 && dfmt == 11) + { + *format = VK_FORMAT_R32G32_SFLOAT; + *size = 2; + } else if (nfmt == 0 && dfmt == 10) + { + *format = VK_FORMAT_R8G8B8A8_UNORM; + *size = 4; + } else + { + EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); + *format = VK_FORMAT_UNDEFINED; + *size = 4; + } } } @@ -1952,6 +2035,8 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh VkVertexInputAttributeDescription input_attr[ShaderVertexInputInfo::RES_MAX]; VkVertexInputBindingDescription input_desc[ShaderVertexInputInfo::RES_MAX]; + bool gen5 = Config::IsNextGen(); + for (int bi = 0; bi < vs_input_info->buffers_num; bi++) { const auto& b = vs_input_info->buffers[bi]; @@ -1967,10 +2052,14 @@ static VulkanPipeline* CreatePipelineInternal(VkRenderPass render_pass, const Sh input_attr[index].offset = b.attr_offsets[ai]; uint32_t attr_size = 4; - get_input_format(vs_input_info->resources[index], &input_attr[index].format, &attr_size); + get_input_format(vs_input_info->resources[index], &input_attr[index].format, &attr_size, gen5); auto registers_num = vs_input_info->resources_dst[index].registers_num; + if (gen5) + { + EXIT_NOT_IMPLEMENTED(vs_input_info->resources[index].OutOfBounds() != 0); + } EXIT_NOT_IMPLEMENTED(vs_input_info->resources[index].AddTid()); EXIT_NOT_IMPLEMENTED(vs_input_info->resources[index].SwizzleEnabled()); @@ -2484,17 +2573,23 @@ VulkanPipeline* PipelineCache::CreatePipeline(VulkanFramebuffer* framebuffer, Re bool generic_scissor = (vp.generic_scissor_left != 0 || vp.generic_scissor_top != 0 || vp.generic_scissor_right != 0 || 
vp.generic_scissor_bottom != 0); + bool viewport_scissor = (vp.viewports[0].viewport_scissor_left != 0 || vp.viewports[0].viewport_scissor_top != 0 || + vp.viewports[0].viewport_scissor_right != 0 || vp.viewports[0].viewport_scissor_bottom != 0); - p.static_params->viewport_scale[0] = vp.viewports[0].xscale; - p.static_params->viewport_scale[1] = vp.viewports[0].yscale; - p.static_params->viewport_scale[2] = vp.viewports[0].zscale; - p.static_params->viewport_offset[0] = vp.viewports[0].xoffset; - p.static_params->viewport_offset[1] = vp.viewports[0].yoffset; - p.static_params->viewport_offset[2] = vp.viewports[0].zoffset; - p.static_params->scissor_ltrb[0] = (generic_scissor ? vp.generic_scissor_left : vp.screen_scissor_left); - p.static_params->scissor_ltrb[1] = (generic_scissor ? vp.generic_scissor_top : vp.screen_scissor_top); - p.static_params->scissor_ltrb[2] = (generic_scissor ? vp.generic_scissor_right : vp.screen_scissor_right); - p.static_params->scissor_ltrb[3] = (generic_scissor ? vp.generic_scissor_bottom : vp.screen_scissor_bottom); + p.static_params->viewport_scale[0] = vp.viewports[0].xscale; + p.static_params->viewport_scale[1] = vp.viewports[0].yscale; + p.static_params->viewport_scale[2] = vp.viewports[0].zscale; + p.static_params->viewport_offset[0] = vp.viewports[0].xoffset; + p.static_params->viewport_offset[1] = vp.viewports[0].yoffset; + p.static_params->viewport_offset[2] = vp.viewports[0].zoffset; + p.static_params->scissor_ltrb[0] = + (viewport_scissor ? vp.viewports[0].viewport_scissor_left : (generic_scissor ? vp.generic_scissor_left : vp.screen_scissor_left)); + p.static_params->scissor_ltrb[1] = + (viewport_scissor ? vp.viewports[0].viewport_scissor_top : (generic_scissor ? vp.generic_scissor_top : vp.screen_scissor_top)); + p.static_params->scissor_ltrb[2] = (viewport_scissor ? vp.viewports[0].viewport_scissor_right + : (generic_scissor ? 
vp.generic_scissor_right : vp.screen_scissor_right)); + p.static_params->scissor_ltrb[3] = (viewport_scissor ? vp.viewports[0].viewport_scissor_bottom + : (generic_scissor ? vp.generic_scissor_bottom : vp.screen_scissor_bottom)); p.static_params->topology = topology; p.static_params->with_depth = (depth->format != VK_FORMAT_UNDEFINED && depth->vulkan_buffer != nullptr); p.static_params->depth_test_enable = depth->depth_test_enable; @@ -3713,42 +3808,22 @@ static void FindRenderDepthInfo(uint64_t submit_id, CommandBuffer* /*buffer*/, c EXIT_IF(r == nullptr); - bool neo = Config::IsNeo(); - const auto& z = hw.GetDepthRenderTarget(); - const auto& rc = hw.GetRenderControl(); - const auto& dc = hw.GetDepthControl(); - const auto& sc = hw.GetStencilControl(); - const auto& sm = hw.GetStencilMask(); - const auto& cc = hw.GetColorControl(); + const auto& z = hw.GetDepthRenderTarget(); + const auto& rc = hw.GetRenderControl(); + const auto& dc = hw.GetDepthControl(); + const auto& sc = hw.GetStencilControl(); + const auto& sm = hw.GetStencilMask(); + const auto& cc = hw.GetColorControl(); - uint32_t size = 0; - uint32_t pitch = (z.pitch_div8_minus1 + 1) * 8; TileSizeAlign stencil_size {}; TileSizeAlign htile_size {}; TileSizeAlign depth_size {}; - if (z.z_info.format == 3) - { - size = (z.slice_div64_minus1 + 1) * 64 * 4; - } else if (z.z_info.format == 1) - { - size = (z.slice_div64_minus1 + 1) * 64 * 2; - } - - bool htile = z.z_info.tile_surface_enable; - + bool neo = Config::IsNeo(); + bool ps5 = Config::IsNextGen(); + bool htile = z.z_info.tile_surface_enable; bool decompress = htile && (rc.depth_compress_disable || rc.stencil_compress_disable); - if (!TileGetDepthSize(z.width, z.height, pitch, z.z_info.format, z.stencil_info.format, htile, neo, &stencil_size, &htile_size, - &depth_size)) - { - depth_size.size = size; - depth_size.align = neo ? 
65536 : 32768; - } else - { - EXIT_NOT_IMPLEMENTED(depth_size.size != size); - } - if (dc.z_enable || dc.z_write_enable || dc.stencil_enable || decompress) { switch (z.z_info.format * 2 + z.stencil_info.format) @@ -3763,6 +3838,38 @@ static void FindRenderDepthInfo(uint64_t submit_id, CommandBuffer* /*buffer*/, c } } + if (r->format != VK_FORMAT_UNDEFINED) + { + if (ps5) + { + bool size_found = TileGetDepthSize(z.size.x_max + 1, z.size.y_max + 1, 0, z.z_info.format, z.stencil_info.format, htile, true, + true, &stencil_size, &htile_size, &depth_size); + EXIT_NOT_IMPLEMENTED(!size_found); + } else + { + uint32_t size = 0; + uint32_t pitch = (z.pitch_div8_minus1 + 1) * 8; + + if (z.z_info.format == 3) + { + size = (z.slice_div64_minus1 + 1) * 64 * 4; + } else if (z.z_info.format == 1) + { + size = (z.slice_div64_minus1 + 1) * 64 * 2; + } + + if (!TileGetDepthSize(z.width, z.height, pitch, z.z_info.format, z.stencil_info.format, htile, neo, false, &stencil_size, + &htile_size, &depth_size)) + { + depth_size.size = size; + depth_size.align = neo ? 65536 : 32768; + } else + { + EXIT_NOT_IMPLEMENTED(depth_size.size != size); + } + } + } + auto stencil_addr_mask = static_cast(stencil_size.align) - 1; auto depth_addr_mask = static_cast(depth_size.align) - 1; auto htile_addr_mask = static_cast(htile_size.align) - 1; @@ -3778,8 +3885,8 @@ static void FindRenderDepthInfo(uint64_t submit_id, CommandBuffer* /*buffer*/, c r->htile_buffer_size = htile_size.size; r->htile_buffer_vaddr = z.htile_data_base_addr & ~htile_addr_mask; r->htile_tile_swizzle = z.htile_data_base_addr & htile_addr_mask; - r->width = z.width; - r->height = z.height; + r->width = (ps5 ? z.size.x_max + 1 : z.width); + r->height = (ps5 ? 
z.size.y_max + 1 : z.height); r->depth_clear_enable = rc.depth_clear_enable; r->depth_clear_value = hw.GetDepthClearValue(); r->depth_test_enable = dc.z_enable; @@ -3888,38 +3995,69 @@ static void FindRenderColorInfo(uint64_t submit_id, CommandBuffer* buffer, const return; } - auto width = rt.size.width; - auto height = rt.size.height; - auto pitch = (rt.pitch.pitch_div8_minus1 + 1) * 8; - auto size = (rt.slice.slice_div64_minus1 + 1) * 64 * 4; - bool tile = false; - bool write_back = false; + bool ps5 = Config::IsNextGen(); + + uint32_t width = 0; + uint32_t height = 0; + uint32_t pitch = 0; + uint32_t size = 0; + bool tile = false; + bool write_back = false; + + if (ps5) + { + switch (rt.attrib3.tile_mode) + { + case 0x1b: + tile = true; + write_back = false; + break; + default: EXIT("unknown tile mode: %u\n", rt.attrib3.tile_mode); + } + + width = rt.attrib2.width + 1; + height = rt.attrib2.height + 1; + pitch = width; + + Graphics::TileSizeAlign size32 {}; + Graphics::TileGetVideoOutSize(width, height, pitch, tile, true, &size32); + + size = size32.size; + + EXIT_NOT_IMPLEMENTED(size == 0); + } else + { + switch (rt.attrib.tile_mode) + { + case 0x8: + tile = false; + write_back = false; + break; + + case 0x1f: + tile = false; + write_back = true; + break; + + case 0xa: + case 0xd: + case 0xe: + tile = true; + write_back = false; + break; + + default: EXIT("unknown tile mode: %u\n", rt.attrib.tile_mode); + } + + width = rt.size.width; + height = rt.size.height; + pitch = (rt.pitch.pitch_div8_minus1 + 1) * 8; + size = (rt.slice.slice_div64_minus1 + 1) * 64 * 4; + } auto video_image = VideoOut::VideoOutGetImage(rt.base.addr); bool render_to_texture = (video_image.image == nullptr); - switch (rt.attrib.tile_mode) - { - case 0x8: - tile = false; - write_back = false; - break; - - case 0x1f: - tile = false; - write_back = true; - break; - - case 0xa: - case 0xd: - case 0xe: - tile = true; - write_back = false; - break; - - default: EXIT("unknown tile mode: %u\n", 
rt.attrib.tile_mode); - } - if (render_to_texture) { RenderTextureFormat rt_format = RenderTextureFormat::Unknown; @@ -3955,8 +4093,8 @@ static void FindRenderColorInfo(uint64_t submit_id, CommandBuffer* buffer, const r->buffer_size = size; } else { - EXIT_NOT_IMPLEMENTED( - !(rt.info.format == 0xa && (rt.info.channel_type == 0x6 || rt.info.channel_type == 0x0) && rt.info.channel_order == 0x1)); + EXIT_NOT_IMPLEMENTED(!(rt.info.format == 0xa && (rt.info.channel_type == 0x6 || rt.info.channel_type == 0x0) && + (rt.info.channel_order == 0x0 || rt.info.channel_order == 0x1))); // Display buffer EXIT_NOT_IMPLEMENTED(video_image.buffer_size != size); @@ -3994,6 +4132,7 @@ static void InvalidateMemoryObject(const RenderDepthInfo& r) } } +// NOLINTNEXTLINE(readability-function-cognitive-complexity) static void PrepareStorageBuffers(uint64_t submit_id, CommandBuffer* buffer, const ShaderStorageResources& storage_buffers, VulkanBuffer** buffers, uint32_t** sgprs) { @@ -4003,26 +4142,36 @@ static void PrepareStorageBuffers(uint64_t submit_id, CommandBuffer* buffer, con EXIT_IF(sgprs == nullptr); EXIT_IF(*sgprs == nullptr); + bool gen5 = Config::IsNextGen(); + for (int i = 0; i < storage_buffers.buffers_num; i++) { auto r = storage_buffers.buffers[i]; EXIT_NOT_IMPLEMENTED(r.AddTid()); EXIT_NOT_IMPLEMENTED(r.SwizzleEnabled()); - EXIT_NOT_IMPLEMENTED(!((r.Stride() == 4 && r.DstSelXYZW() == DstSel(4, 0, 0, 0) && r.Dfmt() == 4 && r.Nfmt() == 4) || - (r.Stride() == 4 && r.DstSelXYZW() == DstSel(4, 0, 0, 1) && r.Dfmt() == 4 && r.Nfmt() == 7) || - (r.Stride() == 8 && r.DstSelXYZW() == DstSel(4, 5, 0, 0) && r.Dfmt() == 11 && r.Nfmt() == 4) || - (r.Stride() == 16 && r.DstSelXYZW() == DstSel(4, 5, 6, 7) && r.Dfmt() == 14 && r.Nfmt() == 7))); - EXIT_NOT_IMPLEMENTED(!(r.MemoryType() == 0x00 || r.MemoryType() == 0x10 || r.MemoryType() == 0x6d)); - auto addr = r.Base(); + if (gen5) + { + EXIT_NOT_IMPLEMENTED(r.OutOfBounds() != 0); + EXIT_NOT_IMPLEMENTED(!((r.Stride() == 16 && 
r.DstSelXYZW() == DstSel(4, 5, 6, 7) && r.Format() == 77))); + } else + { + EXIT_NOT_IMPLEMENTED(!((r.Stride() == 4 && r.DstSelXYZW() == DstSel(4, 0, 0, 0) && r.Dfmt() == 4 && r.Nfmt() == 4) || + (r.Stride() == 4 && r.DstSelXYZW() == DstSel(4, 0, 0, 1) && r.Dfmt() == 4 && r.Nfmt() == 7) || + (r.Stride() == 8 && r.DstSelXYZW() == DstSel(4, 5, 0, 0) && r.Dfmt() == 11 && r.Nfmt() == 4) || + (r.Stride() == 16 && r.DstSelXYZW() == DstSel(4, 5, 6, 7) && r.Dfmt() == 14 && r.Nfmt() == 7))); + EXIT_NOT_IMPLEMENTED(!(r.MemoryType() == 0x00 || r.MemoryType() == 0x10 || r.MemoryType() == 0x6d)); + } + + auto addr = (gen5 ? r.Base48() : r.Base44()); auto stride = r.Stride(); auto num_records = r.NumRecords(); auto size = stride * num_records; EXIT_NOT_IMPLEMENTED(size == 0); EXIT_NOT_IMPLEMENTED((size & 0x3u) != 0); - bool read_only = (r.MemoryType() == 0x10); + bool read_only = (gen5 ? false : (r.MemoryType() == 0x10)); EXIT_NOT_IMPLEMENTED(read_only && !(storage_buffers.usages[i] == ShaderStorageUsage::ReadOnly || storage_buffers.usages[i] == ShaderStorageUsage::Constant)); @@ -4038,9 +4187,15 @@ static void PrepareStorageBuffers(uint64_t submit_id, CommandBuffer* buffer, con buffers[i] = buf; - r.UpdateAddress(i); + if (gen5) + { + r.UpdateAddress48(i); + } else + { + r.UpdateAddress44(i); + } - EXIT_NOT_IMPLEMENTED((r.Base() >> 32u) != 0); + EXIT_NOT_IMPLEMENTED(((gen5 ? 
r.Base48() : r.Base44()) >> 32u) != 0); (*sgprs)[0] = r.fields[0]; (*sgprs)[1] = r.fields[1]; @@ -4065,48 +4220,83 @@ static void PrepareTextures(uint64_t submit_id, CommandBuffer* buffer, const Sha int index_sampled = 0; int index_storage = 0; + bool gen5 = Config::IsNextGen(); + for (int i = 0; i < textures.textures_num; i++) { auto r = textures.desc[i].texture; - EXIT_NOT_IMPLEMENTED(r.Base() == 0); + if (gen5) + { + EXIT_NOT_IMPLEMENTED(!(r.TileMode() == 0)); + EXIT_NOT_IMPLEMENTED(r.Format() != 56); + EXIT_NOT_IMPLEMENTED(r.PerfMod5() != 7 && r.PerfMod5() != 0); + EXIT_NOT_IMPLEMENTED(r.BCSwizzle() != 0); + EXIT_NOT_IMPLEMENTED(r.BaseArray5() != 0); + EXIT_NOT_IMPLEMENTED(r.ArrayPitch() != 0); + EXIT_NOT_IMPLEMENTED(r.MaxMip() != 0); + EXIT_NOT_IMPLEMENTED(r.MinLodWarn5() != 0); + EXIT_NOT_IMPLEMENTED(r.MipStatsCntId() != 0); + EXIT_NOT_IMPLEMENTED(r.MipStatsCntEn() != false); + EXIT_NOT_IMPLEMENTED(r.CornerSample() != false); + EXIT_NOT_IMPLEMENTED(r.PrtDefColor() != false); + EXIT_NOT_IMPLEMENTED(r.MsaaDepth() != false); + EXIT_NOT_IMPLEMENTED(r.MaxUncompBlkSize() != 0); + EXIT_NOT_IMPLEMENTED(r.MaxCompBlkSize() != 0); + EXIT_NOT_IMPLEMENTED(r.MetaPipeAligned() != false); + EXIT_NOT_IMPLEMENTED(r.WriteCompress() != false); + EXIT_NOT_IMPLEMENTED(r.MetaCompress() != false); + EXIT_NOT_IMPLEMENTED(r.DccAlphaPos() != false); + EXIT_NOT_IMPLEMENTED(r.DccColorTransf() != false); + EXIT_NOT_IMPLEMENTED(r.MetaAddr() != 0); + } else + { + EXIT_NOT_IMPLEMENTED(r.Dfmt() != 1 && r.Dfmt() != 10 && r.Dfmt() != 37 && r.Dfmt() != 4 && r.Dfmt() != 35 && r.Dfmt() != 3 && + r.Dfmt() != 36); + EXIT_NOT_IMPLEMENTED(r.Nfmt() != 9 && r.Nfmt() != 0 && r.Nfmt() != 7); + EXIT_NOT_IMPLEMENTED(r.PerfMod() != 7 && r.PerfMod() != 0); + EXIT_NOT_IMPLEMENTED(r.Interlaced() != false); + EXIT_NOT_IMPLEMENTED(!(r.TileMode() == 8 || r.TileMode() == 13 || r.TileMode() == 14 || r.TileMode() == 2 || + r.TileMode() == 10 || r.TileMode() == 31)); + EXIT_NOT_IMPLEMENTED(r.BaseArray() != 0); + 
EXIT_NOT_IMPLEMENTED(r.LastArray() != 0); + EXIT_NOT_IMPLEMENTED(r.MinLodWarn() != 0); + EXIT_NOT_IMPLEMENTED(r.CounterBankId() != 0); + EXIT_NOT_IMPLEMENTED(r.LodHdwCntEn() != false); + EXIT_NOT_IMPLEMENTED(r.MemoryType() != 0x10 && r.MemoryType() != 0x6d); + } + EXIT_NOT_IMPLEMENTED((gen5 ? r.Base40() : r.Base38()) == 0); EXIT_NOT_IMPLEMENTED(r.MinLod() != 0); - EXIT_NOT_IMPLEMENTED(r.Dfmt() != 1 && r.Dfmt() != 10 && r.Dfmt() != 37 && r.Dfmt() != 4 && r.Dfmt() != 35 && r.Dfmt() != 3 && - r.Dfmt() != 36); - EXIT_NOT_IMPLEMENTED(r.Nfmt() != 9 && r.Nfmt() != 0 && r.Nfmt() != 7); - EXIT_NOT_IMPLEMENTED(r.PerfMod() != 7 && r.PerfMod() != 0); - EXIT_NOT_IMPLEMENTED(r.Interlaced() != false); - EXIT_NOT_IMPLEMENTED(!(r.TilingIdx() == 8 || r.TilingIdx() == 13 || r.TilingIdx() == 14 || r.TilingIdx() == 2 || - r.TilingIdx() == 10 || r.TilingIdx() == 31)); EXIT_NOT_IMPLEMENTED(r.Type() != 9); EXIT_NOT_IMPLEMENTED(r.Depth() != 0); - EXIT_NOT_IMPLEMENTED(r.BaseArray() != 0); - EXIT_NOT_IMPLEMENTED(r.LastArray() != 0); - EXIT_NOT_IMPLEMENTED(r.MinLodWarn() != 0); - EXIT_NOT_IMPLEMENTED(r.CounterBankId() != 0); - EXIT_NOT_IMPLEMENTED(r.LodHdwCntEn() != false); - EXIT_NOT_IMPLEMENTED(r.MemoryType() != 0x10 && r.MemoryType() != 0x6d); - bool read_only = (r.MemoryType() == 0x10); + bool read_only = (gen5 ? false : (r.MemoryType() == 0x10)); EXIT_NOT_IMPLEMENTED(read_only && !(textures.desc[i].usage == ShaderTextureUsage::ReadOnly)); TileSizeAlign size {}; - auto addr = r.Base(); + auto addr = (gen5 ? r.Base40() : r.Base38()); bool neo = Config::IsNeo(); - auto width = r.Width() + 1; - auto height = r.Height() + 1; - auto pitch = r.Pitch() + 1; + auto width = (gen5 ? r.Width5() : r.Width4()) + 1; + auto height = (gen5 ? r.Height5() : r.Height4()) + 1; + auto pitch = (gen5 ? 
width : r.Pitch() + 1); auto base_level = r.BaseLevel(); auto levels = r.LastLevel() + 1; - auto tile = r.TilingIdx(); - auto dfmt = r.Dfmt(); - auto nfmt = r.Nfmt(); + auto tile = r.TileMode(); + auto dfmt = (gen5 ? 0 : r.Dfmt()); + auto nfmt = (gen5 ? 0 : r.Nfmt()); + auto fmt = (gen5 ? r.Format() : 0); uint32_t swizzle = r.DstSelXYZW(); - bool check_depth_texture = (tile == 2); + bool check_depth_texture = (!gen5 && tile == 2); - TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, &size, nullptr, nullptr); + if (gen5) + { + TileGetTextureSize2(fmt, width, height, pitch, levels, tile, &size, nullptr, nullptr); + } else + { + TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, &size, nullptr, nullptr); + } EXIT_NOT_IMPLEMENTED(size.size == 0); EXIT_NOT_IMPLEMENTED((addr & (static_cast(size.align) - 1u)) != 0); @@ -4149,14 +4339,14 @@ static void PrepareTextures(uint64_t submit_id, CommandBuffer* buffer, const Sha { EXIT_NOT_IMPLEMENTED(textures.desc[i].usage != ShaderTextureUsage::ReadWrite); - StorageTextureObject vulkan_texture_info(dfmt, nfmt, width, height, pitch, base_level, levels, tile, neo, swizzle); + StorageTextureObject vulkan_texture_info(dfmt, nfmt, fmt, width, height, pitch, base_level, levels, tile, neo, swizzle); tex = static_cast( GpuMemoryCreateObject(submit_id, g_render_ctx->GetGraphicCtx(), buffer, addr, size.size, vulkan_texture_info)); } else { EXIT_NOT_IMPLEMENTED(textures.desc[i].usage != ShaderTextureUsage::ReadOnly); - if (tile == 10) + if (!gen5 && tile == 10) { RenderTextureFormat rt_format = RenderTextureFormat::Unknown; if (dfmt == 10 && nfmt == 0) @@ -4169,7 +4359,7 @@ static void PrepareTextures(uint64_t submit_id, CommandBuffer* buffer, const Sha submit_id, g_render_ctx->GetGraphicCtx(), buffer, addr, size.size, vulkan_buffer_info)); } else { - TextureObject vulkan_texture_info(dfmt, nfmt, width, height, pitch, base_level, levels, tile, neo, swizzle); + TextureObject vulkan_texture_info(dfmt, 
nfmt, fmt, width, height, pitch, base_level, levels, tile, neo, swizzle); tex = static_cast( GpuMemoryCreateObject(submit_id, g_render_ctx->GetGraphicCtx(), buffer, addr, size.size, vulkan_texture_info)); } @@ -4181,17 +4371,29 @@ static void PrepareTextures(uint64_t submit_id, CommandBuffer* buffer, const Sha if (textures.desc[i].textures2d_without_sampler) { images_storage[index_storage] = tex; - r.UpdateAddress(index_storage); + if (gen5) + { + r.UpdateAddress40(index_storage); + } else + { + r.UpdateAddress38(index_storage); + } index_storage++; } else { images_sampled[index_sampled] = tex; images_sampled_view[index_sampled] = (depth_texture ? VulkanImage::VIEW_DEPTH_TEXTURE : view_type); - r.UpdateAddress(index_sampled); + if (gen5) + { + r.UpdateAddress40(index_sampled); + } else + { + r.UpdateAddress38(index_sampled); + } index_sampled++; } - EXIT_NOT_IMPLEMENTED((r.Base() >> 32u) != 0); + EXIT_NOT_IMPLEMENTED(((gen5 ? r.Base40() : r.Base38()) >> 32u) != 0); (*sgprs)[0] = r.fields[0]; (*sgprs)[1] = r.fields[1]; @@ -4215,6 +4417,8 @@ static void PrepareSamplers(const ShaderSamplerResources& samplers, uint64_t* sa EXIT_IF(sgprs == nullptr); EXIT_IF(*sgprs == nullptr); + bool gen5 = Config::IsNextGen(); + for (int i = 0; i < samplers.samplers_num; i++) { auto r = samplers.samplers[i]; @@ -4226,8 +4430,12 @@ static void PrepareSamplers(const ShaderSamplerResources& samplers, uint64_t* sa EXIT_NOT_IMPLEMENTED(r.DepthCompareFunc() != 0); EXIT_NOT_IMPLEMENTED(r.ForceUnormCoords() != false); EXIT_NOT_IMPLEMENTED(r.AnisoThreshold() != 0); - EXIT_NOT_IMPLEMENTED(r.McCoordTrunc() != false); + EXIT_NOT_IMPLEMENTED(!gen5 && r.McCoordTrunc() != false); EXIT_NOT_IMPLEMENTED(r.ForceDegamma() != false); + EXIT_NOT_IMPLEMENTED(gen5 && r.SkipDegamma() != false); + EXIT_NOT_IMPLEMENTED(gen5 && r.PointPreclamp() != false); + EXIT_NOT_IMPLEMENTED(gen5 && r.AnisoOverride() != false); + EXIT_NOT_IMPLEMENTED(gen5 && r.BlendZeroPrt() != false); EXIT_NOT_IMPLEMENTED(r.AnisoBias() != 
0); EXIT_NOT_IMPLEMENTED(r.TruncCoord() != false); EXIT_NOT_IMPLEMENTED(r.DisableCubeWrap() != false); @@ -4457,7 +4665,7 @@ void GraphicsRenderDrawIndex(uint64_t submit_id, CommandBuffer* buffer, HW::Cont hw_print(*ctx); hw_check(*ctx); - EXIT_NOT_IMPLEMENTED(ctx->GetShaderStages() != 0); + EXIT_NOT_IMPLEMENTED(ctx->GetShaderStages() != 0 && ctx->GetShaderStages() != 0x02002000); VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; @@ -4605,7 +4813,7 @@ void GraphicsRenderDrawIndexAuto(uint64_t submit_id, CommandBuffer* buffer, HW:: printf("\t flags = 0x%08" PRIx32 "\n", flags); EXIT_NOT_IMPLEMENTED(flags != 0); - EXIT_NOT_IMPLEMENTED(ctx->GetShaderStages() != 0); + EXIT_NOT_IMPLEMENTED(ctx->GetShaderStages() != 0 && ctx->GetShaderStages() != 0x02002000); RenderDepthInfo depth_info; FindRenderDepthInfo(submit_id, buffer, *ctx, &depth_info); diff --git a/source/emulator/src/Graphics/GraphicsRun.cpp b/source/emulator/src/Graphics/GraphicsRun.cpp index a9689b2..16dff61 100644 --- a/source/emulator/src/Graphics/GraphicsRun.cpp +++ b/source/emulator/src/Graphics/GraphicsRun.cpp @@ -5,6 +5,7 @@ #include "Kyty/Core/String.h" #include "Kyty/Core/Threads.h" +#include "Emulator/Config.h" #include "Emulator/Graphics/AsyncJob.h" #include "Emulator/Graphics/GraphicContext.h" #include "Emulator/Graphics/Graphics.h" @@ -309,7 +310,7 @@ private: CommandProcessor* m_compute_cp[8] = {}; ComputeRing* m_compute_ring[64] = {}; - int m_done_num = 0; + std::atomic_int m_done_num = 0; }; using hw_ctx_parser_func_t = uint32_t (*)(KYTY_HW_CTX_PARSER_ARGS); @@ -452,7 +453,7 @@ bool Gpu::AreSubmitsAllowed() int Gpu::GetFrameNum() { - Core::LockGuard lock(m_mutex); + // Core::LockGuard lock(m_mutex); return m_done_num; } @@ -1179,7 +1180,8 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_ cache_action == 0x38 && source64 && !with_interrupt) { GraphicsRenderWriteAtEndOfPipeWithWriteBack64(m_sumbit_id, m_buffer[m_current_buffer], 
static_cast(dst_gpu_addr), value); - } else if (eop_event_type == 0x04 && cache_action == 0x00 && event_index == 0x05 && source_counter && !with_interrupt) + } else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x00)) && + cache_action == 0x00 && source_counter && !with_interrupt) { GraphicsRenderWriteAtEndOfPipeClockCounter(m_sumbit_id, m_buffer[m_current_buffer], static_cast(dst_gpu_addr)); } else if ((eop_event_type == 0x04 && event_index == 0x05) && cache_action == 0x00 && source64 && with_interrupt) @@ -1581,6 +1583,11 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_color_info) r.cmask_tile_mode_neo = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, CMASK_ADDR_TYPE); r.neo_mode = KYTY_PM4_GET(buffer[0], CB_COLOR0_INFO, ALT_TILE_MODE) != 0; + if (!r.neo_mode && Config::IsNextGen()) + { + r.neo_mode = true; + } + cp->GetCtx()->SetColorInfo(param, r); return 1; @@ -1957,6 +1964,11 @@ KYTY_HW_CTX_PARSER(hw_ctx_set_render_target) info.cmask_tile_mode_neo = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, CMASK_ADDR_TYPE); info.neo_mode = KYTY_PM4_GET(buffer[4], CB_COLOR0_INFO, ALT_TILE_MODE) != 0; + if (!info.neo_mode && Config::IsNextGen()) + { + info.neo_mode = true; + } + // attrib.force_dest_alpha_to_one = (buffer[5] & 0x20000u) != 0; // attrib.tile_mode = buffer[5] & 0x1fu; // attrib.fmask_tile_mode = (buffer[5] >> 5u) & 0x1fu; @@ -3223,21 +3235,39 @@ KYTY_CP_OP_PARSER(cp_op_release_mem) if (custom) { - uint32_t gcr_cntl = buffer[1]; + uint32_t gcr_cntl = buffer[1] & 0xffffu; + uint32_t data_sel = (buffer[1] >> 16u) & 0xffu; - EXIT_NOT_IMPLEMENTED(gcr_cntl != 0x200); - EXIT_NOT_IMPLEMENTED((buffer[0] >> 8u) != 0x3); + EXIT_NOT_IMPLEMENTED(data_sel != 2 && data_sel != 3); - if (gcr_cntl == 0x200) + if (data_sel == 2) { - cache_action = 0x38; - } + EXIT_NOT_IMPLEMENTED(gcr_cntl != 0x0200); + EXIT_NOT_IMPLEMENTED((buffer[0] >> 8u) != 0x3); - cache_policy = 0; - event_index = 0; - event_write_dest = 0; - event_write_source = 2; - 
interrupt_selector = 0; + if (gcr_cntl == 0x200) + { + cache_action = 0x38; + } + + cache_policy = 0; + event_index = 0; + event_write_dest = 0; + event_write_source = 2; + interrupt_selector = 0; + } else if (data_sel == 3) + { + EXIT_NOT_IMPLEMENTED(gcr_cntl != 0x0000); + EXIT_NOT_IMPLEMENTED((buffer[0] >> 8u) != 0x0); + + cache_action = 0x00; + + cache_policy = 0; + event_index = 0; + event_write_dest = 0; + event_write_source = 4; + interrupt_selector = 0; + } } cp->WriteAtEndOfPipe64(cache_policy, event_write_dest, eop_event_type, cache_action, event_index, event_write_source, dst_gpu_addr, @@ -3513,6 +3543,10 @@ static void graphics_init_jmp_tables_cx_indirect() info.dcc_compression_enable = KYTY_PM4_GET(value, CB_COLOR0_INFO, DCC_ENABLE) != 0; info.cmask_tile_mode_neo = KYTY_PM4_GET(value, CB_COLOR0_INFO, CMASK_ADDR_TYPE); info.neo_mode = KYTY_PM4_GET(value, CB_COLOR0_INFO, ALT_TILE_MODE) != 0; + if (!info.neo_mode && Config::IsNextGen()) + { + info.neo_mode = true; + } cp->GetCtx()->SetColorInfo(slot, info); }; } diff --git a/source/emulator/src/Graphics/Objects/StorageTexture.cpp b/source/emulator/src/Graphics/Objects/StorageTexture.cpp index 456f513..203b89b 100644 --- a/source/emulator/src/Graphics/Objects/StorageTexture.cpp +++ b/source/emulator/src/Graphics/Objects/StorageTexture.cpp @@ -17,21 +17,27 @@ namespace Kyty::Libs::Graphics { -static VkFormat get_texture_format(uint32_t dfmt, uint32_t nfmt) +static VkFormat get_texture_format(uint32_t dfmt, uint32_t nfmt, uint32_t fmt) { - if (nfmt == 9 && dfmt == 10) + if (fmt == 0) { - return VK_FORMAT_R8G8B8A8_SRGB; - } - if (nfmt == 0 && dfmt == 10) + if (nfmt == 9 && dfmt == 10) + { + return VK_FORMAT_R8G8B8A8_SRGB; + } + if (nfmt == 0 && dfmt == 10) + { + return VK_FORMAT_R8G8B8A8_UNORM; + } + if (nfmt == 9 && dfmt == 37) + { + return VK_FORMAT_BC3_SRGB_BLOCK; + } + EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); + } else { - return VK_FORMAT_R8G8B8A8_UNORM; + EXIT("unknown format: fmt = 
%u\n", fmt); } - if (nfmt == 9 && dfmt == 37) - { - return VK_FORMAT_BC3_SRGB_BLOCK; - } - EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); return VK_FORMAT_UNDEFINED; } @@ -130,8 +136,9 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, auto* vk_obj = static_cast(obj); auto tile = params[StorageTextureObject::PARAM_TILE]; - auto dfmt = params[StorageTextureObject::PARAM_DFMT_NFMT] >> 32u; - auto nfmt = params[StorageTextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto fmt = (params[StorageTextureObject::PARAM_FORMAT] >> 16u) & 0xffffu; + auto dfmt = (params[StorageTextureObject::PARAM_FORMAT] >> 8u) & 0xffu; + auto nfmt = (params[StorageTextureObject::PARAM_FORMAT]) & 0xffu; auto width = params[StorageTextureObject::PARAM_WIDTH_HEIGHT] >> 32u; auto height = params[StorageTextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; // auto base_level = params[StorageTextureObject::PARAM_LEVELS] >> 32u; @@ -147,7 +154,13 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, TileSizeOffset level_sizes[16]; - TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr); + if (fmt != 0) + { + TileGetTextureSize2(fmt, width, height, pitch, levels, tile, nullptr, level_sizes, nullptr); + } else + { + TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr); + } // dbg_test_mipmaps(ctx, VK_FORMAT_BC3_SRGB_BLOCK, 512, 512); @@ -187,6 +200,7 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, if (tile == 13) { // EXIT_NOT_IMPLEMENTED(pitch != width); + EXIT_NOT_IMPLEMENTED(fmt != 0); auto* temp_buf = new uint8_t[*size]; TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::TextureTiled, dfmt, nfmt, width, height, pitch, levels, neo); @@ -208,8 +222,9 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint EXIT_IF(ctx == nullptr); EXIT_IF(params == 
nullptr); - auto dfmt = params[StorageTextureObject::PARAM_DFMT_NFMT] >> 32u; - auto nfmt = params[StorageTextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto fmt = (params[StorageTextureObject::PARAM_FORMAT] >> 16u) & 0xffffu; + auto dfmt = (params[StorageTextureObject::PARAM_FORMAT] >> 8u) & 0xffu; + auto nfmt = (params[StorageTextureObject::PARAM_FORMAT]) & 0xffu; auto width = params[StorageTextureObject::PARAM_WIDTH_HEIGHT] >> 32u; auto height = params[StorageTextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto base_level = params[StorageTextureObject::PARAM_LEVELS] >> 32u; @@ -227,7 +242,7 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint components.b = get_swizzle(GetDstSel(swizzle, 2)); components.a = get_swizzle(GetDstSel(swizzle, 3)); - auto pixel_format = get_texture_format(dfmt, nfmt); + auto pixel_format = get_texture_format(dfmt, nfmt, fmt); EXIT_NOT_IMPLEMENTED(pixel_format == VK_FORMAT_UNDEFINED); EXIT_NOT_IMPLEMENTED(width == 0); @@ -336,7 +351,7 @@ static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem) bool StorageTextureObject::Equal(const uint64_t* other) const { - return (params[PARAM_DFMT_NFMT] == other[PARAM_DFMT_NFMT] && params[PARAM_PITCH] == other[PARAM_PITCH] && + return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_PITCH] == other[PARAM_PITCH] && params[PARAM_WIDTH_HEIGHT] == other[PARAM_WIDTH_HEIGHT] && params[PARAM_LEVELS] == other[PARAM_LEVELS] && params[PARAM_TILE] == other[PARAM_TILE] && params[PARAM_NEO] == other[PARAM_NEO] && params[PARAM_SWIZZLE] == other[PARAM_SWIZZLE]); diff --git a/source/emulator/src/Graphics/Objects/Texture.cpp b/source/emulator/src/Graphics/Objects/Texture.cpp index 9e92ee4..174ec66 100644 --- a/source/emulator/src/Graphics/Objects/Texture.cpp +++ b/source/emulator/src/Graphics/Objects/Texture.cpp @@ -17,41 +17,51 @@ namespace Kyty::Libs::Graphics { -static VkFormat get_texture_format(uint32_t dfmt, uint32_t nfmt) +static VkFormat 
get_texture_format(uint32_t dfmt, uint32_t nfmt, uint32_t fmt) { - if (nfmt == 9 && dfmt == 10) + if (fmt == 0) { - return VK_FORMAT_R8G8B8A8_SRGB; - } - if (nfmt == 0 && dfmt == 10) + if (nfmt == 9 && dfmt == 10) + { + return VK_FORMAT_R8G8B8A8_SRGB; + } + if (nfmt == 0 && dfmt == 10) + { + return VK_FORMAT_R8G8B8A8_UNORM; + } + if (nfmt == 0 && dfmt == 1) + { + return VK_FORMAT_R8_UNORM; + } + if (nfmt == 0 && dfmt == 3) + { + return VK_FORMAT_R8G8_UNORM; + } + if (nfmt == 9 && dfmt == 37) + { + return VK_FORMAT_BC3_SRGB_BLOCK; + } + if (nfmt == 0 && dfmt == 37) + { + return VK_FORMAT_BC3_UNORM_BLOCK; + } + if (nfmt == 0 && dfmt == 36) + { + return VK_FORMAT_BC2_UNORM_BLOCK; + } + if (nfmt == 0 && dfmt == 35) + { + return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + } + EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); + } else { - return VK_FORMAT_R8G8B8A8_UNORM; + if (fmt == 56) + { + return VK_FORMAT_R8G8B8A8_UNORM; + } + EXIT("unknown format: fmt = %u\n", fmt); } - if (nfmt == 0 && dfmt == 1) - { - return VK_FORMAT_R8_UNORM; - } - if (nfmt == 0 && dfmt == 3) - { - return VK_FORMAT_R8G8_UNORM; - } - if (nfmt == 9 && dfmt == 37) - { - return VK_FORMAT_BC3_SRGB_BLOCK; - } - if (nfmt == 0 && dfmt == 37) - { - return VK_FORMAT_BC3_UNORM_BLOCK; - } - if (nfmt == 0 && dfmt == 36) - { - return VK_FORMAT_BC2_UNORM_BLOCK; - } - if (nfmt == 0 && dfmt == 35) - { - return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - } - EXIT("unknown format: nfmt = %u, dfmt = %u\n", nfmt, dfmt); return VK_FORMAT_UNDEFINED; } @@ -150,8 +160,9 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, auto* vk_obj = static_cast(obj); auto tile = params[TextureObject::PARAM_TILE]; - auto dfmt = params[TextureObject::PARAM_DFMT_NFMT] >> 32u; - auto nfmt = params[TextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto fmt = (params[TextureObject::PARAM_FORMAT] >> 16u) & 0xffffu; + auto dfmt = (params[TextureObject::PARAM_FORMAT] >> 8u) & 0xffu; + auto nfmt = 
(params[TextureObject::PARAM_FORMAT]) & 0xffu; auto width = params[TextureObject::PARAM_WIDTH_HEIGHT] >> 32u; auto height = params[TextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto levels = params[TextureObject::PARAM_LEVELS] & 0xffffffffu; @@ -162,11 +173,19 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, EXIT_NOT_IMPLEMENTED(levels >= 16); - EXIT_NOT_IMPLEMENTED(tile != 8 && tile != 13); - TileSizeOffset level_sizes[16]; - TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr); + if (fmt != 0) + { + EXIT_NOT_IMPLEMENTED(tile != 0); + + TileGetTextureSize2(fmt, width, height, pitch, levels, tile, nullptr, level_sizes, nullptr); + } else + { + EXIT_NOT_IMPLEMENTED(tile != 8 && tile != 13); + + TileGetTextureSize(dfmt, nfmt, width, height, pitch, levels, tile, neo, nullptr, level_sizes, nullptr); + } // dbg_test_mipmaps(ctx, VK_FORMAT_BC3_SRGB_BLOCK, 512, 512); @@ -201,17 +220,27 @@ static void update_func(GraphicContext* ctx, const uint64_t* params, void* obj, } } - if (tile == 13) + if (fmt == 0) { - // EXIT_NOT_IMPLEMENTED(pitch != width); - auto* temp_buf = new uint8_t[*size]; - TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::TextureTiled, dfmt, nfmt, width, height, pitch, - levels, neo); - UtilFillImage(ctx, vk_obj, temp_buf, *size, regions, static_cast(vk_layout)); - delete[] temp_buf; - } else if (tile == 8) + if (tile == 13) + { + // EXIT_NOT_IMPLEMENTED(pitch != width); + EXIT_NOT_IMPLEMENTED(fmt != 0); + auto* temp_buf = new uint8_t[*size]; + TileConvertTiledToLinear(temp_buf, reinterpret_cast(*vaddr), TileMode::TextureTiled, dfmt, nfmt, width, height, pitch, + levels, neo); + UtilFillImage(ctx, vk_obj, temp_buf, *size, regions, static_cast(vk_layout)); + delete[] temp_buf; + } else if (tile == 8) + { + UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size, regions, static_cast(vk_layout)); + } + } else { - UtilFillImage(ctx, vk_obj, 
reinterpret_cast(*vaddr), *size, regions, static_cast(vk_layout)); + if (tile == 0) + { + UtilFillImage(ctx, vk_obj, reinterpret_cast(*vaddr), *size, regions, static_cast(vk_layout)); + } } } @@ -392,8 +421,9 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint EXIT_IF(ctx == nullptr); EXIT_IF(params == nullptr); - auto dfmt = params[TextureObject::PARAM_DFMT_NFMT] >> 32u; - auto nfmt = params[TextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto fmt = (params[TextureObject::PARAM_FORMAT] >> 16u) & 0xffffu; + auto dfmt = (params[TextureObject::PARAM_FORMAT] >> 8u) & 0xffu; + auto nfmt = (params[TextureObject::PARAM_FORMAT]) & 0xffu; auto width = params[TextureObject::PARAM_WIDTH_HEIGHT] >> 32u; auto height = params[TextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto base_level = params[TextureObject::PARAM_LEVELS] >> 32u; @@ -409,7 +439,7 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint components.b = get_swizzle(GetDstSel(swizzle, 2)); components.a = get_swizzle(GetDstSel(swizzle, 3)); - auto pixel_format = get_texture_format(dfmt, nfmt); + auto pixel_format = get_texture_format(dfmt, nfmt, fmt); EXIT_NOT_IMPLEMENTED(pixel_format == VK_FORMAT_UNDEFINED); EXIT_NOT_IMPLEMENTED(width == 0); @@ -504,8 +534,9 @@ static void* create2_func(GraphicContext* ctx, CommandBuffer* buffer, const uint EXIT_IF(ctx == nullptr); EXIT_IF(params == nullptr); - auto dfmt = params[TextureObject::PARAM_DFMT_NFMT] >> 32u; - auto nfmt = params[TextureObject::PARAM_DFMT_NFMT] & 0xffffffffu; + auto fmt = (params[TextureObject::PARAM_FORMAT] >> 16u) & 0xffffu; + auto dfmt = (params[TextureObject::PARAM_FORMAT] >> 8u) & 0xffu; + auto nfmt = (params[TextureObject::PARAM_FORMAT]) & 0xffu; auto width = params[TextureObject::PARAM_WIDTH_HEIGHT] >> 32u; auto height = params[TextureObject::PARAM_WIDTH_HEIGHT] & 0xffffffffu; auto base_level = params[TextureObject::PARAM_LEVELS] >> 32u; @@ -521,7 +552,7 @@ static void* 
create2_func(GraphicContext* ctx, CommandBuffer* buffer, const uint components.b = get_swizzle(GetDstSel(swizzle, 2)); components.a = get_swizzle(GetDstSel(swizzle, 3)); - auto pixel_format = get_texture_format(dfmt, nfmt); + auto pixel_format = get_texture_format(dfmt, nfmt, fmt); EXIT_NOT_IMPLEMENTED(pixel_format == VK_FORMAT_UNDEFINED); EXIT_NOT_IMPLEMENTED(width == 0); @@ -628,7 +659,7 @@ static void delete_func(GraphicContext* ctx, void* obj, VulkanMemory* mem) bool TextureObject::Equal(const uint64_t* other) const { - return (params[PARAM_DFMT_NFMT] == other[PARAM_DFMT_NFMT] && params[PARAM_PITCH] == other[PARAM_PITCH] && + return (params[PARAM_FORMAT] == other[PARAM_FORMAT] && params[PARAM_PITCH] == other[PARAM_PITCH] && params[PARAM_WIDTH_HEIGHT] == other[PARAM_WIDTH_HEIGHT] && params[PARAM_LEVELS] == other[PARAM_LEVELS] && params[PARAM_TILE] == other[PARAM_TILE] && params[PARAM_NEO] == other[PARAM_NEO] && params[PARAM_SWIZZLE] == other[PARAM_SWIZZLE]); diff --git a/source/emulator/src/Graphics/Shader.cpp b/source/emulator/src/Graphics/Shader.cpp index 190fdc9..9d2efc8 100644 --- a/source/emulator/src/Graphics/Shader.cpp +++ b/source/emulator/src/Graphics/Shader.cpp @@ -5,11 +5,13 @@ #include "Kyty/Core/File.h" #include "Kyty/Core/MagicEnum.h" #include "Kyty/Core/String.h" +#include "Kyty/Core/String8.h" #include "Kyty/Core/Vector.h" #include "Emulator/Config.h" #include "Emulator/Graphics/GraphicsRun.h" #include "Emulator/Graphics/HardwareContext.h" +#include "Emulator/Graphics/ShaderParse.h" #include "Emulator/Graphics/ShaderSpirv.h" #include "Emulator/Profiler.h" @@ -20,6 +22,7 @@ #include #include #include +#include #include //#define SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS @@ -27,15 +30,6 @@ #ifdef KYTY_EMU_ENABLED -#define KYTY_SHADER_PARSER_ARGS \ - [[maybe_unused]] uint32_t pc, [[maybe_unused]] const uint32_t *src, [[maybe_unused]] const uint32_t *buffer, \ - [[maybe_unused]] ShaderCode *dst -#define KYTY_SHADER_PARSER(f) static uint32_t 
f(KYTY_SHADER_PARSER_ARGS) -#define KYTY_CP_OP_PARSER_ARGS \ - [[maybe_unused]] CommandProcessor *cp, [[maybe_unused]] uint32_t cmd_id, [[maybe_unused]] const uint32_t *buffer, \ - [[maybe_unused]] uint32_t dw, [[maybe_unused]] uint32_t num_dw -#define KYTY_CP_OP_PARSER(f) static uint32_t f(KYTY_CP_OP_PARSER_ARGS) - KYTY_ENUM_RANGE(Kyty::Libs::Graphics::ShaderInstructionType, 0, static_cast(Kyty::Libs::Graphics::ShaderInstructionType::ZMax)); namespace Kyty::Libs::Graphics { @@ -74,6 +68,25 @@ struct ShaderUsageInfo const uint32_t* usage_masks = nullptr; const ShaderUsageSlot* slots = nullptr; int slots_num = 0; + bool valid = false; +}; + +struct ShaderParsedUsage +{ + bool fetch = false; + int fetch_reg = 0; + bool vertex_buffer = false; + int vertex_buffer_reg = 0; + bool vertex_attrib = false; + int vertex_attrib_reg = 0; + int storage_buffers_readwrite = 0; + int storage_buffers_readonly = 0; + int storage_buffers_constant = 0; + int textures2D_readonly = 0; + int textures2D_readwrite = 0; + bool extended_buffer = false; + int samplers = 0; + int gds_pointers = 0; }; struct ShaderDebugPrintfCmds @@ -82,28 +95,43 @@ struct ShaderDebugPrintfCmds Vector cmds; }; -static Vector* g_disabled_shaders = nullptr; -static Vector* g_debug_printfs = nullptr; +static Vector* g_disabled_shaders = nullptr; +static Vector* g_debug_printfs = nullptr; +static std::unordered_map* g_shader_map = nullptr; -static String operand_to_str(ShaderOperand op) +void ShaderInit() { - String ret = U"???"; + EXIT_IF(g_shader_map != nullptr); + + g_shader_map = new std::unordered_map(); +} + +void ShaderMapUserData(uint64_t addr, const ShaderMappedData& data) +{ + EXIT_IF(g_shader_map == nullptr); + + g_shader_map->insert({addr, data}); +} + +static String8 operand_to_str(ShaderOperand op) +{ + String8 ret = "???"; switch (op.type) { case ShaderOperandType::LiteralConstant: EXIT_IF(op.size != 0); EXIT_IF(op.negate || op.absolute); - return String::FromPrintf("%f (%u)", op.constant.f, 
op.constant.u); + return String8::FromPrintf("%f (%u)", op.constant.f, op.constant.u); break; case ShaderOperandType::IntegerInlineConstant: EXIT_IF(op.size != 0); EXIT_IF(op.negate || op.absolute); - return String::FromPrintf("%d", op.constant.i); + return String8::FromPrintf("%d", op.constant.i); break; case ShaderOperandType::FloatInlineConstant: EXIT_IF(op.size != 0); EXIT_IF(op.negate || op.absolute); - return String::FromPrintf("%f", op.constant.f); + return String8::FromPrintf("%f", op.constant.f); break; default: break; } @@ -112,34 +140,35 @@ static String operand_to_str(ShaderOperand op) switch (op.type) { - case ShaderOperandType::VccHi: ret = U"vcc_hi"; break; - case ShaderOperandType::VccLo: ret = U"vcc_lo"; break; - case ShaderOperandType::ExecHi: ret = U"exec_hi"; break; - case ShaderOperandType::ExecLo: ret = U"exec_lo"; break; - case ShaderOperandType::ExecZ: ret = U"execz"; break; - case ShaderOperandType::Scc: ret = U"scc"; break; - case ShaderOperandType::M0: ret = U"m0"; break; - case ShaderOperandType::Vgpr: ret = String::FromPrintf("v%d", op.register_id); break; - case ShaderOperandType::Sgpr: ret = String::FromPrintf("s%d", op.register_id); break; + case ShaderOperandType::VccHi: ret = "vcc_hi"; break; + case ShaderOperandType::VccLo: ret = "vcc_lo"; break; + case ShaderOperandType::ExecHi: ret = "exec_hi"; break; + case ShaderOperandType::ExecLo: ret = "exec_lo"; break; + case ShaderOperandType::ExecZ: ret = "execz"; break; + case ShaderOperandType::Scc: ret = "scc"; break; + case ShaderOperandType::M0: ret = "m0"; break; + case ShaderOperandType::Vgpr: ret = String8::FromPrintf("v%d", op.register_id); break; + case ShaderOperandType::Sgpr: ret = String8::FromPrintf("s%d", op.register_id); break; + case ShaderOperandType::Null: ret = "null"; break; default: break; } if (op.absolute) { - ret = U"abs(" + ret + U")"; + ret = "abs(" + ret + ")"; } if (op.negate) { - return U"-" + ret; + return "-" + ret; } return ret; } -static String 
operand_array_to_str(ShaderOperand op, int n) +static String8 operand_array_to_str(ShaderOperand op, int n) { - String ret = U"???"; + String8 ret = "???"; EXIT_IF(op.size != n); @@ -148,109 +177,112 @@ static String operand_array_to_str(ShaderOperand op, int n) case ShaderOperandType::VccLo: if (n == 2) { - ret = U"vcc"; + ret = "vcc"; } break; case ShaderOperandType::ExecLo: if (n == 2) { - ret = U"exec"; + ret = "exec"; } break; - case ShaderOperandType::Sgpr: ret = String::FromPrintf("s[%d:%d]", op.register_id, op.register_id + n - 1); break; - case ShaderOperandType::Vgpr: ret = String::FromPrintf("v[%d:%d]", op.register_id, op.register_id + n - 1); break; + case ShaderOperandType::Sgpr: ret = String8::FromPrintf("s[%d:%d]", op.register_id, op.register_id + n - 1); break; + case ShaderOperandType::Vgpr: ret = String8::FromPrintf("v[%d:%d]", op.register_id, op.register_id + n - 1); break; case ShaderOperandType::LiteralConstant: if (n == 2) { - ret = String::FromPrintf("%f (%u)", op.constant.f, op.constant.u); + ret = String8::FromPrintf("%f (%u)", op.constant.f, op.constant.u); } break; case ShaderOperandType::IntegerInlineConstant: if (n == 2) { - ret = String::FromPrintf("%d", op.constant.i); + ret = String8::FromPrintf("%d", op.constant.i); } break; default: break; } - EXIT_IF(ret == U"???"); + EXIT_IF(ret == "???"); if (op.absolute) { - ret = U"abs(" + ret + U")"; + ret = "abs(" + ret + ")"; } if (op.negate) { - return U"-" + ret; + return "-" + ret; } return ret; } -static String dbg_fmt_to_str(const ShaderInstruction& inst) +static String8 dbg_fmt_to_str(const ShaderInstruction& inst) { switch (inst.format) { - case ShaderInstructionFormat::Unknown: return U"Unknown"; break; - case ShaderInstructionFormat::Empty: return U"Empty"; break; - case ShaderInstructionFormat::Imm: return U"Imm"; break; - case ShaderInstructionFormat::Mrt0OffOffComprVmDone: return U"Mrt0OffOffComprVmDone"; break; - case ShaderInstructionFormat::Mrt0Vsrc0Vsrc1ComprVmDone: return 
U"Mrt0Vsrc0Vsrc1ComprVmDone"; break; - case ShaderInstructionFormat::Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone: return U"Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone"; break; - case ShaderInstructionFormat::Param0Vsrc0Vsrc1Vsrc2Vsrc3: return U"Param0Vsrc0Vsrc1Vsrc2Vsrc3"; break; - case ShaderInstructionFormat::Param1Vsrc0Vsrc1Vsrc2Vsrc3: return U"Param1Vsrc0Vsrc1Vsrc2Vsrc3"; break; - case ShaderInstructionFormat::Param2Vsrc0Vsrc1Vsrc2Vsrc3: return U"Param2Vsrc0Vsrc1Vsrc2Vsrc3"; break; - case ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3: return U"Param3Vsrc0Vsrc1Vsrc2Vsrc3"; break; - case ShaderInstructionFormat::Param4Vsrc0Vsrc1Vsrc2Vsrc3: return U"Param4Vsrc0Vsrc1Vsrc2Vsrc3"; break; - case ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done: return U"Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done"; break; - case ShaderInstructionFormat::Saddr: return U"Saddr"; break; - case ShaderInstructionFormat::Sdst4SbaseSoffset: return U"Sdst4SbaseSoffset"; break; - case ShaderInstructionFormat::Sdst8SbaseSoffset: return U"Sdst8SbaseSoffset"; break; - case ShaderInstructionFormat::SdstSvSoffset: return U"SdstSvSoffset"; break; - case ShaderInstructionFormat::Sdst2SvSoffset: return U"Sdst2SvSoffset"; break; - case ShaderInstructionFormat::Sdst4SvSoffset: return U"Sdst4SvSoffset"; break; - case ShaderInstructionFormat::Sdst8SvSoffset: return U"Sdst8SvSoffset"; break; - case ShaderInstructionFormat::Sdst16SvSoffset: return U"Sdst16SvSoffset"; break; - case ShaderInstructionFormat::SVdstSVsrc0: return U"SVdstSVsrc0"; break; - case ShaderInstructionFormat::Sdst2Ssrc02: return U"Sdst2Ssrc02"; break; - case ShaderInstructionFormat::Sdst2Ssrc02Ssrc12: return U"Sdst2Ssrc02Ssrc12"; break; - case ShaderInstructionFormat::SmaskVsrc0Vsrc1: return U"SmaskVsrc0Vsrc1"; break; - case ShaderInstructionFormat::Ssrc0Ssrc1: return U"Ssrc0Ssrc1"; break; - case ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen: return U"Vdata1VaddrSvSoffsIdxen"; break; - case ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxenFloat1: return 
U"Vdata1VaddrSvSoffsIdxenFloat1"; break; - case ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen: return U"Vdata2VaddrSvSoffsIdxen"; break; - case ShaderInstructionFormat::Vdata3VaddrSvSoffsIdxen: return U"Vdata3VaddrSvSoffsIdxen"; break; - case ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxen: return U"Vdata4VaddrSvSoffsIdxen"; break; - case ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxenFloat4: return U"Vdata4VaddrSvSoffsIdxenFloat4"; break; - case ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4: return U"Vdata4Vaddr2SvSoffsOffenIdxenFloat4"; break; - case ShaderInstructionFormat::Vdata1Vaddr3StSsDmask1: return U"Vdata1Vaddr3StSsDmask1"; break; - case ShaderInstructionFormat::Vdata1Vaddr3StSsDmask8: return U"Vdata1Vaddr3StSsDmask8"; break; - case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask3: return U"Vdata2Vaddr3StSsDmask3"; break; - case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask5: return U"Vdata2Vaddr3StSsDmask5"; break; - case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9: return U"Vdata2Vaddr3StSsDmask9"; break; - case ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7: return U"Vdata3Vaddr3StSsDmask7"; break; - case ShaderInstructionFormat::Vdata3Vaddr4StSsDmask7: return U"Vdata3Vaddr4StSsDmask7"; break; - case ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF: return U"Vdata4Vaddr3StSsDmaskF"; break; - case ShaderInstructionFormat::Vdata4Vaddr3StDmaskF: return U"Vdata4Vaddr3StDmaskF"; break; - case ShaderInstructionFormat::Vdata4Vaddr4StDmaskF: return U"Vdata4Vaddr4StDmaskF"; break; - case ShaderInstructionFormat::SVdstSVsrc0SVsrc1: return U"SVdstSVsrc0SVsrc1"; break; - case ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2: return U"VdstVsrc0Vsrc1Smask2"; break; - case ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2: return U"VdstVsrc0Vsrc1Vsrc2"; break; - case ShaderInstructionFormat::VdstVsrcAttrChan: return U"VdstVsrcAttrChan"; break; - case ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1: return U"VdstSdst2Vsrc0Vsrc1"; break; - case 
ShaderInstructionFormat::VdstGds: return U"VdstGds"; break; - case ShaderInstructionFormat::Label: return U"Label"; break; - default: return U"????"; break; + case ShaderInstructionFormat::Unknown: return "Unknown"; break; + case ShaderInstructionFormat::Empty: return "Empty"; break; + case ShaderInstructionFormat::Imm: return "Imm"; break; + case ShaderInstructionFormat::Mrt0OffOffComprVmDone: return "Mrt0OffOffComprVmDone"; break; + case ShaderInstructionFormat::Mrt0Vsrc0Vsrc1ComprVmDone: return "Mrt0Vsrc0Vsrc1ComprVmDone"; break; + case ShaderInstructionFormat::Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone: return "Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone"; break; + case ShaderInstructionFormat::Param0Vsrc0Vsrc1Vsrc2Vsrc3: return "Param0Vsrc0Vsrc1Vsrc2Vsrc3"; break; + case ShaderInstructionFormat::Param1Vsrc0Vsrc1Vsrc2Vsrc3: return "Param1Vsrc0Vsrc1Vsrc2Vsrc3"; break; + case ShaderInstructionFormat::Param2Vsrc0Vsrc1Vsrc2Vsrc3: return "Param2Vsrc0Vsrc1Vsrc2Vsrc3"; break; + case ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3: return "Param3Vsrc0Vsrc1Vsrc2Vsrc3"; break; + case ShaderInstructionFormat::Param4Vsrc0Vsrc1Vsrc2Vsrc3: return "Param4Vsrc0Vsrc1Vsrc2Vsrc3"; break; + case ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done: return "Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done"; break; + case ShaderInstructionFormat::PrimVsrc0OffOffOffDone: return "PrimVsrc0OffOffOffDone"; break; + case ShaderInstructionFormat::Saddr: return "Saddr"; break; + case ShaderInstructionFormat::SdstSbaseSoffset: return "SdstSbaseSoffset"; break; + case ShaderInstructionFormat::Sdst4SbaseSoffset: return "Sdst4SbaseSoffset"; break; + case ShaderInstructionFormat::Sdst8SbaseSoffset: return "Sdst8SbaseSoffset"; break; + case ShaderInstructionFormat::SdstSvSoffset: return "SdstSvSoffset"; break; + case ShaderInstructionFormat::Sdst2SvSoffset: return "Sdst2SvSoffset"; break; + case ShaderInstructionFormat::Sdst4SvSoffset: return "Sdst4SvSoffset"; break; + case ShaderInstructionFormat::Sdst8SvSoffset: return 
"Sdst8SvSoffset"; break; + case ShaderInstructionFormat::Sdst16SvSoffset: return "Sdst16SvSoffset"; break; + case ShaderInstructionFormat::SVdstSVsrc0: return "SVdstSVsrc0"; break; + case ShaderInstructionFormat::Sdst2Ssrc02: return "Sdst2Ssrc02"; break; + case ShaderInstructionFormat::Sdst2Ssrc02Ssrc1: return "Sdst2Ssrc02Ssrc1"; break; + case ShaderInstructionFormat::Sdst2Ssrc02Ssrc12: return "Sdst2Ssrc02Ssrc12"; break; + case ShaderInstructionFormat::SmaskVsrc0Vsrc1: return "SmaskVsrc0Vsrc1"; break; + case ShaderInstructionFormat::Ssrc0Ssrc1: return "Ssrc0Ssrc1"; break; + case ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen: return "Vdata1VaddrSvSoffsIdxen"; break; + case ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxenFloat1: return "Vdata1VaddrSvSoffsIdxenFloat1"; break; + case ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen: return "Vdata2VaddrSvSoffsIdxen"; break; + case ShaderInstructionFormat::Vdata3VaddrSvSoffsIdxen: return "Vdata3VaddrSvSoffsIdxen"; break; + case ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxen: return "Vdata4VaddrSvSoffsIdxen"; break; + case ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxenFloat4: return "Vdata4VaddrSvSoffsIdxenFloat4"; break; + case ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4: return "Vdata4Vaddr2SvSoffsOffenIdxenFloat4"; break; + case ShaderInstructionFormat::Vdata1Vaddr3StSsDmask1: return "Vdata1Vaddr3StSsDmask1"; break; + case ShaderInstructionFormat::Vdata1Vaddr3StSsDmask8: return "Vdata1Vaddr3StSsDmask8"; break; + case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask3: return "Vdata2Vaddr3StSsDmask3"; break; + case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask5: return "Vdata2Vaddr3StSsDmask5"; break; + case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9: return "Vdata2Vaddr3StSsDmask9"; break; + case ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7: return "Vdata3Vaddr3StSsDmask7"; break; + case ShaderInstructionFormat::Vdata3Vaddr4StSsDmask7: return "Vdata3Vaddr4StSsDmask7"; break; + case 
ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF: return "Vdata4Vaddr3StSsDmaskF"; break; + case ShaderInstructionFormat::Vdata4Vaddr3StDmaskF: return "Vdata4Vaddr3StDmaskF"; break; + case ShaderInstructionFormat::Vdata4Vaddr4StDmaskF: return "Vdata4Vaddr4StDmaskF"; break; + case ShaderInstructionFormat::SVdstSVsrc0SVsrc1: return "SVdstSVsrc0SVsrc1"; break; + case ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2: return "VdstVsrc0Vsrc1Smask2"; break; + case ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2: return "VdstVsrc0Vsrc1Vsrc2"; break; + case ShaderInstructionFormat::VdstVsrcAttrChan: return "VdstVsrcAttrChan"; break; + case ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1: return "VdstSdst2Vsrc0Vsrc1"; break; + case ShaderInstructionFormat::VdstGds: return "VdstGds"; break; + case ShaderInstructionFormat::Label: return "Label"; break; + default: return "????"; break; } } -static String dbg_fmt_print(const ShaderInstruction& inst) +static String8 dbg_fmt_print(const ShaderInstruction& inst) { uint64_t f = inst.format; EXIT_IF(f == ShaderInstructionFormat::Unknown); - String str; + String8 str; if (f == ShaderInstructionFormat::Empty) { return str; @@ -258,8 +290,8 @@ static String dbg_fmt_print(const ShaderInstruction& inst) int src_num = 0; for (;;) { - String s; - auto fu = f & 0xffu; + String8 s; + auto fu = f & 0xffu; if (fu == 0) { break; @@ -290,31 +322,32 @@ static String dbg_fmt_print(const ShaderInstruction& inst) case ShaderInstructionFormat::S2A2: s = operand_array_to_str(inst.src[2], 2); break; case ShaderInstructionFormat::S2A3: s = operand_array_to_str(inst.src[2], 3); break; case ShaderInstructionFormat::S2A4: s = operand_array_to_str(inst.src[2], 4); break; - case ShaderInstructionFormat::Attr: s = String::FromPrintf("attr%u.%u", inst.src[1].constant.u, inst.src[2].constant.u); break; - case ShaderInstructionFormat::Idxen: s = U"idxen"; break; - case ShaderInstructionFormat::Offen: s = U"offen"; break; - case ShaderInstructionFormat::Float1: s = 
U"format:float1"; break; - case ShaderInstructionFormat::Float4: s = U"format:float4"; break; - case ShaderInstructionFormat::Pos0: s = U"pos0"; break; - case ShaderInstructionFormat::Done: s = U"done"; break; - case ShaderInstructionFormat::Param0: s = U"param0"; break; - case ShaderInstructionFormat::Param1: s = U"param1"; break; - case ShaderInstructionFormat::Param2: s = U"param2"; break; - case ShaderInstructionFormat::Param3: s = U"param3"; break; - case ShaderInstructionFormat::Param4: s = U"param4"; break; - case ShaderInstructionFormat::Mrt0: s = U"mrt_color0"; break; - case ShaderInstructionFormat::Off: s = U"off"; break; - case ShaderInstructionFormat::Compr: s = U"compr"; break; - case ShaderInstructionFormat::Vm: s = U"vm"; break; - case ShaderInstructionFormat::L: s = String::FromPrintf("label_%04" PRIx32, inst.pc + 4 + inst.src[0].constant.i); break; - case ShaderInstructionFormat::Dmask1: s = U"dmask:0x1"; break; - case ShaderInstructionFormat::Dmask8: s = U"dmask:0x8"; break; - case ShaderInstructionFormat::Dmask3: s = U"dmask:0x3"; break; - case ShaderInstructionFormat::Dmask5: s = U"dmask:0x5"; break; - case ShaderInstructionFormat::Dmask7: s = U"dmask:0x7"; break; - case ShaderInstructionFormat::Dmask9: s = U"dmask:0x9"; break; - case ShaderInstructionFormat::DmaskF: s = U"dmask:0xf"; break; - case ShaderInstructionFormat::Gds: s = U"gds"; break; + case ShaderInstructionFormat::Attr: s = String8::FromPrintf("attr%u.%u", inst.src[1].constant.u, inst.src[2].constant.u); break; + case ShaderInstructionFormat::Idxen: s = "idxen"; break; + case ShaderInstructionFormat::Offen: s = "offen"; break; + case ShaderInstructionFormat::Float1: s = "format:float1"; break; + case ShaderInstructionFormat::Float4: s = "format:float4"; break; + case ShaderInstructionFormat::Pos0: s = "pos0"; break; + case ShaderInstructionFormat::Done: s = "done"; break; + case ShaderInstructionFormat::Param0: s = "param0"; break; + case ShaderInstructionFormat::Param1: s = 
"param1"; break; + case ShaderInstructionFormat::Param2: s = "param2"; break; + case ShaderInstructionFormat::Param3: s = "param3"; break; + case ShaderInstructionFormat::Param4: s = "param4"; break; + case ShaderInstructionFormat::Mrt0: s = "mrt_color0"; break; + case ShaderInstructionFormat::Prim: s = "prim"; break; + case ShaderInstructionFormat::Off: s = "off"; break; + case ShaderInstructionFormat::Compr: s = "compr"; break; + case ShaderInstructionFormat::Vm: s = "vm"; break; + case ShaderInstructionFormat::L: s = String8::FromPrintf("label_%04" PRIx32, inst.pc + 4 + inst.src[0].constant.i); break; + case ShaderInstructionFormat::Dmask1: s = "dmask:0x1"; break; + case ShaderInstructionFormat::Dmask8: s = "dmask:0x8"; break; + case ShaderInstructionFormat::Dmask3: s = "dmask:0x3"; break; + case ShaderInstructionFormat::Dmask5: s = "dmask:0x5"; break; + case ShaderInstructionFormat::Dmask7: s = "dmask:0x7"; break; + case ShaderInstructionFormat::Dmask9: s = "dmask:0x9"; break; + case ShaderInstructionFormat::DmaskF: s = "dmask:0xf"; break; + case ShaderInstructionFormat::Gds: s = "gds"; break; default: EXIT("unknown code: %u\n", static_cast(fu)); } switch (fu) @@ -337,7 +370,7 @@ static String dbg_fmt_print(const ShaderInstruction& inst) case ShaderInstructionFormat::S3: src_num = std::max(src_num, 4); break; default: break; } - str = s + (str.IsEmpty() ? U"" : U", " + str); + str = s + (str.IsEmpty() ? 
"" : ", " + str); f >>= 8u; } EXIT_IF(src_num != inst.src_num); @@ -355,34 +388,38 @@ static String dbg_fmt_print(const ShaderInstruction& inst) } if (inst.dst.clamp) { - str += U" clamp"; + str += " clamp"; } return str; } -String ShaderCode::DbgInstructionToStr(const ShaderInstruction& inst) +String8 ShaderCode::DbgInstructionToStr(const ShaderInstruction& inst) { - String ret; + String8 ret; - String name = Core::EnumName(inst.type); - String format = dbg_fmt_to_str(inst); + String8 name = Core::EnumName8(inst.type); + String8 format = dbg_fmt_to_str(inst); - ret += String::FromPrintf("%-20s [%-30s] ", name.C_Str(), format.C_Str()); + ret += String8::FromPrintf("%-20s [%-30s] ", name.c_str(), format.c_str()); ret += dbg_fmt_print(inst); return ret; } -String ShaderCode::DbgDump() const +String8 ShaderCode::DbgDump() const { - String ret; + String8 ret; for (const auto& inst: m_instructions) { if (m_labels.Contains(inst.pc, [](auto label, auto pc) { return (!label.IsDisabled() && label.GetDst() == pc); })) { - ret += String::FromPrintf("label_%04" PRIx32 ":\n", inst.pc); + ret += String8::FromPrintf("\nlabel_%04" PRIx32 ":\n", inst.pc); } - ret += String::FromPrintf(" %s\n", DbgInstructionToStr(inst).C_Str()); + if (m_indirect_labels.Contains(inst.pc, [](auto label, auto pc) { return (!label.IsDisabled() && label.GetDst() == pc); })) + { + ret += "\n"; + } + ret += String8::FromPrintf(" %s\n", DbgInstructionToStr(inst).c_str()); } return ret; } @@ -496,1598 +533,6 @@ Vector ShaderCode::ReadIntructions(const ShaderControlFlowBlo return ret; } -// Vector ShaderCode::GetDiscardBlock(uint32_t pc) const -//{ -// Vector ret; -// -// auto inst_count = m_instructions.Size(); -// for (uint32_t index = 0; index < inst_count; index++) -// { -// const auto& inst = m_instructions.At(index); -// if (inst.pc == pc) -// { -// for (uint32_t i = index; i < inst_count; i++) -// { -// const auto& inst = m_instructions.At(i); -// -// ret.Add(inst); -// -// if 
(IsDiscardInstruction(i)) -// { -// ret.Add(m_instructions.At(i + 1)); -// break; -// } -// } -// break; -// } -// } -// -// return ret; -// } - -static ShaderOperand operand_parse(uint32_t code) -{ - ShaderOperand ret; - - ret.size = 1; - - if (code >= 0 && code <= 103) - { - ret.type = ShaderOperandType::Sgpr; - ret.register_id = static_cast(code); - } else if (code == 106) - { - ret.type = ShaderOperandType::VccLo; - } else if (code == 107) - { - ret.type = ShaderOperandType::VccHi; - } else if (code == 124) - { - ret.type = ShaderOperandType::M0; - } else if (code == 126) - { - ret.type = ShaderOperandType::ExecLo; - } else if (code == 127) - { - ret.type = ShaderOperandType::ExecHi; - } else if (code >= 128 && code <= 192) - { - ret.type = ShaderOperandType::IntegerInlineConstant; - ret.constant.i = static_cast(code) - 128; - ret.size = 0; - } else if (code >= 193 && code <= 208) - { - ret.type = ShaderOperandType::IntegerInlineConstant; - ret.constant.i = 192 - static_cast(code); - ret.size = 0; - } else if (code >= 240 && code <= 247) - { - static const float fv[] = {0.5f, -0.5f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f}; - ret.type = ShaderOperandType::FloatInlineConstant; - ret.constant.f = fv[static_cast(code) - 240]; - ret.size = 0; - } else if (code == 252) - { - ret.type = ShaderOperandType::ExecZ; - } else if (code == 255) - { - ret.type = ShaderOperandType::LiteralConstant; - ret.size = 0; - } else if (code >= 256) - { - ret.type = ShaderOperandType::Vgpr; - ret.register_id = static_cast(code) - 256; - } else - { - EXIT("unknown operand: %u\n", code); - } - - return ret; -} - -KYTY_SHADER_PARSER(shader_parse_sopc) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t ssrc1 = (buffer[0] >> 8u) & 0xffu; - uint32_t ssrc0 = (buffer[0] >> 0u) & 0xffu; - uint32_t opcode = (buffer[0] >> 16u) & 0x7fu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(ssrc0); - inst.src[1] = 
operand_parse(ssrc1); - inst.src_num = 2; - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - if (inst.src[1].type == ShaderOperandType::LiteralConstant) - { - inst.src[1].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::Ssrc0Ssrc1; - - switch (opcode) - { - case 0x00: inst.type = ShaderInstructionType::SCmpEqI32; break; - case 0x01: inst.type = ShaderInstructionType::SCmpLgI32; break; - case 0x02: inst.type = ShaderInstructionType::SCmpGtI32; break; - case 0x03: inst.type = ShaderInstructionType::SCmpGeI32; break; - case 0x04: inst.type = ShaderInstructionType::SCmpLtI32; break; - case 0x05: inst.type = ShaderInstructionType::SCmpLeI32; break; - case 0x06: inst.type = ShaderInstructionType::SCmpEqU32; break; - case 0x07: inst.type = ShaderInstructionType::SCmpLgU32; break; - case 0x08: inst.type = ShaderInstructionType::SCmpGtU32; break; - case 0x09: inst.type = ShaderInstructionType::SCmpGeU32; break; - case 0x0a: inst.type = ShaderInstructionType::SCmpLtU32; break; - case 0x0b: inst.type = ShaderInstructionType::SCmpLeU32; break; - - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown sopc opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_sopk) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 23u) & 0x1fu; - auto imm = static_cast(buffer[0] >> 0u & 0xffffu); - uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(sdst); - - switch (opcode) // NOLINT - { - case 0x00: - inst.type = ShaderInstructionType::SMovkI32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0; - inst.src[0].type = 
ShaderOperandType::IntegerInlineConstant; - inst.src[0].constant.i = imm; - inst.src_num = 1; - break; - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown sopk opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - return 1; -} - -KYTY_SHADER_PARSER(shader_parse_sopp) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 16u) & 0x7fu; - uint32_t simm = (buffer[0] >> 0u) & 0xffffu; - - ShaderInstruction inst; - inst.pc = pc; - - inst.format = ShaderInstructionFormat::Label; - inst.src[0].type = ShaderOperandType::LiteralConstant; - inst.src[0].constant.i = static_cast(simm) * 4; - inst.src_num = 1; - - switch (opcode) - { - case 0x01: - inst.type = ShaderInstructionType::SEndpgm; - inst.format = ShaderInstructionFormat::Empty; - inst.src_num = 0; - break; - case 0x02: inst.type = ShaderInstructionType::SBranch; break; - case 0x04: inst.type = ShaderInstructionType::SCbranchScc0; break; - case 0x05: inst.type = ShaderInstructionType::SCbranchScc1; break; - case 0x06: inst.type = ShaderInstructionType::SCbranchVccz; break; - case 0x07: inst.type = ShaderInstructionType::SCbranchVccnz; break; - case 0x08: inst.type = ShaderInstructionType::SCbranchExecz; break; - case 0x0c: - inst.type = ShaderInstructionType::SWaitcnt; - inst.format = ShaderInstructionFormat::Imm; - inst.src[0].type = ShaderOperandType::LiteralConstant; - inst.src[0].constant.u = simm; - inst.src_num = 1; - break; - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown sopp opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - if (inst.type == ShaderInstructionType::SCbranchScc0 || inst.type == ShaderInstructionType::SCbranchScc1 || - inst.type == ShaderInstructionType::SCbranchVccz || inst.type 
== ShaderInstructionType::SCbranchVccnz || - inst.type == ShaderInstructionType::SCbranchExecz || inst.type == ShaderInstructionType::SBranch) - { - dst->GetLabels().Add(ShaderLabel(inst)); - } - - return 1; -} - -KYTY_SHADER_PARSER(shader_parse_sop1) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 8u) & 0xffu; - uint32_t ssrc0 = (buffer[0] >> 0u) & 0xffu; - uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(ssrc0); - inst.src_num = 1; - inst.dst = operand_parse(sdst); - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - switch (opcode) - { - case 0x03: - inst.type = ShaderInstructionType::SMovB32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0; - break; - case 0x04: - inst.type = ShaderInstructionType::SMovB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02; - inst.dst.size = 2; - inst.src[0].size = 2; - break; - case 0x0a: - inst.type = ShaderInstructionType::SWqmB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02; - inst.dst.size = 2; - inst.src[0].size = 2; - break; - case 0x20: - inst.type = ShaderInstructionType::SSetpcB64; - inst.format = ShaderInstructionFormat::Saddr; - inst.src[0].size = 2; - break; - case 0x21: - inst.type = ShaderInstructionType::SSwappcB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02; - inst.src[0].size = 2; - inst.dst.size = 2; - break; - case 0x24: - inst.type = ShaderInstructionType::SAndSaveexecB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02; - inst.dst.size = 2; - inst.src[0].size = 2; - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sop1 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_sop2) -{ - EXIT_IF(dst 
== nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 23u) & 0x7fu; - - switch (opcode) - { - case 0x7d: return shader_parse_sop1(pc, src, buffer, dst); break; - case 0x7e: return shader_parse_sopc(pc, src, buffer, dst); break; - case 0x7f: return shader_parse_sopp(pc, src, buffer, dst); break; - default: break; - } - - if (opcode >= 0x60) - { - return shader_parse_sopk(pc, src, buffer, dst); - } - - uint32_t ssrc1 = (buffer[0] >> 8u) & 0xffu; - uint32_t ssrc0 = (buffer[0] >> 0u) & 0xffu; - uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(ssrc0); - inst.src[1] = operand_parse(ssrc1); - inst.src_num = 2; - inst.dst = operand_parse(sdst); - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - if (inst.src[1].type == ShaderOperandType::LiteralConstant) - { - inst.src[1].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - - switch (opcode) - { - case 0x00: inst.type = ShaderInstructionType::SAddU32; break; - case 0x02: inst.type = ShaderInstructionType::SAddI32; break; - case 0x04: inst.type = ShaderInstructionType::SAddcU32; break; - case 0x0a: inst.type = ShaderInstructionType::SCselectB32; break; - case 0x0b: - inst.type = ShaderInstructionType::SCselectB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x0e: inst.type = ShaderInstructionType::SAndB32; break; - case 0x0f: - inst.type = ShaderInstructionType::SAndB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x11: - inst.type = ShaderInstructionType::SOrB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; 
- inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x13: - inst.type = ShaderInstructionType::SXorB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x15: - inst.type = ShaderInstructionType::SAndn2B64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x17: - inst.type = ShaderInstructionType::SOrn2B64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x19: - inst.type = ShaderInstructionType::SNandB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x1b: - inst.type = ShaderInstructionType::SNorB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x1d: - inst.type = ShaderInstructionType::SXnorB64; - inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12; - inst.dst.size = 2; - inst.src[0].size = 2; - inst.src[1].size = 2; - break; - case 0x1e: inst.type = ShaderInstructionType::SLshlB32; break; - case 0x20: inst.type = ShaderInstructionType::SLshrB32; break; - case 0x24: inst.type = ShaderInstructionType::SBfmB32; break; - case 0x26: inst.type = ShaderInstructionType::SMulI32; break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown sop2 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_vopc) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 17u) & 0xffu; - uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; - uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu; - - 
ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.src[1] = operand_parse(vsrc1 + 256); - inst.src_num = 2; - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; - inst.dst.type = ShaderOperandType::VccLo; - inst.dst.size = 2; - - switch (opcode) - { - case 0x00: inst.type = ShaderInstructionType::VCmpFF32; break; - case 0x01: inst.type = ShaderInstructionType::VCmpLtF32; break; - case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break; - case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break; - case 0x04: inst.type = ShaderInstructionType::VCmpGtF32; break; - case 0x05: inst.type = ShaderInstructionType::VCmpLgF32; break; - case 0x06: inst.type = ShaderInstructionType::VCmpGeF32; break; - case 0x07: inst.type = ShaderInstructionType::VCmpOF32; break; - case 0x08: inst.type = ShaderInstructionType::VCmpUF32; break; - case 0x09: inst.type = ShaderInstructionType::VCmpNgeF32; break; - case 0x0a: inst.type = ShaderInstructionType::VCmpNlgF32; break; - case 0x0b: inst.type = ShaderInstructionType::VCmpNgtF32; break; - case 0x0c: inst.type = ShaderInstructionType::VCmpNleF32; break; - case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break; - case 0x0e: inst.type = ShaderInstructionType::VCmpNltF32; break; - case 0x0f: inst.type = ShaderInstructionType::VCmpTruF32; break; - case 0x11: inst.type = ShaderInstructionType::VCmpxLtF32; break; - case 0x14: inst.type = ShaderInstructionType::VCmpxGtF32; break; - case 0x1d: inst.type = ShaderInstructionType::VCmpxNeqF32; break; - case 0x80: inst.type = ShaderInstructionType::VCmpFI32; break; - case 0x81: inst.type = ShaderInstructionType::VCmpLtI32; break; - case 0x82: inst.type = ShaderInstructionType::VCmpEqI32; break; - case 0x83: inst.type = ShaderInstructionType::VCmpLeI32; break; - case 0x84: inst.type = 
ShaderInstructionType::VCmpGtI32; break; - case 0x85: inst.type = ShaderInstructionType::VCmpNeI32; break; - case 0x86: inst.type = ShaderInstructionType::VCmpGeI32; break; - case 0x87: inst.type = ShaderInstructionType::VCmpTI32; break; - case 0xc0: inst.type = ShaderInstructionType::VCmpFU32; break; - case 0xc1: inst.type = ShaderInstructionType::VCmpLtU32; break; - case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break; - case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break; - case 0xc4: inst.type = ShaderInstructionType::VCmpGtU32; break; - case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break; - case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break; - case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break; - case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; - case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; - case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; - case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown vopc opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_vop1) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t vdst = (buffer[0] >> 17u) & 0xffu; - uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; - uint32_t opcode = (buffer[0] >> 9u) & 0xffu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.dst = operand_parse(vdst + 256); - inst.src_num = 1; - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::SVdstSVsrc0; - - switch (opcode) - { - case 0x01: inst.type = ShaderInstructionType::VMovB32; break; - case 
0x05: inst.type = ShaderInstructionType::VCvtF32I32; break; - case 0x06: inst.type = ShaderInstructionType::VCvtF32U32; break; - case 0x07: inst.type = ShaderInstructionType::VCvtU32F32; break; - case 0x0b: inst.type = ShaderInstructionType::VCvtF32F16; break; - case 0x11: inst.type = ShaderInstructionType::VCvtF32Ubyte0; break; - case 0x12: inst.type = ShaderInstructionType::VCvtF32Ubyte1; break; - case 0x13: inst.type = ShaderInstructionType::VCvtF32Ubyte2; break; - case 0x14: inst.type = ShaderInstructionType::VCvtF32Ubyte3; break; - case 0x20: inst.type = ShaderInstructionType::VFractF32; break; - case 0x21: inst.type = ShaderInstructionType::VTruncF32; break; - case 0x22: inst.type = ShaderInstructionType::VCeilF32; break; - case 0x23: inst.type = ShaderInstructionType::VRndneF32; break; - case 0x24: inst.type = ShaderInstructionType::VFloorF32; break; - case 0x25: inst.type = ShaderInstructionType::VExpF32; break; - case 0x27: inst.type = ShaderInstructionType::VLogF32; break; - case 0x2a: inst.type = ShaderInstructionType::VRcpF32; break; - case 0x2e: inst.type = ShaderInstructionType::VRsqF32; break; - case 0x33: inst.type = ShaderInstructionType::VSqrtF32; break; - case 0x35: inst.type = ShaderInstructionType::VSinF32; break; - case 0x36: inst.type = ShaderInstructionType::VCosF32; break; - case 0x37: inst.type = ShaderInstructionType::VNotB32; break; - case 0x38: inst.type = ShaderInstructionType::VBfrevB32; break; - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown vop1 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_vop2) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 25u) & 0x3fu; - uint32_t vdst = (buffer[0] >> 17u) & 0xffu; - uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; - uint32_t vsrc1 = 
(buffer[0] >> 9u) & 0xffu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.src[1] = operand_parse(vsrc1 + 256); - inst.dst = operand_parse(vdst + 256); - inst.src_num = 2; - - uint32_t size = 1; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - - switch (opcode) - { - case 0x00: - inst.type = ShaderInstructionType::VCndmaskB32; - inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; - inst.src[2].type = ShaderOperandType::VccLo; - inst.src[2].size = 2; - inst.src_num = 3; - break; - case 0x03: inst.type = ShaderInstructionType::VAddF32; break; - case 0x04: inst.type = ShaderInstructionType::VSubF32; break; - case 0x05: inst.type = ShaderInstructionType::VSubrevF32; break; - case 0x08: inst.type = ShaderInstructionType::VMulF32; break; - case 0x0b: inst.type = ShaderInstructionType::VMulU32U24; break; - case 0x0f: inst.type = ShaderInstructionType::VMinF32; break; - case 0x10: inst.type = ShaderInstructionType::VMaxF32; break; - case 0x15: inst.type = ShaderInstructionType::VLshrB32; break; - case 0x16: inst.type = ShaderInstructionType::VLshrrevB32; break; - case 0x17: inst.type = ShaderInstructionType::VAshrI32; break; - case 0x18: inst.type = ShaderInstructionType::VAshrrevI32; break; - case 0x19: inst.type = ShaderInstructionType::VLshlB32; break; - case 0x1a: inst.type = ShaderInstructionType::VLshlrevB32; break; - case 0x1b: inst.type = ShaderInstructionType::VAndB32; break; - case 0x1c: inst.type = ShaderInstructionType::VOrB32; break; - case 0x1d: inst.type = ShaderInstructionType::VXorB32; break; - case 0x1e: inst.type = ShaderInstructionType::VBfmB32; break; - case 0x1f: inst.type = ShaderInstructionType::VMacF32; break; - case 0x20: - inst.type = ShaderInstructionType::VMadmkF32; - inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; - inst.src_num = 3; - inst.src[2] 
= inst.src[1]; - inst.src[1].type = ShaderOperandType::LiteralConstant; - inst.src[1].constant.u = buffer[size]; - inst.src[1].size = 0; - size++; - break; - case 0x21: - inst.type = ShaderInstructionType::VMadakF32; - inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; - inst.src_num = 3; - inst.src[2].type = ShaderOperandType::LiteralConstant; - inst.src[2].constant.u = buffer[size]; - inst.src[2].size = 0; - size++; - break; - case 0x22: inst.type = ShaderInstructionType::VBcntU32B32; break; - case 0x23: inst.type = ShaderInstructionType::VMbcntLoU32B32; break; - case 0x24: inst.type = ShaderInstructionType::VMbcntHiU32B32; break; - case 0x25: - inst.type = ShaderInstructionType::VAddI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2.type = ShaderOperandType::VccLo; - inst.dst2.size = 2; - break; - case 0x26: - inst.type = ShaderInstructionType::VSubI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2.type = ShaderOperandType::VccLo; - inst.dst2.size = 2; - break; - case 0x27: - inst.type = ShaderInstructionType::VSubrevI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2.type = ShaderOperandType::VccLo; - inst.dst2.size = 2; - break; - case 0x2f: inst.type = ShaderInstructionType::VCvtPkrtzF16F32; break; - case 0x3e: return shader_parse_vopc(pc, src, buffer, dst); break; - case 0x3f: return shader_parse_vop1(pc, src, buffer, dst); break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown vop2 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_vop3) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 17u) & 0x1ffu; - uint32_t clamp = (buffer[0] >> 11u) & 0x1u; - uint32_t abs = (buffer[0] >> 8u) & 0x7u; - uint32_t vdst = (buffer[0] >> 0u) & 0xffu; - uint32_t sdst = 
(buffer[0] >> 8u) & 0x7fu; - uint32_t neg = (buffer[1] >> 29u) & 0x7u; - uint32_t omod = (buffer[1] >> 27u) & 0x3u; - uint32_t src0 = (buffer[1] >> 0u) & 0x1ffu; - uint32_t src1 = (buffer[1] >> 9u) & 0x1ffu; - uint32_t src2 = (buffer[1] >> 18u) & 0x1ffu; - - // EXIT_NOT_IMPLEMENTED(abs != 0); - // EXIT_NOT_IMPLEMENTED(sdst != 0); - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.src[1] = operand_parse(src1); - inst.src[2] = operand_parse(src2); - inst.src_num = 3; - inst.dst = operand_parse(vdst + 256); - - switch (omod) - { - case 0: inst.dst.multiplier = 1.0f; break; - case 1: inst.dst.multiplier = 2.0f; break; - case 2: inst.dst.multiplier = 4.0f; break; - case 3: inst.dst.multiplier = 0.5f; break; - default: break; - } - - if ((neg & 0x1u) != 0) - { - inst.src[0].negate = true; - } - if ((neg & 0x2u) != 0) - { - inst.src[1].negate = true; - } - if ((neg & 0x4u) != 0) - { - inst.src[2].negate = true; - } - - uint32_t size = 2; - - if (inst.src[0].type == ShaderOperandType::LiteralConstant) - { - inst.src[0].constant.u = buffer[size]; - size++; - } - - if (inst.src[1].type == ShaderOperandType::LiteralConstant) - { - inst.src[1].constant.u = buffer[size]; - size++; - } - - if (inst.src[2].type == ShaderOperandType::LiteralConstant) - { - inst.src[2].constant.u = buffer[size]; - size++; - } - - inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; - - if (opcode >= 0 && opcode <= 0xff) - { - /* VOPC using VOP3 encoding */ - inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; - inst.src_num = 2; - inst.dst = operand_parse(vdst); - inst.dst.size = 2; - } - - if (opcode >= 0x100 && opcode <= 0x13d) - { - /* VOP2 using VOP3 encoding */ - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - inst.src_num = 2; - } - - if (opcode >= 0x180 && opcode <= 0x1e8) - { - /* VOP1 using VOP3 encoding */ - inst.format = ShaderInstructionFormat::SVdstSVsrc0; - inst.src_num = 1; - } - - switch (opcode) - { - /* VOPC using VOP3 
encoding */ - case 0x00: inst.type = ShaderInstructionType::VCmpFF32; break; - case 0x01: inst.type = ShaderInstructionType::VCmpLtF32; break; - case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break; - case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break; - case 0x04: inst.type = ShaderInstructionType::VCmpGtF32; break; - case 0x05: inst.type = ShaderInstructionType::VCmpLgF32; break; - case 0x06: inst.type = ShaderInstructionType::VCmpGeF32; break; - case 0x07: inst.type = ShaderInstructionType::VCmpOF32; break; - case 0x08: inst.type = ShaderInstructionType::VCmpUF32; break; - case 0x09: inst.type = ShaderInstructionType::VCmpNgeF32; break; - case 0x0a: inst.type = ShaderInstructionType::VCmpNlgF32; break; - case 0x0b: inst.type = ShaderInstructionType::VCmpNgtF32; break; - case 0x0c: inst.type = ShaderInstructionType::VCmpNleF32; break; - case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break; - case 0x0e: inst.type = ShaderInstructionType::VCmpNltF32; break; - case 0x0f: inst.type = ShaderInstructionType::VCmpTruF32; break; - case 0x1d: inst.type = ShaderInstructionType::VCmpxNeqF32; break; - case 0x80: inst.type = ShaderInstructionType::VCmpFI32; break; - case 0x81: inst.type = ShaderInstructionType::VCmpLtI32; break; - case 0x82: inst.type = ShaderInstructionType::VCmpEqI32; break; - case 0x83: inst.type = ShaderInstructionType::VCmpLeI32; break; - case 0x84: inst.type = ShaderInstructionType::VCmpGtI32; break; - case 0x85: inst.type = ShaderInstructionType::VCmpNeI32; break; - case 0x86: inst.type = ShaderInstructionType::VCmpGeI32; break; - case 0x87: inst.type = ShaderInstructionType::VCmpTI32; break; - case 0xc0: inst.type = ShaderInstructionType::VCmpFU32; break; - case 0xc1: inst.type = ShaderInstructionType::VCmpLtU32; break; - case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break; - case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break; - case 0xc4: inst.type = ShaderInstructionType::VCmpGtU32; break; - 
case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break; - case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break; - case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break; - case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; - case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; - case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; - case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; - - /* VOP2 using VOP3 encoding */ - case 0x100: - inst.type = ShaderInstructionType::VCndmaskB32; - inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; - inst.src_num = 3; - inst.src[2].size = 2; - break; - case 0x103: inst.type = ShaderInstructionType::VAddF32; break; - case 0x104: inst.type = ShaderInstructionType::VSubF32; break; - case 0x105: inst.type = ShaderInstructionType::VSubrevF32; break; - case 0x108: inst.type = ShaderInstructionType::VMulF32; break; - case 0x10b: inst.type = ShaderInstructionType::VMulU32U24; break; - case 0x10f: inst.type = ShaderInstructionType::VMinF32; break; - case 0x110: inst.type = ShaderInstructionType::VMaxF32; break; - case 0x115: inst.type = ShaderInstructionType::VLshrB32; break; - case 0x116: inst.type = ShaderInstructionType::VLshrrevB32; break; - case 0x117: inst.type = ShaderInstructionType::VAshrI32; break; - case 0x118: inst.type = ShaderInstructionType::VAshrrevI32; break; - case 0x119: inst.type = ShaderInstructionType::VLshlB32; break; - case 0x11a: inst.type = ShaderInstructionType::VLshlrevB32; break; - case 0x11b: inst.type = ShaderInstructionType::VAndB32; break; - case 0x11c: inst.type = ShaderInstructionType::VOrB32; break; - case 0x11d: inst.type = ShaderInstructionType::VXorB32; break; - case 0x11e: inst.type = ShaderInstructionType::VBfmB32; break; - case 0x11f: inst.type = ShaderInstructionType::VMacF32; break; - case 0x122: inst.type = ShaderInstructionType::VBcntU32B32; break; - case 0x123: inst.type = 
ShaderInstructionType::VMbcntLoU32B32; break; - case 0x124: inst.type = ShaderInstructionType::VMbcntHiU32B32; break; - case 0x125: - inst.type = ShaderInstructionType::VAddI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2 = operand_parse(sdst); - inst.dst2.size = 2; - break; - case 0x126: - inst.type = ShaderInstructionType::VSubI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2 = operand_parse(sdst); - inst.dst2.size = 2; - break; - case 0x127: - inst.type = ShaderInstructionType::VSubrevI32; - inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; - inst.dst2 = operand_parse(sdst); - inst.dst2.size = 2; - break; - case 0x12f: inst.type = ShaderInstructionType::VCvtPkrtzF16F32; break; - - /* VOP3 instructions */ - case 0x141: inst.type = ShaderInstructionType::VMadF32; break; - case 0x143: inst.type = ShaderInstructionType::VMadU32U24; break; - case 0x148: inst.type = ShaderInstructionType::VBfeU32; break; - case 0x14b: inst.type = ShaderInstructionType::VFmaF32; break; - case 0x151: inst.type = ShaderInstructionType::VMin3F32; break; - case 0x154: inst.type = ShaderInstructionType::VMax3F32; break; - case 0x157: inst.type = ShaderInstructionType::VMed3F32; break; - case 0x15d: inst.type = ShaderInstructionType::VSadU32; break; - case 0x169: - inst.type = ShaderInstructionType::VMulLoU32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - inst.src_num = 2; - break; - case 0x16a: - inst.type = ShaderInstructionType::VMulHiU32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - inst.src_num = 2; - break; - case 0x16b: - inst.type = ShaderInstructionType::VMulLoI32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; - inst.src_num = 2; - break; - - /* VOP1 using VOP3 encoding */ - case 0x1a0: inst.type = ShaderInstructionType::VFractF32; break; - case 0x1a1: inst.type = ShaderInstructionType::VTruncF32; break; - case 0x1a2: inst.type = ShaderInstructionType::VCeilF32; break; 
- case 0x1a3: inst.type = ShaderInstructionType::VRndneF32; break; - case 0x1a4: inst.type = ShaderInstructionType::VFloorF32; break; - case 0x1a5: inst.type = ShaderInstructionType::VExpF32; break; - case 0x1a7: inst.type = ShaderInstructionType::VLogF32; break; - case 0x1aa: inst.type = ShaderInstructionType::VRcpF32; break; - case 0x1ae: inst.type = ShaderInstructionType::VRsqF32; break; - case 0x1b3: inst.type = ShaderInstructionType::VSqrtF32; break; - case 0x1b5: inst.type = ShaderInstructionType::VSinF32; break; - case 0x1b6: inst.type = ShaderInstructionType::VCosF32; break; - - default: - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown vop3 opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", opcode, pc, dst->GetHash0()); - } - - if (inst.dst2.type == ShaderOperandType::Unknown) - { - if ((abs & 0x1u) != 0) - { - inst.src[0].absolute = true; - } - if ((abs & 0x2u) != 0) - { - inst.src[1].absolute = true; - } - if ((abs & 0x4u) != 0) - { - inst.src[2].absolute = true; - } - - inst.dst.clamp = (clamp != 0); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_exp) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t vm = (buffer[0] >> 12u) & 0x1u; - uint32_t done = (buffer[0] >> 11u) & 0x1u; - uint32_t compr = (buffer[0] >> 10u) & 0x1u; - uint32_t target = (buffer[0] >> 4u) & 0x3fu; - uint32_t en = (buffer[0] >> 0u) & 0xfu; - - uint32_t vsrc0 = (buffer[1] >> 0u) & 0xffu; - uint32_t vsrc1 = (buffer[1] >> 8u) & 0xffu; - uint32_t vsrc2 = (buffer[1] >> 16u) & 0xffu; - uint32_t vsrc3 = (buffer[1] >> 24u) & 0xffu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(vsrc0 + 256); - inst.src[1] = operand_parse(vsrc1 + 256); - inst.src[2] = operand_parse(vsrc2 + 256); - inst.src[3] = operand_parse(vsrc3 + 256); - inst.src_num = 4; - - inst.type = ShaderInstructionType::Exp; - - switch (target) - { - case 0x00: 
- if (done != 0 && compr != 0 && vm != 0 && en == 0x0) - { - inst.format = ShaderInstructionFormat::Mrt0OffOffComprVmDone; - inst.src_num = 0; - } else if (done != 0 && compr != 0 && vm != 0 && en == 0xf) - { - inst.format = ShaderInstructionFormat::Mrt0Vsrc0Vsrc1ComprVmDone; - inst.src_num = 2; - } else if (done != 0 && compr == 0 && vm != 0 && en == 0xf) - { - inst.format = ShaderInstructionFormat::Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone; - }; - break; - case 0x0c: - if (done != 0 && en == 0xf) - { - inst.format = ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done; - }; - break; - default: break; - } - - if (inst.format == ShaderInstructionFormat::Unknown && done == 0 && compr == 0 && vm == 0 && en == 0xf) - { - switch (target) - { - case 0x20: inst.format = ShaderInstructionFormat::Param0Vsrc0Vsrc1Vsrc2Vsrc3; break; - case 0x21: inst.format = ShaderInstructionFormat::Param1Vsrc0Vsrc1Vsrc2Vsrc3; break; - case 0x22: inst.format = ShaderInstructionFormat::Param2Vsrc0Vsrc1Vsrc2Vsrc3; break; - case 0x23: inst.format = ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3; break; - case 0x24: inst.format = ShaderInstructionFormat::Param4Vsrc0Vsrc1Vsrc2Vsrc3; break; - default: break; - } - } - - if (inst.format == ShaderInstructionFormat::Unknown) - { - printf("%s", dst->DbgDump().C_Str()); - EXIT("%s\n" - "unknown exp target: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", - dst->DbgDump().C_Str(), target, pc, dst->GetHash0()); - } - - dst->GetInstructions().Add(inst); - - return 2; -} - -KYTY_SHADER_PARSER(shader_parse_smrd) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 22u) & 0x1fu; - uint32_t sdst = (buffer[0] >> 15u) & 0x7fu; - uint32_t sbase = (buffer[0] >> 9u) & 0x3fu; - uint32_t imm = (buffer[0] >> 8u) & 0x1u; - uint32_t offset = (buffer[0] >> 0u) & 0xffu; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(sdst); - inst.src_num = 2; - 
inst.src[0] = operand_parse(sbase * 2); - - uint32_t size = 1; - - if (imm == 1) - { - inst.src[1].type = ShaderOperandType::LiteralConstant; - inst.src[1].constant.u = offset << 2u; - } else - { - inst.src[1] = operand_parse(offset); - - if (inst.src[1].type == ShaderOperandType::LiteralConstant) - { - inst.src[1].constant.u = buffer[size]; - size++; - } - } - - switch (opcode) - { - case 0x02: - inst.type = ShaderInstructionType::SLoadDwordx4; - inst.format = ShaderInstructionFormat::Sdst4SbaseSoffset; - inst.src[0].size = 2; - inst.dst.size = 4; - break; - case 0x03: - inst.type = ShaderInstructionType::SLoadDwordx8; - inst.format = ShaderInstructionFormat::Sdst8SbaseSoffset; - inst.src[0].size = 2; - inst.dst.size = 8; - break; - case 0x08: - inst.type = ShaderInstructionType::SBufferLoadDword; - inst.format = ShaderInstructionFormat::SdstSvSoffset; - inst.src[0].size = 4; - break; - case 0x09: - inst.type = ShaderInstructionType::SBufferLoadDwordx2; - inst.format = ShaderInstructionFormat::Sdst2SvSoffset; - inst.src[0].size = 4; - inst.dst.size = 2; - break; - case 0x0a: - inst.type = ShaderInstructionType::SBufferLoadDwordx4; - inst.format = ShaderInstructionFormat::Sdst4SvSoffset; - inst.src[0].size = 4; - inst.dst.size = 4; - break; - case 0x0b: - inst.type = ShaderInstructionType::SBufferLoadDwordx8; - inst.format = ShaderInstructionFormat::Sdst8SvSoffset; - inst.src[0].size = 4; - inst.dst.size = 8; - break; - case 0x0c: - inst.type = ShaderInstructionType::SBufferLoadDwordx16; - inst.format = ShaderInstructionFormat::Sdst16SvSoffset; - inst.src[0].size = 4; - inst.dst.size = 16; - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown smrd opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_mubuf) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] 
>> 18u) & 0x1fu; - uint32_t lds = (buffer[0] >> 16u) & 0x1u; - uint32_t glc = (buffer[0] >> 14u) & 0x1u; - uint32_t idxen = (buffer[0] >> 13u) & 0x1u; - uint32_t offen = (buffer[0] >> 12u) & 0x1u; - uint32_t offset = (buffer[0] >> 0u) & 0xfffu; - - uint32_t soffset = (buffer[1] >> 24u) & 0xffu; - uint32_t tfe = (buffer[1] >> 23u) & 0x1u; - uint32_t slc = (buffer[1] >> 22u) & 0x1u; - uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; - uint32_t vdata = (buffer[1] >> 8u) & 0xffu; - uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; - - EXIT_NOT_IMPLEMENTED(idxen == 0); - EXIT_NOT_IMPLEMENTED(offen == 1); - EXIT_NOT_IMPLEMENTED(offset != 0); - EXIT_NOT_IMPLEMENTED(glc == 1); - EXIT_NOT_IMPLEMENTED(slc == 1); - EXIT_NOT_IMPLEMENTED(lds == 1); - EXIT_NOT_IMPLEMENTED(tfe == 1); - - uint32_t size = 2; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(vdata + 256); - inst.src_num = 3; - inst.src[0] = operand_parse(vaddr + 256); - inst.src[1] = operand_parse(srsrc * 4); - inst.src[2] = operand_parse(soffset); - - if (inst.src[2].type == ShaderOperandType::LiteralConstant) - { - inst.src[2].constant.u = buffer[size]; - size++; - } - - switch (opcode) - { - case 0x00: - inst.type = ShaderInstructionType::BufferLoadFormatX; - inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; - inst.src[1].size = 4; - break; - case 0x01: - inst.type = ShaderInstructionType::BufferLoadFormatXy; - inst.format = ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen; - inst.src[1].size = 4; - inst.dst.size = 2; - break; - case 0x02: - inst.type = ShaderInstructionType::BufferLoadFormatXyz; - inst.format = ShaderInstructionFormat::Vdata3VaddrSvSoffsIdxen; - inst.src[1].size = 4; - inst.dst.size = 3; - break; - case 0x03: - inst.type = ShaderInstructionType::BufferLoadFormatXyzw; - inst.format = ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxen; - inst.src[1].size = 4; - inst.dst.size = 4; - break; - case 0x04: - inst.type = ShaderInstructionType::BufferStoreFormatX; - inst.format = 
ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; - inst.src[1].size = 4; - break; - case 0x05: - inst.type = ShaderInstructionType::BufferStoreFormatXy; - inst.format = ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen; - inst.src[1].size = 4; - inst.dst.size = 2; - break; - case 0x0c: - inst.type = ShaderInstructionType::BufferLoadDword; - inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; - inst.src[1].size = 4; - break; - case 0x1c: - inst.type = ShaderInstructionType::BufferStoreDword; - inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; - inst.src[1].size = 4; - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown mubuf opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_ds) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 18u) & 0xffu; - uint32_t gds = (buffer[0] >> 17u) & 0x1u; - uint32_t offset0 = (buffer[0] >> 0u) & 0xffu; - uint32_t offset1 = (buffer[0] >> 8u) & 0xffu; - - uint32_t vdst = (buffer[1] >> 24u) & 0xffu; - uint32_t data1 = (buffer[1] >> 16u) & 0xffu; - uint32_t data0 = (buffer[1] >> 8u) & 0xffu; - uint32_t addr = (buffer[1] >> 0u) & 0xffu; - - EXIT_NOT_IMPLEMENTED(addr != 0); - EXIT_NOT_IMPLEMENTED(data0 != 0); - EXIT_NOT_IMPLEMENTED(data1 != 0); - EXIT_NOT_IMPLEMENTED(offset0 != 0); - EXIT_NOT_IMPLEMENTED(offset1 != 0); - EXIT_NOT_IMPLEMENTED(gds == 0); - - uint32_t size = 2; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(vdst + 256); - inst.src_num = 0; - - switch (opcode) // NOLINT - { - case 0x3d: - inst.type = ShaderInstructionType::DsConsume; - inst.format = ShaderInstructionFormat::VdstGds; - break; - case 0x3e: - inst.type = ShaderInstructionType::DsAppend; - inst.format = ShaderInstructionFormat::VdstGds; - break; - default: printf("%s", dst->DbgDump().C_Str()); 
EXIT("unknown ds opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_mimg) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t slc = (buffer[0] >> 25u) & 0x1u; - uint32_t opcode = (buffer[0] >> 18u) & 0x7fu; - uint32_t lwe = (buffer[0] >> 17u) & 0x1u; - uint32_t tff = (buffer[0] >> 16u) & 0x1u; - uint32_t r128 = (buffer[0] >> 15u) & 0x1u; - uint32_t da = (buffer[0] >> 14u) & 0x1u; - uint32_t glc = (buffer[0] >> 13u) & 0x1u; - uint32_t unrm = (buffer[0] >> 12u) & 0x1u; - uint32_t dmask = (buffer[0] >> 8u) & 0xfu; - - uint32_t ssamp = (buffer[1] >> 21u) & 0x1fu; // S# - uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; // T# - uint32_t vdata = (buffer[1] >> 8u) & 0xffu; - uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; - - EXIT_NOT_IMPLEMENTED(da == 1); - EXIT_NOT_IMPLEMENTED(r128 == 1); - EXIT_NOT_IMPLEMENTED(tff == 1); - EXIT_NOT_IMPLEMENTED(lwe == 1); - EXIT_NOT_IMPLEMENTED(glc == 1); - EXIT_NOT_IMPLEMENTED(slc == 1); - EXIT_NOT_IMPLEMENTED(unrm == 1); - // EXIT_NOT_IMPLEMENTED(dmask != 0xf && dmask != 0x7); - - uint32_t size = 2; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(vdata + 256); - inst.src_num = 3; - inst.src[0] = operand_parse(vaddr + 256); - inst.src[1] = operand_parse(srsrc * 4); - inst.src[2] = operand_parse(ssamp * 4); - - switch (opcode) - { - case 0x00: - inst.type = ShaderInstructionType::ImageLoad; - inst.src[0].size = 3; - inst.src[1].size = 8; - inst.src_num = 2; - if (dmask == 0xf) - { - inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF; - inst.dst.size = 4; - } - break; - case 0x08: - inst.type = ShaderInstructionType::ImageStore; - inst.src[0].size = 3; - inst.src[1].size = 8; - inst.src_num = 2; - if (dmask == 0xf) - { - inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF; - inst.dst.size = 4; - } - break; - case 0x09: - inst.type = 
ShaderInstructionType::ImageStoreMip; - inst.src[0].size = 4; - inst.src[1].size = 8; - inst.src_num = 2; - if (dmask == 0xf) - { - inst.format = ShaderInstructionFormat::Vdata4Vaddr4StDmaskF; - inst.dst.size = 4; - } - break; - case 0x20: - inst.type = ShaderInstructionType::ImageSample; - inst.src[0].size = 3; - inst.src[1].size = 8; - inst.src[2].size = 4; - switch (dmask) - { - case 0x1: - { - inst.format = ShaderInstructionFormat::Vdata1Vaddr3StSsDmask1; - inst.dst.size = 1; - break; - } - case 0x3: - { - inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask3; - inst.dst.size = 2; - break; - } - case 0x5: - { - inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask5; - inst.dst.size = 2; - break; - } - case 0x7: - { - inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; - inst.dst.size = 3; - break; - } - case 0x8: - { - inst.format = ShaderInstructionFormat::Vdata1Vaddr3StSsDmask8; - inst.dst.size = 1; - break; - } - case 0x9: - { - inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9; - inst.dst.size = 2; - break; - } - case 0xf: - { - inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF; - inst.dst.size = 4; - break; - } - default:; - } - break; - case 0x27: - inst.type = ShaderInstructionType::ImageSampleLz; - inst.src[0].size = 3; - inst.src[1].size = 8; - inst.src[2].size = 4; - switch (dmask) // NOLINT - { - case 0x7: - { - inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; - inst.dst.size = 3; - break; - } - default:; - } - break; - case 0x37: - inst.type = ShaderInstructionType::ImageSampleLzO; - inst.src[0].size = 4; - inst.src[1].size = 8; - inst.src[2].size = 4; - switch (dmask) // NOLINT - { - case 0x7: - { - inst.format = ShaderInstructionFormat::Vdata3Vaddr4StSsDmask7; - inst.dst.size = 3; - break; - } - default:; - } - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown mimg opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - if (inst.format == 
ShaderInstructionFormat::Unknown) - { - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown mimg format for opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 ", dmask: 0x%" PRIx32 "\n", opcode, pc, dmask); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_mtbuf) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 16u) & 0x7u; - uint32_t dfmt = (buffer[0] >> 19u) & 0xfu; - uint32_t nfmt = (buffer[0] >> 23u) & 0x7u; - uint32_t glc = (buffer[0] >> 14u) & 0x1u; - uint32_t idxen = (buffer[0] >> 13u) & 0x1u; - uint32_t offen = (buffer[0] >> 12u) & 0x1u; - uint32_t offset = (buffer[0] >> 0u) & 0xfffu; - - uint32_t soffset = (buffer[1] >> 24u) & 0xffu; - uint32_t tfe = (buffer[1] >> 23u) & 0x1u; - uint32_t slc = (buffer[1] >> 22u) & 0x1u; - uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; - uint32_t vdata = (buffer[1] >> 8u) & 0xffu; - uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; - - EXIT_NOT_IMPLEMENTED(idxen == 0); - // EXIT_NOT_IMPLEMENTED(offen == 1); - EXIT_NOT_IMPLEMENTED(offset != 0); - EXIT_NOT_IMPLEMENTED(glc == 1); - EXIT_NOT_IMPLEMENTED(slc == 1); - EXIT_NOT_IMPLEMENTED(tfe == 1); - // EXIT_NOT_IMPLEMENTED(dfmt != 14); - // EXIT_NOT_IMPLEMENTED(nfmt != 7); - - if ((dfmt != 14 && dfmt != 4) || nfmt != 7) - { - EXIT("unknown format: dfmt = %d, nfmt = %d at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ")\n", dfmt, nfmt, pc, dst->GetHash0()); - } - - uint32_t size = 2; - - ShaderInstruction inst; - inst.pc = pc; - inst.dst = operand_parse(vdata + 256); - inst.src_num = 3; - inst.src[0] = operand_parse(vaddr + 256); - inst.src[1] = operand_parse(srsrc * 4); - inst.src[2] = operand_parse(soffset); - - if (inst.src[2].type == ShaderOperandType::LiteralConstant) - { - inst.src[2].constant.u = buffer[size]; - size++; - } - - inst.src[1].size = 4; - - switch (opcode) - { - case 0x00: - inst.type = ShaderInstructionType::TBufferLoadFormatX; - 
inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxenFloat1; - EXIT_NOT_IMPLEMENTED(offen == 1); - EXIT_NOT_IMPLEMENTED(!(dfmt == 4 && nfmt == 7)); - break; - - case 0x03: - inst.type = ShaderInstructionType::TBufferLoadFormatXyzw; - inst.format = (offen == 1 ? ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4 - : ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxenFloat4); - inst.src[0].size += static_cast(offen); - inst.dst.size = 4; - EXIT_NOT_IMPLEMENTED(!(dfmt == 14 && nfmt == 7)); - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown mtbuf opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return size; -} - -KYTY_SHADER_PARSER(shader_parse_vintrp) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == nullptr); - EXIT_IF(buffer == nullptr || buffer < src); - - uint32_t opcode = (buffer[0] >> 16u) & 0x3u; - uint32_t vdst = (buffer[0] >> 18u) & 0xffu; - uint32_t attr = (buffer[0] >> 10u) & 0x3fu; - uint32_t chan = (buffer[0] >> 8u) & 0x3u; - uint32_t vsrc = (buffer[0] >> 0u) & 0xffu; - - ShaderInstruction inst; - inst.pc = pc; - inst.src[0] = operand_parse(vsrc + 256); - inst.dst = operand_parse(vdst + 256); - inst.src[1].type = ShaderOperandType::IntegerInlineConstant; - inst.src[1].constant.u = attr; - inst.src[2].type = ShaderOperandType::IntegerInlineConstant; - inst.src[2].constant.u = chan; - inst.src_num = 3; - - switch (opcode) - { - case 0x0: - inst.type = ShaderInstructionType::VInterpP1F32; - inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; - break; - case 0x1: - inst.type = ShaderInstructionType::VInterpP2F32; - inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; - break; - default: printf("%s", dst->DbgDump().C_Str()); EXIT("unknown vintrp opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 "\n", opcode, pc); - } - - dst->GetInstructions().Add(inst); - - return 1; -} - -KYTY_SHADER_PARSER(shader_parse) -{ - EXIT_IF(dst == nullptr); - EXIT_IF(src == 
nullptr); - EXIT_IF(buffer != nullptr); - - auto type = dst->GetType(); - - dst->GetInstructions().Clear(); - dst->GetLabels().Clear(); - - const auto* ptr = src + pc / 4; - for (;;) - { - auto instruction = ptr[0]; - auto pc = 4 * static_cast(ptr - src); - - if ((instruction & 0xF8000000u) == 0xC0000000) - { - ptr += shader_parse_smrd(pc, src, ptr, dst); - } else if ((instruction & 0xC0000000u) == 0x80000000) - { - ptr += shader_parse_sop2(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xE0000000) - { - ptr += shader_parse_mubuf(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xE8000000) - { - ptr += shader_parse_mtbuf(pc, src, ptr, dst); - } else if ((instruction & 0x80000000u) == 0x00000000) - { - ptr += shader_parse_vop2(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xD0000000) - { - ptr += shader_parse_vop3(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xF8000000) - { - ptr += shader_parse_exp(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xC8000000) - { - ptr += shader_parse_vintrp(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xF0000000) - { - ptr += shader_parse_mimg(pc, src, ptr, dst); - } else if ((instruction & 0xFC000000u) == 0xD8000000) - { - ptr += shader_parse_ds(pc, src, ptr, dst); - } else - { - printf("%s", dst->DbgDump().C_Str()); - EXIT("unknown code 0x%08" PRIx32 " at addr 0x%08" PRIx32 "\n", ptr[0], pc); - } - - if ((instruction == 0xBF810000 && (type == ShaderType::Vertex || type == ShaderType::Pixel || type == ShaderType::Compute) && - !dst->GetLabels().Contains(4 * static_cast(ptr - src), [](auto label, auto pc) { return label.GetDst() == pc; })) || - (instruction == 0xBE802000 && type == ShaderType::Fetch)) - { - break; - } - } - - return ptr - src; -} - static void vs_print(const char* func, const HW::VertexShaderInfo& vs, const HW::ShaderRegisters& sh) { printf("%s\n", func); @@ -2272,11 +717,11 @@ static void vs_check(const 
HW::VertexShaderInfo& vs, const HW::ShaderRegisters& EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.cu_group_enable != false); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.require_forward_progress != false); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.lds_configuration != false); - EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.gs_vgpr_component_count != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.gs_vgpr_component_count != 3); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc1.fp16_overflow != false); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.scratch_en != false); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.offchip_lds != false); - EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.es_vgpr_component_count != 0); + EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.es_vgpr_component_count != 3); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.lds_size != 0); EXIT_NOT_IMPLEMENTED(vs.gs_regs.rsrc2.shared_vgprs != 0); } @@ -2287,11 +732,11 @@ static void vs_check(const HW::VertexShaderInfo& vs, const HW::ShaderRegisters& EXIT_NOT_IMPLEMENTED(sh.m_spiShaderIdxFormat != 0x00000000 && sh.m_spiShaderIdxFormat != 0x00000001); EXIT_NOT_IMPLEMENTED(sh.m_geNggSubgrpCntl != 0x00000000 && sh.m_geNggSubgrpCntl != 0x00000001); EXIT_NOT_IMPLEMENTED(sh.m_vgtGsInstanceCnt != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.GetEsVertsPerSubgrp() != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.GetGsPrimsPerSubgrp() != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.GetGsInstPrimsInSubgrp() != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.m_geMaxOutputPerSubgroup != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.m_vgtEsgsRingItemsize != 0x00000000); + EXIT_NOT_IMPLEMENTED(sh.GetEsVertsPerSubgrp() != 0x00000000 && sh.GetEsVertsPerSubgrp() != 0x00000040); + EXIT_NOT_IMPLEMENTED(sh.GetGsPrimsPerSubgrp() != 0x00000000 && sh.GetGsPrimsPerSubgrp() != 0x00000040); + EXIT_NOT_IMPLEMENTED(sh.GetGsInstPrimsInSubgrp() != 0x00000000 && sh.GetGsInstPrimsInSubgrp() != 0x00000040); + EXIT_NOT_IMPLEMENTED(sh.m_geMaxOutputPerSubgroup != 0x00000000 && sh.m_geMaxOutputPerSubgroup != 0x00000040); + 
EXIT_NOT_IMPLEMENTED(sh.m_vgtEsgsRingItemsize != 0x00000000 && sh.m_vgtEsgsRingItemsize != 0x00000004); EXIT_NOT_IMPLEMENTED(sh.m_vgtGsMaxVertOut != 0x00000000); EXIT_NOT_IMPLEMENTED(sh.m_vgtGsOutPrimType != 0x00000000); } @@ -2327,7 +772,7 @@ static void ps_check(const HW::PsStageRegisters& ps, const HW::ShaderRegisters& EXIT_NOT_IMPLEMENTED(sh.ps_input_ena != 0x00000002 && sh.ps_input_ena != 0x00000302); EXIT_NOT_IMPLEMENTED(sh.ps_input_addr != 0x00000002 && sh.ps_input_addr != 0x00000302); // EXIT_NOT_IMPLEMENTED(ps.m_spiPsInControl != 0x00000000); - EXIT_NOT_IMPLEMENTED(sh.baryc_cntl != 0x00000000); + EXIT_NOT_IMPLEMENTED(sh.baryc_cntl != 0x00000000 && sh.baryc_cntl != 0x01000000); EXIT_NOT_IMPLEMENTED(sh.m_cbShaderMask != 0x0000000f); EXIT_NOT_IMPLEMENTED(sh.db_shader_control.other_bits != 0x00000000); @@ -2358,7 +803,7 @@ static void cs_check(const HW::CsStageRegisters& cs, const HW::ShaderRegisters& // EXIT_NOT_IMPLEMENTED(cs.m_computeNumThreadZ != 0x00000001); } -static bool SpirvDisassemble(const uint32_t* src_binary, size_t src_binary_size, String* dst_disassembly) +static bool SpirvDisassemble(const uint32_t* src_binary, size_t src_binary_size, String8* dst_disassembly) { if (dst_disassembly != nullptr) { @@ -2383,7 +828,7 @@ static bool SpirvDisassemble(const uint32_t* src_binary, size_t src_binary_size, return true; } -static bool SpirvToGlsl(const uint32_t* /*src_binary*/, size_t /*src_binary_size*/, String* /*dst_code*/) +static bool SpirvToGlsl(const uint32_t* /*src_binary*/, size_t /*src_binary_size*/, String8* /*dst_code*/) { // if (dst_code != nullptr) // { @@ -2396,7 +841,7 @@ static bool SpirvToGlsl(const uint32_t* /*src_binary*/, size_t /*src_binary_size return true; } -static bool SpirvRun(const String& src, Vector* dst, String* err_msg) +static bool SpirvRun(const String8& src, Vector* dst, String8* err_msg) { EXIT_IF(dst == nullptr); EXIT_IF(err_msg == nullptr); @@ -2405,39 +850,38 @@ static bool SpirvRun(const String& src, Vector* dst, 
String* err_msg) spvtools::Optimizer opt(SPV_ENV_VULKAN_1_2); spv_position_t error_position {}; - String error_msg; + String8 error_msg; auto print_msg_to_stderr = [&error_position, &error_msg](spv_message_level_t /* level */, const char* /*source*/, [[maybe_unused]] const spv_position_t& position, const char* m) { // printf("%s\n", source); - error_msg = String::FromPrintf("%d: %d (%d) %s", static_cast(position.line), static_cast(position.column), - static_cast(position.index), m); - printf(FG_BRIGHT_RED "error: %s\n" FG_DEFAULT, error_msg.C_Str()); + error_msg = String8::FromPrintf("%d: %d (%d) %s", static_cast(position.line), static_cast(position.column), + static_cast(position.index), m); + printf(FG_BRIGHT_RED "error: %s\n" FG_DEFAULT, error_msg.c_str()); error_position = position; }; core.SetMessageConsumer(print_msg_to_stderr); opt.SetMessageConsumer(print_msg_to_stderr); - String::Utf8 src_utf8 = src.utf8_str(); - dst->Clear(); std::vector spirv; - if (!core.Assemble(src_utf8.GetDataConst(), src_utf8.Size(), &spirv)) + if (!core.Assemble(src.GetDataConst(), src.Size(), &spirv)) { - printf("Assemble failed at:\n%s\n", src.Mid(src.FindIndex(U'\n', error_position.index - 100), 200).C_Str()); - *err_msg = String::FromPrintf("Assemble failed at:\n%s\n", src.Mid(src.FindIndex(U'\n', error_position.index - 100), 200).C_Str()); + printf("Assemble failed at:\n%s\n", src.Mid(src.FindIndex('\n', error_position.index - 100), 200).c_str()); + *err_msg = String8::FromPrintf("Assemble failed at:\n%s\n", src.Mid(src.FindIndex('\n', error_position.index - 100), 200).c_str()); return false; } if (Config::ShaderValidationEnabled() && !core.Validate(spirv)) { - String disassembly; + String8 disassembly; SpirvDisassemble(spirv.data(), spirv.size(), &disassembly); - printf("%s\n", disassembly.C_Str()); + printf("%s\n", disassembly.c_str()); printf("Validate failed\n"); - *err_msg = String::FromPrintf("%s\n\nValidate failed:\n%s\n", Log::RemoveColors(disassembly).C_Str(), 
error_msg.C_Str()); + *err_msg = String8::FromPrintf("%s\n\nValidate failed:\n%s\n", Log::RemoveColors(String::FromUtf8(disassembly.c_str())).C_Str(), + error_msg.c_str()); return false; } @@ -2452,7 +896,7 @@ static bool SpirvRun(const String& src, Vector* dst, String* err_msg) if (optimize && !opt.Run(spirv.data(), spirv.size(), &spirv)) { printf("Optimize failed\n"); - *err_msg = String::FromPrintf("Optimize failed\n"); + *err_msg = String8::FromPrintf("Optimize failed\n"); return false; } @@ -2479,21 +923,24 @@ static ShaderUsageInfo GetUsageSlots(const uint32_t* code) const auto* binary_info = GetBinaryInfo(code); - EXIT_NOT_IMPLEMENTED(binary_info == nullptr); - EXIT_NOT_IMPLEMENTED(binary_info->chunk_usage_base_offset_dw == 0); - ShaderUsageInfo ret; - ret.usage_masks = (binary_info->chunk_usage_base_offset_dw == 0 - ? nullptr - : reinterpret_cast(binary_info) - binary_info->chunk_usage_base_offset_dw); - ret.slots_num = binary_info->num_input_usage_slots; - ret.slots = (ret.slots_num == 0 ? nullptr : reinterpret_cast(ret.usage_masks) - ret.slots_num); + if (binary_info != nullptr) + { + EXIT_NOT_IMPLEMENTED(binary_info->chunk_usage_base_offset_dw == 0); + + ret.usage_masks = (binary_info->chunk_usage_base_offset_dw == 0 + ? nullptr + : reinterpret_cast(binary_info) - binary_info->chunk_usage_base_offset_dw); + ret.slots_num = binary_info->num_input_usage_slots; + ret.slots = (ret.slots_num == 0 ? nullptr : reinterpret_cast(ret.usage_masks) - ret.slots_num); + ret.valid = true; + } return ret; } -static void ShaderDetectBuffers(ShaderVertexInputInfo* info) +static void ShaderDetectBuffers(ShaderVertexInputInfo* info, bool ps5) { KYTY_PROFILER_FUNCTION(); @@ -2514,7 +961,7 @@ static void ShaderDetectBuffers(ShaderVertexInputInfo* info) if (stride == r.Stride()) { - uint64_t rbase = r.Base(); + uint64_t rbase = (ps5 ? 
r.Base48() : r.Base44()); uint64_t base = std::min(rbase, b.addr); uint64_t offset1 = rbase - base; uint64_t offset2 = b.addr - base; @@ -2535,7 +982,7 @@ static void ShaderDetectBuffers(ShaderVertexInputInfo* info) { EXIT_NOT_IMPLEMENTED(info->buffers_num >= ShaderVertexInputInfo::RES_MAX); int bi = info->buffers_num++; - info->buffers[bi].addr = r.Base(); + info->buffers[bi].addr = (ps5 ? r.Base48() : r.Base44()); info->buffers[bi].stride = r.Stride(); info->buffers[bi].num_records = r.NumRecords(); info->buffers[bi].attr_num = 1; @@ -2548,7 +995,8 @@ static void ShaderDetectBuffers(ShaderVertexInputInfo* info) auto& b = info->buffers[bi]; for (int ri = 0; ri < b.attr_num; ri++) { - b.attr_offsets[ri] = info->resources[b.attr_indices[ri]].Base() - b.addr; + b.attr_offsets[ri] = + (ps5 ? info->resources[b.attr_indices[ri]].Base48() : info->resources[b.attr_indices[ri]].Base44()) - b.addr; } } } @@ -2563,11 +1011,12 @@ static void ShaderParseFetch(ShaderVertexInputInfo* info, const uint32_t* fetch, ShaderCode code; code.SetType(ShaderType::Fetch); - shader_parse(0, fetch, nullptr, &code); + // shader_parse(0, fetch, nullptr, &code); + ShaderParse(fetch, &code); KYTY_PROFILER_END_BLOCK; - // printf("%s", code.DbgDump().C_Str()); + // printf("%s", code.DbgDump().c_str()); KYTY_PROFILER_BLOCK("ShaderParseFetch::check_insts"); @@ -2642,6 +1091,52 @@ static void ShaderParseFetch(ShaderVertexInputInfo* info, const uint32_t* fetch, EXIT_NOT_IMPLEMENTED(s_num != v_num); } +static void ShaderParseAttrib(ShaderVertexInputInfo* info, const ShaderSemantic* input_semantics, uint32_t num_input_semantics, + const uint32_t* attrib, const uint32_t* buffer) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_IF(info == nullptr || attrib == nullptr || buffer == nullptr); + + for (uint32_t i = 0; i < num_input_semantics; i++) + { + const auto& in = input_semantics[i]; + + EXIT_NOT_IMPLEMENTED(in.static_vb_index == 1 || in.static_attribute == 1); + + uint32_t reg = in.hardware_mapping; + uint32_t 
size = in.size_in_elements; + + printf("reg = %u, size = %u, va[%u] = 0x%08" PRIx32 "\n", reg, size, i, attrib[in.semantic]); + + size_t index = attrib[in.semantic] & 0x1fu; + uint32_t format = (attrib[in.semantic] >> 5u) & 0x1ffu; + uint32_t offset = (attrib[in.semantic] >> 14u) & 0xfffu; + uint32_t fetch_index = (attrib[in.semantic] >> 26u) & 0x1u; + + EXIT_NOT_IMPLEMENTED(format != 0); + EXIT_NOT_IMPLEMENTED(offset != 0); + EXIT_NOT_IMPLEMENTED(fetch_index != 0); + + EXIT_NOT_IMPLEMENTED(index >= ShaderVertexInputInfo::RES_MAX); + + const auto* sharp = &buffer[index * 4]; + + EXIT_NOT_IMPLEMENTED(info->resources_num >= ShaderVertexInputInfo::RES_MAX); + + auto& r = info->resources[info->resources_num]; + auto& rd = info->resources_dst[info->resources_num]; + rd.register_start = static_cast(reg); + rd.registers_num = static_cast(size); + r.fields[0] = sharp[0]; + r.fields[1] = sharp[1]; + r.fields[2] = sharp[2]; + r.fields[3] = sharp[3]; + + info->resources_num++; + } +} + static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index, int slot, ShaderStorageUsage usage, const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer) { @@ -2796,6 +1291,8 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s void ShaderCalcBindingIndices(ShaderBindResources* bind) { + KYTY_PROFILER_FUNCTION(); + int binding_index = 0; bind->push_constant_size = 0; @@ -2829,6 +1326,423 @@ void ShaderCalcBindingIndices(ShaderBindResources* bind) EXIT_IF((bind->push_constant_size % 16) != 0); } +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_IF(bind == nullptr); + EXIT_IF(info == nullptr); + + const auto* src = reinterpret_cast(addr); + + auto usages = GetUsageSlots(src); + + EXIT_NOT_IMPLEMENTED(!usages.valid); + + info->fetch = false; + info->fetch_reg = 
0; + info->vertex_buffer = false; + info->vertex_buffer_reg = 0; + info->storage_buffers_readonly = 0; + info->storage_buffers_constant = 0; + info->storage_buffers_readwrite = 0; + info->textures2D_readonly = 0; + info->textures2D_readwrite = 0; + info->extended_buffer = false; + info->samplers = 0; + info->gds_pointers = 0; + + uint32_t* extended_buffer = nullptr; + + for (int i = 0; i < usages.slots_num; i++) + { + const auto& usage = usages.slots[i]; + switch (usage.type) + { + case 0x00: + EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); + if (usage.flags == 0) + { + ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly, + user_sgpr, extended_buffer); + info->storage_buffers_readonly++; + } else if (usage.flags == 3) + { + ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, user_sgpr, + extended_buffer); + info->textures2D_readonly++; + EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); + } + break; + + case 0x01: + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer); + info->samplers++; + break; + + case 0x02: + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, user_sgpr, + extended_buffer); + info->storage_buffers_constant++; + break; + + case 0x04: + EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); + if (usage.flags == 0) + { + ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadWrite, + user_sgpr, extended_buffer); + info->storage_buffers_readwrite++; + } else if (usage.flags == 3) + { + ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, user_sgpr, + extended_buffer); + 
info->textures2D_readwrite++; + EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); + } + break; + + case 0x07: + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer); + info->gds_pointers++; + break; + + case 0x12: + EXIT_NOT_IMPLEMENTED(usage.slot != 0); + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + info->fetch = true; + info->fetch_reg = usage.start_register; + break; + + case 0x17: + EXIT_NOT_IMPLEMENTED(usage.slot != 0); + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + info->vertex_buffer = true; + info->vertex_buffer_reg = usage.start_register; + break; + + case 0x1b: + EXIT_NOT_IMPLEMENTED(usage.flags != 0); + EXIT_NOT_IMPLEMENTED(usage.slot != 1); + EXIT_NOT_IMPLEMENTED(bind->extended.used); + bind->extended.used = true; + bind->extended.slot = usage.slot; + bind->extended.start_register = usage.start_register; + bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; + bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; + extended_buffer = reinterpret_cast(bind->extended.data.Base()); + info->extended_buffer = true; + break; + + default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); + } + } +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, ShaderBindResources* bind, + const HW::UserSgprInfo& user_sgpr) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_IF(bind == nullptr); + EXIT_IF(info == nullptr); + + info->fetch = false; + info->fetch_reg = 0; + info->vertex_buffer = false; + info->vertex_buffer_reg = 0; + info->storage_buffers_readonly = 0; + info->storage_buffers_constant = 0; + info->storage_buffers_readwrite = 0; + info->textures2D_readonly = 0; + info->textures2D_readwrite = 0; + info->extended_buffer = false; + info->samplers = 0; + info->gds_pointers = 0; + + 
EXIT_NOT_IMPLEMENTED(user_data == nullptr); + EXIT_NOT_IMPLEMENTED(user_data->eud_size_dw != 0); + EXIT_NOT_IMPLEMENTED(user_data->srt_size_dw != 0); + + uint32_t* extended_buffer = nullptr; + + for (uint16_t type = 0; type < user_data->direct_resource_count; type++) + { + if (user_data->direct_resource_offset[type] == 0xffff) + { + continue; + } + + int reg = user_data->direct_resource_offset[type]; + + switch (type) + { + case 8: + info->vertex_buffer = true; + info->vertex_buffer_reg = reg; + break; + + case 10: + info->vertex_attrib = true; + info->vertex_attrib_reg = reg; + break; + + default: EXIT("unknown usage type: 0x%04" PRIx16 "\n", type); + } + } + + if (user_data->sharp_resource_count[0] != 0) + { + for (uint16_t slot = 0; slot < user_data->sharp_resource_count[0]; slot++) + { + if (user_data->sharp_resource_offset[0][slot].offset_dw == 0x7fff) + { + continue; + } + + EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[0][slot].size != 0); + ShaderGetTextureBuffer(&bind->textures2D, user_data->sharp_resource_offset[0][slot].offset_dw, slot, + ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); + info->textures2D_readonly++; + EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); + } + } + + EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_count[1] != 0); + + if (user_data->sharp_resource_count[2] != 0) + { + for (uint16_t slot = 0; slot < user_data->sharp_resource_count[2]; slot++) + { + if (user_data->sharp_resource_offset[2][slot].offset_dw == 0x7fff) + { + continue; + } + + EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[2][slot].size != 1); + ShaderGetSampler(&bind->samplers, user_data->sharp_resource_offset[2][slot].offset_dw, slot, user_sgpr, extended_buffer); + info->samplers++; + } + } + + if (user_data->sharp_resource_count[3] != 0) + { + for (uint16_t slot = 0; slot < user_data->sharp_resource_count[3]; slot++) + { + if (user_data->sharp_resource_offset[3][slot].offset_dw == 0x7fff) + 
{ + continue; + } + + EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[3][slot].size != 1); + ShaderGetStorageBuffer(&bind->storage_buffers, user_data->sharp_resource_offset[3][slot].offset_dw, slot, + ShaderStorageUsage::Constant, user_sgpr, extended_buffer); + info->storage_buffers_constant++; + } + } + + // KYTY_NOT_IMPLEMENTED; +} + +//// NOLINTNEXTLINE(readability-function-cognitive-complexity) +// void ShaderParseUsageCS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) +//{ +// KYTY_PROFILER_FUNCTION(); +// +// EXIT_IF(bind == nullptr); +// EXIT_IF(info == nullptr); +// +// const auto* src = reinterpret_cast(addr); +// +// auto usages = GetUsageSlots(src); +// +// info->fetch = false; +// info->fetch_reg = 0; +// info->vertex_buffer = false; +// info->vertex_buffer_reg = 0; +// +// uint32_t* extended_buffer = nullptr; +// +// for (int i = 0; i < usages.slots_num; i++) +// { +// const auto& usage = usages.slots[i]; +// switch (usage.type) +// { +// // case 0x00: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); +// // if (usage.flags == 0) +// // { +// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); } +// // else if (usage.flags +// // == 3) +// // { +// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, +// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); +// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() +// //!= 9); +// // } +// // break; +// // case 0x02: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break; +// // case 0x04: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); +// // if (usage.flags == 0) +// // { +// // 
ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::ReadWrite, user_sgpr, extended_buffer); } +// // else if (usage.flags +// // == 3) +// // { +// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, +// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer); +// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() +// //!= 9); +// // } +// // break; +// case 0x07: +// EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer); +// break; +// // case 0x1b: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // EXIT_NOT_IMPLEMENTED(usage.slot != 1); +// // EXIT_NOT_IMPLEMENTED(bind->extended.used); +// // bind->extended.used = true; +// // bind->extended.slot = usage.slot; +// // bind->extended.start_register = usage.start_register; +// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; +// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; +// // extended_buffer = reinterpret_cast(bind->extended.data.Base()); +// // break; +// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); +// } +// } +// } +// +//// NOLINTNEXTLINE(readability-function-cognitive-complexity) +// void ShaderParseUsagePS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) +//{ +// KYTY_PROFILER_FUNCTION(); +// +// EXIT_IF(bind == nullptr); +// EXIT_IF(info == nullptr); +// +// const auto* src = reinterpret_cast(addr); +// +// auto usages = GetUsageSlots(src); +// +// info->fetch = false; +// info->fetch_reg = 0; +// info->vertex_buffer = false; +// info->vertex_buffer_reg = 0; +// +// uint32_t* extended_buffer = nullptr; +// +// for (int i = 0; i < usages.slots_num; i++) +// { +// const auto& usage = usages.slots[i]; +// switch (usage.type) +// { +// // case 0x00: +// 
// EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); +// // if (usage.flags == 0) +// // { +// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); } +// // else if (usage.flags +// // == 3) +// // { +// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, +// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); +// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() +// //!= 9); +// // } +// // break; +// // case 0x01: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer); +// // break; +// // case 0x02: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break; +// // case 0x04: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 3); +// // if (usage.flags == 3) +// // { +// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, +// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer); +// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() +// //!= 9); +// // } +// // break; +// // case 0x1b: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // EXIT_NOT_IMPLEMENTED(usage.slot != 1); +// // EXIT_NOT_IMPLEMENTED(bind->extended.used); +// // bind->extended.used = true; +// // bind->extended.slot = usage.slot; +// // bind->extended.start_register = usage.start_register; +// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; +// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; +// // extended_buffer = reinterpret_cast(bind->extended.data.Base()); +// // break; +// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", 
usage.type); +// } +// } +// } +// +// void ShaderParseUsageVS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) +//{ +// KYTY_PROFILER_FUNCTION(); +// +// EXIT_IF(bind == nullptr); +// EXIT_IF(info == nullptr); +// +// const auto* src = reinterpret_cast(addr); +// +// auto usages = GetUsageSlots(src); +// +// info->fetch = false; +// info->fetch_reg = 0; +// info->vertex_buffer = false; +// info->vertex_buffer_reg = 0; +// +// for (int i = 0; i < usages.slots_num; i++) +// { +// const auto& usage = usages.slots[i]; +// switch (usage.type) +// { +// // case 0x02: +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, +// // ShaderStorageUsage::Constant, user_sgpr, nullptr); break; +// // case 0x12: +// // EXIT_NOT_IMPLEMENTED(usage.slot != 0); +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // info->fetch = true; +// // info->fetch_reg = usage.start_register; +// // break; +// // case 0x17: +// // EXIT_NOT_IMPLEMENTED(usage.slot != 0); +// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); +// // info->vertex_buffer = true; +// // info->vertex_buffer_reg = usage.start_register; +// // break; +// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); +// } +// } +// } + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderVertexInputInfo* info) { KYTY_PROFILER_FUNCTION(); @@ -2845,77 +1759,100 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi return; } - const auto* src = reinterpret_cast(regs->vs_regs.data_addr); + ShaderParsedUsage usage; - auto usages = GetUsageSlots(src); + bool gs_instead_of_vs = + (regs->vs_regs.data_addr == 0 && regs->gs_regs.data_addr == 0 && regs->es_regs.data_addr != 0 && regs->gs_regs.chksum != 0); - bool fetch = false; - int fetch_reg = 0; - bool vertex_buffer = 
false; - int vertex_buffer_reg = 0; + uint64_t shader_addr = (gs_instead_of_vs ? regs->es_regs.data_addr : regs->vs_regs.data_addr); + const HW::UserSgprInfo& user_sgpr = (gs_instead_of_vs ? regs->gs_user_sgpr : regs->vs_user_sgpr); - KYTY_PROFILER_BLOCK("ShaderGetInputInfoVS::usages_cycle"); + bool ps5 = Config::IsNextGen(); - for (int i = 0; i < usages.slots_num; i++) + ShaderMappedData data; + + if (ps5) { - const auto& usage = usages.slots[i]; - switch (usage.type) + if (auto iter = g_shader_map->find(shader_addr); iter != g_shader_map->end()) { - case 0x02: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, - regs->vs_user_sgpr, nullptr); - break; - case 0x12: - EXIT_NOT_IMPLEMENTED(usage.slot != 0); - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - fetch = true; - fetch_reg = usage.start_register; - break; - case 0x17: - EXIT_NOT_IMPLEMENTED(usage.slot != 0); - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - vertex_buffer = true; - vertex_buffer_reg = usage.start_register; - break; - default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); + data = iter->second; } } - KYTY_PROFILER_END_BLOCK; - - KYTY_PROFILER_BLOCK("ShaderGetInputInfoVS::parse_fetch"); - - EXIT_NOT_IMPLEMENTED((fetch && !vertex_buffer) || (!fetch && vertex_buffer)); - - if (fetch && vertex_buffer) + if (ps5) { - info->fetch = true; + EXIT_NOT_IMPLEMENTED(data.user_data == nullptr); + EXIT_NOT_IMPLEMENTED(!gs_instead_of_vs); - EXIT_NOT_IMPLEMENTED(fetch_reg >= 16 || vertex_buffer_reg >= 16); + info->gs_prolog = true; - const auto* fetch = reinterpret_cast(static_cast(regs->vs_user_sgpr.value[fetch_reg]) | - (static_cast(regs->vs_user_sgpr.value[fetch_reg + 1]) << 32u)); + ShaderParseUsage2(data.user_data, &usage, &info->bind, user_sgpr); + } else + { + EXIT_NOT_IMPLEMENTED(gs_instead_of_vs); + + info->gs_prolog = false; + + ShaderParseUsage(shader_addr, &usage, &info->bind, user_sgpr); + 
} + + EXIT_NOT_IMPLEMENTED(usage.extended_buffer); + EXIT_NOT_IMPLEMENTED(usage.samplers > 0); + EXIT_NOT_IMPLEMENTED(usage.gds_pointers > 0); + EXIT_NOT_IMPLEMENTED(usage.storage_buffers_readonly > 0 || usage.textures2D_readonly > 0); + EXIT_NOT_IMPLEMENTED(usage.storage_buffers_readwrite > 0 || usage.textures2D_readwrite > 0); + EXIT_NOT_IMPLEMENTED(!ps5 && ((usage.fetch && !usage.vertex_buffer) || (!usage.fetch && usage.vertex_buffer))); + EXIT_NOT_IMPLEMENTED(ps5 && ((usage.vertex_attrib && !usage.vertex_buffer) || (!usage.vertex_attrib && usage.vertex_buffer))); + + if (usage.vertex_buffer && usage.vertex_attrib) + { + info->fetch_external = false; + info->fetch_embedded = true; + info->fetch_inline = false; + info->fetch_attrib_reg = usage.vertex_attrib_reg; + info->fetch_buffer_reg = usage.vertex_buffer_reg; + + const auto* attrib = + reinterpret_cast(static_cast(user_sgpr.value[usage.vertex_attrib_reg]) | + (static_cast(user_sgpr.value[usage.vertex_attrib_reg + 1]) << 32u)); const auto* buffer = - reinterpret_cast(static_cast(regs->vs_user_sgpr.value[vertex_buffer_reg]) | - (static_cast(regs->vs_user_sgpr.value[vertex_buffer_reg + 1]) << 32u)); + reinterpret_cast(static_cast(user_sgpr.value[usage.vertex_buffer_reg]) | + (static_cast(user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u)); + + EXIT_NOT_IMPLEMENTED(attrib == nullptr || buffer == nullptr); + + EXIT_NOT_IMPLEMENTED(data.input_semantics == nullptr || data.num_input_semantics == 0); + + ShaderParseAttrib(info, data.input_semantics, data.num_input_semantics, attrib, buffer); + ShaderDetectBuffers(info, ps5); + } + + if (usage.fetch && usage.vertex_buffer) + { + info->fetch_external = true; + info->fetch_embedded = false; + info->fetch_inline = false; + info->fetch_shader_reg = usage.fetch_reg; + info->fetch_buffer_reg = usage.vertex_buffer_reg; + + EXIT_NOT_IMPLEMENTED(usage.fetch_reg >= 16 || usage.vertex_buffer_reg >= 16); + + const auto* fetch = + 
reinterpret_cast(static_cast(regs->vs_user_sgpr.value[usage.fetch_reg]) | + (static_cast(regs->vs_user_sgpr.value[usage.fetch_reg + 1]) << 32u)); + const auto* buffer = + reinterpret_cast(static_cast(regs->vs_user_sgpr.value[usage.vertex_buffer_reg]) | + (static_cast(regs->vs_user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u)); EXIT_NOT_IMPLEMENTED(fetch == nullptr || buffer == nullptr); ShaderParseFetch(info, fetch, buffer); - ShaderDetectBuffers(info); + ShaderDetectBuffers(info, ps5); } - KYTY_PROFILER_END_BLOCK; - - KYTY_PROFILER_BLOCK("ShaderGetInputInfoVS::calc_binding"); - ShaderCalcBindingIndices(&info->bind); - - KYTY_PROFILER_END_BLOCK; } -// NOLINTNEXTLINE(readability-function-cognitive-complexity) void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegisters* sh, const ShaderVertexInputInfo* vs_info, ShaderPixelInputInfo* ps_info) { @@ -2951,67 +1888,37 @@ void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegis ps_info->target_output_mode[i] = sh->target_output_mode[i]; } - const auto* src = reinterpret_cast(regs->ps_regs.data_addr); + bool ps5 = Config::IsNextGen(); - auto usages = GetUsageSlots(src); + ShaderMappedData data; - uint32_t* extended_buffer = nullptr; - - for (int i = 0; i < usages.slots_num; i++) + if (ps5) { - const auto& usage = usages.slots[i]; - switch (usage.type) + if (auto iter = g_shader_map->find(regs->ps_regs.data_addr); iter != g_shader_map->end()) { - case 0x00: - EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); - if (usage.flags == 0) - { - ShaderGetStorageBuffer(&ps_info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly, - regs->ps_user_sgpr, extended_buffer); - } else if (usage.flags == 3) - { - ShaderGetTextureBuffer(&ps_info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, - regs->ps_user_sgpr, extended_buffer); - 
EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.desc[ps_info->bind.textures2D.textures_num - 1].texture.Type() != 9); - } - break; - case 0x01: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetSampler(&ps_info->bind.samplers, usage.start_register, usage.slot, regs->ps_user_sgpr, extended_buffer); - break; - case 0x02: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetStorageBuffer(&ps_info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, - regs->ps_user_sgpr, extended_buffer); - break; - case 0x04: - EXIT_NOT_IMPLEMENTED(usage.flags != 3); - if (usage.flags == 3) - { - ShaderGetTextureBuffer(&ps_info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, - regs->ps_user_sgpr, extended_buffer); - EXIT_NOT_IMPLEMENTED(ps_info->bind.textures2D.desc[ps_info->bind.textures2D.textures_num - 1].texture.Type() != 9); - } - break; - case 0x1b: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - EXIT_NOT_IMPLEMENTED(usage.slot != 1); - EXIT_NOT_IMPLEMENTED(ps_info->bind.extended.used); - ps_info->bind.extended.used = true; - ps_info->bind.extended.slot = usage.slot; - ps_info->bind.extended.start_register = usage.start_register; - ps_info->bind.extended.data.fields[0] = regs->ps_user_sgpr.value[usage.start_register]; - ps_info->bind.extended.data.fields[1] = regs->ps_user_sgpr.value[usage.start_register + 1]; - extended_buffer = reinterpret_cast(ps_info->bind.extended.data.Base()); - break; - default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); + data = iter->second; } } + ShaderParsedUsage usage; + + if (ps5) + { + EXIT_NOT_IMPLEMENTED(data.user_data == nullptr); + + ShaderParseUsage2(data.user_data, &usage, &ps_info->bind, regs->ps_user_sgpr); + } else + { + ShaderParseUsage(regs->ps_regs.data_addr, &usage, &ps_info->bind, regs->ps_user_sgpr); + } + + EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib); + EXIT_NOT_IMPLEMENTED(usage.storage_buffers_readwrite > 0); + 
EXIT_NOT_IMPLEMENTED(usage.gds_pointers > 0); + ShaderCalcBindingIndices(&ps_info->bind); } -// NOLINTNEXTLINE(readability-function-cognitive-complexity) void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegisters* /*sh*/, ShaderComputeInputInfo* info) { EXIT_IF(info == nullptr); @@ -3031,70 +1938,17 @@ void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderReg info->bind.push_constant_size = 0; info->bind.descriptor_set_slot = 0; - const auto* src = reinterpret_cast(regs->cs_regs.data_addr); + ShaderParsedUsage usage; - auto usages = GetUsageSlots(src); + ShaderParseUsage(regs->cs_regs.data_addr, &usage, &info->bind, regs->cs_user_sgpr); - uint32_t* extended_buffer = nullptr; - - for (int i = 0; i < usages.slots_num; i++) - { - const auto& usage = usages.slots[i]; - switch (usage.type) - { - case 0x00: - EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); - if (usage.flags == 0) - { - ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly, - regs->cs_user_sgpr, extended_buffer); - } else if (usage.flags == 3) - { - ShaderGetTextureBuffer(&info->bind.textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, - regs->cs_user_sgpr, extended_buffer); - EXIT_NOT_IMPLEMENTED(info->bind.textures2D.desc[info->bind.textures2D.textures_num - 1].texture.Type() != 9); - } - break; - case 0x02: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, - regs->cs_user_sgpr, extended_buffer); - break; - case 0x04: - EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); - if (usage.flags == 0) - { - ShaderGetStorageBuffer(&info->bind.storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadWrite, - regs->cs_user_sgpr, extended_buffer); - } else if (usage.flags == 3) - { - ShaderGetTextureBuffer(&info->bind.textures2D, 
usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, - regs->cs_user_sgpr, extended_buffer); - EXIT_NOT_IMPLEMENTED(info->bind.textures2D.desc[info->bind.textures2D.textures_num - 1].texture.Type() != 9); - } - break; - case 0x07: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetGdsPointer(&info->bind.gds_pointers, usage.start_register, usage.slot, regs->cs_user_sgpr, extended_buffer); - break; - case 0x1b: - EXIT_NOT_IMPLEMENTED(usage.flags != 0); - EXIT_NOT_IMPLEMENTED(usage.slot != 1); - EXIT_NOT_IMPLEMENTED(info->bind.extended.used); - info->bind.extended.used = true; - info->bind.extended.slot = usage.slot; - info->bind.extended.start_register = usage.start_register; - info->bind.extended.data.fields[0] = regs->cs_user_sgpr.value[usage.start_register]; - info->bind.extended.data.fields[1] = regs->cs_user_sgpr.value[usage.start_register + 1]; - extended_buffer = reinterpret_cast(info->bind.extended.data.Base()); - break; - default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); - } - } + EXIT_NOT_IMPLEMENTED(usage.samplers > 0); + EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib); ShaderCalcBindingIndices(&info->bind); } +// NOLINTNEXTLINE(readability-function-cognitive-complexity) static void ShaderDbgDumpResources(const ShaderBindResources& bind) { printf("\t descriptor_set_slot = %u\n", bind.descriptor_set_slot); @@ -3114,6 +1968,8 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t extended.start_register = %d\n", bind.extended.start_register); printf("\t extended.data.Base = %" PRIx64 "\n", bind.extended.data.Base()); + bool gen5 = Config::IsNextGen(); + for (int i = 0; i < bind.storage_buffers.buffers_num; i++) { const auto& r = bind.storage_buffers.buffers[i]; @@ -3122,7 +1978,7 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t\t fields = %08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "\n", r.fields[3], r.fields[2], r.fields[1], 
r.fields[0]); - printf("\t\t Base() = %" PRIx64 "\n", r.Base()); + printf("\t\t Base() = %" PRIx64 "\n", gen5 ? r.Base48() : r.Base44()); printf("\t\t Stride() = %" PRIu16 "\n", r.Stride()); printf("\t\t SwizzleEnabled() = %s\n", r.SwizzleEnabled() ? "true" : "false"); printf("\t\t NumRecords() = %" PRIu32 "\n", r.NumRecords()); @@ -3130,14 +1986,21 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t\t DstSelY() = %" PRIu8 "\n", r.DstSelY()); printf("\t\t DstSelZ() = %" PRIu8 "\n", r.DstSelZ()); printf("\t\t DstSelW() = %" PRIu8 "\n", r.DstSelW()); - printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); - printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); - printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + if (!gen5) + { + printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); + printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); + printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + } else + { + printf("\t\t Format() = %" PRIu8 "\n", r.Format()); + printf("\t\t OutOfBounds() = %" PRIu8 "\n", r.OutOfBounds()); + } printf("\t\t AddTid() = %s\n", r.AddTid() ? "true" : "false"); printf("\t\t slot = %d\n", bind.storage_buffers.slots[i]); printf("\t\t start_register = %d\n", bind.storage_buffers.start_register[i]); printf("\t\t extended = %s\n", (bind.storage_buffers.extended[i] ? 
"true" : "false")); - printf("\t\t usage = %s\n", Core::EnumName(bind.storage_buffers.usages[i]).C_Str()); + printf("\t\t usage = %s\n", Core::EnumName8(bind.storage_buffers.usages[i]).c_str()); } for (int i = 0; i < bind.textures2D.textures_num; i++) @@ -3148,35 +2011,61 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t\t fields = %08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "\n", r.fields[7], r.fields[6], r.fields[5], r.fields[4], r.fields[3], r.fields[2], r.fields[1], r.fields[0]); - printf("\t\t Base() = %" PRIx64 "\n", r.Base()); - printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + printf("\t\t Base() = %016" PRIx64 "\n", gen5 ? r.Base40() : r.Base38()); printf("\t\t MinLod() = %" PRIu16 "\n", r.MinLod()); - printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); - printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); - printf("\t\t Width() = %" PRIu16 "\n", r.Width()); - printf("\t\t Height() = %" PRIu16 "\n", r.Height()); - printf("\t\t PerfMod() = %" PRIu8 "\n", r.PerfMod()); - printf("\t\t Interlaced() = %s\n", r.Interlaced() ? "true" : "false"); + if (gen5) + { + printf("\t\t Format() = %" PRIu16 "\n", r.Format()); + printf("\t\t BCSwizzle() = %" PRIu8 "\n", r.BCSwizzle()); + printf("\t\t BaseArray5() = %" PRIu16 "\n", r.BaseArray5()); + printf("\t\t ArrayPitch() = %" PRIu8 "\n", r.ArrayPitch()); + printf("\t\t MaxMip() = %" PRIu8 "\n", r.MaxMip()); + printf("\t\t MinLodWarn5() = %" PRIu16 "\n", r.MinLodWarn5()); + printf("\t\t PerfMod5() = %" PRIu8 "\n", r.PerfMod5()); + printf("\t\t CornerSample() = %s\n", r.CornerSample() ? "true" : "false"); + printf("\t\t MipStatsCntEn() = %s\n", r.MipStatsCntEn() ? "true" : "false"); + printf("\t\t PrtDefColor() = %s\n", r.PrtDefColor() ? "true" : "false"); + printf("\t\t MipStatsCntId() = %" PRIu8 "\n", r.MipStatsCntId()); + printf("\t\t MsaaDepth() = %s\n", r.MsaaDepth() ? 
"true" : "false"); + printf("\t\t MaxUncBlkSize() = %" PRIu8 "\n", r.MaxUncompBlkSize()); + printf("\t\t MaxCompBlkSize()= %" PRIu8 "\n", r.MaxCompBlkSize()); + printf("\t\t MetaPipeAlign() = %s\n", r.MetaPipeAligned() ? "true" : "false"); + printf("\t\t WriteCompress() = %s\n", r.WriteCompress() ? "true" : "false"); + printf("\t\t MetaCompress() = %s\n", r.MetaCompress() ? "true" : "false"); + printf("\t\t DccAlphaPos() = %s\n", r.DccAlphaPos() ? "true" : "false"); + printf("\t\t DccColorTransf()= %s\n", r.DccColorTransf() ? "true" : "false"); + printf("\t\t MetaAddr() = %" PRIx64 "\n", r.MetaAddr()); + + } else + { + printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); + printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); + printf("\t\t PerfMod() = %" PRIu8 "\n", r.PerfMod()); + printf("\t\t Interlaced() = %s\n", r.Interlaced() ? "true" : "false"); + printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + printf("\t\t Pow2Pad() = %s\n", r.Pow2Pad() ? "true" : "false"); + printf("\t\t Pitch() = %" PRIu16 "\n", r.Pitch()); + printf("\t\t BaseArray() = %" PRIu16 "\n", r.BaseArray()); + printf("\t\t LastArray() = %" PRIu16 "\n", r.LastArray()); + printf("\t\t MinLodWarn() = %" PRIu16 "\n", r.MinLodWarn()); + printf("\t\t LodHdwCntEn() = %s\n", r.LodHdwCntEn() ? "true" : "false"); + printf("\t\t CounterBankId() = %" PRIu8 "\n", r.CounterBankId()); + } + printf("\t\t Width() = %" PRIu16 "\n", gen5 ? r.Width5() : r.Width4()); + printf("\t\t Height() = %" PRIu16 "\n", gen5 ? r.Height5() : r.Height4()); printf("\t\t DstSelX() = %" PRIu8 "\n", r.DstSelX()); printf("\t\t DstSelY() = %" PRIu8 "\n", r.DstSelY()); printf("\t\t DstSelZ() = %" PRIu8 "\n", r.DstSelZ()); printf("\t\t DstSelW() = %" PRIu8 "\n", r.DstSelW()); printf("\t\t BaseLevel() = %" PRIu8 "\n", r.BaseLevel()); printf("\t\t LastLevel() = %" PRIu8 "\n", r.LastLevel()); - printf("\t\t TilingIdx() = %" PRIu8 "\n", r.TilingIdx()); - printf("\t\t Pow2Pad() = %s\n", r.Pow2Pad() ? 
"true" : "false"); + printf("\t\t TileMode() = %" PRIu8 "\n", r.TileMode()); printf("\t\t Type() = %" PRIu8 "\n", r.Type()); printf("\t\t Depth() = %" PRIu16 "\n", r.Depth()); - printf("\t\t Pitch() = %" PRIu16 "\n", r.Pitch()); - printf("\t\t BaseArray() = %" PRIu16 "\n", r.BaseArray()); - printf("\t\t LastArray() = %" PRIu16 "\n", r.LastArray()); - printf("\t\t MinLodWarn() = %" PRIu16 "\n", r.MinLodWarn()); - printf("\t\t CounterBankId() = %" PRIu8 "\n", r.CounterBankId()); - printf("\t\t LodHdwCntEn() = %s\n", r.LodHdwCntEn() ? "true" : "false"); printf("\t\t slot = %d\n", bind.textures2D.desc[i].slot); printf("\t\t start_register = %d\n", bind.textures2D.desc[i].start_register); printf("\t\t extended = %s\n", (bind.textures2D.desc[i].extended ? "true" : "false")); - printf("\t\t usage = %s\n", Core::EnumName(bind.textures2D.desc[i].usage).C_Str()); + printf("\t\t usage = %s\n", Core::EnumName8(bind.textures2D.desc[i].usage).c_str()); } for (int i = 0; i < bind.samplers.samplers_num; i++) @@ -3194,7 +2083,16 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t\t DepthCompareFunc() = %" PRIu8 "\n", r.DepthCompareFunc()); printf("\t\t ForceUnormCoords() = %s\n", r.ForceUnormCoords() ? "true" : "false"); printf("\t\t AnisoThreshold() = %" PRIu8 "\n", r.AnisoThreshold()); - printf("\t\t McCoordTrunc() = %s\n", r.McCoordTrunc() ? "true" : "false"); + if (!gen5) + { + printf("\t\t McCoordTrunc() = %s\n", r.McCoordTrunc() ? "true" : "false"); + } else + { + printf("\t\t SkipDegamma() = %s\n", r.SkipDegamma() ? "true" : "false"); + printf("\t\t PointPreclamp() = %s\n", r.PointPreclamp() ? "true" : "false"); + printf("\t\t AnisoOverride() = %s\n", r.AnisoOverride() ? "true" : "false"); + printf("\t\t BlendZeroPrt() = %s\n", r.BlendZeroPrt() ? "true" : "false"); + } printf("\t\t ForceDegamma() = %s\n", r.ForceDegamma() ? 
"true" : "false"); printf("\t\t AnisoBias() = %" PRIu8 "\n", r.AnisoBias()); printf("\t\t TruncCoord() = %s\n", r.TruncCoord() ? "true" : "false"); @@ -3239,8 +2137,12 @@ void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info) printf("ShaderDbgDumpInputInfo()\n"); - printf("\t fetch = %s\n", info->fetch ? "true" : "false"); - printf("\t export_count = %d\n", info->export_count); + printf("\t fetch_external = %s\n", info->fetch_external ? "true" : "false"); + printf("\t fetch_embedded = %s\n", info->fetch_embedded ? "true" : "false"); + printf("\t fetch_inline = %s\n", info->fetch_inline ? "true" : "false"); + printf("\t export_count = %d\n", info->export_count); + + bool gen5 = Config::IsNextGen(); for (int i = 0; i < info->resources_num; i++) { @@ -3253,7 +2155,7 @@ void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info) printf("\t\t registers_num = %d\n", rd.registers_num); printf("\t\t fields = %08" PRIx32 "%08" PRIx32 "%08" PRIx32 "%08" PRIx32 "\n", r.fields[3], r.fields[2], r.fields[1], r.fields[0]); - printf("\t\t Base() = %" PRIx64 "\n", r.Base()); + printf("\t\t Base() = %" PRIx64 "\n", gen5 ? r.Base48() : r.Base44()); printf("\t\t Stride() = %" PRIu16 "\n", r.Stride()); printf("\t\t SwizzleEnabled() = %s\n", r.SwizzleEnabled() ? 
"true" : "false"); printf("\t\t NumRecords() = %" PRIu32 "\n", r.NumRecords()); @@ -3261,9 +2163,16 @@ void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info) printf("\t\t DstSelY() = %" PRIu8 "\n", r.DstSelY()); printf("\t\t DstSelZ() = %" PRIu8 "\n", r.DstSelZ()); printf("\t\t DstSelW() = %" PRIu8 "\n", r.DstSelW()); - printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); - printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); - printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + if (!gen5) + { + printf("\t\t Nfmt() = %" PRIu8 "\n", r.Nfmt()); + printf("\t\t Dfmt() = %" PRIu8 "\n", r.Dfmt()); + printf("\t\t MemoryType() = 0x%02" PRIx8 "\n", r.MemoryType()); + } else + { + printf("\t\t Format() = %" PRIu8 "\n", r.Format()); + printf("\t\t OutOfBounds() = %" PRIu8 "\n", r.OutOfBounds()); + } printf("\t\t AddTid() = %s\n", r.AddTid() ? "true" : "false"); } @@ -3370,7 +2279,7 @@ public: printf("crc32 = %08" PRIx32 "\n", code.GetCrc32()); printf("hash0 = %08" PRIx32 "\n", code.GetHash0()); printf("---------\n"); - printf("%s", code.DbgDump().C_Str()); + printf("%s", code.DbgDump().c_str()); printf("---------\n"); } else if (!m_file.IsInvalid()) { @@ -3378,25 +2287,25 @@ public: m_file.Printf("crc32 = %08" PRIx32 "\n", code.GetCrc32()); m_file.Printf("hash0 = %08" PRIx32 "\n", code.GetHash0()); m_file.Printf("---------\n"); - m_file.Printf("%s", code.DbgDump().C_Str()); + m_file.Printf("%s", code.DbgDump().c_str()); m_file.Printf("---------\n"); } } } - void DumpRecompiledShader(const String& source) + void DumpRecompiledShader(const String8& source) { if (m_enabled) { if (m_console) { printf("--------- Recompiled Shader ---------\n"); - printf("%s\n", source.C_Str()); + printf("%s\n", source.c_str()); printf("---------\n"); } else if (!m_file.IsInvalid()) { m_file.Printf("--------- Recompiled Shader ---------\n"); - m_file.Printf("%s\n", source.C_Str()); + m_file.Printf("%s\n", source.c_str()); m_file.Printf("---------\n"); } } @@ -3406,7 +2315,7 @@ public: { 
if (m_enabled) { - String text; + String8 text; if (!SpirvDisassemble(bin.GetDataConst(), bin.Size(), &text)) { EXIT("SpirvDisassemble() failed\n"); @@ -3414,12 +2323,12 @@ public: if (m_console) { printf("--------- Optimized Shader ---------\n"); - printf("%s\n", text.C_Str()); + printf("%s\n", text.c_str()); printf("---------\n"); } else if (!m_file.IsInvalid()) { m_file.Printf("--------- Optimized Shader ---------\n"); - m_file.Printf("%s\n", Log::RemoveColors(text).C_Str()); + m_file.Printf("%s\n", Log::RemoveColors(String::FromUtf8(text.c_str())).C_Str()); m_file.Printf("---------\n"); } } @@ -3429,7 +2338,7 @@ public: { if (m_enabled) { - String text; + String8 text; if (!SpirvToGlsl(bin.GetDataConst(), bin.Size(), &text)) { EXIT("SpirvToGlsl() failed\n"); @@ -3437,12 +2346,12 @@ public: if (m_console) { printf("--------- Glsl Shader ---------\n"); - printf("%s\n", text.C_Str()); + printf("%s\n", text.c_str()); printf("---------\n"); } else if (!m_file.IsInvalid()) { m_file.Printf("--------- Glsl Shader ---------\n"); - m_file.Printf("%s\n", Log::RemoveColors(text).C_Str()); + m_file.Printf("%s\n", Log::RemoveColors(String::FromUtf8(text.c_str())).C_Str()); m_file.Printf("---------\n"); } } @@ -3453,7 +2362,7 @@ public: if (m_enabled && !m_console && !m_file.IsInvalid()) { Core::File file; - String file_name = m_file_name.FilenameWithoutExtension() + U".spv"; + String file_name = m_file_name.FilenameWithoutExtension() + ".spv"; file.Create(file_name); if (file.IsInvalid()) { @@ -3489,7 +2398,14 @@ ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegis code.SetVsEmbeddedId(regs->vs_embedded_id); } else { - const auto* src = reinterpret_cast(regs->vs_regs.data_addr); + uint32_t hash0 = 0; + uint32_t crc32 = 0; + + bool gs_instead_of_vs = + (regs->vs_regs.data_addr == 0 && regs->gs_regs.data_addr == 0 && regs->es_regs.data_addr != 0 && regs->gs_regs.chksum != 0); + uint64_t shader_addr = (gs_instead_of_vs ? 
regs->es_regs.data_addr : regs->vs_regs.data_addr); + + const auto* src = reinterpret_cast(shader_addr); EXIT_NOT_IMPLEMENTED(src == nullptr); @@ -3498,19 +2414,32 @@ ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegis EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count); - const auto* header = GetBinaryInfo(src); + if (Config::IsNextGen()) + { + EXIT_NOT_IMPLEMENTED(!gs_instead_of_vs); - EXIT_NOT_IMPLEMENTED(header == nullptr); + hash0 = (regs->gs_regs.chksum >> 32u) & 0xffffffffu; + crc32 = regs->gs_regs.chksum & 0xffffffffu; + } else + { + const auto* header = GetBinaryInfo(src); - bi_print("ShaderParseVS():ShaderBinaryInfo", *header); + EXIT_NOT_IMPLEMENTED(header == nullptr); - code.SetCrc32(header->crc32); - code.SetHash0(header->hash0); - shader_parse(0, src, nullptr, &code); + bi_print("ShaderParseVS():ShaderBinaryInfo", *header); + + hash0 = header->hash0; + crc32 = header->crc32; + } + + code.SetCrc32(crc32); + code.SetHash0(hash0); + // shader_parse(0, src, nullptr, &code); + ShaderParse(src, &code); if (g_debug_printfs != nullptr) { - auto id = (static_cast(header->hash0) << 32u) | header->crc32; + auto id = (static_cast(hash0) << 32u) | crc32; if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index)) { code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds; @@ -3525,7 +2454,7 @@ Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInp { KYTY_PROFILER_FUNCTION(profiler::colors::Amber300); - String source; + String8 source; Vector ret; ShaderLogHelper log("vs"); @@ -3547,9 +2476,9 @@ Vector ShaderRecompileVS(const ShaderCode& code, const ShaderVertexInp log.DumpRecompiledShader(source); - if (String err_msg; !SpirvRun(source, &ret, &err_msg)) + if (String8 err_msg; !SpirvRun(source, &ret, &err_msg)) { - EXIT("SpirvRun() failed:\n%s\n", err_msg.C_Str()); + EXIT("SpirvRun() failed:\n%s\n", err_msg.c_str()); } 
log.DumpOptimizedShader(ret); @@ -3573,28 +2502,42 @@ ShaderCode ShaderParsePS(const HW::PixelShaderInfo* regs, const HW::ShaderRegist code.SetPsEmbeddedId(regs->ps_embedded_id); } else { - const auto* src = reinterpret_cast(regs->ps_regs.data_addr); - - EXIT_NOT_IMPLEMENTED(src == nullptr); + uint32_t hash0 = 0; + uint32_t crc32 = 0; ps_print("ShaderParsePS()", regs->ps_regs, *sh); ps_check(regs->ps_regs, *sh); EXIT_NOT_IMPLEMENTED(regs->ps_regs.rsrc2.user_sgpr > regs->ps_user_sgpr.count); - const auto* header = GetBinaryInfo(src); + const auto* src = reinterpret_cast(regs->ps_regs.data_addr); - EXIT_NOT_IMPLEMENTED(header == nullptr); + EXIT_NOT_IMPLEMENTED(src == nullptr); - bi_print("ShaderParsePS():ShaderBinaryInfo", *header); + if (Config::IsNextGen()) + { + hash0 = (regs->ps_regs.chksum >> 32u) & 0xffffffffu; + crc32 = regs->ps_regs.chksum & 0xffffffffu; + } else + { + const auto* header = GetBinaryInfo(src); - code.SetCrc32(header->crc32); - code.SetHash0(header->hash0); - shader_parse(0, src, nullptr, &code); + EXIT_NOT_IMPLEMENTED(header == nullptr); + + bi_print("ShaderParsePS():ShaderBinaryInfo", *header); + + hash0 = header->hash0; + crc32 = header->crc32; + } + + code.SetCrc32(crc32); + code.SetHash0(hash0); + // shader_parse(0, src, nullptr, &code); + ShaderParse(src, &code); if (g_debug_printfs != nullptr) { - auto id = (static_cast(header->hash0) << 32u) | header->crc32; + auto id = (static_cast(hash0) << 32u) | crc32; if (auto index = g_debug_printfs->Find(id, [](auto cmd, auto id) { return cmd.id == id; }); g_debug_printfs->IndexValid(index)) { code.GetDebugPrintfs() = g_debug_printfs->At(index).cmds; @@ -3609,7 +2552,7 @@ Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInpu { KYTY_PROFILER_FUNCTION(profiler::colors::Blue300); - String source; + String8 source; Vector ret; ShaderLogHelper log("ps"); @@ -3636,9 +2579,9 @@ Vector ShaderRecompilePS(const ShaderCode& code, const ShaderPixelInpu log.DumpRecompiledShader(source); - if 
(String err_msg; !SpirvRun(source, &ret, &err_msg)) + if (String8 err_msg; !SpirvRun(source, &ret, &err_msg)) { - EXIT("SpirvRun() failed:\n%s\n", err_msg.C_Str()); + EXIT("SpirvRun() failed:\n%s\n", err_msg.c_str()); } log.DumpOptimizedShader(ret); @@ -3673,7 +2616,8 @@ ShaderCode ShaderParseCS(const HW::ComputeShaderInfo* regs, const HW::ShaderRegi code.SetCrc32(header->crc32); code.SetHash0(header->hash0); - shader_parse(0, src, nullptr, &code); + // shader_parse(0, src, nullptr, &code); + ShaderParse(src, &code); if (g_debug_printfs != nullptr) { @@ -3707,9 +2651,9 @@ Vector ShaderRecompileCS(const ShaderCode& code, const ShaderComputeIn log.DumpRecompiledShader(source); - if (String err_msg; !SpirvRun(source, &ret, &err_msg)) + if (String8 err_msg; !SpirvRun(source, &ret, &err_msg)) { - EXIT("SpirvRun() failed:\n%s\n", err_msg.C_Str()); + EXIT("SpirvRun() failed:\n%s\n", err_msg.c_str()); } log.DumpOptimizedShader(ret); @@ -3944,21 +2888,38 @@ ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInput return ret; } - const auto* src = reinterpret_cast(regs->vs_regs.data_addr); - - EXIT_NOT_IMPLEMENTED(src == nullptr); - - const auto* header = GetBinaryInfo(src); - - EXIT_NOT_IMPLEMENTED(header == nullptr); - ret.ids.Expand(64); - ret.hash0 = header->hash0; - ret.crc32 = header->crc32; + bool gs_instead_of_vs = + (regs->vs_regs.data_addr == 0 && regs->gs_regs.data_addr == 0 && regs->es_regs.data_addr != 0 && regs->gs_regs.chksum != 0); + uint64_t shader_addr = (gs_instead_of_vs ? 
regs->es_regs.data_addr : regs->vs_regs.data_addr); - ret.ids.Add(header->length); - ret.ids.Add(static_cast(input_info->fetch)); + bool gen5 = Config::IsNextGen(); + + if (gen5) + { + EXIT_NOT_IMPLEMENTED(!gs_instead_of_vs); + + ret.hash0 = (regs->gs_regs.chksum >> 32u) & 0xffffffffu; + ret.crc32 = regs->gs_regs.chksum & 0xffffffffu; + } else + { + const auto* src = reinterpret_cast(shader_addr); + + EXIT_NOT_IMPLEMENTED(src == nullptr); + + const auto* header = GetBinaryInfo(src); + + EXIT_NOT_IMPLEMENTED(header == nullptr); + + ret.hash0 = header->hash0; + ret.crc32 = header->crc32; + ret.ids.Add(header->length); + } + + ret.ids.Add(static_cast(input_info->fetch_external)); + ret.ids.Add(static_cast(input_info->fetch_embedded)); + ret.ids.Add(static_cast(input_info->fetch_inline)); ret.ids.Add(input_info->resources_num); ret.ids.Add(input_info->export_count); @@ -3975,8 +2936,15 @@ ShaderId ShaderGetIdVS(const HW::VertexShaderInfo* regs, const ShaderVertexInput ret.ids.Add(r.DstSelY()); ret.ids.Add(r.DstSelZ()); ret.ids.Add(r.DstSelW()); - ret.ids.Add(r.Nfmt()); - ret.ids.Add(r.Dfmt()); + if (gen5) + { + ret.ids.Add(r.Format()); + ret.ids.Add(r.OutOfBounds()); + } else + { + ret.ids.Add(r.Nfmt()); + ret.ids.Add(r.Dfmt()); + } ret.ids.Add(static_cast(r.AddTid())); } @@ -4011,20 +2979,28 @@ ShaderId ShaderGetIdPS(const HW::PixelShaderInfo* regs, const ShaderPixelInputIn return ret; } - const auto* src = reinterpret_cast(regs->ps_regs.data_addr); - - EXIT_NOT_IMPLEMENTED(src == nullptr); - - const auto* header = GetBinaryInfo(src); - - EXIT_NOT_IMPLEMENTED(header == nullptr); - ret.ids.Expand(64); - ret.hash0 = header->hash0; - ret.crc32 = header->crc32; + if (Config::IsNextGen()) + { + ret.hash0 = (regs->ps_regs.chksum >> 32u) & 0xffffffffu; + ret.crc32 = regs->ps_regs.chksum & 0xffffffffu; + } else + { + const auto* src = reinterpret_cast(regs->ps_regs.data_addr); + + EXIT_NOT_IMPLEMENTED(src == nullptr); + + const auto* header = GetBinaryInfo(src); + + 
EXIT_NOT_IMPLEMENTED(header == nullptr); + + ret.hash0 = header->hash0; + ret.crc32 = header->crc32; + + ret.ids.Add(header->length); + } - ret.ids.Add(header->length); ret.ids.Add(input_info->input_num); ret.ids.Add(static_cast(input_info->ps_pos_xy)); ret.ids.Add(static_cast(input_info->ps_pixel_kill_enable)); diff --git a/source/emulator/src/Graphics/ShaderParse.cpp b/source/emulator/src/Graphics/ShaderParse.cpp new file mode 100644 index 0000000..8e35984 --- /dev/null +++ b/source/emulator/src/Graphics/ShaderParse.cpp @@ -0,0 +1,3350 @@ +#include "Emulator/Graphics/ShaderParse.h" + +#include "Kyty/Core/Common.h" +#include "Kyty/Core/DbgAssert.h" + +#include "Emulator/Config.h" +#include "Emulator/Graphics/Shader.h" + +#ifdef KYTY_EMU_ENABLED + +#define KYTY_SHADER_PARSER_ARGS \ + [[maybe_unused]] uint32_t pc, [[maybe_unused]] const uint32_t *src, [[maybe_unused]] const uint32_t *buffer, \ + [[maybe_unused]] ShaderCode *dst, [[maybe_unused]] bool next_gen +#define KYTY_SHADER_PARSER(f) static uint32_t f(KYTY_SHADER_PARSER_ARGS) +#define KYTY_CP_OP_PARSER_ARGS \ + [[maybe_unused]] CommandProcessor *cp, [[maybe_unused]] uint32_t cmd_id, [[maybe_unused]] const uint32_t *buffer, \ + [[maybe_unused]] uint32_t dw, [[maybe_unused]] uint32_t num_dw +#define KYTY_CP_OP_PARSER(f) static uint32_t f(KYTY_CP_OP_PARSER_ARGS) + +#define KYTY_TYPE_STR(s) [[maybe_unused]] static const char* type_str = s; +#define KYTY_NI(i) \ + printf("%s", dst->DbgDump().c_str()); \ + EXIT("unknown %s instruction %s, opcode = 0x%" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ", crc32 = 0x%08" PRIx32 ")\n", \ + type_str, i, opcode, pc, dst->GetHash0(), dst->GetCrc32()); +#define KYTY_UNKNOWN_OP() \ + printf("%s", dst->DbgDump().c_str()); \ + EXIT("unknown %s opcode: 0x%" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ", crc32 = 0x%08" PRIx32 ")\n", type_str, \ + opcode, pc, dst->GetHash0(), dst->GetCrc32()); + +namespace Kyty::Libs::Graphics { + +static ShaderOperand 
operand_parse(uint32_t code) +{ + ShaderOperand ret; + + ret.size = 1; + + if (code >= 0 && code <= 103) + { + ret.type = ShaderOperandType::Sgpr; + ret.register_id = static_cast(code); + } else if (code >= 128 && code <= 192) + { + ret.type = ShaderOperandType::IntegerInlineConstant; + ret.constant.i = static_cast(code) - 128; + ret.size = 0; + } else if (code >= 193 && code <= 208) + { + ret.type = ShaderOperandType::IntegerInlineConstant; + ret.constant.i = 192 - static_cast(code); + ret.size = 0; + } else if (code >= 240 && code <= 247) + { + static const float fv[] = {0.5f, -0.5f, 1.0f, -1.0f, 2.0f, -2.0f, 4.0f, -4.0f}; + ret.type = ShaderOperandType::FloatInlineConstant; + ret.constant.f = fv[static_cast(code) - 240]; + ret.size = 0; + } else if (code >= 256) + { + ret.type = ShaderOperandType::Vgpr; + ret.register_id = static_cast(code) - 256; + } else + { + switch (code) + { + case 106: ret.type = ShaderOperandType::VccLo; break; + case 107: ret.type = ShaderOperandType::VccHi; break; + case 124: ret.type = ShaderOperandType::M0; break; + case 125: ret.type = ShaderOperandType::Null; break; + case 126: ret.type = ShaderOperandType::ExecLo; break; + case 127: ret.type = ShaderOperandType::ExecHi; break; + case 252: ret.type = ShaderOperandType::ExecZ; break; + case 255: + ret.type = ShaderOperandType::LiteralConstant; + ret.size = 0; + break; + default: EXIT("unknown operand: %u\n", code); + } + } + + return ret; +} + +KYTY_SHADER_PARSER(shader_parse_sopc) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("sopc"); + + uint32_t ssrc1 = (buffer[0] >> 8u) & 0xffu; + uint32_t ssrc0 = (buffer[0] >> 0u) & 0xffu; + uint32_t opcode = (buffer[0] >> 16u) & 0x7fu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(ssrc0); + inst.src[1] = operand_parse(ssrc1); + inst.src_num = 2; + + uint32_t size = 1; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + 
inst.src[0].constant.u = buffer[size]; + size++; + } + + if (inst.src[1].type == ShaderOperandType::LiteralConstant) + { + inst.src[1].constant.u = buffer[size]; + size++; + } + + inst.format = ShaderInstructionFormat::Ssrc0Ssrc1; + + switch (opcode) + { + case 0x00: inst.type = ShaderInstructionType::SCmpEqI32; break; + case 0x01: inst.type = ShaderInstructionType::SCmpLgI32; break; + case 0x02: inst.type = ShaderInstructionType::SCmpGtI32; break; + case 0x03: inst.type = ShaderInstructionType::SCmpGeI32; break; + case 0x04: inst.type = ShaderInstructionType::SCmpLtI32; break; + case 0x05: inst.type = ShaderInstructionType::SCmpLeI32; break; + case 0x06: inst.type = ShaderInstructionType::SCmpEqU32; break; + case 0x07: inst.type = ShaderInstructionType::SCmpLgU32; break; + case 0x08: inst.type = ShaderInstructionType::SCmpGtU32; break; + case 0x09: inst.type = ShaderInstructionType::SCmpGeU32; break; + case 0x0a: inst.type = ShaderInstructionType::SCmpLtU32; break; + case 0x0b: inst.type = ShaderInstructionType::SCmpLeU32; break; + case 0xC: KYTY_NI("s_bitcmp0_b32"); break; + case 0xD: KYTY_NI("s_bitcmp1_b32"); break; + case 0xE: KYTY_NI("s_bitcmp0_b64"); break; + case 0xF: KYTY_NI("s_bitcmp1_b64"); break; + case 0x10: KYTY_NI("s_setvskip"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_sopk) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("sopk"); + + uint32_t opcode = (buffer[0] >> 23u) & 0x1fu; + auto imm = static_cast(buffer[0] >> 0u & 0xffffu); + uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(sdst); + + switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::SMovkI32; + inst.format = ShaderInstructionFormat::SVdstSVsrc0; + inst.src[0].type = 
ShaderOperandType::IntegerInlineConstant; + inst.src[0].constant.i = imm; + inst.src_num = 1; + break; + + case 0x2: KYTY_NI("s_cmovk_i32"); break; + case 0x3: KYTY_NI("s_cmpk_eq_i32"); break; + case 0x4: KYTY_NI("s_cmpk_lg_i32"); break; + case 0x5: KYTY_NI("s_cmpk_gt_i32"); break; + case 0x6: KYTY_NI("s_cmpk_ge_i32"); break; + case 0x7: KYTY_NI("s_cmpk_lt_i32"); break; + case 0x8: KYTY_NI("s_cmpk_le_i32"); break; + case 0x9: KYTY_NI("s_cmpk_eq_u32"); break; + case 0xA: KYTY_NI("s_cmpk_lg_u32"); break; + case 0xB: KYTY_NI("s_cmpk_gt_u32"); break; + case 0xC: KYTY_NI("s_cmpk_ge_u32"); break; + case 0xD: KYTY_NI("s_cmpk_lt_u32"); break; + case 0xE: KYTY_NI("s_cmpk_le_u32"); break; + case 0xF: KYTY_NI("s_addk_i32"); break; + case 0x10: KYTY_NI("s_mulk_i32"); break; + case 0x11: KYTY_NI("s_cbranch_i_fork"); break; + case 0x12: KYTY_NI("s_getreg_b32"); break; + case 0x13: KYTY_NI("s_setreg_b32"); break; + case 0x14: KYTY_NI("s_getreg_regrd_b32"); break; + case 0x15: KYTY_NI("s_setreg_imm32_b32"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return 1; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_sopp) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("sopp"); + + uint32_t opcode = (buffer[0] >> 16u) & 0x7fu; + uint32_t simm = (buffer[0] >> 0u) & 0xffffu; + + ShaderInstruction inst; + inst.pc = pc; + + inst.format = ShaderInstructionFormat::Label; + inst.src[0].type = ShaderOperandType::LiteralConstant; + inst.src[0].constant.i = static_cast(simm) * 4; + inst.src_num = 1; + + switch (opcode) + { + case 0x01: + inst.type = ShaderInstructionType::SEndpgm; + inst.format = ShaderInstructionFormat::Empty; + inst.src_num = 0; + break; + case 0x02: inst.type = ShaderInstructionType::SBranch; break; + case 0x04: inst.type = ShaderInstructionType::SCbranchScc0; break; + case 0x05: inst.type = 
ShaderInstructionType::SCbranchScc1; break; + case 0x06: inst.type = ShaderInstructionType::SCbranchVccz; break; + case 0x07: inst.type = ShaderInstructionType::SCbranchVccnz; break; + case 0x08: inst.type = ShaderInstructionType::SCbranchExecz; break; + case 0x0c: + inst.type = ShaderInstructionType::SWaitcnt; + inst.format = ShaderInstructionFormat::Imm; + inst.src[0].type = ShaderOperandType::LiteralConstant; + inst.src[0].constant.u = simm; + inst.src_num = 1; + break; + case 0x10: + inst.type = ShaderInstructionType::SSendmsg; + inst.format = ShaderInstructionFormat::Imm; + inst.src[0].type = ShaderOperandType::LiteralConstant; + inst.src[0].constant.u = simm; + inst.src_num = 1; + break; + case 0x20: + inst.type = ShaderInstructionType::SInstPrefetch; + inst.format = ShaderInstructionFormat::Imm; + inst.src[0].type = ShaderOperandType::LiteralConstant; + inst.src[0].constant.u = simm; + inst.src_num = 1; + break; + + case 0x0: KYTY_NI("s_nop"); break; + case 0x9: KYTY_NI("s_cbranch_execnz"); break; + case 0xA: KYTY_NI("s_barrier"); break; + case 0xB: KYTY_NI("s_setkill"); break; + case 0xD: KYTY_NI("s_sethalt"); break; + case 0xE: KYTY_NI("s_sleep"); break; + case 0xF: KYTY_NI("s_setprio"); break; + case 0x11: KYTY_NI("s_sendmsghalt"); break; + case 0x12: KYTY_NI("s_trap"); break; + case 0x13: KYTY_NI("s_icache_inv"); break; + case 0x14: KYTY_NI("s_incperflevel"); break; + case 0x15: KYTY_NI("s_decperflevel"); break; + case 0x16: KYTY_NI("s_ttracedata"); break; + case 0x17: KYTY_NI("s_cbranch_cdbgsys"); break; + case 0x18: KYTY_NI("s_cbranch_cdbguser"); break; + case 0x19: KYTY_NI("s_cbranch_cdbgsys_or_user"); break; + case 0x1A: KYTY_NI("s_cbranch_cdbgsys_and_user"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + if (inst.type == ShaderInstructionType::SCbranchScc0 || inst.type == ShaderInstructionType::SCbranchScc1 || + inst.type == ShaderInstructionType::SCbranchVccz || inst.type == ShaderInstructionType::SCbranchVccnz 
|| + inst.type == ShaderInstructionType::SCbranchExecz || inst.type == ShaderInstructionType::SBranch) + { + dst->GetLabels().Add(ShaderLabel(inst)); + + if (inst.type != ShaderInstructionType::SBranch) + { + dst->GetIndirectLabels().Add(ShaderLabel(inst.pc + 4, inst.pc)); + } + } + + return 1; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_sop1) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("sop1"); + + uint32_t opcode = (buffer[0] >> 8u) & 0xffu; + uint32_t ssrc0 = (buffer[0] >> 0u) & 0xffu; + uint32_t sdst = (buffer[0] >> 16u) & 0x7fu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(ssrc0); + inst.src_num = 1; + inst.dst = operand_parse(sdst); + + uint32_t size = 1; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + inst.src[0].constant.u = buffer[size]; + size++; + } + + switch (opcode) + { + case 0x03: + inst.type = ShaderInstructionType::SMovB32; + inst.format = ShaderInstructionFormat::SVdstSVsrc0; + break; + case 0x04: + inst.type = ShaderInstructionType::SMovB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02; + inst.dst.size = 2; + inst.src[0].size = 2; + break; + case 0x05: KYTY_NI("s_cmov_b32"); break; + case 0x06: KYTY_NI("s_cmov_b64"); break; + case 0x07: KYTY_NI("s_not_b32"); break; + case 0x08: KYTY_NI("s_not_b64"); break; + case 0x09: KYTY_NI("s_wqm_b32"); break; + case 0x0a: + inst.type = ShaderInstructionType::SWqmB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02; + inst.dst.size = 2; + inst.src[0].size = 2; + break; + case 0x0B: KYTY_NI("s_brev_b32"); break; + case 0x0C: KYTY_NI("s_brev_b64"); break; + case 0x0D: KYTY_NI("s_bcnt0_i32_b32"); break; + case 0x0E: KYTY_NI("s_bcnt0_i32_b64"); break; + case 0x0F: KYTY_NI("s_bcnt1_i32_b32"); break; + case 0x10: KYTY_NI("s_bcnt1_i32_b64"); break; + case 0x11: KYTY_NI("s_ff0_i32_b32"); break; + case 0x12: 
KYTY_NI("s_ff0_i32_b64"); break; + case 0x13: KYTY_NI("s_ff1_i32_b32"); break; + case 0x14: KYTY_NI("s_ff1_i32_b64"); break; + case 0x15: KYTY_NI("s_flbit_i32_b32"); break; + case 0x16: KYTY_NI("s_flbit_i32_b64"); break; + case 0x17: KYTY_NI("s_flbit_i32"); break; + case 0x18: KYTY_NI("s_flbit_i32_i64"); break; + case 0x19: KYTY_NI("s_sext_i32_i8"); break; + case 0x1A: KYTY_NI("s_sext_i32_i16"); break; + case 0x1B: KYTY_NI("s_bitset0_b32"); break; + case 0x1C: KYTY_NI("s_bitset0_b64"); break; + case 0x1D: KYTY_NI("s_bitset1_b32"); break; + case 0x1E: KYTY_NI("s_bitset1_b64"); break; + case 0x1F: KYTY_NI("s_getpc_b64"); break; + case 0x20: + inst.type = ShaderInstructionType::SSetpcB64; + inst.format = ShaderInstructionFormat::Saddr; + inst.src[0].size = 2; + break; + case 0x21: + inst.type = ShaderInstructionType::SSwappcB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02; + inst.src[0].size = 2; + inst.dst.size = 2; + break; + case 0x22: KYTY_NI("s_rfe_b64"); break; + case 0x24: + inst.type = ShaderInstructionType::SAndSaveexecB64; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02; + inst.dst.size = 2; + inst.src[0].size = 2; + break; + case 0x25: KYTY_NI("s_or_saveexec_b64"); break; + case 0x26: KYTY_NI("s_xor_saveexec_b64"); break; + case 0x27: KYTY_NI("s_andn2_saveexec_b64"); break; + case 0x28: KYTY_NI("s_orn2_saveexec_b64"); break; + case 0x29: KYTY_NI("s_nand_saveexec_b64"); break; + case 0x2A: KYTY_NI("s_nor_saveexec_b64"); break; + case 0x2B: KYTY_NI("s_xnor_saveexec_b64"); break; + case 0x2C: KYTY_NI("s_quadmask_b32"); break; + case 0x2D: KYTY_NI("s_quadmask_b64"); break; + case 0x2E: KYTY_NI("s_movrels_b32"); break; + case 0x2F: KYTY_NI("s_movrels_b64"); break; + case 0x30: KYTY_NI("s_movreld_b32"); break; + case 0x31: KYTY_NI("s_movreld_b64"); break; + case 0x32: KYTY_NI("s_cbranch_join"); break; + case 0x33: KYTY_NI("s_mov_regrd_b32"); break; + case 0x34: KYTY_NI("s_abs_i32"); break; + case 0x35: KYTY_NI("s_mov_fed_b32"); break; + + default: 
KYTY_UNKNOWN_OP();
+    }
+
+    dst->GetInstructions().Add(inst);
+
+    return size;
+}
+
+// Parses the instruction at buffer[0] using the SOP2 field layout and appends it to dst.
+// Fields taken from buffer[0]: ssrc0 = bits 0..7, ssrc1 = bits 8..15, sdst = bits 16..22,
+// opcode = bits 23..29.
+// Opcodes 0x7d, 0x7e and 0x7f are passed on to the sop1, sopc and sopp parsers, and every
+// other opcode >= 0x60 to the sopk parser; their return value is returned as-is.
+// Otherwise returns size: 1, plus 1 for each literal-constant source read from buffer.
+// NOLINTNEXTLINE(readability-function-cognitive-complexity)
+KYTY_SHADER_PARSER(shader_parse_sop2)
+{
+    EXIT_IF(dst == nullptr);
+    EXIT_IF(src == nullptr);
+    EXIT_IF(buffer == nullptr || buffer < src);
+
+    KYTY_TYPE_STR("sop2");
+
+    uint32_t opcode = (buffer[0] >> 23u) & 0x7fu;
+
+    // These opcode values are handled by the other scalar parsers; pass the same arguments on.
+    if (opcode == 0x7d)
+    {
+        return shader_parse_sop1(pc, src, buffer, dst, next_gen);
+    }
+    if (opcode == 0x7e)
+    {
+        return shader_parse_sopc(pc, src, buffer, dst, next_gen);
+    }
+    if (opcode == 0x7f)
+    {
+        return shader_parse_sopp(pc, src, buffer, dst, next_gen);
+    }
+    if (opcode >= 0x60)
+    {
+        return shader_parse_sopk(pc, src, buffer, dst, next_gen);
+    }
+
+    const uint32_t second_src_field = (buffer[0] >> 8u) & 0xffu;
+    const uint32_t first_src_field  = (buffer[0] >> 0u) & 0xffu;
+    const uint32_t dst_field        = (buffer[0] >> 16u) & 0x7fu;
+
+    ShaderInstruction inst;
+    inst.pc      = pc;
+    inst.src[0]  = operand_parse(first_src_field);
+    inst.src[1]  = operand_parse(second_src_field);
+    inst.src_num = 2;
+    inst.dst     = operand_parse(dst_field);
+
+    uint32_t size = 1;
+
+    // A literal-constant source takes its value from the next unread buffer word, src[0] first.
+    for (uint32_t i = 0; i < 2; i++)
+    {
+        if (inst.src[i].type == ShaderOperandType::LiteralConstant)
+        {
+            inst.src[i].constant.u = buffer[size];
+            size++;
+        }
+    }
+
+    // Default format; the *B64 / *U64 opcodes below replace it.
+    inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1;
+
+    // Destination and both sources get size 2.
+    const auto wide_both_sources = [&inst](ShaderInstructionType type)
+    {
+        inst.type        = type;
+        inst.format      = ShaderInstructionFormat::Sdst2Ssrc02Ssrc12;
+        inst.dst.size    = 2;
+        inst.src[0].size = 2;
+        inst.src[1].size = 2;
+    };
+
+    // Destination and first source get size 2; the second source keeps its parsed size.
+    const auto wide_first_source = [&inst](ShaderInstructionType type)
+    {
+        inst.type        = type;
+        inst.format      = ShaderInstructionFormat::Sdst2Ssrc02Ssrc1;
+        inst.dst.size    = 2;
+        inst.src[0].size = 2;
+    };
+
+    switch (opcode)
+    {
+        case 0x00: inst.type = ShaderInstructionType::SAddU32; break;
+        case 0x01: KYTY_NI("s_sub_u32"); break;
+        case 0x02: inst.type = ShaderInstructionType::SAddI32; break;
+        case 0x03: inst.type = ShaderInstructionType::SSubI32; break;
+        case 0x04: inst.type = ShaderInstructionType::SAddcU32; break;
+        case 0x05: KYTY_NI("s_subb_u32"); break;
+        case 0x06: KYTY_NI("s_min_i32"); break;
+        case 0x07: KYTY_NI("s_min_u32"); break;
+        case 0x08: KYTY_NI("s_max_i32"); break;
+        case 0x09: KYTY_NI("s_max_u32"); break;
+        case 0x0a: inst.type = ShaderInstructionType::SCselectB32; break;
+        case 0x0b: wide_both_sources(ShaderInstructionType::SCselectB64); break;
+        case 0x0e: inst.type = ShaderInstructionType::SAndB32; break;
+        case 0x0f: wide_both_sources(ShaderInstructionType::SAndB64); break;
+        case 0x10: inst.type = ShaderInstructionType::SOrB32; break;
+        case 0x11: wide_both_sources(ShaderInstructionType::SOrB64); break;
+        case 0x12: KYTY_NI("s_xor_b32"); break;
+        case 0x13: wide_both_sources(ShaderInstructionType::SXorB64); break;
+        case 0x14: KYTY_NI("s_andn2_b32"); break;
+        case 0x15: wide_both_sources(ShaderInstructionType::SAndn2B64); break;
+        case 0x16: KYTY_NI("s_orn2_b32"); break;
+        case 0x17: wide_both_sources(ShaderInstructionType::SOrn2B64); break;
+        case 0x18: KYTY_NI("s_nand_b32"); break;
+        case 0x19: wide_both_sources(ShaderInstructionType::SNandB64); break;
+        case 0x1A: KYTY_NI("s_nor_b32"); break;
+        case 0x1b: wide_both_sources(ShaderInstructionType::SNorB64); break;
+        case 0x1C: KYTY_NI("s_xnor_b32"); break;
+        case 0x1d: wide_both_sources(ShaderInstructionType::SXnorB64); break;
+        case 0x1e: inst.type = ShaderInstructionType::SLshlB32; break;
+        case 0x1f: wide_first_source(ShaderInstructionType::SLshlB64); break;
+        case 0x20: inst.type = ShaderInstructionType::SLshrB32; break;
+        case 0x21: wide_first_source(ShaderInstructionType::SLshrB64); break;
+        case 0x22: KYTY_NI("s_ashr_i32"); break;
+        case 0x23: KYTY_NI("s_ashr_i64"); break;
+        case 0x24: inst.type = ShaderInstructionType::SBfmB32; break;
+        case 0x25: KYTY_NI("s_bfm_b64"); break;
+        case 0x26: inst.type = ShaderInstructionType::SMulI32; break;
+        case 0x27: inst.type = ShaderInstructionType::SBfeU32; break;
+        case 0x28: KYTY_NI("s_bfe_i32"); break;
+        case 0x29: wide_first_source(ShaderInstructionType::SBfeU64); break;
+        case 0x2A: KYTY_NI("s_bfe_i64"); break;
+        case 0x2B: KYTY_NI("s_cbranch_g_fork"); break;
+        case 0x2C: KYTY_NI("s_absdiff_i32"); break;
+        case 0x31:
+            // Only accepted when next_gen is set.
+            EXIT_NOT_IMPLEMENTED(!next_gen);
+            inst.type = ShaderInstructionType::SLshl4AddU32;
+            break;
+        case 0x32: KYTY_NI("s_pack_ll_b32_b16"); break;
+        case 0x33: KYTY_NI("s_pack_lh_b32_b16"); break;
+        case 0x34: KYTY_NI("s_pack_hh_b32_b16"); break;
+
+        default: KYTY_UNKNOWN_OP();
+    }
+
+    dst->GetInstructions().Add(inst);
+
+    return size;
+}
+
+// NOLINTNEXTLINE(readability-function-cognitive-complexity,readability-function-size,google-readability-function-size,hicpp-function-size)
+KYTY_SHADER_PARSER(shader_parse_vopc)
+{
+    EXIT_IF(dst == nullptr);
+    EXIT_IF(src == nullptr);
+    EXIT_IF(buffer == nullptr || buffer < src);
+
+    KYTY_TYPE_STR("vopc");
+
+    uint32_t
opcode = (buffer[0] >> 17u) & 0xffu; + uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; + uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(src0); + inst.src[1] = operand_parse(vsrc1 + 256); + inst.src_num = 2; + + uint32_t size = 1; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + inst.src[0].constant.u = buffer[size]; + size++; + } + + inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; + inst.dst.type = ShaderOperandType::VccLo; + inst.dst.size = 2; + + switch (opcode) + { + case 0x00: inst.type = ShaderInstructionType::VCmpFF32; break; + case 0x01: inst.type = ShaderInstructionType::VCmpLtF32; break; + case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break; + case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break; + case 0x04: inst.type = ShaderInstructionType::VCmpGtF32; break; + case 0x05: inst.type = ShaderInstructionType::VCmpLgF32; break; + case 0x06: inst.type = ShaderInstructionType::VCmpGeF32; break; + case 0x07: inst.type = ShaderInstructionType::VCmpOF32; break; + case 0x08: inst.type = ShaderInstructionType::VCmpUF32; break; + case 0x09: inst.type = ShaderInstructionType::VCmpNgeF32; break; + case 0x0a: inst.type = ShaderInstructionType::VCmpNlgF32; break; + case 0x0b: inst.type = ShaderInstructionType::VCmpNgtF32; break; + case 0x0c: inst.type = ShaderInstructionType::VCmpNleF32; break; + case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break; + case 0x0e: inst.type = ShaderInstructionType::VCmpNltF32; break; + case 0x0f: inst.type = ShaderInstructionType::VCmpTruF32; break; + case 0x10: KYTY_NI("v_cmpx_f_f32"); break; + case 0x11: inst.type = ShaderInstructionType::VCmpxLtF32; break; + case 0x12: KYTY_NI("v_cmpx_eq_f32"); break; + case 0x13: KYTY_NI("v_cmpx_le_f32"); break; + case 0x14: inst.type = ShaderInstructionType::VCmpxGtF32; break; + case 0x15: KYTY_NI("v_cmpx_lg_f32"); break; + case 0x16: KYTY_NI("v_cmpx_ge_f32"); break; + case 
0x17: KYTY_NI("v_cmpx_o_f32"); break; + case 0x18: KYTY_NI("v_cmpx_u_f32"); break; + case 0x19: KYTY_NI("v_cmpx_nge_f32"); break; + case 0x1A: KYTY_NI("v_cmpx_nlg_f32"); break; + case 0x1B: KYTY_NI("v_cmpx_ngt_f32"); break; + case 0x1C: KYTY_NI("v_cmpx_nle_f32"); break; + case 0x1d: inst.type = ShaderInstructionType::VCmpxNeqF32; break; + case 0x1E: KYTY_NI("v_cmpx_nlt_f32"); break; + case 0x1F: KYTY_NI("v_cmpx_tru_f32"); break; + case 0x20: KYTY_NI("v_cmp_f_f64"); break; + case 0x21: KYTY_NI("v_cmp_lt_f64"); break; + case 0x22: KYTY_NI("v_cmp_eq_f64"); break; + case 0x23: KYTY_NI("v_cmp_le_f64"); break; + case 0x24: KYTY_NI("v_cmp_gt_f64"); break; + case 0x25: KYTY_NI("v_cmp_lg_f64"); break; + case 0x26: KYTY_NI("v_cmp_ge_f64"); break; + case 0x27: KYTY_NI("v_cmp_o_f64"); break; + case 0x28: KYTY_NI("v_cmp_u_f64"); break; + case 0x29: KYTY_NI("v_cmp_nge_f64"); break; + case 0x2A: KYTY_NI("v_cmp_nlg_f64"); break; + case 0x2B: KYTY_NI("v_cmp_ngt_f64"); break; + case 0x2C: KYTY_NI("v_cmp_nle_f64"); break; + case 0x2D: KYTY_NI("v_cmp_neq_f64"); break; + case 0x2E: KYTY_NI("v_cmp_nlt_f64"); break; + case 0x2F: KYTY_NI("v_cmp_tru_f64"); break; + case 0x30: KYTY_NI("v_cmpx_f_f64"); break; + case 0x31: KYTY_NI("v_cmpx_lt_f64"); break; + case 0x32: KYTY_NI("v_cmpx_eq_f64"); break; + case 0x33: KYTY_NI("v_cmpx_le_f64"); break; + case 0x34: KYTY_NI("v_cmpx_gt_f64"); break; + case 0x35: KYTY_NI("v_cmpx_lg_f64"); break; + case 0x36: KYTY_NI("v_cmpx_ge_f64"); break; + case 0x37: KYTY_NI("v_cmpx_o_f64"); break; + case 0x38: KYTY_NI("v_cmpx_u_f64"); break; + case 0x39: KYTY_NI("v_cmpx_nge_f64"); break; + case 0x3A: KYTY_NI("v_cmpx_nlg_f64"); break; + case 0x3B: KYTY_NI("v_cmpx_ngt_f64"); break; + case 0x3C: KYTY_NI("v_cmpx_nle_f64"); break; + case 0x3D: KYTY_NI("v_cmpx_neq_f64"); break; + case 0x3E: KYTY_NI("v_cmpx_nlt_f64"); break; + case 0x3F: KYTY_NI("v_cmpx_tru_f64"); break; + case 0x40: KYTY_NI("v_cmps_f_f32"); break; + case 0x41: KYTY_NI("v_cmps_lt_f32"); break; + case 
0x42: KYTY_NI("v_cmps_eq_f32"); break; + case 0x43: KYTY_NI("v_cmps_le_f32"); break; + case 0x44: KYTY_NI("v_cmps_gt_f32"); break; + case 0x45: KYTY_NI("v_cmps_lg_f32"); break; + case 0x46: KYTY_NI("v_cmps_ge_f32"); break; + case 0x47: KYTY_NI("v_cmps_o_f32"); break; + case 0x48: KYTY_NI("v_cmps_u_f32"); break; + case 0x49: KYTY_NI("v_cmps_nge_f32"); break; + case 0x4A: KYTY_NI("v_cmps_nlg_f32"); break; + case 0x4B: KYTY_NI("v_cmps_ngt_f32"); break; + case 0x4C: KYTY_NI("v_cmps_nle_f32"); break; + case 0x4D: KYTY_NI("v_cmps_neq_f32"); break; + case 0x4E: KYTY_NI("v_cmps_nlt_f32"); break; + case 0x4F: KYTY_NI("v_cmps_tru_f32"); break; + case 0x50: KYTY_NI("v_cmpsx_f_f32"); break; + case 0x51: KYTY_NI("v_cmpsx_lt_f32"); break; + case 0x52: KYTY_NI("v_cmpsx_eq_f32"); break; + case 0x53: KYTY_NI("v_cmpsx_le_f32"); break; + case 0x54: KYTY_NI("v_cmpsx_gt_f32"); break; + case 0x55: KYTY_NI("v_cmpsx_lg_f32"); break; + case 0x56: KYTY_NI("v_cmpsx_ge_f32"); break; + case 0x57: KYTY_NI("v_cmpsx_o_f32"); break; + case 0x58: KYTY_NI("v_cmpsx_u_f32"); break; + case 0x59: KYTY_NI("v_cmpsx_nge_f32"); break; + case 0x5A: KYTY_NI("v_cmpsx_nlg_f32"); break; + case 0x5B: KYTY_NI("v_cmpsx_ngt_f32"); break; + case 0x5C: KYTY_NI("v_cmpsx_nle_f32"); break; + case 0x5D: KYTY_NI("v_cmpsx_neq_f32"); break; + case 0x5E: KYTY_NI("v_cmpsx_nlt_f32"); break; + case 0x5F: KYTY_NI("v_cmpsx_tru_f32"); break; + case 0x60: KYTY_NI("v_cmps_f_f64"); break; + case 0x61: KYTY_NI("v_cmps_lt_f64"); break; + case 0x62: KYTY_NI("v_cmps_eq_f64"); break; + case 0x63: KYTY_NI("v_cmps_le_f64"); break; + case 0x64: KYTY_NI("v_cmps_gt_f64"); break; + case 0x65: KYTY_NI("v_cmps_lg_f64"); break; + case 0x66: KYTY_NI("v_cmps_ge_f64"); break; + case 0x67: KYTY_NI("v_cmps_o_f64"); break; + case 0x68: KYTY_NI("v_cmps_u_f64"); break; + case 0x69: KYTY_NI("v_cmps_nge_f64"); break; + case 0x6A: KYTY_NI("v_cmps_nlg_f64"); break; + case 0x6B: KYTY_NI("v_cmps_ngt_f64"); break; + case 0x6C: KYTY_NI("v_cmps_nle_f64"); break; + 
case 0x6D: KYTY_NI("v_cmps_neq_f64"); break; + case 0x6E: KYTY_NI("v_cmps_nlt_f64"); break; + case 0x6F: KYTY_NI("v_cmps_tru_f64"); break; + case 0x70: KYTY_NI("v_cmpsx_f_f64"); break; + case 0x71: KYTY_NI("v_cmpsx_lt_f64"); break; + case 0x72: KYTY_NI("v_cmpsx_eq_f64"); break; + case 0x73: KYTY_NI("v_cmpsx_le_f64"); break; + case 0x74: KYTY_NI("v_cmpsx_gt_f64"); break; + case 0x75: KYTY_NI("v_cmpsx_lg_f64"); break; + case 0x76: KYTY_NI("v_cmpsx_ge_f64"); break; + case 0x77: KYTY_NI("v_cmpsx_o_f64"); break; + case 0x78: KYTY_NI("v_cmpsx_u_f64"); break; + case 0x79: KYTY_NI("v_cmpsx_nge_f64"); break; + case 0x7A: KYTY_NI("v_cmpsx_nlg_f64"); break; + case 0x7B: KYTY_NI("v_cmpsx_ngt_f64"); break; + case 0x7C: KYTY_NI("v_cmpsx_nle_f64"); break; + case 0x7D: KYTY_NI("v_cmpsx_neq_f64"); break; + case 0x7E: KYTY_NI("v_cmpsx_nlt_f64"); break; + case 0x7F: KYTY_NI("v_cmpsx_tru_f64"); break; + case 0x80: inst.type = ShaderInstructionType::VCmpFI32; break; + case 0x81: inst.type = ShaderInstructionType::VCmpLtI32; break; + case 0x82: inst.type = ShaderInstructionType::VCmpEqI32; break; + case 0x83: inst.type = ShaderInstructionType::VCmpLeI32; break; + case 0x84: inst.type = ShaderInstructionType::VCmpGtI32; break; + case 0x85: inst.type = ShaderInstructionType::VCmpNeI32; break; + case 0x86: inst.type = ShaderInstructionType::VCmpGeI32; break; + case 0x87: inst.type = ShaderInstructionType::VCmpTI32; break; + case 0x88: KYTY_NI("v_cmp_class_f32"); break; + case 0x89: KYTY_NI("v_cmp_lt_i16"); break; + case 0x8A: KYTY_NI("v_cmp_eq_i16"); break; + case 0x8B: KYTY_NI("v_cmp_le_i16"); break; + case 0x8C: KYTY_NI("v_cmp_gt_i16"); break; + case 0x8D: KYTY_NI("v_cmp_ne_i16"); break; + case 0x8E: KYTY_NI("v_cmp_ge_i16"); break; + case 0x8F: KYTY_NI("v_cmp_class_f16"); break; + case 0x90: KYTY_NI("v_cmpx_f_i32"); break; + case 0x91: KYTY_NI("v_cmpx_lt_i32"); break; + case 0x92: KYTY_NI("v_cmpx_eq_i32"); break; + case 0x93: KYTY_NI("v_cmpx_le_i32"); break; + case 0x94: 
KYTY_NI("v_cmpx_gt_i32"); break; + case 0x95: KYTY_NI("v_cmpx_ne_i32"); break; + case 0x96: KYTY_NI("v_cmpx_ge_i32"); break; + case 0x97: KYTY_NI("v_cmpx_t_i32"); break; + case 0x98: KYTY_NI("v_cmpx_class_f32"); break; + case 0x99: KYTY_NI("v_cmpx_lt_i16"); break; + case 0x9A: KYTY_NI("v_cmpx_eq_i16"); break; + case 0x9B: KYTY_NI("v_cmpx_le_i16"); break; + case 0x9C: KYTY_NI("v_cmpx_gt_i16"); break; + case 0x9D: KYTY_NI("v_cmpx_ne_i16"); break; + case 0x9E: KYTY_NI("v_cmpx_ge_i16"); break; + case 0x9F: KYTY_NI("v_cmpx_class_f16"); break; + case 0xA0: KYTY_NI("v_cmp_f_i64"); break; + case 0xA1: KYTY_NI("v_cmp_lt_i64"); break; + case 0xA2: KYTY_NI("v_cmp_eq_i64"); break; + case 0xA3: KYTY_NI("v_cmp_le_i64"); break; + case 0xA4: KYTY_NI("v_cmp_gt_i64"); break; + case 0xA5: KYTY_NI("v_cmp_ne_i64"); break; + case 0xA6: KYTY_NI("v_cmp_ge_i64"); break; + case 0xA7: KYTY_NI("v_cmp_t_i64"); break; + case 0xA8: KYTY_NI("v_cmp_class_f64"); break; + case 0xA9: KYTY_NI("v_cmp_lt_u16"); break; + case 0xAA: KYTY_NI("v_cmp_eq_u16"); break; + case 0xAB: KYTY_NI("v_cmp_le_u16"); break; + case 0xAC: KYTY_NI("v_cmp_gt_u16"); break; + case 0xAD: KYTY_NI("v_cmp_ne_u16"); break; + case 0xAE: KYTY_NI("v_cmp_ge_u16"); break; + case 0xB0: KYTY_NI("v_cmpx_f_i64"); break; + case 0xB1: KYTY_NI("v_cmpx_lt_i64"); break; + case 0xB2: KYTY_NI("v_cmpx_eq_i64"); break; + case 0xB3: KYTY_NI("v_cmpx_le_i64"); break; + case 0xB4: KYTY_NI("v_cmpx_gt_i64"); break; + case 0xB5: KYTY_NI("v_cmpx_ne_i64"); break; + case 0xB6: KYTY_NI("v_cmpx_ge_i64"); break; + case 0xB7: KYTY_NI("v_cmpx_t_i64"); break; + case 0xB8: KYTY_NI("v_cmpx_class_f64"); break; + case 0xB9: KYTY_NI("v_cmpx_lt_u16"); break; + case 0xBA: KYTY_NI("v_cmpx_eq_u16"); break; + case 0xBB: KYTY_NI("v_cmpx_le_u16"); break; + case 0xBC: KYTY_NI("v_cmpx_gt_u16"); break; + case 0xBD: KYTY_NI("v_cmpx_ne_u16"); break; + case 0xBE: KYTY_NI("v_cmpx_ge_u16"); break; + case 0xc0: inst.type = ShaderInstructionType::VCmpFU32; break; + case 0xc1: inst.type 
= ShaderInstructionType::VCmpLtU32; break; + case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break; + case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break; + case 0xc4: inst.type = ShaderInstructionType::VCmpGtU32; break; + case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break; + case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break; + case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break; + case 0xC8: KYTY_NI("v_cmp_f_f16"); break; + case 0xC9: KYTY_NI("v_cmp_lt_f16"); break; + case 0xCA: KYTY_NI("v_cmp_eq_f16"); break; + case 0xCB: KYTY_NI("v_cmp_le_f16"); break; + case 0xCC: KYTY_NI("v_cmp_gt_f16"); break; + case 0xCD: KYTY_NI("v_cmp_lg_f16"); break; + case 0xCE: KYTY_NI("v_cmp_ge_f16"); break; + case 0xCF: KYTY_NI("v_cmp_o_f16"); break; + case 0xD0: KYTY_NI("v_cmpx_f_u32"); break; + case 0xD1: KYTY_NI("v_cmpx_lt_u32"); break; + case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; + case 0xD3: KYTY_NI("v_cmpx_le_u32"); break; + case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; + case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; + case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; + case 0xD7: KYTY_NI("v_cmpx_t_u32"); break; + case 0xD8: KYTY_NI("v_cmpx_f_f16"); break; + case 0xD9: KYTY_NI("v_cmpx_lt_f16"); break; + case 0xDA: KYTY_NI("v_cmpx_eq_f16"); break; + case 0xDB: KYTY_NI("v_cmpx_le_f16"); break; + case 0xDC: KYTY_NI("v_cmpx_gt_f16"); break; + case 0xDD: KYTY_NI("v_cmpx_lg_f16"); break; + case 0xDE: KYTY_NI("v_cmpx_ge_f16"); break; + case 0xDF: KYTY_NI("v_cmpx_o_f16"); break; + case 0xE0: KYTY_NI("v_cmp_f_u64"); break; + case 0xE1: KYTY_NI("v_cmp_lt_u64"); break; + case 0xE2: KYTY_NI("v_cmp_eq_u64"); break; + case 0xE3: KYTY_NI("v_cmp_le_u64"); break; + case 0xE4: KYTY_NI("v_cmp_gt_u64"); break; + case 0xE5: KYTY_NI("v_cmp_ne_u64"); break; + case 0xE6: KYTY_NI("v_cmp_ge_u64"); break; + case 0xE7: KYTY_NI("v_cmp_t_u64"); break; + case 0xE8: 
KYTY_NI("v_cmp_u_f16"); break; + case 0xE9: KYTY_NI("v_cmp_nge_f16"); break; + case 0xEA: KYTY_NI("v_cmp_nlg_f16"); break; + case 0xEB: KYTY_NI("v_cmp_ngt_f16"); break; + case 0xEC: KYTY_NI("v_cmp_nle_f16"); break; + case 0xED: KYTY_NI("v_cmp_neq_f16"); break; + case 0xEE: KYTY_NI("v_cmp_nlt_f16"); break; + case 0xEF: KYTY_NI("v_cmp_tru_f16"); break; + case 0xF0: KYTY_NI("v_cmpx_f_u64"); break; + case 0xF1: KYTY_NI("v_cmpx_lt_u64"); break; + case 0xF2: KYTY_NI("v_cmpx_eq_u64"); break; + case 0xF3: KYTY_NI("v_cmpx_le_u64"); break; + case 0xF4: KYTY_NI("v_cmpx_gt_u64"); break; + case 0xF5: KYTY_NI("v_cmpx_ne_u64"); break; + case 0xF6: KYTY_NI("v_cmpx_ge_u64"); break; + case 0xF7: KYTY_NI("v_cmpx_t_u64"); break; + case 0xF8: KYTY_NI("v_cmpx_u_f16"); break; + case 0xF9: KYTY_NI("v_cmpx_nge_f16"); break; + case 0xFA: KYTY_NI("v_cmpx_nlg_f16"); break; + case 0xFB: KYTY_NI("v_cmpx_ngt_f16"); break; + case 0xFC: KYTY_NI("v_cmpx_nle_f16"); break; + case 0xFD: KYTY_NI("v_cmpx_neq_f16"); break; + case 0xFE: KYTY_NI("v_cmpx_nlt_f16"); break; + case 0xFF: KYTY_NI("v_cmpx_tru_f16"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_vop1) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("vop1"); + + uint32_t vdst = (buffer[0] >> 17u) & 0xffu; + uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; + uint32_t opcode = (buffer[0] >> 9u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(src0); + inst.dst = operand_parse(vdst + 256); + inst.src_num = 1; + + uint32_t size = 1; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + inst.src[0].constant.u = buffer[size]; + size++; + } + + inst.format = ShaderInstructionFormat::SVdstSVsrc0; + + switch (opcode) + { + case 0x00: KYTY_NI("v_nop"); break; + case 0x01: inst.type = 
ShaderInstructionType::VMovB32; break; + case 0x02: KYTY_NI("v_readfirstlane_b32"); break; + case 0x03: KYTY_NI("v_cvt_i32_f64"); break; + case 0x04: KYTY_NI("v_cvt_f64_i32"); break; + case 0x05: inst.type = ShaderInstructionType::VCvtF32I32; break; + case 0x06: inst.type = ShaderInstructionType::VCvtF32U32; break; + case 0x07: inst.type = ShaderInstructionType::VCvtU32F32; break; + case 0x08: KYTY_NI("v_cvt_i32_f32"); break; + case 0x09: KYTY_NI("v_mov_fed_b32"); break; + case 0x0A: KYTY_NI("v_cvt_f16_f32"); break; + case 0x0b: inst.type = ShaderInstructionType::VCvtF32F16; break; + case 0x0C: KYTY_NI("v_cvt_rpi_i32_f32"); break; + case 0x0D: KYTY_NI("v_cvt_flr_i32_f32"); break; + case 0x0E: KYTY_NI("v_cvt_off_f32_i4"); break; + case 0x0F: KYTY_NI("v_cvt_f32_f64"); break; + case 0x10: KYTY_NI("v_cvt_f64_f32"); break; + case 0x11: inst.type = ShaderInstructionType::VCvtF32Ubyte0; break; + case 0x12: inst.type = ShaderInstructionType::VCvtF32Ubyte1; break; + case 0x13: inst.type = ShaderInstructionType::VCvtF32Ubyte2; break; + case 0x14: inst.type = ShaderInstructionType::VCvtF32Ubyte3; break; + case 0x15: KYTY_NI("v_cvt_u32_f64"); break; + case 0x16: KYTY_NI("v_cvt_f64_u32"); break; + case 0x17: KYTY_NI("v_trunc_f64"); break; + case 0x18: KYTY_NI("v_ceil_f64"); break; + case 0x19: KYTY_NI("v_rndne_f64"); break; + case 0x1A: KYTY_NI("v_floor_f64"); break; + case 0x20: inst.type = ShaderInstructionType::VFractF32; break; + case 0x21: inst.type = ShaderInstructionType::VTruncF32; break; + case 0x22: inst.type = ShaderInstructionType::VCeilF32; break; + case 0x23: inst.type = ShaderInstructionType::VRndneF32; break; + case 0x24: inst.type = ShaderInstructionType::VFloorF32; break; + case 0x25: inst.type = ShaderInstructionType::VExpF32; break; + case 0x26: KYTY_NI("v_log_clamp_f32"); break; + case 0x27: inst.type = ShaderInstructionType::VLogF32; break; + case 0x28: KYTY_NI("v_rcp_clamp_f32"); break; + case 0x29: KYTY_NI("v_rcp_legacy_f32"); break; + case 0x2a: 
inst.type = ShaderInstructionType::VRcpF32; break; + case 0x2B: KYTY_NI("v_rcp_iflag_f32"); break; + case 0x2C: KYTY_NI("v_rsq_clamp_f32"); break; + case 0x2D: KYTY_NI("v_rsq_legacy_f32"); break; + case 0x2e: inst.type = ShaderInstructionType::VRsqF32; break; + case 0x2F: KYTY_NI("v_rcp_f64"); break; + case 0x30: KYTY_NI("v_rcp_clamp_f64"); break; + case 0x31: KYTY_NI("v_rsq_f64"); break; + case 0x32: KYTY_NI("v_rsq_clamp_f64"); break; + case 0x33: inst.type = ShaderInstructionType::VSqrtF32; break; + case 0x34: KYTY_NI("v_sqrt_f64"); break; + case 0x35: inst.type = ShaderInstructionType::VSinF32; break; + case 0x36: inst.type = ShaderInstructionType::VCosF32; break; + case 0x37: inst.type = ShaderInstructionType::VNotB32; break; + case 0x38: inst.type = ShaderInstructionType::VBfrevB32; break; + case 0x39: KYTY_NI("v_ffbh_u32"); break; + case 0x3A: KYTY_NI("v_ffbl_b32"); break; + case 0x3B: KYTY_NI("v_ffbh_i32"); break; + case 0x3C: KYTY_NI("v_frexp_exp_i32_f64"); break; + case 0x3D: KYTY_NI("v_frexp_mant_f64"); break; + case 0x3E: KYTY_NI("v_fract_f64"); break; + case 0x3F: KYTY_NI("v_frexp_exp_i32_f32"); break; + case 0x40: KYTY_NI("v_frexp_mant_f32"); break; + case 0x41: KYTY_NI("v_clrexcp"); break; + case 0x42: KYTY_NI("v_movreld_b32"); break; + case 0x43: KYTY_NI("v_movrels_b32"); break; + case 0x44: KYTY_NI("v_movrelsd_b32"); break; + case 0x45: KYTY_NI("v_log_legacy_f32"); break; + case 0x46: KYTY_NI("v_exp_legacy_f32"); break; + case 0x50: KYTY_NI("v_cvt_f16_u16"); break; + case 0x51: KYTY_NI("v_cvt_f16_i16"); break; + case 0x52: KYTY_NI("v_cvt_u16_f16"); break; + case 0x53: KYTY_NI("v_cvt_i16_f16"); break; + case 0x54: KYTY_NI("v_rcp_f16"); break; + case 0x55: KYTY_NI("v_sqrt_f16"); break; + case 0x56: KYTY_NI("v_rsq_f16"); break; + case 0x57: KYTY_NI("v_log_f16"); break; + case 0x58: KYTY_NI("v_exp_f16"); break; + case 0x59: KYTY_NI("v_frexp_mant_f16"); break; + case 0x5A: KYTY_NI("v_frexp_exp_i16_f16"); break; + case 0x5B: KYTY_NI("v_floor_f16"); break; 
+ case 0x5C: KYTY_NI("v_ceil_f16"); break; + case 0x5D: KYTY_NI("v_trunc_f16"); break; + case 0x5E: KYTY_NI("v_rndne_f16"); break; + case 0x5F: KYTY_NI("v_fract_f16"); break; + case 0x60: KYTY_NI("v_sin_f16"); break; + case 0x61: KYTY_NI("v_cos_f16"); break; + case 0x62: KYTY_NI("v_sat_pk_u8_i16"); break; + case 0x63: KYTY_NI("v_cvt_norm_i16_f16"); break; + case 0x64: KYTY_NI("v_cvt_norm_u16_f16"); break; + case 0x65: KYTY_NI("v_swap_b32"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_vop2) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("vop2"); + + uint32_t opcode = (buffer[0] >> 25u) & 0x3fu; + + switch (opcode) + { + case 0x3e: return shader_parse_vopc(pc, src, buffer, dst, next_gen); break; + case 0x3f: return shader_parse_vop1(pc, src, buffer, dst, next_gen); break; + default: break; + } + + uint32_t vdst = (buffer[0] >> 17u) & 0xffu; + uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; + uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(src0); + inst.src[1] = operand_parse(vsrc1 + 256); + inst.dst = operand_parse(vdst + 256); + inst.src_num = 2; + + uint32_t size = 1; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + inst.src[0].constant.u = buffer[size]; + size++; + } + + inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; + + switch (opcode) + { + case 0x00: + EXIT_NOT_IMPLEMENTED(next_gen); + inst.type = ShaderInstructionType::VCndmaskB32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; + inst.src[2].type = ShaderOperandType::VccLo; + inst.src[2].size = 2; + inst.src_num = 3; + break; + case 0x01: + if (next_gen) + { + inst.type = ShaderInstructionType::VCndmaskB32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; + 
inst.src[2].type = ShaderOperandType::VccLo; + inst.src[2].size = 2; + inst.src_num = 3; + } else + { + KYTY_NI("v_readlane_b32"); + }; + break; + case 0x02: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_writelane_b32"); + }; + break; + case 0x03: inst.type = ShaderInstructionType::VAddF32; break; + case 0x04: inst.type = ShaderInstructionType::VSubF32; break; + case 0x05: inst.type = ShaderInstructionType::VSubrevF32; break; + case 0x06: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_mac_legacy_f32") + }; + break; + case 0x07: KYTY_NI("v_mul_legacy_f32"); break; + case 0x08: inst.type = ShaderInstructionType::VMulF32; break; + case 0x09: KYTY_NI("v_mul_i32_i24"); break; + case 0x0A: KYTY_NI("v_mul_hi_i32_i24"); break; + case 0x0C: KYTY_NI("v_mul_hi_u32_u24"); break; + case 0x0D: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_min_legacy_f32") + }; + break; + case 0x0E: KYTY_NI("v_max_legacy_f32"); break; + case 0x0b: inst.type = ShaderInstructionType::VMulU32U24; break; + case 0x0f: inst.type = ShaderInstructionType::VMinF32; break; + case 0x10: inst.type = ShaderInstructionType::VMaxF32; break; + case 0x11: KYTY_NI("v_min_i32"); break; + case 0x12: KYTY_NI("v_max_i32"); break; + case 0x13: KYTY_NI("v_min_u32"); break; + case 0x14: KYTY_NI("v_max_u32"); break; + case 0x15: inst.type = ShaderInstructionType::VLshrB32; break; + case 0x16: inst.type = ShaderInstructionType::VLshrrevB32; break; + case 0x17: inst.type = ShaderInstructionType::VAshrI32; break; + case 0x18: inst.type = ShaderInstructionType::VAshrrevI32; break; + case 0x19: inst.type = ShaderInstructionType::VLshlB32; break; + case 0x1a: inst.type = ShaderInstructionType::VLshlrevB32; break; + case 0x1b: inst.type = ShaderInstructionType::VAndB32; break; + case 0x1c: inst.type = ShaderInstructionType::VOrB32; break; + case 0x1d: inst.type = ShaderInstructionType::VXorB32; break; + case 0x1E: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { 
+ inst.type = ShaderInstructionType::VBfmB32; + }; + break; + case 0x1f: inst.type = ShaderInstructionType::VMacF32; break; + case 0x20: + inst.type = ShaderInstructionType::VMadmkF32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; + inst.src_num = 3; + inst.src[2] = inst.src[1]; + inst.src[1].type = ShaderOperandType::LiteralConstant; + inst.src[1].constant.u = buffer[size]; + inst.src[1].size = 0; + size++; + break; + case 0x21: + inst.type = ShaderInstructionType::VMadakF32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; + inst.src_num = 3; + inst.src[2].type = ShaderOperandType::LiteralConstant; + inst.src[2].constant.u = buffer[size]; + inst.src[2].size = 0; + size++; + break; + case 0x22: inst.type = ShaderInstructionType::VBcntU32B32; break; + case 0x23: inst.type = ShaderInstructionType::VMbcntLoU32B32; break; + case 0x24: inst.type = ShaderInstructionType::VMbcntHiU32B32; break; + case 0x25: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VAddI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2.type = ShaderOperandType::VccLo; + inst.dst2.size = 2; + }; + break; + case 0x26: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VSubI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2.type = ShaderOperandType::VccLo; + inst.dst2.size = 2; + }; + break; + case 0x27: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VSubrevI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2.type = ShaderOperandType::VccLo; + inst.dst2.size = 2; + }; + break; + case 0x28: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_addc_u32") + }; + break; + case 0x29: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_subb_u32") + }; + break; + case 0x2A: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + 
KYTY_NI("v_subbrev_u32") + }; + break; + case 0x2B: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_ldexp_f32") + }; + break; + case 0x2C: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_cvt_pkaccum_u8_f32") + }; + break; + case 0x2D: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_cvt_pknorm_i16_f32") + }; + break; + case 0x2E: KYTY_NI("v_cvt_pknorm_u16_f32"); break; + case 0x2f: inst.type = ShaderInstructionType::VCvtPkrtzF16F32; break; + case 0x30: KYTY_NI("v_cvt_pk_u16_u32"); break; + case 0x31: KYTY_NI("v_cvt_pk_i16_i32"); break; + case 0x32: KYTY_NI("v_add_f16"); break; + case 0x33: KYTY_NI("v_sub_f16"); break; + case 0x34: KYTY_NI("v_subrev_f16"); break; + case 0x35: KYTY_NI("v_mul_f16"); break; + case 0x36: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_mac_f16") + }; + break; + case 0x37: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_madmk_f16") + }; + break; + case 0x38: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_madak_f16") + }; + break; + case 0x39: KYTY_NI("v_max_f16"); break; + case 0x3A: KYTY_NI("v_min_f16"); break; + case 0x3B: KYTY_NI("v_ldexp_f16"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity,readability-function-size,google-readability-function-size,hicpp-function-size) +KYTY_SHADER_PARSER(shader_parse_vop3) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("vop3"); + + uint32_t opcode = (next_gen ? (buffer[0] >> 16u) & 0x3ffu : (buffer[0] >> 17u) & 0x1ffu); + uint32_t clamp = (next_gen ? (buffer[0] >> 15u) & 0x1u : (buffer[0] >> 11u) & 0x1u); + uint32_t op_sel = (next_gen ? 
(buffer[0] >> 11u) & 0xfu : 0); + uint32_t abs = (buffer[0] >> 8u) & 0x7u; + uint32_t vdst = (buffer[0] >> 0u) & 0xffu; + uint32_t sdst = (buffer[0] >> 8u) & 0x7fu; + uint32_t neg = (buffer[1] >> 29u) & 0x7u; + uint32_t omod = (buffer[1] >> 27u) & 0x3u; + uint32_t src0 = (buffer[1] >> 0u) & 0x1ffu; + uint32_t src1 = (buffer[1] >> 9u) & 0x1ffu; + uint32_t src2 = (buffer[1] >> 18u) & 0x1ffu; + + EXIT_NOT_IMPLEMENTED(op_sel != 0); + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(src0); + inst.src[1] = operand_parse(src1); + inst.src[2] = operand_parse(src2); + inst.src_num = 3; + inst.dst = operand_parse(vdst + 256); + + switch (omod) + { + case 0: inst.dst.multiplier = 1.0f; break; + case 1: inst.dst.multiplier = 2.0f; break; + case 2: inst.dst.multiplier = 4.0f; break; + case 3: inst.dst.multiplier = 0.5f; break; + default: break; + } + + if ((neg & 0x1u) != 0) + { + inst.src[0].negate = true; + } + if ((neg & 0x2u) != 0) + { + inst.src[1].negate = true; + } + if ((neg & 0x4u) != 0) + { + inst.src[2].negate = true; + } + + uint32_t size = 2; + + if (inst.src[0].type == ShaderOperandType::LiteralConstant) + { + inst.src[0].constant.u = buffer[size]; + size++; + } + + if (inst.src[1].type == ShaderOperandType::LiteralConstant) + { + inst.src[1].constant.u = buffer[size]; + size++; + } + + if (inst.src[2].type == ShaderOperandType::LiteralConstant) + { + inst.src[2].constant.u = buffer[size]; + size++; + } + + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Vsrc2; + + if (opcode >= 0 && opcode <= 0xff) + { + /* VOPC using VOP3 encoding */ + inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; + inst.src_num = 2; + inst.dst = operand_parse(vdst); + inst.dst.size = 2; + } + + if (opcode >= 0x100 && opcode <= 0x13d) + { + /* VOP2 using VOP3 encoding */ + inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; + inst.src_num = 2; + } + + if (opcode >= 0x180 && opcode <= 0x1e8) + { + /* VOP1 using VOP3 encoding */ + inst.format = 
ShaderInstructionFormat::SVdstSVsrc0; + inst.src_num = 1; + } + + switch (opcode) + { + /* VOPC using VOP3 encoding */ + case 0x00: inst.type = ShaderInstructionType::VCmpFF32; break; + case 0x01: inst.type = ShaderInstructionType::VCmpLtF32; break; + case 0x02: inst.type = ShaderInstructionType::VCmpEqF32; break; + case 0x03: inst.type = ShaderInstructionType::VCmpLeF32; break; + case 0x04: inst.type = ShaderInstructionType::VCmpGtF32; break; + case 0x05: inst.type = ShaderInstructionType::VCmpLgF32; break; + case 0x06: inst.type = ShaderInstructionType::VCmpGeF32; break; + case 0x07: inst.type = ShaderInstructionType::VCmpOF32; break; + case 0x08: inst.type = ShaderInstructionType::VCmpUF32; break; + case 0x09: inst.type = ShaderInstructionType::VCmpNgeF32; break; + case 0x0a: inst.type = ShaderInstructionType::VCmpNlgF32; break; + case 0x0b: inst.type = ShaderInstructionType::VCmpNgtF32; break; + case 0x0c: inst.type = ShaderInstructionType::VCmpNleF32; break; + case 0x0d: inst.type = ShaderInstructionType::VCmpNeqF32; break; + case 0x0e: inst.type = ShaderInstructionType::VCmpNltF32; break; + case 0x0f: inst.type = ShaderInstructionType::VCmpTruF32; break; + case 0x10: KYTY_NI("v_cmpx_f_f32"); break; + case 0x11: KYTY_NI("v_cmpx_lt_f32"); break; + case 0x12: KYTY_NI("v_cmpx_eq_f32"); break; + case 0x13: KYTY_NI("v_cmpx_le_f32"); break; + case 0x14: KYTY_NI("v_cmpx_gt_f32"); break; + case 0x15: KYTY_NI("v_cmpx_lg_f32"); break; + case 0x16: KYTY_NI("v_cmpx_ge_f32"); break; + case 0x17: KYTY_NI("v_cmpx_o_f32"); break; + case 0x18: KYTY_NI("v_cmpx_u_f32"); break; + case 0x19: KYTY_NI("v_cmpx_nge_f32"); break; + case 0x1A: KYTY_NI("v_cmpx_nlg_f32"); break; + case 0x1B: KYTY_NI("v_cmpx_ngt_f32"); break; + case 0x1C: KYTY_NI("v_cmpx_nle_f32"); break; + case 0x1d: inst.type = ShaderInstructionType::VCmpxNeqF32; break; + case 0x1E: KYTY_NI("v_cmpx_nlt_f32"); break; + case 0x1F: KYTY_NI("v_cmpx_tru_f32"); break; + case 0x20: KYTY_NI("v_cmp_f_f64"); break; + case 0x21: 
KYTY_NI("v_cmp_lt_f64"); break; + case 0x22: KYTY_NI("v_cmp_eq_f64"); break; + case 0x23: KYTY_NI("v_cmp_le_f64"); break; + case 0x24: KYTY_NI("v_cmp_gt_f64"); break; + case 0x25: KYTY_NI("v_cmp_lg_f64"); break; + case 0x26: KYTY_NI("v_cmp_ge_f64"); break; + case 0x27: KYTY_NI("v_cmp_o_f64"); break; + case 0x28: KYTY_NI("v_cmp_u_f64"); break; + case 0x29: KYTY_NI("v_cmp_nge_f64"); break; + case 0x2A: KYTY_NI("v_cmp_nlg_f64"); break; + case 0x2B: KYTY_NI("v_cmp_ngt_f64"); break; + case 0x2C: KYTY_NI("v_cmp_nle_f64"); break; + case 0x2D: KYTY_NI("v_cmp_neq_f64"); break; + case 0x2E: KYTY_NI("v_cmp_nlt_f64"); break; + case 0x2F: KYTY_NI("v_cmp_tru_f64"); break; + case 0x30: KYTY_NI("v_cmpx_f_f64"); break; + case 0x31: KYTY_NI("v_cmpx_lt_f64"); break; + case 0x32: KYTY_NI("v_cmpx_eq_f64"); break; + case 0x33: KYTY_NI("v_cmpx_le_f64"); break; + case 0x34: KYTY_NI("v_cmpx_gt_f64"); break; + case 0x35: KYTY_NI("v_cmpx_lg_f64"); break; + case 0x36: KYTY_NI("v_cmpx_ge_f64"); break; + case 0x37: KYTY_NI("v_cmpx_o_f64"); break; + case 0x38: KYTY_NI("v_cmpx_u_f64"); break; + case 0x39: KYTY_NI("v_cmpx_nge_f64"); break; + case 0x3A: KYTY_NI("v_cmpx_nlg_f64"); break; + case 0x3B: KYTY_NI("v_cmpx_ngt_f64"); break; + case 0x3C: KYTY_NI("v_cmpx_nle_f64"); break; + case 0x3D: KYTY_NI("v_cmpx_neq_f64"); break; + case 0x3E: KYTY_NI("v_cmpx_nlt_f64"); break; + case 0x3F: KYTY_NI("v_cmpx_tru_f64"); break; + case 0x40: KYTY_NI("v_cmps_f_f32"); break; + case 0x41: KYTY_NI("v_cmps_lt_f32"); break; + case 0x42: KYTY_NI("v_cmps_eq_f32"); break; + case 0x43: KYTY_NI("v_cmps_le_f32"); break; + case 0x44: KYTY_NI("v_cmps_gt_f32"); break; + case 0x45: KYTY_NI("v_cmps_lg_f32"); break; + case 0x46: KYTY_NI("v_cmps_ge_f32"); break; + case 0x47: KYTY_NI("v_cmps_o_f32"); break; + case 0x48: KYTY_NI("v_cmps_u_f32"); break; + case 0x49: KYTY_NI("v_cmps_nge_f32"); break; + case 0x4A: KYTY_NI("v_cmps_nlg_f32"); break; + case 0x4B: KYTY_NI("v_cmps_ngt_f32"); break; + case 0x4C: KYTY_NI("v_cmps_nle_f32"); 
break; + case 0x4D: KYTY_NI("v_cmps_neq_f32"); break; + case 0x4E: KYTY_NI("v_cmps_nlt_f32"); break; + case 0x4F: KYTY_NI("v_cmps_tru_f32"); break; + case 0x50: KYTY_NI("v_cmpsx_f_f32"); break; + case 0x51: KYTY_NI("v_cmpsx_lt_f32"); break; + case 0x52: KYTY_NI("v_cmpsx_eq_f32"); break; + case 0x53: KYTY_NI("v_cmpsx_le_f32"); break; + case 0x54: KYTY_NI("v_cmpsx_gt_f32"); break; + case 0x55: KYTY_NI("v_cmpsx_lg_f32"); break; + case 0x56: KYTY_NI("v_cmpsx_ge_f32"); break; + case 0x57: KYTY_NI("v_cmpsx_o_f32"); break; + case 0x58: KYTY_NI("v_cmpsx_u_f32"); break; + case 0x59: KYTY_NI("v_cmpsx_nge_f32"); break; + case 0x5A: KYTY_NI("v_cmpsx_nlg_f32"); break; + case 0x5B: KYTY_NI("v_cmpsx_ngt_f32"); break; + case 0x5C: KYTY_NI("v_cmpsx_nle_f32"); break; + case 0x5D: KYTY_NI("v_cmpsx_neq_f32"); break; + case 0x5E: KYTY_NI("v_cmpsx_nlt_f32"); break; + case 0x5F: KYTY_NI("v_cmpsx_tru_f32"); break; + case 0x60: KYTY_NI("v_cmps_f_f64"); break; + case 0x61: KYTY_NI("v_cmps_lt_f64"); break; + case 0x62: KYTY_NI("v_cmps_eq_f64"); break; + case 0x63: KYTY_NI("v_cmps_le_f64"); break; + case 0x64: KYTY_NI("v_cmps_gt_f64"); break; + case 0x65: KYTY_NI("v_cmps_lg_f64"); break; + case 0x66: KYTY_NI("v_cmps_ge_f64"); break; + case 0x67: KYTY_NI("v_cmps_o_f64"); break; + case 0x68: KYTY_NI("v_cmps_u_f64"); break; + case 0x69: KYTY_NI("v_cmps_nge_f64"); break; + case 0x6A: KYTY_NI("v_cmps_nlg_f64"); break; + case 0x6B: KYTY_NI("v_cmps_ngt_f64"); break; + case 0x6C: KYTY_NI("v_cmps_nle_f64"); break; + case 0x6D: KYTY_NI("v_cmps_neq_f64"); break; + case 0x6E: KYTY_NI("v_cmps_nlt_f64"); break; + case 0x6F: KYTY_NI("v_cmps_tru_f64"); break; + case 0x70: KYTY_NI("v_cmpsx_f_f64"); break; + case 0x71: KYTY_NI("v_cmpsx_lt_f64"); break; + case 0x72: KYTY_NI("v_cmpsx_eq_f64"); break; + case 0x73: KYTY_NI("v_cmpsx_le_f64"); break; + case 0x74: KYTY_NI("v_cmpsx_gt_f64"); break; + case 0x75: KYTY_NI("v_cmpsx_lg_f64"); break; + case 0x76: KYTY_NI("v_cmpsx_ge_f64"); break; + case 0x77: 
KYTY_NI("v_cmpsx_o_f64"); break; + case 0x78: KYTY_NI("v_cmpsx_u_f64"); break; + case 0x79: KYTY_NI("v_cmpsx_nge_f64"); break; + case 0x7A: KYTY_NI("v_cmpsx_nlg_f64"); break; + case 0x7B: KYTY_NI("v_cmpsx_ngt_f64"); break; + case 0x7C: KYTY_NI("v_cmpsx_nle_f64"); break; + case 0x7D: KYTY_NI("v_cmpsx_neq_f64"); break; + case 0x7E: KYTY_NI("v_cmpsx_nlt_f64"); break; + case 0x7F: KYTY_NI("v_cmpsx_tru_f64"); break; + case 0x80: inst.type = ShaderInstructionType::VCmpFI32; break; + case 0x81: inst.type = ShaderInstructionType::VCmpLtI32; break; + case 0x82: inst.type = ShaderInstructionType::VCmpEqI32; break; + case 0x83: inst.type = ShaderInstructionType::VCmpLeI32; break; + case 0x84: inst.type = ShaderInstructionType::VCmpGtI32; break; + case 0x85: inst.type = ShaderInstructionType::VCmpNeI32; break; + case 0x86: inst.type = ShaderInstructionType::VCmpGeI32; break; + case 0x87: inst.type = ShaderInstructionType::VCmpTI32; break; + case 0x88: KYTY_NI("v_cmp_class_f32"); break; + case 0x89: KYTY_NI("v_cmp_lt_i16"); break; + case 0x8A: KYTY_NI("v_cmp_eq_i16"); break; + case 0x8B: KYTY_NI("v_cmp_le_i16"); break; + case 0x8C: KYTY_NI("v_cmp_gt_i16"); break; + case 0x8D: KYTY_NI("v_cmp_ne_i16"); break; + case 0x8E: KYTY_NI("v_cmp_ge_i16"); break; + case 0x8F: KYTY_NI("v_cmp_class_f16"); break; + case 0x90: KYTY_NI("v_cmpx_f_i32"); break; + case 0x91: KYTY_NI("v_cmpx_lt_i32"); break; + case 0x92: KYTY_NI("v_cmpx_eq_i32"); break; + case 0x93: KYTY_NI("v_cmpx_le_i32"); break; + case 0x94: KYTY_NI("v_cmpx_gt_i32"); break; + case 0x95: KYTY_NI("v_cmpx_ne_i32"); break; + case 0x96: KYTY_NI("v_cmpx_ge_i32"); break; + case 0x97: KYTY_NI("v_cmpx_t_i32"); break; + case 0x98: KYTY_NI("v_cmpx_class_f32"); break; + case 0x99: KYTY_NI("v_cmpx_lt_i16"); break; + case 0x9A: KYTY_NI("v_cmpx_eq_i16"); break; + case 0x9B: KYTY_NI("v_cmpx_le_i16"); break; + case 0x9C: KYTY_NI("v_cmpx_gt_i16"); break; + case 0x9D: KYTY_NI("v_cmpx_ne_i16"); break; + case 0x9E: KYTY_NI("v_cmpx_ge_i16"); break; + 
case 0x9F: KYTY_NI("v_cmpx_class_f16"); break; + case 0xA0: KYTY_NI("v_cmp_f_i64"); break; + case 0xA1: KYTY_NI("v_cmp_lt_i64"); break; + case 0xA2: KYTY_NI("v_cmp_eq_i64"); break; + case 0xA3: KYTY_NI("v_cmp_le_i64"); break; + case 0xA4: KYTY_NI("v_cmp_gt_i64"); break; + case 0xA5: KYTY_NI("v_cmp_ne_i64"); break; + case 0xA6: KYTY_NI("v_cmp_ge_i64"); break; + case 0xA7: KYTY_NI("v_cmp_t_i64"); break; + case 0xA8: KYTY_NI("v_cmp_class_f64"); break; + case 0xA9: KYTY_NI("v_cmp_lt_u16"); break; + case 0xAA: KYTY_NI("v_cmp_eq_u16"); break; + case 0xAB: KYTY_NI("v_cmp_le_u16"); break; + case 0xAC: KYTY_NI("v_cmp_gt_u16"); break; + case 0xAD: KYTY_NI("v_cmp_ne_u16"); break; + case 0xAE: KYTY_NI("v_cmp_ge_u16"); break; + case 0xB0: KYTY_NI("v_cmpx_f_i64"); break; + case 0xB1: KYTY_NI("v_cmpx_lt_i64"); break; + case 0xB2: KYTY_NI("v_cmpx_eq_i64"); break; + case 0xB3: KYTY_NI("v_cmpx_le_i64"); break; + case 0xB4: KYTY_NI("v_cmpx_gt_i64"); break; + case 0xB5: KYTY_NI("v_cmpx_ne_i64"); break; + case 0xB6: KYTY_NI("v_cmpx_ge_i64"); break; + case 0xB7: KYTY_NI("v_cmpx_t_i64"); break; + case 0xB8: KYTY_NI("v_cmpx_class_f64"); break; + case 0xB9: KYTY_NI("v_cmpx_lt_u16"); break; + case 0xBA: KYTY_NI("v_cmpx_eq_u16"); break; + case 0xBB: KYTY_NI("v_cmpx_le_u16"); break; + case 0xBC: KYTY_NI("v_cmpx_gt_u16"); break; + case 0xBD: KYTY_NI("v_cmpx_ne_u16"); break; + case 0xBE: KYTY_NI("v_cmpx_ge_u16"); break; + case 0xc0: inst.type = ShaderInstructionType::VCmpFU32; break; + case 0xc1: inst.type = ShaderInstructionType::VCmpLtU32; break; + case 0xc2: inst.type = ShaderInstructionType::VCmpEqU32; break; + case 0xc3: inst.type = ShaderInstructionType::VCmpLeU32; break; + case 0xc4: inst.type = ShaderInstructionType::VCmpGtU32; break; + case 0xc5: inst.type = ShaderInstructionType::VCmpNeU32; break; + case 0xc6: inst.type = ShaderInstructionType::VCmpGeU32; break; + case 0xc7: inst.type = ShaderInstructionType::VCmpTU32; break; + case 0xC8: KYTY_NI("v_cmp_f_f16"); break; + case 0xC9: 
KYTY_NI("v_cmp_lt_f16"); break; + case 0xCA: KYTY_NI("v_cmp_eq_f16"); break; + case 0xCB: KYTY_NI("v_cmp_le_f16"); break; + case 0xCC: KYTY_NI("v_cmp_gt_f16"); break; + case 0xCD: KYTY_NI("v_cmp_lg_f16"); break; + case 0xCE: KYTY_NI("v_cmp_ge_f16"); break; + case 0xCF: KYTY_NI("v_cmp_o_f16"); break; + case 0xD0: KYTY_NI("v_cmpx_f_u32"); break; + case 0xD1: KYTY_NI("v_cmpx_lt_u32"); break; + case 0xd2: inst.type = ShaderInstructionType::VCmpxEqU32; break; + case 0xD3: KYTY_NI("v_cmpx_le_u32"); break; + case 0xd4: inst.type = ShaderInstructionType::VCmpxGtU32; break; + case 0xd5: inst.type = ShaderInstructionType::VCmpxNeU32; break; + case 0xd6: inst.type = ShaderInstructionType::VCmpxGeU32; break; + case 0xD7: KYTY_NI("v_cmpx_t_u32"); break; + case 0xD8: KYTY_NI("v_cmpx_f_f16"); break; + case 0xD9: KYTY_NI("v_cmpx_lt_f16"); break; + case 0xDA: KYTY_NI("v_cmpx_eq_f16"); break; + case 0xDB: KYTY_NI("v_cmpx_le_f16"); break; + case 0xDC: KYTY_NI("v_cmpx_gt_f16"); break; + case 0xDD: KYTY_NI("v_cmpx_lg_f16"); break; + case 0xDE: KYTY_NI("v_cmpx_ge_f16"); break; + case 0xDF: KYTY_NI("v_cmpx_o_f16"); break; + case 0xE0: KYTY_NI("v_cmp_f_u64"); break; + case 0xE1: KYTY_NI("v_cmp_lt_u64"); break; + case 0xE2: KYTY_NI("v_cmp_eq_u64"); break; + case 0xE3: KYTY_NI("v_cmp_le_u64"); break; + case 0xE4: KYTY_NI("v_cmp_gt_u64"); break; + case 0xE5: KYTY_NI("v_cmp_ne_u64"); break; + case 0xE6: KYTY_NI("v_cmp_ge_u64"); break; + case 0xE7: KYTY_NI("v_cmp_t_u64"); break; + case 0xE8: KYTY_NI("v_cmp_u_f16"); break; + case 0xE9: KYTY_NI("v_cmp_nge_f16"); break; + case 0xEA: KYTY_NI("v_cmp_nlg_f16"); break; + case 0xEB: KYTY_NI("v_cmp_ngt_f16"); break; + case 0xEC: KYTY_NI("v_cmp_nle_f16"); break; + case 0xED: KYTY_NI("v_cmp_neq_f16"); break; + case 0xEE: KYTY_NI("v_cmp_nlt_f16"); break; + case 0xEF: KYTY_NI("v_cmp_tru_f16"); break; + case 0xF0: KYTY_NI("v_cmpx_f_u64"); break; + case 0xF1: KYTY_NI("v_cmpx_lt_u64"); break; + case 0xF2: KYTY_NI("v_cmpx_eq_u64"); break; + case 0xF3: 
KYTY_NI("v_cmpx_le_u64"); break; + case 0xF4: KYTY_NI("v_cmpx_gt_u64"); break; + case 0xF5: KYTY_NI("v_cmpx_ne_u64"); break; + case 0xF6: KYTY_NI("v_cmpx_ge_u64"); break; + case 0xF7: KYTY_NI("v_cmpx_t_u64"); break; + case 0xF8: KYTY_NI("v_cmpx_u_f16"); break; + case 0xF9: KYTY_NI("v_cmpx_nge_f16"); break; + case 0xFA: KYTY_NI("v_cmpx_nlg_f16"); break; + case 0xFB: KYTY_NI("v_cmpx_ngt_f16"); break; + case 0xFC: KYTY_NI("v_cmpx_nle_f16"); break; + case 0xFD: KYTY_NI("v_cmpx_neq_f16"); break; + case 0xFE: KYTY_NI("v_cmpx_nlt_f16"); break; + case 0xFF: KYTY_NI("v_cmpx_tru_f16"); break; + + /* VOP2 using VOP3 encoding */ + case 0x100: + EXIT_NOT_IMPLEMENTED(next_gen); + inst.type = ShaderInstructionType::VCndmaskB32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; + inst.src_num = 3; + inst.src[2].size = 2; + break; + case 0x101: + if (next_gen) + { + inst.type = ShaderInstructionType::VCndmaskB32; + inst.format = ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2; + inst.src_num = 3; + inst.src[2].size = 2; + } else + { + KYTY_NI("v_readlane_b32"); + }; + break; + case 0x102: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_writelane_b32"); + }; + break; + case 0x103: inst.type = ShaderInstructionType::VAddF32; break; + case 0x104: inst.type = ShaderInstructionType::VSubF32; break; + case 0x105: inst.type = ShaderInstructionType::VSubrevF32; break; + case 0x106: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_mac_legacy_f32") + }; + break; + case 0x107: KYTY_NI("v_mul_legacy_f32"); break; + case 0x108: inst.type = ShaderInstructionType::VMulF32; break; + case 0x109: KYTY_NI("v_mul_i32_i24"); break; + case 0x10A: KYTY_NI("v_mul_hi_i32_i24"); break; + case 0x10b: inst.type = ShaderInstructionType::VMulU32U24; break; + case 0x10C: KYTY_NI("v_mul_hi_u32_u24"); break; + case 0x10D: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_min_legacy_f32") + }; + break; + case 0x10E: KYTY_NI("v_max_legacy_f32"); break; 
+ case 0x10f: inst.type = ShaderInstructionType::VMinF32; break; + case 0x110: inst.type = ShaderInstructionType::VMaxF32; break; + case 0x111: KYTY_NI("v_min_i32"); break; + case 0x112: KYTY_NI("v_max_i32"); break; + case 0x113: KYTY_NI("v_min_u32"); break; + case 0x114: KYTY_NI("v_max_u32"); break; + case 0x115: inst.type = ShaderInstructionType::VLshrB32; break; + case 0x116: inst.type = ShaderInstructionType::VLshrrevB32; break; + case 0x117: inst.type = ShaderInstructionType::VAshrI32; break; + case 0x118: inst.type = ShaderInstructionType::VAshrrevI32; break; + case 0x119: inst.type = ShaderInstructionType::VLshlB32; break; + case 0x11a: inst.type = ShaderInstructionType::VLshlrevB32; break; + case 0x11b: inst.type = ShaderInstructionType::VAndB32; break; + case 0x11c: inst.type = ShaderInstructionType::VOrB32; break; + case 0x11d: inst.type = ShaderInstructionType::VXorB32; break; + case 0x11E: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VBfmB32; + }; + break; + case 0x11f: inst.type = ShaderInstructionType::VMacF32; break; + case 0x120: KYTY_NI("v_madmk_f32"); break; + case 0x121: KYTY_NI("v_madak_f32"); break; + case 0x122: inst.type = ShaderInstructionType::VBcntU32B32; break; + case 0x123: inst.type = ShaderInstructionType::VMbcntLoU32B32; break; + case 0x124: inst.type = ShaderInstructionType::VMbcntHiU32B32; break; + case 0x125: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VAddI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2 = operand_parse(sdst); + inst.dst2.size = 2; + }; + break; + case 0x126: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = ShaderInstructionType::VSubI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2 = operand_parse(sdst); + inst.dst2.size = 2; + }; + break; + case 0x127: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + inst.type = 
ShaderInstructionType::VSubrevI32; + inst.format = ShaderInstructionFormat::VdstSdst2Vsrc0Vsrc1; + inst.dst2 = operand_parse(sdst); + inst.dst2.size = 2; + }; + break; + case 0x128: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_addc_u32") + }; + break; + case 0x129: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_subb_u32") + }; + break; + case 0x12A: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_subbrev_u32") + }; + break; + case 0x12B: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_ldexp_f32") + }; + break; + case 0x12C: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_cvt_pkaccum_u8_f32") + }; + break; + case 0x12D: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_cvt_pknorm_i16_f32") + }; + break; + case 0x12E: KYTY_NI("v_cvt_pknorm_u16_f32"); break; + case 0x12f: inst.type = ShaderInstructionType::VCvtPkrtzF16F32; break; + case 0x130: KYTY_NI("v_cvt_pk_u16_u32"); break; + case 0x131: KYTY_NI("v_cvt_pk_i16_i32"); break; + case 0x132: KYTY_NI("v_add_f16"); break; + case 0x133: KYTY_NI("v_sub_f16"); break; + case 0x134: KYTY_NI("v_subrev_f16"); break; + case 0x135: KYTY_NI("v_mul_f16"); break; + case 0x136: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_mac_f16") + }; + break; + case 0x137: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_madmk_f16") + }; + break; + case 0x138: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_madak_f16") + }; + break; + case 0x139: KYTY_NI("v_max_f16"); break; + case 0x13A: KYTY_NI("v_min_f16"); break; + case 0x13B: KYTY_NI("v_ldexp_f16"); break; + + /* VOP3 instructions */ + case 0x140: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_mad_legacy_f32") + }; + break; + case 0x141: inst.type = ShaderInstructionType::VMadF32; break; + case 0x142: KYTY_NI("v_mad_i32_i24"); break; + case 0x143: inst.type = ShaderInstructionType::VMadU32U24; break; 
+ case 0x144: KYTY_NI("v_cubeid_f32"); break; + case 0x145: KYTY_NI("v_cubesc_f32"); break; + case 0x146: KYTY_NI("v_cubetc_f32"); break; + case 0x147: KYTY_NI("v_cubema_f32"); break; + case 0x148: inst.type = ShaderInstructionType::VBfeU32; break; + case 0x149: KYTY_NI("v_bfe_i32"); break; + case 0x14A: KYTY_NI("v_bfi_b32"); break; + case 0x14b: inst.type = ShaderInstructionType::VFmaF32; break; + case 0x14C: KYTY_NI("v_fma_f64"); break; + case 0x14D: KYTY_NI("v_lerp_u8"); break; + case 0x14E: KYTY_NI("v_alignbit_b32"); break; + case 0x14F: KYTY_NI("v_alignbyte_b32"); break; + case 0x150: KYTY_NI("v_mullit_f32"); break; + case 0x151: inst.type = ShaderInstructionType::VMin3F32; break; + case 0x152: KYTY_NI("v_min3_i32"); break; + case 0x153: KYTY_NI("v_min3_u32"); break; + case 0x154: inst.type = ShaderInstructionType::VMax3F32; break; + case 0x155: KYTY_NI("v_max3_i32"); break; + case 0x156: KYTY_NI("v_max3_u32"); break; + case 0x157: inst.type = ShaderInstructionType::VMed3F32; break; + case 0x158: KYTY_NI("v_med3_i32"); break; + case 0x159: KYTY_NI("v_med3_u32"); break; + case 0x15A: KYTY_NI("v_sad_u8"); break; + case 0x15B: KYTY_NI("v_sad_hi_u8"); break; + case 0x15C: KYTY_NI("v_sad_u16"); break; + case 0x15d: inst.type = ShaderInstructionType::VSadU32; break; + case 0x15E: KYTY_NI("v_cvt_pk_u8_f32"); break; + case 0x15F: KYTY_NI("v_div_fixup_f32"); break; + case 0x160: KYTY_NI("v_div_fixup_f64"); break; + case 0x161: KYTY_NI("v_lshl_b64"); break; + case 0x162: KYTY_NI("v_lshr_b64"); break; + case 0x163: KYTY_NI("v_ashr_i64"); break; + case 0x164: KYTY_NI("v_add_f64"); break; + case 0x165: KYTY_NI("v_mul_f64"); break; + case 0x166: KYTY_NI("v_min_f64"); break; + case 0x167: KYTY_NI("v_max_f64"); break; + case 0x168: KYTY_NI("v_ldexp_f64"); break; + case 0x169: + inst.type = ShaderInstructionType::VMulLoU32; + inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; + inst.src_num = 2; + break; + case 0x16a: + inst.type = ShaderInstructionType::VMulHiU32; + 
inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; + inst.src_num = 2; + break; + case 0x16b: + inst.type = ShaderInstructionType::VMulLoI32; + inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; + inst.src_num = 2; + break; + case 0x16C: KYTY_NI("v_mul_hi_i32"); break; + case 0x16D: KYTY_NI("v_div_scale_f32"); break; + case 0x16E: KYTY_NI("v_div_scale_f64"); break; + case 0x16F: KYTY_NI("v_div_fmas_f32"); break; + case 0x170: KYTY_NI("v_div_fmas_f64"); break; + case 0x171: KYTY_NI("v_msad_u8"); break; + case 0x174: KYTY_NI("v_trig_preop_f64"); break; + case 0x175: KYTY_NI("v_mqsad_u32_u8"); break; + case 0x176: KYTY_NI("v_mad_u64_u32"); break; + case 0x177: KYTY_NI("v_mad_i64_i32"); break; + case 0x303: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_add_u16") + }; + break; + case 0x304: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_sub_u16") + }; + break; + case 0x305: KYTY_NI("v_mul_lo_u16"); break; + case 0x307: KYTY_NI("v_lshrrev_b16"); break; + case 0x308: KYTY_NI("v_ashrrev_i16"); break; + case 0x309: KYTY_NI("v_max_u16"); break; + case 0x30A: KYTY_NI("v_max_i16"); break; + case 0x30B: KYTY_NI("v_min_u16"); break; + case 0x30C: KYTY_NI("v_min_i16"); break; + case 0x30D: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_add_i16") + }; + break; + case 0x30E: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_sub_i16") + }; + break; + case 0x30F: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_add_u32") + }; + break; + case 0x310: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_sub_u32") + }; + break; + case 0x311: KYTY_NI("v_pack_b32_f16"); break; + case 0x312: KYTY_NI("v_cvt_pknorm_i16_f16"); break; + case 0x313: KYTY_NI("v_cvt_pknorm_u16_f16"); break; + case 0x314: KYTY_NI("v_lshlrev_b16"); break; + case 0x340: KYTY_NI("v_mad_u16"); break; + case 0x341: KYTY_NI("v_mad_f16"); break; + case 0x342: KYTY_NI("v_interp_p1ll_f16"); break; + 
case 0x343: KYTY_NI("v_interp_p1lv_f16"); break; + case 0x344: KYTY_NI("v_perm_b32"); break; + case 0x345: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_xad_b32") + }; + break; + case 0x346: KYTY_NI("v_lshl_add_u32"); break; + case 0x347: KYTY_NI("v_add_lshl_u32"); break; + case 0x34B: KYTY_NI("v_fma_f16"); break; + case 0x351: KYTY_NI("v_min3_f16"); break; + case 0x352: KYTY_NI("v_min3_i16"); break; + case 0x353: KYTY_NI("v_min3_u16"); break; + case 0x354: KYTY_NI("v_max3_f16"); break; + case 0x355: KYTY_NI("v_max3_i16"); break; + case 0x356: KYTY_NI("v_max3_u16"); break; + case 0x357: KYTY_NI("v_med3_f16"); break; + case 0x358: KYTY_NI("v_med3_i16"); break; + case 0x359: KYTY_NI("v_med3_u16"); break; + case 0x35A: KYTY_NI("v_interp_p2_f16"); break; + case 0x35E: KYTY_NI("v_mad_i16"); break; + case 0x35F: KYTY_NI("v_div_fixup_f16"); break; + case 0x36D: KYTY_NI("v_add3_u32"); break; + case 0x36F: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_lshl_or_u32") + }; + break; + case 0x371: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_and_or_u32") + }; + break; + case 0x372: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("v_or3_u32") + }; + break; + case 0x373: KYTY_NI("v_mad_u32_u16"); break; + case 0x375: KYTY_NI("v_mad_i32_i16"); break; + + /* VOP1 using VOP3 encoding */ + case 0x180: KYTY_NI("v_nop"); break; + case 0x181: KYTY_NI("v_mov_b32"); break; + case 0x182: KYTY_NI("v_readfirstlane_b32"); break; + case 0x183: KYTY_NI("v_cvt_i32_f64"); break; + case 0x184: KYTY_NI("v_cvt_f64_i32"); break; + case 0x185: KYTY_NI("v_cvt_f32_i32"); break; + case 0x186: KYTY_NI("v_cvt_f32_u32"); break; + case 0x187: KYTY_NI("v_cvt_u32_f32"); break; + case 0x188: KYTY_NI("v_cvt_i32_f32"); break; + case 0x189: KYTY_NI("v_mov_fed_b32"); break; + case 0x18A: KYTY_NI("v_cvt_f16_f32"); break; + case 0x18B: KYTY_NI("v_cvt_f32_f16"); break; + case 0x18C: KYTY_NI("v_cvt_rpi_i32_f32"); break; + case 0x18D: 
KYTY_NI("v_cvt_flr_i32_f32"); break; + case 0x18E: KYTY_NI("v_cvt_off_f32_i4"); break; + case 0x18F: KYTY_NI("v_cvt_f32_f64"); break; + case 0x190: KYTY_NI("v_cvt_f64_f32"); break; + case 0x191: KYTY_NI("v_cvt_f32_ubyte0"); break; + case 0x192: KYTY_NI("v_cvt_f32_ubyte1"); break; + case 0x193: KYTY_NI("v_cvt_f32_ubyte2"); break; + case 0x194: KYTY_NI("v_cvt_f32_ubyte3"); break; + case 0x195: KYTY_NI("v_cvt_u32_f64"); break; + case 0x196: KYTY_NI("v_cvt_f64_u32"); break; + case 0x197: KYTY_NI("v_trunc_f64"); break; + case 0x198: KYTY_NI("v_ceil_f64"); break; + case 0x199: KYTY_NI("v_rndne_f64"); break; + case 0x19A: KYTY_NI("v_floor_f64"); break; + case 0x1a0: inst.type = ShaderInstructionType::VFractF32; break; + case 0x1a1: inst.type = ShaderInstructionType::VTruncF32; break; + case 0x1a2: inst.type = ShaderInstructionType::VCeilF32; break; + case 0x1a3: inst.type = ShaderInstructionType::VRndneF32; break; + case 0x1a4: inst.type = ShaderInstructionType::VFloorF32; break; + case 0x1a5: inst.type = ShaderInstructionType::VExpF32; break; + case 0x1A6: KYTY_NI("v_log_clamp_f32"); break; + case 0x1a7: inst.type = ShaderInstructionType::VLogF32; break; + case 0x1A8: KYTY_NI("v_rcp_clamp_f32"); break; + case 0x1A9: KYTY_NI("v_rcp_legacy_f32"); break; + case 0x1aa: inst.type = ShaderInstructionType::VRcpF32; break; + case 0x1AB: KYTY_NI("v_rcp_iflag_f32"); break; + case 0x1AC: KYTY_NI("v_rsq_clamp_f32"); break; + case 0x1AD: KYTY_NI("v_rsq_legacy_f32"); break; + case 0x1ae: inst.type = ShaderInstructionType::VRsqF32; break; + case 0x1AF: KYTY_NI("v_rcp_f64"); break; + case 0x1B0: KYTY_NI("v_rcp_clamp_f64"); break; + case 0x1B1: KYTY_NI("v_rsq_f64"); break; + case 0x1B2: KYTY_NI("v_rsq_clamp_f64"); break; + case 0x1b3: inst.type = ShaderInstructionType::VSqrtF32; break; + case 0x1B4: KYTY_NI("v_sqrt_f64"); break; + case 0x1b5: inst.type = ShaderInstructionType::VSinF32; break; + case 0x1b6: inst.type = ShaderInstructionType::VCosF32; break; + case 0x1B7: 
KYTY_NI("v_not_b32"); break; + case 0x1B8: KYTY_NI("v_bfrev_b32"); break; + case 0x1B9: KYTY_NI("v_ffbh_u32"); break; + case 0x1BA: KYTY_NI("v_ffbl_b32"); break; + case 0x1BB: KYTY_NI("v_ffbh_i32"); break; + case 0x1BC: KYTY_NI("v_frexp_exp_i32_f64"); break; + case 0x1BD: KYTY_NI("v_frexp_mant_f64"); break; + case 0x1BE: KYTY_NI("v_fract_f64"); break; + case 0x1BF: KYTY_NI("v_frexp_exp_i32_f32"); break; + case 0x1C0: KYTY_NI("v_frexp_mant_f32"); break; + case 0x1C1: KYTY_NI("v_clrexcp"); break; + case 0x1C2: KYTY_NI("v_movreld_b32"); break; + case 0x1C3: KYTY_NI("v_movrels_b32"); break; + case 0x1C4: KYTY_NI("v_movrelsd_b32"); break; + case 0x1C5: KYTY_NI("v_log_legacy_f32"); break; + case 0x1C6: KYTY_NI("v_exp_legacy_f32"); break; + case 0x1D0: KYTY_NI("v_cvt_f16_u16"); break; + case 0x1D1: KYTY_NI("v_cvt_f16_i16"); break; + case 0x1D2: KYTY_NI("v_cvt_u16_f16"); break; + case 0x1D3: KYTY_NI("v_cvt_i16_f16"); break; + case 0x1D4: KYTY_NI("v_rcp_f16"); break; + case 0x1D5: KYTY_NI("v_sqrt_f16"); break; + case 0x1D6: KYTY_NI("v_rsq_f16"); break; + case 0x1D7: KYTY_NI("v_log_f16"); break; + case 0x1D8: KYTY_NI("v_exp_f16"); break; + case 0x1D9: KYTY_NI("v_frexp_mant_f16"); break; + case 0x1DA: KYTY_NI("v_frexp_exp_i16_f16"); break; + case 0x1DB: KYTY_NI("v_floor_f16"); break; + case 0x1DC: KYTY_NI("v_ceil_f16"); break; + case 0x1DD: KYTY_NI("v_trunc_f16"); break; + case 0x1DE: KYTY_NI("v_rndne_f16"); break; + case 0x1DF: KYTY_NI("v_fract_f16"); break; + case 0x1E0: KYTY_NI("v_sin_f16"); break; + case 0x1E1: KYTY_NI("v_cos_f16"); break; + case 0x1E2: KYTY_NI("v_sat_pk_u8_i16"); break; + case 0x1E3: KYTY_NI("v_cvt_norm_i16_f16"); break; + case 0x1E4: KYTY_NI("v_cvt_norm_u16_f16"); break; + case 0x1E5: KYTY_NI("v_swap_b32"); break; + + default: KYTY_UNKNOWN_OP(); + } + + if (inst.dst2.type == ShaderOperandType::Unknown) + { + if ((abs & 0x1u) != 0) + { + inst.src[0].absolute = true; + } + if ((abs & 0x2u) != 0) + { + inst.src[1].absolute = true; + } + if ((abs & 0x4u) != 
0) + { + inst.src[2].absolute = true; + } + + if (!next_gen) + { + inst.dst.clamp = (clamp != 0); + } + } + + if (next_gen) + { + inst.dst.clamp = (clamp != 0); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +KYTY_SHADER_PARSER(shader_parse_exp) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("exp"); + + uint32_t vm = (buffer[0] >> 12u) & 0x1u; + uint32_t done = (buffer[0] >> 11u) & 0x1u; + uint32_t compr = (buffer[0] >> 10u) & 0x1u; + uint32_t target = (buffer[0] >> 4u) & 0x3fu; + uint32_t en = (buffer[0] >> 0u) & 0xfu; + + uint32_t vsrc0 = (buffer[1] >> 0u) & 0xffu; + uint32_t vsrc1 = (buffer[1] >> 8u) & 0xffu; + uint32_t vsrc2 = (buffer[1] >> 16u) & 0xffu; + uint32_t vsrc3 = (buffer[1] >> 24u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(vsrc0 + 256); + inst.src[1] = operand_parse(vsrc1 + 256); + inst.src[2] = operand_parse(vsrc2 + 256); + inst.src[3] = operand_parse(vsrc3 + 256); + inst.src_num = 4; + + inst.type = ShaderInstructionType::Exp; + + switch (target) + { + case 0x00: + if (done != 0 && compr != 0 && vm != 0 && en == 0x0) + { + inst.format = ShaderInstructionFormat::Mrt0OffOffComprVmDone; + inst.src_num = 0; + } else if (done != 0 && compr != 0 && vm != 0 && en == 0xf) + { + inst.format = ShaderInstructionFormat::Mrt0Vsrc0Vsrc1ComprVmDone; + inst.src_num = 2; + } else if (done != 0 && compr == 0 && vm != 0 && en == 0xf) + { + inst.format = ShaderInstructionFormat::Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone; + }; + break; + case 0x0c: + if (done != 0 && en == 0xf) + { + inst.format = ShaderInstructionFormat::Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done; + }; + break; + case 0x14: + if (done != 0 && en == 0x1) + { + inst.format = ShaderInstructionFormat::PrimVsrc0OffOffOffDone; + inst.src_num = 1; + }; + break; + + default: break; + } + + if (inst.format == ShaderInstructionFormat::Unknown && done == 0 && compr == 0 && vm == 0 && en == 0xf) + { + switch 
(target) + { + case 0x20: inst.format = ShaderInstructionFormat::Param0Vsrc0Vsrc1Vsrc2Vsrc3; break; + case 0x21: inst.format = ShaderInstructionFormat::Param1Vsrc0Vsrc1Vsrc2Vsrc3; break; + case 0x22: inst.format = ShaderInstructionFormat::Param2Vsrc0Vsrc1Vsrc2Vsrc3; break; + case 0x23: inst.format = ShaderInstructionFormat::Param3Vsrc0Vsrc1Vsrc2Vsrc3; break; + case 0x24: inst.format = ShaderInstructionFormat::Param4Vsrc0Vsrc1Vsrc2Vsrc3; break; + default: break; + } + } + + if (inst.format == ShaderInstructionFormat::Unknown) + { + printf("%s", dst->DbgDump().c_str()); + EXIT("%s\n" + "unknown exp target: 0x%02" PRIx32 " at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ", crc32 = 0x%08" PRIx32 ")\n", + dst->DbgDump().c_str(), target, pc, dst->GetHash0(), dst->GetCrc32()); + } + + dst->GetInstructions().Add(inst); + + return 2; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_smem) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("smem"); + + uint32_t opcode = (buffer[0] >> 18u) & 0xffu; + uint32_t glc = (buffer[0] >> 16u) & 0x1u; + uint32_t dlc = (buffer[0] >> 14u) & 0x1u; + uint32_t sdst = (buffer[0] >> 6u) & 0x7fu; + uint32_t sbase = (buffer[0] >> 0u) & 0x3fu; + uint32_t soffset = (buffer[1] >> 25u) & 0x7fu; + auto offset = static_cast<int32_t>((buffer[1] >> 0u) & 0x1fffffu); /* raw 21-bit immediate; sign-extended via the bitfield below */ + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(sdst); + inst.src_num = 2; + inst.src[0] = operand_parse(sbase * 2); + inst.src[1] = operand_parse(soffset); + + uint32_t size = 2; + + EXIT_NOT_IMPLEMENTED(glc != 0); + EXIT_NOT_IMPLEMENTED(dlc != 0); + EXIT_NOT_IMPLEMENTED(inst.src[0].type == ShaderOperandType::LiteralConstant); + EXIT_NOT_IMPLEMENTED(inst.src[1].type == ShaderOperandType::LiteralConstant); + + if (inst.src[1].type == ShaderOperandType::Null) + { + struct + { + int x : 21; + } s {}; + + s.x = offset; + + inst.src[1].type =
ShaderOperandType::IntegerInlineConstant; + inst.src[1].constant.i = s.x; + inst.src[1].size = 0; + } else + { + EXIT_NOT_IMPLEMENTED(offset != 0); + } + + switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::SLoadDword; + inst.format = ShaderInstructionFormat::SdstSbaseSoffset; + inst.src[0].size = 2; + inst.dst.size = 1; + break; + case 0x01: + inst.type = ShaderInstructionType::SLoadDwordx2; + inst.format = ShaderInstructionFormat::Sdst2Ssrc02Ssrc1; + inst.src[0].size = 2; + inst.dst.size = 2; + break; + case 0x02: + inst.type = ShaderInstructionType::SLoadDwordx4; + inst.format = ShaderInstructionFormat::Sdst4SbaseSoffset; + inst.src[0].size = 2; + inst.dst.size = 4; + break; + case 0x03: KYTY_NI("s_load_dwordx8"); break; + case 0x04: KYTY_NI("s_load_dwordx16"); break; + case 0x08: KYTY_NI("s_buffer_load_dword"); break; + case 0x09: KYTY_NI("s_buffer_load_dwordx2"); break; + case 0x0a: + inst.type = ShaderInstructionType::SBufferLoadDwordx4; + inst.format = ShaderInstructionFormat::Sdst4SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 4; + break; + case 0x0B: KYTY_NI("s_buffer_load_dwordx8"); break; + case 0x0c: + inst.type = ShaderInstructionType::SBufferLoadDwordx16; + inst.format = ShaderInstructionFormat::Sdst16SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 16; + break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_smrd) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("smrd"); + + uint32_t opcode = (buffer[0] >> 22u) & 0x1fu; + uint32_t sdst = (buffer[0] >> 15u) & 0x7fu; + uint32_t sbase = (buffer[0] >> 9u) & 0x3fu; + uint32_t imm = (buffer[0] >> 8u) & 0x1u; + uint32_t offset = (buffer[0] >> 0u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(sdst); + inst.src_num = 2; + inst.src[0] = 
operand_parse(sbase * 2); + + uint32_t size = 1; + + if (imm == 1) + { + inst.src[1].type = ShaderOperandType::LiteralConstant; + inst.src[1].constant.u = offset << 2u; + } else + { + inst.src[1] = operand_parse(offset); + + if (inst.src[1].type == ShaderOperandType::LiteralConstant) + { + inst.src[1].constant.u = buffer[size]; + size++; + } + } + + switch (opcode) + { + case 0x00: KYTY_NI("s_load_dword"); break; + case 0x01: KYTY_NI("s_load_dwordx2"); break; + case 0x02: + inst.type = ShaderInstructionType::SLoadDwordx4; + inst.format = ShaderInstructionFormat::Sdst4SbaseSoffset; + inst.src[0].size = 2; + inst.dst.size = 4; + break; + case 0x03: + inst.type = ShaderInstructionType::SLoadDwordx8; + inst.format = ShaderInstructionFormat::Sdst8SbaseSoffset; + inst.src[0].size = 2; + inst.dst.size = 8; + break; + case 0x04: KYTY_NI("s_load_dwordx16"); break; + case 0x08: + inst.type = ShaderInstructionType::SBufferLoadDword; + inst.format = ShaderInstructionFormat::SdstSvSoffset; + inst.src[0].size = 4; + break; + case 0x09: + inst.type = ShaderInstructionType::SBufferLoadDwordx2; + inst.format = ShaderInstructionFormat::Sdst2SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 2; + break; + case 0x0a: + inst.type = ShaderInstructionType::SBufferLoadDwordx4; + inst.format = ShaderInstructionFormat::Sdst4SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 4; + break; + case 0x0b: + inst.type = ShaderInstructionType::SBufferLoadDwordx8; + inst.format = ShaderInstructionFormat::Sdst8SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 8; + break; + case 0x0c: + inst.type = ShaderInstructionType::SBufferLoadDwordx16; + inst.format = ShaderInstructionFormat::Sdst16SvSoffset; + inst.src[0].size = 4; + inst.dst.size = 16; + break; + case 0x1C: KYTY_NI("s_memrealtime"); break; + case 0x1D: KYTY_NI("s_dcache_inv_vol"); break; + case 0x1E: KYTY_NI("s_memtime"); break; + case 0x1F: KYTY_NI("s_dcache_inv") break; + + default: KYTY_UNKNOWN_OP(); + } + + 
dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_mubuf) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("mubuf"); + + uint32_t opcode = (buffer[0] >> 18u) & 0x7fu; /* OP is the 7-bit field [24:18]; a 5-bit mask aliases opcodes >= 0x20 onto the low ones */ + uint32_t lds = (buffer[0] >> 16u) & 0x1u; + uint32_t glc = (buffer[0] >> 14u) & 0x1u; + uint32_t idxen = (buffer[0] >> 13u) & 0x1u; + uint32_t offen = (buffer[0] >> 12u) & 0x1u; + uint32_t offset = (buffer[0] >> 0u) & 0xfffu; + + uint32_t soffset = (buffer[1] >> 24u) & 0xffu; + uint32_t tfe = (buffer[1] >> 23u) & 0x1u; + uint32_t slc = (buffer[1] >> 22u) & 0x1u; + uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; + uint32_t vdata = (buffer[1] >> 8u) & 0xffu; + uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; + + EXIT_NOT_IMPLEMENTED(idxen == 0); + EXIT_NOT_IMPLEMENTED(offen == 1); + EXIT_NOT_IMPLEMENTED(offset != 0); + EXIT_NOT_IMPLEMENTED(glc == 1); + EXIT_NOT_IMPLEMENTED(slc == 1); + EXIT_NOT_IMPLEMENTED(lds == 1); + EXIT_NOT_IMPLEMENTED(tfe == 1); + + uint32_t size = 2; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(vdata + 256); + inst.src_num = 3; + inst.src[0] = operand_parse(vaddr + 256); + inst.src[1] = operand_parse(srsrc * 4); + inst.src[2] = operand_parse(soffset); + + if (inst.src[2].type == ShaderOperandType::LiteralConstant) + { + inst.src[2].constant.u = buffer[size]; + size++; + } + + switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::BufferLoadFormatX; + inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; + inst.src[1].size = 4; + break; + case 0x01: + inst.type = ShaderInstructionType::BufferLoadFormatXy; + inst.format = ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen; + inst.src[1].size = 4; + inst.dst.size = 2; + break; + case 0x02: + inst.type = ShaderInstructionType::BufferLoadFormatXyz; + inst.format = ShaderInstructionFormat::Vdata3VaddrSvSoffsIdxen; +
inst.src[1].size = 4; + inst.dst.size = 3; + break; + case 0x03: + inst.type = ShaderInstructionType::BufferLoadFormatXyzw; + inst.format = ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxen; + inst.src[1].size = 4; + inst.dst.size = 4; + break; + case 0x04: + inst.type = ShaderInstructionType::BufferStoreFormatX; + inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; + inst.src[1].size = 4; + break; + case 0x05: + inst.type = ShaderInstructionType::BufferStoreFormatXy; + inst.format = ShaderInstructionFormat::Vdata2VaddrSvSoffsIdxen; + inst.src[1].size = 4; + inst.dst.size = 2; + break; + case 0x06: KYTY_NI("buffer_store_format_xyz"); break; + case 0x07: KYTY_NI("buffer_store_format_xyzw"); break; + case 0x08: KYTY_NI("buffer_load_ubyte"); break; + case 0x09: KYTY_NI("buffer_load_sbyte"); break; + case 0x0A: KYTY_NI("buffer_load_ushort"); break; + case 0x0B: KYTY_NI("buffer_load_sshort"); break; + case 0x0c: + inst.type = ShaderInstructionType::BufferLoadDword; + inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; + inst.src[1].size = 4; + break; + case 0x0D: KYTY_NI("buffer_load_dwordx2"); break; + case 0x0E: KYTY_NI("buffer_load_dwordx4"); break; + case 0x0F: KYTY_NI("buffer_load_dwordx3"); break; + case 0x18: KYTY_NI("buffer_store_byte"); break; + case 0x1A: KYTY_NI("buffer_store_short"); break; + case 0x1c: + inst.type = ShaderInstructionType::BufferStoreDword; + inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxen; + inst.src[1].size = 4; + break; + case 0x1D: KYTY_NI("buffer_store_dwordx2"); break; + case 0x1E: KYTY_NI("buffer_store_dwordx4"); break; + case 0x1F: KYTY_NI("buffer_store_dwordx3"); break; + case 0x30: KYTY_NI("buffer_atomic_swap"); break; + case 0x31: KYTY_NI("buffer_atomic_cmpswap"); break; + case 0x32: KYTY_NI("buffer_atomic_add"); break; + case 0x33: KYTY_NI("buffer_atomic_sub"); break; + case 0x34: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("buffer_atomic_rsub") + }; + break; + case 0x35: 
KYTY_NI("buffer_atomic_smin"); break; + case 0x36: KYTY_NI("buffer_atomic_umin"); break; + case 0x37: KYTY_NI("buffer_atomic_smax"); break; + case 0x38: KYTY_NI("buffer_atomic_umax"); break; + case 0x39: KYTY_NI("buffer_atomic_and"); break; + case 0x3A: KYTY_NI("buffer_atomic_or"); break; + case 0x3B: KYTY_NI("buffer_atomic_xor"); break; + case 0x3C: KYTY_NI("buffer_atomic_inc"); break; + case 0x3D: KYTY_NI("buffer_atomic_dec"); break; + case 0x3E: KYTY_NI("buffer_atomic_fcmpswap"); break; + case 0x3F: KYTY_NI("buffer_atomic_fmin"); break; + case 0x40: KYTY_NI("buffer_atomic_fmax"); break; + case 0x50: KYTY_NI("buffer_atomic_swap_x2"); break; + case 0x51: KYTY_NI("buffer_atomic_cmpswap_x2"); break; + case 0x52: KYTY_NI("buffer_atomic_add_x2"); break; + case 0x53: KYTY_NI("buffer_atomic_sub_x2"); break; + case 0x54: KYTY_NI("buffer_atomic_rsub_x2"); break; + case 0x55: KYTY_NI("buffer_atomic_smin_x2"); break; + case 0x56: KYTY_NI("buffer_atomic_umin_x2"); break; + case 0x57: KYTY_NI("buffer_atomic_smax_x2"); break; + case 0x58: KYTY_NI("buffer_atomic_umax_x2"); break; + case 0x59: KYTY_NI("buffer_atomic_and_x2"); break; + case 0x5A: KYTY_NI("buffer_atomic_or_x2"); break; + case 0x5B: KYTY_NI("buffer_atomic_xor_x2"); break; + case 0x5C: KYTY_NI("buffer_atomic_inc_x2"); break; + case 0x5D: KYTY_NI("buffer_atomic_dec_x2"); break; + case 0x5E: KYTY_NI("buffer_atomic_fcmpswap_x2"); break; + case 0x5F: KYTY_NI("buffer_atomic_fmin_x2"); break; + case 0x60: KYTY_NI("buffer_atomic_fmax_x2"); break; + case 0x71: + if (next_gen) + { + KYTY_UNKNOWN_OP(); + } else + { + KYTY_NI("buffer_wbinvl1") + }; + break; + case 0x80: KYTY_NI("buffer_load_format_d16_x"); break; + case 0x81: KYTY_NI("buffer_load_format_d16_xy"); break; + case 0x82: KYTY_NI("buffer_load_format_d16_xyz"); break; + case 0x83: KYTY_NI("buffer_load_format_d16_xyzw"); break; + case 0x84: KYTY_NI("buffer_store_format_d16_x"); break; + case 0x85: KYTY_NI("buffer_store_format_d16_xy"); break; + case 0x86: 
KYTY_NI("buffer_store_format_d16_xyz"); break; + case 0x87: KYTY_NI("buffer_store_format_d16_xyzw"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_ds) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("ds"); + + uint32_t opcode = (buffer[0] >> 18u) & 0xffu; + uint32_t gds = (buffer[0] >> 17u) & 0x1u; + uint32_t offset0 = (buffer[0] >> 0u) & 0xffu; + uint32_t offset1 = (buffer[0] >> 8u) & 0xffu; + + uint32_t vdst = (buffer[1] >> 24u) & 0xffu; + uint32_t data1 = (buffer[1] >> 16u) & 0xffu; + uint32_t data0 = (buffer[1] >> 8u) & 0xffu; + uint32_t addr = (buffer[1] >> 0u) & 0xffu; + + EXIT_NOT_IMPLEMENTED(addr != 0); + EXIT_NOT_IMPLEMENTED(data0 != 0); + EXIT_NOT_IMPLEMENTED(data1 != 0); + EXIT_NOT_IMPLEMENTED(offset0 != 0); + EXIT_NOT_IMPLEMENTED(offset1 != 0); + EXIT_NOT_IMPLEMENTED(gds == 0); + + uint32_t size = 2; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(vdst + 256); + inst.src_num = 0; + + switch (opcode) // NOLINT + { + case 0x00: KYTY_NI("ds_add_u32"); break; + case 0x01: KYTY_NI("ds_sub_u32"); break; + case 0x02: KYTY_NI("ds_rsub_u32"); break; + case 0x03: KYTY_NI("ds_inc_u32"); break; + case 0x04: KYTY_NI("ds_dec_u32"); break; + case 0x05: KYTY_NI("ds_min_i32"); break; + case 0x06: KYTY_NI("ds_max_i32"); break; + case 0x07: KYTY_NI("ds_min_u32"); break; + case 0x08: KYTY_NI("ds_max_u32"); break; + case 0x09: KYTY_NI("ds_and_b32"); break; + case 0x0A: KYTY_NI("ds_or_b32"); break; + case 0x0B: KYTY_NI("ds_xor_b32"); break; + case 0x0C: KYTY_NI("ds_mskor_b32"); break; + case 0x0D: KYTY_NI("ds_write_b32"); break; + case 0x0E: KYTY_NI("ds_write2_b32"); break; + case 0x0F: KYTY_NI("ds_write2st64_b32"); break; + case 0x10: KYTY_NI("ds_cmpst_b32"); break; + case 0x11: KYTY_NI("ds_cmpst_f32"); break; + case 0x12: 
KYTY_NI("ds_min_f32"); break; + case 0x13: KYTY_NI("ds_max_f32"); break; + case 0x14: KYTY_NI("ds_nop"); break; + case 0x18: KYTY_NI("ds_gws_sema_release_all"); break; + case 0x19: KYTY_NI("ds_gws_init"); break; + case 0x1A: KYTY_NI("ds_gws_sema_v"); break; + case 0x1B: KYTY_NI("ds_gws_sema_br"); break; + case 0x1C: KYTY_NI("ds_gws_sema_p"); break; + case 0x1D: KYTY_NI("ds_gws_barrier"); break; + case 0x1E: KYTY_NI("ds_write_b8"); break; + case 0x1F: KYTY_NI("ds_write_b16"); break; + case 0x20: KYTY_NI("ds_add_rtn_u32"); break; + case 0x21: KYTY_NI("ds_sub_rtn_u32"); break; + case 0x22: KYTY_NI("ds_rsub_rtn_u32"); break; + case 0x23: KYTY_NI("ds_inc_rtn_u32"); break; + case 0x24: KYTY_NI("ds_dec_rtn_u32"); break; + case 0x25: KYTY_NI("ds_min_rtn_i32"); break; + case 0x26: KYTY_NI("ds_max_rtn_i32"); break; + case 0x27: KYTY_NI("ds_min_rtn_u32"); break; + case 0x28: KYTY_NI("ds_max_rtn_u32"); break; + case 0x29: KYTY_NI("ds_and_rtn_b32"); break; + case 0x2A: KYTY_NI("ds_or_rtn_b32"); break; + case 0x2B: KYTY_NI("ds_xor_rtn_b32"); break; + case 0x2C: KYTY_NI("ds_mskor_rtn_b32"); break; + case 0x2D: KYTY_NI("ds_wrxchg_rtn_b32"); break; + case 0x2E: KYTY_NI("ds_wrxchg2_rtn_b32"); break; + case 0x2F: KYTY_NI("ds_wrxchg2st64_rtn_b32"); break; + case 0x30: KYTY_NI("ds_cmpst_rtn_b32"); break; + case 0x31: KYTY_NI("ds_cmpst_rtn_f32"); break; + case 0x32: KYTY_NI("ds_min_rtn_f32"); break; + case 0x33: KYTY_NI("ds_max_rtn_f32"); break; + case 0x34: KYTY_NI("ds_wrap_rtn_b32"); break; + case 0x35: KYTY_NI("ds_swizzle_b32"); break; + case 0x36: KYTY_NI("ds_read_b32"); break; + case 0x37: KYTY_NI("ds_read2_b32"); break; + case 0x38: KYTY_NI("ds_read2st64_b32"); break; + case 0x39: KYTY_NI("ds_read_i8"); break; + case 0x3A: KYTY_NI("ds_read_u8"); break; + case 0x3B: KYTY_NI("ds_read_i16"); break; + case 0x3C: KYTY_NI("ds_read_u16"); break; + case 0x3d: + inst.type = ShaderInstructionType::DsConsume; + inst.format = ShaderInstructionFormat::VdstGds; + break; + case 0x3e: + inst.type 
= ShaderInstructionType::DsAppend; + inst.format = ShaderInstructionFormat::VdstGds; + break; + case 0x3F: KYTY_NI("ds_ordered_count"); break; + case 0x40: KYTY_NI("ds_add_u64"); break; + case 0x41: KYTY_NI("ds_sub_u64"); break; + case 0x42: KYTY_NI("ds_rsub_u64"); break; + case 0x43: KYTY_NI("ds_inc_u64"); break; + case 0x44: KYTY_NI("ds_dec_u64"); break; + case 0x45: KYTY_NI("ds_min_i64"); break; + case 0x46: KYTY_NI("ds_max_i64"); break; + case 0x47: KYTY_NI("ds_min_u64"); break; + case 0x48: KYTY_NI("ds_max_u64"); break; + case 0x49: KYTY_NI("ds_and_b64"); break; + case 0x4A: KYTY_NI("ds_or_b64"); break; + case 0x4B: KYTY_NI("ds_xor_b64"); break; + case 0x4C: KYTY_NI("ds_mskor_b64"); break; + case 0x4D: KYTY_NI("ds_write_b64"); break; + case 0x4E: KYTY_NI("ds_write2_b64"); break; + case 0x4F: KYTY_NI("ds_write2st64_b64"); break; + case 0x50: KYTY_NI("ds_cmpst_b64"); break; + case 0x51: KYTY_NI("ds_cmpst_f64"); break; + case 0x52: KYTY_NI("ds_min_f64"); break; + case 0x53: KYTY_NI("ds_max_f64"); break; + case 0x60: KYTY_NI("ds_add_rtn_u64"); break; + case 0x61: KYTY_NI("ds_sub_rtn_u64"); break; + case 0x62: KYTY_NI("ds_rsub_rtn_u64"); break; + case 0x63: KYTY_NI("ds_inc_rtn_u64"); break; + case 0x64: KYTY_NI("ds_dec_rtn_u64"); break; + case 0x65: KYTY_NI("ds_min_rtn_i64"); break; + case 0x66: KYTY_NI("ds_max_rtn_i64"); break; + case 0x67: KYTY_NI("ds_min_rtn_u64"); break; + case 0x68: KYTY_NI("ds_max_rtn_u64"); break; + case 0x69: KYTY_NI("ds_and_rtn_b64"); break; + case 0x6A: KYTY_NI("ds_or_rtn_b64"); break; + case 0x6B: KYTY_NI("ds_xor_rtn_b64"); break; + case 0x6C: KYTY_NI("ds_mskor_rtn_b64"); break; + case 0x6D: KYTY_NI("ds_wrxchg_rtn_b64"); break; + case 0x6E: KYTY_NI("ds_wrxchg2_rtn_b64"); break; + case 0x6F: KYTY_NI("ds_wrxchg2st64_rtn_b64"); break; + case 0x70: KYTY_NI("ds_cmpst_rtn_b64"); break; + case 0x71: KYTY_NI("ds_cmpst_rtn_f64"); break; + case 0x72: KYTY_NI("ds_min_rtn_f64"); break; + case 0x73: KYTY_NI("ds_max_rtn_f64"); break; + case 0x76: 
KYTY_NI("ds_read_b64"); break; + case 0x77: KYTY_NI("ds_read2_b64"); break; + case 0x78: KYTY_NI("ds_read2st64_b64"); break; + case 0x7E: KYTY_NI("ds_condxchg32_rtn_b64"); break; + case 0x80: KYTY_NI("ds_add_src2_u32"); break; + case 0x81: KYTY_NI("ds_sub_src2_u32"); break; + case 0x82: KYTY_NI("ds_rsub_src2_u32"); break; + case 0x83: KYTY_NI("ds_inc_src2_u32"); break; + case 0x84: KYTY_NI("ds_dec_src2_u32"); break; + case 0x85: KYTY_NI("ds_min_src2_i32"); break; + case 0x86: KYTY_NI("ds_max_src2_i32"); break; + case 0x87: KYTY_NI("ds_min_src2_u32"); break; + case 0x88: KYTY_NI("ds_max_src2_u32"); break; + case 0x89: KYTY_NI("ds_and_src2_b32"); break; + case 0x8A: KYTY_NI("ds_or_src2_b32"); break; + case 0x8B: KYTY_NI("ds_xor_src2_b32"); break; + case 0x8D: KYTY_NI("ds_write_src2_b32"); break; + case 0x92: KYTY_NI("ds_min_src2_f32"); break; + case 0x93: KYTY_NI("ds_max_src2_f32"); break; + case 0xC0: KYTY_NI("ds_add_src2_u64"); break; + case 0xC1: KYTY_NI("ds_sub_src2_u64"); break; + case 0xC2: KYTY_NI("ds_rsub_src2_u64"); break; + case 0xC3: KYTY_NI("ds_inc_src2_u64"); break; + case 0xC4: KYTY_NI("ds_dec_src2_u64"); break; + case 0xC5: KYTY_NI("ds_min_src2_i64"); break; + case 0xC6: KYTY_NI("ds_max_src2_i64"); break; + case 0xC7: KYTY_NI("ds_min_src2_u64"); break; + case 0xC8: KYTY_NI("ds_max_src2_u64"); break; + case 0xC9: KYTY_NI("ds_and_src2_b64"); break; + case 0xCA: KYTY_NI("ds_or_src2_b64"); break; + case 0xCB: KYTY_NI("ds_xor_src2_b64"); break; + case 0xCD: KYTY_NI("ds_write_src2_b64"); break; + case 0xD2: KYTY_NI("ds_min_src2_f64"); break; + case 0xD3: KYTY_NI("ds_max_src2_f64"); break; + case 0xDE: KYTY_NI("ds_write_b96"); break; + case 0xDF: KYTY_NI("ds_write_b128"); break; + case 0xFD: KYTY_NI("ds_condxchg32_rtn_b128"); break; + case 0xFE: KYTY_NI("ds_read_b96"); break; + case 0xFF: KYTY_NI("ds_read_b128"); break; + + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// 
NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_mimg) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("mimg"); + + uint32_t slc = (buffer[0] >> 25u) & 0x1u; + uint32_t opcode = (buffer[0] >> 18u) & 0x7fu; + uint32_t lwe = (buffer[0] >> 17u) & 0x1u; + uint32_t tff = (buffer[0] >> 16u) & 0x1u; + uint32_t r128 = (buffer[0] >> 15u) & 0x1u; + uint32_t da = (buffer[0] >> 14u) & 0x1u; + uint32_t glc = (buffer[0] >> 13u) & 0x1u; + uint32_t unrm = (buffer[0] >> 12u) & 0x1u; + uint32_t dmask = (buffer[0] >> 8u) & 0xfu; + + uint32_t ssamp = (buffer[1] >> 21u) & 0x1fu; // S# + uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; // T# + uint32_t vdata = (buffer[1] >> 8u) & 0xffu; + uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; + + EXIT_NOT_IMPLEMENTED(da == 1); + EXIT_NOT_IMPLEMENTED(r128 == 1); + EXIT_NOT_IMPLEMENTED(tff == 1); + EXIT_NOT_IMPLEMENTED(lwe == 1); + EXIT_NOT_IMPLEMENTED(glc == 1); + EXIT_NOT_IMPLEMENTED(slc == 1); + EXIT_NOT_IMPLEMENTED(unrm == 1); + // EXIT_NOT_IMPLEMENTED(dmask != 0xf && dmask != 0x7); + + uint32_t size = 2; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(vdata + 256); + inst.src_num = 3; + inst.src[0] = operand_parse(vaddr + 256); + inst.src[1] = operand_parse(srsrc * 4); + inst.src[2] = operand_parse(ssamp * 4); + + switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::ImageLoad; + inst.src[0].size = 3; + inst.src[1].size = 8; + inst.src_num = 2; + if (dmask == 0xf) + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF; + inst.dst.size = 4; + } + break; + case 0x01: KYTY_NI("image_load_mip"); break; + case 0x02: KYTY_NI("image_load_pck"); break; + case 0x03: KYTY_NI("image_load_pck_sgn"); break; + case 0x04: KYTY_NI("image_load_mip_pck"); break; + case 0x05: KYTY_NI("image_load_mip_pck_sgn"); break; + case 0x08: + inst.type = ShaderInstructionType::ImageStore; + inst.src[0].size = 3; + 
inst.src[1].size = 8; + inst.src_num = 2; + if (dmask == 0xf) + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr3StDmaskF; + inst.dst.size = 4; + } + break; + case 0x09: + inst.type = ShaderInstructionType::ImageStoreMip; + inst.src[0].size = 4; + inst.src[1].size = 8; + inst.src_num = 2; + if (dmask == 0xf) + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr4StDmaskF; + inst.dst.size = 4; + } + break; + case 0x0A: KYTY_NI("image_store_pck"); break; + case 0x0B: KYTY_NI("image_store_mip_pck"); break; + case 0x0E: KYTY_NI("image_get_resinfo"); break; + case 0x0F: KYTY_NI("image_atomic_swap"); break; + case 0x10: KYTY_NI("image_atomic_cmpswap"); break; + case 0x11: KYTY_NI("image_atomic_add"); break; + case 0x12: KYTY_NI("image_atomic_sub"); break; + case 0x13: KYTY_NI("image_atomic_rsub"); break; + case 0x14: KYTY_NI("image_atomic_smin"); break; + case 0x15: KYTY_NI("image_atomic_umin"); break; + case 0x16: KYTY_NI("image_atomic_smax"); break; + case 0x17: KYTY_NI("image_atomic_umax"); break; + case 0x18: KYTY_NI("image_atomic_and"); break; + case 0x19: KYTY_NI("image_atomic_or"); break; + case 0x1A: KYTY_NI("image_atomic_xor"); break; + case 0x1B: KYTY_NI("image_atomic_inc"); break; + case 0x1C: KYTY_NI("image_atomic_dec"); break; + case 0x1D: KYTY_NI("image_atomic_fcmpswap"); break; + case 0x1E: KYTY_NI("image_atomic_fmin"); break; + case 0x1F: KYTY_NI("image_atomic_fmax"); break; + case 0x20: + inst.type = ShaderInstructionType::ImageSample; + inst.src[0].size = 3; + inst.src[1].size = 8; + inst.src[2].size = 4; + switch (dmask) + { + case 0x1: + { + inst.format = ShaderInstructionFormat::Vdata1Vaddr3StSsDmask1; + inst.dst.size = 1; + break; + } + case 0x3: + { + inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask3; + inst.dst.size = 2; + break; + } + case 0x5: + { + inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask5; + inst.dst.size = 2; + break; + } + case 0x7: + { + inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; + 
inst.dst.size = 3; + break; + } + case 0x8: + { + inst.format = ShaderInstructionFormat::Vdata1Vaddr3StSsDmask8; + inst.dst.size = 1; + break; + } + case 0x9: + { + inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9; + inst.dst.size = 2; + break; + } + case 0xf: + { + inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF; + inst.dst.size = 4; + break; + } + default:; + } + break; + case 0x21: KYTY_NI("image_sample_cl"); break; + case 0x22: KYTY_NI("image_sample_d"); break; + case 0x23: KYTY_NI("image_sample_d_cl"); break; + case 0x24: KYTY_NI("image_sample_l"); break; + case 0x25: KYTY_NI("image_sample_b"); break; + case 0x26: KYTY_NI("image_sample_b_cl"); break; + case 0x27: + inst.type = ShaderInstructionType::ImageSampleLz; + inst.src[0].size = 3; + inst.src[1].size = 8; + inst.src[2].size = 4; + switch (dmask) // NOLINT + { + case 0x7: + { + inst.format = ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7; + inst.dst.size = 3; + break; + } + default:; + } + break; + case 0x28: KYTY_NI("image_sample_c"); break; + case 0x29: KYTY_NI("image_sample_c_cl"); break; + case 0x2A: KYTY_NI("image_sample_c_d"); break; + case 0x2B: KYTY_NI("image_sample_c_d_cl"); break; + case 0x2C: KYTY_NI("image_sample_c_l"); break; + case 0x2D: KYTY_NI("image_sample_c_b"); break; + case 0x2E: KYTY_NI("image_sample_c_b_cl"); break; + case 0x2F: KYTY_NI("image_sample_c_lz"); break; + case 0x30: KYTY_NI("image_sample_o"); break; + case 0x31: KYTY_NI("image_sample_cl_o"); break; + case 0x32: KYTY_NI("image_sample_d_o"); break; + case 0x33: KYTY_NI("image_sample_d_cl_o"); break; + case 0x34: KYTY_NI("image_sample_l_o"); break; + case 0x35: KYTY_NI("image_sample_b_o"); break; + case 0x36: KYTY_NI("image_sample_b_cl_o"); break; + case 0x37: + inst.type = ShaderInstructionType::ImageSampleLzO; + inst.src[0].size = 4; + inst.src[1].size = 8; + inst.src[2].size = 4; + switch (dmask) // NOLINT + { + case 0x7: + { + inst.format = ShaderInstructionFormat::Vdata3Vaddr4StSsDmask7; + 
inst.dst.size = 3; + break; + } + default:; + } + break; + case 0x38: KYTY_NI("image_sample_c_o"); break; + case 0x39: KYTY_NI("image_sample_c_cl_o"); break; + case 0x3A: KYTY_NI("image_sample_c_d_o"); break; + case 0x3B: KYTY_NI("image_sample_c_d_cl_o"); break; + case 0x3C: KYTY_NI("image_sample_c_l_o"); break; + case 0x3D: KYTY_NI("image_sample_c_b_o"); break; + case 0x3E: KYTY_NI("image_sample_c_b_cl_o"); break; + case 0x3F: KYTY_NI("image_sample_c_lz_o"); break; + case 0x40: KYTY_NI("image_gather4"); break; + case 0x41: KYTY_NI("image_gather4_cl"); break; + case 0x44: KYTY_NI("image_gather4_l"); break; + case 0x45: KYTY_NI("image_gather4_b"); break; + case 0x46: KYTY_NI("image_gather4_b_cl"); break; + case 0x47: KYTY_NI("image_gather4_lz"); break; + case 0x48: KYTY_NI("image_gather4_c"); break; + case 0x49: KYTY_NI("image_gather4_c_cl"); break; + case 0x4C: KYTY_NI("image_gather4_c_l"); break; + case 0x4D: KYTY_NI("image_gather4_c_b"); break; + case 0x4E: KYTY_NI("image_gather4_c_b_cl"); break; + case 0x4F: KYTY_NI("image_gather4_c_lz"); break; + case 0x50: KYTY_NI("image_gather4_o"); break; + case 0x51: KYTY_NI("image_gather4_cl_o"); break; + case 0x54: KYTY_NI("image_gather4_l_o"); break; + case 0x55: KYTY_NI("image_gather4_b_o"); break; + case 0x56: KYTY_NI("image_gather4_b_cl_o"); break; + case 0x57: KYTY_NI("image_gather4_lz_o"); break; + case 0x58: KYTY_NI("image_gather4_c_o"); break; + case 0x59: KYTY_NI("image_gather4_c_cl_o"); break; + case 0x5C: KYTY_NI("image_gather4_c_l_o"); break; + case 0x5D: KYTY_NI("image_gather4_c_b_o"); break; + case 0x5E: KYTY_NI("image_gather4_c_b_cl_o"); break; + case 0x5F: KYTY_NI("image_gather4_c_lz_o"); break; + case 0x60: KYTY_NI("image_get_lod"); break; + case 0x68: KYTY_NI("image_sample_cd"); break; + case 0x69: KYTY_NI("image_sample_cd_cl"); break; + case 0x6A: KYTY_NI("image_sample_c_cd"); break; + case 0x6B: KYTY_NI("image_sample_c_cd_cl"); break; + case 0x6C: KYTY_NI("image_sample_cd_o"); break; + case 0x6D: 
KYTY_NI("image_sample_cd_cl_o"); break; + case 0x6E: KYTY_NI("image_sample_c_cd_o"); break; + case 0x6F: KYTY_NI("image_sample_c_cd_cl_o"); break; + case 0x7E: KYTY_NI("image_rsrc256"); break; + case 0x7F: KYTY_NI("image_sampler"); break; + case 0xA0: KYTY_NI("image_sample_a"); break; + case 0xA1: KYTY_NI("image_sample_cl_a"); break; + case 0xA5: KYTY_NI("image_sample_b_a"); break; + case 0xA6: KYTY_NI("image_sample_b_cl_a"); break; + case 0xA8: KYTY_NI("image_sample_c_a"); break; + case 0xA9: KYTY_NI("image_sample_c_cl_a"); break; + case 0xAD: KYTY_NI("image_sample_c_b_a"); break; + case 0xAE: KYTY_NI("image_sample_c_b_cl_a"); break; + case 0xB0: KYTY_NI("image_sample_o_a"); break; + case 0xB1: KYTY_NI("image_sample_cl_o_a"); break; + case 0xB5: KYTY_NI("image_sample_b_o_a"); break; + case 0xB6: KYTY_NI("image_sample_b_cl_o_a"); break; + case 0xB8: KYTY_NI("image_sample_c_o_a"); break; + case 0xB9: KYTY_NI("image_sample_c_cl_o_a"); break; + case 0xBD: KYTY_NI("image_sample_c_b_o_a"); break; + case 0xBE: KYTY_NI("image_sample_c_b_cl_o_a"); break; + case 0xC0: KYTY_NI("image_gather4_a"); break; + case 0xC1: KYTY_NI("image_gather4_cl_a"); break; + case 0xC5: KYTY_NI("image_gather4_b_a"); break; + case 0xC6: KYTY_NI("image_gather4_b_cl_a"); break; + case 0xC8: KYTY_NI("image_gather4_c_a"); break; + case 0xC9: KYTY_NI("image_gather4_c_cl_a"); break; + case 0xCD: KYTY_NI("image_gather4_c_b_a"); break; + case 0xCE: KYTY_NI("image_gather4_c_b_cl_a"); break; + case 0xD0: KYTY_NI("image_gather4_o_a"); break; + case 0xD1: KYTY_NI("image_gather4_cl_o_a"); break; + case 0xD5: KYTY_NI("image_gather4_b_o_a"); break; + case 0xD6: KYTY_NI("image_gather4_b_cl_o_a"); break; + case 0xD8: KYTY_NI("image_gather4_c_o_a"); break; + case 0xD9: KYTY_NI("image_gather4_c_cl_o_a"); break; + case 0xDD: KYTY_NI("image_gather4_c_b_o_a"); break; + case 0xDE: KYTY_NI("image_gather4_c_b_cl_o_a"); break; + + default: KYTY_UNKNOWN_OP(); + } + + if (inst.format == ShaderInstructionFormat::Unknown) + { 
+ printf("%s", dst->DbgDump().c_str()); + EXIT("unknown mimg format for opcode: 0x%02" PRIx32 " at addr 0x%08" PRIx32 ", dmask: 0x%" PRIx32 "\n", opcode, pc, dmask); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +// NOLINTNEXTLINE(readability-function-cognitive-complexity) +KYTY_SHADER_PARSER(shader_parse_mtbuf) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("mtbuf"); + + uint32_t opcode = (buffer[0] >> 16u) & 0x7u; + uint32_t dfmt = (buffer[0] >> 19u) & 0xfu; + uint32_t nfmt = (buffer[0] >> 23u) & 0x7u; + uint32_t glc = (buffer[0] >> 14u) & 0x1u; + uint32_t idxen = (buffer[0] >> 13u) & 0x1u; + uint32_t offen = (buffer[0] >> 12u) & 0x1u; + uint32_t offset = (buffer[0] >> 0u) & 0xfffu; + + uint32_t soffset = (buffer[1] >> 24u) & 0xffu; + uint32_t tfe = (buffer[1] >> 23u) & 0x1u; + uint32_t slc = (buffer[1] >> 22u) & 0x1u; + uint32_t srsrc = (buffer[1] >> 16u) & 0x1fu; + uint32_t vdata = (buffer[1] >> 8u) & 0xffu; + uint32_t vaddr = (buffer[1] >> 0u) & 0xffu; + + EXIT_NOT_IMPLEMENTED(idxen == 0); + // EXIT_NOT_IMPLEMENTED(offen == 1); + EXIT_NOT_IMPLEMENTED(offset != 0); + EXIT_NOT_IMPLEMENTED(glc == 1); + EXIT_NOT_IMPLEMENTED(slc == 1); + EXIT_NOT_IMPLEMENTED(tfe == 1); + // EXIT_NOT_IMPLEMENTED(dfmt != 14); + // EXIT_NOT_IMPLEMENTED(nfmt != 7); + + if ((dfmt != 14 && dfmt != 4) || nfmt != 7) + { + EXIT("unknown format: dfmt = %d, nfmt = %d at addr 0x%08" PRIx32 " (hash0 = 0x%08" PRIx32 ", crc32 = 0x%08" PRIx32 ")\n", dfmt, + nfmt, pc, dst->GetHash0(), dst->GetCrc32()); + } + + uint32_t size = 2; + + ShaderInstruction inst; + inst.pc = pc; + inst.dst = operand_parse(vdata + 256); + inst.src_num = 3; + inst.src[0] = operand_parse(vaddr + 256); + inst.src[1] = operand_parse(srsrc * 4); + inst.src[2] = operand_parse(soffset); + + if (inst.src[2].type == ShaderOperandType::LiteralConstant) + { + inst.src[2].constant.u = buffer[size]; + size++; + } + + inst.src[1].size = 4; + + 
switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::TBufferLoadFormatX; + inst.format = ShaderInstructionFormat::Vdata1VaddrSvSoffsIdxenFloat1; + EXIT_NOT_IMPLEMENTED(offen == 1); + EXIT_NOT_IMPLEMENTED(!(dfmt == 4 && nfmt == 7)); + break; + case 0x01: KYTY_NI("tbuffer_load_format_xy"); break; + case 0x02: KYTY_NI("tbuffer_load_format_xyz"); break; + case 0x03: + inst.type = ShaderInstructionType::TBufferLoadFormatXyzw; + inst.format = (offen == 1 ? ShaderInstructionFormat::Vdata4Vaddr2SvSoffsOffenIdxenFloat4 + : ShaderInstructionFormat::Vdata4VaddrSvSoffsIdxenFloat4); + inst.src[0].size += static_cast<int>(offen); /* vaddr operand grows by one register when offen is set */ + inst.dst.size = 4; + EXIT_NOT_IMPLEMENTED(!(dfmt == 14 && nfmt == 7)); + break; + case 0x04: KYTY_NI("tbuffer_store_format_x"); break; + case 0x05: KYTY_NI("tbuffer_store_format_xy"); break; + case 0x06: KYTY_NI("tbuffer_store_format_xyz"); break; + case 0x07: KYTY_NI("tbuffer_store_format_xyzw"); break; + case 0x08: KYTY_NI("tbuffer_load_format_d16_x"); break; + case 0x09: KYTY_NI("tbuffer_load_format_d16_xy"); break; + case 0x0A: KYTY_NI("tbuffer_load_format_d16_xyz"); break; + case 0x0B: KYTY_NI("tbuffer_load_format_d16_xyzw"); break; + case 0x0C: KYTY_NI("tbuffer_store_format_d16_x"); break; + case 0x0D: KYTY_NI("tbuffer_store_format_d16_xy"); break; + case 0x0E: KYTY_NI("tbuffer_store_format_d16_xyz"); break; + case 0x0F: KYTY_NI("tbuffer_store_format_d16_xyzw"); break; + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return size; +} + +KYTY_SHADER_PARSER(shader_parse_vintrp) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer == nullptr || buffer < src); + + KYTY_TYPE_STR("vintrp"); + + uint32_t opcode = (buffer[0] >> 16u) & 0x3u; + uint32_t vdst = (buffer[0] >> 18u) & 0xffu; + uint32_t attr = (buffer[0] >> 10u) & 0x3fu; + uint32_t chan = (buffer[0] >> 8u) & 0x3u; + uint32_t vsrc = (buffer[0] >> 0u) & 0xffu; + + ShaderInstruction inst; + inst.pc = pc; + inst.src[0] = operand_parse(vsrc
+ 256); + inst.dst = operand_parse(vdst + 256); + inst.src[1].type = ShaderOperandType::IntegerInlineConstant; + inst.src[1].constant.u = attr; + inst.src[2].type = ShaderOperandType::IntegerInlineConstant; + inst.src[2].constant.u = chan; + inst.src_num = 3; + + switch (opcode) + { + case 0x00: + inst.type = ShaderInstructionType::VInterpP1F32; + inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; + break; + case 0x01: + inst.type = ShaderInstructionType::VInterpP2F32; + inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; + break; + case 0x02: KYTY_NI("v_interp_mov_f32"); break; + default: KYTY_UNKNOWN_OP(); + } + + dst->GetInstructions().Add(inst); + + return 1; +} + +KYTY_SHADER_PARSER(shader_parse) +{ + EXIT_IF(dst == nullptr); + EXIT_IF(src == nullptr); + EXIT_IF(buffer != nullptr); + + auto type = dst->GetType(); + + dst->GetInstructions().Clear(); + dst->GetLabels().Clear(); + dst->GetIndirectLabels().Clear(); + + const auto* ptr = src + pc / 4; + for (;;) + { + auto instruction = ptr[0]; + auto pc = 4 * static_cast(ptr - src); + + if ((instruction & 0x80000000u) == 0x00000000) + { + ptr += shader_parse_vop2(pc, src, ptr, dst, next_gen); + } else if ((instruction & 0xF8000000u) == 0xC0000000) + { + EXIT_NOT_IMPLEMENTED(next_gen); + ptr += shader_parse_smrd(pc, src, ptr, dst, next_gen); + } else if ((instruction & 0xC0000000u) == 0x80000000) + { + ptr += shader_parse_sop2(pc, src, ptr, dst, next_gen); + } else + { + switch (instruction >> 26u) + { + case 0x32: ptr += shader_parse_vintrp(pc, src, ptr, dst, next_gen); break; + case 0x34: + EXIT_NOT_IMPLEMENTED(next_gen); + ptr += shader_parse_vop3(pc, src, ptr, dst, next_gen); + break; + case 0x35: + EXIT_NOT_IMPLEMENTED(!next_gen); + ptr += shader_parse_vop3(pc, src, ptr, dst, next_gen); + break; + case 0x36: ptr += shader_parse_ds(pc, src, ptr, dst, next_gen); break; + case 0x38: ptr += shader_parse_mubuf(pc, src, ptr, dst, next_gen); break; + case 0x3a: ptr += shader_parse_mtbuf(pc, src, ptr, dst, 
next_gen); break; + case 0x3c: ptr += shader_parse_mimg(pc, src, ptr, dst, next_gen); break; + case 0x3d: + EXIT_NOT_IMPLEMENTED(!next_gen); + ptr += shader_parse_smem(pc, src, ptr, dst, next_gen); + break; + case 0x3e: ptr += shader_parse_exp(pc, src, ptr, dst, next_gen); break; + default: + { + printf("%s", dst->DbgDump().c_str()); + EXIT("unknown code 0x%08" PRIx32 " at addr 0x%08" PRIx32 "\n", ptr[0], pc); + } + } + } + + if ((instruction == 0xBF810000 && (type == ShaderType::Vertex || type == ShaderType::Pixel || type == ShaderType::Compute) && + !dst->GetLabels().Contains(4 * static_cast(ptr - src), [](auto label, auto pc) { return label.GetDst() == pc; })) || + (instruction == 0xBE802000 && type == ShaderType::Fetch)) + { + break; + } + } + + return ptr - src; +} + +void ShaderParse(const uint32_t* src, ShaderCode* dst) +{ + shader_parse(0, src, nullptr, dst, Config::IsNextGen()); +} + +} // namespace Kyty::Libs::Graphics + +#endif // KYTY_EMU_ENABLED diff --git a/source/emulator/src/Graphics/ShaderSpirv.cpp b/source/emulator/src/Graphics/ShaderSpirv.cpp index cd198a5..61f61a8 100644 --- a/source/emulator/src/Graphics/ShaderSpirv.cpp +++ b/source/emulator/src/Graphics/ShaderSpirv.cpp @@ -3,8 +3,9 @@ #include "Kyty/Core/ArrayWrapper.h" #include "Kyty/Core/Common.h" #include "Kyty/Core/DbgAssert.h" +#include "Kyty/Core/Hashmap.h" #include "Kyty/Core/MagicEnum.h" -#include "Kyty/Core/String.h" +#include "Kyty/Core/String8.h" #include "Kyty/Core/Vector.h" #include "Emulator/Config.h" @@ -13,13 +14,26 @@ #ifdef KYTY_EMU_ENABLED #define KYTY_RECOMPILER_ARGS \ - [[maybe_unused]] uint32_t index, [[maybe_unused]] const ShaderCode &code, [[maybe_unused]] String *dst_source, \ - [[maybe_unused]] Spirv *spirv, [[maybe_unused]] const char32_t **param, [[maybe_unused]] SccCheck scc_check + [[maybe_unused]] uint32_t index, [[maybe_unused]] const ShaderCode &code, [[maybe_unused]] String8 *dst_source, \ + [[maybe_unused]] Spirv *spirv, [[maybe_unused]] const char *const 
*param, [[maybe_unused]] SccCheck scc_check #define KYTY_RECOMPILER_FUNC(f) static bool f(KYTY_RECOMPILER_ARGS) +namespace Kyty::Core { + +KYTY_HASH_DEFINE_CALC(Kyty::Libs::Graphics::ShaderInstructionTypeFormat) +{ + return hash32(static_cast(key->type)) ^ hash64(static_cast(key->format)); +} + +KYTY_HASH_DEFINE_EQUALS(Kyty::Libs::Graphics::ShaderInstructionTypeFormat) +{ + return key_a->type == key_b->type && key_a->format == key_b->format; +} +} // namespace Kyty::Core + namespace Kyty::Libs::Graphics { -constexpr char32_t FUNC_FETCH_4[] = UR"( +constexpr char FUNC_FETCH_4[] = R"( ; Function fetch_f1_f1_f1_f1_vf4_ ; void fetch(out float p1, out float p2, out float p3, out float p4, in vec4 attr) ; { @@ -51,7 +65,7 @@ constexpr char32_t FUNC_FETCH_4[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_FETCH_3[] = UR"( +constexpr char FUNC_FETCH_3[] = R"( ; Function fetch_f1_f1_f1_vf3_ ; void fetch(out float p1, out float p2, out float p3, in vec3 attr) ; { @@ -78,7 +92,7 @@ constexpr char32_t FUNC_FETCH_3[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_FETCH_2[] = UR"( +constexpr char FUNC_FETCH_2[] = R"( ; Function fetch_f1_f1_vf2_ ; void fetch(out float p1, out float p2, in vec2 attr) ; { @@ -100,7 +114,7 @@ constexpr char32_t FUNC_FETCH_2[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_FETCH_1[] = UR"( +constexpr char FUNC_FETCH_1[] = R"( ; Function fetch_f1_f1_ ; void fetch(out float p1, in float attr) ; { @@ -116,7 +130,7 @@ constexpr char32_t FUNC_FETCH_1[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_ABS_DIFF[] = UR"( +constexpr char FUNC_ABS_DIFF[] = R"( ; uint abs_diff(uint u1, uint u2) ; { ; return max(u1,u2)-min(u1,u2); @@ -132,7 +146,7 @@ constexpr char32_t FUNC_ABS_DIFF[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_WQM[] = UR"( +constexpr char FUNC_WQM[] = R"( ; uint w(uint u, uint s, uint m) ; { ; return ((u >> s) & 0xF) != 0 ? 
m : 0; @@ -150,7 +164,7 @@ constexpr char32_t FUNC_WQM[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_ADDC[] = UR"( +constexpr char FUNC_ADDC[] = R"( ; uvec2 addc(uint a, uint b, uint c) ; { ; uint cc = 0; @@ -183,7 +197,32 @@ constexpr char32_t FUNC_ADDC[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_MIPMAP[] = UR"( +constexpr char FUNC_LSHL_ADD[] = R"( + ; uvec2 lshl_add(uint a, uint b, uint n) + ; { + ; uint cc = 0; + ; uint sum = uaddCarry(a << n, b, cc); + ; return uvec2(sum, ((a >> (32-n)) !=0) ? 1u : cc); + ; } + %lshl_add = OpFunction %v2uint None %function_u2_u_u_u + %ladd_25 = OpFunctionParameter %uint + %ladd_26 = OpFunctionParameter %uint + %ladd_27 = OpFunctionParameter %uint + %ladd_29 = OpLabel + %ladd_124 = OpShiftLeftLogical %uint %ladd_25 %ladd_27 + %ladd_127 = OpIAddCarry %ResTypeU %ladd_124 %ladd_26 + %ladd_128 = OpCompositeExtract %uint %ladd_127 1 + %ladd_129 = OpCompositeExtract %uint %ladd_127 0 + %ladd_133 = OpISub %uint %uint_32 %ladd_27 + %ladd_134 = OpShiftRightLogical %uint %ladd_25 %ladd_133 + %ladd_135 = OpINotEqual %bool %ladd_134 %uint_0 + %ladd_138 = OpSelect %uint %ladd_135 %uint_1 %ladd_128 + %ladd_139 = OpCompositeConstruct %v2uint %ladd_129 %ladd_138 + OpReturnValue %ladd_139 + OpFunctionEnd +)"; + +constexpr char FUNC_MIPMAP[] = R"( ; uvec2 mipmap(uint lod, uint width, uint height) ; { ; uint mip_width = width; @@ -262,7 +301,7 @@ constexpr char32_t FUNC_MIPMAP[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_ORDERED[] = UR"( +constexpr char FUNC_ORDERED[] = R"( ; bool unordered(float f1, float f2) ; { ; return (isnan(f1) || isnan(f2)); @@ -296,7 +335,7 @@ constexpr char32_t FUNC_ORDERED[] = UR"( OpFunctionEnd )"; -constexpr char32_t FUNC_MUL_EXTENDED[] = UR"( +constexpr char FUNC_MUL_EXTENDED[] = R"( ; uint mul_lo_uint(uint u1, uint u2) ; { ; uint r1, r2; @@ -355,7 +394,179 @@ constexpr char32_t FUNC_MUL_EXTENDED[] = UR"( OpFunctionEnd )"; -constexpr char32_t BUFFER_LOAD_FLOAT1[] = UR"( +constexpr char 
FUNC_SHIFT_RIGHT[] = R"( + ; void shift_r(out uint d0, out uint d1, in uint s0, in uint s1, in uint n) + ; { + ; d0 = n < 32 ? (s0 >> n) | (n != 0 ? (s1 << (32 - n)) : 0) : (n < 64 ? s1 >> (n - 32) : 0) ; + ; d1 = n < 32 ? s1 >> n : 0; + ; } +%shift_right = OpFunction %void None %function_shift + %shr_9 = OpFunctionParameter %_ptr_Function_uint + %shr_10 = OpFunctionParameter %_ptr_Function_uint + %shr_11 = OpFunctionParameter %_ptr_Function_uint + %shr_12 = OpFunctionParameter %_ptr_Function_uint + %shr_13 = OpFunctionParameter %_ptr_Function_uint + %shr_15 = OpLabel + %shr_27 = OpVariable %_ptr_Function_uint Function + %shr_36 = OpVariable %_ptr_Function_uint Function + %shr_50 = OpVariable %_ptr_Function_uint Function + %shr_62 = OpVariable %_ptr_Function_uint Function + %shr_23 = OpLoad %uint %shr_13 + %shr_26 = OpULessThan %bool %shr_23 %uint_32 + OpSelectionMerge %shr_29 None + OpBranchConditional %shr_26 %shr_28 %shr_46 + %shr_28 = OpLabel + %shr_30 = OpLoad %uint %shr_11 + %shr_31 = OpLoad %uint %shr_13 + %shr_32 = OpShiftRightLogical %uint %shr_30 %shr_31 + %shr_33 = OpLoad %uint %shr_13 + %shr_35 = OpINotEqual %bool %shr_33 %uint_0 + OpSelectionMerge %shr_38 None + OpBranchConditional %shr_35 %shr_37 %shr_43 + %shr_37 = OpLabel + %shr_39 = OpLoad %uint %shr_12 + %shr_40 = OpLoad %uint %shr_13 + %shr_41 = OpISub %uint %uint_32 %shr_40 + %shr_42 = OpShiftLeftLogical %uint %shr_39 %shr_41 + OpStore %shr_36 %shr_42 + OpBranch %shr_38 + %shr_43 = OpLabel + OpStore %shr_36 %uint_0 + OpBranch %shr_38 + %shr_38 = OpLabel + %shr_331 = OpPhi %uint %shr_42 %shr_37 %uint_0 %shr_43 + %shr_45 = OpBitwiseOr %uint %shr_32 %shr_331 + OpStore %shr_27 %shr_45 + OpBranch %shr_29 + %shr_46 = OpLabel + %shr_47 = OpLoad %uint %shr_13 + %shr_49 = OpULessThan %bool %shr_47 %uint_64 + OpSelectionMerge %shr_52 None + OpBranchConditional %shr_49 %shr_51 %shr_57 + %shr_51 = OpLabel + %shr_53 = OpLoad %uint %shr_12 + %shr_54 = OpLoad %uint %shr_13 + %shr_55 = OpISub %uint %shr_54 
%uint_32 + %shr_56 = OpShiftRightLogical %uint %shr_53 %shr_55 + OpStore %shr_50 %shr_56 + OpBranch %shr_52 + %shr_57 = OpLabel + OpStore %shr_50 %uint_0 + OpBranch %shr_52 + %shr_52 = OpLabel + %shr_330 = OpPhi %uint %shr_56 %shr_51 %uint_0 %shr_57 + OpStore %shr_27 %shr_330 + OpBranch %shr_29 + %shr_29 = OpLabel + %shr_332 = OpPhi %uint %shr_45 %shr_38 %shr_330 %shr_52 + OpStore %shr_9 %shr_332 + %shr_60 = OpLoad %uint %shr_13 + %shr_61 = OpULessThan %bool %shr_60 %uint_32 + OpSelectionMerge %shr_64 None + OpBranchConditional %shr_61 %shr_63 %shr_68 + %shr_63 = OpLabel + %shr_65 = OpLoad %uint %shr_12 + %shr_66 = OpLoad %uint %shr_13 + %shr_67 = OpShiftRightLogical %uint %shr_65 %shr_66 + OpStore %shr_62 %shr_67 + OpBranch %shr_64 + %shr_68 = OpLabel + OpStore %shr_62 %uint_0 + OpBranch %shr_64 + %shr_64 = OpLabel + %shr_333 = OpPhi %uint %shr_67 %shr_63 %uint_0 %shr_68 + OpStore %shr_10 %shr_333 + OpReturn + OpFunctionEnd +)"; + +constexpr char FUNC_SHIFT_LEFT[] = R"( + ; void shift_l(out uint d0, out uint d1, in uint s0, in uint s1, in uint n) + ; { + ; d0 = n < 32 ? s0 << n : 0; + ; d1 = n < 32 ? (n!=0 ? s0 >> (32-n) : 0) | (s1 << n) : (n < 64 ? 
s0 << (n-32) : 0); + ; } +%shift_left = OpFunction %void None %function_shift + %shl_16 = OpFunctionParameter %_ptr_Function_uint + %shl_17 = OpFunctionParameter %_ptr_Function_uint + %shl_18 = OpFunctionParameter %_ptr_Function_uint + %shl_19 = OpFunctionParameter %_ptr_Function_uint + %shl_20 = OpFunctionParameter %_ptr_Function_uint + %shl_22 = OpLabel + %shl_72 = OpVariable %_ptr_Function_uint Function + %shl_82 = OpVariable %_ptr_Function_uint Function + %shl_87 = OpVariable %_ptr_Function_uint Function + %shl_103 = OpVariable %_ptr_Function_uint Function + %shl_70 = OpLoad %uint %shl_20 + %shl_71 = OpULessThan %bool %shl_70 %uint_32 + OpSelectionMerge %shl_74 None + OpBranchConditional %shl_71 %shl_73 %shl_78 + %shl_73 = OpLabel + %shl_75 = OpLoad %uint %shl_18 + %shl_76 = OpLoad %uint %shl_20 + %shl_77 = OpShiftLeftLogical %uint %shl_75 %shl_76 + OpStore %shl_72 %shl_77 + OpBranch %shl_74 + %shl_78 = OpLabel + OpStore %shl_72 %uint_0 + OpBranch %shl_74 + %shl_74 = OpLabel + %shl_334 = OpPhi %uint %shl_77 %shl_73 %uint_0 %shl_78 + OpStore %shl_16 %shl_334 + %shl_80 = OpLoad %uint %shl_20 + %shl_81 = OpULessThan %bool %shl_80 %uint_32 + OpSelectionMerge %shl_84 None + OpBranchConditional %shl_81 %shl_83 %shl_100 + %shl_83 = OpLabel + %shl_85 = OpLoad %uint %shl_20 + %shl_86 = OpINotEqual %bool %shl_85 %uint_0 + OpSelectionMerge %shl_89 None + OpBranchConditional %shl_86 %shl_88 %shl_94 + %shl_88 = OpLabel + %shl_90 = OpLoad %uint %shl_18 + %shl_91 = OpLoad %uint %shl_20 + %shl_92 = OpISub %uint %uint_32 %shl_91 + %shl_93 = OpShiftRightLogical %uint %shl_90 %shl_92 + OpStore %shl_87 %shl_93 + OpBranch %shl_89 + %shl_94 = OpLabel + OpStore %shl_87 %uint_0 + OpBranch %shl_89 + %shl_89 = OpLabel + %shl_336 = OpPhi %uint %shl_93 %shl_88 %uint_0 %shl_94 + %shl_96 = OpLoad %uint %shl_19 + %shl_97 = OpLoad %uint %shl_20 + %shl_98 = OpShiftLeftLogical %uint %shl_96 %shl_97 + %shl_99 = OpBitwiseOr %uint %shl_336 %shl_98 + OpStore %shl_82 %shl_99 + OpBranch %shl_84 + 
%shl_100 = OpLabel + %shl_101 = OpLoad %uint %shl_20 + %shl_102 = OpULessThan %bool %shl_101 %uint_64 + OpSelectionMerge %shl_105 None + OpBranchConditional %shl_102 %shl_104 %shl_110 + %shl_104 = OpLabel + %shl_106 = OpLoad %uint %shl_18 + %shl_107 = OpLoad %uint %shl_20 + %shl_108 = OpISub %uint %shl_107 %uint_32 + %shl_109 = OpShiftLeftLogical %uint %shl_106 %shl_108 + OpStore %shl_103 %shl_109 + OpBranch %shl_105 + %shl_110 = OpLabel + OpStore %shl_103 %uint_0 + OpBranch %shl_105 + %shl_105 = OpLabel + %shl_335 = OpPhi %uint %shl_109 %shl_104 %uint_0 %shl_110 + OpStore %shl_82 %shl_335 + OpBranch %shl_84 + %shl_84 = OpLabel + %shl_337 = OpPhi %uint %shl_99 %shl_89 %shl_335 %shl_105 + OpStore %shl_17 %shl_337 + OpReturn + OpFunctionEnd +)"; + +constexpr char BUFFER_LOAD_FLOAT1[] = R"( ; void buffer_load_float1(out float p1, in int index, in int offset, in int stride, in int buffer_index) ; { ; int addr = (offset + index * stride)/4; @@ -384,7 +595,7 @@ constexpr char32_t BUFFER_LOAD_FLOAT1[] = UR"( OpFunctionEnd )"; -constexpr char32_t BUFFER_LOAD_FLOAT4[] = UR"( +constexpr char BUFFER_LOAD_FLOAT4[] = R"( ; Function buffer_load_float4 ;void buffer_load_float4(out float p1, out float p2, out float p3, out float p4, in int index, ; in int offset, in int stride, in int buffer_index) @@ -436,7 +647,7 @@ constexpr char32_t BUFFER_LOAD_FLOAT4[] = UR"( OpFunctionEnd )"; -constexpr char32_t BUFFER_STORE_FLOAT1[] = UR"( +constexpr char BUFFER_STORE_FLOAT1[] = R"( ; void buffer_store_float1(in float p1, in int index, in int offset, in int stride, in int buffer_index) ; { ; int addr = (offset + index * stride)/4; @@ -465,7 +676,7 @@ constexpr char32_t BUFFER_STORE_FLOAT1[] = UR"( OpFunctionEnd )"; -constexpr char32_t BUFFER_STORE_FLOAT2[] = UR"( +constexpr char BUFFER_STORE_FLOAT2[] = R"( ; void buffer_store_float2(in float p1, in float p2, in int index, in int offset, in int stride, in int buffer_index) ; { ; int addr = (offset + index * stride)/4; @@ -501,7 +712,7 @@ 
constexpr char32_t BUFFER_STORE_FLOAT2[] = UR"( OpFunctionEnd )"; -constexpr char32_t TBUFFER_LOAD_FORMAT_XYZW[] = UR"( +constexpr char TBUFFER_LOAD_FORMAT_XYZW[] = R"( ; Function tbuffer_load_format_xyzw ; void tbuffer_load_format_xyzw(out float p1, out float p2, out float p3, out float p4, ; in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt) @@ -558,7 +769,7 @@ constexpr char32_t TBUFFER_LOAD_FORMAT_XYZW[] = UR"( OpFunctionEnd )"; -constexpr char32_t TBUFFER_LOAD_FORMAT_X[] = UR"( +constexpr char TBUFFER_LOAD_FORMAT_X[] = R"( ; void tbuffer_load_format_x(out float p1, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt) ; { ; if (dfmt_nfmt == 36 || dfmt_nfmt == 39) // dmft = 4, nfmt = 4 or 7 @@ -603,7 +814,7 @@ constexpr char32_t TBUFFER_LOAD_FORMAT_X[] = UR"( OpFunctionEnd )"; -constexpr char32_t TBUFFER_STORE_FORMAT_X[] = UR"( +constexpr char TBUFFER_STORE_FORMAT_X[] = R"( ; void tbuffer_store_format_x(in float p1, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt) ; { ; if (dfmt_nfmt == 36 || dfmt_nfmt == 39) // dmft = 4, nfmt = 4 or 7 @@ -648,7 +859,7 @@ constexpr char32_t TBUFFER_STORE_FORMAT_X[] = UR"( OpFunctionEnd )"; -constexpr char32_t TBUFFER_STORE_FORMAT_XY[] = UR"( +constexpr char TBUFFER_STORE_FORMAT_XY[] = R"( ; void tbuffer_store_format_xy(in float p1, in float p2, in int index, in int offset, in int stride, in int buffer_index, in int dfmt_nfmt) ; { ; if (dfmt_nfmt == 92 || dfmt_nfmt == 95) // dmft = 11, nfmt = 4 or 7 @@ -698,7 +909,7 @@ constexpr char32_t TBUFFER_STORE_FORMAT_XY[] = UR"( OpFunctionEnd )"; -constexpr char32_t SBUFFER_LOAD_DWORD[] = UR"( +constexpr char SBUFFER_LOAD_DWORD[] = R"( ; void sbuffer_load_dword(out uint p1, in int offset, in int buffer_index) ; { ; int addr = offset/4; @@ -722,7 +933,7 @@ constexpr char32_t SBUFFER_LOAD_DWORD[] = UR"( OpFunctionEnd )"; -constexpr char32_t SBUFFER_LOAD_DWORD_2[] = UR"( +constexpr char 
SBUFFER_LOAD_DWORD_2[] = R"( ; void sbuffer_load_dwordx2(out uint p1, out uint p2, in int offset, in int buffer_index) ; { ; int addr = offset/4; @@ -754,7 +965,7 @@ constexpr char32_t SBUFFER_LOAD_DWORD_2[] = UR"( OpFunctionEnd )"; -constexpr char32_t SBUFFER_LOAD_DWORD_4[] = UR"( +constexpr char SBUFFER_LOAD_DWORD_4[] = R"( ; void sbuffer_load_dwordx4(out uint p1, out uint p2, out uint p3, out uint p4, in int offset, in int buffer_index) ; { ; int addr = offset/4; @@ -802,7 +1013,7 @@ constexpr char32_t SBUFFER_LOAD_DWORD_4[] = UR"( OpFunctionEnd )"; -constexpr char32_t SBUFFER_LOAD_DWORD_8[] = UR"( +constexpr char SBUFFER_LOAD_DWORD_8[] = R"( ; void sbuffer_load_dwordx8(out uint p1, out uint p2, out uint p3, out uint p4, ; out uint p5, out uint p6, out uint p7, out uint p8, in int offset, in int buffer_index) ; { @@ -883,7 +1094,7 @@ constexpr char32_t SBUFFER_LOAD_DWORD_8[] = UR"( OpFunctionEnd )"; -constexpr char32_t SBUFFER_LOAD_DWORD_16[] = UR"( +constexpr char SBUFFER_LOAD_DWORD_16[] = R"( ; void sbuffer_load_dwordx16(out uint p1, out uint p2, out uint p3, out uint p4, ; out uint p5, out uint p6, out uint p7, out uint p8, ; out uint p9, out uint p10, out uint p11, out uint p12, @@ -1030,7 +1241,7 @@ constexpr char32_t SBUFFER_LOAD_DWORD_16[] = UR"( OpFunctionEnd )"; -constexpr char32_t EMBEDDED_SHADER_VS_0[] = UR"( +constexpr char EMBEDDED_SHADER_VS_0[] = R"( ; #version 450 ; ; void main() @@ -1113,7 +1324,7 @@ constexpr char32_t EMBEDDED_SHADER_VS_0[] = UR"( OpFunctionEnd )"; -constexpr char32_t EMBEDDED_SHADER_PS_0[] = UR"( +constexpr char EMBEDDED_SHADER_PS_0[] = R"( ; #version 450 ; ; layout(location = 0) out vec4 outColor; @@ -1149,7 +1360,7 @@ constexpr char32_t EMBEDDED_SHADER_PS_0[] = UR"( OpFunctionEnd )"; -constexpr char32_t EXECZ[] = UR"( +constexpr char EXECZ[] = R"( %z191_ = OpLoad %uint %exec_lo %z192_ = OpIEqual %bool %z191_ %uint_0 %z193_ = OpLoad %uint %exec_hi @@ -1159,14 +1370,14 @@ constexpr char32_t EXECZ[] = UR"( OpStore %execz %z196_ 
)"; -constexpr char32_t SCC_NZ_1[] = UR"( +constexpr char SCC_NZ_1[] = R"( %snz1_118_ = OpLoad %uint % %snz1_121_ = OpINotEqual %bool %snz1_118_ %uint_0 %snz1_123_ = OpSelect %uint %snz1_121_ %uint_1 %uint_0 OpStore %scc %snz1_123_ )"; -constexpr char32_t SCC_NZ_2[] = UR"( +constexpr char SCC_NZ_2[] = R"( %snz2_124_ = OpLoad %uint % %snz2_125_ = OpINotEqual %bool %snz2_124_ %uint_0 %snz2_127_ = OpLoad %uint % @@ -1176,7 +1387,7 @@ constexpr char32_t SCC_NZ_2[] = UR"( OpStore %scc %snz2_130_ )"; -constexpr char32_t SCC_OVERFLOW_1[] = UR"( +constexpr char SCC_OVERFLOW_ADD_1[] = R"( %so1_124_ = OpExtInst %int %GLSL_std_450 SSign %t0_ %so1_127_ = OpExtInst %int %GLSL_std_450 SSign %t1_ %so1_129_ = OpLoad %uint % @@ -1189,17 +1400,30 @@ constexpr char32_t SCC_OVERFLOW_1[] = UR"( OpStore %scc %so1_142_ )"; -constexpr char32_t SCC_CARRY_1[] = UR"( +constexpr char SCC_OVERFLOW_SUB_1[] = R"( + %so1_124_ = OpExtInst %int %GLSL_std_450 SSign %t0_ + %so1_127_ = OpExtInst %int %GLSL_std_450 SSign %t1_ + %so1_129_ = OpLoad %uint % + %so1_130_ = OpBitcast %int %so1_129_ + %so1_131_ = OpExtInst %int %GLSL_std_450 SSign %so1_130_ + %so1_135_ = OpINotEqual %bool %so1_124_ %so1_127_ + %so1_138_ = OpINotEqual %bool %so1_131_ %so1_124_ + %so1_139_ = OpLogicalAnd %bool %so1_135_ %so1_138_ + %so1_142_ = OpSelect %uint %so1_139_ %uint_1 %uint_0 + OpStore %scc %so1_142_ +)"; + +constexpr char SCC_CARRY_1[] = R"( OpStore %scc %carry_ )"; -constexpr char32_t CLAMP[] = UR"( +constexpr char CLAMP[] = R"( %c197_ = OpLoad %float % %c200_ = OpExtInst %float %GLSL_std_450 FClamp %c197_ %float_0_000000 %float_1_000000 OpStore % %c200_ )"; -constexpr char32_t MULTIPLY[] = UR"( +constexpr char MULTIPLY[] = R"( %m197_ = OpLoad %float % %m200_ = OpFMul %float %m197_ % OpStore % %m200_ @@ -1211,7 +1435,8 @@ enum class SccCheck { None, NonZero, - Overflow, + OverflowAdd, + OverflowSub, CarryOut, }; @@ -1228,7 +1453,7 @@ enum class SpirvType struct SpirvValue { SpirvType type = SpirvType::Unknown; - 
String value; + String8 value; }; class Spirv @@ -1244,7 +1469,7 @@ public: void GenerateSource(); - [[nodiscard]] const String& GetSource() const { return m_source; } + [[nodiscard]] const String8& GetSource() const { return m_source; } void SetVsInputInfo(const ShaderVertexInputInfo* input_info) { m_vs_input_info = input_info; } [[nodiscard]] const ShaderVertexInputInfo* GetVsInputInfo() const { return m_vs_input_info; } @@ -1258,14 +1483,14 @@ public: [[nodiscard]] const ShaderBindResources* GetBindInfo() const { return m_bind; } //[[nodiscard]] const ShaderBindParameters& GetBindParams() const { return m_bind_params; } - void AddConstantUint(uint32_t u); - void AddConstantInt(int i); - void AddConstantFloat(float f); - void AddConstant(ShaderOperand op); - [[nodiscard]] String GetConstantUint(uint32_t u) const; - [[nodiscard]] String GetConstantInt(int i) const; - [[nodiscard]] String GetConstantFloat(float f) const; - [[nodiscard]] String GetConstant(ShaderOperand op) const; + void AddConstantUint(uint32_t u); + void AddConstantInt(int i); + void AddConstantFloat(float f); + void AddConstant(ShaderOperand op); + [[nodiscard]] String8 GetConstantUint(uint32_t u) const; + [[nodiscard]] String8 GetConstantInt(int i) const; + [[nodiscard]] String8 GetConstantFloat(float f) const; + [[nodiscard]] String8 GetConstant(ShaderOperand op) const; void GetMappedIndex(int offset, int* buffer, int* field) const { @@ -1284,9 +1509,9 @@ private: { SpirvType type = SpirvType::Unknown; ShaderConstant constant = {0}; - String type_str; - String value_str; - String id; + String8 type_str; + String8 value_str; + String8 id; }; void AddConstant(SpirvType type, ShaderConstant constant); @@ -1311,7 +1536,9 @@ private: void ModifyCode(); - String m_source; + void DetectFetch(); + + String8 m_source; ShaderCode m_code; Vector m_constants; Vector m_variables; @@ -1330,7 +1557,7 @@ struct RecompilerFunc inst_recompile_func_t func = nullptr; ShaderInstructionType type = 
ShaderInstructionType::Unknown; ShaderInstructionFormat::Format format = ShaderInstructionFormat::Unknown; - const char32_t* param[4] = {nullptr, nullptr, nullptr, nullptr}; + const char* param[4] = {nullptr, nullptr, nullptr, nullptr}; SccCheck scc_check = SccCheck::None; }; @@ -1356,39 +1583,39 @@ static SpirvValue operand_variable_to_str(ShaderOperand op) switch (op.type) { case ShaderOperandType::Vgpr: - ret.value = String::FromPrintf("v%d", op.register_id); + ret.value = String8::FromPrintf("v%d", op.register_id); ret.type = SpirvType::Float; break; case ShaderOperandType::Sgpr: - ret.value = String::FromPrintf("s%d", op.register_id); + ret.value = String8::FromPrintf("s%d", op.register_id); ret.type = SpirvType::Uint; break; case ShaderOperandType::VccLo: - ret.value = U"vcc_lo"; + ret.value = "vcc_lo"; ret.type = SpirvType::Uint; break; case ShaderOperandType::VccHi: - ret.value = U"vcc_hi"; + ret.value = "vcc_hi"; ret.type = SpirvType::Uint; break; case ShaderOperandType::ExecLo: - ret.value = U"exec_lo"; + ret.value = "exec_lo"; ret.type = SpirvType::Uint; break; case ShaderOperandType::ExecHi: - ret.value = U"exec_hi"; + ret.value = "exec_hi"; ret.type = SpirvType::Uint; break; case ShaderOperandType::ExecZ: - ret.value = U"execz"; + ret.value = "execz"; ret.type = SpirvType::Uint; break; case ShaderOperandType::Scc: - ret.value = U"scc"; + ret.value = "scc"; ret.type = SpirvType::Uint; break; case ShaderOperandType::M0: - ret.value = U"m0"; + ret.value = "m0"; ret.type = SpirvType::Uint; break; default: break; @@ -1406,32 +1633,32 @@ static SpirvValue operand_variable_to_str(ShaderOperand op, int shift) switch (op.type) { case ShaderOperandType::Vgpr: - ret.value = String::FromPrintf("v%d", op.register_id + shift); + ret.value = String8::FromPrintf("v%d", op.register_id + shift); ret.type = SpirvType::Float; break; case ShaderOperandType::Sgpr: - ret.value = String::FromPrintf("s%d", op.register_id + shift); + ret.value = String8::FromPrintf("s%d", 
op.register_id + shift); ret.type = SpirvType::Uint; break; case ShaderOperandType::VccLo: if (shift == 0) { - ret.value = U"vcc_lo"; + ret.value = "vcc_lo"; ret.type = SpirvType::Uint; } else if (shift == 1) { - ret.value = U"vcc_hi"; + ret.value = "vcc_hi"; ret.type = SpirvType::Uint; } break; case ShaderOperandType::ExecLo: if (shift == 0) { - ret.value = U"exec_lo"; + ret.value = "exec_lo"; ret.type = SpirvType::Uint; } else if (shift == 1) { - ret.value = U"exec_hi"; + ret.value = "exec_hi"; ret.type = SpirvType::Uint; } break; @@ -1453,7 +1680,7 @@ static bool operand_is_exec(ShaderOperand op) return false; } -static bool operand_load_int(Spirv* spirv, ShaderOperand op, const String& result_id, const String& index, String* load) +static bool operand_load_int(Spirv* spirv, ShaderOperand op, const String8& result_id, const String8& index, String8* load) { EXIT_IF(load == nullptr); @@ -1461,30 +1688,30 @@ static bool operand_load_int(Spirv* spirv, ShaderOperand op, const String& resul if (operand_is_constant(op)) { - String id = spirv->GetConstant(op); + String8 id = spirv->GetConstant(op); - *load = String(U"% = OpBitcast %int %") - .ReplaceStr(U"", index) - .ReplaceStr(U"", id) - .ReplaceStr(U"", result_id); + *load = String8("% = OpBitcast %int %") + .ReplaceStr("", index) + .ReplaceStr("", id) + .ReplaceStr("", result_id); } else if (operand_is_variable(op)) { auto value = operand_variable_to_str(op); if (value.type == SpirvType::Float) { - *load = (String(U"%t = OpLoad %float %\n") + String(U' ', 10) + - String(U"% = OpBitcast %int %t\n")) - .ReplaceStr(U"", index) - .ReplaceStr(U"", value.value) - .ReplaceStr(U"", result_id); + *load = (String8("%t = OpLoad %float %\n") + String8(' ', 10) + + String8("% = OpBitcast %int %t\n")) + .ReplaceStr("", index) + .ReplaceStr("", value.value) + .ReplaceStr("", result_id); } else if (value.type == SpirvType::Uint) { - *load = (String(U"%t = OpLoad %uint %\n") + String(U' ', 10) + - String(U"% = OpBitcast %int %t\n")) 
- .ReplaceStr(U"", index) - .ReplaceStr(U"", value.value) - .ReplaceStr(U"", result_id); + *load = (String8("%t = OpLoad %uint %\n") + String8(' ', 10) + + String8("% = OpBitcast %int %t\n")) + .ReplaceStr("", index) + .ReplaceStr("", value.value) + .ReplaceStr("", result_id); } } else { @@ -1493,7 +1720,7 @@ static bool operand_load_int(Spirv* spirv, ShaderOperand op, const String& resul return true; } -static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& result_id, const String& index, String* load, int shift = -1) +static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String8& result_id, const String8& index, String8* load, int shift = -1) { EXIT_IF(load == nullptr); @@ -1507,32 +1734,31 @@ static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& resu if (shift == 0) { - String id = spirv->GetConstant(op); - *load = String(U"% = OpBitcast %uint %") - .ReplaceStr(U"", index) - .ReplaceStr(U"", id) - .ReplaceStr(U"", result_id); + String8 id = spirv->GetConstant(op); + *load = String8("% = OpBitcast %uint %") + .ReplaceStr("", index) + .ReplaceStr("", id) + .ReplaceStr("", result_id); } else { if (op.type == ShaderOperandType::IntegerInlineConstant && op.constant.i < 0) { - *load = String(U"% = OpBitcast %uint %uint_0xffffffff") - .ReplaceStr(U"", index) - .ReplaceStr(U"", result_id); + *load = String8("% = OpBitcast %uint %uint_0xffffffff") + .ReplaceStr("", index) + .ReplaceStr("", result_id); } else { - *load = String(U"% = OpBitcast %uint %uint_0") - .ReplaceStr(U"", index) - .ReplaceStr(U"", result_id); + *load = + String8("% = OpBitcast %uint %uint_0").ReplaceStr("", index).ReplaceStr("", result_id); } } } else { - String id = spirv->GetConstant(op); - *load = String(U"% = OpBitcast %uint %") - .ReplaceStr(U"", index) - .ReplaceStr(U"", id) - .ReplaceStr(U"", result_id); + String8 id = spirv->GetConstant(op); + *load = String8("% = OpBitcast %uint %") + .ReplaceStr("", index) + .ReplaceStr("", id) + 
.ReplaceStr("", result_id); } } else if (operand_is_variable(op)) { @@ -1540,17 +1766,17 @@ static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& resu if (value.type == SpirvType::Float) { - *load = (String(U"%t = OpLoad %float %\n") + String(U' ', 10) + - String(U"% = OpBitcast %uint %t\n")) - .ReplaceStr(U"", index) - .ReplaceStr(U"", value.value) - .ReplaceStr(U"", result_id); + *load = (String8("%t = OpLoad %float %\n") + String8(' ', 10) + + String8("% = OpBitcast %uint %t\n")) + .ReplaceStr("", index) + .ReplaceStr("", value.value) + .ReplaceStr("", result_id); } else if (value.type == SpirvType::Uint) { - *load = (String(U"% = OpLoad %uint %")) - .ReplaceStr(U"", index) - .ReplaceStr(U"", value.value) - .ReplaceStr(U"", result_id); + *load = (String8("% = OpLoad %uint %")) + .ReplaceStr("", index) + .ReplaceStr("", value.value) + .ReplaceStr("", result_id); } else { return false; @@ -1562,31 +1788,31 @@ static bool operand_load_uint(Spirv* spirv, ShaderOperand op, const String& resu return true; } -static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String& result_id, const String& index, String* load) +static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String8& result_id, const String8& index, String8* load) { EXIT_IF(load == nullptr); // EXIT_NOT_IMPLEMENTED(op.negate); - String l; + String8 l; if (operand_is_constant(op)) { - String id = spirv->GetConstant(op); + String8 id = spirv->GetConstant(op); - l = String(U"% = OpBitcast %float %").ReplaceStr(U"", id); + l = String8("% = OpBitcast %float %").ReplaceStr("", id); } else if (operand_is_variable(op)) { auto value = operand_variable_to_str(op); if (value.type == SpirvType::Float) { - l = String(U"% = OpLoad %float %\n").ReplaceStr(U"", value.value); + l = String8("% = OpLoad %float %\n").ReplaceStr("", value.value); } else if (value.type == SpirvType::Uint) { - l = (String(U"%t = OpLoad %uint %\n") + String(U' ', 10) + - String(U"% = OpBitcast 
%float %t\n")) - .ReplaceStr(U"", value.value); + l = (String8("%t = OpLoad %uint %\n") + String8(' ', 10) + + String8("% = OpBitcast %float %t\n")) + .ReplaceStr("", value.value); } else { return false; @@ -1604,21 +1830,21 @@ static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String& res if (op.absolute) { - l += String(U' ', 10) + String(U"% = OpExtInst %float %GLSL_std_450 FAbs %\n"); - *load = l.ReplaceStr(U"", index).ReplaceStr(U"", U"a" + result_id).ReplaceStr(U"", result_id); + l += String8(' ', 10) + String8("% = OpExtInst %float %GLSL_std_450 FAbs %\n"); + *load = l.ReplaceStr("", index).ReplaceStr("", "a" + result_id).ReplaceStr("", result_id); } else if (op.negate) { - l += String(U' ', 10) + String(U"% = OpFNegate %float %\n"); - *load = l.ReplaceStr(U"", index).ReplaceStr(U"", U"n" + result_id).ReplaceStr(U"", result_id); + l += String8(' ', 10) + String8("% = OpFNegate %float %\n"); + *load = l.ReplaceStr("", index).ReplaceStr("", "n" + result_id).ReplaceStr("", result_id); } else { - *load = l.ReplaceStr(U"", index).ReplaceStr(U"", result_id); + *load = l.ReplaceStr("", index).ReplaceStr("", result_id); } return true; } -static String get_scc_check(SccCheck scc_check, int dst_num) +static String8 get_scc_check(SccCheck scc_check, int dst_num) { EXIT_IF(dst_num < 1 || dst_num > 2); @@ -1627,7 +1853,8 @@ static String get_scc_check(SccCheck scc_check, int dst_num) switch (scc_check) { case SccCheck::NonZero: return SCC_NZ_1; break; - case SccCheck::Overflow: return SCC_OVERFLOW_1; break; + case SccCheck::OverflowAdd: return SCC_OVERFLOW_ADD_1; break; + case SccCheck::OverflowSub: return SCC_OVERFLOW_SUB_1; break; case SccCheck::CarryOut: return SCC_CARRY_1; break; default: break; } @@ -1636,12 +1863,13 @@ static String get_scc_check(SccCheck scc_check, int dst_num) switch (scc_check) { case SccCheck::NonZero: return SCC_NZ_2; break; - case SccCheck::Overflow: KYTY_NOT_IMPLEMENTED; break; + case SccCheck::OverflowAdd: 
KYTY_NOT_IMPLEMENTED; break; + case SccCheck::OverflowSub: KYTY_NOT_IMPLEMENTED; break; case SccCheck::CarryOut: KYTY_NOT_IMPLEMENTED; break; default: break; } } - return U""; + return ""; } KYTY_RECOMPILER_FUNC(Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen) @@ -1658,7 +1886,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen) auto src1_value0 = operand_variable_to_str(inst.src[1], 0); auto src1_value1 = operand_variable_to_str(inst.src[1], 1); // auto src1_value3 = operand_variable_to_str(inst.src[1], 3); - String offset = spirv->GetConstant(inst.src[2]); + String8 offset = spirv->GetConstant(inst.src[2]); EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float); EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float); @@ -1669,7 +1897,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t100_ = OpLoad %float % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_1 %t101_ @@ -1689,14 +1917,14 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadDword_Vdata1VaddrSvSoffsIdxen) ; OpStore %temp_int_5 %t211_ %t110_ = OpFunctionCall %void %buffer_load_float1 % %temp_int_1 %temp_int_2 %temp_int_3 %temp_int_4 )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value1.value) - //.ReplaceStr(U"", src1_value3.value) - .ReplaceStr(U"", dst_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", offset) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value1.value) + //.ReplaceStr("", src1_value3.value) + .ReplaceStr("", dst_value.value); return true; } @@ -1715,12 +1943,12 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadFormatX_Vdata1VaddrSvSoffsIdxen) // 
EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2])); - auto dst_value = operand_variable_to_str(inst.dst); - auto src0_value = operand_variable_to_str(inst.src[0]); - auto src1_value0 = operand_variable_to_str(inst.src[1], 0); - auto src1_value1 = operand_variable_to_str(inst.src[1], 1); - auto src1_value3 = operand_variable_to_str(inst.src[1], 3); - String offset = spirv->GetConstant(inst.src[2]); + auto dst_value = operand_variable_to_str(inst.dst); + auto src0_value = operand_variable_to_str(inst.src[0]); + auto src1_value0 = operand_variable_to_str(inst.src[1], 0); + auto src1_value1 = operand_variable_to_str(inst.src[1], 1); + auto src1_value3 = operand_variable_to_str(inst.src[1], 3); + String8 offset = spirv->GetConstant(inst.src[2]); EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float); EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float); @@ -1731,7 +1959,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadFormatX_Vdata1VaddrSvSoffsIdxen) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t100_ = OpLoad %float % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_1 %t101_ @@ -1751,14 +1979,14 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferLoadFormatX_Vdata1VaddrSvSoffsIdxen) OpStore %temp_int_5 %t211_ %t110_ = OpFunctionCall %void %tbuffer_load_format_x % %temp_int_1 %temp_int_2 %temp_int_3 %temp_int_4 %temp_int_5 )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value1.value) - .ReplaceStr(U"", src1_value3.value) - .ReplaceStr(U"", dst_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", offset) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value1.value) + .ReplaceStr("", 
src1_value3.value) + .ReplaceStr("", dst_value.value); return true; } @@ -1780,7 +2008,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreDword_Vdata1VaddrSvSoffsIdxen) auto src1_value0 = operand_variable_to_str(inst.src[1], 0); auto src1_value1 = operand_variable_to_str(inst.src[1], 1); // auto src1_value3 = operand_variable_to_str(inst.src[1], 3); - String offset = spirv->GetConstant(inst.src[2]); + String8 offset = spirv->GetConstant(inst.src[2]); EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float); EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float); @@ -1790,7 +2018,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreDword_Vdata1VaddrSvSoffsIdxen) // TODO() check VSKIP - static const char32_t* text = UR"( + static const char* text = R"( %exec_lo_u_ = OpLoad %uint %exec_lo %exec_hi_u_ = OpLoad %uint %exec_hi ; unused %exec_lo_b_ = OpINotEqual %bool %exec_lo_u_ %uint_0 @@ -1820,14 +2048,14 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreDword_Vdata1VaddrSvSoffsIdxen) OpBranch %t278_ %t278_ = OpLabel )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value1.value) - //.ReplaceStr(U"", src1_value3.value) - .ReplaceStr(U"", dst_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", offset) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value1.value) + //.ReplaceStr("", src1_value3.value) + .ReplaceStr("", dst_value.value); return true; } @@ -1844,12 +2072,12 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen) { EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2])); - auto dst_value = operand_variable_to_str(inst.dst); - auto src0_value = operand_variable_to_str(inst.src[0]); - auto src1_value0 = operand_variable_to_str(inst.src[1], 0); - auto src1_value1 = 
operand_variable_to_str(inst.src[1], 1); - auto src1_value3 = operand_variable_to_str(inst.src[1], 3); - String offset = spirv->GetConstant(inst.src[2]); + auto dst_value = operand_variable_to_str(inst.dst); + auto src0_value = operand_variable_to_str(inst.src[0]); + auto src1_value0 = operand_variable_to_str(inst.src[1], 0); + auto src1_value1 = operand_variable_to_str(inst.src[1], 1); + auto src1_value3 = operand_variable_to_str(inst.src[1], 3); + String8 offset = spirv->GetConstant(inst.src[2]); EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Float); EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float); @@ -1859,7 +2087,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen) // TODO() check VSKIP - static const char32_t* text = UR"( + static const char* text = R"( %exec_lo_u_ = OpLoad %uint %exec_lo %exec_hi_u_ = OpLoad %uint %exec_hi ; unused %exec_lo_b_ = OpINotEqual %bool %exec_lo_u_ %uint_0 @@ -1889,14 +2117,14 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatX_Vdata1VaddrSvSoffsIdxen) OpBranch %t278_ %t278_ = OpLabel )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value1.value) - .ReplaceStr(U"", src1_value3.value) - .ReplaceStr(U"", dst_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", offset) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value1.value) + .ReplaceStr("", src1_value3.value) + .ReplaceStr("", dst_value.value); return true; } @@ -1913,13 +2141,13 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatXy_Vdata2VaddrSvSoffsIdxen) { EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2])); - auto dst_value0 = operand_variable_to_str(inst.dst, 0); - auto dst_value1 = operand_variable_to_str(inst.dst, 1); - auto src0_value = 
operand_variable_to_str(inst.src[0]); - auto src1_value0 = operand_variable_to_str(inst.src[1], 0); - auto src1_value1 = operand_variable_to_str(inst.src[1], 1); - auto src1_value3 = operand_variable_to_str(inst.src[1], 3); - String offset = spirv->GetConstant(inst.src[2]); + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + auto src0_value = operand_variable_to_str(inst.src[0]); + auto src1_value0 = operand_variable_to_str(inst.src[1], 0); + auto src1_value1 = operand_variable_to_str(inst.src[1], 1); + auto src1_value3 = operand_variable_to_str(inst.src[1], 3); + String8 offset = spirv->GetConstant(inst.src[2]); EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Float); EXIT_NOT_IMPLEMENTED(src0_value.type != SpirvType::Float); @@ -1929,7 +2157,7 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatXy_Vdata2VaddrSvSoffsIdxen) // TODO() check VSKIP - static const char32_t* text = UR"( + static const char* text = R"( %exec_lo_u_ = OpLoad %uint %exec_lo %exec_hi_u_ = OpLoad %uint %exec_hi ; unused %exec_lo_b_ = OpINotEqual %bool %exec_lo_u_ %uint_0 @@ -1959,15 +2187,15 @@ KYTY_RECOMPILER_FUNC(Recompile_BufferStoreFormatXy_Vdata2VaddrSvSoffsIdxen) OpBranch %t278_ %t278_ = OpLabel )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value1.value) - .ReplaceStr(U"", src1_value3.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", offset) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value1.value) + .ReplaceStr("", src1_value3.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value); return true; } @@ -1982,7 +2210,7 @@ 
KYTY_RECOMPILER_FUNC(Recompile_DsAppend_VdstGds) if (bind_info != nullptr && bind_info->gds_pointers.pointers_num > 0) { - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -1993,7 +2221,7 @@ KYTY_RECOMPILER_FUNC(Recompile_DsAppend_VdstGds) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t192_ = OpLoad %uint %m0 %t194_ = OpShiftRightLogical %uint %t192_ %int_16 %t196_ = OpAccessChain %_ptr_StorageBuffer_uint %gds %int_0 %t194_ @@ -2002,7 +2230,7 @@ KYTY_RECOMPILER_FUNC(Recompile_DsAppend_VdstGds) OpStore % %t199_ OpMemoryBarrier %uint_1 %uint_72 )"; - *dst_source += String(text).ReplaceStr(U"", dst_value.value).ReplaceStr(U"", index_str); + *dst_source += String8(text).ReplaceStr("", dst_value.value).ReplaceStr("", index_str); return true; } @@ -2017,7 +2245,7 @@ KYTY_RECOMPILER_FUNC(Recompile_DsConsume_VdstGds) if (bind_info != nullptr && bind_info->gds_pointers.pointers_num > 0) { - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -2028,7 +2256,7 @@ KYTY_RECOMPILER_FUNC(Recompile_DsConsume_VdstGds) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t192_ = OpLoad %uint %m0 %t194_ = OpShiftRightLogical %uint %t192_ %int_16 %t196_ = OpAccessChain %_ptr_StorageBuffer_uint %gds %int_0 %t194_ @@ -2037,7 +2265,7 @@ KYTY_RECOMPILER_FUNC(Recompile_DsConsume_VdstGds) OpStore % %t199_ OpMemoryBarrier %uint_1 %uint_72 )"; - *dst_source += String(text).ReplaceStr(U"", dst_value.value).ReplaceStr(U"", index_str); + *dst_source += String8(text).ReplaceStr("", dst_value.value).ReplaceStr("", index_str); return true; } @@ -2068,11 +2296,11 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0OffOffComprVmDone) // TODO() check VSKIP // 
TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( OpKill )"; - *dst_source += String(text); + *dst_source += String8(text); return true; } @@ -2092,7 +2320,7 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0Vsrc0Vsrc1ComprVmDone) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t1_ = OpLoad %float % %t2_ = OpBitcast %uint %t1_ %t3_ = OpExtInst %v2float %GLSL_std_450 UnpackHalf2x16 %t2_ @@ -2107,10 +2335,10 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0Vsrc0Vsrc1ComprVmDone) OpStore %outColor %t11_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", src1_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", src1_value.value); return true; } @@ -2134,7 +2362,7 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t0_ = OpLoad %float % %t1_ = OpLoad %float % %t2_ = OpLoad %float % @@ -2143,12 +2371,12 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0Vsrc0Vsrc1Vsrc2Vsrc3VmDone) OpStore %outColor %t11_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", src1_value.value) - .ReplaceStr(U"", src2_value.value) - .ReplaceStr(U"", src3_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", src1_value.value) + .ReplaceStr("", src2_value.value) + .ReplaceStr("", src3_value.value); return true; } @@ -2171,7 +2399,7 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Param_XXX_Vsrc0Vsrc1Vsrc2Vsrc3) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t0_ = 
OpLoad %float % %t1_ = OpLoad %float % %t2_ = OpLoad %float % @@ -2180,13 +2408,13 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Param_XXX_Vsrc0Vsrc1Vsrc2Vsrc3) OpStore % %t4_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", src1_value.value) - .ReplaceStr(U"", src2_value.value) - .ReplaceStr(U"", src3_value.value) - .ReplaceStr(U"", param[0]); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", src1_value.value) + .ReplaceStr("", src2_value.value) + .ReplaceStr("", src3_value.value) + .ReplaceStr("", param[0]); return true; } @@ -2208,7 +2436,7 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done) // TODO() check VSKIP // TODO() check EXEC - static const char32_t* text = UR"( + static const char* text = R"( %t0_ = OpLoad %float % %t1_ = OpLoad %float % %t2_ = OpLoad %float % @@ -2218,16 +2446,26 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Pos0Vsrc0Vsrc1Vsrc2Vsrc3Done) OpStore %t5_ %t4_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value.value) - .ReplaceStr(U"", src1_value.value) - .ReplaceStr(U"", src2_value.value) - .ReplaceStr(U"", src3_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value.value) + .ReplaceStr("", src1_value.value) + .ReplaceStr("", src2_value.value) + .ReplaceStr("", src3_value.value); return true; } +KYTY_RECOMPILER_FUNC(Recompile_Exp_PrimVsrc0OffOffOffDone) +{ + const auto& inst = code.GetInstructions().At(index); + const auto* vs_info = spirv->GetVsInputInfo(); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.src[0])); + + return (vs_info != nullptr && vs_info->gs_prolog); +} + KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata1Vaddr3StSsDmask1) { const auto& inst = code.GetInstructions().At(index); @@ -2250,7 +2488,7 @@ 
KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata1Vaddr3StSsDmask1) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2267,14 +2505,14 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata1Vaddr3StSsDmask1) %t47_ = OpLoad %float %t46_ OpStore % %t47_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value); return true; } @@ -2304,7 +2542,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata1Vaddr3StSsDmask8) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2321,14 +2559,14 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata1Vaddr3StSsDmask8) %t47_ = OpLoad %float %t46_ OpStore % %t47_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", 
src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value); return true; } @@ -2359,7 +2597,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask3) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2379,15 +2617,15 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask3) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value); return true; } @@ -2418,7 +2656,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask5) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2438,15 +2676,15 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask5) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - 
.ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value); return true; } @@ -2477,7 +2715,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask9) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2497,15 +2735,15 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask9) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value); return true; } @@ -2538,7 +2776,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS 
%textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2561,16 +2799,16 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value); return true; } @@ -2602,7 +2840,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSampleLz_Vdata3Vaddr3StSsDmask7) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2627,16 +2865,16 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSampleLz_Vdata3Vaddr3StSsDmask7) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", 
src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value); return true; } @@ -2669,7 +2907,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSampleLzO_Vdata3Vaddr4StSsDmask7) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2707,17 +2945,17 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSampleLzO_Vdata3Vaddr4StSsDmask7) %t55_ = OpLoad %float %t54_ OpStore % %t55_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src0_value3.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src0_value3.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value); return true; } @@ -2751,7 +2989,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF) // TODO() check VSKIP // TODO() check LOD_CLAMPED - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2777,17 +3015,17 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata4Vaddr3StSsDmaskF) %t58_ = OpLoad %float %t57_ 
OpStore % %t58_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src2_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value) - .ReplaceStr(U"", dst_value3.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src2_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value) + .ReplaceStr("", dst_value3.value); return true; } @@ -2821,7 +3059,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF) // TODO() swizzle channels // TODO() convert SRGB -> LINEAR if SRGB format was replaced with UNORM - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ %t27_ = OpLoad %ImageS %t26_ @@ -2845,16 +3083,16 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageLoad_Vdata4Vaddr3StDmaskF) %t58_ = OpLoad %float %t57_ OpStore % %t58_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value) - .ReplaceStr(U"", dst_value3.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + 
.ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value) + .ReplaceStr("", dst_value3.value); return true; } @@ -2890,7 +3128,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageStore_Vdata4Vaddr3StDmaskF) // TODO() swizzle channels // TODO() convert SRGB -> LINEAR if SRGB format was replaced with UNORM - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t25_ = OpLoad %uint % %t143_ = OpShiftRightLogical %uint %t25_ %uint_0 @@ -2913,16 +3151,16 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageStore_Vdata4Vaddr3StDmaskF) %t88_ = OpCompositeConstruct %v4float %t84_ %t85_ %t86_ %t87_ OpImageWrite %t27_ %t73_ %t88_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value2.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value) - .ReplaceStr(U"", dst_value3.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value2.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value) + .ReplaceStr("", dst_value3.value); return true; } @@ -2959,7 +3197,7 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageStoreMip_Vdata4Vaddr4StDmaskF) // TODO() swizzle channels // TODO() convert SRGB -> LINEAR if SRGB format was replaced with UNORM - static const char32_t* text = UR"( + static const char* text = R"( %t24_ = OpLoad %uint % %t25_ = OpLoad %uint % %t143_ = OpShiftRightLogical %uint %t25_ %uint_0 @@ -2986,17 +3224,17 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageStoreMip_Vdata4Vaddr4StDmaskF) %t88_ = OpCompositeConstruct %v4float %t84_ %t85_ %t86_ %t87_ OpImageWrite %t27_ %t172_ 
%t88_ )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", src0_value1.value) - .ReplaceStr(U"", src0_value2.value) - .ReplaceStr(U"", src1_value0.value) - .ReplaceStr(U"", src1_value2.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value) - .ReplaceStr(U"", dst_value3.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", src0_value1.value) + .ReplaceStr("", src0_value2.value) + .ReplaceStr("", src1_value0.value) + .ReplaceStr("", src1_value2.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value) + .ReplaceStr("", dst_value3.value); return true; } @@ -3009,7 +3247,7 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12) { const auto& inst = code.GetInstructions().At(index); - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -3018,29 +3256,29 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12) EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); - String load0; - String load1; - String load2; - String load3; + String8 load0; + String8 load1; + String8 load2; + String8 load3; - if (!operand_load_uint(spirv, inst.src[0], U"t0_", index_str, &load0, 0)) + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0, 0)) { return false; } - if (!operand_load_uint(spirv, inst.src[0], U"t1_", index_str, &load1, 1)) + if (!operand_load_uint(spirv, inst.src[0], "t1_", index_str, &load1, 1)) { return false; } - if (!operand_load_uint(spirv, inst.src[1], U"t2_", index_str, &load2, 0)) + if (!operand_load_uint(spirv, inst.src[1], "t2_", index_str, &load2, 0)) { return false; } - if (!operand_load_uint(spirv, inst.src[1], U"t3_", 
index_str, &load3, 1)) + if (!operand_load_uint(spirv, inst.src[1], "t3_", index_str, &load3, 1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( @@ -3055,33 +3293,244 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B64_Sdst2Ssrc02Ssrc12) )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", load2) - .ReplaceStr(U"", load3) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", param[1]) - .ReplaceStr(U"", (param[2] == nullptr ? U"" : param[2])) - .ReplaceStr(U"", (param[3] == nullptr ? U"" : param[3])) - .ReplaceStr(U"", (operand_is_exec(inst.dst) ? EXECZ : U"")) - .ReplaceStr(U"", get_scc_check(scc_check, 2)) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", load2) + .ReplaceStr("", load3) + .ReplaceStr("", param[0]) + .ReplaceStr("", param[1]) + .ReplaceStr("", (param[2] == nullptr ? "" : param[2])) + .ReplaceStr("", (param[3] == nullptr ? "" : param[3])) + .ReplaceStr("", (operand_is_exec(inst.dst) ? 
EXECZ : "")) + .ReplaceStr("", get_scc_check(scc_check, 2)) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", index_str); return true; } -/* XXX: And, Lshl, Lshr, CSelect */ +KYTY_RECOMPILER_FUNC(Recompile_S_Lshl_B64_Sdst2Ssrc02Ssrc1) +{ + const auto& inst = code.GetInstructions().At(index); + + String8 index_str = String8::FromPrintf("%u", index); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); + + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + + EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); + + String8 load0; + String8 load1; + String8 load2; + + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0, 0)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[0], "t1_", index_str, &load1, 1)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[1], "t2_", index_str, &load2)) + { + return false; + } + + static const char* text = R"( + + + + + + + +%t22_ = OpBitwiseAnd %uint %t2_ %uint_63 + OpStore %temp_uint_2 %t0_ + OpStore %temp_uint_3 %t1_ + OpStore %temp_uint_4 %t22_ +%t_ = OpFunctionCall %void %shift_left %temp_uint_0 %temp_uint_1 %temp_uint_2 %temp_uint_3 %temp_uint_4 +%r0_ = OpLoad %uint %temp_uint_0 +%r1_ = OpLoad %uint %temp_uint_1 + OpStore % %r0_ + OpStore % %r1_ + + +)"; + + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", load2) + .ReplaceStr("", param[0]) + .ReplaceStr("", (param[1] == nullptr ? "" : param[1])) + .ReplaceStr("", (param[2] == nullptr ? "" : param[2])) + .ReplaceStr("", (param[3] == nullptr ? "" : param[3])) + .ReplaceStr("", (operand_is_exec(inst.dst) ? 
EXECZ : "")) + .ReplaceStr("", get_scc_check(scc_check, 2)) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", index_str); + + return true; +} + +KYTY_RECOMPILER_FUNC(Recompile_S_Lshr_B64_Sdst2Ssrc02Ssrc1) +{ + const auto& inst = code.GetInstructions().At(index); + + String8 index_str = String8::FromPrintf("%u", index); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); + + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + + EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); + + String8 load0; + String8 load1; + String8 load2; + + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0, 0)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[0], "t1_", index_str, &load1, 1)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[1], "t2_", index_str, &load2)) + { + return false; + } + + static const char* text = R"( + + + + + + + +%t22_ = OpBitwiseAnd %uint %t2_ %uint_63 + OpStore %temp_uint_2 %t0_ + OpStore %temp_uint_3 %t1_ + OpStore %temp_uint_4 %t22_ +%t_ = OpFunctionCall %void %shift_right %temp_uint_0 %temp_uint_1 %temp_uint_2 %temp_uint_3 %temp_uint_4 +%r0_ = OpLoad %uint %temp_uint_0 +%r1_ = OpLoad %uint %temp_uint_1 + OpStore % %r0_ + OpStore % %r1_ + + +)"; + + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", load2) + .ReplaceStr("", param[0]) + .ReplaceStr("", (param[1] == nullptr ? "" : param[1])) + .ReplaceStr("", (param[2] == nullptr ? "" : param[2])) + .ReplaceStr("", (param[3] == nullptr ? "" : param[3])) + .ReplaceStr("", (operand_is_exec(inst.dst) ? 
EXECZ : "")) + .ReplaceStr("", get_scc_check(scc_check, 2)) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", index_str); + + return true; +} + +KYTY_RECOMPILER_FUNC(Recompile_S_Bfe_U64_Sdst2Ssrc02Ssrc1) +{ + const auto& inst = code.GetInstructions().At(index); + + String8 index_str = String8::FromPrintf("%u", index); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); + + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + + EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); + + String8 load0; + String8 load1; + String8 load2; + + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0, 0)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[0], "t1_", index_str, &load1, 1)) + { + return false; + } + if (!operand_load_uint(spirv, inst.src[1], "t2_", index_str, &load2)) + { + return false; + } + + static const char* text = R"( + + + + + + + + %to_ = OpBitFieldUExtract %uint %t2_ %uint_0 %uint_6 + %ts_ = OpBitFieldUExtract %uint %t2_ %uint_16 %uint_7 +%tn0_ = OpISub %uint %uint_64 %to_ +%ts2_ = OpExtInst %uint %GLSL_std_450 UMin %ts_ %tn0_ +%tn1_ = OpISub %uint %uint_64 %ts2_ +%tn2_ = OpISub %uint %tn1_ %to_ + OpStore %temp_uint_2 %t0_ + OpStore %temp_uint_3 %t1_ + OpStore %temp_uint_4 %tn2_ +%tf1_ = OpFunctionCall %void %shift_left %temp_uint_0 %temp_uint_1 %temp_uint_2 %temp_uint_3 %temp_uint_4 + OpStore %temp_uint_4 %tn1_ +%tf2_ = OpFunctionCall %void %shift_right %temp_uint_2 %temp_uint_3 %temp_uint_0 %temp_uint_1 %temp_uint_4 + %r0_ = OpLoad %uint %temp_uint_2 + %r1_ = OpLoad %uint %temp_uint_3 + OpStore % %r0_ + OpStore % %r1_ + + +)"; + + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", load2) + .ReplaceStr("", param[0]) + .ReplaceStr("", (param[1] == nullptr ? "" : param[1])) + .ReplaceStr("", (param[2] == nullptr ? "" : param[2])) + .ReplaceStr("", (param[3] == nullptr ? 
"" : param[3])) + .ReplaceStr("", (operand_is_exec(inst.dst) ? EXECZ : "")) + .ReplaceStr("", get_scc_check(scc_check, 2)) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", index_str); + + return true; +} + +/* XXX: And, Lshl, Lshr, CSelect, Or */ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1) { const auto& inst = code.GetInstructions().At(index); - String load0; - String load1; + String8 load0; + String8 load1; - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -3090,16 +3539,16 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1) EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - if (!operand_load_uint(spirv, inst.src[0], U"t0_", index_str, &load0)) + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0)) { return false; } - if (!operand_load_uint(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_uint(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( @@ -3108,28 +3557,28 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_B32_SVdstSVsrc0SVsrc1) OpStore % %t_ )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", (param[1] == nullptr ? U"" : param[1])) - .ReplaceStr(U"", (param[2] == nullptr ? U"" : param[2])) - .ReplaceStr(U"", get_scc_check(scc_check, 1)) - .ReplaceStr(U"", dst_value.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", param[0]) + .ReplaceStr("", (param[1] == nullptr ? "" : param[1])) + .ReplaceStr("", (param[2] == nullptr ? 
"" : param[2])) + .ReplaceStr("", get_scc_check(scc_check, 1)) + .ReplaceStr("", dst_value.value) + .ReplaceStr("", index_str); return true; } -/* XXX: Add, Mul */ +/* XXX: Add, Mul, Sub */ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1) { const auto& inst = code.GetInstructions().At(index); - String load0; - String load1; + String8 load0; + String8 load1; - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -3138,16 +3587,16 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1) EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - if (!operand_load_int(spirv, inst.src[0], U"t0_", index_str, &load0)) + if (!operand_load_int(spirv, inst.src[0], "t0_", index_str, &load0)) { return false; } - if (!operand_load_int(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_int(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( @@ -3155,26 +3604,26 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1) OpStore % %tu_ )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", get_scc_check(scc_check, 1)) - .ReplaceStr(U"", dst_value.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", param[0]) + .ReplaceStr("", get_scc_check(scc_check, 1)) + .ReplaceStr("", dst_value.value) + .ReplaceStr("", index_str); return true; } -/* XXX: Add */ +/* XXX: Add, Addc, Bfe, Lshl4Add */ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1) { const auto& inst = code.GetInstructions().At(index); - String load0; - String load1; + String8 load0; + String8 load1; - String index_str = String::FromPrintf("%u", index); + String8 index_str = 
String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -3183,16 +3632,16 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1) EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - if (!operand_load_uint(spirv, inst.src[0], U"t0_", index_str, &load0)) + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0)) { return false; } - if (!operand_load_uint(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_uint(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( @@ -3202,16 +3651,16 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1) OpStore % %t_ )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", (param[1] == nullptr ? U"" : param[1])) - .ReplaceStr(U"", (param[2] == nullptr ? U"" : param[2])) - .ReplaceStr(U"", (param[3] == nullptr ? U"" : param[3])) - .ReplaceStr(U"", get_scc_check(scc_check, 1)) - .ReplaceStr(U"", dst_value.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", param[0]) + .ReplaceStr("", (param[1] == nullptr ? "" : param[1])) + .ReplaceStr("", (param[2] == nullptr ? "" : param[2])) + .ReplaceStr("", (param[3] == nullptr ? 
"" : param[3])) + .ReplaceStr("", get_scc_check(scc_check, 1)) + .ReplaceStr("", dst_value.value) + .ReplaceStr("", index_str); return true; } @@ -3220,7 +3669,7 @@ KYTY_RECOMPILER_FUNC(Recompile_SAndSaveexecB64_Sdst2Ssrc02) { const auto& inst = code.GetInstructions().At(index); - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); @@ -3231,19 +3680,19 @@ KYTY_RECOMPILER_FUNC(Recompile_SAndSaveexecB64_Sdst2Ssrc02) EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - String load0; - String load1; + String8 load0; + String8 load1; - if (!operand_load_uint(spirv, inst.src[0], U"t0_", index_str, &load0, 0)) + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0, 0)) { return false; } - if (!operand_load_uint(spirv, inst.src[0], U"t1_", index_str, &load1, 1)) + if (!operand_load_uint(spirv, inst.src[0], "t1_", index_str, &load1, 1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( %t190_ = OpLoad %uint %exec_lo @@ -3258,14 +3707,14 @@ KYTY_RECOMPILER_FUNC(Recompile_SAndSaveexecB64_Sdst2Ssrc02) )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", EXECZ) - .ReplaceStr(U"", get_scc_check(scc_check, 2)) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", EXECZ) + .ReplaceStr("", get_scc_check(scc_check, 2)) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", index_str); return true; } @@ -3275,32 +3724,32 @@ KYTY_RECOMPILER_FUNC(Recompile_SCmp_XXX_I32_Ssrc0Ssrc1) { const auto& inst = code.GetInstructions().At(index); - String load0; - String load1; + String8 load0; + String8 load1; - String index_str = String::FromPrintf("%u", index); + String8 index_str = 
String8::FromPrintf("%u", index); - if (!operand_load_int(spirv, inst.src[0], U"t0_", index_str, &load0)) + if (!operand_load_int(spirv, inst.src[0], "t0_", index_str, &load0)) { return false; } - if (!operand_load_int(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_int(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( %t2_ = %bool %t0_ %t1_ %t3_ = OpSelect %uint %t2_ %uint_1 %uint_0 OpStore %scc %t3_ )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", param[0]) + .ReplaceStr("", index_str); return true; } @@ -3310,32 +3759,32 @@ KYTY_RECOMPILER_FUNC(Recompile_SCmp_XXX_U32_Ssrc0Ssrc1) { const auto& inst = code.GetInstructions().At(index); - String load0; - String load1; + String8 load0; + String8 load1; - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); - if (!operand_load_uint(spirv, inst.src[0], U"t0_", index_str, &load0)) + if (!operand_load_uint(spirv, inst.src[0], "t0_", index_str, &load0)) { return false; } - if (!operand_load_uint(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_uint(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( %t2_ = %bool %t0_ %t1_ %t3_ = OpSelect %uint %t2_ %uint_1 %uint_0 OpStore %scc %t3_ )"; - *dst_source += String(text) - .ReplaceStr(U"", load0) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", param[0]) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + .ReplaceStr("", load0) + .ReplaceStr("", load1) + .ReplaceStr("", param[0]) + .ReplaceStr("", index_str); return true; } @@ -3349,15 +3798,15 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDword_SdstSvSoffset) { 
EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[1])); - auto dst_value = operand_variable_to_str(inst.dst); - auto src0_value0 = operand_variable_to_str(inst.src[0], 0); - String offset = spirv->GetConstant(inst.src[1]); + auto dst_value = operand_variable_to_str(inst.dst); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + String8 offset = spirv->GetConstant(inst.src[1]); EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - static const char32_t* text = UR"( + static const char* text = R"( %t100_ = OpLoad %uint % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_2 %t101_ @@ -3365,11 +3814,11 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDword_SdstSvSoffset) OpStore %temp_int_1 %t102_ %t110_ = OpFunctionCall %void %sbuffer_load_dword % %temp_int_1 %temp_int_2 )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", dst_value.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", offset) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", dst_value.value); return true; } @@ -3386,10 +3835,10 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx2_Sdst2SvSoffset) { EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[1])); - auto dst_value0 = operand_variable_to_str(inst.dst, 0); - auto dst_value1 = operand_variable_to_str(inst.dst, 1); - auto src0_value0 = operand_variable_to_str(inst.src[0], 0); - String offset = spirv->GetConstant(inst.src[1]); + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + String8 offset = spirv->GetConstant(inst.src[1]); EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(src0_value0.type != 
SpirvType::Uint); @@ -3397,7 +3846,7 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx2_Sdst2SvSoffset) EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - static const char32_t* text = UR"( + static const char* text = R"( %t100_ = OpLoad %uint % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_2 %t101_ @@ -3405,12 +3854,12 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx2_Sdst2SvSoffset) OpStore %temp_int_1 %t102_ %t110_ = OpFunctionCall %void %sbuffer_load_dword_2 % % %temp_int_1 %temp_int_2 )"; - *dst_source += String(text) - .ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value); + *dst_source += String8(text) + .ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", offset) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value); return true; } @@ -3432,7 +3881,7 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx4_Sdst4SvSoffset) auto dst_value2 = operand_variable_to_str(inst.dst, 2); auto dst_value3 = operand_variable_to_str(inst.dst, 3); auto src0_value0 = operand_variable_to_str(inst.src[0], 0); - // String offset = spirv->GetConstant(inst.src[1]); + // String8 offset = spirv->GetConstant(inst.src[1]); EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Uint); @@ -3440,16 +3889,16 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx4_Sdst4SvSoffset) EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - String index_str = String::FromPrintf("%u", index); + String8 index_str = String8::FromPrintf("%u", index); - String load1; + String8 load1; - if (!operand_load_uint(spirv, inst.src[1], U"t1_", index_str, &load1)) + if (!operand_load_uint(spirv, inst.src[1], "t1_", index_str, &load1)) { return false; } - static const char32_t* text = UR"( + static const char* text = R"( %t100_ = OpLoad %uint % %t101_ = 
OpBitcast %int %t100_ @@ -3458,15 +3907,15 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx4_Sdst4SvSoffset) OpStore %temp_int_1 %t102_ %t110_ = OpFunctionCall %void %sbuffer_load_dword_4 % % % % %temp_int_1 %temp_int_2 )"; - *dst_source += String(text) - //.ReplaceStr(U"", offset) - .ReplaceStr(U"", load1) - .ReplaceStr(U"", src0_value0.value) - .ReplaceStr(U"", dst_value0.value) - .ReplaceStr(U"", dst_value1.value) - .ReplaceStr(U"", dst_value2.value) - .ReplaceStr(U"", dst_value3.value) - .ReplaceStr(U"", index_str); + *dst_source += String8(text) + //.ReplaceStr("", offset) + .ReplaceStr("", load1) + .ReplaceStr("", src0_value0.value) + .ReplaceStr("", dst_value0.value) + .ReplaceStr("", dst_value1.value) + .ReplaceStr("", dst_value2.value) + .ReplaceStr("", dst_value3.value) + .ReplaceStr("", index_str); return true; } @@ -3490,15 +3939,15 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx8_Sdst8SvSoffset) dst_value[i] = operand_variable_to_str(inst.dst, i); } - auto src0_value0 = operand_variable_to_str(inst.src[0], 0); - String offset = spirv->GetConstant(inst.src[1]); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + String8 offset = spirv->GetConstant(inst.src[1]); EXIT_NOT_IMPLEMENTED(dst_value[0].type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - String text = UR"( + String8 text = R"( %t100_ = OpLoad %uint % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_2 %t101_ @@ -3509,12 +3958,12 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx8_Sdst8SvSoffset) for (int i = 0; i < 8; i++) { - text = text.ReplaceStr(String::FromPrintf("", i), dst_value[i].value); + text = text.ReplaceStr(String8::FromPrintf("", i), dst_value[i].value); } - *dst_source += text.ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src0_value0.value); + *dst_source += text.ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", 
offset) + .ReplaceStr("", src0_value0.value); return true; } @@ -3538,15 +3987,15 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx16_Sdst16SvSoffset) dst_value[i] = operand_variable_to_str(inst.dst, i); } - auto src0_value0 = operand_variable_to_str(inst.src[0], 0); - String offset = spirv->GetConstant(inst.src[1]); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + String8 offset = spirv->GetConstant(inst.src[1]); EXIT_NOT_IMPLEMENTED(dst_value[0].type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Uint); EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); - String text = UR"( + String8 text = R"( %t100_ = OpLoad %uint % %t101_ = OpBitcast %int %t100_ OpStore %temp_int_2 %t101_ @@ -3557,12 +4006,12 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx16_Sdst16SvSoffset) for (int i = 0; i < 16; i++) { - text = text.ReplaceStr(String::FromPrintf("", i), dst_value[i].value); + text = text.ReplaceStr(String8::FromPrintf("", i), dst_value[i].value); } - *dst_source += text.ReplaceStr(U"", String::FromPrintf("%u", index)) - .ReplaceStr(U"", offset) - .ReplaceStr(U"", src0_value0.value); + *dst_source += text.ReplaceStr("", String8::FromPrintf("%u", index)) + .ReplaceStr("", offset) + .ReplaceStr("", src0_value0.value); return true; } @@ -3578,9 +4027,9 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx16_Sdst16SvSoffset) // // EXIT_NOT_IMPLEMENTED(code.ReadBlock(ShaderLabel(inst).GetDst()).is_discard); // -// String label = ShaderLabel(inst).ToString(); +// String8 label = ShaderLabel(inst).ToString(); // -// static const char32_t* text = UR"( +// static const char* text = R"( // %execz_u_ = OpLoad %uint %execz // %execz_b_ = OpINotEqual %bool %execz_u_ %uint_0 // OpSelectionMerge %