diff --git a/appveyor.yml b/appveyor.yml index e08fb87..4f06d8e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.1.6.build-{build} +version: 0.1.7.build-{build} image: Visual Studio 2019 environment: matrix: diff --git a/source/.gitattributes b/source/.gitattributes index 88eb590..a963629 100644 --- a/source/.gitattributes +++ b/source/.gitattributes @@ -1 +1,3 @@ doc/** filter=lfs diff=lfs merge=lfs -text + +*.inc linguist-language=C++ diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index b1388b3..e8d8708 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD) set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000") endif() -project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.6) +project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.7) include(src_script.cmake) diff --git a/source/emulator/include/Emulator/Graphics/Objects/RenderTexture.h b/source/emulator/include/Emulator/Graphics/Objects/RenderTexture.h index 46b267a..5e274da 100644 --- a/source/emulator/include/Emulator/Graphics/Objects/RenderTexture.h +++ b/source/emulator/include/Emulator/Graphics/Objects/RenderTexture.h @@ -13,6 +13,7 @@ namespace Kyty::Libs::Graphics { enum class RenderTextureFormat : uint64_t { Unknown, + R8Unorm, R8G8B8A8Unorm, R8G8B8A8Srgb, B8G8R8A8Unorm, diff --git a/source/emulator/include/Emulator/Graphics/Shader.h b/source/emulator/include/Emulator/Graphics/Shader.h index 33ad4d4..fec1749 100644 --- a/source/emulator/include/Emulator/Graphics/Shader.h +++ b/source/emulator/include/Emulator/Graphics/Shader.h @@ -6,9 +6,9 @@ #include "Kyty/Core/Vector.h" #include "Emulator/Common.h" +#include "Emulator/Graphics/Shader.h" #include - #ifdef KYTY_EMU_ENABLED namespace Kyty::Libs::Graphics { @@ -57,6 +57,7 @@ enum class ShaderInstructionType SAndn2B64, SAndSaveexecB64, SBfmB32, + SBranch, SBufferLoadDword, SBufferLoadDwordx16, SBufferLoadDwordx2, @@ -66,6 +67,7 @@ enum class ShaderInstructionType SCbranchScc0, SCbranchScc1, SCbranchVccz, + SCbranchVccnz, SCmpEqI32, SCmpEqU32, SCmpGeI32, @@ -260,6 +262,7 @@ enum FormatByte : uint64_t Dmask8, // dmask:0x8 Dmask3, // dmask:0x3 Dmask5, // dmask:0x5 + Dmask9, // dmask:0x9 Gds, // gds }; @@ -308,6 +311,7 @@ enum Format : uint64_t Vdata1VaddrSvSoffsIdxenFloat1 = FormatDefine({D, S0, S1A4, S2, Idxen, Float1}), Vdata2Vaddr3StSsDmask3 = FormatDefine({DA2, S0A3, S1A8, S2A4, Dmask3}), Vdata2Vaddr3StSsDmask5 = FormatDefine({DA2, S0A3, S1A8, S2A4, Dmask5}), + Vdata2Vaddr3StSsDmask9 = FormatDefine({DA2, S0A3, S1A8, S2A4, Dmask9}), Vdata2VaddrSvSoffsIdxen = FormatDefine({DA2, S0, S1A4, S2, Idxen}), Vdata3Vaddr3StSsDmask7 = FormatDefine({DA3, S0A3, S1A8, S2A4, Dmask7}), Vdata3Vaddr4StSsDmask7 = FormatDefine({DA3, S0A4, S1A8, S2A4, Dmask7}), @@ -418,6 +422,14 @@ struct ShaderDebugPrintf Vector args; }; +struct ShaderControlFlowBlock +{ + uint32_t pc = 0; + bool is_discard = false; + bool is_valid = false; + ShaderInstruction last; +}; + class ShaderCode { public: @@ -455,9 +467,11 @@ public: [[nodiscard]] uint32_t GetPsEmbeddedId() const { return m_ps_embedded_id; } void SetPsEmbeddedId(uint32_t embedded_id) { m_ps_embedded_id = embedded_id; } - [[nodiscard]] bool IsDiscardBlock(uint32_t pc) const; - [[nodiscard]] bool IsDiscardInstruction(uint32_t index) const; - [[nodiscard]] Vector GetDiscardBlock(uint32_t pc) const; + //[[nodiscard]] bool IsDiscardBlock(uint32_t pc) const; + //[[nodiscard]] bool IsDiscardInstruction(uint32_t index) const; + //[[nodiscard]] Vector GetDiscardBlock(uint32_t pc) const; + [[nodiscard]] ShaderControlFlowBlock ReadBlock(uint32_t pc) const; + [[nodiscard]] Vector ReadIntructions(const ShaderControlFlowBlock& block) const; [[nodiscard]] uint32_t GetCrc32() const { return m_crc32; } void SetCrc32(uint32_t c) { this->m_crc32 = c; } diff --git a/source/emulator/src/Audio.cpp b/source/emulator/src/Audio.cpp index 86a4576..19b1bc0 100644 --- a/source/emulator/src/Audio.cpp +++ b/source/emulator/src/Audio.cpp @@ -1128,9 +1128,12 @@ static void playback_simulate(void* arg) EXIT_IF(play_data == nullptr); - // TODO(): Audio output is not yet implemented, so simulate audio delay - Core::Thread::SleepMicro(port->data_delay); - play_data->state = Audio3dData::State::Empty; + if (play_data != nullptr) + { + // TODO(): Audio output is not yet implemented, so simulate audio delay + Core::Thread::SleepMicro(port->data_delay); + play_data->state = Audio3dData::State::Empty; + } } port->playback_finished = true; diff --git a/source/emulator/src/Graphics/GraphicsRender.cpp b/source/emulator/src/Graphics/GraphicsRender.cpp index c21c5da..18f40ac 100644 --- a/source/emulator/src/Graphics/GraphicsRender.cpp +++ b/source/emulator/src/Graphics/GraphicsRender.cpp @@ -476,46 +476,50 @@ static void uc_print(const char* func, const HW::UserConfig& uc) // EXIT_NOT_IMPLEMENTED(uc.GetPrimType() != 4); //} -static void rt_print(const char* func, const HW::RenderTarget& rt) +static Core::StringList rt_print(const char* func, const HW::RenderTarget& rt) { - printf("%s\n", func); + Core::StringList dst; - printf("\t base.addr = 0x%016" PRIx64 "\n", rt.base.addr); - printf("\t pitch.pitch_div8_minus1 = 0x%08" PRIx32 "\n", rt.pitch.pitch_div8_minus1); - printf("\t pitch.fmask_pitch_div8_minus1 = 0x%08" PRIx32 "\n", rt.pitch.fmask_pitch_div8_minus1); - printf("\t slice.slice_div64_minus1 = 0x%08" PRIx32 "\n", rt.slice.slice_div64_minus1); - printf("\t view.base_array_slice_index = 0x%08" PRIx32 "\n", rt.view.base_array_slice_index); - printf("\t view.last_array_slice_index = 0x%08" PRIx32 "\n", rt.view.last_array_slice_index); - printf("\t info.fmask_compression_enable = %s\n", rt.info.fmask_compression_enable ? "true" : "false"); - printf("\t info.fmask_compression_mode = 0x%08" PRIx32 "\n", rt.info.fmask_compression_mode); - printf("\t info.cmask_fast_clear_enable = %s\n", rt.info.cmask_fast_clear_enable ? "true" : "false"); - printf("\t info.dcc_compression_enable = %s\n", rt.info.dcc_compression_enable ? "true" : "false"); - printf("\t info.neo_mode = %s\n", rt.info.neo_mode ? "true" : "false"); - printf("\t info.cmask_tile_mode = 0x%08" PRIx32 "\n", rt.info.cmask_tile_mode); - printf("\t info.cmask_tile_mode_neo = 0x%08" PRIx32 "\n", rt.info.cmask_tile_mode_neo); - printf("\t info.format = 0x%08" PRIx32 "\n", rt.info.format); - printf("\t info.channel_type = 0x%08" PRIx32 "\n", rt.info.channel_type); - printf("\t info.channel_order = 0x%08" PRIx32 "\n", rt.info.channel_order); - printf("\t attrib.force_dest_alpha_to_one = %s\n", rt.attrib.force_dest_alpha_to_one ? "true" : "false"); - printf("\t attrib.tile_mode = 0x%08" PRIx32 "\n", rt.attrib.tile_mode); - printf("\t attrib.fmask_tile_mode = 0x%08" PRIx32 "\n", rt.attrib.fmask_tile_mode); - printf("\t attrib.num_samples = 0x%08" PRIx32 "\n", rt.attrib.num_samples); - printf("\t attrib.num_fragments = 0x%08" PRIx32 "\n", rt.attrib.num_fragments); - printf("\t dcc.max_uncompressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_uncompressed_block_size); - printf("\t dcc.max_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_compressed_block_size); - printf("\t dcc.min_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.min_compressed_block_size); - printf("\t dcc.color_transform = 0x%08" PRIx32 "\n", rt.dcc.color_transform); - printf("\t dcc.enable_overwrite_combiner = %s\n", rt.dcc.enable_overwrite_combiner ? "true" : "false"); - printf("\t dcc.force_independent_blocks = %s\n", rt.dcc.force_independent_blocks ? "true" : "false"); - printf("\t cmask.addr = 0x%016" PRIx64 "\n", rt.cmask.addr); - printf("\t cmask_slice.slice_minus1 = 0x%08" PRIx32 "\n", rt.cmask_slice.slice_minus1); - printf("\t fmask.addr = 0x%016" PRIx64 "\n", rt.fmask.addr); - printf("\t fmask_slice.slice_minus1 = 0x%08" PRIx32 "\n", rt.fmask_slice.slice_minus1); - printf("\t clear_word0.word0 = 0x%08" PRIx32 "\n", rt.clear_word0.word0); - printf("\t clear_word1.word1 = 0x%08" PRIx32 "\n", rt.clear_word1.word1); - printf("\t dcc_addr.addr = 0x%016" PRIx64 "\n", rt.dcc_addr.addr); - printf("\t size.width = 0x%08" PRIx32 "\n", rt.size.width); - printf("\t size.height = 0x%08" PRIx32 "\n", rt.size.height); + dst.Add(String::FromPrintf("%s\n", func)); + + dst.Add(String::FromPrintf("\t base.addr = 0x%016" PRIx64 "\n", rt.base.addr)); + dst.Add(String::FromPrintf("\t pitch.pitch_div8_minus1 = 0x%08" PRIx32 "\n", rt.pitch.pitch_div8_minus1)); + dst.Add(String::FromPrintf("\t pitch.fmask_pitch_div8_minus1 = 0x%08" PRIx32 "\n", rt.pitch.fmask_pitch_div8_minus1)); + dst.Add(String::FromPrintf("\t slice.slice_div64_minus1 = 0x%08" PRIx32 "\n", rt.slice.slice_div64_minus1)); + dst.Add(String::FromPrintf("\t view.base_array_slice_index = 0x%08" PRIx32 "\n", rt.view.base_array_slice_index)); + dst.Add(String::FromPrintf("\t view.last_array_slice_index = 0x%08" PRIx32 "\n", rt.view.last_array_slice_index)); + dst.Add(String::FromPrintf("\t info.fmask_compression_enable = %s\n", rt.info.fmask_compression_enable ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.fmask_compression_mode = 0x%08" PRIx32 "\n", rt.info.fmask_compression_mode)); + dst.Add(String::FromPrintf("\t info.cmask_fast_clear_enable = %s\n", rt.info.cmask_fast_clear_enable ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.dcc_compression_enable = %s\n", rt.info.dcc_compression_enable ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.neo_mode = %s\n", rt.info.neo_mode ? "true" : "false")); + dst.Add(String::FromPrintf("\t info.cmask_tile_mode = 0x%08" PRIx32 "\n", rt.info.cmask_tile_mode)); + dst.Add(String::FromPrintf("\t info.cmask_tile_mode_neo = 0x%08" PRIx32 "\n", rt.info.cmask_tile_mode_neo)); + dst.Add(String::FromPrintf("\t info.format = 0x%08" PRIx32 "\n", rt.info.format)); + dst.Add(String::FromPrintf("\t info.channel_type = 0x%08" PRIx32 "\n", rt.info.channel_type)); + dst.Add(String::FromPrintf("\t info.channel_order = 0x%08" PRIx32 "\n", rt.info.channel_order)); + dst.Add(String::FromPrintf("\t attrib.force_dest_alpha_to_one = %s\n", rt.attrib.force_dest_alpha_to_one ? "true" : "false")); + dst.Add(String::FromPrintf("\t attrib.tile_mode = 0x%08" PRIx32 "\n", rt.attrib.tile_mode)); + dst.Add(String::FromPrintf("\t attrib.fmask_tile_mode = 0x%08" PRIx32 "\n", rt.attrib.fmask_tile_mode)); + dst.Add(String::FromPrintf("\t attrib.num_samples = 0x%08" PRIx32 "\n", rt.attrib.num_samples)); + dst.Add(String::FromPrintf("\t attrib.num_fragments = 0x%08" PRIx32 "\n", rt.attrib.num_fragments)); + dst.Add(String::FromPrintf("\t dcc.max_uncompressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_uncompressed_block_size)); + dst.Add(String::FromPrintf("\t dcc.max_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.max_compressed_block_size)); + dst.Add(String::FromPrintf("\t dcc.min_compressed_block_size = 0x%08" PRIx32 "\n", rt.dcc.min_compressed_block_size)); + dst.Add(String::FromPrintf("\t dcc.color_transform = 0x%08" PRIx32 "\n", rt.dcc.color_transform)); + dst.Add(String::FromPrintf("\t dcc.enable_overwrite_combiner = %s\n", rt.dcc.enable_overwrite_combiner ? "true" : "false")); + dst.Add(String::FromPrintf("\t dcc.force_independent_blocks = %s\n", rt.dcc.force_independent_blocks ? "true" : "false")); + dst.Add(String::FromPrintf("\t cmask.addr = 0x%016" PRIx64 "\n", rt.cmask.addr)); + dst.Add(String::FromPrintf("\t cmask_slice.slice_minus1 = 0x%08" PRIx32 "\n", rt.cmask_slice.slice_minus1)); + dst.Add(String::FromPrintf("\t fmask.addr = 0x%016" PRIx64 "\n", rt.fmask.addr)); + dst.Add(String::FromPrintf("\t fmask_slice.slice_minus1 = 0x%08" PRIx32 "\n", rt.fmask_slice.slice_minus1)); + dst.Add(String::FromPrintf("\t clear_word0.word0 = 0x%08" PRIx32 "\n", rt.clear_word0.word0)); + dst.Add(String::FromPrintf("\t clear_word1.word1 = 0x%08" PRIx32 "\n", rt.clear_word1.word1)); + dst.Add(String::FromPrintf("\t dcc_addr.addr = 0x%016" PRIx64 "\n", rt.dcc_addr.addr)); + dst.Add(String::FromPrintf("\t size.width = 0x%08" PRIx32 "\n", rt.size.width)); + dst.Add(String::FromPrintf("\t size.height = 0x%08" PRIx32 "\n", rt.size.height)); + + return dst; } // NOLINTNEXTLINE(readability-function-cognitive-complexity) @@ -1039,22 +1043,26 @@ static void hw_print(const HW::HardwareContext& hw) const auto& aa = hw.GetAaSampleControl(); const auto& ac = hw.GetAaConfig(); - printf("HardwareContext\n"); - printf("\t GetRenderTargetMask() = 0x%08" PRIx32 "\n", hw.GetRenderTargetMask()); - printf("\t GetDepthClearValue() = %f\n", hw.GetDepthClearValue()); - printf("\t GetStencilClearValue() = %" PRIu8 "\n", hw.GetStencilClearValue()); - printf("\t GetLineWidth() = %f\n", hw.GetLineWidth()); + if (Kyty::Log::GetDirection() != Kyty::Log::Direction::Silent) + { + printf("HardwareContext\n"); + printf("\t GetRenderTargetMask() = 0x%08" PRIx32 "\n", hw.GetRenderTargetMask()); + printf("\t GetDepthClearValue() = %f\n", hw.GetDepthClearValue()); + printf("\t GetStencilClearValue() = %" PRIu8 "\n", hw.GetStencilClearValue()); + printf("\t GetLineWidth() = %f\n", hw.GetLineWidth()); - rt_print("RenderTraget:", rt); - z_print("DepthRenderTraget:", z); - vp_print("ScreenViewport:", vp, smc); - clip_print("ClipControl:", c); - rc_print("RenderControl:", rc); - d_print("DepthStencilControlMask:", d, s, sm); - mc_print("ModeControl:", mc); - bc_print("BlendColorControl:", bc, bclr, cc); - eqaa_print("EqaaControl:", eqaa); - aa_print("AaSampleControl:", aa, ac); + printf("%s", rt_print("RenderTraget:", rt).Concat(U"").C_Str()); + + z_print("DepthRenderTraget:", z); + vp_print("ScreenViewport:", vp, smc); + clip_print("ClipControl:", c); + rc_print("RenderControl:", rc); + d_print("DepthStencilControlMask:", d, s, sm); + mc_print("ModeControl:", mc); + bc_print("BlendColorControl:", bc, bclr, cc); + eqaa_print("EqaaControl:", eqaa); + aa_print("AaSampleControl:", aa, ac); + } } void GraphicsRenderInit() @@ -3828,9 +3836,12 @@ static void FindRenderColorInfo(uint64_t submit_id, CommandBuffer* buffer, const } else if (rt.info.format == 0xa && rt.info.channel_type == 0x6 && rt.info.channel_order == 0x1) { rt_format = RenderTextureFormat::B8G8R8A8Srgb; + } else if (rt.info.format == 0x1 && rt.info.channel_type == 0x0 && rt.info.channel_order == 0x0) + { + rt_format = RenderTextureFormat::R8Unorm; } else { - EXIT("unknown format"); + EXIT("%s\n unknown format\n", rt_print("RenderTarget", rt).Concat(U"").C_Str()); } // Render to texture diff --git a/source/emulator/src/Graphics/Objects/GpuMemory.cpp b/source/emulator/src/Graphics/Objects/GpuMemory.cpp index 7bf6c53..a5e39a0 100644 --- a/source/emulator/src/Graphics/Objects/GpuMemory.cpp +++ b/source/emulator/src/Graphics/Objects/GpuMemory.cpp @@ -836,6 +836,18 @@ bool GpuMemory::create_maybe_deleted(const Vector& others, GpuM o_type == GpuMemoryObjectType::Texture); }); } + if (type == GpuMemoryObjectType::RenderTexture) + { + return std::all_of(others.begin(), others.end(), + [heap](auto& r) + { + OverlapType rel = r.relation; + const auto& o = heap.objects[r.object_id]; + GpuMemoryObjectType o_type = o.info.object.type; + return ((rel == OverlapType::IsContainedWithin || rel == OverlapType::Crosses) && + (o_type == GpuMemoryObjectType::RenderTexture || o_type == GpuMemoryObjectType::DepthStencilBuffer)); + }); + } return false; } diff --git a/source/emulator/src/Graphics/Objects/RenderTexture.cpp b/source/emulator/src/Graphics/Objects/RenderTexture.cpp index 7f5e66a..f35dc1c 100644 --- a/source/emulator/src/Graphics/Objects/RenderTexture.cpp +++ b/source/emulator/src/Graphics/Objects/RenderTexture.cpp @@ -163,8 +163,9 @@ static void* create_func(GraphicContext* ctx, const uint64_t* params, const uint VkFormat vk_format = VK_FORMAT_UNDEFINED; - switch (pixel_format) // NOLINT + switch (pixel_format) { + case static_cast(RenderTextureFormat::R8Unorm): vk_format = VK_FORMAT_R8_UNORM; break; case static_cast(RenderTextureFormat::R8G8B8A8Unorm): vk_format = VK_FORMAT_R8G8B8A8_UNORM; break; case static_cast(RenderTextureFormat::R8G8B8A8Srgb): vk_format = VK_FORMAT_R8G8B8A8_SRGB; break; case static_cast(RenderTextureFormat::B8G8R8A8Unorm): vk_format = VK_FORMAT_B8G8R8A8_UNORM; break; @@ -292,8 +293,9 @@ static void* create2_func(GraphicContext* ctx, CommandBuffer* buffer, const uint VkFormat vk_format = VK_FORMAT_UNDEFINED; - switch (pixel_format) // NOLINT + switch (pixel_format) { + case static_cast(RenderTextureFormat::R8Unorm): vk_format = VK_FORMAT_R8_UNORM; break; case static_cast(RenderTextureFormat::R8G8B8A8Unorm): vk_format = VK_FORMAT_R8G8B8A8_UNORM; break; case static_cast(RenderTextureFormat::R8G8B8A8Srgb): vk_format = VK_FORMAT_R8G8B8A8_SRGB; break; case static_cast(RenderTextureFormat::B8G8R8A8Unorm): vk_format = VK_FORMAT_B8G8R8A8_UNORM; break; diff --git a/source/emulator/src/Graphics/Shader.cpp b/source/emulator/src/Graphics/Shader.cpp index 7fecf2d..9e10d79 100644 --- a/source/emulator/src/Graphics/Shader.cpp +++ b/source/emulator/src/Graphics/Shader.cpp @@ -229,6 +229,7 @@ static String dbg_fmt_to_str(const ShaderInstruction& inst) case ShaderInstructionFormat::Vdata1Vaddr3StSsDmask8: return U"Vdata1Vaddr3StSsDmask8"; break; case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask3: return U"Vdata2Vaddr3StSsDmask3"; break; case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask5: return U"Vdata2Vaddr3StSsDmask5"; break; + case ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9: return U"Vdata2Vaddr3StSsDmask9"; break; case ShaderInstructionFormat::Vdata3Vaddr3StSsDmask7: return U"Vdata3Vaddr3StSsDmask7"; break; case ShaderInstructionFormat::Vdata3Vaddr4StSsDmask7: return U"Vdata3Vaddr4StSsDmask7"; break; case ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF: return U"Vdata4Vaddr3StSsDmaskF"; break; @@ -311,6 +312,7 @@ static String dbg_fmt_print(const ShaderInstruction& inst) case ShaderInstructionFormat::Dmask3: s = U"dmask:0x3"; break; case ShaderInstructionFormat::Dmask5: s = U"dmask:0x5"; break; case ShaderInstructionFormat::Dmask7: s = U"dmask:0x7"; break; + case ShaderInstructionFormat::Dmask9: s = U"dmask:0x9"; break; case ShaderInstructionFormat::DmaskF: s = U"dmask:0xf"; break; case ShaderInstructionFormat::Gds: s = U"gds"; break; default: EXIT("unknown code: %u\n", static_cast(fu)); @@ -385,13 +387,13 @@ String ShaderCode::DbgDump() const return ret; } -bool ShaderCode::IsDiscardInstruction(uint32_t index) const +static bool IsDiscardInstruction(const Vector& code, uint32_t index) { - if (!(index == 0 || index + 1 >= m_instructions.Size())) + if (!(index == 0 || index + 1 >= code.Size())) { - const auto& prev_inst = m_instructions.At(index - 1); - const auto& inst = m_instructions.At(index); - const auto& next_inst = m_instructions.At(index + 1); + const auto& prev_inst = code.At(index - 1); + const auto& inst = code.At(index); + const auto& next_inst = code.At(index + 1); return (inst.type == ShaderInstructionType::Exp && inst.format == ShaderInstructionFormat::Mrt0OffOffComprVmDone && prev_inst.type == ShaderInstructionType::SMovB64 && prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc02 && @@ -401,37 +403,72 @@ bool ShaderCode::IsDiscardInstruction(uint32_t index) const return false; } -bool ShaderCode::IsDiscardBlock(uint32_t pc) const +// bool ShaderCode::IsDiscardBlock(uint32_t pc) const +//{ +// auto inst_count = m_instructions.Size(); +// for (uint32_t index = 0; index < inst_count; index++) +// { +// const auto& inst = m_instructions.At(index); +// if (inst.pc == pc) +// { +// for (uint32_t i = index; i < inst_count; i++) +// { +// const auto& inst = m_instructions.At(i); +// +// if (inst.type == ShaderInstructionType::SEndpgm || inst.type == ShaderInstructionType::SCbranchExecz || +// inst.type == ShaderInstructionType::SCbranchScc0 || inst.type == ShaderInstructionType::SCbranchScc1 || +// inst.type == ShaderInstructionType::SCbranchVccz) +// { +// return false; +// } +// +// if (IsDiscardInstruction(i)) +// { +// return true; +// } +// } +// return false; +// } +// } +// return false; +// } + +ShaderControlFlowBlock ShaderCode::ReadBlock(uint32_t pc) const { - auto inst_count = m_instructions.Size(); + ShaderControlFlowBlock ret; + auto inst_count = m_instructions.Size(); for (uint32_t index = 0; index < inst_count; index++) { const auto& inst = m_instructions.At(index); if (inst.pc == pc) { + ret.pc = pc; + ret.is_valid = true; for (uint32_t i = index; i < inst_count; i++) { const auto& inst = m_instructions.At(i); if (inst.type == ShaderInstructionType::SEndpgm || inst.type == ShaderInstructionType::SCbranchExecz || inst.type == ShaderInstructionType::SCbranchScc0 || inst.type == ShaderInstructionType::SCbranchScc1 || - inst.type == ShaderInstructionType::SCbranchVccz) + inst.type == ShaderInstructionType::SCbranchVccz || inst.type == ShaderInstructionType::SCbranchVccnz || + inst.type == ShaderInstructionType::SBranch) { - return false; + ret.last = inst; + break; } - if (IsDiscardInstruction(i)) + if (IsDiscardInstruction(m_instructions, i)) { - return true; + ret.is_discard = true; } } - return false; + break; } } - return false; + return ret; } -Vector ShaderCode::GetDiscardBlock(uint32_t pc) const +Vector ShaderCode::ReadIntructions(const ShaderControlFlowBlock& block) const { Vector ret; @@ -439,7 +476,7 @@ Vector ShaderCode::GetDiscardBlock(uint32_t pc) const for (uint32_t index = 0; index < inst_count; index++) { const auto& inst = m_instructions.At(index); - if (inst.pc == pc) + if (inst.pc == block.pc) { for (uint32_t i = index; i < inst_count; i++) { @@ -447,9 +484,8 @@ Vector ShaderCode::GetDiscardBlock(uint32_t pc) const ret.Add(inst); - if (IsDiscardInstruction(i)) + if (inst.pc == block.last.pc) { - ret.Add(m_instructions.At(i + 1)); break; } } @@ -460,6 +496,35 @@ Vector ShaderCode::GetDiscardBlock(uint32_t pc) const return ret; } +// Vector ShaderCode::GetDiscardBlock(uint32_t pc) const +//{ +// Vector ret; +// +// auto inst_count = m_instructions.Size(); +// for (uint32_t index = 0; index < inst_count; index++) +// { +// const auto& inst = m_instructions.At(index); +// if (inst.pc == pc) +// { +// for (uint32_t i = index; i < inst_count; i++) +// { +// const auto& inst = m_instructions.At(i); +// +// ret.Add(inst); +// +// if (IsDiscardInstruction(i)) +// { +// ret.Add(m_instructions.At(i + 1)); +// break; +// } +// } +// break; +// } +// } +// +// return ret; +// } + static ShaderOperand operand_parse(uint32_t code) { ShaderOperand ret; @@ -634,9 +699,11 @@ KYTY_SHADER_PARSER(shader_parse_sopp) inst.format = ShaderInstructionFormat::Empty; inst.src_num = 0; break; + case 0x02: inst.type = ShaderInstructionType::SBranch; break; case 0x04: inst.type = ShaderInstructionType::SCbranchScc0; break; case 0x05: inst.type = ShaderInstructionType::SCbranchScc1; break; case 0x06: inst.type = ShaderInstructionType::SCbranchVccz; break; + case 0x07: inst.type = ShaderInstructionType::SCbranchVccnz; break; case 0x08: inst.type = ShaderInstructionType::SCbranchExecz; break; case 0x0c: inst.type = ShaderInstructionType::SWaitcnt; @@ -653,7 +720,8 @@ KYTY_SHADER_PARSER(shader_parse_sopp) dst->GetInstructions().Add(inst); if (inst.type == ShaderInstructionType::SCbranchScc0 || inst.type == ShaderInstructionType::SCbranchScc1 || - inst.type == ShaderInstructionType::SCbranchVccz || inst.type == ShaderInstructionType::SCbranchExecz) + inst.type == ShaderInstructionType::SCbranchVccz || inst.type == ShaderInstructionType::SCbranchVccnz || + inst.type == ShaderInstructionType::SCbranchExecz || inst.type == ShaderInstructionType::SBranch) { dst->GetLabels().Add(ShaderLabel(inst)); } @@ -1777,6 +1845,12 @@ KYTY_SHADER_PARSER(shader_parse_mimg) inst.dst.size = 1; break; } + case 0x9: + { + inst.format = ShaderInstructionFormat::Vdata2Vaddr3StSsDmask9; + inst.dst.size = 2; + break; + } case 0xf: { inst.format = ShaderInstructionFormat::Vdata4Vaddr3StSsDmaskF; diff --git a/source/emulator/src/Graphics/ShaderSpirv.cpp b/source/emulator/src/Graphics/ShaderSpirv.cpp index 5817f5f..1815398 100644 --- a/source/emulator/src/Graphics/ShaderSpirv.cpp +++ b/source/emulator/src/Graphics/ShaderSpirv.cpp @@ -2049,24 +2049,15 @@ KYTY_RECOMPILER_FUNC(Recompile_Exp_Mrt0OffOffComprVmDone) { EXIT_NOT_IMPLEMENTED(index == 0 || index + 1 >= code.GetInstructions().Size()); - if (!code.IsDiscardInstruction(index)) + const auto& prev_inst = code.GetInstructions().At(index - 1); + const auto& inst = code.GetInstructions().At(index); + auto block = code.ReadBlock(prev_inst.pc); + + if (!block.is_discard) { return false; } - // const auto& prev_inst = code.GetInstructions().At(index - 1); - // const auto& inst = code.GetInstructions().At(index); - // const auto& next_inst = code.GetInstructions().At(index + 1); - // - // if (!(prev_inst.type == ShaderInstructionType::SMovB64 && prev_inst.format == ShaderInstructionFormat::Sdst2Ssrc02 && - // prev_inst.dst.type == ShaderOperandType::ExecLo && prev_inst.src[0].type == ShaderOperandType::IntegerInlineConstant && - // prev_inst.src[0].constant.i == 0 && next_inst.type == ShaderInstructionType::SEndpgm)) - // { - // return false; - // } - - const auto& inst = code.GetInstructions().At(index); - const auto* info = spirv->GetPsInputInfo(); EXIT_NOT_IMPLEMENTED(info == nullptr || !info->ps_pixel_kill_enable); @@ -2463,6 +2454,65 @@ KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask5) return false; } +KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata2Vaddr3StSsDmask9) +{ + const auto& inst = code.GetInstructions().At(index); + const auto* bind_info = spirv->GetBindInfo(); + + if (bind_info != nullptr && bind_info->textures2D.textures2d_sampled_num > 0 && bind_info->samplers.samplers_num > 0) + { + auto dst_value0 = operand_variable_to_str(inst.dst, 0); + auto dst_value1 = operand_variable_to_str(inst.dst, 1); + auto src0_value0 = operand_variable_to_str(inst.src[0], 0); + auto src0_value1 = operand_variable_to_str(inst.src[0], 1); + auto src0_value2 = operand_variable_to_str(inst.src[0], 2); + auto src1_value0 = operand_variable_to_str(inst.src[1], 0); + auto src2_value0 = operand_variable_to_str(inst.src[2], 0); + + EXIT_NOT_IMPLEMENTED(dst_value0.type != SpirvType::Float); + EXIT_NOT_IMPLEMENTED(src0_value0.type != SpirvType::Float); + EXIT_NOT_IMPLEMENTED(src1_value0.type != SpirvType::Uint); + EXIT_NOT_IMPLEMENTED(src2_value0.type != SpirvType::Uint); + + // TODO() check VSKIP + // TODO() check LOD_CLAMPED + + static const char32_t* text = UR"( + %t24_ = OpLoad %uint % + %t26_ = OpAccessChain %_ptr_UniformConstant_ImageS %textures2D_S %t24_ + %t27_ = OpLoad %ImageS %t26_ + %t33_ = OpLoad %uint % + %t35_ = OpAccessChain %_ptr_UniformConstant_Sampler %samplers %t33_ + %t36_ = OpLoad %Sampler %t35_ + %t38_ = OpSampledImage %SampledImage %t27_ %t36_ + %t39_ = OpLoad %float % + %t40_ = OpLoad %float % + %t42_ = OpCompositeConstruct %v2float %t39_ %t40_ + %t43_ = OpImageSampleImplicitLod %v4float %t38_ %t42_ + OpStore %temp_v4float %t43_ + %t46_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_0 + %t47_ = OpLoad %float %t46_ + OpStore % %t47_ + %t54_ = OpAccessChain %_ptr_Function_float %temp_v4float %uint_3 + %t55_ = OpLoad %float %t54_ + OpStore % %t55_ +)"; + *dst_source += String(text) + .ReplaceStr(U"", String::FromPrintf("%u", index)) + .ReplaceStr(U"", src0_value0.value) + .ReplaceStr(U"", src0_value1.value) + .ReplaceStr(U"", src0_value2.value) + .ReplaceStr(U"", src1_value0.value) + .ReplaceStr(U"", src2_value0.value) + .ReplaceStr(U"", dst_value0.value) + .ReplaceStr(U"", dst_value1.value); + + return true; + } + + return false; +} + KYTY_RECOMPILER_FUNC(Recompile_ImageSample_Vdata3Vaddr3StSsDmask7) { const auto& inst = code.GetInstructions().At(index); @@ -3520,22 +3570,41 @@ KYTY_RECOMPILER_FUNC(Recompile_SBufferLoadDwordx16_Sdst16SvSoffset) return false; } -KYTY_RECOMPILER_FUNC(Recompile_SCbranchExecz_Label) +// KYTY_RECOMPILER_FUNC(Recompile_SCbranchExecz_Label) +//{ +// const auto& inst = code.GetInstructions().At(index); +// +// EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[0])); +// +// EXIT_NOT_IMPLEMENTED(code.ReadBlock(ShaderLabel(inst).GetDst()).is_discard); +// +// String label = ShaderLabel(inst).ToString(); +// +// static const char32_t* text = UR"( +// %execz_u_ = OpLoad %uint %execz +// %execz_b_ = OpINotEqual %bool %execz_u_ %uint_0 +// OpSelectionMerge %