diff --git a/appveyor.yml b/appveyor.yml index c62ca9a..8a3b451 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.1.12.build-{build} +version: 0.1.13.build-{build} image: Visual Studio 2019 environment: matrix: diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 8dd52ad..949edf5 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -82,7 +82,7 @@ if (KYTY_LINKER STREQUAL LD) set(KYTY_LD_OPTIONS "-Wl,--image-base=0x100000000000") endif() -project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.12) +project(Kyty${KYTY_PROJECT_NAME}${CMAKE_BUILD_TYPE}${KYTY_COMPILER} VERSION 0.1.13) include(src_script.cmake) diff --git a/source/emulator/include/Emulator/Graphics/HardwareContext.h b/source/emulator/include/Emulator/Graphics/HardwareContext.h index 14aa177..ffa8120 100644 --- a/source/emulator/include/Emulator/Graphics/HardwareContext.h +++ b/source/emulator/include/Emulator/Graphics/HardwareContext.h @@ -585,9 +585,11 @@ enum class UserSgprType struct UserSgprInfo { - uint32_t value[16] = {0}; - UserSgprType type[16] = {}; - uint32_t count = 0; + static constexpr int SGPRS_MAX = 16; + + uint32_t value[SGPRS_MAX] = {0}; + UserSgprType type[SGPRS_MAX] = {}; + uint32_t count = 0; }; struct VertexShaderInfo diff --git a/source/emulator/include/Emulator/Graphics/Shader.h b/source/emulator/include/Emulator/Graphics/Shader.h index 6ef0f55..88efb45 100644 --- a/source/emulator/include/Emulator/Graphics/Shader.h +++ b/source/emulator/include/Emulator/Graphics/Shader.h @@ -101,7 +101,9 @@ enum class ShaderInstructionType : uint32_t SMovB32, SMovB64, SMovkI32, + SMulHiU32, SMulI32, + SMulkI32, SNandB64, SNorB64, SOrB32, @@ -181,6 +183,7 @@ enum class ShaderInstructionType : uint32_t VFloorF32, VFmaF32, VFractF32, + VInterpMovF32, VInterpP1F32, VInterpP2F32, VLogF32, @@ -716,6 +719,11 @@ struct ShaderGdsResource [[nodiscard]] uint16_t Size() const { return field & 0xFFFFu; } }; +struct ShaderDirectSgprResource +{ + uint32_t field = 0; +}; + struct ShaderExtendedResource { uint32_t fields[2] = {0}; @@ -823,6 +831,15 @@ struct ShaderGdsResources int binding_index = 0; }; +struct ShaderDirectSgprsResources +{ + static constexpr int SGPRS_MAX = 4; + + ShaderDirectSgprResource sgprs[SGPRS_MAX]; + int start_register[SGPRS_MAX] = {0}; + int sgprs_num = 0; +}; + struct ShaderExtendedResources { bool used = false; @@ -833,14 +850,15 @@ struct ShaderExtendedResources struct ShaderBindResources { - uint32_t push_constant_offset = 0; - uint32_t push_constant_size = 0; - uint32_t descriptor_set_slot = 0; - ShaderStorageResources storage_buffers; - ShaderTextureResources textures2D; - ShaderSamplerResources samplers; - ShaderGdsResources gds_pointers; - ShaderExtendedResources extended; + uint32_t push_constant_offset = 0; + uint32_t push_constant_size = 0; + uint32_t descriptor_set_slot = 0; + ShaderStorageResources storage_buffers; + ShaderTextureResources textures2D; + ShaderSamplerResources samplers; + ShaderGdsResources gds_pointers; + ShaderDirectSgprsResources direct_sgprs; + ShaderExtendedResources extended; }; struct ShaderVertexInputInfo diff --git a/source/emulator/src/Graphics/GraphicsRender.cpp b/source/emulator/src/Graphics/GraphicsRender.cpp index 7cb422b..7f43883 100644 --- a/source/emulator/src/Graphics/GraphicsRender.cpp +++ b/source/emulator/src/Graphics/GraphicsRender.cpp @@ -1949,12 +1949,12 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou EXIT_IF(push_constant_info == nullptr); EXIT_IF(push_constant_info_num == nullptr); - bool need_bind = (bind.storage_buffers.buffers_num > 0 || bind.textures2D.textures_num > 0 || bind.samplers.samplers_num > 0 || - bind.gds_pointers.pointers_num > 0); + bool need_descriptor = (bind.storage_buffers.buffers_num > 0 || bind.textures2D.textures_num > 0 || bind.samplers.samplers_num > 0 || + bind.gds_pointers.pointers_num > 0); - EXIT_IF(need_bind && bind.push_constant_size == 0); + EXIT_IF(need_descriptor && bind.push_constant_size == 0); - if (need_bind) + if (bind.push_constant_size != 0) { auto index = *push_constant_info_num; @@ -1964,7 +1964,7 @@ static void CreateLayout(VkDescriptorSetLayout* set_layouts, uint32_t* set_layou (*push_constant_info_num)++; } - if (need_bind) + if (need_descriptor) { EXIT_IF(bind.descriptor_set_slot != *set_layouts_num); @@ -4488,6 +4488,26 @@ static void PrepareGdsPointers(const ShaderGdsResources& gds_pointers, uint32_t* } } +static void PrepareDirectSgprs(const ShaderDirectSgprsResources& direct_sgprs, uint32_t** sgprs) +{ + KYTY_PROFILER_FUNCTION(); + + EXIT_IF(sgprs == nullptr); + EXIT_IF(*sgprs == nullptr); + + for (int i = 0; i < direct_sgprs.sgprs_num; i++) + { + auto r = direct_sgprs.sgprs[i]; + + (*sgprs)[i] = r.field; + } + + if (direct_sgprs.sgprs_num > 0) + { + (*sgprs) += static_cast(4 * ((direct_sgprs.sgprs_num - 1) / 4 + 1)); + } +} + static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelineBindPoint pipeline_bind_point, VkPipelineLayout layout, const ShaderBindResources& bind, VkShaderStageFlags vk_stage, DescriptorCache::Stage stage) { @@ -4503,6 +4523,8 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin bind.textures2D.textures_num); EXIT_NOT_IMPLEMENTED(bind.samplers.samplers_num > DescriptorCache::SAMPLERS_MAX); + bool need_descriptor = false; + VulkanBuffer* storage_buffers[DescriptorCache::BUFFERS_MAX]; VulkanImage* textures2d_sampled[DescriptorCache::TEXTURES_SAMPLED_MAX]; int textures2d_sampled_view[DescriptorCache::TEXTURES_SAMPLED_MAX]; @@ -4517,29 +4539,32 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin if (bind.storage_buffers.buffers_num > 0) { PrepareStorageBuffers(submit_id, buffer, bind.storage_buffers, storage_buffers, &sgprs_ptr); + need_descriptor = true; } if (bind.textures2D.textures_num > 0) { PrepareTextures(submit_id, buffer, bind.textures2D, textures2d_sampled, textures2d_storage, textures2d_sampled_view, &sgprs_ptr); + need_descriptor = true; } if (bind.samplers.samplers_num > 0) { PrepareSamplers(bind.samplers, samplers, &sgprs_ptr); + need_descriptor = true; } if (bind.gds_pointers.pointers_num > 0) { PrepareGdsPointers(bind.gds_pointers, &sgprs_ptr); - gds_buffer = g_render_ctx->GetGdsBuffer()->GetBuffer(g_render_ctx->GetGraphicCtx()); + gds_buffer = g_render_ctx->GetGdsBuffer()->GetBuffer(g_render_ctx->GetGraphicCtx()); + need_descriptor = true; + } + if (bind.direct_sgprs.sgprs_num > 0) + { + PrepareDirectSgprs(bind.direct_sgprs, &sgprs_ptr); } EXIT_IF(bind.push_constant_size != (sgprs_ptr - sgprs) * 4); - auto* descriptor_set = g_render_ctx->GetDescriptorCache()->GetDescriptor( - stage, storage_buffers, textures2d_sampled, textures2d_sampled_view, textures2d_storage, samplers, &gds_buffer, bind); - - EXIT_IF(descriptor_set == nullptr); - auto* vk_buffer = buffer->GetPool()->buffers[buffer->GetIndex()]; if (bind.textures2D.textures_num > 0) @@ -4556,7 +4581,15 @@ static void BindDescriptors(uint64_t submit_id, CommandBuffer* buffer, VkPipelin } } - vkCmdBindDescriptorSets(vk_buffer, pipeline_bind_point, layout, bind.descriptor_set_slot, 1, &descriptor_set->set, 0, nullptr); + if (need_descriptor) + { + auto* descriptor_set = g_render_ctx->GetDescriptorCache()->GetDescriptor( + stage, storage_buffers, textures2d_sampled, textures2d_sampled_view, textures2d_storage, samplers, &gds_buffer, bind); + + EXIT_IF(descriptor_set == nullptr); + + vkCmdBindDescriptorSets(vk_buffer, pipeline_bind_point, layout, bind.descriptor_set_slot, 1, &descriptor_set->set, 0, nullptr); + } vkCmdPushConstants(vk_buffer, layout, vk_stage, bind.push_constant_offset, bind.push_constant_size, sgprs); } } diff --git a/source/emulator/src/Graphics/GraphicsRun.cpp b/source/emulator/src/Graphics/GraphicsRun.cpp index 16dff61..a0df88f 100644 --- a/source/emulator/src/Graphics/GraphicsRun.cpp +++ b/source/emulator/src/Graphics/GraphicsRun.cpp @@ -1176,7 +1176,8 @@ void CommandProcessor::WriteAtEndOfPipe64(uint32_t cache_policy, uint32_t event_ { GraphicsRenderWriteAtEndOfPipe32(m_sumbit_id, m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); } else if (((eop_event_type == 0x04 && event_index == 0x05) || (eop_event_type == 0x28 && event_index == 0x05) || - (eop_event_type == 0x2f && event_index == 0x06) || (eop_event_type == 0x14 && event_index == 0x00)) && + (eop_event_type == 0x2f && event_index == 0x06) || (eop_event_type == 0x14 && event_index == 0x00) || + (eop_event_type == 0x28 && event_index == 0x00)) && cache_action == 0x38 && source64 && !with_interrupt) { GraphicsRenderWriteAtEndOfPipeWithWriteBack64(m_sumbit_id, m_buffer[m_current_buffer], static_cast(dst_gpu_addr), value); @@ -2249,6 +2250,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_cs_user_sgpr) auto reg_num = (cmd_id >> 16u) & 0x3fffu; + EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX); + for (uint32_t i = 0; i < reg_num; i++) { cp->GetShCtx()->SetCsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker()); @@ -2350,6 +2353,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_ps_user_sgpr) auto reg_num = (cmd_id >> 16u) & 0x3fffu; + EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX); + for (uint32_t i = 0; i < reg_num; i++) { cp->GetShCtx()->SetPsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker()); @@ -2432,6 +2437,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_vs_user_sgpr) auto reg_num = (cmd_id >> 16u) & 0x3fffu; + EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX); + for (uint32_t i = 0; i < reg_num; i++) { cp->GetShCtx()->SetVsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker()); @@ -2449,6 +2456,8 @@ KYTY_HW_SH_PARSER(hw_sh_set_gs_user_sgpr) auto reg_num = (cmd_id >> 16u) & 0x3fffu; + EXIT_NOT_IMPLEMENTED(reg_num >= HW::UserSgprInfo::SGPRS_MAX); + for (uint32_t i = 0; i < reg_num; i++) { cp->GetShCtx()->SetGsUserSgpr(slot + i, buffer[i], cp->GetUserDataMarker()); diff --git a/source/emulator/src/Graphics/Shader.cpp b/source/emulator/src/Graphics/Shader.cpp index 9d2efc8..ed3c211 100644 --- a/source/emulator/src/Graphics/Shader.cpp +++ b/source/emulator/src/Graphics/Shader.cpp @@ -87,6 +87,7 @@ struct ShaderParsedUsage bool extended_buffer = false; int samplers = 0; int gds_pointers = 0; + int direct_sgprs = 0; }; struct ShaderDebugPrintfCmds @@ -1137,7 +1138,7 @@ static void ShaderParseAttrib(ShaderVertexInputInfo* info, const ShaderSemantic* } } -static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index, int slot, ShaderStorageUsage usage, +static void ShaderGetStorageBuffer(ShaderStorageResources* info, bool* direct_sgprs, int start_index, int slot, ShaderStorageUsage usage, const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer) { EXIT_IF(info == nullptr); @@ -1162,6 +1163,8 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index { auto type = user_sgpr.type[start_index + j]; EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region); + + direct_sgprs[start_index + j] = false; } } @@ -1173,7 +1176,7 @@ static void ShaderGetStorageBuffer(ShaderStorageResources* info, int start_index info->buffers_num++; } -static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index, int slot, ShaderTextureUsage usage, +static void ShaderGetTextureBuffer(ShaderTextureResources* info, bool* direct_sgprs, int start_index, int slot, ShaderTextureUsage usage, const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer) { EXIT_IF(info == nullptr); @@ -1210,6 +1213,8 @@ static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index { auto type = user_sgpr.type[start_index + j]; EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region); + + direct_sgprs[start_index + j] = false; } } @@ -1225,7 +1230,7 @@ static void ShaderGetTextureBuffer(ShaderTextureResources* info, int start_index info->textures_num++; } -static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr, +static void ShaderGetSampler(ShaderSamplerResources* info, bool* direct_sgprs, int start_index, int slot, const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer) { EXIT_IF(info == nullptr); @@ -1249,6 +1254,8 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int { auto type = user_sgpr.type[start_index + j]; EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Vsharp && type != HW::UserSgprType::Region); + + direct_sgprs[start_index + j] = false; } } @@ -1260,7 +1267,7 @@ static void ShaderGetSampler(ShaderSamplerResources* info, int start_index, int info->samplers_num++; } -static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int slot, const HW::UserSgprInfo& user_sgpr, +static void ShaderGetGdsPointer(ShaderGdsResources* info, bool* direct_sgprs, int start_index, int slot, const HW::UserSgprInfo& user_sgpr, const uint32_t* extended_buffer) { EXIT_IF(info == nullptr); @@ -1282,6 +1289,8 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s { auto type = user_sgpr.type[start_index]; EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Unknown); + + direct_sgprs[start_index] = false; } info->pointers[index].field = (extended ? extended_buffer[start_index - 16] : user_sgpr.value[start_index]); @@ -1289,6 +1298,26 @@ static void ShaderGetGdsPointer(ShaderGdsResources* info, int start_index, int s info->pointers_num++; } +static void ShaderGetDirectSgpr(ShaderDirectSgprsResources* info, int start_index, const HW::UserSgprInfo& user_sgpr) +{ + EXIT_IF(info == nullptr); + + EXIT_NOT_IMPLEMENTED(info->sgprs_num < 0 || info->sgprs_num >= ShaderDirectSgprsResources::SGPRS_MAX); + + int index = info->sgprs_num; + + EXIT_NOT_IMPLEMENTED(start_index >= 16); + + info->start_register[index] = start_index; + + auto type = user_sgpr.type[start_index]; + EXIT_NOT_IMPLEMENTED(type != HW::UserSgprType::Unknown); + + info->sgprs[index].field = user_sgpr.value[start_index]; + + info->sgprs_num++; +} + void ShaderCalcBindingIndices(ShaderBindResources* bind) { KYTY_PROFILER_FUNCTION(); @@ -1323,11 +1352,17 @@ void ShaderCalcBindingIndices(ShaderBindResources* bind) bind->push_constant_size += (((bind->gds_pointers.pointers_num - 1) / 4) + 1) * 16; } + if (bind->direct_sgprs.sgprs_num > 0) + { + bind->push_constant_size += (((bind->direct_sgprs.sgprs_num - 1) / 4) + 1) * 16; + } + EXIT_IF((bind->push_constant_size % 16) != 0); } // NOLINTNEXTLINE(readability-function-cognitive-complexity) -void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) +void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr, + int user_sgpr_num) { KYTY_PROFILER_FUNCTION(); @@ -1352,9 +1387,16 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource info->extended_buffer = false; info->samplers = 0; info->gds_pointers = 0; + info->direct_sgprs = 0; uint32_t* extended_buffer = nullptr; + bool direct_sgprs[HW::UserSgprInfo::SGPRS_MAX]; + for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++) + { + direct_sgprs[i] = (i < user_sgpr_num); + } + for (int i = 0; i < usages.slots_num; i++) { const auto& usage = usages.slots[i]; @@ -1364,13 +1406,13 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); if (usage.flags == 0) { - ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadOnly, - user_sgpr, extended_buffer); + ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot, + ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); info->storage_buffers_readonly++; } else if (usage.flags == 3) { - ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, user_sgpr, - extended_buffer); + ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, usage.start_register, usage.slot, ShaderTextureUsage::ReadOnly, + user_sgpr, extended_buffer); info->textures2D_readonly++; EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); } @@ -1378,14 +1420,14 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource case 0x01: EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer); + ShaderGetSampler(&bind->samplers, direct_sgprs, usage.start_register, usage.slot, user_sgpr, extended_buffer); info->samplers++; break; case 0x02: EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::Constant, user_sgpr, - extended_buffer); + ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot, ShaderStorageUsage::Constant, + user_sgpr, extended_buffer); info->storage_buffers_constant++; break; @@ -1393,13 +1435,13 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); if (usage.flags == 0) { - ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, ShaderStorageUsage::ReadWrite, - user_sgpr, extended_buffer); + ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, usage.start_register, usage.slot, + ShaderStorageUsage::ReadWrite, user_sgpr, extended_buffer); info->storage_buffers_readwrite++; } else if (usage.flags == 3) { - ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, user_sgpr, - extended_buffer); + ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, usage.start_register, usage.slot, ShaderTextureUsage::ReadWrite, + user_sgpr, extended_buffer); info->textures2D_readwrite++; EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); } @@ -1407,45 +1449,61 @@ void ShaderParseUsage(uint64_t addr, ShaderParsedUsage* info, ShaderBindResource case 0x07: EXIT_NOT_IMPLEMENTED(usage.flags != 0); - ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer); + ShaderGetGdsPointer(&bind->gds_pointers, direct_sgprs, usage.start_register, usage.slot, user_sgpr, extended_buffer); info->gds_pointers++; break; case 0x12: EXIT_NOT_IMPLEMENTED(usage.slot != 0); EXIT_NOT_IMPLEMENTED(usage.flags != 0); - info->fetch = true; - info->fetch_reg = usage.start_register; + info->fetch = true; + info->fetch_reg = usage.start_register; + direct_sgprs[usage.start_register] = false; + direct_sgprs[usage.start_register + 1] = false; break; case 0x17: EXIT_NOT_IMPLEMENTED(usage.slot != 0); EXIT_NOT_IMPLEMENTED(usage.flags != 0); - info->vertex_buffer = true; - info->vertex_buffer_reg = usage.start_register; + info->vertex_buffer = true; + info->vertex_buffer_reg = usage.start_register; + direct_sgprs[usage.start_register] = false; + direct_sgprs[usage.start_register + 1] = false; break; case 0x1b: EXIT_NOT_IMPLEMENTED(usage.flags != 0); EXIT_NOT_IMPLEMENTED(usage.slot != 1); EXIT_NOT_IMPLEMENTED(bind->extended.used); - bind->extended.used = true; - bind->extended.slot = usage.slot; - bind->extended.start_register = usage.start_register; - bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; - bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; - extended_buffer = reinterpret_cast(bind->extended.data.Base()); - info->extended_buffer = true; + EXIT_NOT_IMPLEMENTED(usage.start_register + 1 >= HW::UserSgprInfo::SGPRS_MAX); + bind->extended.used = true; + bind->extended.slot = usage.slot; + bind->extended.start_register = usage.start_register; + bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; + bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; + extended_buffer = reinterpret_cast(bind->extended.data.Base()); + info->extended_buffer = true; + direct_sgprs[usage.start_register] = false; + direct_sgprs[usage.start_register + 1] = false; break; default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); } } + + for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++) + { + if (direct_sgprs[i]) + { + ShaderGetDirectSgpr(&bind->direct_sgprs, i, user_sgpr); + info->direct_sgprs++; + } + } } // NOLINTNEXTLINE(readability-function-cognitive-complexity) void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, ShaderBindResources* bind, - const HW::UserSgprInfo& user_sgpr) + const HW::UserSgprInfo& user_sgpr, int user_sgpr_num) { KYTY_PROFILER_FUNCTION(); @@ -1464,6 +1522,7 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, info->extended_buffer = false; info->samplers = 0; info->gds_pointers = 0; + info->direct_sgprs = 0; EXIT_NOT_IMPLEMENTED(user_data == nullptr); EXIT_NOT_IMPLEMENTED(user_data->eud_size_dw != 0); @@ -1471,6 +1530,12 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, uint32_t* extended_buffer = nullptr; + bool direct_sgprs[HW::UserSgprInfo::SGPRS_MAX]; + for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++) + { + direct_sgprs[i] = (i < user_sgpr_num); + } + for (uint16_t type = 0; type < user_data->direct_resource_count; type++) { if (user_data->direct_resource_offset[type] == 0xffff) @@ -1483,13 +1548,17 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, switch (type) { case 8: - info->vertex_buffer = true; - info->vertex_buffer_reg = reg; + info->vertex_buffer = true; + info->vertex_buffer_reg = reg; + direct_sgprs[info->vertex_buffer_reg] = false; + direct_sgprs[info->vertex_buffer_reg + 1] = false; break; case 10: - info->vertex_attrib = true; - info->vertex_attrib_reg = reg; + info->vertex_attrib = true; + info->vertex_attrib_reg = reg; + direct_sgprs[info->vertex_attrib_reg] = false; + direct_sgprs[info->vertex_attrib_reg + 1] = false; break; default: EXIT("unknown usage type: 0x%04" PRIx16 "\n", type); @@ -1506,7 +1575,7 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, } EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[0][slot].size != 0); - ShaderGetTextureBuffer(&bind->textures2D, user_data->sharp_resource_offset[0][slot].offset_dw, slot, + ShaderGetTextureBuffer(&bind->textures2D, direct_sgprs, user_data->sharp_resource_offset[0][slot].offset_dw, slot, ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); info->textures2D_readonly++; EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() != 9); @@ -1525,7 +1594,8 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, } EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[2][slot].size != 1); - ShaderGetSampler(&bind->samplers, user_data->sharp_resource_offset[2][slot].offset_dw, slot, user_sgpr, extended_buffer); + ShaderGetSampler(&bind->samplers, direct_sgprs, user_data->sharp_resource_offset[2][slot].offset_dw, slot, user_sgpr, + extended_buffer); info->samplers++; } } @@ -1540,208 +1610,22 @@ void ShaderParseUsage2(const ShaderUserData* user_data, ShaderParsedUsage* info, } EXIT_NOT_IMPLEMENTED(user_data->sharp_resource_offset[3][slot].size != 1); - ShaderGetStorageBuffer(&bind->storage_buffers, user_data->sharp_resource_offset[3][slot].offset_dw, slot, + ShaderGetStorageBuffer(&bind->storage_buffers, direct_sgprs, user_data->sharp_resource_offset[3][slot].offset_dw, slot, ShaderStorageUsage::Constant, user_sgpr, extended_buffer); info->storage_buffers_constant++; } } - // KYTY_NOT_IMPLEMENTED; + for (int i = 0; i < HW::UserSgprInfo::SGPRS_MAX; i++) + { + if (direct_sgprs[i]) + { + ShaderGetDirectSgpr(&bind->direct_sgprs, i, user_sgpr); + info->direct_sgprs++; + } + } } -//// NOLINTNEXTLINE(readability-function-cognitive-complexity) -// void ShaderParseUsageCS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) -//{ -// KYTY_PROFILER_FUNCTION(); -// -// EXIT_IF(bind == nullptr); -// EXIT_IF(info == nullptr); -// -// const auto* src = reinterpret_cast(addr); -// -// auto usages = GetUsageSlots(src); -// -// info->fetch = false; -// info->fetch_reg = 0; -// info->vertex_buffer = false; -// info->vertex_buffer_reg = 0; -// -// uint32_t* extended_buffer = nullptr; -// -// for (int i = 0; i < usages.slots_num; i++) -// { -// const auto& usage = usages.slots[i]; -// switch (usage.type) -// { -// // case 0x00: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); -// // if (usage.flags == 0) -// // { -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); } -// // else if (usage.flags -// // == 3) -// // { -// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, -// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); -// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() -// //!= 9); -// // } -// // break; -// // case 0x02: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break; -// // case 0x04: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); -// // if (usage.flags == 0) -// // { -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::ReadWrite, user_sgpr, extended_buffer); } -// // else if (usage.flags -// // == 3) -// // { -// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, -// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer); -// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() -// //!= 9); -// // } -// // break; -// case 0x07: -// EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// ShaderGetGdsPointer(&bind->gds_pointers, usage.start_register, usage.slot, user_sgpr, extended_buffer); -// break; -// // case 0x1b: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // EXIT_NOT_IMPLEMENTED(usage.slot != 1); -// // EXIT_NOT_IMPLEMENTED(bind->extended.used); -// // bind->extended.used = true; -// // bind->extended.slot = usage.slot; -// // bind->extended.start_register = usage.start_register; -// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; -// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; -// // extended_buffer = reinterpret_cast(bind->extended.data.Base()); -// // break; -// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); -// } -// } -// } -// -//// NOLINTNEXTLINE(readability-function-cognitive-complexity) -// void ShaderParseUsagePS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) -//{ -// KYTY_PROFILER_FUNCTION(); -// -// EXIT_IF(bind == nullptr); -// EXIT_IF(info == nullptr); -// -// const auto* src = reinterpret_cast(addr); -// -// auto usages = GetUsageSlots(src); -// -// info->fetch = false; -// info->fetch_reg = 0; -// info->vertex_buffer = false; -// info->vertex_buffer_reg = 0; -// -// uint32_t* extended_buffer = nullptr; -// -// for (int i = 0; i < usages.slots_num; i++) -// { -// const auto& usage = usages.slots[i]; -// switch (usage.type) -// { -// // case 0x00: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0 && usage.flags != 3); -// // if (usage.flags == 0) -// // { -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::ReadOnly, user_sgpr, extended_buffer); } -// // else if (usage.flags -// // == 3) -// // { -// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, -// // ShaderTextureUsage::ReadOnly, user_sgpr, extended_buffer); -// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() -// //!= 9); -// // } -// // break; -// // case 0x01: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // ShaderGetSampler(&bind->samplers, usage.start_register, usage.slot, user_sgpr, extended_buffer); -// // break; -// // case 0x02: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::Constant, user_sgpr, extended_buffer); break; -// // case 0x04: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 3); -// // if (usage.flags == 3) -// // { -// // ShaderGetTextureBuffer(&bind->textures2D, usage.start_register, usage.slot, -// // ShaderTextureUsage::ReadWrite, user_sgpr, extended_buffer); -// // EXIT_NOT_IMPLEMENTED(bind->textures2D.desc[bind->textures2D.textures_num - 1].texture.Type() -// //!= 9); -// // } -// // break; -// // case 0x1b: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // EXIT_NOT_IMPLEMENTED(usage.slot != 1); -// // EXIT_NOT_IMPLEMENTED(bind->extended.used); -// // bind->extended.used = true; -// // bind->extended.slot = usage.slot; -// // bind->extended.start_register = usage.start_register; -// // bind->extended.data.fields[0] = user_sgpr.value[usage.start_register]; -// // bind->extended.data.fields[1] = user_sgpr.value[usage.start_register + 1]; -// // extended_buffer = reinterpret_cast(bind->extended.data.Base()); -// // break; -// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); -// } -// } -// } -// -// void ShaderParseUsageVS(uint64_t addr, ShaderParsedUsage* info, ShaderBindResources* bind, const HW::UserSgprInfo& user_sgpr) -//{ -// KYTY_PROFILER_FUNCTION(); -// -// EXIT_IF(bind == nullptr); -// EXIT_IF(info == nullptr); -// -// const auto* src = reinterpret_cast(addr); -// -// auto usages = GetUsageSlots(src); -// -// info->fetch = false; -// info->fetch_reg = 0; -// info->vertex_buffer = false; -// info->vertex_buffer_reg = 0; -// -// for (int i = 0; i < usages.slots_num; i++) -// { -// const auto& usage = usages.slots[i]; -// switch (usage.type) -// { -// // case 0x02: -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // ShaderGetStorageBuffer(&bind->storage_buffers, usage.start_register, usage.slot, -// // ShaderStorageUsage::Constant, user_sgpr, nullptr); break; -// // case 0x12: -// // EXIT_NOT_IMPLEMENTED(usage.slot != 0); -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // info->fetch = true; -// // info->fetch_reg = usage.start_register; -// // break; -// // case 0x17: -// // EXIT_NOT_IMPLEMENTED(usage.slot != 0); -// // EXIT_NOT_IMPLEMENTED(usage.flags != 0); -// // info->vertex_buffer = true; -// // info->vertex_buffer_reg = usage.start_register; -// // break; -// default: EXIT("unknown usage type: 0x%02" PRIx8 "\n", usage.type); -// } -// } -// } - // NOLINTNEXTLINE(readability-function-cognitive-complexity) void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegisters* sh, ShaderVertexInputInfo* info) { @@ -1764,8 +1648,9 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi bool gs_instead_of_vs = (regs->vs_regs.data_addr == 0 && regs->gs_regs.data_addr == 0 && regs->es_regs.data_addr != 0 && regs->gs_regs.chksum != 0); - uint64_t shader_addr = (gs_instead_of_vs ? regs->es_regs.data_addr : regs->vs_regs.data_addr); - const HW::UserSgprInfo& user_sgpr = (gs_instead_of_vs ? regs->gs_user_sgpr : regs->vs_user_sgpr); + uint64_t shader_addr = (gs_instead_of_vs ? regs->es_regs.data_addr : regs->vs_regs.data_addr); + const HW::UserSgprInfo& user_sgpr = (gs_instead_of_vs ? regs->gs_user_sgpr : regs->vs_user_sgpr); + auto user_sgpr_num = (gs_instead_of_vs ? regs->gs_regs.rsrc2.user_sgpr : regs->vs_regs.rsrc2.user_sgpr); bool ps5 = Config::IsNextGen(); @@ -1786,14 +1671,14 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi info->gs_prolog = true; - ShaderParseUsage2(data.user_data, &usage, &info->bind, user_sgpr); + ShaderParseUsage2(data.user_data, &usage, &info->bind, user_sgpr, static_cast(user_sgpr_num)); } else { EXIT_NOT_IMPLEMENTED(gs_instead_of_vs); info->gs_prolog = false; - ShaderParseUsage(shader_addr, &usage, &info->bind, user_sgpr); + ShaderParseUsage(shader_addr, &usage, &info->bind, user_sgpr, user_sgpr_num); } EXIT_NOT_IMPLEMENTED(usage.extended_buffer); @@ -1812,6 +1697,9 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi info->fetch_attrib_reg = usage.vertex_attrib_reg; info->fetch_buffer_reg = usage.vertex_buffer_reg; + EXIT_NOT_IMPLEMENTED(usage.vertex_attrib_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX); + EXIT_NOT_IMPLEMENTED(usage.vertex_buffer_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX); + const auto* attrib = reinterpret_cast(static_cast(user_sgpr.value[usage.vertex_attrib_reg]) | (static_cast(user_sgpr.value[usage.vertex_attrib_reg + 1]) << 32u)); @@ -1835,14 +1723,14 @@ void ShaderGetInputInfoVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegi info->fetch_shader_reg = usage.fetch_reg; info->fetch_buffer_reg = usage.vertex_buffer_reg; - EXIT_NOT_IMPLEMENTED(usage.fetch_reg >= 16 || usage.vertex_buffer_reg >= 16); + EXIT_NOT_IMPLEMENTED(usage.fetch_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX); + EXIT_NOT_IMPLEMENTED(usage.vertex_buffer_reg + 1 >= HW::UserSgprInfo::SGPRS_MAX); - const auto* fetch = - reinterpret_cast(static_cast(regs->vs_user_sgpr.value[usage.fetch_reg]) | - (static_cast(regs->vs_user_sgpr.value[usage.fetch_reg + 1]) << 32u)); + const auto* fetch = reinterpret_cast(static_cast(user_sgpr.value[usage.fetch_reg]) | + (static_cast(user_sgpr.value[usage.fetch_reg + 1]) << 32u)); const auto* buffer = - reinterpret_cast(static_cast(regs->vs_user_sgpr.value[usage.vertex_buffer_reg]) | - (static_cast(regs->vs_user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u)); + reinterpret_cast(static_cast(user_sgpr.value[usage.vertex_buffer_reg]) | + (static_cast(user_sgpr.value[usage.vertex_buffer_reg + 1]) << 32u)); EXIT_NOT_IMPLEMENTED(fetch == nullptr || buffer == nullptr); @@ -1906,15 +1794,16 @@ void ShaderGetInputInfoPS(const HW::PixelShaderInfo* regs, const HW::ShaderRegis { EXIT_NOT_IMPLEMENTED(data.user_data == nullptr); - ShaderParseUsage2(data.user_data, &usage, &ps_info->bind, regs->ps_user_sgpr); + ShaderParseUsage2(data.user_data, &usage, &ps_info->bind, regs->ps_user_sgpr, regs->ps_regs.rsrc2.user_sgpr); } else { - ShaderParseUsage(regs->ps_regs.data_addr, &usage, &ps_info->bind, regs->ps_user_sgpr); + ShaderParseUsage(regs->ps_regs.data_addr, &usage, &ps_info->bind, regs->ps_user_sgpr, regs->ps_regs.rsrc2.user_sgpr); } EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib); EXIT_NOT_IMPLEMENTED(usage.storage_buffers_readwrite > 0); EXIT_NOT_IMPLEMENTED(usage.gds_pointers > 0); + EXIT_NOT_IMPLEMENTED(usage.direct_sgprs > 0); ShaderCalcBindingIndices(&ps_info->bind); } @@ -1940,10 +1829,11 @@ void ShaderGetInputInfoCS(const HW::ComputeShaderInfo* regs, const HW::ShaderReg ShaderParsedUsage usage; - ShaderParseUsage(regs->cs_regs.data_addr, &usage, &info->bind, regs->cs_user_sgpr); + ShaderParseUsage(regs->cs_regs.data_addr, &usage, &info->bind, regs->cs_user_sgpr, regs->cs_regs.user_sgpr); EXIT_NOT_IMPLEMENTED(usage.samplers > 0); EXIT_NOT_IMPLEMENTED(usage.fetch || usage.vertex_buffer || usage.vertex_attrib); + EXIT_NOT_IMPLEMENTED(usage.direct_sgprs > 0); ShaderCalcBindingIndices(&info->bind); } @@ -1963,6 +1853,7 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t samplers.binding_index = %d\n", bind.samplers.binding_index); printf("\t gds_pointers.pointers_num = %d\n", bind.gds_pointers.pointers_num); printf("\t gds_pointers.binding_index = %d\n", bind.gds_pointers.binding_index); + printf("\t direct_sgprs.sgprs_num = %d\n", bind.direct_sgprs.sgprs_num); printf("\t extended.used = %s\n", (bind.extended.used ? "true" : "false")); printf("\t extended.slot = %d\n", bind.extended.slot); printf("\t extended.start_register = %d\n", bind.extended.start_register); @@ -2129,6 +2020,17 @@ static void ShaderDbgDumpResources(const ShaderBindResources& bind) printf("\t\t start_register = %d\n", bind.gds_pointers.start_register[i]); printf("\t\t extended = %s\n", (bind.gds_pointers.extended[i] ? "true" : "false")); } + + for (int i = 0; i < bind.direct_sgprs.sgprs_num; i++) + { + const auto& r = bind.direct_sgprs.sgprs[i]; + + printf("\t Direct Sgprs %d\n", i); + + printf("\t\t field = %08" PRIx32 "\n", r.field); + + printf("\t\t start_register = %d\n", bind.direct_sgprs.start_register[i]); + } } void ShaderDbgDumpInputInfo(const ShaderVertexInputInfo* info) @@ -2412,7 +2314,13 @@ ShaderCode ShaderParseVS(const HW::VertexShaderInfo* regs, const HW::ShaderRegis vs_print("ShaderParseVS()", *regs, *sh); vs_check(*regs, *sh); - EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count); + if (gs_instead_of_vs) + { + EXIT_NOT_IMPLEMENTED(regs->gs_regs.rsrc2.user_sgpr > regs->gs_user_sgpr.count); + } else + { + EXIT_NOT_IMPLEMENTED(regs->vs_regs.rsrc2.user_sgpr > regs->vs_user_sgpr.count); + } if (Config::IsNextGen()) { @@ -2871,6 +2779,13 @@ static void ShaderGetBindIds(ShaderId* ret, const ShaderBindResources& bind) ret->ids.Add(static_cast(bind.gds_pointers.extended[i])); } + ret->ids.Add(bind.direct_sgprs.sgprs_num); + + for (int i = 0; i < bind.direct_sgprs.sgprs_num; i++) + { + ret->ids.Add(bind.direct_sgprs.start_register[i]); + } + ret->ids.Add(static_cast(bind.extended.used)); ret->ids.Add(bind.extended.slot); ret->ids.Add(bind.extended.start_register); diff --git a/source/emulator/src/Graphics/ShaderParse.cpp b/source/emulator/src/Graphics/ShaderParse.cpp index 8e35984..5ff0a75 100644 --- a/source/emulator/src/Graphics/ShaderParse.cpp +++ b/source/emulator/src/Graphics/ShaderParse.cpp @@ -160,31 +160,30 @@ KYTY_SHADER_PARSER(shader_parse_sopk) inst.pc = pc; inst.dst = operand_parse(sdst); + inst.format = ShaderInstructionFormat::SVdstSVsrc0; + inst.src[0].type = ShaderOperandType::IntegerInlineConstant; + inst.src[0].constant.i = imm; + inst.src_num = 1; + switch (opcode) { - case 0x00: - inst.type = ShaderInstructionType::SMovkI32; - inst.format = ShaderInstructionFormat::SVdstSVsrc0; - inst.src[0].type = ShaderOperandType::IntegerInlineConstant; - inst.src[0].constant.i = imm; - inst.src_num = 1; - break; + case 0x00: inst.type = ShaderInstructionType::SMovkI32; break; - case 0x2: KYTY_NI("s_cmovk_i32"); break; - case 0x3: KYTY_NI("s_cmpk_eq_i32"); break; - case 0x4: KYTY_NI("s_cmpk_lg_i32"); break; - case 0x5: KYTY_NI("s_cmpk_gt_i32"); break; - case 0x6: KYTY_NI("s_cmpk_ge_i32"); break; - case 0x7: KYTY_NI("s_cmpk_lt_i32"); break; - case 0x8: KYTY_NI("s_cmpk_le_i32"); break; - case 0x9: KYTY_NI("s_cmpk_eq_u32"); break; - case 0xA: KYTY_NI("s_cmpk_lg_u32"); break; - case 0xB: KYTY_NI("s_cmpk_gt_u32"); break; - case 0xC: KYTY_NI("s_cmpk_ge_u32"); break; - case 0xD: KYTY_NI("s_cmpk_lt_u32"); break; - case 0xE: KYTY_NI("s_cmpk_le_u32"); break; - case 0xF: KYTY_NI("s_addk_i32"); break; - case 0x10: KYTY_NI("s_mulk_i32"); break; + case 0x02: KYTY_NI("s_cmovk_i32"); break; + case 0x03: KYTY_NI("s_cmpk_eq_i32"); break; + case 0x04: KYTY_NI("s_cmpk_lg_i32"); break; + case 0x05: KYTY_NI("s_cmpk_gt_i32"); break; + case 0x06: KYTY_NI("s_cmpk_ge_i32"); break; + case 0x07: KYTY_NI("s_cmpk_lt_i32"); break; + case 0x08: KYTY_NI("s_cmpk_le_i32"); break; + case 0x09: KYTY_NI("s_cmpk_eq_u32"); break; + case 0x0A: KYTY_NI("s_cmpk_lg_u32"); break; + case 0x0B: KYTY_NI("s_cmpk_gt_u32"); break; + case 0x0C: KYTY_NI("s_cmpk_ge_u32"); break; + case 0x0D: KYTY_NI("s_cmpk_lt_u32"); break; + case 0x0E: KYTY_NI("s_cmpk_le_u32"); break; + case 0x0F: KYTY_NI("s_addk_i32"); break; + case 0x10: inst.type = ShaderInstructionType::SMulkI32; break; case 0x11: KYTY_NI("s_cbranch_i_fork"); break; case 0x12: KYTY_NI("s_getreg_b32"); break; case 0x13: KYTY_NI("s_setreg_b32"); break; @@ -579,6 +578,7 @@ KYTY_SHADER_PARSER(shader_parse_sop2) case 0x32: KYTY_NI("s_pack_ll_b32_b16"); break; case 0x33: KYTY_NI("s_pack_lh_b32_b16"); break; case 0x34: KYTY_NI("s_pack_hh_b32_b16"); break; + case 0x35: inst.type = ShaderInstructionType::SMulHiU32; break; default: KYTY_UNKNOWN_OP(); } @@ -601,22 +601,53 @@ KYTY_SHADER_PARSER(shader_parse_vopc) uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu; + bool sdwa = (src0 == 249); + + uint32_t size = (sdwa ? 2 : 1); + + src0 = (sdwa ? (buffer[1] >> 0u) & 0xffu : src0); + uint32_t sdst = (sdwa ? (buffer[1] >> 8u) & 0x7fu : 0); + uint32_t sd = (sdwa ? (buffer[1] >> 15u) & 0x1u : 0); + uint32_t src0_sel = (sdwa ? (buffer[1] >> 16u) & 0x7u : 6); + uint32_t src0_sext = (sdwa ? (buffer[1] >> 19u) & 0x1u : 0); + uint32_t src0_neg = (sdwa ? (buffer[1] >> 20u) & 0x1u : 0); + uint32_t src0_abs = (sdwa ? (buffer[1] >> 21u) & 0x1u : 0); + uint32_t s0 = (sdwa ? (buffer[1] >> 23u) & 0x1u : 1); + uint32_t src1_sel = (sdwa ? (buffer[1] >> 24u) & 0x7u : 6); + uint32_t src1_sext = (sdwa ? (buffer[1] >> 27u) & 0x1u : 0); + uint32_t src1_neg = (sdwa ? (buffer[1] >> 28u) & 0x1u : 0); + uint32_t src1_abs = (sdwa ? (buffer[1] >> 29u) & 0x1u : 0); + uint32_t s1 = (sdwa ? (buffer[1] >> 31u) & 0x1u : 0); + + EXIT_NOT_IMPLEMENTED(src0_sel != 6); + EXIT_NOT_IMPLEMENTED(src0_sext != 0); + EXIT_NOT_IMPLEMENTED(src0_neg != 0); + EXIT_NOT_IMPLEMENTED(src0_abs != 0); + EXIT_NOT_IMPLEMENTED(src1_sel != 6); + EXIT_NOT_IMPLEMENTED(src1_sext != 0); + EXIT_NOT_IMPLEMENTED(src1_neg != 0); + EXIT_NOT_IMPLEMENTED(src1_abs != 0); + ShaderInstruction inst; inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.src[1] = operand_parse(vsrc1 + 256); + inst.src[0] = operand_parse(src0 + (s0 == 0 ? 256 : 0)); + inst.src[1] = operand_parse(vsrc1 + (s1 == 0 ? 256 : 0)); inst.src_num = 2; - uint32_t size = 1; - if (inst.src[0].type == ShaderOperandType::LiteralConstant) { inst.src[0].constant.u = buffer[size]; size++; } - inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; - inst.dst.type = ShaderOperandType::VccLo; + inst.format = ShaderInstructionFormat::SmaskVsrc0Vsrc1; + if (sd == 0) + { + inst.dst.type = ShaderOperandType::VccLo; + } else + { + inst.dst = operand_parse(sdst); + } inst.dst.size = 2; switch (opcode) @@ -1034,14 +1065,51 @@ KYTY_SHADER_PARSER(shader_parse_vop2) uint32_t src0 = (buffer[0] >> 0u) & 0x1ffu; uint32_t vsrc1 = (buffer[0] >> 9u) & 0xffu; + bool sdwa = (src0 == 249); + + uint32_t size = (sdwa ? 2 : 1); + + src0 = (sdwa ? (buffer[1] >> 0u) & 0xffu : src0); + uint32_t dst_sel = (sdwa ? (buffer[1] >> 8u) & 0x7u : 6); + uint32_t dst_u = (sdwa ? (buffer[1] >> 11u) & 0x3u : 2); + uint32_t clmp = (sdwa ? (buffer[1] >> 13u) & 0x1u : 0); + uint32_t omod = (sdwa ? (buffer[1] >> 14u) & 0x3u : 0); + uint32_t src0_sel = (sdwa ? (buffer[1] >> 16u) & 0x7u : 6); + uint32_t src0_sext = (sdwa ? (buffer[1] >> 19u) & 0x1u : 0); + uint32_t src0_neg = (sdwa ? (buffer[1] >> 20u) & 0x1u : 0); + uint32_t src0_abs = (sdwa ? (buffer[1] >> 21u) & 0x1u : 0); + uint32_t s0 = (sdwa ? (buffer[1] >> 23u) & 0x1u : 1); + uint32_t src1_sel = (sdwa ? (buffer[1] >> 24u) & 0x7u : 6); + uint32_t src1_sext = (sdwa ? (buffer[1] >> 27u) & 0x1u : 0); + uint32_t src1_neg = (sdwa ? (buffer[1] >> 28u) & 0x1u : 0); + uint32_t src1_abs = (sdwa ? (buffer[1] >> 29u) & 0x1u : 0); + uint32_t s1 = (sdwa ? (buffer[1] >> 31u) & 0x1u : 0); + + EXIT_NOT_IMPLEMENTED(dst_sel != 6); + EXIT_NOT_IMPLEMENTED(sdwa && dst_sel == 6 && dst_u != 0); + EXIT_NOT_IMPLEMENTED(omod != 0); + EXIT_NOT_IMPLEMENTED(src0_sel != 6); + EXIT_NOT_IMPLEMENTED(src0_sext != 0); + EXIT_NOT_IMPLEMENTED(src0_neg != 0); + EXIT_NOT_IMPLEMENTED(src1_sel != 6); + EXIT_NOT_IMPLEMENTED(src1_sext != 0); + EXIT_NOT_IMPLEMENTED(src1_neg != 0); + ShaderInstruction inst; inst.pc = pc; - inst.src[0] = operand_parse(src0); - inst.src[1] = operand_parse(vsrc1 + 256); + inst.src[0] = operand_parse(src0 + (s0 == 0 ? 256 : 0)); + inst.src[1] = operand_parse(vsrc1 + (s1 == 0 ? 256 : 0)); inst.dst = operand_parse(vdst + 256); inst.src_num = 2; - uint32_t size = 1; + switch (omod) + { + case 0: inst.dst.multiplier = 1.0f; break; + case 1: inst.dst.multiplier = 2.0f; break; + case 2: inst.dst.multiplier = 4.0f; break; + case 3: inst.dst.multiplier = 0.5f; break; + default: break; + } if (inst.src[0].type == ShaderOperandType::LiteralConstant) { @@ -1049,6 +1117,11 @@ KYTY_SHADER_PARSER(shader_parse_vop2) size++; } + inst.src[0].absolute = (src0_abs != 0); + inst.src[1].absolute = (src1_abs != 0); + + inst.dst.clamp = (clmp != 0); + inst.format = ShaderInstructionFormat::SVdstSVsrc0SVsrc1; switch (opcode) @@ -3252,17 +3325,18 @@ KYTY_SHADER_PARSER(shader_parse_vintrp) inst.src[2].constant.u = chan; inst.src_num = 3; + inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; + switch (opcode) { - case 0x00: - inst.type = ShaderInstructionType::VInterpP1F32; - inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; + case 0x00: inst.type = ShaderInstructionType::VInterpP1F32; break; + case 0x01: inst.type = ShaderInstructionType::VInterpP2F32; break; + case 0x02: + inst.type = ShaderInstructionType::VInterpMovF32; + inst.src[0].type = ShaderOperandType::IntegerInlineConstant; + inst.src[0].constant.u = vsrc & 0x3u; + inst.src[0].size = 0; break; - case 0x01: - inst.type = ShaderInstructionType::VInterpP2F32; - inst.format = ShaderInstructionFormat::VdstVsrcAttrChan; - break; - case 0x02: KYTY_NI("v_interp_mov_f32"); break; default: KYTY_UNKNOWN_OP(); } diff --git a/source/emulator/src/Graphics/ShaderSpirv.cpp b/source/emulator/src/Graphics/ShaderSpirv.cpp index 61f61a8..cfb0f99 100644 --- a/source/emulator/src/Graphics/ShaderSpirv.cpp +++ b/source/emulator/src/Graphics/ShaderSpirv.cpp @@ -1792,8 +1792,6 @@ static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String8& re { EXIT_IF(load == nullptr); - // EXIT_NOT_IMPLEMENTED(op.negate); - String8 l; if (operand_is_constant(op)) @@ -1824,8 +1822,12 @@ static bool operand_load_float(Spirv* spirv, ShaderOperand op, const String8& re if (op.negate && op.absolute) { - // TODO(): negated absolute value - return false; + l += String8(' ', 10) + String8("%abs_ = OpExtInst %float %GLSL_std_450 FAbs %\n") + String8(' ', 10) + + String8("% = OpFNegate %float %abs_\n"); + + *load = l.ReplaceStr("", index).ReplaceStr("", "a" + result_id).ReplaceStr("", result_id); + + return true; } if (op.absolute) @@ -3615,7 +3617,7 @@ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_I32_SVdstSVsrc0SVsrc1) return true; } -/* XXX: Add, Addc, Bfe, Lshl4Add */ +/* XXX: Add, Addc, Bfe, Lshl4Add, MulHi */ KYTY_RECOMPILER_FUNC(Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1) { const auto& inst = code.GetInstructions().At(index); @@ -4432,6 +4434,49 @@ KYTY_RECOMPILER_FUNC(Recompile_SLoadDwordx8_Sdst8SbaseSoffset) return false; } +KYTY_RECOMPILER_FUNC(Recompile_SMulkI32_SVdstSVsrc0) +{ + const auto& inst = code.GetInstructions().At(index); + + String8 index_str = String8::FromPrintf("%u", index); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); + + auto dst_value = operand_variable_to_str(inst.dst); + + EXIT_NOT_IMPLEMENTED(dst_value.type != SpirvType::Uint); + EXIT_NOT_IMPLEMENTED(operand_is_exec(inst.dst)); + + String8 load0; + + if (!operand_load_int(spirv, inst.src[0], "t0_", index_str, &load0)) + { + return false; + } + + String8 load_dst; + + if (!operand_load_int(spirv, inst.dst, "tdst_", index_str, &load_dst)) + { + return false; + } + + static const char* text = R"( + + +%t_ = OpIMul %int %tdst_ %t0_ +%tu_ = OpBitcast %uint %t_ + OpStore % %tu_ +)"; + *dst_source += String8(text) + .ReplaceStr("", dst_value.value) + .ReplaceStr("", load0) + .ReplaceStr("", load_dst) + .ReplaceStr("", index_str); + + return true; +} + KYTY_RECOMPILER_FUNC(Recompile_SMovB32_SVdstSVsrc0) { const auto& inst = code.GetInstructions().At(index); @@ -5301,6 +5346,37 @@ KYTY_RECOMPILER_FUNC(Recompile_VInterpP2F32_VdstVsrcAttrChan) return true; } +KYTY_RECOMPILER_FUNC(Recompile_VInterpMovF32_VdstVsrcAttrChan) +{ + const auto& inst = code.GetInstructions().At(index); + + String8 index_str = String8::FromPrintf("%u", index); + + EXIT_NOT_IMPLEMENTED(!operand_is_variable(inst.dst)); + EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[0])); + EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[1])); + EXIT_NOT_IMPLEMENTED(!operand_is_constant(inst.src[2])); + + EXIT_NOT_IMPLEMENTED(inst.src[0].constant.u != 2); + + auto dst_value = operand_variable_to_str(inst.dst); + + String8 load0 = String8::FromPrintf("%%t0_ = OpAccessChain %%_ptr_Input_float %%attr%u %%uint_%u", inst.src[1].constant.u, + inst.src[2].constant.u); + + // TODO() check VSKIP + // TODO() check EXEC + + static const char* text = R"( + + %t1_ = OpLoad %float %t0_ + OpStore % %t1_ +)"; + *dst_source += String8(text).ReplaceStr("", dst_value.value).ReplaceStr("", load0).ReplaceStr("", index_str); + + return true; +} + /* XXX: Mad, Madak, Madmk, Max3, Min3, Med3, Fma */ KYTY_RECOMPILER_FUNC(Recompile_V_XXX_F32_VdstVsrc0Vsrc1Vsrc2) { @@ -6213,6 +6289,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo {Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SAddU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%ts_ = OpIAddCarry %ResTypeU %t0_ %t1_", "%t_ = OpCompositeExtract %uint %ts_ 0", "%carry_ = OpCompositeExtract %uint %ts_ 1"}, SccCheck::CarryOut}, {Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SBfeU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%to_ = OpBitFieldUExtract %uint %t1_ %uint_0 %uint_5", "%ts_ = OpBitFieldUExtract %uint %t1_ %uint_16 %uint_7", "%t_ = OpBitFieldUExtract %uint %t0_ %to_ %ts_"}, SccCheck::NonZero}, {Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SLshl4AddU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%ts_ = OpFunctionCall %v2uint %lshl_add %t0_ %t1_ %uint_4", "%t_ = OpCompositeExtract %uint %ts_ 0", "%carry_ = OpCompositeExtract %uint %ts_ 1"}, SccCheck::CarryOut}, + {Recompile_S_XXX_U32_SVdstSVsrc0SVsrc1, ShaderInstructionType::SMulHiU32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%t_ = OpFunctionCall %uint %mul_hi_uint %t0_ %t1_"}, SccCheck::None}, {Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VAndB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%t_ = OpBitwiseAnd %uint %t0_ %t1_"}}, {Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VBcntU32B32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%tb_ = OpBitCount %int %t0_", "%tbu_ = OpBitcast %uint %tb_", "%t_ = OpIAdd %uint %tbu_ %t1_"}}, {Recompile_V_XXX_B32_SVdstSVsrc0SVsrc1, ShaderInstructionType::VBfmB32, ShaderInstructionFormat::SVdstSVsrc0SVsrc1, {"%tcount_ = OpBitwiseAnd %uint %t0_ %uint_31", "%toffset_ = OpBitwiseAnd %uint %t1_ %uint_31", "%t_ = OpBitFieldInsert %uint %uint_0 %uint_0xffffffff %toffset_ %tcount_"}}, @@ -6242,6 +6319,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo {Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovB32, ShaderInstructionFormat::SVdstSVsrc0, {""}}, {Recompile_SMovB32_SVdstSVsrc0, ShaderInstructionType::SMovkI32, ShaderInstructionFormat::SVdstSVsrc0, {""}}, + {Recompile_SMulkI32_SVdstSVsrc0, ShaderInstructionType::SMulkI32, ShaderInstructionFormat::SVdstSVsrc0, {""}}, {Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VBfrevB32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_ = OpBitReverse %uint %t0_"}}, {Recompile_V_XXX_B32_SVdstSVsrc0, ShaderInstructionType::VNotB32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_ = OpNot %uint %t0_"}}, {Recompile_V_XXX_F32_SVdstSVsrc0, ShaderInstructionType::VCeilF32, ShaderInstructionFormat::SVdstSVsrc0, {"%t_ = OpExtInst %float %GLSL_std_450 Ceil %t0_"}}, @@ -6338,6 +6416,7 @@ const RecompilerFunc* RecompFunc(ShaderInstructionType type, ShaderInstructionFo {Recompile_VCndmaskB32_VdstVsrc0Vsrc1Smask2, ShaderInstructionType::VCndmaskB32, ShaderInstructionFormat::VdstVsrc0Vsrc1Smask2, {""}}, + {Recompile_VInterpMovF32_VdstVsrcAttrChan, ShaderInstructionType::VInterpMovF32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}}, {Recompile_VInterpP1F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP1F32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}}, {Recompile_VInterpP2F32_VdstVsrcAttrChan, ShaderInstructionType::VInterpP2F32, ShaderInstructionFormat::VdstVsrcAttrChan, {""}}, @@ -6768,9 +6847,16 @@ void Spirv::WriteAnnotations() { for (uint32_t i = 0; i < m_ps_input_info->input_num; i++) { - EXIT_NOT_IMPLEMENTED((m_ps_input_info->interpolator_settings[i] & ~static_cast(0x1f)) != 0); + EXIT_NOT_IMPLEMENTED((m_ps_input_info->interpolator_settings[i] & ~static_cast(0x41fu)) != 0); - vars.Add(String8::FromPrintf("OpDecorate %%attr%d Location %d", i, m_ps_input_info->interpolator_settings[i])); + bool flat = (m_ps_input_info->interpolator_settings[i] & 0x400u) != 0; + uint32_t location = m_ps_input_info->interpolator_settings[i] & 0x1fu; + + if (flat) + { + vars.Add(String8::FromPrintf("OpDecorate %%attr%u Flat", i)); + } + vars.Add(String8::FromPrintf("OpDecorate %%attr%u Location %u", i, location)); } if (m_ps_input_info->ps_pos_xy) { @@ -7424,7 +7510,21 @@ void Spirv::WriteLocalVariables() } } - /* buffer_index += (m_bind->gds_pointers.pointers_num > 0 ? (m_bind->gds_pointers.pointers_num - 1) / 4 + 1 : 0); */ + buffer_index += (m_bind->gds_pointers.pointers_num > 0 ? (m_bind->gds_pointers.pointers_num - 1) / 4 + 1 : 0); + + for (int i = 0; i < m_bind->direct_sgprs.sgprs_num; i++) + { + int start_reg = m_bind->direct_sgprs.start_register[i]; + + EXIT_IF(buffer_index + i / 4 >= static_cast(m_bind->push_constant_size) / 16); + + String8 buffer = String8::FromPrintf("%d", buffer_index + i / 4); + String8 reg = String8::FromPrintf("s%d", start_reg + shift_regs); + String8 field = String8::FromPrintf("%d", i % 4); + m_source += String8(text).ReplaceStr("", reg).ReplaceStr("", buffer).ReplaceStr("", field); + } + + /* buffer_index += (m_bind->direct_sgprs.sgprs_num > 0 ? (m_bind->direct_sgprs.sgprs_num - 1) / 4 + 1 : 0); */ if (m_bind->extended.used) { @@ -7781,7 +7881,7 @@ void Spirv::WriteFunctions() } if (m_code.HasAnyOf({ShaderInstructionType::VMulLoI32, ShaderInstructionType::VMulLoU32, ShaderInstructionType::VMulHiU32, - ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24})) + ShaderInstructionType::VMadU32U24, ShaderInstructionType::VMulU32U24, ShaderInstructionType::SMulHiU32})) { m_source += FUNC_MUL_EXTENDED; } diff --git a/source/launcher/include/Configuration.h b/source/launcher/include/Configuration.h index 875bade..d6421fa 100644 --- a/source/launcher/include/Configuration.h +++ b/source/launcher/include/Configuration.h @@ -17,12 +17,6 @@ #define KYTY_CFG_GET(n) n = s->value(#n).value(); #define KYTY_CFG_GETL(n) n = s->value(#n).toStringList(); -//#define KYTY_LIBS \ -// { \ -// "libc_internal_1", "libkernel_1", "libVideoOut_1", "libSysmodule_1", "libDiscMap_1", "libDebug_1", "libGraphicsDriver_1", \ -// "libUserService_1", "libSystemService_1", "libPad_1", "libNet_1", "libDialog_1", "libAudio_1", "libPlayGo_1", "libSaveData_1", \ -// "libAppContent_1" \ -// } template inline QStringList EnumToList()