mirror of https://git.suyu.dev/suyu/suyu
Fix shader dumps with nvdisasm
skip fragment shaders when rasterizer is disabled; initialize env_ptrs
This commit is contained in:
parent
096644c01c
commit
dfb7fc8293
|
@ -39,7 +39,7 @@ public:
|
||||||
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
||||||
u32 offset) = 0;
|
u32 offset) = 0;
|
||||||
|
|
||||||
virtual void Dump(u64 hash) = 0;
|
virtual void Dump(u64 pipeline_hash, u64 shader_hash) = 0;
|
||||||
|
|
||||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||||
return sph;
|
return sph;
|
||||||
|
|
|
@ -445,7 +445,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
|
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
|
||||||
std::span<Shader::Environment* const> envs, bool use_shader_workers,
|
std::span<Shader::Environment* const> envs, bool use_shader_workers,
|
||||||
bool force_context_flush) try {
|
bool force_context_flush) try {
|
||||||
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
auto hash = key.Hash();
|
||||||
|
LOG_INFO(Render_OpenGL, "0x{:016x}", hash);
|
||||||
size_t env_index{};
|
size_t env_index{};
|
||||||
u32 total_storage_buffers{};
|
u32 total_storage_buffers{};
|
||||||
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
||||||
|
@ -474,7 +475,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
||||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
||||||
|
|
||||||
if (Settings::values.dump_shaders) {
|
if (Settings::values.dump_shaders) {
|
||||||
env.Dump(key.unique_hashes[index]);
|
env.Dump(hash, key.unique_hashes[index]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!uses_vertex_a || index != 1) {
|
if (!uses_vertex_a || index != 1) {
|
||||||
|
@ -566,12 +567,13 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
||||||
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
||||||
ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
|
ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env,
|
||||||
bool force_context_flush) try {
|
bool force_context_flush) try {
|
||||||
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
|
auto hash = key.Hash();
|
||||||
|
LOG_INFO(Render_OpenGL, "0x{:016x}", hash);
|
||||||
|
|
||||||
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
||||||
|
|
||||||
if (Settings::values.dump_shaders) {
|
if (Settings::values.dump_shaders) {
|
||||||
env.Dump(key.Hash());
|
env.Dump(hash, key.unique_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||||
|
|
|
@ -584,7 +584,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
ShaderPools& pools, const GraphicsPipelineCacheKey& key,
|
||||||
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
std::span<Shader::Environment* const> envs, PipelineStatistics* statistics,
|
||||||
bool build_in_parallel) try {
|
bool build_in_parallel) try {
|
||||||
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
|
auto hash = key.Hash();
|
||||||
|
LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
|
||||||
size_t env_index{0};
|
size_t env_index{0};
|
||||||
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
||||||
const bool uses_vertex_a{key.unique_hashes[0] != 0};
|
const bool uses_vertex_a{key.unique_hashes[0] != 0};
|
||||||
|
@ -611,7 +612,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||||
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
|
||||||
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
|
||||||
if (Settings::values.dump_shaders) {
|
if (Settings::values.dump_shaders) {
|
||||||
env.Dump(key.unique_hashes[index]);
|
env.Dump(hash, key.unique_hashes[index]);
|
||||||
}
|
}
|
||||||
if (!uses_vertex_a || index != 1) {
|
if (!uses_vertex_a || index != 1) {
|
||||||
// Normal path
|
// Normal path
|
||||||
|
@ -712,18 +713,19 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||||
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||||
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
|
ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
|
||||||
PipelineStatistics* statistics, bool build_in_parallel) try {
|
PipelineStatistics* statistics, bool build_in_parallel) try {
|
||||||
|
auto hash = key.Hash();
|
||||||
if (device.HasBrokenCompute()) {
|
if (device.HasBrokenCompute()) {
|
||||||
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
|
LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", hash);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
|
LOG_INFO(Render_Vulkan, "0x{:016x}", hash);
|
||||||
|
|
||||||
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
|
||||||
|
|
||||||
// Dump it before error.
|
// Dump it before error.
|
||||||
if (Settings::values.dump_shaders) {
|
if (Settings::values.dump_shaders) {
|
||||||
env.Dump(key.Hash());
|
env.Dump(hash, key.unique_hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
|
||||||
|
|
|
@ -51,6 +51,11 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
|
||||||
}
|
}
|
||||||
const auto& shader_config{maxwell3d->regs.pipelines[index]};
|
const auto& shader_config{maxwell3d->regs.pipelines[index]};
|
||||||
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
|
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
|
||||||
|
if (program == Tegra::Engines::Maxwell3D::Regs::ShaderType::Pixel &&
|
||||||
|
!maxwell3d->regs.rasterize_enable) {
|
||||||
|
unique_hashes[index] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
||||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
|
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
|
||||||
if (!cpu_shader_addr) {
|
if (!cpu_shader_addr) {
|
||||||
|
|
|
@ -70,7 +70,7 @@ public:
|
||||||
protected:
|
protected:
|
||||||
struct GraphicsEnvironments {
|
struct GraphicsEnvironments {
|
||||||
std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
|
std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
|
||||||
std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
|
std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs{};
|
||||||
|
|
||||||
std::span<Shader::Environment* const> Span() const noexcept {
|
std::span<Shader::Environment* const> Span() const noexcept {
|
||||||
return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
|
return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
|
||||||
|
|
|
@ -102,7 +102,8 @@ static std::string_view StageToPrefix(Shader::Stage stage) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowest,
|
static void DumpImpl(u64 pipeline_hash, u64 shader_hash, std::span<const u64> code,
|
||||||
|
[[maybe_unused]] u32 read_highest, [[maybe_unused]] u32 read_lowest,
|
||||||
u32 initial_offset, Shader::Stage stage) {
|
u32 initial_offset, Shader::Stage stage) {
|
||||||
const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
|
const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
|
||||||
const auto base_dir{shader_dir / "shaders"};
|
const auto base_dir{shader_dir / "shaders"};
|
||||||
|
@ -111,13 +112,18 @@ static void DumpImpl(u64 hash, const u64* code, u32 read_highest, u32 read_lowes
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto prefix = StageToPrefix(stage);
|
const auto prefix = StageToPrefix(stage);
|
||||||
const auto name{base_dir / fmt::format("{}{:016x}.ash", prefix, hash)};
|
const auto name{base_dir /
|
||||||
const size_t real_size = read_highest - read_lowest + initial_offset;
|
fmt::format("{:016x}_{}_{:016x}.ash", pipeline_hash, prefix, shader_hash)};
|
||||||
const size_t padding_needed = ((32 - (real_size % 32)) % 32);
|
|
||||||
std::fstream shader_file(name, std::ios::out | std::ios::binary);
|
std::fstream shader_file(name, std::ios::out | std::ios::binary);
|
||||||
|
ASSERT(initial_offset % sizeof(u64) == 0);
|
||||||
const size_t jump_index = initial_offset / sizeof(u64);
|
const size_t jump_index = initial_offset / sizeof(u64);
|
||||||
shader_file.write(reinterpret_cast<const char*>(code + jump_index), real_size);
|
const size_t code_size = code.size_bytes() - initial_offset;
|
||||||
for (size_t i = 0; i < padding_needed; i++) {
|
shader_file.write(reinterpret_cast<const char*>(&code[jump_index]), code_size);
|
||||||
|
|
||||||
|
// + 1 instruction, due to the fact that we skip the final self branch instruction in the code,
|
||||||
|
// but we need to consider it for padding, otherwise nvdisasm rages.
|
||||||
|
const size_t padding_needed = (32 - ((code_size + INST_SIZE) % 32)) % 32;
|
||||||
|
for (size_t i = 0; i < INST_SIZE + padding_needed; i++) {
|
||||||
shader_file.put(0);
|
shader_file.put(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -197,8 +203,8 @@ u64 GenericEnvironment::CalculateHash() const {
|
||||||
return Common::CityHash64(data.get(), size);
|
return Common::CityHash64(data.get(), size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenericEnvironment::Dump(u64 hash) {
|
void GenericEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) {
|
||||||
DumpImpl(hash, code.data(), read_highest, read_lowest, initial_offset, stage);
|
DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GenericEnvironment::Serialize(std::ofstream& file) const {
|
void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||||
|
@ -282,6 +288,7 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
|
||||||
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
|
Tegra::Texture::TICEntry GenericEnvironment::ReadTextureInfo(GPUVAddr tic_addr, u32 tic_limit,
|
||||||
bool via_header_index, u32 raw) {
|
bool via_header_index, u32 raw) {
|
||||||
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
|
||||||
|
ASSERT(handle.first <= tic_limit);
|
||||||
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
|
||||||
Tegra::Texture::TICEntry entry;
|
Tegra::Texture::TICEntry entry;
|
||||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||||
|
@ -465,8 +472,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||||
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
|
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
|
||||||
.read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state))
|
.read(reinterpret_cast<char*>(&viewport_transform_state), sizeof(viewport_transform_state))
|
||||||
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
|
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
|
||||||
code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
|
code.resize(Common::DivCeil(code_size, sizeof(u64)));
|
||||||
file.read(reinterpret_cast<char*>(code.get()), code_size);
|
file.read(reinterpret_cast<char*>(code.data()), code_size);
|
||||||
for (size_t i = 0; i < num_texture_types; ++i) {
|
for (size_t i = 0; i < num_texture_types; ++i) {
|
||||||
u32 key;
|
u32 key;
|
||||||
Shader::TextureType type;
|
Shader::TextureType type;
|
||||||
|
@ -509,8 +516,8 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||||
is_propietary_driver = texture_bound == 2;
|
is_propietary_driver = texture_bound == 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileEnvironment::Dump(u64 hash) {
|
void FileEnvironment::Dump(u64 pipeline_hash, u64 shader_hash) {
|
||||||
DumpImpl(hash, code.get(), read_highest, read_lowest, initial_offset, stage);
|
DumpImpl(pipeline_hash, shader_hash, code, read_highest, read_lowest, initial_offset, stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 FileEnvironment::ReadInstruction(u32 address) {
|
u64 FileEnvironment::ReadInstruction(u32 address) {
|
||||||
|
|
|
@ -58,7 +58,7 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] u64 CalculateHash() const;
|
[[nodiscard]] u64 CalculateHash() const;
|
||||||
|
|
||||||
void Dump(u64 hash) override;
|
void Dump(u64 pipeline_hash, u64 shader_hash) override;
|
||||||
|
|
||||||
void Serialize(std::ofstream& file) const;
|
void Serialize(std::ofstream& file) const;
|
||||||
|
|
||||||
|
@ -188,10 +188,10 @@ public:
|
||||||
return cbuf_replacements.size() != 0;
|
return cbuf_replacements.size() != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dump(u64 hash) override;
|
void Dump(u64 pipeline_hash, u64 shader_hash) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<u64[]> code;
|
std::vector<u64> code;
|
||||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||||
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
||||||
std::unordered_map<u64, u32> cbuf_values;
|
std::unordered_map<u64, u32> cbuf_values;
|
||||||
|
|
Loading…
Reference in New Issue