forked from ShuriZma/suyu
1
0
Fork 0

shader: Optimize NVN Fallthrough

This commit is contained in:
FernandoS27 2021-05-02 01:50:27 +02:00 committed by ameerj
parent 153a77efee
commit ee61ec2c39
4 changed files with 83 additions and 9 deletions

View File

@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
AddLabel(); AddLabel();
const size_t num_buffers{info.storage_buffers_descriptors.size()}; const size_t num_buffers{info.storage_buffers_descriptors.size()};
for (size_t index = 0; index < num_buffers; ++index) { for (size_t index = 0; index < num_buffers; ++index) {
if (!info.nvn_buffer_used[index]) {
continue;
}
const auto& ssbo{info.storage_buffers_descriptors[index]}; const auto& ssbo{info.storage_buffers_descriptors[index]};
const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};

View File

@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) {
}()}; }()};
auto& descs{program.info.storage_buffers_descriptors}; auto& descs{program.info.storage_buffers_descriptors};
for (u32 index = 0; index < num_buffers; ++index) { for (u32 index = 0; index < num_buffers; ++index) {
if (!program.info.nvn_buffer_used[index]) {
continue;
}
const u32 offset{base + index * descriptor_size}; const u32 offset{base + index * descriptor_size};
const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
if (it != descs.end()) { if (it != descs.end()) {
it->is_written |= program.info.stores_global_memory;
continue; continue;
} }
// Assume these are written for now
descs.push_back({ descs.push_back({
.cbuf_index = driver_cbuf, .cbuf_index = driver_cbuf,
.cbuf_offset = offset, .cbuf_offset = offset,
.count = 1, .count = 1,
.is_written = true, .is_written = program.info.stores_global_memory,
}); });
} }
} }

View File

@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) {
} }
} }
// Marks the NVN storage buffer descriptor (if any) that a constant buffer read refers to.
//
// `inst` is expected to be a GetCbuf* instruction: Arg(0) is read as the constant buffer index
// and Arg(1) as the offset inside that buffer.
//   - If either operand is not an immediate, the accessed descriptor cannot be determined here,
//     so every bit of info.nvn_buffer_used is set.
//   - Reads from a constant buffer other than index 0 are ignored.
//   - An immediate offset inside [nvn_buffer_base, nvn_buffer_base + 16 * 0x10) sets the bit of
//     the 0x10-byte descriptor it falls into; offsets outside that window change nothing.
void CheckCBufNVN(Info& info, IR::Inst& inst) {
    // Size in bytes of one descriptor slot and the number of slots in the tracked window.
    // NOTE(review): the slot count presumably has to match the width of nvn_buffer_used.
    constexpr u32 descriptor_stride{0x10};
    constexpr u32 descriptor_slots{16};

    const IR::Value binding{inst.Arg(0)};
    if (!binding.IsImmediate()) {
        info.nvn_buffer_used.set();
        return;
    }
    if (binding.U32() != 0) {
        return;
    }

    const IR::Value location{inst.Arg(1)};
    if (!location.IsImmediate()) {
        info.nvn_buffer_used.set();
        return;
    }

    const u32 byte_offset{location.U32()};
    const u32 window_begin{info.nvn_buffer_base};
    const u32 window_end{window_begin + descriptor_stride * descriptor_slots};
    if (byte_offset < window_begin || byte_offset >= window_end) {
        return;
    }
    const std::size_t slot{(byte_offset - window_begin) / descriptor_stride};
    info.nvn_buffer_used.set(slot);
}
void VisitUsages(Info& info, IR::Inst& inst) { void VisitUsages(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::CompositeConstructF16x2: case IR::Opcode::CompositeConstructF16x2:
@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
break; break;
} }
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::LoadGlobalU16:
case IR::Opcode::LoadGlobalS16:
case IR::Opcode::LoadGlobal32:
case IR::Opcode::LoadGlobal64:
case IR::Opcode::LoadGlobal128:
case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalU8:
case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalS8:
case IR::Opcode::WriteGlobalU16: case IR::Opcode::WriteGlobalU16:
@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GlobalAtomicMinF32x2: case IR::Opcode::GlobalAtomicMinF32x2:
case IR::Opcode::GlobalAtomicMaxF16x2: case IR::Opcode::GlobalAtomicMaxF16x2:
case IR::Opcode::GlobalAtomicMaxF32x2: case IR::Opcode::GlobalAtomicMaxF32x2:
info.stores_global_memory = true;
[[fallthrough]];
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS8:
case IR::Opcode::LoadGlobalU16:
case IR::Opcode::LoadGlobalS16:
case IR::Opcode::LoadGlobal32:
case IR::Opcode::LoadGlobal64:
case IR::Opcode::LoadGlobal128:
info.uses_int64 = true; info.uses_int64 = true;
info.uses_global_memory = true; info.uses_global_memory = true;
info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
} }
} }
// Forwards every constant buffer read (any of the GetCbuf* opcodes listed below) to
// CheckCBufNVN; all other instructions are left untouched.
void VisitCbufs(Info& info, IR::Inst& inst) {
    const auto opcode{inst.GetOpcode()};
    const bool reads_cbuf{
        opcode == IR::Opcode::GetCbufU8 || opcode == IR::Opcode::GetCbufS8 ||
        opcode == IR::Opcode::GetCbufU16 || opcode == IR::Opcode::GetCbufS16 ||
        opcode == IR::Opcode::GetCbufU32 || opcode == IR::Opcode::GetCbufF32 ||
        opcode == IR::Opcode::GetCbufU32x2};
    if (reads_cbuf) {
        CheckCBufNVN(info, inst);
    }
}
// Runs every per-instruction collection step on `inst`, accumulating the results into `info`:
// usage flags, floating-point modifiers, and constant buffer reads.
void Visit(Info& info, IR::Inst& inst) { void Visit(Info& info, IR::Inst& inst) {
VisitUsages(info, inst); VisitUsages(info, inst);
VisitFpModifiers(info, inst); VisitFpModifiers(info, inst);
VisitCbufs(info, inst);
} }
void GatherInfoFromHeader(Environment& env, Info& info) { void GatherInfoFromHeader(Environment& env, Info& info) {
@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
void CollectShaderInfoPass(Environment& env, IR::Program& program) { void CollectShaderInfoPass(Environment& env, IR::Program& program) {
Info& info{program.info}; Info& info{program.info};
const u32 base{[&] {
switch (program.stage) {
case Stage::VertexA:
case Stage::VertexB:
return 0x110u;
case Stage::TessellationControl:
return 0x210u;
case Stage::TessellationEval:
return 0x310u;
case Stage::Geometry:
return 0x410u;
case Stage::Fragment:
return 0x510u;
case Stage::Compute:
return 0x310u;
}
throw InvalidArgument("Invalid stage {}", program.stage);
}()};
info.nvn_buffer_base = base;
for (IR::Block* const block : program.post_order_blocks) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst); Visit(info, inst);

View File

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include <bitset>
#include "common/common_types.h" #include "common/common_types.h"
#include "shader_recompiler/frontend/ir/type.h" #include "shader_recompiler/frontend/ir/type.h"
@ -140,6 +141,7 @@ struct Info {
bool stores_tess_level_outer{}; bool stores_tess_level_outer{};
bool stores_tess_level_inner{}; bool stores_tess_level_inner{};
bool stores_indexed_attributes{}; bool stores_indexed_attributes{};
bool stores_global_memory{};
bool uses_fp16{}; bool uses_fp16{};
bool uses_fp64{}; bool uses_fp64{};
@ -180,6 +182,8 @@ struct Info {
IR::Type used_storage_buffer_types{}; IR::Type used_storage_buffer_types{};
u32 constant_buffer_mask{}; u32 constant_buffer_mask{};
u32 nvn_buffer_base{};
std::bitset<16> nvn_buffer_used{};
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
constant_buffer_descriptors; constant_buffer_descriptors;