rsx: Restructure programs

- Also re-enable pipeline optimizations
This commit is contained in:
kd-11 2018-10-20 17:43:00 +03:00 committed by kd-11
parent b0a6b72ce8
commit 1ad76ad331
21 changed files with 777 additions and 439 deletions

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include <sstream>
#include "ShaderParam.h"
@ -169,7 +169,6 @@ namespace glsl
" int is_volatile;\n"
" int frequency;\n"
" int divisor;\n"
" int modulo;\n"
"};\n\n"
"uint get_bits(uvec4 v, int swap)\n"
@ -290,17 +289,24 @@ namespace glsl
"attribute_desc fetch_desc(int location)\n"
"{\n"
" // Each descriptor is 64 bits wide\n"
" // [0-8] attribute stride\n"
" // [8-20] attribute divisor\n"
" // [20-21] swap bytes flag\n"
" // [21-22] volatile flag\n"
" // [22-24] frequency op\n"
" // [24-27] attribute type\n"
" // [27-30] attribute size\n"
" attribute_desc result;\n"
" int attribute_flags = input_attributes[location].w;\n"
" result.type = input_attributes[location].x;\n"
" result.attribute_size = input_attributes[location].y;\n"
" result.starting_offset = input_attributes[location].z;\n"
" int attribute_flags = input_attributes[location].x;\n"
" result.stride = attribute_flags & 0xFF;\n"
" result.swap_bytes = (attribute_flags >> 8) & 0x1;\n"
" result.is_volatile = (attribute_flags >> 9) & 0x1;\n"
" result.frequency = (attribute_flags >> 10) & 0x3;\n"
" result.modulo = (attribute_flags >> 12) & 0x1;\n"
" result.divisor = (attribute_flags >> 13) & 0xFFFF;\n"
" result.divisor = (attribute_flags >> 8) & 0xFFF;\n"
" result.swap_bytes = (attribute_flags >> 20) & 0x1;\n"
" result.is_volatile = (attribute_flags >> 21) & 0x1;\n"
" result.frequency = (attribute_flags >> 22) & 0x3;\n"
" result.type = (attribute_flags >> 24) & 0x7;\n"
" result.attribute_size = (attribute_flags >> 27) & 0x7;\n"
" result.starting_offset = input_attributes[location].y;\n"
" return result;\n"
"}\n\n"
@ -325,15 +331,19 @@ namespace glsl
" }\n\n"
" int vertex_id = " << vertex_id_name << " - int(vertex_base_index);\n"
" if (desc.frequency == 0)\n"
" {\n"
" vertex_id = 0;\n"
" else if (desc.frequency > 1)\n"
" }\n"
" else if (desc.frequency == 2)\n"
" {\n"
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" if (desc.modulo != 0)\n"
" vertex_id = " << vertex_id_name << " % desc.divisor;\n"
" else\n"
" vertex_id = " << vertex_id_name << " / desc.divisor;\n"
" }\n"
" else if (desc.frequency == 3)\n"
" {\n"
" //if a vertex modifier is active; vertex_base must be 0 and is ignored\n"
" vertex_id = " << vertex_id_name << " % desc.divisor;\n"
" }\n"
"\n"
" if (desc.is_volatile != 0)\n"
" return fetch_attribute(desc, vertex_id, volatile_input_stream);\n"

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include <set>
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
@ -132,9 +132,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
OS << "\n";
OS << "layout(std140, binding = 2) uniform FragmentConstantsBuffer\n";
OS << "{\n";
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{
if (PT.type == "sampler1D" ||
@ -144,10 +143,21 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
continue;
for (const ParamItem& PI : PT.items)
OS << " " << PT.type << " " << PI.name << ";\n";
{
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
}
// Fragment state parameters
if (!constants_block.empty())
{
OS << "layout(std140, binding = 3) uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
}
OS << "layout(std140, binding = 4) uniform FragmentStateBuffer\n";
OS << "{\n";
OS << " float fog_param0;\n";
OS << " float fog_param1;\n";
OS << " uint rop_control;\n";
@ -156,8 +166,12 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << " uint fog_mode;\n";
OS << " float wpos_scale;\n";
OS << " float wpos_bias;\n";
OS << "};\n\n";
OS << "layout(std140, binding = 5) uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " vec4 texture_parameters[16];\n"; //sampling: x,y scaling and (unused) offsets data
OS << "};\n";
OS << "};\n\n";
}
void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)

View File

@ -624,7 +624,10 @@ void GLGSRender::end()
m_attrib_ring_buffer->notify();
m_index_ring_buffer->notify();
m_vertex_state_buffer->notify();
m_fragment_env_buffer->notify();
m_vertex_env_buffer->notify();
m_texture_parameters_buffer->notify();
m_vertex_layout_buffer->notify();
m_fragment_constants_buffer->notify();
m_transform_constants_buffer->notify();
@ -795,7 +798,10 @@ void GLGSRender::on_init_thread()
m_attrib_ring_buffer.reset(new gl::legacy_ring_buffer());
m_transform_constants_buffer.reset(new gl::legacy_ring_buffer());
m_fragment_constants_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_state_buffer.reset(new gl::legacy_ring_buffer());
m_fragment_env_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_env_buffer.reset(new gl::legacy_ring_buffer());
m_texture_parameters_buffer.reset(new gl::legacy_ring_buffer());
m_vertex_layout_buffer.reset(new gl::legacy_ring_buffer());
m_index_ring_buffer.reset(new gl::legacy_ring_buffer());
}
else
@ -803,7 +809,10 @@ void GLGSRender::on_init_thread()
m_attrib_ring_buffer.reset(new gl::ring_buffer());
m_transform_constants_buffer.reset(new gl::ring_buffer());
m_fragment_constants_buffer.reset(new gl::ring_buffer());
m_vertex_state_buffer.reset(new gl::ring_buffer());
m_fragment_env_buffer.reset(new gl::ring_buffer());
m_vertex_env_buffer.reset(new gl::ring_buffer());
m_texture_parameters_buffer.reset(new gl::ring_buffer());
m_vertex_layout_buffer.reset(new gl::ring_buffer());
m_index_ring_buffer.reset(new gl::ring_buffer());
}
@ -811,7 +820,10 @@ void GLGSRender::on_init_thread()
m_index_ring_buffer->create(gl::buffer::target::element_array, 64 * 0x100000);
m_transform_constants_buffer->create(gl::buffer::target::uniform, 64 * 0x100000);
m_fragment_constants_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_state_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_fragment_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_env_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000);
if (gl_caps.vendor_AMD)
{
@ -1013,9 +1025,24 @@ void GLGSRender::on_exit()
m_fragment_constants_buffer->remove();
}
if (m_vertex_state_buffer)
if (m_fragment_env_buffer)
{
m_vertex_state_buffer->remove();
m_fragment_env_buffer->remove();
}
if (m_vertex_env_buffer)
{
m_vertex_env_buffer->remove();
}
if (m_texture_parameters_buffer)
{
m_texture_parameters_buffer->remove();
}
if (m_vertex_layout_buffer)
{
m_vertex_layout_buffer->remove();
}
if (m_index_ring_buffer)
@ -1224,78 +1251,113 @@ bool GLGSRender::load_program()
void GLGSRender::load_program_env(const gl::vertex_upload_info& upload_info)
{
u8 *buf;
u32 vertex_state_offset;
u32 vertex_constants_offset;
u32 fragment_constants_offset;
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
if (!m_program)
{
fmt::throw_exception("Unreachable right now" HERE);
}
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty) && fragment_constants_size;
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
m_program->use();
if (manually_flush_ring_buffers)
{
m_vertex_state_buffer->reserve_storage_on_heap(512);
m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256));
if (update_fragment_env) m_fragment_env_buffer->reserve_storage_on_heap(128);
if (update_vertex_env) m_vertex_env_buffer->reserve_storage_on_heap(256);
if (update_fragment_texture_env) m_texture_parameters_buffer->reserve_storage_on_heap(256);
if (update_fragment_constants) m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_constants_size, 256));
if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
m_vertex_layout_buffer->reserve_storage_on_heap(128 + 16);
}
if (update_vertex_env)
{
// Vertex state
auto mapping = m_vertex_state_buffer->alloc_from_heap(512, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
vertex_state_offset = mapping.second;
auto mapping = m_vertex_env_buffer->alloc_from_heap(160, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = upload_info.vertex_index_base;
*(reinterpret_cast<u32*>(buf + 132)) = 0; // Reserved
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_vertex_env_buffer->bind_range(0, mapping.second, 160);
}
{
// Vertex layout state
auto mapping = m_vertex_layout_buffer->alloc_from_heap(128 + 16, m_uniform_buffer_offset_align);
auto buf = static_cast<s32*>(mapping.first);
*buf = upload_info.vertex_index_base;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, buf, upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
m_vertex_layout_buffer->bind_range(1, mapping.second, 128 + 16);
}
if (update_transform_constants)
{
// Vertex constants
mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
vertex_constants_offset = mapping.second;
auto mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_vertex_program_constants_data(buf);
m_transform_constants_buffer->bind_range(2, mapping.second, 8192);
}
// Fragment constants
mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_buffer_size, m_uniform_buffer_offset_align);
buf = static_cast<u8*>(mapping.first);
fragment_constants_offset = mapping.second;
if (fragment_constants_size)
if (update_fragment_constants)
{
// Fragment constants
auto mapping = m_fragment_constants_buffer->alloc_from_heap(fragment_constants_size, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);
m_fragment_constants_buffer->bind_range(3, mapping.second, fragment_constants_size);
}
if (update_fragment_env)
{
// Fragment state
fill_fragment_state_buffer(buf + fragment_constants_size, current_fragment_program);
auto mapping = m_fragment_env_buffer->alloc_from_heap(32, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_fragment_state_buffer(buf, current_fragment_program);
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);
m_fragment_env_buffer->bind_range(4, mapping.second, 32);
}
if (update_transform_constants) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192);
if (update_fragment_texture_env)
{
// Fragment texture parameters
auto mapping = m_texture_parameters_buffer->alloc_from_heap(256, m_uniform_buffer_offset_align);
auto buf = static_cast<u8*>(mapping.first);
fill_fragment_texture_parameters(buf, current_fragment_program);
m_texture_parameters_buffer->bind_range(5, mapping.second, 256);
}
if (manually_flush_ring_buffers)
{
m_vertex_state_buffer->unmap();
m_fragment_constants_buffer->unmap();
if (update_fragment_env) m_fragment_env_buffer->unmap();
if (update_vertex_env) m_vertex_env_buffer->unmap();
if (update_fragment_texture_env) m_texture_parameters_buffer->unmap();
if (update_fragment_constants) m_fragment_constants_buffer->unmap();
if (update_transform_constants) m_transform_constants_buffer->unmap();
m_vertex_layout_buffer->unmap();
}
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty);
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
m_graphics_state &= ~handled_flags;
}

View File

@ -294,7 +294,10 @@ private:
std::unique_ptr<gl::ring_buffer> m_attrib_ring_buffer;
std::unique_ptr<gl::ring_buffer> m_fragment_constants_buffer;
std::unique_ptr<gl::ring_buffer> m_transform_constants_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_state_buffer;
std::unique_ptr<gl::ring_buffer> m_fragment_env_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_env_buffer;
std::unique_ptr<gl::ring_buffer> m_texture_parameters_buffer;
std::unique_ptr<gl::ring_buffer> m_vertex_layout_buffer;
std::unique_ptr<gl::ring_buffer> m_index_ring_buffer;
// Identity buffer used to fix broken gl_VertexID on ATI stack

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/System.h"
#include "GLVertexProgram.h"
@ -37,11 +37,15 @@ void GLVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n";
OS << " uint vertex_base_index;\n";
OS << " float point_size;\n";
OS << " float z_near;\n";
OS << " float z_far;\n";
OS << " ivec4 input_attributes[16];\n";
OS << "};\n\n";
OS << "layout(std140, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " ivec2 input_attributes[16];\n";
OS << "};\n\n";
}
@ -53,7 +57,7 @@ void GLVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
void GLVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
{
OS << "layout(std140, binding = 1) uniform VertexConstantsBuffer\n";
OS << "layout(std140, binding = 2) uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 vc[468];\n";
OS << "};\n\n";

View File

@ -129,7 +129,7 @@ namespace rsx
m_prefetcher_busy.store(true);
read_ahead(m_prefetcher_info, m_prefetched_queue, m_prefetch_get);
//optimize(m_prefetcher_info, m_prefetched_queue);
optimize(m_prefetcher_info, m_prefetched_queue);
m_prefetcher_busy.store(false);
m_prefetch_mutex.unlock();
@ -206,7 +206,9 @@ namespace rsx
info.start_loc = get_pointer;
info.num_draw_calls = 0;
info.draw_call_distance_weight = 0;
u32 cmd;
u32 count;
while (true)
{
@ -218,8 +220,6 @@ namespace rsx
// Validate put and get registers before reading the command
// TODO: Who should handle graphics exceptions??
u32 cmd;
if (u32 addr = RSXIOMem.RealAddr(get_pointer))
{
cmd = vm::read32(addr);
@ -230,6 +230,8 @@ namespace rsx
break;
}
if (UNLIKELY(cmd & 0xe0030003))
{
if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD ||
(cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD ||
(cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD ||
@ -239,8 +241,8 @@ namespace rsx
commands.push_back({ cmd, 0, get_pointer });
break;
}
if ((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD)
}
else if (UNLIKELY((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD))
{
if (commands.empty() || commands.back().reg != RSX_METHOD_NOP_CMD)
{
@ -251,49 +253,50 @@ namespace rsx
get_pointer += 4;
continue;
}
if (cmd & 0x3)
else if (UNLIKELY(cmd & 0x3))
{
// Malformed command, optional recovery
break;
}
u32 count = (cmd >> 18) & 0x7ff;
//Validate the args ptr if the command attempts to read from it
auto args = vm::ptr<u32>::make(RSXIOMem.RealAddr(get_pointer + 4));
if (!args && count)
if (UNLIKELY(!args))
{
// Optional recovery
break;
}
count = (cmd >> 18) & 0x7ff;
if (count > 1)
{
// Stop command execution if put will be equal to get ptr during the execution itself
if (count * 4 + 4 > put - get_pointer)
if (UNLIKELY(count * 4 + 4 > put - get_pointer))
{
count = (put - get_pointer) / 4 - 1;
}
if (count > 1)
{
// Queue packet header
commands.push_back({ FIFO_PACKET_BEGIN, count, get_pointer });
const bool no_increment = (cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD;
u32 reg = cmd & 0xfffc;
get_pointer += 4; // First executed command is at data[0]
// First executed command is at data[0]
get_pointer += 4;
if (UNLIKELY((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD))
{
const u32 reg = cmd & 0xfffc;
for (u32 i = 0; i < count; i++, get_pointer += 4)
{
commands.push_back({ reg, args[i], get_pointer });
if (reg == (NV4097_SET_BEGIN_END << 2))
{
info.num_draw_calls++;
}
if (!no_increment) reg += 4;
}
else
{
u32 reg = cmd & 0xfffc;
for (u32 i = 0; i < count; i++, get_pointer += 4, reg += 4)
{
commands.push_back({ reg, args[i], get_pointer });
}
}
}
else
@ -315,15 +318,14 @@ namespace rsx
}
info.length = get_pointer - info.start_loc;
if (!info.num_draw_calls)
if (info.num_draw_calls < 2)
{
return;
}
info.num_draw_calls /= 2; // Begin+End pairs
//info.draw_call_distance_weight = info.length / info.num_draw_calls;
}
#pragma optimize("", on)
void FIFO_control::report_branch_hit(u32 source, u32 target)
{
const auto range = m_branch_prediction_table.equal_range(source);
@ -507,7 +509,7 @@ namespace rsx
if (queue_size > 0)
{
if (m_internal_get != m_ctrl->get)
if (UNLIKELY(m_internal_get != m_ctrl->get))
{
// Control register changed
registers_changed = true;
@ -545,7 +547,7 @@ namespace rsx
}
}
verify(HERE), m_queue.empty();
//verify(HERE), m_queue.empty();
if (m_ctrl->put == m_ctrl->get)
{
@ -573,7 +575,7 @@ namespace rsx
}
// Lock to disable the prefetcher
if (!m_prefetch_mutex.try_lock())
if (0)//!m_prefetch_mutex.try_lock())
{
return busy_cmd;
}
@ -601,13 +603,13 @@ namespace rsx
{
m_internal_get = m_ctrl->get;
read_ahead(m_fifo_info, m_queue, m_internal_get);
//optimize(m_fifo_info, m_queue);
optimize(m_fifo_info, m_queue);
m_ctrl->get = m_internal_get;
m_ctrl_tag++;
}
m_prefetch_mutex.unlock();
//m_prefetch_mutex.unlock();
if (!m_queue.empty())
{
@ -656,49 +658,58 @@ namespace rsx
// Vertex
{ NV4097_SET_VERTEX_DATA_ARRAY_FORMAT, 16 },
{ NV4097_SET_VERTEX_DATA_ARRAY_OFFSET, 16 },
// Raster
{ NV4097_SET_ALPHA_TEST_ENABLE, 1 },
{ NV4097_SET_ALPHA_FUNC, 1 },
{ NV4097_SET_ALPHA_REF, 1 },
{ NV4097_SET_FRONT_FACE, 1 },
};
for (u32 reg = 0; reg < m_skippable_registers.size(); ++reg)
const std::pair<u32, u32> ignorable_ranges[] =
{
bool _continue = false;
for (const auto &method : skippable_ranges)
{
if (reg < method.first)
break;
// General
{ NV4097_INVALIDATE_VERTEX_FILE, 3 }, // PSLight clears VERTEX_FILE[0-2]
{ NV4097_INVALIDATE_VERTEX_CACHE_FILE, 1 },
{ NV4097_INVALIDATE_L2, 1 },
{ NV4097_INVALIDATE_ZCULL, 1 },
// FIFO
{ (FIFO_DISABLED_COMMAND >> 2), 1},
{ (FIFO_PACKET_BEGIN >> 2), 1 },
{ (FIFO_DRAW_BARRIER >> 2), 1 },
// ROP
{ NV4097_SET_ALPHA_FUNC, 1 },
{ NV4097_SET_ALPHA_REF, 1 },
{ NV4097_SET_ALPHA_TEST_ENABLE, 1 },
{ NV4097_SET_ANTI_ALIASING_CONTROL, 1 },
// Program
{ NV4097_SET_SHADER_PACKER, 1 },
{ NV4097_SET_SHADER_WINDOW, 1 },
// Vertex data offsets
{ NV4097_SET_VERTEX_DATA_BASE_OFFSET, 1 },
{ NV4097_SET_VERTEX_DATA_BASE_INDEX, 1 }
};
if (reg - method.first < method.second)
{
// Safe to ignore if value has not changed
m_skippable_registers[reg] = true;
_continue = true;
break;
}
}
if (_continue)
continue;
m_skippable_registers[reg] = false;
}
std::fill(m_register_properties.begin(), m_register_properties.end(), 0u);
for (const auto &method : skippable_ranges)
{
for (int subreg = 0; subreg < method.second; ++subreg)
for (int i = 0; i < method.second; ++i)
{
// Safe to ignore if value has not changed
verify(HERE), m_skippable_registers[subreg] = true;
m_register_properties[method.first + i] = register_props::skippable;
}
}
for (const auto &method : ignorable_ranges)
{
for (int i = 0; i < method.second; ++i)
{
m_register_properties[method.first + i] |= register_props::ignorable;
}
}
}
void flattening_pass::optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers)
{
__unused(info);
if (info.num_draw_calls < 20)
{
// Not enough draw calls
return;
}
#if (ENABLE_OPTIMIZATION_DEBUGGING)
auto copy = commands;
@ -750,31 +761,14 @@ namespace rsx
for (auto &command : commands)
{
//LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value);
bool flush_commands_flag = has_deferred_call;
bool execute_method_flag = true;
const auto reg = command.reg >> 2;
const auto value = command.value;
switch (reg)
{
case NV4097_INVALIDATE_VERTEX_FILE: // PSLight clears VERTEX_FILE[0-2]
case NV4097_PIPE_NOP:
case NV4097_INVALIDATE_VERTEX_FILE + 2:
case NV4097_INVALIDATE_VERTEX_CACHE_FILE:
case NV4097_INVALIDATE_L2:
case NV4097_INVALIDATE_ZCULL:
case (FIFO_DISABLED_COMMAND >> 2):
case (FIFO_PACKET_BEGIN >> 2):
case (FIFO_DRAW_BARRIER >> 2):
case (FIFO_EMPTY >> 2):
case (FIFO_BUSY >> 2):
{
// Ignore these completely
flush_commands_flag = false;
break;
}
case NV4097_SET_BEGIN_END:
{
if (value && value != deferred_primitive_type)
@ -788,47 +782,50 @@ namespace rsx
has_deferred_call = true;
flush_commands_flag = false;
execute_method_flag = false;
// TODO: If END, insert draw barrier
}
break;
}
case NV4097_DRAW_ARRAYS:
{
if (has_deferred_call)
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
}
break;
}
case NV4097_DRAW_INDEX_ARRAY:
{
if (has_deferred_call)
{
const auto cmd = method_registers.current_draw_clause.command;
if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none)
break;
flush_commands_flag = false;
break;
}
case NV4097_SET_VERTEX_DATA_BASE_INDEX:
case NV4097_SET_VERTEX_DATA_BASE_OFFSET:
{
// These can be executed when emitting geometry
flush_commands_flag = false;
break;
}
default:
{
// Hopefully this is skippable so the batch can keep growing
if (reg >= m_skippable_registers.size())
if (reg >= m_register_properties.size())
{
// Likely flow control, unskippable
// Flow control or special command
break;
}
if (m_skippable_registers[reg])
const auto properties = m_register_properties[reg];
if (properties & register_props::ignorable)
{
// These have no effect on rendering behavior or can be handled within begin/end
flush_commands_flag = false;
break;
}
if (properties & register_props::skippable)
{
if (has_deferred_call)
{
@ -840,9 +837,10 @@ namespace rsx
break;
}
}
}
set_register(reg, value);
}
break;
}
}
@ -1211,7 +1209,7 @@ namespace rsx
return;
}
if (cmd == FIFO::FIFO_EMPTY || !Emu.IsRunning())
if (cmd == FIFO::FIFO_EMPTY)
{
if (performance_counters.state == FIFO_state::running)
{
@ -1219,7 +1217,6 @@ namespace rsx
performance_counters.state = FIFO_state::empty;
}
std::this_thread::yield();
return;
}
@ -1227,7 +1224,7 @@ namespace rsx
// TODO: Who should handle graphics exceptions??
if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD)
{
u32 offs = cmd & 0x1ffffffc;
const u32 offs = cmd & 0x1ffffffc;
if (offs == command.loc)
{
//Jump to self. Often preceded by NOP
@ -1245,7 +1242,7 @@ namespace rsx
}
if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD)
{
u32 offs = cmd & 0xfffffffc;
const u32 offs = cmd & 0xfffffffc;
if (offs == command.loc)
{
//Jump to self. Often preceded by NOP
@ -1271,8 +1268,7 @@ namespace rsx
return;
}
u32 offs = cmd & 0xfffffffc;
//LOG_WARNING(RSX, "rsx call(0x%x) #0x%x - 0x%x", offs, cmd, get);
const u32 offs = cmd & 0xfffffffc;
m_return_addr = command.loc + 4;
fifo_ctrl->set_get(offs);
return;
@ -1286,7 +1282,6 @@ namespace rsx
return;
}
//LOG_WARNING(RSX, "rsx return(0x%x)", get);
fifo_ctrl->set_get(m_return_addr);
m_return_addr = -1;
return;

View File

@ -47,7 +47,7 @@ namespace rsx
u32 start_loc;
u32 length;
u32 num_draw_calls;
u32 draw_call_distance_weight;
u32 reserved;
};
struct branch_target_info_t
@ -67,7 +67,13 @@ namespace rsx
struct flattening_pass : public optimization_pass
{
private:
std::array<bool, 0x10000 / 4> m_skippable_registers;
enum register_props : u8
{
skippable = 1,
ignorable = 2
};
std::array<u8, 0x10000 / 4> m_register_properties;
public:
flattening_pass();

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "GCM.h"
#include "RSXTexture.h"
@ -230,7 +230,7 @@ struct RSXFragmentProgram
bool front_color_specular_output : 1;
u32 texture_dimensions;
std::array<float, 4> texture_scale[16];
float texture_scale[16][4];
u8 textures_alpha_kill[16];
u8 textures_zfunc[16];

View File

@ -419,6 +419,15 @@ namespace rsx
conditional_render_test_address = 0;
}
if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty)
{
// Request for update of fragment constants if the program block is invalidated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
// Request for update of texture parameters if the program is likely to have changed
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
}
in_begin_end = true;
}
@ -545,7 +554,7 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
@ -640,13 +649,20 @@ namespace rsx
while (external_interrupt_lock.load()) _mm_pause();
}
// Idle if emulation paused
if (Emu.IsPaused())
{
std::this_thread::sleep_for(1ms);
continue;
}
// Execute backend-local tasks first
do_local_task(performance_counters.state);
// Update sub-units
zcull_ctrl->update(this);
// Execite FIFO queue
// Execute FIFO queue
run_FIFO();
}
}
@ -716,8 +732,9 @@ namespace rsx
rsx::method_registers.clip_plane_5_enabled(),
};
s32 clip_enabled_flags[8] = {};
f32 clip_distance_factors[8] = {};
u8 data_block[64];
s32* clip_enabled_flags = reinterpret_cast<s32*>(data_block);
f32* clip_distance_factors = reinterpret_cast<f32*>(data_block + 32);
for (int index = 0; index < 6; ++index)
{
@ -743,8 +760,7 @@ namespace rsx
}
}
memcpy(buffer, clip_enabled_flags, 32);
memcpy((char*)buffer + 32, clip_distance_factors, 32);
memcpy(buffer, data_block, 2 * 8 * sizeof(u32));
}
/**
@ -814,16 +830,11 @@ namespace rsx
u32 *dst = static_cast<u32*>(buffer);
stream_vector(dst, (u32&)fog0, (u32&)fog1, rop_control, (u32&)alpha_ref);
stream_vector(dst + 4, alpha_func, fog_mode, (u32&)wpos_scale, (u32&)wpos_bias);
size_t offset = 8;
for (int index = 0; index < 16; ++index)
{
stream_vector(&dst[offset],
(u32&)fragment_program.texture_scale[index][0], (u32&)fragment_program.texture_scale[index][1],
(u32&)fragment_program.texture_scale[index][2], (u32&)fragment_program.texture_scale[index][3]);
offset += 4;
}
// Fills `buffer` with the per-texture scale parameters of the active fragment program.
// Destination must hold at least 16 * 4 floats (256 bytes); the layout matches the
// TextureParametersBuffer UBO ("vec4 texture_parameters[16]") emitted by the GL/VK
// fragment decompilers, which is bound with a 256-byte range.
void thread::fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program)
{
// texture_scale is declared as float[16][4]; one raw copy transfers all 16 vec4 entries.
memcpy(buffer, fragment_program.texture_scale, 16 * 4 * sizeof(float));
}
void thread::write_inline_array_to_buffer(void *dst_buffer)
@ -2021,14 +2032,22 @@ namespace rsx
}
// Fill the data
// Each descriptor field is 64 bits wide
// [0-8] attribute stride\n"
// [8-20] attribute divisor\n"
// [20-21] swap bytes flag\n"
// [21-22] volatile flag\n"
// [22-24] frequency op\n"
// [24-27] attribute type\n"
// [27-30] attribute size\n"
memset(buffer, 0, 256);
const s32 swap_storage_mask = (1 << 8);
const s32 volatile_storage_mask = (1 << 9);
const s32 default_frequency_mask = (1 << 10);
const s32 repeating_frequency_mask = (3 << 10);
const s32 input_function_modulo_mask = (1 << 12);
const s32 input_divisor_mask = (0xFFFF << 13);
const s32 swap_storage_mask = (1 << 20);
const s32 volatile_storage_mask = (1 << 21);
const s32 default_frequency_mask = (1 << 22);
const s32 division_op_frequency_mask = (2 << 22);
const s32 modulo_op_frequency_mask = (3 << 22);
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@ -2114,11 +2133,14 @@ namespace rsx
}
default:
{
if (modulo_mask & (1 << index))
attributes |= input_function_modulo_mask;
verify(HERE), frequency <= 4095u;
attributes |= repeating_frequency_mask;
attributes |= (frequency << 13) & input_divisor_mask;
if (modulo_mask & (1 << index))
attributes |= modulo_op_frequency_mask;
else
attributes |= division_op_frequency_mask;
attributes |= (frequency << 8);
break;
}
}
@ -2144,10 +2166,11 @@ namespace rsx
if (to_swap_bytes) attributes |= swap_storage_mask;
buffer[index * 4 + 0] = static_cast<s32>(type);
buffer[index * 4 + 1] = size;
buffer[index * 4 + 2] = offset_in_block[index];
buffer[index * 4 + 3] = attributes;
attributes |= (static_cast<s32>(type) << 24);
attributes |= (size << 27);
buffer[index * 4 + 0] = attributes;
buffer[index * 4 + 1] = offset_in_block[index];
}
}
@ -2326,6 +2349,9 @@ namespace rsx
{
zcull_ctrl->sync(this);
// Fragment constants may have been updated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
//TODO: On sync every sub-unit should finish any pending tasks
//Might cause zcull lockup due to zombie 'unclaimed reports' which are not forcefully removed currently
//verify (HERE), async_tasks_pending.load() == 0;

View File

@ -88,18 +88,21 @@ namespace rsx
context_clear_all = context_clear_color | context_clear_depth
};
enum pipeline_state : u8
enum pipeline_state : u32
{
fragment_program_dirty = 1,
vertex_program_dirty = 2,
fragment_state_dirty = 4,
vertex_state_dirty = 8,
transform_constants_dirty = 16,
framebuffer_reads_dirty = 32,
fragment_program_dirty = 0x1, // Fragment program changed
vertex_program_dirty = 0x2, // Vertex program changed
fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc)
vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = 0x10, // Transform constants changed
fragment_constants_dirty = 0x20, // Fragment constants changed
framebuffer_reads_dirty = 0x40, // Framebuffer contents changed
fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed
vertex_texture_state_dirty = 0x80, // Vertex texture parameters changed — NOTE(review): copy-pasted comment fixed; value collides with fragment_texture_state_dirty (0x80), likely intended to be 0x100
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
all_dirty = 255
all_dirty = -1u
};
enum FIFO_state : u8
@ -641,6 +644,11 @@ namespace rsx
*/
void fill_fragment_state_buffer(void *buffer, const RSXFragmentProgram &fragment_program);
/**
* Fill buffer with fragment texture parameter constants (texture matrix)
*/
void fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program);
/**
* Write inlined array data to buffer.
* The storage of inlined data looks different from memory stored arrays.

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/Memory/vm.h"
#include "Emu/System.h"
#include "VKFragmentProgram.h"
@ -144,9 +144,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
}
}
OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n";
OS << "{\n";
std::string constants_block;
for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM])
{
if (PT.type == "sampler1D" ||
@ -156,9 +154,21 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
continue;
for (const ParamItem& PI : PT.items)
OS << " " << PT.type << " " << PI.name << ";\n";
{
constants_block += " " + PT.type + " " + PI.name + ";\n";
}
}
if (!constants_block.empty())
{
OS << "layout(std140, set = 0, binding = 3) uniform FragmentConstantsBuffer\n";
OS << "{\n";
OS << constants_block;
OS << "};\n\n";
}
OS << "layout(std140, set = 0, binding = 4) uniform FragmentStateBuffer\n";
OS << "{\n";
OS << " float fog_param0;\n";
OS << " float fog_param1;\n";
OS << " uint rop_control;\n";
@ -167,15 +177,26 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS)
OS << " uint fog_mode;\n";
OS << " float wpos_scale;\n";
OS << " float wpos_bias;\n";
OS << "};\n\n";
OS << "layout(std140, set = 0, binding = 5) uniform TextureParametersBuffer\n";
OS << "{\n";
OS << " vec4 texture_parameters[16];\n";
OS << "};\n";
OS << "};\n\n";
vk::glsl::program_input in;
in.location = FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT;
in.domain = glsl::glsl_fragment_program;
in.name = "FragmentConstantsBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = FRAGMENT_STATE_BIND_SLOT;
in.name = "FragmentStateBuffer";
inputs.push_back(in);
in.location = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
in.name = "TextureParametersBuffer";
inputs.push_back(in);
}

View File

@ -433,11 +433,12 @@ namespace
std::tuple<VkPipelineLayout, VkDescriptorSetLayout> get_shared_pipeline_layout(VkDevice dev)
{
std::array<VkDescriptorSetLayoutBinding, 39> bindings = {};
std::array<VkDescriptorSetLayoutBinding, VK_NUM_DESCRIPTOR_BINDINGS> bindings = {};
size_t idx = 0;
// Vertex buffer
for (int i = 0; i < 16; i++)
// Vertex stream, one stream for cacheable data, one stream for transient data
for (int i = 0; i < 2; i++)
{
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
bindings[idx].descriptorCount = 1;
@ -453,6 +454,20 @@ namespace
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_STATE_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[idx].binding = FRAGMENT_TEXTURE_PARAMS_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
@ -460,7 +475,21 @@ namespace
idx++;
for (int i = 0; i < 16; i++)
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = VERTEX_LAYOUT_BIND_SLOT;
idx++;
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = VERTEX_PARAMS_BIND_SLOT;
idx++;
for (int i = 0; i < rsx::limits::fragment_textures_count; i++)
{
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1;
@ -469,7 +498,7 @@ namespace
idx++;
}
for (int i = 0; i < 4; i++)
for (int i = 0; i < rsx::limits::vertex_textures_count; i++)
{
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[idx].descriptorCount = 1;
@ -478,10 +507,7 @@ namespace
idx++;
}
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
bindings[idx].descriptorCount = 1;
bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
bindings[idx].binding = SCALE_OFFSET_BIND_SLOT;
verify(HERE), idx == VK_NUM_DESCRIPTOR_BINDINGS;
VkDescriptorSetLayoutCreateInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
@ -619,7 +645,11 @@ VKGSRender::VKGSRender() : GSRender()
//VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer");
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
@ -719,10 +749,14 @@ VKGSRender::~VKGSRender()
vk::destroy_global_resources();
//Heaps
m_index_buffer_ring_info.destroy();
m_uniform_buffer_ring_info.destroy();
m_transform_constants_ring_info.destroy();
m_attrib_ring_info.destroy();
m_fragment_env_ring_info.destroy();
m_vertex_env_ring_info.destroy();
m_fragment_texture_params_ring_info.destroy();
m_vertex_layout_ring_info.destroy();
m_fragment_constants_ring_info.destroy();
m_transform_constants_ring_info.destroy();
m_index_buffer_ring_info.destroy();
m_texture_upload_buffer_ring_info.destroy();
//Fallback bindables
@ -938,7 +972,11 @@ void VKGSRender::check_heap_status()
{
if (m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() ||
m_fragment_env_ring_info.is_critical() ||
m_vertex_env_ring_info.is_critical() ||
m_fragment_texture_params_ring_info.is_critical() ||
m_vertex_layout_ring_info.is_critical() ||
m_fragment_constants_ring_info.is_critical() ||
m_transform_constants_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical())
{
@ -963,7 +1001,11 @@ void VKGSRender::check_heap_status()
m_vertex_cache->purge();
m_index_buffer_ring_info.reset_allocation_stats();
m_uniform_buffer_ring_info.reset_allocation_stats();
m_fragment_env_ring_info.reset_allocation_stats();
m_vertex_env_ring_info.reset_allocation_stats();
m_fragment_texture_params_ring_info.reset_allocation_stats();
m_vertex_layout_ring_info.reset_allocation_stats();
m_fragment_constants_ring_info.reset_allocation_stats();
m_transform_constants_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats();
@ -1161,15 +1203,10 @@ void VKGSRender::emit_geometry(u32 sub_index)
if (sub_index == 0)
{
// Load program execution environment
load_program_env(upload_info);
update_descriptors = true;
}
else
{
// Update vertex fetch environment
update_vertex_env(upload_info);
if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
{
/* VkDescriptorSetAllocateInfo alloc_info = {};
@ -1192,10 +1229,13 @@ void VKGSRender::emit_geometry(u32 sub_index)
}
}
// Update vertex fetch parameters
update_vertex_env(upload_info);
if (update_descriptors)
{
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(persistent_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, vk::glsl::program_input_type::input_type_texel_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
}
@ -1203,8 +1243,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
begin_render_pass();
if (!upload_info.index_info)
{
if (draw_call.is_single_draw())
@ -1247,8 +1285,6 @@ void VKGSRender::emit_geometry(u32 sub_index)
}
}
close_render_pass();
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
}
@ -1503,6 +1539,9 @@ void VKGSRender::end()
return;
}
// Load program execution environment
load_program_env();
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
@ -1608,6 +1647,7 @@ void VKGSRender::end()
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
update_draw_state();
begin_render_pass();
u32 sub_index = 0;
rsx::method_registers.current_draw_clause.begin();
@ -1617,6 +1657,7 @@ void VKGSRender::end()
}
while (rsx::method_registers.current_draw_clause.next());
close_render_pass();
vk::leave_uninterruptible();
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
@ -2022,7 +2063,11 @@ void VKGSRender::advance_queued_frames()
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_uniform_buffer_ring_info.get_current_put_pos_minus_one(),
m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
@ -2148,14 +2193,22 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtxconst_heap_ptr;
m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify();
m_uniform_buffer_ring_info.notify();
m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
}
@ -2482,107 +2535,119 @@ bool VKGSRender::load_program()
return m_program != nullptr;
}
void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info)
void VKGSRender::load_program_env()
{
if (!m_program)
{
fmt::throw_exception("Unreachable right now" HERE);
}
if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty))
const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length;
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty);
const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty);
const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty);
const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty);
if (update_vertex_env)
{
const size_t fragment_constants_sz = current_fp_metadata.program_constants_buffer_length;
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
const size_t required_mem = 512 + fragment_buffer_sz;
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t fragment_constants_offset = vertex_state_offset + 512;
//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);
// Vertex state
const auto mem = m_vertex_env_ring_info.alloc<256>(256);
auto buf = (u8*)m_vertex_env_ring_info.map(mem, 160);
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_info.vertex_index_base;
*(reinterpret_cast<u32*>(buf + 132)) = 0; // Reserved
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
//Fragment constants
buf = buf + 512;
if (fragment_constants_sz)
{
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) },
current_fragment_program, vk::sanitize_fp_values());
m_vertex_env_ring_info.unmap();
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 160 };
}
fill_fragment_state_buffer(buf + fragment_constants_sz, current_fragment_program);
m_uniform_buffer_ring_info.unmap();
m_vertex_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 };
m_fragment_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz };
}
if (m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
if (update_transform_constants)
{
//Vertex constants
const size_t vertex_constants_offset = m_transform_constants_ring_info.alloc<256>(8192);
auto buf = m_transform_constants_ring_info.map(vertex_constants_offset, 8192);
// Transform constants
auto mem = m_transform_constants_ring_info.alloc<256>(8192);
auto buf = m_transform_constants_ring_info.map(mem, 8192);
fill_vertex_program_constants_data(buf);
m_transform_constants_ring_info.unmap();
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, vertex_constants_offset, 8192 };
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, mem, 8192 };
}
if (1)//m_graphics_state || old_program != m_program)
if (update_fragment_constants)
{
m_program->bind_uniform(m_vertex_state_buffer_info, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set);
// Fragment constants
if (fragment_constants_size)
{
auto mem = m_fragment_constants_ring_info.alloc<256>(fragment_constants_size);
auto buf = m_fragment_constants_ring_info.map(mem, fragment_constants_size);
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_size) },
current_fragment_program, vk::sanitize_fp_values());
m_fragment_constants_ring_info.unmap();
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, mem, fragment_constants_size };
}
else
{
m_fragment_constants_buffer_info = { m_fragment_constants_ring_info.heap->value, 0, VK_WHOLE_SIZE };
}
}
if (update_fragment_env)
{
auto mem = m_fragment_env_ring_info.alloc<256>(256);
auto buf = m_fragment_env_ring_info.map(mem, 32);
fill_fragment_state_buffer(buf, current_fragment_program);
m_fragment_env_ring_info.unmap();
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, mem, 32 };
}
if (update_fragment_texture_env)
{
auto mem = m_fragment_texture_params_ring_info.alloc<256>(256);
auto buf = m_fragment_texture_params_ring_info.map(mem, 256);
fill_fragment_texture_parameters(buf, current_fragment_program);
m_fragment_texture_params_ring_info.unmap();
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, mem, 256 };
}
//if (1)
{
m_program->bind_uniform(m_vertex_env_buffer_info, VERTEX_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_state_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_constants_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_env_buffer_info, FRAGMENT_STATE_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_texture_params_buffer_info, FRAGMENT_TEXTURE_PARAMS_BIND_SLOT, m_current_frame->descriptor_set);
}
//Clear flags
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty);
const u32 handled_flags = (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty | rsx::pipeline_state::transform_constants_dirty | rsx::pipeline_state::fragment_constants_dirty | rsx::pipeline_state::fragment_texture_state_dirty);
m_graphics_state &= ~handled_flags;
}
void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
{
// Vertex base index = vertex_offset + 132
// Vertex layout = vertex_offset + 160
auto mem = m_vertex_layout_ring_info.alloc<256>(256);
auto buf = (u32*)m_vertex_layout_ring_info.map(mem, 128 + 16);
std::array<s32, 16 * 4> vertex_layout;
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, vertex_layout.data(),
*buf = vertex_info.vertex_index_base;
buf += 4;
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, (s32*)buf,
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
m_vertex_layout_ring_info.unmap();
m_vertex_layout_buffer_info = { m_vertex_layout_ring_info.heap->value, mem, 128 + 16 };
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset + 132, 4, &vertex_info.vertex_index_base);
u32 write_offset = m_vertex_state_buffer_info.offset + 160;
s32 *src_ptr = vertex_layout.data();
for (const auto& placement : m_vertex_layout.attribute_placement)
{
constexpr u32 data_len = 4 * sizeof(s32);
if (placement != rsx::attribute_buffer_placement::none)
{
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, write_offset, data_len, src_ptr);
}
write_offset += data_len;
src_ptr += 4;
}
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT);
m_program->bind_uniform(m_vertex_layout_buffer_info, VERTEX_LAYOUT_BIND_SLOT, m_current_frame->descriptor_set);
}
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
@ -2633,7 +2698,11 @@ void VKGSRender::write_buffers()
void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
{
if (m_attrib_ring_info.dirty() ||
m_uniform_buffer_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() ||
m_vertex_env_ring_info.dirty() ||
m_fragment_texture_params_ring_info.dirty() ||
m_vertex_layout_ring_info.dirty() ||
m_fragment_constants_ring_info.dirty() ||
m_index_buffer_ring_info.dirty() ||
m_transform_constants_ring_info.dirty() ||
m_texture_upload_buffer_ring_info.dirty())
@ -2642,7 +2711,11 @@ void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore>
m_secondary_command_buffer.begin();
m_attrib_ring_info.sync(m_secondary_command_buffer);
m_uniform_buffer_ring_info.sync(m_secondary_command_buffer);
m_fragment_env_ring_info.sync(m_secondary_command_buffer);
m_vertex_env_ring_info.sync(m_secondary_command_buffer);
m_fragment_texture_params_ring_info.sync(m_secondary_command_buffer);
m_vertex_layout_ring_info.sync(m_secondary_command_buffer);
m_fragment_constants_ring_info.sync(m_secondary_command_buffer);
m_index_buffer_ring_info.sync(m_secondary_command_buffer);
m_transform_constants_ring_info.sync(m_secondary_command_buffer);
m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);

View File

@ -36,8 +36,9 @@ namespace vk
//NOTE: Texture uploads can be huge, up to 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_UBO_RING_BUFFER_SIZE_M 16
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
#define VK_FRAGMENT_CONSTANTS_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_MAX_ASYNC_CB_COUNT 64
@ -161,8 +162,12 @@ struct frame_context_t
//Heap pointers
s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0;
s64 vtxconst_heap_ptr = 0;
s64 vtx_env_heap_ptr = 0;
s64 frag_env_heap_ptr = 0;
s64 frag_const_heap_ptr = 0;
s64 vtx_const_heap_ptr = 0;
s64 vtx_layout_heap_ptr = 0;
s64 frag_texparam_heap_ptr = 0;
s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0;
@ -177,9 +182,13 @@ struct frame_context_t
used_descriptors = other.used_descriptors;
attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr;
vtxconst_heap_ptr = other.vtxconst_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr;
vtx_env_heap_ptr = other.vtx_env_heap_ptr;
frag_env_heap_ptr = other.frag_env_heap_ptr;
vtx_layout_heap_ptr = other.vtx_layout_heap_ptr;
frag_texparam_heap_ptr = other.frag_texparam_heap_ptr;
frag_const_heap_ptr = other.frag_const_heap_ptr;
vtx_const_heap_ptr = other.vtx_const_heap_ptr;
index_heap_ptr = other.index_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr;
}
@ -190,11 +199,15 @@ struct frame_context_t
std::swap(samplers_to_clean, other.samplers_to_clean);
}
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
void tag_frame_end(s64 attrib_loc, s64 vtxenv_loc, s64 fragenv_loc, s64 vtxlayout_loc, s64 fragtex_loc, s64 fragconst_loc,s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
{
attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc;
vtxconst_heap_ptr = vtxconst_loc;
vtx_env_heap_ptr = vtxenv_loc;
frag_env_heap_ptr = fragenv_loc;
vtx_layout_heap_ptr = vtxlayout_loc;
frag_texparam_heap_ptr = fragtex_loc;
frag_const_heap_ptr = fragconst_loc;
vtx_const_heap_ptr = vtxconst_loc;
index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc;
@ -326,15 +339,22 @@ private:
u64 m_last_heap_sync_time = 0;
u32 m_texbuffer_view_size = 0;
vk::vk_data_heap m_attrib_ring_info;
vk::vk_data_heap m_uniform_buffer_ring_info;
vk::vk_data_heap m_transform_constants_ring_info;
vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info;
vk::data_heap m_attrib_ring_info; // Vertex data
vk::data_heap m_fragment_constants_ring_info; // Fragment program constants
vk::data_heap m_transform_constants_ring_info; // Transform program constants
vk::data_heap m_fragment_env_ring_info; // Fragment environment params
vk::data_heap m_vertex_env_ring_info; // Vertex environment params
vk::data_heap m_fragment_texture_params_ring_info; // Fragment texture params
vk::data_heap m_vertex_layout_ring_info; // Vertex layout structure
vk::data_heap m_index_buffer_ring_info; // Index data
vk::data_heap m_texture_upload_buffer_ring_info; // Texture upload heap
VkDescriptorBufferInfo m_vertex_state_buffer_info;
VkDescriptorBufferInfo m_vertex_env_buffer_info;
VkDescriptorBufferInfo m_fragment_env_buffer_info;
VkDescriptorBufferInfo m_vertex_constants_buffer_info;
VkDescriptorBufferInfo m_fragment_state_buffer_info;
VkDescriptorBufferInfo m_fragment_constants_buffer_info;
VkDescriptorBufferInfo m_vertex_layout_buffer_info;
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info;
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage
@ -404,7 +424,7 @@ private:
vk::vertex_upload_info upload_vertex_data();
bool load_program();
void load_program_env(const vk::vertex_upload_info& upload_info);
void load_program_env();
void update_vertex_env(const vk::vertex_upload_info& upload_info);
public:

View File

@ -34,12 +34,17 @@
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
#define OCCLUSION_MAX_POOL_SIZE 8192
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 1
#define SCALE_OFFSET_BIND_SLOT 0
#define TEXTURES_FIRST_BIND_SLOT 19
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 35 //19+16
#define VERTEX_PARAMS_BIND_SLOT 0
#define VERTEX_LAYOUT_BIND_SLOT 1
#define VERTEX_CONSTANT_BUFFERS_BIND_SLOT 2
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 3
#define FRAGMENT_STATE_BIND_SLOT 4
#define FRAGMENT_TEXTURE_PARAMS_BIND_SLOT 5
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 6
#define TEXTURES_FIRST_BIND_SLOT 8
#define VERTEX_TEXTURES_FIRST_BIND_SLOT 24 //8+16
#define VK_NUM_DESCRIPTOR_BINDINGS (VERTEX_TEXTURES_FIRST_BIND_SLOT + 4)
namespace rsx
{
@ -80,7 +85,7 @@ namespace vk
class command_buffer;
struct image;
struct buffer;
struct vk_data_heap;
struct data_heap;
class mem_allocator_base;
struct memory_type_mapping;
struct gpu_formats_support;
@ -131,7 +136,7 @@ namespace vk
*/
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap);
VkImageAspectFlags flags, vk::data_heap &upload_heap);
//Other texture management helpers
void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, VkImageSubresourceRange range);
@ -2723,12 +2728,14 @@ public:
namespace glsl
{
enum program_input_type
enum program_input_type : u32
{
input_type_uniform_buffer = 0,
input_type_texel_buffer = 1,
input_type_texture = 2,
input_type_storage_buffer = 3
input_type_storage_buffer = 3,
input_type_max_enum = 4
};
struct bound_sampler
@ -2834,8 +2841,9 @@ public:
class program
{
std::vector<program_input> uniforms;
std::array<std::vector<program_input>, input_type_max_enum> uniforms;
VkDevice m_device;
public:
VkPipeline pipeline;
u64 attribute_location_mask;
@ -2848,10 +2856,10 @@ public:
program& load_uniforms(::glsl::program_domain domain, const std::vector<program_input>& inputs);
bool has_uniform(std::string uniform_name);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set);
bool has_uniform(program_input_type type, const std::string &uniform_name);
void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set);
void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set);
void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set);
@ -2859,7 +2867,7 @@ public:
};
}
struct vk_data_heap : public data_heap
struct data_heap : public ::data_heap
{
std::unique_ptr<buffer> heap;
bool mapped = false;
@ -2874,7 +2882,7 @@ public:
void create(VkBufferUsageFlags usage, size_t size, const char *name = "unnamed", size_t guard = 0x10000)
{
data_heap::init(size, name, guard);
::data_heap::init(size, name, guard);
const auto device = get_current_renderer();
const auto memory_map = device->get_memory_mapping();

View File

@ -1,4 +1,4 @@
#pragma once
#pragma once
#include "VKHelpers.h"
#include "VKVertexProgram.h"
#include "VKFragmentProgram.h"
@ -26,8 +26,8 @@ namespace vk
std::unordered_map<VkRenderPass, std::unique_ptr<vk::glsl::program>> m_program_cache;
std::unique_ptr<vk::sampler> m_sampler;
std::unique_ptr<vk::framebuffer> m_draw_fbo;
vk_data_heap m_vao;
vk_data_heap m_ubo;
vk::data_heap m_vao;
vk::data_heap m_ubo;
vk::render_device* m_device = nullptr;
std::string vs_src;
@ -574,7 +574,7 @@ namespace vk
}
vk::image_view* upload_simple_texture(vk::render_device &dev, vk::command_buffer &cmd,
vk::vk_data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid)
vk::data_heap& upload_heap, u64 key, int w, int h, bool font, bool temp, void *pixel_src, u32 owner_uid)
{
const VkFormat format = (font) ? VK_FORMAT_R8_UNORM : VK_FORMAT_B8G8R8A8_UNORM;
const u32 pitch = (font) ? w : w * 4;
@ -627,7 +627,7 @@ namespace vk
return result;
}
void create(vk::command_buffer &cmd, vk::vk_data_heap &upload_heap)
void create(vk::command_buffer &cmd, vk::data_heap &upload_heap)
{
auto& dev = cmd.get_command_pool().get_owner();
overlay_pass::create(dev);
@ -674,7 +674,7 @@ namespace vk
}
}
vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap)
vk::image_view* find_font(rsx::overlays::font *font, vk::command_buffer &cmd, vk::data_heap &upload_heap)
{
u64 key = (u64)font;
auto found = view_cache.find(key);
@ -686,7 +686,7 @@ namespace vk
true, false, font->glyph_data.data(), UINT32_MAX);
}
vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::vk_data_heap &upload_heap, u32 owner_uid)
vk::image_view* find_temp_image(rsx::overlays::image_info *desc, vk::command_buffer &cmd, vk::data_heap &upload_heap, u32 owner_uid)
{
u64 key = (u64)desc;
auto found = temp_view_cache.find(key);
@ -735,7 +735,7 @@ namespace vk
}
void run(vk::command_buffer &cmd, u16 w, u16 h, vk::framebuffer* target, VkRenderPass render_pass,
vk::vk_data_heap &upload_heap, rsx::overlays::overlay &ui)
vk::data_heap &upload_heap, rsx::overlays::overlay &ui)
{
m_scale_offset = color4f((f32)ui.virtual_width, (f32)ui.virtual_height, 1.f, 1.f);
m_time = (f32)(get_system_time() / 1000) * 0.005f;

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "VKHelpers.h"
namespace vk
@ -23,23 +23,17 @@ namespace vk
program& program::load_uniforms(program_domain domain, const std::vector<program_input>& inputs)
{
std::vector<program_input> store = uniforms;
uniforms.resize(0);
for (auto &item : store)
{
uniforms.push_back(item);
}
for (auto &item : inputs)
uniforms.push_back(item);
{
uniforms[item.type].push_back(item);
}
return *this;
}
bool program::has_uniform(std::string uniform_name)
bool program::has_uniform(program_input_type type, const std::string &uniform_name)
{
for (auto &uniform : uniforms)
for (const auto &uniform : uniforms[type])
{
if (uniform.name == uniform_name)
return true;
@ -48,20 +42,25 @@ namespace vk
return false;
}
void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, std::string uniform_name, VkDescriptorSet &descriptor_set)
void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorSet &descriptor_set)
{
for (auto &uniform : uniforms)
for (const auto &uniform : uniforms[program_input_type::input_type_texture])
{
if (uniform.name == uniform_name)
{
VkWriteDescriptorSet descriptor_writer = {};
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptor_writer.dstSet = descriptor_set;
descriptor_writer.descriptorCount = 1;
descriptor_writer.pImageInfo = &image_descriptor;
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptor_writer.dstArrayElement = 0;
descriptor_writer.dstBinding = uniform.location;
const VkWriteDescriptorSet descriptor_writer =
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
nullptr, // pNext
descriptor_set, // dstSet
uniform.location, // dstBinding
0, // dstArrayElement
1, // descriptorCount
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType
&image_descriptor, // pImageInfo
nullptr, // pBufferInfo
nullptr // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << uniform.location);
@ -77,20 +76,25 @@ namespace vk
bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, descriptor_set);
}
void program::bind_uniform(const VkBufferView &buffer_view, const std::string &binding_name, VkDescriptorSet &descriptor_set)
void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set)
{
for (auto &uniform : uniforms)
for (const auto &uniform : uniforms[type])
{
if (uniform.name == binding_name)
{
VkWriteDescriptorSet descriptor_writer = {};
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptor_writer.dstSet = descriptor_set;
descriptor_writer.descriptorCount = 1;
descriptor_writer.pTexelBufferView = &buffer_view;
descriptor_writer.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
descriptor_writer.dstArrayElement = 0;
descriptor_writer.dstBinding = uniform.location;
const VkWriteDescriptorSet descriptor_writer =
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
nullptr, // pNext
descriptor_set, // dstSet
uniform.location, // dstBinding
0, // dstArrayElement
1, // descriptorCount
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,// descriptorType
nullptr, // pImageInfo
nullptr, // pBufferInfo
&buffer_view // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << uniform.location);
@ -103,14 +107,19 @@ namespace vk
void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorType type, VkDescriptorSet &descriptor_set)
{
VkWriteDescriptorSet descriptor_writer = {};
descriptor_writer.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptor_writer.dstSet = descriptor_set;
descriptor_writer.descriptorCount = 1;
descriptor_writer.pBufferInfo = &buffer_descriptor;
descriptor_writer.descriptorType = type;
descriptor_writer.dstArrayElement = 0;
descriptor_writer.dstBinding = binding_point;
const VkWriteDescriptorSet descriptor_writer =
{
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
nullptr, // pNext
descriptor_set, // dstSet
binding_point, // dstBinding
0, // dstArrayElement
1, // descriptorCount
type, // descriptorType
nullptr, // pImageInfo
&buffer_descriptor, // pBufferInfo
nullptr // pTexelBufferView
};
vkUpdateDescriptorSets(m_device, 1, &descriptor_writer, 0, nullptr);
attribute_location_mask |= (1ull << binding_point);
@ -121,10 +130,9 @@ namespace vk
if (vertex_attributes_mask)
return vertex_attributes_mask;
for (auto &uniform : uniforms)
for (const auto &uniform : uniforms[program_input_type::input_type_texel_buffer])
{
if (uniform.domain == program_domain::glsl_vertex_program &&
uniform.type == program_input_type::input_type_texel_buffer)
if (uniform.domain == program_domain::glsl_vertex_program)
{
vertex_attributes_mask |= (1ull << (uniform.location - VERTEX_BUFFERS_FIRST_BIND_SLOT));
}

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "VKHelpers.h"
#include "../GCM.h"
#include "../RSXThread.h"
@ -427,7 +427,7 @@ namespace vk
void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, vk::image* dst_image,
const std::vector<rsx_subresource_layout>& subresource_layout, int format, bool is_swizzled, u16 mipmap_count,
VkImageAspectFlags flags, vk::vk_data_heap &upload_heap)
VkImageAspectFlags flags, vk::data_heap &upload_heap)
{
u32 mipmap_level = 0;
u32 block_in_pixel = get_format_block_size_in_texel(format);

View File

@ -427,7 +427,7 @@ namespace vk
vk::memory_type_mapping m_memory_types;
vk::gpu_formats_support m_formats_support;
VkQueue m_submit_queue;
vk_data_heap* m_texture_upload_heap;
vk::data_heap* m_texture_upload_heap;
//Stuff that has been dereferenced goes into these
std::list<discarded_storage> m_discardable_storage;
@ -956,7 +956,7 @@ namespace vk
public:
using baseclass::texture_cache;
void initialize(vk::render_device& device, VkQueue submit_queue, vk::vk_data_heap& upload_heap)
void initialize(vk::render_device& device, VkQueue submit_queue, vk::data_heap& upload_heap)
{
m_device = &device;
m_memory_types = device.get_memory_mapping();

View File

@ -63,7 +63,7 @@ namespace
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
const rsx::draw_clause& clause, u32 vertex_count,
vk::vk_data_heap& m_index_buffer_ring_info)
vk::data_heap& m_index_buffer_ring_info)
{
u32 index_count = get_index_count(clause.primitive, vertex_count);
u32 upload_size = index_count * sizeof(u16);
@ -91,7 +91,7 @@ namespace
struct draw_command_visitor
{
draw_command_visitor(vk::vk_data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout)
draw_command_visitor(vk::data_heap& index_buffer_ring_info, rsx::vertex_input_layout& layout)
: m_index_buffer_ring_info(index_buffer_ring_info)
, m_vertex_layout(layout)
{
@ -226,7 +226,7 @@ namespace
}
private:
vk::vk_data_heap& m_index_buffer_ring_info;
vk::data_heap& m_index_buffer_ring_info;
rsx::vertex_input_layout& m_vertex_layout;
};
}

View File

@ -1,4 +1,4 @@
#include "stdafx.h"
#include "stdafx.h"
#include "Emu/System.h"
#include "VKVertexProgram.h"
@ -28,33 +28,41 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri
void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
{
OS << "#version 450\n\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n";
OS << "#extension GL_ARB_separate_shader_objects : enable\n\n";
OS << "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n";
OS << "{\n";
OS << " mat4 scale_offset_mat;\n";
OS << " ivec4 user_clip_enabled[2];\n";
OS << " vec4 user_clip_factor[2];\n";
OS << " uint transform_branch_bits;\n";
OS << " uint vertex_base_index;\n";
OS << " float point_size;\n";
OS << " float z_near;\n";
OS << " float z_far;\n";
OS << " ivec4 input_attributes[16];\n";
OS << "};\n";
OS << "};\n\n";
OS << "layout(std140, set = 0, binding = 1) uniform VertexLayoutBuffer\n";
OS << "{\n";
OS << " uint vertex_base_index;\n";
OS << " ivec2 input_attributes[16];\n";
OS << "};\n\n";
vk::glsl::program_input in;
in.location = SCALE_OFFSET_BIND_SLOT;
in.location = VERTEX_PARAMS_BIND_SLOT;
in.domain = glsl::glsl_vertex_program;
in.name = "VertexContextBuffer";
in.type = vk::glsl::input_type_uniform_buffer;
inputs.push_back(in);
in.location = VERTEX_LAYOUT_BIND_SLOT;
in.name = "VertexLayoutBuffer";
inputs.push_back(in);
}
void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::vector<ParamType>& inputs)
{
OS << "layout(set=0, binding=3) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable)
OS << "layout(set=0, binding=4) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data)
OS << "layout(set=0, binding=6) uniform usamplerBuffer persistent_input_stream;\n"; //Data stream with persistent vertex data (cacheable)
OS << "layout(set=0, binding=7) uniform usamplerBuffer volatile_input_stream;\n"; //Data stream with per-draw data (registers and immediate draw data)
vk::glsl::program_input in;
in.location = VERTEX_BUFFERS_FIRST_BIND_SLOT;
@ -72,7 +80,7 @@ void VKVertexDecompilerThread::insertInputs(std::stringstream & OS, const std::v
void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector<ParamType> & constants)
{
OS << "layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer\n";
OS << "layout(std140, set=0, binding = 2) uniform VertexConstantsBuffer\n";
OS << "{\n";
OS << " vec4 vc[468];\n";
OS << "};\n\n";

View File

@ -584,18 +584,48 @@ namespace rsx
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
}
void set_surface_dirty_bit(thread* rsx, u32, u32)
void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg)
{
if (reg == NV4097_SET_SURFACE_CLIP_VERTICAL ||
reg == NV4097_SET_SURFACE_CLIP_HORIZONTAL)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
rsx->m_rtts_dirty = true;
rsx->m_framebuffer_state_contested = false;
}
// Handler for NV4097_SET_SURFACE_FORMAT (see the bind<> registration below).
// Marks render targets dirty like set_surface_dirty_bit, but additionally
// checks whether the antialiasing field of the surface format changed.
void set_surface_format(thread* rsx, u32 reg, u32 arg)
{
// Special consideration - antialiasing control can affect ROP state
// aa_mask selects bits [12-15] of the surface format word — presumably the
// antialias mode field; TODO(review) confirm against the NV4097 register layout.
const auto aa_mask = (0xF << 12);
if ((arg & aa_mask) != (method_registers.register_previous_value & aa_mask))
{
// Antialias control has changed, update ROP parameters
rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
}
// Reuse the generic surface-dirty path for the rest of the state change
set_surface_dirty_bit(rsx, reg, arg);
}
// Handler for surface-option registers. Only forces an RTT rebuild when the
// framebuffer state was previously contested; otherwise the write is ignored.
// Register index and argument are unused, hence the unnamed u32 parameters.
void set_surface_options_dirty_bit(thread* rsx, u32, u32)
{
if (rsx->m_framebuffer_state_contested)
rsx->m_rtts_dirty = true;
}
// Handler for registers that feed raster-output (ROP) state (alpha test, fog,
// shader window, etc. — see the bind<> list below). Flags the fragment state
// dirty only when the written value actually differs from the previous one,
// avoiding redundant pipeline rebuilds on no-op writes.
void set_ROP_state_dirty_bit(thread* rsx, u32, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::fragment_state_dirty;
}
}
void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg)
{
if (rsx->in_begin_end)
@ -620,6 +650,22 @@ namespace rsx
}
}
// Handler for registers that feed the vertex pipeline environment (user clip
// control, transform branch bits, clip min/max — see the bind<> list below).
// Flags vertex state dirty only on a real value change; `reg` is accepted to
// match the method-handler signature but is not inspected.
void set_vertex_env_dirty_bit(thread* rsx, u32 reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
// Handler for registers that feed the fragment pipeline environment.
// Mirrors set_vertex_env_dirty_bit but raises the fragment dirty flag;
// skips the update when the register value did not change.
void set_fragment_env_dirty_bit(thread* rsx, u32 reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
}
}
template<u32 index>
struct set_texture_dirty_bit
{
@ -647,6 +693,18 @@ namespace rsx
}
}
};
// Per-index handler template for NV4097_SET_VIEWPORT_SCALE/OFFSET (bound via
// bind_range<> below). Viewport parameters are consumed by the vertex stage
// here, so a changed value marks the vertex state dirty. The template index
// identifies which viewport component register fired; the impl itself does
// not need it (`_reg` is also unused).
template<u32 index>
struct set_viewport_dirty_bit
{
static void impl(thread* rsx, u32 _reg, u32 arg)
{
if (arg != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
}
}
};
}
namespace nv308a
@ -2619,7 +2677,7 @@ namespace rsx
bind<NV4097_SET_CONTEXT_DMA_COLOR_C, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_CONTEXT_DMA_COLOR_D, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_CONTEXT_DMA_ZETA, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_FORMAT, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_FORMAT, nv4097::set_surface_format>();
bind<NV4097_SET_SURFACE_PITCH_A, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_B, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_C, nv4097::set_surface_dirty_bit>();
@ -2660,6 +2718,20 @@ namespace rsx
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>();
bind<NV4097_SET_USER_CLIP_PLANE_CONTROL, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_TRANSFORM_BRANCH_BITS, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_CLIP_MIN, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_CLIP_MAX, nv4097::set_vertex_env_dirty_bit>();
bind<NV4097_SET_ALPHA_FUNC, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ALPHA_REF, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ALPHA_TEST_ENABLE, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_ANTI_ALIASING_CONTROL, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_SHADER_PACKER, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_SHADER_WINDOW, nv4097::set_ROP_state_dirty_bit>();
bind<NV4097_SET_FOG_MODE, nv4097::set_ROP_state_dirty_bit>();
bind_array<NV4097_SET_FOG_PARAMS, 1, 2, nv4097::set_ROP_state_dirty_bit>();
bind_range<NV4097_SET_VIEWPORT_SCALE, 1, 3, nv4097::set_viewport_dirty_bit>();
bind_range<NV4097_SET_VIEWPORT_OFFSET, 1, 3, nv4097::set_viewport_dirty_bit>();
//NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();