rsx: Separate program environment state from program ucode state

- Allows for conservative texture uploads (only slots referenced by the program are processed)
- Allows updating a program object without needlessly re-running full ucode analysis
kd-11 2020-12-03 21:11:32 +03:00 committed by kd-11
parent 15a12afe25
commit 3a0b3a85a5
10 changed files with 469 additions and 445 deletions
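
In outline, the commit splits each single program-dirty flag into a ucode half and a state half. The standalone sketch below (flag names and values mirror rsx::pipeline_state as changed in this diff; the control flow around them is illustrative only) shows why an environment-only change no longer triggers ucode analysis:

#include <cstdint>
#include <cstdio>

enum pipeline_state : uint32_t
{
    fragment_program_ucode_dirty = 0x1, // program binary changed -> full analysis needed
    fragment_program_state_dirty = 0x4, // environment changed -> cheap rebuild only
    fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
};

int main()
{
    uint32_t graphics_state = fragment_program_state_dirty; // e.g. a referenced texture changed

    // prefetch_fragment_program(): ucode analysis runs only when the binary moved
    if (graphics_state & fragment_program_ucode_dirty)
        std::puts("analyse_fragment_program()"); // expensive path, skipped here

    // get_current_fragment_program(): environment rebuild still runs
    if (graphics_state & fragment_program_dirty)
        std::puts("rebuild texture/control state"); // cheap path, taken here
}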

View File

@@ -466,15 +466,6 @@ bool fragment_program_compare::operator()(const RSXFragmentProgram& binary1, con
binary1.shadow_textures != binary2.shadow_textures || binary1.redirected_textures != binary2.redirected_textures)
return false;
for (u8 index = 0; index < 16; ++index)
{
if (binary1.textures_alpha_kill[index] != binary2.textures_alpha_kill[index])
return false;
if (binary1.textures_zfunc[index] != binary2.textures_zfunc[index])
return false;
}
const void* instBuffer1 = binary1.get_data();
const void* instBuffer2 = binary2.get_data();
size_t instIndex = 0;
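
With per-texture alpha-kill and zfunc moved out of program identity, the comparator reduces to the shadow/redirected masks plus the ucode bytes. A condensed illustration follows (the real comparator walks instructions rather than doing a flat memcmp; this signature is invented for the example):

#include <cstdint>
#include <cstring>

// Illustrative only: program identity is now masks + ucode, nothing per-texture.
bool same_fragment_program(const void* ucode1, const void* ucode2, std::size_t ucode_length,
                           uint16_t shadow1, uint16_t shadow2,
                           uint16_t redirected1, uint16_t redirected2)
{
    if (shadow1 != shadow2 || redirected1 != redirected2)
        return false;
    return std::memcmp(ucode1, ucode2, ucode_length) == 0;
}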

View File

@@ -283,8 +283,11 @@ void GLGSRender::load_texture_env()
surface_store_tag = m_rtts.cache_tag;
}
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
continue;
if (!fs_sampler_state[i])
fs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
@@ -309,8 +312,11 @@
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
continue;
if (!vs_sampler_state[i])
vs_sampler_state[i] = std::make_unique<gl::texture_cache::sampled_image_descriptor>();
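
Both replacement loops above use the same sparse-iteration idiom: shift referenced_textures_mask right once per slot and stop as soon as it empties, so a program sampling only texture 0 costs one iteration instead of sixteen. Standalone form of the idiom (mask value illustrative):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t referenced_textures_mask = 0b1010'0001; // slots 0, 5 and 7 referenced

    // Stop as soon as no referenced slot remains above the current index
    for (uint32_t textures_ref = referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
    {
        if (!(textures_ref & 1))
            continue; // slot i not referenced by the program

        std::printf("upload/validate texture slot %u\n", i);
    }
}
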
@@ -341,75 +347,75 @@ void GLGSRender::bind_texture_env()
// Bind textures and resolve external copy operations
gl::command_context cmd{ gl_state };
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (current_fp_metadata.referenced_textures_mask & (1 << i))
if (!(textures_ref & 1))
continue;
_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
gl::texture_view* view = nullptr;
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
gl::texture_view* view = nullptr;
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
if (view = sampler_state->image_handle; !view) [[unlikely]]
{
if (view = sampler_state->image_handle; !view) [[unlikely]]
{
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
}
if (view) [[likely]]
if (view) [[likely]]
{
view->bind();
if (current_fragment_program.redirected_textures & (1 << i))
{
view->bind();
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
if (current_fragment_program.redirected_textures & (1 << i))
{
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
auto root_texture = static_cast<gl::viewable_image*>(view->image());
auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
stencil_view->bind();
}
auto root_texture = static_cast<gl::viewable_image*>(view->image());
auto stencil_view = root_texture->get_view(0xAAE4, rsx::default_remap_vector, gl::image_aspect::stencil);
stencil_view->bind();
}
else
}
else
{
auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
glBindTexture(target, m_null_textures[target]->id());
if (current_fragment_program.redirected_textures & (1 << i))
{
auto target = gl::get_target(current_fragment_program.get_texture_dimension(i));
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
glBindTexture(target, m_null_textures[target]->id());
if (current_fragment_program.redirected_textures & (1 << i))
{
_SelectTexture(GL_STENCIL_MIRRORS_START + i);
glBindTexture(target, m_null_textures[target]->id());
}
}
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (current_vp_metadata.referenced_textures_mask & (1 << i))
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
_SelectTexture(GL_VERTEX_TEXTURES_START + i);
if (!(textures_ref & 1))
continue;
if (rsx::method_registers.vertex_textures[i].enabled() &&
sampler_state->validate())
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
_SelectTexture(GL_VERTEX_TEXTURES_START + i);
if (rsx::method_registers.vertex_textures[i].enabled() &&
sampler_state->validate())
{
if (sampler_state->image_handle) [[likely]]
{
if (sampler_state->image_handle) [[likely]]
{
sampler_state->image_handle->bind();
}
else
{
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
}
sampler_state->image_handle->bind();
}
else
{
glBindTexture(GL_TEXTURE_2D, GL_NONE);
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
}
}
else
{
glBindTexture(GL_TEXTURE_2D, GL_NONE);
}
}
}
@@ -584,6 +590,9 @@ void GLGSRender::emit_geometry(u32 sub_index)
void GLGSRender::begin()
{
// Save shader state now before prefetch and loading happens
m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits);
rsx::thread::begin();
if (skip_current_frame || cond_render_ctrl.disable_rendering())
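
The snapshot must happen before rsx::thread::begin() because the prefetch helpers consume (clear) the ucode-dirty bits as they run; captured afterwards, m_interpreter_state would always read zero. A minimal demonstration of the ordering:

#include <cassert>
#include <cstdint>

int main()
{
    constexpr uint32_t ucode_dirty = 0x1;
    uint32_t graphics_state = ucode_dirty;                   // a shader program was invalidated

    const uint32_t snapshot = graphics_state & ucode_dirty;  // capture first
    graphics_state &= ~ucode_dirty;                          // prefetch_*_program() clears the bit

    assert(snapshot == ucode_dirty);                         // the interpreter still sees the event
}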

View File

@@ -648,7 +648,7 @@ bool GLGSRender::load_program()
{
const auto shadermode = g_cfg.video.shadermode.get();
if ((m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)))
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;

View File

@@ -279,8 +279,6 @@ struct RSXFragmentProgram
u32 texcoord_control_mask = 0;
float texture_scale[16][4];
u8 textures_alpha_kill[16];
u8 textures_zfunc[16];
bool valid = false;
@@ -303,8 +301,6 @@ struct RSXFragmentProgram
RSXFragmentProgram()
{
std::memset(texture_scale, 0, sizeof(float) * 16 * 4);
std::memset(textures_alpha_kill, 0, sizeof(u8) * 16);
std::memset(textures_zfunc, 0, sizeof(u8) * 16);
}
static RSXFragmentProgram clone(const RSXFragmentProgram& prog)

View File

@@ -361,15 +361,16 @@ namespace rsx
}
}
if (m_graphics_state & rsx::pipeline_state::fragment_program_dirty)
if (m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty)
{
// Request for update of fragment constants if the program block is invalidated
m_graphics_state |= rsx::pipeline_state::fragment_constants_dirty;
// Request for update of texture parameters if the program is likely to have changed
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
}
// Preload the GPU programs for this draw call if needed
prefetch_vertex_program();
prefetch_fragment_program();
in_begin_end = true;
}
@@ -783,7 +784,15 @@ namespace rsx
void thread::fill_fragment_texture_parameters(void *buffer, const RSXFragmentProgram &fragment_program)
{
memcpy(buffer, fragment_program.texture_scale, 16 * 4 * sizeof(float));
// Copy only the relevant section
if (current_fp_metadata.referenced_textures_mask)
{
const auto start = std::countr_zero(current_fp_metadata.referenced_textures_mask);
const auto end = 16 - std::countl_zero(current_fp_metadata.referenced_textures_mask);
const auto mem_offset = (start * 16);
const auto mem_size = (end - start) * 16;
memcpy(static_cast<u8*>(buffer) + mem_offset, reinterpret_cast<const u8*>(fragment_program.texture_scale) + mem_offset, mem_size);
}
}
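
A worked instance of the span computation above, assuming one float4 row (16 bytes) of texture_scale per slot as in RSXFragmentProgram: for a mask referencing slots 3 and 4 only, 32 bytes are copied instead of 256.

#include <bit>
#include <cstdint>
#include <cstdio>

int main()
{
    const uint16_t mask = 0b0001'1000;             // program references slots 3 and 4

    const int start = std::countr_zero(mask);      // first referenced slot -> 3
    const int end   = 16 - std::countl_zero(mask); // one past the last     -> 5

    const int mem_offset = start * 16;             // 48 bytes skipped
    const int mem_size   = (end - start) * 16;     // 32 bytes copied

    std::printf("copy %d bytes at offset %d (slots %d..%d)\n",
                mem_size, mem_offset, start, end - 1);
}
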
u64 thread::timestamp()
@@ -1490,15 +1499,48 @@ namespace rsx
return true;
}
void thread::get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures, bool skip_vertex_inputs)
void thread::prefetch_fragment_program()
{
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
if (!(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty))
return;
m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty);
m_graphics_state &= ~rsx::pipeline_state::fragment_program_ucode_dirty;
const auto [program_offset, program_location] = method_registers.shader_program_address();
auto data_ptr = vm::base(rsx::get_address(program_offset, program_location, HERE));
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(data_ptr);
current_fragment_program.data = (static_cast<u8*>(data_ptr) + current_fp_metadata.program_start_offset);
current_fragment_program.offset = program_offset + current_fp_metadata.program_start_offset;
current_fragment_program.ucode_length = current_fp_metadata.program_ucode_length;
current_fragment_program.total_length = current_fp_metadata.program_ucode_length + current_fp_metadata.program_start_offset;
current_fragment_program.valid = true;
if (!(m_graphics_state & rsx::pipeline_state::fragment_program_state_dirty))
{
// Verify current texture state is valid
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1)) continue;
if (m_textures_dirty[i])
{
m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
break;
}
}
}
}
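
The re-check exists because freshly analysed ucode can reference a slot whose descriptor went dirty while the previous program ignored it; without it the state rebuild would be skipped. Self-contained illustration (the literal 0x4 stands in for fragment_program_state_dirty):

#include <cstdint>
#include <cstdio>

int main()
{
    bool textures_dirty[16] = {};
    textures_dirty[5] = true;            // slot 5 changed while the old program (mask 0b1) ignored it

    const uint32_t new_mask = 0b10'0001; // the new ucode samples slots 0 and 5

    uint32_t graphics_state = 0;
    for (uint32_t ref = new_mask, i = 0; ref; ref >>= 1, ++i)
    {
        if ((ref & 1) && textures_dirty[i])
        {
            graphics_state |= 0x4;       // fragment_program_state_dirty
            break;
        }
    }
    std::printf("state rebuild needed: %s\n", graphics_state ? "yes" : "no"); // yes
}
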
void thread::prefetch_vertex_program()
{
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty))
return;
m_graphics_state &= ~rsx::pipeline_state::vertex_program_ucode_dirty;
const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = skip_vertex_inputs;
current_vertex_program.skip_vertex_input_check = true;
current_vertex_program.rsx_vertex_inputs.clear();
current_vertex_program.data.reserve(512 * 4);
@@ -1512,59 +1554,38 @@ namespace rsx
current_vertex_program // [out] Program object
);
if (!skip_textures && current_vp_metadata.referenced_textures_mask != 0)
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_state_dirty))
{
for (u32 i = 0; i < rsx::limits::vertex_textures_count; ++i)
// Verify current texture state is valid
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
const auto &tex = rsx::method_registers.vertex_textures[i];
if (tex.enabled() && (current_vp_metadata.referenced_textures_mask & (1 << i)))
if (!(textures_ref & 1)) continue;
if (m_vertex_textures_dirty[i])
{
current_vertex_program.texture_dimensions |= (static_cast<u32>(sampler_descriptors[i]->image_type) << (i << 1));
m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
break;
}
}
}
}
if (!skip_vertex_inputs)
void thread::get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors)
{
if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
return;
verify(HERE), !(m_graphics_state & rsx::pipeline_state::vertex_program_ucode_dirty);
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
if (!(textures_ref & 1)) continue;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
const auto &tex = rsx::method_registers.vertex_textures[i];
if (tex.enabled() && (current_vp_metadata.referenced_textures_mask & (1 << i)))
{
bool enabled = !!(input_mask & (1 << index));
if (!enabled)
continue;
if (rsx::method_registers.vertex_arrays_info[index].size() > 0)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.vertex_arrays_info[index].size(),
rsx::method_registers.vertex_arrays_info[index].frequency(),
!!((modulo_mask >> index) & 0x1),
true,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
else if (vertex_push_buffers[index].vertex_count > 1)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
vertex_push_buffers[index].size,
1,
false,
true,
is_int_type(vertex_push_buffers[index].type), 0 });
}
else if (rsx::method_registers.register_vertex_info[index].size > 0)
{
current_vertex_program.rsx_vertex_inputs.push_back(
{ index,
rsx::method_registers.register_vertex_info[index].size,
rsx::method_registers.register_vertex_info[index].frequency,
!!((modulo_mask >> index) & 0x1),
false,
is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 });
}
current_vertex_program.texture_dimensions |= (static_cast<u32>(sampler_descriptors[i]->image_type) << (i << 1));
}
}
}
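
texture_dimensions packs a 2-bit image type per slot, hence the (i << 1) shift above: slot i occupies bits [2i, 2i+1]. Sketch of the packing with illustrative enum values:

#include <cstdint>
#include <cstdio>

int main()
{
    enum image_type : uint32_t { dim_1d = 0, dim_2d = 1, dim_cube = 2, dim_3d = 3 }; // illustrative values

    uint32_t texture_dimensions = 0;
    texture_dimensions |= (dim_2d   << (0 << 1)); // slot 0 samples a 2D texture
    texture_dimensions |= (dim_cube << (3 << 1)); // slot 3 samples a cubemap

    const auto slot3 = (texture_dimensions >> 6) & 0x3;
    std::printf("slot 3 type: %u\n", slot3); // 2 (cube)
}
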
@@ -1740,44 +1761,42 @@ namespace rsx
if (!(m_graphics_state & rsx::pipeline_state::fragment_program_dirty))
return;
verify(HERE), !(m_graphics_state & rsx::pipeline_state::fragment_program_ucode_dirty);
m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);
auto &result = current_fragment_program = {};
const auto [program_offset, program_location] = method_registers.shader_program_address();
current_fragment_program.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
current_fragment_program.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
current_fragment_program.texture_dimensions = 0;
current_fragment_program.unnormalized_coords = 0;
current_fragment_program.two_sided_lighting = rsx::method_registers.two_side_light_en();
current_fragment_program.redirected_textures = 0;
current_fragment_program.shadow_textures = 0;
result.data = vm::base(rsx::get_address(program_offset, program_location, HERE));
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.get_data());
result.data = (static_cast<u8*>(result.get_data()) + current_fp_metadata.program_start_offset);
result.offset = program_offset + current_fp_metadata.program_start_offset;
result.ucode_length = current_fp_metadata.program_ucode_length;
result.total_length = result.ucode_length + current_fp_metadata.program_start_offset;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
result.unnormalized_coords = 0;
result.two_sided_lighting = rsx::method_registers.two_side_light_en();
result.redirected_textures = 0;
result.shadow_textures = 0;
memset(current_fragment_program.texture_scale, 0, sizeof(current_fragment_program.texture_scale));
if (method_registers.current_draw_clause.primitive == primitive_type::points &&
method_registers.point_sprite_enabled())
{
// Set high word of the control mask to store point sprite control
result.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16;
current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16;
}
for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
auto &tex = rsx::method_registers.fragment_textures[i];
result.texture_scale[i][0] = sampler_descriptors[i]->scale_x;
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
result.texture_scale[i][2] = std::bit_cast<f32>(tex.remap());
if (!(textures_ref & 1)) continue;
if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i)))
auto &tex = rsx::method_registers.fragment_textures[i];
if (tex.enabled())
{
current_fragment_program.texture_scale[i][0] = sampler_descriptors[i]->scale_x;
current_fragment_program.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
current_fragment_program.texture_scale[i][2] = std::bit_cast<f32>(tex.remap());
m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty;
u32 texture_control = 0;
result.texture_dimensions |= (static_cast<u32>(sampler_descriptors[i]->image_type) << (i << 1));
current_fragment_program.texture_dimensions |= (static_cast<u32>(sampler_descriptors[i]->image_type) << (i << 1));
if (tex.alpha_kill_enabled())
{
@@ -1790,7 +1809,7 @@ namespace rsx
const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
if (raw_format & CELL_GCM_TEXTURE_UN)
result.unnormalized_coords |= (1 << i);
current_fragment_program.unnormalized_coords |= (1 << i);
if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR)
{
@@ -1808,8 +1827,8 @@ namespace rsx
// TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that)
u32 control_bits = sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32? (1u << 16) : 0u;
control_bits |= tex.remap() & 0xFFFF;
result.redirected_textures |= (1 << i);
result.texture_scale[i][2] = std::bit_cast<f32>(control_bits);
current_fragment_program.redirected_textures |= (1 << i);
current_fragment_program.texture_scale[i][2] = std::bit_cast<f32>(control_bits);
break;
}
case CELL_GCM_TEXTURE_DEPTH16:
@@ -1818,11 +1837,11 @@ namespace rsx
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
{
const auto compare_mode = tex.zfunc();
if (result.textures_alpha_kill[i] == 0 &&
if (!tex.alpha_kill_enabled() &&
compare_mode < rsx::comparison_function::always &&
compare_mode > rsx::comparison_function::never)
{
result.shadow_textures |= (1 << i);
current_fragment_program.shadow_textures |= (1 << i);
texture_control |= u32(tex.zfunc()) << 8;
}
break;
@@ -1900,12 +1919,12 @@ namespace rsx
#ifdef __APPLE__
texture_control |= (sampler_descriptors[i]->encoded_component_map() << 16);
#endif
result.texture_scale[i][3] = std::bit_cast<f32>(texture_control);
current_fragment_program.texture_scale[i][3] = std::bit_cast<f32>(texture_control);
}
}
//Sanity checks
if (result.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
if (current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
//Check that the depth stage is not disabled
if (!rsx::method_registers.depth_test_enabled())
@@ -1924,7 +1943,7 @@ namespace rsx
address_range::start_length(dst_offset, size))) [[unlikely]]
{
// Data overlaps
m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty;
return true;
}
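
The per-texture control words written above ride inside the float texture_scale array via std::bit_cast, which round-trips the bit pattern losslessly, so scale factors and control flags reach the shader in a single upload. Minimal sketch (flag and remap values illustrative):

#include <bit>
#include <cstdint>
#include <cstdio>

int main()
{
    float texture_scale_row[4] = { 1.f, 1.f, 0.f, 0.f };

    uint32_t texture_control = 0;
    texture_control |= (1u << 16);        // e.g. a depth-float redirect flag (illustrative)
    texture_control |= 0x00E4 & 0xFFFF;   // e.g. a remap word (illustrative value)

    texture_scale_row[3] = std::bit_cast<float>(texture_control); // lossless round-trip

    const auto recovered = std::bit_cast<uint32_t>(texture_scale_row[3]);
    std::printf("control word survives: 0x%08X\n", recovered);
}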

View File

@@ -110,24 +110,28 @@ namespace rsx
enum pipeline_state : u32
{
fragment_program_dirty = 0x1, // Fragment program changed
vertex_program_dirty = 0x2, // Vertex program changed
fragment_state_dirty = 0x4, // Fragment state changed (alpha test, etc)
vertex_state_dirty = 0x8, // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = 0x10, // Transform constants changed
fragment_constants_dirty = 0x20, // Fragment constants changed
framebuffer_reads_dirty = 0x40, // Framebuffer contents changed
fragment_texture_state_dirty = 0x80, // Fragment texture parameters changed
vertex_texture_state_dirty = 0x100, // Fragment texture parameters changed
scissor_config_state_dirty = 0x200, // Scissor region changed
zclip_config_state_dirty = 0x400, // Viewport Z clip changed
fragment_program_ucode_dirty = 0x1, // Fragment program ucode changed
vertex_program_ucode_dirty = 0x2, // Vertex program ucode changed
fragment_program_state_dirty = 0x4, // Fragment program state changed
vertex_program_state_dirty = 0x8, // Vertex program state changed
fragment_state_dirty = 0x10, // Fragment state changed (alpha test, etc)
vertex_state_dirty = 0x20, // Vertex state changed (scale_offset, clip planes, etc)
transform_constants_dirty = 0x40, // Transform constants changed
fragment_constants_dirty = 0x80, // Fragment constants changed
framebuffer_reads_dirty = 0x100, // Framebuffer contents changed
fragment_texture_state_dirty = 0x200, // Fragment texture parameters changed
vertex_texture_state_dirty = 0x400, // Vertex texture parameters changed
scissor_config_state_dirty = 0x800, // Scissor region changed
zclip_config_state_dirty = 0x1000, // Viewport Z clip changed
scissor_setup_invalid = 0x800, // Scissor configuration is broken
scissor_setup_clipped = 0x1000, // Scissor region is cropped by viewport constraint
scissor_setup_invalid = 0x2000, // Scissor configuration is broken
scissor_setup_clipped = 0x4000, // Scissor region is cropped by viewport constraint
polygon_stipple_pattern_dirty = 0x2000, // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = 0x4000, // Line stippling pattern changed
polygon_stipple_pattern_dirty = 0x8000, // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = 0x10000, // Line stippling pattern changed
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
invalidate_zclip_bits = vertex_state_dirty | zclip_config_state_dirty,
memory_barrier_bits = framebuffer_reads_dirty,
@@ -767,7 +771,13 @@ namespace rsx
RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {};
void get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors, bool skip_textures = false, bool skip_vertex_inputs = true);
// Prefetch and analyze the currently active fragment program ucode
void prefetch_fragment_program();
// Prefetch and analyze the currently active vertex program ucode
void prefetch_vertex_program();
void get_current_vertex_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::vertex_textures_count>& sampler_descriptors);
/**
* Gets current fragment program and associated fragment state

View File

@@ -152,8 +152,11 @@ void VKGSRender::load_texture_env()
surface_store_tag = m_rtts.cache_tag;
}
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
continue;
if (!fs_sampler_state[i])
fs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
@@ -289,8 +292,11 @@
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (!(textures_ref & 1))
continue;
if (!vs_sampler_state[i])
vs_sampler_state[i] = std::make_unique<vk::texture_cache::sampled_image_descriptor>();
@@ -357,217 +363,217 @@ void VKGSRender::load_texture_env()
void VKGSRender::bind_texture_env()
{
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (current_fp_metadata.referenced_textures_mask & (1 << i))
if (!(textures_ref & 1))
continue;
vk::image_view* view = nullptr;
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
vk::image_view* view = nullptr;
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
if (view = sampler_state->image_handle; !view)
{
if (view = sampler_state->image_handle; !view)
{
//Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
}
else
{
switch (auto raw = view->image(); raw->current_layout)
{
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
{
// This was used in a cyclic ref before, but is missing a barrier
// No need for a full stall, use a custom barrier instead
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
}
}
}
if (view) [[likely]]
{
m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set);
if (current_fragment_program.redirected_textures & (1 << i))
{
// Stencil mirror required
auto root_image = static_cast<vk::viewable_image*>(view->image());
auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
if (!m_stencil_mirror_sampler)
{
m_stencil_mirror_sampler = std::make_unique<vk::sampler>(*m_device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_FALSE, 0.f, 1.f, 0.f, 0.f,
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,
VK_BORDER_COLOR_INT_OPAQUE_BLACK);
}
m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set,
true);
}
//Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
}
else
{
const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
switch (auto raw = view->image(); raw->current_layout)
{
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
{
// This was used in a cyclic ref before, but is missing a barrier
// No need for a full stall, use a custom barrier instead
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
}
}
}
if (view) [[likely]]
{
m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set);
if (current_fragment_program.redirected_textures & (1 << i))
{
// Stencil mirror required
auto root_image = static_cast<vk::viewable_image*>(view->image());
auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT);
if (!m_stencil_mirror_sampler)
{
m_stencil_mirror_sampler = std::make_unique<vk::sampler>(*m_device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_FALSE, 0.f, 1.f, 0.f, 0.f,
VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST,
VK_BORDER_COLOR_INT_OPAQUE_BLACK);
}
m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set,
true);
}
}
else
{
const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set);
if (current_fragment_program.redirected_textures & (1 << i))
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set);
if (current_fragment_program.redirected_textures & (1 << i))
{
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_fragment_program,
m_current_frame->descriptor_set,
true);
}
m_current_frame->descriptor_set,
true);
}
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
for (u32 textures_ref = current_vp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (current_vp_metadata.referenced_textures_mask & (1 << i))
if (!(textures_ref & 1))
continue;
if (!rsx::method_registers.vertex_textures[i].enabled())
{
if (!rsx::method_registers.vertex_textures[i].enabled())
{
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
continue;
}
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->validate())
{
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_vertex_textures_dirty[i] = true;
}
if (!image_ptr)
{
rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i);
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
continue;
}
switch (auto raw = image_ptr->image(); raw->current_layout)
{
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
{
// Custom barrier, see similar block in FS stage
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
}
m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout },
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
continue;
}
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->validate())
{
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_vertex_textures_dirty[i] = true;
}
if (!image_ptr)
{
rsx_log.error("Texture upload failed to vtexture index %d. Binding null sampler.", i);
const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
continue;
}
switch (auto raw = image_ptr->image(); raw->current_layout)
{
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
{
// Custom barrier, see similar block in FS stage
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
}
m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout },
i,
::glsl::program_domain::glsl_vertex_program,
m_current_frame->descriptor_set);
}
}
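
For reference, the targeted GENERAL -> SHADER_READ_ONLY_OPTIMAL transition that vk::insert_image_memory_barrier performs corresponds to this standard Vulkan call (color-aspect case shown; cmd and image are assumed valid handles):

#include <vulkan/vulkan.h>

void transition_general_to_shader_read(VkCommandBuffer cmd, VkImage image)
{
    VkImageMemoryBarrier barrier{};
    barrier.sType               = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    barrier.srcAccessMask       = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;    // producer: RT writes
    barrier.dstAccessMask       = VK_ACCESS_SHADER_READ_BIT;               // consumer: sampling
    barrier.oldLayout           = VK_IMAGE_LAYOUT_GENERAL;
    barrier.newLayout           = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image               = image;
    barrier.subresourceRange    = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };

    // Narrow stage pair instead of a full pipeline stall
    vkCmdPipelineBarrier(cmd,
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        0, 0, nullptr, 0, nullptr, 1, &barrier);
}
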
@@ -605,81 +611,81 @@ void VKGSRender::bind_interpreter_texture_env()
std::advance(end, 16);
std::fill(start, end, fallback);
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i)
{
if (current_fp_metadata.referenced_textures_mask & (1 << i))
if (!(textures_ref & 1))
continue;
vk::image_view* view = nullptr;
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
vk::image_view* view = nullptr;
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
if (view = sampler_state->image_handle; !view)
{
if (view = sampler_state->image_handle; !view)
//Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
}
else
{
switch (auto raw = view->image(); raw->current_layout)
{
//Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
}
else
{
switch (auto raw = view->image(); raw->current_layout)
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
{
default:
//case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_src;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
case VK_IMAGE_LAYOUT_GENERAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage;
if (!sampler_state->is_cyclic_reference)
// This was used in a cyclic ref before, but is missing a barrier
// No need for a full stall, use a custom barrier instead
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
// This was used in a cyclic ref before, but is missing a barrier
// No need for a full stall, use a custom barrier instead
VkPipelineStageFlags src_stage;
VkAccessFlags src_access;
if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT)
{
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
else
{
src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
vk::insert_image_memory_barrier(
*m_current_command_buffer,
raw->value,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
src_access, VK_ACCESS_SHADER_READ_BIT,
{ raw->aspect(), 0, 1, 0, 1 });
raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference;
raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
break;
}
}
}
if (view)
{
const int offsets[] = { 0, 16, 48, 32 };
auto& sampled_image_info = texture_env[offsets[static_cast<u32>(sampler_state->image_type)] + i];
sampled_image_info = { fs_sampler_handles[i]->value, view->value, view->image()->current_layout };
}
if (view)
{
const int offsets[] = { 0, 16, 48, 32 };
auto& sampled_image_info = texture_env[offsets[static_cast<u32>(sampler_state->image_type)] + i];
sampled_image_info = { fs_sampler_handles[i]->value, view->value, view->image()->current_layout };
}
}
@@ -867,6 +873,9 @@ void VKGSRender::emit_geometry(u32 sub_index)
void VKGSRender::begin()
{
// Save shader state now before prefetch and loading happens
m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits);
rsx::thread::begin();
if (skip_current_frame || swapchain_unavailable || cond_render_ctrl.disable_rendering())

View File

@@ -1467,7 +1467,7 @@ void VKGSRender::do_local_task(rsx::FIFO_state state)
bool VKGSRender::load_program()
{
if ((m_interpreter_state = (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)))
if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{
get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid;

View File

@@ -414,8 +414,8 @@ namespace rsx
u16 fp_lighting_flags;
u16 fp_shadow_textures;
u16 fp_redirected_textures;
u16 fp_alphakill_mask;
u64 fp_zfunc_mask;
u16 unused_0; // Retained for binary compatibility
u64 unused_1; // Retained for binary compatibility
pipeline_storage_type pipeline_properties;
};
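
The placeholder fields keep the serialized cache entry byte-identical; shrinking the struct would shift every trailing field and orphan existing on-disk pipeline caches. Illustrative layout (not the real block):

#include <cstdint>

struct cache_entry_tail // illustrative, not the actual serialized block
{
    uint16_t fp_shadow_textures;
    uint16_t fp_redirected_textures;
    uint16_t unused_0; // was fp_alphakill_mask
    uint64_t unused_1; // was fp_zfunc_mask
};

// On common 64-bit ABIs: 3*2 bytes + 2 bytes padding + 8 bytes = 16 bytes,
// exactly what the old fields occupied.
static_assert(sizeof(cache_entry_tail) == 16, "on-disk layout must stay stable");
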
@@ -665,8 +665,6 @@ namespace rsx
state_hash ^= rpcs3::hash_base<u16>(data.fp_lighting_flags);
state_hash ^= rpcs3::hash_base<u16>(data.fp_shadow_textures);
state_hash ^= rpcs3::hash_base<u16>(data.fp_redirected_textures);
state_hash ^= rpcs3::hash_base<u16>(data.fp_alphakill_mask);
state_hash ^= rpcs3::hash_base<u64>(data.fp_zfunc_mask);
std::string pipeline_file_name = fmt::format("%llX+%llX+%llX+%llX.bin", data.vertex_program_hash, data.fragment_program_hash, data.pipeline_storage_hash, state_hash);
std::string pipeline_path = root_path + "/pipelines/" + pipeline_class_name + "/" + version_prefix + "/" + pipeline_file_name;
@@ -740,12 +738,6 @@ namespace rsx
fp.shadow_textures = data.fp_shadow_textures;
fp.redirected_textures = data.fp_redirected_textures;
for (u8 index = 0; index < 16; ++index)
{
fp.textures_alpha_kill[index] = (data.fp_alphakill_mask & (1 << index))? 1: 0;
fp.textures_zfunc[index] = (data.fp_zfunc_mask >> (index << 2)) & 0xF;
}
return std::make_tuple(pipeline, vp, fp);
}
@@ -790,12 +782,6 @@ namespace rsx
data_block.fp_shadow_textures = fp.shadow_textures;
data_block.fp_redirected_textures = fp.redirected_textures;
for (u8 index = 0; index < 16; ++index)
{
data_block.fp_alphakill_mask |= u32(fp.textures_alpha_kill[index] & 0x1) << index;
data_block.fp_zfunc_mask |= u64(fp.textures_zfunc[index] & 0xF) << (index << 2);
}
return data_block;
}
};

View File

@@ -481,7 +481,7 @@ namespace rsx
stream_data_to_memory_swapped_u32<true>(&rsx::method_registers.transform_program[load_pos * 4 + index % 4]
, vm::base(rsx->fifo_ctrl->get_current_arg_ptr()), rcount, 4);
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
rsx->fifo_ctrl->skip_methods(count - 1);
}
@@ -491,7 +491,7 @@ namespace rsx
{
if (method_registers.registers[reg] != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
}
}
@@ -499,7 +499,7 @@ namespace rsx
{
if (method_registers.registers[reg] != method_registers.register_previous_value)
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty | rsx::pipeline_state::fragment_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
}
}
@@ -683,7 +683,7 @@ namespace rsx
void set_shader_program_dirty(thread* rsx, u32, u32)
{
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty;
}
void set_surface_dirty_bit(thread* rsx, u32 reg, u32 arg)
@@ -863,7 +863,7 @@ namespace rsx
if (rsx->current_fp_metadata.referenced_textures_mask & (1 << index))
{
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
}
}
};
@@ -877,7 +877,7 @@ namespace rsx
if (rsx->current_vp_metadata.referenced_textures_mask & (1 << index))
{
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
}
}
};
@@ -3156,6 +3156,10 @@ namespace rsx
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_SHADER_CONTROL, nv4097::notify_state_changed<fragment_program_state_dirty>>();
bind_array<NV4097_SET_TEX_COORD_CONTROL, 1, 10, nv4097::notify_state_changed<fragment_program_state_dirty>>();
bind<NV4097_SET_TWO_SIDE_LIGHT_EN, nv4097::notify_state_changed<fragment_program_state_dirty>>();
bind<NV4097_SET_POINT_SPRITE_CONTROL, nv4097::notify_state_changed<fragment_program_state_dirty>>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
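
nv4097::notify_state_changed<> presumably follows the handler shape of set_shader_program_dirty above, OR-ing its template bits into m_graphics_state; a guessed sketch under that assumption (the real definition lives elsewhere in rpcs3):

#include <cstdint>
using u32 = uint32_t;

struct thread { u32 m_graphics_state = 0; }; // stand-in for rsx::thread

// Assumption: mirrors the (thread*, u32 reg, u32 arg) handler signature used
// by set_shader_program_dirty earlier in this file.
template <u32 state_bits>
void notify_state_changed(thread* rsx, u32 /*reg*/, u32 /*arg*/)
{
    rsx->m_graphics_state |= state_bits;
}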