rsx: Fix and improve fp program data invalidation

This commit is contained in:
Eladash 2020-03-28 13:17:40 +03:00 committed by kd-11
parent 2ed370093e
commit c2c5005278
6 changed files with 46 additions and 23 deletions

View File

@ -51,10 +51,7 @@ namespace rsx
std::unordered_set<u64>& mem_changes = frame_capture.replay_commands.back().memory_state;
// capture fragment shader mem
const u32 shader_program = method_registers.shader_program_address();
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);
const auto [program_offset, program_location] = method_registers.shader_program_address();
const u32 addr = get_address(program_offset, program_location, HERE);
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(vm::base(addr));

View File

@ -230,6 +230,7 @@ struct RSXFragmentProgram
void *addr;
u32 offset;
u32 ucode_length;
u32 total_length;
u32 ctrl;
u16 unnormalized_coords;
u16 redirected_textures;

View File

@ -1585,10 +1585,7 @@ namespace rsx
m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);
auto &result = current_fragment_program = {};
const u32 shader_program = rsx::method_registers.shader_program_address();
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);
const auto [program_offset, program_location] = method_registers.shader_program_address();
result.addr = vm::base(rsx::get_address(program_offset, program_location, HERE));
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
@ -1596,6 +1593,7 @@ namespace rsx
result.addr = (static_cast<u8*>(result.addr) + current_fp_metadata.program_start_offset);
result.offset = program_offset + current_fp_metadata.program_start_offset;
result.ucode_length = current_fp_metadata.program_ucode_length;
result.total_length = result.ucode_length + current_fp_metadata.program_start_offset;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
@ -1737,6 +1735,22 @@ namespace rsx
}
}
// Flags the current fragment program for re-fetch when a memory write lands on it.
//
// dst_dma    - DMA context of the write; only its CELL_GCM_LOCATION_MAIN bit is compared
//              against the location decoded from the shader program register.
// dst_offset - start offset of the written span.
// size       - length of the written span (call sites pass the byte length of the copy).
//
// Returns true when the write overlaps the program data and fragment_program_dirty
// was raised in m_graphics_state; returns false otherwise.
bool thread::invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size)
{
    if (!current_fragment_program.total_length)
    {
        // No program extent has been recorded, so there is nothing a write could overlap.
        // Bail out here instead of constructing a zero-length address_range.
        // NOTE(review): address_range::start_length is defined elsewhere; confirm how it
        // treats a length of 0 if this guard is ever removed.
        return false;
    }

    const auto [shader_offset, shader_dma] = rsx::method_registers.shader_program_address();

    // The write can only touch the program if both live in the same memory location
    if ((dst_dma & CELL_GCM_LOCATION_MAIN) != shader_dma)
    {
        return false;
    }

    // total_length covers everything from the register-provided offset to the end of the ucode
    const auto program_range = address_range::start_length(shader_offset, current_fragment_program.total_length);
    const auto write_range = address_range::start_length(dst_offset, size);

    if (program_range.overlaps(write_range)) [[unlikely]]
    {
        // Data overlaps
        m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
        return true;
    }

    return false;
}
void thread::reset()
{
rsx::method_registers.reset();

View File

@ -711,6 +711,8 @@ namespace rsx
* returns whether surface is a render target and surface pitch in native format
*/
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
public:
bool invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size);
public:
u64 target_rsx_flip_time = 0;

View File

@ -876,16 +876,22 @@ namespace rsx
{
// Move last 32 bits
reinterpret_cast<u32*>(dst)[0] = reinterpret_cast<const u32*>(src)[count - 1];
}
else if (dst_dma & CELL_GCM_LOCATION_MAIN)
{
// May overlap
std::memmove(dst, src, data_length);
rsx->invalidate_fragment_program(dst_dma, dst_offset, 4);
}
else
{
// Never overlaps
std::memcpy(dst, src, data_length);
if (dst_dma & CELL_GCM_LOCATION_MAIN)
{
// May overlap
std::memmove(dst, src, data_length);
}
else
{
// Never overlaps
std::memcpy(dst, src, data_length);
}
rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 4);
}
break;
@ -912,6 +918,7 @@ namespace rsx
{
// Move last 16 bits
dst[0] = convert(src[count - 1]);
rsx->invalidate_fragment_program(dst_dma, dst_offset, 2);
break;
}
@ -920,6 +927,7 @@ namespace rsx
dst[i] = convert(src[i]);
}
rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 2);
break;
}
default:
@ -930,12 +938,6 @@ namespace rsx
//res->release(0);
if (!(dst_dma & CELL_GCM_LOCATION_MAIN))
{
// Set this flag on LOCAL memory transfer
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
}
// Skip "handled methods"
rsx->fifo_ctrl->skip_methods(count - 1);
}
@ -1086,6 +1088,8 @@ namespace rsx
const u32 nb_lines = std::min(clip_h, in_h);
const u32 data_length = nb_lines * src_line_length;
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
if (const auto result = rsx->read_barrier(src_address, data_length, false);
result == rsx::result_zcull_intr)
{
@ -1099,6 +1103,8 @@ namespace rsx
else
{
const u32 data_length = in_pitch * (in_h - 1) + src_line_length;
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
rsx->read_barrier(src_address, data_length, true);
}
@ -1437,6 +1443,8 @@ namespace rsx
const auto write_address = get_address(dst_offset, dst_dma, HERE);
const auto data_length = in_pitch * (line_count - 1) + line_length;
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
result == rsx::result_zcull_intr)
{

View File

@ -1331,9 +1331,10 @@ namespace rsx
return decode<NV4097_SET_VERTEX_DATA_BASE_INDEX>().vertex_data_base_index();
}
// Decodes NV4097_SET_SHADER_PROGRAM and returns it split as { offset, location }:
//  - first:  the raw register value with its two low bits cleared
//  - second: the two low bits of the raw register value, minus one
// This listing is a diff view: the u32-returning signature and the single-line return
// are the previous form, shown next to the std::pair-returning lines that replace them.
u32 shader_program_address() const
std::pair<u32, u32> shader_program_address() const
{
return decode<NV4097_SET_SHADER_PROGRAM>().shader_program_address();
const u32 shader_address = decode<NV4097_SET_SHADER_PROGRAM>().shader_program_address();
// NOTE(review): if the two low bits are zero, the unsigned "- 1" wraps around instead of
// yielding a valid location - confirm the register never holds that encoding.
return { shader_address & ~3, (shader_address & 3) - 1 };
}
u32 transform_program_start() const