rsx: Properly implement raster window offsets

This commit is contained in:
kd-11 2018-03-02 18:41:06 +03:00
parent 0c8e4c0887
commit e230867492
7 changed files with 116 additions and 118 deletions

View File

@ -64,22 +64,11 @@ namespace rsx
GcmTileInfo *tile = nullptr; GcmTileInfo *tile = nullptr;
rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample; rsx::surface_antialiasing aa_mode = rsx::surface_antialiasing::center_1_sample;
u16 raster_offset_x = 0;
u16 raster_offset_y = 0;
u32 raster_address_offset = 0;
virtual image_storage_type get_surface() const = 0; virtual image_storage_type get_surface() const = 0;
virtual u16 get_surface_width() const = 0; virtual u16 get_surface_width() const = 0;
virtual u16 get_surface_height() const = 0; virtual u16 get_surface_height() const = 0;
virtual u16 get_rsx_pitch() const = 0; virtual u16 get_rsx_pitch() const = 0;
virtual u16 get_native_pitch() const = 0; virtual u16 get_native_pitch() const = 0;
void set_raster_offset(u16 x, u16 y, u8 bpp)
{
raster_offset_x = x;
raster_offset_y = y;
raster_address_offset = (y * get_rsx_pitch()) + (x * bpp);
}
}; };
/** /**
@ -772,29 +761,17 @@ namespace rsx
return true; return true;
} }
else else if (crop && info.surface_width > x_offset && info.surface_height > y_offset)
{ {
if (crop) //Forcefully fit the requested region by clipping and scaling //Forcefully fit the requested region by clipping and scaling
{ u16 remaining_width = info.surface_width - x_offset;
u16 remaining_width = info.surface_width - x_offset; u16 remaining_height = info.surface_height - y_offset;
u16 remaining_height = info.surface_height - y_offset;
w = std::min(real_width, remaining_width); w = std::min(real_width, remaining_width);
h = std::min(real_height, remaining_height); h = std::min(real_height, remaining_height);
clipped = true; clipped = true;
return true; return true;
}
if (info.surface_width >= real_width && info.surface_height >= real_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
w = real_width;
h = real_height;
clipped = true;
return true;
}
} }
} }

View File

@ -1525,7 +1525,7 @@ namespace rsx
//TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block //TODO: When framebuffer Y compression is properly handled, this section can be removed. A more accurate framebuffer storage check exists below this block
if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{ {
if (test_framebuffer(texaddr + texptr->raster_address_offset)) if (test_framebuffer(texaddr))
{ {
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, false); return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, false);
} }
@ -1538,7 +1538,7 @@ namespace rsx
if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{ {
if (test_framebuffer(texaddr + texptr->raster_address_offset)) if (test_framebuffer(texaddr))
{ {
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, true); return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, true);
} }
@ -1566,12 +1566,11 @@ namespace rsx
* a bound render target. We can bypass the expensive download in this case * a bound render target. We can bypass the expensive download in this case
*/ */
//TODO: Take framebuffer Y compression into account
const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex_width, tex_height, tex_pitch); const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex_width, tex_height, tex_pitch);
if (rsc.surface) if (rsc.surface)
{ {
//TODO: Check that this region is not cpu-dirty before doing a copy //TODO: Check that this region is not cpu-dirty before doing a copy
if (!test_framebuffer(rsc.base_address + rsc.surface->raster_address_offset)) if (!test_framebuffer(rsc.base_address))
{ {
m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface); m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface);
invalidate_address(rsc.base_address, false, true, std::forward<Args>(extras)...); invalidate_address(rsc.base_address, false, true, std::forward<Args>(extras)...);
@ -1764,14 +1763,14 @@ namespace rsx
src_is_render_target = false; src_is_render_target = false;
} }
if (src_is_render_target && !test_framebuffer(src_subres.base_address + src_subres.surface->raster_address_offset)) if (src_is_render_target && !test_framebuffer(src_subres.base_address))
{ {
m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface);
invalidate_address(src_subres.base_address, false, true, std::forward<Args>(extras)...); invalidate_address(src_subres.base_address, false, true, std::forward<Args>(extras)...);
src_is_render_target = false; src_is_render_target = false;
} }
if (dst_is_render_target && !test_framebuffer(dst_subres.base_address + dst_subres.surface->raster_address_offset)) if (dst_is_render_target && !test_framebuffer(dst_subres.base_address))
{ {
m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface);
invalidate_address(dst_subres.base_address, false, true, std::forward<Args>(extras)...); invalidate_address(dst_subres.base_address, false, true, std::forward<Args>(extras)...);

View File

@ -284,6 +284,47 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
return; return;
} }
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(surface_format);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//Window (raster) offsets
const auto window_offset_x = rsx::method_registers.window_offset_x();
const auto window_offset_y = rsx::method_registers.window_offset_y();
const auto window_clip_width = rsx::method_registers.window_clip_horizontal();
const auto window_clip_height = rsx::method_registers.window_clip_vertical();
if (window_offset_x || window_offset_y)
{
//Window offset is what affects the raster position!
//Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers
//Sampling behavior clearly indicates the addresses are expected to have changed
if (auto clip_type = rsx::method_registers.window_clip_type())
LOG_ERROR(RSX, "Unknown window clip type 0x%X" HERE, clip_type);
for (const auto &index : rsx::utility::get_rtt_indexes(target))
{
if (surface_addresses[index])
{
const u32 window_offset_bytes = (std::max<u32>(pitchs[index], clip_horizontal * aa_factor * bpp) * window_offset_y) + ((aa_factor * bpp) * window_offset_x);
surface_addresses[index] += window_offset_bytes;
}
}
if (depth_address)
{
const auto depth_bpp = depth_format == rsx::surface_depth_format::z16 ? 2 : 4;
depth_address += (std::max<u32>(zeta_pitch, clip_horizontal * aa_factor * depth_bpp) * window_offset_y) + ((aa_factor * depth_bpp) * window_offset_x);
}
}
if ((window_clip_width && window_clip_width != clip_horizontal) ||
(window_clip_height && window_clip_height != clip_vertical))
{
LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
window_clip_width, window_clip_height, clip_horizontal, clip_vertical);
}
if (draw_fbo) if (draw_fbo)
{ {
bool really_changed = false; bool really_changed = false;
@ -323,9 +364,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_offsets = get_offsets(); const auto color_offsets = get_offsets();
const auto color_locations = get_locations(); const auto color_locations = get_locations();
const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(surface_format);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
for (int i = 0; i < rsx::limits::color_buffers_count; ++i) for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{ {
@ -351,10 +389,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
rtt->tile = find_tile(color_offsets[i], color_locations[i]); rtt->tile = find_tile(color_offsets[i], color_locations[i]);
rtt->aa_mode = aa_mode; rtt->aa_mode = aa_mode;
rtt->set_raster_offset(clip_x, clip_y, bpp);
m_gl_texture_cache.notify_surface_changed(surface_addresses[i]); m_gl_texture_cache.notify_surface_changed(surface_addresses[i]);
m_gl_texture_cache.tag_framebuffer(surface_addresses[i]);
m_gl_texture_cache.tag_framebuffer(surface_addresses[i] + rtt->raster_address_offset);
} }
else else
m_surface_info[i] = {}; m_surface_info[i] = {};
@ -372,13 +408,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
} }
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
u8 texel_size = 2;
if (depth_format == rsx::surface_depth_format::z24s8) if (depth_format == rsx::surface_depth_format::z24s8)
{
draw_fbo.depth_stencil = *ds; draw_fbo.depth_stencil = *ds;
texel_size = 4;
}
else else
draw_fbo.depth = *ds; draw_fbo.depth = *ds;
@ -387,10 +418,9 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical }; m_depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical };
ds->aa_mode = aa_mode; ds->aa_mode = aa_mode;
ds->set_raster_offset(clip_x, clip_y, texel_size);
m_gl_texture_cache.notify_surface_changed(depth_address); m_gl_texture_cache.notify_surface_changed(depth_address);
m_gl_texture_cache.tag_framebuffer(depth_address + ds->raster_address_offset); m_gl_texture_cache.tag_framebuffer(depth_address);
} }
else else
m_depth_surface_info = {}; m_depth_surface_info = {};

View File

@ -2580,6 +2580,43 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const auto bpp = get_format_block_size_in_bytes(color_fmt); const auto bpp = get_format_block_size_in_bytes(color_fmt);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2; const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
//Window (raster) offsets
const auto window_offset_x = rsx::method_registers.window_offset_x();
const auto window_offset_y = rsx::method_registers.window_offset_y();
const auto window_clip_width = rsx::method_registers.window_clip_horizontal();
const auto window_clip_height = rsx::method_registers.window_clip_vertical();
if (window_offset_x || window_offset_y)
{
//Window offset is what affects the raster position!
//Tested with Turbo: Super stunt squad that only changes the window offset to declare new framebuffers
//Sampling behavior clearly indicates the addresses are expected to have changed
if (auto clip_type = rsx::method_registers.window_clip_type())
LOG_ERROR(RSX, "Unknown window clip type 0x%X" HERE, clip_type);
for (const auto &index : rsx::utility::get_rtt_indexes(target))
{
if (surface_addresses[index])
{
const u32 window_offset_bytes = (std::max<u32>(surface_pitchs[index], clip_width * aa_factor * bpp) * window_offset_y) + ((aa_factor * bpp) * window_offset_x);
surface_addresses[index] += window_offset_bytes;
}
}
if (zeta_address)
{
const auto depth_bpp = (depth_fmt == rsx::surface_depth_format::z16 ? 2 : 4);
zeta_address += (std::max<u32>(zeta_pitch, clip_width * aa_factor * depth_bpp) * window_offset_y) + ((aa_factor * depth_bpp) * window_offset_x);
}
}
if ((window_clip_width && window_clip_width != clip_width) ||
(window_clip_height && window_clip_height != clip_height))
{
LOG_ERROR(RSX, "Unexpected window clip dimensions: window_clip=%dx%d, surface_clip=%dx%d",
window_clip_width, window_clip_height, clip_width, clip_height);
}
if (m_draw_fbo) if (m_draw_fbo)
{ {
bool really_changed = false; bool really_changed = false;
@ -2668,10 +2705,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
surface->rsx_pitch = surface_pitchs[index]; surface->rsx_pitch = surface_pitchs[index];
surface->aa_mode = aa_mode; surface->aa_mode = aa_mode;
surface->set_raster_offset(clip_x, clip_y, bpp);
m_texture_cache.notify_surface_changed(surface_addresses[index]); m_texture_cache.notify_surface_changed(surface_addresses[index]);
m_texture_cache.tag_framebuffer(surface_addresses[index] + surface->raster_address_offset); m_texture_cache.tag_framebuffer(surface_addresses[index]);
m_draw_buffers_count++; m_draw_buffers_count++;
} }
} }
@ -2686,10 +2722,9 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
ds->rsx_pitch = m_depth_surface_info.pitch; ds->rsx_pitch = m_depth_surface_info.pitch;
ds->aa_mode = aa_mode; ds->aa_mode = aa_mode;
ds->set_raster_offset(clip_x, clip_y, get_pixel_size(rsx::method_registers.surface_depth_fmt()));
m_texture_cache.notify_surface_changed(zeta_address); m_texture_cache.notify_surface_changed(zeta_address);
m_texture_cache.tag_framebuffer(zeta_address + ds->raster_address_offset); m_texture_cache.tag_framebuffer(zeta_address);
} }
if (g_cfg.video.write_color_buffers) if (g_cfg.video.write_color_buffers)

View File

@ -76,7 +76,7 @@ namespace rsx
if (Emu.IsStopped()) if (Emu.IsStopped())
return; return;
const auto tdr = (s64)g_cfg.video.driver_recovery_timeout; const auto tdr = (u64)g_cfg.video.driver_recovery_timeout;
if (tdr == 0) if (tdr == 0)
{ {
//No timeout //No timeout
@ -508,9 +508,10 @@ namespace rsx
rsx->notify_zcull_info_changed(); rsx->notify_zcull_info_changed();
} }
void set_surface_dirty_bit(thread* rsx, u32 _reg, u32) void set_surface_dirty_bit(thread* rsx, u32, u32)
{ {
rsx->m_rtts_dirty = true; rsx->m_rtts_dirty = true;
rsx->m_framebuffer_state_contested = false;
} }
void set_surface_options_dirty_bit(thread* rsx, u32, u32) void set_surface_options_dirty_bit(thread* rsx, u32, u32)
@ -1643,6 +1644,7 @@ namespace rsx
bind<NV4097_SET_SURFACE_PITCH_C, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_C, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_D, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_D, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_SURFACE_PITCH_Z, nv4097::set_surface_dirty_bit>(); bind<NV4097_SET_SURFACE_PITCH_Z, nv4097::set_surface_dirty_bit>();
bind<NV4097_SET_WINDOW_OFFSET, nv4097::set_surface_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_OFFSET, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_OFFSET, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_FORMAT, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_FORMAT, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit>();

View File

@ -257,16 +257,31 @@ namespace rsx
return decode<NV4097_SET_SHADER_WINDOW>().window_shader_height(); return decode<NV4097_SET_SHADER_WINDOW>().window_shader_height();
} }
u16 shader_window_offset_x() const u16 window_offset_x() const
{ {
return decode<NV4097_SET_WINDOW_OFFSET>().window_offset_x(); return decode<NV4097_SET_WINDOW_OFFSET>().window_offset_x();
} }
u16 shader_window_offset_y() const u16 window_offset_y() const
{ {
return decode<NV4097_SET_WINDOW_OFFSET>().window_offset_y(); return decode<NV4097_SET_WINDOW_OFFSET>().window_offset_y();
} }
u32 window_clip_type() const
{
return registers[NV4097_SET_WINDOW_CLIP_TYPE];
}
u32 window_clip_horizontal() const
{
return registers[NV4097_SET_WINDOW_CLIP_HORIZONTAL];
}
u32 window_clip_vertical() const
{
return registers[NV4097_SET_WINDOW_CLIP_HORIZONTAL];
}
bool depth_test_enabled() const bool depth_test_enabled() const
{ {
return decode<NV4097_SET_DEPTH_TEST_ENABLE>().depth_test_enabled(); return decode<NV4097_SET_DEPTH_TEST_ENABLE>().depth_test_enabled();

View File

@ -51,66 +51,6 @@ namespace rsx
clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch); clip_image(dst.get(), src, clip_x, clip_y, clip_w, clip_h, bpp, src_pitch, dst_pitch);
} }
void fill_scale_offset_matrix(void *dest_, bool transpose,
float offset_x, float offset_y, float offset_z,
float scale_x, float scale_y, float scale_z)
{
char *dest = (char*)dest_;
if (transpose)
{
stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, 0);
stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, 0);
stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, 0);
stream_vector(dest + 4 * sizeof(f32) * 3, offset_x, offset_y, offset_z, 1);
}
else
{
stream_vector(dest + 4 * sizeof(f32) * 0, scale_x, 0, 0, offset_x);
stream_vector(dest + 4 * sizeof(f32) * 1, 0, scale_y, 0, offset_y);
stream_vector(dest + 4 * sizeof(f32) * 2, 0, 0, scale_z, offset_z);
stream_vector(dest + 4 * sizeof(f32) * 3, 0.f, 0.f, 0.f, 1.f);
}
}
void fill_window_matrix(void *dest, bool transpose)
{
u16 height = method_registers.shader_window_height();
window_origin origin = method_registers.shader_window_origin();
window_pixel_center pixelCenter = method_registers.shader_window_pixel();
f32 offset_x = f32(method_registers.shader_window_offset_x());
f32 offset_y = f32(method_registers.shader_window_offset_y());
f32 scale_y = 1.0;
if (origin == window_origin::bottom)
{
offset_y = height - offset_y + 1;
scale_y = -1.0f;
}
if (false && pixelCenter == window_pixel_center::half)
{
offset_x += 0.5f;
offset_y += 0.5f;
}
fill_scale_offset_matrix(dest, transpose, offset_x, offset_y, 0.0f, 1.0f, scale_y, 1.0f);
}
void fill_viewport_matrix(void *buffer, bool transpose)
{
f32 offset_x = method_registers.viewport_offset_x();
f32 offset_y = method_registers.viewport_offset_y();
f32 offset_z = method_registers.viewport_offset_z();
f32 scale_x = method_registers.viewport_scale_x();
f32 scale_y = method_registers.viewport_scale_y();
f32 scale_z = method_registers.viewport_scale_z();
fill_scale_offset_matrix(buffer, transpose, offset_x, offset_y, offset_z, scale_x, scale_y, scale_z);
}
//Convert decoded integer values for CONSTANT_BLEND_FACTOR into f32 array in 0-1 range //Convert decoded integer values for CONSTANT_BLEND_FACTOR into f32 array in 0-1 range
std::array<float, 4> get_constant_blend_colors() std::array<float, 4> get_constant_blend_colors()
{ {