rsx/gl: Minor refactoring; prepare vulkan backend

kd-11 2017-02-16 21:29:56 +03:00
parent 1e826f5ccf
commit 7c73c3b75c
19 changed files with 1339 additions and 777 deletions


@ -363,34 +363,34 @@ u8 get_format_block_size_in_texel(int format)
LOG_ERROR(RSX, "Unimplemented block size in texels for texture format: 0x%x", format);
return 1;
}
}
u8 get_format_block_size_in_bytes(rsx::surface_color_format format)
{
switch (format)
{
case rsx::surface_color_format::b8:
return 1;
case rsx::surface_color_format::g8b8:
case rsx::surface_color_format::r5g6b5:
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
return 2;
case rsx::surface_color_format::a8b8g8r8:
case rsx::surface_color_format::a8r8g8b8:
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
case rsx::surface_color_format::x32:
return 4;
case rsx::surface_color_format::w16z16y16x16:
return 8;
case rsx::surface_color_format::w32z32y32x32:
return 16;
default:
fmt::throw_exception("Invalid color format 0x%x" HERE, (u32)format);
}
}
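//Illustrative note (added, not part of the commit): the per-pixel byte size above drives the
//pitch math in the render target code below, e.g. a 1280-wide a8r8g8b8 surface packs rows of
//1280 * get_format_block_size_in_bytes(rsx::surface_color_format::a8r8g8b8) = 5120 bytes.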
static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment)


@ -41,7 +41,7 @@ void Shader::Compile(const std::string &code, SHADER_TYPE st)
void D3D12GSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
ID3D12Resource *surface = nullptr;
if (!is_depth)


@ -194,6 +194,12 @@ void GLGSRender::begin()
if (!draw_fbo.check())
return;
if (surface_clear_flags)
{
clear_surface(surface_clear_flags);
surface_clear_flags = 0;
}
std::chrono::time_point<steady_clock> then = steady_clock::now();
bool color_mask_b = rsx::method_registers.color_mask_b();
@ -332,7 +338,6 @@ void GLGSRender::begin()
std::chrono::time_point<steady_clock> now = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
m_draw_calls++;
}
namespace
@ -381,8 +386,6 @@ void GLGSRender::end()
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}
draw_fbo.bind();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
@ -473,10 +476,17 @@ void GLGSRender::end()
draw_fbo.draw_arrays(rsx::method_registers.current_draw_clause.primitive, vertex_draw_count);
}
m_attrib_ring_buffer->notify();
m_index_ring_buffer->notify();
m_uniform_ring_buffer->notify();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
write_buffers();
m_draw_calls++;
//LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls);
synchronize_buffers();
rsx::thread::end();
}
@ -549,6 +559,8 @@ void GLGSRender::on_init_thread()
if (g_cfg_rsx_overlay)
m_text_printer.init();
m_gl_texture_cache.initialize(this);
}
void GLGSRender::on_exit()
@ -587,11 +599,12 @@ void GLGSRender::on_exit()
m_index_ring_buffer->remove();
m_text_printer.close();
m_gl_texture_cache.close();
return GSRender::on_exit();
}
void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
void GLGSRender::clear_surface(u32 arg)
{
if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return;
@ -601,9 +614,6 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
return;
}
renderer->init_buffers(true);
renderer->draw_fbo.bind();
GLbitfield mask = 0;
rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt();
@ -617,6 +627,10 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
glDepthMask(GL_TRUE);
glClearDepth(double(clear_depth) / max_depth_value);
mask |= GLenum(gl::buffers::depth);
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}
if (surface_depth_format == rsx::surface_depth_format::z24s8 && (arg & 0x2))
@ -643,46 +657,28 @@ void nv4097_clear_surface(u32 arg, GLGSRender* renderer)
}
glClear(mask);
//renderer->write_buffers();
}
using rsx_method_impl_t = void(*)(u32, GLGSRender*);
static const std::unordered_map<u32, rsx_method_impl_t> g_gl_method_tbl =
{
{ NV4097_CLEAR_SURFACE, nv4097_clear_surface }
};
bool GLGSRender::do_method(u32 cmd, u32 arg)
{
auto found = g_gl_method_tbl.find(cmd);
if (found == g_gl_method_tbl.end())
{
return false;
}
found->second(arg, this);
switch (cmd)
{
case NV4097_CLEAR_SURFACE:
{
if (arg & 0x1)
{
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && !ds->cleared())
ds->set_cleared();
}
}
init_buffers(true);
surface_clear_flags |= arg;
return true;
case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE:
case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE:
flush_draw_buffers = true;
return true;
}
return true;
return false;
}
bool GLGSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture &tex, bool is_depth) -> std::tuple<bool, u16>
{
gl::render_target *surface = nullptr;
if (!is_depth)
@ -690,7 +686,15 @@ bool GLGSRender::load_program()
else
surface = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr);
if (!surface) return std::make_tuple(false, 0);
if (!surface)
{
auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, 0, 0, tex.pitch());
if (!rsc.surface || rsc.is_depth_surface != is_depth)
return std::make_tuple(false, 0);
surface = rsc.surface;
}
return std::make_tuple(true, surface->get_native_pitch());
};
@ -817,17 +821,8 @@ void GLGSRender::flip(int buffer)
rsx::tiled_region buffer_region = get_tiled_address(gcm_buffers[buffer].offset, CELL_GCM_LOCATION_LOCAL);
u32 absolute_address = buffer_region.address + buffer_region.base;
if (0)
{
LOG_NOTICE(RSX, "flip(%d) -> 0x%x [0x%x]", buffer, absolute_address, rsx::get_address(gcm_buffers[1 - buffer].offset, CELL_GCM_LOCATION_LOCAL));
}
gl::texture *render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address);
/**
* Calling read_buffers will overwrite cached content
*/
__glcheck m_flip_fbo.recreate();
m_flip_fbo.bind();
@ -875,33 +870,27 @@ void GLGSRender::flip(int buffer)
areai screen_area = coordi({}, { (int)buffer_width, (int)buffer_height });
coordi aspect_ratio;
if (1) //enable aspect ratio
{
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
aspect_ratio.size = new_size;
}
else
{
aspect_ratio.size = { m_frame->client_width(), m_frame->client_height() };
}
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
const double aq = (double)buffer_width / buffer_height;
const double rq = (double)new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = int(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = int(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
aspect_ratio.size = new_size;
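//Worked example (added commentary, illustrative numbers): presenting a 1280x720 buffer into a
//1920x1200 client gives aq = 1.778, rq = 1.6, q = 1.111 > 1.0, so the image is letterboxed:
//new_size.height = int(1200 / 1.111) = 1080 and aspect_ratio.y = (1200 - 1080) / 2 = 60.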
gl::screen.clear(gl::buffers::color_depth_stencil);
__glcheck flip_fbo->blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical());
@ -926,6 +915,8 @@ void GLGSRender::flip(int buffer)
m_vertex_upload_time = 0;
m_textures_upload_time = 0;
m_gl_texture_cache.clear_temporary_surfaces();
for (auto &tex : m_rtts.invalidated_resources)
{
tex->remove();
@ -949,3 +940,43 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
else
return m_gl_texture_cache.flush_section(address);
}
void GLGSRender::do_local_task()
{
std::lock_guard<std::mutex> lock(queue_guard);
for (work_item& q: work_queue)
{
std::unique_lock<std::mutex> lock(q.guard_mutex);
//Process this address
q.result = m_gl_texture_cache.flush_section(q.address_to_flush);
q.processed = true;
//Notify thread waiting on this
lock.unlock();
q.cv.notify_one();
}
work_queue.clear();
}
work_item& GLGSRender::post_flush_request(u32 address)
{
std::lock_guard<std::mutex> lock(queue_guard);
work_queue.emplace_back();
work_item &result = work_queue.back();
result.address_to_flush = address;
return result;
}
void GLGSRender::synchronize_buffers()
{
if (flush_draw_buffers)
{
//LOG_WARNING(RSX, "Flushing RTT buffers EID=%d", m_draw_calls);
write_buffers();
flush_draw_buffers = false;
}
}
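//Minimal sketch of the waiting side of this handshake (illustrative; the real counterpart is
//texture_cache::flush_section further below, and fault_address is a placeholder):
//
//	work_item &task = renderer->post_flush_request(fault_address);
//	{
//		std::unique_lock<std::mutex> lock(task.guard_mutex);
//		task.cv.wait(lock, [&task] { return task.processed; }); //signaled by do_local_task() on the RSX thread
//	}
//	bool flushed = task.result;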


@ -12,6 +12,40 @@
#pragma comment(lib, "opengl32.lib")
struct work_item
{
std::condition_variable cv;
std::mutex guard_mutex;
u32 address_to_flush = 0;
bool processed = false;
bool result = false;
};
struct gcm_buffer_info
{
u32 address = 0;
u32 pitch = 0;
bool is_depth_surface = false;
rsx::surface_color_format color_format;
rsx::surface_depth_format depth_format;
u16 width = 0;
u16 height = 0;
gcm_buffer_info() {}
gcm_buffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
{}
};
class GLGSRender : public GSRender
{
private:
@ -49,6 +83,15 @@ private:
gl::text_writer m_text_printer;
std::mutex queue_guard;
std::list<work_item> work_queue;
gcm_buffer_info surface_info[rsx::limits::color_buffers_count];
gcm_buffer_info depth_surface_info;
u32 surface_clear_flags = 0;
bool flush_draw_buffers = false;
public:
gl::fbo draw_fbo;
@ -72,6 +115,8 @@ private:
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
void clear_surface(u32 arg);
public:
bool load_program();
void init_buffers(bool skip_reading = false);
@ -79,6 +124,9 @@ public:
void write_buffers();
void set_viewport();
void synchronize_buffers();
work_item& post_flush_request(u32 address);
protected:
void begin() override;
void end() override;
@ -89,6 +137,8 @@ protected:
void flip(int buffer) override;
u64 timestamp() const override;
void do_local_task() override;
bool on_access_violation(u32 address, bool is_writing) override;
virtual std::array<std::vector<gsl::byte>, 4> copy_render_targets_to_memory() override;


@ -67,6 +67,105 @@ namespace gl
}
};
class fence
{
GLsync m_value = nullptr;
GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
public:
fence() {}
~fence() {}
void create()
{
m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void destroy()
{
glDeleteSync(m_value);
m_value = nullptr;
}
void reset()
{
if (m_value != nullptr)
destroy();
create();
}
bool is_empty()
{
return (m_value == nullptr);
}
bool check_signaled()
{
verify(HERE), m_value != nullptr;
if (flags)
{
GLenum err = glClientWaitSync(m_value, flags, 0);
flags = 0;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
return (status == GL_SIGNALED);
}
}
bool wait_for_signal()
{
verify(HERE), m_value != nullptr;
GLenum err = GL_WAIT_FAILED;
bool done = false;
while (!done)
{
if (flags)
{
err = glClientWaitSync(m_value, flags, 1000);
flags = 0;
switch (err)
{
default:
LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
else
{
GLint status = GL_UNSIGNALED;
GLint tmp;
glGetSynciv(m_value, GL_SYNC_STATUS, 4, &tmp, &status);
if (status == GL_SIGNALED)
break;
}
}
glDeleteSync(m_value);
m_value = nullptr;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
};
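//Illustrative usage sketch for the fence helper (an assumed pattern, mirroring how the ring
//buffer below uses it):
//
//	gl::fence sync;
//	sync.create();              //glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0) after queuing work
//	//...before the CPU reuses storage the GPU may still be reading:
//	if (!sync.is_empty())
//		sync.wait_for_signal(); //spins on glClientWaitSync/glGetSynciv, then deletes the sync object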
template<typename Type, uint BindId, uint GetStateId>
class save_binding_state_base
{
@ -594,33 +693,7 @@ namespace gl
u32 m_limit = 0;
void *m_memory_mapping = nullptr;
GLsync m_fence = nullptr;
void wait_for_sync()
{
verify(HERE), m_fence != nullptr;
bool done = false;
while (!done)
{
//Check if we are finished, wait time = 1us
GLenum err = glClientWaitSync(m_fence, GL_SYNC_FLUSH_COMMANDS_BIT, 1000);
switch (err)
{
default:
LOG_ERROR(RSX, "err Returned 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
glDeleteSync(m_fence);
m_fence = nullptr;
}
fence m_fence;
public:
@ -628,7 +701,7 @@ namespace gl
{
if (m_id)
{
wait_for_sync();
m_fence.wait_for_signal();
remove();
}
@ -656,17 +729,15 @@ namespace gl
if ((offset + alloc_size) > m_limit)
{
//TODO: Measure the stall here
wait_for_sync();
if (!m_fence.is_empty())
m_fence.wait_for_signal();
m_data_loc = 0;
offset = 0;
}
if (!m_data_loc)
{
verify(HERE), m_fence == nullptr;
m_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
m_fence.reset();
//Align data loc to 256; allows some "guard" region so we don't trample our own data inadvertently
m_data_loc = align(offset + alloc_size, 256);
@ -697,6 +768,13 @@ namespace gl
{
glBindBufferRange((GLenum)current_target(), index, id(), offset, size);
}
//Notification of a draw command
virtual void notify()
{
if (m_fence.is_empty())
m_fence.reset();
}
};
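//Illustrative per-draw pattern for the ring buffer + fence pair (sizes are placeholders;
//grounded in the begin()/end() code above):
//
//	m_attrib_ring_buffer->reserve_storage_on_heap(512 * 1024); //waits on the fence if the heap must wrap
//	//...map and fill vertex data, issue the draw...
//	m_attrib_ring_buffer->notify();                            //arms a fence for this batch if none is pending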
class legacy_ring_buffer : public ring_buffer
@ -790,6 +868,8 @@ namespace gl
m_mapped_bytes = 0;
m_mapping_offset = 0;
}
void notify() override {}
};
class vao
@ -1019,7 +1099,16 @@ namespace gl
compressed_rgb_s3tc_dxt1 = GL_COMPRESSED_RGB_S3TC_DXT1_EXT,
compressed_rgba_s3tc_dxt1 = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,
compressed_rgba_s3tc_dxt3 = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,
compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
compressed_rgba_s3tc_dxt5 = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,
//Sized internal formats, see opengl spec document on glTexImage2D, table 3
rgba8 = GL_RGBA8,
r5g6b5 = GL_RGB565,
r8 = GL_R8,
rg8 = GL_RG8,
r32f = GL_R32F,
rgba16f = GL_RGBA16F,
rgba32f = GL_RGBA32F
};
enum class wrap


@ -185,6 +185,7 @@ OPENGL_PROC(PFNGLBUFFERSTORAGEPROC, BufferStorage);
//ARB_sync
OPENGL_PROC(PFNGLFENCESYNCPROC, FenceSync);
OPENGL_PROC(PFNGLCLIENTWAITSYNCPROC, ClientWaitSync);
OPENGL_PROC(PFNGLGETSYNCIVPROC, GetSynciv);
OPENGL_PROC(PFNGLDELETESYNCPROC, DeleteSync);
//KHR_debug


@ -44,9 +44,10 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma
case rsx::surface_color_format::x32:
return{ ::gl::texture::type::f32, ::gl::texture::format::red, false, 1, 4 };
case rsx::surface_color_format::a8b8g8r8:
default:
LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format);
case rsx::surface_color_format::a8b8g8r8:
return{ ::gl::texture::type::uint_8_8_8_8, ::gl::texture::format::bgra, false, 4, 1 };
}
}
@ -75,92 +76,46 @@ u8 rsx::internals::get_pixel_size(rsx::surface_depth_format format)
fmt::throw_exception("Unknown depth format" HERE);
}
void GLGSRender::init_buffers(bool skip_reading)
{
u16 clip_horizontal = rsx::method_registers.surface_clip_width();
u16 clip_vertical = rsx::method_registers.surface_clip_height();
set_viewport();
if (draw_fbo && !m_rtts_dirty)
{
return;
}
m_rtts_dirty = false;
if (0)
{
LOG_NOTICE(RSX, "render to -> 0x%x", get_color_surface_addresses()[0]);
}
m_rtts.prepare_render_target(nullptr, rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), clip_horizontal, clip_vertical,
rsx::method_registers.surface_color_target(),
get_color_surface_addresses(), get_zeta_surface_address());
draw_fbo.recreate();
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
{
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);
}
}
if (std::get<0>(m_rtts.m_bound_depth_stencil))
{
__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);
}
if (!draw_fbo.check())
return;
//HACK: read_buffer shouldn't be there
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none: break;
case rsx::surface_target::surface_a:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surface_b:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
break;
case rsx::surface_target::surfaces_a_b:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c_d:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
}
}
::gl::texture::internal_format rsx::internals::sized_internal_format(rsx::surface_color_format color_format)
{
switch (color_format)
{
case rsx::surface_color_format::r5g6b5:
return ::gl::texture::internal_format::r5g6b5;
case rsx::surface_color_format::a8r8g8b8:
return ::gl::texture::internal_format::rgba8;
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
return ::gl::texture::internal_format::rgba8;
case rsx::surface_color_format::w16z16y16x16:
return ::gl::texture::internal_format::rgba16f;
case rsx::surface_color_format::w32z32y32x32:
return ::gl::texture::internal_format::rgba32f;
case rsx::surface_color_format::b8:
return ::gl::texture::internal_format::r8;
case rsx::surface_color_format::g8b8:
return ::gl::texture::internal_format::rg8;
case rsx::surface_color_format::x32:
return ::gl::texture::internal_format::r32f;
case rsx::surface_color_format::a8b8g8r8:
return ::gl::texture::internal_format::rgba8;
default:
LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format);
return ::gl::texture::internal_format::rgba8;
}
}
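//Illustrative use of the new sized-format helper (a sketch; width/height are placeholders,
//the real call site is gl_render_target_traits further below):
//
//	const auto ifmt = rsx::internals::sized_internal_format(rsx::surface_color_format::a8r8g8b8); //rgba8
//	glTexStorage2D(GL_TEXTURE_2D, 1, (GLenum)ifmt, width, height); //immutable storage with an explicit sized format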
std::array<std::vector<gsl::byte>, 4> GLGSRender::copy_render_targets_to_memory()
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();
return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h);
}
std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_memory()
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();
return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h);
}
namespace
@ -196,6 +151,162 @@ namespace
}
}
void GLGSRender::init_buffers(bool skip_reading)
{
//NOTE 1: Sometimes a clear is processed before the RSX buffers have been synced/flushed, which leads to downloading blank data.
//Clearing of surfaces is therefore deferred to handle this
//NOTE 2: It is possible for a game to do:
//1. Bind buffer 1
//2. Clear
//3. Bind buffer 2 without touching 1
//4. Clear
//5. Bind buffer 1
//6. Draw without clear
if (draw_fbo && !m_rtts_dirty)
{
set_viewport();
return;
}
//We are about to change buffers, flush any pending requests for the old buffers
//LOG_WARNING(RSX, "Render targets have changed; checking for sync points (EID=%d)", m_draw_calls);
synchronize_buffers();
//If the old buffers were dirty, clear them before we bind new buffers
if (surface_clear_flags)
{
clear_surface(surface_clear_flags);
surface_clear_flags = 0;
}
m_rtts_dirty = false;
const u16 clip_horizontal = rsx::method_registers.surface_clip_width();
const u16 clip_vertical = rsx::method_registers.surface_clip_height();
const auto pitchs = get_pitchs();
const auto surface_format = rsx::method_registers.surface_color();
const auto depth_format = rsx::method_registers.surface_depth_fmt();
const auto surface_addresses = get_color_surface_addresses();
const auto depth_address = get_zeta_surface_address();
m_rtts.prepare_render_target(nullptr, surface_format, depth_format, clip_horizontal, clip_vertical,
rsx::method_registers.surface_color_target(),
surface_addresses, depth_address);
draw_fbo.recreate();
for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (std::get<0>(m_rtts.m_bound_render_targets[i]))
{
__glcheck draw_fbo.color[i] = *std::get<1>(m_rtts.m_bound_render_targets[i]);
std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]);
surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical };
}
else
surface_info[i] = {};
}
if (std::get<0>(m_rtts.m_bound_depth_stencil))
{
__glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil);
std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch());
depth_surface_info = { depth_address, rsx::method_registers.surface_z_pitch(), true, surface_format, depth_format, clip_horizontal, clip_vertical };
}
else
depth_surface_info = {};
if (!draw_fbo.check())
return;
draw_fbo.bind();
set_viewport();
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none: break;
case rsx::surface_target::surface_a:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[0]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surface_b:
__glcheck draw_fbo.draw_buffer(draw_fbo.color[1]);
__glcheck draw_fbo.read_buffer(draw_fbo.color[1]);
break;
case rsx::surface_target::surfaces_a_b:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
case rsx::surface_target::surfaces_a_b_c_d:
__glcheck draw_fbo.draw_buffers({ draw_fbo.color[0], draw_fbo.color[1], draw_fbo.color[2], draw_fbo.color[3] });
__glcheck draw_fbo.read_buffer(draw_fbo.color[0]);
break;
}
//Mark buffer regions as NO_ACCESS on Cell visible side
if (g_cfg_rsx_write_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(surface_format);
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (!surface_info[i].address || pitchs[i] <= 64) continue;
const u32 range = surface_info[i].pitch * surface_info[i].height;
m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch,
color_format.format, color_format.type, *std::get<1>(m_rtts.m_bound_render_targets[i]));
}
}
if (g_cfg_rsx_write_depth_buffer)
{
if (depth_surface_info.address && rsx::method_registers.surface_z_pitch() > 64)
{
auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format);
u32 pitch = depth_surface_info.width * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 range = pitch * depth_surface_info.height;
//TODO: Verify that depth surface pitch variance affects results
if (pitch != depth_surface_info.pitch)
LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch);
m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, *std::get<1>(m_rtts.m_bound_depth_stencil));
}
}
}
std::array<std::vector<gsl::byte>, 4> GLGSRender::copy_render_targets_to_memory()
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();
return m_rtts.get_render_targets_data(rsx::method_registers.surface_color(), clip_w, clip_h);
}
std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_memory()
{
int clip_w = rsx::method_registers.surface_clip_width();
int clip_h = rsx::method_registers.surface_clip_height();
return m_rtts.get_depth_stencil_data(rsx::method_registers.surface_depth_fmt(), clip_w, clip_h);
}
void GLGSRender::read_buffers()
{
if (!draw_fbo)
@ -334,82 +445,34 @@ void GLGSRender::write_buffers()
if (g_cfg_rsx_write_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color());
auto write_color_buffers = [&](int index, int count)
{
u32 width = rsx::method_registers.surface_clip_width();
u32 height = rsx::method_registers.surface_clip_height();
std::array<u32, 4> offsets = get_offsets();
const std::array<u32, 4 > locations = get_locations();
const std::array<u32, 4 > pitchs = get_pitchs();
for (int i = index; i < index + count; ++i)
{
u32 offset = offsets[i];
u32 location = locations[i];
u32 pitch = pitchs[i];
if (pitch <= 64)
if (surface_info[i].address == 0 || surface_info[i].pitch <= 64)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0));
u32 range = pitch * height;
/**Even tiles are loaded as whole textures during read_buffers from testing.
* Need further evaluation to determine correct behavior. Separate paths for both show no difference,
* but using the GPU to perform the caching is many times faster.
*/
__glcheck m_gl_texture_cache.save_rtt(texaddr, range, (*std::get<1>(m_rtts.m_bound_render_targets[i])), width, height, pitch, color_format.format, color_format.type);
const u32 range = surface_info[i].pitch * surface_info[i].height;
__glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range);
}
};
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
write_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
write_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
write_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
write_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
write_color_buffers(0, 4);
break;
}
write_color_buffers(0, 4);
}
if (g_cfg_rsx_write_depth_buffer)
{
//TODO: use pitch
u32 pitch = rsx::method_registers.surface_z_pitch();
if (!depth_surface_info.address || depth_surface_info.pitch <= 64) return;
if (pitch <= 64)
return;
u32 range = depth_surface_info.width * depth_surface_info.height * 2;
if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2;
u32 width = rsx::method_registers.surface_clip_width();
u32 height = rsx::method_registers.surface_clip_height();
u32 range = width * height * 2;
auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt());
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
if (rsx::method_registers.surface_depth_fmt() != rsx::surface_depth_format::z16) range *= 2;
m_gl_texture_cache.save_rtt(depth_address, range, (*std::get<1>(m_rtts.m_bound_depth_stencil)), width, height, pitch, depth_format.format, depth_format.type);
m_gl_texture_cache.save_rtt(depth_surface_info.address, range);
}
}


@ -4,40 +4,6 @@
#include "stdafx.h"
#include "../RSXThread.h"
namespace gl
{
class render_target : public texture
{
bool is_cleared = false;
u16 native_pitch = 0;
public:
render_target() {}
void set_cleared()
{
is_cleared = true;
}
bool cleared() const
{
return is_cleared;
}
// Internal pitch is the actual row length in bytes of the openGL texture
void set_native_pitch(u16 pitch)
{
native_pitch = pitch;
}
u16 get_native_pitch() const
{
return native_pitch;
}
};
}
struct color_swizzle
{
gl::texture::channel a = gl::texture::channel::a;
@ -73,12 +39,111 @@ namespace rsx
{
namespace internals
{
::gl::texture::internal_format sized_internal_format(rsx::surface_color_format color_format);
color_format surface_color_format_to_gl(rsx::surface_color_format color_format);
depth_format surface_depth_format_to_gl(rsx::surface_depth_format depth_format);
u8 get_pixel_size(rsx::surface_depth_format format);
}
}
namespace gl
{
class render_target : public texture
{
bool is_cleared = false;
u32 rsx_pitch = 0;
u16 native_pitch = 0;
u16 surface_height = 0;
u16 surface_width = 0;
u16 surface_pixel_size = 0;
texture::internal_format compatible_internal_format = texture::internal_format::rgba8;
public:
render_target() {}
void set_cleared()
{
is_cleared = true;
}
bool cleared() const
{
return is_cleared;
}
// Internal pitch is the actual row length in bytes of the openGL texture
void set_native_pitch(u16 pitch)
{
native_pitch = pitch;
}
u16 get_native_pitch() const
{
return native_pitch;
}
// Rsx pitch
void set_rsx_pitch(u16 pitch)
{
rsx_pitch = pitch;
}
u16 get_rsx_pitch() const
{
return rsx_pitch;
}
std::pair<u16, u16> get_dimensions()
{
if (!surface_height) surface_height = height();
if (!surface_width) surface_width = width();
return std::make_pair(surface_width, surface_height);
}
void set_compatible_format(texture::internal_format format)
{
compatible_internal_format = format;
}
texture::internal_format get_compatible_internal_format()
{
return compatible_internal_format;
}
// For an address within the texture, extract this sub-section's rect origin
std::tuple<bool, u16, u16> get_texture_subresource(u32 offset)
{
if (!offset)
{
return std::make_tuple(true, 0, 0);
}
if (!surface_height) surface_height = height();
if (!surface_width) surface_width = width();
u32 range = rsx_pitch * surface_height;
if (offset < range)
{
if (!surface_pixel_size)
surface_pixel_size = native_pitch / surface_width;
u32 pixel_offset = (offset / surface_pixel_size);
u32 y = (pixel_offset / surface_width);
u32 x = (pixel_offset % surface_width);
return std::make_tuple(true, (u16)x, (u16)y);
}
else
return std::make_tuple(false, 0, 0);
}
};
}
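//Worked example for get_texture_subresource (added commentary, illustrative numbers; note the
//math assumes tightly packed rows, i.e. rsx_pitch == native_pitch):
//a 1280x720 surface at 4 bytes/pixel has rsx_pitch = 5120 and range = 5120 * 720;
//offset 5124 -> pixel_offset = 5124 / 4 = 1281 -> y = 1281 / 1280 = 1, x = 1281 % 1280 = 1,
//so get_texture_subresource(5124) yields (true, 1, 1); offsets >= range yield (false, 0, 0).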
struct gl_render_target_traits
{
using surface_storage_type = std::unique_ptr<gl::render_target>;
@ -97,13 +162,17 @@ struct gl_render_target_traits
std::unique_ptr<gl::render_target> result(new gl::render_target());
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
auto internal_fmt = rsx::internals::sized_internal_format(surface_color_format);
result->recreate(gl::texture::target::texture2D);
result->set_native_pitch(width * format.channel_count * format.channel_size);
result->set_compatible_format(internal_fmt);
__glcheck result->config()
.size({ (int)width, (int)height })
.type(format.type)
.format(format.format)
.internal_format(internal_fmt)
.swizzle(format.swizzle.r, format.swizzle.g, format.swizzle.b, format.swizzle.a)
.wrap(gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border, gl::texture::wrap::clamp_to_border)
.apply();
@ -144,6 +213,7 @@ struct gl_render_target_traits
native_pitch *= 2;
result->set_native_pitch(native_pitch);
result->set_compatible_format(format.internal_format);
return result;
}
@ -210,7 +280,138 @@ struct gl_render_target_traits
}
};
struct gl_render_targets : public rsx::surface_store<gl_render_target_traits>
struct surface_subresource
{
gl::render_target *surface = nullptr;
u16 x = 0;
u16 y = 0;
u16 w = 0;
u16 h = 0;
bool is_bound = false;
bool is_depth_surface = false;
surface_subresource() {}
surface_subresource(gl::render_target *src, u16 X, u16 Y, u16 W, u16 H, bool _Bound, bool _Depth)
: surface(src), x(X), y(Y), w(W), h(H), is_bound(_Bound), is_depth_surface(_Depth)
{}
};
class gl_render_targets : public rsx::surface_store<gl_render_target_traits>
{
private:
bool surface_overlaps(gl::render_target *surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y)
{
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
if (surface_address > texaddr)
return false;
u32 offset = texaddr - surface_address;
if (offset >= 0)
{
std::tie(is_subslice, x_offset, y_offset) = surface->get_texture_subresource(offset);
if (is_subslice)
{
*x = x_offset;
*y = y_offset;
return true;
}
}
return false;
}
bool is_bound(u32 address, bool is_depth)
{
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface: m_bound_render_targets)
{
const u32 bound_address = std::get<0>(surface);
if (bound_address == address)
return true;
}
return false;
}
bool fits(gl::render_target *src, std::pair<u16, u16> &dims, u16 x_offset, u16 y_offset, u16 width, u16 height) const
{
if ((x_offset + width) > dims.first) return false;
if ((y_offset + height) > dims.second) return false;
return true;
}
public:
surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch)
{
gl::render_target *surface = nullptr;
bool is_subslice = false;
u16 x_offset = 0;
u16 y_offset = 0;
for (auto &tex_info : m_render_targets_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, false), false };
else
{
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, false), false };
}
}
}
}
//Check depth surfaces for overlap
for (auto &tex_info : m_depth_stencil_storage)
{
u32 this_address = std::get<0>(tex_info);
surface = std::get<1>(tex_info).get();
if (surface_overlaps(surface, this_address, texaddr, &x_offset, &y_offset))
{
if (surface->get_rsx_pitch() != requested_pitch)
continue;
auto dims = surface->get_dimensions();
if (fits(surface, dims, x_offset, y_offset, requested_width, requested_height))
return{ surface, x_offset, y_offset, requested_width, requested_height, is_bound(this_address, true), true };
else
{
if (dims.first >= requested_width && dims.second >= requested_height)
{
LOG_WARNING(RSX, "Overlapping depth surface exceeds bounds; returning full surface region");
return{ surface, 0, 0, requested_width, requested_height, is_bound(this_address, true), true };
}
}
}
}
return {};
}
};
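//Caller-side sketch (illustrative; mirrors how the texture cache below consumes this lookup):
//
//	surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch());
//	if (rsc.surface)
//	{
//		//sample the (rsc.x, rsc.y, rsc.w, rsc.h) window of the live surface instead of
//		//downloading it through cell memory
//	}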


@ -7,7 +7,7 @@
#include "../rsx_utils.h"
#include "../Common/TextureUtils.h"
namespace
namespace gl
{
GLenum get_sized_internal_format(u32 texture_format)
{
@ -40,7 +40,6 @@ namespace
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format)
{
switch (texture_format)
@ -68,7 +67,10 @@ namespace
}
fmt::throw_exception("Compressed or unknown texture format 0x%x" HERE, texture_format);
}
}
namespace
{
bool is_compressed_format(u32 texture_format)
{
switch (texture_format)
@ -319,10 +321,10 @@ namespace rsx
int mip_level = 0;
if (dim == rsx::texture_dimension_extended::texture_dimension_1d)
{
__glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width);
__glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -335,7 +337,7 @@ namespace rsx
{
u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
@ -343,10 +345,10 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_2d)
{
__glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, get_sized_internal_format(format), width, height);
__glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -359,7 +361,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;
@ -367,13 +369,13 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap)
{
__glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, get_sized_internal_format(format), width, height);
__glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height);
// Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap
// Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0.
// mip_level % mipmap_per_layer will always be equal to mip_level
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -387,7 +389,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
mip_level++;
}
}
@ -396,10 +398,10 @@ namespace rsx
if (dim == rsx::texture_dimension_extended::texture_dimension_3d)
{
__glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, get_sized_internal_format(format), width, height, depth);
__glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth);
if (!is_compressed_format(format))
{
const auto &format_type = get_format_type(format);
const auto &format_type = ::gl::get_format_type(format);
for (const rsx_subresource_layout &layout : input_layouts)
{
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
@ -412,7 +414,7 @@ namespace rsx
{
u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
__glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
__glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data());
__glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data());
}
}
return;


@ -64,3 +64,9 @@ namespace rsx
};
}
}
namespace gl
{
GLenum get_sized_internal_format(u32 gcm_format);
std::tuple<GLenum, GLenum> get_format_type(u32 texture_format);
}


@ -0,0 +1,63 @@
#pragma once
#include "stdafx.h"
#include "GLGSRender.h"
#include "GLTextureCache.h"
namespace gl
{
bool texture_cache::flush_section(u32 address)
{
if (address < rtt_cache_range.first ||
address >= rtt_cache_range.second)
return false;
bool post_task = false;
{
std::lock_guard<std::mutex> lock(m_section_mutex);
for (cached_rtt_section &rtt : m_rtt_cache)
{
if (rtt.is_dirty()) continue;
if (rtt.is_locked() && rtt.overlaps(address))
{
if (rtt.is_flushed())
{
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size());
continue;
}
//LOG_WARNING(RSX, "Cell needs GPU data synced here, address=0x%X", address);
if (std::this_thread::get_id() != m_renderer_thread)
{
post_task = true;
break;
}
rtt.flush();
return true;
}
}
}
if (post_task)
{
//LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address);
work_item &task = m_renderer->post_flush_request(address);
{
std::unique_lock<std::mutex> lock(task.guard_mutex);
task.cv.wait(lock, [&task] { return task.processed; });
}
verify(HERE), task.result == true;
return task.result;
}
return false;
}
}

View File

@ -9,227 +9,14 @@
#include <memory>
#include <unordered_map>
#include "GLGSRender.h"
#include "GLRenderTargets.h"
#include "../Common/TextureUtils.h"
#include <chrono>
namespace rsx
{
//TODO: Properly move this into rsx shared
class buffered_section
{
protected:
u32 cpu_address_base = 0;
u32 cpu_address_range = 0;
u32 locked_address_base = 0;
u32 locked_address_range = 0;
u32 memory_protection = 0;
bool locked = false;
bool dirty = false;
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check for memory area overlap. unlock page(s) if needed and add this index to array.
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
}
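//Note (added commentary): for half-open ranges, the axis-separation test above is equivalent
//to the usual interval check: return (base2 < limit1) && (base1 < limit2);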
public:
buffered_section() {}
~buffered_section() {}
void reset(u32 base, u32 length)
{
verify(HERE), locked == false;
cpu_address_base = base;
cpu_address_range = length;
locked_address_base = (base & ~4095);
locked_address_range = align(base + length, 4096) - locked_address_base;
memory_protection = vm::page_readable|vm::page_writable;
locked = false;
}
bool protect(u8 flags_set, u8 flags_clear)
{
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear))
{
memory_protection &= ~flags_clear;
memory_protection |= flags_set;
locked = memory_protection != (vm::page_readable | vm::page_writable);
}
else
fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base);
return false;
}
bool unprotect()
{
u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection;
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0))
{
memory_protection = (vm::page_writable | vm::page_readable);
locked = false;
return true;
}
else
fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base);
return false;
}
bool overlaps(std::pair<u32, u32> range)
{
return region_overlaps(locked_address_base, locked_address_base+locked_address_range, range.first, range.first + range.second);
}
bool overlaps(u32 address)
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
bool is_locked() const
{
return locked;
}
bool is_dirty() const
{
return dirty;
}
void set_dirty(bool state)
{
dirty = state;
}
u32 get_section_base() const
{
return cpu_address_base;
}
u32 get_section_size() const
{
return cpu_address_range;
}
bool matches(u32 cpu_address, u32 size) const
{
return (cpu_address_base == cpu_address && cpu_address_range == size);
}
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
return std::make_pair(min, max);
}
};
}
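//Illustrative lock/fault/flush cycle built on buffered_section (assumed usage; the concrete
//flow lives in lock_rtt_region/flush below, and fb_address/fb_size are placeholders):
//
//	rsx::buffered_section section;
//	section.reset(fb_address, fb_size);                        //page-align the guarded window
//	section.protect(0, vm::page_readable | vm::page_writable); //NO_ACCESS on the cell-visible side
//	//on_access_violation -> flush_section(): download the GPU copy, then
//	section.protect(vm::page_readable, vm::page_writable);     //leave the region readable again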
class GLGSRender;
namespace gl
{
//TODO: Properly move this into helpers
class fence
{
GLsync m_value = nullptr;
GLenum flags = GL_SYNC_FLUSH_COMMANDS_BIT;
public:
fence() {}
~fence() {}
void create()
{
m_value = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}
void destroy()
{
glDeleteSync(m_value);
m_value = nullptr;
}
void reset()
{
if (m_value != nullptr)
destroy();
create();
}
bool check_signaled()
{
verify(HERE), m_value != nullptr;
GLenum err = glClientWaitSync(m_value, flags, 0);
flags = 0;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
bool wait_for_signal()
{
verify(HERE), m_value != nullptr;
GLenum err = GL_WAIT_FAILED;
bool done = false;
while (!done)
{
//Check if we are finished, wait time = 1us
err = glClientWaitSync(m_value, flags, 1000);
flags = 0;
switch (err)
{
default:
LOG_ERROR(RSX, "gl::fence sync returned unknown error 0x%X", err);
case GL_ALREADY_SIGNALED:
case GL_CONDITION_SATISFIED:
done = true;
break;
case GL_TIMEOUT_EXPIRED:
continue;
}
}
glDeleteSync(m_value);
m_value = nullptr;
return (err == GL_ALREADY_SIGNALED || err == GL_CONDITION_SATISFIED);
}
};
//TODO: Unify all cache objects
class texture_cache
{
public:
@ -253,7 +40,7 @@ namespace gl
this->mipmaps = mipmaps;
}
bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps)
bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const
{
if (rsx_address == cpu_address_base && texture_id != 0)
{
@ -275,7 +62,7 @@ namespace gl
texture_id = 0;
}
bool is_empty()
bool is_empty() const
{
return (texture_id == 0);
}
@ -293,12 +80,12 @@ namespace gl
u32 pbo_id = 0;
u32 pbo_size = 0;
u32 source_texture = 0;
bool copied = false;
bool flushed = false;
bool is_depth = false;
u32 flush_count = 0;
u32 copy_count = 0;
u32 current_width = 0;
u32 current_height = 0;
u32 current_pitch = 0;
@ -372,62 +159,27 @@ namespace gl
return size;
}
public:
void reset(u32 base, u32 size)
{
rsx::buffered_section::reset(base, size);
flushed = false;
flush_count = 0;
copy_count = 0;
}
void init_buffer()
{
glGenBuffers(1, &pbo_id);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT);
pbo_size = locked_address_range;
}
void set_dimensions(u32 width, u32 height, u32 pitch)
{
current_width = width;
current_height = height;
current_pitch = pitch;
real_pitch = width * get_pixel_size(format, type);
}
void set_format(texture::format gl_format, texture::type gl_type)
{
format = gl_format;
type = gl_type;
real_pitch = current_width * get_pixel_size(format, type);
}
void copy_texture(gl::texture &source)
{
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
glGetTextureImage(source.id(), 0, (GLenum)format, (GLenum)type, pbo_size, nullptr);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
m_fence.reset();
copy_count++;
}
void fill_texture(gl::texture &tex)
{
u32 min_width = std::min((u32)tex.width(), current_width);
u32 min_height = std::min((u32)tex.height(), current_height);
tex.bind();
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
u32 dst_offset = 0;
u32 src_offset = 0;
u32 padding = dst_pitch - (src_pitch * samples);
for (u16 h = 0; h < src_height; ++h)
{
for (u16 w = 0; w < src_width; ++w)
{
for (u8 n = 0; n < samples; ++n)
{
memcpy(&dst[dst_offset], &src[src_offset], pixel_size);
dst_offset += pixel_size;
}
src_offset += pixel_size;
}
dst_offset += padding;
}
}
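//Worked example for scale_image_fallback (added commentary, illustrative numbers): a source
//4 pixels wide at 4 bytes/pixel (src_pitch = 16) scaled with samples = 2 into dst_pitch = 48
//writes each pixel twice (32 bytes per row), then skips padding = 48 - (16 * 2) = 16 bytes
//to reach the start of the next destination row.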
template <typename T, int N>
@ -476,8 +228,98 @@ namespace gl
}
}
public:
void reset(u32 base, u32 size)
{
rsx::buffered_section::reset(base, size);
flushed = false;
copied = false;
source_texture = 0;
}
void init_buffer()
{
glGenBuffers(1, &pbo_id);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
glBufferStorage(GL_PIXEL_PACK_BUFFER, locked_address_range, nullptr, GL_MAP_READ_BIT);
pbo_size = locked_address_range;
}
void set_dimensions(u32 width, u32 height, u32 pitch)
{
current_width = width;
current_height = height;
current_pitch = pitch;
real_pitch = width * get_pixel_size(format, type);
}
void set_format(texture::format gl_format, texture::type gl_type)
{
format = gl_format;
type = gl_type;
real_pitch = current_width * get_pixel_size(format, type);
}
void set_source(gl::texture &source)
{
source_texture = source.id();
}
void copy_texture()
{
if (!glIsTexture(source_texture))
{
LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! (0x%X)", source_texture);
return;
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id);
glGetTextureImage(source_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
m_fence.reset();
copied = true;
}
void fill_texture(gl::texture &tex)
{
if (!copied)
{
//LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read");
return;
}
u32 min_width = std::min((u32)tex.width(), current_width);
u32 min_height = std::min((u32)tex.height(), current_height);
tex.bind();
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id);
glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
void flush()
{
if (!copied)
{
LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
copy_texture();
if (!copied)
{
LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on...");
protect(vm::page_readable, 0);
return;
}
}
protect(vm::page_writable, 0);
m_fence.wait_for_signal();
flushed = true;
@ -493,6 +335,7 @@ namespace gl
memcpy(dst, data, cpu_address_range);
else
{
//TODO: Use compression hint from the gcm tile information
//Scale this image by repeating pixel data n times
//n = expected_pitch / real_pitch
//Use of fixed argument templates for performance reasons
@ -521,15 +364,13 @@ namespace gl
break;
default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch);
memcpy(dst, data, cpu_address_range);
scale_image_fallback(dst, static_cast<u8*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count);
}
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
protect(vm::page_readable, vm::page_writable);
flush_count++;
}
void destroy()
@ -549,46 +390,30 @@ namespace gl
return flushed;
}
bool can_skip()
{
//TODO: Better balancing algorithm. Copying buffers is very expensive
//TODO: Add a switch to force strict enforcement
//Always accept the first attempt at caching after creation
if (!copy_count)
return false;
//If surface is flushed often, force buffering
if (flush_count)
{
//TODO: Pick better values. Using 80% and 20% for now
if (flush_count >= (4 * copy_count / 5))
return false;
else
{
if (flushed) return false; //fence is guaranteed to have been signaled and destroyed
return !m_fence.check_signaled();
}
}
return true;
}
void set_flushed(bool state)
{
flushed = state;
}
void set_copied(bool state)
{
copied = state;
}
};
private:
std::vector<cached_texture_section> m_texture_cache;
std::vector<cached_rtt_section> m_rtt_cache;
std::vector<u32> m_temporary_surfaces;
std::pair<u32, u32> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
std::pair<u32, u32> rtt_cache_range = std::make_pair(0xFFFFFFFF, 0);
std::mutex m_section_mutex;
GLGSRender *m_renderer;
std::thread::id m_renderer_thread;
cached_texture_section *find_texture(u64 texaddr, u32 w, u32 h, u16 mipmaps)
{
for (cached_texture_section &tex : m_texture_cache)
@ -638,6 +463,8 @@ namespace gl
m_rtt_cache.resize(0);
m_texture_cache.resize(0);
clear_temporary_surfaces();
}
cached_rtt_section* find_cached_rtt_section(u32 base, u32 size)
@ -700,11 +527,48 @@ namespace gl
return region;
}
u32 create_temporary_subresource(u32 src_id, GLenum sized_internal_fmt, u16 x, u16 y, u16 width, u16 height)
{
u32 dst_id = 0;
glGenTextures(1, &dst_id);
glBindTexture(GL_TEXTURE_2D, dst_id);
glTexStorage2D(GL_TEXTURE_2D, 1, sized_internal_fmt, width, height);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
//Drain any pending GL_ERROR state so the check below reflects the copy
glGetError();
glCopyImageSubData(src_id, GL_TEXTURE_2D, 0, x, y, 0,
dst_id, GL_TEXTURE_2D, 0, 0, 0, 0, width, height, 1);
m_temporary_surfaces.push_back(dst_id);
//Check for error
if (GLenum err = glGetError())
{
LOG_WARNING(RSX, "Failed to copy image subresource with GL error 0x%X", err);
return 0;
}
return dst_id;
}
public:
texture_cache() {}
~texture_cache()
~texture_cache() {}
void initialize(GLGSRender *renderer)
{
m_renderer = renderer;
m_renderer_thread = std::this_thread::get_id();
}
void close()
{
clear();
}
@ -733,13 +597,78 @@ namespace gl
return;
}
/**
* Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise
* (Turbo: Super Stunt Squad does this; bypassing the need for a sync object)
* The engine does not read back the texture resource through cell, but specifies a texture location that is
* a bound render target. We can bypass the expensive download in this case
*/
surface_subresource rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex.width(), tex.height(), tex.pitch());
if (rsc.surface)
{
//Check that this region is not cpu-dirty before doing a copy
//This section is guaranteed to have a locking section *if* this bit has been bypassed before
bool upload_from_cpu = false;
for (cached_rtt_section &section : m_rtt_cache)
{
if (section.overlaps(std::make_pair(texaddr, range)) && section.is_dirty())
{
LOG_ERROR(RSX, "Cell wrote to render target section we are uploading from!");
upload_from_cpu = true;
break;
}
}
if (!upload_from_cpu)
{
if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d)
{
LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d",
texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height());
}
else
{
const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
GLenum src_format = (GLenum)rsc.surface->get_internal_format();
GLenum dst_format = std::get<0>(get_format_type(format));
u32 bound_index = ~0U;
if (src_format != dst_format)
{
LOG_WARNING(RSX, "Sampling from a section of a render target, but formats might be incompatible (0x%X vs 0x%X)", src_format, dst_format);
}
if (!rsc.is_bound)
{
if (rsc.w == tex.width() && rsc.h == tex.height())
rsc.surface->bind();
else
bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h);
}
else
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h);
}
if (bound_index)
return;
}
}
}
/**
* If all of the above failed, then it's probably a generic texture.
* Search the cache and upload/bind it.
*/
cached_texture_section *cached_texture = find_texture(texaddr, tex.width(), tex.height(), tex.get_exact_mipmap_count());
verify(HERE), gl_texture.id() == 0;
if (cached_texture)
{
@ -771,17 +700,28 @@ namespace gl
gl_texture.set_id(0);
}
void save_rtt(u32 base, u32 size, gl::texture &source, u32 width, u32 height, u32 pitch, texture::format format, texture::type type)
void save_rtt(u32 base, u32 size)
{
std::lock_guard<std::mutex> lock(m_section_mutex);
cached_rtt_section *region = find_cached_rtt_section(base, size);
if (!region)
{
LOG_ERROR(RSX, "Attempted to download render target that does not exist. Please report to developers");
return;
}
verify(HERE), region->is_locked();
region->copy_texture();
}
void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, gl::texture &source)
{
std::lock_guard<std::mutex> lock(m_section_mutex);
cached_rtt_section *region = create_locked_view_of_section(base, size);
//Ignore this if we haven't finished downloading the previous draw call
//TODO: Separate locking sections from downloading to the PBO, unless addresses fault often
if (0)//region->can_skip())
return;
if (!region->matches(base, size))
{
//This memory region overlaps our own region, but does not match it exactly
@ -793,10 +733,11 @@ namespace gl
}
region->set_dimensions(width, height, pitch);
region->copy_texture(source);
region->set_format(format, type);
region->set_dirty(false);
region->set_flushed(false);
region->set_copied(false);
region->set_source(source);
verify(HERE), region->is_locked() == true;
}
@ -890,32 +831,16 @@ namespace gl
}
}
bool flush_section(u32 address)
bool flush_section(u32 address);
void clear_temporary_surfaces()
{
if (address < rtt_cache_range.first ||
address >= rtt_cache_range.second)
return false;
std::lock_guard<std::mutex> lock(m_section_mutex);
for (cached_rtt_section &rtt : m_rtt_cache)
for (u32 &id : m_temporary_surfaces)
{
if (rtt.is_dirty()) continue;
if (rtt.is_locked() && rtt.overlaps(address))
{
if (rtt.is_flushed())
{
LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", rtt.get_section_base(), rtt.get_section_size());
continue;
}
rtt.flush();
return true;
}
glDeleteTextures(1, &id);
}
return false;
m_temporary_surfaces.clear();
}
};
}
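
flush_section is now only declared in the header; its definition moves into the new GLTextureCache.cpp registered in the project files below. A minimal sketch of what that out-of-line body could look like, assuming the m_renderer_thread/do_local_task machinery introduced by this commit is meant to defer GL work arriving from foreign threads (the queue helper named here is hypothetical):

bool gl::texture_cache::flush_section(u32 address)
{
	if (address < rtt_cache_range.first ||
		address >= rtt_cache_range.second)
		return false;

	std::lock_guard<std::mutex> lock(m_section_mutex);

	for (cached_rtt_section &rtt : m_rtt_cache)
	{
		if (rtt.is_dirty()) continue;

		if (rtt.is_locked() && rtt.overlaps(address))
		{
			if (std::this_thread::get_id() != m_renderer_thread)
			{
				//GL calls are only valid on the renderer thread; a real build
				//would queue this for do_local_task() to pick up, e.g.
				//m_renderer->post_flush_request(&rtt); (hypothetical helper)
				return false;
			}

			rtt.flush();
			return true;
		}
	}

	return false;
}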

View File

@ -391,6 +391,9 @@ namespace rsx
// TODO: exit condition
while (!Emu.IsStopped())
{
//Execute backend-local tasks first
do_local_task();
const u32 get = ctrl->get;
const u32 put = ctrl->put;
@ -837,7 +840,7 @@ namespace rsx
return result;
}
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, bool)> get_surface_info) const
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const
{
RSXFragmentProgram result = {};
u32 shader_program = rsx::method_registers.shader_program_address();
@ -885,7 +888,7 @@ namespace rsx
bool surface_exists;
u16 surface_pitch;
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, false);
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, false);
if (surface_exists && surface_pitch)
{
@ -894,7 +897,7 @@ namespace rsx
}
else
{
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, true);
std::tie(surface_exists, surface_pitch) = get_surface_info(texaddr, tex, true);
if (surface_exists)
{
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);

View File

@ -219,7 +219,7 @@ namespace rsx
* get_surface_info is a helper that takes 3 parameters: rsx_texture_address, the rsx::fragment_texture and surface_is_depth.
* It returns whether the surface is a render target and its pitch in native format.
*/
RSXFragmentProgram get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, bool)> get_surface_info) const;
RSXFragmentProgram get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const;
public:
double fps_limit = 59.94;
@ -239,6 +239,11 @@ namespace rsx
virtual void on_task() override;
virtual void on_exit() override;
/**
* Execute a backend local task queue
*/
virtual void do_local_task() {}
public:
virtual std::string get_name() const override;
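
The do_local_task() hook above gives backends a point on the RSX thread to drain work posted from other threads. A hedged sketch of an override (the queue, mutex and task type are illustrative, not part of this commit):

void GLGSRender::do_local_task()
{
	std::lock_guard<std::mutex> lock(m_local_task_mutex); //assumed member
	for (auto &job : m_local_task_queue)                  //assumed member, e.g. std::vector<std::function<void()>>
		job();                                            //e.g. flush a cached RTT section
	m_local_task_queue.clear();
}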

View File

@ -981,7 +981,7 @@ bool VKGSRender::do_method(u32 cmd, u32 arg)
bool VKGSRender::load_program()
{
auto rtt_lookup_func = [this](u32 texaddr, bool is_depth) -> std::tuple<bool, u16>
auto rtt_lookup_func = [this](u32 texaddr, rsx::fragment_texture&, bool is_depth) -> std::tuple<bool, u16>
{
vk::render_target *surface = nullptr;
if (!is_depth)

View File

@ -6,143 +6,132 @@
namespace vk
{
struct cached_texture_object
class cached_texture_section : public rsx::buffered_section
{
u32 native_rsx_address;
u32 native_rsx_size;
u16 width;
u16 height;
u16 depth;
u16 mipmaps;
std::unique_ptr<vk::image_view> uploaded_image_view;
std::unique_ptr<vk::image> uploaded_texture;
u64 protected_rgn_start;
u64 protected_rgn_end;
bool exists = false;
bool locked = false;
bool dirty = true;
public:
cached_texture_section() {}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image_view *view, vk::image *image)
{
width = w;
height = h;
this->depth = depth;
this->mipmaps = mipmaps;
uploaded_image_view.reset(view);
uploaded_texture.reset(image);
}
bool matches(u32 rsx_address, u32 rsx_size) const
{
return rsx::buffered_section::matches(rsx_address, rsx_size);
}
bool matches(u32 rsx_address, u32 width, u32 height, u32 mipmaps) const
{
if (rsx_address == cpu_address_base)
{
if (!width && !height && !mipmaps)
return true;
return (width == this->width && height == this->height && mipmaps == this->mipmaps);
}
return false;
}
bool exists() const
{
return (uploaded_texture.get() != nullptr);
}
u16 get_width() const
{
return width;
}
u16 get_height() const
{
return height;
}
std::unique_ptr<vk::image_view>& get_view()
{
return uploaded_image_view;
}
std::unique_ptr<vk::image>& get_texture()
{
return uploaded_texture;
}
};
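//Illustrative lifecycle of the new section type (added note; the call sequence
//mirrors the upload path further down):
//  cached_texture_section sec;
//  sec.reset(addr, range);                        //from rsx::buffered_section
//  sec.create(w, h, depth, mipmaps, view, image); //adopts the raw pointers
//  sec.protect(0, vm::page_writable);             //write-protects the pages
//  if (sec.matches(addr, w, h, mipmaps)) { /*reuse sec.get_view()*/ }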
class texture_cache
{
private:
std::vector<cached_texture_object> m_cache;
std::vector<cached_texture_section> m_cache;
std::pair<u64, u64> texture_cache_range = std::make_pair(0xFFFFFFFF, 0);
std::vector<std::unique_ptr<vk::image_view> > m_temporary_image_view;
std::vector<std::unique_ptr<vk::image>> m_dirty_textures;
bool lock_memory_region(u32 start, u32 size)
cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
static const u32 memory_page_size = 4096;
start = start & ~(memory_page_size - 1);
size = (u32)align(size, memory_page_size);
return vm::page_protect(start, size, 0, 0, vm::page_writable);
}
bool unlock_memory_region(u32 start, u32 size)
{
static const u32 memory_page_size = 4096;
start = start & ~(memory_page_size - 1);
size = (u32)align(size, memory_page_size);
return vm::page_protect(start, size, 0, vm::page_writable, 0);
}
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check for memory area overlap. unlock page(s) if needed and add this index to array.
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
}
cached_texture_object& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0)
{
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (!tex.dirty && tex.exists &&
tex.native_rsx_address == rsx_address &&
tex.native_rsx_size == rsx_size)
if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty())
{
if (!confirm_dimensions) return tex;
if (tex.width == width && tex.height == height && tex.mipmaps == mipmaps)
if (tex.matches(rsx_address, width, height, mipmaps))
return tex;
else
{
LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.");
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.width, tex.height);
LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height());
}
}
}
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (tex.dirty)
if (tex.is_dirty())
{
if (tex.exists)
if (tex.exists())
{
m_dirty_textures.push_back(std::move(tex.uploaded_texture));
tex.exists = false;
m_dirty_textures.push_back(std::move(tex.get_texture()));
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
return tex;
}
}
m_cache.push_back(cached_texture_object());
m_cache.push_back(cached_texture_section());
return m_cache[m_cache.size() - 1];
}
void lock_object(cached_texture_object &obj)
{
static const u32 memory_page_size = 4096;
obj.protected_rgn_start = obj.native_rsx_address & ~(memory_page_size - 1);
obj.protected_rgn_end = (u32)align(obj.native_rsx_size, memory_page_size);
obj.protected_rgn_end += obj.protected_rgn_start;
lock_memory_region(static_cast<u32>(obj.protected_rgn_start), static_cast<u32>(obj.native_rsx_size));
if (obj.protected_rgn_start < texture_cache_range.first)
texture_cache_range = std::make_pair(obj.protected_rgn_start, texture_cache_range.second);
if (obj.protected_rgn_end > texture_cache_range.second)
texture_cache_range = std::make_pair(texture_cache_range.first, obj.protected_rgn_end);
}
void unlock_object(cached_texture_object &obj)
{
unlock_memory_region(static_cast<u32>(obj.protected_rgn_start), static_cast<u32>(obj.native_rsx_size));
}
void purge_cache()
{
for (cached_texture_object &tex : m_cache)
for (auto &tex : m_cache)
{
if (tex.exists)
m_dirty_textures.push_back(std::move(tex.uploaded_texture));
if (tex.exists())
{
m_dirty_textures.push_back(std::move(tex.get_texture()));
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
if (tex.locked)
unlock_object(tex);
if (tex.is_locked())
tex.unprotect();
}
m_temporary_image_view.clear();
@ -196,12 +185,6 @@ namespace vk
return m_temporary_image_view.back().get();
}
cached_texture_object& cto = find_cached_texture(texaddr, range, true, tex.width(), tex.height(), tex.get_exact_mipmap_count());
if (cto.exists && !cto.dirty)
{
return cto.uploaded_image_view.get();
}
u32 raw_format = tex.format();
u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN);
@ -213,6 +196,7 @@ namespace vk
u16 height = 0;
u16 depth = 0;
u8 layer = 0;
switch (tex.get_extended_texture_dimension())
{
case rsx::texture_dimension_extended::texture_dimension_1d:
@ -245,6 +229,12 @@ namespace vk
break;
}
cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, tex.get_exact_mipmap_count());
if (region.exists() && !region.is_dirty())
{
return region.get_view().get();
}
bool is_cubemap = tex.get_extended_texture_dimension() == rsx::texture_dimension_extended::texture_dimension_cubemap;
VkImageSubresourceRange subresource_range = vk::get_image_subresource_range(0, 0, is_cubemap ? 6 : 1, tex.get_exact_mipmap_count(), VK_IMAGE_ASPECT_COLOR_BIT);
@ -255,33 +245,28 @@ namespace vk
return nullptr;
}
cto.uploaded_texture = std::make_unique<vk::image>(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
image_type,
vk_format,
tex.width(), height, depth, tex.get_exact_mipmap_count(), layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0);
change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
cto.uploaded_image_view = std::make_unique<vk::image_view>(*vk::get_current_renderer(), cto.uploaded_texture->value, image_view_type, vk_format,
vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, image_view_type, vk_format,
mapping,
subresource_range);
copy_mipmaped_image_using_buffer(cmd, cto.uploaded_texture->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(),
copy_mipmaped_image_using_buffer(cmd, image->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(),
upload_heap, upload_buffer);
change_image_layout(cmd, cto.uploaded_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
change_image_layout(cmd, image->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range);
cto.exists = true;
cto.dirty = false;
cto.native_rsx_address = texaddr;
cto.native_rsx_size = range;
cto.width = tex.width();
cto.height = tex.height();
cto.mipmaps = tex.get_exact_mipmap_count();
lock_object(cto);
region.reset(texaddr, range);
region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image);
region.protect(0, vm::page_writable);
region.set_dirty(false);
return cto.uploaded_image_view.get();
return view;
}
bool invalidate_address(u32 rsx_address)
@ -290,23 +275,22 @@ namespace vk
rsx_address > texture_cache_range.second)
return false;
for (cached_texture_object &tex : m_cache)
bool response = false;
for (auto &tex : m_cache)
{
if (tex.dirty) continue;
if (tex.is_dirty()) continue;
if (rsx_address >= tex.protected_rgn_start &&
rsx_address < tex.protected_rgn_end)
if (tex.overlaps(rsx_address))
{
unlock_object(tex);
tex.set_dirty(true);
tex.unprotect();
tex.native_rsx_address = 0;
tex.dirty = true;
return true;
response = true;
}
}
return false;
return response;
}
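//Illustrative call site (added note, not part of this diff): the access
//violation handler would funnel faulting writes through the cache, e.g.
//  if (m_texture_cache.invalidate_address(fault_addr)) { /*handled*/ }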
void flush()

View File

@ -1,5 +1,6 @@
#pragma once
#include <rsx_decompiler.h>
#include "Emu/Memory/vm.h"
namespace rsx
{
@ -64,4 +65,140 @@ namespace rsx
program_info get(raw_program raw_program_, decompile_language lang);
void clear();
};
class buffered_section
{
protected:
u32 cpu_address_base = 0;
u32 cpu_address_range = 0;
u32 locked_address_base = 0;
u32 locked_address_range = 0;
u32 memory_protection = 0;
bool locked = false;
bool dirty = false;
bool region_overlaps(u32 base1, u32 limit1, u32 base2, u32 limit2)
{
//Check whether the two address ranges overlap
//Axis separation test
const u32 &block_start = base1;
const u32 block_end = limit1;
if (limit2 < block_start) return false;
if (base2 > block_end) return false;
u32 min_separation = (limit2 - base2) + (limit1 - base1);
u32 range_limit = (block_end > limit2) ? block_end : limit2;
u32 range_base = (block_start < base2) ? block_start : base2;
u32 actual_separation = (range_limit - range_base);
if (actual_separation < min_separation)
return true;
return false;
}
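//Worked example (added for clarity): blocks [0x1000, 0x2000] and [0x1800, 0x2800]
//give min_separation = 0x1000 + 0x1000 = 0x2000, while the combined extent is
//0x2800 - 0x1000 = 0x1800 < 0x2000, so the test reports an overlap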
public:
buffered_section() {}
~buffered_section() {}
void reset(u32 base, u32 length)
{
verify(HERE), locked == false;
cpu_address_base = base;
cpu_address_range = length;
locked_address_base = (base & ~4095);
locked_address_range = align(base + length, 4096) - locked_address_base;
memory_protection = vm::page_readable | vm::page_writable;
locked = false;
}
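//Example of the page rounding above (illustrative values): base = 0x12345,
//length = 0x100 -> locked_address_base = 0x12000 and
//locked_address_range = align(0x12445, 4096) - 0x12000 = 0x1000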
bool protect(u8 flags_set, u8 flags_clear)
{
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, flags_clear))
{
memory_protection &= ~flags_clear;
memory_protection |= flags_set;
locked = memory_protection != (vm::page_readable | vm::page_writable);
return true; //was missing; the success path previously fell through to return false
}
else
fmt::throw_exception("failed to lock memory @ 0x%X!", locked_address_base);
return false;
}
bool unprotect()
{
u32 flags_set = (vm::page_readable | vm::page_writable) & ~memory_protection;
if (vm::page_protect(locked_address_base, locked_address_range, 0, flags_set, 0))
{
memory_protection = (vm::page_writable | vm::page_readable);
locked = false;
return true;
}
else
fmt::throw_exception("failed to unlock memory @ 0x%X!", locked_address_base);
return false;
}
bool overlaps(std::pair<u32, u32> range)
{
return region_overlaps(locked_address_base, locked_address_base + locked_address_range, range.first, range.first + range.second);
}
bool overlaps(u32 address)
{
return (locked_address_base <= address && (address - locked_address_base) < locked_address_range);
}
bool is_locked() const
{
return locked;
}
bool is_dirty() const
{
return dirty;
}
void set_dirty(bool state)
{
dirty = state;
}
u32 get_section_base() const
{
return cpu_address_base;
}
u32 get_section_size() const
{
return cpu_address_range;
}
bool matches(u32 cpu_address, u32 size) const
{
return (cpu_address_base == cpu_address && cpu_address_range == size);
}
std::pair<u32, u32> get_min_max(std::pair<u32, u32> current_min_max)
{
u32 min = std::min(current_min_max.first, locked_address_base);
u32 max = std::max(current_min_max.second, locked_address_base + locked_address_range);
return std::make_pair(min, max);
}
};
}
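
A minimal usage sketch of buffered_section, the primitive both texture caches now build on (the fault-handler wiring is assumed, not shown in this diff):

void track_region(u32 addr, u32 size)
{
	rsx::buffered_section sec;
	sec.reset(addr, size);             //records the CPU range and computes page-aligned bounds
	sec.protect(0, vm::page_writable); //removes write permission from the pages

	//later, from a (hypothetical) access-violation handler:
	const u32 fault_addr = addr;       //illustrative value
	if (sec.is_locked() && sec.overlaps(fault_addr))
	{
		sec.unprotect();               //restores RW access
		sec.set_dirty(true);           //forces a re-upload on next use
	}
}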

View File

@ -109,6 +109,7 @@
<ClCompile Include="Emu\RSX\GL\GLCommonDecompiler.cpp" />
<ClCompile Include="Emu\RSX\GL\GLFragmentProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />

View File

@ -10,6 +10,7 @@
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTextureCache.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Emu\RSX\GL\GLTexture.h" />