vulkan: Start work on buffer synchronization

addendum - fix build

vulkan: separate cached section lifetime management

bug fixes
This commit is contained in:
kd-11 2017-04-21 16:35:13 +03:00
parent b1e8eefad0
commit fd754a4ddc
6 changed files with 440 additions and 38 deletions

View File

@ -9,6 +9,7 @@
#include "define_new_memleakdetect.h"
#include "GLProgramBuffer.h"
#include "GLTextOut.h"
#include "../rsx_utils.h"
#include "../rsx_cache.h"
#pragma comment(lib, "opengl32.lib")
@ -26,30 +27,6 @@ struct work_item
volatile bool received = false;
};
//Describes a guest framebuffer surface bound to the GL backend
//(superseded by rsx::gcm_framebuffer_info elsewhere in this commit)
struct gcm_buffer_info
{
	u32 address = 0;
	u32 pitch = 0;

	//In-class initializers below fix members previously left uninitialized
	//by the hand-written default constructor
	bool is_depth_surface = false;
	rsx::surface_color_format color_format = {};
	rsx::surface_depth_format depth_format = {};
	u16 width = 0;
	u16 height = 0;

	gcm_buffer_info() = default;

	gcm_buffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
		:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
	{}
};
class GLGSRender : public GSRender
{
private:
@ -93,8 +70,8 @@ private:
std::mutex queue_guard;
std::list<work_item> work_queue;
gcm_buffer_info surface_info[rsx::limits::color_buffers_count];
gcm_buffer_info depth_surface_info;
rsx::gcm_framebuffer_info surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info depth_surface_info;
bool flush_draw_buffers = false;

View File

@ -8,6 +8,8 @@
#include "VKFormats.h"
extern cfg::bool_entry g_cfg_rsx_overlay;
extern cfg::bool_entry g_cfg_rsx_write_color_buffers;
extern cfg::bool_entry g_cfg_rsx_write_depth_buffer;
namespace
{
@ -59,13 +61,13 @@ namespace vk
return std::make_pair(VK_FORMAT_R5G6B5_UNORM_PACK16, vk::default_component_map());
case rsx::surface_color_format::a8r8g8b8:
case rsx::surface_color_format::a8b8g8r8:
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, vk::default_component_map());
case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
case rsx::surface_color_format::a8b8g8r8:
{
VkComponentMapping no_alpha = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_ONE };
return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, no_alpha);
@ -629,6 +631,8 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
{
if (is_writing)
return m_texture_cache.invalidate_address(address);
else
return m_texture_cache.flush_address(address, *m_device, m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
return false;
}
@ -830,6 +834,8 @@ void VKGSRender::end()
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - vertex_end).count();
rsx::thread::end();
copy_render_targets_to_dma_location();
}
void VKGSRender::set_viewport()
@ -987,6 +993,37 @@ void VKGSRender::sync_at_semaphore_release()
CHECK_RESULT(vkResetFences(*m_device, 1, &m_submit_fence));
CHECK_RESULT(vkResetCommandPool(*m_device, m_command_buffer_pool, 0));
open_command_buffer();
m_flush_draw_buffers = true;
}
//Writes the currently bound render targets back into their cache sections so a
//later CPU access can read them. No-op unless a draw touched the buffers.
void VKGSRender::copy_render_targets_to_dma_location()
{
	if (!m_flush_draw_buffers)
		return;

	if (g_cfg_rsx_write_color_buffers)
	{
		for (auto &info : m_surface_info)
		{
			//A pitch of zero marks an unused/ignored color buffer
			if (info.pitch == 0)
				continue;

			m_texture_cache.flush_memory_to_cache(info.address, info.pitch * info.height,
				m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
		}
	}

	if (g_cfg_rsx_write_depth_buffer && m_depth_surface_info.pitch)
	{
		m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height,
			m_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue());
	}

	m_flush_draw_buffers = false;
}
bool VKGSRender::do_method(u32 cmd, u32 arg)
@ -1293,6 +1330,8 @@ void VKGSRender::prepare_rtts()
if (!m_rtts_dirty)
return;
copy_render_targets_to_dma_location();
m_rtts_dirty = false;
u32 clip_width = rsx::method_registers.surface_clip_width();
@ -1300,16 +1339,35 @@ void VKGSRender::prepare_rtts()
u32 clip_x = rsx::method_registers.surface_clip_origin_x();
u32 clip_y = rsx::method_registers.surface_clip_origin_y();
auto surface_addresses = get_color_surface_addresses();
auto zeta_address = get_zeta_surface_address();
const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(),
rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() };
m_rtts.prepare_render_target(&m_command_buffer,
rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(),
rsx::method_registers.surface_clip_width(), rsx::method_registers.surface_clip_height(),
clip_width, clip_height,
rsx::method_registers.surface_color_target(),
get_color_surface_addresses(), get_zeta_surface_address(),
surface_addresses, zeta_address,
(*m_device), &m_command_buffer, m_optimal_tiling_supported_formats, m_memory_type_mapping);
//Reset framebuffer information
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
m_surface_info[i].address = m_surface_info[i].pitch = 0;
m_surface_info[i].width = clip_width;
m_surface_info[i].height = clip_height;
m_surface_info[i].color_format = rsx::method_registers.surface_color();
}
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
m_depth_surface_info.width = clip_width;
m_depth_surface_info.height = clip_height;
m_depth_surface_info.depth_format = rsx::method_registers.surface_depth_fmt();
//Bind created rtts as current fbo...
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
std::vector<std::unique_ptr<vk::image_view>> fbo_images;
for (u8 index : draw_buffers)
@ -1324,6 +1382,16 @@ void VKGSRender::prepare_rtts()
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
m_surface_info[index].address = surface_addresses[index];
m_surface_info[index].pitch = surface_pitchs[index];
if (surface_pitchs[index] <= 64)
{
if (clip_width > surface_pitchs[index])
//Ignore this buffer (usually set to 64)
m_surface_info[index].pitch = 0;
}
}
m_draw_buffers_count = static_cast<u32>(fbo_images.size());
@ -1340,6 +1408,37 @@ void VKGSRender::prepare_rtts()
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
m_depth_surface_info.address = zeta_address;
m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch();
if (m_depth_surface_info.pitch <= 64 && clip_width > m_depth_surface_info.pitch)
m_depth_surface_info.pitch = 0;
}
if (g_cfg_rsx_write_color_buffers)
{
for (u8 index : draw_buffers)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height);
}
}
if (g_cfg_rsx_write_depth_buffer)
{
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 range = pitch * m_depth_surface_info.height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height);
}
}
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());

View File

@ -8,11 +8,9 @@
#include "restore_new.h"
#include <Utilities/optional.hpp>
#include "define_new_memleakdetect.h"
#define RSX_DEBUG 1
#include "VKProgramBuffer.h"
#include "../GCM.h"
#include "../rsx_utils.h"
#pragma comment(lib, "VKstatic.1.lib")
@ -86,6 +84,10 @@ private:
u32 m_used_descriptors = 0;
u8 m_draw_buffers_count = 0;
rsx::gcm_framebuffer_info m_surface_info[rsx::limits::color_buffers_count];
rsx::gcm_framebuffer_info m_depth_surface_info;
bool m_flush_draw_buffers = false;
public:
VKGSRender();
~VKGSRender();
@ -96,6 +98,7 @@ private:
void open_command_buffer();
void sync_at_semaphore_release();
void prepare_rtts();
void copy_render_targets_to_dma_location();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
public:

View File

@ -993,6 +993,11 @@ namespace vk
vkFreeCommandBuffers(pool->get_owner(), (*pool), 1, &commands);
}
//Returns the pool this command buffer was allocated from (non-owning reference)
vk::command_pool& get_command_pool() const
{
	return *pool;
}
operator VkCommandBuffer()
{
return commands;

View File

@ -8,19 +8,27 @@ namespace vk
{
class cached_texture_section : public rsx::buffered_section
{
u16 pitch;
u16 width;
u16 height;
u16 depth;
u16 mipmaps;
std::unique_ptr<vk::image_view> uploaded_image_view;
std::unique_ptr<vk::image> uploaded_texture;
std::unique_ptr<vk::image> managed_texture = nullptr;
//DMA relevant data
u16 native_pitch;
VkFence dma_fence = VK_NULL_HANDLE;
vk::render_device* m_device = nullptr;
vk::image *vram_texture = nullptr;
std::unique_ptr<vk::buffer> dma_buffer;
public:
cached_texture_section() {}
void create(u16 w, u16 h, u16 depth, u16 mipmaps, vk::image_view *view, vk::image *image)
void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 native_pitch = 0, bool managed=true)
{
width = w;
height = h;
@ -28,7 +36,28 @@ namespace vk
this->mipmaps = mipmaps;
uploaded_image_view.reset(view);
uploaded_texture.reset(image);
vram_texture = image;
if (managed)
managed_texture.reset(image);
//TODO: Properly compute these values
this->native_pitch = native_pitch;
pitch = cpu_address_range / height;
}
//Destroys the host-visible readback buffer and its fence.
//Safe to call repeatedly; resources are only released when present.
void release_dma_resources()
{
	dma_buffer.reset();

	//Destroy the fence independently of the buffer: if fence creation succeeded
	//but the buffer was never allocated, the old nesting leaked the fence
	if (dma_fence != VK_NULL_HANDLE)
	{
		//m_device is guaranteed set here - it is assigned before the fence is created
		vkDestroyFence(*m_device, dma_fence, nullptr);
		dma_fence = VK_NULL_HANDLE;
	}
}
bool matches(u32 rsx_address, u32 rsx_size) const
@ -51,7 +80,7 @@ namespace vk
bool exists() const
{
return (uploaded_texture.get() != nullptr);
return (vram_texture != nullptr);
}
u16 get_width() const
@ -71,7 +100,185 @@ namespace vk
std::unique_ptr<vk::image>& get_texture()
{
return uploaded_texture;
return managed_texture;
}
//A section can be flushed back to guest memory when it is page-protected
//(ro/no) or when it wraps an externally owned texture (no view of its own
//but a valid vram_texture, i.e. a locked render target)
bool is_flushable() const
{
	const bool page_protected = (protection == utils::protection::ro) || (protection == utils::protection::no);
	const bool external_surface = (uploaded_image_view.get() == nullptr) && (vram_texture != nullptr);

	return page_protected || external_surface;
}
//Records and synchronously submits a GPU->host copy of vram_texture into the
//section's dma_buffer, then restores the command buffer to a recording state.
//cmd          - the renderer's main command buffer; it is ended, submitted, reset and re-begun here
//heap_index   - memory type index used to allocate the host-visible dma_buffer
//submit_queue - queue the copy is submitted on (waited on with a fence before returning)
//layout       - the image's current layout; restored after the transfer
//NOTE(review): this stalls the caller until the copy completes (vkWaitForFences below)
void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, VkImageLayout layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
	//Lazily capture the device from the command buffer's pool on first use
	if (m_device == nullptr)
	{
		m_device = &cmd.get_command_pool().get_owner();
	}

	//Lazily create the fence used to wait for the transfer submission
	if (dma_fence == VK_NULL_HANDLE)
	{
		VkFenceCreateInfo createInfo = {};
		createInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
		vkCreateFence(*m_device, &createInfo, nullptr, &dma_fence);
	}

	//Lazily allocate the host-visible staging buffer sized to the whole surface
	if (dma_buffer.get() == nullptr)
	{
		dma_buffer.reset(new vk::buffer(*m_device, native_pitch * height, heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0));
	}

	//Tightly packed destination: bufferRowLength/bufferImageHeight in texels
	VkBufferImageCopy copyRegion = {};
	copyRegion.bufferOffset = 0;
	copyRegion.bufferRowLength = width;
	copyRegion.bufferImageHeight = height;
	copyRegion.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
	copyRegion.imageOffset = {};
	copyRegion.imageExtent = {width, height, 1};

	//Transition to TRANSFER_SRC, copy, then transition back to the caller's layout
	VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
	change_image_layout(cmd, vram_texture->value, layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
	vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
	change_image_layout(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, layout, subresource_range);

	//Submit immediately so the data is available for a subsequent flush()
	CHECK_RESULT(vkEndCommandBuffer(cmd));

	VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
	VkCommandBuffer command_buffer = cmd;

	VkSubmitInfo infos = {};
	infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
	infos.commandBufferCount = 1;
	infos.pCommandBuffers = &command_buffer;
	infos.pWaitDstStageMask = &pipe_stage_flags;
	infos.pWaitSemaphores = nullptr;
	infos.waitSemaphoreCount = 0;

	CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence));

	//Now we need to restart the command-buffer to restore it to the way it was before...
	//Block until the copy finishes, then reset pool + fence and re-begin recording
	CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX));
	CHECK_RESULT(vkResetCommandPool(*m_device, cmd.get_command_pool(), 0));
	CHECK_RESULT(vkResetFences(*m_device, 1, &dma_fence));

	VkCommandBufferInheritanceInfo inheritance_info = {};
	inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;

	VkCommandBufferBeginInfo begin_infos = {};
	begin_infos.pInheritanceInfo = &inheritance_info;
	begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
	CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos));
}
template<typename T>
void do_memory_transfer(void *pixels_dst, void *pixels_src)
{
if (pitch == native_pitch)
{
if (sizeof T == 1)
memcpy(pixels_dst, pixels_src, native_pitch * height);
else
{
const u32 block_size = native_pitch * height / sizeof T;
auto typed_dst = (be_t<T> *)pixels_dst;
auto typed_src = (T *)pixels_src;
for (u8 n = 0; n < block_size; ++n)
typed_dst[n] = typed_src[n];
}
}
else
{
if (sizeof T == 1)
{
u8 *typed_dst = (u8 *)pixels_dst;
u8 *typed_src = (u8 *)pixels_src;
//TODO: Scaling
for (int row = 0; row < height; ++row)
{
memcpy(typed_dst, typed_src, native_pitch);
typed_dst += pitch;
typed_src += native_pitch;
}
}
else
{
const u32 src_step = native_pitch / sizeof T;
const u32 dst_step = pitch / sizeof T;
auto typed_dst = (be_t<T> *)pixels_dst;
auto typed_src = (T *)pixels_src;
for (int row = 0; row < height; ++row)
{
for (int px = 0; px < width; ++px)
{
typed_dst[px] = typed_src[px];
}
typed_dst += dst_step;
typed_src += src_step;
}
}
}
}
//Writes this section's GPU contents back into guest memory.
//If the data was never staged (cache miss), performs a blocking copy_texture first.
//Unprotects the pages before writing, byte-swaps per-texel, then releases the
//one-shot DMA resources. vram_texture is dropped; m_rtts owns the image.
void flush(vk::render_device& dev, vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue)
{
	if (m_device == nullptr)
		m_device = &dev;

	//No staged copy available - stall and fetch the data from the GPU now
	if (dma_fence == VK_NULL_HANDLE || dma_buffer.get() == nullptr)
	{
		LOG_WARNING(RSX, "Cache miss at address 0x%X. This is gonna hurt...", cpu_address_base);
		copy_texture(cmd, heap_index, submit_queue, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);

		verify (HERE), (dma_fence != VK_NULL_HANDLE && dma_buffer.get());
	}

	//Make the guest pages writable before copying into them
	protect(utils::protection::rw);

	//TODO: Image scaling, etc
	void* pixels_src = dma_buffer->map(0, VK_WHOLE_SIZE);
	void* pixels_dst = vm::base(cpu_address_base);

	//We have to do our own byte swapping since the driver doesn't do it for us
	//Element size is derived from the surface's natural row size
	const u8 bpp = native_pitch / width;

	switch (bpp)
	{
	default:
		LOG_ERROR(RSX, "Invalid bpp %d", bpp);
		//NOTE(review): deliberate fallthrough to the 1-byte path on unknown bpp
	case 1:
		do_memory_transfer<u8>(pixels_dst, pixels_src);
		break;
	case 2:
		do_memory_transfer<u16>(pixels_dst, pixels_src);
		break;
	case 4:
		do_memory_transfer<u32>(pixels_dst, pixels_src);
		break;
	case 8:
		do_memory_transfer<u64>(pixels_dst, pixels_src);
		break;
	}

	dma_buffer->unmap();

	//Cleanup
	//These sections are usually one-use only so we destroy system resources
	//TODO: Recycle dma buffers
	release_dma_resources();
	vram_texture = nullptr; //Let m_rtts handle lifetime management
}
};
@ -111,6 +318,7 @@ namespace vk
m_temporary_image_view.push_back(std::move(tex.get_view()));
}
tex.release_dma_resources();
return tex;
}
}
@ -120,6 +328,20 @@ namespace vk
return m_cache[m_cache.size() - 1];
}
//Finds a live (non-dirty), flushable section exactly matching the range
//[address, address + range). Returns nullptr when none exists.
cached_texture_section* find_flushable_section(const u32 address, const u32 range)
{
	for (auto &section : m_cache)
	{
		const bool eligible = !section.is_dirty() && section.is_flushable();

		if (eligible && section.matches(address, range))
			return &section;
	}

	return nullptr;
}
void purge_cache()
{
for (auto &tex : m_cache)
@ -132,6 +354,8 @@ namespace vk
if (tex.is_locked())
tex.unprotect();
tex.release_dma_resources();
}
m_temporary_image_view.clear();
@ -302,6 +526,73 @@ namespace vk
return view;
}
//Registers a render target's memory range with the cache and write-protects
//its pages so CPU accesses can be intercepted and serviced from the GPU copy
void lock_memory_region(vk::render_target* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height)
{
	auto &region = find_cached_texture(memory_address, memory_size, true, width, height, 1);

	//managed=false: m_rtts owns the image, the cache only references it
	region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false);

	if (region.is_locked())
		return;

	region.reset(memory_address, memory_size);
	region.protect(utils::protection::no);
	region.set_dirty(false);
	texture_cache_range = region.get_min_max(texture_cache_range);
}
//Stages a locked render target's contents into its host-visible DMA buffer
//so a later flush() can write them back to guest memory
void flush_memory_to_cache(const u32 memory_address, const u32 memory_size, vk::command_buffer&cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
{
	auto *section = find_flushable_section(memory_address, memory_size);

	//TODO: Make this an assertion
	if (!section)
	{
		LOG_ERROR(RSX, "Failed to find section for render target 0x%X + 0x%X", memory_address, memory_size);
		return;
	}

	section->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue);
}
//Flushes every flushable section overlapping the page containing 'address'
//back to guest memory, growing the trampled range as overlaps are discovered.
//Returns true if at least one section was written back.
bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue)
{
	if (address < texture_cache_range.first ||
		address > texture_cache_range.second)
		return false;

	bool response = false;
	std::pair<u32, u32> trampled_range = std::make_pair(0xffffffff, 0x0);

	for (int i = 0; i < (int)m_cache.size(); ++i)
	{
		auto &tex = m_cache[i];

		if (tex.is_dirty()) continue;
		if (!tex.is_flushable()) continue;

		auto overlapped = tex.overlaps_page(trampled_range, address);
		if (std::get<0>(overlapped))
		{
			auto &new_range = std::get<1>(overlapped);

			if (new_range.first != trampled_range.first ||
				new_range.second != trampled_range.second)
			{
				trampled_range = new_range;

				//Range grew: rescan from the start. Set -1 so the loop increment
				//lands on index 0 (the old 'i = 0' skipped element 0 on rescans).
				//The current section is flushed below and marked dirty, so it is
				//not processed twice.
				i = -1;
			}

			//TODO: Map basic host_visible memory without coherent constraint
			tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue);
			tex.set_dirty(true);
			response = true;
		}
	}

	return response;
}
bool invalidate_address(u32 address)
{
if (address < texture_cache_range.first ||

View File

@ -1,5 +1,7 @@
#pragma once
#include "gcm_enums.h"
extern "C"
{
#include <libavutil/pixfmt.h>
@ -7,6 +9,31 @@ extern "C"
namespace rsx
{
//Holds information about a framebuffer surface bound by the guest
struct gcm_framebuffer_info
{
	u32 address = 0;
	u32 pitch = 0;

	//In-class initializers fix members previously left uninitialized by the
	//hand-written default constructor (it only zeroed address and pitch)
	bool is_depth_surface = false;
	rsx::surface_color_format color_format = {};
	rsx::surface_depth_format depth_format = {};
	u16 width = 0;
	u16 height = 0;

	gcm_framebuffer_info() = default;

	gcm_framebuffer_info(const u32 address_, const u32 pitch_, bool is_depth_, const rsx::surface_color_format fmt_, const rsx::surface_depth_format dfmt_, const u16 w, const u16 h)
		:address(address_), pitch(pitch_), is_depth_surface(is_depth_), color_format(fmt_), depth_format(dfmt_), width(w), height(h)
	{}
};
template<typename T>
void pad_texture(void* input_pixels, void* output_pixels, u16 input_width, u16 input_height, u16 output_width, u16 output_height)
{