mirror of https://github.com/RPCS3/rpcs3.git
rsx/vk: Support ingesting pre-processed GPU data as texture input
This commit is contained in:
parent
bef7d793d3
commit
ca054644ef
|
@ -75,6 +75,7 @@ namespace vk
|
||||||
upload_contents_async = 1,
|
upload_contents_async = 1,
|
||||||
initialize_image_layout = 2,
|
initialize_image_layout = 2,
|
||||||
preserve_image_layout = 4,
|
preserve_image_layout = 4,
|
||||||
|
source_is_gpu_resident = 8,
|
||||||
|
|
||||||
// meta-flags
|
// meta-flags
|
||||||
upload_contents_inline = 0,
|
upload_contents_inline = 0,
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
#include "VKCompute.h"
|
||||||
|
#include "VKDMA.h"
|
||||||
#include "VKRenderTargets.h"
|
#include "VKRenderTargets.h"
|
||||||
#include "VKResourceManager.h"
|
#include "VKResourceManager.h"
|
||||||
#include "Emu/RSX/rsx_methods.h"
|
#include "Emu/RSX/rsx_methods.h"
|
||||||
|
@ -681,32 +683,75 @@ namespace vk
|
||||||
subres.depth = 1;
|
subres.depth = 1;
|
||||||
subres.data = { vm::get_super_ptr<const std::byte>(base_addr), static_cast<std::span<const std::byte>::size_type>(rsx_pitch * surface_height * samples_y) };
|
subres.data = { vm::get_super_ptr<const std::byte>(base_addr), static_cast<std::span<const std::byte>::size_type>(rsx_pitch * surface_height * samples_y) };
|
||||||
|
|
||||||
// FIXME: Move to GPU queue
|
|
||||||
std::vector<std::byte> ext_data;
|
|
||||||
const auto range = get_memory_range();
|
const auto range = get_memory_range();
|
||||||
|
rsx::flags32_t upload_flags = upload_contents_inline;
|
||||||
|
u32 heap_align = rsx_pitch;
|
||||||
|
|
||||||
if (auto region = rsx::get_current_renderer()->get_tiled_memory_region(range))
|
if (auto tiled_region = rsx::get_current_renderer()->get_tiled_memory_region(range))
|
||||||
{
|
{
|
||||||
auto real_data = vm::get_super_ptr<u8>(range.start);
|
const auto available_tile_size = tiled_region.tile->size - (range.start - tiled_region.base_address);
|
||||||
ext_data.resize(region.tile->size);
|
const auto max_content_size = tiled_region.tile->pitch * utils::align<u32>(subres.height_in_block, 64);
|
||||||
rsx::tile_texel_data<u32, true>(
|
const auto section_length = std::min(max_content_size, available_tile_size);
|
||||||
ext_data.data(),
|
|
||||||
real_data,
|
const auto dma_mapping = vk::map_dma(range.start, section_length);
|
||||||
region.base_address,
|
const auto scratch_buf = vk::get_scratch_buffer(cmd, section_length * 3); // 0 = linear data, 1 = padding (deswz), 2 = tiled data
|
||||||
range.start - region.base_address,
|
const auto tiled_data_scratch_offset = section_length * 2;
|
||||||
region.tile->size,
|
const auto linear_data_scratch_offset = 0;
|
||||||
region.tile->bank,
|
|
||||||
region.tile->pitch,
|
// Schedule the job
|
||||||
subres.width_in_block,
|
const RSX_detiler_config config =
|
||||||
subres.height_in_block
|
{
|
||||||
);
|
.tile_base_address = tiled_region.base_address,
|
||||||
subres.data = std::span(ext_data);
|
.tile_base_offset = range.start - tiled_region.base_address,
|
||||||
|
.tile_size = tiled_region.tile->size,
|
||||||
|
.tile_pitch = tiled_region.tile->pitch,
|
||||||
|
.bank = tiled_region.tile->bank,
|
||||||
|
|
||||||
|
.dst = scratch_buf,
|
||||||
|
.dst_offset = linear_data_scratch_offset,
|
||||||
|
.src = scratch_buf,
|
||||||
|
.src_offset = section_length * 2,
|
||||||
|
|
||||||
|
.image_width = subres.width_in_block,
|
||||||
|
.image_height = subres.height_in_block,
|
||||||
|
.image_pitch = subres.width_in_block * static_cast<u32>(get_bpp()),
|
||||||
|
.image_bpp = get_bpp()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Transfer
|
||||||
|
VkBufferCopy copy_rgn
|
||||||
|
{
|
||||||
|
.srcOffset = dma_mapping.first,
|
||||||
|
.dstOffset = tiled_data_scratch_offset,
|
||||||
|
.size = section_length
|
||||||
|
};
|
||||||
|
vkCmdCopyBuffer(cmd, dma_mapping.second->value, scratch_buf->value, 1, &copy_rgn);
|
||||||
|
|
||||||
|
// Barrier
|
||||||
|
vk::insert_buffer_memory_barrier(
|
||||||
|
cmd, scratch_buf->value, linear_data_scratch_offset, section_length,
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||||
|
|
||||||
|
// Detile
|
||||||
|
vk::get_compute_task<vk::cs_tile_memcpy<RSX_detiler_op::decode>>()->run(cmd, config);
|
||||||
|
|
||||||
|
// Barrier
|
||||||
|
vk::insert_buffer_memory_barrier(
|
||||||
|
cmd, scratch_buf->value, linear_data_scratch_offset, subres.width_in_block * get_bpp() * subres.height_in_block,
|
||||||
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||||
|
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT);
|
||||||
|
|
||||||
|
// FIXME: !!EVIL!!
|
||||||
|
subres.data = { scratch_buf, linear_data_scratch_offset };
|
||||||
|
upload_flags |= source_is_gpu_resident;
|
||||||
|
heap_align = subres.width_in_block * get_bpp();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
|
if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]]
|
||||||
{
|
{
|
||||||
push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||||
vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline);
|
vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, heap_align, upload_flags);
|
||||||
pop_layout(cmd);
|
pop_layout(cmd);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -735,7 +780,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load Cell data into temp buffer
|
// Load Cell data into temp buffer
|
||||||
vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline);
|
vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, heap_align, upload_flags);
|
||||||
|
|
||||||
// Write into final image
|
// Write into final image
|
||||||
if (content != final_dst)
|
if (content != final_dst)
|
||||||
|
|
|
@ -1009,13 +1009,19 @@ namespace vk
|
||||||
{
|
{
|
||||||
caps.supports_byteswap = (image_linear_size >= 1024);
|
caps.supports_byteswap = (image_linear_size >= 1024);
|
||||||
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
caps.supports_hw_deswizzle = caps.supports_byteswap;
|
||||||
caps.supports_zero_copy = false;// caps.supports_byteswap;
|
caps.supports_zero_copy = caps.supports_byteswap;
|
||||||
caps.supports_vtc_decoding = false;
|
caps.supports_vtc_decoding = false;
|
||||||
check_caps = false;
|
check_caps = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto buf_allocator = [&]() -> std::tuple<void*, usz>
|
auto buf_allocator = [&]() -> std::tuple<void*, usz>
|
||||||
{
|
{
|
||||||
|
if (image_setup_flags & source_is_gpu_resident)
|
||||||
|
{
|
||||||
|
// We should never reach here, unless something is very wrong...
|
||||||
|
fmt::throw_exception("Cannot allocate CPU memory for GPU-only data");
|
||||||
|
}
|
||||||
|
|
||||||
// Map with extra padding bytes in case of realignment
|
// Map with extra padding bytes in case of realignment
|
||||||
offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8);
|
offset_in_upload_buffer = upload_heap.alloc<512>(image_linear_size + 8);
|
||||||
void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8);
|
void* mapped_buffer = upload_heap.map(offset_in_upload_buffer, image_linear_size + 8);
|
||||||
|
@ -1026,6 +1032,21 @@ namespace vk
|
||||||
opt = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
opt = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
||||||
upload_heap.unmap();
|
upload_heap.unmap();
|
||||||
|
|
||||||
|
if (image_setup_flags & source_is_gpu_resident)
|
||||||
|
{
|
||||||
|
// Read from GPU buf if the input is already uploaded.
|
||||||
|
auto [iobuf, io_offset] = layout.data.raw();
|
||||||
|
upload_buffer = static_cast<buffer*>(iobuf);
|
||||||
|
offset_in_upload_buffer = io_offset;
|
||||||
|
// Never upload. Data is already resident.
|
||||||
|
opt.require_upload = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Read from upload buffer
|
||||||
|
upload_buffer = upload_heap.heap.get();
|
||||||
|
}
|
||||||
|
|
||||||
copy_regions.push_back({});
|
copy_regions.push_back({});
|
||||||
auto& copy_info = copy_regions.back();
|
auto& copy_info = copy_regions.back();
|
||||||
copy_info.bufferOffset = offset_in_upload_buffer;
|
copy_info.bufferOffset = offset_in_upload_buffer;
|
||||||
|
@ -1038,8 +1059,6 @@ namespace vk
|
||||||
copy_info.imageSubresource.mipLevel = layout.level;
|
copy_info.imageSubresource.mipLevel = layout.level;
|
||||||
copy_info.bufferRowLength = upload_pitch_in_texel;
|
copy_info.bufferRowLength = upload_pitch_in_texel;
|
||||||
|
|
||||||
upload_buffer = upload_heap.heap.get();
|
|
||||||
|
|
||||||
if (opt.require_upload)
|
if (opt.require_upload)
|
||||||
{
|
{
|
||||||
ensure(!opt.deferred_cmds.empty());
|
ensure(!opt.deferred_cmds.empty());
|
||||||
|
@ -1117,7 +1136,7 @@ namespace vk
|
||||||
copy.size = copy_cmd.length;
|
copy.size = copy_cmd.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (upload_buffer != scratch_buf || offset_in_upload_buffer != scratch_offset)
|
||||||
{
|
{
|
||||||
buffer_copies.push_back({});
|
buffer_copies.push_back({});
|
||||||
auto& copy = buffer_copies.back();
|
auto& copy = buffer_copies.back();
|
||||||
|
@ -1163,7 +1182,7 @@ namespace vk
|
||||||
range_ptr += op.second;
|
range_ptr += op.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (!buffer_copies.empty())
|
||||||
{
|
{
|
||||||
vkCmdCopyBuffer(cmd2, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
|
vkCmdCopyBuffer(cmd2, upload_buffer->value, scratch_buf->value, static_cast<u32>(buffer_copies.size()), buffer_copies.data());
|
||||||
}
|
}
|
||||||
|
|
|
@ -194,6 +194,7 @@ namespace vk
|
||||||
.src = working_buffer,
|
.src = working_buffer,
|
||||||
.src_offset = 0,
|
.src_offset = 0,
|
||||||
|
|
||||||
|
// TODO: Check interaction with anti-aliasing
|
||||||
.image_width = width,
|
.image_width = width,
|
||||||
.image_height = height,
|
.image_height = height,
|
||||||
.image_pitch = real_pitch,
|
.image_pitch = real_pitch,
|
||||||
|
|
|
@ -22,15 +22,22 @@ namespace rsx
|
||||||
mutable void* m_ptr = nullptr;
|
mutable void* m_ptr = nullptr;
|
||||||
mutable usz m_size = 0;
|
mutable usz m_size = 0;
|
||||||
|
|
||||||
std::function<std::tuple<void*, usz> ()> m_allocator = nullptr;
|
std::function<std::tuple<void*, usz>()> m_allocator{};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
io_buffer() = default;
|
io_buffer() = default;
|
||||||
|
|
||||||
|
io_buffer(const io_buffer& that)
|
||||||
|
{
|
||||||
|
m_ptr = that.m_ptr;
|
||||||
|
m_size = that.m_size;
|
||||||
|
m_allocator = that.m_allocator;
|
||||||
|
}
|
||||||
|
|
||||||
template <SpanLike T>
|
template <SpanLike T>
|
||||||
io_buffer(const T& container)
|
io_buffer(const T& container)
|
||||||
{
|
{
|
||||||
m_ptr = reinterpret_cast<void*>(container.data());
|
m_ptr = const_cast<void*>(reinterpret_cast<const void*>(container.data()));
|
||||||
m_size = container.size_bytes();
|
m_size = container.size_bytes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,6 +57,11 @@ namespace rsx
|
||||||
: m_ptr(const_cast<void*>(ptr)), m_size(size)
|
: m_ptr(const_cast<void*>(ptr)), m_size(size)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
std::pair<void*, usz> raw() const
|
||||||
|
{
|
||||||
|
return { m_ptr, m_size };
|
||||||
|
}
|
||||||
|
|
||||||
template <Integral T = u8>
|
template <Integral T = u8>
|
||||||
T* data() const
|
T* data() const
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue