rsx: Use variant based draw commands.

This commit is contained in:
vlj 2016-08-28 17:00:02 +02:00
parent 03c86ae43b
commit 8d54bcbc0d
8 changed files with 819 additions and 634 deletions

View File

@ -80,166 +80,6 @@ namespace
}
}
namespace
{
struct vertex_buffer_visitor
{
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_views;
vertex_buffer_visitor(u32 vtx_cnt, ID3D12GraphicsCommandList* cmdlst, ID3D12Resource* write_vertex_buffer,
d3d12_data_heap& heap)
: vertex_count(vtx_cnt)
, offset_in_vertex_buffers_buffer(0)
, m_buffer_data(heap)
, command_list(cmdlst)
, m_vertex_buffer_data(write_vertex_buffer)
{
}
void operator()(const rsx::vertex_array_buffer& vertex_array)
{
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
UINT buffer_size = element_size * vertex_count;
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void* mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
gsl::span<gsl::byte> mapped_buffer_span = {(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count, vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_array.type, vertex_array.attribute_size, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
//m_timers.buffer_upload_size += buffer_size;
}
void operator()(const rsx::vertex_array_register& vertex_register)
{
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
UINT buffer_size = element_size;
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void* mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
memcpy(mapped_buffer, vertex_register.data.data(), buffer_size);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer, m_buffer_data.get_heap(), heap_offset, buffer_size);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_register.type, vertex_register.attribute_size, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer = get_next_multiple_of<48>(offset_in_vertex_buffers_buffer + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
}
void operator()(const rsx::empty_vertex_array& vbo)
{
}
protected:
u32 vertex_count;
ID3D12GraphicsCommandList* command_list;
ID3D12Resource* m_vertex_buffer_data;
size_t offset_in_vertex_buffers_buffer;
d3d12_data_heap& m_buffer_data;
};
} // End anonymous namespace
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> D3D12GSRender::upload_vertex_attributes(
const std::vector<std::pair<u32, u32>>& vertex_ranges,
gsl::not_null<ID3D12GraphicsCommandList*> command_list)
{
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
u32 vertex_count = get_vertex_count(vertex_ranges);
//verify(HERE), rsx::method_registers.vertex_data_base_index() == 0;
vertex_buffer_visitor visitor(vertex_count, command_list, m_vertex_buffer_data.Get(), m_buffer_data);
const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, vertex_ranges);
for (const auto& vbo : vertex_buffers)
std::apply_visitor(visitor, vbo);
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER));
return visitor.vertex_buffer_views;
}
namespace
{
	/**
	 * Splits an interleaved inline vertex array into one tightly packed buffer per enabled
	 * attribute, records a copy of each into vertex_buffer_placement and returns the matching
	 * SRV descriptions together with the number of vertices found in the raw data.
	 *
	 * @param vertex_attribute_infos  format of each attribute; an attribute with size() == 0 is skipped.
	 * @param inlined_array_raw_data  raw interleaved vertex data.
	 * @param ring_buffer_data        upload heap the staging regions are allocated from.
	 * @param vertex_buffer_placement destination resource of the recorded copies.
	 * @param command_list            command list the copies are recorded on.
	 * @return (SRV descriptions, vertex count). Both are empty/zero when no attribute is enabled.
	 */
	std::tuple<std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>, size_t> upload_inlined_vertex_array(
		gsl::span<const rsx::data_array_format_info, 16> vertex_attribute_infos,
		gsl::span<const gsl::byte> inlined_array_raw_data,
		d3d12_data_heap& ring_buffer_data,
		ID3D12Resource* vertex_buffer_placement,
		ID3D12GraphicsCommandList* command_list
		)
	{
		// We can't rely on vertex_attribute_infos strides here so compute it
		// assuming all attributes are packed
		u32 stride = 0;
		u32 initial_offsets[rsx::limits::vertex_count];
		u8 index = 0;
		for (const auto &info : vertex_attribute_infos)
		{
			// Offset of this attribute inside one interleaved vertex.
			initial_offsets[index++] = stride;

			if (!info.size()) // disabled
				continue;

			stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
		}

		// Every attribute is disabled: there is nothing to upload, and dividing by a zero
		// stride below would be undefined behaviour.
		if (stride == 0)
			return std::make_tuple(std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>(), static_cast<size_t>(0));

		u32 element_count = ::narrow<u32>(inlined_array_raw_data.size_bytes()) / stride;
		std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> result;

		UINT64 vertex_buffer_offset = 0;
		index = 0;
		for (const auto &info : vertex_attribute_infos)
		{
			if (!info.size())
			{
				index++;
				continue;
			}

			u32 element_size = rsx::get_vertex_type_size_on_host(info.type(), info.size());
			UINT buffer_size = element_size * element_count;
			size_t heap_offset = ring_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);

			void *mapped_buffer = ring_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
			gsl::span<gsl::byte> dst = { static_cast<gsl::byte*>(mapped_buffer), buffer_size };

			// Gather this attribute out of every interleaved vertex.
			for (u32 i = 0; i < element_count; i++)
			{
				auto subdst = dst.subspan(i * element_size, element_size);
				auto subsrc = inlined_array_raw_data.subspan(initial_offsets[index] + (i * stride), element_size);
				if (info.type() == rsx::vertex_base_type::ub && info.size() == 4)
				{
					// 4-component ub attributes are stored with their bytes reversed.
					subdst[0] = subsrc[3];
					subdst[1] = subsrc[2];
					subdst[2] = subsrc[1];
					subdst[3] = subsrc[0];
				}
				else
				{
					std::copy(subsrc.begin(), subsrc.end(), subdst.begin());
				}
			}

			ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));

			command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset, ring_buffer_data.get_heap(), heap_offset, buffer_size);

			result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size));
			vertex_buffer_offset = get_next_multiple_of<48>(vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
			index++;
		}

		return std::make_tuple(result, element_count);
	}
}
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{
@ -309,106 +149,335 @@ D3D12_CONSTANT_BUFFER_VIEW_DESC D3D12GSRender::upload_fragment_shader_constants(
};
}
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer_for_emulated_primitives_array(const std::vector<std::pair<u32, u32> > &vertex_ranges)
namespace
{
size_t index_count = 0;
for (const auto &pair : vertex_ranges)
index_count += get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second);
struct vertex_buffer_visitor
{
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_views;
vertex_buffer_visitor(u32 vtx_cnt, ID3D12GraphicsCommandList* cmdlst,
ID3D12Resource* write_vertex_buffer, d3d12_data_heap& heap)
: vertex_count(vtx_cnt), offset_in_vertex_buffers_buffer(0), m_buffer_data(heap),
command_list(cmdlst), m_vertex_buffer_data(write_vertex_buffer)
{
}
void operator()(const rsx::vertex_array_buffer& vertex_array)
{
u32 element_size =
rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
UINT buffer_size = element_size * vertex_count;
size_t heap_offset =
m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void* mapped_buffer =
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
gsl::span<gsl::byte> mapped_buffer_span = {
(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count,
vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer,
m_buffer_data.get_heap(), heap_offset, buffer_size);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_array.type,
vertex_array.attribute_size, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer =
get_next_multiple_of<48>(offset_in_vertex_buffers_buffer +
buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
// m_timers.buffer_upload_size += buffer_size;
}
void operator()(const rsx::vertex_array_register& vertex_register)
{
u32 element_size = rsx::get_vertex_type_size_on_host(
vertex_register.type, vertex_register.attribute_size);
UINT buffer_size = element_size;
size_t heap_offset =
m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void* mapped_buffer =
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
memcpy(mapped_buffer, vertex_register.data.data(), buffer_size);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->CopyBufferRegion(m_vertex_buffer_data, offset_in_vertex_buffers_buffer,
m_buffer_data.get_heap(), heap_offset, buffer_size);
vertex_buffer_views.emplace_back(get_vertex_attribute_srv(vertex_register.type,
vertex_register.attribute_size, offset_in_vertex_buffers_buffer, buffer_size));
offset_in_vertex_buffers_buffer =
get_next_multiple_of<48>(offset_in_vertex_buffers_buffer +
buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
}
void operator()(const rsx::empty_vertex_array& vbo)
{
}
protected:
u32 vertex_count;
ID3D12GraphicsCommandList* command_list;
ID3D12Resource* m_vertex_buffer_data;
size_t offset_in_vertex_buffers_buffer;
d3d12_data_heap& m_buffer_data;
};
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(
const std::vector<std::pair<u32, u32>>& vertex_ranges, d3d12_data_heap& m_buffer_data)
{
size_t index_count = std::accumulate(
vertex_ranges.begin(), vertex_ranges.end(), 0, [](size_t acc, const auto& pair) {
return acc + get_index_count(
rsx::method_registers.current_draw_clause.primitive, pair.second);
});
// Alloc
size_t buffer_size = align(index_count * sizeof(u16), 64);
size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset =
m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
void* mapped_buffer =
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
size_t first = 0;
for (const auto &pair : vertex_ranges)
{
size_t element_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second);
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, (u32)first, (u32)pair.second);
for (const auto& pair : vertex_ranges) {
size_t element_count =
get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.second);
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer,
rsx::method_registers.current_draw_clause.primitive, (u32)first, (u32)pair.second);
mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16);
first += pair.second;
}
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size,
DXGI_FORMAT_R16_UINT
};
m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size,
DXGI_FORMAT_R16_UINT};
return std::make_tuple(index_buffer_view, index_count);
}
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list)
{
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
size_t vertex_count;
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_view;
std::tie(vertex_buffer_view, vertex_count) =
upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info,
{(const gsl::byte*)rsx::method_registers.current_draw_clause.inline_vertex_array.data(),
::narrow<int>(rsx::method_registers.current_draw_clause.inline_vertex_array.size() *
sizeof(uint))},
m_buffer_data, m_vertex_buffer_data.Get(), command_list);
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
rsx::vertex_array_register, rsx::empty_vertex_array>>;
if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
return std::make_tuple(false, vertex_count, vertex_buffer_view);
/**
* Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges.
* A range in vertex_range is a pair whose first element is the index of the beginning of the
* range, and whose second element is the number of vertex in this range.
*/
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(
const std::vector<std::pair<u32, u32>>& vertex_ranges,
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
get_vertex_buffers,
ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data,
gsl::not_null<ID3D12GraphicsCommandList*> command_list)
{
command_list->ResourceBarrier(1,
&CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data,
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
D3D12_INDEX_BUFFER_VIEW index_buffer_view;
size_t index_count;
std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } });
command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count, vertex_buffer_view);
u32 vertex_count = get_vertex_count(vertex_ranges);
verify(HERE), rsx::method_registers.vertex_data_base_index() == 0;
vertex_buffer_visitor visitor(
vertex_count, command_list, m_vertex_buffer_data, m_buffer_data);
const auto& vertex_buffers = get_vertex_buffers(rsx::method_registers, vertex_ranges);
for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);
command_list->ResourceBarrier(1,
&CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER));
return visitor.vertex_buffer_views;
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
/**
 * Splits an interleaved inline vertex array into one tightly packed buffer per enabled
 * attribute, records a copy of each into vertex_buffer_placement and returns the matching SRV
 * descriptions together with the number of vertices found in the raw data.
 *
 * @param vertex_attribute_infos  format of each attribute; an attribute with size() == 0 is skipped.
 * @param inlined_array_raw_data  raw interleaved vertex data.
 * @param ring_buffer_data        upload heap the staging regions are allocated from.
 * @param vertex_buffer_placement destination resource of the recorded copies.
 * @param command_list            command list the copies are recorded on.
 * @return (SRV descriptions, vertex count). Both are empty/zero when no attribute is enabled.
 */
std::tuple<std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>, size_t> upload_inlined_vertex_array(
	gsl::span<const rsx::data_array_format_info, 16> vertex_attribute_infos,
	gsl::span<const gsl::byte> inlined_array_raw_data, d3d12_data_heap& ring_buffer_data,
	ID3D12Resource* vertex_buffer_placement, ID3D12GraphicsCommandList* command_list)
{
	// We can't rely on vertex_attribute_infos strides here so compute it
	// assuming all attributes are packed
	u32 stride = 0;
	u32 initial_offsets[rsx::limits::vertex_count];
	u8 index = 0;
	for (const auto& info : vertex_attribute_infos) {
		// Offset of this attribute inside one interleaved vertex.
		initial_offsets[index++] = stride;

		if (!info.size()) // disabled
			continue;

		stride += rsx::get_vertex_type_size_on_host(info.type(), info.size());
	}

	// Every attribute is disabled: there is nothing to upload, and dividing by a zero stride
	// below would be undefined behaviour.
	if (stride == 0)
		return std::make_tuple(
			std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>(), static_cast<size_t>(0));

	u32 element_count = ::narrow<u32>(inlined_array_raw_data.size_bytes()) / stride;
	std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> result;

	UINT64 vertex_buffer_offset = 0;
	index = 0;
	for (const auto& info : vertex_attribute_infos) {
		if (!info.size()) {
			index++;
			continue;
		}

		u32 element_size = rsx::get_vertex_type_size_on_host(info.type(), info.size());
		UINT buffer_size = element_size * element_count;
		size_t heap_offset =
			ring_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);

		void* mapped_buffer =
			ring_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
		gsl::span<gsl::byte> dst = {static_cast<gsl::byte*>(mapped_buffer), buffer_size};

		// Gather this attribute out of every interleaved vertex.
		for (u32 i = 0; i < element_count; i++) {
			auto subdst = dst.subspan(i * element_size, element_size);
			auto subsrc = inlined_array_raw_data.subspan(
				initial_offsets[index] + (i * stride), element_size);
			if (info.type() == rsx::vertex_base_type::ub && info.size() == 4) {
				// 4-component ub attributes are stored with their bytes reversed.
				subdst[0] = subsrc[3];
				subdst[1] = subsrc[2];
				subdst[2] = subsrc[1];
				subdst[3] = subsrc[0];
			}
			else
			{
				std::copy(subsrc.begin(), subsrc.end(), subdst.begin());
			}
		}

		ring_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));

		command_list->CopyBufferRegion(vertex_buffer_placement, vertex_buffer_offset,
			ring_buffer_data.get_heap(), heap_offset, buffer_size);

		result.emplace_back(get_vertex_attribute_srv(info, vertex_buffer_offset, buffer_size));
		vertex_buffer_offset = get_next_multiple_of<48>(
			vertex_buffer_offset + buffer_size); // 48 is multiple of 2, 4, 6, 8, 12, 16
		index++;
	}

	return std::make_tuple(result, element_count);
}
/**
 * Variant visitor over the draw command kinds (array, indexed array, inlined array).
 *
 * Every overload uploads the vertex data needed by the command and, when the draw must be
 * issued as indexed, writes an index buffer and binds it on the command list.
 *
 * Each overload returns a tuple of:
 *  - bool: true when an index buffer was bound (the draw is indexed),
 *  - size_t: index count when the bool is true, vertex count otherwise,
 *  - the SRV descriptions of the uploaded vertex attributes.
 */
struct draw_command_visitor
{
	draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data,
		ID3D12Resource* vertex_buffer_data,
		std::function<attribute_storage(
			const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
			get_vertex_info_lambda)
		// Listed in member declaration order, which is the order the members are actually
		// initialized in regardless of how this list is written.
		: command_list(cmd_list), m_buffer_data(buffer_data),
		  get_vertex_buffers(get_vertex_info_lambda), m_vertex_buffer_data(vertex_buffer_data)
	{
	}

	// Non-indexed draw from vertex arrays.
	std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
		const rsx::draw_array_command& command)
	{
		if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
			size_t vertex_count = get_vertex_count(command.indexes_range);
			return std::make_tuple(false, vertex_count, upload_attributes(command.indexes_range));
		}

		// Non-native primitive: emulate it with a generated index buffer.
		D3D12_INDEX_BUFFER_VIEW index_buffer_view;
		size_t index_count;
		std::tie(index_buffer_view, index_count) =
			generate_index_buffer_for_emulated_primitives_array(
				command.indexes_range, m_buffer_data);
		command_list->IASetIndexBuffer(&index_buffer_view);
		return std::make_tuple(true, index_count, upload_attributes(command.indexes_range));
	}

	// Indexed draw: the index data is rewritten into the upload heap and bound.
	std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
		const rsx::draw_indexed_array_command& command)
	{
		// Index count
		size_t index_count =
			get_index_count(rsx::method_registers.current_draw_clause.primitive,
				::narrow<int>(get_vertex_count(command.ranges_to_fetch_in_index_buffer)));

		rsx::index_array_type indexed_type = rsx::method_registers.index_type();
		size_t index_size = get_index_type_size(indexed_type);

		// Alloc
		size_t buffer_size = align(index_count * index_size, 64);
		size_t heap_offset =
			m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);

		void* mapped_buffer =
			m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
		u32 min_index, max_index;
		gsl::span<gsl::byte> dst{
			reinterpret_cast<gsl::byte*>(mapped_buffer), ::narrow<u32>(buffer_size)};

		std::tie(min_index, max_index) =
			write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type,
				rsx::method_registers.current_draw_clause.primitive,
				rsx::method_registers.restart_index_enabled(),
				rsx::method_registers.restart_index(), command.ranges_to_fetch_in_index_buffer,
				[](auto prim) { return !is_primitive_native(prim); });

		m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
		D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
			m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset, (UINT)buffer_size,
			get_index_type(indexed_type)};
		// m_timers.buffer_upload_size += buffer_size;
		command_list->IASetIndexBuffer(&index_buffer_view);

		// Vertices [0, max_index] are uploaded so that index values can be used unchanged.
		return std::make_tuple(
			true, index_count, upload_attributes({std::make_pair(0, max_index + 1)}));
	}

	// Draw whose vertex data is inlined in the command itself.
	std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
		const rsx::draw_inlined_array& command)
	{
		size_t vertex_count;
		std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> vertex_buffer_view;
		std::tie(vertex_buffer_view, vertex_count) =
			upload_inlined_vertex_array(rsx::method_registers.vertex_arrays_info,
				{(const gsl::byte*)command.inline_vertex_array.data(),
					::narrow<int>(command.inline_vertex_array.size() * sizeof(uint))},
				m_buffer_data, m_vertex_buffer_data, command_list);

		if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
			return std::make_tuple(false, vertex_count, vertex_buffer_view);

		// Non-native primitive: emulate it with a generated index buffer.
		D3D12_INDEX_BUFFER_VIEW index_buffer_view;
		size_t index_count;
		std::tie(index_buffer_view, index_count) =
			generate_index_buffer_for_emulated_primitives_array(
				{{0, (u32)vertex_count}}, m_buffer_data);
		command_list->IASetIndexBuffer(&index_buffer_view);
		return std::make_tuple(true, index_count, vertex_buffer_view);
	}

private:
	// Uploads the attributes of the given ranges with this visitor's heap, destination buffer
	// and command list.
	std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_attributes(
		const std::vector<std::pair<u32, u32>>& vertex_ranges)
	{
		return upload_vertex_attributes(vertex_ranges, get_vertex_buffers, m_vertex_buffer_data,
			m_buffer_data, command_list);
	}

	ID3D12GraphicsCommandList* command_list;
	d3d12_data_heap& m_buffer_data;
	std::function<attribute_storage(
		const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
		get_vertex_buffers;
	ID3D12Resource* m_vertex_buffer_data;
};
} // End anonymous namespace
/**
 * Dispatches the current draw command to draw_command_visitor.
 *
 * @param command_list command list handed to the visitor.
 * @return the tuple produced by the visitor overload matching the current draw command.
 */
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>>
D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* command_list)
{
	// Forwards to the renderer's own get_vertex_buffers().
	auto fetch_vertex_buffers = [this](const auto& state, const auto& list) {
		return get_vertex_buffers(state, list);
	};

	draw_command_visitor visitor(
		command_list, m_buffer_data, m_vertex_buffer_data.Get(), fetch_vertex_buffers);

	return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
}
#endif

View File

@ -141,16 +141,6 @@ private:
*/
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> > upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list);
/**
* Upload all enabled vertex attributes for the vertices in the ranges described by vertex_ranges.
* Each range in vertex_ranges is a pair whose first element is the index of the first vertex of
* the range, and whose second element is the number of vertices in the range.
*/
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(const std::vector<std::pair<u32, u32> > &vertex_ranges,
gsl::not_null<ID3D12GraphicsCommandList*> command_list);
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(const std::vector<std::pair<u32, u32> > &vertex_ranges);
void upload_and_bind_scale_offset_matrix(size_t descriptor_index);
void upload_and_bind_vertex_shader_constants(size_t descriptor_index);
D3D12_CONSTANT_BUFFER_VIEW_DESC upload_fragment_shader_constants();

View File

@ -60,11 +60,6 @@ private:
// Return element to draw and in case of indexed draw index type and offset in index buffer
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > set_vertex_buffer();
void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, const u32& texture_index_offset);
// Returns vertex count
u32 upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset);
public:
bool load_program();
void init_buffers(bool skip_reading = false);

View File

@ -196,100 +196,12 @@ namespace
}
throw;
}
}
std::tuple<u32, std::optional<std::tuple<GLenum, u32> > > GLGSRender::set_vertex_buffer()
{
//initialize vertex attributes
//merge all vertex arrays
static const u32 texture_index_offset = rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();
u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
u32 min_index = 0, max_index = 0;
u32 max_vertex_attrib_size = 0;
u32 vertex_or_index_count;
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{
if (rsx::method_registers.vertex_arrays_info[index].size() || rsx::method_registers.register_vertex_info[index].size)
{
max_vertex_attrib_size += 16;
}
}
std::optional<std::tuple<GLenum, u32> > index_info;
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
rsx::index_array_type type = rsx::method_registers.index_type();
u32 type_size = ::narrow<u32>(get_index_type_size(type));
vertex_or_index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.get_elements_count());
u32 max_size = vertex_or_index_count * type_size;
auto mapping = m_index_ring_buffer.alloc_and_map(max_size);
void *ptr = mapping.first;
u32 offset_in_index_buffer = mapping.second;
std::tie(min_index, max_index, vertex_or_index_count) = upload_index_buffer(get_raw_index_array(rsx::method_registers.current_draw_clause.first_count_commands), ptr, type, rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.first_count_commands, vertex_or_index_count);
min_index = 0; // we must keep index to vertex mapping
m_index_ring_buffer.unmap();
index_info = std::make_tuple(get_index_type(type), offset_in_index_buffer);
}
else
{
u32 vertex_count;
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
// We need to go through array to determine vertex count so upload it
vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset);
}
else
{
assert(rsx::method_registers.current_draw_clause.command == rsx::draw_command::array);
vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
max_index = vertex_count - 1 + min_index;
}
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive))
{
u32 offset_in_index_buffer;
std::tie(vertex_or_index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(rsx::method_registers.current_draw_clause.first_count_commands, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
index_info = std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer);
}
else
{
vertex_or_index_count = vertex_count;
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
// Already uploaded when determining vertex count, we can return here
return std::make_tuple(vertex_or_index_count, index_info);
}
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now - then).count();
return std::make_tuple(vertex_or_index_count, index_info);
}
namespace
{
struct vertex_buffer_visitor
{
vertex_buffer_visitor(u32 vtx_cnt,
u32 texture_idx_offset,
gl::ring_buffer& heap, gl::glsl::program* prog,
gl::texture* attrib_buffer,
u32 min_texbuffer_offset)
vertex_buffer_visitor(u32 vtx_cnt, u32 texture_idx_offset, gl::ring_buffer& heap,
gl::glsl::program* prog, gl::texture* attrib_buffer, u32 min_texbuffer_offset)
: vertex_count(vtx_cnt)
, m_attrib_ring_info(heap)
, m_program(prog)
@ -339,7 +251,7 @@ namespace
{
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
const u32 gl_type = to_gl_internal_type(vertex_register.type, vertex_register.attribute_size);
const size_t data_size = element_size;
const u32 data_size = element_size;
auto& texture = m_gl_attrib_buffers[vertex_register.index];
@ -378,39 +290,154 @@ namespace
GLint m_min_texbuffer_alignment;
};
} // End anonymous namespace
struct draw_command_visitor
{
using attribute_storage = std::vector<
std::variant<rsx::vertex_array_buffer, rsx::vertex_array_register, rsx::empty_vertex_array>>;
void GLGSRender::upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size, const u32& texture_index_offset)
draw_command_visitor(gl::ring_buffer& index_ring_buffer, gl::ring_buffer& attrib_ring_buffer,
gl::texture* gl_attrib_buffers, gl::glsl::program* program, GLint min_texbuffer_alignment,
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)> gvb)
: m_index_ring_buffer(index_ring_buffer)
, m_attrib_ring_buffer(attrib_ring_buffer)
, m_gl_attrib_buffers(gl_attrib_buffers)
, m_program(program)
, m_min_texbuffer_alignment(min_texbuffer_alignment)
, get_vertex_buffers(gvb)
{
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) {
if (rsx::method_registers.vertex_arrays_info[index].size() ||
rsx::method_registers.register_vertex_info[index].size)
{
max_vertex_attrib_size += 16;
}
}
}
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> operator()(
const rsx::draw_array_command& command)
{
u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
u32 max_index = vertex_count - 1 + min_index;
if (!gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
u32 index_count;
u32 offset_in_index_buffer;
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
rsx::method_registers.current_draw_clause.first_count_commands,
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
// std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
// m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now -
//then).count();
return std::make_tuple(index_count,
std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), offset_in_index_buffer));
}
upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);
// std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
// m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(now -
//then).count();
return std::make_tuple(vertex_count, std::optional<std::tuple<GLenum, u32>>());
}
/**
 * Handles an indexed draw.
 *
 * Writes the (possibly expanded) index data into the index ring buffer,
 * uploads the vertices from 0 to the highest referenced index, and returns
 * (expanded index count, (GL index type, offset in the index ring buffer)).
 */
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> operator()(
    const rsx::draw_indexed_array_command& command)
{
    auto& clause = rsx::method_registers.current_draw_clause;

    rsx::index_array_type type = rsx::method_registers.index_type();
    u32 type_size = ::narrow<u32>(get_index_type_size(type));
    u32 index_count = get_index_count(clause.primitive, clause.get_elements_count());
    u32 max_size = index_count * type_size;

    auto mapping = m_index_ring_buffer.alloc_and_map(max_size);
    void* ptr = mapping.first;
    u32 offset_in_index_buffer = mapping.second;

    u32 min_index = 0, max_index = 0;
    u32 expanded_index_count;
    std::tie(min_index, max_index, expanded_index_count) =
        upload_index_buffer(command.raw_index_buffer, ptr, type, clause.primitive,
            clause.first_count_commands, index_count);
    min_index = 0; // we must keep index to vertex mapping
    m_index_ring_buffer.unmap();

    // Use the member max_vertex_attrib_size accumulated in the constructor.
    // A local of the same name initialised to 0 used to shadow it here, which
    // made upload_vertex_buffers reserve `verts * 0` bytes of attribute storage.
    upload_vertex_buffers(min_index, max_index, max_vertex_attrib_size, texture_index_offset);

    return std::make_tuple(
        expanded_index_count, std::make_tuple(get_index_type(type), offset_in_index_buffer));
}
/**
 * Handles a draw whose vertex data is inlined in the command stream.
 *
 * Returns (element count, optional (index type, offset in the index ring
 * buffer)); the optional part is only set for emulated primitives.
 */
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> operator()(
    const rsx::draw_inlined_array& command)
{
    // The vertex count is only known once the inline array has been walked,
    // so the upload has to happen first.
    const u32 vertex_count = upload_inline_array(max_vertex_attrib_size, texture_index_offset);

    auto& clause = rsx::method_registers.current_draw_clause;
    if (gl::is_primitive_native(clause.primitive)) {
        return std::make_tuple(vertex_count, std::optional<std::tuple<GLenum, u32>>());
    }

    u32 emulated_index_count;
    u32 index_buffer_offset;
    std::tie(emulated_index_count, index_buffer_offset) =
        get_index_array_for_emulated_non_indexed_draw(
            clause.first_count_commands, clause.primitive, m_index_ring_buffer);

    return std::make_tuple(emulated_index_count,
        std::make_tuple(static_cast<GLenum>(GL_UNSIGNED_SHORT), index_buffer_offset));
}
private:
// Added to the attribute index to pick the texture unit / sampler slot used
// for that attribute's buffer (see upload_vertex_buffers).
const u32 texture_index_offset =
rsx::limits::fragment_textures_count + rsx::limits::vertex_textures_count;
// Per-vertex byte budget: the constructor adds 16 for every attribute that is
// enabled either as an array or as a register.
u32 max_vertex_attrib_size = 0;
// Ring buffer receiving index data.
gl::ring_buffer& m_index_ring_buffer;
// Ring buffer reserved/mapped for vertex attribute data.
gl::ring_buffer& m_attrib_ring_buffer;
gl::texture* m_gl_attrib_buffers;
gl::glsl::program* m_program;
GLint m_min_texbuffer_alignment;
// Callback returning the vertex buffers for a (first, count) range list.
// NOTE(review): the signature takes rsx_state and the range vector by value,
// so both are presumably copied on every call; the Vulkan visitor declares the
// same callback with const references - confirm whether that was intended here.
std::function<attribute_storage(rsx::rsx_state, std::vector<std::pair<u32, u32>>)>
get_vertex_buffers;
/**
 * Uploads the vertex attributes for the inclusive range [min_index, max_index].
 *
 * Reserves `vertex count * max_vertex_attrib_size` bytes in the attribute ring
 * buffer, unbinds every attribute texture buffer the program exposes, then lets
 * vertex_buffer_visitor process each buffer returned by get_vertex_buffers.
 *
 * @param min_index               first vertex to upload
 * @param max_index               last vertex to upload (inclusive)
 * @param max_vertex_attrib_size  per-vertex byte budget used for the reservation
 * @param texture_index_offset    offset added to the attribute index to get its texture unit
 */
void upload_vertex_buffers(u32 min_index, u32 max_index, const u32& max_vertex_attrib_size,
    const u32& texture_index_offset)
{
    const u32 verts_allocated = max_index - min_index + 1;
    __glcheck m_attrib_ring_buffer.reserve_and_map(verts_allocated * max_vertex_attrib_size);

    // Disable texture then reenable them
    // Is it really necessary ?
    for (int index = 0; index < rsx::limits::vertex_count; ++index) {
        int location;
        if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue;

        glActiveTexture(GL_TEXTURE0 + index + texture_index_offset);
        glBindTexture(GL_TEXTURE_BUFFER, 0);
        glProgramUniform1i(m_program->id(), location, index + texture_index_offset);
    }

    vertex_buffer_visitor visitor(verts_allocated, texture_index_offset, m_attrib_ring_buffer,
        m_program, m_gl_attrib_buffers, m_min_texbuffer_alignment);
    const auto& vertex_buffers =
        get_vertex_buffers(rsx::method_registers, {{min_index, verts_allocated}});
    for (const auto& vbo : vertex_buffers) std::apply_visitor(visitor, vbo);

    m_attrib_ring_buffer.unmap();
}
u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32 &texture_index_offset)
u32 upload_inline_array(const u32& max_vertex_attrib_size, const u32& texture_index_offset)
{
u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = {0};
for (u32 i = 0; i < rsx::limits::vertex_count; ++i)
{
for (u32 i = 0; i < rsx::limits::vertex_count; ++i) {
const auto& info = rsx::method_registers.vertex_arrays_info[i];
if (!info.size()) continue;
@ -423,13 +450,11 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32
stride;
m_attrib_ring_buffer.reserve_and_map(vertex_draw_count * max_vertex_attrib_size);
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
int location;
if (!m_program->uniforms.has_location(s_reg_table[index], &location))
continue;
if (!m_program->uniforms.has_location(s_reg_table[index], &location)) continue;
if (!vertex_info.size()) // disabled, bind a null sampler
{
@ -439,7 +464,8 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32
continue;
}
const u32 element_size = rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size());
const u32 element_size =
rsx::get_vertex_type_size_on_host(vertex_info.type(), vertex_info.size());
u32 data_size = element_size * vertex_draw_count;
u32 gl_type = to_gl_internal_type(vertex_info.type(), vertex_info.size());
@ -454,10 +480,8 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32
prepare_buffer_for_writing(dst, vertex_info.type(), vertex_info.size(), vertex_draw_count);
// TODO: properly handle compressed data
for (u32 i = 0; i < vertex_draw_count; ++i)
{
if (vertex_info.type() == rsx::vertex_base_type::ub && vertex_info.size() == 4)
{
for (u32 i = 0; i < vertex_draw_count; ++i) {
if (vertex_info.type() == rsx::vertex_base_type::ub && vertex_info.size() == 4) {
dst[0] = src[3];
dst[1] = src[2];
dst[2] = src[1];
@ -478,3 +502,20 @@ u32 GLGSRender::upload_inline_array(const u32 &max_vertex_attrib_size, const u32
}
return vertex_draw_count;
}
};
}
/**
 * Builds the draw command for the current register state and dispatches it to
 * draw_command_visitor.
 *
 * Returns whatever the visitor returns: (element count, optional (index type,
 * offset in the index ring buffer)).
 */
std::tuple<u32, std::optional<std::tuple<GLenum, u32>>> GLGSRender::set_vertex_buffer()
{
    // NOTE(review): `then` is not read anywhere in this function; the code that
    // consumed it appears only as commented-out lines inside the visitor.
    std::chrono::time_point<std::chrono::system_clock> then = std::chrono::system_clock::now();

    // Forwards vertex buffer requests from the visitor back to this renderer.
    auto fetch_vertex_buffers = [this](const auto& state, const auto& list) {
        return this->get_vertex_buffers(state, list);
    };

    return std::apply_visitor(
        draw_command_visitor(m_index_ring_buffer, m_attrib_ring_buffer, m_gl_attrib_buffers,
            m_program, m_min_texbuffer_alignment, fetch_vertex_buffers),
        get_draw_command(rsx::method_registers));
}
namespace
{
} // End anonymous namespace

View File

@ -636,6 +636,29 @@ namespace rsx
return result;
}
/**
 * Translates the draw clause of `state` into a typed draw command.
 *
 * @param state  register state whose current_draw_clause is inspected
 * @return draw_array_command, draw_indexed_array_command or draw_inlined_array,
 *         depending on the clause's command kind
 * @throws via fmt::throw_exception when the command kind is none of the three
 *
 * The clause is read from `state` rather than from the global
 * rsx::method_registers, so the parameter is actually honoured.
 * NOTE(review): get_raw_index_array is not visible here and may still read
 * global state internally - confirm.
 */
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
thread::get_draw_command(const rsx::rsx_state& state) const
{
    const auto& clause = state.current_draw_clause;

    switch (clause.command)
    {
    case rsx::draw_command::array:
        return draw_array_command{clause.first_count_commands};

    case rsx::draw_command::indexed:
        return draw_indexed_array_command{
            clause.first_count_commands, get_raw_index_array(clause.first_count_commands)};

    case rsx::draw_command::inlined_array:
        return draw_inlined_array{clause.inline_vertex_array};

    default:
        break;
    }

    fmt::throw_exception("ill-formed draw command" HERE);
}
void thread::do_internal_task()
{
if (m_internal_tasks.empty())

View File

@ -139,6 +139,29 @@ namespace rsx
u8 index;
};
/**
 * Payload of a non-indexed array draw.
 * thread::get_draw_command builds it when the current draw clause's command is
 * rsx::draw_command::array.
 */
struct draw_array_command
{
/**
* (first, count) pairs describing the index subranges to draw; filled from the
* draw clause's first_count_commands.
*/
std::vector<std::pair<u32, u32>> indexes_range;
};
/**
 * Payload of an indexed draw.
 * thread::get_draw_command builds it when the current draw clause's command is
 * rsx::draw_command::indexed.
 */
struct draw_indexed_array_command
{
/**
* (first, count) pairs describing the subranges to fetch in the index buffer;
* filled from the draw clause's first_count_commands.
*/
std::vector<std::pair<u32, u32>> ranges_to_fetch_in_index_buffer;
// Raw index bytes as returned by get_raw_index_array. gsl::span is a
// non-owning view. NOTE(review): the lifetime of the underlying storage is not
// visible here - confirm it outlives the command.
gsl::span<const gsl::byte> raw_index_buffer;
};
/**
 * Payload of a draw whose vertex data is inlined in the command stream.
 * thread::get_draw_command builds it when the current draw clause's command is
 * rsx::draw_command::inlined_array.
 */
struct draw_inlined_array
{
// Copy of the draw clause's inline_vertex_array, stored as 32-bit words.
std::vector<u32> inline_vertex_array;
};
class thread : public named_thread
{
std::shared_ptr<thread_ctrl> m_vblank_thread;
@ -231,6 +254,8 @@ namespace rsx
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> get_vertex_buffers(const rsx::rsx_state& state, const std::vector<std::pair<u32, u32>>& vertex_ranges) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const;
private:
std::mutex m_mtx_task;

View File

@ -87,24 +87,6 @@ private:
void prepare_rtts();
/// returns primitive topology, is_indexed, index_count, offset in index buffer, index type
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > > upload_vertex_data();
void upload_vertex_buffers(u32 min_index, u32 vertex_max_index);
/// returns number of vertex drawn
u32 upload_inlined_array();
/**
* Upload index (and expands it if primitive type is not natively supported).
* Returns min index, max index, index_count, and (offset_in_index_buffer, index_type)
*/
std::tuple<u32, u32, u32, std::tuple<VkDeviceSize, VkIndexType>> upload_index_buffer(const rsx::draw_clause &clause);
/**
* Creates and fills an index buffer emulating unsupported primitive type.
* Returns index_count and (offset_in_index_buffer, index_type)
*/
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType> > generate_emulating_index_buffer(const rsx::draw_clause &clause, u32 vertex_count);
public:
bool load_program();
void init_buffers(bool skip_reading = false);

View File

@ -217,81 +217,44 @@ namespace
"in_tc0_buffer", "in_tc1_buffer", "in_tc2_buffer", "in_tc3_buffer",
"in_tc4_buffer", "in_tc5_buffer", "in_tc6_buffer", "in_tc7_buffer"
};
/**
 * Builds a 16-bit index buffer that emulates a primitive type the host does not
 * support natively, for a draw that has no index buffer of its own.
 *
 * @param clause                    draw clause supplying the primitive type
 * @param vertex_count              number of vertices the draw covers
 * @param m_index_buffer_ring_info  heap the indices are allocated from and written to
 * @return index_count and (offset_in_index_buffer, index_type)
 */
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType>> generate_emulating_index_buffer(
    const rsx::draw_clause& clause, u32 vertex_count,
    vk::vk_data_heap& m_index_buffer_ring_info)
{
    const u32 emulated_index_count = get_index_count(clause.primitive, vertex_count);
    const u32 byte_size = emulated_index_count * sizeof(u16);

    const VkDeviceSize heap_offset = m_index_buffer_ring_info.alloc<256>(byte_size);
    void* mapped = m_index_buffer_ring_info.map(heap_offset, byte_size);

    write_index_array_for_non_indexed_non_native_primitive_to_buffer(
        reinterpret_cast<char*>(mapped), clause.primitive, 0, vertex_count);

    m_index_buffer_ring_info.unmap();

    return std::make_tuple(
        emulated_index_count, std::make_tuple(heap_offset, VK_INDEX_TYPE_UINT16));
}
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType> > >
VKGSRender::upload_vertex_data()
{
u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
u32 min_index, max_index;
bool is_indexed_draw = (rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed);
u32 index_count = 0;
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info;
if (is_indexed_draw)
{
std::tie(min_index, max_index, index_count, index_info) = upload_index_buffer(rsx::method_registers.current_draw_clause);
min_index = 0; // We need correct index mapping
}
bool primitives_emulated = false;
VkPrimitiveTopology prims = vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitives_emulated);
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array)
{
if (primitives_emulated)
{
std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, rsx::method_registers.current_draw_clause.get_elements_count());
}
else
{
index_count = rsx::method_registers.current_draw_clause.get_elements_count();
}
min_index = rsx::method_registers.current_draw_clause.first_count_commands.front().first;
max_index = rsx::method_registers.current_draw_clause.get_elements_count() + min_index;
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::inlined_array)
{
index_count = upload_inlined_array();
if (primitives_emulated)
{
std::tie(index_count, index_info) = generate_emulating_index_buffer(rsx::method_registers.current_draw_clause, index_count);
}
}
if (rsx::method_registers.current_draw_clause.command == rsx::draw_command::array || rsx::method_registers.current_draw_clause.command == rsx::draw_command::indexed)
{
upload_vertex_buffers(min_index, max_index);
}
return std::make_tuple(prims, index_count, index_info);
}
namespace
{
struct vertex_buffer_visitor
{
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev,
vk::vk_data_heap& heap, vk::glsl::program* prog,
VkDescriptorSet desc_set,
vertex_buffer_visitor(u32 vtx_cnt, VkDevice dev, vk::vk_data_heap& heap,
vk::glsl::program* prog, VkDescriptorSet desc_set,
std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean)
: vertex_count(vtx_cnt)
, m_attrib_ring_info(heap)
, device(dev)
, m_program(prog)
, descriptor_sets(desc_set)
, m_buffer_view_to_clean(buffer_view_to_clean)
: vertex_count(vtx_cnt), m_attrib_ring_info(heap), device(dev), m_program(prog),
descriptor_sets(desc_set), m_buffer_view_to_clean(buffer_view_to_clean)
{
}
void operator()(const rsx::vertex_array_buffer& vertex_array)
{
// Fill vertex_array
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
u32 element_size =
rsx::get_vertex_type_size_on_host(vertex_array.type, vertex_array.attribute_size);
u32 real_element_size = vk::get_suitable_vk_size(vertex_array.type, vertex_array.attribute_size);
u32 upload_size = real_element_size * vertex_count;
@ -317,14 +280,16 @@ namespace
{
case rsx::vertex_base_type::f:
{
size_t data_size = rsx::get_vertex_type_size_on_host(vertex_register.type, vertex_register.attribute_size);
size_t data_size = rsx::get_vertex_type_size_on_host(
vertex_register.type, vertex_register.attribute_size);
const VkFormat format = vk::get_suitable_vk_format(vertex_register.type, vertex_register.attribute_size);
u32 offset_in_attrib_buffer = 0;
if (vk::requires_component_expansion(vertex_register.type, vertex_register.attribute_size))
{
const u32 num_stored_verts = static_cast<u32>(data_size / (sizeof(float) * vertex_register.attribute_size));
const u32 num_stored_verts = static_cast<u32>(
data_size / (sizeof(float) * vertex_register.attribute_size));
const u32 real_element_size = vk::get_suitable_vk_size(vertex_register.type, vertex_register.attribute_size);
data_size = real_element_size * num_stored_verts;
@ -370,23 +335,135 @@ namespace
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
};
} // End anonymous namespace
using attribute_storage = std::vector<std::variant<rsx::vertex_array_buffer,
rsx::vertex_array_register, rsx::empty_vertex_array>>;
void VKGSRender::upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
struct draw_command_visitor
{
using result_type = std::tuple<VkPrimitiveTopology, u32,
std::optional<std::tuple<VkDeviceSize, VkIndexType>>>;
/**
 * Captures everything the visitor needs to upload index and vertex data.
 *
 * @param device                  Vulkan device used when creating buffer views
 * @param index_buffer_ring_info  heap receiving index data
 * @param attrib_ring_info        heap receiving vertex attribute data
 * @param program                 program whose uniforms are bound
 * @param descriptor_sets         descriptor set the attribute views are bound to
 * @param buffer_view_to_clean    list that takes ownership of created buffer views
 * @param get_vertex_buffers_f    callback returning the vertex buffers for a range list
 *
 * The body is intentionally empty: statements left over from the removed
 * VKGSRender::upload_vertex_buffers (which referenced names not declared here)
 * were dropped. Initialisers are listed in member declaration order.
 */
draw_command_visitor(VkDevice device, vk::vk_data_heap& index_buffer_ring_info,
    vk::vk_data_heap& attrib_ring_info, vk::glsl::program* program,
    VkDescriptorSet descriptor_sets,
    std::vector<std::unique_ptr<vk::buffer_view>>& buffer_view_to_clean,
    std::function<attribute_storage(
        const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
        get_vertex_buffers_f)
    : m_index_buffer_ring_info(index_buffer_ring_info), m_device(device),
      m_attrib_ring_info(attrib_ring_info), m_program(program),
      m_descriptor_sets(descriptor_sets), m_buffer_view_to_clean(buffer_view_to_clean),
      get_vertex_buffers(get_vertex_buffers_f)
{
}
u32 VKGSRender::upload_inlined_array()
/**
 * Handles a non-indexed array draw.
 *
 * Uploads exactly the vertices covered by the draw clause and, when the
 * primitive has to be emulated, generates a matching index buffer.
 *
 * @return (topology, index or vertex count, optional (offset in index buffer,
 *         index type)); the optional is only set for emulated primitives.
 */
result_type operator()(const rsx::draw_array_command& command)
{
    auto& clause = rsx::method_registers.current_draw_clause;

    bool primitives_emulated = false;
    VkPrimitiveTopology prims =
        vk::get_appropriate_topology(clause.primitive, primitives_emulated);

    const u32 vertex_count = clause.get_elements_count();
    const u32 min_index = clause.first_count_commands.front().first;
    // upload_vertex_buffers takes an inclusive upper bound (it uploads
    // max - min + 1 vertices). The previous `count + min_index` was one past the
    // last vertex, so one extra vertex was uploaded and the emulating index
    // buffer was generated for count + 1 vertices.
    const u32 max_index = min_index + vertex_count - 1;

    u32 index_count = vertex_count;
    std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
    if (primitives_emulated) {
        std::tie(index_count, index_info) =
            generate_emulating_index_buffer(clause, vertex_count, m_index_buffer_ring_info);
    }

    upload_vertex_buffers(min_index, max_index);
    return std::make_tuple(prims, index_count, index_info);
}
/**
 * Handles an indexed draw.
 *
 * Writes the index data (expanded when the primitive type is not natively
 * supported) into the index heap, then uploads vertices 0..max referenced index.
 *
 * @return (topology, index count, (offset in index buffer, Vulkan index type))
 */
result_type operator()(const rsx::draw_indexed_array_command& command)
{
    auto& clause = rsx::method_registers.current_draw_clause;

    bool primitives_emulated = false;
    VkPrimitiveTopology topology =
        vk::get_appropriate_topology(clause.primitive, primitives_emulated);

    const rsx::index_array_type index_type = rsx::method_registers.index_type();
    const u32 bytes_per_index = gsl::narrow<u32>(get_index_type_size(index_type));
    const u32 index_count = get_index_count(clause.primitive, clause.get_elements_count());
    const u32 upload_size = index_count * bytes_per_index;

    const VkDeviceSize offset_in_index_buffer =
        m_index_buffer_ring_info.alloc<256>(upload_size);
    void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);

    // Upload index (and expands it if primitive type is not natively supported).
    u32 lowest_index, highest_index;
    std::tie(lowest_index, highest_index) = write_index_array_data_to_buffer(
        gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), index_count * bytes_per_index),
        command.raw_index_buffer, index_type, clause.primitive,
        rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(),
        command.ranges_to_fetch_in_index_buffer,
        [](auto prim) { return !is_primitive_native(prim); });

    m_index_buffer_ring_info.unmap();

    std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info =
        std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type));

    // Vertices are uploaded starting at 0; only the highest referenced index
    // bounds the range.
    upload_vertex_buffers(0, highest_index);
    return std::make_tuple(topology, index_count, index_info);
}
/**
 * Handles a draw whose vertex data is inlined in the command stream.
 *
 * @return (topology, vertex count, nullopt) for native primitives, or
 *         (topology, emulated index count, (offset in index buffer, index type))
 *         when an emulating index buffer had to be generated.
 */
result_type operator()(const rsx::draw_inlined_array& command)
{
    auto& clause = rsx::method_registers.current_draw_clause;

    bool primitives_emulated = false;
    VkPrimitiveTopology topology =
        vk::get_appropriate_topology(clause.primitive, primitives_emulated);

    const u32 vertex_count = upload_inlined_array();

    if (primitives_emulated) {
        u32 emulated_index_count;
        std::optional<std::tuple<VkDeviceSize, VkIndexType>> index_info;
        std::tie(emulated_index_count, index_info) =
            generate_emulating_index_buffer(clause, vertex_count, m_index_buffer_ring_info);
        return std::make_tuple(topology, emulated_index_count, index_info);
    }

    return std::make_tuple(topology, vertex_count, std::nullopt);
}
private:
// Heap receiving index data (real or emulating index buffers).
vk::vk_data_heap& m_index_buffer_ring_info;
// Device passed to vertex_buffer_visitor and used when creating buffer views.
VkDevice m_device;
// Heap receiving vertex attribute data.
vk::vk_data_heap& m_attrib_ring_info;
vk::glsl::program* m_program;
VkDescriptorSet m_descriptor_sets;
// Buffer views created during upload are appended here; the list is owned by
// the caller that constructed the visitor.
std::vector<std::unique_ptr<vk::buffer_view>>& m_buffer_view_to_clean;
// Callback returning the vertex buffers for a list of (first, count) ranges.
std::function<attribute_storage(
const rsx::rsx_state&, const std::vector<std::pair<u32, u32>>&)>
get_vertex_buffers;
/**
 * Uploads the vertex attributes for the inclusive range
 * [min_index, vertex_max_index] by visiting every buffer returned by
 * get_vertex_buffers with a vertex_buffer_visitor.
 */
void upload_vertex_buffers(u32 min_index, u32 vertex_max_index)
{
    // Both bounds are inclusive, hence the +1.
    const u32 vertex_count = vertex_max_index - min_index + 1;

    vertex_buffer_visitor visitor(vertex_count, m_device, m_attrib_ring_info, m_program,
        m_descriptor_sets, m_buffer_view_to_clean);

    const auto& vertex_buffers =
        get_vertex_buffers(rsx::method_registers, {{min_index, vertex_count}});
    for (const auto& vbo : vertex_buffers) {
        std::apply_visitor(visitor, vbo);
    }
}
u32 upload_inlined_array()
{
u32 stride = 0;
u32 offsets[rsx::limits::vertex_count] = {0};
for (u32 i = 0; i < rsx::limits::vertex_count; ++i)
{
for (u32 i = 0; i < rsx::limits::vertex_count; ++i) {
const auto& info = rsx::method_registers.vertex_arrays_info[i];
if (!info.size()) continue;
@ -395,100 +472,83 @@ u32 VKGSRender::upload_inlined_array()
}
u32 vertex_draw_count =
(u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() * sizeof(u32)) /
(u32)(rsx::method_registers.current_draw_clause.inline_vertex_array.size() *
sizeof(u32)) /
stride;
for (int index = 0; index < rsx::limits::vertex_count; ++index)
{
for (int index = 0; index < rsx::limits::vertex_count; ++index) {
auto& vertex_info = rsx::method_registers.vertex_arrays_info[index];
if (!m_program->has_uniform(s_reg_table[index]))
continue;
if (!m_program->has_uniform(s_reg_table[index])) continue;
if (!vertex_info.size()) // disabled
{
continue;
}
const u32 element_size = vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
const u32 element_size =
vk::get_suitable_vk_size(vertex_info.type(), vertex_info.size());
const u32 data_size = element_size * vertex_draw_count;
const VkFormat format = vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());
const VkFormat format =
vk::get_suitable_vk_format(vertex_info.type(), vertex_info.size());
u32 offset_in_attrib_buffer = m_attrib_ring_info.alloc<256>(data_size);
u8* src =
reinterpret_cast<u8*>(rsx::method_registers.current_draw_clause.inline_vertex_array.data());
u8 *dst = static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));
u8* src = reinterpret_cast<u8*>(
rsx::method_registers.current_draw_clause.inline_vertex_array.data());
u8* dst =
static_cast<u8*>(m_attrib_ring_info.map(offset_in_attrib_buffer, data_size));
src += offsets[index];
u8 opt_size = vertex_info.size();
if (vertex_info.size() == 3)
opt_size = 4;
if (vertex_info.size() == 3) opt_size = 4;
// TODO: properly handle cmp type
if (vertex_info.type() == rsx::vertex_base_type::cmp)
LOG_ERROR(RSX, "Compressed vertex attributes not supported for inlined arrays yet");
LOG_ERROR(
RSX, "Compressed vertex attributes not supported for inlined arrays yet");
switch (vertex_info.type())
{
case rsx::vertex_base_type::f:
vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
vk::copy_inlined_data_to_buffer<float, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::sf:
vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
vk::copy_inlined_data_to_buffer<u16, 0x3c00>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::s1:
case rsx::vertex_base_type::ub:
case rsx::vertex_base_type::ub256:
vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
vk::copy_inlined_data_to_buffer<u8, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
case rsx::vertex_base_type::s32k:
case rsx::vertex_base_type::cmp:
vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count, vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
vk::copy_inlined_data_to_buffer<u16, 1>(src, dst, vertex_draw_count,
vertex_info.type(), vertex_info.size(), opt_size, element_size, stride);
break;
default:
fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
default: fmt::throw_exception("Unknown base type %d" HERE, (u32)vertex_info.type());
}
m_attrib_ring_info.unmap();
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(*m_device, m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(m_buffer_view_to_clean.back()->value, s_reg_table[index], descriptor_sets);
m_buffer_view_to_clean.push_back(std::make_unique<vk::buffer_view>(m_device,
m_attrib_ring_info.heap->value, format, offset_in_attrib_buffer, data_size));
m_program->bind_uniform(
m_buffer_view_to_clean.back()->value, s_reg_table[index], m_descriptor_sets);
}
return vertex_draw_count;
}
std::tuple<u32, u32, u32, std::tuple<VkDeviceSize, VkIndexType>> VKGSRender::upload_index_buffer(const rsx::draw_clause &clause)
{
rsx::index_array_type index_type = rsx::method_registers.index_type();
u32 type_size = gsl::narrow<u32>(get_index_type_size(index_type));
u32 index_count = get_index_count(clause.primitive, clause.get_elements_count());
u32 upload_size = index_count * type_size;
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
u32 min_index, max_index;
std::tie(min_index, max_index) = write_index_array_data_to_buffer(gsl::span<gsl::byte>(static_cast<gsl::byte*>(buf), index_count * type_size), get_raw_index_array(clause.first_count_commands),
index_type, clause.primitive, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), clause.first_count_commands,
[](auto prim) { return !is_primitive_native(prim); });
m_index_buffer_ring_info.unmap();
return std::make_tuple(min_index, max_index, index_count, std::make_tuple(offset_in_index_buffer, vk::get_index_type(index_type)));
};
}
std::tuple<u32, std::tuple<VkDeviceSize, VkIndexType> > VKGSRender::generate_emulating_index_buffer(const rsx::draw_clause &clause, u32 vertex_count)
std::tuple<VkPrimitiveTopology, u32, std::optional<std::tuple<VkDeviceSize, VkIndexType>>>
VKGSRender::upload_vertex_data()
{
u32 index_count = get_index_count(clause.primitive, vertex_count);
u32 upload_size = index_count * sizeof(u16);
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<256>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
write_index_array_for_non_indexed_non_native_primitive_to_buffer(reinterpret_cast<char*>(buf), clause.primitive, 0, vertex_count);
m_index_buffer_ring_info.unmap();
return std::make_tuple(index_count, std::make_tuple(offset_in_index_buffer, VK_INDEX_TYPE_UINT16));
draw_command_visitor visitor(*m_device, m_index_buffer_ring_info, m_attrib_ring_info, m_program,
descriptor_sets, m_buffer_view_to_clean,
[this](const auto& state, const auto& range) { return get_vertex_buffers(state, range); });
return std::apply_visitor(visitor, get_draw_command(rsx::method_registers));
}