diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index d48bb99c41..28bda5f5a5 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -34,9 +34,10 @@ void D3D12GSRender::load_vertex_data(u32 first, u32 count) vertex_draw_count += count; } -void D3D12GSRender::upload_vertex_attributes(const std::vector > &vertex_ranges) +std::vector D3D12GSRender::upload_vertex_attributes(const std::vector > &vertex_ranges) { - m_vertex_buffer_views.clear(); + std::vector vertex_buffer_views; + m_IASet.clear(); size_t input_slot = 0; @@ -81,7 +82,7 @@ void D3D12GSRender::upload_vertex_attributes(const std::vector D3D12GSRender::upload_inlined_vertex_array() { - // Index count - m_rendering_info.m_count = 0; - for (const auto &pair : m_first_count_pairs) - m_rendering_info.m_count += get_index_count(draw_mode, pair.second); - - if (!m_rendering_info.m_indexed) + UINT offset = 0; + m_IASet.clear(); + // Bind attributes + for (int index = 0; index < rsx::limits::vertex_count; ++index) { - // Non indexed - upload_vertex_attributes(m_first_count_pairs); - command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + const auto &info = vertex_arrays_info[index]; + + if (!info.size) // disabled + continue; + + D3D12_INPUT_ELEMENT_DESC IAElement = {}; + IAElement.SemanticName = "TEXCOORD"; + IAElement.SemanticIndex = (UINT)index; + IAElement.InputSlot = 0; + IAElement.Format = get_vertex_attribute_format(info.type, info.size); + IAElement.AlignedByteOffset = offset; + IAElement.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + IAElement.InstanceDataStepRate = 0; + m_IASet.push_back(IAElement); + + offset += rsx::get_vertex_type_size(info.type) * info.size; + } + + // Copy inline buffer + size_t buffer_size = inline_vertex_array.size() * sizeof(int); + assert(m_vertex_index_data.can_alloc(buffer_size)); + size_t heap_offset = m_vertex_index_data.alloc(buffer_size); + void *buffer; + CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + write_inline_array_to_buffer(mapped_buffer); + m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = + { + m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + (UINT)offset + }; + + return std::make_tuple(vertex_buffer_view, (u32)buffer_size / offset); +} + +std::tuple D3D12GSRender::generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges) +{ + size_t index_count = 0; + for (const auto &pair : vertex_ranges) + index_count += get_index_count(draw_mode, pair.second); + + // Alloc + size_t buffer_size = align(index_count * sizeof(u16), 64); + assert(m_vertex_index_data.can_alloc(buffer_size)); + size_t heap_offset = m_vertex_index_data.alloc(buffer_size); + + void *buffer; + CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + size_t first = 0; + for (const auto &pair : vertex_ranges) + { + size_t element_count = get_index_count(draw_mode, pair.second); + write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, draw_mode, (u32)first, (u32)pair.second); + mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); + first += pair.second; + } + m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + DXGI_FORMAT_R16_UINT + }; + + return std::make_tuple(index_buffer_view, index_count); +} + +std::tuple D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list) +{ + if (draw_command == Draw_command::draw_command_inlined_array) + { + size_t vertex_count; + D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; + std::tie(vertex_buffer_view, vertex_count) = upload_inlined_vertex_array(); + command_list->IASetVertexBuffers(0, (UINT)1, &vertex_buffer_view); + if (is_primitive_native(draw_mode)) - return; - // Handle non native primitive + return std::make_tuple(false, vertex_count); - // Alloc - size_t buffer_size = align(m_rendering_info.m_count * sizeof(u16), 64); - assert(m_vertex_index_data.can_alloc(buffer_size)); - size_t heap_offset = m_vertex_index_data.alloc(buffer_size); - - void *buffer; - CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); - void *mapped_buffer = (char*)buffer + heap_offset; - size_t first = 0; - for (const auto &pair : m_first_count_pairs) - { - size_t element_count = get_index_count(draw_mode, pair.second); - write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)mapped_buffer, draw_mode, (u32)first, (u32)pair.second); - mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); - first += pair.second; - } - m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_INDEX_BUFFER_VIEW index_buffer_view = { - m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - DXGI_FORMAT_R16_UINT - }; + D3D12_INDEX_BUFFER_VIEW index_buffer_view; + size_t index_count; + std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array({ { 0, (u32)vertex_count } }); command_list->IASetIndexBuffer(&index_buffer_view); - m_rendering_info.m_indexed = true; + return std::make_tuple(true, index_count); } - else + + if (draw_command == Draw_command::draw_command_array) { - u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; + const std::vector &vertex_buffer_views = upload_vertex_attributes(m_first_count_pairs); + command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); - // Index type - size_t index_size = get_index_type_size(indexed_type); - - // Alloc - size_t buffer_size = align(m_rendering_info.m_count * index_size, 64); - assert(m_vertex_index_data.can_alloc(buffer_size)); - size_t heap_offset = m_vertex_index_data.alloc(buffer_size); - - void *buffer; - CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); - void *mapped_buffer = (char*)buffer + heap_offset; - u32 min_index = (u32)-1, max_index = 0; - for (const auto &pair : m_first_count_pairs) + if (is_primitive_native(draw_mode)) { - size_t element_count = get_index_count(draw_mode, pair.second); - write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index); - mapped_buffer = (char*)mapped_buffer + element_count * index_size; + // Index count + size_t vertex_count = 0; + for (const auto &pair : m_first_count_pairs) + vertex_count += pair.second; + return std::make_tuple(false, vertex_count); } - m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); - D3D12_INDEX_BUFFER_VIEW index_buffer_view = { - m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, - (UINT)buffer_size, - get_index_type(indexed_type) - }; - m_timers.m_buffer_upload_size += buffer_size; - command_list->IASetIndexBuffer(&index_buffer_view); - m_rendering_info.m_indexed = true; - upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); - command_list->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + D3D12_INDEX_BUFFER_VIEW index_buffer_view; + size_t index_count; + std::tie(index_buffer_view, index_count) = generate_index_buffer_for_emulated_primitives_array(m_first_count_pairs); + command_list->IASetIndexBuffer(&index_buffer_view); + return std::make_tuple(true, index_count); } + + assert(draw_command == Draw_command::draw_command_indexed); + + u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; + size_t index_size = get_index_type_size(indexed_type); + + // Index count + size_t index_count = 0; + for (const auto &pair : m_first_count_pairs) + index_count += get_index_count(draw_mode, pair.second); + + // Alloc + size_t buffer_size = align(index_count * index_size, 64); + assert(m_vertex_index_data.can_alloc(buffer_size)); + size_t heap_offset = m_vertex_index_data.alloc(buffer_size); + + void *buffer; + CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); + void *mapped_buffer = (char*)buffer + heap_offset; + u32 min_index = (u32)-1, max_index = 0; + for (const auto &pair : m_first_count_pairs) + { + size_t element_count = get_index_count(draw_mode, pair.second); + write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index); + mapped_buffer = (char*)mapped_buffer + element_count * index_size; + } + m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); + D3D12_INDEX_BUFFER_VIEW index_buffer_view = { + m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, + (UINT)buffer_size, + get_index_type(indexed_type) + }; + m_timers.m_buffer_upload_size += buffer_size; + command_list->IASetIndexBuffer(&index_buffer_view); + + const std::vector &vertex_buffer_views = upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); + command_list->IASetVertexBuffers(0, (UINT)vertex_buffer_views.size(), vertex_buffer_views.data()); + + return std::make_tuple(true, index_count); } #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 80c30e49d2..dd8629fb7f 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -264,8 +264,9 @@ void D3D12GSRender::end() std::chrono::time_point vertex_index_duration_start = std::chrono::system_clock::now(); - if (!vertex_index_array.empty() || vertex_draw_count) - upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); + size_t vertex_count; + bool indexed_draw; + std::tie(indexed_draw, vertex_count) = upload_and_set_vertex_index_data(get_current_resource_storage().command_list.Get()); std::chrono::time_point vertex_index_duration_end = std::chrono::system_clock::now(); m_timers.m_vertex_index_duration += std::chrono::duration_cast(vertex_index_duration_end - vertex_index_duration_start).count(); @@ -341,10 +342,10 @@ void D3D12GSRender::end() get_current_resource_storage().command_list->IASetPrimitiveTopology(get_primitive_topology(draw_mode)); - if (m_rendering_info.m_indexed) - get_current_resource_storage().command_list->DrawIndexedInstanced((UINT)m_rendering_info.m_count, 1, 0, 0, 0); + if (indexed_draw) + get_current_resource_storage().command_list->DrawIndexedInstanced((UINT)vertex_count, 1, 0, 0, 0); else - get_current_resource_storage().command_list->DrawInstanced((UINT)m_rendering_info.m_count, 1, 0, 0); + get_current_resource_storage().command_list->DrawInstanced((UINT)vertex_count, 1, 0, 0); vertex_index_array.clear(); std::chrono::time_point end_duration = std::chrono::system_clock::now(); @@ -358,7 +359,6 @@ void D3D12GSRender::end() get_current_resource_storage().set_new_command_list(); } m_first_count_pairs.clear(); - m_rendering_info.m_indexed = false; thread::end(); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index 5f2feadbb0..143ba42243 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -121,16 +121,9 @@ private: data_heap m_uav_heap; data_heap m_readback_resources; - struct - { - bool m_indexed; /* m_IASet; - std::vector m_vertex_buffer_views; INT g_descriptor_stride_srv_cbv_uav; INT g_descriptor_stride_dsv; @@ -158,10 +151,11 @@ private: void set_rtt_and_ds(ID3D12GraphicsCommandList *command_list); /** - * Create vertex and index buffers (if needed) and set them to cmdlist. - * Non native primitive type are emulated by index buffers expansion. + * Create vertex and index buffers (if needed) and set them to cmdlist. + * Non native primitive type are emulated by index buffers expansion. + * Returns whether the draw call is indexed or not and the vertex count to draw. */ - void upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); + std::tuple upload_and_set_vertex_index_data(ID3D12GraphicsCommandList *command_list); std::vector > m_first_count_pairs; /** @@ -169,7 +163,11 @@ private: * A range in vertex_range is a pair whose first element is the index of the beginning of the * range, and whose second element is the number of vertex in this range. */ - void upload_vertex_attributes(const std::vector > &vertex_ranges); + std::vector upload_vertex_attributes(const std::vector > &vertex_ranges); + + std::tuple upload_inlined_vertex_array(); + + std::tuple generate_index_buffer_for_emulated_primitives_array(const std::vector > &vertex_ranges); void upload_and_bind_scale_offset_matrix(size_t descriptor_index); void upload_and_bind_vertex_shader_constants(size_t descriptor_index); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index bcf8dea005..46edb5f7e5 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -379,62 +379,85 @@ void GLGSRender::end() u32 input_mask = rsx::method_registers[NV4097_SET_VERTEX_ATTRIB_INPUT_MASK]; m_vao.bind(); - for (int index = 0; index < rsx::limits::vertex_count; ++index) + if (draw_command == Draw_command::draw_command_inlined_array) { - bool enabled = !!(input_mask & (1 << index)); - if (!enabled) - continue; - - int location; - if (!m_program->attribs.has_location(reg_table[index], &location)) - continue; - - if (vertex_arrays_info[index].size > 0) + write_inline_array_to_buffer(vertex_arrays_data.data()); + size_t offset = 0; + for (int index = 0; index < rsx::limits::vertex_count; ++index) { auto &vertex_info = vertex_arrays_info[index]; - // Active vertex array - size_t position = vertex_arrays_data.size(); - vertex_arrays_offsets[index] = position; - - if (vertex_arrays[index].empty()) + if (!vertex_info.size) // disabled continue; - size_t size = vertex_arrays[index].size(); - vertex_arrays_data.resize(position + size); - - memcpy(vertex_arrays_data.data() + position, vertex_arrays[index].data(), size); + int location; + if (!m_program->attribs.has_location(reg_table[index], &location)) + continue; __glcheck m_program->attribs[location] = - (m_vao + vertex_arrays_offsets[index]) + (m_vao + offset) .config(gl_types[vertex_info.type], vertex_info.size, gl_normalized[vertex_info.type]); + offset += rsx::get_vertex_type_size(vertex_info.type) * vertex_info.size; } - else if (register_vertex_info[index].size > 0) + } + else + { + for (int index = 0; index < rsx::limits::vertex_count; ++index) { - auto &vertex_data = register_vertex_data[index]; - auto &vertex_info = register_vertex_info[index]; + bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; - switch (vertex_info.type) + int location; + if (!m_program->attribs.has_location(reg_table[index], &location)) + continue; + + if (vertex_arrays_info[index].size > 0) { - case CELL_GCM_VERTEX_F: - switch (register_vertex_info[index].size) - { - case 1: apply_attrib_array(*m_program, location, vertex_data); break; - case 2: apply_attrib_array(*m_program, location, vertex_data); break; - case 3: apply_attrib_array(*m_program, location, vertex_data); break; - case 4: apply_attrib_array(*m_program, location, vertex_data); break; - } - break; + auto &vertex_info = vertex_arrays_info[index]; + // Active vertex array - default: - LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size); - break; + size_t position = vertex_arrays_data.size(); + vertex_arrays_offsets[index] = position; + + if (vertex_arrays[index].empty()) + continue; + + size_t size = vertex_arrays[index].size(); + vertex_arrays_data.resize(position + size); + + memcpy(vertex_arrays_data.data() + position, vertex_arrays[index].data(), size); + + __glcheck m_program->attribs[location] = + (m_vao + vertex_arrays_offsets[index]) + .config(gl_types[vertex_info.type], vertex_info.size, gl_normalized[vertex_info.type]); + } + else if (register_vertex_info[index].size > 0) + { + auto &vertex_data = register_vertex_data[index]; + auto &vertex_info = register_vertex_info[index]; + + switch (vertex_info.type) + { + case CELL_GCM_VERTEX_F: + switch (register_vertex_info[index].size) + { + case 1: apply_attrib_array(*m_program, location, vertex_data); break; + case 2: apply_attrib_array(*m_program, location, vertex_data); break; + case 3: apply_attrib_array(*m_program, location, vertex_data); break; + case 4: apply_attrib_array(*m_program, location, vertex_data); break; + } + break; + + default: + LOG_ERROR(RSX, "bad non array vertex data format (type = %d, size = %d)", vertex_info.type, vertex_info.size); + break; + } } } } m_vbo.data(vertex_arrays_data.size(), vertex_arrays_data.data()); - if (vertex_index_array.empty()) { draw_fbo.draw_arrays(gl::draw_mode(draw_mode - 1), vertex_draw_count); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 615385533e..8c11798d31 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -175,6 +175,7 @@ namespace rsx force_inline void draw_arrays(thread* rsx, u32 arg) { + rsx->draw_command = thread::Draw_command::draw_command_array; u32 first = arg & 0xffffff; u32 count = (arg >> 24) + 1; @@ -183,6 +184,7 @@ namespace rsx force_inline void draw_index_array(thread* rsx, u32 arg) { + rsx->draw_command = thread::Draw_command::draw_command_indexed; u32 first = arg & 0xffffff; u32 count = (arg >> 24) + 1; @@ -190,6 +192,13 @@ namespace rsx rsx->load_vertex_index_data(first, count); } + force_inline void draw_inline_array(thread* rsx, u32 arg) + { + rsx->draw_command = thread::Draw_command::draw_command_inlined_array; + rsx->draw_inline_vertex_array = true; + rsx->inline_vertex_array.push_back(arg); + } + template struct set_transform_constant { @@ -225,6 +234,8 @@ namespace rsx { if (arg) { + rsx->draw_inline_vertex_array = false; + rsx->inline_vertex_array.clear(); rsx->begin(); return; } @@ -774,6 +785,7 @@ namespace rsx bind(); bind(); bind(); + bind(); bind_range(); bind_range(); bind_range(); @@ -1001,7 +1013,7 @@ namespace rsx color_index_to_record = { 0, 1, 2, 3 }; break; } - for (size_t i : color_index_to_record) +/* for (size_t i : color_index_to_record) { draw_state.color_buffer[i].width = clip_w; draw_state.color_buffer[i].height = clip_h; @@ -1018,7 +1030,7 @@ namespace rsx draw_state.stencil.height = clip_h; draw_state.stencil.data.resize(clip_w * clip_h * 4); copy_stencil_buffer_to_memory(draw_state.stencil.data.data()); - } + }*/ draw_state.programs = get_programs(); draw_state.name = name; frame_debug.draw_calls.push_back(draw_state); @@ -1199,6 +1211,42 @@ namespace rsx stream_vector_from_memory((char*)buffer + entry.first * 4 * sizeof(float), (void*)entry.second.rgba); } + void thread::write_inline_array_to_buffer(void *dst_buffer) + { + u8* src = reinterpret_cast(inline_vertex_array.data()); + u8* dst = (u8*)dst_buffer; + + size_t bytes_written = 0; + while (bytes_written < inline_vertex_array.size() * sizeof(u32)) + { + for (int index = 0; index < rsx::limits::vertex_count; ++index) + { + const auto &info = vertex_arrays_info[index]; + + if (!info.size) // disabled + continue; + + u32 type_size = rsx::get_vertex_type_size(info.type); + u32 element_size = type_size * info.size; + + if (type_size == 1 && info.size == 4) + { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + } + else + memcpy(dst, src, element_size); + + src += element_size; + dst += element_size; + + bytes_written += element_size; + } + } + } + u64 thread::timestamp() const { // Get timestamp, and convert it from microseconds to nanoseconds diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 47c1ceeb6f..b383233ff0 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -413,11 +413,21 @@ namespace rsx u32 ctxt_addr; u32 report_main_addr; u32 label_addr; + enum class Draw_command + { + draw_command_array, + draw_command_inlined_array, + draw_command_indexed, + } draw_command; u32 draw_mode; u32 local_mem_addr, main_mem_addr; bool strict_ordering[0x1000]; + + bool draw_inline_vertex_array; + std::vector inline_vertex_array; + public: u32 draw_array_count; u32 draw_array_first; @@ -464,6 +474,13 @@ namespace rsx */ void fill_vertex_program_constants_data(void *buffer); + /** + * Write inlined array data to buffer. + * The storage of inlined data looks different from memory stored arrays. + * There is no swapping required except for 4 u8 (according to Bleach Soul Resurection) + */ + void write_inline_array_to_buffer(void *dst_buffer); + /** * Copy rtt values to buffer. * TODO: It's more efficient to combine multiple call of this function into one.