rsx: Properly implement immediate mode rendering

- Treat the draw commands as being consumed on-the-fly with ATTR0 as provoking attribute
- Analysing streams sent to RSX and the results implies they are consumed fully inline.
  This only makes sense if a provoking attribute is present. The 'static' register is truly the immediate register for the draw.
This commit is contained in:
kd-11 2021-11-13 23:47:07 +03:00 committed by kd-11
parent 1f627caa81
commit 59b1c324a9
8 changed files with 139 additions and 69 deletions

View File

@ -401,6 +401,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GSRender.cpp RSX/GSRender.cpp
RSX/RSXFIFO.cpp RSX/RSXFIFO.cpp
RSX/rsx_methods.cpp RSX/rsx_methods.cpp
RSX/rsx_vertex_data.cpp
RSX/RSXOffload.cpp RSX/RSXOffload.cpp
RSX/RSXTexture.cpp RSX/RSXTexture.cpp
RSX/RSXThread.cpp RSX/RSXThread.cpp

View File

@ -473,26 +473,28 @@ namespace rsx
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
{ {
vertex_push_buffers[attribute].size = size; if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
vertex_push_buffers[attribute].append_vertex_data(subreg_index, type, value); {
return;
}
// Enforce ATTR0 as vertex attribute for push buffers.
// This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
} }
u32 thread::get_push_buffer_vertex_count() const u32 thread::get_push_buffer_vertex_count() const
{ {
//There's no restriction on which attrib shall hold vertex data, so we check them all // Enforce ATTR0 as vertex attribute for push buffers.
u32 max_vertex_count = 0; // This whole thing becomes a mess if we don't have a provoking attribute.
for (auto &buf: vertex_push_buffers) return vertex_push_buffers[0].vertex_count;
{
max_vertex_count = std::max(max_vertex_count, buf.vertex_count);
}
return max_vertex_count;
} }
void thread::append_array_element(u32 index) void thread::append_array_element(u32 index)
{ {
//Endianness is swapped because common upload code expects input in BE // Endianness is swapped because common upload code expects input in BE
//TODO: Implement fast upload path for LE inputs and do away with this // TODO: Implement fast upload path for LE inputs and do away with this
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index)); element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
} }
@ -1732,7 +1734,7 @@ namespace rsx
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask); current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
} }
void thread::analyse_inputs_interleaved(vertex_input_layout& result) const void thread::analyse_inputs_interleaved(vertex_input_layout& result)
{ {
const rsx_state& state = rsx::method_registers; const rsx_state& state = rsx::method_registers;
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask; const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
@ -1800,6 +1802,9 @@ namespace rsx
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults // Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
if (vertex_push_buffers[index].vertex_count > 1) if (vertex_push_buffers[index].vertex_count > 1)
{ {
// Ensure consistent number of vertices per attribute.
vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);
// Read temp buffer (register array) // Read temp buffer (register array)
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32))); std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
result.volatile_blocks.push_back(volatile_range_info); result.volatile_blocks.push_back(volatile_range_info);

View File

@ -745,7 +745,7 @@ namespace rsx
/** /**
* Analyze vertex inputs and group all interleaved blocks * Analyze vertex inputs and group all interleaved blocks
*/ */
void analyse_inputs_interleaved(vertex_input_layout&) const; void analyse_inputs_interleaved(vertex_input_layout&);
RSXVertexProgram current_vertex_program = {}; RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {}; RSXFragmentProgram current_fragment_program = {};

View File

@ -281,10 +281,10 @@ namespace rsx
if (rsx->in_begin_end) if (rsx->in_begin_end)
{ {
// Update to immediate mode register/array // Update to immediate mode register/array
// NOTE: Push buffers still behave like register writes.
// You do not need to specify each attribute for each vertex, the register is referenced instead.
// This is classic OpenGL 1.x behavior as I remember.
rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg); rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg);
// NOTE: one can update the register to update constant across primitive. Needs verification.
// Fall through
} }
auto& info = rsx::method_registers.register_vertex_info[attribute_index]; auto& info = rsx::method_registers.register_vertex_info[attribute_index];

View File

@ -0,0 +1,101 @@
#include "stdafx.h"
#include "rsx_vertex_data.h"
#include "rsx_methods.h"
namespace rsx
{
// Drops all buffered vertex data and zeroes the counters.
// Nothing is touched when `size` is already zero, so repeated calls on an
// unused buffer are cheap. `attr` and `type` are intentionally left as-is;
// they are overwritten by the next set_vertex_data() call.
void push_buffer_vertex_info::clear()
{
	if (!size)
	{
		// Buffer carries no attribute layout - nothing to release
		return;
	}

	size = 0;
	dword_count = 0;
	vertex_count = 0;
	data.clear();
}
// Number of 32-bit words occupied by one vertex of this attribute, derived
// from the current `type` and component count (`size`).
// Throws (via fmt::throw_exception) for any base type not listed below.
u8 push_buffer_vertex_info::get_vertex_size_in_dwords() const
{
	// NOTE: Types are always provided to fit into 32-bits
	// i.e no less than 4 8-bit values and no less than 2 16-bit values

	// 32-bit floats: one dword per component
	if (type == vertex_base_type::f)
	{
		return static_cast<u8>(size);
	}

	// 8-bit components: the whole vertex is packed into a single dword
	if (type == vertex_base_type::ub || type == vertex_base_type::ub256)
	{
		return 1;
	}

	// 16-bit components: two components per dword
	if (type == vertex_base_type::s1 || type == vertex_base_type::s32k)
	{
		return static_cast<u8>(size / 2);
	}

	fmt::throw_exception("Unsupported vertex base type %d", static_cast<u8>(type));
}
// Returns the index of the vertex that is currently being written to.
// Must only be called on the ATTR0 push buffer (enforced by the `ensure`).
// The ID is the number of dwords written so far divided by the per-vertex
// dword count, so it advances once a vertex has been fully written.
// Returns 0 when no layout has been specified yet (`size == 0`).
u32 push_buffer_vertex_info::get_vertex_id() const
{
	ensure(attr == 0); // Only ask ATTR0 for vertex ID

	if (!size)
	{
		// No attribute layout set yet; also avoids querying the vertex size for an unset type
		return 0;
	}

	// Which is the current vertex ID to be written to?
	// NOTE: Fully writing to ATTR0 closes the current block
	const u32 vertex_size = get_vertex_size_in_dwords();

	// A 16-bit type with a single component yields (size / 2) == 0 dwords.
	// Guard the divisor instead of performing an integer division by zero.
	return vertex_size ? (dword_count / vertex_size) : 0;
}
// Writes one dword (`arg`) into component slot `sub_index` of vertex `vertex_id`.
//  attribute_id - attribute this buffer belongs to; stored in `attr` for later padding
//  vertex_id    - index of the vertex being written
//  sub_index    - dword offset inside that vertex
//  type, size   - layout of the attribute; replaces the stored layout on every call
//  arg          - raw 32-bit payload
// The buffer is grown via pad_to() when `vertex_id` lies past the current end.
// If the buffer already holds MORE vertices than `vertex_id + 1`, pad_to() is a
// no-op and the following `ensure` fails.
void push_buffer_vertex_info::set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg)
{
	const bool layout_changed = (type != this->type) || (size != this->size);

	if (vertex_count != 0 && layout_changed)
	{
		// TODO: Should forcefully break the draw call on this step using an execution barrier.
		// While RSX can handle this behavior without problem, it can only be the product of nonsensical game design.
		// NOTE(review): already-buffered vertices keep their old stride, so a respec that enlarges the vertex
		// can make the write below land past the end of `data` - confirm and handle.
		rsx_log.error("Vertex attribute %u was respecced mid-draw (type = %d vs %d, size = %u vs %u). Indexed execution barrier required. Report this to developers.",
			attribute_id, static_cast<int>(type), static_cast<int>(this->type), size, this->size);
	}

	// Adopt the incoming layout; the size query below depends on it
	this->attr = attribute_id;
	this->size = size;
	this->type = type;

	const u32 dwords_per_vertex = get_vertex_size_in_dwords();
	const u32 wanted_count = vertex_id + 1;

	if (wanted_count != vertex_count)
	{
		// Fill the skipped vertices; the final slot is left for the write below
		pad_to(wanted_count, true);
		ensure(vertex_count == wanted_count);
	}

	// NOTE(review): sub_index is not validated against dwords_per_vertex - presumably guaranteed by the caller.
	u32* const vertex_base = data.data() + ((vertex_count - 1) * dwords_per_vertex);
	vertex_base[sub_index] = arg;
	dword_count++;
}
// Grows the buffer so that it holds `required_vertex_count` vertices.
// Newly created vertices are filled with the current contents of the matching
// immediate register (rsx::method_registers.register_vertex_info[attr]).
// When `skip_last` is set, the final vertex is allocated but not filled.
// Does nothing if the buffer already holds at least that many vertices.
void push_buffer_vertex_info::pad_to(u32 required_vertex_count, bool skip_last)
{
	if (required_vertex_count <= vertex_count)
	{
		return;
	}

	const u32 dwords_per_vertex = get_vertex_size_in_dwords();
	data.resize(dwords_per_vertex * required_vertex_count);

	// For all previous verts, copy over the register contents duplicated over the stream.
	// Internally it appears RSX actually executes the draw commands as they are encountered.
	// You can change register data contents mid-way for example and it will pick up for the next N draws.
	// This is how immediate mode is implemented internally.
	const u32* const register_contents = rsx::method_registers.register_vertex_info[attr].data.data();
	const u32 fill_limit = skip_last ? (required_vertex_count - 1) : required_vertex_count;

	u32* const fill_begin = data.data() + (vertex_count * dwords_per_vertex);
	u32* const fill_end = data.data() + (fill_limit * dwords_per_vertex);

	for (u32* cursor = fill_begin; cursor < fill_end; cursor += dwords_per_vertex)
	{
		std::memcpy(cursor, register_contents, dwords_per_vertex * sizeof(u32));
	}

	vertex_count = required_vertex_count;
}
}

View File

@ -1,7 +1,7 @@
#pragma once #pragma once
#include "gcm_enums.h" #include "gcm_enums.h"
#include "rsx_decode.h"
#include "Common/simple_array.hpp" #include "Common/simple_array.hpp"
#include "util/types.hpp" #include "util/types.hpp"
@ -56,64 +56,23 @@ public:
struct push_buffer_vertex_info struct push_buffer_vertex_info
{ {
u8 size = 0; u32 attr = 0;
u32 size = 0;
vertex_base_type type = vertex_base_type::f; vertex_base_type type = vertex_base_type::f;
u32 vertex_count = 0; u32 vertex_count = 0;
u32 attribute_mask = ~0; u32 dword_count = 0;
rsx::simple_array<u32> data; rsx::simple_array<u32> data;
void clear() push_buffer_vertex_info() = default;
{ ~push_buffer_vertex_info() = default;
if (size)
{
data.clear();
attribute_mask = ~0;
vertex_count = 0;
size = 0;
}
}
u8 get_vertex_size_in_dwords(vertex_base_type type) const u8 get_vertex_size_in_dwords() const;
{ u32 get_vertex_id() const;
//NOTE: Types are always provided to fit into 32-bits
//i.e no less than 4 8-bit values and no less than 2 16-bit values
switch (type) void clear();
{ void set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg);
case vertex_base_type::f: void pad_to(u32 required_vertex_count, bool skip_last);
return size;
case vertex_base_type::ub:
case vertex_base_type::ub256:
return 1;
case vertex_base_type::s1:
case vertex_base_type::s32k:
return size / 2;
default:
fmt::throw_exception("Unsupported vertex base type %d", static_cast<u8>(type));
}
}
void append_vertex_data(u32 sub_index, vertex_base_type type, u32 arg)
{
const u32 element_mask = (1 << sub_index);
const u8 vertex_size = get_vertex_size_in_dwords(type);
this->type = type;
if (attribute_mask & element_mask)
{
attribute_mask = 0;
vertex_count++;
data.resize(vertex_count * vertex_size);
}
attribute_mask |= element_mask;
u32* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index;
*dst = arg;
}
}; };
struct register_vertex_data_info struct register_vertex_data_info

View File

@ -86,6 +86,7 @@
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" /> <ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
<ClCompile Include="Emu\RSX\Program\program_util.cpp" /> <ClCompile Include="Emu\RSX\Program\program_util.cpp" />
<ClCompile Include="Emu\RSX\RSXDisAsm.cpp" /> <ClCompile Include="Emu\RSX\RSXDisAsm.cpp" />
<ClCompile Include="Emu\RSX\rsx_vertex_data.cpp" />
<ClCompile Include="Emu\system_config_types.cpp" /> <ClCompile Include="Emu\system_config_types.cpp" />
<ClCompile Include="Emu\perf_meter.cpp" /> <ClCompile Include="Emu\perf_meter.cpp" />
<ClCompile Include="Emu\system_progress.cpp" /> <ClCompile Include="Emu\system_progress.cpp" />

View File

@ -1027,6 +1027,9 @@
<ClCompile Include="Emu\Io\camera_config.cpp"> <ClCompile Include="Emu\Io\camera_config.cpp">
<Filter>Emu\Io</Filter> <Filter>Emu\Io</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="Emu\RSX\rsx_vertex_data.cpp">
<Filter>Emu\GPU\RSX</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Crypto\aes.h"> <ClInclude Include="Crypto\aes.h">