mirror of https://github.com/RPCS3/rpcs3.git
rsx: Properly implement immediate mode rendering
- Treat the draw commands as being consumed on-the-fly, with ATTR0 as the provoking attribute. - Analysing the streams sent to RSX and their results implies that they are consumed fully inline, which only makes sense if a provoking attribute is present. The 'static' register is truly the immediate register for the draw.
This commit is contained in:
parent
1f627caa81
commit
59b1c324a9
|
@ -401,6 +401,7 @@ target_sources(rpcs3_emu PRIVATE
|
||||||
RSX/GSRender.cpp
|
RSX/GSRender.cpp
|
||||||
RSX/RSXFIFO.cpp
|
RSX/RSXFIFO.cpp
|
||||||
RSX/rsx_methods.cpp
|
RSX/rsx_methods.cpp
|
||||||
|
RSX/rsx_vertex_data.cpp
|
||||||
RSX/RSXOffload.cpp
|
RSX/RSXOffload.cpp
|
||||||
RSX/RSXTexture.cpp
|
RSX/RSXTexture.cpp
|
||||||
RSX/RSXThread.cpp
|
RSX/RSXThread.cpp
|
||||||
|
|
|
@ -473,26 +473,28 @@ namespace rsx
|
||||||
|
|
||||||
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
||||||
{
|
{
|
||||||
vertex_push_buffers[attribute].size = size;
|
if (!(rsx::method_registers.vertex_attrib_input_mask() & (1 << attribute)))
|
||||||
vertex_push_buffers[attribute].append_vertex_data(subreg_index, type, value);
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||||
|
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||||
|
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
|
||||||
|
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 thread::get_push_buffer_vertex_count() const
|
u32 thread::get_push_buffer_vertex_count() const
|
||||||
{
|
{
|
||||||
//There's no restriction on which attrib shall hold vertex data, so we check them all
|
// Enforce ATTR0 as vertex attribute for push buffers.
|
||||||
u32 max_vertex_count = 0;
|
// This whole thing becomes a mess if we don't have a provoking attribute.
|
||||||
for (auto &buf: vertex_push_buffers)
|
return vertex_push_buffers[0].vertex_count;
|
||||||
{
|
|
||||||
max_vertex_count = std::max(max_vertex_count, buf.vertex_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
return max_vertex_count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::append_array_element(u32 index)
|
void thread::append_array_element(u32 index)
|
||||||
{
|
{
|
||||||
//Endianness is swapped because common upload code expects input in BE
|
// Endianness is swapped because common upload code expects input in BE
|
||||||
//TODO: Implement fast upload path for LE inputs and do away with this
|
// TODO: Implement fast upload path for LE inputs and do away with this
|
||||||
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
|
element_push_buffer.push_back(std::bit_cast<u32, be_t<u32>>(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1732,7 +1734,7 @@ namespace rsx
|
||||||
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
|
current_vertex_program.texture_state.import(current_vp_texture_state, current_vp_metadata.referenced_textures_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::analyse_inputs_interleaved(vertex_input_layout& result) const
|
void thread::analyse_inputs_interleaved(vertex_input_layout& result)
|
||||||
{
|
{
|
||||||
const rsx_state& state = rsx::method_registers;
|
const rsx_state& state = rsx::method_registers;
|
||||||
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
|
const u32 input_mask = state.vertex_attrib_input_mask() & current_vp_metadata.referenced_inputs_mask;
|
||||||
|
@ -1800,6 +1802,9 @@ namespace rsx
|
||||||
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
|
// Observed with GT5, immediate render bypasses array pointers completely, even falling back to fixed-function register defaults
|
||||||
if (vertex_push_buffers[index].vertex_count > 1)
|
if (vertex_push_buffers[index].vertex_count > 1)
|
||||||
{
|
{
|
||||||
|
// Ensure consistent number of vertices per attribute.
|
||||||
|
vertex_push_buffers[index].pad_to(vertex_push_buffers[0].vertex_count, false);
|
||||||
|
|
||||||
// Read temp buffer (register array)
|
// Read temp buffer (register array)
|
||||||
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
|
std::pair<u8, u32> volatile_range_info = std::make_pair(index, static_cast<u32>(vertex_push_buffers[index].data.size() * sizeof(u32)));
|
||||||
result.volatile_blocks.push_back(volatile_range_info);
|
result.volatile_blocks.push_back(volatile_range_info);
|
||||||
|
|
|
@ -745,7 +745,7 @@ namespace rsx
|
||||||
/**
|
/**
|
||||||
* Analyze vertex inputs and group all interleaved blocks
|
* Analyze vertex inputs and group all interleaved blocks
|
||||||
*/
|
*/
|
||||||
void analyse_inputs_interleaved(vertex_input_layout&) const;
|
void analyse_inputs_interleaved(vertex_input_layout&);
|
||||||
|
|
||||||
RSXVertexProgram current_vertex_program = {};
|
RSXVertexProgram current_vertex_program = {};
|
||||||
RSXFragmentProgram current_fragment_program = {};
|
RSXFragmentProgram current_fragment_program = {};
|
||||||
|
|
|
@ -281,10 +281,10 @@ namespace rsx
|
||||||
if (rsx->in_begin_end)
|
if (rsx->in_begin_end)
|
||||||
{
|
{
|
||||||
// Update to immediate mode register/array
|
// Update to immediate mode register/array
|
||||||
|
// NOTE: Push buffers still behave like register writes.
|
||||||
|
// You do not need to specify each attribute for each vertex, the register is referenced instead.
|
||||||
|
// This is classic OpenGL 1.x behavior as I remember.
|
||||||
rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg);
|
rsx->append_to_push_buffer(attribute_index, count, vertex_subreg, vtype, arg);
|
||||||
|
|
||||||
// NOTE: one can update the register to update constant across primitive. Needs verification.
|
|
||||||
// Fall through
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& info = rsx::method_registers.register_vertex_info[attribute_index];
|
auto& info = rsx::method_registers.register_vertex_info[attribute_index];
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "rsx_vertex_data.h"
|
||||||
|
#include "rsx_methods.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
void push_buffer_vertex_info::clear()
|
||||||
|
{
|
||||||
|
if (size)
|
||||||
|
{
|
||||||
|
data.clear();
|
||||||
|
vertex_count = 0;
|
||||||
|
dword_count = 0;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the number of 32-bit words a single vertex of this attribute occupies,
// derived from the current base type and component count (size).
// Any base type not handled below is reported through fmt::throw_exception.
u8 push_buffer_vertex_info::get_vertex_size_in_dwords() const
{
	// NOTE: Types are always provided to fit into 32-bits
	// i.e no less than 4 8-bit values and no less than 2 16-bit values

	// Floats: one dword per component
	if (type == vertex_base_type::f)
	{
		return static_cast<u8>(size);
	}

	// 8-bit components: the whole vertex is reported as a single dword
	if (type == vertex_base_type::ub || type == vertex_base_type::ub256)
	{
		return 1;
	}

	// 16-bit components: two components share a dword
	if (type == vertex_base_type::s1 || type == vertex_base_type::s32k)
	{
		return static_cast<u8>(size / 2);
	}

	fmt::throw_exception("Unsupported vertex base type %d", static_cast<u8>(type));
}
|
||||||
|
|
||||||
|
// Returns the index of the vertex that is currently being written.
// Must only be called on the ATTR0 buffer (enforced with ensure).
u32 push_buffer_vertex_info::get_vertex_id() const
{
	// Only ask ATTR0 for vertex ID
	ensure(attr == 0);

	if (!size)
	{
		// No component count recorded for this buffer, so we are still at the first vertex
		return 0;
	}

	// Number of complete vertices written so far == ID of the vertex being filled next.
	// NOTE: Fully writing to ATTR0 closes the current block
	return dword_count / get_vertex_size_in_dwords();
}
|
||||||
|
|
||||||
|
// Writes one dword (arg) into component slot sub_index of vertex vertex_id for this attribute.
//  attribute_id - attribute index, recorded in attr
//  vertex_id    - vertex being written; the buffer is padded up to (vertex_id + 1) vertices
//  sub_index    - dword offset inside the vertex
//  type, size   - base type and component count, recorded on the buffer
//  arg          - raw 32-bit value to store
// A change of type or size while vertices are already stored is logged as an error.
void push_buffer_vertex_info::set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg)
{
	if (vertex_count && (type != this->type || size != this->size))
	{
		// TODO: Should forcefully break the draw call on this step using an execution barrier.
		// While RSX can handle this behavior without problem, it can only be the product of nonsensical game design.
		rsx_log.error("Vertex attribute %u was respecced mid-draw (type = %d vs %d, size = %u vs %u). Indexed execution barrier required. Report this to developers.",
			attribute_id, static_cast<int>(type), static_cast<int>(this->type), size, this->size);
	}

	this->type = type;
	this->size = size;
	this->attr = attribute_id;

	const auto required_vertex_count = (vertex_id + 1);
	const auto vertex_size = get_vertex_size_in_dwords();

	if (vertex_count != required_vertex_count)
	{
		pad_to(required_vertex_count, true);
		ensure(vertex_count == required_vertex_count);
	}

	// If the attribute was respecced to a larger vertex size without the vertex count changing,
	// pad_to() was not invoked and the storage still has the old length.
	// Grow it so the write below cannot land past the end of the buffer.
	const u32 required_dwords = vertex_count * vertex_size;
	if (data.size() < required_dwords)
	{
		data.resize(required_dwords);
	}

	auto current_vertex = data.data() + ((vertex_count - 1) * vertex_size);
	current_vertex[sub_index] = arg;
	++dword_count;
}
|
||||||
|
|
||||||
|
// Grows the buffer to required_vertex_count vertices, filling each newly added vertex
// with the current contents of this attribute's register.
//  required_vertex_count - target vertex count; nothing happens if the buffer already holds at least that many
//  skip_last             - when true, the final new vertex is not filled here
void push_buffer_vertex_info::pad_to(u32 required_vertex_count, bool skip_last)
{
	if (vertex_count >= required_vertex_count)
	{
		return;
	}

	const auto vertex_size = get_vertex_size_in_dwords();
	data.resize(vertex_size * required_vertex_count);

	// For all previous verts, copy over the register contents duplicated over the stream.
	// Internally it appears RSX actually executes the draw commands as they are encountered.
	// You can change register data contents mid-way for example and it will pick up for the next N draws.
	// This is how immediate mode is implemented internally.
	const u32* register_contents = rsx::method_registers.register_vertex_info[attr].data.data();
	const u32 fill_limit = skip_last ? (required_vertex_count - 1) : required_vertex_count;

	for (u32 vertex = vertex_count; vertex < fill_limit; ++vertex)
	{
		std::memcpy(data.data() + (vertex * vertex_size), register_contents, vertex_size * sizeof(u32));
	}

	vertex_count = required_vertex_count;
}
|
||||||
|
}
|
|
@ -1,7 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "gcm_enums.h"
|
#include "gcm_enums.h"
|
||||||
|
#include "rsx_decode.h"
|
||||||
#include "Common/simple_array.hpp"
|
#include "Common/simple_array.hpp"
|
||||||
#include "util/types.hpp"
|
#include "util/types.hpp"
|
||||||
|
|
||||||
|
@ -56,64 +56,23 @@ public:
|
||||||
|
|
||||||
struct push_buffer_vertex_info
|
struct push_buffer_vertex_info
|
||||||
{
|
{
|
||||||
u8 size = 0;
|
u32 attr = 0;
|
||||||
|
u32 size = 0;
|
||||||
vertex_base_type type = vertex_base_type::f;
|
vertex_base_type type = vertex_base_type::f;
|
||||||
|
|
||||||
u32 vertex_count = 0;
|
u32 vertex_count = 0;
|
||||||
u32 attribute_mask = ~0;
|
u32 dword_count = 0;
|
||||||
rsx::simple_array<u32> data;
|
rsx::simple_array<u32> data;
|
||||||
|
|
||||||
void clear()
|
push_buffer_vertex_info() = default;
|
||||||
{
|
~push_buffer_vertex_info() = default;
|
||||||
if (size)
|
|
||||||
{
|
|
||||||
data.clear();
|
|
||||||
attribute_mask = ~0;
|
|
||||||
vertex_count = 0;
|
|
||||||
size = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u8 get_vertex_size_in_dwords(vertex_base_type type) const
|
u8 get_vertex_size_in_dwords() const;
|
||||||
{
|
u32 get_vertex_id() const;
|
||||||
//NOTE: Types are always provided to fit into 32-bits
|
|
||||||
//i.e no less than 4 8-bit values and no less than 2 16-bit values
|
|
||||||
|
|
||||||
switch (type)
|
void clear();
|
||||||
{
|
void set_vertex_data(u32 attribute_id, u32 vertex_id, u32 sub_index, vertex_base_type type, u32 size, u32 arg);
|
||||||
case vertex_base_type::f:
|
void pad_to(u32 required_vertex_count, bool skip_last);
|
||||||
return size;
|
|
||||||
case vertex_base_type::ub:
|
|
||||||
case vertex_base_type::ub256:
|
|
||||||
return 1;
|
|
||||||
case vertex_base_type::s1:
|
|
||||||
case vertex_base_type::s32k:
|
|
||||||
return size / 2;
|
|
||||||
default:
|
|
||||||
fmt::throw_exception("Unsupported vertex base type %d", static_cast<u8>(type));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void append_vertex_data(u32 sub_index, vertex_base_type type, u32 arg)
|
|
||||||
{
|
|
||||||
const u32 element_mask = (1 << sub_index);
|
|
||||||
const u8 vertex_size = get_vertex_size_in_dwords(type);
|
|
||||||
|
|
||||||
this->type = type;
|
|
||||||
|
|
||||||
if (attribute_mask & element_mask)
|
|
||||||
{
|
|
||||||
attribute_mask = 0;
|
|
||||||
|
|
||||||
vertex_count++;
|
|
||||||
data.resize(vertex_count * vertex_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
attribute_mask |= element_mask;
|
|
||||||
|
|
||||||
u32* dst = data.data() + ((vertex_count - 1) * vertex_size) + sub_index;
|
|
||||||
*dst = arg;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct register_vertex_data_info
|
struct register_vertex_data_info
|
||||||
|
|
|
@ -86,6 +86,7 @@
|
||||||
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
|
<ClCompile Include="Emu\RSX\Program\ProgramStateCache.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
|
<ClCompile Include="Emu\RSX\Program\program_util.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\RSXDisAsm.cpp" />
|
<ClCompile Include="Emu\RSX\RSXDisAsm.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\rsx_vertex_data.cpp" />
|
||||||
<ClCompile Include="Emu\system_config_types.cpp" />
|
<ClCompile Include="Emu\system_config_types.cpp" />
|
||||||
<ClCompile Include="Emu\perf_meter.cpp" />
|
<ClCompile Include="Emu\perf_meter.cpp" />
|
||||||
<ClCompile Include="Emu\system_progress.cpp" />
|
<ClCompile Include="Emu\system_progress.cpp" />
|
||||||
|
|
|
@ -1027,6 +1027,9 @@
|
||||||
<ClCompile Include="Emu\Io\camera_config.cpp">
|
<ClCompile Include="Emu\Io\camera_config.cpp">
|
||||||
<Filter>Emu\Io</Filter>
|
<Filter>Emu\Io</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\rsx_vertex_data.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="Crypto\aes.h">
|
<ClInclude Include="Crypto\aes.h">
|
||||||
|
|
Loading…
Reference in New Issue