mirror of https://github.com/RPCS3/rpcs3.git
rsx: Scrap the prebuffered queue approach
- Basically starting over - The cost of making command copies into the queue has a measurable impact
This commit is contained in:
parent
9deecd506a
commit
2e32777375
File diff suppressed because it is too large
Load Diff
|
@ -29,6 +29,7 @@ namespace rsx
|
|||
NOP = 0,
|
||||
FIFO_EMPTY = 0xDEADF1F0,
|
||||
FIFO_BUSY = 0xBABEF1F0,
|
||||
FIFO_ERROR = 0xDEADBEEF,
|
||||
FIFO_PACKET_BEGIN = 0xF1F0,
|
||||
FIFO_DISABLED_COMMAND = 0xF1F4,
|
||||
FIFO_DRAW_BARRIER = 0xF1F8,
|
||||
|
@ -42,193 +43,30 @@ namespace rsx
|
|||
u32 reserved;
|
||||
};
|
||||
|
||||
struct fifo_buffer_info_t
|
||||
{
|
||||
u32 start_loc;
|
||||
u32 length;
|
||||
u32 num_draw_calls;
|
||||
u32 reserved;
|
||||
};
|
||||
|
||||
struct branch_target_info_t
|
||||
{
|
||||
u32 branch_target;
|
||||
u32 branch_origin;
|
||||
s64 weight;
|
||||
u64 checksum_16;
|
||||
u64 reserved;
|
||||
};
|
||||
|
||||
struct optimization_pass
|
||||
{
|
||||
virtual void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) = 0;
|
||||
};
|
||||
|
||||
struct flattening_pass : public optimization_pass
|
||||
class FIFO_control
|
||||
{
|
||||
private:
|
||||
enum register_props : u8
|
||||
{
|
||||
skippable = 1,
|
||||
ignorable = 2
|
||||
none = 0,
|
||||
skip_on_match = 1,
|
||||
always_ignore = 2
|
||||
};
|
||||
|
||||
std::array<u8, 0x10000 / 4> m_register_properties;
|
||||
|
||||
public:
|
||||
flattening_pass();
|
||||
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
|
||||
};
|
||||
|
||||
struct reordering_pass : public optimization_pass
|
||||
{
|
||||
private:
|
||||
|
||||
struct instruction_buffer_t
|
||||
{
|
||||
std::unordered_map<u32, u32> m_storage;
|
||||
simple_array<u32> m_insertion_order;
|
||||
|
||||
instruction_buffer_t()
|
||||
{
|
||||
m_insertion_order.reserve(64);
|
||||
}
|
||||
|
||||
void add_cmd(u32 reg, u32 value)
|
||||
{
|
||||
const auto is_new = std::get<1>(m_storage.insert_or_assign(reg, value));
|
||||
if (!is_new)
|
||||
{
|
||||
for (auto &loc : m_insertion_order)
|
||||
{
|
||||
if (loc == reg)
|
||||
{
|
||||
loc |= 0x80000000;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_insertion_order.push_back(reg);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
m_storage.clear();
|
||||
m_insertion_order.clear();
|
||||
}
|
||||
|
||||
void swap(instruction_buffer_t& other)
|
||||
{
|
||||
m_storage.swap(other.m_storage);
|
||||
m_insertion_order.swap(other.m_insertion_order);
|
||||
}
|
||||
|
||||
auto size() const
|
||||
{
|
||||
return m_storage.size();
|
||||
}
|
||||
|
||||
inline std::pair<u32, u32> get(int index) const
|
||||
{
|
||||
const auto key = m_insertion_order[index];
|
||||
if (key & 0x80000000)
|
||||
{
|
||||
// Disabled by a later write to the same register
|
||||
// TODO: Track command type registers and avoid this
|
||||
return { FIFO_DISABLED_COMMAND, 0 };
|
||||
}
|
||||
|
||||
const auto value = m_storage.at(key);
|
||||
return { key, value };
|
||||
}
|
||||
|
||||
bool operator == (const instruction_buffer_t& other) const
|
||||
{
|
||||
if (size() == other.size())
|
||||
{
|
||||
for (const auto &e : other.m_storage)
|
||||
{
|
||||
const auto found = m_storage.find(e.first);
|
||||
if (found == m_storage.end())
|
||||
return false;
|
||||
|
||||
if (found->second != e.second)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
struct draw_call
|
||||
{
|
||||
instruction_buffer_t prologue;
|
||||
std::vector<register_pair> draws;
|
||||
bool write_prologue;
|
||||
u32 primitive_type;
|
||||
const register_pair* start_pos;
|
||||
|
||||
bool matches(const instruction_buffer_t setup, u32 prim) const
|
||||
{
|
||||
if (prim != primitive_type)
|
||||
return false;
|
||||
|
||||
return prologue == setup;
|
||||
}
|
||||
};
|
||||
|
||||
instruction_buffer_t registers_changed;
|
||||
std::vector<draw_call> bins;
|
||||
|
||||
std::unordered_multimap<u32, fifo_buffer_info_t> m_results_prediction_table;
|
||||
|
||||
public:
|
||||
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
|
||||
};
|
||||
|
||||
class FIFO_control
|
||||
{
|
||||
RsxDmaControl* m_ctrl = nullptr;
|
||||
u32 m_internal_get = 0;
|
||||
|
||||
std::shared_ptr<thread_base> m_prefetcher_thread;
|
||||
u32 m_prefetch_get = 0;
|
||||
atomic_t<bool> m_prefetcher_busy{ false };
|
||||
atomic_t<bool> m_fifo_busy{ false };
|
||||
fifo_buffer_info_t m_prefetcher_info;
|
||||
bool m_prefetcher_speculating;
|
||||
|
||||
std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes;
|
||||
|
||||
simple_array<register_pair> m_queue;
|
||||
simple_array<register_pair> m_prefetched_queue;
|
||||
atomic_t<u32> m_command_index{ 0 };
|
||||
|
||||
shared_mutex m_prefetch_mutex; // Guards prefetch queue
|
||||
shared_mutex m_queue_mutex; // Guards primary queue
|
||||
atomic_t<u64> m_ctrl_tag{ 0 }; // 'Guards' control registers
|
||||
|
||||
register_pair empty_cmd { FIFO_EMPTY };
|
||||
register_pair busy_cmd { FIFO_BUSY };
|
||||
|
||||
u32 m_memwatch_addr = 0;
|
||||
u32 m_memwatch_cmp = 0;
|
||||
|
||||
fifo_buffer_info_t m_fifo_info;
|
||||
std::unordered_multimap<u32, branch_target_info_t> m_branch_prediction_table;
|
||||
u32 m_command_reg = 0;
|
||||
u32 m_command_inc = 0;
|
||||
u32 m_remaining_commands = 0;
|
||||
u32 m_args_ptr = 0;
|
||||
|
||||
void read_ahead(fifo_buffer_info_t& info, simple_array<register_pair>& commands, u32& get_pointer);
|
||||
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands);
|
||||
void clear_buffer();
|
||||
|
||||
u32 get_likely_target(u32 source);
|
||||
void report_branch_miss(u32 source, u32 target, u32 actual);
|
||||
void report_branch_hit(u32 source, u32 target);
|
||||
bool test_prefetcher_correctness(u32 actual_target);
|
||||
std::array<u8, 0x10000 / 4> m_register_properties;
|
||||
bool has_deferred_draw = false;
|
||||
|
||||
public:
|
||||
FIFO_control(rsx::thread* pctrl);
|
||||
|
@ -237,12 +75,9 @@ namespace rsx
|
|||
void set_get(u32 get, bool spinning = false);
|
||||
void set_put(u32 put);
|
||||
|
||||
const register_pair& read();
|
||||
inline const register_pair& read_unsafe();
|
||||
|
||||
void register_optimization_pass(optimization_pass* pass);
|
||||
|
||||
void finalize();
|
||||
void read(register_pair& data);
|
||||
inline void read_unsafe(register_pair& data);
|
||||
inline bool has_next() const;
|
||||
|
||||
public:
|
||||
static bool is_blocking_cmd(u32 cmd);
|
||||
|
|
|
@ -554,10 +554,6 @@ namespace rsx
|
|||
|
||||
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
|
||||
|
||||
fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
||||
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
|
||||
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
||||
|
||||
last_flip_time = get_system_time() - 1000000;
|
||||
|
||||
named_thread vblank_thread("VBlank Thread", [this]()
|
||||
|
@ -682,11 +678,6 @@ namespace rsx
|
|||
m_decompiler_thread->join();
|
||||
m_decompiler_thread.reset();
|
||||
}
|
||||
|
||||
if (fifo_ctrl)
|
||||
{
|
||||
fifo_ctrl->finalize();
|
||||
}
|
||||
}
|
||||
|
||||
std::string thread::get_name() const
|
||||
|
|
|
@ -456,6 +456,10 @@ namespace rsx
|
|||
rsx::method_registers.current_draw_clause.compile();
|
||||
rsxthr->end();
|
||||
}
|
||||
else
|
||||
{
|
||||
rsxthr->in_begin_end = false;
|
||||
}
|
||||
}
|
||||
|
||||
vm::addr_t get_report_data_impl(u32 offset)
|
||||
|
@ -2229,8 +2233,11 @@ namespace rsx
|
|||
{
|
||||
u32 result = 0;
|
||||
|
||||
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||
for (const auto &barrier : draw_command_barriers)
|
||||
{
|
||||
if (barrier.draw_id != current_range_index)
|
||||
continue;
|
||||
|
||||
switch (barrier.type)
|
||||
{
|
||||
case primitive_restart_barrier:
|
||||
|
|
|
@ -43,6 +43,7 @@ namespace rsx
|
|||
|
||||
struct barrier_t
|
||||
{
|
||||
u32 draw_id;
|
||||
u64 timestamp;
|
||||
|
||||
u32 address;
|
||||
|
@ -74,7 +75,7 @@ namespace rsx
|
|||
simple_array<draw_range_t> draw_command_ranges;
|
||||
|
||||
// Stores rasterization barriers for primitive types sensitive to adjacency
|
||||
std::vector<std::set<barrier_t>> draw_command_barriers;
|
||||
simple_array<barrier_t> draw_command_barriers;
|
||||
|
||||
// Counter used to parse the commands in order
|
||||
u32 current_range_index;
|
||||
|
@ -87,28 +88,27 @@ namespace rsx
|
|||
void append_draw_command(const draw_range_t& range)
|
||||
{
|
||||
draw_command_ranges.push_back(range);
|
||||
draw_command_barriers.push_back({});
|
||||
}
|
||||
|
||||
// Insert a new draw command within the others
|
||||
void insert_draw_command(int index, const draw_range_t& range)
|
||||
{
|
||||
auto range_It = draw_command_ranges.begin();
|
||||
auto barrier_It = draw_command_barriers.begin();
|
||||
|
||||
// Because deque::insert fails with initializer list on MSVC
|
||||
const std::set<barrier_t> new_barrier;
|
||||
|
||||
while (index--)
|
||||
{
|
||||
++range_It;
|
||||
++barrier_It;
|
||||
}
|
||||
|
||||
draw_command_ranges.insert(range_It, range);
|
||||
draw_command_barriers.insert(barrier_It, new_barrier);
|
||||
|
||||
verify(HERE), draw_command_ranges.size() == draw_command_barriers.size();
|
||||
// Update all barrier draw ids after this one
|
||||
for (auto &barrier : draw_command_barriers)
|
||||
{
|
||||
if (barrier.draw_id >= index)
|
||||
{
|
||||
barrier.draw_id++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -125,6 +125,26 @@ namespace rsx
|
|||
{
|
||||
verify(HERE), !draw_command_ranges.empty();
|
||||
|
||||
auto _do_barrier_insert = [this](barrier_t&& val)
|
||||
{
|
||||
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
|
||||
{
|
||||
draw_command_barriers.push_back(val);
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
|
||||
{
|
||||
if (*it < val)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
draw_command_barriers.insert(it, val);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
if (type == primitive_restart_barrier)
|
||||
{
|
||||
// Rasterization flow barrier
|
||||
|
@ -132,7 +152,7 @@ namespace rsx
|
|||
const auto address = last.first + last.count;
|
||||
|
||||
const auto command_index = draw_command_ranges.size() - 1;
|
||||
draw_command_barriers[command_index].insert({ 0, address, arg, 0, type });
|
||||
_do_barrier_insert({ command_index, 0, address, arg, 0, type });
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -140,7 +160,7 @@ namespace rsx
|
|||
append_draw_command({});
|
||||
const auto command_index = draw_command_ranges.size() - 1;
|
||||
|
||||
draw_command_barriers[command_index].insert({ get_system_time(), -1u, arg, 0, type });
|
||||
_do_barrier_insert({ command_index, get_system_time(), -1u, arg, 0, type });
|
||||
last_execution_barrier_index = command_index;
|
||||
}
|
||||
}
|
||||
|
@ -243,8 +263,11 @@ namespace rsx
|
|||
}
|
||||
|
||||
verify(HERE), current_range_index != -1u;
|
||||
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||
for (const auto &barrier : draw_command_barriers)
|
||||
{
|
||||
if (barrier.draw_id != current_range_index)
|
||||
continue;
|
||||
|
||||
if (barrier.type == primitive_restart_barrier)
|
||||
return false;
|
||||
}
|
||||
|
@ -342,8 +365,11 @@ namespace rsx
|
|||
u32 previous_barrier = range.first;
|
||||
u32 vertex_counter = 0;
|
||||
|
||||
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||
for (const auto &barrier : draw_command_barriers)
|
||||
{
|
||||
if (barrier.draw_id != current_range_index)
|
||||
continue;
|
||||
|
||||
if (barrier.type != primitive_restart_barrier)
|
||||
continue;
|
||||
|
||||
|
|
Loading…
Reference in New Issue