rsx: Scrap the prebuffered queue approach

- Basically starting over
- The cost of making command copies into the queue has a measurable impact
This commit is contained in:
kd-11 2018-10-31 12:24:43 +03:00 committed by kd-11
parent 9deecd506a
commit 2e32777375
5 changed files with 256 additions and 1350 deletions

File diff suppressed because it is too large. [Load Diff]

View File

@@ -29,6 +29,7 @@ namespace rsx
NOP = 0,
FIFO_EMPTY = 0xDEADF1F0,
FIFO_BUSY = 0xBABEF1F0,
FIFO_ERROR = 0xDEADBEEF,
FIFO_PACKET_BEGIN = 0xF1F0,
FIFO_DISABLED_COMMAND = 0xF1F4,
FIFO_DRAW_BARRIER = 0xF1F8,
@@ -42,193 +43,30 @@ namespace rsx
u32 reserved;
};
// Metadata describing one prebuffered segment of the FIFO command stream,
// handed to the optimization passes alongside the decoded command queue.
struct fifo_buffer_info_t
{
u32 start_loc;      // FIFO location where this buffered segment starts
u32 length;         // segment length -- units (bytes vs. command words) not visible here; TODO confirm
u32 num_draw_calls; // number of draw calls contained in the segment
u32 reserved;       // unused / padding for future use
};
// Bookkeeping entry for FIFO branch prediction (keyed by branch source in
// m_branch_prediction_table; see report_branch_hit/report_branch_miss).
struct branch_target_info_t
{
u32 branch_target;  // predicted destination address
u32 branch_origin;  // address of the branching command
s64 weight;         // prediction score -- signed, presumably adjusted on hit/miss; verify against reporting code
u64 checksum_16;    // checksum over target data -- window size (16 what?) not visible here; TODO confirm
u64 reserved;       // unused / padding
};
// Interface for a single FIFO command-stream optimization pass.
// Implementations rewrite the buffered command queue in place.
struct optimization_pass
{
	// Passes are owned and destroyed through base pointers
	// (std::vector<std::unique_ptr<optimization_pass>> in FIFO_control),
	// so the destructor must be virtual to avoid undefined behavior on
	// delete-through-base.
	virtual ~optimization_pass() = default;

	// info:      metadata for the buffered FIFO segment being optimized
	// commands:  decoded command queue to transform in place
	// registers: current RSX method register file (read-only)
	virtual void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) = 0;
};
struct flattening_pass : public optimization_pass
class FIFO_control
{
private:
enum register_props : u8
{
skippable = 1,
ignorable = 2
none = 0,
skip_on_match = 1,
always_ignore = 2
};
std::array<u8, 0x10000 / 4> m_register_properties;
public:
flattening_pass();
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
};
// Optimization pass that bins draw calls by their setup state so draws
// sharing an identical prologue (and primitive type) can be grouped.
struct reordering_pass : public optimization_pass
{
private:
	// Ordered register-write log. Preserves the order registers were
	// written in while collapsing duplicate writes to the same register:
	// a superseded entry stays in the order list but is flagged dead.
	struct instruction_buffer_t
	{
		std::unordered_map<u32, u32> m_storage; // reg -> latest value
		simple_array<u32> m_insertion_order;    // write order; bit 31 set = superseded

		instruction_buffer_t()
		{
			m_insertion_order.reserve(64);
		}

		// Record a write of 'value' to 'reg'. If the register was written
		// before, flag the earlier occurrence so playback skips it.
		void add_cmd(u32 reg, u32 value)
		{
			const auto is_new = m_storage.insert_or_assign(reg, value).second;
			if (!is_new)
			{
				// Mark the previous (still-enabled) occurrence as dead
				for (auto &loc : m_insertion_order)
				{
					if (loc == reg)
					{
						loc |= 0x80000000;
						break;
					}
				}
			}

			m_insertion_order.push_back(reg);
		}

		void clear()
		{
			m_storage.clear();
			m_insertion_order.clear();
		}

		void swap(instruction_buffer_t& other)
		{
			m_storage.swap(other.m_storage);
			m_insertion_order.swap(other.m_insertion_order);
		}

		auto size() const
		{
			return m_storage.size();
		}

		// Returns the { register, value } pair for the index-th write, or
		// { FIFO_DISABLED_COMMAND, 0 } if a later write superseded it.
		inline std::pair<u32, u32> get(int index) const
		{
			const auto key = m_insertion_order[index];
			if (key & 0x80000000)
			{
				// Disabled by a later write to the same register
				// TODO: Track command type registers and avoid this
				return { FIFO_DISABLED_COMMAND, 0 };
			}

			const auto value = m_storage.at(key);
			return { key, value };
		}

		// Buffers are equal when the final register/value sets match;
		// write order is deliberately ignored.
		bool operator == (const instruction_buffer_t& other) const
		{
			if (size() == other.size())
			{
				for (const auto &e : other.m_storage)
				{
					const auto found = m_storage.find(e.first);
					if (found == m_storage.end())
						return false;

					if (found->second != e.second)
						return false;
				}

				return true;
			}

			return false;
		}
	};

	// One binned draw call: the state setup preceding it plus the draw
	// commands themselves.
	struct draw_call
	{
		instruction_buffer_t prologue;
		std::vector<register_pair> draws;
		bool write_prologue;
		u32 primitive_type;
		const register_pair* start_pos;

		// True if this bin uses the same primitive type and identical
		// setup state. NOTE: 'setup' is now taken by const reference --
		// the previous by-value parameter deep-copied the whole map and
		// order array on every comparison.
		bool matches(const instruction_buffer_t& setup, u32 prim) const
		{
			if (prim != primitive_type)
				return false;

			return prologue == setup;
		}
	};

	instruction_buffer_t registers_changed;
	std::vector<draw_call> bins;
	std::unordered_multimap<u32, fifo_buffer_info_t> m_results_prediction_table;

public:
	void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
};
class FIFO_control
{
RsxDmaControl* m_ctrl = nullptr;
u32 m_internal_get = 0;
std::shared_ptr<thread_base> m_prefetcher_thread;
u32 m_prefetch_get = 0;
atomic_t<bool> m_prefetcher_busy{ false };
atomic_t<bool> m_fifo_busy{ false };
fifo_buffer_info_t m_prefetcher_info;
bool m_prefetcher_speculating;
std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes;
simple_array<register_pair> m_queue;
simple_array<register_pair> m_prefetched_queue;
atomic_t<u32> m_command_index{ 0 };
shared_mutex m_prefetch_mutex; // Guards prefetch queue
shared_mutex m_queue_mutex; // Guards primary queue
atomic_t<u64> m_ctrl_tag{ 0 }; // 'Guards' control registers
register_pair empty_cmd { FIFO_EMPTY };
register_pair busy_cmd { FIFO_BUSY };
u32 m_memwatch_addr = 0;
u32 m_memwatch_cmp = 0;
fifo_buffer_info_t m_fifo_info;
std::unordered_multimap<u32, branch_target_info_t> m_branch_prediction_table;
u32 m_command_reg = 0;
u32 m_command_inc = 0;
u32 m_remaining_commands = 0;
u32 m_args_ptr = 0;
void read_ahead(fifo_buffer_info_t& info, simple_array<register_pair>& commands, u32& get_pointer);
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands);
void clear_buffer();
u32 get_likely_target(u32 source);
void report_branch_miss(u32 source, u32 target, u32 actual);
void report_branch_hit(u32 source, u32 target);
bool test_prefetcher_correctness(u32 actual_target);
std::array<u8, 0x10000 / 4> m_register_properties;
bool has_deferred_draw = false;
public:
FIFO_control(rsx::thread* pctrl);
@@ -237,12 +75,9 @@ namespace rsx
void set_get(u32 get, bool spinning = false);
void set_put(u32 put);
const register_pair& read();
inline const register_pair& read_unsafe();
void register_optimization_pass(optimization_pass* pass);
void finalize();
void read(register_pair& data);
inline void read_unsafe(register_pair& data);
inline bool has_next() const;
public:
static bool is_blocking_cmd(u32 cmd);

View File

@@ -554,10 +554,6 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
last_flip_time = get_system_time() - 1000000;
named_thread vblank_thread("VBlank Thread", [this]()
@@ -682,11 +678,6 @@ namespace rsx
m_decompiler_thread->join();
m_decompiler_thread.reset();
}
if (fifo_ctrl)
{
fifo_ctrl->finalize();
}
}
std::string thread::get_name() const

View File

@@ -456,6 +456,10 @@ namespace rsx
rsx::method_registers.current_draw_clause.compile();
rsxthr->end();
}
else
{
rsxthr->in_begin_end = false;
}
}
vm::addr_t get_report_data_impl(u32 offset)
@@ -2229,8 +2233,11 @@ namespace rsx
{
u32 result = 0;
for (const auto &barrier : draw_command_barriers[current_range_index])
for (const auto &barrier : draw_command_barriers)
{
if (barrier.draw_id != current_range_index)
continue;
switch (barrier.type)
{
case primitive_restart_barrier:

View File

@@ -43,6 +43,7 @@ namespace rsx
struct barrier_t
{
u32 draw_id;
u64 timestamp;
u32 address;
@@ -74,7 +75,7 @@ namespace rsx
simple_array<draw_range_t> draw_command_ranges;
// Stores rasterization barriers for primitive types sensitive to adjacency
std::vector<std::set<barrier_t>> draw_command_barriers;
simple_array<barrier_t> draw_command_barriers;
// Counter used to parse the commands in order
u32 current_range_index;
@@ -87,28 +88,27 @@ namespace rsx
// Append a new draw command at the end of the draw list, keeping the
// barrier container in lockstep with draw_command_ranges (their sizes
// are verified equal elsewhere in this class).
void append_draw_command(const draw_range_t& range)
{
draw_command_ranges.push_back(range);
draw_command_barriers.push_back({});
}
// Insert a new draw command at position 'index', shifting later draws
// down and bumping the draw_id of every barrier at or after that slot.
void insert_draw_command(int index, const draw_range_t& range)
{
	auto range_It = draw_command_ranges.begin();
	auto barrier_It = draw_command_barriers.begin();

	// Because deque::insert fails with initializer list on MSVC
	const std::set<barrier_t> new_barrier;

	// Advance on a copy: the original code did 'while (index--)', which
	// left index == -1 and broke the draw_id >= index comparison below
	// (signed/unsigned conversion made it effectively never match).
	for (int step = index; step > 0; step--)
	{
		++range_It;
		++barrier_It;
	}

	draw_command_ranges.insert(range_It, range);
	draw_command_barriers.insert(barrier_It, new_barrier);

	verify(HERE), draw_command_ranges.size() == draw_command_barriers.size();

	// Update all barrier draw ids after this one
	for (auto &barrier : draw_command_barriers)
	{
		if (barrier.draw_id >= index)
		{
			barrier.draw_id++;
		}
	}
}
public:
@@ -125,6 +125,26 @@
{
verify(HERE), !draw_command_ranges.empty();
auto _do_barrier_insert = [this](barrier_t&& val)
{
if (draw_command_barriers.empty() || draw_command_barriers.back() < val)
{
draw_command_barriers.push_back(val);
return;
}
for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++)
{
if (*it < val)
{
continue;
}
draw_command_barriers.insert(it, val);
break;
}
};
if (type == primitive_restart_barrier)
{
// Rasterization flow barrier
@@ -132,7 +152,7 @@
const auto address = last.first + last.count;
const auto command_index = draw_command_ranges.size() - 1;
draw_command_barriers[command_index].insert({ 0, address, arg, 0, type });
_do_barrier_insert({ command_index, 0, address, arg, 0, type });
}
else
{
@@ -140,7 +160,7 @@
append_draw_command({});
const auto command_index = draw_command_ranges.size() - 1;
draw_command_barriers[command_index].insert({ get_system_time(), -1u, arg, 0, type });
_do_barrier_insert({ command_index, get_system_time(), -1u, arg, 0, type });
last_execution_barrier_index = command_index;
}
}
@@ -243,8 +263,11 @@
}
verify(HERE), current_range_index != -1u;
for (const auto &barrier : draw_command_barriers[current_range_index])
for (const auto &barrier : draw_command_barriers)
{
if (barrier.draw_id != current_range_index)
continue;
if (barrier.type == primitive_restart_barrier)
return false;
}
@@ -342,8 +365,11 @@
u32 previous_barrier = range.first;
u32 vertex_counter = 0;
for (const auto &barrier : draw_command_barriers[current_range_index])
for (const auto &barrier : draw_command_barriers)
{
if (barrier.draw_id != current_range_index)
continue;
if (barrier.type != primitive_restart_barrier)
continue;