GPU: Implement FIFO and timings

This will cause a slight performance loss. I've left in some knobs which
can be tweaked to mitigate this, but the goal is to be compatible with
all games which require the FIFO and timings to be emulated.
This commit is contained in:
Connor McLaughlin 2020-04-19 01:16:58 +10:00
parent 65ca8b9fe0
commit 1757932b3a
12 changed files with 562 additions and 311 deletions

View File

@ -259,14 +259,6 @@ void DMA::TransferChannel(Channel channel)
if (word_count > 0)
TransferMemoryToDevice(channel, (current_address + sizeof(header)) & ADDRESS_MASK, 4, word_count);
// Self-referencing DMA loops.. not sure how these are happening?
if (current_address == next_address)
{
Log_ErrorPrintf("HACK: Aborting self-referencing DMA loop @ 0x%08X. Something went wrong to generate this.",
current_address);
break;
}
current_address = next_address;
if (current_address & UINT32_C(0x800000))
break;

View File

@ -38,16 +38,19 @@ bool GPU::Initialize(HostDisplay* host_display, System* system, DMA* dma, Interr
void GPU::UpdateSettings()
{
m_force_progressive_scan = m_system->GetSettings().gpu_disable_interlacing;
const Settings& settings = m_system->GetSettings();
if (m_force_ntsc_timings != m_system->GetSettings().gpu_force_ntsc_timings)
m_force_progressive_scan = settings.gpu_disable_interlacing;
m_fifo_size = settings.gpu_fifo_size;
m_max_run_ahead = settings.gpu_max_run_ahead;
if (m_force_ntsc_timings != settings.gpu_force_ntsc_timings)
{
m_force_ntsc_timings = m_system->GetSettings().gpu_force_ntsc_timings;
m_force_ntsc_timings = settings.gpu_force_ntsc_timings;
UpdateCRTCConfig();
}
m_crtc_state.display_aspect_ratio =
Settings::GetDisplayAspectRatioValue(m_system->GetSettings().display_aspect_ratio);
m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(settings.display_aspect_ratio);
// Crop mode calls this, so recalculate the display area
UpdateCRTCDisplayParameters();
@ -77,11 +80,13 @@ void GPU::SoftReset()
m_crtc_state.current_scanline = 0;
m_crtc_state.in_hblank = false;
m_crtc_state.in_vblank = false;
m_state = State::Idle;
m_blitter_ticks = 0;
m_blitter_state = BlitterState::Idle;
m_command_ticks = 0;
m_command_total_words = 0;
m_vram_transfer = {};
m_GP0_buffer.clear();
m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
SetDrawMode(0);
SetTexturePalette(0);
SetTextureWindow(0);
@ -148,8 +153,8 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_crtc_state.in_hblank);
sw.Do(&m_crtc_state.in_vblank);
sw.Do(&m_state);
sw.Do(&m_blitter_ticks);
sw.Do(&m_blitter_state);
sw.Do(&m_command_ticks);
sw.Do(&m_command_total_words);
sw.Do(&m_GPUREAD_latch);
@ -160,7 +165,13 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_vram_transfer.col);
sw.Do(&m_vram_transfer.row);
sw.Do(&m_GP0_buffer);
sw.Do(&m_fifo);
sw.Do(&m_blit_buffer);
sw.Do(&m_blit_remaining_words);
sw.Do(&m_render_command.bits);
sw.Do(&m_max_run_ahead);
sw.Do(&m_fifo_size);
if (sw.IsReading())
{
@ -207,16 +218,26 @@ void GPU::RestoreGraphicsAPIState() {}
void GPU::UpdateDMARequest()
{
// we can kill the blitter ticks here if enough time has passed
if (m_blitter_ticks > 0 && GetPendingGPUTicks() >= m_blitter_ticks)
m_blitter_ticks = 0;
switch (m_blitter_state)
{
case BlitterState::Idle:
m_GPUSTAT.gpu_idle = (m_command_ticks <= 0);
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
break;
const bool blitter_idle = (m_blitter_ticks <= 0);
case BlitterState::WritingVRAM:
m_GPUSTAT.gpu_idle = false;
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
break;
m_GPUSTAT.ready_to_send_vram = (blitter_idle && m_state == State::ReadingVRAM);
m_GPUSTAT.ready_to_recieve_cmd = (blitter_idle && m_state == State::Idle);
m_GPUSTAT.ready_to_recieve_dma =
blitter_idle && (m_state == State::Idle || (m_state != State::ReadingVRAM && m_command_total_words > 0));
case BlitterState::ReadingVRAM:
m_GPUSTAT.gpu_idle = false;
m_GPUSTAT.ready_to_send_vram = true;
m_GPUSTAT.ready_to_recieve_dma = false;
break;
}
bool dma_request;
switch (m_GPUSTAT.dma_direction)
@ -226,15 +247,15 @@ void GPU::UpdateDMARequest()
break;
case DMADirection::FIFO:
dma_request = blitter_idle && m_state >= State::ReadingVRAM; // FIFO not full/full
dma_request = m_GPUSTAT.ready_to_recieve_dma;
break;
case DMADirection::CPUtoGP0:
dma_request = blitter_idle && m_GPUSTAT.ready_to_recieve_dma;
dma_request = m_GPUSTAT.ready_to_recieve_dma;
break;
case DMADirection::GPUREADtoCPU:
dma_request = blitter_idle && m_GPUSTAT.ready_to_send_vram;
dma_request = m_GPUSTAT.ready_to_send_vram;
break;
default:
@ -256,7 +277,7 @@ u32 GPU::ReadRegister(u32 offset)
{
// code can be dependent on the odd/even bit, so update the GPU state when reading.
// we can mitigate this slightly by only updating when the raster is actually hitting a new line
if (IsRasterScanlinePending())
if (IsRasterScanlineOrCommandPending())
Synchronize();
return m_GPUSTAT.bits;
@ -273,7 +294,8 @@ void GPU::WriteRegister(u32 offset, u32 value)
switch (offset)
{
case 0x00:
WriteGP0(value);
m_fifo.Push(value);
ExecuteCommands();
return;
case 0x04:
@ -305,22 +327,13 @@ void GPU::DMAWrite(const u32* words, u32 word_count)
{
case DMADirection::CPUtoGP0:
{
std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer));
m_fifo.PushRange(words, word_count);
m_fifo_pushed = true;
if (!m_syncing)
ExecuteCommands();
if (m_state == State::WritingVRAM)
{
Assert(m_blitter_ticks == 0);
m_blitter_ticks = GetPendingGPUTicks() + word_count;
// reschedule GPU tick event
const TickCount sysclk_ticks = GPUTicksToSystemTicks(word_count);
if (m_tick_event->GetTicksUntilNextExecution() > sysclk_ticks)
m_tick_event->Schedule(sysclk_ticks);
else
UpdateDMARequest();
}
}
break;
default:
@ -332,6 +345,22 @@ void GPU::DMAWrite(const u32* words, u32 word_count)
}
}
/// Charges `ticks` GPU ticks against the command currently being executed.
/// When the command timer is already non-zero the ticks are simply accumulated onto it. Otherwise the timer is
/// started from the GPU ticks which are currently pending, and the tick event is brought forward if it would
/// otherwise run after these ticks have elapsed.
void GPU::AddCommandTicks(TickCount ticks)
{
  if (m_command_ticks == 0)
  {
    // timer was idle, so start it relative to the ticks which have not been executed yet
    m_command_ticks = GetPendingGPUTicks() + ticks;

    // only pull the event forward, never push it back
    const TickCount ticks_in_sysclk = GPUTicksToSystemTicks(ticks);
    if (ticks_in_sysclk < m_tick_event->GetTicksUntilNextExecution())
      m_tick_event->Schedule(ticks_in_sysclk);

    return;
  }

  // a command is already being timed, extend it
  m_command_ticks += ticks;
}
void GPU::Synchronize()
{
m_tick_event->InvokeEarly();
@ -547,7 +576,7 @@ void GPU::UpdateSliceTicks()
(m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline);
m_tick_event->Schedule(
GPUTicksToSystemTicks((m_blitter_ticks > 0) ? std::min(m_blitter_ticks, ticks_until_vblank) : ticks_until_vblank));
GPUTicksToSystemTicks((m_command_ticks > 0) ? std::min(m_command_ticks, ticks_until_vblank) : ticks_until_vblank));
}
bool GPU::IsRasterScanlinePending() const
@ -555,6 +584,13 @@ bool GPU::IsRasterScanlinePending() const
return (GetPendingGPUTicks() + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total;
}
bool GPU::IsRasterScanlineOrCommandPending() const
{
const TickCount pending_ticks = GetPendingGPUTicks();
return ((pending_ticks + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total) ||
(m_command_ticks > 0 && pending_ticks > m_command_ticks);
}
void GPU::Execute(TickCount ticks)
{
// convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider
@ -565,13 +601,12 @@ void GPU::Execute(TickCount ticks)
m_crtc_state.current_tick_in_scanline += gpu_ticks;
// handle blits
TickCount blit_ticks_remaining = gpu_ticks;
while (m_blitter_ticks > 0 && blit_ticks_remaining > 0)
if (m_command_ticks > 0)
{
const TickCount slice = std::min(blit_ticks_remaining, m_blitter_ticks);
m_blitter_ticks -= slice;
blit_ticks_remaining -= slice;
UpdateDMARequest();
m_command_ticks -= gpu_ticks;
ExecuteCommands();
if (m_command_ticks < 0)
m_command_ticks = 0;
}
}
@ -672,7 +707,7 @@ void GPU::Execute(TickCount ticks)
u32 GPU::ReadGPUREAD()
{
if (m_state != State::ReadingVRAM)
if (m_blitter_state != BlitterState::ReadingVRAM)
return m_GPUREAD_latch;
// Read two pixels out of VRAM and combine them. Zero fill odd pixel counts.
@ -692,11 +727,12 @@ u32 GPU::ReadGPUREAD()
{
Log_DebugPrintf("End of VRAM->CPU transfer");
m_vram_transfer = {};
m_state = State::Idle;
m_blitter_state = BlitterState::Idle;
UpdateDMARequest();
// end of transfer, catch up on any commands which were written (unlikely)
ExecuteCommands();
UpdateDMARequest();
break;
}
}
@ -706,12 +742,6 @@ u32 GPU::ReadGPUREAD()
return value;
}
void GPU::WriteGP0(u32 value)
{
m_GP0_buffer.push_back(value);
ExecuteCommands();
}
void GPU::WriteGP1(u32 value)
{
const u8 command = Truncate8(value >> 24);
@ -729,11 +759,13 @@ void GPU::WriteGP1(u32 value)
case 0x01: // Clear FIFO
{
Log_DebugPrintf("GP1 clear FIFO");
m_state = State::Idle;
m_blitter_state = BlitterState::Idle;
m_command_total_words = 0;
m_vram_transfer = {};
m_GP0_buffer.clear();
m_blitter_ticks = 0;
m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
m_command_ticks = 0;
UpdateDMARequest();
}
break;
@ -756,10 +788,13 @@ void GPU::WriteGP1(u32 value)
case 0x04: // DMA Direction
{
m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
Log_DebugPrintf("DMA direction <- 0x%02X", static_cast<u32>(m_GPUSTAT.dma_direction.GetValue()));
if (m_GPUSTAT.dma_direction != static_cast<DMADirection>(param))
{
m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
UpdateDMARequest();
}
}
break;
case 0x05: // Set display start address
@ -827,8 +862,10 @@ void GPU::WriteGP1(u32 value)
if (m_GPUSTAT.bits != new_GPUSTAT.bits)
{
// Have to be careful when setting this because Synchronize() can modify GPUSTAT.
static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000);
Synchronize();
m_GPUSTAT.bits = new_GPUSTAT.bits;
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK);
UpdateCRTCConfig();
}
}
@ -1023,7 +1060,7 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
}
}
void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) {}
void GPU::DispatchRenderCommand() {}
void GPU::FlushRender() {}

View File

@ -1,5 +1,6 @@
#pragma once
#include "common/bitfield.h"
#include "common/fifo_queue.h"
#include "common/rectangle.h"
#include "timers.h"
#include "types.h"
@ -23,13 +24,12 @@ class Timers;
class GPU
{
public:
enum class State : u8
enum class BlitterState : u8
{
Idle,
WaitingForParameters,
ExecutingCommand,
ReadingVRAM,
WritingVRAM
WritingVRAM,
DrawingPolyLine
};
enum class DMADirection : u32
@ -88,13 +88,14 @@ public:
VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512,
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
MAX_FIFO_SIZE = 4096,
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512,
DOT_TIMER_INDEX = 0,
HBLANK_TIMER_INDEX = 1,
MAX_RESOLUTION_SCALE = 16,
MAX_RESOLUTION_SCALE = 16
};
enum : u16
@ -143,6 +144,9 @@ public:
/// Returns true if enough ticks have passed for the raster to be on the next line.
bool IsRasterScanlinePending() const;
/// Returns true if a raster scanline or command execution is pending.
bool IsRasterScanlineOrCommandPending() const;
// Synchronizes the CRTC, updating the hblank timer.
void Synchronize();
@ -347,10 +351,20 @@ protected:
void SetTextureWindow(u32 value);
u32 ReadGPUREAD();
void WriteGP0(u32 value);
void FinishVRAMWrite();
/// Returns the number of vertices in the buffered poly-line.
/// The count is derived from the number of words in the blit buffer: the shading flag is added to the word count,
/// and the sum is then shifted right by the same flag.
ALWAYS_INLINE u32 GetPolyLineVertexCount() const
{
  const u32 buffered_words = static_cast<u32>(m_blit_buffer.size());
  const u32 rounded_words = buffered_words + BoolToUInt32(m_render_command.shading_enable);
  return rounded_words >> BoolToUInt8(m_render_command.shading_enable);
}
void AddCommandTicks(TickCount ticks);
void WriteGP1(u32 value);
void ExecuteCommands();
void EndCommand();
void ExecuteCommands();
void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend
@ -358,11 +372,30 @@ protected:
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
virtual void DispatchRenderCommand();
virtual void FlushRender();
virtual void UpdateDisplay();
virtual void DrawRendererStats(bool is_idle_frame);
// These are **very** approximate.
/// Charges command ticks for a triangle using half of the width * height product passed in, shifted left by the
/// flag produced when the triangle is textured and/or shaded.
ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool textured, bool shaded)
{
#if 0
  const u32 base_ticks = static_cast<u32>((std::abs(x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)) + 1u) / 2u);
#else
  const u32 base_ticks = (width * height) >> 1;
#endif

  const auto cost_shift = BoolToUInt8(textured || shaded);
  AddCommandTicks(base_ticks << cost_shift);
}
/// Charges command ticks for a rectangle: the width * height product, shifted left by the textured flag.
ALWAYS_INLINE void AddDrawRectangleTicks(u32 width, u32 height, bool textured)
{
  const u32 pixel_count = width * height;
  const auto cost_shift = BoolToUInt8(textured);
  AddCommandTicks(pixel_count << cost_shift);
}
/// Charges command ticks for a line: the larger of the two extents passed in.
/// `shaded` is accepted but does not currently affect the cost.
ALWAYS_INLINE void AddDrawLineTicks(u32 width, u32 height, bool shaded)
{
  const u32 longest_extent = (width < height) ? height : width;
  AddCommandTicks(longest_extent);
}
HostDisplay* m_host_display = nullptr;
System* m_system = nullptr;
DMA* m_dma = nullptr;
@ -397,7 +430,7 @@ protected:
BitField<u32, bool, 23, 1> display_disable;
BitField<u32, bool, 24, 1> interrupt_request;
BitField<u32, bool, 25, 1> dma_data_request;
BitField<u32, bool, 26, 1> ready_to_recieve_cmd;
BitField<u32, bool, 26, 1> gpu_idle;
BitField<u32, bool, 27, 1> ready_to_send_vram;
BitField<u32, bool, 28, 1> ready_to_recieve_dma;
BitField<u32, DMADirection, 29, 2> dma_direction;
@ -595,13 +628,17 @@ protected:
bool in_vblank;
} m_crtc_state = {};
State m_state = State::Idle;
TickCount m_blitter_ticks = 0;
BlitterState m_blitter_state = BlitterState::Idle;
TickCount m_command_ticks = 0;
u32 m_command_total_words = 0;
/// GPUREAD value for non-VRAM-reads.
u32 m_GPUREAD_latch = 0;
/// True if currently executing/syncing.
bool m_syncing = false;
bool m_fifo_pushed = false;
struct VRAMTransfer
{
u16 x;
@ -612,7 +649,13 @@ protected:
u16 row;
} m_vram_transfer = {};
std::vector<u32> m_GP0_buffer;
HeapFIFOQueue<u32, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer;
u32 m_blit_remaining_words;
RenderCommand m_render_command{};
TickCount m_max_run_ahead = 128;
u32 m_fifo_size = 128;
struct Stats
{
@ -627,26 +670,29 @@ protected:
Stats m_last_stats = {};
private:
using GP0CommandHandler = bool (GPU::*)(const u32*&, u32);
using GP0CommandHandler = bool (GPU::*)();
using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;
static GP0CommandHandlerTable GenerateGP0CommandHandlerTable();
// Rendering commands, returns false if not enough data is provided
bool HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size);
bool HandleNOPCommand(const u32*& command_ptr, u32 command_size);
bool HandleClearCacheCommand(const u32*& command_ptr, u32 command_size);
bool HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size);
bool HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size);
bool HandleRenderCommand(const u32*& command_ptr, u32 command_size);
bool HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size);
bool HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size);
bool HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size);
bool HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size);
bool HandleUnknownGP0Command();
bool HandleNOPCommand();
bool HandleClearCacheCommand();
bool HandleInterruptRequestCommand();
bool HandleSetDrawModeCommand();
bool HandleSetTextureWindowCommand();
bool HandleSetDrawingAreaTopLeftCommand();
bool HandleSetDrawingAreaBottomRightCommand();
bool HandleSetDrawingOffsetCommand();
bool HandleSetMaskBitCommand();
bool HandleRenderPolygonCommand();
bool HandleRenderRectangleCommand();
bool HandleRenderLineCommand();
bool HandleRenderPolyLineCommand();
bool HandleFillRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand();
bool HandleCopyRectangleVRAMToCPUCommand();
bool HandleCopyRectangleVRAMToVRAMCommand();
static const GP0CommandHandlerTable s_GP0_command_handler_table;
};

View File

@ -7,10 +7,9 @@
Log_SetChannel(GPU);
#define CHECK_COMMAND_SIZE(num_words) \
if (command_size < num_words) \
if (m_fifo.GetSize() < num_words) \
{ \
m_command_total_words = num_words; \
m_state = State::WaitingForParameters; \
return false; \
}
@ -24,33 +23,95 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
void GPU::ExecuteCommands()
{
Assert(m_GP0_buffer.size() < 1048576);
m_syncing = true;
const u32* command_ptr = m_GP0_buffer.data();
u32 command_size = static_cast<u32>(m_GP0_buffer.size());
while (m_state != State::ReadingVRAM && command_size > 0 && command_size >= m_command_total_words)
for (;;)
{
const u32 command = command_ptr[0] >> 24;
const u32* old_command_ptr = command_ptr;
if (!(this->*s_GP0_command_handler_table[command])(command_ptr, command_size))
if (m_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
{
switch (m_blitter_state)
{
case BlitterState::Idle:
{
const u32 command = m_fifo.Peek(0) >> 24;
if ((this->*s_GP0_command_handler_table[command])())
continue;
else
break;
const u32 words_used = static_cast<u32>(command_ptr - old_command_ptr);
DebugAssert(words_used <= command_size);
command_size -= words_used;
}
if (command_size == 0)
m_GP0_buffer.clear();
else if (command_ptr > m_GP0_buffer.data())
m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (command_ptr - m_GP0_buffer.data()));
case BlitterState::WritingVRAM:
{
DebugAssert(m_blit_remaining_words > 0);
const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy);
m_blit_remaining_words -= words_to_copy;
AddCommandTicks(words_to_copy);
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
if (m_blit_remaining_words == 0)
FinishVRAMWrite();
continue;
}
case BlitterState::ReadingVRAM:
{
Panic("shouldn't be here");
}
break;
case BlitterState::DrawingPolyLine:
{
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
u32 terminator_index =
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
// terminator is on the first word for the vertex
if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
break;
}
const bool found_terminator = (terminator_index < m_fifo.GetSize());
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
if (words_to_copy > 0)
{
const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy);
}
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
if (found_terminator)
{
// drop terminator
m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand();
m_blit_buffer.clear();
EndCommand();
}
}
break;
}
}
m_fifo_pushed = false;
UpdateDMARequest();
if (!m_fifo_pushed)
break;
}
m_syncing = false;
}
void GPU::EndCommand()
{
m_state = State::Idle;
m_blitter_state = BlitterState::Idle;
m_command_total_words = 0;
}
@ -67,7 +128,24 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
table[i] = &GPU::HandleNOPCommand;
table[0x1F] = &GPU::HandleInterruptRequestCommand;
for (u32 i = 0x20; i <= 0x7F; i++)
table[i] = &GPU::HandleRenderCommand;
{
const RenderCommand rc{i << 24};
switch (rc.primitive)
{
case Primitive::Polygon:
table[i] = &GPU::HandleRenderPolygonCommand;
break;
case Primitive::Line:
table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand;
break;
case Primitive::Rectangle:
table[i] = &GPU::HandleRenderRectangleCommand;
break;
default:
table[i] = &GPU::HandleUnknownGP0Command;
break;
}
}
table[0xE0] = &GPU::HandleNOPCommand;
table[0xE1] = &GPU::HandleSetDrawModeCommand;
table[0xE2] = &GPU::HandleSetTextureWindowCommand;
@ -87,30 +165,31 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
return table;
}
bool GPU::HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size)
bool GPU::HandleUnknownGP0Command()
{
const u32 command = *(command_ptr++) >> 24;
const u32 command = m_fifo.Pop() >> 24;
Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command);
EndCommand();
return true;
}
bool GPU::HandleNOPCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleNOPCommand()
{
command_ptr++;
m_fifo.RemoveOne();
EndCommand();
return true;
}
bool GPU::HandleClearCacheCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleClearCacheCommand()
{
Log_DebugPrintf("GP0 clear cache");
command_ptr++;
m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleInterruptRequestCommand()
{
Log_WarningPrintf("GP0 interrupt request");
if (!m_GPUSTAT.interrupt_request)
@ -119,35 +198,38 @@ bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_siz
m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU);
}
command_ptr++;
m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetDrawModeCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
Log_DebugPrintf("Set draw mode %08X", param);
SetDrawMode(Truncate16(param));
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetTextureWindowCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x,
m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x,
m_draw_mode.texture_window_offset_y);
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetDrawingAreaTopLeftCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const u32 left = param & 0x3FF;
const u32 top = (param >> 10) & 0x1FF;
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top);
@ -160,16 +242,17 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 comman
m_drawing_area_changed = true;
}
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetDrawingAreaBottomRightCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const u32 right = param & 0x3FF;
const u32 bottom = (param >> 10) & 0x1FF;
const u32 right = param & 0x3FFu;
const u32 bottom = (param >> 10) & 0x1FFu;
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom);
if (m_drawing_area.right != right || m_drawing_area.bottom != bottom)
{
@ -180,15 +263,16 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 co
m_drawing_area_changed = true;
}
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetDrawingOffsetCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const s32 x = SignExtendN<11, s32>(param & 0x7FF);
const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FF);
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const s32 x = SignExtendN<11, s32>(param & 0x7FFu);
const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu);
Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y);
if (m_drawing_offset.x != x || m_drawing_offset.y != y)
{
@ -198,13 +282,14 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz
m_drawing_offset.y = y;
}
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleSetMaskBitCommand()
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11;
@ -216,172 +301,198 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size)
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
AddCommandTicks(1);
EndCommand();
return true;
}
bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleRenderPolygonCommand()
{
const RenderCommand rc{command_ptr[0]};
u8 words_per_vertex;
u32 num_vertices;
u32 total_words;
switch (rc.primitive)
{
case Primitive::Polygon:
{
const RenderCommand rc{m_fifo.Peek(0)};
// shaded vertices use the colour from the first word for the first vertex
words_per_vertex = 1 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.shading_enable);
num_vertices = rc.quad_polygon ? 4 : 3;
total_words = words_per_vertex * num_vertices + BoolToUInt8(!rc.shading_enable);
const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable);
CHECK_COMMAND_SIZE(total_words);
// set draw state up
if (rc.texture_enable)
{
const u16 texpage_attribute = Truncate16((rc.shading_enable ? command_ptr[5] : command_ptr[4]) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(command_ptr[2] >> 16));
}
}
break;
case Primitive::Line:
{
words_per_vertex = 1 + BoolToUInt8(rc.shading_enable);
if (rc.polyline)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. terminator
// is on the first word for the vertex
num_vertices = 2;
bool found_terminator = false;
for (u32 pos = rc.shading_enable ? 4 : 3; pos < command_size; pos += words_per_vertex)
{
if ((command_ptr[pos] & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
{
found_terminator = true;
break;
}
num_vertices++;
}
if (!found_terminator)
return false;
total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable) + 1;
}
else
{
num_vertices = 2;
total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable);
}
}
break;
case Primitive::Rectangle:
{
words_per_vertex =
2 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.rectangle_size == DrawRectangleSize::Variable);
num_vertices = 1;
total_words = words_per_vertex;
if (rc.texture_enable)
SetTexturePalette(Truncate16(command_ptr[2] >> 16));
}
break;
default:
UnreachableCode();
return true;
}
CHECK_COMMAND_SIZE(total_words);
static constexpr std::array<const char*, 4> primitive_names = {{"", "polygon", "line", "rectangle"}};
Log_TracePrintf("Render %s %s %s %s %s (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point",
rc.transparency_enable ? "semi-transparent" : "opaque",
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
primitive_names[static_cast<u8>(rc.primitive.GetValue())], ZeroExtend32(num_vertices),
ZeroExtend32(words_per_vertex));
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize();
DispatchRenderCommand(rc, num_vertices, command_ptr);
command_ptr += total_words;
Log_TracePrintf(
"Render %s %s %s %s polygon (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point",
rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured",
rc.shading_enable ? "shaded" : "monochrome", ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex));
// set draw state up
if (rc.texture_enable)
{
const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16));
}
m_stats.num_vertices += num_vertices;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
EndCommand();
return true;
}
bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleRenderRectangleCommand()
{
const RenderCommand rc{m_fifo.Peek(0)};
const u32 total_words =
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable);
CHECK_COMMAND_SIZE(total_words);
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize();
if (rc.texture_enable)
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16));
Log_TracePrintf("Render %s %s %s rectangle (%u words)", rc.transparency_enable ? "semi-transparent" : "opaque",
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
total_words);
m_stats.num_vertices++;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
EndCommand();
return true;
}
bool GPU::HandleRenderLineCommand()
{
const RenderCommand rc{m_fifo.Peek(0)};
const u32 total_words = rc.shading_enable ? 4 : 3;
CHECK_COMMAND_SIZE(total_words);
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize();
Log_TracePrintf("Render %s %s line (%u total words)", rc.transparency_enable ? "semi-transparent" : "opaque",
rc.shading_enable ? "shaded" : "monochrome", total_words);
m_stats.num_vertices += 2;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
EndCommand();
return true;
}
/// GP0 handler which begins a poly-line.
/// Returns false (through CHECK_COMMAND_SIZE) until the command word and the first two vertices are in the FIFO.
/// Once they are, the command is latched, the command word is dropped, the first two vertices are moved into the
/// blit buffer, and the blitter is switched to the DrawingPolyLine state. The remaining vertices and the terminator
/// are consumed from that state by ExecuteCommands(), which is also where the draw is dispatched.
bool GPU::HandleRenderPolyLineCommand()
{
  // always read the first two vertices, we test for the terminator after that
  const RenderCommand rc{m_fifo.Peek(0)};

  // Command word, plus one word per vertex, plus the colour word of the second vertex when shaded. This is the same
  // size as HandleRenderLineCommand() uses. The values were previously swapped (3 when shaded, 4 otherwise), which
  // made a monochrome poly-line buffer a third word before any terminator test, so the terminator of a two-vertex
  // poly-line was stored as a vertex instead of ending the command.
  const u32 min_words = rc.shading_enable ? 4 : 3;
  CHECK_COMMAND_SIZE(min_words);

  if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
    Synchronize();

  Log_TracePrintf("Render %s %s poly-line", rc.transparency_enable ? "semi-transparent" : "opaque",
                  rc.shading_enable ? "shaded" : "monochrome");

  m_render_command.bits = rc.bits;
  m_fifo.RemoveOne();

  // everything after the command word goes to the blit buffer
  const u32 words_to_pop = min_words - 1;
  m_blit_buffer.resize(words_to_pop);
  m_fifo.PopRange(m_blit_buffer.data(), words_to_pop);

  // polyline goes via a different path through the blit buffer
  m_blitter_state = BlitterState::DrawingPolyLine;
  m_command_total_words = 0;
  return true;
}
// Handles the "fill rectangle in VRAM" command: decodes colour, destination and size, fills VRAM,
// charges command ticks and ends the command.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped, so the command_ptr-based
// reads and the m_fifo-based reads both appear and color/dst_x/dst_y/width/height are each declared
// twice. Only one of the two sets can exist in a compilable file; presumably the m_fifo set is the
// current one, since the signature takes no command_ptr parameter.
bool GPU::HandleFillRectangleCommand()
{
// CHECK_COMMAND_SIZE is a macro defined outside this view; presumably it bails out until 3 words are
// available - TODO confirm.
CHECK_COMMAND_SIZE(3);
FlushRender();
// Reads that index a command buffer directly (word 0 = colour, word 1 = position, word 2 = size).
const u32 color = command_ptr[0] & 0x00FFFFFF;
const u32 dst_x = command_ptr[1] & 0x3F0;
const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF;
const u32 width = ((command_ptr[2] & 0x3FF) + 0xF) & ~0xF;
const u32 height = (command_ptr[2] >> 16) & 0x1FF;
command_ptr += 3;
// Reads that consume the FIFO. The first Pop() takes the command word itself, whose low 24 bits are
// the fill colour. For the next two words, Peek() supplies the low half-word and Pop() the high
// half-word of the same entry (assumes Peek() does not consume the entry - TODO confirm).
const u32 color = m_fifo.Pop() & 0x00FFFFFF;
// X keeps only bits 4-9 (mask 0x3F0), i.e. it is aligned down to a multiple of 16.
const u32 dst_x = m_fifo.Peek() & 0x3F0;
const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
// Width is masked to 10 bits and then rounded up to the next multiple of 16.
const u32 width = ((m_fifo.Peek() & 0x3FF) + 0xF) & ~0xF;
const u32 height = (m_fifo.Pop() >> 16) & 0x1FF;
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height);
FillVRAM(dst_x, dst_y, width, height, color);
m_stats.num_vram_fills++;
// Cost charged for the fill: a fixed 46 plus (width / 8 + 9) per row.
AddCommandTicks(46 + ((width / 8) + 9) * height);
EndCommand();
return true;
}
// Handles the "copy rectangle from CPU to VRAM" command.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped. Two signatures and two
// bodies are interleaved: one indexes command_ptr and performs the VRAM update immediately, the other
// consumes m_fifo and only records the transfer (m_vram_transfer, m_blit_remaining_words) before
// switching to BlitterState::WritingVRAM. copy_width/copy_height/dst_x/dst_y/num_words are therefore
// declared twice; only one set can exist in a compilable file.
bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleCopyRectangleCPUToVRAMCommand()
{
// CHECK_COMMAND_SIZE is a macro defined outside this view; presumably it bails out until the 3 header
// words are available - TODO confirm.
CHECK_COMMAND_SIZE(3);
// Discard the command word; the position and size words follow.
m_fifo.RemoveOne();
// ReplaceZero is defined elsewhere; presumably it substitutes the second argument when the first is
// zero, so a size field of 0 means the maximum (0x400 x 0x200) - TODO confirm.
const u32 copy_width = ReplaceZero(command_ptr[2] & 0x3FF, 0x400);
const u32 copy_height = ReplaceZero((command_ptr[2] >> 16) & 0x1FF, 0x200);
// FIFO variant: Peek() supplies the low half-word and Pop() the high half-word of the same entry
// (assumes Peek() does not consume the entry - TODO confirm).
const u32 dst_x = m_fifo.Peek() & 0x3FF;
const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 copy_width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400);
const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200);
const u32 num_pixels = copy_width * copy_height;
// command_ptr variant: 3 header words plus the pixel data, two 16-bit pixels per word, rounded up.
const u32 num_words = 3 + ((num_pixels + 1) / 2);
if (command_size < num_words)
{
m_command_total_words = num_words;
m_state = State::WritingVRAM;
return false;
}
const u32 dst_x = command_ptr[1] & 0x3FF;
const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF;
// FIFO variant: pixel data words only (the header has already been consumed above).
const u32 num_words = ((num_pixels + 1) / 2);
Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width,
copy_height);
// command_ptr variant: optional debug dump, then write the pixels that follow the 3 header words.
if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies)
{
DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(),
copy_width, copy_height, sizeof(u16) * copy_width, &command_ptr[3], true);
}
FlushRender();
UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &command_ptr[3]);
command_ptr += num_words;
m_stats.num_vram_writes++;
EndCommand();
// FIFO variant: remember the destination rectangle and how many data words are still expected.
m_blitter_state = BlitterState::WritingVRAM;
m_blit_buffer.reserve(num_words);
m_blit_remaining_words = num_words;
m_vram_transfer.x = Truncate16(dst_x);
m_vram_transfer.y = Truncate16(dst_y);
m_vram_transfer.width = Truncate16(copy_width);
m_vram_transfer.height = Truncate16(copy_height);
return true;
}
bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size)
// Completes a CPU->VRAM transfer: optionally dumps the buffered pixels to a PNG, writes them to the
// rectangle recorded in m_vram_transfer, then resets the transfer bookkeeping and returns the blitter
// to the idle state.
void GPU::FinishVRAMWrite()
{
  const auto& transfer = m_vram_transfer;

  const bool dump_enabled = m_system->GetSettings().debugging.dump_cpu_to_vram_copies;
  if (dump_enabled)
  {
    // Each dump gets its own sequentially numbered file; the row pitch is one 16-bit pixel per column.
    const auto dump_filename = StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++);
    DumpVRAMToFile(dump_filename.c_str(), transfer.width, transfer.height, sizeof(u16) * transfer.width,
                   m_blit_buffer.data(), true);
  }

  // Flush pending rendering before the buffered pixels are written to VRAM.
  FlushRender();
  UpdateVRAM(transfer.x, transfer.y, transfer.width, transfer.height, m_blit_buffer.data());

  // The transfer is done: drop the buffered words and clear the recorded rectangle.
  m_blit_buffer.clear();
  m_vram_transfer = {};
  m_blitter_state = BlitterState::Idle;
  ++m_stats.num_vram_writes;
}
// Handles the "copy rectangle from VRAM to CPU" command: records the source rectangle in
// m_vram_transfer and switches to the VRAM-reading state.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped. The command_ptr-based
// assignments and the m_fifo-based assignments both appear, as do both the m_state and m_blitter_state
// updates, and a hunk header sits in the middle of the body. Only one variant can exist in a
// compilable file.
bool GPU::HandleCopyRectangleVRAMToCPUCommand()
{
// CHECK_COMMAND_SIZE is a macro defined outside this view; presumably it bails out until 3 words are
// available - TODO confirm.
CHECK_COMMAND_SIZE(3);
// Discard the command word; the position and size words follow.
m_fifo.RemoveOne();
// ((v - 1) & mask) + 1 maps a size field of 0 to mask + 1, so width ends up in 1..0x400 and height in
// 1..0x200.
m_vram_transfer.width = ((Truncate16(command_ptr[2]) - 1) & 0x3FF) + 1;
m_vram_transfer.height = ((Truncate16(command_ptr[2] >> 16) - 1) & 0x1FF) + 1;
m_vram_transfer.x = Truncate16(command_ptr[1] & 0x3FF);
m_vram_transfer.y = Truncate16((command_ptr[1] >> 16) & 0x3FF);
command_ptr += 3;
// FIFO variant: Peek() supplies the low half-word and Pop() the high half-word of the same entry
// (assumes Peek() does not consume the entry - TODO confirm).
m_vram_transfer.x = Truncate16(m_fifo.Peek() & 0x3FF);
m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & 0x3FF);
m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & 0x3FF) + 1;
m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & 0x1FF) + 1;
Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y,
m_vram_transfer.width, m_vram_transfer.height);
@ -402,22 +513,22 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 comma
// switch to pixel-by-pixel read state
m_stats.num_vram_reads++;
m_state = State::ReadingVRAM;
m_blitter_state = BlitterState::ReadingVRAM;
m_command_total_words = 0;
return true;
}
// Handles the "copy rectangle from VRAM to VRAM" command: decodes source, destination and size, performs
// the copy, charges command ticks and ends the command.
//
// NOTE(review): this span is a diff rendering with the +/- markers stripped. Two signatures appear, and
// the command_ptr-based reads and the m_fifo-based reads both declare src_x/src_y/dst_x/dst_y/width/
// height; a hunk header also sits in the middle of the body. Only one variant can exist in a compilable
// file.
bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size)
bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
{
// CHECK_COMMAND_SIZE is a macro defined outside this view; presumably it bails out until 4 words are
// available - TODO confirm.
CHECK_COMMAND_SIZE(4);
// Discard the command word; source, destination and size words follow.
m_fifo.RemoveOne();
const u32 src_x = command_ptr[1] & 0x3FF;
const u32 src_y = (command_ptr[1] >> 16) & 0x3FF;
const u32 dst_x = command_ptr[2] & 0x3FF;
const u32 dst_y = (command_ptr[2] >> 16) & 0x3FF;
// ReplaceZero is defined elsewhere; presumably it substitutes the second argument when the first is
// zero, so a size field of 0 means the maximum - TODO confirm.
const u32 width = ReplaceZero(command_ptr[3] & 0x3FF, 0x400);
const u32 height = ReplaceZero((command_ptr[3] >> 16) & 0x1FF, 0x200);
command_ptr += 4;
// FIFO variant: Peek() supplies the low half-word and Pop() the high half-word of the same entry
// (assumes Peek() does not consume the entry - TODO confirm).
const u32 src_x = m_fifo.Peek() & 0x3FF;
const u32 src_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 dst_x = m_fifo.Peek() & 0x3FF;
const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400);
const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200);
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y,
width, height);
@ -425,6 +536,7 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 comm
FlushRender();
CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
m_stats.num_vram_copies++;
// Cost charged for the copy: two ticks per pixel.
AddCommandTicks(width * height * 2);
EndCommand();
return true;
}

View File

@ -172,8 +172,9 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
}
}
void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
void GPU_HW::LoadVertices()
{
const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
// TODO: Move this to the GPU..
@ -181,20 +182,19 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
{
case Primitive::Polygon:
{
DebugAssert(num_vertices == 3 || num_vertices == 4);
EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3);
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
u32 buffer_pos = 1;
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices;
for (u32 i = 0; i < num_vertices; i++)
{
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0;
const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{m_fifo.Pop()};
const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0;
vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, texpage, packed_texcoord);
}
@ -226,6 +226,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
@ -255,6 +256,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable);
AddVertex(vertices[2]);
AddVertex(vertices[1]);
@ -266,14 +268,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Rectangle:
{
u32 buffer_pos = 1;
const u32 color = rc.color_for_first_vertex;
const VertexPosition vp{command_ptr[buffer_pos++]};
const VertexPosition vp{m_fifo.Pop()};
const s32 pos_x = m_drawing_offset.x + vp.x;
const s32 pos_y = m_drawing_offset.y + vp.y;
const auto [texcoord_x, texcoord_y] =
UnpackTexcoord(rc.texture_enable ? Truncate16(command_ptr[buffer_pos++]) : 0);
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0);
u16 orig_tex_left = ZeroExtend16(texcoord_x);
u16 orig_tex_top = ZeroExtend16(texcoord_y);
s32 rectangle_width;
@ -293,8 +293,11 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
rectangle_height = 16;
break;
default:
rectangle_width = static_cast<s32>(command_ptr[buffer_pos] & 0xFFFF);
rectangle_height = static_cast<s32>(command_ptr[buffer_pos] >> 16);
{
const u32 width_and_height = m_fifo.Pop();
rectangle_width = static_cast<s32>(width_and_height & 0xFFFF);
rectangle_height = static_cast<s32>(width_and_height >> 16);
}
break;
}
@ -350,22 +353,35 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable);
}
break;
case Primitive::Line:
{
EnsureVertexBufferSpace(num_vertices * 2);
const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
EnsureVertexBufferSpace(num_vertices);
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
u32 buffer_pos = 1;
BatchVertex last_vertex;
u32 buffer_pos = 0;
for (u32 i = 0; i < num_vertices; i++)
{
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
u32 color;
VertexPosition vp;
if (rc.polyline)
{
color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
vp.bits = m_blit_buffer[buffer_pos++];
}
else
{
color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
vp.bits = m_fifo.Pop();
}
BatchVertex vertex;
vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, 0, 0);
@ -394,6 +410,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
}
}
@ -485,8 +502,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
}
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
void GPU_HW::DispatchRenderCommand()
{
const RenderCommand rc{m_render_command.bits};
TextureMode texture_mode;
if (rc.IsTexturingEnabled())
{
@ -574,7 +593,7 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
m_batch_ubo_dirty = true;
}
LoadVertices(rc, num_vertices, command_ptr);
LoadVertices();
}
void GPU_HW::FlushRender()

View File

@ -145,7 +145,7 @@ protected:
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override;
void DispatchRenderCommand() override;
void FlushRender() override;
void DrawRendererStats(bool is_idle_frame) override;
@ -198,7 +198,7 @@ private:
static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void LoadVertices();
ALWAYS_INLINE void AddVertex(const BatchVertex& v)
{

View File

@ -204,8 +204,9 @@ void GPU_SW::UpdateDisplay()
}
}
void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
void GPU_SW::DispatchRenderCommand()
{
const RenderCommand rc{m_render_command.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable;
switch (rc.primitive)
@ -216,24 +217,23 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<SWVertex, 4> vertices;
u32 buffer_pos = 1;
for (u32 i = 0; i < num_vertices; i++)
{
SWVertex& vert = vertices[i];
const u32 color_rgb = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const u32 color_rgb = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
vert.color_r = Truncate8(color_rgb);
vert.color_g = Truncate8(color_rgb >> 8);
vert.color_b = Truncate8(color_rgb >> 16);
const VertexPosition vp{command_ptr[buffer_pos++]};
const VertexPosition vp{m_fifo.Pop()};
vert.x = vp.x;
vert.y = vp.y;
if (textured)
{
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++]));
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(m_fifo.Pop()));
}
else
{
@ -253,10 +253,9 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
case Primitive::Rectangle:
{
u32 buffer_pos = 1;
const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
const VertexPosition vp{command_ptr[buffer_pos++]};
const u32 texcoord_and_palette = rc.texture_enable ? command_ptr[buffer_pos++] : 0;
const VertexPosition vp{m_fifo.Pop()};
const u32 texcoord_and_palette = rc.texture_enable ? m_fifo.Pop() : 0;
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
s32 width;
@ -276,8 +275,11 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
height = 16;
break;
default:
width = static_cast<s32>(command_ptr[buffer_pos] & UINT32_C(0xFFFF));
height = static_cast<s32>(command_ptr[buffer_pos] >> 16);
{
const u32 width_and_height = m_fifo.Pop();
width = static_cast<s32>(width_and_height & UINT32_C(0xFFFF));
height = static_cast<s32>(width_and_height >> 16);
}
break;
}
@ -296,19 +298,28 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable);
std::array<SWVertex, 2> vertices = {};
u32 buffer_pos = 1;
u32 buffer_pos = 0;
// first vertex
SWVertex* p0 = &vertices[0];
SWVertex* p1 = &vertices[1];
p0->SetPosition(VertexPosition{command_ptr[buffer_pos++]});
p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : m_fifo.Pop()});
p0->SetColorRGB24(first_color);
// remaining vertices in line strip
const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
for (u32 i = 1; i < num_vertices; i++)
{
p1->SetColorRGB24(shaded ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{command_ptr[buffer_pos++]});
if (rc.polyline)
{
p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]});
}
else
{
p1->SetColorRGB24(shaded ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_fifo.Pop()});
}
(this->*DrawFunction)(p0, p1);
@ -408,6 +419,7 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, texture_enable, shading_enable);
// compute per-pixel increments
const s32 a01 = py0 - py1, b01 = px1 - px0;
@ -501,6 +513,18 @@ void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8
origin_x += m_drawing_offset.x;
origin_y += m_drawing_offset.y;
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(origin_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(origin_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(origin_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(origin_y + static_cast<s32>(height), m_drawing_area.top,
m_drawing_area.bottom)) +
1u;
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable);
}
for (u32 offset_y = 0; offset_y < height; offset_y++)
{
const s32 y = origin_y + static_cast<s32>(offset_y);
@ -690,6 +714,21 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
const s32 dy = p1->y - p0->y;
const s32 k = std::max(std::abs(dx), std::abs(dy));
{
  // TODO: Move to base class
  // Estimate the cost of the line from its bounding box, clipped to the drawing area.
  const s32 min_x = std::min(p0->x, p1->x);
  const s32 max_x = std::max(p0->x, p1->x);
  const s32 min_y = std::min(p0->y, p1->y);
  const s32 max_y = std::max(p0->y, p1->y);

  // The left edge must be clamped to [left, right] like the other three edges. It was previously clamped
  // to [left, left], which pinned it to the drawing area's left edge and inflated the width passed on.
  const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
  const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
  const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
  const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;

  // Right/bottom carry the +1 so the extents are inclusive of the last pixel.
  AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
}
FixedPointCoord step_x, step_y;
FixedPointColor step_r, step_g, step_b;
if (k > 0)

View File

@ -51,7 +51,7 @@ protected:
// Rasterization
//////////////////////////////////////////////////////////////////////////
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override;
void DispatchRenderCommand() override;
static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);

View File

@ -924,6 +924,8 @@ void HostInterface::UpdateSettings(const std::function<void()>& apply_callback)
const AudioBackend old_audio_backend = m_settings.audio_backend;
const GPURenderer old_gpu_renderer = m_settings.gpu_renderer;
const u32 old_gpu_resolution_scale = m_settings.gpu_resolution_scale;
const u32 old_gpu_fifo_size = m_settings.gpu_fifo_size;
const u32 old_gpu_max_run_ahead = m_settings.gpu_max_run_ahead;
const bool old_gpu_true_color = m_settings.gpu_true_color;
const bool old_gpu_scaled_dithering = m_settings.gpu_scaled_dithering;
const bool old_gpu_texture_filtering = m_settings.gpu_texture_filtering;
@ -978,8 +980,8 @@ void HostInterface::UpdateSettings(const std::function<void()>& apply_callback)
m_system->SetCPUExecutionMode(m_settings.cpu_execution_mode);
}
if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale ||
m_settings.gpu_true_color != old_gpu_true_color ||
if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale || m_settings.gpu_fifo_size != old_gpu_fifo_size ||
m_settings.gpu_max_run_ahead != old_gpu_max_run_ahead || m_settings.gpu_true_color != old_gpu_true_color ||
m_settings.gpu_scaled_dithering != old_gpu_scaled_dithering ||
m_settings.gpu_texture_filtering != old_gpu_texture_filtering ||
m_settings.gpu_disable_interlacing != old_gpu_disable_interlacing ||

View File

@ -2,4 +2,4 @@
#include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 22;
static constexpr u32 SAVE_STATE_VERSION = 23;

View File

@ -23,6 +23,8 @@ void Settings::Load(SettingsInterface& si)
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER);
gpu_resolution_scale = static_cast<u32>(si.GetIntValue("GPU", "ResolutionScale", 1));
gpu_fifo_size = static_cast<u32>(si.GetIntValue("GPU", "FIFOSize", 128));
gpu_max_run_ahead = static_cast<u32>(si.GetIntValue("GPU", "MaxRunAhead", 128));
gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false);
gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true);
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false);

View File

@ -47,6 +47,8 @@ struct Settings
GPURenderer gpu_renderer = GPURenderer::Software;
u32 gpu_resolution_scale = 1;
u32 gpu_fifo_size = 128;
u32 gpu_max_run_ahead = 128;
bool gpu_use_debug_device = false;
bool gpu_true_color = true;
bool gpu_scaled_dithering = false;