GPU: Implement FIFO and timings

This will cause a slight performance loss. I've left in some knobs which
can be tweaked to mitigate this, but the goal is to be compatible with
all games which require the FIFO and timings.
This commit is contained in:
Connor McLaughlin 2020-04-19 01:16:58 +10:00
parent 65ca8b9fe0
commit 1757932b3a
12 changed files with 562 additions and 311 deletions

View File

@ -259,14 +259,6 @@ void DMA::TransferChannel(Channel channel)
if (word_count > 0) if (word_count > 0)
TransferMemoryToDevice(channel, (current_address + sizeof(header)) & ADDRESS_MASK, 4, word_count); TransferMemoryToDevice(channel, (current_address + sizeof(header)) & ADDRESS_MASK, 4, word_count);
// Self-referencing DMA loops.. not sure how these are happening?
if (current_address == next_address)
{
Log_ErrorPrintf("HACK: Aborting self-referencing DMA loop @ 0x%08X. Something went wrong to generate this.",
current_address);
break;
}
current_address = next_address; current_address = next_address;
if (current_address & UINT32_C(0x800000)) if (current_address & UINT32_C(0x800000))
break; break;

View File

@ -38,16 +38,19 @@ bool GPU::Initialize(HostDisplay* host_display, System* system, DMA* dma, Interr
void GPU::UpdateSettings() void GPU::UpdateSettings()
{ {
m_force_progressive_scan = m_system->GetSettings().gpu_disable_interlacing; const Settings& settings = m_system->GetSettings();
if (m_force_ntsc_timings != m_system->GetSettings().gpu_force_ntsc_timings) m_force_progressive_scan = settings.gpu_disable_interlacing;
m_fifo_size = settings.gpu_fifo_size;
m_max_run_ahead = settings.gpu_max_run_ahead;
if (m_force_ntsc_timings != settings.gpu_force_ntsc_timings)
{ {
m_force_ntsc_timings = m_system->GetSettings().gpu_force_ntsc_timings; m_force_ntsc_timings = settings.gpu_force_ntsc_timings;
UpdateCRTCConfig(); UpdateCRTCConfig();
} }
m_crtc_state.display_aspect_ratio = m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(settings.display_aspect_ratio);
Settings::GetDisplayAspectRatioValue(m_system->GetSettings().display_aspect_ratio);
// Crop mode calls this, so recalculate the display area // Crop mode calls this, so recalculate the display area
UpdateCRTCDisplayParameters(); UpdateCRTCDisplayParameters();
@ -77,11 +80,13 @@ void GPU::SoftReset()
m_crtc_state.current_scanline = 0; m_crtc_state.current_scanline = 0;
m_crtc_state.in_hblank = false; m_crtc_state.in_hblank = false;
m_crtc_state.in_vblank = false; m_crtc_state.in_vblank = false;
m_state = State::Idle; m_blitter_state = BlitterState::Idle;
m_blitter_ticks = 0; m_command_ticks = 0;
m_command_total_words = 0; m_command_total_words = 0;
m_vram_transfer = {}; m_vram_transfer = {};
m_GP0_buffer.clear(); m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
SetDrawMode(0); SetDrawMode(0);
SetTexturePalette(0); SetTexturePalette(0);
SetTextureWindow(0); SetTextureWindow(0);
@ -148,8 +153,8 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_crtc_state.in_hblank); sw.Do(&m_crtc_state.in_hblank);
sw.Do(&m_crtc_state.in_vblank); sw.Do(&m_crtc_state.in_vblank);
sw.Do(&m_state); sw.Do(&m_blitter_state);
sw.Do(&m_blitter_ticks); sw.Do(&m_command_ticks);
sw.Do(&m_command_total_words); sw.Do(&m_command_total_words);
sw.Do(&m_GPUREAD_latch); sw.Do(&m_GPUREAD_latch);
@ -160,7 +165,13 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_vram_transfer.col); sw.Do(&m_vram_transfer.col);
sw.Do(&m_vram_transfer.row); sw.Do(&m_vram_transfer.row);
sw.Do(&m_GP0_buffer); sw.Do(&m_fifo);
sw.Do(&m_blit_buffer);
sw.Do(&m_blit_remaining_words);
sw.Do(&m_render_command.bits);
sw.Do(&m_max_run_ahead);
sw.Do(&m_fifo_size);
if (sw.IsReading()) if (sw.IsReading())
{ {
@ -207,16 +218,26 @@ void GPU::RestoreGraphicsAPIState() {}
void GPU::UpdateDMARequest() void GPU::UpdateDMARequest()
{ {
// we can kill the blitter ticks here if enough time has passed switch (m_blitter_state)
if (m_blitter_ticks > 0 && GetPendingGPUTicks() >= m_blitter_ticks) {
m_blitter_ticks = 0; case BlitterState::Idle:
m_GPUSTAT.gpu_idle = (m_command_ticks <= 0);
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
break;
const bool blitter_idle = (m_blitter_ticks <= 0); case BlitterState::WritingVRAM:
m_GPUSTAT.gpu_idle = false;
m_GPUSTAT.ready_to_send_vram = false;
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
break;
m_GPUSTAT.ready_to_send_vram = (blitter_idle && m_state == State::ReadingVRAM); case BlitterState::ReadingVRAM:
m_GPUSTAT.ready_to_recieve_cmd = (blitter_idle && m_state == State::Idle); m_GPUSTAT.gpu_idle = false;
m_GPUSTAT.ready_to_recieve_dma = m_GPUSTAT.ready_to_send_vram = true;
blitter_idle && (m_state == State::Idle || (m_state != State::ReadingVRAM && m_command_total_words > 0)); m_GPUSTAT.ready_to_recieve_dma = false;
break;
}
bool dma_request; bool dma_request;
switch (m_GPUSTAT.dma_direction) switch (m_GPUSTAT.dma_direction)
@ -226,15 +247,15 @@ void GPU::UpdateDMARequest()
break; break;
case DMADirection::FIFO: case DMADirection::FIFO:
dma_request = blitter_idle && m_state >= State::ReadingVRAM; // FIFO not full/full dma_request = m_GPUSTAT.ready_to_recieve_dma;
break; break;
case DMADirection::CPUtoGP0: case DMADirection::CPUtoGP0:
dma_request = blitter_idle && m_GPUSTAT.ready_to_recieve_dma; dma_request = m_GPUSTAT.ready_to_recieve_dma;
break; break;
case DMADirection::GPUREADtoCPU: case DMADirection::GPUREADtoCPU:
dma_request = blitter_idle && m_GPUSTAT.ready_to_send_vram; dma_request = m_GPUSTAT.ready_to_send_vram;
break; break;
default: default:
@ -256,7 +277,7 @@ u32 GPU::ReadRegister(u32 offset)
{ {
// code can be dependent on the odd/even bit, so update the GPU state when reading. // code can be dependent on the odd/even bit, so update the GPU state when reading.
// we can mitigate this slightly by only updating when the raster is actually hitting a new line // we can mitigate this slightly by only updating when the raster is actually hitting a new line
if (IsRasterScanlinePending()) if (IsRasterScanlineOrCommandPending())
Synchronize(); Synchronize();
return m_GPUSTAT.bits; return m_GPUSTAT.bits;
@ -273,7 +294,8 @@ void GPU::WriteRegister(u32 offset, u32 value)
switch (offset) switch (offset)
{ {
case 0x00: case 0x00:
WriteGP0(value); m_fifo.Push(value);
ExecuteCommands();
return; return;
case 0x04: case 0x04:
@ -305,22 +327,13 @@ void GPU::DMAWrite(const u32* words, u32 word_count)
{ {
case DMADirection::CPUtoGP0: case DMADirection::CPUtoGP0:
{ {
std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer)); m_fifo.PushRange(words, word_count);
m_fifo_pushed = true;
if (!m_syncing)
ExecuteCommands(); ExecuteCommands();
else
if (m_state == State::WritingVRAM)
{
Assert(m_blitter_ticks == 0);
m_blitter_ticks = GetPendingGPUTicks() + word_count;
// reschedule GPU tick event
const TickCount sysclk_ticks = GPUTicksToSystemTicks(word_count);
if (m_tick_event->GetTicksUntilNextExecution() > sysclk_ticks)
m_tick_event->Schedule(sysclk_ticks);
UpdateDMARequest(); UpdateDMARequest();
} }
}
break; break;
default: default:
@ -332,6 +345,22 @@ void GPU::DMAWrite(const u32* words, u32 word_count)
} }
} }
/// Charges `ticks` GPU ticks to command execution.
///
/// When m_command_ticks is already non-zero the new ticks are simply added on
/// top of it. Otherwise the counter is started at GetPendingGPUTicks() + ticks,
/// and the GPU tick event is pulled forward if it is currently scheduled to
/// run later than `ticks` (converted to system ticks) from now.
void GPU::AddCommandTicks(TickCount ticks)
{
  const bool counter_running = (m_command_ticks != 0);
  if (counter_running)
  {
    m_command_ticks += ticks;
  }
  else
  {
    m_command_ticks = GetPendingGPUTicks() + ticks;

    // Only reschedule when the event would otherwise fire after this command's duration has elapsed.
    const TickCount event_delay = GPUTicksToSystemTicks(ticks);
    if (m_tick_event->GetTicksUntilNextExecution() > event_delay)
      m_tick_event->Schedule(event_delay);
  }
}
void GPU::Synchronize() void GPU::Synchronize()
{ {
m_tick_event->InvokeEarly(); m_tick_event->InvokeEarly();
@ -547,7 +576,7 @@ void GPU::UpdateSliceTicks()
(m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline); (m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline);
m_tick_event->Schedule( m_tick_event->Schedule(
GPUTicksToSystemTicks((m_blitter_ticks > 0) ? std::min(m_blitter_ticks, ticks_until_vblank) : ticks_until_vblank)); GPUTicksToSystemTicks((m_command_ticks > 0) ? std::min(m_command_ticks, ticks_until_vblank) : ticks_until_vblank));
} }
bool GPU::IsRasterScanlinePending() const bool GPU::IsRasterScanlinePending() const
@ -555,6 +584,13 @@ bool GPU::IsRasterScanlinePending() const
return (GetPendingGPUTicks() + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total; return (GetPendingGPUTicks() + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total;
} }
bool GPU::IsRasterScanlineOrCommandPending() const
{
const TickCount pending_ticks = GetPendingGPUTicks();
return ((pending_ticks + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total) ||
(m_command_ticks > 0 && pending_ticks > m_command_ticks);
}
void GPU::Execute(TickCount ticks) void GPU::Execute(TickCount ticks)
{ {
// convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider
@ -565,13 +601,12 @@ void GPU::Execute(TickCount ticks)
m_crtc_state.current_tick_in_scanline += gpu_ticks; m_crtc_state.current_tick_in_scanline += gpu_ticks;
// handle blits // handle blits
TickCount blit_ticks_remaining = gpu_ticks; if (m_command_ticks > 0)
while (m_blitter_ticks > 0 && blit_ticks_remaining > 0)
{ {
const TickCount slice = std::min(blit_ticks_remaining, m_blitter_ticks); m_command_ticks -= gpu_ticks;
m_blitter_ticks -= slice; ExecuteCommands();
blit_ticks_remaining -= slice; if (m_command_ticks < 0)
UpdateDMARequest(); m_command_ticks = 0;
} }
} }
@ -672,7 +707,7 @@ void GPU::Execute(TickCount ticks)
u32 GPU::ReadGPUREAD() u32 GPU::ReadGPUREAD()
{ {
if (m_state != State::ReadingVRAM) if (m_blitter_state != BlitterState::ReadingVRAM)
return m_GPUREAD_latch; return m_GPUREAD_latch;
// Read two pixels out of VRAM and combine them. Zero fill odd pixel counts. // Read two pixels out of VRAM and combine them. Zero fill odd pixel counts.
@ -692,11 +727,12 @@ u32 GPU::ReadGPUREAD()
{ {
Log_DebugPrintf("End of VRAM->CPU transfer"); Log_DebugPrintf("End of VRAM->CPU transfer");
m_vram_transfer = {}; m_vram_transfer = {};
m_state = State::Idle; m_blitter_state = BlitterState::Idle;
UpdateDMARequest(); UpdateDMARequest();
// end of transfer, catch up on any commands which were written (unlikely) // end of transfer, catch up on any commands which were written (unlikely)
ExecuteCommands(); ExecuteCommands();
UpdateDMARequest();
break; break;
} }
} }
@ -706,12 +742,6 @@ u32 GPU::ReadGPUREAD()
return value; return value;
} }
void GPU::WriteGP0(u32 value)
{
m_GP0_buffer.push_back(value);
ExecuteCommands();
}
void GPU::WriteGP1(u32 value) void GPU::WriteGP1(u32 value)
{ {
const u8 command = Truncate8(value >> 24); const u8 command = Truncate8(value >> 24);
@ -729,11 +759,13 @@ void GPU::WriteGP1(u32 value)
case 0x01: // Clear FIFO case 0x01: // Clear FIFO
{ {
Log_DebugPrintf("GP1 clear FIFO"); Log_DebugPrintf("GP1 clear FIFO");
m_state = State::Idle; m_blitter_state = BlitterState::Idle;
m_command_total_words = 0; m_command_total_words = 0;
m_vram_transfer = {}; m_vram_transfer = {};
m_GP0_buffer.clear(); m_fifo.Clear();
m_blitter_ticks = 0; m_blit_buffer.clear();
m_blit_remaining_words = 0;
m_command_ticks = 0;
UpdateDMARequest(); UpdateDMARequest();
} }
break; break;
@ -756,10 +788,13 @@ void GPU::WriteGP1(u32 value)
case 0x04: // DMA Direction case 0x04: // DMA Direction
{ {
m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
Log_DebugPrintf("DMA direction <- 0x%02X", static_cast<u32>(m_GPUSTAT.dma_direction.GetValue())); Log_DebugPrintf("DMA direction <- 0x%02X", static_cast<u32>(m_GPUSTAT.dma_direction.GetValue()));
if (m_GPUSTAT.dma_direction != static_cast<DMADirection>(param))
{
m_GPUSTAT.dma_direction = static_cast<DMADirection>(param);
UpdateDMARequest(); UpdateDMARequest();
} }
}
break; break;
case 0x05: // Set display start address case 0x05: // Set display start address
@ -827,8 +862,10 @@ void GPU::WriteGP1(u32 value)
if (m_GPUSTAT.bits != new_GPUSTAT.bits) if (m_GPUSTAT.bits != new_GPUSTAT.bits)
{ {
// Have to be careful when setting this because Synchronize() can modify GPUSTAT.
static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000);
Synchronize(); Synchronize();
m_GPUSTAT.bits = new_GPUSTAT.bits; m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK);
UpdateCRTCConfig(); UpdateCRTCConfig();
} }
} }
@ -1023,7 +1060,7 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
} }
} }
void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) {} void GPU::DispatchRenderCommand() {}
void GPU::FlushRender() {} void GPU::FlushRender() {}

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include "common/bitfield.h" #include "common/bitfield.h"
#include "common/fifo_queue.h"
#include "common/rectangle.h" #include "common/rectangle.h"
#include "timers.h" #include "timers.h"
#include "types.h" #include "types.h"
@ -23,13 +24,12 @@ class Timers;
class GPU class GPU
{ {
public: public:
enum class State : u8 enum class BlitterState : u8
{ {
Idle, Idle,
WaitingForParameters,
ExecutingCommand,
ReadingVRAM, ReadingVRAM,
WritingVRAM WritingVRAM,
DrawingPolyLine
}; };
enum class DMADirection : u32 enum class DMADirection : u32
@ -88,13 +88,14 @@ public:
VRAM_WIDTH = 1024, VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512, VRAM_HEIGHT = 512,
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16), VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
MAX_FIFO_SIZE = 4096,
TEXTURE_PAGE_WIDTH = 256, TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256, TEXTURE_PAGE_HEIGHT = 256,
MAX_PRIMITIVE_WIDTH = 1024, MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512, MAX_PRIMITIVE_HEIGHT = 512,
DOT_TIMER_INDEX = 0, DOT_TIMER_INDEX = 0,
HBLANK_TIMER_INDEX = 1, HBLANK_TIMER_INDEX = 1,
MAX_RESOLUTION_SCALE = 16, MAX_RESOLUTION_SCALE = 16
}; };
enum : u16 enum : u16
@ -143,6 +144,9 @@ public:
/// Returns true if enough ticks have passed for the raster to be on the next line. /// Returns true if enough ticks have passed for the raster to be on the next line.
bool IsRasterScanlinePending() const; bool IsRasterScanlinePending() const;
/// Returns true if a raster scanline or command execution is pending.
bool IsRasterScanlineOrCommandPending() const;
// Synchronizes the CRTC, updating the hblank timer. // Synchronizes the CRTC, updating the hblank timer.
void Synchronize(); void Synchronize();
@ -347,10 +351,20 @@ protected:
void SetTextureWindow(u32 value); void SetTextureWindow(u32 value);
u32 ReadGPUREAD(); u32 ReadGPUREAD();
void WriteGP0(u32 value); void FinishVRAMWrite();
/// Returns the number of vertices in the buffered poly-line.
/// Computed as (buffered word count + s) >> s, where s is 1 when the current
/// render command has shading enabled and 0 otherwise.
ALWAYS_INLINE u32 GetPolyLineVertexCount() const
{
  const bool shaded = m_render_command.shading_enable;
  const u32 buffered_words = static_cast<u32>(m_blit_buffer.size());
  return (buffered_words + BoolToUInt32(shaded)) >> BoolToUInt8(shaded);
}
void AddCommandTicks(TickCount ticks);
void WriteGP1(u32 value); void WriteGP1(u32 value);
void ExecuteCommands();
void EndCommand(); void EndCommand();
void ExecuteCommands();
void HandleGetGPUInfoCommand(u32 value); void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend // Rendering in the backend
@ -358,11 +372,30 @@ protected:
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr); virtual void DispatchRenderCommand();
virtual void FlushRender(); virtual void FlushRender();
virtual void UpdateDisplay(); virtual void UpdateDisplay();
virtual void DrawRendererStats(bool is_idle_frame); virtual void DrawRendererStats(bool is_idle_frame);
// These are **very** approximate.

/// Charges (width * height) / 2 command ticks for a triangle, doubled when the
/// triangle is textured or shaded.
ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool textured, bool shaded)
{
  // The disabled branch is an alternative estimate based on vertex coordinates (x1..y3),
  // which are not parameters of this function; it is kept for reference only.
#if 0
  const u32 base_ticks = static_cast<u32>((std::abs(x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)) + 1u) / 2u);
#else
  const u32 base_ticks = (width * height) / 2;
#endif

  const bool doubled = textured || shaded;
  AddCommandTicks(base_ticks << BoolToUInt8(doubled));
}
/// Charges width * height command ticks for a rectangle, doubled when textured.
ALWAYS_INLINE void AddDrawRectangleTicks(u32 width, u32 height, bool textured)
{
  const u32 area = width * height;
  AddCommandTicks(area << BoolToUInt8(textured));
}
/// Charges command ticks equal to the larger of width and height.
/// `shaded` is accepted but does not currently affect the result.
ALWAYS_INLINE void AddDrawLineTicks(u32 width, u32 height, bool shaded)
{
  const u32 longest_extent = (width < height) ? height : width;
  AddCommandTicks(longest_extent);
}
HostDisplay* m_host_display = nullptr; HostDisplay* m_host_display = nullptr;
System* m_system = nullptr; System* m_system = nullptr;
DMA* m_dma = nullptr; DMA* m_dma = nullptr;
@ -397,7 +430,7 @@ protected:
BitField<u32, bool, 23, 1> display_disable; BitField<u32, bool, 23, 1> display_disable;
BitField<u32, bool, 24, 1> interrupt_request; BitField<u32, bool, 24, 1> interrupt_request;
BitField<u32, bool, 25, 1> dma_data_request; BitField<u32, bool, 25, 1> dma_data_request;
BitField<u32, bool, 26, 1> ready_to_recieve_cmd; BitField<u32, bool, 26, 1> gpu_idle;
BitField<u32, bool, 27, 1> ready_to_send_vram; BitField<u32, bool, 27, 1> ready_to_send_vram;
BitField<u32, bool, 28, 1> ready_to_recieve_dma; BitField<u32, bool, 28, 1> ready_to_recieve_dma;
BitField<u32, DMADirection, 29, 2> dma_direction; BitField<u32, DMADirection, 29, 2> dma_direction;
@ -595,13 +628,17 @@ protected:
bool in_vblank; bool in_vblank;
} m_crtc_state = {}; } m_crtc_state = {};
State m_state = State::Idle; BlitterState m_blitter_state = BlitterState::Idle;
TickCount m_blitter_ticks = 0; TickCount m_command_ticks = 0;
u32 m_command_total_words = 0; u32 m_command_total_words = 0;
/// GPUREAD value for non-VRAM-reads. /// GPUREAD value for non-VRAM-reads.
u32 m_GPUREAD_latch = 0; u32 m_GPUREAD_latch = 0;
/// True if currently executing/syncing.
bool m_syncing = false;
bool m_fifo_pushed = false;
struct VRAMTransfer struct VRAMTransfer
{ {
u16 x; u16 x;
@ -612,7 +649,13 @@ protected:
u16 row; u16 row;
} m_vram_transfer = {}; } m_vram_transfer = {};
std::vector<u32> m_GP0_buffer; HeapFIFOQueue<u32, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer;
u32 m_blit_remaining_words;
RenderCommand m_render_command{};
TickCount m_max_run_ahead = 128;
u32 m_fifo_size = 128;
struct Stats struct Stats
{ {
@ -627,26 +670,29 @@ protected:
Stats m_last_stats = {}; Stats m_last_stats = {};
private: private:
using GP0CommandHandler = bool (GPU::*)(const u32*&, u32); using GP0CommandHandler = bool (GPU::*)();
using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>; using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;
static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); static GP0CommandHandlerTable GenerateGP0CommandHandlerTable();
// Rendering commands, returns false if not enough data is provided // Rendering commands, returns false if not enough data is provided
bool HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size); bool HandleUnknownGP0Command();
bool HandleNOPCommand(const u32*& command_ptr, u32 command_size); bool HandleNOPCommand();
bool HandleClearCacheCommand(const u32*& command_ptr, u32 command_size); bool HandleClearCacheCommand();
bool HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size); bool HandleInterruptRequestCommand();
bool HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size); bool HandleSetDrawModeCommand();
bool HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size); bool HandleSetTextureWindowCommand();
bool HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size); bool HandleSetDrawingAreaTopLeftCommand();
bool HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size); bool HandleSetDrawingAreaBottomRightCommand();
bool HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size); bool HandleSetDrawingOffsetCommand();
bool HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size); bool HandleSetMaskBitCommand();
bool HandleRenderCommand(const u32*& command_ptr, u32 command_size); bool HandleRenderPolygonCommand();
bool HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size); bool HandleRenderRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size); bool HandleRenderLineCommand();
bool HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size); bool HandleRenderPolyLineCommand();
bool HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size); bool HandleFillRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand();
bool HandleCopyRectangleVRAMToCPUCommand();
bool HandleCopyRectangleVRAMToVRAMCommand();
static const GP0CommandHandlerTable s_GP0_command_handler_table; static const GP0CommandHandlerTable s_GP0_command_handler_table;
}; };

View File

@ -7,10 +7,9 @@
Log_SetChannel(GPU); Log_SetChannel(GPU);
#define CHECK_COMMAND_SIZE(num_words) \ #define CHECK_COMMAND_SIZE(num_words) \
if (command_size < num_words) \ if (m_fifo.GetSize() < num_words) \
{ \ { \
m_command_total_words = num_words; \ m_command_total_words = num_words; \
m_state = State::WaitingForParameters; \
return false; \ return false; \
} }
@ -24,33 +23,95 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
void GPU::ExecuteCommands() void GPU::ExecuteCommands()
{ {
Assert(m_GP0_buffer.size() < 1048576); m_syncing = true;
const u32* command_ptr = m_GP0_buffer.data(); for (;;)
u32 command_size = static_cast<u32>(m_GP0_buffer.size());
while (m_state != State::ReadingVRAM && command_size > 0 && command_size >= m_command_total_words)
{ {
const u32 command = command_ptr[0] >> 24; if (m_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty())
const u32* old_command_ptr = command_ptr; {
if (!(this->*s_GP0_command_handler_table[command])(command_ptr, command_size)) switch (m_blitter_state)
{
case BlitterState::Idle:
{
const u32 command = m_fifo.Peek(0) >> 24;
if ((this->*s_GP0_command_handler_table[command])())
continue;
else
break; break;
const u32 words_used = static_cast<u32>(command_ptr - old_command_ptr);
DebugAssert(words_used <= command_size);
command_size -= words_used;
} }
if (command_size == 0) case BlitterState::WritingVRAM:
m_GP0_buffer.clear(); {
else if (command_ptr > m_GP0_buffer.data()) DebugAssert(m_blit_remaining_words > 0);
m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (command_ptr - m_GP0_buffer.data())); const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize());
const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy);
m_blit_remaining_words -= words_to_copy;
AddCommandTicks(words_to_copy);
Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words);
if (m_blit_remaining_words == 0)
FinishVRAMWrite();
continue;
}
case BlitterState::ReadingVRAM:
{
Panic("shouldn't be here");
}
break;
case BlitterState::DrawingPolyLine:
{
const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1;
u32 terminator_index =
m_render_command.shading_enable ? ((static_cast<u32>(m_blit_buffer.size()) & 1u) ^ 1u) : 0u;
for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000.
// terminator is on the first word for the vertex
if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
break;
}
const bool found_terminator = (terminator_index < m_fifo.GetSize());
const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize());
if (words_to_copy > 0)
{
const size_t old_size = m_blit_buffer.size();
m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy);
m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy);
}
Log_DebugPrintf("Added %u words to polyline", words_to_copy);
if (found_terminator)
{
// drop terminator
m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand();
m_blit_buffer.clear();
EndCommand();
}
}
break;
}
}
m_fifo_pushed = false;
UpdateDMARequest(); UpdateDMARequest();
if (!m_fifo_pushed)
break;
}
m_syncing = false;
} }
void GPU::EndCommand() void GPU::EndCommand()
{ {
m_state = State::Idle; m_blitter_state = BlitterState::Idle;
m_command_total_words = 0; m_command_total_words = 0;
} }
@ -67,7 +128,24 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
table[i] = &GPU::HandleNOPCommand; table[i] = &GPU::HandleNOPCommand;
table[0x1F] = &GPU::HandleInterruptRequestCommand; table[0x1F] = &GPU::HandleInterruptRequestCommand;
for (u32 i = 0x20; i <= 0x7F; i++) for (u32 i = 0x20; i <= 0x7F; i++)
table[i] = &GPU::HandleRenderCommand; {
const RenderCommand rc{i << 24};
switch (rc.primitive)
{
case Primitive::Polygon:
table[i] = &GPU::HandleRenderPolygonCommand;
break;
case Primitive::Line:
table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand;
break;
case Primitive::Rectangle:
table[i] = &GPU::HandleRenderRectangleCommand;
break;
default:
table[i] = &GPU::HandleUnknownGP0Command;
break;
}
}
table[0xE0] = &GPU::HandleNOPCommand; table[0xE0] = &GPU::HandleNOPCommand;
table[0xE1] = &GPU::HandleSetDrawModeCommand; table[0xE1] = &GPU::HandleSetDrawModeCommand;
table[0xE2] = &GPU::HandleSetTextureWindowCommand; table[0xE2] = &GPU::HandleSetTextureWindowCommand;
@ -87,30 +165,31 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
return table; return table;
} }
bool GPU::HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size) bool GPU::HandleUnknownGP0Command()
{ {
const u32 command = *(command_ptr++) >> 24; const u32 command = m_fifo.Pop() >> 24;
Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleNOPCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleNOPCommand()
{ {
command_ptr++; m_fifo.RemoveOne();
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleClearCacheCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleClearCacheCommand()
{ {
Log_DebugPrintf("GP0 clear cache"); Log_DebugPrintf("GP0 clear cache");
command_ptr++; m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleInterruptRequestCommand()
{ {
Log_WarningPrintf("GP0 interrupt request"); Log_WarningPrintf("GP0 interrupt request");
if (!m_GPUSTAT.interrupt_request) if (!m_GPUSTAT.interrupt_request)
@ -119,35 +198,38 @@ bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_siz
m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU); m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU);
} }
command_ptr++; m_fifo.RemoveOne();
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetDrawModeCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
Log_DebugPrintf("Set draw mode %08X", param); Log_DebugPrintf("Set draw mode %08X", param);
SetDrawMode(Truncate16(param)); SetDrawMode(Truncate16(param));
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetTextureWindowCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
SetTextureWindow(param); SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x,
m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x,
m_draw_mode.texture_window_offset_y); m_draw_mode.texture_window_offset_y);
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetDrawingAreaTopLeftCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const u32 left = param & 0x3FF; const u32 left = param & 0x3FF;
const u32 top = (param >> 10) & 0x1FF; const u32 top = (param >> 10) & 0x1FF;
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top);
@ -160,16 +242,17 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 comman
m_drawing_area_changed = true; m_drawing_area_changed = true;
} }
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetDrawingAreaBottomRightCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const u32 right = param & 0x3FF; const u32 right = param & 0x3FFu;
const u32 bottom = (param >> 10) & 0x1FF; const u32 bottom = (param >> 10) & 0x1FFu;
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom);
if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) if (m_drawing_area.right != right || m_drawing_area.bottom != bottom)
{ {
@ -180,15 +263,16 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 co
m_drawing_area_changed = true; m_drawing_area_changed = true;
} }
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetDrawingOffsetCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
const s32 x = SignExtendN<11, s32>(param & 0x7FF); const s32 x = SignExtendN<11, s32>(param & 0x7FFu);
const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FF); const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu);
Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y); Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y);
if (m_drawing_offset.x != x || m_drawing_offset.y != y) if (m_drawing_offset.x != x || m_drawing_offset.y != y)
{ {
@ -198,13 +282,14 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz
m_drawing_offset.y = y; m_drawing_offset.y = y;
} }
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleSetMaskBitCommand()
{ {
const u32 param = *(command_ptr++) & 0x00FFFFFF; const u32 param = m_fifo.Pop() & 0x00FFFFFFu;
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11; const u32 gpustat_bits = (param & 0x03) << 11;
@ -216,172 +301,198 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size)
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleRenderPolygonCommand()
{
const RenderCommand rc{command_ptr[0]};
u8 words_per_vertex;
u32 num_vertices;
u32 total_words;
switch (rc.primitive)
{
case Primitive::Polygon:
{ {
const RenderCommand rc{m_fifo.Peek(0)};
// shaded vertices use the colour from the first word for the first vertex // shaded vertices use the colour from the first word for the first vertex
words_per_vertex = 1 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.shading_enable); const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
num_vertices = rc.quad_polygon ? 4 : 3; const u32 num_vertices = rc.quad_polygon ? 4 : 3;
total_words = words_per_vertex * num_vertices + BoolToUInt8(!rc.shading_enable); const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable);
CHECK_COMMAND_SIZE(total_words); CHECK_COMMAND_SIZE(total_words);
// set draw state up
if (rc.texture_enable)
{
const u16 texpage_attribute = Truncate16((rc.shading_enable ? command_ptr[5] : command_ptr[4]) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(command_ptr[2] >> 16));
}
}
break;
case Primitive::Line:
{
words_per_vertex = 1 + BoolToUInt8(rc.shading_enable);
if (rc.polyline)
{
// polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. terminator
// is on the first word for the vertex
num_vertices = 2;
bool found_terminator = false;
for (u32 pos = rc.shading_enable ? 4 : 3; pos < command_size; pos += words_per_vertex)
{
if ((command_ptr[pos] & UINT32_C(0xF000F000)) == UINT32_C(0x50005000))
{
found_terminator = true;
break;
}
num_vertices++;
}
if (!found_terminator)
return false;
total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable) + 1;
}
else
{
num_vertices = 2;
total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable);
}
}
break;
case Primitive::Rectangle:
{
words_per_vertex =
2 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.rectangle_size == DrawRectangleSize::Variable);
num_vertices = 1;
total_words = words_per_vertex;
if (rc.texture_enable)
SetTexturePalette(Truncate16(command_ptr[2] >> 16));
}
break;
default:
UnreachableCode();
return true;
}
CHECK_COMMAND_SIZE(total_words);
static constexpr std::array<const char*, 4> primitive_names = {{"", "polygon", "line", "rectangle"}};
Log_TracePrintf("Render %s %s %s %s %s (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point",
rc.transparency_enable ? "semi-transparent" : "opaque",
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
primitive_names[static_cast<u8>(rc.primitive.GetValue())], ZeroExtend32(num_vertices),
ZeroExtend32(words_per_vertex));
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize(); Synchronize();
DispatchRenderCommand(rc, num_vertices, command_ptr); Log_TracePrintf(
command_ptr += total_words; "Render %s %s %s %s polygon (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point",
rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured",
rc.shading_enable ? "shaded" : "monochrome", ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex));
// set draw state up
if (rc.texture_enable)
{
const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16));
}
m_stats.num_vertices += num_vertices; m_stats.num_vertices += num_vertices;
m_stats.num_polygons++; m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleRenderRectangleCommand()
{
const RenderCommand rc{m_fifo.Peek(0)};
const u32 total_words =
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable);
CHECK_COMMAND_SIZE(total_words);
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize();
if (rc.texture_enable)
SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16));
Log_TracePrintf("Render %s %s %s rectangle (%u words)", rc.transparency_enable ? "semi-transparent" : "opaque",
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
total_words);
m_stats.num_vertices++;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
EndCommand();
return true;
}
// Handles a single (non-poly) line render command sitting at the front of the FIFO.
// The command word is only peeked at first; it is removed, and the draw dispatched, after the
// command-size check has passed. Returns true once the command has been dispatched and ended.
bool GPU::HandleRenderLineCommand()
{
  const RenderCommand rc{m_fifo.Peek(0)};
  const bool is_shaded = rc.shading_enable;
  const bool is_transparent = rc.transparency_enable;

  // Shaded lines need four words in total, monochrome lines three.
  const u32 total_words = is_shaded ? 4 : 3;
  CHECK_COMMAND_SIZE(total_words);

  if (IsInterlacedRenderingEnabled())
  {
    if (IsRasterScanlinePending())
      Synchronize();
  }

  const char* const blend_desc = is_transparent ? "semi-transparent" : "opaque";
  const char* const shade_desc = is_shaded ? "shaded" : "monochrome";
  Log_TracePrintf("Render %s %s line (%u total words)", blend_desc, shade_desc, total_words);

  // A single line contributes two vertices and one primitive to the statistics.
  m_stats.num_vertices += 2;
  m_stats.num_polygons += 1;

  // Latch the command bits for the renderer, then drop the command word so that the
  // remaining words are at the front of the FIFO when DispatchRenderCommand() runs.
  m_render_command.bits = rc.bits;
  m_fifo.RemoveOne();
  DispatchRenderCommand();
  EndCommand();
  return true;
}
// Begins a poly-line render command. Unlike the other render handlers visible in this file,
// nothing is dispatched and EndCommand() is not called here: the command word is latched, the
// leading words are copied from the FIFO into m_blit_buffer, and the blitter is switched to
// BlitterState::DrawingPolyLine.
// Returns true after the state switch; CHECK_COMMAND_SIZE presumably returns early when fewer
// than min_words are queued (macro not visible here - confirm).
bool GPU::HandleRenderPolyLineCommand()
{
// always read the first two vertices, we test for the terminator after that
const RenderCommand rc{m_fifo.Peek(0)};
// NOTE(review): HandleRenderLineCommand uses `shading_enable ? 4 : 3` for its word count, while
// this ternary is the other way round (3 when shaded, 4 when monochrome). This looks inverted,
// but the DrawingPolyLine consumer of m_blit_buffer may rely on the exact number of words
// popped below - confirm against that code before changing it.
const u32 min_words = rc.shading_enable ? 3 : 4;
CHECK_COMMAND_SIZE(min_words);
if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending())
Synchronize();
Log_TracePrintf("Render %s %s poly-line", rc.transparency_enable ? "semi-transparent" : "opaque",
rc.shading_enable ? "shaded" : "monochrome");
// latch the command bits, then drop the command word from the FIFO
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
// move the remaining (min_words - 1) words out of the FIFO into the blit buffer
const u32 words_to_pop = min_words - 1;
m_blit_buffer.resize(words_to_pop);
m_fifo.PopRange(m_blit_buffer.data(), words_to_pop);
// polyline goes via a different path through the blit buffer
m_blitter_state = BlitterState::DrawingPolyLine;
m_command_total_words = 0;
return true;
}
bool GPU::HandleFillRectangleCommand()
{ {
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
FlushRender(); FlushRender();
const u32 color = command_ptr[0] & 0x00FFFFFF; const u32 color = m_fifo.Pop() & 0x00FFFFFF;
const u32 dst_x = command_ptr[1] & 0x3F0; const u32 dst_x = m_fifo.Peek() & 0x3F0;
const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF; const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 width = ((command_ptr[2] & 0x3FF) + 0xF) & ~0xF; const u32 width = ((m_fifo.Peek() & 0x3FF) + 0xF) & ~0xF;
const u32 height = (command_ptr[2] >> 16) & 0x1FF; const u32 height = (m_fifo.Pop() >> 16) & 0x1FF;
command_ptr += 3;
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height);
FillVRAM(dst_x, dst_y, width, height, color); FillVRAM(dst_x, dst_y, width, height, color);
m_stats.num_vram_fills++; m_stats.num_vram_fills++;
AddCommandTicks(46 + ((width / 8) + 9) * height);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleCopyRectangleCPUToVRAMCommand()
{ {
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
m_fifo.RemoveOne();
const u32 copy_width = ReplaceZero(command_ptr[2] & 0x3FF, 0x400); const u32 dst_x = m_fifo.Peek() & 0x3FF;
const u32 copy_height = ReplaceZero((command_ptr[2] >> 16) & 0x1FF, 0x200); const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 copy_width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400);
const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200);
const u32 num_pixels = copy_width * copy_height; const u32 num_pixels = copy_width * copy_height;
const u32 num_words = 3 + ((num_pixels + 1) / 2); const u32 num_words = ((num_pixels + 1) / 2);
if (command_size < num_words)
{
m_command_total_words = num_words;
m_state = State::WritingVRAM;
return false;
}
const u32 dst_x = command_ptr[1] & 0x3FF;
const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF;
Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width, Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width,
copy_height); copy_height);
if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies)
{
DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(),
copy_width, copy_height, sizeof(u16) * copy_width, &command_ptr[3], true);
}
FlushRender();
UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &command_ptr[3]);
command_ptr += num_words;
m_stats.num_vram_writes++;
EndCommand(); EndCommand();
m_blitter_state = BlitterState::WritingVRAM;
m_blit_buffer.reserve(num_words);
m_blit_remaining_words = num_words;
m_vram_transfer.x = Truncate16(dst_x);
m_vram_transfer.y = Truncate16(dst_y);
m_vram_transfer.width = Truncate16(copy_width);
m_vram_transfer.height = Truncate16(copy_height);
return true; return true;
} }
bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size) void GPU::FinishVRAMWrite()
{
if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies)
{
DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(),
m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width,
m_blit_buffer.data(), true);
}
FlushRender();
UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data());
m_blit_buffer.clear();
m_vram_transfer = {};
m_blitter_state = BlitterState::Idle;
m_stats.num_vram_writes++;
}
bool GPU::HandleCopyRectangleVRAMToCPUCommand()
{ {
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
m_fifo.RemoveOne();
m_vram_transfer.width = ((Truncate16(command_ptr[2]) - 1) & 0x3FF) + 1; m_vram_transfer.x = Truncate16(m_fifo.Peek() & 0x3FF);
m_vram_transfer.height = ((Truncate16(command_ptr[2] >> 16) - 1) & 0x1FF) + 1; m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & 0x3FF);
m_vram_transfer.x = Truncate16(command_ptr[1] & 0x3FF); m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & 0x3FF) + 1;
m_vram_transfer.y = Truncate16((command_ptr[1] >> 16) & 0x3FF); m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & 0x1FF) + 1;
command_ptr += 3;
Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y,
m_vram_transfer.width, m_vram_transfer.height); m_vram_transfer.width, m_vram_transfer.height);
@ -402,22 +513,22 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 comma
// switch to pixel-by-pixel read state // switch to pixel-by-pixel read state
m_stats.num_vram_reads++; m_stats.num_vram_reads++;
m_state = State::ReadingVRAM; m_blitter_state = BlitterState::ReadingVRAM;
m_command_total_words = 0; m_command_total_words = 0;
return true; return true;
} }
bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size) bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
{ {
CHECK_COMMAND_SIZE(4); CHECK_COMMAND_SIZE(4);
m_fifo.RemoveOne();
const u32 src_x = command_ptr[1] & 0x3FF; const u32 src_x = m_fifo.Peek() & 0x3FF;
const u32 src_y = (command_ptr[1] >> 16) & 0x3FF; const u32 src_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 dst_x = command_ptr[2] & 0x3FF; const u32 dst_x = m_fifo.Peek() & 0x3FF;
const u32 dst_y = (command_ptr[2] >> 16) & 0x3FF; const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF;
const u32 width = ReplaceZero(command_ptr[3] & 0x3FF, 0x400); const u32 width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400);
const u32 height = ReplaceZero((command_ptr[3] >> 16) & 0x1FF, 0x200); const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200);
command_ptr += 4;
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y,
width, height); width, height);
@ -425,6 +536,7 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 comm
FlushRender(); FlushRender();
CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
m_stats.num_vram_copies++; m_stats.num_vram_copies++;
AddCommandTicks(width * height * 2);
EndCommand(); EndCommand();
return true; return true;
} }

View File

@ -172,8 +172,9 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)
} }
} }
void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr) void GPU_HW::LoadVertices()
{ {
const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
// TODO: Move this to the GPU.. // TODO: Move this to the GPU..
@ -181,20 +182,19 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
{ {
case Primitive::Polygon: case Primitive::Polygon:
{ {
DebugAssert(num_vertices == 3 || num_vertices == 4);
EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3); EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3);
const u32 first_color = rc.color_for_first_vertex; const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable; const bool textured = rc.texture_enable;
u32 buffer_pos = 1; const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices; std::array<BatchVertex, 4> vertices;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]}; const VertexPosition vp{m_fifo.Pop()};
const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0; const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0;
vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, texpage, packed_texcoord); vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, texpage, packed_texcoord);
} }
@ -226,6 +226,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3; m_batch_current_vertex_ptr += 3;
@ -255,6 +256,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u; static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable);
AddVertex(vertices[2]); AddVertex(vertices[2]);
AddVertex(vertices[1]); AddVertex(vertices[1]);
@ -266,14 +268,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Rectangle: case Primitive::Rectangle:
{ {
u32 buffer_pos = 1;
const u32 color = rc.color_for_first_vertex; const u32 color = rc.color_for_first_vertex;
const VertexPosition vp{command_ptr[buffer_pos++]}; const VertexPosition vp{m_fifo.Pop()};
const s32 pos_x = m_drawing_offset.x + vp.x; const s32 pos_x = m_drawing_offset.x + vp.x;
const s32 pos_y = m_drawing_offset.y + vp.y; const s32 pos_y = m_drawing_offset.y + vp.y;
const auto [texcoord_x, texcoord_y] = const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0);
UnpackTexcoord(rc.texture_enable ? Truncate16(command_ptr[buffer_pos++]) : 0);
u16 orig_tex_left = ZeroExtend16(texcoord_x); u16 orig_tex_left = ZeroExtend16(texcoord_x);
u16 orig_tex_top = ZeroExtend16(texcoord_y); u16 orig_tex_top = ZeroExtend16(texcoord_y);
s32 rectangle_width; s32 rectangle_width;
@ -293,8 +293,11 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
rectangle_height = 16; rectangle_height = 16;
break; break;
default: default:
rectangle_width = static_cast<s32>(command_ptr[buffer_pos] & 0xFFFF); {
rectangle_height = static_cast<s32>(command_ptr[buffer_pos] >> 16); const u32 width_and_height = m_fifo.Pop();
rectangle_width = static_cast<s32>(width_and_height & 0xFFFF);
rectangle_height = static_cast<s32>(width_and_height >> 16);
}
break; break;
} }
@ -350,22 +353,35 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u; static_cast<u32>(std::clamp<s32>(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable);
} }
break; break;
case Primitive::Line: case Primitive::Line:
{ {
EnsureVertexBufferSpace(num_vertices * 2); const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
EnsureVertexBufferSpace(num_vertices);
const u32 first_color = rc.color_for_first_vertex; const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
u32 buffer_pos = 1;
BatchVertex last_vertex; BatchVertex last_vertex;
u32 buffer_pos = 0;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; u32 color;
const VertexPosition vp{command_ptr[buffer_pos++]}; VertexPosition vp;
if (rc.polyline)
{
color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
vp.bits = m_blit_buffer[buffer_pos++];
}
else
{
color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
vp.bits = m_fifo.Pop();
}
BatchVertex vertex; BatchVertex vertex;
vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, 0, 0); vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, 0, 0);
@ -394,6 +410,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
} }
} }
@ -485,8 +502,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
} }
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) void GPU_HW::DispatchRenderCommand()
{ {
const RenderCommand rc{m_render_command.bits};
TextureMode texture_mode; TextureMode texture_mode;
if (rc.IsTexturingEnabled()) if (rc.IsTexturingEnabled())
{ {
@ -574,7 +593,7 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
} }
LoadVertices(rc, num_vertices, command_ptr); LoadVertices();
} }
void GPU_HW::FlushRender() void GPU_HW::FlushRender()

View File

@ -145,7 +145,7 @@ protected:
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override; void DispatchRenderCommand() override;
void FlushRender() override; void FlushRender() override;
void DrawRendererStats(bool is_idle_frame) override; void DrawRendererStats(bool is_idle_frame) override;
@ -198,7 +198,7 @@ private:
static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc); static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); void LoadVertices();
ALWAYS_INLINE void AddVertex(const BatchVertex& v) ALWAYS_INLINE void AddVertex(const BatchVertex& v)
{ {

View File

@ -204,8 +204,9 @@ void GPU_SW::UpdateDisplay()
} }
} }
void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) void GPU_SW::DispatchRenderCommand()
{ {
const RenderCommand rc{m_render_command.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable; const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable;
switch (rc.primitive) switch (rc.primitive)
@ -216,24 +217,23 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
const bool shaded = rc.shading_enable; const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable; const bool textured = rc.texture_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<SWVertex, 4> vertices; std::array<SWVertex, 4> vertices;
u32 buffer_pos = 1;
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
SWVertex& vert = vertices[i]; SWVertex& vert = vertices[i];
const u32 color_rgb = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; const u32 color_rgb = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
vert.color_r = Truncate8(color_rgb); vert.color_r = Truncate8(color_rgb);
vert.color_g = Truncate8(color_rgb >> 8); vert.color_g = Truncate8(color_rgb >> 8);
vert.color_b = Truncate8(color_rgb >> 16); vert.color_b = Truncate8(color_rgb >> 16);
const VertexPosition vp{command_ptr[buffer_pos++]}; const VertexPosition vp{m_fifo.Pop()};
vert.x = vp.x; vert.x = vp.x;
vert.y = vp.y; vert.y = vp.y;
if (textured) if (textured)
{ {
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++])); std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(m_fifo.Pop()));
} }
else else
{ {
@ -253,10 +253,9 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
case Primitive::Rectangle: case Primitive::Rectangle:
{ {
u32 buffer_pos = 1;
const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
const VertexPosition vp{command_ptr[buffer_pos++]}; const VertexPosition vp{m_fifo.Pop()};
const u32 texcoord_and_palette = rc.texture_enable ? command_ptr[buffer_pos++] : 0; const u32 texcoord_and_palette = rc.texture_enable ? m_fifo.Pop() : 0;
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
s32 width; s32 width;
@ -276,8 +275,11 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
height = 16; height = 16;
break; break;
default: default:
width = static_cast<s32>(command_ptr[buffer_pos] & UINT32_C(0xFFFF)); {
height = static_cast<s32>(command_ptr[buffer_pos] >> 16); const u32 width_and_height = m_fifo.Pop();
width = static_cast<s32>(width_and_height & UINT32_C(0xFFFF));
height = static_cast<s32>(width_and_height >> 16);
}
break; break;
} }
@ -296,19 +298,28 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable); const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable);
std::array<SWVertex, 2> vertices = {}; std::array<SWVertex, 2> vertices = {};
u32 buffer_pos = 1; u32 buffer_pos = 0;
// first vertex // first vertex
SWVertex* p0 = &vertices[0]; SWVertex* p0 = &vertices[0];
SWVertex* p1 = &vertices[1]; SWVertex* p1 = &vertices[1];
p0->SetPosition(VertexPosition{command_ptr[buffer_pos++]}); p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : m_fifo.Pop()});
p0->SetColorRGB24(first_color); p0->SetColorRGB24(first_color);
// remaining vertices in line strip // remaining vertices in line strip
const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
for (u32 i = 1; i < num_vertices; i++) for (u32 i = 1; i < num_vertices; i++)
{ {
p1->SetColorRGB24(shaded ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); if (rc.polyline)
p1->SetPosition(VertexPosition{command_ptr[buffer_pos++]}); {
p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]});
}
else
{
p1->SetColorRGB24(shaded ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_fifo.Pop()});
}
(this->*DrawFunction)(p0, p1); (this->*DrawFunction)(p0, p1);
@ -408,6 +419,7 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right)); max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom)); min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom)); max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, texture_enable, shading_enable);
// compute per-pixel increments // compute per-pixel increments
const s32 a01 = py0 - py1, b01 = px1 - px0; const s32 a01 = py0 - py1, b01 = px1 - px0;
@ -501,6 +513,18 @@ void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8
origin_x += m_drawing_offset.x; origin_x += m_drawing_offset.x;
origin_y += m_drawing_offset.y; origin_y += m_drawing_offset.y;
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(origin_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(origin_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(origin_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(origin_y + static_cast<s32>(height), m_drawing_area.top,
m_drawing_area.bottom)) +
1u;
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable);
}
for (u32 offset_y = 0; offset_y < height; offset_y++) for (u32 offset_y = 0; offset_y < height; offset_y++)
{ {
const s32 y = origin_y + static_cast<s32>(offset_y); const s32 y = origin_y + static_cast<s32>(offset_y);
@ -690,6 +714,21 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
const s32 dy = p1->y - p0->y; const s32 dy = p1->y - p0->y;
const s32 k = std::max(std::abs(dx), std::abs(dy)); const s32 k = std::max(std::abs(dx), std::abs(dy));
{
  // TODO: Move to base class
  // Tick accounting: take the line's bounding box, clamp each edge to the drawing area, and
  // pass the resulting width/height (the +1 makes the right/bottom edges inclusive) to
  // AddDrawLineTicks().
  const s32 min_x = std::min(p0->x, p1->x);
  const s32 max_x = std::max(p0->x, p1->x);
  const s32 min_y = std::min(p0->y, p1->y);
  const s32 max_y = std::max(p0->y, p1->y);

  // The left edge is clamped to [left, right]. Using `left` for both bounds pinned clip_left to
  // the drawing-area edge and overstated the width of every line starting further right.
  const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
  const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
  const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
  const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
  AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
}
FixedPointCoord step_x, step_y; FixedPointCoord step_x, step_y;
FixedPointColor step_r, step_g, step_b; FixedPointColor step_r, step_g, step_b;
if (k > 0) if (k > 0)

View File

@ -51,7 +51,7 @@ protected:
// Rasterization // Rasterization
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override; void DispatchRenderCommand() override;
static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);

View File

@ -924,6 +924,8 @@ void HostInterface::UpdateSettings(const std::function<void()>& apply_callback)
const AudioBackend old_audio_backend = m_settings.audio_backend; const AudioBackend old_audio_backend = m_settings.audio_backend;
const GPURenderer old_gpu_renderer = m_settings.gpu_renderer; const GPURenderer old_gpu_renderer = m_settings.gpu_renderer;
const u32 old_gpu_resolution_scale = m_settings.gpu_resolution_scale; const u32 old_gpu_resolution_scale = m_settings.gpu_resolution_scale;
const u32 old_gpu_fifo_size = m_settings.gpu_fifo_size;
const u32 old_gpu_max_run_ahead = m_settings.gpu_max_run_ahead;
const bool old_gpu_true_color = m_settings.gpu_true_color; const bool old_gpu_true_color = m_settings.gpu_true_color;
const bool old_gpu_scaled_dithering = m_settings.gpu_scaled_dithering; const bool old_gpu_scaled_dithering = m_settings.gpu_scaled_dithering;
const bool old_gpu_texture_filtering = m_settings.gpu_texture_filtering; const bool old_gpu_texture_filtering = m_settings.gpu_texture_filtering;
@ -978,8 +980,8 @@ void HostInterface::UpdateSettings(const std::function<void()>& apply_callback)
m_system->SetCPUExecutionMode(m_settings.cpu_execution_mode); m_system->SetCPUExecutionMode(m_settings.cpu_execution_mode);
} }
if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale || if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale || m_settings.gpu_fifo_size != old_gpu_fifo_size ||
m_settings.gpu_true_color != old_gpu_true_color || m_settings.gpu_max_run_ahead != old_gpu_max_run_ahead || m_settings.gpu_true_color != old_gpu_true_color ||
m_settings.gpu_scaled_dithering != old_gpu_scaled_dithering || m_settings.gpu_scaled_dithering != old_gpu_scaled_dithering ||
m_settings.gpu_texture_filtering != old_gpu_texture_filtering || m_settings.gpu_texture_filtering != old_gpu_texture_filtering ||
m_settings.gpu_disable_interlacing != old_gpu_disable_interlacing || m_settings.gpu_disable_interlacing != old_gpu_disable_interlacing ||

View File

@ -2,4 +2,4 @@
#include "types.h" #include "types.h"
static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; static constexpr u32 SAVE_STATE_MAGIC = 0x43435544;
static constexpr u32 SAVE_STATE_VERSION = 22; static constexpr u32 SAVE_STATE_VERSION = 23;

View File

@ -23,6 +23,8 @@ void Settings::Load(SettingsInterface& si)
gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str()) gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str())
.value_or(DEFAULT_GPU_RENDERER); .value_or(DEFAULT_GPU_RENDERER);
gpu_resolution_scale = static_cast<u32>(si.GetIntValue("GPU", "ResolutionScale", 1)); gpu_resolution_scale = static_cast<u32>(si.GetIntValue("GPU", "ResolutionScale", 1));
gpu_fifo_size = static_cast<u32>(si.GetIntValue("GPU", "FIFOSize", 128));
gpu_max_run_ahead = static_cast<u32>(si.GetIntValue("GPU", "MaxRunAhead", 128));
gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false);
gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true);
gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false);

View File

@ -47,6 +47,8 @@ struct Settings
GPURenderer gpu_renderer = GPURenderer::Software; GPURenderer gpu_renderer = GPURenderer::Software;
u32 gpu_resolution_scale = 1; u32 gpu_resolution_scale = 1;
u32 gpu_fifo_size = 128;
u32 gpu_max_run_ahead = 128;
bool gpu_use_debug_device = false; bool gpu_use_debug_device = false;
bool gpu_true_color = true; bool gpu_true_color = true;
bool gpu_scaled_dithering = false; bool gpu_scaled_dithering = false;