GPU: Make VRAM a global object
This commit is contained in:
parent
7340324ed9
commit
411213d6a0
|
@ -33,6 +33,7 @@
|
||||||
Log_SetChannel(GPU);
|
Log_SetChannel(GPU);
|
||||||
|
|
||||||
std::unique_ptr<GPU> g_gpu;
|
std::unique_ptr<GPU> g_gpu;
|
||||||
|
alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)];
|
||||||
|
|
||||||
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
|
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
|
||||||
|
|
||||||
|
@ -132,6 +133,10 @@ void GPU::Reset(bool clear_vram)
|
||||||
m_crtc_state.in_vblank = false;
|
m_crtc_state.in_vblank = false;
|
||||||
m_crtc_state.interlaced_field = 0;
|
m_crtc_state.interlaced_field = 0;
|
||||||
m_crtc_state.interlaced_display_field = 0;
|
m_crtc_state.interlaced_display_field = 0;
|
||||||
|
|
||||||
|
if (clear_vram)
|
||||||
|
std::memset(g_vram, 0, sizeof(g_vram));
|
||||||
|
|
||||||
SoftReset();
|
SoftReset();
|
||||||
UpdateDisplay();
|
UpdateDisplay();
|
||||||
}
|
}
|
||||||
|
@ -300,7 +305,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
||||||
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
|
sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1074,7 +1079,7 @@ u32 GPU::ReadGPUREAD()
|
||||||
// Read with correct wrap-around behavior.
|
// Read with correct wrap-around behavior.
|
||||||
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
|
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
|
||||||
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
|
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
|
||||||
value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16);
|
value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16);
|
||||||
|
|
||||||
if (++m_vram_transfer.col == m_vram_transfer.width)
|
if (++m_vram_transfer.col == m_vram_transfer.width)
|
||||||
{
|
{
|
||||||
|
@ -1357,7 +1362,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
||||||
std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
|
std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IsInterlacedRenderingEnabled())
|
else if (IsInterlacedRenderingEnabled())
|
||||||
|
@ -1373,7 +1378,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
if ((row & u32(1)) == active_field)
|
if ((row & u32(1)) == active_field)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
|
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
|
||||||
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
||||||
{
|
{
|
||||||
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
||||||
|
@ -1386,7 +1391,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
||||||
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
|
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
|
||||||
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
||||||
{
|
{
|
||||||
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
||||||
|
@ -1402,7 +1407,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool
|
||||||
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
|
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
|
||||||
{
|
{
|
||||||
const u16* src_ptr = static_cast<const u16*>(data);
|
const u16* src_ptr = static_cast<const u16*>(data);
|
||||||
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
|
u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
std::copy_n(src_ptr, width, dst_ptr);
|
std::copy_n(src_ptr, width, dst_ptr);
|
||||||
|
@ -1420,7 +1425,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool
|
||||||
|
|
||||||
for (u32 row = 0; row < height;)
|
for (u32 row = 0; row < height;)
|
||||||
{
|
{
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
for (u32 col = 0; col < width;)
|
for (u32 col = 0; col < width;)
|
||||||
{
|
{
|
||||||
// TODO: Handle unaligned reads...
|
// TODO: Handle unaligned reads...
|
||||||
|
@ -1475,8 +1480,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
|
||||||
{
|
{
|
||||||
for (u32 row = 0; row < height; row++)
|
for (u32 row = 0; row < height; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
|
|
||||||
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
|
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
|
||||||
{
|
{
|
||||||
|
@ -1491,8 +1496,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
|
||||||
{
|
{
|
||||||
for (u32 row = 0; row < height; row++)
|
for (u32 row = 0; row < height; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
|
|
||||||
for (u32 col = 0; col < width; col++)
|
for (u32 col = 0; col < width; col++)
|
||||||
{
|
{
|
||||||
|
@ -2175,11 +2180,11 @@ bool GPU::DumpVRAMToFile(const char* filename)
|
||||||
const char* extension = std::strrchr(filename, '.');
|
const char* extension = std::strrchr(filename, '.');
|
||||||
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
|
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
|
||||||
{
|
{
|
||||||
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, m_vram_ptr, true);
|
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true);
|
||||||
}
|
}
|
||||||
else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
|
else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
|
||||||
{
|
{
|
||||||
return FileSystem::WriteBinaryFile(filename, m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
|
return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "common/bitfield.h"
|
#include "common/bitfield.h"
|
||||||
#include "common/fifo_queue.h"
|
#include "common/fifo_queue.h"
|
||||||
#include "common/rectangle.h"
|
#include "common/rectangle.h"
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
@ -359,9 +360,6 @@ protected:
|
||||||
std::unique_ptr<TimingEvent> m_crtc_tick_event;
|
std::unique_ptr<TimingEvent> m_crtc_tick_event;
|
||||||
std::unique_ptr<TimingEvent> m_command_tick_event;
|
std::unique_ptr<TimingEvent> m_command_tick_event;
|
||||||
|
|
||||||
// Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer.
|
|
||||||
u16* m_vram_ptr = nullptr;
|
|
||||||
|
|
||||||
union GPUSTAT
|
union GPUSTAT
|
||||||
{
|
{
|
||||||
u32 bits;
|
u32 bits;
|
||||||
|
@ -651,3 +649,4 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::unique_ptr<GPU> g_gpu;
|
extern std::unique_ptr<GPU> g_gpu;
|
||||||
|
extern u16 g_vram[VRAM_SIZE / sizeof(u16)];
|
||||||
|
|
|
@ -23,7 +23,7 @@ bool GPUBackend::Initialize(bool force_thread)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUBackend::Reset(bool clear_vram)
|
void GPUBackend::Reset()
|
||||||
{
|
{
|
||||||
Sync(true);
|
Sync(true);
|
||||||
m_drawing_area = {};
|
m_drawing_area = {};
|
||||||
|
|
|
@ -22,12 +22,11 @@ public:
|
||||||
GPUBackend();
|
GPUBackend();
|
||||||
virtual ~GPUBackend();
|
virtual ~GPUBackend();
|
||||||
|
|
||||||
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
|
|
||||||
ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; }
|
ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; }
|
||||||
|
|
||||||
virtual bool Initialize(bool force_thread);
|
virtual bool Initialize(bool force_thread);
|
||||||
virtual void UpdateSettings();
|
virtual void UpdateSettings();
|
||||||
virtual void Reset(bool clear_vram);
|
virtual void Reset();
|
||||||
virtual void Shutdown();
|
virtual void Shutdown();
|
||||||
|
|
||||||
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
|
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
|
||||||
|
@ -64,8 +63,6 @@ protected:
|
||||||
|
|
||||||
void HandleCommand(const GPUBackendCommand* cmd);
|
void HandleCommand(const GPUBackendCommand* cmd);
|
||||||
|
|
||||||
u16* m_vram_ptr = nullptr;
|
|
||||||
|
|
||||||
Common::Rectangle<u32> m_drawing_area{};
|
Common::Rectangle<u32> m_drawing_area{};
|
||||||
|
|
||||||
Threading::KernelSemaphore m_sync_semaphore;
|
Threading::KernelSemaphore m_sync_semaphore;
|
||||||
|
|
|
@ -582,7 +582,7 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
|
||||||
{
|
{
|
||||||
DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width,
|
DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width,
|
||||||
m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
|
m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
|
||||||
&m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
|
&g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// switch to pixel-by-pixel read state
|
// switch to pixel-by-pixel read state
|
||||||
|
|
|
@ -130,8 +130,6 @@ private:
|
||||||
|
|
||||||
GPU_HW::GPU_HW() : GPU()
|
GPU_HW::GPU_HW() : GPU()
|
||||||
{
|
{
|
||||||
m_vram_ptr = m_vram_shadow.data();
|
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
s_draw_number = 0;
|
s_draw_number = 0;
|
||||||
#endif
|
#endif
|
||||||
|
@ -251,9 +249,8 @@ void GPU_HW::Reset(bool clear_vram)
|
||||||
|
|
||||||
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
||||||
|
|
||||||
m_vram_shadow.fill(0);
|
|
||||||
if (m_sw_renderer)
|
if (m_sw_renderer)
|
||||||
m_sw_renderer->Reset(clear_vram);
|
m_sw_renderer->Reset();
|
||||||
|
|
||||||
m_batch = {};
|
m_batch = {};
|
||||||
m_batch_ubo_data = {};
|
m_batch_ubo_data = {};
|
||||||
|
@ -442,7 +439,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
|
||||||
Panic("Failed to recreate buffers.");
|
Panic("Failed to recreate buffers.");
|
||||||
|
|
||||||
RestoreDeviceContext();
|
RestoreDeviceContext();
|
||||||
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
|
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false);
|
||||||
UpdateDepthBufferFromMaskBit();
|
UpdateDepthBufferFromMaskBit();
|
||||||
UpdateDisplay();
|
UpdateDisplay();
|
||||||
}
|
}
|
||||||
|
@ -2291,8 +2288,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
|
||||||
if (current_enabled == new_enabled)
|
if (current_enabled == new_enabled)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
m_vram_ptr = m_vram_shadow.data();
|
|
||||||
|
|
||||||
if (!new_enabled)
|
if (!new_enabled)
|
||||||
{
|
{
|
||||||
if (m_sw_renderer)
|
if (m_sw_renderer)
|
||||||
|
@ -2310,7 +2305,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
|
||||||
{
|
{
|
||||||
FlushRender();
|
FlushRender();
|
||||||
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
||||||
std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT);
|
|
||||||
|
|
||||||
// Sync the drawing area.
|
// Sync the drawing area.
|
||||||
GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand();
|
GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand();
|
||||||
|
@ -2319,7 +2313,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
|
||||||
}
|
}
|
||||||
|
|
||||||
m_sw_renderer = std::move(sw_renderer);
|
m_sw_renderer = std::move(sw_renderer);
|
||||||
m_vram_ptr = m_sw_renderer->GetVRAM();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
|
void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
|
||||||
|
@ -2429,7 +2422,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
||||||
|
|
||||||
// Stage the readback and copy it into our shadow buffer.
|
// Stage the readback and copy it into the global VRAM array (g_vram).
|
||||||
g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
|
g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
|
||||||
reinterpret_cast<u32*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
|
reinterpret_cast<u32*>(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
|
||||||
VRAM_WIDTH * sizeof(u16));
|
VRAM_WIDTH * sizeof(u16));
|
||||||
|
|
||||||
RestoreDeviceContext();
|
RestoreDeviceContext();
|
||||||
|
|
|
@ -222,8 +222,6 @@ private:
|
||||||
std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
|
std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
|
||||||
std::unique_ptr<GPUTexture> m_vram_write_texture;
|
std::unique_ptr<GPUTexture> m_vram_write_texture;
|
||||||
|
|
||||||
FixedHeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
|
|
||||||
|
|
||||||
std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
|
std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
|
||||||
|
|
||||||
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
||||||
|
|
|
@ -24,10 +24,7 @@ ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
|
||||||
return std::tie(v1, v2);
|
return std::tie(v1, v2);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_SW::GPU_SW()
|
GPU_SW::GPU_SW() = default;
|
||||||
{
|
|
||||||
m_vram_ptr = m_backend.GetVRAM();
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_SW::~GPU_SW()
|
GPU_SW::~GPU_SW()
|
||||||
{
|
{
|
||||||
|
@ -84,7 +81,7 @@ void GPU_SW::Reset(bool clear_vram)
|
||||||
{
|
{
|
||||||
GPU::Reset(clear_vram);
|
GPU::Reset(clear_vram);
|
||||||
|
|
||||||
m_backend.Reset(clear_vram);
|
m_backend.Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_SW::UpdateSettings(const Settings& old_settings)
|
void GPU_SW::UpdateSettings(const Settings& old_settings)
|
||||||
|
@ -269,7 +266,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
|
||||||
const u32 rows = height >> interlaced_shift;
|
const u32 rows = height >> interlaced_shift;
|
||||||
dst_stride <<= interlaced_shift;
|
dst_stride <<= interlaced_shift;
|
||||||
|
|
||||||
const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x];
|
const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x];
|
||||||
const u32 src_step = VRAM_WIDTH << interleaved_shift;
|
const u32 src_step = VRAM_WIDTH << interleaved_shift;
|
||||||
for (u32 row = 0; row < rows; row++)
|
for (u32 row = 0; row < rows; row++)
|
||||||
{
|
{
|
||||||
|
@ -286,7 +283,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
|
||||||
const u32 end_x = src_x + width;
|
const u32 end_x = src_x + width;
|
||||||
for (u32 row = 0; row < rows; row++)
|
for (u32 row = 0; row < rows; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
|
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
|
||||||
|
|
||||||
for (u32 col = src_x; col < end_x; col++)
|
for (u32 col = src_x; col < end_x; col++)
|
||||||
|
@ -352,7 +349,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
|
||||||
|
|
||||||
if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT)
|
if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT)
|
||||||
{
|
{
|
||||||
const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
|
const u8* src_ptr = reinterpret_cast<const u8*>(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
|
||||||
const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
|
const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
|
||||||
for (u32 row = 0; row < rows; row++)
|
for (u32 row = 0; row < rows; row++)
|
||||||
{
|
{
|
||||||
|
@ -412,7 +409,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
|
||||||
{
|
{
|
||||||
for (u32 row = 0; row < rows; row++)
|
for (u32 row = 0; row < rows; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
|
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
|
||||||
|
|
||||||
for (u32 col = 0; col < width; col++)
|
for (u32 col = 0; col < width; col++)
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||||
|
|
||||||
|
#include "gpu.h"
|
||||||
#include "gpu_sw_backend.h"
|
#include "gpu_sw_backend.h"
|
||||||
#include "system.h"
|
#include "system.h"
|
||||||
|
|
||||||
|
@ -8,11 +9,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
|
GPU_SW_Backend::GPU_SW_Backend() = default;
|
||||||
{
|
|
||||||
m_vram.fill(0);
|
|
||||||
m_vram_ptr = m_vram.data();
|
|
||||||
}
|
|
||||||
|
|
||||||
GPU_SW_Backend::~GPU_SW_Backend() = default;
|
GPU_SW_Backend::~GPU_SW_Backend() = default;
|
||||||
|
|
||||||
|
@ -21,12 +18,9 @@ bool GPU_SW_Backend::Initialize(bool force_thread)
|
||||||
return GPUBackend::Initialize(force_thread);
|
return GPUBackend::Initialize(force_thread);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_SW_Backend::Reset(bool clear_vram)
|
void GPU_SW_Backend::Reset()
|
||||||
{
|
{
|
||||||
GPUBackend::Reset(clear_vram);
|
GPUBackend::Reset();
|
||||||
|
|
||||||
if (clear_vram)
|
|
||||||
m_vram.fill(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
|
void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
|
||||||
|
@ -728,7 +722,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
||||||
std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
|
std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (params.interlaced_rendering)
|
else if (params.interlaced_rendering)
|
||||||
|
@ -741,7 +735,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
|
||||||
if ((row & u32(1)) == active_field)
|
if ((row & u32(1)) == active_field)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
|
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
|
||||||
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
||||||
{
|
{
|
||||||
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
||||||
|
@ -754,7 +748,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
const u32 row = (y + yoffs) % VRAM_HEIGHT;
|
||||||
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
|
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
|
||||||
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
for (u32 xoffs = 0; xoffs < width; xoffs++)
|
||||||
{
|
{
|
||||||
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
const u32 col = (x + xoffs) % VRAM_WIDTH;
|
||||||
|
@ -771,7 +765,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
|
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
|
||||||
{
|
{
|
||||||
const u16* src_ptr = static_cast<const u16*>(data);
|
const u16* src_ptr = static_cast<const u16*>(data);
|
||||||
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
|
u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
|
||||||
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
for (u32 yoffs = 0; yoffs < height; yoffs++)
|
||||||
{
|
{
|
||||||
std::copy_n(src_ptr, width, dst_ptr);
|
std::copy_n(src_ptr, width, dst_ptr);
|
||||||
|
@ -788,7 +782,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
||||||
|
|
||||||
for (u32 row = 0; row < height;)
|
for (u32 row = 0; row < height;)
|
||||||
{
|
{
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
for (u32 col = 0; col < width;)
|
for (u32 col = 0; col < width;)
|
||||||
{
|
{
|
||||||
// TODO: Handle unaligned reads...
|
// TODO: Handle unaligned reads...
|
||||||
|
@ -844,8 +838,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
|
||||||
{
|
{
|
||||||
for (u32 row = 0; row < height; row++)
|
for (u32 row = 0; row < height; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
|
|
||||||
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
|
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
|
||||||
{
|
{
|
||||||
|
@ -860,8 +854,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
|
||||||
{
|
{
|
||||||
for (u32 row = 0; row < height; row++)
|
for (u32 row = 0; row < height; row++)
|
||||||
{
|
{
|
||||||
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
|
||||||
|
|
||||||
for (u32 col = 0; col < width; col++)
|
for (u32 col = 0; col < width; col++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -14,12 +14,12 @@ public:
|
||||||
~GPU_SW_Backend() override;
|
~GPU_SW_Backend() override;
|
||||||
|
|
||||||
bool Initialize(bool force_thread) override;
|
bool Initialize(bool force_thread) override;
|
||||||
void Reset(bool clear_vram) override;
|
void Reset() override;
|
||||||
|
|
||||||
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
|
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return g_vram[VRAM_WIDTH * y + x]; }
|
||||||
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
|
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &g_vram[VRAM_WIDTH * y + x]; }
|
||||||
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
|
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &g_vram[VRAM_WIDTH * y + x]; }
|
||||||
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
|
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { g_vram[VRAM_WIDTH * y + x] = value; }
|
||||||
|
|
||||||
// this is actually ((31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
|
// this is actually ((31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
|
||||||
static constexpr u32 DITHER_LUT_SIZE = 512;
|
static constexpr u32 DITHER_LUT_SIZE = 512;
|
||||||
|
@ -165,6 +165,4 @@ protected:
|
||||||
const GPUBackendDrawLineCommand::Vertex* p0,
|
const GPUBackendDrawLineCommand::Vertex* p0,
|
||||||
const GPUBackendDrawLineCommand::Vertex* p1);
|
const GPUBackendDrawLineCommand::Vertex* p1);
|
||||||
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
|
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
|
||||||
|
|
||||||
std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram;
|
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue