GPU: Improve batching by sampling a VRAM copy

This commit is contained in:
Connor McLaughlin 2019-09-26 23:33:20 +10:00
parent 332b5481e8
commit 792ec27b1a
6 changed files with 189 additions and 167 deletions

View File

@ -52,8 +52,8 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_GPUSTAT.bits); sw.Do(&m_GPUSTAT.bits);
sw.Do(&m_render_state.texture_base_x); sw.Do(&m_render_state.texture_page_x);
sw.Do(&m_render_state.texture_base_y); sw.Do(&m_render_state.texture_page_y);
sw.Do(&m_render_state.texture_palette_x); sw.Do(&m_render_state.texture_palette_x);
sw.Do(&m_render_state.texture_palette_y); sw.Do(&m_render_state.texture_palette_y);
sw.Do(&m_render_state.texture_color_mode); sw.Do(&m_render_state.texture_color_mode);
@ -69,10 +69,10 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_render_state.texture_changed); sw.Do(&m_render_state.texture_changed);
sw.Do(&m_render_state.transparency_mode_changed); sw.Do(&m_render_state.transparency_mode_changed);
sw.Do(&m_drawing_area.top_left_x); sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top_left_y); sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.bottom_right_x); sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom_right_y); sw.Do(&m_drawing_area.bottom);
sw.Do(&m_drawing_offset.x); sw.Do(&m_drawing_offset.x);
sw.Do(&m_drawing_offset.y); sw.Do(&m_drawing_offset.y);
sw.Do(&m_drawing_offset.x); sw.Do(&m_drawing_offset.x);
@ -429,18 +429,18 @@ void GPU::WriteGP0(u32 value)
case 0xE3: // Set drawing area top left case 0xE3: // Set drawing area top left
{ {
m_drawing_area.top_left_x = param & UINT32_C(0x3FF); m_drawing_area.left = param & UINT32_C(0x3FF);
m_drawing_area.top_left_y = (param >> 10) & UINT32_C(0x1FF); m_drawing_area.top = (param >> 10) & UINT32_C(0x1FF);
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.top_left_x, m_drawing_area.top_left_y); Log_DebugPrintf("Set drawing area top-left: (%u, %u)", m_drawing_area.left, m_drawing_area.top);
} }
break; break;
case 0xE4: // Set drawing area bottom right case 0xE4: // Set drawing area bottom right
{ {
m_drawing_area.bottom_right_x = param & UINT32_C(0x3FF); m_drawing_area.right = param & UINT32_C(0x3FF);
m_drawing_area.bottom_right_y = (param >> 10) & UINT32_C(0x1FF); m_drawing_area.bottom = (param >> 10) & UINT32_C(0x1FF);
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.bottom_right_x, Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right,
m_drawing_area.bottom_right_y); m_drawing_area.bottom);
} }
break; break;
@ -785,8 +785,8 @@ void GPU::RenderState::SetFromPageAttribute(u16 value)
if (texpage_attribute == value) if (texpage_attribute == value)
return; return;
texture_base_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64)); texture_page_x = static_cast<s32>(ZeroExtend32(value & UINT16_C(0x0F)) * UINT32_C(64));
texture_base_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256)); texture_page_y = static_cast<s32>(ZeroExtend32((value >> 4) & UINT16_C(1)) * UINT32_C(256));
texture_color_mode = (static_cast<TextureColorMode>((value >> 7) & UINT16_C(0x03))); texture_color_mode = (static_cast<TextureColorMode>((value >> 7) & UINT16_C(0x03)));
if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit) if (texture_color_mode == TextureColorMode::Reserved_Direct16Bit)
texture_color_mode = TextureColorMode::Direct16Bit; texture_color_mode = TextureColorMode::Direct16Bit;

View File

@ -228,8 +228,8 @@ protected:
static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111); static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111);
// decoded values // decoded values
s32 texture_base_x; s32 texture_page_x;
s32 texture_base_y; s32 texture_page_y;
s32 texture_palette_x; s32 texture_palette_x;
s32 texture_palette_y; s32 texture_palette_y;
TextureColorMode texture_color_mode; TextureColorMode texture_color_mode;
@ -265,8 +265,8 @@ protected:
struct DrawingArea struct DrawingArea
{ {
u32 top_left_x, top_left_y; u32 left, top;
u32 bottom_right_x, bottom_right_y; u32 right, bottom;
} m_drawing_area = {}; } m_drawing_area = {};
struct DrawingOffset struct DrawingOffset

View File

@ -1,6 +1,8 @@
#include "gpu_hw.h" #include "gpu_hw.h"
#include "YBaseLib/Assert.h" #include "YBaseLib/Assert.h"
#include "YBaseLib/Log.h"
#include <sstream> #include <sstream>
Log_SetChannel(GPU_HW);
GPU_HW::GPU_HW() = default; GPU_HW::GPU_HW() = default;
@ -110,10 +112,10 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices)
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
{ {
*left = m_drawing_area.top_left_x; *left = m_drawing_area.left;
*right = m_drawing_area.bottom_right_x + 1; *right = m_drawing_area.right + 1;
*top = m_drawing_area.top_left_y; *top = m_drawing_area.top;
*bottom = m_drawing_area.bottom_right_y + 1; *bottom = m_drawing_area.bottom + 1;
} }
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
@ -176,7 +178,6 @@ in vec2 a_tex0;
out vec4 v_col0; out vec4 v_col0;
#if TEXTURED #if TEXTURED
uniform vec2 u_tex_scale;
out vec2 v_tex0; out vec2 v_tex0;
#endif #endif
@ -199,26 +200,75 @@ void main()
return ss.str(); return ss.str();
} }
std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending) std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode)
{ {
std::stringstream ss; std::stringstream ss;
GenerateShaderHeader(ss); GenerateShaderHeader(ss);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "BLENDING", blending); DefineMacro(ss, "BLENDING", blending);
DefineMacro(ss, "PALETTE",
textured && (texture_color_mode == GPU::TextureColorMode::Palette4Bit ||
texture_color_mode == GPU::TextureColorMode::Palette8Bit));
DefineMacro(ss, "PALETTE_4_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", textured && texture_color_mode == GPU::TextureColorMode::Palette8Bit);
ss << R"( ss << R"(
in vec4 v_col0; in vec4 v_col0;
#if TEXTURED #if TEXTURED
in vec2 v_tex0; in vec2 v_tex0;
uniform sampler2D samp0; uniform sampler2D samp0;
uniform ivec2 u_texture_page_base;
#if PALETTE
uniform ivec2 u_texture_palette_base;
#endif
#endif #endif
out vec4 o_col0; out vec4 o_col0;
#if TEXTURED
vec4 SampleFromVRAM(vec2 coord)
{
// from 0..1 to 0..255
ivec2 icoord = ivec2(coord * vec2(255.0));
// adjust for tightly packed palette formats
ivec2 index_coord = icoord;
#if PALETTE_4_BIT
index_coord.x /= 4;
#elif PALETTE_8_BIT
index_coord.x /= 2;
#endif
// fixup coords
ivec2 vicoord = ivec2(u_texture_page_base.x + index_coord.x,
fixYCoord(u_texture_page_base.y + index_coord.y));
// load colour/palette
vec4 color = texelFetch(samp0, vicoord & VRAM_COORD_MASK, 0);
// apply palette
#if PALETTE
#if PALETTE_4_BIT
int subpixel = int(icoord.x) & 3;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
#elif PALETTE_8_BIT
int subpixel = int(icoord.x) & 1;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
#endif
ivec2 palette_icoord = ivec2(u_texture_palette_base.x + palette_index, fixYCoord(u_texture_palette_base.y));
color = texelFetch(samp0, palette_icoord & VRAM_COORD_MASK, 0);
#endif
return color;
}
#endif
void main() void main()
{ {
#if TEXTURED #if TEXTURED
vec4 texcol = texture(samp0, v_tex0); vec4 texcol = SampleFromVRAM(v_tex0);
if (texcol == vec4(0.0, 0.0, 0.0, 0.0)) if (texcol == vec4(0.0, 0.0, 0.0, 0.0))
discard; discard;
@ -255,64 +305,6 @@ void main()
return ss.str(); return ss.str();
} }
std::string GPU_HW::GenerateTexturePageFragmentShader(TextureColorMode mode)
{
const bool is_palette = (mode == GPU::TextureColorMode::Palette4Bit || mode == GPU::TextureColorMode::Palette8Bit);
std::stringstream ss;
GenerateShaderHeader(ss);
DefineMacro(ss, "PALETTE", is_palette);
DefineMacro(ss, "PALETTE_4_BIT", mode == GPU::TextureColorMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", mode == GPU::TextureColorMode::Palette8Bit);
ss << R"(
uniform sampler2D samp0;
uniform ivec2 base_offset;
#if PALETTE
uniform ivec2 palette_offset;
#endif
in vec2 v_tex0;
out vec4 o_col0;
void main()
{
ivec2 local_coords = ivec2(gl_FragCoord.xy);
#if PALETTE_4_BIT
local_coords.x /= 4;
#elif PALETTE_8_BIT
local_coords.x /= 2;
#endif
// fixup coords
ivec2 coords = ivec2(base_offset.x + local_coords.x, fixYCoord(base_offset.y + local_coords.y));
// load colour/palette
vec4 color = texelFetch(samp0, coords & VRAM_COORD_MASK, 0);
// apply palette
#if PALETTE
#if PALETTE_4_BIT
int subpixel = int(gl_FragCoord.x) & 3;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
#elif PALETTE_8_BIT
int subpixel = int(gl_FragCoord.x) & 1;
uint vram_value = RGBA8ToRGBA5551(color);
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
#endif
ivec2 palette_coords = ivec2(palette_offset.x + palette_index, fixYCoord(palette_offset.y));
color = texelFetch(samp0, palette_coords & VRAM_COORD_MASK, 0);
#endif
o_col0 = color;
}
)";
return ss.str();
}
std::string GPU_HW::GenerateFillFragmentShader() std::string GPU_HW::GenerateFillFragmentShader()
{ {
std::stringstream ss; std::stringstream ss;
@ -331,8 +323,6 @@ void main()
return ss.str(); return ss.str();
} }
void GPU_HW::UpdateTexturePageTexture() {}
GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
{ {
if (rc.primitive == Primitive::Line) if (rc.primitive == Primitive::Line)
@ -343,6 +333,8 @@ GPU_HW::HWRenderBatch::Primitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc
return HWRenderBatch::Primitive::Triangles; return HWRenderBatch::Primitive::Triangles;
} }
void GPU_HW::InvalidateVRAMReadCache() {}
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
{ {
if (rc.texture_enable) if (rc.texture_enable)
@ -375,20 +367,46 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
if (m_render_state.IsTextureChanged()) if (m_render_state.IsTextureChanged())
{ {
if (!IsFlushed()) if (!IsFlushed())
{
// We only need to refresh the VRAM read copy if the area being rendered to intersects the
// texture page that is about to be sampled. The page covers [left, right) x [top, bottom).
const u32 texture_page_left = m_render_state.texture_page_x;
// Fix: the right edge is derived from the page's X origin (previously used texture_page_y).
const u32 texture_page_right = texture_page_left + TEXTURE_PAGE_WIDTH;
const u32 texture_page_top = m_render_state.texture_page_y;
const u32 texture_page_bottom = texture_page_top + TEXTURE_PAGE_HEIGHT;
// Standard rectangle intersection test. The drawing area's right/bottom are inclusive
// (CalcScissorRect adds one to both), hence <= against them. The previous vertical test
// (top > area.bottom && bottom < area.top) could never be true, so overlaps were never detected.
const bool texture_page_overlaps =
  (texture_page_left <= m_drawing_area.right && texture_page_right > m_drawing_area.left &&
   texture_page_top <= m_drawing_area.bottom && texture_page_bottom > m_drawing_area.top);
// TODO: Check palette too.
if (texture_page_overlaps)
{
Log_DebugPrintf("Invalidating VRAM read cache due to drawing area overlap");
InvalidateVRAMReadCache();
}
// texture page changed?
// TODO: Move this to the shader...
FlushRender(); FlushRender();
UpdateTexturePageTexture(); }
m_render_state.ClearTextureChangedFlag(); m_render_state.ClearTextureChangedFlag();
} }
if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed()) if (m_batch.transparency_enable && m_render_state.IsTransparencyModeChanged() && !IsFlushed())
FlushRender(); FlushRender();
m_batch.transparency_mode = m_render_state.transparency_mode;
m_render_state.ClearTransparencyModeChangedFlag(); m_render_state.ClearTransparencyModeChangedFlag();
m_batch.texture_color_mode = m_render_state.texture_color_mode;
m_batch.texture_page_x = m_render_state.texture_page_x;
m_batch.texture_page_y = m_render_state.texture_page_y;
m_batch.texture_palette_x = m_render_state.texture_palette_x;
m_batch.texture_palette_y = m_render_state.texture_palette_y;
m_batch.transparency_mode = m_render_state.transparency_mode;
} }
} }
// extract state // extract state
const bool rc_transparency_enable = rc.transparency_enable;
const bool rc_texture_enable = rc.texture_enable; const bool rc_texture_enable = rc.texture_enable;
const bool rc_texture_blend_enable = !rc.texture_blend_disable; const bool rc_texture_blend_enable = !rc.texture_blend_disable;
const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc); const HWRenderBatch::Primitive rc_primitive = GetPrimitiveForCommand(rc);
@ -399,14 +417,15 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices)
// including the degenerate triangles for strips // including the degenerate triangles for strips
const u32 max_added_vertices = num_vertices + 2; const u32 max_added_vertices = num_vertices + 2;
const bool params_changed = const bool params_changed =
(m_batch.texture_enable != rc_texture_enable || m_batch.texture_blending_enable != rc_texture_blend_enable || (m_batch.transparency_enable != rc_transparency_enable || m_batch.texture_enable != rc_texture_enable ||
m_batch.primitive != rc_primitive); m_batch.texture_blending_enable != rc_texture_blend_enable || m_batch.primitive != rc_primitive);
if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed) if ((m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT || params_changed)
FlushRender(); FlushRender();
} }
m_batch.primitive = rc_primitive;
m_batch.transparency_enable = rc_transparency_enable;
m_batch.texture_enable = rc_texture_enable; m_batch.texture_enable = rc_texture_enable;
m_batch.texture_blending_enable = rc_texture_blend_enable; m_batch.texture_blending_enable = rc_texture_blend_enable;
m_batch.primitive = rc_primitive;
LoadVertices(rc, num_vertices); LoadVertices(rc, num_vertices);
} }

View File

@ -39,6 +39,11 @@ protected:
bool transparency_enable; bool transparency_enable;
bool texture_enable; bool texture_enable;
bool texture_blending_enable; bool texture_blending_enable;
TextureColorMode texture_color_mode;
u32 texture_page_x;
u32 texture_page_y;
u32 texture_palette_x;
u32 texture_palette_y;
TransparencyMode transparency_mode; TransparencyMode transparency_mode;
std::vector<HWVertex> vertices; std::vector<HWVertex> vertices;
@ -46,6 +51,10 @@ protected:
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024; static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex); static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
// Partitions VRAM into a grid of square TEXTURE_TILE_SIZE x TEXTURE_TILE_SIZE tiles; the counts
// below are the number of tiles per row/column and in total.
// NOTE(review): none of these constants are referenced by the code visible in this change —
// presumably groundwork for partial VRAM read-texture updates; confirm before relying on them.
static constexpr u32 TEXTURE_TILE_SIZE = 256;
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
static constexpr u32 TEXTURE_TILE_Y_COUNT = VRAM_HEIGHT / TEXTURE_TILE_SIZE;
static constexpr u32 TEXTURE_TILE_COUNT = TEXTURE_TILE_X_COUNT * TEXTURE_TILE_Y_COUNT;
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba) static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
{ {
@ -55,7 +64,7 @@ protected:
static_cast<float>(rgba >> 24) * (1.0f / 255.0f)); static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
} }
virtual void UpdateTexturePageTexture(); virtual void InvalidateVRAMReadCache();
bool IsFlushed() const { return m_batch.vertices.empty(); } bool IsFlushed() const { return m_batch.vertices.empty(); }
@ -64,9 +73,8 @@ protected:
void CalcScissorRect(int* left, int* top, int* right, int* bottom); void CalcScissorRect(int* left, int* top, int* right, int* bottom);
std::string GenerateVertexShader(bool textured); std::string GenerateVertexShader(bool textured);
std::string GenerateFragmentShader(bool textured, bool blending); std::string GenerateFragmentShader(bool textured, bool blending, TextureColorMode texture_color_mode);
std::string GenerateScreenQuadVertexShader(); std::string GenerateScreenQuadVertexShader();
std::string GenerateTexturePageFragmentShader(TextureColorMode mode);
std::string GenerateFillFragmentShader(); std::string GenerateFillFragmentShader();
HWRenderBatch m_batch = {}; HWRenderBatch m_batch = {};

View File

@ -43,7 +43,7 @@ void GPU_HW_OpenGL::RenderUI()
ImGui::TextUnformatted("Texture Page Updates:"); ImGui::TextUnformatted("Texture Page Updates:");
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::Text("%u", m_stats.num_texture_page_updates); ImGui::Text("%u", m_stats.num_vram_read_texture_updates);
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::TextUnformatted("Batches Drawn:"); ImGui::TextUnformatted("Batches Drawn:");
@ -64,6 +64,11 @@ void GPU_HW_OpenGL::RenderUI()
m_stats = {}; m_stats = {};
} }
// Marks the VRAM read texture as stale. No copy happens here: the flag is checked in
// FlushRender(), which calls UpdateVRAMReadTexture() before drawing a non-empty batch.
void GPU_HW_OpenGL::InvalidateVRAMReadCache()
{
m_vram_read_texture_dirty = true;
}
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y) std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
{ {
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y)); return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
@ -79,11 +84,11 @@ void GPU_HW_OpenGL::CreateFramebuffer()
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0);
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
m_texture_page_texture = m_vram_read_texture =
std::make_unique<GL::Texture>(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false); std::make_unique<GL::Texture>(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false);
glGenFramebuffers(1, &m_texture_page_fbo_id); glGenFramebuffers(1, &m_vram_read_fbo_id);
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); glBindFramebuffer(GL_FRAMEBUFFER, m_vram_read_fbo_id);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_page_texture->GetGLId(), 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_vram_read_texture->GetGLId(), 0);
Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
} }
@ -100,9 +105,9 @@ void GPU_HW_OpenGL::ClearFramebuffer()
void GPU_HW_OpenGL::DestroyFramebuffer() void GPU_HW_OpenGL::DestroyFramebuffer()
{ {
glDeleteFramebuffers(1, &m_texture_page_fbo_id); glDeleteFramebuffers(1, &m_vram_read_fbo_id);
m_texture_page_fbo_id = 0; m_vram_read_fbo_id = 0;
m_texture_page_texture.reset(); m_vram_read_texture.reset();
glDeleteFramebuffers(1, &m_framebuffer_fbo_id); glDeleteFramebuffers(1, &m_framebuffer_fbo_id);
m_framebuffer_fbo_id = 0; m_framebuffer_fbo_id = 0;
@ -132,41 +137,29 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
bool GPU_HW_OpenGL::CompilePrograms() bool GPU_HW_OpenGL::CompilePrograms()
{ {
bool result = true; for (u32 textured = 0; textured < 2; textured++)
result &= CompileProgram(m_color_program, false, false);
result &= CompileProgram(m_texture_program, true, false);
result &= CompileProgram(m_blended_texture_program, true, true);
if (!result)
return false;
const std::string screen_quad_vs = GenerateScreenQuadVertexShader();
for (u32 palette_size = 0; palette_size < static_cast<u32>(m_texture_page_programs.size()); palette_size++)
{ {
const std::string fs = GenerateTexturePageFragmentShader(static_cast<TextureColorMode>(palette_size)); for (u32 blending = 0; blending < 2; blending++)
{
GL::Program& prog = m_texture_page_programs[palette_size]; for (u32 format = 0; format < 3; format++)
if (!prog.Compile(screen_quad_vs.c_str(), fs.c_str())) {
return false; // TODO: eliminate duplicate shaders here
if (!CompileProgram(m_render_programs[textured][blending][format], ConvertToBoolUnchecked(textured),
prog.BindFragData(0, "o_col0"); ConvertToBoolUnchecked(blending), static_cast<TextureColorMode>(format)))
{
if (!prog.Link()) return false;
return false; }
}
prog.RegisterUniform("samp0"); }
prog.RegisterUniform("base_offset");
prog.RegisterUniform("palette_offset");
prog.Bind();
prog.Uniform1i(0, 0);
} }
return true; return true;
} }
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending) bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode)
{ {
const std::string vs = GenerateVertexShader(textured); const std::string vs = GenerateVertexShader(textured);
const std::string fs = GenerateFragmentShader(textured, blending); const std::string fs = GenerateFragmentShader(textured, blending, texture_color_mode);
if (!prog.Compile(vs.c_str(), fs.c_str())) if (!prog.Compile(vs.c_str(), fs.c_str()))
return false; return false;
@ -187,21 +180,29 @@ bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blendi
if (textured) if (textured)
{ {
prog.RegisterUniform("samp0"); prog.RegisterUniform("samp0");
prog.RegisterUniform("u_texture_page_base");
prog.RegisterUniform("u_texture_palette_base");
prog.Uniform1i(1, 0); prog.Uniform1i(1, 0);
} }
return true; return true;
} }
void GPU_HW_OpenGL::SetProgram(bool textured, bool blending) void GPU_HW_OpenGL::SetProgram()
{ {
const GL::Program& prog = textured ? (blending ? m_blended_texture_program : m_texture_program) : m_color_program; const GL::Program& prog =
m_render_programs[BoolToUInt32(m_batch.texture_enable)][BoolToUInt32(m_batch.texture_blending_enable)]
[static_cast<u32>(m_batch.texture_color_mode)];
prog.Bind(); prog.Bind();
if (textured)
m_texture_page_texture->Bind();
prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y); prog.Uniform2i(0, m_drawing_offset.x, m_drawing_offset.y);
if (m_batch.texture_enable)
{
m_vram_read_texture->Bind();
prog.Uniform2i(2, m_batch.texture_page_x, m_batch.texture_page_y);
prog.Uniform2i(3, m_batch.texture_palette_x, m_batch.texture_palette_y);
}
} }
void GPU_HW_OpenGL::SetViewport() void GPU_HW_OpenGL::SetViewport()
@ -302,6 +303,8 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color)
const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color)); const auto [r, g, b, a] = RGBA8ToFloat(RGBA5551ToRGBA8888(color));
glClearColor(r, g, b, a); glClearColor(r, g, b, a);
glClear(GL_COLOR_BUFFER_BIT); glClear(GL_COLOR_BUFFER_BIT);
InvalidateVRAMReadCache();
} }
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
@ -334,6 +337,8 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// lower-left origin flip happens here // lower-left origin flip happens here
glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE, glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE,
rgba_data.data()); rgba_data.data());
InvalidateVRAMReadCache();
} }
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
@ -347,31 +352,18 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height, glBlitFramebuffer(src_x, src_y, src_x + width, src_y + height, dst_x, dst_y, dst_x + width, dst_y + height,
GL_COLOR_BUFFER_BIT, GL_NEAREST); GL_COLOR_BUFFER_BIT, GL_NEAREST);
InvalidateVRAMReadCache();
} }
void GPU_HW_OpenGL::UpdateTexturePageTexture() void GPU_HW_OpenGL::UpdateVRAMReadTexture()
{ {
m_stats.num_texture_page_updates++; m_stats.num_vram_read_texture_updates++;
m_vram_read_texture_dirty = false;
glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); // TODO: Fallback blit path, and partial updates.
m_framebuffer_texture->Bind(); glCopyImageSubData(m_framebuffer_texture->GetGLId(), GL_TEXTURE_2D, 0, 0, 0, 0, m_vram_read_texture->GetGLId(),
GL_TEXTURE_2D, 0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 1);
glDisable(GL_BLEND);
glDisable(GL_SCISSOR_TEST);
glViewport(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT);
glBindVertexArray(m_attributeless_vao_id);
const GL::Program& prog = m_texture_page_programs[static_cast<u8>(m_render_state.texture_color_mode)];
prog.Bind();
prog.Uniform2i(1, m_render_state.texture_base_x, m_render_state.texture_base_y);
if (m_render_state.texture_color_mode >= GPU::TextureColorMode::Palette4Bit)
prog.Uniform2i(2, m_render_state.texture_palette_x, m_render_state.texture_palette_y);
glDrawArrays(GL_TRIANGLES, 0, 3);
m_framebuffer_texture->Unbind();
glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id);
} }
void GPU_HW_OpenGL::FlushRender() void GPU_HW_OpenGL::FlushRender()
@ -379,6 +371,9 @@ void GPU_HW_OpenGL::FlushRender()
if (m_batch.vertices.empty()) if (m_batch.vertices.empty())
return; return;
if (m_vram_read_texture_dirty)
UpdateVRAMReadTexture();
m_stats.num_batches++; m_stats.num_batches++;
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size()); m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
@ -386,7 +381,7 @@ void GPU_HW_OpenGL::FlushRender()
glDisable(GL_DEPTH_TEST); glDisable(GL_DEPTH_TEST);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
glDepthMask(GL_FALSE); glDepthMask(GL_FALSE);
SetProgram(m_batch.texture_enable, m_batch.texture_blending_enable); SetProgram();
SetViewport(); SetViewport();
SetScissor(); SetScissor();
SetBlendState(); SetBlendState();

View File

@ -23,13 +23,13 @@ protected:
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u16 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void UpdateTexturePageTexture() override;
void FlushRender() override; void FlushRender() override;
void InvalidateVRAMReadCache() override;
private: private:
struct GLStats struct GLStats
{ {
u32 num_texture_page_updates; u32 num_vram_read_texture_updates;
u32 num_batches; u32 num_batches;
u32 num_vertices; u32 num_vertices;
}; };
@ -39,13 +39,14 @@ private:
void CreateFramebuffer(); void CreateFramebuffer();
void ClearFramebuffer(); void ClearFramebuffer();
void DestroyFramebuffer(); void DestroyFramebuffer();
void UpdateVRAMReadTexture();
void CreateVertexBuffer(); void CreateVertexBuffer();
bool CompilePrograms(); bool CompilePrograms();
bool CompileProgram(GL::Program& prog, bool textured, bool blending); bool CompileProgram(GL::Program& prog, bool textured, bool blending, TextureColorMode texture_color_mode);
void SetProgram(bool textured, bool blending); void SetProgram();
void SetViewport(); void SetViewport();
void SetScissor(); void SetScissor();
void SetBlendState(); void SetBlendState();
@ -53,16 +54,15 @@ private:
std::unique_ptr<GL::Texture> m_framebuffer_texture; std::unique_ptr<GL::Texture> m_framebuffer_texture;
GLuint m_framebuffer_fbo_id = 0; GLuint m_framebuffer_fbo_id = 0;
std::unique_ptr<GL::Texture> m_texture_page_texture; std::unique_ptr<GL::Texture> m_vram_read_texture;
GLuint m_texture_page_fbo_id = 0; GLuint m_vram_read_fbo_id = 0;
bool m_vram_read_texture_dirty = true;
GLuint m_vertex_buffer = 0; GLuint m_vertex_buffer = 0;
GLuint m_vao_id = 0; GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0; GLuint m_attributeless_vao_id = 0;
GL::Program m_texture_program; std::array<std::array<std::array<GL::Program, 3>, 2>, 2> m_render_programs;
GL::Program m_color_program;
GL::Program m_blended_texture_program;
std::array<GL::Program, 3> m_texture_page_programs; std::array<GL::Program, 3> m_texture_page_programs;
GLStats m_stats = {}; GLStats m_stats = {};