From 20401787a44a463c7d5e205f2aab36566e95cce8 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 27 Oct 2020 22:57:40 +1000 Subject: [PATCH] GPU/Software: Eliminate branches in ShadePixel --- src/core/gpu_sw_backend.cpp | 185 +++++++++++++++++++++++++----------- src/core/gpu_sw_backend.h | 15 ++- 2 files changed, 138 insertions(+), 62 deletions(-) diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index fc4d2d2f6..00fababf6 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -31,9 +31,13 @@ void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) { const GPURenderCommand rc{cmd->rc.bits}; const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + const GPUTextureMode texture_mode = + rc.texture_enable ? (cmd->draw_mode.texture_mode | + (rc.raw_texture_enable ? GPUTextureMode::RawTextureBit : static_cast(0))) : + GPUTextureMode::Disabled; - const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + const DrawTriangleFunction DrawFunction = + GetDrawTriangleFunction(rc.shading_enable, texture_mode, rc.transparency_enable, dithering_enable); (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); if (rc.quad_polygon) @@ -44,9 +48,12 @@ void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) { const GPURenderCommand rc{cmd->rc.bits}; const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; + const GPUTextureMode texture_mode = + rc.texture_enable ? (cmd->draw_mode.texture_mode | + (rc.raw_texture_enable ? GPUTextureMode::RawTextureBit : static_cast(0))) : + GPUTextureMode::Disabled; - const DrawRectangleFunction DrawFunction = - GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); + const DrawRectangleFunction DrawFunction = GetDrawRectangleFunction(texture_mode, rc.transparency_enable); (this->*DrawFunction)(cmd); } @@ -79,13 +86,13 @@ constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT() static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT(); -template +template void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) { VRAMPixel color; bool transparent; - if constexpr (texture_enable) + if constexpr (texture_mode != GPUTextureMode::Disabled) { // Apply texture window // TODO: Precompute the second half @@ -96,6 +103,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman switch (cmd->draw_mode.texture_mode) { case GPUTextureMode::Palette4Bit: + case GPUTextureMode::RawPalette4Bit: { const u16 palette_value = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, @@ -110,6 +118,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman break; case GPUTextureMode::Palette8Bit: + case GPUTextureMode::RawPalette8Bit: { const u16 palette_value = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, @@ -133,7 +142,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman transparent = texture_color.c; - if constexpr (raw_texture_enable) + if constexpr (texture_mode >= GPUTextureMode::RawPalette4Bit) { color.bits = texture_color.bits; } @@ -212,7 +221,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawComman SetPixel(static_cast(x), static_cast(y), color.bits | cmd->params.GetMaskOR()); } -template +template void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) { const s32 origin_x = cmd->x; @@ -239,8 +248,8 @@ void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); - ShadePixel( - cmd, static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); + ShadePixel(cmd, static_cast(x), static_cast(y), r, g, b, + texcoord_x, texcoord_y); } } } @@ -351,8 +360,7 @@ void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_de } } -template +template void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl) { @@ -377,6 +385,7 @@ void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s3 if (w <= 0) return; + constexpr bool texture_enable = (texture_mode != GPUTextureMode::Disabled); AddIDeltas_DX(ig, idl, x_ig_adjust); AddIDeltas_DY(ig, idl, y); @@ -388,17 +397,16 @@ void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s3 const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING); const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING); - ShadePixel( - cmd, static_cast(x), static_cast(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), - Truncate8(v)); + ShadePixel(cmd, static_cast(x), static_cast(y), + Truncate8(r), Truncate8(g), Truncate8(b), + Truncate8(u), Truncate8(v)); x++; AddIDeltas_DX(ig, idl); } while (--w > 0); } -template +template void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, @@ -475,6 +483,7 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y)); i_deltas idl; + constexpr bool texture_enable = (texture_mode != GPUTextureMode::Disabled); if (!CalcIDeltas(idl, v0, v1, v2)) return; @@ -563,8 +572,8 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, if (y > static_cast(m_drawing_area.bottom)) continue; - DrawSpan( - cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + DrawSpan(cmd, yi, GetPolyXFP_Int(lc), + GetPolyXFP_Int(rc), ig, idl); } } else @@ -579,8 +588,8 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, if (y >= static_cast(m_drawing_area.top)) { - DrawSpan( - cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl); + DrawSpan(cmd, yi, GetPolyXFP_Int(lc), + GetPolyXFP_Int(rc), ig, idl); } yi++; @@ -591,36 +600,99 @@ void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, } } -GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable, - bool raw_texture_enable, +GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, + GPUTextureMode texture_mode, bool transparency_enable, bool dithering_enable) { -#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \ - &GPU_SW_Backend::DrawTriangle +#define F(SHADING, TEXTURE, TRANSPARENCY, DITHERING) \ + &GPU_SW_Backend::DrawTriangle - static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = { - {{{{F(false, false, false, false, false), F(false, false, false, false, true)}, - {F(false, false, false, true, false), F(false, false, false, true, true)}}, - {{F(false, false, true, false, false), F(false, false, true, false, true)}, - {F(false, false, true, true, false), F(false, false, true, true, true)}}}, - {{{F(false, true, false, false, false), F(false, true, false, false, true)}, - {F(false, true, false, true, false), F(false, true, false, true, true)}}, - {{F(false, true, true, false, false), F(false, true, true, false, true)}, - {F(false, true, true, true, false), F(false, true, true, true, true)}}}}, - {{{{F(true, false, false, false, false), F(true, false, false, false, true)}, - {F(true, false, false, true, false), F(true, false, false, true, true)}}, - {{F(true, false, true, false, false), F(true, false, true, false, true)}, - {F(true, false, true, true, false), F(true, false, true, true, true)}}}, - {{{F(true, true, false, false, false), F(true, true, false, false, true)}, - {F(true, true, false, true, false), F(true, true, false, true, true)}}, - {{F(true, true, true, false, false), F(true, true, true, false, true)}, - {F(true, true, true, true, false), F(true, true, true, true, true)}}}}}; + static constexpr DrawTriangleFunction funcs[2][9][2][2] = { + { + { + {F(false, GPUTextureMode::Palette4Bit, false, false), F(false, GPUTextureMode::Palette4Bit, false, true)}, + {F(false, GPUTextureMode::Palette4Bit, true, false), F(false, GPUTextureMode::Palette4Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::Palette8Bit, false, false), F(false, GPUTextureMode::Palette8Bit, false, true)}, + {F(false, GPUTextureMode::Palette8Bit, true, false), F(false, GPUTextureMode::Palette8Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::Direct16Bit, false, false), F(false, GPUTextureMode::Direct16Bit, false, true)}, + {F(false, GPUTextureMode::Direct16Bit, true, false), F(false, GPUTextureMode::Direct16Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::Reserved_Direct16Bit, false, false), + F(false, GPUTextureMode::Reserved_Direct16Bit, false, true)}, + {F(false, GPUTextureMode::Reserved_Direct16Bit, true, false), + F(false, GPUTextureMode::Reserved_Direct16Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::RawPalette4Bit, false, false), F(false, GPUTextureMode::RawPalette4Bit, false, true)}, + {F(false, GPUTextureMode::RawPalette4Bit, true, false), F(false, GPUTextureMode::RawPalette4Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::RawPalette8Bit, false, false), F(false, GPUTextureMode::RawPalette8Bit, false, true)}, + {F(false, GPUTextureMode::RawPalette8Bit, true, false), F(false, GPUTextureMode::RawPalette8Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::RawDirect16Bit, false, false), F(false, GPUTextureMode::RawDirect16Bit, false, true)}, + {F(false, GPUTextureMode::RawDirect16Bit, true, false), F(false, GPUTextureMode::RawDirect16Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::Reserved_RawDirect16Bit, false, false), + F(false, GPUTextureMode::Reserved_RawDirect16Bit, false, true)}, + {F(false, GPUTextureMode::Reserved_RawDirect16Bit, true, false), + F(false, GPUTextureMode::Reserved_RawDirect16Bit, true, true)}, + }, + { + {F(false, GPUTextureMode::Disabled, false, false), F(false, GPUTextureMode::Disabled, false, true)}, + {F(false, GPUTextureMode::Disabled, true, false), F(false, GPUTextureMode::Disabled, true, true)}, + }, + }, + {{ + {F(true, GPUTextureMode::Palette4Bit, false, false), F(true, GPUTextureMode::Palette4Bit, false, true)}, + {F(true, GPUTextureMode::Palette4Bit, true, false), F(true, GPUTextureMode::Palette4Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::Palette8Bit, false, false), F(true, GPUTextureMode::Palette8Bit, false, true)}, + {F(true, GPUTextureMode::Palette8Bit, true, false), F(true, GPUTextureMode::Palette8Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::Direct16Bit, false, false), F(true, GPUTextureMode::Direct16Bit, false, true)}, + {F(true, GPUTextureMode::Direct16Bit, true, false), F(true, GPUTextureMode::Direct16Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::Reserved_Direct16Bit, false, false), + F(true, GPUTextureMode::Reserved_Direct16Bit, false, true)}, + {F(true, GPUTextureMode::Reserved_Direct16Bit, true, false), + F(true, GPUTextureMode::Reserved_Direct16Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::RawPalette4Bit, false, false), F(true, GPUTextureMode::RawPalette4Bit, false, true)}, + {F(true, GPUTextureMode::RawPalette4Bit, true, false), F(true, GPUTextureMode::RawPalette4Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::RawPalette8Bit, false, false), F(true, GPUTextureMode::RawPalette8Bit, false, true)}, + {F(true, GPUTextureMode::RawPalette8Bit, true, false), F(true, GPUTextureMode::RawPalette8Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::RawDirect16Bit, false, false), F(true, GPUTextureMode::RawDirect16Bit, false, true)}, + {F(true, GPUTextureMode::RawDirect16Bit, true, false), F(true, GPUTextureMode::RawDirect16Bit, true, true)}, + }, + { + {F(true, GPUTextureMode::Reserved_RawDirect16Bit, false, false), + F(true, GPUTextureMode::Reserved_RawDirect16Bit, false, true)}, + {F(true, GPUTextureMode::Reserved_RawDirect16Bit, true, false), + F(true, GPUTextureMode::Reserved_RawDirect16Bit, true, true)}, + }, + {{F(true, GPUTextureMode::Disabled, false, false), F(true, GPUTextureMode::Disabled, false, true)}, + {F(true, GPUTextureMode::Disabled, true, false), F(true, GPUTextureMode::Disabled, true, true)}}}}; #undef F - return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)] - [u8(dithering_enable)]; + return funcs[u8(shading_enable)][u8(texture_mode)][u8(transparency_enable)][u8(dithering_enable)]; } enum @@ -725,8 +797,8 @@ void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBac const u8 g = shading_enable ? static_cast(cur_point.g >> Line_RGB_FractBits) : p0->g; const u8 b = shading_enable ? static_cast(cur_point.b >> Line_RGB_FractBits) : p0->b; - ShadePixel(cmd, static_cast(x), static_cast(y), r, - g, b, 0, 0); + ShadePixel(cmd, static_cast(x), + static_cast(y), r, g, b, 0, 0); } cur_point.x += step.dx_dk; @@ -755,18 +827,25 @@ GPU_SW_Backend::DrawLineFunction GPU_SW_Backend::GetDrawLineFunction(bool shadin return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)]; } -GPU_SW_Backend::DrawRectangleFunction -GPU_SW_Backend::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable) +GPU_SW_Backend::DrawRectangleFunction GPU_SW_Backend::GetDrawRectangleFunction(GPUTextureMode texture_mode, + bool transparency_enable) { -#define F(TEXTURE, RAW_TEXTURE, TRANSPARENCY) &GPU_SW_Backend::DrawRectangle +#define F(TEXTURE, TRANSPARENCY) &GPU_SW_Backend::DrawRectangle - static constexpr DrawRectangleFunction funcs[2][2][2] = { - {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}}, - {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}}; + static constexpr DrawRectangleFunction funcs[9][2] = { + {F(GPUTextureMode::Palette4Bit, false), F(GPUTextureMode::Palette4Bit, true)}, + {F(GPUTextureMode::Palette8Bit, false), F(GPUTextureMode::Palette8Bit, true)}, + {F(GPUTextureMode::Direct16Bit, false), F(GPUTextureMode::Direct16Bit, true)}, + {F(GPUTextureMode::Reserved_Direct16Bit, false), F(GPUTextureMode::Reserved_Direct16Bit, true)}, + {F(GPUTextureMode::RawPalette4Bit, false), F(GPUTextureMode::RawPalette4Bit, true)}, + {F(GPUTextureMode::RawPalette8Bit, false), F(GPUTextureMode::RawPalette8Bit, true)}, + {F(GPUTextureMode::RawDirect16Bit, false), F(GPUTextureMode::RawDirect16Bit, true)}, + {F(GPUTextureMode::Reserved_RawDirect16Bit, false), F(GPUTextureMode::Reserved_RawDirect16Bit, true)}, + {F(GPUTextureMode::Disabled, false), F(GPUTextureMode::Disabled, true)}}; #undef F - return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; + return funcs[u8(texture_mode)][u8(transparency_enable)]; } void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h index a7105ecc2..19e52a3c0 100644 --- a/src/core/gpu_sw_backend.h +++ b/src/core/gpu_sw_backend.h @@ -105,16 +105,15 @@ protected: ////////////////////////////////////////////////////////////////////////// // Rasterization ////////////////////////////////////////////////////////////////////////// - template + template void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y); - template + template void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd); using DrawRectangleFunction = void (GPU_SW_Backend::*)(const GPUBackendDrawRectangleCommand* cmd); - DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, - bool transparency_enable); + DrawRectangleFunction GetDrawRectangleFunction(GPUTextureMode texture_mode, bool transparency_enable); ////////////////////////////////////////////////////////////////////////// // Polygon and line rasterization ported from Mednafen @@ -144,13 +143,11 @@ protected: template void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1); - template + template void DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl); - template + template void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); @@ -158,7 +155,7 @@ protected: const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); - DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, + DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, GPUTextureMode texture_mode, bool transparency_enable, bool dithering_enable); template