GPU/HW: Split shadergen into a separate class
This commit is contained in:
parent
91c99f0226
commit
be81d08109
|
@ -21,6 +21,8 @@ add_library(core
|
|||
gpu_hw.h
|
||||
gpu_hw_opengl.cpp
|
||||
gpu_hw_opengl.h
|
||||
gpu_hw_shadergen.cpp
|
||||
gpu_hw_shadergen.h
|
||||
gpu_sw.cpp
|
||||
gpu_sw.h
|
||||
gte.cpp
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
<ClCompile Include="cpu_disasm.cpp" />
|
||||
<ClCompile Include="digital_controller.cpp" />
|
||||
<ClCompile Include="gpu_commands.cpp" />
|
||||
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
||||
<ClCompile Include="gpu_sw.cpp" />
|
||||
<ClCompile Include="gte.cpp" />
|
||||
<ClCompile Include="dma.cpp" />
|
||||
|
@ -64,6 +65,7 @@
|
|||
<ClInclude Include="cpu_core.h" />
|
||||
<ClInclude Include="cpu_disasm.h" />
|
||||
<ClInclude Include="digital_controller.h" />
|
||||
<ClInclude Include="gpu_hw_shadergen.h" />
|
||||
<ClInclude Include="gpu_sw.h" />
|
||||
<ClInclude Include="gte.h" />
|
||||
<ClInclude Include="cpu_types.h" />
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
<ClCompile Include="settings.cpp" />
|
||||
<ClCompile Include="gpu_commands.cpp" />
|
||||
<ClCompile Include="gpu_sw.cpp" />
|
||||
<ClCompile Include="gpu_hw_shadergen.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="types.h" />
|
||||
|
@ -50,6 +51,7 @@
|
|||
<ClInclude Include="memory_card.h" />
|
||||
<ClInclude Include="settings.h" />
|
||||
<ClInclude Include="gpu_sw.h" />
|
||||
<ClInclude Include="gpu_hw_shadergen.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="cpu_core.inl" />
|
||||
|
|
117
src/core/gpu.h
117
src/core/gpu.h
|
@ -18,6 +18,57 @@ class Timers;
|
|||
class GPU
|
||||
{
|
||||
public:
|
||||
enum class DMADirection : u32
|
||||
{
|
||||
Off = 0,
|
||||
FIFO = 1,
|
||||
CPUtoGP0 = 2,
|
||||
GPUREADtoCPU = 3
|
||||
};
|
||||
|
||||
enum class Primitive : u8
|
||||
{
|
||||
Reserved = 0,
|
||||
Polygon = 1,
|
||||
Line = 2,
|
||||
Rectangle = 3
|
||||
};
|
||||
|
||||
enum class DrawRectangleSize : u8
|
||||
{
|
||||
Variable = 0,
|
||||
R1x1 = 1,
|
||||
R8x8 = 2,
|
||||
R16x16 = 3
|
||||
};
|
||||
|
||||
enum class TextureMode : u8
|
||||
{
|
||||
Palette4Bit = 0,
|
||||
Palette8Bit = 1,
|
||||
Direct16Bit = 2,
|
||||
Reserved_Direct16Bit = 3,
|
||||
|
||||
// Not register values.
|
||||
RawTextureBit = 4,
|
||||
RawPalette4Bit = RawTextureBit | Palette4Bit,
|
||||
RawPalette8Bit = RawTextureBit | Palette8Bit,
|
||||
RawDirect16Bit = RawTextureBit | Direct16Bit,
|
||||
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
|
||||
|
||||
Disabled = 8 // Not a register value
|
||||
};
|
||||
|
||||
enum class TransparencyMode : u8
|
||||
{
|
||||
HalfBackgroundPlusHalfForeground = 0,
|
||||
BackgroundPlusForeground = 1,
|
||||
BackgroundMinusForeground = 2,
|
||||
BackgroundPlusQuarterForeground = 3,
|
||||
|
||||
Disabled = 4 // Not a register value
|
||||
};
|
||||
|
||||
enum : u32
|
||||
{
|
||||
VRAM_WIDTH = 1024,
|
||||
|
@ -29,6 +80,13 @@ public:
|
|||
HBLANK_TIMER_INDEX = 1
|
||||
};
|
||||
|
||||
// 4x4 dither matrix.
|
||||
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
|
||||
{+2, -2, +3, -1}, // row 1
|
||||
{-3, +1, -4, +0}, // row 2
|
||||
{+4, -1, +2, -2}}; // row 3
|
||||
|
||||
// Base class constructor.
|
||||
GPU();
|
||||
virtual ~GPU();
|
||||
|
||||
|
@ -112,57 +170,6 @@ protected:
|
|||
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
|
||||
bool remove_alpha);
|
||||
|
||||
enum class DMADirection : u32
|
||||
{
|
||||
Off = 0,
|
||||
FIFO = 1,
|
||||
CPUtoGP0 = 2,
|
||||
GPUREADtoCPU = 3
|
||||
};
|
||||
|
||||
enum class Primitive : u8
|
||||
{
|
||||
Reserved = 0,
|
||||
Polygon = 1,
|
||||
Line = 2,
|
||||
Rectangle = 3
|
||||
};
|
||||
|
||||
enum class DrawRectangleSize : u8
|
||||
{
|
||||
Variable = 0,
|
||||
R1x1 = 1,
|
||||
R8x8 = 2,
|
||||
R16x16 = 3
|
||||
};
|
||||
|
||||
enum class TextureMode : u8
|
||||
{
|
||||
Palette4Bit = 0,
|
||||
Palette8Bit = 1,
|
||||
Direct16Bit = 2,
|
||||
Reserved_Direct16Bit = 3,
|
||||
|
||||
// Not register values.
|
||||
RawTextureBit = 4,
|
||||
RawPalette4Bit = RawTextureBit | Palette4Bit,
|
||||
RawPalette8Bit = RawTextureBit | Palette8Bit,
|
||||
RawDirect16Bit = RawTextureBit | Direct16Bit,
|
||||
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
|
||||
|
||||
Disabled = 8 // Not a register value
|
||||
};
|
||||
|
||||
enum class TransparencyMode : u8
|
||||
{
|
||||
HalfBackgroundPlusHalfForeground = 0,
|
||||
BackgroundPlusForeground = 1,
|
||||
BackgroundMinusForeground = 2,
|
||||
BackgroundPlusQuarterForeground = 3,
|
||||
|
||||
Disabled = 4 // Not a register value
|
||||
};
|
||||
|
||||
union RenderCommand
|
||||
{
|
||||
u32 bits;
|
||||
|
@ -258,12 +265,6 @@ protected:
|
|||
}
|
||||
};
|
||||
|
||||
// 4x4 dither matrix.
|
||||
static constexpr s32 DITHER_MATRIX[4][4] = {{-4, +0, -3, +1}, // row 0
|
||||
{+2, -2, +3, -1}, // row 1
|
||||
{-3, +1, -4, +0}, // row 2
|
||||
{+4, -1, +2, -2}}; // row 3
|
||||
|
||||
void SoftReset();
|
||||
|
||||
// Sets dots per scanline
|
||||
|
@ -464,3 +465,5 @@ private:
|
|||
|
||||
static const GP0CommandHandlerTable s_GP0_command_handler_table;
|
||||
};
|
||||
|
||||
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode);
|
||||
|
|
|
@ -163,7 +163,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
|
|||
|
||||
void GPU_HW::AddDuplicateVertex()
|
||||
{
|
||||
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex));
|
||||
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(BatchVertex));
|
||||
m_batch_current_vertex_ptr++;
|
||||
}
|
||||
|
||||
|
@ -175,443 +175,14 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
|
|||
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
|
||||
}
|
||||
|
||||
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
|
||||
{
|
||||
if (enabled)
|
||||
ss << "#define " << name << " 1\n";
|
||||
else
|
||||
ss << "/* #define " << name << " 0 */\n";
|
||||
}
|
||||
|
||||
void GPU_HW::GenerateShaderHeader(std::stringstream& ss)
|
||||
{
|
||||
ss << "#version 330 core\n\n";
|
||||
ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
|
||||
ss << "const ivec2 VRAM_SIZE = ivec2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
||||
ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
|
||||
ss << R"(
|
||||
|
||||
float fixYCoord(float y)
|
||||
{
|
||||
return 1.0 - RCP_VRAM_SIZE.y - y;
|
||||
}
|
||||
|
||||
int fixYCoord(int y)
|
||||
{
|
||||
return VRAM_SIZE.y - y - 1;
|
||||
}
|
||||
|
||||
uint RGBA8ToRGBA5551(vec4 v)
|
||||
{
|
||||
uint r = uint(v.r * 255.0) >> 3;
|
||||
uint g = uint(v.g * 255.0) >> 3;
|
||||
uint b = uint(v.b * 255.0) >> 3;
|
||||
uint a = (v.a != 0.0) ? 1u : 0u;
|
||||
return (r) | (g << 5) | (b << 10) | (a << 15);
|
||||
}
|
||||
|
||||
vec4 RGBA5551ToRGBA8(uint v)
|
||||
{
|
||||
uint r = (v & 31u);
|
||||
uint g = ((v >> 5) & 31u);
|
||||
uint b = ((v >> 10) & 31u);
|
||||
uint a = ((v >> 15) & 1u);
|
||||
|
||||
// repeat lower bits
|
||||
r = (r << 3) | (r & 7u);
|
||||
g = (g << 3) | (g & 7u);
|
||||
b = (b << 3) | (b & 7u);
|
||||
|
||||
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
void GPU_HW::GenerateBatchUniformBuffer(std::stringstream& ss)
|
||||
{
|
||||
ss << R"(
|
||||
uniform UBOBlock {
|
||||
ivec2 u_pos_offset;
|
||||
uvec2 u_texture_window_mask;
|
||||
uvec2 u_texture_window_offset;
|
||||
float u_src_alpha_factor;
|
||||
float u_dst_alpha_factor;
|
||||
};
|
||||
)";
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateVertexShader(bool textured)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "TEXTURED", textured);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
|
||||
ss << R"(
|
||||
in ivec2 a_pos;
|
||||
in vec4 a_col0;
|
||||
in int a_texcoord;
|
||||
in int a_texpage;
|
||||
|
||||
out vec3 v_col0;
|
||||
#if TEXTURED
|
||||
out vec2 v_tex0;
|
||||
flat out ivec4 v_texpage;
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
// 0..+1023 -> -1..1
|
||||
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
|
||||
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
|
||||
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
|
||||
|
||||
v_col0 = a_col0.rgb;
|
||||
#if TEXTURED
|
||||
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
|
||||
|
||||
// base_x,base_y,palette_x,palette_y
|
||||
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
|
||||
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
|
||||
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
|
||||
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering)
|
||||
{
|
||||
const TextureMode actual_texture_mode =
|
||||
static_cast<TextureMode>(static_cast<u8>(texture_mode) & ~static_cast<u8>(TextureMode::RawTextureBit));
|
||||
const bool raw_texture = (static_cast<u8>(texture_mode) & static_cast<u8>(TextureMode::RawTextureBit)) ==
|
||||
static_cast<u8>(TextureMode::RawTextureBit);
|
||||
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
DefineMacro(ss, "TRANSPARENCY", transparency != HWBatchRenderMode::TransparencyDisabled);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == HWBatchRenderMode::OnlyOpaque);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == HWBatchRenderMode::OnlyTransparent);
|
||||
DefineMacro(ss, "TEXTURED", actual_texture_mode != TextureMode::Disabled);
|
||||
DefineMacro(ss, "PALETTE",
|
||||
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
|
||||
actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
|
||||
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
|
||||
DefineMacro(ss, "DITHERING", dithering);
|
||||
DefineMacro(ss, "TRUE_COLOR", m_true_color);
|
||||
|
||||
ss << "const int[16] s_dither_values = int[16]( ";
|
||||
for (u32 i = 0; i < 16; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
ss << ", ";
|
||||
ss << DITHER_MATRIX[i / 4][i % 4];
|
||||
}
|
||||
ss << " );\n";
|
||||
|
||||
ss << R"(
|
||||
in vec3 v_col0;
|
||||
#if TEXTURED
|
||||
in vec2 v_tex0;
|
||||
flat in ivec4 v_texpage;
|
||||
uniform sampler2D samp0;
|
||||
#endif
|
||||
|
||||
out vec4 o_col0;
|
||||
|
||||
ivec3 ApplyDithering(ivec3 icol)
|
||||
{
|
||||
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
|
||||
int offset = s_dither_values[fc.y * 4 + fc.x];
|
||||
return icol + ivec3(offset, offset, offset);
|
||||
}
|
||||
|
||||
ivec3 TruncateTo15Bit(ivec3 icol)
|
||||
{
|
||||
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
|
||||
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
|
||||
}
|
||||
|
||||
#if TEXTURED
|
||||
ivec2 ApplyNativeTextureWindow(ivec2 coords)
|
||||
{
|
||||
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
|
||||
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
|
||||
return ivec2(int(x), int(y));
|
||||
}
|
||||
|
||||
ivec2 ApplyTextureWindow(ivec2 coords)
|
||||
{
|
||||
if (RESOLUTION_SCALE == 1)
|
||||
return ApplyNativeTextureWindow(coords);
|
||||
|
||||
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
|
||||
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
|
||||
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
|
||||
}
|
||||
|
||||
ivec4 SampleFromVRAM(vec2 coord)
|
||||
{
|
||||
// from 0..1 to 0..255
|
||||
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
|
||||
icoord = ApplyTextureWindow(icoord);
|
||||
|
||||
// adjust for tightly packed palette formats
|
||||
ivec2 index_coord = icoord;
|
||||
#if PALETTE_4_BIT
|
||||
index_coord.x /= 4;
|
||||
#elif PALETTE_8_BIT
|
||||
index_coord.x /= 2;
|
||||
#endif
|
||||
|
||||
// fixup coords
|
||||
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
|
||||
|
||||
// load colour/palette
|
||||
vec4 color = texelFetch(samp0, vicoord, 0);
|
||||
|
||||
// apply palette
|
||||
#if PALETTE
|
||||
#if PALETTE_4_BIT
|
||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||
#elif PALETTE_8_BIT
|
||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||
#endif
|
||||
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
|
||||
color = texelFetch(samp0, palette_icoord, 0);
|
||||
#endif
|
||||
|
||||
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
|
||||
}
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
|
||||
|
||||
bool semitransparent;
|
||||
bool new_mask_bit;
|
||||
ivec3 icolor;
|
||||
|
||||
#if TEXTURED
|
||||
ivec4 texcol = SampleFromVRAM(v_tex0);
|
||||
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
|
||||
discard;
|
||||
|
||||
// Grab semitransparent bit from the texture color.
|
||||
semitransparent = (texcol.a != 0);
|
||||
|
||||
#if RAW_TEXTURE
|
||||
icolor = texcol.rgb;
|
||||
#else
|
||||
icolor = (vertcol * texcol.rgb) >> 7;
|
||||
#endif
|
||||
#else
|
||||
// All pixels are semitransparent for untextured polygons.
|
||||
semitransparent = true;
|
||||
icolor = vertcol;
|
||||
#endif
|
||||
|
||||
// Apply dithering
|
||||
#if DITHERING
|
||||
icolor = ApplyDithering(icolor);
|
||||
#endif
|
||||
|
||||
// Clip to 15-bit range
|
||||
#if !TRUE_COLOR
|
||||
icolor = TruncateTo15Bit(icolor);
|
||||
#endif
|
||||
|
||||
// Normalize
|
||||
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
|
||||
|
||||
#if TRANSPARENCY
|
||||
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
|
||||
if (semitransparent)
|
||||
{
|
||||
#if TRANSPARENCY_ONLY_OPAQUE
|
||||
discard;
|
||||
#endif
|
||||
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if TRANSPARENCY_ONLY_TRANSPARENCY
|
||||
discard;
|
||||
#endif
|
||||
o_col0 = vec4(color, 0.0);
|
||||
}
|
||||
#else
|
||||
o_col0 = vec4(color, 0.0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateScreenQuadVertexShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
ss << R"(
|
||||
|
||||
out vec2 v_tex0;
|
||||
|
||||
void main()
|
||||
{
|
||||
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
|
||||
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
|
||||
gl_Position.y = -gl_Position.y;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateFillFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
|
||||
ss << R"(
|
||||
uniform vec4 fill_color;
|
||||
out vec4 o_col0;
|
||||
|
||||
void main()
|
||||
{
|
||||
o_col0 = fill_color;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
|
||||
DefineMacro(ss, "INTERLACED", interlaced);
|
||||
|
||||
ss << R"(
|
||||
in vec2 v_tex0;
|
||||
out vec4 o_col0;
|
||||
|
||||
uniform sampler2D samp0;
|
||||
uniform ivec3 u_base_coords;
|
||||
|
||||
ivec2 GetCoords(vec2 fragcoord)
|
||||
{
|
||||
ivec2 icoords = ivec2(fragcoord);
|
||||
#if INTERLACED
|
||||
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
|
||||
discard;
|
||||
#endif
|
||||
return icoords;
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 icoords = GetCoords(gl_FragCoord.xy);
|
||||
|
||||
#if DEPTH_24BIT
|
||||
// compute offset in dwords from the start of the 24-bit values
|
||||
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
|
||||
int xoff = int(icoords.x);
|
||||
int dword_index = (xoff / 2) + (xoff / 4);
|
||||
|
||||
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
|
||||
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
|
||||
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
|
||||
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
|
||||
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
|
||||
|
||||
// select the bit for this pixel depending on its offset in the 4-pixel block
|
||||
uint r, g, b;
|
||||
int block_offset = xoff & 3;
|
||||
if (block_offset == 0)
|
||||
{
|
||||
r = s0 & 0xFFu;
|
||||
g = s0 >> 8;
|
||||
b = s1 & 0xFFu;
|
||||
}
|
||||
else if (block_offset == 1)
|
||||
{
|
||||
r = s1 >> 8;
|
||||
g = s2 & 0xFFu;
|
||||
b = s2 >> 8;
|
||||
}
|
||||
else if (block_offset == 2)
|
||||
{
|
||||
r = s1 & 0xFFu;
|
||||
g = s1 >> 8;
|
||||
b = s2 & 0xFFu;
|
||||
}
|
||||
else
|
||||
{
|
||||
r = s2 >> 8;
|
||||
g = s3 & 0xFFu;
|
||||
b = s3 >> 8;
|
||||
}
|
||||
|
||||
// and normalize
|
||||
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
|
||||
#else
|
||||
// load and return
|
||||
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW::GenerateVRAMWriteFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
|
||||
ss << R"(
|
||||
|
||||
uniform ivec2 u_base_coords;
|
||||
uniform ivec2 u_size;
|
||||
uniform usamplerBuffer samp0;
|
||||
|
||||
out vec4 o_col0;
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||
ivec2 offset = coords - u_base_coords;
|
||||
offset.y = u_size.y - offset.y - 1;
|
||||
|
||||
int buffer_offset = offset.y * u_size.x + offset.x;
|
||||
uint value = texelFetch(samp0, buffer_offset).r;
|
||||
|
||||
o_col0 = RGBA5551ToRGBA8(value);
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||
GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
|
||||
{
|
||||
if (rc.primitive == Primitive::Line)
|
||||
return rc.polyline ? HWPrimitive::LineStrip : HWPrimitive::Lines;
|
||||
return rc.polyline ? BatchPrimitive::LineStrip : BatchPrimitive::Lines;
|
||||
else if ((rc.primitive == Primitive::Polygon && rc.quad_polygon) || rc.primitive == Primitive::Rectangle)
|
||||
return HWPrimitive::TriangleStrip;
|
||||
return BatchPrimitive::TriangleStrip;
|
||||
else
|
||||
return HWPrimitive::Triangles;
|
||||
return BatchPrimitive::Triangles;
|
||||
}
|
||||
|
||||
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
|
||||
|
@ -687,13 +258,13 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
|
|||
// has any state changed which requires a new batch?
|
||||
const TransparencyMode transparency_mode =
|
||||
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
|
||||
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
|
||||
const BatchPrimitive rc_primitive = GetPrimitiveForCommand(rc);
|
||||
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
|
||||
const u32 max_added_vertices = num_vertices + 2;
|
||||
if (!IsFlushed())
|
||||
{
|
||||
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
|
||||
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
|
||||
if (buffer_overflow || rc_primitive == BatchPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
|
||||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
|
||||
dithering_enable != m_batch.dithering || m_render_state.IsTextureWindowChanged())
|
||||
{
|
||||
|
|
|
@ -8,6 +8,22 @@
|
|||
class GPU_HW : public GPU
|
||||
{
|
||||
public:
|
||||
enum class BatchPrimitive : u8
|
||||
{
|
||||
Lines = 0,
|
||||
LineStrip = 1,
|
||||
Triangles = 2,
|
||||
TriangleStrip = 3
|
||||
};
|
||||
|
||||
enum class BatchRenderMode : u8
|
||||
{
|
||||
TransparencyDisabled,
|
||||
TransparentAndOpaque,
|
||||
OnlyOpaque,
|
||||
OnlyTransparent
|
||||
};
|
||||
|
||||
GPU_HW();
|
||||
virtual ~GPU_HW();
|
||||
|
||||
|
@ -16,23 +32,7 @@ public:
|
|||
virtual void UpdateSettings() override;
|
||||
|
||||
protected:
|
||||
enum class HWPrimitive : u8
|
||||
{
|
||||
Lines = 0,
|
||||
LineStrip = 1,
|
||||
Triangles = 2,
|
||||
TriangleStrip = 3
|
||||
};
|
||||
|
||||
enum class HWBatchRenderMode : u8
|
||||
{
|
||||
TransparencyDisabled,
|
||||
TransparentAndOpaque,
|
||||
OnlyOpaque,
|
||||
OnlyTransparent
|
||||
};
|
||||
|
||||
struct HWVertex
|
||||
struct BatchVertex
|
||||
{
|
||||
s32 x;
|
||||
s32 y;
|
||||
|
@ -55,9 +55,9 @@ protected:
|
|||
}
|
||||
};
|
||||
|
||||
struct HWBatchConfig
|
||||
struct BatchConfig
|
||||
{
|
||||
HWPrimitive primitive;
|
||||
BatchPrimitive primitive;
|
||||
TextureMode texture_mode;
|
||||
TransparencyMode transparency_mode;
|
||||
bool dithering;
|
||||
|
@ -71,14 +71,14 @@ protected:
|
|||
}
|
||||
|
||||
// Returns the render mode for this batch.
|
||||
HWBatchRenderMode GetRenderMode() const
|
||||
BatchRenderMode GetRenderMode() const
|
||||
{
|
||||
return transparency_mode == TransparencyMode::Disabled ? HWBatchRenderMode::TransparencyDisabled :
|
||||
HWBatchRenderMode::TransparentAndOpaque;
|
||||
return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
|
||||
BatchRenderMode::TransparentAndOpaque;
|
||||
}
|
||||
};
|
||||
|
||||
struct HWBatchUBOData
|
||||
struct BatchUBOData
|
||||
{
|
||||
s32 u_pos_offset[2];
|
||||
u32 u_texture_window_mask[2];
|
||||
|
@ -90,7 +90,7 @@ protected:
|
|||
static constexpr u32 VRAM_UPDATE_TEXTURE_BUFFER_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u32);
|
||||
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
|
||||
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
|
||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
|
||||
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex);
|
||||
static constexpr u32 UNIFORM_BUFFER_SIZE = 512 * 1024;
|
||||
|
||||
static constexpr std::tuple<float, float, float, float> RGBA8ToFloat(u32 rgba)
|
||||
|
@ -121,31 +121,21 @@ protected:
|
|||
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
|
||||
}
|
||||
|
||||
std::string GenerateVertexShader(bool textured);
|
||||
std::string GenerateFragmentShader(HWBatchRenderMode transparency, TextureMode texture_mode, bool dithering);
|
||||
std::string GenerateScreenQuadVertexShader();
|
||||
std::string GenerateFillFragmentShader();
|
||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
||||
std::string GenerateVRAMWriteFragmentShader();
|
||||
|
||||
HWVertex* m_batch_start_vertex_ptr = nullptr;
|
||||
HWVertex* m_batch_end_vertex_ptr = nullptr;
|
||||
HWVertex* m_batch_current_vertex_ptr = nullptr;
|
||||
BatchVertex* m_batch_start_vertex_ptr = nullptr;
|
||||
BatchVertex* m_batch_end_vertex_ptr = nullptr;
|
||||
BatchVertex* m_batch_current_vertex_ptr = nullptr;
|
||||
u32 m_batch_base_vertex = 0;
|
||||
|
||||
u32 m_resolution_scale = 1;
|
||||
u32 m_max_resolution_scale = 1;
|
||||
bool m_true_color = false;
|
||||
|
||||
HWBatchConfig m_batch = {};
|
||||
HWBatchUBOData m_batch_ubo_data = {};
|
||||
BatchConfig m_batch = {};
|
||||
BatchUBOData m_batch_ubo_data = {};
|
||||
bool m_batch_ubo_dirty = true;
|
||||
|
||||
private:
|
||||
static HWPrimitive GetPrimitiveForCommand(RenderCommand rc);
|
||||
|
||||
void GenerateShaderHeader(std::stringstream& ss);
|
||||
void GenerateBatchUniformBuffer(std::stringstream& ss);
|
||||
static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc);
|
||||
|
||||
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
|
||||
void AddDuplicateVertex();
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "YBaseLib/Assert.h"
|
||||
#include "YBaseLib/Log.h"
|
||||
#include "YBaseLib/String.h"
|
||||
#include "gpu_hw_shadergen.h"
|
||||
#include "host_interface.h"
|
||||
#include "imgui.h"
|
||||
#include "system.h"
|
||||
|
@ -138,9 +139,9 @@ void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
|
|||
Assert(!m_batch_start_vertex_ptr);
|
||||
|
||||
const GL::StreamBuffer::MappingResult res =
|
||||
m_vertex_stream_buffer->Map(sizeof(HWVertex), required_vertices * sizeof(HWVertex));
|
||||
m_vertex_stream_buffer->Map(sizeof(BatchVertex), required_vertices * sizeof(BatchVertex));
|
||||
|
||||
m_batch_start_vertex_ptr = static_cast<HWVertex*>(res.pointer);
|
||||
m_batch_start_vertex_ptr = static_cast<BatchVertex*>(res.pointer);
|
||||
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
|
||||
m_batch_end_vertex_ptr = m_batch_start_vertex_ptr + res.space_aligned;
|
||||
m_batch_base_vertex = res.index_aligned;
|
||||
|
@ -246,11 +247,11 @@ void GPU_HW_OpenGL::CreateVertexBuffer()
|
|||
glEnableVertexAttribArray(1);
|
||||
glEnableVertexAttribArray(2);
|
||||
glEnableVertexAttribArray(3);
|
||||
glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, x)));
|
||||
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex),
|
||||
reinterpret_cast<void*>(offsetof(HWVertex, color)));
|
||||
glVertexAttribIPointer(2, 2, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texcoord)));
|
||||
glVertexAttribIPointer(3, 1, GL_INT, sizeof(HWVertex), reinterpret_cast<void*>(offsetof(HWVertex, texpage)));
|
||||
glVertexAttribIPointer(0, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, x)));
|
||||
glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(BatchVertex),
|
||||
reinterpret_cast<void*>(offsetof(BatchVertex, color)));
|
||||
glVertexAttribIPointer(2, 2, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texcoord)));
|
||||
glVertexAttribIPointer(3, 1, GL_INT, sizeof(BatchVertex), reinterpret_cast<void*>(offsetof(BatchVertex, texpage)));
|
||||
glBindVertexArray(0);
|
||||
|
||||
glGenVertexArrays(1, &m_attributeless_vao_id);
|
||||
|
@ -280,31 +281,56 @@ void GPU_HW_OpenGL::CreateTextureBuffer()
|
|||
|
||||
bool GPU_HW_OpenGL::CompilePrograms()
|
||||
{
|
||||
GPU_HW_ShaderGen shadergen(GPU_HW_ShaderGen::Backend::OpenGL, m_resolution_scale, m_true_color);
|
||||
|
||||
for (u32 render_mode = 0; render_mode < 4; render_mode++)
|
||||
{
|
||||
for (u32 texture_mode = 0; texture_mode < 9; texture_mode++)
|
||||
{
|
||||
for (u8 dithering = 0; dithering < 2; dithering++)
|
||||
{
|
||||
if (!CompileProgram(m_render_programs[render_mode][texture_mode][dithering],
|
||||
static_cast<HWBatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
|
||||
ConvertToBoolUnchecked(dithering)))
|
||||
{
|
||||
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
|
||||
const std::string vs = shadergen.GenerateBatchVertexShader(textured);
|
||||
const std::string fs = shadergen.GenerateBatchFragmentShader(static_cast<BatchRenderMode>(render_mode),
|
||||
static_cast<TextureMode>(texture_mode),
|
||||
ConvertToBoolUnchecked(dithering));
|
||||
|
||||
GL::Program& prog = m_render_programs[render_mode][texture_mode][dithering];
|
||||
if (!prog.Compile(vs, fs))
|
||||
return false;
|
||||
|
||||
prog.BindAttribute(0, "a_pos");
|
||||
prog.BindAttribute(1, "a_col0");
|
||||
if (textured)
|
||||
{
|
||||
prog.BindAttribute(2, "a_texcoord");
|
||||
prog.BindAttribute(3, "a_texpage");
|
||||
}
|
||||
|
||||
prog.BindFragData(0, "o_col0");
|
||||
|
||||
if (!prog.Link())
|
||||
return false;
|
||||
|
||||
prog.BindUniformBlock("UBOBlock", 1);
|
||||
if (textured)
|
||||
{
|
||||
prog.Bind();
|
||||
prog.RegisterUniform("samp0");
|
||||
prog.Uniform1i(0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Use string_view
|
||||
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
|
||||
{
|
||||
for (u8 interlaced = 0; interlaced < 2; interlaced++)
|
||||
{
|
||||
GL::Program& prog = m_display_programs[depth_24bit][interlaced];
|
||||
const std::string vs = GenerateScreenQuadVertexShader();
|
||||
const std::string fs =
|
||||
GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), ConvertToBoolUnchecked(interlaced));
|
||||
const std::string vs = shadergen.GenerateScreenQuadVertexShader();
|
||||
const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
|
||||
ConvertToBoolUnchecked(interlaced));
|
||||
if (!prog.Compile(vs, fs))
|
||||
return false;
|
||||
|
||||
|
@ -319,8 +345,11 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
|||
}
|
||||
}
|
||||
|
||||
if (!m_vram_write_program.Compile(GenerateScreenQuadVertexShader(), GenerateVRAMWriteFragmentShader()))
|
||||
if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(),
|
||||
shadergen.GenerateVRAMWriteFragmentShader()))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
m_vram_write_program.BindFragData(0, "o_col0");
|
||||
if (!m_vram_write_program.Link())
|
||||
|
@ -335,41 +364,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
|||
return true;
|
||||
}
|
||||
|
||||
bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode,
|
||||
bool dithering)
|
||||
{
|
||||
const bool textured = texture_mode != TextureMode::Disabled;
|
||||
const std::string vs = GenerateVertexShader(textured);
|
||||
const std::string fs = GenerateFragmentShader(render_mode, texture_mode, dithering);
|
||||
if (!prog.Compile(vs, fs))
|
||||
return false;
|
||||
|
||||
prog.BindAttribute(0, "a_pos");
|
||||
prog.BindAttribute(1, "a_col0");
|
||||
if (textured)
|
||||
{
|
||||
prog.BindAttribute(2, "a_texcoord");
|
||||
prog.BindAttribute(3, "a_texpage");
|
||||
}
|
||||
|
||||
prog.BindFragData(0, "o_col0");
|
||||
|
||||
if (!prog.Link())
|
||||
return false;
|
||||
|
||||
prog.BindUniformBlock("UBOBlock", 1);
|
||||
|
||||
if (textured)
|
||||
{
|
||||
prog.Bind();
|
||||
prog.RegisterUniform("samp0");
|
||||
prog.Uniform1i(0, 0);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
||||
void GPU_HW_OpenGL::SetDrawState(BatchRenderMode render_mode)
|
||||
{
|
||||
const GL::Program& prog = m_render_programs[static_cast<u8>(render_mode)][static_cast<u8>(m_batch.texture_mode)]
|
||||
[BoolToUInt8(m_batch.dithering)];
|
||||
|
@ -378,7 +373,7 @@ void GPU_HW_OpenGL::SetDrawState(HWBatchRenderMode render_mode)
|
|||
if (m_batch.texture_mode != TextureMode::Disabled)
|
||||
m_vram_read_texture->Bind();
|
||||
|
||||
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == HWBatchRenderMode::OnlyOpaque)
|
||||
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
|
||||
{
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
|
@ -732,7 +727,7 @@ void GPU_HW_OpenGL::FlushRender()
|
|||
m_stats.num_batches++;
|
||||
m_stats.num_vertices += vertex_count;
|
||||
|
||||
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
|
||||
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(BatchVertex));
|
||||
m_vertex_stream_buffer->Bind();
|
||||
m_batch_start_vertex_ptr = nullptr;
|
||||
m_batch_end_vertex_ptr = nullptr;
|
||||
|
@ -742,9 +737,9 @@ void GPU_HW_OpenGL::FlushRender()
|
|||
|
||||
if (m_batch.NeedsTwoPassRendering())
|
||||
{
|
||||
SetDrawState(HWBatchRenderMode::OnlyTransparent);
|
||||
SetDrawState(BatchRenderMode::OnlyTransparent);
|
||||
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
||||
SetDrawState(HWBatchRenderMode::OnlyOpaque);
|
||||
SetDrawState(BatchRenderMode::OnlyOpaque);
|
||||
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -58,8 +58,7 @@ private:
|
|||
void CreateTextureBuffer();
|
||||
|
||||
bool CompilePrograms();
|
||||
bool CompileProgram(GL::Program& prog, HWBatchRenderMode render_mode, TextureMode texture_mode, bool dithering);
|
||||
void SetDrawState(HWBatchRenderMode render_mode);
|
||||
void SetDrawState(BatchRenderMode render_mode);
|
||||
void UploadUniformBlock(const void* data, u32 data_size);
|
||||
|
||||
// downsample texture - used for readbacks at >1xIR.
|
||||
|
|
|
@ -0,0 +1,436 @@
|
|||
#include "gpu_hw_shadergen.h"
|
||||
|
||||
GPU_HW_ShaderGen::GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color)
|
||||
: m_backend(backend), m_resolution_scale(resolution_scale), m_true_color(true_color)
|
||||
{
|
||||
}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
|
||||
|
||||
// Writes a preprocessor definition for a boolean shader feature switch.
//
// The macro is always defined, as 1 when enabled and as 0 when disabled. The generated
// shaders test these switches with "#if NAME", "#elif NAME" and "#if !NAME"; leaving a
// disabled switch undefined makes those expressions evaluate an undefined identifier,
// which is an error in GLSL ES and not guaranteed to be accepted by every GLSL compiler.
//
// ss      - stream receiving the shader source.
// name    - macro identifier to define.
// enabled - value of the switch.
static void DefineMacro(std::stringstream& ss, const char* name, bool enabled)
{
  ss << "#define " << name << (enabled ? " 1\n" : " 0\n");
}
|
||||
|
||||
void GPU_HW_ShaderGen::GenerateShaderHeader(std::stringstream& ss)
|
||||
{
|
||||
ss << "#version 330 core\n\n";
|
||||
ss << "const int RESOLUTION_SCALE = " << m_resolution_scale << ";\n";
|
||||
ss << "const ivec2 VRAM_SIZE = ivec2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
|
||||
ss << "const vec2 RCP_VRAM_SIZE = vec2(1.0, 1.0) / vec2(VRAM_SIZE);\n";
|
||||
ss << R"(
|
||||
|
||||
float fixYCoord(float y)
|
||||
{
|
||||
return 1.0 - RCP_VRAM_SIZE.y - y;
|
||||
}
|
||||
|
||||
int fixYCoord(int y)
|
||||
{
|
||||
return VRAM_SIZE.y - y - 1;
|
||||
}
|
||||
|
||||
uint RGBA8ToRGBA5551(vec4 v)
|
||||
{
|
||||
uint r = uint(v.r * 255.0) >> 3;
|
||||
uint g = uint(v.g * 255.0) >> 3;
|
||||
uint b = uint(v.b * 255.0) >> 3;
|
||||
uint a = (v.a != 0.0) ? 1u : 0u;
|
||||
return (r) | (g << 5) | (b << 10) | (a << 15);
|
||||
}
|
||||
|
||||
vec4 RGBA5551ToRGBA8(uint v)
|
||||
{
|
||||
uint r = (v & 31u);
|
||||
uint g = ((v >> 5) & 31u);
|
||||
uint b = ((v >> 10) & 31u);
|
||||
uint a = ((v >> 15) & 1u);
|
||||
|
||||
// repeat lower bits
|
||||
r = (r << 3) | (r & 7u);
|
||||
g = (g << 3) | (g & 7u);
|
||||
b = (b << 3) | (b & 7u);
|
||||
|
||||
return vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, float(a));
|
||||
}
|
||||
)";
|
||||
}
|
||||
|
||||
// Appends the declaration of the "UBOBlock" uniform block used by the batch shaders.
void GPU_HW_ShaderGen::GenerateBatchUniformBuffer(std::stringstream& ss)
{
  static constexpr char uniform_block[] = R"(
uniform UBOBlock {
  ivec2 u_pos_offset;
  uvec2 u_texture_window_mask;
  uvec2 u_texture_window_offset;
  float u_src_alpha_factor;
  float u_dst_alpha_factor;
};
)";

  ss << uniform_block;
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "TEXTURED", textured);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
|
||||
ss << R"(
|
||||
in ivec2 a_pos;
|
||||
in vec4 a_col0;
|
||||
in int a_texcoord;
|
||||
in int a_texpage;
|
||||
|
||||
out vec3 v_col0;
|
||||
#if TEXTURED
|
||||
out vec2 v_tex0;
|
||||
flat out ivec4 v_texpage;
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
// 0..+1023 -> -1..1
|
||||
float pos_x = (float(a_pos.x + u_pos_offset.x) / 512.0) - 1.0;
|
||||
float pos_y = (float(a_pos.y + u_pos_offset.y) / -256.0) + 1.0;
|
||||
gl_Position = vec4(pos_x, pos_y, 0.0, 1.0);
|
||||
|
||||
v_col0 = a_col0.rgb;
|
||||
#if TEXTURED
|
||||
v_tex0 = vec2(float(a_texcoord & 0xFFFF), float(a_texcoord >> 16)) / vec2(255.0);
|
||||
|
||||
// base_x,base_y,palette_x,palette_y
|
||||
v_texpage.x = (a_texpage & 15) * 64 * RESOLUTION_SCALE;
|
||||
v_texpage.y = ((a_texpage >> 4) & 1) * 256 * RESOLUTION_SCALE;
|
||||
v_texpage.z = ((a_texpage >> 16) & 63) * 16 * RESOLUTION_SCALE;
|
||||
v_texpage.w = ((a_texpage >> 22) & 511) * RESOLUTION_SCALE;
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
|
||||
GPU::TextureMode texture_mode, bool dithering)
|
||||
{
|
||||
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
|
||||
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
|
||||
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
GenerateBatchUniformBuffer(ss);
|
||||
DefineMacro(ss, "TRANSPARENCY", transparency != GPU_HW::BatchRenderMode::TransparencyDisabled);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", transparency == GPU_HW::BatchRenderMode::OnlyOpaque);
|
||||
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENCY", transparency == GPU_HW::BatchRenderMode::OnlyTransparent);
|
||||
DefineMacro(ss, "TEXTURED", actual_texture_mode != GPU::TextureMode::Disabled);
|
||||
DefineMacro(ss, "PALETTE",
|
||||
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
|
||||
actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
|
||||
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
|
||||
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
|
||||
DefineMacro(ss, "DITHERING", dithering);
|
||||
DefineMacro(ss, "TRUE_COLOR", m_true_color);
|
||||
|
||||
ss << "const int[16] s_dither_values = int[16]( ";
|
||||
for (u32 i = 0; i < 16; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
ss << ", ";
|
||||
ss << GPU::DITHER_MATRIX[i / 4][i % 4];
|
||||
}
|
||||
ss << " );\n";
|
||||
|
||||
ss << R"(
|
||||
in vec3 v_col0;
|
||||
#if TEXTURED
|
||||
in vec2 v_tex0;
|
||||
flat in ivec4 v_texpage;
|
||||
uniform sampler2D samp0;
|
||||
#endif
|
||||
|
||||
out vec4 o_col0;
|
||||
|
||||
ivec3 ApplyDithering(ivec3 icol)
|
||||
{
|
||||
ivec2 fc = (ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE)) & ivec2(3, 3);
|
||||
int offset = s_dither_values[fc.y * 4 + fc.x];
|
||||
return icol + ivec3(offset, offset, offset);
|
||||
}
|
||||
|
||||
ivec3 TruncateTo15Bit(ivec3 icol)
|
||||
{
|
||||
icol = clamp(icol, ivec3(0, 0, 0), ivec3(255, 255, 255));
|
||||
return (icol & ivec3(~7, ~7, ~7)) | ((icol >> 3) & ivec3(7, 7, 7));
|
||||
}
|
||||
|
||||
#if TEXTURED
|
||||
ivec2 ApplyNativeTextureWindow(ivec2 coords)
|
||||
{
|
||||
uint x = (uint(coords.x) & ~(u_texture_window_mask.x * 8u)) | ((u_texture_window_offset.x & u_texture_window_mask.x) * 8u);
|
||||
uint y = (uint(coords.y) & ~(u_texture_window_mask.y * 8u)) | ((u_texture_window_offset.y & u_texture_window_mask.y) * 8u);
|
||||
return ivec2(int(x), int(y));
|
||||
}
|
||||
|
||||
ivec2 ApplyTextureWindow(ivec2 coords)
|
||||
{
|
||||
if (RESOLUTION_SCALE == 1)
|
||||
return ApplyNativeTextureWindow(coords);
|
||||
|
||||
ivec2 downscaled_coords = coords / ivec2(RESOLUTION_SCALE);
|
||||
ivec2 coords_offset = coords % ivec2(RESOLUTION_SCALE);
|
||||
return (ApplyNativeTextureWindow(downscaled_coords) * ivec2(RESOLUTION_SCALE)) + coords_offset;
|
||||
}
|
||||
|
||||
ivec4 SampleFromVRAM(vec2 coord)
|
||||
{
|
||||
// from 0..1 to 0..255
|
||||
ivec2 icoord = ivec2(coord * vec2(255 * RESOLUTION_SCALE));
|
||||
icoord = ApplyTextureWindow(icoord);
|
||||
|
||||
// adjust for tightly packed palette formats
|
||||
ivec2 index_coord = icoord;
|
||||
#if PALETTE_4_BIT
|
||||
index_coord.x /= 4;
|
||||
#elif PALETTE_8_BIT
|
||||
index_coord.x /= 2;
|
||||
#endif
|
||||
|
||||
// fixup coords
|
||||
ivec2 vicoord = ivec2(v_texpage.x + index_coord.x, fixYCoord(v_texpage.y + index_coord.y));
|
||||
|
||||
// load colour/palette
|
||||
vec4 color = texelFetch(samp0, vicoord, 0);
|
||||
|
||||
// apply palette
|
||||
#if PALETTE
|
||||
#if PALETTE_4_BIT
|
||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 3;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 4)) & 0x0Fu);
|
||||
#elif PALETTE_8_BIT
|
||||
int subpixel = int(icoord.x / RESOLUTION_SCALE) & 1;
|
||||
uint vram_value = RGBA8ToRGBA5551(color);
|
||||
int palette_index = int((vram_value >> (subpixel * 8)) & 0xFFu);
|
||||
#endif
|
||||
ivec2 palette_icoord = ivec2(v_texpage.z + (palette_index * RESOLUTION_SCALE), fixYCoord(v_texpage.w));
|
||||
color = texelFetch(samp0, palette_icoord, 0);
|
||||
#endif
|
||||
|
||||
return ivec4(color * vec4(255.0, 255.0, 255.0, 255.0));
|
||||
}
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec3 vertcol = ivec3(v_col0 * vec3(255.0, 255.0, 255.0));
|
||||
|
||||
bool semitransparent;
|
||||
bool new_mask_bit;
|
||||
ivec3 icolor;
|
||||
|
||||
#if TEXTURED
|
||||
ivec4 texcol = SampleFromVRAM(v_tex0);
|
||||
if (texcol == ivec4(0.0, 0.0, 0.0, 0.0))
|
||||
discard;
|
||||
|
||||
// Grab semitransparent bit from the texture color.
|
||||
semitransparent = (texcol.a != 0);
|
||||
|
||||
#if RAW_TEXTURE
|
||||
icolor = texcol.rgb;
|
||||
#else
|
||||
icolor = (vertcol * texcol.rgb) >> 7;
|
||||
#endif
|
||||
#else
|
||||
// All pixels are semitransparent for untextured polygons.
|
||||
semitransparent = true;
|
||||
icolor = vertcol;
|
||||
#endif
|
||||
|
||||
// Apply dithering
|
||||
#if DITHERING
|
||||
icolor = ApplyDithering(icolor);
|
||||
#endif
|
||||
|
||||
// Clip to 15-bit range
|
||||
#if !TRUE_COLOR
|
||||
icolor = TruncateTo15Bit(icolor);
|
||||
#endif
|
||||
|
||||
// Normalize
|
||||
vec3 color = vec3(icolor) / vec3(255.0, 255.0, 255.0);
|
||||
|
||||
#if TRANSPARENCY
|
||||
// Apply semitransparency. If not a semitransparent texel, destination alpha is ignored.
|
||||
if (semitransparent)
|
||||
{
|
||||
#if TRANSPARENCY_ONLY_OPAQUE
|
||||
discard;
|
||||
#endif
|
||||
o_col0 = vec4(color * u_src_alpha_factor, u_dst_alpha_factor);
|
||||
}
|
||||
else
|
||||
{
|
||||
#if TRANSPARENCY_ONLY_TRANSPARENCY
|
||||
discard;
|
||||
#endif
|
||||
o_col0 = vec4(color, 0.0);
|
||||
}
|
||||
#else
|
||||
o_col0 = vec4(color, 0.0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateScreenQuadVertexShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
ss << R"(
|
||||
|
||||
out vec2 v_tex0;
|
||||
|
||||
void main()
|
||||
{
|
||||
v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2));
|
||||
gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f);
|
||||
gl_Position.y = -gl_Position.y;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateFillFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
|
||||
ss << R"(
|
||||
uniform vec4 fill_color;
|
||||
out vec4 o_col0;
|
||||
|
||||
void main()
|
||||
{
|
||||
o_col0 = fill_color;
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
|
||||
DefineMacro(ss, "INTERLACED", interlaced);
|
||||
|
||||
ss << R"(
|
||||
in vec2 v_tex0;
|
||||
out vec4 o_col0;
|
||||
|
||||
uniform sampler2D samp0;
|
||||
uniform ivec3 u_base_coords;
|
||||
|
||||
ivec2 GetCoords(vec2 fragcoord)
|
||||
{
|
||||
ivec2 icoords = ivec2(fragcoord);
|
||||
#if INTERLACED
|
||||
if ((((icoords.y - u_base_coords.z) / RESOLUTION_SCALE) & 1) != 0)
|
||||
discard;
|
||||
#endif
|
||||
return icoords;
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 icoords = GetCoords(gl_FragCoord.xy);
|
||||
|
||||
#if DEPTH_24BIT
|
||||
// compute offset in dwords from the start of the 24-bit values
|
||||
ivec2 base = ivec2(u_base_coords.x, u_base_coords.y + icoords.y);
|
||||
int xoff = int(icoords.x);
|
||||
int dword_index = (xoff / 2) + (xoff / 4);
|
||||
|
||||
// sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
|
||||
uint s0 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 0, base.y), 0));
|
||||
uint s1 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + dword_index * 2 + 1, base.y), 0));
|
||||
uint s2 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
|
||||
uint s3 = RGBA8ToRGBA5551(texelFetch(samp0, ivec2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
|
||||
|
||||
// select the bit for this pixel depending on its offset in the 4-pixel block
|
||||
uint r, g, b;
|
||||
int block_offset = xoff & 3;
|
||||
if (block_offset == 0)
|
||||
{
|
||||
r = s0 & 0xFFu;
|
||||
g = s0 >> 8;
|
||||
b = s1 & 0xFFu;
|
||||
}
|
||||
else if (block_offset == 1)
|
||||
{
|
||||
r = s1 >> 8;
|
||||
g = s2 & 0xFFu;
|
||||
b = s2 >> 8;
|
||||
}
|
||||
else if (block_offset == 2)
|
||||
{
|
||||
r = s1 & 0xFFu;
|
||||
g = s1 >> 8;
|
||||
b = s2 & 0xFFu;
|
||||
}
|
||||
else
|
||||
{
|
||||
r = s2 >> 8;
|
||||
g = s3 & 0xFFu;
|
||||
b = s3 >> 8;
|
||||
}
|
||||
|
||||
// and normalize
|
||||
o_col0 = vec4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
|
||||
#else
|
||||
// load and return
|
||||
o_col0 = texelFetch(samp0, u_base_coords.xy + icoords, 0);
|
||||
#endif
|
||||
}
|
||||
)";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
GenerateShaderHeader(ss);
|
||||
|
||||
ss << R"(
|
||||
|
||||
uniform ivec2 u_base_coords;
|
||||
uniform ivec2 u_size;
|
||||
uniform usamplerBuffer samp0;
|
||||
|
||||
out vec4 o_col0;
|
||||
|
||||
void main()
|
||||
{
|
||||
ivec2 coords = ivec2(gl_FragCoord.xy) / ivec2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||
ivec2 offset = coords - u_base_coords;
|
||||
offset.y = u_size.y - offset.y - 1;
|
||||
|
||||
int buffer_offset = offset.y * u_size.x + offset.x;
|
||||
uint value = texelFetch(samp0, buffer_offset).r;
|
||||
|
||||
o_col0 = RGBA5551ToRGBA8(value);
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "gpu_hw.h"
|
||||
|
||||
class GPU_HW_ShaderGen
|
||||
{
|
||||
public:
|
||||
enum class Backend
|
||||
{
|
||||
OpenGL
|
||||
};
|
||||
|
||||
public:
|
||||
GPU_HW_ShaderGen(Backend backend, u32 resolution_scale, bool true_color);
|
||||
~GPU_HW_ShaderGen();
|
||||
|
||||
void Init(Backend backend, u32 resolution_scale, bool true_color);
|
||||
|
||||
std::string GenerateBatchVertexShader(bool textured);
|
||||
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, bool dithering);
|
||||
std::string GenerateScreenQuadVertexShader();
|
||||
std::string GenerateFillFragmentShader();
|
||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
|
||||
std::string GenerateVRAMWriteFragmentShader();
|
||||
|
||||
Backend m_backend;
|
||||
u32 m_resolution_scale;
|
||||
bool m_true_color;
|
||||
|
||||
private:
|
||||
void GenerateShaderHeader(std::stringstream& ss);
|
||||
void GenerateBatchUniformBuffer(std::stringstream& ss);
|
||||
};
|
Loading…
Reference in New Issue