melonDS/src/GPU3D_Compute.cpp

1137 lines
39 KiB
C++
Raw Normal View History

Compute shader renderer (#2041) * nothing works yet * don't double buffer 3D framebuffers for the GL Renderer looks like leftovers from when 3D+2D composition was done in the frontend * oops * it works! * implement display capture for compute renderer it's actually just all stolen from the regular OpenGL renderer * fix bad indirect call * handle cleanup properly * add hires rendering to the compute shader renderer * fix UB also misc changes to use more unsigned multiplication also fix framebuffer resize * correct edge filling behaviour when AA is disabled * fix full color textures * fix edge marking (polygon id is 6-bit not 5) also make the code a bit nicer * take all edge cases into account for XMin/XMax calculation * use hires coordinate again * stop using fixed size buffers based on scale factor in shaders this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits * apparently & is not defined on bvec4 why does this even compile on Intel and Nvidia? * put the texture cache into it's own file * add compute shader renderer properly to the GUI also add option to toggle using high resolution vertex coordinates * unbind sampler object in compute shader renderer * fix GetRangedBitMask for 64 bit aligned 64 bits pretty embarassing * convert NonStupidBitfield.h back to LF only new lines * actually adapt to latest changes * fix stupid merge * actually make compute shader renderer work with newest changes * show progress on shader compilation * remove merge leftover
2024-05-13 15:17:39 +00:00
/*
    Copyright 2016-2024 melonDS team

    This file is part of melonDS.

    melonDS is free software: you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#include "GPU3D_Compute.h"
#include <assert.h>
#include "OpenGLSupport.h"
#include "GPU3D_Compute_shaders.h"
namespace melonDS
{
// Constructs the renderer around an already-created compositor, which is moved
// into CurGLCompositor. The texture cache is set up with an OpenGL texture loader.
// No GL objects are created here; New() generates them after construction.
// NOTE(review): the meaning of the `true` passed to Renderer3D is not visible in
// this file - confirm against the Renderer3D declaration.
ComputeRenderer::ComputeRenderer(GLCompositor&& compositor)
: Renderer3D(true), Texcache(TexcacheOpenGLLoader()), CurGLCompositor(std::move(compositor))
{}
// Assembles the full GLSL text for one compute program and hands it to
// OpenGL::CompileComputeProgram.
//
// The generated source consists of, in order:
//   1. the "#version 430 core" line,
//   2. one "#define <name>" line per entry in `defines`,
//   3. defines for ScreenWidth, ScreenHeight and MaxWorkTiles (current member values),
//   4. the shared ComputeRendererShaders::Common text,
//   5. the shader-specific `source`.
// The comma-joined list of defines doubles as the name passed to the compile helper.
//
// Returns whatever OpenGL::CompileComputeProgram returns.
bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines)
{
    std::string label;
    std::string text = "#version 430 core\n";

    for (auto it = defines.begin(); it != defines.end(); ++it)
    {
        text.append("#define ").append(*it).append(1, '\n');
        label.append(*it).append(1, ',');
    }

    text += "#define ScreenWidth " + std::to_string(ScreenWidth);
    text += "\n#define ScreenHeight " + std::to_string(ScreenHeight);
    // NOTE(review): no line break is emitted after the MaxWorkTiles value, so the
    // Common text presumably begins with one - confirm in GPU3D_Compute_shaders.h.
    text += "\n#define MaxWorkTiles " + std::to_string(MaxWorkTiles);

    text += ComputeRendererShaders::Common;
    text += source;

    return OpenGL::CompileComputeProgram(shader, text.c_str(), label.c_str());
}
void ComputeRenderer::ShaderCompileStep(int& current, int& count)
{
current = ShaderStepIdx;
ShaderStepIdx++;
count = 33;
switch (current)
{
case 0:
CompileShader(ShaderInterpXSpans[0], ComputeRendererShaders::InterpSpans, {"InterpSpans", "ZBuffer"});
return;
case 1:
CompileShader(ShaderInterpXSpans[1], ComputeRendererShaders::InterpSpans, {"InterpSpans", "WBuffer"});
return;
case 2:
CompileShader(ShaderBinCombined, ComputeRendererShaders::BinCombined, {"BinCombined"});
return;
case 3:
CompileShader(ShaderDepthBlend[0], ComputeRendererShaders::DepthBlend, {"DepthBlend", "ZBuffer"});
return;
case 4:
CompileShader(ShaderDepthBlend[1], ComputeRendererShaders::DepthBlend, {"DepthBlend", "WBuffer"});
return;
case 5:
CompileShader(ShaderRasteriseNoTexture[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "NoTexture"});
return;
case 6:
CompileShader(ShaderRasteriseNoTexture[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "NoTexture"});
return;
case 7:
CompileShader(ShaderRasteriseNoTextureToon[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "NoTexture", "Toon"});
return;
case 8:
CompileShader(ShaderRasteriseNoTextureToon[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "NoTexture", "Toon"});
return;
case 9:
CompileShader(ShaderRasteriseNoTextureHighlight[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "NoTexture", "Highlight"});
return;
case 10:
CompileShader(ShaderRasteriseNoTextureHighlight[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "NoTexture", "Highlight"});
return;
case 11:
CompileShader(ShaderRasteriseUseTextureDecal[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "UseTexture", "Decal"});
return;
case 12:
CompileShader(ShaderRasteriseUseTextureDecal[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "UseTexture", "Decal"});
return;
case 13:
CompileShader(ShaderRasteriseUseTextureModulate[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "UseTexture", "Modulate"});
return;
case 14:
CompileShader(ShaderRasteriseUseTextureModulate[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "UseTexture", "Modulate"});
return;
case 15:
CompileShader(ShaderRasteriseUseTextureToon[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "UseTexture", "Toon"});
return;
case 16:
CompileShader(ShaderRasteriseUseTextureToon[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "UseTexture", "Toon"});
return;
case 17:
CompileShader(ShaderRasteriseUseTextureHighlight[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "UseTexture", "Highlight"});
return;
case 18:
CompileShader(ShaderRasteriseUseTextureHighlight[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "UseTexture", "Highlight"});
return;
case 19:
CompileShader(ShaderRasteriseShadowMask[0], ComputeRendererShaders::Rasterise, {"Rasterise", "ZBuffer", "ShadowMask"});
return;
case 20:
CompileShader(ShaderRasteriseShadowMask[1], ComputeRendererShaders::Rasterise, {"Rasterise", "WBuffer", "ShadowMask"});
return;
case 21:
CompileShader(ShaderClearCoarseBinMask, ComputeRendererShaders::ClearCoarseBinMask, {"ClearCoarseBinMask"});
return;
case 22:
CompileShader(ShaderClearIndirectWorkCount, ComputeRendererShaders::ClearIndirectWorkCount, {"ClearIndirectWorkCount"});
return;
case 23:
CompileShader(ShaderCalculateWorkListOffset, ComputeRendererShaders::CalcOffsets, {"CalculateWorkOffsets"});
return;
case 24:
CompileShader(ShaderSortWork, ComputeRendererShaders::SortWork, {"SortWork"});
return;
case 25:
CompileShader(ShaderFinalPass[0], ComputeRendererShaders::FinalPass, {"FinalPass"});
return;
case 26:
CompileShader(ShaderFinalPass[1], ComputeRendererShaders::FinalPass, {"FinalPass", "EdgeMarking"});
return;
case 27:
CompileShader(ShaderFinalPass[2], ComputeRendererShaders::FinalPass, {"FinalPass", "Fog"});
return;
case 28:
CompileShader(ShaderFinalPass[3], ComputeRendererShaders::FinalPass, {"FinalPass", "EdgeMarking", "Fog"});
return;
case 29:
CompileShader(ShaderFinalPass[4], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing"});
return;
case 30:
CompileShader(ShaderFinalPass[5], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking"});
return;
case 31:
CompileShader(ShaderFinalPass[6], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "Fog"});
return;
case 32:
CompileShader(ShaderFinalPass[7], ComputeRendererShaders::FinalPass, {"FinalPass", "AntiAliasing", "EdgeMarking", "Fog"});
return;
default:
__builtin_unreachable();
return;
}
}
// Debug message callback: writes the driver's message followed by a newline to
// stdout. It is only referenced by the commented-out glDebugMessageCallback call
// in ComputeRenderer::New(); every argument except `message` is ignored.
void blah(GLenum /*source*/, GLenum /*type*/, GLuint /*id*/, GLenum /*severity*/, GLsizei /*length*/, const GLchar *message, const void * /*userParam*/)
{
    puts(message);
}
// Factory for the compute shader renderer.
//
// Creates the compositor first and returns nullptr if that fails. Otherwise the
// renderer is constructed and every GL object whose size does not depend on the
// scale factor is generated and, where applicable, allocated:
//   - Y span setup and render polygon storage buffers (fixed capacity),
//   - names for the scale-dependent buffers/textures (storage for those is
//     allocated later in SetRenderSettings),
//   - the 256x192 low resolution framebuffer texture,
//   - the meta uniform buffer,
//   - nine sampler objects, one per combination of S/T wrap mode,
//   - a 256x192x4 byte pixel pack buffer.
// No shaders are compiled here.
std::unique_ptr<ComputeRenderer> ComputeRenderer::New()
{
    std::optional<GLCompositor> compositor = GLCompositor::New();
    if (!compositor)
        return nullptr;

    std::unique_ptr<ComputeRenderer> result = std::unique_ptr<ComputeRenderer>(new ComputeRenderer(std::move(*compositor)));

    //glDebugMessageCallback(blah, NULL);
    //glEnable(GL_DEBUG_OUTPUT);

    glGenBuffers(1, &result->YSpanSetupMemory);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, result->YSpanSetupMemory);
    glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(SpanSetupY)*MaxYSpanSetups, nullptr, GL_DYNAMIC_DRAW);

    glGenBuffers(1, &result->RenderPolygonMemory);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, result->RenderPolygonMemory);
    glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(RenderPolygon)*2048, nullptr, GL_DYNAMIC_DRAW);

    // only names are generated for these; SetRenderSettings allocates the storage
    glGenBuffers(1, &result->XSpanSetupMemory);
    glGenBuffers(1, &result->BinResultMemory);
    glGenBuffers(1, &result->FinalTileMemory);
    glGenBuffers(1, &result->YSpanIndicesTextureMemory);
    glGenBuffers(tilememoryLayer_Num, result->TileMemory);
    glGenBuffers(1, &result->WorkDescMemory);

    glGenTextures(1, &result->YSpanIndicesTexture);
    glGenTextures(1, &result->LowResFramebuffer);
    glBindTexture(GL_TEXTURE_2D, result->LowResFramebuffer);
    glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8UI, 256, 192);

    glGenBuffers(1, &result->MetaUniformMemory);
    glBindBuffer(GL_UNIFORM_BUFFER, result->MetaUniformMemory);
    glBufferData(GL_UNIFORM_BUFFER, sizeof(MetaUniform), nullptr, GL_DYNAMIC_DRAW);

    // Sampler index = sWrap + tWrap*3, where a wrap index of 0 is clamp,
    // 1 is repeat and 2 is mirrored repeat.
    const GLenum translateWrapMode[3] = {GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT};
    glGenSamplers(9, result->Samplers);
    for (u32 j = 0; j < 3; j++)
    {
        for (u32 i = 0; i < 3; i++)
        {
            const GLuint sampler = result->Samplers[i+j*3];
            glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, translateWrapMode[i]);
            glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, translateWrapMode[j]);
            glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
            // enum-valued state: use the integer setter like the parameters above
            glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        }
    }

    glGenBuffers(1, &result->PixelBuffer);
    glBindBuffer(GL_PIXEL_PACK_BUFFER, result->PixelBuffer);
    glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, nullptr, GL_DYNAMIC_READ);

    return result;
}
// Releases the texture cache contents and the GL buffers, textures and samplers
// owned by the renderer.
//
// NOTE(review): the compute programs are not released here; DeleteShaders() is
// only invoked from SetRenderSettings(). Whether calling it from the destructor is
// safe depends on the initial state of the program handles, which is not visible
// in this file.
ComputeRenderer::~ComputeRenderer()
{
    Texcache.Reset();

    glDeleteBuffers(1, &YSpanSetupMemory);
    glDeleteBuffers(1, &RenderPolygonMemory);
    glDeleteBuffers(1, &XSpanSetupMemory);
    glDeleteBuffers(1, &BinResultMemory);
    glDeleteBuffers(tilememoryLayer_Num, TileMemory);
    glDeleteBuffers(1, &WorkDescMemory);
    glDeleteBuffers(1, &FinalTileMemory);
    glDeleteBuffers(1, &YSpanIndicesTextureMemory);

    glDeleteTextures(1, &YSpanIndicesTexture);
    glDeleteTextures(1, &Framebuffer);
    // generated in New() alongside the other textures; previously never freed
    glDeleteTextures(1, &LowResFramebuffer);

    glDeleteBuffers(1, &MetaUniformMemory);
    glDeleteSamplers(9, Samplers);
    glDeleteBuffers(1, &PixelBuffer);
}
void ComputeRenderer::DeleteShaders()
{
std::initializer_list<GLuint> allPrograms =
{
ShaderInterpXSpans[0],
ShaderInterpXSpans[1],
ShaderBinCombined,
ShaderDepthBlend[0],
ShaderDepthBlend[1],
ShaderRasteriseNoTexture[0],
ShaderRasteriseNoTexture[1],
ShaderRasteriseNoTextureToon[0],
ShaderRasteriseNoTextureToon[1],
ShaderRasteriseNoTextureHighlight[0],
ShaderRasteriseNoTextureHighlight[1],
ShaderRasteriseUseTextureDecal[0],
ShaderRasteriseUseTextureDecal[1],
ShaderRasteriseUseTextureModulate[0],
ShaderRasteriseUseTextureModulate[1],
ShaderRasteriseUseTextureToon[0],
ShaderRasteriseUseTextureToon[1],
ShaderRasteriseUseTextureHighlight[0],
ShaderRasteriseUseTextureHighlight[1],
ShaderRasteriseShadowMask[0],
ShaderRasteriseShadowMask[1],
ShaderClearCoarseBinMask,
ShaderClearIndirectWorkCount,
ShaderCalculateWorkListOffset,
ShaderSortWork,
ShaderFinalPass[0],
ShaderFinalPass[1],
ShaderFinalPass[2],
ShaderFinalPass[3],
ShaderFinalPass[4],
ShaderFinalPass[5],
ShaderFinalPass[6],
ShaderFinalPass[7],
};
for (GLuint program : allPrograms)
glDeleteProgram(program);
}
// Resets the renderer state; the only state touched is the texture cache.
// The gpu argument is not used.
void ComputeRenderer::Reset(GPU& gpu)
{
Texcache.Reset();
}
void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates)
{
CurGLCompositor.SetScaleFactor(scale);
if (ScaleFactor != -1)
{
DeleteShaders();
}
ShaderStepIdx = 0;
ScaleFactor = scale;
ScreenWidth = 256 * ScaleFactor;
ScreenHeight = 192 * ScaleFactor;
TilesPerLine = ScreenWidth/TileSize;
TileLines = ScreenHeight/TileSize;
HiresCoordinates = highResolutionCoordinates;
MaxWorkTiles = TilesPerLine*TileLines*16;
Compute shader renderer (#2041) * nothing works yet * don't double buffer 3D framebuffers for the GL Renderer looks like leftovers from when 3D+2D composition was done in the frontend * oops * it works! * implement display capture for compute renderer it's actually just all stolen from the regular OpenGL renderer * fix bad indirect call * handle cleanup properly * add hires rendering to the compute shader renderer * fix UB also misc changes to use more unsigned multiplication also fix framebuffer resize * correct edge filling behaviour when AA is disabled * fix full color textures * fix edge marking (polygon id is 6-bit not 5) also make the code a bit nicer * take all edge cases into account for XMin/XMax calculation * use hires coordinate again * stop using fixed size buffers based on scale factor in shaders this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits * apparently & is not defined on bvec4 why does this even compile on Intel and Nvidia? * put the texture cache into it's own file * add compute shader renderer properly to the GUI also add option to toggle using high resolution vertex coordinates * unbind sampler object in compute shader renderer * fix GetRangedBitMask for 64 bit aligned 64 bits pretty embarassing * convert NonStupidBitfield.h back to LF only new lines * actually adapt to latest changes * fix stupid merge * actually make compute shader renderer work with newest changes * show progress on shader compilation * remove merge leftover
2024-05-13 15:17:39 +00:00
for (int i = 0; i < tilememoryLayer_Num; i++)
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, TileMemory[i]);
glBufferData(GL_SHADER_STORAGE_BUFFER, 4*TileSize*TileSize*MaxWorkTiles, nullptr, GL_DYNAMIC_DRAW);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, FinalTileMemory);
glBufferData(GL_SHADER_STORAGE_BUFFER, 4*3*2*ScreenWidth*ScreenHeight, nullptr, GL_DYNAMIC_DRAW);
int binResultSize = sizeof(BinResultHeader)
+ TilesPerLine*TileLines*CoarseBinStride*4 // BinnedMaskCoarse
+ TilesPerLine*TileLines*BinStride*4 // BinnedMask
+ TilesPerLine*TileLines*BinStride*4; // WorkOffsets
glBindBuffer(GL_SHADER_STORAGE_BUFFER, BinResultMemory);
glBufferData(GL_SHADER_STORAGE_BUFFER, binResultSize, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, WorkDescMemory);
glBufferData(GL_SHADER_STORAGE_BUFFER, MaxWorkTiles*2*4*2, nullptr, GL_DYNAMIC_DRAW);
if (Framebuffer != 0)
glDeleteTextures(1, &Framebuffer);
glGenTextures(1, &Framebuffer);
glBindTexture(GL_TEXTURE_2D, Framebuffer);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, ScreenWidth, ScreenHeight);
// eh those are pretty bad guesses
// though real hw shouldn't be eable to render all 2048 polygons on every line either
int maxYSpanIndices = 64*2048 * ScaleFactor;
YSpanIndices.resize(maxYSpanIndices);
glBindBuffer(GL_TEXTURE_BUFFER, YSpanIndicesTextureMemory);
glBufferData(GL_TEXTURE_BUFFER, maxYSpanIndices*2*4, nullptr, GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, XSpanSetupMemory);
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(SpanSetupX)*maxYSpanIndices, nullptr, GL_DYNAMIC_DRAW);
glBindTexture(GL_TEXTURE_BUFFER, YSpanIndicesTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory);
}
// Intentionally empty: this renderer does no work in this hook.
// NOTE(review): presumably invoked when the vertical counter reaches line 144 -
// confirm against the Renderer3D interface.
void ComputeRenderer::VCount144(GPU& gpu)
{
}
// Copies the attributes that get interpolated along an edge into `span`:
// depth (Z/W), vertex colour and texture coordinates. Fields suffixed 0 come
// from vertex `from`, fields suffixed 1 from vertex `to`.
void ComputeRenderer::SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to)
{
    const auto& start = *poly->Vertices[from];
    const auto& end = *poly->Vertices[to];

    // start point
    span->Z0 = poly->FinalZ[from];
    span->W0 = poly->FinalW[from];
    span->ColorR0 = start.FinalColor[0];
    span->ColorG0 = start.FinalColor[1];
    span->ColorB0 = start.FinalColor[2];
    span->TexcoordU0 = start.TexCoords[0];
    span->TexcoordV0 = start.TexCoords[1];

    // end point
    span->Z1 = poly->FinalZ[to];
    span->W1 = poly->FinalW[to];
    span->ColorR1 = end.FinalColor[0];
    span->ColorG1 = end.FinalColor[1];
    span->ColorB1 = end.FinalColor[2];
    span->TexcoordU1 = end.TexCoords[0];
    span->TexcoordV1 = end.TexCoords[1];
}
// Fills `span` with a degenerate edge that starts and ends at a single vertex.
// RenderFrame uses this for polygons whose top and bottom Y coincide.
//
// rp:        polygon whose horizontal bounds (XMin/XMax and the Y at which they
//            occur) are widened to include this span.
// vertex:    index into `positions` and the polygon's vertex arrays.
// side:      0 for the left edge; non-zero for the right edge, which is placed
//            one pixel to the left of the vertex.
// positions: scaled screen positions, [i][0] = x and [i][1] = y.
void ComputeRenderer::SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int side, s32 positions[10][2])
{
    s32 column = positions[vertex][0];
    if (side)
        column--;
    span->DxInitial = side ? -0x40000 : 0;

    // both end points collapse onto the same pixel
    span->X1 = column;
    span->X0 = span->X1;
    span->XMin = column;
    span->XMax = column;
    span->Y1 = positions[vertex][1];
    span->Y0 = span->Y1;

    if (rp->XMin > span->XMin)
    {
        rp->XMin = span->XMin;
        rp->XMinY = span->Y0;
    }
    if (rp->XMax < span->XMax)
    {
        rp->XMax = span->XMax;
        rp->XMaxY = span->Y0;
    }

    // no slope and nothing to interpolate along
    span->Increment = 0;
    span->XCovIncr = 0;
    span->IRecip = 0;
    span->I1 = 0;
    span->I0 = 0;
    span->Linear = true;
    span->IsDummy = true;

    SetupAttrs(span, poly, vertex, vertex);
}
// Fills `span` with the setup data for one polygon edge running from vertex
// `from` to vertex `to`.
//
// rp:        polygon whose horizontal bounds (XMin/XMax and the Y at which they
//            occur) are widened to include this edge.
// span:      output record; the attribute fields are filled through SetupAttrs.
// side:      0 for a left edge, non-zero for a right edge.
// positions: scaled screen positions, [i][0] = x and [i][1] = y.
//
// XCovIncr is only written for x-major edges; for other edges the previous
// contents of that field are left in place.
void ComputeRenderer::SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2])
{
span->X0 = positions[from][0];
span->X1 = positions[to][0];
span->Y0 = positions[from][1];
span->Y1 = positions[to][1];
SetupAttrs(span, poly, from, to);
// Y coordinates at which the edge reaches its smallest/largest X
s32 minXY, maxXY;
// set when X decreases from the start to the end of the edge
bool negative = false;
if (span->X1 > span->X0)
{
span->XMin = span->X0;
span->XMax = span->X1-1;
minXY = span->Y0;
maxXY = span->Y1;
}
else if (span->X1 < span->X0)
{
span->XMin = span->X1;
span->XMax = span->X0-1;
negative = true;
minXY = span->Y1;
maxXY = span->Y0;
}
else
{
// vertical edge: a single column, shifted one pixel left for the right side
span->XMin = span->X0;
if (side) span->XMin--;
span->XMax = span->XMin;
// doesn't matter for completely vertical slope
minXY = span->Y0;
maxXY = span->Y0;
}
// widen the polygon's horizontal bounds and remember where the extremes occur
if (span->XMin < rp->XMin)
{
rp->XMin = span->XMin;
rp->XMinY = minXY;
}
if (span->XMax > rp->XMax)
{
rp->XMax = span->XMax;
rp->XMaxY = maxXY;
}
span->IsDummy = false;
s32 xlen = span->XMax+1 - span->XMin;
s32 ylen = span->Y1 - span->Y0;
// slope increment has a 18-bit fractional part
// note: for some reason, x/y isn't calculated directly,
// instead, 1/y is calculated and then multiplied by x
// TODO: this is still not perfect (see for example x=169 y=33)
if (ylen == 0)
{
span->Increment = 0;
}
else if (ylen == xlen)
{
// exactly one pixel in X per line (1.0 with 18 fractional bits)
span->Increment = 0x40000;
}
else
{
s32 yrecip = (1<<18) / ylen;
span->Increment = (span->X1-span->X0) * yrecip;
// the increment is stored as a magnitude; the direction is kept in `negative`
if (span->Increment < 0) span->Increment = -span->Increment;
}
// x-major: the edge advances more than one pixel in X per line
bool xMajor = (span->Increment > 0x40000);
// initial X offset (same 18-bit fixed point format), chosen by side, direction and slope class
if (side)
{
// right
if (xMajor)
span->DxInitial = negative ? (0x20000 + 0x40000) : (span->Increment - 0x20000);
else if (span->Increment != 0)
span->DxInitial = negative ? 0x40000 : 0;
else
span->DxInitial = -0x40000;
}
else
{
// left
if (xMajor)
span->DxInitial = negative ? ((span->Increment - 0x20000) + 0x40000) : 0x20000;
else if (span->Increment != 0)
span->DxInitial = negative ? 0x40000 : 0;
else
span->DxInitial = 0;
}
// interpolation runs along X for x-major edges and along Y otherwise
if (xMajor)
{
if (side)
{
span->I0 = span->X0 - 1;
span->I1 = span->X1 - 1;
}
else
{
span->I0 = span->X0;
span->I1 = span->X1;
}
// used for calculating AA coverage
span->XCovIncr = (ylen << 10) / xlen;
}
else
{
span->I0 = span->Y0;
span->I1 = span->Y1;
}
// reciprocal of the interpolation length with 30 fractional bits; 0 for a zero-length range
if (span->I0 != span->I1)
span->IRecip = (1<<30) / (span->I1 - span->I0);
else
span->IRecip = 0;
// linear interpolation is flagged when both W values are equal and bits 1-6 of them are clear
span->Linear = (span->W0 == span->W1) && !(span->W0 & 0x7E) && !(span->W1 & 0x7E);
// halved W terms stored for the interpolator; an odd W0 paired with an even W1
// rounds the W0 numerator down and the W0 denominator up
if ((span->W0 & 0x1) && !(span->W1 & 0x1))
{
span->W0n = (span->W0 - 1) >> 1;
span->W0d = (span->W0 + 1) >> 1;
span->W1d = span->W1 >> 1;
}
else
{
span->W0n = span->W0 >> 1;
span->W0d = span->W0 >> 1;
span->W1d = span->W1 >> 1;
}
}
// One rasterisation batch key: polygons sharing texture, sampler and blend mode
// are grouped under the same variant.
struct Variant
{
    // GL texture and sampler object used by the batch (0 when untextured)
    GLuint Texture, Sampler;
    // texture dimensions; not part of the identity of a variant
    u16 Width, Height;
    // polygon blend mode, with 4 used for shadow mask polygons
    u8 BlendMode;

    // Two variants are equal when texture, sampler and blend mode match.
    // Width and Height are deliberately left out of the comparison.
    // const-qualified so that const objects can be compared as well.
    bool operator==(const Variant& other) const
    {
        return Texture == other.Texture && Sampler == other.Sampler && BlendMode == other.BlendMode;
    }
};
/*
Antialiasing
W-Buffer
With Texture
0
1, 3
2
without Texture
2
0, 1, 3
=> 20 Shader + 1x Shadow Mask
*/
void ComputeRenderer::RenderFrame(GPU& gpu)
{
assert(!NeedsShaderCompile());
Compute shader renderer (#2041) * nothing works yet * don't double buffer 3D framebuffers for the GL Renderer looks like leftovers from when 3D+2D composition was done in the frontend * oops * it works! * implement display capture for compute renderer it's actually just all stolen from the regular OpenGL renderer * fix bad indirect call * handle cleanup properly * add hires rendering to the compute shader renderer * fix UB also misc changes to use more unsigned multiplication also fix framebuffer resize * correct edge filling behaviour when AA is disabled * fix full color textures * fix edge marking (polygon id is 6-bit not 5) also make the code a bit nicer * take all edge cases into account for XMin/XMax calculation * use hires coordinate again * stop using fixed size buffers based on scale factor in shaders this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits * apparently & is not defined on bvec4 why does this even compile on Intel and Nvidia? * put the texture cache into it's own file * add compute shader renderer properly to the GUI also add option to toggle using high resolution vertex coordinates * unbind sampler object in compute shader renderer * fix GetRangedBitMask for 64 bit aligned 64 bits pretty embarassing * convert NonStupidBitfield.h back to LF only new lines * actually adapt to latest changes * fix stupid merge * actually make compute shader renderer work with newest changes * show progress on shader compilation * remove merge leftover
2024-05-13 15:17:39 +00:00
if (!Texcache.Update(gpu) && gpu.GPU3D.RenderFrameIdentical)
{
return;
}
int numYSpans = 0;
int numSetupIndices = 0;
/*
Some games really like to spam small textures, often
to store the data like PPU tiles. E.g. Shantae
or some Mega Man game. Fortunately they are usually kind
enough to not vary the texture size all too often (usually
they just use 8x8 or 16x for everything).
This is the reason we have this whole mess where textures of
the same size are put into array textures. This allows
to increase the batch size.
Less variance between each Variant hah!
*/
u32 numVariants = 0, prevVariant, prevTexLayer;
Variant variants[MaxVariants];
bool enableTextureMaps = gpu.GPU3D.RenderDispCnt & (1<<0);
for (int i = 0; i < gpu.GPU3D.RenderNumPolygons; i++)
{
Polygon* polygon = gpu.GPU3D.RenderPolygonRAM[i];
u32 nverts = polygon->NumVertices;
u32 vtop = polygon->VTop, vbot = polygon->VBottom;
u32 curVL = vtop, curVR = vtop;
u32 nextVL, nextVR;
RenderPolygons[i].FirstXSpan = numSetupIndices;
RenderPolygons[i].Attr = polygon->Attr;
bool foundVariant = false;
if (i > 0)
{
// if the whole texture attribute matches
// the texture layer will also match
Polygon* prevPolygon = gpu.GPU3D.RenderPolygonRAM[i - 1];
foundVariant = prevPolygon->TexParam == polygon->TexParam
&& prevPolygon->TexPalette == polygon->TexPalette
&& (prevPolygon->Attr & 0x30) == (polygon->Attr & 0x30)
&& prevPolygon->IsShadowMask == polygon->IsShadowMask;
}
if (!foundVariant)
{
Variant variant;
variant.BlendMode = polygon->IsShadowMask ? 4 : ((polygon->Attr >> 4) & 0x3);
variant.Texture = 0;
variant.Sampler = 0;
u32* textureLastVariant = nullptr;
// we always need to look up the texture to get the layer of the array texture
if (enableTextureMaps && (polygon->TexParam >> 26) & 0x7)
{
Texcache.GetTexture(gpu, polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant);
bool wrapS = (polygon->TexParam >> 16) & 1;
bool wrapT = (polygon->TexParam >> 17) & 1;
bool mirrorS = (polygon->TexParam >> 18) & 1;
bool mirrorT = (polygon->TexParam >> 19) & 1;
variant.Sampler = Samplers[(wrapS ? (mirrorS ? 2 : 1) : 0) + (wrapT ? (mirrorT ? 2 : 1) : 0) * 3];
if (*textureLastVariant < numVariants && variants[*textureLastVariant] == variant)
{
foundVariant = true;
prevVariant = *textureLastVariant;
}
}
if (!foundVariant)
{
for (int j = numVariants - 1; j >= 0; j--)
{
if (variants[j] == variant)
{
foundVariant = true;
prevVariant = j;
goto foundVariant;
}
}
prevVariant = numVariants;
variants[numVariants] = variant;
variants[numVariants].Width = TextureWidth(polygon->TexParam);
variants[numVariants].Height = TextureHeight(polygon->TexParam);
numVariants++;
assert(numVariants <= MaxVariants);
foundVariant:;
if (textureLastVariant)
*textureLastVariant = prevVariant;
}
}
RenderPolygons[i].Variant = prevVariant;
RenderPolygons[i].TextureLayer = (float)prevTexLayer;
if (polygon->FacingView)
{
nextVL = curVL + 1;
if (nextVL >= nverts) nextVL = 0;
nextVR = curVR - 1;
if ((s32)nextVR < 0) nextVR = nverts - 1;
}
else
{
nextVL = curVL - 1;
if ((s32)nextVL < 0) nextVL = nverts - 1;
nextVR = curVR + 1;
if (nextVR >= nverts) nextVR = 0;
}
s32 scaledPositions[10][2];
s32 ytop = ScreenHeight, ybot = 0;
for (int i = 0; i < polygon->NumVertices; i++)
{
if (HiresCoordinates)
{
scaledPositions[i][0] = (polygon->Vertices[i]->HiresPosition[0] * ScaleFactor) >> 4;
scaledPositions[i][1] = (polygon->Vertices[i]->HiresPosition[1] * ScaleFactor) >> 4;
}
else
{
scaledPositions[i][0] = polygon->Vertices[i]->FinalPosition[0] * ScaleFactor;
scaledPositions[i][1] = polygon->Vertices[i]->FinalPosition[1] * ScaleFactor;
}
ytop = std::min(scaledPositions[i][1], ytop);
ybot = std::max(scaledPositions[i][1], ybot);
}
RenderPolygons[i].YTop = ytop;
RenderPolygons[i].YBot = ybot;
RenderPolygons[i].XMin = ScreenWidth;
RenderPolygons[i].XMax = 0;
if (ybot == ytop)
{
vtop = 0; vbot = 0;
RenderPolygons[i].YBot++;
int j = 1;
if (scaledPositions[j][0] < scaledPositions[vtop][0]) vtop = j;
if (scaledPositions[j][0] > scaledPositions[vbot][0]) vbot = j;
j = nverts - 1;
if (scaledPositions[j][0] < scaledPositions[vtop][0]) vtop = j;
if (scaledPositions[j][0] > scaledPositions[vbot][0]) vbot = j;
assert(numYSpans < MaxYSpanSetups);
u32 curSpanL = numYSpans;
SetupYSpanDummy(&RenderPolygons[i], &YSpanSetups[numYSpans++], polygon, vtop, 0, scaledPositions);
assert(numYSpans < MaxYSpanSetups);
u32 curSpanR = numYSpans;
SetupYSpanDummy(&RenderPolygons[i], &YSpanSetups[numYSpans++], polygon, vbot, 1, scaledPositions);
YSpanIndices[numSetupIndices].PolyIdx = i;
YSpanIndices[numSetupIndices].SpanIdxL = curSpanL;
YSpanIndices[numSetupIndices].SpanIdxR = curSpanR;
YSpanIndices[numSetupIndices].Y = ytop;
numSetupIndices++;
}
else
{
u32 curSpanL = numYSpans;
assert(numYSpans < MaxYSpanSetups);
SetupYSpan(&RenderPolygons[i], &YSpanSetups[numYSpans++], polygon, curVL, nextVL, 0, scaledPositions);
u32 curSpanR = numYSpans;
assert(numYSpans < MaxYSpanSetups);
SetupYSpan(&RenderPolygons[i], &YSpanSetups[numYSpans++], polygon, curVR, nextVR, 1, scaledPositions);
for (u32 y = ytop; y < ybot; y++)
{
if (y >= scaledPositions[nextVL][1] && curVL != polygon->VBottom)
{
while (y >= scaledPositions[nextVL][1] && curVL != polygon->VBottom)
{
curVL = nextVL;
if (polygon->FacingView)
{
nextVL = curVL + 1;
if (nextVL >= nverts)
nextVL = 0;
}
else
{
nextVL = curVL - 1;
if ((s32)nextVL < 0)
nextVL = nverts - 1;
}
}
assert(numYSpans < MaxYSpanSetups);
curSpanL = numYSpans;
SetupYSpan(&RenderPolygons[i], &YSpanSetups[numYSpans++], polygon, curVL, nextVL, 0, scaledPositions);
}
if (y >= scaledPositions[nextVR][1] && curVR != polygon->VBottom)
{
while (y >= scaledPositions[nextVR][1] && curVR != polygon->VBottom)
{
curVR = nextVR;
if (polygon->FacingView)
{
nextVR = curVR - 1;
if ((s32)nextVR < 0)
nextVR = nverts - 1;
}
else
{
nextVR = curVR + 1;
if (nextVR >= nverts)
nextVR = 0;
}
}
assert(numYSpans < MaxYSpanSetups);
curSpanR = numYSpans;
SetupYSpan(&RenderPolygons[i] ,&YSpanSetups[numYSpans++], polygon, curVR, nextVR, 1, scaledPositions);
}
YSpanIndices[numSetupIndices].PolyIdx = i;
YSpanIndices[numSetupIndices].SpanIdxL = curSpanL;
YSpanIndices[numSetupIndices].SpanIdxR = curSpanR;
YSpanIndices[numSetupIndices].Y = y;
numSetupIndices++;
}
}
//printf("polygon min max %d %d | %d %d\n", RenderPolygons[i].XMin, RenderPolygons[i].XMinY, RenderPolygons[i].XMax, RenderPolygons[i].XMaxY);
}
/*for (u32 i = 0; i < RenderNumPolygons; i++)
{
if (RenderPolygons[i].Variant >= numVariants)
{
printf("blarb2 %d %d %d\n", RenderPolygons[i].Variant, i, RenderNumPolygons);
}
//assert(RenderPolygons[i].Variant < numVariants);
}*/
if (numYSpans > 0)
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, YSpanSetupMemory);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(SpanSetupY)*numYSpans, YSpanSetups);
glBindBuffer(GL_TEXTURE_BUFFER, YSpanIndicesTextureMemory);
glBufferSubData(GL_TEXTURE_BUFFER, 0, numSetupIndices*4*2, YSpanIndices.data());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, RenderPolygonMemory);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, gpu.GPU3D.RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons);
// we haven't accessed image data yet, so we don't need to invalidate anything
}
//printf("found via %d %d %d of %d\n", foundviatexcache, foundviaprev, numslow, RenderNumPolygons);
// bind everything
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, RenderPolygonMemory);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, XSpanSetupMemory);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, YSpanSetupMemory);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, FinalTileMemory);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, BinResultMemory);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, WorkDescMemory);
MetaUniform meta;
meta.DispCnt = gpu.GPU3D.RenderDispCnt;
meta.NumPolygons = gpu.GPU3D.RenderNumPolygons;
meta.NumVariants = numVariants;
meta.AlphaRef = gpu.GPU3D.RenderAlphaRef;
{
u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++;
u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++;
u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++;
u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F;
meta.ClearColor = r | (g << 8) | (b << 16) | (a << 24);
meta.ClearDepth = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF;
meta.ClearAttr = gpu.GPU3D.RenderClearAttr1 & 0x3F008000;
}
for (u32 i = 0; i < 32; i++)
{
u32 color = gpu.GPU3D.RenderToonTable[i];
u32 r = (color << 1) & 0x3E;
u32 g = (color >> 4) & 0x3E;
u32 b = (color >> 9) & 0x3E;
if (r) r++;
if (g) g++;
if (b) b++;
meta.ToonTable[i*4+0] = r | (g << 8) | (b << 16);
}
for (u32 i = 0; i < 34; i++)
{
meta.ToonTable[i*4+1] = gpu.GPU3D.RenderFogDensityTable[i];
}
for (u32 i = 0; i < 8; i++)
{
u32 color = gpu.GPU3D.RenderEdgeTable[i];
u32 r = (color << 1) & 0x3E;
u32 g = (color >> 4) & 0x3E;
u32 b = (color >> 9) & 0x3E;
if (r) r++;
if (g) g++;
if (b) b++;
meta.ToonTable[i*4+2] = r | (g << 8) | (b << 16);
}
meta.FogOffset = gpu.GPU3D.RenderFogOffset;
meta.FogShift = gpu.GPU3D.RenderFogShift;
{
u32 fogR = (gpu.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++;
u32 fogG = (gpu.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++;
u32 fogB = (gpu.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++;
u32 fogA = (gpu.GPU3D.RenderFogColor >> 16) & 0x1F;
meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24);
}
glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(MetaUniform), &meta);
glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory);
glUseProgram(ShaderClearCoarseBinMask);
glDispatchCompute(TilesPerLine*TileLines/32, 1, 1);
bool wbuffer = false;
if (numYSpans > 0)
{
wbuffer = gpu.GPU3D.RenderPolygonRAM[0]->WBuffer;
glUseProgram(ShaderClearIndirectWorkCount);
glDispatchCompute((numVariants+31)/32, 1, 1);
// calculate x-spans
glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI);
glUseProgram(ShaderInterpXSpans[wbuffer]);
glDispatchCompute((numSetupIndices + 31) / 32, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
// bin polygons
glUseProgram(ShaderBinCombined);
glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
// calculate list offsets
glUseProgram(ShaderCalculateWorkListOffset);
glDispatchCompute((numVariants + 31) / 32, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
// sort shader work
glUseProgram(ShaderSortWork);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount));
glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
glActiveTexture(GL_TEXTURE0);
for (int i = 0; i < tilememoryLayer_Num; i++)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2+i, TileMemory[i]);
// rasterise
{
bool highLightMode = gpu.GPU3D.RenderDispCnt & (1<<1);
GLuint shadersNoTexture[] =
{
ShaderRasteriseNoTexture[wbuffer],
ShaderRasteriseNoTexture[wbuffer],
highLightMode
? ShaderRasteriseNoTextureHighlight[wbuffer]
: ShaderRasteriseNoTextureToon[wbuffer],
ShaderRasteriseNoTexture[wbuffer],
ShaderRasteriseShadowMask[wbuffer]
};
GLuint shadersUseTexture[] =
{
ShaderRasteriseUseTextureModulate[wbuffer],
ShaderRasteriseUseTextureDecal[wbuffer],
highLightMode
? ShaderRasteriseUseTextureHighlight[wbuffer]
: ShaderRasteriseUseTextureToon[wbuffer],
ShaderRasteriseUseTextureDecal[wbuffer],
ShaderRasteriseShadowMask[wbuffer]
};
GLuint prevShader = 0;
s32 prevTexture = 0, prevSampler = 0;
for (int i = 0; i < numVariants; i++)
{
GLuint shader = 0;
if (variants[i].Texture == 0)
{
shader = shadersNoTexture[variants[i].BlendMode];
}
else
{
shader = shadersUseTexture[variants[i].BlendMode];
if (variants[i].Texture != prevTexture)
{
glBindTexture(GL_TEXTURE_2D_ARRAY, variants[i].Texture);
prevTexture = variants[i].Texture;
}
if (variants[i].Sampler != prevSampler)
{
glBindSampler(0, variants[i].Sampler);
prevSampler = variants[i].Sampler;
}
}
assert(shader != 0);
if (shader != prevShader)
{
glUseProgram(shader);
prevShader = shader;
}
glUniform1ui(UniformIdxCurVariant, i);
glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
glDispatchComputeIndirect(offsetof(BinResultHeader, VariantWorkCount) + i*4*4);
}
}
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
// compose final image
glUseProgram(ShaderDepthBlend[wbuffer]);
glDispatchCompute(ScreenWidth/TileSize, ScreenHeight/TileSize, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
glBindImageTexture(1, LowResFramebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
u32 finalPassShader = 0;
if (gpu.GPU3D.RenderDispCnt & (1<<4))
finalPassShader |= 0x4;
if (gpu.GPU3D.RenderDispCnt & (1<<7))
finalPassShader |= 0x2;
if (gpu.GPU3D.RenderDispCnt & (1<<5))
finalPassShader |= 0x1;
glUseProgram(ShaderFinalPass[finalPassShader]);
glDispatchCompute(ScreenWidth/32, ScreenHeight, 1);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glBindSampler(0, 0);
/*u64 starttime = armGetSystemTick();
EmuQueue.waitIdle();
printf("total time %f\n", armTicksToNs(armGetSystemTick()-starttime)*0.000001f);*/
/*for (u32 i = 0; i < RenderNumPolygons; i++)
{
if (RenderPolygons[i].Variant >= numVariants)
{
printf("blarb %d %d %d\n", RenderPolygons[i].Variant, i, RenderNumPolygons);
}
//assert(RenderPolygons[i].Variant < numVariants);
}*/
/*for (int i = 0; i < binresult->SortWorkWorkCount[0]*32; i++)
{
printf("sorted %x %x\n", binresult->SortedWork[i*2+0], binresult->SortedWork[i*2+1]);
}*/
/* if (polygonvisible != -1)
{
SpanSetupX* xspans = Gfx::DataHeap->CpuAddr<SpanSetupX>(XSpanSetupMemory);
printf("span result\n");
Polygon* poly = RenderPolygonRAM[polygonvisible];
u32 xspanoffset = RenderPolygons[polygonvisible].FirstXSpan;
for (u32 i = 0; i < (poly->YBottom - poly->YTop); i++)
{
printf("%d: %d - %d | %d %d | %d %d\n", i + poly->YTop, xspans[xspanoffset + i].X0, xspans[xspanoffset + i].X1, xspans[xspanoffset + i].__pad0, xspans[xspanoffset + i].__pad1, RenderPolygons[polygonvisible].YTop, RenderPolygons[polygonvisible].YBot);
}
}*/
/*
printf("xspans: %d\n", numSetupIndices);
SpanSetupX* xspans = Gfx::DataHeap->CpuAddr<SpanSetupX>(XSpanSetupMemory[curSlice]);
for (int i = 0; i < numSetupIndices; i++)
{
printf("poly %d %d %d | line %d | %d to %d\n", YSpanIndices[i].PolyIdx, YSpanIndices[i].SpanIdxL, YSpanIndices[i].SpanIdxR, YSpanIndices[i].Y, xspans[i].X0, xspans[i].X1);
}
printf("bin result\n");
BinResult* binresult = Gfx::DataHeap->CpuAddr<BinResult>(BinResultMemory);
for (u32 y = 0; y < 192/8; y++)
{
for (u32 x = 0; x < 256/8; x++)
{
printf("%08x ", binresult->BinnedMaskCoarse[(x + y * (256/8)) * 2]);
}
printf("\n");
}*/
}
// Frame-restart hook. This renderer performs no work here; the body is
// intentionally empty.
void ComputeRenderer::RestartFrame(GPU& gpu)
{
    // intentionally a no-op
}
// Returns a pointer to the first pixel of scanline `line` inside FramebufferCPU.
//
// When `line` is 0, the contents of PixelBuffer are first copied into
// FramebufferCPU (256 x 192 pixels, 4 bytes each); every other line is served
// from that CPU-side copy without touching GL. If mapping the buffer fails,
// FramebufferCPU keeps whatever it held before.
//
// NOTE(review): `line` is not range-checked here; callers are presumably
// expected to pass 0..191 and to request line 0 first each frame - confirm.
u32* ComputeRenderer::GetLine(int line)
{
    constexpr int stride = 256;       // pixels per scanline
    constexpr int numLines = 192;     // scanlines copied per refresh
    constexpr int bytesPerPixel = 4;

    if (line == 0)
    {
        glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelBuffer);
        const void* mapped = glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
        if (mapped)
        {
            memcpy(&FramebufferCPU[0], mapped, bytesPerPixel * stride * numLines);
            // Unmap only after a successful map: calling glUnmapBuffer on a
            // buffer that is not mapped raises GL_INVALID_OPERATION.
            glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
        }
    }

    return &FramebufferCPU[stride * line];
}
void ComputeRenderer::SetupAccelFrame()
{
glBindTexture(GL_TEXTURE_2D, Framebuffer);
}
void ComputeRenderer::PrepareCaptureFrame()
{
glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelBuffer);
glBindTexture(GL_TEXTURE_2D, LowResFramebuffer);
glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr);
}
void ComputeRenderer::BindOutputTexture(int buffer)
{
CurGLCompositor.BindOutputTexture(buffer);
}
// Forwards to the compositor's RenderFrame, passing `gpu` and this renderer.
void ComputeRenderer::Blit(const GPU &gpu)
{
    this->CurGLCompositor.RenderFrame(gpu, *this);
}
void ComputeRenderer::Stop(const GPU &gpu)
{
CurGLCompositor.Stop(gpu);
}
}