stop using fixed size buffers based on scale factor in shaders
this makes shader compile times tolerable on Wintel - beginning of the shader cache - increase size of tile idx in workdesc to 20 bits
This commit is contained in:
parent
2272a8a974
commit
e7168ac563
|
@ -39,7 +39,7 @@ ComputeRenderer::~ComputeRenderer()
|
|||
|
||||
|
||||
|
||||
bool ComputeRenderer::CompileShader(GLuint& shader, const char* source, const std::initializer_list<const char*>& defines)
|
||||
bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines)
|
||||
{
|
||||
std::string shaderName;
|
||||
std::string shaderSource;
|
||||
|
@ -72,8 +72,8 @@ void blah(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,con
|
|||
|
||||
bool ComputeRenderer::Init()
|
||||
{
|
||||
//glDebugMessageCallback(blah, NULL);
|
||||
//glEnable(GL_DEBUG_OUTPUT);
|
||||
glDebugMessageCallback(blah, NULL);
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glGenBuffers(1, &YSpanSetupMemory);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, YSpanSetupMemory);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(SpanSetupY)*MaxYSpanSetups, nullptr, GL_DYNAMIC_DRAW);
|
||||
|
@ -86,7 +86,8 @@ bool ComputeRenderer::Init()
|
|||
glGenBuffers(1, &BinResultMemory);
|
||||
glGenBuffers(1, &FinalTileMemory);
|
||||
glGenBuffers(1, &YSpanIndicesTextureMemory);
|
||||
glGenBuffers(1, &TileMemory);
|
||||
glGenBuffers(tilememoryLayer_Num, TileMemory);
|
||||
glGenBuffers(1, &WorkDescMemory);
|
||||
|
||||
glGenTextures(1, &YSpanIndicesTexture);
|
||||
glGenTextures(1, &LowResFramebuffer);
|
||||
|
@ -123,9 +124,10 @@ void ComputeRenderer::DeInit()
|
|||
|
||||
glDeleteBuffers(1, &YSpanSetupMemory);
|
||||
glDeleteBuffers(1, &RenderPolygonMemory);
|
||||
glDeleteBuffers(1, &TileMemory);
|
||||
glDeleteBuffers(1, &XSpanSetupMemory);
|
||||
glDeleteBuffers(1, &BinResultMemory);
|
||||
glDeleteBuffers(tilememoryLayer_Num, TileMemory);
|
||||
glDeleteBuffers(1, &WorkDescMemory);
|
||||
glDeleteBuffers(1, &FinalTileMemory);
|
||||
glDeleteBuffers(1, &YSpanIndicesTextureMemory);
|
||||
glDeleteTextures(1, &YSpanIndicesTexture);
|
||||
|
@ -214,21 +216,25 @@ void ComputeRenderer::SetRenderSettings(GPU::RenderSettings& settings)
|
|||
|
||||
MaxWorkTiles = TilesPerLine*TileLines*8;
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, TileMemory);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, 4*3*TileSize*TileSize*MaxWorkTiles, nullptr, GL_DYNAMIC_DRAW);
|
||||
for (int i = 0; i < tilememoryLayer_Num; i++)
|
||||
{
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, TileMemory[i]);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, 4*TileSize*TileSize*MaxWorkTiles, nullptr, GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, FinalTileMemory);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, 4*3*2*ScreenWidth*ScreenHeight, nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
int binResultSize = sizeof(BinResultHeader)
|
||||
+ MaxWorkTiles*2*4 // UnsortedWorkDescs
|
||||
+ MaxWorkTiles*2*4 // SortedWork
|
||||
+ TilesPerLine*TileLines*CoarseBinStride*4 // BinnedMaskCoarse
|
||||
+ TilesPerLine*TileLines*BinStride*4 // BinnedMask
|
||||
+ TilesPerLine*TileLines*BinStride*4; // WorkOffsets
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, BinResultMemory);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, binResultSize, nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, WorkDescMemory);
|
||||
glBufferData(GL_SHADER_STORAGE_BUFFER, MaxWorkTiles*2*4*2, nullptr, GL_DYNAMIC_DRAW);
|
||||
|
||||
if (Framebuffer != 0)
|
||||
glDeleteTextures(1, &Framebuffer);
|
||||
glGenTextures(1, &Framebuffer);
|
||||
|
@ -1237,12 +1243,14 @@ void ComputeRenderer::RenderFrame()
|
|||
//printf("found via %d %d %d of %d\n", foundviatexcache, foundviaprev, numslow, RenderNumPolygons);
|
||||
|
||||
// bind everything
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, YSpanSetupMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, RenderPolygonMemory);
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, XSpanSetupMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, RenderPolygonMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, BinResultMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, TileMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, YSpanSetupMemory);
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, FinalTileMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, BinResultMemory);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, WorkDescMemory);
|
||||
|
||||
MetaUniform meta;
|
||||
meta.DispCnt = RenderDispCnt;
|
||||
|
@ -1327,7 +1335,6 @@ void ComputeRenderer::RenderFrame()
|
|||
glDispatchCompute((numVariants + 31) / 32, 1, 1);
|
||||
// glMemoryBarrier expects a bitfield of GL_*_BARRIER_BIT flags; GL_SHADER_STORAGE_BUFFER
// is a buffer binding target, so passing it does not request the shader storage barrier.
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
|
||||
|
||||
// sort shader work
|
||||
glUseProgram(ShaderSortWork);
|
||||
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory);
|
||||
|
@ -1336,6 +1343,9 @@ void ComputeRenderer::RenderFrame()
|
|||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
for (int i = 0; i < tilememoryLayer_Num; i++)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2+i, TileMemory[i]);
|
||||
|
||||
// rasterise
|
||||
{
|
||||
bool highLightMode = RenderDispCnt & (1<<1);
|
||||
|
|
|
@ -74,7 +74,17 @@ private:
|
|||
GLuint XSpanSetupMemory;
|
||||
GLuint BinResultMemory;
|
||||
GLuint RenderPolygonMemory;
|
||||
GLuint TileMemory;
|
||||
GLuint WorkDescMemory;
|
||||
|
||||
// Indices into the TileMemory array: one GL buffer per tile layer (color, depth, attributes).
enum
|
||||
{
|
||||
tilememoryLayer_Color,
|
||||
tilememoryLayer_Depth,
|
||||
tilememoryLayer_Attr,
|
||||
// Number of layers; used as the size of the TileMemory array.
tilememoryLayer_Num,
|
||||
};
|
||||
|
||||
GLuint TileMemory[tilememoryLayer_Num];
|
||||
GLuint FinalTileMemory;
|
||||
|
||||
u32 DummyLine[256] = {};
|
||||
|
@ -102,7 +112,7 @@ private:
|
|||
s32 DxInitial;
|
||||
|
||||
s32 XCovIncr;
|
||||
u32 IsDummy, __pad1;
|
||||
u32 IsDummy;
|
||||
};
|
||||
struct SpanSetupX
|
||||
{
|
||||
|
@ -138,7 +148,6 @@ private:
|
|||
u32 Attr;
|
||||
|
||||
float TextureLayer;
|
||||
u32 __pad0, __pad1;
|
||||
};
|
||||
|
||||
static constexpr int TileSize = 8;
|
||||
|
@ -233,7 +242,7 @@ private:
|
|||
void SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2]);
|
||||
void SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int side, s32 positions[10][2]);
|
||||
|
||||
bool CompileShader(GLuint& shader, const char* source, const std::initializer_list<const char*>& defines);
|
||||
bool CompileShader(GLuint& shader, const std::string& source, const std::initializer_list<const char*>& defines);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#ifndef GPU3D_COMPUTE_SHADERS
|
||||
#define GPU3D_COMPUTE_SHADERS
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace GPU3D
|
||||
{
|
||||
|
||||
|
@ -69,23 +71,67 @@ namespace ComputeRendererShaders
|
|||
|
||||
*/
|
||||
|
||||
const std::string XSpanSetupBuffer{R"(
|
||||
|
||||
const char* Common = R"(
|
||||
struct Polygon
|
||||
const uint XSpanSetup_Linear = 1U << 0;
|
||||
const uint XSpanSetup_FillInside = 1U << 1;
|
||||
const uint XSpanSetup_FillLeft = 1U << 2;
|
||||
const uint XSpanSetup_FillRight = 1U << 3;
|
||||
|
||||
struct XSpanSetup
|
||||
{
|
||||
int FirstXSpan;
|
||||
int YTop, YBot;
|
||||
int X0, X1;
|
||||
|
||||
int XMin, XMax;
|
||||
int XMinY, XMaxY;
|
||||
int InsideStart, InsideEnd, EdgeCovL, EdgeCovR;
|
||||
|
||||
int Variant;
|
||||
int XRecip;
|
||||
|
||||
uint Attr;
|
||||
uint Flags;
|
||||
|
||||
float TextureLayer;
|
||||
int Z0, Z1, W0, W1;
|
||||
int ColorR0, ColorG0, ColorB0;
|
||||
int ColorR1, ColorG1, ColorB1;
|
||||
int TexcoordU0, TexcoordV0;
|
||||
int TexcoordU1, TexcoordV1;
|
||||
|
||||
int CovLInitial, CovRInitial;
|
||||
};
|
||||
|
||||
#if defined(Rasterise)
|
||||
int CalcYFactorX(XSpanSetup span, int x)
|
||||
{
|
||||
x -= span.X0;
|
||||
|
||||
if (span.X0 != span.X1)
|
||||
{
|
||||
uint numLo = uint(x) * uint(span.W0);
|
||||
uint numHi = 0U;
|
||||
numHi |= numLo >> (32U-YFactorShift);
|
||||
numLo <<= YFactorShift;
|
||||
|
||||
uint den = uint(x) * uint(span.W0) + uint(span.X1 - span.X0 - x) * uint(span.W1);
|
||||
|
||||
if (den == 0)
|
||||
return 0;
|
||||
else
|
||||
return int(Div64_32_32(numHi, numLo, den));
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
layout (std430, binding = 1) buffer XSpanSetupsBuffer
|
||||
{
|
||||
XSpanSetup XSpanSetups[];
|
||||
};
|
||||
|
||||
)"};
|
||||
|
||||
const std::string YSpanSetupBuffer{R"(
|
||||
|
||||
struct YSpanSetup
|
||||
{
|
||||
// Attributes
|
||||
|
@ -113,53 +159,185 @@ struct YSpanSetup
|
|||
bool IsDummy;
|
||||
};
|
||||
|
||||
const uint XSpanSetup_Linear = 1U << 0;
|
||||
const uint XSpanSetup_FillInside = 1U << 1;
|
||||
const uint XSpanSetup_FillLeft = 1U << 2;
|
||||
const uint XSpanSetup_FillRight = 1U << 3;
|
||||
|
||||
struct XSpanSetup
|
||||
#if defined(InterpSpans)
|
||||
int CalcYFactorY(YSpanSetup span, int i)
|
||||
{
|
||||
int X0, X1;
|
||||
/*
|
||||
maybe it would be better to do use a 32x32=64 multiplication?
|
||||
*/
|
||||
uint numLo = uint(abs(i)) * uint(span.W0n);
|
||||
uint numHi = 0U;
|
||||
numHi |= numLo >> (32U-YFactorShift);
|
||||
numLo <<= YFactorShift;
|
||||
|
||||
int InsideStart, InsideEnd, EdgeCovL, EdgeCovR;
|
||||
uint den = uint(abs(i)) * uint(span.W0d) + uint(abs(span.I1 - span.I0 - i)) * span.W1d;
|
||||
|
||||
int XRecip;
|
||||
if (den == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return int(Div64_32_32(numHi, numLo, den));
|
||||
}
|
||||
}
|
||||
|
||||
uint Flags;
|
||||
int CalculateDx(int y, YSpanSetup span)
|
||||
{
|
||||
return span.DxInitial + (y - span.Y0) * span.Increment;
|
||||
}
|
||||
|
||||
int Z0, Z1, W0, W1;
|
||||
int ColorR0, ColorG0, ColorB0;
|
||||
int ColorR1, ColorG1, ColorB1;
|
||||
int TexcoordU0, TexcoordV0;
|
||||
int TexcoordU1, TexcoordV1;
|
||||
int CalculateX(int dx, YSpanSetup span)
|
||||
{
|
||||
int x = span.X0;
|
||||
if (span.X1 < span.X0)
|
||||
x -= dx >> 18;
|
||||
else
|
||||
x += dx >> 18;
|
||||
return clamp(x, span.XMin, span.XMax);
|
||||
}
|
||||
|
||||
int CovLInitial, CovRInitial;
|
||||
};
|
||||
void EdgeParams_XMajor(bool side, int dx, YSpanSetup span, out int edgelen, out int edgecov)
|
||||
{
|
||||
bool negative = span.X1 < span.X0;
|
||||
int len;
|
||||
if (side != negative)
|
||||
len = (dx >> 18) - ((dx-span.Increment) >> 18);
|
||||
else
|
||||
len = ((dx+span.Increment) >> 18) - (dx >> 18);
|
||||
edgelen = len;
|
||||
|
||||
layout (std140, binding = 0) readonly buffer YSpanSetupsBuffer
|
||||
int xlen = span.XMax + 1 - span.XMin;
|
||||
int startx = dx >> 18;
|
||||
if (negative) startx = xlen - startx;
|
||||
if (side) startx = startx - len + 1;
|
||||
|
||||
uint r;
|
||||
int startcov = int(Div(uint(((startx << 10) + 0x1FF) * (span.Y1 - span.Y0)), uint(xlen), r));
|
||||
edgecov = (1<<31) | ((startcov & 0x3FF) << 12) | (span.XCovIncr & 0x3FF);
|
||||
}
|
||||
|
||||
void EdgeParams_YMajor(bool side, int dx, YSpanSetup span, out int edgelen, out int edgecov)
|
||||
{
|
||||
bool negative = span.X1 < span.X0;
|
||||
edgelen = 1;
|
||||
|
||||
if (span.Increment == 0)
|
||||
{
|
||||
edgecov = 31;
|
||||
}
|
||||
else
|
||||
{
|
||||
int cov = ((dx >> 9) + (span.Increment >> 10)) >> 4;
|
||||
if ((cov >> 5) != (dx >> 18)) cov = 31;
|
||||
cov &= 0x1F;
|
||||
if (side == negative) cov = 0x1F - cov;
|
||||
|
||||
edgecov = cov;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
layout (std430, binding = 2) buffer YSpanSetupsBuffer
|
||||
{
|
||||
YSpanSetup YSpanSetups[];
|
||||
};
|
||||
|
||||
#if defined(InterpSpans) || defined(BinCombined) || defined(Rasterise)
|
||||
layout (std140, binding = 1)
|
||||
#ifdef InterpSpans
|
||||
writeonly
|
||||
#endif
|
||||
#if defined(BinCombined) || defined(Rasterise)
|
||||
readonly
|
||||
#endif
|
||||
buffer XSpanSetupsBuffer
|
||||
{
|
||||
XSpanSetup XSpanSetups[];
|
||||
};
|
||||
#endif
|
||||
)"};
|
||||
|
||||
layout (std140, binding = 2) readonly buffer PolygonBuffer
|
||||
// GLSL snippet declaring the Polygon struct and the read-only std430 storage buffer
// (binding 0) that exposes it as the unsized Polygons[] array. It is concatenated in
// front of the shader sources that read polygon data.
const std::string PolygonBuffer{R"(
|
||||
struct Polygon
|
||||
{
|
||||
int FirstXSpan;
|
||||
int YTop, YBot;
|
||||
|
||||
int XMin, XMax;
|
||||
int XMinY, XMaxY;
|
||||
|
||||
int Variant;
|
||||
|
||||
uint Attr;
|
||||
|
||||
float TextureLayer;
|
||||
};
|
||||
|
||||
layout (std430, binding = 0) readonly buffer PolygonBuffer
|
||||
{
|
||||
Polygon Polygons[];
|
||||
};
|
||||
)"};
|
||||
|
||||
// GLSL snippet declaring the binning result storage buffer (std430, binding 6).
// The coarse mask, fine mask and work offset arrays are no longer separate fixed-size
// members: they are packed back to back into the single unsized BinningMaskAndOffset[]
// array, and the Binning*Start constants give the first element of each region.
// The snippet relies on MaxVariants, TilesPerLine, TileLines, BinStride and
// CoarseBinStride being defined by the shader source it is appended to.
const std::string BinningBuffer{R"(
|
||||
|
||||
layout (std430, binding = 6) buffer BinResultBuffer
|
||||
{
|
||||
uvec4 VariantWorkCount[MaxVariants];
|
||||
uint SortedWorkOffset[MaxVariants];
|
||||
|
||||
uvec4 SortWorkWorkCount;
|
||||
|
||||
uint BinningMaskAndOffset[];
|
||||
//uint BinnedMaskCoarse[TilesPerLine*TileLines*CoarseBinStride];
|
||||
//uint BinnedMask[TilesPerLine*TileLines*BinStride];
|
||||
//uint WorkOffsets[TilesPerLine*TileLines*BinStride];
|
||||
};
|
||||
|
||||
const int BinningCoarseMaskStart = 0;
|
||||
const int BinningMaskStart = BinningCoarseMaskStart+TilesPerLine*TileLines*CoarseBinStride;
|
||||
const int BinningWorkOffsetsStart = BinningMaskStart+TilesPerLine*TileLines*BinStride;
|
||||
|
||||
)"};
|
||||
|
||||
/*
|
||||
structure of each WorkDesc item:
|
||||
y:
|
||||
bits 0-11: polygon idx
|
||||
bits 12-31: tile idx (before sorting: index within the variant; after sorting: index within all tiles)
|
||||
x:
|
||||
bits 0-15: X position on screen
|
||||
bits 16-31: Y position on screen
|
||||
*/
|
||||
// GLSL snippet declaring the work descriptor storage buffer (std430, binding 7).
// Unsorted and sorted descriptors share the single unsized WorkDescs[] array:
// the unsorted region starts at element 0 and the sorted region starts MaxWorkTiles
// elements later. MaxWorkTiles must be defined by the shader source this is appended to.
const std::string WorkDescBuffer{R"(
|
||||
layout (std430, binding = 7) buffer WorkDescBuffer
|
||||
{
|
||||
//uvec2 UnsortedWorkDescs[MaxWorkTiles];
|
||||
//uvec2 SortedWorkDescs[MaxWorkTiles];
|
||||
uvec2 WorkDescs[];
|
||||
};
|
||||
|
||||
const uint WorkDescsUnsortedStart = 0;
|
||||
const uint WorkDescsSortedStart = WorkDescsUnsortedStart+MaxWorkTiles;
|
||||
|
||||
)"};
|
||||
|
||||
// GLSL snippet declaring the per-tile color, depth and attribute storage buffers as
// three separate unsized std430 buffers at bindings 2, 3 and 4.
// NOTE(review): binding 2 is also used by YSpanSetupsBuffer, so the host is expected to
// rebind these slots before dispatching shaders that include this snippet - confirm
// against the renderer's bind calls.
const std::string Tilebuffers{R"(
|
||||
layout (std430, binding = 2) buffer ColorTileBuffer
|
||||
{
|
||||
uint ColorTiles[];
|
||||
};
|
||||
layout (std430, binding = 3) buffer DepthTileBuffer
|
||||
{
|
||||
uint DepthTiles[];
|
||||
};
|
||||
layout (std430, binding = 4) buffer AttrTileBuffer
|
||||
{
|
||||
uint AttrTiles[];
|
||||
};
|
||||
|
||||
)"};
|
||||
|
||||
// GLSL snippet declaring the final result storage buffer (std430, binding 5).
// Color, depth and attribute results are stored back to back in the single unsized
// ResultValue[] array; each region holds ScreenWidth*ScreenHeight*2 elements and the
// Result*Start constants give the first element of each region.
// ScreenWidth and ScreenHeight must be defined by the shader source this is appended to.
const std::string ResultBuffer{R"(
|
||||
layout (std430, binding = 5) buffer ResultBuffer
|
||||
{
|
||||
uint ResultValue[];
|
||||
};
|
||||
|
||||
const uint ResultColorStart = 0;
|
||||
const uint ResultDepthStart = ResultColorStart+ScreenWidth*ScreenHeight*2;
|
||||
const uint ResultAttrStart = ResultDepthStart+ScreenWidth*ScreenHeight*2;
|
||||
)"};
|
||||
|
||||
const char* Common = R"(
|
||||
|
||||
#define TileSize 8
|
||||
const int CoarseTileCountX = 8;
|
||||
|
@ -174,56 +352,8 @@ const int TileLines = ScreenHeight/TileSize;
|
|||
const int BinStride = 2048/32;
|
||||
const int CoarseBinStride = BinStride/32;
|
||||
|
||||
|
||||
const int MaxVariants = 256;
|
||||
|
||||
layout (std430, binding = 3)
|
||||
buffer BinResultBuffer
|
||||
{
|
||||
uvec4 VariantWorkCount[MaxVariants];
|
||||
uint SortedWorkOffset[MaxVariants];
|
||||
|
||||
uvec4 SortWorkWorkCount;
|
||||
uvec2 UnsortedWorkDescs[MaxWorkTiles];
|
||||
uvec2 SortedWork[MaxWorkTiles];
|
||||
|
||||
uint BinnedMaskCoarse[TilesPerLine*TileLines*CoarseBinStride];
|
||||
uint BinnedMask[TilesPerLine*TileLines*BinStride];
|
||||
uint WorkOffsets[TilesPerLine*TileLines*BinStride];
|
||||
};
|
||||
|
||||
#if defined(Rasterise) || defined(DepthBlend)
|
||||
layout (std430, binding = 4)
|
||||
#ifdef Rasterise
|
||||
writeonly
|
||||
#endif
|
||||
#ifdef DepthBlend
|
||||
readonly
|
||||
#endif
|
||||
buffer TilesBuffer
|
||||
{
|
||||
uint ColorTiles[MaxWorkTiles*TileSize*TileSize];
|
||||
uint DepthTiles[MaxWorkTiles*TileSize*TileSize];
|
||||
uint AttrTiles[MaxWorkTiles*TileSize*TileSize];
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(DepthBlend) || defined(FinalPass)
|
||||
layout (std430, binding = 5)
|
||||
#ifdef DepthBlend
|
||||
writeonly
|
||||
#endif
|
||||
#ifdef FinalPass
|
||||
readonly
|
||||
#endif
|
||||
buffer RasterResult
|
||||
{
|
||||
uint ColorResult[ScreenWidth*ScreenHeight*2];
|
||||
uint DepthResult[ScreenWidth*ScreenHeight*2];
|
||||
uint AttrResult[ScreenWidth*ScreenHeight*2];
|
||||
};
|
||||
#endif
|
||||
|
||||
layout (std140, binding = 0) uniform MetaUniform
|
||||
{
|
||||
uint NumPolygons;
|
||||
|
@ -243,6 +373,12 @@ layout (std140, binding = 0) uniform MetaUniform
|
|||
uint FogOffset, FogShift, FogColor;
|
||||
};
|
||||
|
||||
#ifdef InterpSpans
|
||||
const int YFactorShift = 9;
|
||||
#else
|
||||
const int YFactorShift = 8;
|
||||
#endif
|
||||
|
||||
#if defined(InterpSpans) || defined(Rasterise)
|
||||
uint Umulh(uint a, uint b)
|
||||
{
|
||||
|
@ -338,58 +474,6 @@ uint Div64_32_32(uint numHi, uint numLo, uint den)
|
|||
return bitfieldInsert(qhat, q1, 16, 16);
|
||||
}
|
||||
|
||||
#ifdef InterpSpans
|
||||
const int YFactorShift = 9;
|
||||
#else
|
||||
const int YFactorShift = 8;
|
||||
#endif
|
||||
|
||||
int CalcYFactorY(YSpanSetup span, int i)
|
||||
{
|
||||
/*
|
||||
maybe it would be better to do use a 32x32=64 multiplication?
|
||||
*/
|
||||
uint numLo = uint(abs(i)) * uint(span.W0n);
|
||||
uint numHi = 0U;
|
||||
numHi |= numLo >> (32U-YFactorShift);
|
||||
numLo <<= YFactorShift;
|
||||
|
||||
uint den = uint(abs(i)) * uint(span.W0d) + uint(abs(span.I1 - span.I0 - i)) * span.W1d;
|
||||
|
||||
if (den == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return int(Div64_32_32(numHi, numLo, den));
|
||||
}
|
||||
}
|
||||
|
||||
int CalcYFactorX(XSpanSetup span, int x)
|
||||
{
|
||||
x -= span.X0;
|
||||
|
||||
if (span.X0 != span.X1)
|
||||
{
|
||||
uint numLo = uint(x) * uint(span.W0);
|
||||
uint numHi = 0U;
|
||||
numHi |= numLo >> (32U-YFactorShift);
|
||||
numLo <<= YFactorShift;
|
||||
|
||||
uint den = uint(x) * uint(span.W0) + uint(span.X1 - span.X0 - x) * uint(span.W1);
|
||||
|
||||
if (den == 0)
|
||||
return 0;
|
||||
else
|
||||
return int(Div64_32_32(numHi, numLo, den));
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int InterpolateAttrPersp(int y0, int y1, int ifactor)
|
||||
{
|
||||
if (y0 == y1)
|
||||
|
@ -548,67 +632,14 @@ uint InterpolateZWBuffer(int z0, int z1, int ifactor)
|
|||
return uint(z1) + uint((int64_t(z0-z1) * int64_t((1<<YFactorShift)-ifactor)) >> YFactorShift);
|
||||
}*/
|
||||
}
|
||||
|
||||
int CalculateDx(int y, YSpanSetup span)
|
||||
{
|
||||
return span.DxInitial + (y - span.Y0) * span.Increment;
|
||||
}
|
||||
|
||||
int CalculateX(int dx, YSpanSetup span)
|
||||
{
|
||||
int x = span.X0;
|
||||
if (span.X1 < span.X0)
|
||||
x -= dx >> 18;
|
||||
else
|
||||
x += dx >> 18;
|
||||
return clamp(x, span.XMin, span.XMax);
|
||||
}
|
||||
|
||||
void EdgeParams_XMajor(bool side, int dx, YSpanSetup span, out int edgelen, out int edgecov)
|
||||
{
|
||||
bool negative = span.X1 < span.X0;
|
||||
int len;
|
||||
if (side != negative)
|
||||
len = (dx >> 18) - ((dx-span.Increment) >> 18);
|
||||
else
|
||||
len = ((dx+span.Increment) >> 18) - (dx >> 18);
|
||||
edgelen = len;
|
||||
|
||||
int xlen = span.XMax + 1 - span.XMin;
|
||||
int startx = dx >> 18;
|
||||
if (negative) startx = xlen - startx;
|
||||
if (side) startx = startx - len + 1;
|
||||
|
||||
uint r;
|
||||
int startcov = int(Div(uint(((startx << 10) + 0x1FF) * (span.Y1 - span.Y0)), uint(xlen), r));
|
||||
edgecov = (1<<31) | ((startcov & 0x3FF) << 12) | (span.XCovIncr & 0x3FF);
|
||||
}
|
||||
|
||||
void EdgeParams_YMajor(bool side, int dx, YSpanSetup span, out int edgelen, out int edgecov)
|
||||
{
|
||||
bool negative = span.X1 < span.X0;
|
||||
edgelen = 1;
|
||||
|
||||
if (span.Increment == 0)
|
||||
{
|
||||
edgecov = 31;
|
||||
}
|
||||
else
|
||||
{
|
||||
int cov = ((dx >> 9) + (span.Increment >> 10)) >> 4;
|
||||
if ((cov >> 5) != (dx >> 18)) cov = 31;
|
||||
cov &= 0x1F;
|
||||
if (side == negative) cov = 0x1F - cov;
|
||||
|
||||
edgecov = cov;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
)";
|
||||
|
||||
const char* InterpSpans = R"(
|
||||
|
||||
const std::string InterpSpans =
|
||||
PolygonBuffer +
|
||||
XSpanSetupBuffer +
|
||||
YSpanSetupBuffer + R"(
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
layout (binding = 0, rgba16ui) uniform readonly uimageBuffer SetupIndices;
|
||||
|
@ -803,7 +834,8 @@ void main()
|
|||
|
||||
)";
|
||||
|
||||
const char* ClearIndirectWorkCount = R"(
|
||||
const std::string ClearIndirectWorkCount =
|
||||
BinningBuffer + R"(
|
||||
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
|
@ -814,19 +846,23 @@ void main()
|
|||
|
||||
)";
|
||||
|
||||
const char* ClearCoarseBinMask = R"(
|
||||
|
||||
const std::string ClearCoarseBinMask =
|
||||
BinningBuffer + R"(
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
BinnedMaskCoarse[gl_GlobalInvocationID.x*CoarseBinStride+0] = 0;
|
||||
BinnedMaskCoarse[gl_GlobalInvocationID.x*CoarseBinStride+1] = 0;
|
||||
BinningMaskAndOffset[BinningCoarseMaskStart + gl_GlobalInvocationID.x*CoarseBinStride+0] = 0;
|
||||
BinningMaskAndOffset[BinningCoarseMaskStart + gl_GlobalInvocationID.x*CoarseBinStride+1] = 0;
|
||||
}
|
||||
|
||||
)";
|
||||
|
||||
const char* BinCombined = R"(
|
||||
const std::string BinCombined =
|
||||
PolygonBuffer +
|
||||
BinningBuffer +
|
||||
XSpanSetupBuffer +
|
||||
WorkDescBuffer + R"(
|
||||
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
|
@ -942,15 +978,15 @@ void main()
|
|||
|
||||
int linearTile = fineTile.x + fineTile.y * TilesPerLine + coarseTile.x * CoarseTileCountX + coarseTile.y * TilesPerLine * CoarseTileCountY;
|
||||
|
||||
BinnedMask[linearTile * BinStride + groupIdx] = binnedMask;
|
||||
BinningMaskAndOffset[BinningMaskStart + linearTile * BinStride + groupIdx] = binnedMask;
|
||||
int coarseMaskIdx = linearTile * CoarseBinStride + (groupIdx >> 5);
|
||||
if (binnedMask != 0U)
|
||||
atomicOr(BinnedMaskCoarse[coarseMaskIdx], 1U << (groupIdx & 0x1F));
|
||||
atomicOr(BinningMaskAndOffset[BinningCoarseMaskStart + coarseMaskIdx], 1U << (groupIdx & 0x1F));
|
||||
|
||||
if (binnedMask != 0U)
|
||||
{
|
||||
uint workOffset = atomicAdd(VariantWorkCount[0].w, uint(bitCount(binnedMask)));
|
||||
WorkOffsets[linearTile * BinStride + groupIdx] = workOffset;
|
||||
BinningMaskAndOffset[BinningWorkOffsetsStart + linearTile * BinStride + groupIdx] = workOffset;
|
||||
|
||||
uint tilePositionCombined = bitfieldInsert(fineTileTopLeft.x, fineTileTopLeft.y, 16, 16);
|
||||
|
||||
|
@ -964,7 +1000,7 @@ void main()
|
|||
int variantIdx = Polygons[polygonIdx].Variant;
|
||||
|
||||
int inVariantOffset = int(atomicAdd(VariantWorkCount[variantIdx].z, 1));
|
||||
UnsortedWorkDescs[workOffset + idx] = uvec2(tilePositionCombined, bitfieldInsert(inVariantOffset, polygonIdx, 16, 16));
|
||||
WorkDescs[WorkDescsUnsortedStart + workOffset + idx] = uvec2(tilePositionCombined, bitfieldInsert(polygonIdx, inVariantOffset, 12, 20));
|
||||
|
||||
idx++;
|
||||
}
|
||||
|
@ -973,7 +1009,8 @@ void main()
|
|||
|
||||
)";
|
||||
|
||||
const char* CalcOffsets = R"(
|
||||
const std::string CalcOffsets =
|
||||
BinningBuffer + R"(
|
||||
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
|
@ -993,7 +1030,10 @@ void main()
|
|||
|
||||
)";
|
||||
|
||||
const char* SortWork = R"(
|
||||
const std::string SortWork =
|
||||
PolygonBuffer +
|
||||
BinningBuffer +
|
||||
WorkDescBuffer + R"(
|
||||
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
|
@ -1001,19 +1041,24 @@ void main()
|
|||
{
|
||||
if (gl_GlobalInvocationID.x < VariantWorkCount[0].w)
|
||||
{
|
||||
uvec2 workDesc = UnsortedWorkDescs[gl_GlobalInvocationID.x];
|
||||
int inVariantOffset = int(bitfieldExtract(workDesc.y, 0, 16));
|
||||
int polygonIdx = int(bitfieldExtract(workDesc.y, 16, 16));
|
||||
uvec2 workDesc = WorkDescs[WorkDescsUnsortedStart + gl_GlobalInvocationID.x];
|
||||
int inVariantOffset = int(bitfieldExtract(workDesc.y, 12, 20));
|
||||
int polygonIdx = int(bitfieldExtract(workDesc.y, 0, 12));
|
||||
int variantIdx = Polygons[polygonIdx].Variant;
|
||||
|
||||
int sortedIndex = int(SortedWorkOffset[variantIdx]) + inVariantOffset;
|
||||
SortedWork[sortedIndex] = uvec2(workDesc.x, bitfieldInsert(workDesc.y, gl_GlobalInvocationID.x, 0, 16));
|
||||
WorkDescs[WorkDescsSortedStart + sortedIndex] = uvec2(workDesc.x, bitfieldInsert(workDesc.y, gl_GlobalInvocationID.x, 12, 20));
|
||||
}
|
||||
}
|
||||
|
||||
)";
|
||||
|
||||
const char* Rasterise = R"(
|
||||
const std::string Rasterise =
|
||||
PolygonBuffer +
|
||||
WorkDescBuffer +
|
||||
XSpanSetupBuffer +
|
||||
BinningBuffer +
|
||||
Tilebuffers + R"(
|
||||
|
||||
layout (local_size_x = TileSize, local_size_y = TileSize) in;
|
||||
|
||||
|
@ -1024,10 +1069,10 @@ layout (location = 1) uniform vec2 InvTextureSize;
|
|||
|
||||
void main()
|
||||
{
|
||||
uvec2 workDesc = SortedWork[SortedWorkOffset[CurVariant] + gl_WorkGroupID.z];
|
||||
Polygon polygon = Polygons[bitfieldExtract(workDesc.y, 16, 16)];
|
||||
uvec2 workDesc = WorkDescs[WorkDescsSortedStart + SortedWorkOffset[CurVariant] + gl_WorkGroupID.z];
|
||||
Polygon polygon = Polygons[bitfieldExtract(workDesc.y, 0, 12)];
|
||||
ivec2 position = ivec2(bitfieldExtract(workDesc.x, 0, 16), bitfieldExtract(workDesc.x, 16, 16)) + ivec2(gl_LocalInvocationID.xy);
|
||||
int tileOffset = int(bitfieldExtract(workDesc.y, 0, 16)) * TileSize * TileSize + TileSize * int(gl_LocalInvocationID.y) + int(gl_LocalInvocationID.x);
|
||||
int tileOffset = int(bitfieldExtract(workDesc.y, 12, 20)) * TileSize * TileSize + TileSize * int(gl_LocalInvocationID.y) + int(gl_LocalInvocationID.x);
|
||||
|
||||
uint color = 0U;
|
||||
if (position.y >= polygon.YTop && position.y < polygon.YBot)
|
||||
|
@ -1203,7 +1248,11 @@ void main()
|
|||
|
||||
)";
|
||||
|
||||
const char* DepthBlend = R"(
|
||||
const std::string DepthBlend =
|
||||
PolygonBuffer +
|
||||
Tilebuffers +
|
||||
ResultBuffer +
|
||||
BinningBuffer + R"(
|
||||
|
||||
layout (local_size_x = TileSize, local_size_y = TileSize) in;
|
||||
|
||||
|
@ -1253,8 +1302,8 @@ void ProcessCoarseMask(int linearTile, uint coarseMask, uint coarseOffset,
|
|||
|
||||
uint tileOffset = linearTile * BinStride + coarseBit + coarseOffset;
|
||||
|
||||
uint fineMask = BinnedMask[tileOffset];
|
||||
uint workIdx = WorkOffsets[tileOffset];
|
||||
uint fineMask = BinningMaskAndOffset[BinningMaskStart + tileOffset];
|
||||
uint workIdx = BinningMaskAndOffset[BinningWorkOffsetsStart + tileOffset];
|
||||
|
||||
while (fineMask != 0U)
|
||||
{
|
||||
|
@ -1403,8 +1452,8 @@ void main()
|
|||
{
|
||||
int linearTile = int(gl_WorkGroupID.x + (gl_WorkGroupID.y * TilesPerLine));
|
||||
|
||||
uint coarseMaskLo = BinnedMaskCoarse[linearTile*CoarseBinStride + 0];
|
||||
uint coarseMaskHi = BinnedMaskCoarse[linearTile*CoarseBinStride + 1];
|
||||
uint coarseMaskLo = BinningMaskAndOffset[BinningCoarseMaskStart + linearTile*CoarseBinStride + 0];
|
||||
uint coarseMaskHi = BinningMaskAndOffset[BinningCoarseMaskStart + linearTile*CoarseBinStride + 1];
|
||||
|
||||
uvec2 color = uvec2(ClearColor, 0U);
|
||||
uvec2 depth = uvec2(ClearDepth, 0U);
|
||||
|
@ -1416,17 +1465,18 @@ void main()
|
|||
ProcessCoarseMask(linearTile, coarseMaskHi, BinStride/2, color, depth, attr, stencil, prevIsShadowMask);
|
||||
|
||||
int resultOffset = int(gl_GlobalInvocationID.x) + int(gl_GlobalInvocationID.y) * ScreenWidth;
|
||||
ColorResult[resultOffset] = color.x;
|
||||
ColorResult[resultOffset+FramebufferStride] = color.y;
|
||||
DepthResult[resultOffset] = depth.x;
|
||||
DepthResult[resultOffset+FramebufferStride] = depth.y;
|
||||
AttrResult[resultOffset] = attr.x;
|
||||
AttrResult[resultOffset+FramebufferStride] = attr.y;
|
||||
ResultValue[ResultColorStart+resultOffset] = color.x;
|
||||
ResultValue[ResultColorStart+resultOffset+FramebufferStride] = color.y;
|
||||
ResultValue[ResultDepthStart+resultOffset] = depth.x;
|
||||
ResultValue[ResultDepthStart+resultOffset+FramebufferStride] = depth.y;
|
||||
ResultValue[ResultAttrStart+resultOffset] = attr.x;
|
||||
ResultValue[ResultAttrStart+resultOffset+FramebufferStride] = attr.y;
|
||||
}
|
||||
|
||||
)";
|
||||
|
||||
const char* FinalPass = R"(
|
||||
const std::string FinalPass =
|
||||
ResultBuffer + R"(
|
||||
|
||||
layout (local_size_x = 32) in;
|
||||
|
||||
|
@ -1481,9 +1531,9 @@ void main()
|
|||
int srcX = int(gl_GlobalInvocationID.x);
|
||||
int resultOffset = int(srcX) + int(gl_GlobalInvocationID.y) * ScreenWidth;
|
||||
|
||||
uvec2 color = uvec2(ColorResult[resultOffset], ColorResult[resultOffset+FramebufferStride]);
|
||||
uvec2 depth = uvec2(DepthResult[resultOffset], DepthResult[resultOffset+FramebufferStride]);
|
||||
uvec2 attr = uvec2(AttrResult[resultOffset], AttrResult[resultOffset+FramebufferStride]);
|
||||
uvec2 color = uvec2(ResultValue[resultOffset+ResultColorStart], ResultValue[resultOffset+FramebufferStride+ResultColorStart]);
|
||||
uvec2 depth = uvec2(ResultValue[resultOffset+ResultDepthStart], ResultValue[resultOffset+FramebufferStride+ResultDepthStart]);
|
||||
uvec2 attr = uvec2(ResultValue[resultOffset+ResultAttrStart], ResultValue[resultOffset+FramebufferStride+ResultAttrStart]);
|
||||
|
||||
#ifdef EdgeMarking
|
||||
if ((attr.x & 0xFU) != 0U)
|
||||
|
@ -1493,23 +1543,23 @@ void main()
|
|||
|
||||
if (srcX > 0U)
|
||||
{
|
||||
otherAttr.x = AttrResult[resultOffset-1];
|
||||
otherDepth.x = DepthResult[resultOffset-1];
|
||||
otherAttr.x = ResultValue[resultOffset-1+ResultAttrStart];
|
||||
otherDepth.x = ResultValue[resultOffset-1+ResultDepthStart];
|
||||
}
|
||||
if (srcX < ScreenWidth-1)
|
||||
{
|
||||
otherAttr.y = AttrResult[resultOffset+1];
|
||||
otherDepth.y = DepthResult[resultOffset+1];
|
||||
otherAttr.y = ResultValue[resultOffset+1+ResultAttrStart];
|
||||
otherDepth.y = ResultValue[resultOffset+1+ResultDepthStart];
|
||||
}
|
||||
if (gl_GlobalInvocationID.y > 0U)
|
||||
{
|
||||
otherAttr.z = AttrResult[resultOffset-ScreenWidth];
|
||||
otherDepth.z = DepthResult[resultOffset-ScreenWidth];
|
||||
otherAttr.z = ResultValue[resultOffset-ScreenWidth+ResultAttrStart];
|
||||
otherDepth.z = ResultValue[resultOffset-ScreenWidth+ResultDepthStart];
|
||||
}
|
||||
if (gl_GlobalInvocationID.y < ScreenHeight-1)
|
||||
{
|
||||
otherAttr.w = AttrResult[resultOffset+ScreenWidth];
|
||||
otherDepth.w = DepthResult[resultOffset+ScreenWidth];
|
||||
otherAttr.w = ResultValue[resultOffset+ScreenWidth+ResultAttrStart];
|
||||
otherDepth.w = ResultValue[resultOffset+ScreenWidth+ResultDepthStart];
|
||||
}
|
||||
|
||||
uint polyId = bitfieldExtract(attr.x, 24, 6);
|
||||
|
|
|
@ -28,26 +28,22 @@
|
|||
namespace GPU3D
|
||||
{
|
||||
|
||||
bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
||||
bool GLRenderer::BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs)
|
||||
{
|
||||
char shadername[32];
|
||||
sprintf(shadername, "RenderShader%02X", flags);
|
||||
|
||||
int headerlen = strlen(kShaderHeader);
|
||||
|
||||
int vslen = strlen(vs);
|
||||
int vsclen = strlen(kRenderVSCommon);
|
||||
char* vsbuf = new char[headerlen + vsclen + vslen + 1];
|
||||
strcpy(&vsbuf[0], kShaderHeader);
|
||||
strcpy(&vsbuf[headerlen], kRenderVSCommon);
|
||||
strcpy(&vsbuf[headerlen + vsclen], vs);
|
||||
std::string vsbuf;
|
||||
vsbuf += kShaderHeader;
|
||||
vsbuf += kRenderVSCommon;
|
||||
vsbuf += vs;
|
||||
|
||||
int fslen = strlen(fs);
|
||||
int fsclen = strlen(kRenderFSCommon);
|
||||
char* fsbuf = new char[headerlen + fsclen + fslen + 1];
|
||||
strcpy(&fsbuf[0], kShaderHeader);
|
||||
strcpy(&fsbuf[headerlen], kRenderFSCommon);
|
||||
strcpy(&fsbuf[headerlen + fsclen], fs);
|
||||
std::string fsbuf;
|
||||
fsbuf += kShaderHeader;
|
||||
fsbuf += kRenderFSCommon;
|
||||
fsbuf += fs;
|
||||
|
||||
GLuint prog;
|
||||
bool ret = OpenGL::CompileVertexFragmentProgram(prog,
|
||||
|
@ -56,9 +52,6 @@ bool GLRenderer::BuildRenderShader(u32 flags, const char* vs, const char* fs)
|
|||
{{"vPosition", 0}, {"vColor", 1}, {"vTexcoord", 2}, {"vPolygonAttr", 3}},
|
||||
{{"oColor", 0}, {"oAttr", 1}});
|
||||
|
||||
delete[] vsbuf;
|
||||
delete[] fsbuf;
|
||||
|
||||
if (!ret) return false;
|
||||
|
||||
GLint uni_id = glGetUniformBlockIndex(prog, "uConfig");
|
||||
|
|
|
@ -67,7 +67,7 @@ private:
|
|||
|
||||
RendererPolygon PolygonList[2048] {};
|
||||
|
||||
bool BuildRenderShader(u32 flags, const char* vs, const char* fs);
|
||||
bool BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs);
|
||||
void UseRenderShader(u32 flags);
|
||||
void SetupPolygon(RendererPolygon* rp, Polygon* polygon);
|
||||
u32* SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u32* vptr);
|
||||
|
|
|
@ -18,15 +18,174 @@
|
|||
|
||||
#include "OpenGLSupport.h"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY
|
||||
#include "xxhash/xxhash.h"
|
||||
|
||||
using Platform::Log;
|
||||
using Platform::LogLevel;
|
||||
|
||||
namespace OpenGL
|
||||
{
|
||||
|
||||
bool CompilerShader(GLuint& id, const char* source, const char* name, const char* type)
|
||||
struct ShaderCacheEntry
|
||||
{
|
||||
u32 Length;
|
||||
u8* Data;
|
||||
u32 BinaryFormat;
|
||||
|
||||
ShaderCacheEntry(u8* data, u32 length, u32 binaryFmt)
|
||||
: Length(length), Data(data), BinaryFormat(binaryFmt)
|
||||
{
|
||||
assert(data != nullptr);
|
||||
}
|
||||
|
||||
ShaderCacheEntry(const ShaderCacheEntry&) = delete;
|
||||
ShaderCacheEntry(ShaderCacheEntry&& other)
|
||||
{
|
||||
Data = other.Data;
|
||||
Length = other.Length;
|
||||
BinaryFormat = other.BinaryFormat;
|
||||
|
||||
other.Data = nullptr;
|
||||
other.Length = 0;
|
||||
other.BinaryFormat = 0;
|
||||
}
|
||||
|
||||
~ShaderCacheEntry()
|
||||
{
|
||||
if (Data) // check whether it was moved
|
||||
delete[] Data;
|
||||
}
|
||||
};
|
||||
|
||||
// In-memory shader cache, keyed by the 64-bit hash that LoadShaderCache reads
// as `sourceHash` for each entry.
std::unordered_map<u64, ShaderCacheEntry> ShaderCache;
|
||||
// Keys of entries that SaveShaderCache still has to append to the cache file;
// SaveShaderCache clears this list when it is done.
std::vector<u64> NewShaders;
|
||||
|
||||
// First u32 of the cache file; LoadShaderCache rejects files whose magic differs.
constexpr u32 ShaderCacheMagic = 0x11CAC4E1;
|
||||
// Second u32 of the cache file; LoadShaderCache rejects files whose version differs.
constexpr u32 ShaderCacheVersion = 1;
|
||||
|
||||
void LoadShaderCache()
|
||||
{
|
||||
// for now the shader cache only contains only compute shaders
|
||||
// because they take the longest to compile
|
||||
FILE* file = Platform::OpenLocalFile("shadercache", "rb");
|
||||
if (file == nullptr)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not find shader cache\n");
|
||||
return;
|
||||
}
|
||||
|
||||
u32 magic, version, numPrograms;
|
||||
if (fread(&magic, 4, 1, file) != 1 || magic != ShaderCacheMagic)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file has invalid magic\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
if (fread(&version, 4, 1, file) != 1 || version != ShaderCacheVersion)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file has bad version\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
if (fread(&numPrograms, 4, 1, file) != 1)
|
||||
{
|
||||
Log(LogLevel::Error, "Shader cache file invalid program count\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
// not the best approach, because once changes pile up
|
||||
// we read and overwrite the old files
|
||||
for (u32 i = 0; i < numPrograms; i++)
|
||||
{
|
||||
int error = 3;
|
||||
|
||||
u32 length, binaryFormat;
|
||||
u64 sourceHash;
|
||||
error -= fread(&sourceHash, 8, 1, file);
|
||||
error -= fread(&length, 4, 1, file);
|
||||
error -= fread(&binaryFormat, 4, 1, file);
|
||||
|
||||
if (error != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Invalid shader cache entry\n");
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
u8* data = new u8[length];
|
||||
if (fread(data, length, 1, file) != 1)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not read shader cache entry data\n");
|
||||
delete[] data;
|
||||
goto fileInvalid;
|
||||
}
|
||||
|
||||
ShaderCache.erase(sourceHash);
|
||||
ShaderCache.emplace(sourceHash, ShaderCacheEntry(data, length, binaryFormat));
|
||||
}
|
||||
|
||||
fileInvalid:
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void SaveShaderCache()
|
||||
{
|
||||
FILE* file = Platform::OpenLocalFile("shadercache", "rb+");
|
||||
if (file == nullptr)
|
||||
file = Platform::OpenLocalFile("shadercache", "wb");
|
||||
|
||||
if (file == nullptr)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not open or create shader cache file\n");
|
||||
return;
|
||||
}
|
||||
|
||||
int written = 3;
|
||||
u32 magic = ShaderCacheMagic, version = ShaderCacheVersion, numPrograms = ShaderCache.size();
|
||||
written -= fwrite(&magic, 4, 1, file);
|
||||
written -= fwrite(&version, 4, 1, file);
|
||||
written -= fwrite(&numPrograms, 4, 1, file);
|
||||
|
||||
if (written != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not write shader cache header\n");
|
||||
goto writeError;
|
||||
}
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
|
||||
printf("new shaders %d\n", NewShaders.size());
|
||||
|
||||
for (u64 newShader : NewShaders)
|
||||
{
|
||||
int error = 4;
|
||||
auto it = ShaderCache.find(newShader);
|
||||
|
||||
error -= fwrite(&it->first, 8, 1, file);
|
||||
error -= fwrite(&it->second.Length, 4, 1, file);
|
||||
error -= fwrite(&it->second.BinaryFormat, 4, 1, file);
|
||||
error -= fwrite(it->second.Data, it->second.Length, 1, file);
|
||||
|
||||
if (error != 0)
|
||||
{
|
||||
Log(LogLevel::Error, "Could not insert new shader cache entry\n");
|
||||
goto writeError;
|
||||
}
|
||||
}
|
||||
|
||||
writeError:
|
||||
fclose(file);
|
||||
|
||||
NewShaders.clear();
|
||||
}
|
||||
|
||||
bool CompilerShader(GLuint& id, const std::string& source, const std::string& name, const std::string& type)
|
||||
{
|
||||
int len;
|
||||
int res;
|
||||
|
||||
if (!glCreateShader)
|
||||
|
@ -35,8 +194,10 @@ bool CompilerShader(GLuint& id, const char* source, const char* name, const char
|
|||
return false;
|
||||
}
|
||||
|
||||
len = strlen(source);
|
||||
glShaderSource(id, 1, &source, &len);
|
||||
const char* sourceC = source.c_str();
|
||||
int len = source.length();
|
||||
glShaderSource(id, 1, &sourceC, &len);
|
||||
|
||||
glCompileShader(id);
|
||||
|
||||
glGetShaderiv(id, GL_COMPILE_STATUS, &res);
|
||||
|
@ -46,8 +207,8 @@ bool CompilerShader(GLuint& id, const char* source, const char* name, const char
|
|||
if (res < 1) res = 1024;
|
||||
char* log = new char[res+1];
|
||||
glGetShaderInfoLog(id, res+1, NULL, log);
|
||||
Log(LogLevel::Error, "OpenGL: failed to compile %s shader %s: %s\n", type, name, log);
|
||||
Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source);
|
||||
Log(LogLevel::Error, "OpenGL: failed to compile %s shader %s: %s\n", type.c_str(), name.c_str(), log);
|
||||
Log(LogLevel::Debug, "shader source:\n--\n%s\n--\n", source.c_str());
|
||||
delete[] log;
|
||||
|
||||
return false;
|
||||
|
@ -92,8 +253,29 @@ bool LinkProgram(GLuint& result, GLuint* ids, int numIds)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool CompileComputeProgram(GLuint& result, const char* source, const char* name)
|
||||
bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name)
|
||||
{
|
||||
result = glCreateProgram();
|
||||
|
||||
/*u64 sourceHash = XXH64(source.data(), source.size(), 0);
|
||||
auto it = ShaderCache.find(sourceHash);
|
||||
if (it != ShaderCache.end())
|
||||
{
|
||||
glProgramBinary(result, it->second.BinaryFormat, it->second.Data, it->second.Length);
|
||||
|
||||
GLint linkStatus;
|
||||
glGetProgramiv(result, GL_LINK_STATUS, &linkStatus);
|
||||
if (linkStatus == GL_TRUE)
|
||||
{
|
||||
Log(LogLevel::Info, "Restored shader %s from cache\n", name.c_str());
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
}
|
||||
}*/
|
||||
Log(LogLevel::Error, "Shader %s from cache was rejected\n", name.c_str());
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
bool linkingSucess = false;
|
||||
if (glDeleteProgram)
|
||||
|
@ -101,9 +283,6 @@ bool CompileComputeProgram(GLuint& result, const char* source, const char* name)
|
|||
goto error;
|
||||
}
|
||||
|
||||
result = glCreateProgram();
|
||||
|
||||
printf("compiling %s", name);
|
||||
if (!CompilerShader(shader, source, name, "compute"))
|
||||
goto error;
|
||||
|
||||
|
@ -113,14 +292,28 @@ error:
|
|||
glDeleteShader(shader);
|
||||
|
||||
if (!linkingSucess)
|
||||
{
|
||||
glDeleteProgram(result);
|
||||
}
|
||||
/*else
|
||||
{
|
||||
GLint length;
|
||||
GLenum format;
|
||||
glGetProgramiv(result, GL_PROGRAM_BINARY_LENGTH, &length);
|
||||
|
||||
u8* buffer = new u8[length];
|
||||
glGetProgramBinary(result, length, nullptr, &format, buffer);
|
||||
|
||||
ShaderCache.emplace(sourceHash, ShaderCacheEntry(buffer, length, format));
|
||||
NewShaders.push_back(sourceHash);
|
||||
}*/
|
||||
|
||||
return linkingSucess;
|
||||
}
|
||||
|
||||
bool CompileVertexFragmentProgram(GLuint& result,
|
||||
const char* vs, const char* fs,
|
||||
const char* name,
|
||||
const std::string& vs, const std::string& fs,
|
||||
const std::string& name,
|
||||
const std::initializer_list<AttributeTarget>& vertexInAttrs,
|
||||
const std::initializer_list<AttributeTarget>& fragmentOutAttrs)
|
||||
{
|
||||
|
|
|
@ -29,19 +29,23 @@
|
|||
namespace OpenGL
|
||||
{
|
||||
|
||||
void LoadShaderCache();
|
||||
void SaveShaderCache();
|
||||
|
||||
// Pairs a shader variable name with a numeric location. Lists of these are
// passed to CompileVertexFragmentProgram as vertex input attributes and
// fragment output attributes.
struct AttributeTarget
|
||||
{
|
||||
// name of the variable as written in the shader source
const char* Name;
|
||||
// location index for that variable — presumably used when binding the
// attribute/output on the program; confirm against the implementation
u32 Location;
|
||||
};
|
||||
|
||||
|
||||
bool CompileVertexFragmentProgram(GLuint& result,
|
||||
const char* vs, const char* fs,
|
||||
const char* name,
|
||||
const std::string& vs, const std::string& fs,
|
||||
const std::string& name,
|
||||
const std::initializer_list<AttributeTarget>& vertexInAttrs,
|
||||
const std::initializer_list<AttributeTarget>& fragmentOutAttrs);
|
||||
|
||||
bool CompileComputeProgram(GLuint& result, const char* source, const char* name);
|
||||
bool CompileComputeProgram(GLuint& result, const std::string& source, const std::string& name);
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue