Cocoa Port:
- Initialize the HQnx LUTs only once, instead of doing it per display window.
- Fix issue where the HQnx LUT init code was causing extremely long compile times. (Regression from r5087.)
This commit is contained in:
parent
c233b1c14f
commit
3abfa637b4
|
@ -1465,10 +1465,151 @@ enum OGLVertexAttributeID
|
|||
OGLVertexAttributeID_TexCoord0 = 8
|
||||
};
|
||||
|
||||
// One entry of an LQ2x/HQ2x/HQ4x lookup table, as built by PackLUTValues().
// Six single-byte fields: three pixel selectors followed by three blend weights.
// NOTE(review): the tables are uploaded as GL_RGB / GL_UNSIGNED_BYTE with a width of 256*2,
// so one entry presumably maps to two RGB texels (p0..p2, then w0..w2) -- confirm against the shader.
typedef struct
|
||||
{
|
||||
// Pixel selectors. PackLUTValues() stores each selector multiplied by 31.
uint8_t p0;
|
||||
uint8_t p1;
|
||||
uint8_t p2;
|
||||
// Blend weights for p0, p1 and p2 respectively, after scaling by PackLUTValues().
uint8_t w0;
|
||||
uint8_t w1;
|
||||
uint8_t w2;
|
||||
} LUTValues;
|
||||
|
||||
// File-scope lookup tables filled once by InitHQnxLUTs().
// Each table holds 256 patterns x (output pixels per source pixel) x 16 compare values:
// 2*2 output pixels for the 2x scalers, 4*4 for the 4x scaler.
static LUTValues _LQ2xLUT[256*(2*2)*16];
|
||||
static LUTValues _HQ2xLUT[256*(2*2)*16];
|
||||
static LUTValues _HQ4xLUT[256*(4*4)*16];
|
||||
|
||||
// Four (x, y) pairs: (-1,-1), (1,-1), (1,1), (-1,1).
// NOTE(review): presumably the corners of a full-viewport quad for the filter pass -- usage is not visible here.
static const GLint filterVtxBuffer[8] = {-1, -1, 1, -1, 1, 1, -1, 1};
|
||||
// Six indices over vertices 0..3 (0,1,2 and 2,3,0); presumably two triangles forming one quad.
static const GLubyte filterElementBuffer[6] = {0, 1, 2, 2, 3, 0};
|
||||
// Twelve indices over vertices 0..7; same 0,1,2 / 2,3,0 pattern repeated for vertices 4..7
// (presumably one quad per output screen -- TODO confirm).
static const GLubyte outputElementBuffer[12] = {0, 1, 2, 2, 3, 0, 4, 5, 6, 6, 7, 4};
|
||||
|
||||
// Turn off inlining for this function so that we don't get hit with extremely long compile times.
|
||||
static NOINLINE LUTValues PackLUTValues(uint8_t p0, uint8_t p1, uint8_t p2, uint8_t w0, uint8_t w1, uint8_t w2)
|
||||
{
|
||||
if (w1 == 0 && w2 == 0)
|
||||
{
|
||||
w0 = 255;
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint8_t wR = 256 / (w0 + w1 + w2);
|
||||
w0 *= wR;
|
||||
w1 *= wR;
|
||||
w2 *= wR;
|
||||
}
|
||||
|
||||
return {p0*31, p1*31, p2*31, w0, w1, w2};
|
||||
}
|
||||
|
||||
// Builds the file-scope tables _LQ2xLUT, _HQ2xLUT and _HQ4xLUT.
// Only the first call does any work; later calls return immediately.
// The per-pattern case bodies come from the #included filter files (not visible here);
// they are expected to use the helper macros defined below.
// NOTE(review): the "already done" flag is a plain static bool with no locking visible --
// presumably this is only ever called from one thread; confirm.
static void InitHQnxLUTs()
|
||||
{
|
||||
static bool lutValuesInited = false;
|
||||
|
||||
// Run-once guard.
if (lutValuesInited)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Bit tests on the current value of the `compare` loop variable.
#define MUR (compare & 0x01) // top-right
|
||||
#define MDR (compare & 0x02) // bottom-right
|
||||
#define MDL (compare & 0x04) // bottom-left
|
||||
#define MUL (compare & 0x08) // top-left
|
||||
// Blend helpers: the digits in each name are the weights passed to PackLUTValues().
// Two-pixel variants pass p0 again as the third selector with a weight of 0.
#define IC(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
|
||||
#define I11(p0,p1) PackLUTValues(p0, p1, p0, 1, 1, 0)
|
||||
#define I211(p0,p1,p2) PackLUTValues(p0, p1, p2, 2, 1, 1)
|
||||
#define I31(p0,p1) PackLUTValues(p0, p1, p0, 3, 1, 0)
|
||||
#define I332(p0,p1,p2) PackLUTValues(p0, p1, p2, 3, 3, 2)
|
||||
#define I431(p0,p1,p2) PackLUTValues(p0, p1, p2, 4, 3, 1)
|
||||
#define I521(p0,p1,p2) PackLUTValues(p0, p1, p2, 5, 2, 1)
|
||||
#define I53(p0,p1) PackLUTValues(p0, p1, p0, 5, 3, 0)
|
||||
#define I611(p0,p1,p2) PackLUTValues(p0, p1, p2, 6, 1, 1)
|
||||
#define I71(p0,p1) PackLUTValues(p0, p1, p0, 7, 1, 0)
|
||||
#define I772(p0,p1,p2) PackLUTValues(p0, p1, p2, 7, 7, 2)
|
||||
#define I97(p0,p1) PackLUTValues(p0, p1, p0, 9, 7, 0)
|
||||
#define I1411(p0,p1,p2) PackLUTValues(p0, p1, p2, 14, 1, 1)
|
||||
#define I151(p0,p1) PackLUTValues(p0, p1, p0, 15, 1, 0)
|
||||
|
||||
// LQ2x table. P0..P3 address the four output-pixel slots of _LQ2xLUT:
// index = pattern + 256 * slot + 1024 * compare.
#define P0 _LQ2xLUT[pattern+(256*0)+(1024*compare)]
|
||||
#define P1 _LQ2xLUT[pattern+(256*1)+(1024*compare)]
|
||||
#define P2 _LQ2xLUT[pattern+(256*2)+(1024*compare)]
|
||||
#define P3 _LQ2xLUT[pattern+(256*3)+(1024*compare)]
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
// The case labels and their bodies are supplied by the included file.
#include "../filter/lq2x.h"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P0
|
||||
#undef P1
|
||||
#undef P2
|
||||
#undef P3
|
||||
|
||||
// HQ2x table. Same slot layout as above, but targeting _HQ2xLUT.
#define P0 _HQ2xLUT[pattern+(256*0)+(1024*compare)]
|
||||
#define P1 _HQ2xLUT[pattern+(256*1)+(1024*compare)]
|
||||
#define P2 _HQ2xLUT[pattern+(256*2)+(1024*compare)]
|
||||
#define P3 _HQ2xLUT[pattern+(256*3)+(1024*compare)]
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
// The case labels and their bodies are supplied by the included file.
#include "../filter/hq2x.h"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P0
|
||||
#undef P1
|
||||
#undef P2
|
||||
#undef P3
|
||||
|
||||
// HQ4x table. P(a, b) addresses one of 16 output-pixel slots of _HQ4xLUT:
// index = pattern + 256 * (b*4 + a) + 4096 * compare.
// I1/I2/I3 take the weights as leading arguments instead of encoding them in the macro name.
#define P(a, b) _HQ4xLUT[pattern+(256*((b*4)+a))+(4096*compare)]
|
||||
#define I1(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
|
||||
#define I2(i0, i1, p0, p1) PackLUTValues(p0, p1, p0, i0, i1, 0)
|
||||
#define I3(i0, i1, i2, p0, p1, p2) PackLUTValues(p0, p1, p2, i0, i1, i2)
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
// The case labels and their bodies are supplied by the included file.
#include "../filter/hq4x.dat"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P
|
||||
#undef I1
|
||||
#undef I2
|
||||
#undef I3
|
||||
|
||||
// Remove the remaining helper macros so they do not leak into the rest of the file.
#undef MUR
|
||||
#undef MDR
|
||||
#undef MDL
|
||||
#undef MUL
|
||||
#undef IC
|
||||
#undef I11
|
||||
#undef I211
|
||||
#undef I31
|
||||
#undef I332
|
||||
#undef I431
|
||||
#undef I521
|
||||
#undef I53
|
||||
#undef I611
|
||||
#undef I71
|
||||
#undef I772
|
||||
#undef I97
|
||||
#undef I1411
|
||||
#undef I151
|
||||
|
||||
// Mark the tables as built so later calls take the early return above.
lutValuesInited = true;
|
||||
}
|
||||
|
||||
#pragma mark -
|
||||
|
||||
OGLInfo::OGLInfo()
|
||||
{
|
||||
_versionMajor = 0;
|
||||
|
@ -2322,7 +2463,7 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO)
|
|||
OGLShaderProgram *shaderFilterProgram = _shaderFilter->GetProgram();
|
||||
shaderFilterProgram->SetVertexAndFragmentShaderOGL(Sample1x1_VertShader_110, PassthroughFragShader_110);
|
||||
|
||||
InitHQnxPixelScaler();
|
||||
UploadHQnxLUTs();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2368,157 +2509,35 @@ OGLDisplayLayer::~OGLDisplayLayer()
|
|||
free(_vfMasterDstBuffer);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
void OGLDisplayLayer::UploadHQnxLUTs()
|
||||
{
|
||||
GLubyte p0;
|
||||
GLubyte p1;
|
||||
GLubyte p2;
|
||||
GLubyte w0;
|
||||
GLubyte w1;
|
||||
GLubyte w2;
|
||||
} LUTValues;
|
||||
|
||||
LUTValues PackLUTValues(GLubyte p0, GLubyte p1, GLubyte p2, GLubyte w0, GLubyte w1, GLubyte w2)
|
||||
{
|
||||
if (w1 == 0 && w2 == 0)
|
||||
{
|
||||
w0 = 255;
|
||||
}
|
||||
else
|
||||
{
|
||||
const GLubyte wR = 256 / (w0 + w1 + w2);
|
||||
w0 *= wR;
|
||||
w1 *= wR;
|
||||
w2 *= wR;
|
||||
}
|
||||
|
||||
return {p0*31, p1*31, p2*31, w0, w1, w2};
|
||||
}
|
||||
|
||||
void OGLDisplayLayer::InitHQnxPixelScaler()
|
||||
{
|
||||
LUTValues hqnxLUT[256*16*16];
|
||||
InitHQnxLUTs();
|
||||
|
||||
glGenTextures(1, &_texLQ2xLUT);
|
||||
glGenTextures(1, &_texHQ2xLUT);
|
||||
glGenTextures(1, &_texHQ4xLUT);
|
||||
glActiveTexture(GL_TEXTURE0 + 1);
|
||||
|
||||
#define MUR (compare & 0x01) // top-right
|
||||
#define MDR (compare & 0x02) // bottom-right
|
||||
#define MDL (compare & 0x04) // bottom-left
|
||||
#define MUL (compare & 0x08) // top-left
|
||||
#define IC(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
|
||||
#define I11(p0,p1) PackLUTValues(p0, p1, p0, 1, 1, 0)
|
||||
#define I211(p0,p1,p2) PackLUTValues(p0, p1, p2, 2, 1, 1)
|
||||
#define I31(p0,p1) PackLUTValues(p0, p1, p0, 3, 1, 0)
|
||||
#define I332(p0,p1,p2) PackLUTValues(p0, p1, p2, 3, 3, 2)
|
||||
#define I431(p0,p1,p2) PackLUTValues(p0, p1, p2, 4, 3, 1)
|
||||
#define I521(p0,p1,p2) PackLUTValues(p0, p1, p2, 5, 2, 1)
|
||||
#define I53(p0,p1) PackLUTValues(p0, p1, p0, 5, 3, 0)
|
||||
#define I611(p0,p1,p2) PackLUTValues(p0, p1, p2, 6, 1, 1)
|
||||
#define I71(p0,p1) PackLUTValues(p0, p1, p0, 7, 1, 0)
|
||||
#define I772(p0,p1,p2) PackLUTValues(p0, p1, p2, 7, 7, 2)
|
||||
#define I97(p0,p1) PackLUTValues(p0, p1, p0, 9, 7, 0)
|
||||
#define I1411(p0,p1,p2) PackLUTValues(p0, p1, p2, 14, 1, 1)
|
||||
#define I151(p0,p1) PackLUTValues(p0, p1, p0, 15, 1, 0)
|
||||
|
||||
#define P0 hqnxLUT[pattern+(256*0)+(1024*compare)]
|
||||
#define P1 hqnxLUT[pattern+(256*1)+(1024*compare)]
|
||||
#define P2 hqnxLUT[pattern+(256*2)+(1024*compare)]
|
||||
#define P3 hqnxLUT[pattern+(256*3)+(1024*compare)]
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
#include "../filter/lq2x.h"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P0
|
||||
#undef P1
|
||||
#undef P2
|
||||
#undef P3
|
||||
|
||||
glBindTexture(GL_TEXTURE_3D, _texLQ2xLUT);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
|
||||
|
||||
#define P0 hqnxLUT[pattern+(256*0)+(1024*compare)]
|
||||
#define P1 hqnxLUT[pattern+(256*1)+(1024*compare)]
|
||||
#define P2 hqnxLUT[pattern+(256*2)+(1024*compare)]
|
||||
#define P3 hqnxLUT[pattern+(256*3)+(1024*compare)]
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
#include "../filter/hq2x.h"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P0
|
||||
#undef P1
|
||||
#undef P2
|
||||
#undef P3
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _LQ2xLUT);
|
||||
|
||||
glBindTexture(GL_TEXTURE_3D, _texHQ2xLUT);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
|
||||
|
||||
#define P(a, b) hqnxLUT[pattern+(256*((b*4)+a))+(4096*compare)]
|
||||
#define I1(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
|
||||
#define I2(i0, i1, p0, p1) PackLUTValues(p0, p1, p0, i0, i1, 0)
|
||||
#define I3(i0, i1, i2, p0, p1, p2) PackLUTValues(p0, p1, p2, i0, i1, i2)
|
||||
for (size_t compare = 0; compare < 16; compare++)
|
||||
{
|
||||
for (size_t pattern = 0; pattern < 256; pattern++)
|
||||
{
|
||||
switch (pattern)
|
||||
{
|
||||
#include "../filter/hq4x.dat"
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef P
|
||||
#undef I1
|
||||
#undef I2
|
||||
#undef I3
|
||||
|
||||
#undef MUR
|
||||
#undef MDR
|
||||
#undef MDL
|
||||
#undef MUL
|
||||
#undef IC
|
||||
#undef I11
|
||||
#undef I211
|
||||
#undef I31
|
||||
#undef I332
|
||||
#undef I431
|
||||
#undef I521
|
||||
#undef I53
|
||||
#undef I611
|
||||
#undef I71
|
||||
#undef I772
|
||||
#undef I97
|
||||
#undef I1411
|
||||
#undef I151
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _HQ2xLUT);
|
||||
|
||||
glBindTexture(GL_TEXTURE_3D, _texHQ4xLUT);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 16, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 16, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _HQ4xLUT);
|
||||
|
||||
glBindTexture(GL_TEXTURE_3D, 0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
|
|
@ -246,7 +246,7 @@ protected:
|
|||
GLint _uniformFinalOutputScalar;
|
||||
GLint _uniformFinalOutputViewSize;
|
||||
|
||||
void InitHQnxPixelScaler();
|
||||
void UploadHQnxLUTs();
|
||||
|
||||
virtual void UploadVerticesOGL();
|
||||
virtual void UploadTexCoordsOGL();
|
||||
|
|
Loading…
Reference in New Issue