Cocoa Port:

- Initialize the HQnx LUTs only once, instead of doing it per display window.
- Fix issue where the HQnx LUT init code was causing extremely long compile times. (Regression from r5087.)
This commit is contained in:
rogerman 2015-01-26 05:22:43 +00:00
parent c233b1c14f
commit 3abfa637b4
2 changed files with 148 additions and 129 deletions

View File

@ -1465,10 +1465,151 @@ enum OGLVertexAttributeID
OGLVertexAttributeID_TexCoord0 = 8
};
// One packed entry of the LQ2x/HQ2x/HQ4x lookup tables: six unsigned bytes,
// three pixel selectors followed by three blend weights. Entries are produced
// by PackLUTValues().
struct LUTValues
{
	uint8_t p0;	// first pixel selector (PackLUTValues stores selector * 31)
	uint8_t p1;	// second pixel selector
	uint8_t p2;	// third pixel selector
	uint8_t w0;	// blend weight paired with p0
	uint8_t w1;	// blend weight paired with p1
	uint8_t w2;	// blend weight paired with p2
};
// Process-wide lookup tables filled by InitHQnxLUTs().
// Layout of the 2x tables: 256 patterns * (2*2) output slots * 16 compare values.
static LUTValues _LQ2xLUT[256 * (2 * 2) * 16];
static LUTValues _HQ2xLUT[256 * (2 * 2) * 16];
// Layout of the 4x table: 256 patterns * (4*4) output slots * 16 compare values.
static LUTValues _HQ4xLUT[256 * (4 * 4) * 16];

// Constant vertex data for the filter pass.
// NOTE(review): reads as four (x, y) corner pairs spanning -1..1 -- confirm against the upload code.
static const GLint filterVtxBuffer[8] = {
	-1, -1,
	 1, -1,
	 1,  1,
	-1,  1
};

// Constant index lists.
// NOTE(review): presumably two triangles per quad (one quad for the filter pass,
// two quads for output) -- confirm against the draw calls.
static const GLubyte filterElementBuffer[6] = {
	0, 1, 2,
	2, 3, 0
};
static const GLubyte outputElementBuffer[12] = {
	0, 1, 2,
	2, 3, 0,
	4, 5, 6,
	6, 7, 4
};
// Packs three pixel selectors and their blend weights into one LUT entry.
//
// p0, p1, p2: pixel selectors; each is stored multiplied by 31 (truncated to 8 bits).
// w0, w1, w2: relative blend weights.
//   - If only w0 can contribute (w1 == 0 and w2 == 0), w0 is stored as 255.
//   - Otherwise every weight is multiplied by 256 / (w0 + w1 + w2).
//     The arithmetic is done in unsigned int and saturated at 255, so a weight
//     that scales to exactly 256 (w1 or w2 being the only non-zero weight) is
//     stored as 255 rather than wrapping around to 0.
//
// Returns the packed entry by value.
//
// Turn off inlining for this function so that we don't get hit with extremely long compile times.
static NOINLINE LUTValues PackLUTValues(uint8_t p0, uint8_t p1, uint8_t p2, uint8_t w0, uint8_t w1, uint8_t w2)
{
	if (w1 == 0 && w2 == 0)
	{
		w0 = 255;
	}
	else
	{
		// The sum is non-zero here because w1 or w2 is non-zero.
		const unsigned int weightSum = static_cast<unsigned int>(w0) + w1 + w2;
		const unsigned int weightScale = 256u / weightSum;
		
		const unsigned int scaled0 = w0 * weightScale;
		const unsigned int scaled1 = w1 * weightScale;
		const unsigned int scaled2 = w2 * weightScale;
		
		w0 = static_cast<uint8_t>((scaled0 > 255u) ? 255u : scaled0);
		w1 = static_cast<uint8_t>((scaled1 > 255u) ? 255u : scaled1);
		w2 = static_cast<uint8_t>((scaled2 > 255u) ? 255u : scaled2);
	}
	
	// The products are int; cast explicitly because list-initialization
	// does not allow an implicit narrowing conversion to uint8_t.
	const LUTValues packed = {
		static_cast<uint8_t>(p0 * 31),
		static_cast<uint8_t>(p1 * 31),
		static_cast<uint8_t>(p2 * 31),
		w0,
		w1,
		w2
	};
	
	return packed;
}
// Fills the file-scope tables _LQ2xLUT, _HQ2xLUT and _HQ4xLUT.
//
// The work is done on the first call only; a function-local static flag makes
// every later call return immediately. The flag is a plain bool and no locking
// is visible here.
// NOTE(review): confirm that callers cannot race on the first call.
//
// Each table is generated by looping over 16 "compare" values and 256 "pattern"
// values and switching on the pattern; the body of each switch comes from an
// included filter file that is expanded through the macros defined below.
// NOTE(review): the included files are not visible here; presumably they supply
// the `case` labels that assign P0..P3 / P(a, b) using the I* macros -- confirm.
static void InitHQnxLUTs()
{
static bool lutValuesInited = false;
// Already built by an earlier call: nothing to do.
if (lutValuesInited)
{
return;
}
// Bit tests on the current `compare` loop value.
#define MUR (compare & 0x01) // top-right
#define MDR (compare & 0x02) // bottom-right
#define MDL (compare & 0x04) // bottom-left
#define MUL (compare & 0x08) // top-left
// Interpolation macros for the 2x tables. Each one forwards its pixel selectors
// to PackLUTValues() with a fixed weight triple; the digits in the macro name
// spell out those weights (e.g. I211 -> weights 2, 1, 1). Two-selector forms
// pass p0 again as the third selector with a weight of 0.
#define IC(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
#define I11(p0,p1) PackLUTValues(p0, p1, p0, 1, 1, 0)
#define I211(p0,p1,p2) PackLUTValues(p0, p1, p2, 2, 1, 1)
#define I31(p0,p1) PackLUTValues(p0, p1, p0, 3, 1, 0)
#define I332(p0,p1,p2) PackLUTValues(p0, p1, p2, 3, 3, 2)
#define I431(p0,p1,p2) PackLUTValues(p0, p1, p2, 4, 3, 1)
#define I521(p0,p1,p2) PackLUTValues(p0, p1, p2, 5, 2, 1)
#define I53(p0,p1) PackLUTValues(p0, p1, p0, 5, 3, 0)
#define I611(p0,p1,p2) PackLUTValues(p0, p1, p2, 6, 1, 1)
#define I71(p0,p1) PackLUTValues(p0, p1, p0, 7, 1, 0)
#define I772(p0,p1,p2) PackLUTValues(p0, p1, p2, 7, 7, 2)
#define I97(p0,p1) PackLUTValues(p0, p1, p0, 9, 7, 0)
#define I1411(p0,p1,p2) PackLUTValues(p0, p1, p2, 14, 1, 1)
#define I151(p0,p1) PackLUTValues(p0, p1, p0, 15, 1, 0)
// LQ2x pass: P0..P3 address the four output slots of the current
// (pattern, compare) pair inside _LQ2xLUT. Slots are 256 entries apart and
// each compare value owns a run of 1024 entries.
#define P0 _LQ2xLUT[pattern+(256*0)+(1024*compare)]
#define P1 _LQ2xLUT[pattern+(256*1)+(1024*compare)]
#define P2 _LQ2xLUT[pattern+(256*2)+(1024*compare)]
#define P3 _LQ2xLUT[pattern+(256*3)+(1024*compare)]
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/lq2x.h"
}
}
}
#undef P0
#undef P1
#undef P2
#undef P3
// HQ2x pass: same slot layout as above, but P0..P3 now target _HQ2xLUT.
#define P0 _HQ2xLUT[pattern+(256*0)+(1024*compare)]
#define P1 _HQ2xLUT[pattern+(256*1)+(1024*compare)]
#define P2 _HQ2xLUT[pattern+(256*2)+(1024*compare)]
#define P3 _HQ2xLUT[pattern+(256*3)+(1024*compare)]
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/hq2x.h"
}
}
}
#undef P0
#undef P1
#undef P2
#undef P3
// HQ4x pass: P(a, b) addresses slot (b*4)+a of the current (pattern, compare)
// pair inside _HQ4xLUT; each compare value owns a run of 4096 entries.
// I1/I2/I3 take the weights as leading arguments instead of encoding them in
// the macro name.
#define P(a, b) _HQ4xLUT[pattern+(256*((b*4)+a))+(4096*compare)]
#define I1(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
#define I2(i0, i1, p0, p1) PackLUTValues(p0, p1, p0, i0, i1, 0)
#define I3(i0, i1, i2, p0, p1, p2) PackLUTValues(p0, p1, p2, i0, i1, i2)
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/hq4x.dat"
}
}
}
// Remove every helper macro so none of them leaks into the rest of the file.
#undef P
#undef I1
#undef I2
#undef I3
#undef MUR
#undef MDR
#undef MDL
#undef MUL
#undef IC
#undef I11
#undef I211
#undef I31
#undef I332
#undef I431
#undef I521
#undef I53
#undef I611
#undef I71
#undef I772
#undef I97
#undef I1411
#undef I151
// Record that the tables are ready so later calls take the early return.
lutValuesInited = true;
}
#pragma mark -
OGLInfo::OGLInfo()
{
_versionMajor = 0;
@ -2322,7 +2463,7 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO)
OGLShaderProgram *shaderFilterProgram = _shaderFilter->GetProgram();
shaderFilterProgram->SetVertexAndFragmentShaderOGL(Sample1x1_VertShader_110, PassthroughFragShader_110);
InitHQnxPixelScaler();
UploadHQnxLUTs();
}
else
{
@ -2368,157 +2509,35 @@ OGLDisplayLayer::~OGLDisplayLayer()
free(_vfMasterDstBuffer);
}
typedef struct
void OGLDisplayLayer::UploadHQnxLUTs()
{
GLubyte p0;
GLubyte p1;
GLubyte p2;
GLubyte w0;
GLubyte w1;
GLubyte w2;
} LUTValues;
// Packs three pixel selectors and their blend weights into one LUT entry.
//
// p0, p1, p2: pixel selectors; each is stored multiplied by 31 (truncated to 8 bits).
// w0, w1, w2: relative blend weights.
//   - If only w0 can contribute (w1 == 0 and w2 == 0), w0 is stored as 255.
//   - Otherwise every weight is multiplied by 256 / (w0 + w1 + w2).
//     The arithmetic is done in unsigned int and saturated at 255, so a weight
//     that scales to exactly 256 (w1 or w2 being the only non-zero weight) is
//     stored as 255 rather than wrapping around to 0.
//
// Returns the packed entry by value.
LUTValues PackLUTValues(GLubyte p0, GLubyte p1, GLubyte p2, GLubyte w0, GLubyte w1, GLubyte w2)
{
	if (w1 == 0 && w2 == 0)
	{
		w0 = 255;
	}
	else
	{
		// The sum is non-zero here because w1 or w2 is non-zero.
		const unsigned int weightSum = static_cast<unsigned int>(w0) + w1 + w2;
		const unsigned int weightScale = 256u / weightSum;
		
		const unsigned int scaled0 = w0 * weightScale;
		const unsigned int scaled1 = w1 * weightScale;
		const unsigned int scaled2 = w2 * weightScale;
		
		w0 = static_cast<GLubyte>((scaled0 > 255u) ? 255u : scaled0);
		w1 = static_cast<GLubyte>((scaled1 > 255u) ? 255u : scaled1);
		w2 = static_cast<GLubyte>((scaled2 > 255u) ? 255u : scaled2);
	}
	
	// The products are int; cast explicitly because list-initialization
	// does not allow an implicit narrowing conversion to GLubyte.
	const LUTValues packed = {
		static_cast<GLubyte>(p0 * 31),
		static_cast<GLubyte>(p1 * 31),
		static_cast<GLubyte>(p2 * 31),
		w0,
		w1,
		w2
	};
	
	return packed;
}
void OGLDisplayLayer::InitHQnxPixelScaler()
{
LUTValues hqnxLUT[256*16*16];
InitHQnxLUTs();
glGenTextures(1, &_texLQ2xLUT);
glGenTextures(1, &_texHQ2xLUT);
glGenTextures(1, &_texHQ4xLUT);
glActiveTexture(GL_TEXTURE0 + 1);
#define MUR (compare & 0x01) // top-right
#define MDR (compare & 0x02) // bottom-right
#define MDL (compare & 0x04) // bottom-left
#define MUL (compare & 0x08) // top-left
#define IC(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
#define I11(p0,p1) PackLUTValues(p0, p1, p0, 1, 1, 0)
#define I211(p0,p1,p2) PackLUTValues(p0, p1, p2, 2, 1, 1)
#define I31(p0,p1) PackLUTValues(p0, p1, p0, 3, 1, 0)
#define I332(p0,p1,p2) PackLUTValues(p0, p1, p2, 3, 3, 2)
#define I431(p0,p1,p2) PackLUTValues(p0, p1, p2, 4, 3, 1)
#define I521(p0,p1,p2) PackLUTValues(p0, p1, p2, 5, 2, 1)
#define I53(p0,p1) PackLUTValues(p0, p1, p0, 5, 3, 0)
#define I611(p0,p1,p2) PackLUTValues(p0, p1, p2, 6, 1, 1)
#define I71(p0,p1) PackLUTValues(p0, p1, p0, 7, 1, 0)
#define I772(p0,p1,p2) PackLUTValues(p0, p1, p2, 7, 7, 2)
#define I97(p0,p1) PackLUTValues(p0, p1, p0, 9, 7, 0)
#define I1411(p0,p1,p2) PackLUTValues(p0, p1, p2, 14, 1, 1)
#define I151(p0,p1) PackLUTValues(p0, p1, p0, 15, 1, 0)
#define P0 hqnxLUT[pattern+(256*0)+(1024*compare)]
#define P1 hqnxLUT[pattern+(256*1)+(1024*compare)]
#define P2 hqnxLUT[pattern+(256*2)+(1024*compare)]
#define P3 hqnxLUT[pattern+(256*3)+(1024*compare)]
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/lq2x.h"
}
}
}
#undef P0
#undef P1
#undef P2
#undef P3
glBindTexture(GL_TEXTURE_3D, _texLQ2xLUT);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
#define P0 hqnxLUT[pattern+(256*0)+(1024*compare)]
#define P1 hqnxLUT[pattern+(256*1)+(1024*compare)]
#define P2 hqnxLUT[pattern+(256*2)+(1024*compare)]
#define P3 hqnxLUT[pattern+(256*3)+(1024*compare)]
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/hq2x.h"
}
}
}
#undef P0
#undef P1
#undef P2
#undef P3
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _LQ2xLUT);
glBindTexture(GL_TEXTURE_3D, _texHQ2xLUT);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
#define P(a, b) hqnxLUT[pattern+(256*((b*4)+a))+(4096*compare)]
#define I1(p0) PackLUTValues(p0, p0, p0, 1, 0, 0)
#define I2(i0, i1, p0, p1) PackLUTValues(p0, p1, p0, i0, i1, 0)
#define I3(i0, i1, i2, p0, p1, p2) PackLUTValues(p0, p1, p2, i0, i1, i2)
for (size_t compare = 0; compare < 16; compare++)
{
for (size_t pattern = 0; pattern < 256; pattern++)
{
switch (pattern)
{
#include "../filter/hq4x.dat"
}
}
}
#undef P
#undef I1
#undef I2
#undef I3
#undef MUR
#undef MDR
#undef MDL
#undef MUL
#undef IC
#undef I11
#undef I211
#undef I31
#undef I332
#undef I431
#undef I521
#undef I53
#undef I611
#undef I71
#undef I772
#undef I97
#undef I1411
#undef I151
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 4, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _HQ2xLUT);
glBindTexture(GL_TEXTURE_3D, _texHQ4xLUT);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 16, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, hqnxLUT);
glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 256*2, 16, 16, 0, GL_BGR, GL_UNSIGNED_BYTE, _HQ4xLUT);
glBindTexture(GL_TEXTURE_3D, 0);
glActiveTexture(GL_TEXTURE0);

View File

@ -246,7 +246,7 @@ protected:
GLint _uniformFinalOutputScalar;
GLint _uniformFinalOutputViewSize;
void InitHQnxPixelScaler();
void UploadHQnxLUTs();
virtual void UploadVerticesOGL();
virtual void UploadTexCoordsOGL();