Merge branch 'ogl-tex2d'

Conflicts:
	Source/Core/VideoBackends/OGL/Src/TextureConverter.cpp
This commit is contained in:
degasus 2013-12-09 13:02:15 +01:00
commit 42619c1d2d
7 changed files with 194 additions and 504 deletions

View File

@ -33,8 +33,6 @@ GLuint FramebufferManager::m_resolvedDepthTexture;
GLuint FramebufferManager::m_xfbFramebuffer;
// reinterpret pixel format
GLuint FramebufferManager::m_pixel_format_vao;
GLuint FramebufferManager::m_pixel_format_vbo;
SHADER FramebufferManager::m_pixel_format_shaders[2];
@ -79,24 +77,24 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
m_efbDepth = glObj[1];
m_resolvedColorTexture = glObj[2]; // needed for pixel format conversion
glBindTexture(getFbType(), m_efbColor);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, m_efbColor);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(getFbType(), m_efbDepth);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);
glBindTexture(GL_TEXTURE_2D, m_efbDepth);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);
glBindTexture(getFbType(), m_resolvedColorTexture);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
// Bind target textures to the EFB framebuffer.
glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_efbDepth, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_efbDepth, 0);
GL_REPORT_FBO_ERROR();
}
@ -144,20 +142,20 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
m_resolvedColorTexture = glObj[0];
m_resolvedDepthTexture = glObj[1];
glBindTexture(getFbType(), m_resolvedColorTexture);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(getFbType(), 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, m_resolvedColorTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, m_targetWidth, m_targetHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(getFbType(), m_resolvedDepthTexture);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(getFbType(), 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);
glBindTexture(GL_TEXTURE_2D, m_resolvedDepthTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT24, m_targetWidth, m_targetHeight, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);
// Bind resolved textures to resolved framebuffer.
glBindFramebuffer(GL_FRAMEBUFFER, m_resolvedFramebuffer);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_resolvedColorTexture, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, getFbType(), m_resolvedDepthTexture, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_resolvedColorTexture, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, m_resolvedDepthTexture, 0);
GL_REPORT_FBO_ERROR();
@ -177,33 +175,18 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
// reinterpret pixel format
glGenBuffers(1, &m_pixel_format_vbo);
glGenVertexArrays(1, &m_pixel_format_vao);
glBindVertexArray(m_pixel_format_vao);
glBindBuffer(GL_ARRAY_BUFFER, m_pixel_format_vbo);
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*2, NULL);
float vertices[] = {
-1.0, -1.0,
1.0, -1.0,
-1.0, 1.0,
1.0, 1.0,
};
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
char vs[] =
"ATTRIN vec2 rawpos;\n"
"void main(void) {\n"
" gl_Position = vec4(rawpos,0,1);\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
char ps_rgba6_to_rgb8[] =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"out vec4 ocol0;\n"
"void main()\n"
"{\n"
" ivec4 src6 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 63.f));\n"
" ivec4 src6 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 63.f));\n"
" ivec4 dst8;\n"
" dst8.r = (src6.r << 2) | (src6.g >> 4);\n"
" dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"
@ -213,11 +196,11 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
"}";
char ps_rgb8_to_rgba6[] =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"out vec4 ocol0;\n"
"void main()\n"
"{\n"
" ivec4 src8 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 255.f));\n"
" ivec4 src8 = ivec4(round(texelFetch(samp9, ivec2(gl_FragCoord.xy), 0) * 255.f));\n"
" ivec4 dst6;\n"
" dst6.r = src8.r >> 2;\n"
" dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"
@ -261,8 +244,6 @@ FramebufferManager::~FramebufferManager()
m_efbDepth = 0;
// reinterpret pixel format
glDeleteVertexArrays(1, &m_pixel_format_vao);
glDeleteBuffers(1, &m_pixel_format_vbo);
m_pixel_format_shaders[0].Destroy();
m_pixel_format_shaders[1].Destroy();
}
@ -386,14 +367,13 @@ void FramebufferManager::ReinterpretPixelData(unsigned int convtype)
m_resolvedColorTexture = src_texture;
// also switch them on fbo
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, getFbType(), m_efbColor, 0);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_efbColor, 0);
}
glViewport(0,0, m_targetWidth, m_targetHeight);
glActiveTexture(GL_TEXTURE0 + 9);
glBindTexture(getFbType(), src_texture);
glBindTexture(GL_TEXTURE_2D, src_texture);
m_pixel_format_shaders[convtype ? 1 : 0].Bind();
glBindVertexArray(m_pixel_format_vao);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
g_renderer->RestoreAPIState();

View File

@ -57,14 +57,6 @@ struct XFBSource : public XFBSourceBase
const GLuint texture;
};
// Picks the GL texture target used for the framebuffer textures:
// GL_TEXTURE_2D when USE_GLES3 is defined, GL_TEXTURE_RECTANGLE otherwise.
inline GLenum getFbType()
{
#ifdef USE_GLES3
	return GL_TEXTURE_2D;
#else
	return GL_TEXTURE_RECTANGLE;
#endif
}
class FramebufferManager : public FramebufferManagerBase
{
public:
@ -121,8 +113,6 @@ private:
static GLuint m_xfbFramebuffer; // Only used in MSAA mode
// For pixel format draw
static GLuint m_pixel_format_vbo;
static GLuint m_pixel_format_vao;
static SHADER m_pixel_format_shaders[2];
};

View File

@ -25,18 +25,15 @@ static u32 s_width;
static u32 s_height;
static GLuint s_fbo;
static GLuint s_texture;
static GLuint s_vao;
static GLuint s_vbo;
static GLuint s_uniform_resolution;
static char s_vertex_shader[] =
"in vec2 rawpos;\n"
"in vec2 tex0;\n"
"out vec2 uv0;\n"
"void main(void) {\n"
" gl_Position = vec4(rawpos,0,1);\n"
" uv0 = tex0;\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
" uv0 = rawpos;\n"
"}\n";
void Init()
@ -56,34 +53,14 @@ void Init()
glBindFramebuffer(GL_FRAMEBUFFER, s_fbo);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_texture, 0);
FramebufferManager::SetFramebuffer(0);
glGenBuffers(1, &s_vbo);
glBindBuffer(GL_ARRAY_BUFFER, s_vbo);
GLfloat vertices[] = {
-1.f, -1.f, 0.f, 0.f,
-1.f, 1.f, 0.f, 1.f,
1.f, -1.f, 1.f, 0.f,
1.f, 1.f, 1.f, 1.f
};
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glGenVertexArrays(1, &s_vao);
glBindVertexArray( s_vao );
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, NULL);
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
}
void Shutdown()
{
s_shader.Destroy();
glDeleteFramebuffers(1, &s_vbo);
glDeleteFramebuffers(1, &s_fbo);
glDeleteTextures(1, &s_texture);
glDeleteBuffers(1, &s_vbo);
glDeleteVertexArrays(1, &s_vao);
}
void ReloadShader()
@ -103,7 +80,6 @@ void BlitToScreen()
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
glViewport(0, 0, s_width, s_height);
glBindVertexArray(s_vao);
s_shader.Bind();
glUniform4f(s_uniform_resolution, (float)s_width, (float)s_height, 1.0f/(float)s_width, 1.0f/(float)s_height);
@ -111,7 +87,6 @@ void BlitToScreen()
glActiveTexture(GL_TEXTURE0+9);
glBindTexture(GL_TEXTURE_2D, s_texture);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glBindTexture(GL_TEXTURE_2D, 0);
/* glBindFramebuffer(GL_READ_FRAMEBUFFER, s_fbo);
@ -132,7 +107,6 @@ void Update ( u32 width, u32 height )
glActiveTexture(GL_TEXTURE0+9);
glBindTexture(GL_TEXTURE_2D, s_texture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glBindTexture(GL_TEXTURE_2D, 0);
}
}

View File

@ -584,16 +584,14 @@ void ProgramShaderCache::CreateHeader ( void )
"#define float2 vec2\n"
"#define float3 vec3\n"
"#define float4 vec4\n"
"#define int2 ivec2\n"
"#define int3 ivec3\n"
"#define int4 ivec4\n"
// hlsl to glsl function translation
"#define frac fract\n"
"#define lerp mix\n"
// texture2d hack
"%s\n"
"%s\n"
"%s\n"
, v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
, g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
, g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : ""
@ -602,10 +600,6 @@ void ProgramShaderCache::CreateHeader ( void )
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : "centroid in"
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : "centroid out"
, v==GLSLES3 ? "" : v<=GLSL_130 ? "#extension GL_ARB_texture_rectangle : enable" : "#define texture2DRect texture"
, v==GLSLES3 ? "#define texture2DRect(samp, uv) texelFetch(samp, ivec2(floor(uv)), 0)" : ""
, v==GLSLES3 ? "#define sampler2DRect sampler2D" : ""
);
}

View File

@ -45,6 +45,8 @@ static SHADER s_ColorMatrixProgram;
static SHADER s_DepthMatrixProgram;
static GLuint s_ColorMatrixUniform;
static GLuint s_DepthMatrixUniform;
static GLuint s_ColorCopyPositionUniform;
static GLuint s_DepthCopyPositionUniform;
static u32 s_ColorCbufid;
static u32 s_DepthCbufid;
@ -52,13 +54,6 @@ static u32 s_Textures[8];
static u32 s_ActiveTexture;
static u32 s_NextStage;
// One cached vertex buffer / vertex array pair for an EFB copy source rectangle.
struct VBOCache {
GLuint vbo; // buffer object name obtained from glGenBuffers
GLuint vao; // vertex array object name obtained from glGenVertexArrays
TargetRectangle targetSource; // rectangle whose vertices were last uploaded into vbo
};
// Cache entries keyed by a 64-bit value packed from the source rectangle's
// left/top/right/bottom edges.
static std::map<u64,VBOCache> s_VBO;
bool SaveTexture(const std::string filename, u32 textarget, u32 tex, int virtual_width, int virtual_height, unsigned int level)
{
#ifndef USE_GLES3
@ -296,7 +291,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
GL_REPORT_ERRORD();
glActiveTexture(GL_TEXTURE0+9);
glBindTexture(getFbType(), read_texture);
glBindTexture(GL_TEXTURE_2D, read_texture);
glViewport(0, 0, virtual_width, virtual_height);
@ -311,53 +306,12 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
glUniform4fv(s_ColorMatrixUniform, 7, colmat);
s_ColorCbufid = cbufid;
}
TargetRectangle R = g_renderer->ConvertEFBRectangle(srcRect);
glUniform4f(srcFormat == PIXELFMT_Z24 ? s_DepthCopyPositionUniform : s_ColorCopyPositionUniform,
R.left, R.top, R.right, R.bottom);
GL_REPORT_ERRORD();
TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(srcRect);
GL_REPORT_ERRORD();
// should be unique enough; if not, the vbo will "only" be uploaded too often
u64 targetSourceHash = u64(targetSource.left)<<48 | u64(targetSource.top)<<32 | u64(targetSource.right)<<16 | u64(targetSource.bottom);
std::map<u64, VBOCache>::iterator vbo_it = s_VBO.find(targetSourceHash);
if(vbo_it == s_VBO.end()) {
VBOCache item;
item.targetSource.bottom = -1;
item.targetSource.top = -1;
item.targetSource.left = -1;
item.targetSource.right = -1;
glGenBuffers(1, &item.vbo);
glGenVertexArrays(1, &item.vao);
glBindBuffer(GL_ARRAY_BUFFER, item.vbo);
glBindVertexArray(item.vao);
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL);
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
vbo_it = s_VBO.insert(std::pair<u64,VBOCache>(targetSourceHash, item)).first;
}
if(!(vbo_it->second.targetSource == targetSource)) {
GLfloat vertices[] = {
-1.f, 1.f,
(GLfloat)targetSource.left, (GLfloat)targetSource.bottom,
-1.f, -1.f,
(GLfloat)targetSource.left, (GLfloat)targetSource.top,
1.f, 1.f,
(GLfloat)targetSource.right, (GLfloat)targetSource.bottom,
1.f, -1.f,
(GLfloat)targetSource.right, (GLfloat)targetSource.top
};
glBindBuffer(GL_ARRAY_BUFFER, vbo_it->second.vbo);
glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW);
vbo_it->second.targetSource = targetSource;
}
glBindVertexArray(vbo_it->second.vao);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
GL_REPORT_ERRORD();
@ -403,38 +357,39 @@ void TextureCache::TCacheEntry::FromRenderTarget(u32 dstAddr, unsigned int dstFo
TextureCache::TextureCache()
{
const char *pColorMatrixProg =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"uniform vec4 colmat[7];\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"\n"
"void main(){\n"
" vec4 texcol = texture2DRect(samp9, uv0);\n"
" vec4 texcol = texture(samp9, uv0);\n"
" texcol = round(texcol * colmat[5]) * colmat[6];\n"
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"
"}\n";
const char *pDepthMatrixProg =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"uniform vec4 colmat[5];\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"\n"
"void main(){\n"
" vec4 texcol = texture2DRect(samp9, uv0);\n"
" vec4 texcol = texture(samp9, uv0);\n"
" vec4 EncodedDepth = fract((texcol.r * (16777215.0/16777216.0)) * vec4(1.0,256.0,256.0*256.0,1.0));\n"
" texcol = round(EncodedDepth * (16777216.0/16777215.0) * vec4(255.0,255.0,255.0,15.0)) / vec4(255.0,255.0,255.0,15.0);\n"
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];"
"}\n";
const char *VProgram =
"ATTRIN vec2 rawpos;\n"
"ATTRIN vec2 tex0;\n"
"VARYOUT vec2 uv0;\n"
"uniform sampler2D samp9;\n"
"uniform vec4 copy_position;\n" // left, top, right, bottom
"void main()\n"
"{\n"
" uv0 = tex0;\n"
" gl_Position = vec4(rawpos,0,1);\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
ProgramShaderCache::CompileShader(s_ColorMatrixProgram, VProgram, pColorMatrixProg);
@ -445,6 +400,9 @@ TextureCache::TextureCache()
s_ColorCbufid = -1;
s_DepthCbufid = -1;
s_ColorCopyPositionUniform = glGetUniformLocation(s_ColorMatrixProgram.glprogid, "copy_position");
s_DepthCopyPositionUniform = glGetUniformLocation(s_DepthMatrixProgram.glprogid, "copy_position");
s_ActiveTexture = -1;
s_NextStage = -1;
for(auto& gtex : s_Textures)
@ -456,12 +414,6 @@ TextureCache::~TextureCache()
{
s_ColorMatrixProgram.Destroy();
s_DepthMatrixProgram.Destroy();
for(auto& cache : s_VBO) {
glDeleteBuffers(1, &cache.second.vbo);
glDeleteVertexArrays(1, &cache.second.vao);
}
s_VBO.clear();
}
void TextureCache::DisableStage(unsigned int stage)

View File

@ -26,7 +26,7 @@ namespace TextureConverter
using OGL::TextureCache;
static GLuint s_texConvFrameBuffer = 0;
static GLuint s_texConvFrameBuffer[2] = {0,0};
static GLuint s_srcTexture = 0; // for decoding from RAM
static GLuint s_dstTexture = 0; // for encoding to RAM
@ -34,28 +34,16 @@ const int renderBufferWidth = 1024;
const int renderBufferHeight = 1024;
static SHADER s_rgbToYuyvProgram;
static int s_rgbToYuyvUniform_loc;
static SHADER s_yuyvToRgbProgram;
// Not all slots are taken - but who cares.
const u32 NUM_ENCODING_PROGRAMS = 64;
static SHADER s_encodingPrograms[NUM_ENCODING_PROGRAMS];
static GLuint s_encode_VBO = 0;
static GLuint s_encode_VAO = 0;
static TargetRectangle s_cached_sourceRc;
static GLuint s_PBO = 0; // for readback with different strides
// Pass-through vertex shader source: forwards the tex0 attribute to the next
// stage as uv0 and writes rawpos (with z = 0, w = 1) to gl_Position.
static const char *VProgram =
"ATTRIN vec2 rawpos;\n"
"ATTRIN vec2 tex0;\n"
"VARYOUT vec2 uv0;\n"
"void main()\n"
"{\n"
" uv0 = tex0;\n"
" gl_Position = vec4(rawpos, 0.0, 1.0);\n"
"}\n";
void CreatePrograms()
{
/* TODO: Accuracy Improvements
@ -75,14 +63,24 @@ void CreatePrograms()
* inbetween the two Pixels, and only blurs over these two pixels.
*/
// Output is BGRA because that is slightly faster than RGBA.
const char *VProgramRgbToYuyv =
"VARYOUT vec2 uv0;\n"
"uniform vec4 copy_position;\n" // left, top, right, bottom
"uniform sampler2D samp9;\n"
"void main()\n"
"{\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
" uv0 = mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0));\n"
"}\n";
const char *FProgramRgbToYuyv =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"void main()\n"
"{\n"
" vec3 c0 = texture2DRect(samp9, uv0 - dFdx(uv0) * 0.25).rgb;\n"
" vec3 c1 = texture2DRect(samp9, uv0 + dFdx(uv0) * 0.25).rgb;\n"
" vec3 c0 = texture(samp9, (uv0 - dFdx(uv0) * 0.25)).rgb;\n"
" vec3 c1 = texture(samp9, (uv0 + dFdx(uv0) * 0.25)).rgb;\n"
" vec3 c01 = (c0 + c1) * 0.5;\n"
" vec3 y_const = vec3(0.257,0.504,0.098);\n"
" vec3 u_const = vec3(-0.148,-0.291,0.439);\n"
@ -90,6 +88,8 @@ void CreatePrograms()
" vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n"
" ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
"}\n";
ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgramRgbToYuyv, FProgramRgbToYuyv);
s_rgbToYuyvUniform_loc = glGetUniformLocation(s_rgbToYuyvProgram.glprogid, "copy_position");
/* TODO: Accuracy Improvements
*
@ -105,20 +105,15 @@ void CreatePrograms()
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
const char *FProgramYuyvToRgb =
"uniform sampler2DRect samp9;\n"
"uniform sampler2D samp9;\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"void main()\n"
"{\n"
" ivec2 uv = ivec2(gl_FragCoord.xy);\n"
#ifdef USE_GLES3
// We switch top/bottom here. TODO: move this to screen blit.
" ivec2 ts = textureSize(samp9, 0);\n"
" vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1), 0);\n"
#else
" ivec2 ts = textureSize(samp9);\n"
" vec4 c0 = texelFetch(samp9, ivec2(uv.x/2, ts.y-uv.y-1));\n"
#endif
" float y = mix(c0.b, c0.r, (uv.x & 1) == 1);\n"
" float yComp = 1.164 * (y - 0.0625);\n"
" float uComp = c0.g - 0.5;\n"
@ -128,8 +123,6 @@ void CreatePrograms()
" yComp + (2.018 * uComp),\n"
" 1.0);\n"
"}\n";
ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv);
ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb);
}
@ -156,6 +149,13 @@ SHADER &GetOrCreateEncodingShader(u32 format)
}
#endif
const char *VProgram =
"void main()\n"
"{\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
ProgramShaderCache::CompileShader(s_encodingPrograms[format], VProgram, shader);
}
return s_encodingPrograms[format];
@ -163,30 +163,22 @@ SHADER &GetOrCreateEncodingShader(u32 format)
void Init()
{
glGenFramebuffers(1, &s_texConvFrameBuffer);
glGenBuffers(1, &s_encode_VBO );
glGenVertexArrays(1, &s_encode_VAO );
glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO );
glBindVertexArray( s_encode_VAO );
glEnableVertexAttribArray(SHADER_POSITION_ATTRIB);
glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL);
glEnableVertexAttribArray(SHADER_TEXTURE0_ATTRIB);
glVertexAttribPointer(SHADER_TEXTURE0_ATTRIB, 2, GL_FLOAT, 0, sizeof(GLfloat)*4, (GLfloat*)NULL+2);
s_cached_sourceRc.top = -1;
s_cached_sourceRc.bottom = -1;
s_cached_sourceRc.left = -1;
s_cached_sourceRc.right = -1;
glGenFramebuffers(2, s_texConvFrameBuffer);
glActiveTexture(GL_TEXTURE0 + 9);
glGenTextures(1, &s_srcTexture);
glBindTexture(getFbType(), s_srcTexture);
glTexParameteri(getFbType(), GL_TEXTURE_MAX_LEVEL, 0);
glBindTexture(GL_TEXTURE_2D, s_srcTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glGenTextures(1, &s_dstTexture);
glBindTexture(GL_TEXTURE_2D, s_dstTexture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, renderBufferWidth, renderBufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0);
FramebufferManager::SetFramebuffer(0);
glGenBuffers(1, &s_PBO);
@ -197,10 +189,8 @@ void Shutdown()
{
glDeleteTextures(1, &s_srcTexture);
glDeleteTextures(1, &s_dstTexture);
glDeleteFramebuffers(1, &s_texConvFrameBuffer);
glDeleteBuffers(1, &s_encode_VBO );
glDeleteVertexArrays(1, &s_encode_VAO );
glDeleteBuffers(1, &s_PBO);
glDeleteFramebuffers(2, s_texConvFrameBuffer);
s_rgbToYuyvProgram.Destroy();
s_yuyvToRgbProgram.Destroy();
@ -210,8 +200,9 @@ void Shutdown()
s_srcTexture = 0;
s_dstTexture = 0;
s_texConvFrameBuffer = 0;
s_PBO = 0;
s_texConvFrameBuffer[0] = 0;
s_texConvFrameBuffer[1] = 0;
}
void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc,
@ -222,49 +213,28 @@ void EncodeToRamUsingShader(GLuint srcTexture, const TargetRectangle& sourceRc,
// switch to texture converter frame buffer
// attach render buffer as color destination
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, s_dstTexture, 0);
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[0]);
GL_REPORT_ERRORD();
// set source texture
glActiveTexture(GL_TEXTURE0+9);
glBindTexture(getFbType(), srcTexture);
glBindTexture(GL_TEXTURE_2D, srcTexture);
if (linearFilter)
{
glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
}
else
{
glTexParameteri(getFbType(), GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(getFbType(), GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
GL_REPORT_ERRORD();
glViewport(0, 0, (GLsizei)dstWidth, (GLsizei)dstHeight);
GL_REPORT_ERRORD();
if(!(s_cached_sourceRc == sourceRc)) {
GLfloat vertices[] = {
-1.f, -1.f,
(float)sourceRc.left, (float)sourceRc.top,
-1.f, 1.f,
(float)sourceRc.left, (float)sourceRc.bottom,
1.f, -1.f,
(float)sourceRc.right, (float)sourceRc.top,
1.f, 1.f,
(float)sourceRc.right, (float)sourceRc.bottom
};
glBindBuffer(GL_ARRAY_BUFFER, s_encode_VBO );
glBufferData(GL_ARRAY_BUFFER, 4*4*sizeof(GLfloat), vertices, GL_STREAM_DRAW);
s_cached_sourceRc = sourceRc;
}
glBindVertexArray( s_encode_VAO );
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
GL_REPORT_ERRORD();
@ -342,17 +312,10 @@ int EncodeToRamFromTexture(u32 address,GLuint source_texture, bool bFromZBuffer,
s32 expandedWidth = (width + blkW) & (~blkW);
s32 expandedHeight = (height + blkH) & (~blkH);
float sampleStride = bScaleByHalf ? 2.f : 1.f;
float params[] = {
Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride),
0.0f, 0.0f,
(float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight)-1,
(float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight)
};
texconv_shader.Bind();
glUniform4fv(texconv_shader.UniformLocations[0], 2, params);
glUniform4i(texconv_shader.UniformLocations[0],
source.left, source.top,
expandedWidth, bScaleByHalf ? 2 : 1);
TargetRectangle scaledSource;
scaledSource.top = 0;
@ -378,6 +341,8 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des
s_rgbToYuyvProgram.Bind();
glUniform4f(s_rgbToYuyvUniform_loc, sourceRc.left, sourceRc.top, sourceRc.right, sourceRc.bottom);
// We enable linear filtering, because the gamecube does filtering in the vertical direction when
// yscale is enabled.
// Otherwise we get jaggies when a game uses yscaling (most PAL games)
@ -403,7 +368,7 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur
// switch to texture converter frame buffer
// attach destTexture as color destination
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer);
FramebufferManager::SetFramebuffer(s_texConvFrameBuffer[1]);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, destTexture, 0);
GL_REPORT_FBO_ERROR();
@ -411,8 +376,8 @@ void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTextur
// activate source texture
// set srcAddr as data for source texture
glActiveTexture(GL_TEXTURE0+9);
glBindTexture(getFbType(), s_srcTexture);
glTexImage2D(getFbType(), 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
glBindTexture(GL_TEXTURE_2D, s_srcTexture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, srcWidth / 2, srcHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, srcAddr);
glViewport(0, 0, srcWidth, srcHeight);
s_yuyvToRgbProgram.Bind();

View File

@ -21,7 +21,6 @@
static char text[16384];
static bool IntensityConstantAdded = false;
static int s_incrementSampleXCount = 0;
namespace TextureConversionShader
{
@ -57,34 +56,25 @@ u16 GetEncodedSampleCount(u32 format)
}
}
// Builds the register-binding suffix " : register(<prefix><num>)" that is
// appended to a uniform declaration.
//
// ApiType: for API_OPENGL no suffix is produced and "" is returned.
// prefix:  register class text placed in front of the number (e.g. "c").
// num:     register index.
//
// Returns a pointer to a function-local static buffer for non-OpenGL APIs.
// The buffer is overwritten by the next call, so the result must be consumed
// (or copied) before calling WriteRegister again.
const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num)
{
	if (ApiType == API_OPENGL)
		return ""; // Once we switch to GLSL 1.3 we can do something here

	static char result[64];
	// Bounded formatting: an over-long prefix is truncated instead of writing
	// past the end of result. num is unsigned, so it is printed with %u.
	snprintf(result, sizeof(result), " : register(%s%u)", prefix, static_cast<unsigned int>(num));
	return result;
}
// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
{
// [0] left, top, right, bottom of source rectangle within source texture
// [1] width and height of destination texture in pixels
// Two were merged for GLSL
WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS));
// left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2)
WRITE(p, "uniform int4 " I_COLORS";\n");
int blkW = TexDecoder_GetBlockWidthInTexels(format);
int blkH = TexDecoder_GetBlockHeightInTexels(format);
int samples = GetEncodedSampleCount(format);
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
int factor = samples == 1 ? 2 : 1;
if (ApiType == API_OPENGL)
{
WRITE(p, "#define samp0 samp9\n");
WRITE(p, "uniform sampler2DRect samp0;\n");
WRITE(p, "uniform sampler2D samp0;\n");
WRITE(p, " out vec4 ocol0;\n");
WRITE(p, " VARYIN float2 uv0;\n");
WRITE(p, "void main()\n");
}
else // D3D
@ -93,126 +83,44 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
WRITE(p, "Texture2D Tex0 : register(t0);\n");
WRITE(p,"void main(\n");
WRITE(p," out float4 ocol0 : SV_Target,\n");
WRITE(p," in float2 uv0 : TEXCOORD0)\n");
WRITE(p," out float4 ocol0 : SV_Target)\n");
}
WRITE(p, "{\n"
" float2 sampleUv;\n"
" float2 uv1 = floor(uv0);\n");
" int2 sampleUv;\n"
" int2 uv1 = int2(gl_FragCoord.xy);\n"
" float2 uv0 = float2(0.0, 0.0);\n"
);
WRITE(p, " uv1.x = uv1.x * %d.0;\n", samples);
WRITE(p, " uv1.x = uv1.x * %d;\n", samples);
WRITE(p, " float xl = floor(uv1.x / %d.0);\n", blkW);
WRITE(p, " float xib = uv1.x - (xl * %d.0);\n", blkW);
WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH);
WRITE(p, " float yb = yl * %d.0;\n", blkH);
WRITE(p, " float yoff = uv1.y - yb;\n");
WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n");
WRITE(p, " float xel = floor(xp / %d.0);\n", blkW);
WRITE(p, " float xb = floor(xel / %d.0);\n", blkH);
WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH);
WRITE(p, " int yl = uv1.y / %d;\n", blkH);
WRITE(p, " int yb = yl * %d;\n", blkH);
WRITE(p, " int yoff = uv1.y - yb;\n");
WRITE(p, " int xp = uv1.x + yoff * " I_COLORS".z;\n");
WRITE(p, " int xel = xp / %d;\n", samples == 1 ? factor : blkW);
WRITE(p, " int xb = xel / %d;\n", blkH);
WRITE(p, " int xoff = xel - xb * %d;\n", blkH);
WRITE(p, " int xl = uv1.x * %d / %d;\n", factor, blkW);
WRITE(p, " int xib = uv1.x * %d - xl * %d;\n", factor, blkW);
WRITE(p, " int halfxb = xb / %d;\n", factor);
WRITE(p, " sampleUv.x = xib + (xb * %d.0);\n", blkW);
WRITE(p, " sampleUv.x = xib + halfxb * %d;\n", blkW);
WRITE(p, " sampleUv.y = yb + xoff;\n");
WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n");
if (ApiType == API_OPENGL)
WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n");
WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n");
if (ApiType != API_OPENGL)
{
WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n"); // still need to determine the reason for this
WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n");
}
}
// block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset
void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType)
void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, API_TYPE ApiType)
{
// [0] left, top, right, bottom of source rectangle within source texture
// [1] width and height of destination texture in pixels
// Two were merged for GLSL
WRITE(p, "uniform float4 " I_COLORS"[2] %s;\n", WriteRegister(ApiType, "c", C_COLORS));
int blkW = TexDecoder_GetBlockWidthInTexels(format);
int blkH = TexDecoder_GetBlockHeightInTexels(format);
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
if (ApiType == API_OPENGL)
{
WRITE(p, "#define samp0 samp9\n");
WRITE(p, "uniform sampler2DRect samp0;\n");
WRITE(p, " out float4 ocol0;\n");
WRITE(p, " VARYIN float2 uv0;\n");
WRITE(p, "void main()\n");
}
else
{
WRITE(p,"sampler samp0 : register(s0);\n");
WRITE(p, "Texture2D Tex0 : register(t0);\n");
WRITE(p,"void main(\n");
WRITE(p," out float4 ocol0 : SV_Target,\n");
WRITE(p," in float2 uv0 : TEXCOORD0)\n");
}
WRITE(p, "{\n"
" float2 sampleUv;\n"
" float2 uv1 = floor(uv0);\n");
WRITE(p, " float yl = floor(uv1.y / %d.0);\n", blkH);
WRITE(p, " float yb = yl * %d.0;\n", blkH);
WRITE(p, " float yoff = uv1.y - yb;\n");
WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n");
WRITE(p, " float xel = floor(xp / 2.0);\n");
WRITE(p, " float xb = floor(xel / %d.0);\n", blkH);
WRITE(p, " float xoff = xel - (xb * %d.0);\n", blkH);
WRITE(p, " float x2 = uv1.x * 2.0;\n");
WRITE(p, " float xl = floor(x2 / %d.0);\n", blkW);
WRITE(p, " float xib = x2 - (xl * %d.0);\n", blkW);
WRITE(p, " float halfxb = floor(xb / 2.0);\n");
WRITE(p, " sampleUv.x = xib + (halfxb * %d.0);\n", blkW);
WRITE(p, " sampleUv.y = yb + xoff;\n");
WRITE(p, " sampleUv = sampleUv * " I_COLORS"[0].xy;\n");
if (ApiType == API_OPENGL)
WRITE(p," sampleUv.y = " I_COLORS"[1].y - sampleUv.y;\n");
WRITE(p, " sampleUv = sampleUv + " I_COLORS"[1].zw;\n");
if (ApiType != API_OPENGL)
{
WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n");// still to determine the reason for this
WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n");
}
}
// Emits shader source that samples the source texture and assigns the
// `colorComp` components of the result to the shader expression `dest`.
//   p         - output buffer pointer handed to WRITE; taken by reference.
//   colorComp - swizzle appended to the sample expression (callers pass e.g. "rgb").
//   dest      - left-hand side of the emitted assignment.
//   ApiType   - selects the D3D or OpenGL sampling function name and increment unit.
// NOTE(review): two different sampling statements are emitted below, and the
// second one reads `xoffset`, which is not a parameter in the visible signature.
// This looks like the pre- and post-change bodies shown together - confirm
// which one is intended before relying on this block.
void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYPE ApiType)
{
// Name of the sampling call written into the shader, chosen per API.
const char* texSampleOpName;
if (ApiType == API_D3D)
texSampleOpName = "tex0.Sample";
else // OGL
texSampleOpName = "texture2DRect";
// The increment of sampleUv.x is delayed, so we perform it here. See WriteIncrementSampleX.
// The unit below is multiplied by s_incrementSampleXCount in the emitted expression.
const char* texSampleIncrementUnit;
if (ApiType == API_D3D)
texSampleIncrementUnit = I_COLORS"[0].x / " I_COLORS"[0].z";
else // OGL
texSampleIncrementUnit = I_COLORS"[0].x";
WRITE(p, " %s = %s(samp0, sampleUv + float2(%d.0 * (%s), 0.0)).%s;\n",
dest, texSampleOpName, s_incrementSampleXCount, texSampleIncrementUnit, colorComp);
WRITE(p, // sampleUv is the sample position in (int)gx_coords
"uv0 = float2(sampleUv + int2(%d, 0)" // pixel offset (if more than one pixel is sampled)
" + " I_COLORS".xy);\n" // move to the copied rect
"uv0 += float2(0.5, 0.5);\n" // move to center of pixel
"uv0 *= float(" I_COLORS".w);\n" // scale by two if needed (this will move to the pixel border to filter linearly)
"uv0 /= float2(%d, %d);\n" // normalize to [0:1]
"uv0.y = 1.0-uv0.y;\n" // ogl foo (disable this line for d3d)
"%s = texture(samp0, uv0).%s;\n",
xoffset, EFB_WIDTH, EFB_HEIGHT, dest, colorComp
);
}
void WriteColorToIntensity(char*& p, const char* src, const char* dest)
@ -226,25 +134,6 @@ void WriteColorToIntensity(char*& p, const char* src, const char* dest)
// don't add IntensityConst.a yet, because doing it later is faster and uses less instructions, due to vectorization
}
// Records that the next sample should be taken one step further along X.
// Nothing is written to the shader here: only the file-level counter
// s_incrementSampleXCount is bumped. WriteSampleColor folds that counter into
// the sample coordinate as a constant multiplier, and WriteEncoderEnd resets it.
void WriteIncrementSampleX(char*& p,API_TYPE ApiType)
{
	// Why the increment is deferred instead of emitted:
	// the shader compiler apparently does not recognize that a sequence like
	//     a = lookup(x);  x = x + increment;
	//     b = lookup(x);  x = x + increment;
	//     c = lookup(x);
	// is equivalent to
	//     a = lookup(x + 0.0 * increment);
	//     b = lookup(x + 1.0 * increment);
	//     c = lookup(x + 2.0 * increment);
	// even though the second form uses considerably fewer ALU instruction slots.
	// So the step is only counted here, and the constant multiplier is written
	// out later by WriteSampleColor.
	s_incrementSampleXCount += 1;
}
void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest)
{
WRITE(p, " %s = floor(%s * 255.0 / exp2(8.0 - %d.0));\n", dest, src, depth);
@ -254,7 +143,6 @@ void WriteEncoderEnd(char* p, API_TYPE ApiType)
{
// Terminates the generated shader function and clears the generator's
// file-level state (presumably so the next encoder starts clean - confirm).
// Emit the closing brace of the generated shader body.
WRITE(p, "}\n");
// Reset the flag and the deferred X-increment counter used while emitting samples.
IntensityConstantAdded = false;
s_incrementSampleXCount = 0;
}
void WriteI8Encoder(char* p, API_TYPE ApiType)
@ -262,19 +150,16 @@ void WriteI8Encoder(char* p, API_TYPE ApiType)
WriteSwizzler(p, GX_TF_I8, ApiType);
WRITE(p, " float3 texSample;\n");
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 0, ApiType);
WriteColorToIntensity(p, "texSample", "ocol0.b");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 1, ApiType);
WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 2, ApiType);
WriteColorToIntensity(p, "texSample", "ocol0.r");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 3, ApiType);
WriteColorToIntensity(p, "texSample", "ocol0.a");
WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity
@ -289,35 +174,28 @@ void WriteI4Encoder(char* p, API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 0, ApiType);
WriteColorToIntensity(p, "texSample", "color0.b");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 1, ApiType);
WriteColorToIntensity(p, "texSample", "color1.b");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 2, ApiType);
WriteColorToIntensity(p, "texSample", "color0.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 3, ApiType);
WriteColorToIntensity(p, "texSample", "color1.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 4, ApiType);
WriteColorToIntensity(p, "texSample", "color0.r");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 5, ApiType);
WriteColorToIntensity(p, "texSample", "color1.r");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 6, ApiType);
WriteColorToIntensity(p, "texSample", "color0.a");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "texSample", ApiType);
WriteSampleColor(p, "rgb", "texSample", 7, ApiType);
WriteColorToIntensity(p, "texSample", "color1.a");
WRITE(p, " color0.rgba += IntensityConst.aaaa;\n");
@ -335,12 +213,11 @@ void WriteIA8Encoder(char* p,API_TYPE ApiType)
WriteSwizzler(p, GX_TF_IA8, ApiType);
WRITE(p, " float4 texSample;\n");
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType);
WRITE(p, " ocol0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType);
WRITE(p, " ocol0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.a");
@ -356,22 +233,19 @@ void WriteIA4Encoder(char* p,API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType);
WRITE(p, " color0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.b");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType);
WRITE(p, " color0.g = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 2, ApiType);
WRITE(p, " color0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.r");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 3, ApiType);
WRITE(p, " color0.a = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.a");
@ -388,9 +262,8 @@ void WriteRGB565Encoder(char* p,API_TYPE ApiType)
{
WriteSwizzler(p, GX_TF_RGB565, ApiType);
WriteSampleColor(p, "rgb", "float3 texSample0", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgb", "float3 texSample1", ApiType);
WriteSampleColor(p, "rgb", "float3 texSample0", 0, ApiType);
WriteSampleColor(p, "rgb", "float3 texSample1", 1, ApiType);
WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n");
WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n");
WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n");
@ -417,7 +290,7 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WRITE(p, " float gUpper;\n");
WRITE(p, " float gLower;\n");
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType);
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -444,9 +317,8 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WRITE(p, "}\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType);
WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -483,15 +355,13 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType);
WriteToBitDepth(p, 3, "texSample.a", "color0.b");
WriteToBitDepth(p, 4, "texSample.r", "color1.b");
WriteToBitDepth(p, 4, "texSample.g", "color0.g");
WriteToBitDepth(p, 4, "texSample.b", "color1.g");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType);
WriteToBitDepth(p, 3, "texSample.a", "color0.r");
WriteToBitDepth(p, 4, "texSample.r", "color1.r");
WriteToBitDepth(p, 4, "texSample.g", "color0.a");
@ -503,7 +373,7 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType)
void WriteRGBA8Encoder(char* p,API_TYPE ApiType)
{
Write32BitSwizzler(p, GX_TF_RGBA8, ApiType);
WriteSwizzler(p, GX_TF_RGBA8, ApiType);
WRITE(p, " float cl1 = xb - (halfxb * 2.0);\n");
WRITE(p, " float cl0 = 1.0 - cl1;\n");
@ -512,15 +382,13 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 0, ApiType);
WRITE(p, " color0.b = texSample.a;\n");
WRITE(p, " color0.g = texSample.r;\n");
WRITE(p, " color1.b = texSample.g;\n");
WRITE(p, " color1.g = texSample.b;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "rgba", "texSample", ApiType);
WriteSampleColor(p, "rgba", "texSample", 1, ApiType);
WRITE(p, " color0.r = texSample.a;\n");
WRITE(p, " color0.a = texSample.r;\n");
WRITE(p, " color1.r = texSample.g;\n");
@ -537,28 +405,14 @@ void WriteC4Encoder(char* p, const char* comp,API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "color0.b", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color1.b", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color0.g", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color1.g", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color0.r", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color1.r", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color0.a", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "color1.a", ApiType);
WriteSampleColor(p, comp, "color0.b", 0, ApiType);
WriteSampleColor(p, comp, "color1.b", 1, ApiType);
WriteSampleColor(p, comp, "color0.g", 2, ApiType);
WriteSampleColor(p, comp, "color1.g", 3, ApiType);
WriteSampleColor(p, comp, "color0.r", 4, ApiType);
WriteSampleColor(p, comp, "color1.r", 5, ApiType);
WriteSampleColor(p, comp, "color0.a", 6, ApiType);
WriteSampleColor(p, comp, "color1.a", 7, ApiType);
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
@ -571,16 +425,10 @@ void WriteC8Encoder(char* p, const char* comp,API_TYPE ApiType)
{
// Emits an encoder that starts with the GX_CTF_R8 swizzler, samples component
// `comp` four times into ocol0.b, ocol0.g, ocol0.r and ocol0.a (in that order),
// and then closes the shader with WriteEncoderEnd.
// NOTE(review): both the four-argument WriteSampleColor calls (interleaved with
// WriteIncrementSampleX) and the five-argument calls with an explicit offset
// 0..3 appear below; this looks like old and new diff lines shown together -
// confirm which set is live.
WriteSwizzler(p, GX_CTF_R8, ApiType);
WriteSampleColor(p, comp, "ocol0.b", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "ocol0.g", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "ocol0.r", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "ocol0.a", ApiType);
WriteSampleColor(p, comp, "ocol0.b", 0, ApiType);
WriteSampleColor(p, comp, "ocol0.g", 1, ApiType);
WriteSampleColor(p, comp, "ocol0.r", 2, ApiType);
WriteSampleColor(p, comp, "ocol0.a", 3, ApiType);
WriteEncoderEnd(p, ApiType);
}
@ -592,22 +440,19 @@ void WriteCC4Encoder(char* p, const char* comp,API_TYPE ApiType)
WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "texSample", ApiType);
WriteSampleColor(p, comp, "texSample", 0, ApiType);
WRITE(p, " color0.b = texSample.x;\n");
WRITE(p, " color1.b = texSample.y;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "texSample", ApiType);
WriteSampleColor(p, comp, "texSample", 1, ApiType);
WRITE(p, " color0.g = texSample.x;\n");
WRITE(p, " color1.g = texSample.y;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "texSample", ApiType);
WriteSampleColor(p, comp, "texSample", 2, ApiType);
WRITE(p, " color0.r = texSample.x;\n");
WRITE(p, " color1.r = texSample.y;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "texSample", ApiType);
WriteSampleColor(p, comp, "texSample", 3, ApiType);
WRITE(p, " color0.a = texSample.x;\n");
WRITE(p, " color1.a = texSample.y;\n");
@ -622,10 +467,8 @@ void WriteCC8Encoder(char* p, const char* comp, API_TYPE ApiType)
{
// Emits an encoder that starts with the GX_CTF_RA8 swizzler, samples the
// components named by `comp` twice - first into ocol0.bg, then into ocol0.ra -
// and then closes the shader with WriteEncoderEnd.
// NOTE(review): both the four-argument WriteSampleColor calls (separated by
// WriteIncrementSampleX) and the five-argument calls with explicit offsets 0
// and 1 appear below; this looks like old and new diff lines shown together -
// confirm which set is live.
WriteSwizzler(p, GX_CTF_RA8, ApiType);
WriteSampleColor(p, comp, "ocol0.bg", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, comp, "ocol0.ra", ApiType);
WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType);
WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType);
WriteEncoderEnd(p, ApiType);
}
@ -636,19 +479,16 @@ void WriteZ8Encoder(char* p, const char* multiplier,API_TYPE ApiType)
WRITE(p, " float depth;\n");
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 0, ApiType);
WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 1, ApiType);
WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 2, ApiType);
WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 3, ApiType);
WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier);
WriteEncoderEnd(p, ApiType);
@ -663,7 +503,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType)
// byte order is reversed
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 0, ApiType);
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -673,9 +513,7 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType)
WRITE(p, " ocol0.b = expanded.g / 255.0;\n");
WRITE(p, " ocol0.g = expanded.r / 255.0;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 1, ApiType);
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -697,7 +535,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType)
// byte order is reversed
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 0, ApiType);
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -709,9 +547,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType)
WRITE(p, " ocol0.b = expanded.b / 255.0;\n");
WRITE(p, " ocol0.g = expanded.g / 255.0;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WriteSampleColor(p, "b", "depth", 1, ApiType);
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -728,7 +564,7 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType)
void WriteZ24Encoder(char* p, API_TYPE ApiType)
{
Write32BitSwizzler(p, GX_TF_Z24X8, ApiType);
WriteSwizzler(p, GX_TF_Z24X8, ApiType);
WRITE(p, " float cl = xb - (halfxb * 2.0);\n");
@ -737,9 +573,8 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType)
WRITE(p, " float3 expanded0;\n");
WRITE(p, " float3 expanded1;\n");
WriteSampleColor(p, "b", "depth0", ApiType);
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth1", ApiType);
WriteSampleColor(p, "b", "depth0", 0, ApiType);
WriteSampleColor(p, "b", "depth1", 1, ApiType);
for (int i = 0; i < 2; i++)
{