zzogl-pg:

* properly separate both GLSL implementations
* glsl4: Use a define for logz instead of extra math computation. Much easier to understand


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5364 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-08-08 17:44:03 +00:00
parent 0d9d10bc26
commit 7ff8abe376
7 changed files with 941 additions and 957 deletions

View File

@ -47,7 +47,7 @@ endif(CMAKE_BUILD_TYPE STREQUAL Release)
# Select the shader API
if(GLSL_API)
add_definitions(-DGLSL_API -DGLSL4_API -DOGL4_LOG)
add_definitions(-DGLSL4_API -DOGL4_LOG)
#add_definitions(-DGLSL_API)
else(GLSL_API)
add_definitions(-DNVIDIA_CG_API)

View File

@ -226,7 +226,7 @@ void CDepthTarget::Update(int context, CRenderTarget* prndr)
vdepth.z = vdepth.w = 0;
}
#ifdef GLSL_API
#if defined(GLSL_API) || defined(GLSL4_API)
assert(ppsBitBltDepth.sBitBltZ != -1);
#else
assert(ppsBitBltDepth.sBitBltZ != 0);

View File

@ -475,10 +475,10 @@ bool ZZCreate(int _width, int _height)
g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH;
// FIXME: not clean maybe re integrate the function in shader files --greg
#ifndef GLSL_API
if (!CreateOpenShadersFile()) return false;
#else
#if defined(GLSL_API) || defined(GLSL4_API)
if (!ZZshCreateOpenShadersFile()) return false;
#else
if (!CreateOpenShadersFile()) return false;
#endif
GL_REPORT_ERROR();

View File

@ -18,7 +18,7 @@
*/
// By default enable nvidia cg api
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API) && !defined(GLSL4_API)
#define NVIDIA_CG_API
#endif

View File

@ -35,7 +35,7 @@
#include "GS.h"
// By default enable nvidia cg api
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API) && !defined(GLSL4_API)
#define NVIDIA_CG_API
#endif
// --------------------------- API abstraction level --------------------------------
@ -55,7 +55,6 @@
#define sZero 0 // Zero program
#define SAFE_RELEASE_PROG(x) { if( (x) != NULL ) { cgDestroyProgram(x); x = NULL; } }
inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
#endif // end NVIDIA cg-toolkit API
@ -68,7 +67,10 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
// Set it to 0 to disable context usage, 1 to enable. FFX-1 has a strange issue with ClampExt.
#define NOCONTEXT 0
#ifdef GLSL_API
#if defined(GLSL_API)
#define MAX_ACTIVE_UNIFORMS 600
#define MAX_ACTIVE_SHADERS 400
enum ZZshPARAMTYPE {
ZZ_UNDEFINED,
@ -89,6 +91,13 @@ typedef struct {
bool Constant; // Uniform could be constants, does not change at program flow
bool Settled; // Check if Uniform value was set.
} ZZshParamInfo;
const ZZshParamInfo qZero = {ShName:"", type:ZZ_UNDEFINED, fvalue:{0}, sampler: -1, texid: 0, Constant: false, Settled: false};
#define SAFE_RELEASE_PROG(x) { /*don't know what to do*/ }
#endif
#if defined(GLSL_API) || defined(GLSL4_API)
typedef struct {
void* link;
@ -103,21 +112,13 @@ typedef struct {
#define ZZshError int
#define ZZshIndex GLuint
const ZZshParamInfo qZero = {ShName:"", type:ZZ_UNDEFINED, fvalue:{0}, sampler: -1, texid: 0, Constant: false, Settled: false};
#define pZero 0
const ZZshShaderLink sZero = {link: NULL, isFragment: false};
inline bool ZZshActiveParameter(ZZshParameter param) {return (param > -1); }
#ifndef GLSL4_API
#define SAFE_RELEASE_PROG(x) { /*don't know what to do*/ }
#endif
// ---------------------------
#endif
extern float4 g_vdepth;
extern float4 vlogz;
@ -160,15 +161,13 @@ struct ConstantUniform {
float g_fMult[4];
// VS
float g_fZ[4];
float g_fZMin[4];
float g_fZNorm[4];
// PS
float g_fExactColor[4];
};
float linear[8*4];
float linear[6*4];
};
void SettleFloat(uint indice, const float* v) {
assert(indice + 3 < 8*4);
assert(indice + 3 < 6*4);
linear[indice+0] = v[0];
linear[indice+1] = v[1];
linear[indice+2] = v[2];
@ -223,8 +222,6 @@ struct VertexUniform {
//const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC, ZZ_SH_NONE};
// We have "compatible" shaders, such as RegularFogVS and RegularFogPS, so we don't need to worry about incompatible shaders.
// It is used only in GLSL mode.
@ -235,9 +232,6 @@ extern int g_nPixelShaderVer;
extern ZZshShaderLink pvs[16], g_vsprog, g_psprog;
extern ZZshParameter g_vparamPosXY[2], g_fparamFogColor;
#define MAX_ACTIVE_UNIFORMS 600
#define MAX_ACTIVE_SHADERS 400
#ifndef GLSL4_API
struct FRAGMENTSHADER
{
@ -511,8 +505,6 @@ struct COMMONSHADER
g_fc0 = (ZZshParameter)offsetof(struct ConstantUniform, g_fc0) /4;
g_fMult = (ZZshParameter)offsetof(struct ConstantUniform, g_fMult) /4;
g_fZ = (ZZshParameter)offsetof(struct ConstantUniform, g_fZ) /4;
g_fZMin = (ZZshParameter)offsetof(struct ConstantUniform, g_fZMin) /4;
g_fZNorm = (ZZshParameter)offsetof(struct ConstantUniform, g_fZNorm) /4;
g_fExactColor = (ZZshParameter)offsetof(struct ConstantUniform, g_fExactColor) /4;
// Setup the constant buffer
@ -520,18 +512,11 @@ struct COMMONSHADER
// Set Z-test, log or no log;
if (conf.settings().no_logz) {
g_vdepth = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
vlogz = float4( 1.0f, 0.0f, 0.0f, 0.0f);
}
else {
g_vdepth = float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f);
vlogz = float4( 0.0f, 1.0f, 0.0f, 0.0f);
}
uniform_buffer_constant.SettleFloat(g_fZ, g_vdepth );
uniform_buffer_constant.SettleFloat(g_fZMin, vlogz );
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
float4 vnorm = float4(g_filog32, 0, 0,0);
uniform_buffer_constant.SettleFloat(g_fZNorm, vnorm);
uniform_buffer_constant.SettleFloat(g_fBilinear, float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f ) );
uniform_buffer_constant.SettleFloat(g_fZBias, float4(1.0f/256.0f, 1.0004f, 1, 0.5f) );
@ -542,7 +527,7 @@ struct COMMONSHADER
}
ZZshParameter g_fparamFogColor, g_vparamPosXY;
ZZshParameter g_fBilinear, g_fZBias, g_fc0, g_fMult, g_fZ, g_fZMin, g_fZNorm, g_fExactColor;
ZZshParameter g_fBilinear, g_fZBias, g_fc0, g_fMult, g_fZ, g_fExactColor;
uint context;
GlobalUniform uniform_buffer[ZZSH_CTX_ALL];

View File

@ -48,7 +48,6 @@
#include "Util.h"
#include "ZZoglShaders.h"
#include "zpipe.h"
#include <math.h>
#include <map>
#include <fcntl.h> // this for open(). Maybe linux-specific
#include <sys/mman.h> // and this for mmap
@ -75,11 +74,7 @@
// #define ENABLE_MARKER // Fire some marker for opengl Debugger (apitrace, gdebugger)
//------------------ Constants
// Used in a logarithmic Z-test, as (1-o(1))/log(MAX_U32).
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
const static char* g_pShaders[4] = { "full", "reduced", "accurate", "accurate-reduced" };
const static char* g_pPsTexWrap[] = { "#define REPEAT 1\n", "#define CLAMP 1\n", "#define REGION_REPEAT 1\n", "\n" };
const int GLSL_VERSION = 330;
@ -146,16 +141,6 @@ bool ZZshCheckProfilesSupport() {
return true;
}
// Error handler. Setup in ZZogl_Create once.
void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata)
{/*
ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err));
const char* listing = cgGetLastListing(g_cgcontext);
if (listing != NULL)
ZZLog::Debug_Log(" last listing: %s", listing);
*/
}
bool ZZshStartUsingShaders() {
ZZLog::Error_Log("Creating effects.");
@ -173,7 +158,6 @@ bool ZZshStartUsingShaders() {
memset(&temp, 0, sizeof(temp));
temp.wms = 3; temp.wmt = 3;
g_nPixelShaderVer = 0;//SHADER_ACCURATE;
// test
bool bFailed;
FRAGMENTSHADER* pfrag = ZZshLoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed);
@ -185,7 +169,6 @@ bool ZZshStartUsingShaders() {
ZZLog::Error_Log("Creating extra effects.");
B_G(ZZshLoadExtraEffects(), return false);
ZZLog::Error_Log("Using %s shaders.", g_pShaders[g_nPixelShaderVer]);
return true;
}
@ -469,6 +452,9 @@ std::string BuildGlslMacro(bool writedepth, int texwrap = 3, bool testaem = fals
if (exactcolor) header += "#define EXACT_COLOR 1\n";
header += format("%s", g_pPsTexWrap[texwrap]);
//const char* AddAccurate = (ps & SHADER_ACCURATE)?"#define ACCURATE_DECOMPRESSION 1\n":"";
if (conf.settings().no_logz) {
header += "#define NO_LOGZ 1\n";
}
return header;
}
@ -589,7 +575,7 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
else
texwrap = TEXWRAP_REPEAT_CLAMP;
int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, 0, 0);
if( pbFailed != NULL ) *pbFailed = false;

View File

@ -31,6 +31,7 @@
//#define WRITE_DEPTH // set if depth is also written in a MRT
//#define ACCURATE_DECOMPRESSION // set for less capable hardware ATI Radeon 9000 series
//#define EXACT_COLOR // make sure the output color is clamped to 1/255 boundaries (for alpha testing)
//#define NO_LOGZ // disable logz
#define PERSPECTIVE_CORRECT_TEX
@ -58,7 +59,7 @@ struct vertex
{
vec4 color;
TEX_DECL tex;
vec4 z;
vec4 Z;
float fog;
};
@ -137,8 +138,6 @@ layout(std140, binding = 0) uniform constant_buffer
float4 g_fMult;
// Vertex
float4 g_fZ; // transforms d3dcolor z into float z
float4 g_fZMin;
float4 g_fZNorm;
// Pixel
half4 g_fExactColor;
};
@ -567,7 +566,7 @@ half4 ps2FinalColor(half4 col)
#ifdef WRITE_DEPTH
void write_depth_target()
{
FragData1 = PSin.z;
FragData1 = PSin.Z;
}
#else
void write_depth_target() { }
@ -665,7 +664,11 @@ void BitBltDepthPS() {
vec4 data;
data = texture(g_sMemory, ps2memcoord(PSin.tex.xy));
FragData0 = data + g_fZBias.y;
gl_FragDepth = (log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w) * g_fZMin.y + dot(data, g_fBitBltZ) * g_fZMin.x ;
#ifdef NO_LOGZ
gl_FragDepth = dot(data, g_fBitBltZ);
#else
gl_FragDepth = log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w;
#endif
}
void BitBltDepthMRTPS() {
@ -673,7 +676,11 @@ void BitBltDepthMRTPS() {
data = texture(g_sMemory, ps2memcoord(PSin.tex.xy));
FragData0 = data + g_fZBias.y;
FragData1.x = g_fc0.x;
gl_FragDepth = (log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w) * g_fZMin.y + dot(data, g_fBitBltZ) * g_fZMin.x ;
#ifdef NO_LOGZ
gl_FragDepth = dot(data, g_fBitBltZ);
#else
gl_FragDepth = log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w;
#endif
}
// static const float BlurKernel[9] = {
@ -694,7 +701,7 @@ half4 BilinearFloat16(float2 tex0)
}
void CRTCTargInterPS() {
float finter = texture(g_sInterlace, PSin.z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float4 c = BilinearFloat16(PSin.tex.xy);
c.w = ( g_fc0.w*c.w * g_fOneColor.x + g_fOneColor.y ) * finter;
FragData0 = c;
@ -709,7 +716,7 @@ void CRTCTargPS() {
}
void CRTCInterPS() {
float finter = texture(g_sInterlace, PSin.z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float2 filtcoord = trunc(PSin.tex.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
half4 c = BilinearBitBlt(filtcoord);
c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;
@ -718,7 +725,7 @@ void CRTCInterPS() {
// simpler
void CRTCInterPS_Nearest() {
float finter = texture(g_sInterlace, PSin.z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
half4 c = texture(g_sMemory, ps2memcoord(PSin.tex.xy));
c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;
FragData0 = c;
@ -739,7 +746,7 @@ void CRTCPS_Nearest() {
}
void CRTC24InterPS() {
float finter = texture(g_sInterlace, PSin.z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
float2 filtcoord = trunc(PSin.tex.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
half4 c = texture(g_sMemory, ps2memcoord(filtcoord));
@ -832,8 +839,8 @@ void SetTex() {
void SetZ() {
#ifdef WRITE_DEPTH
VSout.z = SecondaryColor * g_fZBias.x + g_fZBias.y;
VSout.z.w = g_fc0.y;
VSout.Z = SecondaryColor * g_fZBias.x + g_fZBias.y;
VSout.Z.w = 1.0f;
#endif
}
@ -849,8 +856,14 @@ void SetPosition() {
// position.z = log(g_fc0.y + dot(g_fZ, SecondaryColor.zyxw)) * g_fZNorm.x
// position.z = log(1 + Z_INT) * 0.999f / (32 * log(2.0)) = log2(1 + Z_INT) * 0.999f / 32
// log2(...) will range from 0 to 32
position.z = (log(g_fc0.y + dot(g_fZ, SecondaryColor.zyxw)) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + dot(g_fZ, SecondaryColor.zyxw) * g_fZMin.x ;
position.w = g_fc0.y;
// position.z = (log(g_fc0.y + dot(g_fZ, SecondaryColor.zyxw)) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + dot(g_fZ, SecondaryColor.zyxw) * g_fZMin.x ;
#ifdef NO_LOGZ
position.z = dot(g_fZ, SecondaryColor.zyxw);
#else
position.z = log2(1.0f + dot(g_fZ, SecondaryColor.zyxw)) * 0.999f/32.0f;
#endif
position.w = 1.0f;
gl_Position = position;
}
@ -896,7 +909,7 @@ void BitBltVS() {
gl_Position = position;
VSout.tex.xy = TexCoord.xy * g_fBitBltTex.xy + g_fBitBltTex.zw;
VSout.z.xy = position.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;
VSout.Z.xy = position.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;
}
#endif