diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index a2ddf6e4ce..ba97af6d66 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -107,17 +107,42 @@ void PixelShaderCache::Init() s_ColorMatrixProgram = 0; } - sprintf(pmatrixprog, "!!ARBfp1.0" + sprintf(pmatrixprog, "!!ARBfp1.0\n" "TEMP R0;\n" "TEMP R1;\n" "TEMP R2;\n" - "PARAM K0 = { 65535.0, 255.0,1.0,16777215.0};\n" - "PARAM K1 = { 0.999999940395355224609375, 1.0000000596046483281045155587504,0.0,0.0};\n" + //16777215/16777216*256, 1/255, 256, 0 + "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" + //sample the depth value "TEX R2, fragment.texcoord[0], texture[0], RECT;\n" - "MUL R0, R2.x, K1.x;\n" - "MUL R0, R0.x, K0;\n" - "FRC R0, R0;\n" - "MUL R0, R0, K1.y;\n" + + //scale from [0*16777216..1*16777216] to + //[0*16777215..1*16777215], multiply by 256 + "MUL R0, R2.x, K0.x;\n" // *16777215/16777216*256 + + //It is easy to get bad results due to low precision + //here, for example converting like this: + //MUL R0,R0,{ 65536, 256, 1, 16777216 } + //FRC R0,R0 + //gives {?, 128/255, 254/255, ?} for depth value 254/255 + //on some gpus + + "FLR R0.z,R0;\n" //bits 31..24 + + "SUB R0.xyw,R0,R0.z;\n" //subtract bits 31..24 from rest + "MUL R0.xyw,R0,K0.z;\n" // *256 + "FLR R0.y,R0;\n" //bits 23..16 + + "SUB R0.xw,R0,R0.y;\n" //subtract bits 23..16 from rest + "MUL R0.xw,R0,K0.z;\n" // *256 + "FLR R0.x,R0;\n" //bits 15..8 + + "SUB R0.w,R0,R0.x;\n" //subtract bits 15..8 from rest + "MUL R0.w,R0,K0.z;\n" // *256 + "FLR R0.w,R0;\n" //bits 7..0 + + "MUL R0,R0,K0.y;\n" // /255 + "DP4 R1.x, R0, program.env[%d];\n" "DP4 R1.y, R0, program.env[%d];\n" "DP4 R1.z, R0, program.env[%d];\n"