// Cg Shaders for PS2 GS emulation
//
// This section declares the build-time switches, the lookup textures and
// their samplers, the shader constants, and the helpers that translate a
// texture coordinate into a coordinate inside the emulated memory texture.
// Names such as g_sMemory, g_fTexDims, g_fTexBlock, g_fTexOffset,
// g_fPageOffset, g_fClampExts and TexWrapMode are not declared here;
// presumably they come from ps2hw_ctx0.fx -- confirm against that file.

// divides by z for every pixel, instead of in vertex shader
// fixes kh textures
#define PERSPECTIVE_CORRECT_TEX

//#define TEST_AEM // tests AEM for black pixels
//#define REGION_REPEAT // set if texture wrapping mode is region repeat
//#define WRITE_DEPTH // set if depth is also written in a MRT
//#define ACCURATE_DECOMPRESSION // set for less capable hardware ATI Radeon 9000 series
//#define EXACT_COLOR // make sure the output color is clamped to 1/255 boundaries (for alpha testing)

// TEX_XY is the 2D coordinate derived from a shader parameter named `tex`;
// TEX_DECL is the matching parameter type. The expansion is parenthesised so
// it stays a single operand wherever the macro is pasted.
#ifdef PERSPECTIVE_CORRECT_TEX
#define TEX_XY (tex.xy/tex.z)
#define TEX_DECL float3
#else
#define TEX_XY (tex.xy)
#define TEX_DECL float2
#endif

// DOZWRITE(x) emits x only when a depth value is written to a second target.
#ifdef WRITE_DEPTH
#define DOZWRITE(x) x
#else
#define DOZWRITE(x)
#endif

#include "ps2hw_ctx0.fx"

// used to get the tiled offset into a page given the linear offset
texture g_txBlocks;
texture g_txBilinearBlocks;
texture g_txSrc;

texture g_txConv16to32;
texture g_txConv32to16;

// for region_repeat mode
texture g_txBitwiseANDX;
texture g_txBitwiseANDY;

texture g_txCLUT;

// Each sampler is bound to the texture with the matching name. The
// parenthesised reference is used instead of the angle-bracket form so the
// binding survives tools that strip angle-bracketed text.
sampler g_sSrcFinal : register(s2) = sampler_state {
    Texture = (g_txSrc);
    MipFilter = LINEAR;
    MinFilter = LINEAR;
    MagFilter = LINEAR;
    AddressU = Clamp;
    AddressV = Clamp;
};

sampler g_sBlocks : register(s3) = sampler_state {
    Texture = (g_txBlocks);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

// NOTE(review): the next three samplers all use register s4; presumably they
// are never needed by the same pixel shader -- confirm before adding a shader
// that uses more than one of them.
sampler g_sBilinearBlocks : register(s4) = sampler_state {
    Texture = (g_txBilinearBlocks);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

sampler g_sConv16to32 : register(s4) = sampler_state {
    Texture = (g_txConv16to32);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

sampler3D g_sConv32to16 : register(s4) = sampler_state {
    Texture = (g_txConv32to16);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

sampler g_sBitwiseANDX : register(s5) = sampler_state {
    Texture = (g_txBitwiseANDX);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

sampler g_sBitwiseANDY : register(s6) = sampler_state {
    Texture = (g_txBitwiseANDY);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

// used only on rare cases where the render target is PSMT8H
// (shares register s2 with g_sSrcFinal)
sampler g_sCLUT : register(s2) = sampler_state {
    Texture = (g_txCLUT);
    MipFilter = POINT;
    MinFilter = POINT;
    MagFilter = POINT;
    AddressU = Wrap;
    AddressV = Wrap;
};

// global pixel shader constants
float4 g_fInvTexDims : register(c22); // similar to g_fClutOff
float3 g_fFogColor : register(c23);

// used for rectblitting
float4 g_fBitBltZ : register(c24);

half4 g_fOneColor : register(c25); // col*.xxxy+.zzzw

// vertex shader constants
float4 g_fBitBltPos : register(c4);
float4 g_fZ : register(c5); // transforms d3dcolor z into float z
float2 g_fZNorm : register(c6);
float4 g_fBitBltTex : register(c7);

// pixel shader consts
// .z is used for the addressing fn
half4 g_fExactColor : register(c27) = half4(0.5,0.5/256.0f,0,1/255.0f);
float3 g_fBilinear : register(c28) = float3(-0.7f, -0.65f, 0.9);
float4 g_fZBias : register(c29) = half4(1.0f/256.0f, 1.0004f, 1, 0.5);
float4 g_fc0 : register(c30) = float4(0,1, 0.001, 0.5f); // also for vs
float4 g_fMult : register(c31) = float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f);

// vertex shader consts
float4 g_fBitBltTrans : register(c31) = float4(0.5f, -0.5f, 0.5, 0.5 + 0.4/416.0f);

// given a local tex coord, returns the coord in the memory
//
// realtex: local texture coordinate; it is scaled and biased by g_fTexDims,
//          snapped down to a whole texel, then scaled by g_fMult.zw.
// returns: .x is the fractional part and .y the (page-scaled) integer part of
//          the linear offset computed from the block coordinate and the
//          g_sBlocks lookup.
float2 ps2memcoord(float2 realtex)
{
    float4 off;

    // block off
    realtex.xy = realtex.xy * g_fTexDims.xy + g_fTexDims.zw;
    realtex.xy = (realtex.xy - frac(realtex.xy)) * g_fMult.zw;
    float2 fblock = frac(realtex.xy);
    off.xy = realtex.xy-fblock.xy;

#ifdef ACCURATE_DECOMPRESSION
    // split the sum into integer and fractional parts by hand instead of
    // relying on modf
    off.zw = tex2D(g_sBlocks, g_fTexBlock.xy*fblock + g_fTexBlock.zw).xw;
    off.x = dot(off.xyw, g_fTexOffset.xyw);
    float f = frac(off.x);
    float fadd = g_fTexOffset.z * off.z;
    off.w = off.x + fadd;
    off.x = frac(f + fadd);
    off.w -= off.x;
#else
    off.z = tex2D(g_sBlocks, g_fTexBlock.xy*fblock + g_fTexBlock.zw).r;

    // combine the two
    off.x = dot(off.xyz, g_fTexOffset.xyz)+g_fTexOffset.w;
    off.x = modf(off.x, off.w);
#endif

    off.y = off.w * g_fPageOffset.y + g_fPageOffset.x;
    return off.xy;
}

// find all texcoords for bilinear filtering
// assume that orgtex are already on boundaries
//
// orgtex: .xy and .zw are the two corner coordinates of the 2x2 footprint.
// off0:   memory coords for the (x,y) sample in .xy and the (z,y) sample in .zw.
// off1:   memory coords for the (x,w) sample in .xy and the (z,w) sample in .zw.
void ps2memcoord4(float4 orgtex, out float4 off0, out float4 off1)
{
    //float4 off0, off1, off2, off3;
    float4 realtex;

    // block off
    realtex = (orgtex * g_fTexDims.xyxy + g_fTexDims.zwzw);// * g_fMult.zwzw;
    float4 fblock = frac(realtex.xyzw);
    float4 ftransblock = g_fTexBlock.xyxy*fblock + g_fTexBlock.zwzw;
    realtex -= fblock;

    float4 transvals = g_fTexOffset.x * realtex.xzxz + g_fTexOffset.y * realtex.yyww + g_fTexOffset.w;

    float4 colors;// = tex2D(g_sBilinearBlocks, ftransblock.xy);

    // this is faster on ffx ingame
    colors.x = tex2D(g_sBlocks, ftransblock.xy).r;
    colors.y = tex2D(g_sBlocks, ftransblock.zy).r;
    colors.z = tex2D(g_sBlocks, ftransblock.xw).r;
    colors.w = tex2D(g_sBlocks, ftransblock.zw).r;

    float4 fr, rem;

#ifdef ACCURATE_DECOMPRESSION
    fr = frac(transvals);
    float4 fadd = colors * g_fTexOffset.z;
    rem = transvals + fadd;
    fr = frac(fr + fadd);
    rem -= fr;
#else
    transvals += colors * g_fTexOffset.z;

    fr = modf(transvals, rem);
#endif

    rem = rem * g_fPageOffset.y + g_fPageOffset.x;

    // combine: g_fc0.yxyx = (1,0,1,0) selects fr into x/z, g_fc0.xyxy = (0,1,0,1)
    // selects rem into y/w (with the default g_fc0 value)
    off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
    off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}

// Same contract as ps2memcoord4, but fetches all four block offsets with a
// single lookup into g_sBilinearBlocks instead of four lookups into g_sBlocks.
void ps2memcoord4_fast(float4 orgtex, out float4 off0, out float4 off1)
{
    float4 realtex;

    realtex = (orgtex * g_fTexDims.xyxy + g_fTexDims.zwzw);// * g_fMult.zwzw;
    float4 fblock = frac(realtex.xyzw);
    float2 ftransblock = g_fTexBlock.xy*fblock.xy + g_fTexBlock.zw;
    realtex -= fblock;

    float4 transvals = g_fTexOffset.x * realtex.xzxz + g_fTexOffset.y * realtex.yyww + g_fTexOffset.w;

    float4 colors = tex2D(g_sBilinearBlocks, ftransblock.xy);
    float4 fr, rem;

#ifdef ACCURATE_DECOMPRESSION
    fr = frac(transvals);
    float4 fadd = colors * g_fTexOffset.z;
    rem = transvals + fadd;
    fr = frac(fr + fadd);
    rem -= fr;
#else
    transvals += colors * g_fTexOffset.z;

    fr = modf(transvals, rem);
#endif

    rem = rem * g_fPageOffset.y + g_fPageOffset.x;

    off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
    off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}

// Wrapping modes
// ps2addr maps an incoming coordinate to the local texture coordinate
// according to the wrapping mode selected at compile time.
#if defined(REPEAT)

float2 ps2addr(float2 coord)
{
    return frac(coord.xy);
}

#elif defined(CLAMP)

float2 ps2addr(float2 coord)
{
    return clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw);
}

#elif defined(REGION_REPEAT)

// computes the local tex coord along with addressing modes
float2 ps2addr(float2 coord)
{
    float2 final = frac(clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw));

    if( TexWrapMode.x > g_fBilinear.z ) // region repeat mode for x (umsk&x)|ufix
        final.x = tex2D(g_sBitwiseANDX, abs(coord.x)*TexWrapMode.z).x * g_fClampExts.x + g_fClampExts.z;
    if( TexWrapMode.y > g_fBilinear.z ) // region repeat mode for y (vmsk&y)|vfix
        final.y = tex2D(g_sBitwiseANDY, abs(coord.y)*TexWrapMode.w).x * g_fClampExts.y + g_fClampExts.w;

    return final;
}

#else

float2 ps2addr(float2 coord)
{
    return frac(clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw));
}

#endif

// Samples the memory texture at the tiled location of tex0.
half4 tex2DPS_32(float2 tex0)
{
    return tex2D(g_sMemory, ps2memcoord(tex0).xy);
}

// use when texture is not tiled
half4 tex2DPS_tex32(float2 tex0)
{
    return tex2D(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw)*g_fZBias.zzzw+g_fPageOffset.w;
}

// use when texture is not tiled
// the sampled alpha (plus g_fPageOffset.w) is used as the index into g_sCLUT
half4 tex2DPS_clut32(float2 tex0)
{
    float index = tex2D(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw).a+g_fPageOffset.w;
    return tex1D(g_sCLUT, index*g_fExactColor.x+g_fExactColor.y);
}

// use when texture is not tiled and converting from 32bit to 16bit
// don't convert on the block level, only on the column level
// so every other 8 pixels, use the upper bits instead of lower
half4 tex2DPS_tex32to16(float2 tex0)
{
    bool upper = false;
    tex0.y += g_fPageOffset.z;
    float2 ffrac = fmod(tex0, g_fTexOffset.xy);
    //tex0.xy = g_fc0.ww * (tex0.xy + ffrac);
    tex0.y += ffrac.y;

    if( ffrac.x > g_fTexOffset.z ) {
        tex0.x -= g_fTexOffset.z;
        upper = true;
    }
    if( ffrac.y >= g_fTexOffset.w ) {
        tex0.y -= g_fTexOffset.y;
        tex0.x += g_fTexOffset.z;
    }

    half4 color = tex2D(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw)*g_fZBias.zzzw+g_fPageOffset.w;
    // pick which pair of channels feeds the 16->32 conversion lookup
    float2 uv = upper ? color.xw : color.zy;
    return tex2D(g_sConv16to32, uv+g_fPageOffset.xy).zyxw;
}

// used when a 16 bit texture is used an 8h
half4 tex2DPS_tex16to8h(float2 tex0)
{
    float2 ffrac = fmod(tex0+g_fPageOffset.zw, g_fTexOffset.xy);
    //tex0.xy = g_fPageOffset.xy * tex0.xy - ffrac * g_fc0.yw;
    tex0.y += g_fPageOffset.y * ffrac.y;

    if( ffrac.x > g_fTexOffset.z ) {
        tex0.x -= g_fTexOffset.z;
        tex0.y += g_fTexOffset.w;
    }

    float4 upper = tex2D(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw);

    // only need alpha
    float index = tex3D(g_sConv32to16, upper.zyx-g_fc0.z).y + upper.w*g_fc0.w*g_fc0.w;
    return tex1D(g_sCLUT, index+g_fExactColor.y);
}

// used when a 16 bit texture is used a 32bit one
half4 tex2DPS_tex16to32(float2 tex0)
{
    float4 final;
    float2 ffrac = fmod(tex0+g_fPageOffset.zw, g_fTexOffset.xy);
    //tex0.xy = g_fPageOffset.xy * tex0.xy - ffrac * g_fc0.yw;
    tex0.y += g_fPageOffset.y * ffrac.y;

    if( ffrac.x > g_fTexOffset.z ) {
        tex0.x -= g_fTexOffset.z;
        tex0.y += g_fTexOffset.w;
    }

    float fconst = g_fc0.w*g_fc0.w;
    float4 lower = tex2D(g_sSrcFinal, g_fTexDims.xy*tex0);
    float4 upper = tex2D(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw);

    final.zy = tex3D(g_sConv32to16, lower.zyx).xy + lower.ww*fconst;
    final.xw = tex3D(g_sConv32to16, upper.zyx).xy + upper.ww*fconst;
    return final;
}

//half4 f;
//f.w = old.y > (127.2f/255.0f) ?
//    1 : 0;
//old.y -= 0.5f * f.w;
//f.xyz = frac(old.yyx*half3(2.002*255.0f/256.0f, 64.025f*255.0f/256.0f, 8.002*255.0f/256.0f));
//f.y += old.x * (0.25f*255.0f/256.0f);

////////////////////////////////
// calculates the texture color
////////////////////////////////

// decl_ps2shade(num) instantiates decl_ps2shade_<num> once per texture-fetch
// variant (_32, _tex32, _clut32, _tex32to16, _tex16to8h), producing the
// functions ps2shade<num>_32 ... ps2shade<num>_tex16to8h.
#define decl_ps2shade(num) \
    decl_ps2shade_##num(_32); \
    decl_ps2shade_##num(_tex32); \
    decl_ps2shade_##num(_clut32); \
    decl_ps2shade_##num(_tex32to16); \
    decl_ps2shade_##num(_tex16to8h);

// nearest
// Defines ps2shade0<bit>: applies the wrap mode to the coordinate and fetches
// one texel with tex2DPS<bit>.
#define decl_ps2shade_0(bit) \
half4 ps2shade0##bit( TEX_DECL tex) \
{ \
    return tex2DPS##bit( ps2addr(TEX_XY)); \
}

// do fast memcoord4 calcs when textures behave well
// NOTE(review): both branches currently select ps2memcoord4, so the fast path
// is never used by the bilinear filter; presumably intentional -- confirm.
#ifdef REPEAT
#define PS2MEMCOORD4 ps2memcoord4
#else
#define PS2MEMCOORD4 ps2memcoord4
#endif

// Defines BilinearFilter<bit>(tex0): a manual 2x2 bilinear fetch from
// g_sMemory.
//   bit    - suffix used only to name the generated function
//   addrfn - addressing function applied to both corners of the footprint
// The coordinate is offset by g_fBilinear.xy texels, snapped to a texel
// boundary, and the snapped-off fraction is used as the blend weight.
#define decl_BilinearFilter(bit, addrfn) \
half4 BilinearFilter##bit(float2 tex0) \
{ \
    float4 off0, off1; \
    float4 ftex; \
    float2 ffrac; \
    ftex.xy = tex0 + g_fBilinear.xy * g_fRealTexDims.zw; \
    ffrac = frac(ftex.xy*g_fRealTexDims.xy); \
    ftex.xy -= ffrac.xy * g_fRealTexDims.zw; \
    \
    ftex.zw = addrfn(ftex.xy + g_fRealTexDims.zw); \
    ftex.xy = addrfn(ftex.xy); \
    \
    PS2MEMCOORD4(ftex, off0, off1); \
    half4 c0 = tex2D(g_sMemory, off0.xy); \
    half4 c1 = tex2D(g_sMemory, off0.zw); \
    half4 c2 = tex2D(g_sMemory, off1.xy); \
    half4 c3 = tex2D(g_sMemory, off1.zw); \
    return lerp( lerp(c0, c1, ffrac.x), lerp(c2, c3, ffrac.x), ffrac.y ); \
}

decl_BilinearFilter(_32, ps2addr);
decl_BilinearFilter(_tex32, ps2addr);
decl_BilinearFilter(_clut32, ps2addr);
decl_BilinearFilter(_tex32to16, ps2addr);
decl_BilinearFilter(_tex16to8h, ps2addr);

//TODO!
// For mip maps, only apply when LOD >= 0
// lcm == 0, LOD = log(1/Q)*L + K, lcm == 1, LOD = K

// The shade variants below differ only in name: the mip-mapped modes (2-5)
// currently expand to the same nearest / bilinear fetch as modes 0 and 1.

// bilinear
#define decl_ps2shade_1(bit) \
half4 ps2shade1##bit(TEX_DECL tex) \
{ \
    return BilinearFilter##bit(TEX_XY); \
}

// nearest, mip nearest
#define decl_ps2shade_2(bit) \
half4 ps2shade2##bit(TEX_DECL tex) \
{ \
    return tex2DPS##bit( ps2addr(TEX_XY)); \
}

// nearest, mip linear
#define decl_ps2shade_3(bit) \
half4 ps2shade3##bit(TEX_DECL tex) \
{ \
    return tex2DPS##bit(ps2addr(TEX_XY)); \
}

// linear, mip nearest
#define decl_ps2shade_4(bit) \
half4 ps2shade4##bit(TEX_DECL tex) \
{ \
    return BilinearFilter##bit(TEX_XY); \
}

// linear, mip linear
#define decl_ps2shade_5(bit) \
half4 ps2shade5##bit(TEX_DECL tex) \
{ \
    return BilinearFilter##bit(TEX_XY); \
}

decl_ps2shade(0);
decl_ps2shade(1);
decl_ps2shade(2);
decl_ps2shade(3);
decl_ps2shade(4);
decl_ps2shade(5);

// Combines the fetched texel with the interpolated vertex color.
//   texcol - texel color; its alpha is first remapped by fTexAlpha.xy (or
//            forced to g_fc0.x when TEST_AEM is on and the texel matches the
//            g_fTestBlack test)
//   color  - vertex color
// returns texcol * (fTexAlpha2.zzzw * color + fTexAlpha2.xxxy)
//         + fTexAlpha.zzzw * color.wwww
half4 ps2CalcShade(half4 texcol, half4 color)
{
#ifdef TEST_AEM
    if( dot(texcol.xyzw, g_fTestBlack.xyzw) <= g_fc0.z )
        texcol.w = g_fc0.x;
    else
#endif
        texcol.w = texcol.w * fTexAlpha.y + fTexAlpha.x;

    texcol = texcol * (fTexAlpha2.zzzw * color + fTexAlpha2.xxxy) + fTexAlpha.zzzw * color.wwww;

    return texcol;
}

// final ops on the color
#ifdef EXACT_COLOR

half4 ps2FinalColor(half4 col)
{
    // g_fOneColor has to scale by 255
    half4 temp = col * g_fOneColor.xxxy + g_fOneColor.zzzw;
    // snap alpha to a multiple of g_fExactColor.w (1/255 by default)
    temp.w = floor(temp.w)*g_fExactColor.w;
    return temp;
}

#else

half4 ps2FinalColor(half4 col)
{
    return col * g_fOneColor.xxxy + g_fOneColor.zzzw;
}

#endif

////////////////
// Techniques //
////////////////

// technique to copy a rectangle from source to target
// Vertex-to-pixel interpolants. The z member exists only when WRITE_DEPTH is
// defined (see DOZWRITE).
struct VSOUT_
{
    float4 pos : POSITION;
    half4 color : COLOR0;
    DOZWRITE(float4 z : TEXCOORD0;)
};

struct VSOUT_T
{
    float4 pos : POSITION;
    half4 color : COLOR0;
    TEX_DECL tex : TEXCOORD0;
    DOZWRITE(float4 z : TEXCOORD1;)
};

struct VSOUT_F
{
    float4 pos : POSITION;
    half4 color : COLOR0;
    float fog : TEXCOORD0;
    DOZWRITE(float4 z : TEXCOORD1;)
};

struct VSOUT_TF
{
    float4 pos : POSITION;
    half4 color : COLOR0;
    TEX_DECL tex : TEXCOORD0;
    half fog : TEXCOORD1;
    DOZWRITE(float4 z : TEXCOORD2;)
};

// just smooth shading
// Transforms xy by g_fPosXY, derives a logarithmic depth from the packed z
// input, and passes the vertex color through.
VSOUT_ RegularVS(float4 pos : POSITION,
                 half4 color : COLOR0,
                 float4 z : TEXCOORD0
                 )
{
    VSOUT_ o;

    o.pos.xy = pos.xy*g_fPosXY.xy+g_fPosXY.zw;
    o.pos.z = log(g_fc0.y+dot(g_fZ, z.zyxw))*g_fZNorm.x+g_fZNorm.y;
    o.pos.w = g_fc0.y; // 1
    o.color = color;

    DOZWRITE(o.z = z*g_fZBias.x+g_fZBias.y; o.z.w = g_fc0.y;)
    return o;
}

// Outputs the final vertex color (and the depth value when WRITE_DEPTH).
void RegularPS(VSOUT_ i, out half4 c0 : COLOR0
#ifdef WRITE_DEPTH
               , out float4 c1 : COLOR1
#endif
               )
{
    // whenever outputting depth, make sure to mult by 255/256 and 1
    c0 = ps2FinalColor(i.color);
    DOZWRITE(c1 = i.z;)
}

technique Regular {
    pass p0 {
        VertexShader = compile vs_3_0 RegularVS();
        PixelShader = compile ps_3_0 RegularPS();
    }
};

// diffuse texture mapping
// Same position/depth handling as RegularVS; the texture coordinate is either
// forwarded with its divisor (PERSPECTIVE_CORRECT_TEX) or divided here.
VSOUT_T TextureVS(float4 pos : POSITION,
                  half4 color : COLOR0,
                  float3 tex0 : TEXCOORD1,
                  float4 z : TEXCOORD0)
{
    VSOUT_T o;
    o.pos.xy = pos.xy*g_fPosXY.xy+g_fPosXY.zw;
    o.pos.z = log(g_fc0.y+dot(g_fZ, z.zyxw))*g_fZNorm.x + g_fZNorm.y;
    o.pos.w = g_fc0.y;
    o.color = color;
    DOZWRITE(o.z = z*g_fZBias.x+g_fZBias.y; o.z.w = g_fc0.y;)
#ifdef PERSPECTIVE_CORRECT_TEX
    o.tex = tex0;
#else
    o.tex = tex0.xy/tex0.z;
#endif
    return o;
}

// DECL_TEXPS(num, bit) defines the pixel shader Texture<num><bit>PS, which
// shades with ps2shade<num><bit> and modulates by the vertex color.
#ifdef WRITE_DEPTH

#define DECL_TEXPS(num, bit) \
void Texture##num##bit##PS(VSOUT_T i, out half4 c0 : COLOR0, out float4 c1 : COLOR1) \
{ \
    c0 = ps2FinalColor(ps2CalcShade(ps2shade##num##bit(i.tex), i.color)); \
    c1 = i.z; \
}

#else

#define DECL_TEXPS(num, bit) \
void Texture##num##bit##PS(VSOUT_T i, out half4 c0 : COLOR0) \
{ \
    c0 = ps2FinalColor(ps2CalcShade(ps2shade##num##bit(i.tex), i.color)); \
}

#endif

#define DECL_TEXPS_(num) \
DECL_TEXPS(num, _32); \
DECL_TEXPS(num, _tex32); \
DECL_TEXPS(num, _clut32); \
DECL_TEXPS(num, _tex32to16); \
DECL_TEXPS(num, _tex16to8h);

DECL_TEXPS_(0);
DECL_TEXPS_(1);
DECL_TEXPS_(2);
DECL_TEXPS_(3);
DECL_TEXPS_(4);
DECL_TEXPS_(5);

// special functions for limitations on ps20
technique Texture {
    pass p0 {
        VertexShader = compile vs_2_0 TextureVS();
        PixelShader = compile ps_2_a Texture0_32PS();
    }
}

// Untextured vertex shader with fog: as RegularVS, plus pos.z forwarded as the
// fog blend factor.
VSOUT_F RegularFogVS(float4 pos : POSITION,
                     half4 color : COLOR0,
                     float4 z : TEXCOORD0)
{
    VSOUT_F o;

    o.pos.xy = pos.xy*g_fPosXY.xy+g_fPosXY.zw;
    o.pos.z = log(g_fc0.y+dot(g_fZ, z.zyxw))*g_fZNorm.x+g_fZNorm.y;
    o.pos.w = g_fc0.y;
    DOZWRITE(o.z = z*g_fZBias.x+g_fZBias.y; o.z.w = g_fc0.y;)
    o.color = color;
    o.fog = pos.z;
    return o;
}

// Blends the vertex color toward g_fFogColor (fog == 0 gives pure fog color,
// fog == 1 gives the unfogged color); alpha is not fogged.
void RegularFogPS(VSOUT_F i, out half4 c0 : COLOR0
#ifdef WRITE_DEPTH
                  , out float4 c1 : COLOR1
#endif
                  )
{
    half4 c;
    c.xyz = lerp(g_fFogColor.xyz, i.color.xyz, i.fog);
    c.w = i.color.w;
    c0 = ps2FinalColor(c);
    DOZWRITE(c1 = i.z;)
}

technique RegularFog {
    pass p0 {
        VertexShader = compile vs_2_0 RegularFogVS();
        PixelShader = compile ps_2_a RegularFogPS();
    }
};

// Textured vertex shader with fog: TextureVS plus pos.z forwarded as the fog
// blend factor.
VSOUT_TF TextureFogVS(float4 pos : POSITION,
                      half4 color : COLOR0,
                      float3 tex0 : TEXCOORD1,
                      float4 z : TEXCOORD0)
{
    VSOUT_TF o;

    o.pos.xy = pos.xy*g_fPosXY.xy+g_fPosXY.zw;
    o.pos.z = log(g_fc0.y+dot(g_fZ, z.zyxw))*g_fZNorm.x+g_fZNorm.y;
    o.pos.w = g_fc0.y;
    o.color = color;
    o.fog = pos.z;
    DOZWRITE(o.z = z*g_fZBias.x+g_fZBias.y; o.z.w = g_fc0.y;)
#ifdef PERSPECTIVE_CORRECT_TEX
    o.tex = tex0;
#else
    o.tex = tex0.xy/tex0.z;
#endif
    return o;
}

// DECL_TEXFOGPS(num, bit) defines TextureFog<num><bit>PS: the textured shade
// followed by the same fog blend as RegularFogPS.
#ifdef WRITE_DEPTH

#define DECL_TEXFOGPS(num, bit) \
void TextureFog##num##bit##PS(VSOUT_TF i, out half4 c0 : COLOR0, out float4 c1 : COLOR1 ) \
{ \
    half4 c = ps2CalcShade(ps2shade##num##bit(i.tex), i.color); \
    c.xyz = lerp(g_fFogColor.xyz, c.xyz, i.fog); \
    c0 = ps2FinalColor(c); \
    c1 = i.z; \
}

#else

#define DECL_TEXFOGPS(num, bit) \
void TextureFog##num##bit##PS(VSOUT_TF i, out half4 c0 : COLOR0) \
{ \
    half4 c = ps2CalcShade(ps2shade##num##bit(i.tex), i.color); \
    c.xyz = lerp(g_fFogColor.xyz, c.xyz, i.fog); \
    c0 = ps2FinalColor(c); \
}

#endif

#define DECL_TEXFOGPS_(num) \
DECL_TEXFOGPS(num, _32); \
DECL_TEXFOGPS(num, _tex32); \
DECL_TEXFOGPS(num, _clut32); \
DECL_TEXFOGPS(num, _tex32to16); \
DECL_TEXFOGPS(num, _tex16to8h);

DECL_TEXFOGPS_(0);
DECL_TEXFOGPS_(1);
DECL_TEXFOGPS_(2);
DECL_TEXFOGPS_(3);
DECL_TEXFOGPS_(4);
DECL_TEXFOGPS_(5);

technique TextureFog {
    pass p0 {
        VertexShader = compile vs_2_0 TextureFogVS();
        PixelShader = compile ps_2_a TextureFog0_32PS();
    }
};

//-------------------------------------------------------
// Techniques not related to the main primitive commands

// Manual 2x2 bilinear fetch from g_sMemory using the single-lookup address
// helper. Unlike the BilinearFilter variants, no wrap mode or g_fBilinear
// offset is applied to the coordinate.
half4 BilinearBitBlt(float2 tex0)
{
    float4 ftex;
    float2 ffrac;

    ffrac.xy = frac(tex0*g_fRealTexDims.xy);
    ftex.xy = tex0 - ffrac.xy * g_fRealTexDims.zw;
    ftex.zw = ftex.xy + g_fRealTexDims.zw;

    float4 off0, off1;
    ps2memcoord4_fast(ftex, off0, off1);
    half4 c0 = tex2D(g_sMemory, off0.xy);
    half4 c1 = tex2D(g_sMemory, off0.zw);
    half4 c2 = tex2D(g_sMemory, off1.xy);
    half4 c3 = tex2D(g_sMemory, off1.zw);

    return lerp( lerp(c0, c1, ffrac.x), lerp(c2, c3, ffrac.x), ffrac.y );
}

// Vertex shader shared by the blit and CRTC techniques.
//   pos       - input position; xy is scaled/biased by g_fBitBltPos
//   tex0      - not read by this shader
//   tex       - source coordinate; only xy is used
//   opos      - output position with z = g_fc0.x and w = g_fc0.y
//   otex0     - tex.xy scaled/biased by g_fBitBltTex
//   ointerpos - output xy remapped by g_fBitBltTrans; the CRTC "Inter" pixel
//               shaders use its y for the g_sBitwiseANDX lookup
void BitBltVS(in float4 pos : POSITION,
              in half4 tex0 : TEXCOORD0,
              in float3 tex : TEXCOORD1,
              out float4 opos : POSITION,
              out float2 otex0 : TEXCOORD0,
              out float2 ointerpos : TEXCOORD1)
{
    opos.xy = pos.xy * g_fBitBltPos.xy + g_fBitBltPos.zw;
    ointerpos = opos.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;
    opos.zw = g_fc0.xy;
    // explicit .xy: the result is float2, so avoid an implicit float3 truncation
    otex0 = tex.xy * g_fBitBltTex.xy + g_fBitBltTex.zw;
}

// Point-sampled blit from tiled memory, scaled by g_fOneColor.xxxy.
half4 BitBltPS(in float2 tex0 : TEXCOORD0) : COLOR
{
    return tex2D(g_sMemory, ps2memcoord(tex0).xy)*g_fOneColor.xxxy;
}

// used when AA
half4 BitBltAAPS(in float2 tex0 : TEXCOORD0) : COLOR
{
    return BilinearBitBlt(tex0)*g_fOneColor.xxxy;
}

// Blit from tiled memory that also writes a depth value derived from the
// sampled color via g_fBitBltZ.
void BitBltDepthPS(in float2 tex0 : TEXCOORD0,
                   out float4 c : COLOR0,
                   out float depth : DEPTH)
{
    c = tex2D(g_sMemory, ps2memcoord(tex0));

    depth = log(g_fc0.y+dot(c, g_fBitBltZ))*g_fOneColor.w;
    c += g_fZBias.y;
}

// As BitBltDepthPS, but the sampled value goes to the second render target
// and the first target is filled with g_fc0.x.
void BitBltDepthMRTPS(in float2 tex0 : TEXCOORD0,
                      out half4 c0 : COLOR0,
                      out float4 c1 : COLOR1,
                      out float depth : DEPTH)
{
    c1 = tex2D(g_sMemory, ps2memcoord(tex0));

    depth = log(g_fc0.y+dot(c1, g_fBitBltZ))*g_fOneColor.w;
    c1 += g_fZBias.y;
    c0 = g_fc0.x;
}

// no swizzling
void BitBltDepthTexPS(in float2 tex0 : TEXCOORD0,
                      out float4 c : COLOR0,
                      out float depth : DEPTH)
{
    c = tex2D(g_sSrcFinal, tex0);

    // g_fZBias.y is removed before the depth calculation and the color is
    // written out unmodified
    depth = log(g_fc0.y+dot(c-g_fZBias.y, g_fBitBltZ))*g_fOneColor.w;
    //c += g_fZBias.y;
}

// no swizzling
void BitBltDepthTexMRTPS(in float2 tex0 : TEXCOORD0,
                         out half4 c0 : COLOR0,
                         out float4 c1 : COLOR1,
                         out float depth : DEPTH)
{
    c1 = tex2D(g_sSrcFinal, tex0);

    depth = log(g_fc0.y+dot(c1-g_fZBias.y, g_fBitBltZ))*g_fOneColor.w;
    //c1 += g_fZBias.y;
    c0 = g_fc0.x;
}

technique BitBlt {
    pass p0 {
        VertexShader = compile vs_1_1 BitBltVS();
        PixelShader = compile ps_2_0 BitBltDepthMRTPS();
    }
}

/*static const float BlurKernel[9] = {
    0.027601,
    0.066213,
    0.123701,
    0.179952,
    0.205065,
    0.179952,
    0.123701,
    0.066213,
    0.027601
};*/

// Currently a plain fetch from g_sSrcFinal; the manual bilinear and blur
// variants are kept below, disabled.
half4 BilinearFloat16(float2 tex0)
{
    /*float4 ffrac, ftex;
    ffrac.xy = frac(tex0);
    ftex.xy = (tex0 - ffrac.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
    ftex.zw = ftex.xy + g_fInvTexDims.xy;

    half4 c0 = tex2D(g_sSrcFinal, ftex.xy);
    half4 c1 = tex2D(g_sSrcFinal, ftex.zy);
    half4 c2 = tex2D(g_sSrcFinal, ftex.xw);
    half4 c3 = tex2D(g_sSrcFinal, ftex.zw);

    return lerp( lerp(c0, c1, ffrac.x), lerp(c2, c3, ffrac.x), ffrac.y );*/
    return tex2D(g_sSrcFinal, tex0.xy);
//  return 0.55f * tex2D(g_sSrcFinal, tex0.xy) +
//          0.15f * tex2D(g_sSrcFinal, tex0.xy+g_fInvTexDims.xz) +
//          0.15f * tex2D(g_sSrcFinal, tex0.xy+g_fInvTexDims.zy) +
//          0.15f * tex2D(g_sSrcFinal, tex0.xy+g_fInvTexDims.xy);
}

// CRTC output from a render-target texture. The pixel is discarded via
// clip() when the g_sBitwiseANDX lookup at ointerpos.y, scaled and biased by
// g_fOneColor.zw, is negative. Red and blue are swapped on output.
half4 CRTCTargInterPS(in float2 tex0 : TEXCOORD0, in float2 ointerpos : TEXCOORD1) : COLOR
{
    float finter = tex1D(g_sBitwiseANDX, ointerpos.y).x;
    clip(finter * g_fOneColor.z + g_fOneColor.w);

    half4 c = BilinearFloat16(tex0);
    c.w = g_fc0.w*c.w * g_fOneColor.x + g_fOneColor.y;
    return c.zyxw;
}

// As CRTCTargInterPS without the clip() test.
half4 CRTCTargPS(in float2 tex0 : TEXCOORD0) : COLOR
{
    float4 c = BilinearFloat16(tex0);
    c.w = g_fc0.w*c.w * g_fOneColor.x + g_fOneColor.y;
    return c.zyxw;
}

// CRTC output read from tiled memory with bilinear filtering, with the same
// clip() test as CRTCTargInterPS. The coordinate is snapped to a whole value
// before being scaled by g_fInvTexDims.
half4 CRTCInterPS(in float2 tex0 : TEXCOORD0, in float2 ointerpos : TEXCOORD1) : COLOR
{
    float2 filtcoord = (tex0-frac(tex0))*g_fInvTexDims.xy+g_fInvTexDims.zw;
    float finter = tex1D(g_sBitwiseANDX, ointerpos.y).x;
    clip(finter * g_fOneColor.z + g_fOneColor.w);

    half4 c = BilinearBitBlt(filtcoord);
    c.w = c.w * g_fOneColor.x + g_fOneColor.y;

    return c.zyxw;
}

// As CRTCInterPS without the clip() test; here the coordinate is not snapped.
half4 CRTCPS(in float2 tex0 : TEXCOORD0) : COLOR
{
    float2 filtcoord = (tex0/*-frac(tex0)*/)*g_fInvTexDims.xy+g_fInvTexDims.zw;
    half4 c = BilinearBitBlt(filtcoord);
    c.w = c.w * g_fOneColor.x + g_fOneColor.y;

    return c.zyxw;
}

// Point-sampled CRTC output from tiled memory, with the clip() test.
// NOTE(review): only the .x channel of the fetch is kept and replicated into
// all four components -- confirm this is intended rather than a full-color
// fetch.
half4 CRTC24InterPS(in float2 tex0 : TEXCOORD0, in float2 ointerpos : TEXCOORD1) : COLOR
{
    float2 filtcoord = (tex0-frac(tex0))*g_fInvTexDims.xy+g_fInvTexDims.zw;
    float finter = tex1D(g_sBitwiseANDX, ointerpos.y).x;
    clip(finter * g_fOneColor.z + g_fOneColor.w);

    half4 c = tex2D(g_sMemory, ps2memcoord(filtcoord).xy).x;
    c.w = c.w * g_fOneColor.x + g_fOneColor.y;

    return c.zyxw;
}

// As CRTC24InterPS without the clip() test (same single-channel fetch).
half4 CRTC24PS(in float2 tex0 : TEXCOORD0) : COLOR
{
    float2 filtcoord = (tex0-frac(tex0))*g_fInvTexDims.xy+g_fInvTexDims.zw;
    half4 c = tex2D(g_sMemory, ps2memcoord(filtcoord).xy).x;
    c.w = c.w * g_fOneColor.x + g_fOneColor.y;

    return c.zyxw;
}

technique CRTC {
    pass p0 {
        VertexShader = compile vs_1_1 BitBltVS();
        PixelShader = compile ps_2_0 CRTCTargInterPS();
    }
}

// Fills every channel with g_fOneColor.x.
half4 ZeroPS() : COLOR
{
    return g_fOneColor.x;
}

// Plain textured output modulated by g_fOneColor.
half4 BaseTexturePS(in float2 tex0 : TEXCOORD0) : COLOR
{
    return tex2D(g_sSrcFinal, tex0) * g_fOneColor;
}

// inverse of 32->16bit conversion
// Fetches two source texels (the second offset by g_fPageOffset.xy), converts
// each through the g_sConv32to16 lookup, and packs the results into one
// output: the first texel into .zy, the second into .xw.
half4 Convert16to32PS(float2 tex0 : TEXCOORD0) : COLOR
{
    float4 final;
    float2 ffrac = fmod(tex0+g_fTexDims.zw, g_fTexOffset.xy);
    tex0.y += g_fTexDims.y * ffrac.y;

    if( ffrac.x > g_fTexOffset.z ) {
        tex0.x -= g_fTexOffset.z;
        tex0.y += g_fTexOffset.w;
    }

    float4 lower = tex2D(g_sSrcFinal, tex0);
    float4 upper = tex2D(g_sSrcFinal, tex0+g_fPageOffset.xy);

    //return half4(frac(32*tex0.x),frac(7*tex0.y),0,1);
    final.zy = tex3D(g_sConv32to16, lower.zyx).xy + lower.ww*g_fPageOffset.zw;
    final.xw = tex3D(g_sConv32to16, upper.zyx).xy + upper.ww*g_fPageOffset.zw;
    return final;
}

// use when texture is not tiled and converting from 32bit to 16bit
// one condition is that the converted texture has to keep the same block configuration
// every 16 32bit horz pixels gets converted to 16x2 16bit horz pixels.
// the first row is the first 8 pixels, the second row is the last 8 pixels
// the last 8 columns are the upper bits
half4 Convert32to16PS(float2 tex0 : TEXCOORD0) : COLOR
{
    bool upper = false;
    float2 ffrac = fmod(tex0+g_fTexDims.zw, g_fTexOffset.xy);
    //tex0 += g_fTexDims.xy * ffrac;
    //tex0.y += g_fTexDims.y * ffrac.y;
    tex0.y += ffrac.y;
    //tex0.x -= g_fc0.w*ffrac.x;

    if( ffrac.x > g_fTexOffset.z ) {
        tex0.x -= g_fTexOffset.z;
        upper = true;
    }
    if( ffrac.y >= g_fTexOffset.w ) {
        tex0.y -= g_fTexOffset.y;
        tex0.x += g_fTexOffset.z;
    }

    //return half4(frac(32*tex0.x),frac(7*tex0.y),0,1);
    half4 color = tex2D(g_sSrcFinal, tex0)*g_fc0.yyyw;
    // pick which pair of channels feeds the 16->32 conversion lookup
    float2 uv = upper ? color.xw : color.zy;
    return tex2D(g_sConv16to32, uv*g_fPageOffset.xy+g_fPageOffset.zw).zyxw*g_fOneColor;
}