DX11: Eliminate most int-to-float conversions and use float math in more places. GPUs are more efficient with float operations. This gives a slight speed-up.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7303 8ced0084-cf51-0410-be5f-012b33b47a6e
2011-03-05 20:40:17 +00:00 · 2011-03-05 20:40:17 +00:00 · 9abf449e7c
parent 4f0705fd70
commit 9abf449e7c
1 changed files with 325 additions and 326 deletions
--- a/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp
+++ b/Source/Plugins/Plugin_VideoDX11/Src/PSTextureEncoder.cpp
@ -49,17 +49,17 @@ union EFBEncodeParams
 {
 	struct
 	{
-		UINT NumHalfCacheLinesX;
-		UINT NumBlocksY;
-		UINT PosX;
-		UINT PosY;
+		FLOAT NumHalfCacheLinesX;
+		FLOAT NumBlocksY;
+		FLOAT PosX;
+		FLOAT PosY;
 		FLOAT TexLeft;
 		FLOAT TexTop;
 		FLOAT TexRight;
 		FLOAT TexBottom;
 	};
 	// Constant buffers must be a multiple of 16 bytes in size.
-	u8 pad[32]; // Should be at least the size of the struct above
+	u8 pad[32]; // Pad to the next multiple of 16 bytes
 };

 static const char EFB_ENCODE_VS[] =
@ -69,10 +69,10 @@ static const char EFB_ENCODE_VS[] =
 "{\n"
 	"struct\n" // Should match EFBEncodeParams above
 	"{\n"
-		"uint NumHalfCacheLinesX;\n"
-		"uint NumBlocksY;\n"
-		"uint PosX;\n" // Upper-left corner of source
-		"uint PosY;\n"
+		"float NumHalfCacheLinesX;\n"
+		"float NumBlocksY;\n"
+		"float PosX;\n" // Upper-left corner of source
+		"float PosY;\n"
 		"float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized)
 		"float TexTop;\n"
 		"float TexRight;\n"
@ -104,10 +104,10 @@ static const char EFB_ENCODE_PS[] =
 "{\n"
 	"struct\n" // Should match EFBEncodeParams above
 	"{\n"
-		"uint NumHalfCacheLinesX;\n"
-		"uint NumBlocksY;\n"
-		"uint PosX;\n" // Upper-left corner of source
-		"uint PosY;\n"
+		"float NumHalfCacheLinesX;\n"
+		"float NumBlocksY;\n"
+		"float PosX;\n" // Upper-left corner of source
+		"float PosY;\n"
 		"float TexLeft;\n" // Rectangle within EFBTexture representing the actual EFB (normalized)
 		"float TexTop;\n"
 		"float TexRight;\n"
@ -136,8 +136,8 @@ static const char EFB_ENCODE_PS[] =
 	"return (d << 24) | (c << 16) | (b << 8) | a;\n"
 "}\n"

-"uint UINT_44444444(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n"
-	"return (a << 28) | (b << 24) | (c << 20) | (d << 16) | (e << 12) | (f << 8) | (g << 4) | h;\n"
+"uint UINT_44444444_BE(uint a, uint b, uint c, uint d, uint e, uint f, uint g, uint h) {\n"
+	"return (g << 28) | (h << 24) | (e << 20) | (f << 16) | (c << 12) | (d << 8) | (a << 4) | b;\n"
 "}\n"

 "uint UINT_1555(uint a, uint b, uint c, uint d) {\n"
@ -156,30 +156,30 @@ static const char EFB_ENCODE_PS[] =
 	"return (a << 16) | b;\n"
 "}\n"

-"uint EncodeRGB5A3(uint4 pixel) {\n"
-	"if (pixel.a >= 224) {\n"
+"uint EncodeRGB5A3(float4 pixel) {\n"
+	"if (pixel.a >= 224.0/255.0) {\n"
 		// Encode to ARGB1555
-		"return UINT_1555(1, pixel.r >> 3, pixel.g >> 3, pixel.b >> 3);\n"
+		"return UINT_1555(1, pixel.r*31, pixel.g*31, pixel.b*31);\n"
 	"} else {\n"
 		// Encode to ARGB3444
-		"return UINT_3444(pixel.a >> 5, pixel.r >> 4, pixel.g >> 4, pixel.b >> 4);\n"
+		"return UINT_3444(pixel.a*7, pixel.r*15, pixel.g*15, pixel.b*15);\n"
 	"}\n"
 "}\n"

-"uint EncodeRGB565(uint4 pixel) {\n"
-	"return UINT_565(pixel.r >> 3, pixel.g >> 2, pixel.b >> 3);\n"
+"uint EncodeRGB565(float4 pixel) {\n"
+	"return UINT_565(pixel.r*31, pixel.g*63, pixel.b*31);\n"
 "}\n"

-"float2 CalcTexCoord(uint2 coord)\n"
+"float2 CalcTexCoord(float2 coord)\n"
 "{\n"
 	// Add 0.5,0.5 to sample from the center of the EFB pixel
-	"float2 efbCoord = float2(coord) + float2(0.5,0.5);\n"
+	"float2 efbCoord = coord + float2(0.5,0.5);\n"
 	"return lerp(float2(Params.TexLeft,Params.TexTop), float2(Params.TexRight,Params.TexBottom), efbCoord * INV_EFB_DIMS);\n"
 "}\n"

 // Interface and classes for different source formats

-"float4 Fetch_0(uint2 coord)\n"
+"float4 Fetch_0(float2 coord)\n"
 "{\n"
 	"float2 texCoord = CalcTexCoord(coord);\n"
 	"float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n"
@ -187,13 +187,13 @@ static const char EFB_ENCODE_PS[] =
 	"return result;\n"
 "}\n"

-"float4 Fetch_1(uint2 coord)\n"
+"float4 Fetch_1(float2 coord)\n"
 "{\n"
 	"float2 texCoord = CalcTexCoord(coord);\n"
 	"return EFBTexture.Sample(EFBSampler, texCoord);\n"
 "}\n"

-"float4 Fetch_2(uint2 coord)\n"
+"float4 Fetch_2(float2 coord)\n"
 "{\n"
 	"float2 texCoord = CalcTexCoord(coord);\n"
 	"float4 result = EFBTexture.Sample(EFBSampler, texCoord);\n"
@ -201,7 +201,7 @@ static const char EFB_ENCODE_PS[] =
 	"return result;\n"
 "}\n"

-"float4 Fetch_3(uint2 coord)\n"
+"float4 Fetch_3(float2 coord)\n"
 "{\n"
 	"float2 texCoord = CalcTexCoord(coord);\n"

@ -217,13 +217,13 @@ static const char EFB_ENCODE_PS[] =
 "#ifdef DYNAMIC_MODE\n"
 "interface iFetch\n"
 "{\n"
-	"float4 Fetch(uint2 coord);\n"
+	"float4 Fetch(float2 coord);\n"
 "};\n"

 // Source format 0
 "class cFetch_0 : iFetch\n"
 "{\n"
-	"float4 Fetch(uint2 coord)\n"
+	"float4 Fetch(float2 coord)\n"
 	"{ return Fetch_0(coord); }\n"
 "};\n"

@ -231,21 +231,21 @@ static const char EFB_ENCODE_PS[] =
 // Source format 1
 "class cFetch_1 : iFetch\n"
 "{\n"
-	"float4 Fetch(uint2 coord)\n"
+	"float4 Fetch(float2 coord)\n"
 	"{ return Fetch_1(coord); }\n"
 "};\n"

 // Source format 2
 "class cFetch_2 : iFetch\n"
 "{\n"
-	"float4 Fetch(uint2 coord)\n"
+	"float4 Fetch(float2 coord)\n"
 	"{ return Fetch_2(coord); }\n"
 "};\n"

 // Source format 3
 "class cFetch_3 : iFetch\n"
 "{\n"
-	"float4 Fetch(uint2 coord)\n"
+	"float4 Fetch(float2 coord)\n"
 	"{ return Fetch_3(coord); }\n"
 "};\n"

@ -308,18 +308,18 @@ static const char EFB_ENCODE_PS[] =

 // Interface and classes for different scale/filter settings (on or off)

-"float4 ScaledFetch_0(uint2 coord)\n"
+"float4 ScaledFetch_0(float2 coord)\n"
 "{\n"
-	"return IMP_FETCH(uint2(Params.PosX,Params.PosY) + coord);\n"
+	"return IMP_FETCH(float2(Params.PosX,Params.PosY) + coord);\n"
 "}\n"

-"float4 ScaledFetch_1(uint2 coord)\n"
+"float4 ScaledFetch_1(float2 coord)\n"
 "{\n"
-	"uint2 ul = uint2(Params.PosX,Params.PosY) + 2*coord;\n"
-	"float4 sample0 = IMP_FETCH(ul+uint2(0,0));\n"
-	"float4 sample1 = IMP_FETCH(ul+uint2(1,0));\n"
-	"float4 sample2 = IMP_FETCH(ul+uint2(0,1));\n"
-	"float4 sample3 = IMP_FETCH(ul+uint2(1,1));\n"
+	"float2 ul = float2(Params.PosX,Params.PosY) + 2*coord;\n"
+	"float4 sample0 = IMP_FETCH(ul+float2(0,0));\n"
+	"float4 sample1 = IMP_FETCH(ul+float2(1,0));\n"
+	"float4 sample2 = IMP_FETCH(ul+float2(0,1));\n"
+	"float4 sample3 = IMP_FETCH(ul+float2(1,1));\n"
 	// Average all four samples together
 	// FIXME: Is this correct?
 	"return 0.25 * (sample0+sample1+sample2+sample3);\n"
@ -328,20 +328,20 @@ static const char EFB_ENCODE_PS[] =
 "#ifdef DYNAMIC_MODE\n"
 "interface iScaledFetch\n"
 "{\n"
-	"float4 ScaledFetch(uint2 coord);\n"
+	"float4 ScaledFetch(float2 coord);\n"
 "};\n"

 // Scale off
 "class cScaledFetch_0 : iScaledFetch\n"
 "{\n"
-	"float4 ScaledFetch(uint2 coord)\n"
+	"float4 ScaledFetch(float2 coord)\n"
 	"{ return ScaledFetch_0(coord); }\n"
 "};\n"

 // Scale on
 "class cScaledFetch_1 : iScaledFetch\n"
 "{\n"
-	"float4 ScaledFetch(uint2 coord)\n"
+	"float4 ScaledFetch(float2 coord)\n"
 	"{ return ScaledFetch_1(coord); }\n"
 "};\n"

@ -358,178 +358,177 @@ static const char EFB_ENCODE_PS[] =
 // Main EFB-sampling function: performs all steps of fetching pixels, scaling,
 // applying intensity function

-"uint4 SampleEFB(uint2 coord)\n"
+"float4 SampleEFB(float2 coord)\n"
 "{\n"
 	// FIXME: Does intensity happen before or after scaling? Or does
 	// it matter?
 	"float4 sample = IMP_SCALEDFETCH(coord);\n"
-	"sample = IMP_INTENSITY(sample);\n"
-	"return uint4(255.0 * sample);\n"
+	"return IMP_INTENSITY(sample);\n"
 "}\n"

 // Interfaces and classes for different destination formats

-"uint4 Generate_0(uint2 cacheCoord)\n" // R4
+"uint4 Generate_0(float2 cacheCoord)\n" // R4
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,8);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 4*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,8);\n"
+	"float2 subBlockUL = blockUL + float2(0, 4*(cacheCoord.x%2));\n"

-	"uint4 sample[32];\n"
+	"float4 sample[32];\n"
 	"for (uint y = 0; y < 4; ++y) {\n"
 		"for (uint x = 0; x < 8; ++x) {\n"
-			"sample[y*8+x] = SampleEFB(subBlockUL+uint2(x,y));\n"
+			"sample[y*8+x] = SampleEFB(subBlockUL+float2(x,y));\n"
 		"}\n"
 	"}\n"
 		
 	"uint dw[4];\n"
 	"for (uint i = 0; i < 4; ++i) {\n"
-		"dw[i] = UINT_44444444(\n"
-			"sample[8*i+0].r >> 4,\n"
-			"sample[8*i+1].r >> 4,\n"
-			"sample[8*i+2].r >> 4,\n"
-			"sample[8*i+3].r >> 4,\n"
-			"sample[8*i+4].r >> 4,\n"
-			"sample[8*i+5].r >> 4,\n"
-			"sample[8*i+6].r >> 4,\n"
-			"sample[8*i+7].r >> 4\n"
+		"dw[i] = UINT_44444444_BE(\n"
+			"15*sample[8*i+0].r,\n"
+			"15*sample[8*i+1].r,\n"
+			"15*sample[8*i+2].r,\n"
+			"15*sample[8*i+3].r,\n"
+			"15*sample[8*i+4].r,\n"
+			"15*sample[8*i+5].r,\n"
+			"15*sample[8*i+6].r,\n"
+			"15*sample[8*i+7].r\n"
 			");\n"
 	"}\n"

-	"return Swap4_32(uint4(dw[0], dw[1], dw[2], dw[3]));\n"
+	"return uint4(dw[0], dw[1], dw[2], dw[3]);\n"
 "}\n"

 // FIXME: Untested
-"uint4 Generate_1(uint2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?)
+"uint4 Generate_1(float2 cacheCoord)\n" // R8 (FIXME: Duplicate of R8 below?)
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.r, sample4.r, sample8.r, sampleC.r),\n"
-		"uint4(sample1.r, sample5.r, sample9.r, sampleD.r),\n"
-		"uint4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n"
-		"uint4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n"
+		"255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n"
+		"255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n"
+		"255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n"
+		"255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

 // FIXME: Untested
-"uint4 Generate_2(uint2 cacheCoord)\n" // A4 R4
+"uint4 Generate_2(float2 cacheCoord)\n" // A4 R4
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"
 	
-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

-	"uint dw0 = UINT_44444444(\n"
-		"sample0.a >> 4, sample0.r >> 4,\n"
-		"sample1.a >> 4, sample1.r >> 4,\n"
-		"sample2.a >> 4, sample2.r >> 4,\n"
-		"sample3.a >> 4, sample3.r >> 4\n"
+	"uint dw0 = UINT_44444444_BE(\n"
+		"15*sample0.a, 15*sample0.r,\n"
+		"15*sample1.a, 15*sample1.r,\n"
+		"15*sample2.a, 15*sample2.r,\n"
+		"15*sample3.a, 15*sample3.r\n"
 		");\n"
-	"uint dw1 = UINT_44444444(\n"
-		"sample4.a >> 4, sample4.r >> 4,\n"
-		"sample5.a >> 4, sample5.r >> 4,\n"
-		"sample6.a >> 4, sample6.r >> 4,\n"
-		"sample7.a >> 4, sample7.r >> 4\n"
+	"uint dw1 = UINT_44444444_BE(\n"
+		"15*sample4.a, 15*sample4.r,\n"
+		"15*sample5.a, 15*sample5.r,\n"
+		"15*sample6.a, 15*sample6.r,\n"
+		"15*sample7.a, 15*sample7.r\n"
 		");\n"
-	"uint dw2 = UINT_44444444(\n"
-		"sample8.a >> 4, sample8.r >> 4,\n"
-		"sample9.a >> 4, sample9.r >> 4,\n"
-		"sampleA.a >> 4, sampleA.r >> 4,\n"
-		"sampleB.a >> 4, sampleB.r >> 4\n"
+	"uint dw2 = UINT_44444444_BE(\n"
+		"15*sample8.a, 15*sample8.r,\n"
+		"15*sample9.a, 15*sample9.r,\n"
+		"15*sampleA.a, 15*sampleA.r,\n"
+		"15*sampleB.a, 15*sampleB.r\n"
 		");\n"
-	"uint dw3 = UINT_44444444(\n"
-		"sampleC.a >> 4, sampleC.r >> 4,\n"
-		"sampleD.a >> 4, sampleD.r >> 4,\n"
-		"sampleE.a >> 4, sampleE.r >> 4,\n"
-		"sampleF.a >> 4, sampleF.r >> 4\n"
+	"uint dw3 = UINT_44444444_BE(\n"
+		"15*sampleC.a, 15*sampleC.r,\n"
+		"15*sampleD.a, 15*sampleD.r,\n"
+		"15*sampleE.a, 15*sampleE.r,\n"
+		"15*sampleF.a, 15*sampleF.r\n"
 		");\n"
 	
-	"return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n"
+	"return uint4(dw0, dw1, dw2, dw3);\n"
 "}\n"

 // FIXME: Untested
-"uint4 Generate_3(uint2 cacheCoord)\n" // A8 R8
+"uint4 Generate_3(float2 cacheCoord)\n" // A8 R8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.a, sample2.a, sample4.a, sample6.a),\n"
-		"uint4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
-		"uint4(sample1.a, sample3.a, sample5.a, sample7.a),\n"
-		"uint4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
+		"255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n"
+		"255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
+		"255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n"
+		"255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

-"uint4 Generate_4(uint2 cacheCoord)\n" // R5 G6 B5
+"uint4 Generate_4(float2 cacheCoord)\n" // R5 G6 B5
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"
 		
 	"uint dw0 = UINT_1616(EncodeRGB565(sample0), EncodeRGB565(sample1));\n"
 	"uint dw1 = UINT_1616(EncodeRGB565(sample2), EncodeRGB565(sample3));\n"
@ -539,21 +538,21 @@ static const char EFB_ENCODE_PS[] =
 	"return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n"
 "}\n"

-"uint4 Generate_5(uint2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4
+"uint4 Generate_5(float2 cacheCoord)\n" // 1 R5 G5 B5 or 0 A3 R4 G4 G4
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"
 		
 	"uint dw0 = UINT_1616(EncodeRGB5A3(sample0), EncodeRGB5A3(sample1));\n"
 	"uint dw1 = UINT_1616(EncodeRGB5A3(sample2), EncodeRGB5A3(sample3));\n"
@ -563,232 +562,232 @@ static const char EFB_ENCODE_PS[] =
 	"return Swap4_32(uint4(dw0, dw1, dw2, dw3));\n"
 "}\n"

-"uint4 Generate_6(uint2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8
+"uint4 Generate_6(float2 cacheCoord)\n" // A8 R8 A8 R8 | G8 B8 G8 B8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(4,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(4,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"

 	"uint4 dw4;\n"
 	"if (cacheCoord.x % 4 < 2)\n"
 	"{\n"
 		// First cache line gets AR
 		"dw4 = UINT4_8888_BE(\n"
-			"uint4(sample0.a, sample2.a, sample4.a, sample6.a),\n"
-			"uint4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
-			"uint4(sample1.a, sample3.a, sample5.a, sample7.a),\n"
-			"uint4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
+			"255*float4(sample0.a, sample2.a, sample4.a, sample6.a),\n"
+			"255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
+			"255*float4(sample1.a, sample3.a, sample5.a, sample7.a),\n"
+			"255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
 			");\n"
 	"}\n"
 	"else\n"
 	"{\n"
 		// Second cache line gets GB
 		"dw4 = UINT4_8888_BE(\n"
-			"uint4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
-			"uint4(sample0.b, sample2.b, sample4.b, sample6.b),\n"
-			"uint4(sample1.g, sample3.g, sample5.g, sample7.g),\n"
-			"uint4(sample1.b, sample3.b, sample5.b, sample7.b)\n"
+			"255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
+			"255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n"
+			"255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n"
+			"255*float4(sample1.b, sample3.b, sample5.b, sample7.b)\n"
 			");\n"
 	"}\n"
 	
 	"return dw4;\n"
 "}\n"

-"uint4 Generate_7(uint2 cacheCoord)\n" // A8
+"uint4 Generate_7(float2 cacheCoord)\n" // A8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"
 	
-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.a, sample4.a, sample8.a, sampleC.a),\n"
-		"uint4(sample1.a, sample5.a, sample9.a, sampleD.a),\n"
-		"uint4(sample2.a, sample6.a, sampleA.a, sampleE.a),\n"
-		"uint4(sample3.a, sample7.a, sampleB.a, sampleF.a)\n"
+		"255*float4(sample0.a, sample4.a, sample8.a, sampleC.a),\n"
+		"255*float4(sample1.a, sample5.a, sample9.a, sampleD.a),\n"
+		"255*float4(sample2.a, sample6.a, sampleA.a, sampleE.a),\n"
+		"255*float4(sample3.a, sample7.a, sampleB.a, sampleF.a)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

-"uint4 Generate_8(uint2 cacheCoord)\n" // R8
+"uint4 Generate_8(float2 cacheCoord)\n" // R8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.r, sample4.r, sample8.r, sampleC.r),\n"
-		"uint4(sample1.r, sample5.r, sample9.r, sampleD.r),\n"
-		"uint4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n"
-		"uint4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n"
+		"255*float4(sample0.r, sample4.r, sample8.r, sampleC.r),\n"
+		"255*float4(sample1.r, sample5.r, sample9.r, sampleD.r),\n"
+		"255*float4(sample2.r, sample6.r, sampleA.r, sampleE.r),\n"
+		"255*float4(sample3.r, sample7.r, sampleB.r, sampleF.r)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

 // FIXME: Untested
-"uint4 Generate_9(uint2 cacheCoord)\n" // G8
+"uint4 Generate_9(float2 cacheCoord)\n" // G8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.g, sample4.g, sample8.g, sampleC.g),\n"
-		"uint4(sample1.g, sample5.g, sample9.g, sampleD.g),\n"
-		"uint4(sample2.g, sample6.g, sampleA.g, sampleE.g),\n"
-		"uint4(sample3.g, sample7.g, sampleB.g, sampleF.g)\n"
+		"255*float4(sample0.g, sample4.g, sample8.g, sampleC.g),\n"
+		"255*float4(sample1.g, sample5.g, sample9.g, sampleD.g),\n"
+		"255*float4(sample2.g, sample6.g, sampleA.g, sampleE.g),\n"
+		"255*float4(sample3.g, sample7.g, sampleB.g, sampleF.g)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

-"uint4 Generate_A(uint2 cacheCoord)\n" // B8
+"uint4 Generate_A(float2 cacheCoord)\n" // B8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(8,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(8,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"
 	
-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(4,0));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(5,0));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(6,0));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(7,0));\n"
-	"uint4 sample8 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample9 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sampleA = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sampleB = SampleEFB(subBlockUL+uint2(3,1));\n"
-	"uint4 sampleC = SampleEFB(subBlockUL+uint2(4,1));\n"
-	"uint4 sampleD = SampleEFB(subBlockUL+uint2(5,1));\n"
-	"uint4 sampleE = SampleEFB(subBlockUL+uint2(6,1));\n"
-	"uint4 sampleF = SampleEFB(subBlockUL+uint2(7,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(4,0));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(5,0));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(6,0));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(7,0));\n"
+	"float4 sample8 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample9 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sampleA = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sampleB = SampleEFB(subBlockUL+float2(3,1));\n"
+	"float4 sampleC = SampleEFB(subBlockUL+float2(4,1));\n"
+	"float4 sampleD = SampleEFB(subBlockUL+float2(5,1));\n"
+	"float4 sampleE = SampleEFB(subBlockUL+float2(6,1));\n"
+	"float4 sampleF = SampleEFB(subBlockUL+float2(7,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.b, sample4.b, sample8.b, sampleC.b),\n"
-		"uint4(sample1.b, sample5.b, sample9.b, sampleD.b),\n"
-		"uint4(sample2.b, sample6.b, sampleA.b, sampleE.b),\n"
-		"uint4(sample3.b, sample7.b, sampleB.b, sampleF.b)\n"
+		"255*float4(sample0.b, sample4.b, sample8.b, sampleC.b),\n"
+		"255*float4(sample1.b, sample5.b, sample9.b, sampleD.b),\n"
+		"255*float4(sample2.b, sample6.b, sampleA.b, sampleE.b),\n"
+		"255*float4(sample3.b, sample7.b, sampleB.b, sampleF.b)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

-"uint4 Generate_B(uint2 cacheCoord)\n" // G8 R8
+"uint4 Generate_B(float2 cacheCoord)\n" // G8 R8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
-		"uint4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
-		"uint4(sample1.g, sample3.g, sample5.g, sample7.g),\n"
-		"uint4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
+		"255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
+		"255*float4(sample0.r, sample2.r, sample4.r, sample6.r),\n"
+		"255*float4(sample1.g, sample3.g, sample5.g, sample7.g),\n"
+		"255*float4(sample1.r, sample3.r, sample5.r, sample7.r)\n"
 		");\n"
 	
 	"return dw4;\n"
 "}\n"

 // FIXME: Untested
-"uint4 Generate_C(uint2 cacheCoord)\n" // B8 G8
+"uint4 Generate_C(float2 cacheCoord)\n" // B8 G8
 "{\n"
-	"uint2 blockCoord = cacheCoord / uint2(2,1);\n"
+	"float2 blockCoord = floor(cacheCoord / float2(2,1));\n"

-	"uint2 blockUL = blockCoord * uint2(4,4);\n"
-	"uint2 subBlockUL = blockUL + uint2(0, 2*(cacheCoord.x%2));\n"
+	"float2 blockUL = blockCoord * float2(4,4);\n"
+	"float2 subBlockUL = blockUL + float2(0, 2*(cacheCoord.x%2));\n"

-	"uint4 sample0 = SampleEFB(subBlockUL+uint2(0,0));\n"
-	"uint4 sample1 = SampleEFB(subBlockUL+uint2(1,0));\n"
-	"uint4 sample2 = SampleEFB(subBlockUL+uint2(2,0));\n"
-	"uint4 sample3 = SampleEFB(subBlockUL+uint2(3,0));\n"
-	"uint4 sample4 = SampleEFB(subBlockUL+uint2(0,1));\n"
-	"uint4 sample5 = SampleEFB(subBlockUL+uint2(1,1));\n"
-	"uint4 sample6 = SampleEFB(subBlockUL+uint2(2,1));\n"
-	"uint4 sample7 = SampleEFB(subBlockUL+uint2(3,1));\n"
+	"float4 sample0 = SampleEFB(subBlockUL+float2(0,0));\n"
+	"float4 sample1 = SampleEFB(subBlockUL+float2(1,0));\n"
+	"float4 sample2 = SampleEFB(subBlockUL+float2(2,0));\n"
+	"float4 sample3 = SampleEFB(subBlockUL+float2(3,0));\n"
+	"float4 sample4 = SampleEFB(subBlockUL+float2(0,1));\n"
+	"float4 sample5 = SampleEFB(subBlockUL+float2(1,1));\n"
+	"float4 sample6 = SampleEFB(subBlockUL+float2(2,1));\n"
+	"float4 sample7 = SampleEFB(subBlockUL+float2(3,1));\n"

 	"uint4 dw4 = UINT4_8888_BE(\n"
-		"uint4(sample0.b, sample2.b, sample4.b, sample6.b),\n"
-		"uint4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
-		"uint4(sample1.b, sample3.b, sample5.b, sample7.b),\n"
-		"uint4(sample1.g, sample3.g, sample5.g, sample7.g)\n"
+		"255*float4(sample0.b, sample2.b, sample4.b, sample6.b),\n"
+		"255*float4(sample0.g, sample2.g, sample4.g, sample6.g),\n"
+		"255*float4(sample1.b, sample3.b, sample5.b, sample7.b),\n"
+		"255*float4(sample1.g, sample3.g, sample5.g, sample7.g)\n"
 		");\n"
 	
 	"return dw4;\n"
@ -797,36 +796,36 @@ static const char EFB_ENCODE_PS[] =
 "#ifdef DYNAMIC_MODE\n"
 "interface iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord);\n"
+	"uint4 Generate(float2 cacheCoord);\n"
 "};\n"

 "class cGenerator_4 : iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord)\n"
+	"uint4 Generate(float2 cacheCoord)\n"
 	"{ return Generate_4(cacheCoord); }\n"
 "};\n"

 "class cGenerator_5 : iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord)\n"
+	"uint4 Generate(float2 cacheCoord)\n"
 	"{ return Generate_5(cacheCoord); }\n"
 "};\n"

 "class cGenerator_6 : iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord)\n"
+	"uint4 Generate(float2 cacheCoord)\n"
 	"{ return Generate_6(cacheCoord); }\n"
 "};\n"

 "class cGenerator_8 : iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord)\n"
+	"uint4 Generate(float2 cacheCoord)\n"
 	"{ return Generate_8(cacheCoord); }\n"
 "};\n"

 "class cGenerator_B : iGenerator\n"
 "{\n"
-	"uint4 Generate(uint2 cacheCoord)\n"
+	"uint4 Generate(float2 cacheCoord)\n"
 	"{ return Generate_B(cacheCoord); }\n"
 "};\n"

@ -842,7 +841,7 @@ static const char EFB_ENCODE_PS[] =

 "void main(out uint4 ocol0 : SV_Target, in float4 Pos : SV_Position, in float2 fCacheCoord : ENCODECOORD)\n"
 "{\n"
-	"uint2 cacheCoord = uint2(fCacheCoord);\n"
+	"float2 cacheCoord = floor(fCacheCoord);\n"
 	"ocol0 = IMP_GENERATOR(cacheCoord);\n"
 "}\n"
 ;
@ -1099,10 +1098,10 @@ size_t PSTextureEncoder::Encode(u8* dst, unsigned int dstFormat,
 		TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect);
 	
 		EFBEncodeParams params = { 0 };
-		params.NumHalfCacheLinesX = cacheLinesPerRow*2;
-		params.NumBlocksY = numBlocksY;
-		params.PosX = srcRect.left;
-		params.PosY = srcRect.top;
+		params.NumHalfCacheLinesX = FLOAT(cacheLinesPerRow*2);
+		params.NumBlocksY = FLOAT(numBlocksY);
+		params.PosX = FLOAT(srcRect.left);
+		params.PosY = FLOAT(srcRect.top);
 		params.TexLeft = float(targetRect.left) / g_renderer->GetFullTargetWidth();
 		params.TexTop = float(targetRect.top) / g_renderer->GetFullTargetHeight();
 		params.TexRight = float(targetRect.right) / g_renderer->GetFullTargetWidth();