Merge branch 'master' of https://github.com/PCSX2/pcsx2

2015-06-10 01:40:52 +03:00 · 2015-06-10 01:40:52 +03:00 · bbf4b58306
parent 5f201e21ee 206d2008f3
commit bbf4b58306
8 changed files with 93 additions and 97 deletions
--- a/bin/shaders/GSdx.fx
+++ b/bin/shaders/GSdx.fx
@ -39,14 +39,25 @@
 #define float2 vec2
 #define float3 vec3
 #define float4 vec4
-#define float4x3 mat4x3
+#define float3x3 mat3
+#define float4x4 mat4
 #define static
 #define frac fract
-#define mul(x, y) y * x
+#define mul(x, y) ((x) * (y)) // parenthesized so compound arguments and surrounding operators keep their grouping
 #define lerp(x,y,s) mix(x,y,s)
 #define saturate(x) clamp(x, 0.0, 1.0)
 #define SamplerState sampler2D

+// Arguments are given row by row (HLSL order) and transposed for GLSL's column-major constructors; no trailing ';' so the macros also work inside expressions.
+#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
+           mat4(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3)
+#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
+           mat3(a0, b0, c0, a1, b1, c1, a2, b2, c2)
+
+// Takes four row vectors; each argument is expanded three times (.x/.y/.z), so pass cheap, side-effect-free expressions.
+#define matrix4x3(v0, v1, v2, v3) \
+    mat3x4((v0).x, (v1).x, (v2).x, (v3).x, (v0).y, (v1).y, (v2).y, (v3).y, (v0).z, (v1).z, (v2).z, (v3).z)
+
 struct vertex_basic
 {
    vec4 p;
@ -75,6 +86,15 @@ layout(std140, binding = 14) uniform cb10

 #else

+// HLSL constructors take their arguments row by row, so these pass them through unchanged; no trailing ';' so the macros also work inside expressions.
+#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
+       float4x4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3)
+#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
+       float3x3(a0, a1, a2, b0, b1, b2, c0, c1, c2)
+
+#define matrix4x3(v0, v1, v2, v3) \
+         float4x3(v0, v1, v2, v3)
+
 Texture2D Texture : register(t0);
 SamplerState TextureSampler : register(s0);

@ -793,13 +813,8 @@ float4 BicubicScaler(in SamplerState tex, in float2 uv, in float2 texSize)
    float2 index = floor(coord_hg);
    float2 f = coord_hg - index;

-    #if (GLSL == 1)
-    mat4 M = mat4( -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
-                   -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 );
-    #else
-    float4x4 M = { -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
-                   -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 };
-    #endif
+    float4x4 M = matrix4(-1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
+                   -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0);
    M /= 6.0;

    float4 wx = mul(float4(f.x*f.x*f.x, f.x*f.x, f.x, 1.0), M);
@ -867,7 +882,7 @@ float4 WeightQuad(float x)

 float3 LineRun(float ypos, float4 xpos, float4 linetaps)
 {
-    return mul(linetaps, float4x3(
+    return mul(linetaps, matrix4x3(
    PixelPos(xpos.x, ypos),
    PixelPos(xpos.y, ypos),
    PixelPos(xpos.z, ypos),
@ -890,7 +905,7 @@ float4 LanczosScaler(float2 texcoord, float2 inputSize)
    float4 columntaps = WeightQuad(f.y);

    // final sum and weight normalization
-    return float4(mul(columntaps, float4x3(
+    return float4(mul(columntaps, matrix4x3(
    LineRun(xystart.y, xpos, linetaps),
    LineRun(xystart.y + stepxy.y, xpos, linetaps),
    LineRun(xystart.y + stepxy.y * 2.0, xpos, linetaps),
@ -1241,22 +1256,11 @@ float4 TonemapPass(float4 color, float2 texcoord)
    if (TonemapType == 1) { color.rgb = FilmicTonemap(color.rgb); }

    // RGB -> XYZ conversion
-    #if (GLSL == 1)
-    // GLSL is column major whereas HLSL is row major ...
-    const mat3 RGB2XYZ = mat3 ( 0.4124564, 0.2126729, 0.0193339,   // first column (not row)
-                                0.3575761, 0.7151522, 0.1191920,   // 2nd column
-                                0.1804375, 0.0721750, 0.9503041 ); // 3rd column
-    #else
-    const float3x3 RGB2XYZ = { 0.4124564, 0.3575761, 0.1804375,
+    const float3x3 RGB2XYZ = matrix3(0.4124564, 0.3575761, 0.1804375,
                               0.2126729, 0.7151522, 0.0721750,
-                               0.0193339, 0.1191920, 0.9503041 };
-    #endif
+                               0.0193339, 0.1191920, 0.9503041);

-    #if (GLSL == 1)
-    float3 XYZ = RGB2XYZ * color.rgb;
-    #else
    float3 XYZ = mul(RGB2XYZ, color.rgb);
-    #endif

    // XYZ -> Yxy conversion
    float3 Yxy;
@ -1283,22 +1287,11 @@ float4 TonemapPass(float4 color, float2 texcoord)
    if (CorrectionPalette == 3) { XYZ.rgb = ColorCorrection(XYZ.rgb); }

    // XYZ -> RGB conversion
-    #if (GLSL == 1)
-    // GLSL is column major whereas HLSL is row major ...
-    const mat3 XYZ2RGB = mat3 ( 3.2404542, -0.9692660,  0.0556434,   // first column (not row)
-                               -1.5371385,  1.8760108, -0.2040259,   // 2nd column
-                               -0.4985314,  0.0415560,  1.0572252 ); // 3rd column
-    #else
-    const float3x3 XYZ2RGB = { 3.2404542,-1.5371385,-0.4985314,
+    const float3x3 XYZ2RGB = matrix3(3.2404542,-1.5371385,-0.4985314,
                              -0.9692660, 1.8760108, 0.0415560,
-                               0.0556434,-0.2040259, 1.0572252 };
-    #endif
+                               0.0556434,-0.2040259, 1.0572252);

-    #if (GLSL == 1)
-    color.rgb = XYZ2RGB * XYZ;
-    #else
    color.rgb = mul(XYZ2RGB, XYZ);
-    #endif
    color.a = RGBLuminance(color.rgb);

    return color;
@ -1371,38 +1364,20 @@ float4 ContrastPass(float4 color, float2 texcoord)
 #if (CEL_SHADING == 1)
 float3 GetYUV(float3 RGB)
 {
-    #if (GLSL == 1)
-    const mat3 RGB2YUV = mat3(0.2126, 0.7152, 0.0722,
-                             -0.09991,-0.33609, 0.436,
-                              0.615, -0.55861, -0.05639);
-
-    return (RGB * RGB2YUV);
-    #else
-    const float3x3 RGB2YUV = { 0.2126, 0.7152, 0.0722,
+    const float3x3 RGB2YUV = matrix3(0.2126, 0.7152, 0.0722,
                              -0.09991,-0.33609, 0.436,
-                               0.615, -0.55861, -0.05639 };
+                               0.615, -0.55861, -0.05639);

    return mul(RGB2YUV, RGB);
-
-    #endif
 }

 float3 GetRGB(float3 YUV)
 {
-    #if (GLSL == 1)
-    const mat3 YUV2RGB = mat3(1.000, 0.000, 1.28033,
-                              1.000,-0.21482,-0.38059,
-                              1.000, 2.12798, 0.000);
-
-    return (YUV * YUV2RGB);
-    #else
-    const float3x3 YUV2RGB = { 1.000, 0.000, 1.28033,
+    const float3x3 YUV2RGB = matrix3(1.000, 0.000, 1.28033,
                               1.000,-0.21482,-0.38059,
-                               1.000, 2.12798, 0.000 };
+                               1.000, 2.12798, 0.000);

    return mul(YUV2RGB, YUV);
-
-    #endif
 }

 float4 CelPass(float4 color, float2 texcoord)
--- a/common/build/Utilities/utilities_vs2012.vcxproj
+++ b/common/build/Utilities/utilities_vs2012.vcxproj
@ -166,6 +166,7 @@
    <ClInclude Include="..\..\include\Utilities\HashMap.h" />
    <ClInclude Include="..\..\include\intrin_x86.h" />
    <ClInclude Include="..\..\include\Utilities\lnx_memzero.h" />
+    <ClInclude Include="..\..\include\Utilities\Math.h" />
    <ClInclude Include="..\..\include\Utilities\MemcpyFast.h" />
    <ClInclude Include="..\..\include\Utilities\Path.h" />
    <ClInclude Include="..\..\src\Utilities\PrecompiledHeader.h" />
--- a/common/build/Utilities/utilities_vs2012.vcxproj.filters
+++ b/common/build/Utilities/utilities_vs2012.vcxproj.filters
@ -168,6 +168,9 @@
    <ClInclude Include="..\..\include\Utilities\lnx_memzero.h">
      <Filter>Header Files</Filter>
    </ClInclude>
+    <ClInclude Include="..\..\include\Utilities\Math.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
    <ClInclude Include="..\..\include\Utilities\MemcpyFast.h">
      <Filter>Header Files</Filter>
    </ClInclude>
--- a/common/build/Utilities/utilities_vs2013.vcxproj
+++ b/common/build/Utilities/utilities_vs2013.vcxproj
@ -166,6 +166,7 @@
    <ClInclude Include="..\..\include\Utilities\HashMap.h" />
    <ClInclude Include="..\..\include\intrin_x86.h" />
    <ClInclude Include="..\..\include\Utilities\lnx_memzero.h" />
+    <ClInclude Include="..\..\include\Utilities\Math.h" />
    <ClInclude Include="..\..\include\Utilities\MemcpyFast.h" />
    <ClInclude Include="..\..\include\Utilities\Path.h" />
    <ClInclude Include="..\..\src\Utilities\PrecompiledHeader.h" />
--- a/common/build/Utilities/utilities_vs2013.vcxproj.filters
+++ b/common/build/Utilities/utilities_vs2013.vcxproj.filters
@ -168,6 +168,9 @@
    <ClInclude Include="..\..\include\Utilities\lnx_memzero.h">
      <Filter>Header Files</Filter>
    </ClInclude>
+    <ClInclude Include="..\..\include\Utilities\Math.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
    <ClInclude Include="..\..\include\Utilities\MemcpyFast.h">
      <Filter>Header Files</Filter>
    </ClInclude>
--- a/common/include/Utilities/Math.h
+++ b/common/include/Utilities/Math.h
@ -0,0 +1,41 @@
+/*  PCSX2 - PS2 Emulator for PCs
+ *  Copyright (C) 2014-  PCSX2 Dev Team
+ *
+ *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ *  of the GNU Lesser General Public License as published by the Free Software Found-
+ *  ation, either version 3 of the License, or (at your option) any later version.
+ *
+ *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *  PURPOSE.  See the GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along with PCSX2.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+// Hopefully this file will be used for cross-source math utilities.
+// Currently these are strewn across the code base. Please collect them all!
+
+#include "Pcsx2Defs.h"
+
+// Counts the leading bits of n that equal its sign bit, sign bit included (result is 1..32). On GCC >= 4.7 this is __builtin_clrsb(n) + 1, because clrsb does not count the sign bit itself.
+inline u32 count_leading_sign_bits(s32 n) {
+	// A negative n has leading ones; complementing turns them into leading zeros so a single count-leading-zeros path serves both signs.
+	if (n < 0)
+		n = ~n;
+
+	// n is 0 here for the inputs 0 and -1 (all 32 bits match the sign). Handled separately because BSR (and __builtin_clz) give an undefined result for 0.
+	if (n == 0)
+		return 32;
+
+	// n is now strictly positive; count its leading zero bits.
+#ifdef _MSC_VER
+	unsigned long ret;
+	_BitScanReverse(&ret, n);
+	return 31 - (u32)ret; // ret is the index of the highest set bit
+#else
+	return __builtin_clz(n); 
+#endif
+}
+
--- a/pcsx2/MMI.cpp
+++ b/pcsx2/MMI.cpp
@ -16,6 +16,7 @@

 #include "PrecompiledHeader.h"
 #include "Common.h"
+#include "Utilities/Math.h"

 namespace R5900 {
 namespace Interpreter {
@ -145,30 +146,13 @@ namespace MMI {

 //*****************MMI OPCODES*********************************

-static __fi void _PLZCW(int n)
-{
-	// This function counts the number of "like" bits in the source register, starting
-	// with the MSB and working its way down, and returns the result MINUS ONE.
-	// So 0xff00 would return 7, not 8.
-
-	int c = 0;
-	s32 i = cpuRegs.GPR.r[_Rs_].SL[n];
-
-	// Negate the source based on the sign bit.  This allows us to use a simple
-	// unified bit test of the MSB for either condition.
-	if( i >= 0 ) i = ~i;
-
-	// shift first, compare, then increment.  This excludes the sign bit from our final count.
-	while( i <<= 1, i < 0 ) c++;
-
-	cpuRegs.GPR.r[_Rd_].UL[n] = c;
-}
-
 void PLZCW() {
-    if (!_Rd_) return;
+	if (!_Rd_)
+		return;

-	_PLZCW (0);
-	_PLZCW (1);
+	// For each of the two low 32-bit words of Rs: store how many bits after the sign bit match it (leading sign bits minus the sign bit itself).
+	cpuRegs.GPR.r[_Rd_].UL[0] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[0]) - 1;
+	cpuRegs.GPR.r[_Rd_].UL[1] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[1]) - 1;
 }

 __fi void PMFHL_CLAMP(u16& dst, s32 src)
--- a/pcsx2/x86/iMMI.cpp
+++ b/pcsx2/x86/iMMI.cpp
@ -25,6 +25,7 @@
 #include "R5900OpcodeTables.h"
 #include "iR5900.h"
 #include "iMMI.h"
+#include "Utilities/Math.h"

 using namespace x86Emitter;

@ -66,23 +67,10 @@ void recPLZCW()
 		_deleteEEreg(_Rd_, 0);
 		GPR_SET_CONST(_Rd_);

-		for(regs = 0; regs < 2; ++regs) {
-			u32 val = g_cpuConstRegs[_Rs_].UL[regs];
+		// Return the leading sign bits, excluding the original bit
+		g_cpuConstRegs[_Rd_].UL[0] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[0]) - 1;
+		g_cpuConstRegs[_Rd_].UL[1] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[1]) - 1;

-			if( val != 0 ) {
-				u32 setbit = val&0x80000000;
-				g_cpuConstRegs[_Rd_].UL[regs] = 0;
-				val <<= 1;
-
-				while((val & 0x80000000) == setbit) {
-					g_cpuConstRegs[_Rd_].UL[regs]++;
-					val <<= 1;
-				}
-			}
-			else {
-				g_cpuConstRegs[_Rd_].UL[regs] = 31;
-			}
-		}
 		return;
 	}