diff --git a/bin/shaders/GSdx.fx b/bin/shaders/GSdx.fx index 21ea83ca7e..1c0df624cc 100644 --- a/bin/shaders/GSdx.fx +++ b/bin/shaders/GSdx.fx @@ -39,14 +39,25 @@ #define float2 vec2 #define float3 vec3 #define float4 vec4 -#define float4x3 mat4x3 +#define float3x3 mat3 +#define float4x4 mat4 #define static #define frac fract -#define mul(x, y) y * x +#define mul(x, y) x * y #define lerp(x,y,s) mix(x,y,s) #define saturate(x) clamp(x, 0.0, 1.0) #define SamplerState sampler2D +#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \ + mat4(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3); + +#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + mat3(a0, b0, c0, a1, b1, c1, a2, b2, c2); + +// Yes it sucks! +#define matrix4x3(v0, v1, v2, v3) \ + mat3x4(v0.x, v1.x, v2.x, v3.x, v0.y, v1.y, v2.y, v3.y, v0.z, v1.z, v2.z, v3.z); + struct vertex_basic { vec4 p; @@ -75,6 +86,15 @@ layout(std140, binding = 14) uniform cb10 #else +#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \ + float4x4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3); + +#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + float3x3(a0, a1, a2, b0, b1, b2, c0, c1, c2); + +#define matrix4x3(v0, v1, v2, v3) \ + float4x3(v0, v1, v2, v3); + Texture2D Texture : register(t0); SamplerState TextureSampler : register(s0); @@ -793,13 +813,8 @@ float4 BicubicScaler(in SamplerState tex, in float2 uv, in float2 texSize) float2 index = floor(coord_hg); float2 f = coord_hg - index; - #if (GLSL == 1) - mat4 M = mat4( -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0, - -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 ); - #else - float4x4 M = { -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0, - -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 }; - #endif + float4x4 M = matrix4(-1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0, + -3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0); M /= 6.0; float4 wx = mul(float4(f.x*f.x*f.x, f.x*f.x, f.x, 1.0), M); @@ -867,7 +882,7 @@ float4 WeightQuad(float x) float3 LineRun(float ypos, float4 xpos, float4 linetaps) { - return mul(linetaps, float4x3( + return mul(linetaps, matrix4x3( PixelPos(xpos.x, ypos), PixelPos(xpos.y, ypos), PixelPos(xpos.z, ypos), @@ -890,7 +905,7 @@ float4 LanczosScaler(float2 texcoord, float2 inputSize) float4 columntaps = WeightQuad(f.y); // final sum and weight normalization - return float4(mul(columntaps, float4x3( + return float4(mul(columntaps, matrix4x3( LineRun(xystart.y, xpos, linetaps), LineRun(xystart.y + stepxy.y, xpos, linetaps), LineRun(xystart.y + stepxy.y * 2.0, xpos, linetaps), @@ -1241,22 +1256,11 @@ float4 TonemapPass(float4 color, float2 texcoord) if (TonemapType == 1) { color.rgb = FilmicTonemap(color.rgb); } // RGB -> XYZ conversion - #if (GLSL == 1) - // GLSL is column major whereas HLSL is row major ... - const mat3 RGB2XYZ = mat3 ( 0.4124564, 0.2126729, 0.0193339, // first column (not row) - 0.3575761, 0.7151522, 0.1191920, // 2nd column - 0.1804375, 0.0721750, 0.9503041 ); // 3rd column - #else - const float3x3 RGB2XYZ = { 0.4124564, 0.3575761, 0.1804375, + const float3x3 RGB2XYZ = matrix3(0.4124564, 0.3575761, 0.1804375, 0.2126729, 0.7151522, 0.0721750, - 0.0193339, 0.1191920, 0.9503041 }; - #endif + 0.0193339, 0.1191920, 0.9503041); - #if (GLSL == 1) - float3 XYZ = RGB2XYZ * color.rgb; - #else float3 XYZ = mul(RGB2XYZ, color.rgb); - #endif // XYZ -> Yxy conversion float3 Yxy; @@ -1283,22 +1287,11 @@ float4 TonemapPass(float4 color, float2 texcoord) if (CorrectionPalette == 3) { XYZ.rgb = ColorCorrection(XYZ.rgb); } // XYZ -> RGB conversion - #if (GLSL == 1) - // GLSL is column major whereas HLSL is row major ... - const mat3 XYZ2RGB = mat3 ( 3.2404542, -0.9692660, 0.0556434, // first column (not row) - -1.5371385, 1.8760108, -0.2040259, // 2nd column - -0.4985314, 0.0415560, 1.0572252 ); // 3rd column - #else - const float3x3 XYZ2RGB = { 3.2404542,-1.5371385,-0.4985314, + const float3x3 XYZ2RGB = matrix3(3.2404542,-1.5371385,-0.4985314, -0.9692660, 1.8760108, 0.0415560, - 0.0556434,-0.2040259, 1.0572252 }; - #endif + 0.0556434,-0.2040259, 1.0572252); - #if (GLSL == 1) - color.rgb = XYZ2RGB * XYZ; - #else color.rgb = mul(XYZ2RGB, XYZ); - #endif color.a = RGBLuminance(color.rgb); return color; @@ -1371,38 +1364,20 @@ float4 ContrastPass(float4 color, float2 texcoord) #if (CEL_SHADING == 1) float3 GetYUV(float3 RGB) { - #if (GLSL == 1) - const mat3 RGB2YUV = mat3(0.2126, 0.7152, 0.0722, - -0.09991,-0.33609, 0.436, - 0.615, -0.55861, -0.05639); - - return (RGB * RGB2YUV); - #else - const float3x3 RGB2YUV = { 0.2126, 0.7152, 0.0722, + const float3x3 RGB2YUV = matrix3(0.2126, 0.7152, 0.0722, -0.09991,-0.33609, 0.436, - 0.615, -0.55861, -0.05639 }; + 0.615, -0.55861, -0.05639); return mul(RGB2YUV, RGB); - - #endif } float3 GetRGB(float3 YUV) { - #if (GLSL == 1) - const mat3 YUV2RGB = mat3(1.000, 0.000, 1.28033, - 1.000,-0.21482,-0.38059, - 1.000, 2.12798, 0.000); - - return (YUV * YUV2RGB); - #else - const float3x3 YUV2RGB = { 1.000, 0.000, 1.28033, + const float3x3 YUV2RGB = matrix3(1.000, 0.000, 1.28033, 1.000,-0.21482,-0.38059, - 1.000, 2.12798, 0.000 }; + 1.000, 2.12798, 0.000); return mul(YUV2RGB, YUV); - - #endif } float4 CelPass(float4 color, float2 texcoord) diff --git a/common/build/Utilities/utilities_vs2012.vcxproj b/common/build/Utilities/utilities_vs2012.vcxproj index cf57db1452..d1cdcc753e 100644 --- a/common/build/Utilities/utilities_vs2012.vcxproj +++ b/common/build/Utilities/utilities_vs2012.vcxproj @@ -166,6 +166,7 @@ + diff --git a/common/build/Utilities/utilities_vs2012.vcxproj.filters b/common/build/Utilities/utilities_vs2012.vcxproj.filters index d0e937a35c..fa10f1eba0 100644 --- a/common/build/Utilities/utilities_vs2012.vcxproj.filters +++ b/common/build/Utilities/utilities_vs2012.vcxproj.filters @@ -168,6 +168,9 @@ Header Files + + Header Files + Header Files diff --git a/common/build/Utilities/utilities_vs2013.vcxproj b/common/build/Utilities/utilities_vs2013.vcxproj index f261d769e0..141fc4d745 100644 --- a/common/build/Utilities/utilities_vs2013.vcxproj +++ b/common/build/Utilities/utilities_vs2013.vcxproj @@ -166,6 +166,7 @@ + diff --git a/common/build/Utilities/utilities_vs2013.vcxproj.filters b/common/build/Utilities/utilities_vs2013.vcxproj.filters index 961b6a7d18..9dc015334a 100644 --- a/common/build/Utilities/utilities_vs2013.vcxproj.filters +++ b/common/build/Utilities/utilities_vs2013.vcxproj.filters @@ -168,6 +168,9 @@ Header Files + + Header Files + Header Files diff --git a/common/include/Utilities/Math.h b/common/include/Utilities/Math.h new file mode 100644 index 0000000000..f9ab252922 --- /dev/null +++ b/common/include/Utilities/Math.h @@ -0,0 +1,41 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2014- PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once +// Hopefully this file will be used for cross-source math utilities. +// Currently these are strewn across the code base. Please collect them all! + +#include "Pcsx2Defs.h" + +// On GCC >= 4.7, this is equivalent to __builtin_clrsb(n); +inline u32 count_leading_sign_bits(s32 n) { + // If the sign bit is 1, we invert the bits to 0 for count-leading-zero. + if (n < 0) + n = ~n; + + // If BSR is used directly, it would have an undefined value for 0. + if (n == 0) + return 32; + + // Perform our count leading zero. +#ifdef _MSC_VER + unsigned long ret; + _BitScanReverse(&ret, n); + return 31 - (u32)ret; +#else + return __builtin_clz(n); +#endif +} + diff --git a/pcsx2/MMI.cpp b/pcsx2/MMI.cpp index 6375405461..9aa68670e6 100644 --- a/pcsx2/MMI.cpp +++ b/pcsx2/MMI.cpp @@ -16,6 +16,7 @@ #include "PrecompiledHeader.h" #include "Common.h" +#include "Utilities/Math.h" namespace R5900 { namespace Interpreter { @@ -145,30 +146,13 @@ namespace MMI { //*****************MMI OPCODES********************************* -static __fi void _PLZCW(int n) -{ - // This function counts the number of "like" bits in the source register, starting - // with the MSB and working its way down, and returns the result MINUS ONE. - // So 0xff00 would return 7, not 8. - - int c = 0; - s32 i = cpuRegs.GPR.r[_Rs_].SL[n]; - - // Negate the source based on the sign bit. This allows us to use a simple - // unified bit test of the MSB for either condition. - if( i >= 0 ) i = ~i; - - // shift first, compare, then increment. This excludes the sign bit from our final count. - while( i <<= 1, i < 0 ) c++; - - cpuRegs.GPR.r[_Rd_].UL[n] = c; -} - void PLZCW() { - if (!_Rd_) return; + if (!_Rd_) + return; - _PLZCW (0); - _PLZCW (1); + // Return the leading sign bits, excluding the original bit + cpuRegs.GPR.r[_Rd_].UL[0] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[0]) - 1; + cpuRegs.GPR.r[_Rd_].UL[1] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[1]) - 1; } __fi void PMFHL_CLAMP(u16& dst, s32 src) diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 74ee3d7d49..0183c4e5b1 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -25,6 +25,7 @@ #include "R5900OpcodeTables.h" #include "iR5900.h" #include "iMMI.h" +#include "Utilities/Math.h" using namespace x86Emitter; @@ -66,23 +67,10 @@ void recPLZCW() _deleteEEreg(_Rd_, 0); GPR_SET_CONST(_Rd_); - for(regs = 0; regs < 2; ++regs) { - u32 val = g_cpuConstRegs[_Rs_].UL[regs]; + // Return the leading sign bits, excluding the original bit + g_cpuConstRegs[_Rd_].UL[0] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[0]) - 1; + g_cpuConstRegs[_Rd_].UL[1] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[1]) - 1; - if( val != 0 ) { - u32 setbit = val&0x80000000; - g_cpuConstRegs[_Rd_].UL[regs] = 0; - val <<= 1; - - while((val & 0x80000000) == setbit) { - g_cpuConstRegs[_Rd_].UL[regs]++; - val <<= 1; - } - } - else { - g_cpuConstRegs[_Rd_].UL[regs] = 31; - } - } return; }