This commit is contained in:
bositman 2015-06-10 01:40:52 +03:00
commit bbf4b58306
8 changed files with 93 additions and 97 deletions

View File

@ -39,14 +39,25 @@
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define float4x3 mat4x3
#define float3x3 mat3
#define float4x4 mat4
#define static
#define frac fract
#define mul(x, y) y * x
#define mul(x, y) x * y
#define lerp(x,y,s) mix(x,y,s)
#define saturate(x) clamp(x, 0.0, 1.0)
#define SamplerState sampler2D
// Matrix construction helpers for the GLSL path. Callers list the elements row
// by row; GLSL matrix constructors consume their arguments column by column, so
// the arguments are reordered (transposed) in the expansion.
// The expansions intentionally carry no trailing ';': the macros must remain
// usable inside larger expressions (for example as an argument to mul()), and
// every declaration that uses them already supplies its own terminator.
#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
    mat4(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3)
#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
    mat3(a0, b0, c0, a1, b1, c1, a2, b2, c2)
// Builds the matrix from four 3-component vectors, one component at a time.
// Not pretty: every argument is expanded three times, so an argument that is a
// function call is evaluated three times. The arguments are parenthesized so
// that compound expressions still bind correctly to the swizzle.
#define matrix4x3(v0, v1, v2, v3) \
    mat3x4((v0).x, (v1).x, (v2).x, (v3).x, (v0).y, (v1).y, (v2).y, (v3).y, (v0).z, (v1).z, (v2).z, (v3).z)
struct vertex_basic
{
vec4 p;
@ -75,6 +86,15 @@ layout(std140, binding = 14) uniform cb10
#else
// Matrix construction helpers for the HLSL path. The arguments are forwarded to
// the HLSL constructors in the order given, without reordering.
// The expansions intentionally carry no trailing ';': mul() is a real function
// here, so a macro used as one of its arguments must expand to a bare
// expression. Every declaration that uses these macros already supplies its
// own terminator.
#define matrix4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
    float4x4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3)
#define matrix3(a0, a1, a2, b0, b1, b2, c0, c1, c2) \
    float3x3(a0, a1, a2, b0, b1, b2, c0, c1, c2)
// Four 3-component vectors, passed straight through to the constructor.
#define matrix4x3(v0, v1, v2, v3) \
    float4x3(v0, v1, v2, v3)
Texture2D Texture : register(t0);
SamplerState TextureSampler : register(s0);
@ -793,13 +813,8 @@ float4 BicubicScaler(in SamplerState tex, in float2 uv, in float2 texSize)
float2 index = floor(coord_hg);
float2 f = coord_hg - index;
#if (GLSL == 1)
mat4 M = mat4( -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
-3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 );
#else
float4x4 M = { -1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
-3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0 };
#endif
float4x4 M = matrix4(-1.0, 3.0,-3.0, 1.0, 3.0,-6.0, 3.0, 0.0,
-3.0, 0.0, 3.0, 0.0, 1.0, 4.0, 1.0, 0.0);
M /= 6.0;
float4 wx = mul(float4(f.x*f.x*f.x, f.x*f.x, f.x, 1.0), M);
@ -867,7 +882,7 @@ float4 WeightQuad(float x)
float3 LineRun(float ypos, float4 xpos, float4 linetaps)
{
return mul(linetaps, float4x3(
return mul(linetaps, matrix4x3(
PixelPos(xpos.x, ypos),
PixelPos(xpos.y, ypos),
PixelPos(xpos.z, ypos),
@ -890,7 +905,7 @@ float4 LanczosScaler(float2 texcoord, float2 inputSize)
float4 columntaps = WeightQuad(f.y);
// final sum and weight normalization
return float4(mul(columntaps, float4x3(
return float4(mul(columntaps, matrix4x3(
LineRun(xystart.y, xpos, linetaps),
LineRun(xystart.y + stepxy.y, xpos, linetaps),
LineRun(xystart.y + stepxy.y * 2.0, xpos, linetaps),
@ -1241,22 +1256,11 @@ float4 TonemapPass(float4 color, float2 texcoord)
if (TonemapType == 1) { color.rgb = FilmicTonemap(color.rgb); }
// RGB -> XYZ conversion
#if (GLSL == 1)
// GLSL is column major whereas HLSL is row major ...
const mat3 RGB2XYZ = mat3 ( 0.4124564, 0.2126729, 0.0193339, // first column (not row)
0.3575761, 0.7151522, 0.1191920, // 2nd column
0.1804375, 0.0721750, 0.9503041 ); // 3rd column
#else
const float3x3 RGB2XYZ = { 0.4124564, 0.3575761, 0.1804375,
const float3x3 RGB2XYZ = matrix3(0.4124564, 0.3575761, 0.1804375,
0.2126729, 0.7151522, 0.0721750,
0.0193339, 0.1191920, 0.9503041 };
#endif
0.0193339, 0.1191920, 0.9503041);
#if (GLSL == 1)
float3 XYZ = RGB2XYZ * color.rgb;
#else
float3 XYZ = mul(RGB2XYZ, color.rgb);
#endif
// XYZ -> Yxy conversion
float3 Yxy;
@ -1283,22 +1287,11 @@ float4 TonemapPass(float4 color, float2 texcoord)
if (CorrectionPalette == 3) { XYZ.rgb = ColorCorrection(XYZ.rgb); }
// XYZ -> RGB conversion
#if (GLSL == 1)
// GLSL is column major whereas HLSL is row major ...
const mat3 XYZ2RGB = mat3 ( 3.2404542, -0.9692660, 0.0556434, // first column (not row)
-1.5371385, 1.8760108, -0.2040259, // 2nd column
-0.4985314, 0.0415560, 1.0572252 ); // 3rd column
#else
const float3x3 XYZ2RGB = { 3.2404542,-1.5371385,-0.4985314,
const float3x3 XYZ2RGB = matrix3(3.2404542,-1.5371385,-0.4985314,
-0.9692660, 1.8760108, 0.0415560,
0.0556434,-0.2040259, 1.0572252 };
#endif
0.0556434,-0.2040259, 1.0572252);
#if (GLSL == 1)
color.rgb = XYZ2RGB * XYZ;
#else
color.rgb = mul(XYZ2RGB, XYZ);
#endif
color.a = RGBLuminance(color.rgb);
return color;
@ -1371,38 +1364,20 @@ float4 ContrastPass(float4 color, float2 texcoord)
#if (CEL_SHADING == 1)
float3 GetYUV(float3 RGB)
{
#if (GLSL == 1)
const mat3 RGB2YUV = mat3(0.2126, 0.7152, 0.0722,
-0.09991,-0.33609, 0.436,
0.615, -0.55861, -0.05639);
return (RGB * RGB2YUV);
#else
const float3x3 RGB2YUV = { 0.2126, 0.7152, 0.0722,
const float3x3 RGB2YUV = matrix3(0.2126, 0.7152, 0.0722,
-0.09991,-0.33609, 0.436,
0.615, -0.55861, -0.05639 };
0.615, -0.55861, -0.05639);
return mul(RGB2YUV, RGB);
#endif
}
float3 GetRGB(float3 YUV)
{
#if (GLSL == 1)
const mat3 YUV2RGB = mat3(1.000, 0.000, 1.28033,
1.000,-0.21482,-0.38059,
1.000, 2.12798, 0.000);
return (YUV * YUV2RGB);
#else
const float3x3 YUV2RGB = { 1.000, 0.000, 1.28033,
const float3x3 YUV2RGB = matrix3(1.000, 0.000, 1.28033,
1.000,-0.21482,-0.38059,
1.000, 2.12798, 0.000 };
1.000, 2.12798, 0.000);
return mul(YUV2RGB, YUV);
#endif
}
float4 CelPass(float4 color, float2 texcoord)

View File

@ -166,6 +166,7 @@
<ClInclude Include="..\..\include\Utilities\HashMap.h" />
<ClInclude Include="..\..\include\intrin_x86.h" />
<ClInclude Include="..\..\include\Utilities\lnx_memzero.h" />
<ClInclude Include="..\..\include\Utilities\Math.h" />
<ClInclude Include="..\..\include\Utilities\MemcpyFast.h" />
<ClInclude Include="..\..\include\Utilities\Path.h" />
<ClInclude Include="..\..\src\Utilities\PrecompiledHeader.h" />

View File

@ -168,6 +168,9 @@
<ClInclude Include="..\..\include\Utilities\lnx_memzero.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\Utilities\Math.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\Utilities\MemcpyFast.h">
<Filter>Header Files</Filter>
</ClInclude>

View File

@ -166,6 +166,7 @@
<ClInclude Include="..\..\include\Utilities\HashMap.h" />
<ClInclude Include="..\..\include\intrin_x86.h" />
<ClInclude Include="..\..\include\Utilities\lnx_memzero.h" />
<ClInclude Include="..\..\include\Utilities\Math.h" />
<ClInclude Include="..\..\include\Utilities\MemcpyFast.h" />
<ClInclude Include="..\..\include\Utilities\Path.h" />
<ClInclude Include="..\..\src\Utilities\PrecompiledHeader.h" />

View File

@ -168,6 +168,9 @@
<ClInclude Include="..\..\include\Utilities\lnx_memzero.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\Utilities\Math.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\include\Utilities\MemcpyFast.h">
<Filter>Header Files</Filter>
</ClInclude>

View File

@ -0,0 +1,41 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2014- PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Hopefully this file will be used for cross-source math utilities.
// Currently these are strewn across the code base. Please collect them all!
#include "Pcsx2Defs.h"
// Returns how many consecutive most-significant bits of n are equal to its sign
// bit, with the sign bit itself included in the count. The result is always in
// the range 1..32; both 0 and -1 yield 32.
// Note: GCC's __builtin_clrsb(n) does not count the sign bit itself, so this
// function returns __builtin_clrsb(n) + 1.
inline u32 count_leading_sign_bits(s32 n) {
	// Complementing a negative value turns its leading ones into leading zeros,
	// so one count-leading-zeros pass serves both signs.
	const u32 folded = (u32)(n < 0 ? ~n : n);

	// Neither BSR nor __builtin_clz produces a defined result for 0, so the
	// all-sign-bits case is answered directly.
	if (folded == 0)
		return 32;

#ifdef _MSC_VER
	// BSR yields the index of the highest set bit; convert it to a zero count.
	unsigned long highest_set;
	_BitScanReverse(&highest_set, folded);
	return 31 - (u32)highest_set;
#else
	return (u32)__builtin_clz(folded);
#endif
}

View File

@ -16,6 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Utilities/Math.h"
namespace R5900 {
namespace Interpreter {
@ -145,30 +146,13 @@ namespace MMI {
//*****************MMI OPCODES*********************************
static __fi void _PLZCW(int n)
{
// This function counts the number of "like" bits in the source register, starting
// with the MSB and working its way down, and returns the result MINUS ONE.
// So 0xff00 would return 7, not 8.
int c = 0;
s32 i = cpuRegs.GPR.r[_Rs_].SL[n];
// Negate the source based on the sign bit. This allows us to use a simple
// unified bit test of the MSB for either condition.
if( i >= 0 ) i = ~i;
// shift first, compare, then increment. This excludes the sign bit from our final count.
while( i <<= 1, i < 0 ) c++;
cpuRegs.GPR.r[_Rd_].UL[n] = c;
}
void PLZCW() {
if (!_Rd_) return;
if (!_Rd_)
return;
_PLZCW (0);
_PLZCW (1);
// Return the leading sign bits, excluding the original bit
cpuRegs.GPR.r[_Rd_].UL[0] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[0]) - 1;
cpuRegs.GPR.r[_Rd_].UL[1] = count_leading_sign_bits(cpuRegs.GPR.r[_Rs_].SL[1]) - 1;
}
__fi void PMFHL_CLAMP(u16& dst, s32 src)

View File

@ -25,6 +25,7 @@
#include "R5900OpcodeTables.h"
#include "iR5900.h"
#include "iMMI.h"
#include "Utilities/Math.h"
using namespace x86Emitter;
@ -66,23 +67,10 @@ void recPLZCW()
_deleteEEreg(_Rd_, 0);
GPR_SET_CONST(_Rd_);
for(regs = 0; regs < 2; ++regs) {
u32 val = g_cpuConstRegs[_Rs_].UL[regs];
// Return the leading sign bits, excluding the original bit
g_cpuConstRegs[_Rd_].UL[0] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[0]) - 1;
g_cpuConstRegs[_Rd_].UL[1] = count_leading_sign_bits(g_cpuConstRegs[_Rs_].SL[1]) - 1;
if( val != 0 ) {
u32 setbit = val&0x80000000;
g_cpuConstRegs[_Rd_].UL[regs] = 0;
val <<= 1;
while((val & 0x80000000) == setbit) {
g_cpuConstRegs[_Rd_].UL[regs]++;
val <<= 1;
}
}
else {
g_cpuConstRegs[_Rd_].UL[regs] = 31;
}
}
return;
}