From f869281301a9b638f4fdf1ab3f4168f12909be2c Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Fri, 7 Jan 2011 19:23:57 +0000 Subject: [PATCH] normalize the efb to texture process for color textures to make it work the same in all the plugins and with the same accuracy as real hardware (almost :)) please test for regressions and fixes. some little changes to make pixel shader more dx9 sm2.0 friendly. the condition is not to use pixel lighting ( sorry no hardware support for the quantity of parameters needed). git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6777 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Common/Src/LinearDiskCache.h | 2 +- Source/Core/VideoCommon/Src/PixelShaderGen.h | 21 +- .../VideoCommon/Src/PixelShaderManager.cpp | 6 +- .../Core/VideoCommon/Src/PixelShaderManager.h | 2 +- .../Core/VideoCommon/Src/TextureCacheBase.cpp | 221 +++++++++--------- .../Plugin_VideoDX11/Src/PixelShaderCache.cpp | 29 +-- .../Plugin_VideoDX11/Src/TextureCache.cpp | 4 +- .../Plugin_VideoDX9/Src/PixelShaderCache.cpp | 14 +- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 12 +- .../Plugin_VideoDX9/Src/TextureCache.cpp | 3 +- .../Plugin_VideoDX9/Src/TextureConverter.cpp | 4 +- .../Plugin_VideoOGL/Src/PixelShaderCache.cpp | 42 ++-- .../Plugin_VideoOGL/Src/TextureCache.cpp | 3 +- .../Plugin_VideoOGL/Src/TextureConverter.cpp | 3 +- 14 files changed, 185 insertions(+), 181 deletions(-) diff --git a/Source/Core/Common/Src/LinearDiskCache.h b/Source/Core/Common/Src/LinearDiskCache.h index e9e00239ae..82b643ad1c 100644 --- a/Source/Core/Common/Src/LinearDiskCache.h +++ b/Source/Core/Common/Src/LinearDiskCache.h @@ -26,7 +26,7 @@ // shader cache for every revision, graphics-related or not, which is simply annoying. enum { - LINEAR_DISKCACHE_VER = 6622 + LINEAR_DISKCACHE_VER = 6777 }; // On disk format: diff --git a/Source/Core/VideoCommon/Src/PixelShaderGen.h b/Source/Core/VideoCommon/Src/PixelShaderGen.h index ff4d0f912d..9b01eca3c0 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderGen.h +++ b/Source/Core/VideoCommon/Src/PixelShaderGen.h @@ -31,16 +31,17 @@ #define I_PLIGHTS "cLights" #define I_PMATERIALS "cmtrl" -#define C_COLORS 0 -#define C_KCOLORS (C_COLORS + 4) -#define C_ALPHA (C_KCOLORS + 4) -#define C_TEXDIMS (C_ALPHA + 1) -#define C_ZBIAS (C_TEXDIMS + 8) -#define C_INDTEXSCALE (C_ZBIAS + 2) -#define C_INDTEXMTX (C_INDTEXSCALE + 2) -#define C_FOG (C_INDTEXMTX + 6) -#define C_COLORMATRIX (C_FOG + 2) -#define C_PLIGHTS (C_COLORMATRIX + 5) +#define C_COLORMATRIX 0 // 0 +#define C_COLORS 0 // 0 +#define C_KCOLORS (C_COLORS + 4) // 4 +#define C_ALPHA (C_KCOLORS + 4) // 8 +#define C_TEXDIMS (C_ALPHA + 1) // 9 +#define C_ZBIAS (C_TEXDIMS + 8) //17 +#define C_INDTEXSCALE (C_ZBIAS + 2) //19 +#define C_INDTEXMTX (C_INDTEXSCALE + 2) //21 +#define C_FOG (C_INDTEXMTX + 6) //27 + +#define C_PLIGHTS (C_FOG + 2) #define C_PMATERIALS (C_PLIGHTS + 40) #define C_PENVCONST_END (C_PMATERIALS + 4) #define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2) diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp index 1e3ed1d321..5f07615592 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.cpp @@ -395,10 +395,10 @@ void PixelShaderManager::SetFogParamChanged() s_bFogParamChanged = true; } -void PixelShaderManager::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) +void PixelShaderManager::SetColorMatrix(const float* pmatrix) { - SetMultiPSConstant4fv(C_COLORMATRIX,4,pmatrix); - SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd); + SetMultiPSConstant4fv(C_COLORMATRIX,7,pmatrix); + s_nColorsChanged[0] = s_nColorsChanged[1] = 15; } void PixelShaderManager::InvalidateXFRange(int start, int end) diff --git a/Source/Core/VideoCommon/Src/PixelShaderManager.h b/Source/Core/VideoCommon/Src/PixelShaderManager.h index c45f9153a0..de46341441 100644 --- a/Source/Core/VideoCommon/Src/PixelShaderManager.h +++ b/Source/Core/VideoCommon/Src/PixelShaderManager.h @@ -51,7 +51,7 @@ public: static void SetTexCoordChanged(u8 texmapid); static void SetFogColorChanged(); static void SetFogParamChanged(); - static void SetColorMatrix(const float* pmatrix, const float* pfConstAdd); + static void SetColorMatrix(const float* pmatrix); static void InvalidateXFRange(int start, int end); static void SetMaterialColor(int index, u32 data); }; diff --git a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp index e58b0d87fb..4b6081c61c 100644 --- a/Source/Core/VideoCommon/Src/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/Src/TextureCacheBase.cpp @@ -414,106 +414,64 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, { DVSTARTPROFILE(); - float colmat[20] = {}; - // last four floats of colmat for fConstAdd + float colmat[28] = {0}; float *const fConstAdd = colmat + 16; + float *const ColorMask = colmat + 20; + ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f; + ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f; unsigned int cbufid = -1; if (bFromZBuffer) { - // TODO: these values differ slightly from the DX9/11 values, - // do they need to? or can this be removed - if (g_ActiveConfig.backend_info.APIType == API_OPENGL) + switch (copyfmt) { - switch(copyfmt) - { - case 0: // Z4 - case 1: // Z8 - case 8: // Z8 - colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; - break; + case 0: // Z4 + colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; + cbufid = 0; + break; + case 1: // Z8 + case 8: // Z8 + colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f; + cbufid = 1; + break; - case 3: // Z16 //? - colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; - break; + case 3: // Z16 + colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f; + cbufid = 13; + break; - case 11: // Z16 (reverse order) - colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1; - break; + case 11: // Z16 (reverse order) + colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; + cbufid = 2; + break; - case 6: // Z24X8 - colmat[2] = colmat[5] = colmat[8] = colmat[15] = 1; - break; + case 6: // Z24X8 + colmat[0] = colmat[5] = colmat[10] = 1.0f; + cbufid = 3; + break; - case 9: // Z8M - colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; - break; + case 9: // Z8M + colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; + cbufid = 4; + break; - case 10: // Z8L - colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; - break; + case 10: // Z8L + colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; + cbufid = 5; + break; - case 12: // Z16L - colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; - break; + case 12: // Z16L + colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f; + cbufid = 6; + break; - default: - ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - break; - } - } - else - { - switch (copyfmt) - { - case 0: // Z4 - colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1; - cbufid = 20; - break; - case 1: // Z8 - case 8: // Z8 - colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f; - cbufid = 12; - break; - - case 3: // Z16 - colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f; - cbufid = 13; - break; - - case 11: // Z16 (reverse order) - colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; - cbufid = 14; - break; - - case 6: // Z24X8 - colmat[0] = colmat[5] = colmat[10] = 1.0f; - cbufid = 15; - break; - - case 9: // Z8M - colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; - cbufid = 16; - break; - - case 10: // Z8L - colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; - cbufid = 17; - break; - - case 12: // Z16L - colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f; - cbufid = 18; - break; - - default: - ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt); - colmat[2] = colmat[5] = colmat[8] = 1.0f; - cbufid = 19; - break; - } + default: + ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt); + colmat[2] = colmat[5] = colmat[8] = 1.0f; + cbufid = 7; + break; } + } else if (bIsIntensityFmt) { @@ -532,21 +490,40 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, if (copyfmt < 2 || copyfmt == 8) { - fConstAdd[3] = 16.0f / 255.0f; colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; - cbufid = 0; + fConstAdd[3] = 16.0f/255.0f; + if (copyfmt == 0) + { + ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f; + ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f; + cbufid = 8; + } + else + { + cbufid = 9; + } } else// alpha { colmat[15] = 1; - cbufid = 1; + if (copyfmt == 2) + { + ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f; + ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f; + cbufid = 10; + } + else + { + cbufid = 11; + } + } - break; default: ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; + cbufid = 23; break; } } @@ -555,59 +532,79 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, switch (copyfmt) { case 0: // R4 + colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; + ColorMask[0] = 15.0f; + ColorMask[4] = 1.0f / 15.0f; + cbufid = 12; + break; case 1: // R8 case 8: // R8 colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; - cbufid = 2; + cbufid = 13; break; case 2: // RA4 + colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f; + ColorMask[0] = ColorMask[3] = 15.0f; + ColorMask[4] = ColorMask[7] = 1.0f / 15.0f; + cbufid = 14; + break; case 3: // RA8 - colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1; - cbufid = 3; + colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f; + cbufid = 15; break; case 7: // A8 - colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1; - cbufid = 4; + colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; + cbufid = 16; break; case 9: // G8 - colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; - cbufid = 5; + colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; + cbufid = 17; break; - case 10: // B8 - colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; - cbufid = 6; + colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; + cbufid = 18; break; case 11: // RG8 - colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; - cbufid = 7; + colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; + cbufid = 19; break; case 12: // GB8 - colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; - cbufid = 8; + colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f; + cbufid = 20; break; case 4: // RGB565 - colmat[0] = colmat[5] = colmat[10] = 1; - fConstAdd[3] = 1; // set alpha to 1 - cbufid = 9; + colmat[0] = colmat[5] = colmat[10] = 1.0f; + ColorMask[0] = ColorMask[2] = 31.0f; + ColorMask[4] = ColorMask[6] = 1.0f / 31.0f; + ColorMask[1] = 63.0f; + ColorMask[5] = 1.0f / 63.0f; + fConstAdd[3] = 1.0f; // set alpha to 1 + cbufid = 21; break; case 5: // RGB5A3 + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; + ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f; + ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f; + ColorMask[3] = 7.0f; + ColorMask[7] = 1.0f / 7.0f; + cbufid = 22; + break; case 6: // RGBA8 - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - cbufid = 10; + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; + cbufid = 23; break; default: ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt); - colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; - cbufid = 11; + colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; + cbufid = 23; break; } } diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp index a4a8cb5956..9323b0f37e 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PixelShaderCache.cpp @@ -86,12 +86,13 @@ const char color_copy_program_code_msaa[] = { const char color_matrix_program_code[] = { "sampler samp0 : register(s0);\n" "Texture2D Tex0 : register(t0);\n" - "uniform float4 cColMatrix[5] : register(c0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" "void main(\n" "out float4 ocol0 : SV_Target,\n" "in float4 pos : SV_Position,\n" " in float2 uv0 : TEXCOORD0){\n" "float4 texcol = Tex0.Sample(samp0,uv0);\n" + "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n" }; @@ -99,7 +100,7 @@ const char color_matrix_program_code[] = { const char color_matrix_program_code_msaa[] = { "sampler samp0 : register(s0);\n" "Texture2DMS Tex0 : register(t0);\n" - "uniform float4 cColMatrix[5] : register(c0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" "void main(\n" "out float4 ocol0 : SV_Target,\n" "in float4 pos : SV_Position,\n" @@ -110,6 +111,7 @@ const char color_matrix_program_code_msaa[] = { "for(int i = 0; i < samples; ++i)\n" " texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n" "texcol /= samples;\n" + "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "}\n" }; @@ -117,7 +119,7 @@ const char color_matrix_program_code_msaa[] = { const char depth_matrix_program[] = { "sampler samp0 : register(s0);\n" "Texture2D Tex0 : register(t0);\n" - "uniform float4 cColMatrix[5] : register(c0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" "void main(\n" "out float4 ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" @@ -132,7 +134,7 @@ const char depth_matrix_program[] = { const char depth_matrix_program_msaa[] = { "sampler samp0 : register(s0);\n" "Texture2DMS Tex0 : register(t0);\n" - "uniform float4 cColMatrix[5] : register(c0);\n" + "uniform float4 cColMatrix[7] : register(c0);\n" "void main(\n" "out float4 ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" @@ -214,16 +216,15 @@ unsigned int ps_constant_offset_table[] = { 76, 80, // C_INDTEXSCALE, 8 84, 88, 92, 96, 100, 104, // C_INDTEXMTX, 24 108, 112, // C_FOG, 8 - 116, 120, 124, 128, 132, // C_COLORMATRIX, 20 - 136, 140, 144, 148, 152, // C_PLIGHTS0, 20 - 156, 160, 164, 168, 172, // C_PLIGHTS1, 20 - 176, 180, 184, 188, 192, // C_PLIGHTS2, 20 - 196, 200, 204, 208, 212, // C_PLIGHTS3, 20 - 216, 220, 224, 228, 232, // C_PLIGHTS4, 20 - 236, 240, 244, 248, 252, // C_PLIGHTS5, 20 - 256, 260, 264, 268, 272, // C_PLIGHTS6, 20 - 276, 280, 284, 288, 292, // C_PLIGHTS7, 20 - 296, 300, 304, 308, // C_PMATERIALS, 16 + 116, 120, 124, 128, 132, // C_PLIGHTS0, 20 + 136, 140, 144, 148, 152, // C_PLIGHTS1, 20 + 156, 160, 164, 168, 172, // C_PLIGHTS2, 20 + 176, 180, 184, 188, 192, // C_PLIGHTS3, 20 + 196, 200, 204, 208, 212, // C_PLIGHTS4, 20 + 216, 220, 224, 228, 232, // C_PLIGHTS5, 20 + 236, 240, 244, 248, 252, // C_PLIGHTS6, 20 + 256, 260, 264, 268, 272, // C_PLIGHTS7, 20 + 276, 280, 284, 288, // C_PMATERIALS, 16 }; void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) { diff --git a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp index 7347ebe5d0..f51512282f 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/TextureCache.cpp @@ -43,7 +43,7 @@ namespace DX11 { -#define MAX_COPY_BUFFERS 21 +#define MAX_COPY_BUFFERS 24 ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {}; TextureCache::TCacheEntry::~TCacheEntry() @@ -120,7 +120,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB // set transformation if (NULL == efbcopycbuf[cbufid]) { - const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(20 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); D3D11_SUBRESOURCE_DATA data; data.pSysMem = colmat; HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp index d02913847a..990fadc130 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShaderCache.cpp @@ -170,7 +170,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv WRITE(p, "uniform sampler samp0 : register(s0);\n"); if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) - WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX); + WRITE(p, "uniform float4 cColMatrix[7] : register(c%d);\n", C_COLORMATRIX); WRITE(p, "void main(\n" "out float4 ocol0 : COLOR0,\n"); @@ -208,6 +208,9 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) { + if(depthConversionType == DEPTH_CONVERSION_TYPE_NONE) + WRITE(p, "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"); + WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"); } else @@ -235,8 +238,7 @@ void PixelShaderCache::Init() } int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF); - int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536); - bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants); + int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536); // other screen copy/convert programs for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) @@ -253,11 +255,6 @@ void PixelShaderCache::Init() // so skip this attempt to avoid duplicate error messages. s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; } - else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix) - { - // color matrix not supported, so substitute the nearest equivalent program that doesn't use it. - s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode]; - } else { s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode); @@ -311,6 +308,7 @@ void PixelShaderCache::Shutdown() if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release(); s_rgba6_to_rgb8 = NULL; + Clear(); g_ps_disk_cache.Sync(); g_ps_disk_cache.Close(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 40bd58d816..bd8f554e2e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -285,7 +285,7 @@ Renderer::Renderer() CalculateXYScale(dst_rect); s_LastAA = g_ActiveConfig.iMultisampleMode; - int SupersampleCoeficient = s_LastAA + 1; + int SupersampleCoeficient = (s_LastAA % 3) + 1; s_LastEFBScale = g_ActiveConfig.iEFBScale; CalculateTargetSize(SupersampleCoeficient); @@ -604,10 +604,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) vp.MaxZ = 1.0f; D3D::dev->SetViewport(&vp); - float colmat[16] = {0.0f}; - float fConstAdd[4] = {0.0f}; + float colmat[28] = {0.0f}; colmat[0] = colmat[5] = colmat[10] = 1.0f; - PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation + PixelShaderManager::SetColorMatrix(colmat); // set transformation LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture(); D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); @@ -1080,7 +1079,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons { TargetRectangle targetRc = ConvertEFBRectangle(rc); LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBColorTexture(); - D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode),Gamma); + D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_ActiveConfig.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_ActiveConfig.iMultisampleMode),Gamma); + } D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); @@ -1216,7 +1216,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons CalculateXYScale(dst_rect); - int SupersampleCoeficient = s_LastAA + 1; + int SupersampleCoeficient = (s_LastAA % 3) + 1; s_LastEFBScale = g_ActiveConfig.iEFBScale; CalculateTargetSize(SupersampleCoeficient); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp index 365a80b972..03eeefd169 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureCache.cpp @@ -100,8 +100,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB destrect.right = virtualW; destrect.top = 0; - const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat - PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation + PixelShaderManager::SetColorMatrix(colmat); // set transformation TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); RECT sourcerect; sourcerect.bottom = targetSource.bottom; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp index b5d97d2a7b..8d89f6aceb 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureConverter.cpp @@ -362,7 +362,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf (float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()), - (float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()), + (float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()), Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride), (float)Renderer::GetFullTargetWidth(), @@ -426,7 +426,7 @@ u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture, u32 So (float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()), - (float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()), + (float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()), Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledYf(sampleStride), (float)SourceW, diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp index a24fc93893..c83eeb890a 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderCache.cpp @@ -96,17 +96,22 @@ void PixelShaderCache::Init() glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs); INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs); - char pmatrixprog[1024]; + char pmatrixprog[2048]; sprintf(pmatrixprog, "!!ARBfp1.0" "TEMP R0;\n" "TEMP R1;\n" + "PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n" "TEX R0, fragment.texcoord[0], texture[0], RECT;\n" - "DP4 R1.w, R0, program.env[%d];\n" - "DP4 R1.z, R0, program.env[%d];\n" + "MUL R0, R0, program.env[%d];\n" + "ADD R0, R0, K0;\n" + "FLR R0, R0;\n" + "MUL R0, R0, program.env[%d];\n" "DP4 R1.x, R0, program.env[%d];\n" "DP4 R1.y, R0, program.env[%d];\n" + "DP4 R1.z, R0, program.env[%d];\n" + "DP4 R1.w, R0, program.env[%d];\n" "ADD result.color, R1, program.env[%d];\n" - "END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4); + "END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4); glGenProgramsARB(1, &s_ColorMatrixProgram); SetCurrentShader(s_ColorMatrixProgram); glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); @@ -118,12 +123,13 @@ void PixelShaderCache::Init() s_ColorMatrixProgram = 0; } - sprintf(pmatrixprog, "!!ARBfp1.0\n" + sprintf(pmatrixprog, "!!ARBfp1.0\n" "TEMP R0;\n" "TEMP R1;\n" "TEMP R2;\n" //16777215/16777216*256, 1/255, 256, 0 - "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" + "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" + "PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n" //sample the depth value "TEX R2, fragment.texcoord[0], texture[0], RECT;\n" @@ -138,21 +144,23 @@ void PixelShaderCache::Init() //gives {?, 128/255, 254/255, ?} for depth value 254/255 //on some gpus - "FLR R0.z,R0;\n" //bits 31..24 + "FLR R0.x,R0;\n" //bits 31..24 - "SUB R0.xyw,R0,R0.z;\n" //subtract bits 31..24 from rest - "MUL R0.xyw,R0,K0.z;\n" // *256 + "SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest + "MUL R0.yzw,R0,K0.z;\n" // *256 "FLR R0.y,R0;\n" //bits 23..16 - "SUB R0.xw,R0,R0.y;\n" //subtract bits 23..16 from rest - "MUL R0.xw,R0,K0.z;\n" // *256 - "FLR R0.x,R0;\n" //bits 15..8 + "SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest + "MUL R0.zw,R0,K0.z;\n" // *256 + "FLR R0.z,R0;\n" //bits 15..8 - "SUB R0.w,R0,R0.x;\n" //subtract bits 15..8 from rest - "MUL R0.w,R0,K0.z;\n" // *256 - "FLR R0.w,R0;\n" //bits 7..0 + "MOV R0.w,R0.x;\n" //duplicate bit 31..24 + + "MUL R0,R0,K0.y;\n" // /255 - "MUL R0,R0,K0.y;\n" // /255 + "MUL R0.w,R0,K1.x;\n" // *15 + "FLR R0.w,R0;\n" //bits 31..28 + "MUL R0.w,R0,K1.y;\n" // /15 "DP4 R1.x, R0, program.env[%d];\n" "DP4 R1.y, R0, program.env[%d];\n" @@ -168,7 +176,7 @@ void PixelShaderCache::Init() if (err != GL_NO_ERROR) { ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); glDeleteProgramsARB(1, &s_DepthMatrixProgram); - s_DepthMatrixProgram = 0; + s_DepthMatrixProgram = 0; } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp index 303edf7f81..09f01ae826 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureCache.cpp @@ -297,8 +297,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB glViewport(0, 0, virtualW, virtualH); PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); - const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat - PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation + PixelShaderManager::SetColorMatrix(colmat); // set transformation GL_REPORT_ERRORD(); TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp index 13fb372767..0308d07f9e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureConverter.cpp @@ -285,7 +285,8 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf s32 expandedHeight = (height + blkH) & (~blkH); float sampleStride = bScaleByHalf ? 2.f : 1.f; - TextureConversionShader::SetShaderParameters((float)expandedWidth, + TextureConversionShader::SetShaderParameters( + (float)expandedWidth, (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight),