Normalize the EFB-to-texture process for color textures so that it works the same way in all the plugins, with (almost :)) the same accuracy as real hardware.

Please test for regressions and fixes.
Also some small changes to make the pixel shader more DX9 SM 2.0 friendly. The condition is that per-pixel lighting must not be used (sorry, SM 2.0 hardware does not support the number of parameters it needs).

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6777 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-01-07 19:23:57 +00:00
parent 62b79028ef
commit f869281301
14 changed files with 185 additions and 181 deletions

View File

@ -26,7 +26,7 @@
// shader cache for every revision, graphics-related or not, which is simply annoying. // shader cache for every revision, graphics-related or not, which is simply annoying.
enum enum
{ {
LINEAR_DISKCACHE_VER = 6622 LINEAR_DISKCACHE_VER = 6777
}; };
// On disk format: // On disk format:

View File

@ -31,16 +31,17 @@
#define I_PLIGHTS "cLights" #define I_PLIGHTS "cLights"
#define I_PMATERIALS "cmtrl" #define I_PMATERIALS "cmtrl"
#define C_COLORS 0 #define C_COLORMATRIX 0 // 0
#define C_KCOLORS (C_COLORS + 4) #define C_COLORS 0 // 0
#define C_ALPHA (C_KCOLORS + 4) #define C_KCOLORS (C_COLORS + 4) // 4
#define C_TEXDIMS (C_ALPHA + 1) #define C_ALPHA (C_KCOLORS + 4) // 8
#define C_ZBIAS (C_TEXDIMS + 8) #define C_TEXDIMS (C_ALPHA + 1) // 9
#define C_INDTEXSCALE (C_ZBIAS + 2) #define C_ZBIAS (C_TEXDIMS + 8) //17
#define C_INDTEXMTX (C_INDTEXSCALE + 2) #define C_INDTEXSCALE (C_ZBIAS + 2) //19
#define C_FOG (C_INDTEXMTX + 6) #define C_INDTEXMTX (C_INDTEXSCALE + 2) //21
#define C_COLORMATRIX (C_FOG + 2) #define C_FOG (C_INDTEXMTX + 6) //27
#define C_PLIGHTS (C_COLORMATRIX + 5)
#define C_PLIGHTS (C_FOG + 2)
#define C_PMATERIALS (C_PLIGHTS + 40) #define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4) #define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2) #define PIXELSHADERUID_MAX_VALUES (5 + 32 + 6 + 11 + 2)

View File

@ -395,10 +395,10 @@ void PixelShaderManager::SetFogParamChanged()
s_bFogParamChanged = true; s_bFogParamChanged = true;
} }
void PixelShaderManager::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) void PixelShaderManager::SetColorMatrix(const float* pmatrix)
{ {
SetMultiPSConstant4fv(C_COLORMATRIX,4,pmatrix); SetMultiPSConstant4fv(C_COLORMATRIX,7,pmatrix);
SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd); s_nColorsChanged[0] = s_nColorsChanged[1] = 15;
} }
void PixelShaderManager::InvalidateXFRange(int start, int end) void PixelShaderManager::InvalidateXFRange(int start, int end)

View File

@ -51,7 +51,7 @@ public:
static void SetTexCoordChanged(u8 texmapid); static void SetTexCoordChanged(u8 texmapid);
static void SetFogColorChanged(); static void SetFogColorChanged();
static void SetFogParamChanged(); static void SetFogParamChanged();
static void SetColorMatrix(const float* pmatrix, const float* pfConstAdd); static void SetColorMatrix(const float* pmatrix);
static void InvalidateXFRange(int start, int end); static void InvalidateXFRange(int start, int end);
static void SetMaterialColor(int index, u32 data); static void SetMaterialColor(int index, u32 data);
}; };

View File

@ -414,106 +414,64 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer,
{ {
DVSTARTPROFILE(); DVSTARTPROFILE();
float colmat[20] = {}; float colmat[28] = {0};
// last four floats of colmat for fConstAdd
float *const fConstAdd = colmat + 16; float *const fConstAdd = colmat + 16;
float *const ColorMask = colmat + 20;
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
unsigned int cbufid = -1; unsigned int cbufid = -1;
if (bFromZBuffer) if (bFromZBuffer)
{ {
// TODO: these values differ slightly from the DX9/11 values, switch (copyfmt)
// do they need to? or can this be removed
if (g_ActiveConfig.backend_info.APIType == API_OPENGL)
{ {
switch(copyfmt) case 0: // Z4
{ colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
case 0: // Z4 cbufid = 0;
case 1: // Z8 break;
case 8: // Z8 case 1: // Z8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; case 8: // Z8
break; colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 1;
break;
case 3: // Z16 //? case 3: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
break; cbufid = 13;
break;
case 11: // Z16 (reverse order) case 11: // Z16 (reverse order)
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
break; cbufid = 2;
break;
case 6: // Z24X8 case 6: // Z24X8
colmat[2] = colmat[5] = colmat[8] = colmat[15] = 1; colmat[0] = colmat[5] = colmat[10] = 1.0f;
break; cbufid = 3;
break;
case 9: // Z8M case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
break; cbufid = 4;
break;
case 10: // Z8L case 10: // Z8L
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
break; cbufid = 5;
break;
case 12: // Z16L case 12: // Z16L
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f;
break; cbufid = 6;
break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt); ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; colmat[2] = colmat[5] = colmat[8] = 1.0f;
break; cbufid = 7;
} break;
}
else
{
switch (copyfmt)
{
case 0: // Z4
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1;
cbufid = 20;
break;
case 1: // Z8
case 8: // Z8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 12;
break;
case 3: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
cbufid = 13;
break;
case 11: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 14;
break;
case 6: // Z24X8
colmat[0] = colmat[5] = colmat[10] = 1.0f;
cbufid = 15;
break;
case 9: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 16;
break;
case 10: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 17;
break;
case 12: // Z16L
colmat[2] = colmat[6] = colmat[10] = colmat[13] = 1.0f;
cbufid = 18;
break;
default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", copyfmt);
colmat[2] = colmat[5] = colmat[8] = 1.0f;
cbufid = 19;
break;
}
} }
} }
else if (bIsIntensityFmt) else if (bIsIntensityFmt)
{ {
@ -532,21 +490,40 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer,
if (copyfmt < 2 || copyfmt == 8) if (copyfmt < 2 || copyfmt == 8)
{ {
fConstAdd[3] = 16.0f / 255.0f;
colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f; colmat[12] = 0.257f; colmat[13] = 0.504f; colmat[14] = 0.098f;
cbufid = 0; fConstAdd[3] = 16.0f/255.0f;
if (copyfmt == 0)
{
ColorMask[0] = ColorMask[1] = ColorMask[2] = 15.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
cbufid = 8;
}
else
{
cbufid = 9;
}
} }
else// alpha else// alpha
{ {
colmat[15] = 1; colmat[15] = 1;
cbufid = 1; if (copyfmt == 2)
{
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 15.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
cbufid = 10;
}
else
{
cbufid = 11;
}
} }
break; break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt); ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 23;
break; break;
} }
} }
@ -555,59 +532,79 @@ void TextureCache::CopyRenderTargetToTexture(u32 address, bool bFromZBuffer,
switch (copyfmt) switch (copyfmt)
{ {
case 0: // R4 case 0: // R4
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
ColorMask[0] = 15.0f;
ColorMask[4] = 1.0f / 15.0f;
cbufid = 12;
break;
case 1: // R8 case 1: // R8
case 8: // R8 case 8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
cbufid = 2; cbufid = 13;
break; break;
case 2: // RA4 case 2: // RA4
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[3] = 15.0f;
ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;
cbufid = 14;
break;
case 3: // RA8 case 3: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
cbufid = 3; cbufid = 15;
break; break;
case 7: // A8 case 7: // A8
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1; colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 4; cbufid = 16;
break; break;
case 9: // G8 case 9: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1; colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 5; cbufid = 17;
break; break;
case 10: // B8 case 10: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1; colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 6; cbufid = 18;
break; break;
case 11: // RG8 case 11: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 7; cbufid = 19;
break; break;
case 12: // GB8 case 12: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1; colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 8; cbufid = 20;
break; break;
case 4: // RGB565 case 4: // RGB565
colmat[0] = colmat[5] = colmat[10] = 1; colmat[0] = colmat[5] = colmat[10] = 1.0f;
fConstAdd[3] = 1; // set alpha to 1 ColorMask[0] = ColorMask[2] = 31.0f;
cbufid = 9; ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
ColorMask[1] = 63.0f;
ColorMask[5] = 1.0f / 63.0f;
fConstAdd[3] = 1.0f; // set alpha to 1
cbufid = 21;
break; break;
case 5: // RGB5A3 case 5: // RGB5A3
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[1] = ColorMask[2] = 31.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
ColorMask[3] = 7.0f;
ColorMask[7] = 1.0f / 7.0f;
cbufid = 22;
break;
case 6: // RGBA8 case 6: // RGBA8
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 10; cbufid = 23;
break; break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt); ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", copyfmt);
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 11; cbufid = 23;
break; break;
} }
} }

View File

@ -86,12 +86,13 @@ const char color_copy_program_code_msaa[] = {
const char color_matrix_program_code[] = { const char color_matrix_program_code[] = {
"sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n" "Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : SV_Target,\n" "out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n" "in float4 pos : SV_Position,\n"
" in float2 uv0 : TEXCOORD0){\n" " in float2 uv0 : TEXCOORD0){\n"
"float4 texcol = Tex0.Sample(samp0,uv0);\n" "float4 texcol = Tex0.Sample(samp0,uv0);\n"
"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n" "}\n"
}; };
@ -99,7 +100,7 @@ const char color_matrix_program_code[] = {
const char color_matrix_program_code_msaa[] = { const char color_matrix_program_code_msaa[] = {
"sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2DMS<float4, %d> Tex0 : register(t0);\n" "Texture2DMS<float4, %d> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : SV_Target,\n" "out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n" "in float4 pos : SV_Position,\n"
@ -110,6 +111,7 @@ const char color_matrix_program_code_msaa[] = {
"for(int i = 0; i < samples; ++i)\n" "for(int i = 0; i < samples; ++i)\n"
" texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n" " texcol += Tex0.Load(int2(uv0.x*(width), uv0.y*(height)), i);\n"
"texcol /= samples;\n" "texcol /= samples;\n"
"texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n" "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n" "}\n"
}; };
@ -117,7 +119,7 @@ const char color_matrix_program_code_msaa[] = {
const char depth_matrix_program[] = { const char depth_matrix_program[] = {
"sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2D Tex0 : register(t0);\n" "Texture2D Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : SV_Target,\n" "out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n" " in float4 pos : SV_Position,\n"
@ -132,7 +134,7 @@ const char depth_matrix_program[] = {
const char depth_matrix_program_msaa[] = { const char depth_matrix_program_msaa[] = {
"sampler samp0 : register(s0);\n" "sampler samp0 : register(s0);\n"
"Texture2DMS<float4, %d> Tex0 : register(t0);\n" "Texture2DMS<float4, %d> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[5] : register(c0);\n" "uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n" "void main(\n"
"out float4 ocol0 : SV_Target,\n" "out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n" " in float4 pos : SV_Position,\n"
@ -214,16 +216,15 @@ unsigned int ps_constant_offset_table[] = {
76, 80, // C_INDTEXSCALE, 8 76, 80, // C_INDTEXSCALE, 8
84, 88, 92, 96, 100, 104, // C_INDTEXMTX, 24 84, 88, 92, 96, 100, 104, // C_INDTEXMTX, 24
108, 112, // C_FOG, 8 108, 112, // C_FOG, 8
116, 120, 124, 128, 132, // C_COLORMATRIX, 20 116, 120, 124, 128, 132, // C_PLIGHTS0, 20
136, 140, 144, 148, 152, // C_PLIGHTS0, 20 136, 140, 144, 148, 152, // C_PLIGHTS1, 20
156, 160, 164, 168, 172, // C_PLIGHTS1, 20 156, 160, 164, 168, 172, // C_PLIGHTS2, 20
176, 180, 184, 188, 192, // C_PLIGHTS2, 20 176, 180, 184, 188, 192, // C_PLIGHTS3, 20
196, 200, 204, 208, 212, // C_PLIGHTS3, 20 196, 200, 204, 208, 212, // C_PLIGHTS4, 20
216, 220, 224, 228, 232, // C_PLIGHTS4, 20 216, 220, 224, 228, 232, // C_PLIGHTS5, 20
236, 240, 244, 248, 252, // C_PLIGHTS5, 20 236, 240, 244, 248, 252, // C_PLIGHTS6, 20
256, 260, 264, 268, 272, // C_PLIGHTS6, 20 256, 260, 264, 268, 272, // C_PLIGHTS7, 20
276, 280, 284, 288, 292, // C_PLIGHTS7, 20 276, 280, 284, 288, // C_PMATERIALS, 16
296, 300, 304, 308, // C_PMATERIALS, 16
}; };
void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{ {

View File

@ -43,7 +43,7 @@
namespace DX11 namespace DX11
{ {
#define MAX_COPY_BUFFERS 21 #define MAX_COPY_BUFFERS 24
ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {}; ID3D11Buffer* efbcopycbuf[MAX_COPY_BUFFERS] = {};
TextureCache::TCacheEntry::~TCacheEntry() TextureCache::TCacheEntry::~TCacheEntry()
@ -120,7 +120,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
// set transformation // set transformation
if (NULL == efbcopycbuf[cbufid]) if (NULL == efbcopycbuf[cbufid])
{ {
const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(20 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); const D3D11_BUFFER_DESC cbdesc = CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data; D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat; data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]); HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &efbcopycbuf[cbufid]);

View File

@ -170,7 +170,7 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
WRITE(p, "uniform sampler samp0 : register(s0);\n"); WRITE(p, "uniform sampler samp0 : register(s0);\n");
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
WRITE(p, "uniform float4 cColMatrix[5] : register(c%d);\n", C_COLORMATRIX); WRITE(p, "uniform float4 cColMatrix[7] : register(c%d);\n", C_COLORMATRIX);
WRITE(p, "void main(\n" WRITE(p, "void main(\n"
"out float4 ocol0 : COLOR0,\n"); "out float4 ocol0 : COLOR0,\n");
@ -208,6 +208,9 @@ static LPDIRECT3DPIXELSHADER9 CreateCopyShader(int copyMatrixType, int depthConv
if(copyMatrixType == COPY_TYPE_MATRIXCOLOR) if(copyMatrixType == COPY_TYPE_MATRIXCOLOR)
{ {
if(depthConversionType == DEPTH_CONVERSION_TYPE_NONE)
WRITE(p, "texcol = round(texcol * cColMatrix[5])*cColMatrix[6];\n");
WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n"); WRITE(p, "ocol0 = float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + cColMatrix[4];\n");
} }
else else
@ -235,8 +238,7 @@ void PixelShaderCache::Init()
} }
int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF); int shaderModel = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536); int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
bool canUseColorMatrix = (C_COLORMATRIX + 5 <= maxConstants);
// other screen copy/convert programs // other screen copy/convert programs
for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++) for(int copyMatrixType = 0; copyMatrixType < NUM_COPY_TYPES; copyMatrixType++)
@ -253,11 +255,6 @@ void PixelShaderCache::Init()
// so skip this attempt to avoid duplicate error messages. // so skip this attempt to avoid duplicate error messages.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL; s_CopyProgram[copyMatrixType][depthType][ssaaMode] = NULL;
} }
else if(copyMatrixType == COPY_TYPE_MATRIXCOLOR && !canUseColorMatrix)
{
// color matrix not supported, so substitute the nearest equivalent program that doesn't use it.
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = s_CopyProgram[COPY_TYPE_DIRECT][depthType][ssaaMode];
}
else else
{ {
s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode); s_CopyProgram[copyMatrixType][depthType][ssaaMode] = CreateCopyShader(copyMatrixType, depthType, ssaaMode);
@ -311,6 +308,7 @@ void PixelShaderCache::Shutdown()
if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release(); if (s_rgba6_to_rgb8) s_rgba6_to_rgb8->Release();
s_rgba6_to_rgb8 = NULL; s_rgba6_to_rgb8 = NULL;
Clear(); Clear();
g_ps_disk_cache.Sync(); g_ps_disk_cache.Sync();
g_ps_disk_cache.Close(); g_ps_disk_cache.Close();

View File

@ -285,7 +285,7 @@ Renderer::Renderer()
CalculateXYScale(dst_rect); CalculateXYScale(dst_rect);
s_LastAA = g_ActiveConfig.iMultisampleMode; s_LastAA = g_ActiveConfig.iMultisampleMode;
int SupersampleCoeficient = s_LastAA + 1; int SupersampleCoeficient = (s_LastAA % 3) + 1;
s_LastEFBScale = g_ActiveConfig.iEFBScale; s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(SupersampleCoeficient); CalculateTargetSize(SupersampleCoeficient);
@ -604,10 +604,9 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
vp.MaxZ = 1.0f; vp.MaxZ = 1.0f;
D3D::dev->SetViewport(&vp); D3D::dev->SetViewport(&vp);
float colmat[16] = {0.0f}; float colmat[28] = {0.0f};
float fConstAdd[4] = {0.0f};
colmat[0] = colmat[5] = colmat[10] = 1.0f; colmat[0] = colmat[5] = colmat[10] = 1.0f;
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation PixelShaderManager::SetColorMatrix(colmat); // set transformation
LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture(); LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBDepthTexture();
D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT); D3D::ChangeSamplerState(0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
@ -1080,7 +1079,8 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
{ {
TargetRectangle targetRc = ConvertEFBRectangle(rc); TargetRectangle targetRc = ConvertEFBRectangle(rc);
LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBColorTexture(); LPDIRECT3DTEXTURE9 read_texture = FramebufferManager::GetEFBColorTexture();
D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_Config.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_Config.iMultisampleMode),Gamma); D3D::drawShadedTexQuad(read_texture,targetRc.AsRECT(),Renderer::GetFullTargetWidth(),Renderer::GetFullTargetHeight(),Width,Height,PixelShaderCache::GetColorCopyProgram(g_ActiveConfig.iMultisampleMode),VertexShaderCache::GetSimpleVertexShader(g_ActiveConfig.iMultisampleMode),Gamma);
} }
D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MINFILTER);
D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER); D3D::RefreshSamplerState(0, D3DSAMP_MAGFILTER);
@ -1216,7 +1216,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
CalculateXYScale(dst_rect); CalculateXYScale(dst_rect);
int SupersampleCoeficient = s_LastAA + 1; int SupersampleCoeficient = (s_LastAA % 3) + 1;
s_LastEFBScale = g_ActiveConfig.iEFBScale; s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(SupersampleCoeficient); CalculateTargetSize(SupersampleCoeficient);

View File

@ -100,8 +100,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
destrect.right = virtualW; destrect.right = virtualW;
destrect.top = 0; destrect.top = 0;
const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat PixelShaderManager::SetColorMatrix(colmat); // set transformation
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);
RECT sourcerect; RECT sourcerect;
sourcerect.bottom = targetSource.bottom; sourcerect.bottom = targetSource.bottom;

View File

@ -362,7 +362,7 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
(float)expandedWidth, (float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()), (float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()), (float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledXf(sampleStride),
Renderer::EFBToScaledYf(sampleStride), Renderer::EFBToScaledYf(sampleStride),
(float)Renderer::GetFullTargetWidth(), (float)Renderer::GetFullTargetWidth(),
@ -426,7 +426,7 @@ u64 EncodeToRamFromTexture(u32 address,LPDIRECT3DTEXTURE9 source_texture, u32 So
(float)expandedWidth, (float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()), (float)(Renderer::EFBToScaledX(source.left) + Renderer::TargetStrideX()),
(float)(Renderer::EFBToScaledY(source.top) + Renderer::TargetStrideY()), (float)(Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight) + Renderer::TargetStrideY()),
Renderer::EFBToScaledXf(sampleStride), Renderer::EFBToScaledXf(sampleStride),
Renderer::EFBToScaledYf(sampleStride), Renderer::EFBToScaledYf(sampleStride),
(float)SourceW, (float)SourceW,

View File

@ -96,17 +96,22 @@ void PixelShaderCache::Init()
glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs); glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB, (GLint *)&maxattribs);
INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs); INFO_LOG(VIDEO, "pixel max_alu=%d, max_inst=%d, max_attrib=%d", s_nMaxPixelInstructions, maxinst, maxattribs);
char pmatrixprog[1024]; char pmatrixprog[2048];
sprintf(pmatrixprog, "!!ARBfp1.0" sprintf(pmatrixprog, "!!ARBfp1.0"
"TEMP R0;\n" "TEMP R0;\n"
"TEMP R1;\n" "TEMP R1;\n"
"PARAM K0 = { 0.5, 0.5, 0.5, 0.5};\n"
"TEX R0, fragment.texcoord[0], texture[0], RECT;\n" "TEX R0, fragment.texcoord[0], texture[0], RECT;\n"
"DP4 R1.w, R0, program.env[%d];\n" "MUL R0, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n" "ADD R0, R0, K0;\n"
"FLR R0, R0;\n"
"MUL R0, R0, program.env[%d];\n"
"DP4 R1.x, R0, program.env[%d];\n" "DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n" "DP4 R1.y, R0, program.env[%d];\n"
"DP4 R1.z, R0, program.env[%d];\n"
"DP4 R1.w, R0, program.env[%d];\n"
"ADD result.color, R1, program.env[%d];\n" "ADD result.color, R1, program.env[%d];\n"
"END\n", C_COLORMATRIX+3, C_COLORMATRIX+2, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+4); "END\n",C_COLORMATRIX+5,C_COLORMATRIX+6, C_COLORMATRIX, C_COLORMATRIX+1, C_COLORMATRIX+2, C_COLORMATRIX+3, C_COLORMATRIX+4);
glGenProgramsARB(1, &s_ColorMatrixProgram); glGenProgramsARB(1, &s_ColorMatrixProgram);
SetCurrentShader(s_ColorMatrixProgram); SetCurrentShader(s_ColorMatrixProgram);
glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog); glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei)strlen(pmatrixprog), pmatrixprog);
@ -118,12 +123,13 @@ void PixelShaderCache::Init()
s_ColorMatrixProgram = 0; s_ColorMatrixProgram = 0;
} }
sprintf(pmatrixprog, "!!ARBfp1.0\n" sprintf(pmatrixprog, "!!ARBfp1.0\n"
"TEMP R0;\n" "TEMP R0;\n"
"TEMP R1;\n" "TEMP R1;\n"
"TEMP R2;\n" "TEMP R2;\n"
//16777215/16777216*256, 1/255, 256, 0 //16777215/16777216*256, 1/255, 256, 0
"PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n" "PARAM K0 = { 255.99998474121, 0.003921568627451, 256.0, 0.0};\n"
"PARAM K1 = { 15.0, 0.066666666666, 0.0, 0.0};\n"
//sample the depth value //sample the depth value
"TEX R2, fragment.texcoord[0], texture[0], RECT;\n" "TEX R2, fragment.texcoord[0], texture[0], RECT;\n"
@ -138,21 +144,23 @@ void PixelShaderCache::Init()
//gives {?, 128/255, 254/255, ?} for depth value 254/255 //gives {?, 128/255, 254/255, ?} for depth value 254/255
//on some gpus //on some gpus
"FLR R0.z,R0;\n" //bits 31..24 "FLR R0.x,R0;\n" //bits 31..24
"SUB R0.xyw,R0,R0.z;\n" //subtract bits 31..24 from rest "SUB R0.yzw,R0,R0.x;\n" //subtract bits 31..24 from rest
"MUL R0.xyw,R0,K0.z;\n" // *256 "MUL R0.yzw,R0,K0.z;\n" // *256
"FLR R0.y,R0;\n" //bits 23..16 "FLR R0.y,R0;\n" //bits 23..16
"SUB R0.xw,R0,R0.y;\n" //subtract bits 23..16 from rest "SUB R0.zw,R0,R0.y;\n" //subtract bits 23..16 from rest
"MUL R0.xw,R0,K0.z;\n" // *256 "MUL R0.zw,R0,K0.z;\n" // *256
"FLR R0.x,R0;\n" //bits 15..8 "FLR R0.z,R0;\n" //bits 15..8
"SUB R0.w,R0,R0.x;\n" //subtract bits 15..8 from rest "MOV R0.w,R0.x;\n" //duplicate bit 31..24
"MUL R0.w,R0,K0.z;\n" // *256
"FLR R0.w,R0;\n" //bits 7..0 "MUL R0,R0,K0.y;\n" // /255
"MUL R0,R0,K0.y;\n" // /255 "MUL R0.w,R0,K1.x;\n" // *15
"FLR R0.w,R0;\n" //bits 31..28
"MUL R0.w,R0,K1.y;\n" // /15
"DP4 R1.x, R0, program.env[%d];\n" "DP4 R1.x, R0, program.env[%d];\n"
"DP4 R1.y, R0, program.env[%d];\n" "DP4 R1.y, R0, program.env[%d];\n"
@ -168,7 +176,7 @@ void PixelShaderCache::Init()
if (err != GL_NO_ERROR) { if (err != GL_NO_ERROR) {
ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program"); ERROR_LOG(VIDEO, "Failed to create depth matrix fragment program");
glDeleteProgramsARB(1, &s_DepthMatrixProgram); glDeleteProgramsARB(1, &s_DepthMatrixProgram);
s_DepthMatrixProgram = 0; s_DepthMatrixProgram = 0;
} }
} }

View File

@ -297,8 +297,7 @@ void TextureCache::TCacheEntry::FromRenderTarget(bool bFromZBuffer, bool bScaleB
glViewport(0, 0, virtualW, virtualH); glViewport(0, 0, virtualW, virtualH);
PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram()); PixelShaderCache::SetCurrentShader(bFromZBuffer ? PixelShaderCache::GetDepthMatrixProgram() : PixelShaderCache::GetColorMatrixProgram());
const float* const fConstAdd = colmat + 16; // fConstAdd is the last 4 floats of colmat PixelShaderManager::SetColorMatrix(colmat); // set transformation
PixelShaderManager::SetColorMatrix(colmat, fConstAdd); // set transformation
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect); TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(source_rect);

View File

@ -285,7 +285,8 @@ void EncodeToRam(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyf
s32 expandedHeight = (height + blkH) & (~blkH); s32 expandedHeight = (height + blkH) & (~blkH);
float sampleStride = bScaleByHalf ? 2.f : 1.f; float sampleStride = bScaleByHalf ? 2.f : 1.f;
TextureConversionShader::SetShaderParameters((float)expandedWidth, TextureConversionShader::SetShaderParameters(
(float)expandedWidth,
(float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this? (float)Renderer::EFBToScaledY(expandedHeight), // TODO: Why do we scale this?
(float)Renderer::EFBToScaledX(source.left), (float)Renderer::EFBToScaledX(source.left),
(float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight), (float)Renderer::EFBToScaledY(EFB_HEIGHT - source.top - expandedHeight),