A small optimization in shader constant setting, based on the same idea as nodchip's changes.
A MAJOR change in pixel shader generation: please review all the games you can and leave comments. This should improve accuracy in graphics emulation a lot. For example: Mario's eyes in Super Mario Galaxy, and the water pod transparency and the water transparency in the game intro of Super Mario Sunshine, etc. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5457 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
bdbdef51ee
commit
7947543d98
|
@ -18,7 +18,7 @@
|
|||
#include "LinearDiskCache.h"
|
||||
|
||||
static const char ID[4] = {'D', 'C', 'A', 'C'};
|
||||
const int version = 4888; // TODO: Get from SVN_REV
|
||||
const int version = 5457; // TODO: Get from SVN_REV
|
||||
|
||||
LinearDiskCache::LinearDiskCache()
|
||||
: file_(NULL), num_entries_(0) {
|
||||
|
|
|
@ -157,13 +157,13 @@ const float epsilon8bit = 1.0f / 255.0f;
|
|||
static const char *tevKSelTableC[] = // KCSEL
|
||||
{
|
||||
"1.0f,1.0f,1.0f", // 1 = 0x00
|
||||
"0.875f,0.875f,0.875f", // 7_8 = 0x01
|
||||
"0.75f,0.75f,0.75f", // 3_4 = 0x02
|
||||
"0.625f,0.625f,0.625f", // 5_8 = 0x03
|
||||
"0.5f,0.5f,0.5f", // 1_2 = 0x04
|
||||
"0.375f,0.375f,0.375f", // 3_8 = 0x05
|
||||
"0.25f,0.25f,0.25f", // 1_4 = 0x06
|
||||
"0.125f,0.125f,0.125f", // 1_8 = 0x07
|
||||
"0.8745098f,0.8745098f,0.8745098f", // 7_8 = 0x01
|
||||
"0.7490196f,0.7490196f,0.7490196f", // 3_4 = 0x02
|
||||
"0.6235294f,0.6235294f,0.6235294f", // 5_8 = 0x03
|
||||
"0.4980392f,0.4980392f,0.4980392f", // 1_2 = 0x04
|
||||
"0.372549f,0.372549f,0.372549f", // 3_8 = 0x05
|
||||
"0.2470588f,0.2470588f,0.2470588f", // 1_4 = 0x06
|
||||
"0.1215686f,0.1215686f,0.1215686f", // 1_8 = 0x07
|
||||
"ERROR", // 0x08
|
||||
"ERROR", // 0x09
|
||||
"ERROR", // 0x0a
|
||||
|
@ -193,13 +193,13 @@ static const char *tevKSelTableC[] = // KCSEL
|
|||
static const char *tevKSelTableA[] = // KASEL
|
||||
{
|
||||
"1.0f", // 1 = 0x00
|
||||
"0.875f",// 7_8 = 0x01
|
||||
"0.75f", // 3_4 = 0x02
|
||||
"0.625f",// 5_8 = 0x03
|
||||
"0.5f", // 1_2 = 0x04
|
||||
"0.375f",// 3_8 = 0x05
|
||||
"0.25f", // 1_4 = 0x06
|
||||
"0.125f",// 1_8 = 0x07
|
||||
"0.8745098f",// 7_8 = 0x01
|
||||
"0.7490196f", // 3_4 = 0x02
|
||||
"0.6235294f",// 5_8 = 0x03
|
||||
"0.4980392f", // 1_2 = 0x04
|
||||
"0.372549f",// 3_8 = 0x05
|
||||
"0.2470588f", // 1_4 = 0x06
|
||||
"0.1215686f",// 1_8 = 0x07
|
||||
"ERROR", // 0x08
|
||||
"ERROR", // 0x09
|
||||
"ERROR", // 0x0a
|
||||
|
@ -237,8 +237,8 @@ static const char *tevScaleTable[] = // CS
|
|||
static const char *tevBiasTable[] = // TB
|
||||
{
|
||||
"", // ZERO,
|
||||
"+0.5f", // ADDHALF,
|
||||
"-0.5f", // SUBHALF,
|
||||
"+0.4980392f", // ADDHALF,
|
||||
"-0.4980392f", // SUBHALF,
|
||||
"",
|
||||
};
|
||||
|
||||
|
@ -269,7 +269,7 @@ static const char *tevCInputTable[] = // CC
|
|||
"rastemp.rgb", // RASC,
|
||||
"rastemp.aaa", // RASA,
|
||||
"float3(1.0f,1.0f,1.0f)", // ONE,
|
||||
"float3(0.5f,0.5f,0.5f)", // HALF,
|
||||
"float3(0.4980392f,0.4980392f,0.4980392f)", // HALF,
|
||||
"konsttemp.rgb", // KONST,
|
||||
"float3(0.0f,0.0f,0.0f)", // ZERO
|
||||
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
|
||||
|
@ -294,7 +294,7 @@ static const char *tevCInputTable2[] = // CC
|
|||
"rastemp", // RASC,
|
||||
"(rastemp.aaa)", // RASA,
|
||||
"float3(1.0f,1.0f,1.0f)", // ONE
|
||||
"float3(0.5f,0.5f,0.5f)", // HALF
|
||||
"float3(0.4980392f,0.4980392f,0.4980392f)", // HALF
|
||||
"konsttemp", //"konsttemp.rgb", // KONST
|
||||
"float3(0.0f,0.0f,0.0f)", // ZERO
|
||||
"PADERROR", "PADERROR", "PADERROR", "PADERROR",
|
||||
|
@ -350,8 +350,8 @@ static const char *tevRasTable[] =
|
|||
|
||||
static const char *alphaRef[2] =
|
||||
{
|
||||
I_ALPHA"[0].x",
|
||||
I_ALPHA"[0].y"
|
||||
I_ALPHA"[0].r",
|
||||
I_ALPHA"[0].g"
|
||||
};
|
||||
|
||||
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
|
||||
|
@ -521,8 +521,15 @@ const char *GeneratePixelShaderCode(u32 texture_mask, bool dstAlphaEnable, u32 H
|
|||
|
||||
for (int i = 0; i < numStages; i++)
|
||||
WriteStage(p, i, texture_mask,HLSL); //build the equation for this stage
|
||||
WRITE(p, "prev = saturate(prev);\n");
|
||||
|
||||
// emulate unsigned 8-bit overflow (wrap-around) when casting
|
||||
if(HLSL)
|
||||
{
|
||||
WRITE(p, "prev = ((((prev * 255.0f) %% 256.0f) + 256.0f) %% 256.0f) / 255.0f;\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
WRITE(p, "prev = mod(mod(prev * 255.0f,256.0f) + 256.0f,256.0f) / 255.0f;\n");
|
||||
}
|
||||
|
||||
if (!WriteAlphaTest(p, HLSL))
|
||||
{
|
||||
|
@ -586,11 +593,11 @@ static const char *TEVCMPColorOPTable[16] =
|
|||
"float3(0.0f,0.0f,0.0f)",//5
|
||||
"float3(0.0f,0.0f,0.0f)",//6
|
||||
"float3(0.0f,0.0f,0.0f)",//7
|
||||
" %s + ((%s.r > %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
|
||||
" %s + ((%s.r >= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_GT 8
|
||||
" %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_R8_EQ 9
|
||||
" %s + (( dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
|
||||
" %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_GT 10
|
||||
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_GR16_EQ 11
|
||||
" %s + (( dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
|
||||
" %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_GT 12
|
||||
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f,0.0f,0.0f))",//#define TEVCMP_BGR24_EQ 13
|
||||
" %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)),float3(0.0f,0.0f,0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
|
||||
" %s + ((float3(1.0f,1.0f,1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)),float3(0.0f,0.0f,0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
|
||||
|
@ -607,13 +614,13 @@ static const char *TEVCMPAlphaOPTable[16] =
|
|||
"0.0f",//5
|
||||
"0.0f",//6
|
||||
"0.0f",//7
|
||||
" %s + ((%s.r > (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
|
||||
" %s + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_R8_GT 8
|
||||
" %s + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_R8_EQ 9
|
||||
" %s + ((dot(%s.rgb, comp16) > (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
|
||||
" %s + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_GR16_GT 10
|
||||
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_GR16_EQ 11
|
||||
" %s + ((dot(%s.rgb, comp24) > (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
|
||||
" %s + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_BGR24_GT 12
|
||||
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : 0.0f)",//#define TEVCMP_BGR24_EQ 13
|
||||
" %s + ((%s.a > (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
|
||||
" %s + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s : 0.0f)",//#define TEVCMP_A8_GT 14
|
||||
" %s + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s : 0.0f)"//#define TEVCMP_A8_EQ 15
|
||||
|
||||
};
|
||||
|
@ -777,7 +784,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
|
|||
}
|
||||
if (cc.clamp)
|
||||
WRITE(p,")");
|
||||
WRITE(p,";\n");
|
||||
WRITE(p,";\n");
|
||||
|
||||
// combine the alpha channel
|
||||
if (ac.clamp)
|
||||
|
@ -813,6 +820,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
|
|||
|
||||
if (ac.shift>0)
|
||||
WRITE(p, ")");
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -826,7 +834,7 @@ static void WriteStage(char *&p, int n, u32 texture_mask, u32 HLSL)
|
|||
}
|
||||
if (ac.clamp)
|
||||
WRITE(p, ")");
|
||||
WRITE(p, ";\n\n");
|
||||
WRITE(p, ";\n\n");
|
||||
}
|
||||
|
||||
void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, u32 texture_mask, u32 HLSL)
|
||||
|
|
|
@ -396,10 +396,7 @@ void PixelShaderManager::SetFogParamChanged()
|
|||
|
||||
void PixelShaderManager::SetColorMatrix(const float* pmatrix, const float* pfConstAdd)
|
||||
{
|
||||
SetPSConstant4fv(C_COLORMATRIX, pmatrix);
|
||||
SetPSConstant4fv(C_COLORMATRIX+1, pmatrix+4);
|
||||
SetPSConstant4fv(C_COLORMATRIX+2, pmatrix+8);
|
||||
SetPSConstant4fv(C_COLORMATRIX+3, pmatrix+12);
|
||||
SetMultiPSConstant4fv(C_COLORMATRIX,4,pmatrix);
|
||||
SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd);
|
||||
}
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ void VideoConfig::Load(const char *ini_file)
|
|||
iniFile.Get("Settings", "wideScreenHack", &bWidescreenHack, false);
|
||||
iniFile.Get("Settings", "AspectRatio", &iAspectRatio, (int)ASPECT_AUTO);
|
||||
iniFile.Get("Settings", "Crop", &bCrop, false);
|
||||
iniFile.Get("Settings", "UseXFB", &bUseXFB, true);
|
||||
iniFile.Get("Settings", "UseXFB", &bUseXFB, 0);
|
||||
iniFile.Get("Settings", "UseRealXFB", &bUseRealXFB, 0);
|
||||
iniFile.Get("Settings", "AutoScale", &bAutoScale, true);
|
||||
iniFile.Get("Settings", "UseNativeMips", &bUseNativeMips, true);
|
||||
|
|
|
@ -72,27 +72,30 @@ LPDIRECT3DPIXELSHADER9 PixelShaderCache::GetClearProgram()
|
|||
void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
||||
{
|
||||
if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
|
||||
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4 )
|
||||
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4)
|
||||
{
|
||||
const float f[4] = {f1, f2, f3, f4};
|
||||
D3D::dev->SetPixelShaderConstantF(const_number, f, 1);
|
||||
lastPSconstants[const_number][0] = f1;
|
||||
lastPSconstants[const_number][1] = f2;
|
||||
lastPSconstants[const_number][2] = f3;
|
||||
lastPSconstants[const_number][3] = f4;
|
||||
}
|
||||
D3D::dev->SetPixelShaderConstantF(const_number, lastPSconstants[const_number], 1);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SetPSConstant4fv(int const_number, const float *f)
|
||||
{
|
||||
if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] ||
|
||||
lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3] )
|
||||
{
|
||||
if (memcmp(&lastPSconstants[const_number], f, sizeof(float) * 4)) {
|
||||
memcpy(&lastPSconstants[const_number], f, sizeof(float) * 4);
|
||||
D3D::dev->SetPixelShaderConstantF(const_number, f, 1);
|
||||
lastPSconstants[const_number][0] = f[0];
|
||||
lastPSconstants[const_number][1] = f[1];
|
||||
lastPSconstants[const_number][2] = f[2];
|
||||
lastPSconstants[const_number][3] = f[3];
|
||||
}
|
||||
}
|
||||
|
||||
// Uploads `count` consecutive float4 pixel shader constants starting at
// register `const_number`, reading count * 4 floats from `f`.
// The lastPSconstants cache is compared against `f` with a single memcmp over
// the whole range; the device call is skipped when every byte matches.
// NOTE(review): no bounds check on const_number + count is visible here;
// presumably callers stay within the size of lastPSconstants - confirm.
void SetMultiPSConstant4fv(int const_number, int count, const float *f)
|
||||
{
|
||||
// Bitwise comparison of the cached range against the incoming values.
if (memcmp(&lastPSconstants[const_number], f, count * sizeof(float) * 4)) {
|
||||
// Refresh the cache first, then send the whole range in one call.
memcpy(&lastPSconstants[const_number], f, count * sizeof(float) * 4);
|
||||
D3D::dev->SetPixelShaderConstantF(const_number, f, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,7 +68,6 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
|||
lastVSconstants[const_number][2] != f3 ||
|
||||
lastVSconstants[const_number][3] != f4)
|
||||
{
|
||||
const float f[4] = {f1, f2, f3, f4};
|
||||
lastVSconstants[const_number][0] = f1;
|
||||
lastVSconstants[const_number][1] = f2;
|
||||
lastVSconstants[const_number][2] = f3;
|
||||
|
@ -79,17 +78,10 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
|||
|
||||
void SetVSConstant4fv(int const_number, const float *f)
|
||||
{
|
||||
if (lastVSconstants[const_number][0] != f[0] ||
|
||||
lastVSconstants[const_number][1] != f[1] ||
|
||||
lastVSconstants[const_number][2] != f[2] ||
|
||||
lastVSconstants[const_number][3] != f[3])
|
||||
{
|
||||
lastVSconstants[const_number][0] = f[0];
|
||||
lastVSconstants[const_number][1] = f[1];
|
||||
lastVSconstants[const_number][2] = f[2];
|
||||
lastVSconstants[const_number][3] = f[3];
|
||||
if (memcmp(&lastVSconstants[const_number], f, sizeof(float) * 4)) {
|
||||
memcpy(&lastVSconstants[const_number], f, sizeof(float) * 4);
|
||||
D3D::dev->SetVertexShaderConstantF(const_number, lastVSconstants[const_number], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SetMultiVSConstant3fv(int const_number, int count, const float *f)
|
||||
|
|
|
@ -53,24 +53,32 @@ void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
|||
if (lastPSconstants[const_number][0] != f1 || lastPSconstants[const_number][1] != f2 ||
|
||||
lastPSconstants[const_number][2] != f3 || lastPSconstants[const_number][3] != f4)
|
||||
{
|
||||
glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f1, f2, f3, f4);
|
||||
lastPSconstants[const_number][0] = f1;
|
||||
lastPSconstants[const_number][1] = f2;
|
||||
lastPSconstants[const_number][2] = f3;
|
||||
lastPSconstants[const_number][3] = f4;
|
||||
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, lastPSconstants[const_number]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SetPSConstant4fv(int const_number, const float *f)
|
||||
{
|
||||
if (lastPSconstants[const_number][0] != f[0] || lastPSconstants[const_number][1] != f[1] ||
|
||||
lastPSconstants[const_number][2] != f[2] || lastPSconstants[const_number][3] != f[3])
|
||||
{
|
||||
if (memcmp(&lastPSconstants[const_number], f, sizeof(float) * 4)) {
|
||||
memcpy(&lastPSconstants[const_number], f, sizeof(float) * 4);
|
||||
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f);
|
||||
lastPSconstants[const_number][0] = f[0];
|
||||
lastPSconstants[const_number][1] = f[1];
|
||||
lastPSconstants[const_number][2] = f[2];
|
||||
lastPSconstants[const_number][3] = f[3];
|
||||
}
|
||||
}
|
||||
|
||||
// Uploads `count` consecutive float4 fragment program parameters starting at
// index `const_number`, reading count * 4 floats from `f`.
// Each float4 is compared against the lastPSconstants cache on its own, so
// only the registers whose bytes differ are re-sent to GL.
// NOTE(review): no bounds check on const_number + count is visible here;
// presumably callers stay within the size of lastPSconstants - confirm.
void SetMultiPSConstant4fv(int const_number, int count, const float *f)
|
||||
{
|
||||
// f0 walks the input four floats (one register) at a time.
const float *f0 = f;
|
||||
for (int i = 0; i < count ;i++,f0+=4)
|
||||
{
|
||||
// Bitwise comparison of this register's cached value with the new one.
if (memcmp(&lastPSconstants[const_number + i], f0, sizeof(float) * 4)) {
|
||||
memcpy(&lastPSconstants[const_number + i], f0, sizeof(float) * 4);
|
||||
// The upload reads from the cache entry that was just refreshed.
glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number + i, lastPSconstants[const_number + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -63,32 +63,19 @@ void SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4)
|
|||
|
||||
void SetVSConstant4fv(int const_number, const float *f)
|
||||
{
|
||||
if (lastVSconstants[const_number][0] != f[0] ||
|
||||
lastVSconstants[const_number][1] != f[1] ||
|
||||
lastVSconstants[const_number][2] != f[2] ||
|
||||
lastVSconstants[const_number][3] != f[3])
|
||||
{
|
||||
lastVSconstants[const_number][0] = f[0];
|
||||
lastVSconstants[const_number][1] = f[1];
|
||||
lastVSconstants[const_number][2] = f[2];
|
||||
lastVSconstants[const_number][3] = f[3];
|
||||
if (memcmp(&lastVSconstants[const_number], f, sizeof(float) * 4)) {
|
||||
memcpy(&lastVSconstants[const_number], f, sizeof(float) * 4);
|
||||
glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number, lastVSconstants[const_number]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SetMultiVSConstant4fv(int const_number, int count, const float *f)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
const float *f0 = f;
|
||||
for (int i = 0; i < count; i++,f0+=4)
|
||||
{
|
||||
if (lastVSconstants[const_number + i][0] != f[0 + i*4] ||
|
||||
lastVSconstants[const_number + i][1] != f[1 + i*4] ||
|
||||
lastVSconstants[const_number + i][2] != f[2 + i*4] ||
|
||||
lastVSconstants[const_number + i][3] != f[3 + i*4])
|
||||
{
|
||||
lastVSconstants[const_number + i][0] = f[0 + i*4];
|
||||
lastVSconstants[const_number + i][1] = f[1 + i*4];
|
||||
lastVSconstants[const_number + i][2] = f[2 + i*4];
|
||||
lastVSconstants[const_number + i][3] = f[3 + i*4];
|
||||
if (memcmp(&lastVSconstants[const_number + i], f0, sizeof(float) * 4)) {
|
||||
memcpy(&lastVSconstants[const_number + i], f0, sizeof(float) * 4);
|
||||
glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number + i, lastVSconstants[const_number + i]);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue