Various fixes and cleanups.

This commit is contained in:
NeoBrainX 2011-09-08 02:09:44 +02:00
parent 98b62d8362
commit 6c7bda6851
5 changed files with 40 additions and 162 deletions

View File

@ -21,12 +21,10 @@
#include "Common.h" #include "Common.h"
#include <fstream> #include <fstream>
// Update this to the current SVN revision every time you change shader generation code. // Increment this every time you change shader generation code.
// We don't automatically get this from SVN_REV because that would mean regenerating the
// shader cache for every revision, graphics-related or not, which is simply annoying.
enum enum
{ {
LINEAR_DISKCACHE_VER = 6964 LINEAR_DISKCACHE_VER = 6965
}; };
// On disk format: // On disk format:

View File

@ -100,14 +100,16 @@ static void StageHash(int stage, u32* out)
} }
} }
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode) void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
{ {
uid->values[0] |= bpmem.genMode.numtevstages; // 4 uid->values[0] |= bpmem.genMode.numtevstages; // 4
uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4 uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
uid->values[0] |= dstAlphaMode << 8; // 2 uid->values[0] |= dstAlphaMode << 8; // 2
uid->tevstages = bpmem.genMode.numtevstages;
bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth; bool DepthTextureEnable = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth;
uid->values[0] |= DepthTextureEnable << 10; // 1 uid->values[0] |= DepthTextureEnable << 10; // 1
@ -124,7 +126,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
{ {
// Courtesy of PreAlphaTest, we're done already ;) // Courtesy of PreAlphaTest, we're done already ;)
// TODO: There's a comment including bpmem.genmode.numindstages.. shouldn't really bother about that though. // TODO: There's a comment including bpmem.genmode.numindstages.. shouldn't really bother about that though.
uid->tevstages = 1; uid->num_values = 1;
return; return;
} }
@ -141,8 +143,6 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1 uid->values[1] |= xfregs.texMtxInfo[i].projection << (i - 14); // 1
} }
uid->indstages = bpmem.genMode.numindstages;
uid->values[1] = bpmem.genMode.numindstages << 2; // 3 uid->values[1] = bpmem.genMode.numindstages << 2; // 3
u32 indirectStagesUsed = 0; u32 indirectStagesUsed = 0;
for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i) for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
@ -194,129 +194,7 @@ void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1 ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
} }
} }
uid->tevstages = (ptr+1) - uid->values; uid->num_values = (ptr+1) - uid->values;
}
// Mash together all the inputs that contribute to the code of a generated pixel shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
// It would likely be a lot more efficient to build this incrementally as the attributes
// are set...
void _GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode)
{
u32 numstages = bpmem.genMode.numtevstages + 1;
u32 projtexcoords = 0;
for (u32 i = 0; i < numstages; i++)
{
if (bpmem.tevorders[i/2].getEnable(i & 1))
{
int texcoord = bpmem.tevorders[i / 2].getTexCoord(i & 1);
if (xfregs.texMtxInfo[i].projection)
projtexcoords |= 1 << texcoord;
}
}
uid->values[0] = (u32)bpmem.genMode.numtevstages |
((u32)bpmem.genMode.numindstages << 4) |
((u32)bpmem.genMode.numtexgens << 7) |
((u32)dstAlphaMode << 11) |
((u32)((bpmem.alphaFunc.hex >> 16) & 0xff) << 13) |
(projtexcoords << 21) |
((u32)bpmem.ztex2.op << 29);
// swap table
for (int i = 0; i < 8; i += 2)
((u8*)&uid->values[1])[i / 2] = (bpmem.tevksel[i].hex & 0xf) | ((bpmem.tevksel[i + 1].hex & 0xf) << 4);
u32 enableZTexture = (bpmem.ztex2.op != ZTEXTURE_DISABLE && !bpmem.zcontrol.zcomploc && bpmem.zmode.testenable && bpmem.zmode.updateenable) || g_ActiveConfig.bEnablePerPixelDepth ? 1 : 0;
uid->values[2] = (u32)bpmem.fog.c_proj_fsel.fsel |
((u32)bpmem.fog.c_proj_fsel.proj << 3) |
((u32)enableZTexture << 4) | ((u32)bpmem.fogRange.Base.Enabled << 5);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 2; ++i) {
uid->values[3 + i] = xfregs.color[i].enablelighting ?
(u32)xfregs.color[i].hex :
(u32)xfregs.color[i].matsource;
uid->values[3 + i] |= (xfregs.alpha[i].enablelighting ?
(u32)xfregs.alpha[i].hex :
(u32)xfregs.alpha[i].matsource) << 15;
}
}
uid->values[4] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
int hdr = 5;
u32 *pcurvalue = &uid->values[hdr];
for (u32 i = 0; i < numstages; ++i)
{
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[i].colorC;
TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[i].alphaC;
u32 val0 = cc.hex & 0xffffff;
u32 val1 = ac.hex & 0xffffff;
val0 |= bpmem.tevksel[i / 2].getKC(i & 1) << 24;
val1 |= bpmem.tevksel[i / 2].getKA(i & 1) << 24;
pcurvalue[0] = val0;
pcurvalue[1] = val1;
pcurvalue += 2;
}
for (u32 i = 0; i < numstages / 2; ++i)
{
u32 val0, val1;
if (bpmem.tevorders[i].hex & 0x40)
val0 = bpmem.tevorders[i].hex & 0x3ff;
else
val0 = bpmem.tevorders[i].hex & 0x380;
if (bpmem.tevorders[i].hex & 0x40000)
val1 = (bpmem.tevorders[i].hex & 0x3ff000) >> 12;
else
val1 = (bpmem.tevorders[i].hex & 0x380000) >> 12;
switch (i % 3) {
case 0: pcurvalue[0] = val0|(val1<<10); break;
case 1: pcurvalue[0] |= val0<<20; pcurvalue[1] = val1; pcurvalue++; break;
case 2: pcurvalue[1] |= (val0<<10)|(val1<<20); pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages / 2: %08x", (i % 3));
}
}
if (numstages & 1) { // odd
u32 val0;
if (bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x40)
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x3ff;
else
val0 = bpmem.tevorders[bpmem.genMode.numtevstages/2].hex & 0x380;
switch (bpmem.genMode.numtevstages % 3)
{
case 0: pcurvalue[0] = val0; break;
case 1: pcurvalue[0] |= val0 << 20; break;
case 2: pcurvalue[1] |= val0 << 10; pcurvalue++; break;
default: PanicAlert("Unknown case for Tev Stages: %08x", bpmem.genMode.numtevstages % 3);
}
}
if ((bpmem.genMode.numtevstages % 3) != 2)
++pcurvalue;
uid->tevstages = (u32)(pcurvalue - &uid->values[0] - hdr);
for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
{
u32 val = bpmem.tevind[i].hex & 0x1fffff; // 21 bits
switch (i % 3)
{
case 0: pcurvalue[0] = val; break;
case 1: pcurvalue[0] |= val << 21; pcurvalue[1] = val >> 11; ++pcurvalue; break;
case 2: pcurvalue[0] |= val << 10; ++pcurvalue; break;
default: PanicAlert("Unknown case for Ind Stages: %08x", (i % 3));
}
}
// yeah, well ....
uid->indstages = (u32)(pcurvalue - &uid->values[0] - (hdr - 1) - uid->tevstages);
} }
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode) void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode)
@ -346,19 +224,21 @@ void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode)
for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times for (int i = 0; i < bpmem.genMode.numtevstages+1; ++i) // up to 16 times
{ {
// TODO ... *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i
StageHash(i, ptr); *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i
ptr += 4; // max: ptr = &uid->values[33+63] *ptr++ = bpmem.tevind[i].hex; // 35+5*i
*ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i
*ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i
} }
ptr = &uid->values[97]; ptr = &uid->values[113];
*ptr++ = bpmem.alphaFunc.hex; // 97 *ptr++ = bpmem.alphaFunc.hex; // 113
*ptr++ = bpmem.fog.c_proj_fsel.hex; // 98 *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114
*ptr++ = bpmem.fogRange.Base.hex; // 99 *ptr++ = bpmem.fogRange.Base.hex; // 115
_assert_((ptr - uid->values) <= uid->GetNumValues()); _assert_((ptr - uid->values) == uid->GetNumValues());
} }

View File

@ -45,7 +45,7 @@
#define C_PMATERIALS (C_PLIGHTS + 40) #define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4) #define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES 67 #define PIXELSHADERUID_MAX_VALUES 67
#define PIXELSHADERUID_MAX_VALUES_SAFE 100 #define PIXELSHADERUID_MAX_VALUES_SAFE 115
// DO NOT make anything in this class virtual. // DO NOT make anything in this class virtual.
template<bool safe> template<bool safe>
@ -53,38 +53,37 @@ class _PIXELSHADERUID
{ {
public: public:
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES]; u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
u16 tevstages, indstages; u16 num_values;
_PIXELSHADERUID() _PIXELSHADERUID()
{ {
memset(values, 0, sizeof(values)); memset(values, 0, sizeof(values));
tevstages = indstages = 0;
if (safe) num_values = sizeof(values) / sizeof(values[0]);
else num_values = 0;
} }
_PIXELSHADERUID(const _PIXELSHADERUID& r) _PIXELSHADERUID(const _PIXELSHADERUID& r)
{ {
tevstages = r.tevstages; num_values = r.num_values;
indstages = r.indstages; if (safe) memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
int N = GetNumValues(); else memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
_assert_(N <= GetNumValues());
for (int i = 0; i < N; ++i)
values[i] = r.values[i];
} }
int GetNumValues() const int GetNumValues() const
{ {
if (safe) return (sizeof(values) / sizeof(u32)); if (safe) return (sizeof(values) / sizeof(u32));
else return tevstages; else return num_values;
} }
bool operator <(const _PIXELSHADERUID& _Right) const bool operator <(const _PIXELSHADERUID& _Right) const
{ {
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues(); int N = GetNumValues();
for (int i = 1; i < N; ++i) if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{ {
if (values[i] < _Right.values[i]) if (values[i] < _Right.values[i])
return true; return true;
@ -96,10 +95,10 @@ public:
bool operator ==(const _PIXELSHADERUID& _Right) const bool operator ==(const _PIXELSHADERUID& _Right) const
{ {
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues(); int N = GetNumValues();
for (int i = 1; i < N; ++i) if (N != _Right.GetNumValues())
return false;
for (int i = 0; i < N; ++i)
{ {
if (values[i] != _Right.values[i]) if (values[i] != _Right.values[i])
return false; return false;

View File

@ -17,11 +17,12 @@
#pragma once #pragma once
#include <map> #include "PixelShaderGen.h"
#include <d3d11.h> #include <d3d11.h>
class PIXELSHADERUID; #include <map>
enum DSTALPHA_MODE; enum DSTALPHA_MODE;
namespace DX11 namespace DX11

View File

@ -18,12 +18,12 @@
#ifndef _VERTEXSHADERCACHE_H #ifndef _VERTEXSHADERCACHE_H
#define _VERTEXSHADERCACHE_H #define _VERTEXSHADERCACHE_H
#include <map> #include "VertexShaderGen.h"
#include "D3DBase.h" #include "D3DBase.h"
#include "D3DBlob.h" #include "D3DBlob.h"
class VERTEXSHADERUID; #include <map>
namespace DX11 { namespace DX11 {