diff --git a/Source/Core/Core/Src/PowerPC/Gekko.h b/Source/Core/Core/Src/PowerPC/Gekko.h index 7a1ce76ce1..b2431c1f0a 100644 --- a/Source/Core/Core/Src/PowerPC/Gekko.h +++ b/Source/Core/Core/Src/PowerPC/Gekko.h @@ -279,13 +279,11 @@ union UGQR { unsigned ST_TYPE : 3; unsigned : 5; -// signed ST_SCALE : 6; - unsigned ST_SCALE : 6; + unsigned ST_SCALE : 6; unsigned : 2; unsigned LD_TYPE : 3; unsigned : 5; -// signed LD_SCALE : 6; - unsigned LD_SCALE : 6; + unsigned LD_SCALE : 6; unsigned : 2; }; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp index 58800b86da..f2c3d263ac 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.cpp @@ -346,6 +346,7 @@ namespace Jit64 js.blockStart = emaddress; js.fifoBytesThisBlock = 0; js.curBlock = &b; + js.blockSetsQuantizers = false; //Analyze the block, collect all instructions it is made of (including inlining, //if that is enabled), reorder instructions for optimal performance, and join joinable instructions. diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 729fb5a033..01cfb6fe2c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -49,6 +49,7 @@ namespace Jit64 int downcountAmount; bool isLastInstruction; + bool blockSetsQuantizers; int fifoBytesThisBlock; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 34d0a4c225..b272d0bd03 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -95,12 +95,14 @@ const double m_dequantizeTableD[] = (1 << 4), (1 << 3), (1 << 2), (1 << 1), }; +// The big problem is likely instructions that set the quantizers in the same block. +// We will have to break block after quantizers are written to. 
u32 temp; void psq_st(UGeckoInstruction inst) { BIT32OLD; OLD; - if (!Core::GetStartupParameter().bOptimizeQuantizers) + if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) { Default(inst); return; @@ -220,7 +222,7 @@ void psq_l(UGeckoInstruction inst) { BIT32OLD; OLD; - if (!Core::GetStartupParameter().bOptimizeQuantizers) + if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) { Default(inst); return; @@ -296,6 +298,14 @@ void psq_l(UGeckoInstruction inst) ADD(32, gpr.R(inst.RA), Imm32(offset)); } break; + + /* + Dynamic quantizer. Todo when we have a test set. + MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte. + AND(32, R(EAX), Imm8(0x3F)); + MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD)); + MOVDDUP(r, MComplex(RCX, EAX, 8, 0)); + */ #endif default: // 4 0 diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 369fc816f1..357d14d661 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -35,15 +35,32 @@ namespace Jit64 { case SPR_LR: case SPR_CTR: - gpr.Lock(d); - gpr.LoadToX64(d,true); - MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); - gpr.UnlockAll(); + // These are safe to do the easy way, see the bottom of this function. break; + + case SPR_GQR0: + case SPR_GQR0 + 1: + case SPR_GQR0 + 2: + case SPR_GQR0 + 3: + case SPR_GQR0 + 4: + case SPR_GQR0 + 5: + case SPR_GQR0 + 6: + case SPR_GQR0 + 7: + js.blockSetsQuantizers = true; + // Prevent recompiler from compiling in old quantizer values. + // TODO - actually save the set state and use it in following quantizer ops. + break; + // TODO - break block if quantizers are written to. default: Default(inst); return; } + + // OK, this is easy. 
+ gpr.Lock(d); + gpr.LoadToX64(d,true); + MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + gpr.UnlockAll(); } void mfspr(UGeckoInstruction inst) diff --git a/Source/Core/VideoCommon/Src/LookUpTables.cpp b/Source/Core/VideoCommon/Src/LookUpTables.cpp new file mode 100644 index 0000000000..fd69cee6d9 --- /dev/null +++ b/Source/Core/VideoCommon/Src/LookUpTables.cpp @@ -0,0 +1,27 @@ +#include "LookUpTables.h" + +int lut3to8[8]; +int lut4to8[16]; +int lut5to8[32]; +int lut6to8[64]; +float lutu8tosfloat[256]; +float lutu8toufloat[256]; +float luts8tosfloat[256]; + +void InitLUTs() +{ + for (int i = 0; i < 8; i++) + lut3to8[i] = (i*255) / 7; + for (int i = 0; i < 16; i++) + lut4to8[i] = (i*255) / 15; + for (int i = 0; i < 32; i++) + lut5to8[i] = (i*255) / 31; + for (int i = 0; i < 64; i++) + lut6to8[i] = (i*255) / 63; + for (int i = 0; i < 256; i++) + { + lutu8tosfloat[i] = (float)(i-128) / 127.0f; + lutu8toufloat[i] = (float)(i) / 255.0f; + luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f; + } +} diff --git a/Source/Core/VideoCommon/Src/LookUpTables.h b/Source/Core/VideoCommon/Src/LookUpTables.h new file mode 100644 index 0000000000..43e86e8209 --- /dev/null +++ b/Source/Core/VideoCommon/Src/LookUpTables.h @@ -0,0 +1,16 @@ +#ifndef _LOOKUPTABLES_H +#define _LOOKUPTABLES_H + +#include "Common.h" + +extern int lut3to8[8]; +extern int lut4to8[16]; +extern int lut5to8[32]; +extern int lut6to8[64]; +extern float lutu8tosfloat[256]; +extern float lutu8toufloat[256]; +extern float luts8tosfloat[256]; + +void InitLUTs(); + +#endif diff --git a/Source/Core/VideoCommon/Src/XFMemory.cpp b/Source/Core/VideoCommon/Src/XFMemory.cpp new file mode 100644 index 0000000000..bf8c95dc0f --- /dev/null +++ b/Source/Core/VideoCommon/Src/XFMemory.cpp @@ -0,0 +1,4 @@ +#include "XFMemory.h" + +XFRegisters xfregs; +u32 xfmem[XFMEM_SIZE]; \ No newline at end of file diff --git a/Source/Core/VideoCommon/Src/XFMemory.h b/Source/Core/VideoCommon/Src/XFMemory.h new file mode 100644 
index 0000000000..995463c7bf --- /dev/null +++ b/Source/Core/VideoCommon/Src/XFMemory.h @@ -0,0 +1,218 @@ +#ifndef _XFMEMORY_H +#define _XFMEMORY_H + +#include "Common.h" + +///////////// +// Lighting +///////////// + +#define XF_TEXPROJ_ST 0 +#define XF_TEXPROJ_STQ 1 + +#define XF_TEXINPUT_AB11 0 +#define XF_TEXINPUT_ABC1 1 + +#define XF_TEXGEN_REGULAR 0 +#define XF_TEXGEN_EMBOSS_MAP 1 // used when bump mapping +#define XF_TEXGEN_COLOR_STRGBC0 2 +#define XF_TEXGEN_COLOR_STRGBC1 3 + +#define XF_SRCGEOM_INROW 0 // input is abc +#define XF_SRCNORMAL_INROW 1 // input is abc +#define XF_SRCCOLORS_INROW 2 +#define XF_SRCBINORMAL_T_INROW 3 // input is abc +#define XF_SRCBINORMAL_B_INROW 4 // input is abc +#define XF_SRCTEX0_INROW 5 +#define XF_SRCTEX1_INROW 6 +#define XF_SRCTEX2_INROW 7 +#define XF_SRCTEX3_INROW 8 +#define XF_SRCTEX4_INROW 9 +#define XF_SRCTEX5_INROW 10 +#define XF_SRCTEX6_INROW 11 +#define XF_SRCTEX7_INROW 12 + +#define GX_SRC_REG 0 +#define GX_SRC_VTX 1 + +struct Light +{ + u32 useless[3]; + u32 color; //rgba + float a0; //attenuation + float a1; + float a2; + float k0; //k stuff + float k1; + float k2; + union + { + struct { + float dpos[3]; + float ddir[3]; // specular lights only + }; + struct { + float sdir[3]; + float shalfangle[3]; // specular lights only + }; + }; +}; + +#define LIGHTDIF_NONE 0 +#define LIGHTDIF_SIGN 1 +#define LIGHTDIF_CLAMP 2 + +#define LIGHTATTN_SPEC 0 // specular attenuation +#define LIGHTATTN_SPOT 1 // distance/spotlight attenuation +#define LIGHTATTN_NONE 2 +#define LIGHTATTN_DIR 3 + +union LitChannel +{ + struct + { + unsigned matsource : 1; + unsigned enablelighting : 1; + unsigned lightMask0_3 : 4; + unsigned ambsource : 1; + unsigned diffusefunc : 2; // LIGHTDIF_X + unsigned attnfunc : 2; // LIGHTATTN_X + unsigned lightMask4_7 : 4; + }; + struct + { + u32 hex : 15; + u32 unused : 17; + }; + struct + { + u32 dummy0 : 7; + u32 lightparams : 4; + u32 dummy1 : 21; + }; + unsigned int GetFullLightMask() const + { + return 
enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0; + } +}; + +struct ColorChannel +{ + u32 ambColor; + u32 matColor; + LitChannel color; + LitChannel alpha; +}; + +union INVTXSPEC +{ + struct + { + unsigned numcolors : 2; + unsigned numnormals : 2; // 0 - nothing, 1 - just normal, 2 - normals and binormals + unsigned numtextures : 4; + unsigned unused : 24; + }; + u32 hex; +}; + +union TexMtxInfo +{ + struct + { + unsigned unknown : 1; + unsigned projection : 1; // XF_TEXPROJ_X + unsigned inputform : 2; // XF_TEXINPUT_X + unsigned texgentype : 3; // XF_TEXGEN_X + unsigned sourcerow : 5; // XF_SRCGEOM_X + unsigned embosssourceshift : 3; // what generated texcoord to use + unsigned embosslightshift : 3; // light index that is used + }; + u32 hex; +}; + +union PostMtxInfo +{ + struct + { + unsigned index : 6; // base row of dual transform matrix + unsigned unused : 2; + unsigned normalize : 1; // normalize before send operation + }; + u32 hex; +}; + +struct TexCoordInfo +{ + TexMtxInfo texmtxinfo; + PostMtxInfo postmtxinfo; +}; + +struct XFRegisters +{ + int numTexGens; + int nNumChans; + INVTXSPEC hostinfo; // number of textures,colors,normals from vertex input + ColorChannel colChans[2]; //C0A0 C1A1 + TexCoordInfo texcoords[8]; + bool bEnableDualTexTransform; +}; + +#define XFMEM_SIZE 0x8000 +#define XFMEM_POSMATRICES 0x000 +#define XFMEM_POSMATRICES_END 0x100 +#define XFMEM_NORMALMATRICES 0x400 +#define XFMEM_NORMALMATRICES_END 0x460 +#define XFMEM_POSTMATRICES 0x500 +#define XFMEM_POSTMATRICES_END 0x600 +#define XFMEM_LIGHTS 0x600 +#define XFMEM_LIGHTS_END 0x680 + +// Matrix indices +union TMatrixIndexA +{ + struct + { + unsigned PosNormalMtxIdx : 6; + unsigned Tex0MtxIdx : 6; + unsigned Tex1MtxIdx : 6; + unsigned Tex2MtxIdx : 6; + unsigned Tex3MtxIdx : 6; + }; + struct + { + u32 Hex : 30; + u32 unused : 2; + }; +}; + +union TMatrixIndexB +{ + struct + { + unsigned Tex4MtxIdx : 6; + unsigned Tex5MtxIdx : 6; + unsigned Tex6MtxIdx : 6; + unsigned Tex7MtxIdx 
: 6; + }; + struct + { + u32 Hex : 24; + u32 unused : 8; + }; +}; + +struct Viewport +{ + float wd; + float ht; + float nearZ; + float xOrig; + float yOrig; + float farZ; +}; + +extern XFRegisters xfregs; +extern u32 xfmem[XFMEM_SIZE]; + +#endif \ No newline at end of file diff --git a/Source/Core/VideoCommon/VideoCommon.vcproj b/Source/Core/VideoCommon/VideoCommon.vcproj index 3987410b7f..b52ba3cede 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcproj +++ b/Source/Core/VideoCommon/VideoCommon.vcproj @@ -405,6 +405,22 @@ RelativePath=".\Src\CPMemory.h" > + + + + + + + + diff --git a/Source/Plugins/Plugin_VideoDX9/Src/CPStructs.h b/Source/Plugins/Plugin_VideoDX9/Src/CPStructs.h index 5d45fa7284..344a8e8b01 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/CPStructs.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/CPStructs.h @@ -3,39 +3,7 @@ #include "Common.h" #include "CPMemory.h" - -#pragma pack(4) - -////////////////////////////////////////////////////////////////////////// -// Matrix indices -////////////////////////////////////////////////////////////////////////// - -union TMatrixIndexA -{ - u32 Hex; - struct - { - unsigned PosNormalMtxIdx : 6; - unsigned Tex0MtxIdx : 6; - unsigned Tex1MtxIdx : 6; - unsigned Tex2MtxIdx : 6; - unsigned Tex3MtxIdx : 6; - }; -}; - -union TMatrixIndexB -{ - u32 Hex; - struct - { - unsigned Tex4MtxIdx : 6; - unsigned Tex5MtxIdx : 6; - unsigned Tex6MtxIdx : 6; - unsigned Tex7MtxIdx : 6; - }; -}; - -#pragma pack () +#include "XFMemory.h" extern TMatrixIndexA MatrixIndexA; extern TMatrixIndexB MatrixIndexB; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp b/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp index e5394bf46f..f56feb0b2b 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/PixelShader.cpp @@ -248,7 +248,7 @@ tevhash GetCurrentTEV() { hash = _rotl(hash,3) ^ (bpmem.combiners[i].colorC.hex*13); hash = _rotl(hash,7) ^ 
((bpmem.combiners[i].alphaC.hex&0xFFFFFFFC)*3); - hash = _rotl(hash,9) ^ texcoords[i].texmtxinfo.projection*451; + hash = _rotl(hash,9) ^ xfregs.texcoords[i].texmtxinfo.projection*451; } for (int i = 0; i < (int)bpmem.genMode.numtevstages/2+1; i++) { @@ -369,7 +369,7 @@ void WriteStage(char *&p, int n) char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap]; char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap]; - int texfun = texcoords[n].texmtxinfo.projection; + int texfun = xfregs.texcoords[n].texmtxinfo.projection; WRITE(p,"rastemp=%s.%s;\n",tevRasTable[bpmem.tevorders[n/2].getColorChan(n&1)],rasswap); if (bpmem.tevorders[n/2].getEnable(n&1)) diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TextureDecoder.h b/Source/Plugins/Plugin_VideoDX9/Src/TextureDecoder.h index a70bdb2faf..5f9d3c838e 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TextureDecoder.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/TextureDecoder.h @@ -27,16 +27,29 @@ enum TextureFormat GX_TF_C14X2 = 0xA, GX_TF_CMPR = 0xE, - _GX_TF_CTF = 0x20, /* copy-texture-format only */ - _GX_TF_ZTF = 0x10, /* Z-texture-format */ + _GX_TF_CTF = 0x20, // copy-texture-format only (simply means linear?) + _GX_TF_ZTF = 0x10, // Z-texture-format - GX_TF_Z8 = 0x1 | _GX_TF_ZTF, + // these formats are also valid when copying targets + GX_CTF_R4 = 0x0 | _GX_TF_CTF, + GX_CTF_RA4 = 0x2 | _GX_TF_CTF, + GX_CTF_RA8 = 0x3 | _GX_TF_CTF, + GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, + GX_CTF_A8 = 0x7 | _GX_TF_CTF, + GX_CTF_R8 = 0x8 | _GX_TF_CTF, + GX_CTF_G8 = 0x9 | _GX_TF_CTF, + GX_CTF_B8 = 0xA | _GX_TF_CTF, + GX_CTF_RG8 = 0xB | _GX_TF_CTF, + GX_CTF_GB8 = 0xC | _GX_TF_CTF, + + GX_TF_Z8 = 0x1 | _GX_TF_ZTF, GX_TF_Z16 = 0x3 | _GX_TF_ZTF, GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF, - //and the strange copy texture formats.. 
- - + GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF, + GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF, + GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF, + GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF, }; int TexDecoder_GetTexelSizeInNibbles(int format); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/TransformEngine.cpp b/Source/Plugins/Plugin_VideoDX9/Src/TransformEngine.cpp index 28ffeed948..cf7be9422c 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/TransformEngine.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/TransformEngine.cpp @@ -1,5 +1,6 @@ #include "Common.h" #include "Globals.h" +#include "Vec3.h" #include "TransformEngine.h" #include "VertexHandler.h" #include "VertexLoader.h" @@ -34,7 +35,7 @@ float DoLighting(const Light *light, const LitChannel &chan, const Vec3 &pos, co float val; if (chan.attnfunc == 0 || chan.attnfunc == 2) //no attn { - Vec3 ldir = (light->dpos-pos); + Vec3 ldir = (Vec3(light->dpos) - pos); val = ldir.normalized() * normal; } else @@ -44,17 +45,17 @@ float DoLighting(const Light *light, const LitChannel &chan, const Vec3 &pos, co float mul = 1.0f; if (chan.attnfunc == 3) { - Vec3 ldir = (light->dpos - pos); + Vec3 ldir = (Vec3(light->dpos) - pos); d = ldir.length(); Vec3 ldirNorm = ldir / d; //normalize float l = ldirNorm * normal; - aattn = light->ddir * ldirNorm; + aattn = Vec3(light->ddir) * ldirNorm; mul = l; } else if (chan.attnfunc == 1) { - d = aattn = light->shalfangle * normal; - mul = (light->sdir * normal > 0) ? (normal * light->shalfangle) : 0; + d = aattn = Vec3(light->shalfangle) * normal; + mul = (Vec3(light->sdir) * normal > 0) ? 
(normal * Vec3(light->shalfangle)) : 0; if (mul < 0) mul = 0; } @@ -167,14 +168,14 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * ////////////////////////////////////////////////////////////////////////// //find all used lights u32 lightMask = - colChans[0].color.GetFullLightMask() | colChans[0].alpha.GetFullLightMask() | - colChans[1].color.GetFullLightMask() | colChans[1].alpha.GetFullLightMask(); + xfregs.colChans[0].color.GetFullLightMask() | xfregs.colChans[0].alpha.GetFullLightMask() | + xfregs.colChans[1].color.GetFullLightMask() | xfregs.colChans[1].alpha.GetFullLightMask(); float r0=0,g0=0,b0=0,a0=0; //go through them and compute the lit colors //Sum lighting for both two color channels if they're active - for (int j=0; j<(int)bpmem.genMode.numcolchans; j++) + for (int j = 0; j < (int)bpmem.genMode.numcolchans; j++) { RGBAFloat material; RGBAFloat lightSum(0,0,0,0); @@ -182,8 +183,8 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * bool hasColorJ = (varray->GetComponents() & (VB_HAS_COL0 << j)) != 0; //get basic material color from appropriate sources (this would compile nicely!:) - if (colChans[j].color.matsource==GX_SRC_REG) - material.convertRGB_GC(colChans[j].matColor); + if (xfregs.colChans[j].color.matsource == GX_SRC_REG) + material.convertRGB_GC(xfregs.colChans[j].matColor); else { if (hasColorJ) @@ -192,8 +193,8 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * material.r=material.g=material.b=1.0f; } - if (colChans[j].alpha.matsource==GX_SRC_REG) - material.convertA_GC(colChans[j].matColor); + if (xfregs.colChans[j].alpha.matsource == GX_SRC_REG) + material.convertA_GC(xfregs.colChans[j].matColor); else { if (hasColorJ) @@ -203,11 +204,11 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * } //combine together the light values from the lights that affect the color - if (colChans[j].color.enablelighting) + if 
(xfregs.colChans[j].color.enablelighting) { //choose ambient source and start our lightsum accumulator with its value.. - if (colChans[j].color.ambsource == GX_SRC_REG) - lightSum.convertRGB_GC(colChans[j].ambColor); //ambient + if (xfregs.colChans[j].color.ambsource == GX_SRC_REG) + lightSum.convertRGB_GC(xfregs.colChans[j].ambColor); //ambient else { if (hasColorJ) @@ -219,12 +220,12 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * } //accumulate light colors - int cmask = colChans[j].color.GetFullLightMask(); + int cmask = xfregs.colChans[j].color.GetFullLightMask(); for (int l=0; l<8; l++) { if (cmask&1) { - float val = DoLighting(GetLight(l), colChans[j].color, TempPos, TempNormal); + float val = DoLighting(GetLight(l), xfregs.colChans[j].color, TempPos, TempNormal); float r = lightColors[l].r * val; float g = lightColors[l].g * val; float b = lightColors[l].b * val; @@ -237,15 +238,15 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * } else { - lightSum.r=lightSum.g=lightSum.b=1.0f; + lightSum.r = lightSum.g = lightSum.b = 1.0f; } //combine together the light values from the lights that affect alpha (should be rare) - if (colChans[j].alpha.enablelighting) + if (xfregs.colChans[j].alpha.enablelighting) { //choose ambient source.. 
- if (colChans[j].alpha.ambsource==GX_SRC_REG) - lightSum.convertA_GC(colChans[j].ambColor); + if (xfregs.colChans[j].alpha.ambsource==GX_SRC_REG) + lightSum.convertA_GC(xfregs.colChans[j].ambColor); else { if (hasColorJ) @@ -254,12 +255,12 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * lightSum.a=0.0f; } //accumulate light alphas - int amask = colChans[j].alpha.GetFullLightMask(); - for (int l=0; l<8; l++) + int amask = xfregs.colChans[j].alpha.GetFullLightMask(); + for (int l = 0; l < 8; l++) { if (amask&1) { - float val = DoLighting(GetLight(l),colChans[j].alpha,TempPos,TempNormal); + float val = DoLighting(GetLight(l), xfregs.colChans[j].alpha, TempPos, TempNormal); float a = lightColors[l].a * val; lightSum.a += a; } @@ -279,21 +280,21 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * //Step 3: Generate texture coordinates! ////////////////////////////////////////////////////////////////////////// Vec3 TempUVs[8]; - for (int j=0; jGetComponents() & (VB_HAS_UV0 << c)) != 0; if (c >= 0 && c <= 7 && hasTCC) { @@ -304,26 +305,26 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * } Vec3 out,out2; - switch (texcoords[n].texmtxinfo.texgentype) + switch (xfregs.texcoords[n].texmtxinfo.texgentype) { case XF_TEXGEN_COLOR_STRGBC0: - out=Vec3(chans[0].r*255, chans[0].g*255, 1)/255.0f; + out = Vec3(chans[0].r*255, chans[0].g*255, 1)/255.0f; break; case XF_TEXGEN_COLOR_STRGBC1: - out=Vec3(chans[1].r*255, chans[1].g*255, 1)/255.0f; //FIX: take color1 instead + out = Vec3(chans[1].r*255, chans[1].g*255, 1)/255.0f; //FIX: take color1 instead break; case XF_TEXGEN_REGULAR: - if (texcoords[n].texmtxinfo.projection) + if (xfregs.texcoords[n].texmtxinfo.projection) VtxMulMtx43(out, t, m_pTexMatrix[n]); else VtxMulMtx42(out, t, m_pTexMatrix[n]); break; } - if (texcoords[n].postmtxinfo.normalize) + if (xfregs.texcoords[n].postmtxinfo.normalize) out.normalize(); - int postMatrix = 
texcoords[n].postmtxinfo.index; + int postMatrix = xfregs.texcoords[n].postmtxinfo.index; float *pmtx = ((float*)xfmem) + 0x500 + postMatrix * 4; //CHECK //multiply with postmatrix VtxMulMtx43(TempUVs[j], out, pmtx); @@ -332,12 +333,12 @@ void CTransformEngine::TransformVertices(int _numVertices, const DecodedVArray * ////////////////////////////////////////////////////////////////////////// //Step 4: Output the vertex! ////////////////////////////////////////////////////////////////////////// - for (int j=0; j<2; j++) + for (int j = 0; j < 2; j++) chans[j].convertToD3DColor(vbuffer[i].colors[j]); vbuffer[i].pos = TempPos; vbuffer[i].normal = TempNormal; - for (int j=0; j<(int)bpmem.genMode.numtexgens; j++) + for (int j = 0; j < (int)bpmem.genMode.numtexgens; j++) { vbuffer[i].uv[j].u = TempUVs[j].x; vbuffer[i].uv[j].v = TempUVs[j].y; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Utils.h b/Source/Plugins/Plugin_VideoDX9/Src/Utils.h index 87159c3b38..6df98cd670 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Utils.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/Utils.h @@ -3,18 +3,11 @@ #include "Common.h" #include "main.h" +#include "LookUpTables.h" extern int frameCount; -extern int lut3to8[8]; -extern int lut4to8[16]; -extern int lut5to8[32]; -extern int lut6to8[64]; -extern float lutu8tosfloat[256]; -extern float lutu8toufloat[256]; -extern float luts8tosfloat[256]; LRESULT CALLBACK AboutProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam); -void InitLUTs(); //#define RAM_MASK 0x1FFFFFF diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Vec3.h b/Source/Plugins/Plugin_VideoDX9/Src/Vec3.h index 41c6ab362a..f9b00f88f1 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Vec3.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/Vec3.h @@ -10,6 +10,7 @@ public: float x,y,z; Vec3() { } explicit Vec3(float f) {x=y=z=f;} + explicit Vec3(const float *f) {x=f[0]; y=f[1]; z=f[2];} Vec3(const float _x, const float _y, const float _z) { x=_x; y=_y; z=_z; } diff --git 
a/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.cpp b/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.cpp index 5d7feede35..7a26cf7883 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.cpp @@ -5,23 +5,13 @@ #include "VertexHandler.h" #include "Utils.h" - -//XF state -ColorChannel colChans[2]; //C0A0 C1A1 -TexCoordInfo texcoords[8]; -MiscXF miscxf; -u32 xfmem[XFMEM_SIZE]; - float rawViewPort[6]; float rawProjection[7]; - - #define BEGINSAVELOAD char *optr=ptr; #define SAVELOAD(what,size) memcpy((void*)((save)?(void*)(ptr):(void*)(what)),(void*)((save)?(void*)(what):(void*)(ptr)),(size)); ptr+=(size); #define ENDSAVELOAD return ptr-optr; - // __________________________________________________________________________________________________ // LoadXFReg 0x10 // @@ -52,54 +42,54 @@ void LoadXFReg(u32 transferSize, u32 baseAddress, u32 *pData) break; case 0x1009: //GXSetNumChans (no) break; - case 0x100a: colChans[0].ambColor = data; break; //GXSetChanAmbientcolor - case 0x100b: colChans[1].ambColor = data; break; //GXSetChanAmbientcolor - case 0x100c: colChans[0].matColor = data; break; //GXSetChanMatcolor (rgba) - case 0x100d: colChans[1].matColor = data; break; //GXSetChanMatcolor (rgba) + case 0x100a: xfregs.colChans[0].ambColor = data; break; //GXSetChanAmbientcolor + case 0x100b: xfregs.colChans[1].ambColor = data; break; //GXSetChanAmbientcolor + case 0x100c: xfregs.colChans[0].matColor = data; break; //GXSetChanMatcolor (rgba) + case 0x100d: xfregs.colChans[1].matColor = data; break; //GXSetChanMatcolor (rgba) - case 0x100e: colChans[0].color.hex = data; break; //color0 - case 0x100f: colChans[1].color.hex = data; break; //color1 - case 0x1010: colChans[0].alpha.hex = data; break; //alpha0 - case 0x1011: colChans[1].alpha.hex = data; break; //alpha1 + case 0x100e: xfregs.colChans[0].color.hex = data; break; //color0 + case 0x100f: xfregs.colChans[1].color.hex = data; break; //color1 + case 0x1010: 
xfregs.colChans[0].alpha.hex = data; break; //alpha0 + case 0x1011: xfregs.colChans[1].alpha.hex = data; break; //alpha1 case 0x1018: break; case 0x101a: CVertexHandler::Flush(); - memcpy(rawViewPort,&pData[i],sizeof(rawViewPort)); + memcpy(rawViewPort, &pData[i], sizeof(rawViewPort)); XFUpdateVP(); i += 6; break; case 0x1020: CVertexHandler::Flush(); - memcpy(rawProjection,&pData[i],sizeof(rawProjection)); + memcpy(rawProjection, &pData[i], sizeof(rawProjection)); XFUpdatePJ(); i += 7; return; case 0x103f: - miscxf.numTexGens = data; + xfregs.numTexGens = data; break; - case 0x1040: texcoords[0].texmtxinfo.hex = data; break; - case 0x1041: texcoords[1].texmtxinfo.hex = data; break; - case 0x1042: texcoords[2].texmtxinfo.hex = data; break; - case 0x1043: texcoords[3].texmtxinfo.hex = data; break; - case 0x1044: texcoords[4].texmtxinfo.hex = data; break; - case 0x1045: texcoords[5].texmtxinfo.hex = data; break; - case 0x1046: texcoords[6].texmtxinfo.hex = data; break; - case 0x1047: texcoords[7].texmtxinfo.hex = data; break; + case 0x1040: xfregs.texcoords[0].texmtxinfo.hex = data; break; + case 0x1041: xfregs.texcoords[1].texmtxinfo.hex = data; break; + case 0x1042: xfregs.texcoords[2].texmtxinfo.hex = data; break; + case 0x1043: xfregs.texcoords[3].texmtxinfo.hex = data; break; + case 0x1044: xfregs.texcoords[4].texmtxinfo.hex = data; break; + case 0x1045: xfregs.texcoords[5].texmtxinfo.hex = data; break; + case 0x1046: xfregs.texcoords[6].texmtxinfo.hex = data; break; + case 0x1047: xfregs.texcoords[7].texmtxinfo.hex = data; break; - case 0x1050: texcoords[0].postmtxinfo.hex = data; break; - case 0x1051: texcoords[1].postmtxinfo.hex = data; break; - case 0x1052: texcoords[2].postmtxinfo.hex = data; break; - case 0x1053: texcoords[3].postmtxinfo.hex = data; break; - case 0x1054: texcoords[4].postmtxinfo.hex = data; break; - case 0x1055: texcoords[5].postmtxinfo.hex = data; break; - case 0x1056: texcoords[6].postmtxinfo.hex = data; break; - case 0x1057: 
texcoords[7].postmtxinfo.hex = data; break; + case 0x1050: xfregs.texcoords[0].postmtxinfo.hex = data; break; + case 0x1051: xfregs.texcoords[1].postmtxinfo.hex = data; break; + case 0x1052: xfregs.texcoords[2].postmtxinfo.hex = data; break; + case 0x1053: xfregs.texcoords[3].postmtxinfo.hex = data; break; + case 0x1054: xfregs.texcoords[4].postmtxinfo.hex = data; break; + case 0x1055: xfregs.texcoords[5].postmtxinfo.hex = data; break; + case 0x1056: xfregs.texcoords[6].postmtxinfo.hex = data; break; + case 0x1057: xfregs.texcoords[7].postmtxinfo.hex = data; break; default: break; @@ -118,9 +108,9 @@ void LoadIndexedXF(u32 val, int array) { DVSTARTPROFILE(); - int index = val>>16; - int address = val&0xFFF; //check mask - int size = ((val>>12)&0xF)+1; + int index = val >> 16; + int address = val & 0xFFF; //check mask + int size = ((val >> 12) & 0xF)+1; //load stuff from array to address in xf mem for (int i = 0; i < size; i++) xfmem[address + i] = Memory_Read_U32(arraybases[array] + arraystrides[array]*index + i*4); @@ -138,9 +128,8 @@ void XFUpdatePJ() size_t XFSaveLoadState(char *ptr, BOOL save) { BEGINSAVELOAD; - SAVELOAD(colChans,2*sizeof(ColorChannel)); - SAVELOAD(texcoords,16*sizeof(TexCoordInfo)); - SAVELOAD(&miscxf,sizeof(MiscXF)); + SAVELOAD(xfregs.colChans,2*sizeof(ColorChannel)); + SAVELOAD(xfregs.texcoords,sizeof(xfregs.texcoords)); SAVELOAD(rawViewPort,sizeof(rawViewPort)); SAVELOAD(rawProjection,sizeof(rawProjection)); SAVELOAD(xfmem,XFMEM_SIZE*sizeof(u32)); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.h b/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.h index 22bfb5ca32..c00836b0d2 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/XFStructs.h @@ -3,153 +3,7 @@ #include "Common.h" #include "Vec3.h" - -#pragma pack(4) - -////////////////////////////////////////////////////////////////////////// -// Lighting -////////////////////////////////////////////////////////////////////////// -#define 
XF_TEX_ST 0x00000000 -#define XF_TEX_STQ 0x00000001 - -#define XF_TEX_AB11 0x00000000 -#define XF_TEX_ABC1 0x00000001 - -#define XF_TEXGEN_REGULAR 0x00000000 -#define XF_TEXGEN_EMBOSS_MAP 0x00000001 -#define XF_TEXGEN_COLOR_STRGBC0 0x00000002 -#define XF_TEXGEN_COLOR_STRGBC1 0x00000003 - -#define XF_GEOM_INROW 0x00000000 -#define XF_NORMAL_INROW 0x00000001 -#define XF_COLORS_INROW 0x00000002 -#define XF_BINORMAL_T_INROW 0x00000003 -#define XF_BINORMAL_B_INROW 0x00000004 -#define XF_TEX0_INROW 0x00000005 -#define XF_TEX1_INROW 0x00000006 -#define XF_TEX2_INROW 0x00000007 -#define XF_TEX3_INROW 0x00000008 -#define XF_TEX4_INROW 0x00000009 -#define XF_TEX5_INROW 0x0000000a -#define XF_TEX6_INROW 0x0000000b -#define XF_TEX7_INROW 0x0000000c - -struct Light -{ - u32 useless[3]; - //Vec3 direction; - u32 color; //rgba - float a0; //attenuation - float a1; - float a2; - float k0; //k stuff - float k1; - float k2; - union - { - struct { - Vec3 dpos; - Vec3 ddir; - }; - struct { - Vec3 sdir; - Vec3 shalfangle; - }; - }; -}; - -#define GX_SRC_REG 0 -#define GX_SRC_VTX 1 - -union LitChannel -{ - struct - { - unsigned matsource : 1; - unsigned enablelighting : 1; - unsigned lightMask0_3 : 4; - unsigned ambsource : 1; - unsigned diffusefunc : 2; //0=none 1=sign 2=clamp - unsigned attnfunc : 2; //1=spec 3=spot 2=none ??? - unsigned lightMask4_7 : 4; - }; - u32 hex; - unsigned int GetFullLightMask() - { - return lightMask0_3 | (lightMask4_7 << 4); - } -}; - -struct ColorChannel -{ - u32 ambColor; - u32 matColor; - LitChannel color; - LitChannel alpha; -}; - -struct MiscXF -{ - int numTexGens; -}; - -union TexMtxInfo -{ - struct - { - unsigned unknown : 1; - unsigned projection : 1; - unsigned inputform : 2; //1 if three-component, 0 if two-component ? 
- unsigned texgentype : 3; //0-POS 1-NRM 3-BINRM 4-TANGENT 5-TEX0 ...12-TEX7 13-COLOR - unsigned sourcerow : 5; - unsigned embosssourceshift : 3; - unsigned embosslightshift : 3; - }; - u32 hex; -}; - -union PostMtxInfo -{ - struct - { - unsigned index : 8; - unsigned normalize : 1; - }; - u32 hex; -}; - -struct TexCoordInfo -{ - TexMtxInfo texmtxinfo; - PostMtxInfo postmtxinfo; -}; - -struct Viewport -{ - float wd; - float ht; - float nearZ; - float xOrig; - float yOrig; - float farZ; -}; - -#define XFMEM_SIZE 0x8000 -#define XFMEM_POSMATRICES 0x000 -#define XFMEM_POSMATRICES_END 0x100 -#define XFMEM_NORMALMATRICES 0x400 -#define XFMEM_NORMALMATRICES_END 0x460 -#define XFMEM_POSTMATRICES 0x500 -#define XFMEM_POSTMATRICES_END 0x600 -#define XFMEM_LIGHTS 0x600 -#define XFMEM_LIGHTS_END 0x680 - - -extern TexCoordInfo texcoords[8]; -extern ColorChannel colChans[2]; //C0A0 C1A1 -extern MiscXF miscxf; - -extern unsigned __int32 xfmem[XFMEM_SIZE]; +#include "XFMemory.h" extern float rawViewPort[6]; extern float rawProjection[7]; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp index b0a5e08d21..8d51a011ab 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/BPStructs.cpp @@ -20,6 +20,7 @@ #include "VertexLoader.h" #include "BPStructs.h" +#include "Render.h" #include "OpcodeDecoding.h" #include "TextureMngr.h" #include "TextureDecoder.h" diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp index b21bd476c5..f76bc96694 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Globals.cpp @@ -24,13 +24,6 @@ #include int frameCount; -int lut3to8[8]; -int lut4to8[16]; -int lut5to8[32]; -int lut6to8[64]; -float lutu8tosfloat[256]; -float lutu8toufloat[256]; -float luts8tosfloat[256]; int g_Res[NUMWNDRES][2] = { @@ -50,25 +43,6 @@ void Statistics::ResetFrame() 
memset(&thisFrame,0,sizeof(ThisFrame)); } -void InitLUTs() -{ - int i; - for (i=0; i<8; i++) - lut3to8[i] = (i*255)/7; - for (i=0; i<16; i++) - lut4to8[i] = (i*255)/15; - for (i=0; i<32; i++) - lut5to8[i] = (i*255)/31; - for (i=0; i<64; i++) - lut6to8[i] = (i*255)/63; - for (i=0; i<256; i++) - { - lutu8tosfloat[i] = (float)(i-128)/127.0f; - lutu8toufloat[i] = (float)(i)/255.0f; - luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f; - } -} - Config::Config() { memset(this, 0, sizeof(Config)); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Globals.h b/Source/Plugins/Plugin_VideoOGL/Src/Globals.h index 9576bb778f..9291b3abc6 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Globals.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/Globals.h @@ -128,20 +128,7 @@ struct RECT #define SAVELOAD(what,size) memcpy((void*)((save)?(void*)(ptr):(void*)(what)),(void*)((save)?(void*)(what):(void*)(ptr)),(size)); ptr+=(size); #define ENDSAVELOAD return ptr-optr; -struct TEXTUREFMT -{ - TEXTUREFMT(GLenum format, GLenum type) : type(type), format(format) {} - GLenum type, format; -}; - extern int frameCount; -extern int lut3to8[8]; -extern int lut4to8[16]; -extern int lut5to8[32]; -extern int lut6to8[64]; -extern float lutu8tosfloat[256]; -extern float lutu8toufloat[256]; -extern float luts8tosfloat[256]; #define NUMWNDRES 6 extern int g_Res[NUMWNDRES][2]; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp index 62549a7234..120bf167f6 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShader.cpp @@ -16,12 +16,13 @@ // http://code.google.com/p/dolphin-emu/ #include "Globals.h" + #include #include #include "PixelShader.h" -#include "VertexShader.h" // for texture projection mode -#include "PixelShaderManager.h" +#include "XFMemory.h" // for texture projection mode +#include "BPMemory.h" // old tev->pixelshader notes // @@ -32,8 +33,8 @@ // output is given by 
.outreg // tevtemp is set according to swapmodetables and -void WriteStage(char *&p, int n); -void WrapNonPow2Tex(char* &p, const char* var, int texmap); +void WriteStage(char *&p, int n, u32 texture_mask); +void WrapNonPow2Tex(char* &p, const char* var, int texmap, u32 texture_mask); void WriteAlphaCompare(char *&p, int num, int comp); bool WriteAlphaTest(char *&p); @@ -234,14 +235,14 @@ const char *tevRasTable[] = const char *tevTexFunc[] = { "tex2D", "texRECT" }; -const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; -const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; -const char* tevIndAlphaSel[] = {"", "x", "y", "z"}; +const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" }; +const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" }; +const char* tevIndAlphaSel[] = {"", "x", "y", "z"}; const char* tevIndAlphaScale[] = {"", "*32","*16","*8"}; -const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias -const char* tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt -const char* tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; -const char* tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; +const char* tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias +const char* tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt +const char* tevIndWrapStart[] = {"0", "256", "128", "64", "32", "16", "0.001" }; +const char* tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "8.0f" }; #define WRITE p+=sprintf @@ -251,7 +252,7 @@ char swapModeTable[4][5]; void BuildSwapModeTable() { //bpmem.tevregs[0]. 
- for (int i=0; i<4; i++) + for (int i = 0; i < 4; i++) { swapModeTable[i][0]=swapColors[bpmem.tevksel[i*2].swap1]; swapModeTable[i][1]=swapColors[bpmem.tevksel[i*2].swap2]; @@ -262,7 +263,7 @@ void BuildSwapModeTable() } static char text[16384]; -bool GeneratePixelShader(FRAGMENTSHADER& ps) +char *GeneratePixelShader(u32 texture_mask, bool has_zbuffer_target, bool bRenderZToCol0) { DVSTARTPROFILE(); @@ -275,11 +276,11 @@ bool GeneratePixelShader(FRAGMENTSHADER& ps) WRITE(p,"//%i TEV stages, %i texgens, %i IND stages\n", numStages,numTexgen,bpmem.genMode.numindstages); - bool bRenderZ = Renderer::GetZBufferTarget() != 0 && bpmem.zmode.updateenable; + bool bRenderZ = has_zbuffer_target && bpmem.zmode.updateenable; bool bOutputZ = bpmem.ztex2.op != ZTEXTURE_DISABLE; bool bInputZ = bpmem.ztex2.op==ZTEXTURE_ADD || bRenderZ; - bool bRenderZToCol0 = Renderer::GetRenderMode()!=Renderer::RM_Normal; // output z and alpha to color0 + // bool bRenderZToCol0 = ; // output z and alpha to color0 assert( !bRenderZToCol0 || bRenderZ ); int ztexcoord = -1; @@ -296,11 +297,11 @@ bool GeneratePixelShader(FRAGMENTSHADER& ps) } // samplers - if( s_texturemask ) { + if( texture_mask ) { WRITE(p,"uniform samplerRECT "); bool bfirst = true; for(int i = 0; i < 8; ++i) { - if( s_texturemask & (1< #include "Common.h" +#include "Render.h" #include "VertexShader.h" #include "PixelShaderManager.h" #include "PixelShader.h" @@ -47,6 +48,14 @@ static u32 maptocoord_mask=0; static GLuint s_ColorMatrixProgram=0; +void PixelShaderMngr::SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4) { + glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f1, f2, f3, f4); +} + +void PixelShaderMngr::SetPSConstant4fv(int const_number, const float *f) { + glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, const_number, f); +} + void PixelShaderMngr::Init() { s_nColorsChanged[0] = s_nColorsChanged[1] = 0; @@ -119,7 +128,10 @@ FRAGMENTSHADER* PixelShaderMngr::GetShader() 
PSCacheEntry& newentry = pshaders[uid]; - if (!GeneratePixelShader(newentry.shader)) { + char *code = GeneratePixelShader(s_texturemask, + Renderer::GetZBufferTarget() != 0, + Renderer::GetRenderMode() != Renderer::RM_Normal); + if (!code || !CompilePixelShader(newentry.shader, code)) { ERROR_LOG("failed to create pixel shader\n"); return NULL; } @@ -218,7 +230,7 @@ void PixelShaderMngr::SetConstants(FRAGMENTSHADER& ps) int baseind = i?C_KCOLORS:C_COLORS; for(int j = 0; j < 4; ++j) { if( s_nColorsChanged[i] & (1<>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); + SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f); } if( s_bZBiasChanged ) { @@ -311,8 +323,8 @@ void PixelShaderMngr::SetConstants(FRAGMENTSHADER& ps) break; } //ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS, ftemp); - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); + SetPSConstant4fv(C_ZBIAS, ftemp); + SetPSConstant4f(C_ZBIAS+1, 0, 0, 0, (float)( (((int)lastZBias<<8)>>8))/16777216.0f); } // indirect incoming texture scales, update all! @@ -330,10 +342,10 @@ void PixelShaderMngr::SetConstants(FRAGMENTSHADER& ps) PRIM_LOG("tex indscale%d: %f %f\n", i, f[2*i], f[2*i+1]); } - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE, f); + SetPSConstant4fv(C_INDTEXSCALE, f); if( bpmem.genMode.numindstages > 2 ) - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXSCALE+1, &f[4]); + SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]); s_bIndTexScaleChanged = false; } @@ -346,9 +358,9 @@ void PixelShaderMngr::SetConstants(FRAGMENTSHADER& ps) // xyz - static matrix //TODO w - dynamic matrix scale / 256...... 
somehow / 4 works better - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXMTX+2*i, + SetPSConstant4f(C_INDTEXMTX+2*i, bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale, fscale * 256.0f); - glProgramEnvParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, C_INDTEXMTX+2*i+1, + SetPSConstant4f(C_INDTEXMTX+2*i+1, bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale, fscale * 256.0f); PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", i, @@ -470,11 +482,11 @@ void PixelShaderMngr::SetTexDimsChanged(int texmapid) void PixelShaderMngr::SetColorMatrix(const float* pmatrix, const float* pfConstAdd) { - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX, pmatrix); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+1, pmatrix+4); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+2, pmatrix+8); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+3, pmatrix+12); - glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, C_COLORMATRIX+4, pfConstAdd); + SetPSConstant4fv(C_COLORMATRIX, pmatrix); + SetPSConstant4fv(C_COLORMATRIX+1, pmatrix+4); + SetPSConstant4fv(C_COLORMATRIX+2, pmatrix+8); + SetPSConstant4fv(C_COLORMATRIX+3, pmatrix+12); + SetPSConstant4fv(C_COLORMATRIX+4, pfConstAdd); } GLuint PixelShaderMngr::GetColorMatrixProgram() diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h index 174996365b..3630abc0e4 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PixelShaderManager.h @@ -20,8 +20,19 @@ #include "PixelShader.h" -extern u32 s_texturemask; +#include "BPMemory.h" +#include + +struct FRAGMENTSHADER +{ + FRAGMENTSHADER() : glprogid(0) { } + GLuint glprogid; // opengl program id + +#ifdef _DEBUG + std::string strprog; +#endif +}; class PixelShaderMngr { class 
PIXELSHADERUID @@ -110,6 +121,8 @@ class PixelShaderMngr static void GetPixelShaderId(PIXELSHADERUID&); static PIXELSHADERUID s_curuid; // the current pixel shader uid (progressively changed as memory is written) + static void SetPSConstant4f(int const_number, float f1, float f2, float f3, float f4); + static void SetPSConstant4fv(int const_number, const float *f); public: static void Init(); static void Cleanup(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.h b/Source/Plugins/Plugin_VideoOGL/Src/Render.h index 6fd6516579..b5733de08e 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.h @@ -15,66 +15,66 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#ifndef GCOGL_RENDER -#define GCOGL_RENDER - -#include "TextureMngr.h" - -extern CGcontext g_cgcontext; -extern CGprofile g_cgvProf, g_cgfProf; -extern u32 g_AAx, g_AAy; // anti-aliasing - -class Renderer -{ - static void FlushZBufferAlphaToTarget(); - -public: - enum RenderMode - { - RM_Normal=0, // normal target as color0, ztarget as color1 - RM_ZBufferOnly, // zbuffer as color 0 - RM_ZBufferAlpha // zbuffer as color0, also will dump alpha info to regular target once mode is switched - // use stencil buffer to indicate what pixels were written - }; - - static bool Create2(); - static void Shutdown(); - - // initialize opengl standard values (like viewport) - static bool Initialize(); - - static void AddMessage(const char* str, u32 ms); - static void ProcessMessages(); // draw the current messages on the screen - static void DrawText(const char* pstr, int left, int top, u32 color); - static void SetAA(int aa); // sets the anti-aliasing level - - static void ReinitView(int nNewWidth, int nNewHeight); - - static int GetTargetWidth(); - static int GetTargetHeight(); - static bool CanBlendLogicOp(); - static void SetCgErrorOutput(bool bOutput); - - static void ResetGLState(); - static void 
RestoreGLState(); - static bool IsUsingATIDrawBuffers(); - static bool HaveStencilBuffer(); - - static void SetZBufferRender(); // sets rendering of the zbuffer using MRTs - static u32 GetZBufferTarget(); - - static void SetRenderMode(RenderMode mode); - static RenderMode GetRenderMode(); - - static void SetRenderTarget(u32 targ); // if targ is 0, sets to original render target - static void SetDepthTarget(u32 targ); - static void SetFramebuffer(u32 fb); - static u32 GetRenderTarget(); - - // Finish up the current frame, print some stats - static void Swap(const TRectangle& rc); - - static bool SaveRenderTarget(const char* filename, int jpeg); -}; - -#endif +#ifndef GCOGL_RENDER +#define GCOGL_RENDER + +#include "TextureMngr.h" + +extern CGcontext g_cgcontext; +extern CGprofile g_cgvProf, g_cgfProf; +extern u32 g_AAx, g_AAy; // anti-aliasing + +class Renderer +{ + static void FlushZBufferAlphaToTarget(); + +public: + enum RenderMode + { + RM_Normal=0, // normal target as color0, ztarget as color1 + RM_ZBufferOnly, // zbuffer as color 0 + RM_ZBufferAlpha // zbuffer as color0, also will dump alpha info to regular target once mode is switched + // use stencil buffer to indicate what pixels were written + }; + + static bool Create2(); + static void Shutdown(); + + // initialize opengl standard values (like viewport) + static bool Initialize(); + + static void AddMessage(const char* str, u32 ms); + static void ProcessMessages(); // draw the current messages on the screen + static void DrawText(const char* pstr, int left, int top, u32 color); + static void SetAA(int aa); // sets the anti-aliasing level + + static void ReinitView(int nNewWidth, int nNewHeight); + + static int GetTargetWidth(); + static int GetTargetHeight(); + static bool CanBlendLogicOp(); + static void SetCgErrorOutput(bool bOutput); + + static void ResetGLState(); + static void RestoreGLState(); + static bool IsUsingATIDrawBuffers(); + static bool HaveStencilBuffer(); + + static void 
SetZBufferRender(); // sets rendering of the zbuffer using MRTs + static u32 GetZBufferTarget(); + + static void SetRenderMode(RenderMode mode); + static RenderMode GetRenderMode(); + + static void SetRenderTarget(u32 targ); // if targ is 0, sets to original render target + static void SetDepthTarget(u32 targ); + static void SetFramebuffer(u32 fb); + static u32 GetRenderTarget(); + + // Finish up the current frame, print some stats + static void Swap(const TRectangle& rc); + + static bool SaveRenderTarget(const char* filename, int jpeg); +}; + +#endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/TextureDecoder.cpp b/Source/Plugins/Plugin_VideoOGL/Src/TextureDecoder.cpp index 730cb7ab60..0dcd3ff8c6 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/TextureDecoder.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/TextureDecoder.cpp @@ -19,19 +19,20 @@ #include "Common.h" #include "TextureDecoder.h" +#include "LookUpTables.h" // TRAM u8 texMem[TMEM_SIZE]; ////////////////////////////////////////////////////////////////////////// -// Gamecube texture decoder +// Gamecube/Wii texture decoder ////////////////////////////////////////////////////////////////////////// -// Decodes all known Gamecube texture formats. +// Decodes all known Gamecube/Wii texture formats. 
// by ector ////////////////////////////////////////////////////////////////////////// int TexDecoder_GetTexelSizeInNibbles(int format) { - switch(format&0x3f) { + switch (format & 0x3f) { case GX_TF_I4: return 1; case GX_TF_I8: return 2; case GX_TF_IA4: return 2; @@ -49,7 +50,7 @@ int TexDecoder_GetTexelSizeInNibbles(int format) int TexDecoder_GetBlockWidthInTexels(int format) { - switch(format) { + switch (format) { case GX_TF_I4: return 8; case GX_TF_I8: return 8; case GX_TF_IA4: return 8; @@ -80,9 +81,9 @@ int TexDecoder_GetPaletteSize(int format) inline u32 decode565(u16 val) { int r,g,b,a; - r=lut5to8[(val>>11)&0x1f]; - g=lut6to8[(val>>5 )&0x3f]; - b=lut5to8[(val )&0x1f]; + r=lut5to8[(val>>11) & 0x1f]; + g=lut6to8[(val>>5 ) & 0x3f]; + b=lut5to8[(val ) & 0x1f]; a=0xFF; return (a<<24) | (r<<16) | (g<<8) | b; } @@ -100,17 +101,17 @@ inline u32 decode5A3(u16 val) int r,g,b,a; if ((val&0x8000)) { - r=lut5to8[(val>>10)&0x1f]; - g=lut5to8[(val>>5 )&0x1f]; - b=lut5to8[(val )&0x1f]; + r=lut5to8[(val>>10) & 0x1f]; + g=lut5to8[(val>>5 ) & 0x1f]; + b=lut5to8[(val ) & 0x1f]; a=0xFF; } else { - a=lut3to8[(val>>12)&0x7]; - r=lut4to8[(val>>8 )&0xf]; - g=lut4to8[(val>>4 )&0xf]; - b=lut4to8[(val )&0xf]; + a=lut3to8[(val>>12) & 0x7]; + r=lut4to8[(val>>8 ) & 0xf]; + g=lut4to8[(val>>4 ) & 0xf]; + b=lut4to8[(val ) & 0xf]; } return (a<<24) | (r<<16) | (g<<8) | b; } @@ -132,7 +133,7 @@ inline int expand8888(const int j) inline void decodebytesI4(u32 *dst, u8 *src, int numbytes) { - for (int x=0; x>4]); @@ -142,28 +143,28 @@ inline void decodebytesI4(u32 *dst, u8 *src, int numbytes) inline void decodebytesI8(u32 *dst, u8 *src, int numbytes) { - for (int x=0; x>4)])); - *dst++ = decodeIA8(Common::swap16(tlut[(val&15)])); + *dst++ = decodeIA8(Common::swap16(tlut[val >> 4])); + *dst++ = decodeIA8(Common::swap16(tlut[val & 15])); break; case 1: - *dst++ = decode565(Common::swap16(tlut[(val>>4)])); - *dst++ = decode565(Common::swap16(tlut[(val&15)])); + *dst++ = 
decode565(Common::swap16(tlut[val >> 4])); + *dst++ = decode565(Common::swap16(tlut[val & 15])); break; case 2: - *dst++ = decode5A3(Common::swap16(tlut[(val>>4)])); - *dst++ = decode5A3(Common::swap16(tlut[(val&15)])); + *dst++ = decode5A3(Common::swap16(tlut[val >> 4])); + *dst++ = decode5A3(Common::swap16(tlut[val & 15])); break; case 3: //ERROR *dst++ = 0xFFFF00FF; @@ -176,10 +177,10 @@ inline void decodebytesC4(u32 *dst, u8 *src, int numbytes, int tlutaddr, int tlu inline void decodebytesC8(u32 *dst, u8 *src, int numbytes, int tlutaddr, int tlutfmt) { u16 *tlut = (u16*)(texMem+tlutaddr); - for (int x=0; xcolor2); int blue1 = lut5to8[c1&0x1F]; int blue2 = lut5to8[c2&0x1F]; - int green1 = lut6to8[(c1>>5)&0x3F]; - int green2 = lut6to8[(c2>>5)&0x3F]; - int red1 = lut5to8[(c1>>11)&0x1F]; - int red2 = lut5to8[(c2>>11)&0x1F]; + int green1 = lut6to8[(c1>>5) & 0x3F]; + int green2 = lut6to8[(c2>>5) & 0x3F]; + int red1 = lut5to8[(c1>>11) & 0x1F]; + int red2 = lut5to8[(c2>>11) & 0x1F]; int colors[4]; - if (c1>c2) + if (c1 > c2) { - colors[0]=makecol(red1,green1,blue1,255); - colors[1]=makecol(red2,green2,blue2,255); - colors[2]=makecol(red1+(red2-red1)/3,green1+(green2-green1)/3,blue1+(blue2-blue1)/3,255); - colors[3]=makecol(red2+(red1-red2)/3,green2+(green1-green2)/3,blue2+(blue1-blue2)/3,255); + colors[0] = makecol(red1, green1, blue1, 255); + colors[1] = makecol(red2, green2, blue2, 255); + colors[2] = makecol(red1+(red2-red1)/3, green1+(green2-green1)/3, blue1+(blue2-blue1)/3, 255); + colors[3] = makecol(red2+(red1-red2)/3, green2+(green1-green2)/3, blue2+(blue1-blue2)/3, 255); } else { - colors[0]=makecol(red1,green1,blue1,255); - colors[1]=makecol(red2,green2,blue2,255); - colors[2]=makecol((red1+red2)/2,(green1+green2)/2,(blue1+blue2)/2,255); - colors[3]=makecol(0,0,0,0); //transparent + colors[0] = makecol(red1, green1, blue1, 255); + colors[1] = makecol(red2, green2, blue2, 255); + colors[2] = makecol((red1+red2)/2, (green1+green2)/2, (blue1+blue2)/2, 255); + 
colors[3] = makecol(0,0,0,0); //transparent } - for (int y=0; y<4; y++) + for (int y = 0; y < 4; y++) { int val = src->lines[y]; - for (int x=0; x<4; x++) + for (int x = 0; x < 4; x++) { - dst[x] = colors[(val>>6)&3]; - val<<=2; + dst[x] = colors[(val>>6) & 3]; + val <<= 2; } - dst+=pitch; + dst += pitch; } } @@ -327,7 +327,7 @@ void decodeDXTBlock(u32 *dst, DXTBlock *src, int pitch) //TODO: to save memory, don't blindly convert everything to argb8888 //also ARGB order needs to be swapped later, to accommodate modern hardware better //need to add DXT support too -TEXTUREFMT TexDecoder_Decode(u8 *dst, u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) +PC_TexFormat TexDecoder_Decode(u8 *dst, u8 *src, int width, int height, int texformat, int tlutaddr, int tlutfmt) { DVSTARTPROFILE(); @@ -335,108 +335,108 @@ TEXTUREFMT TexDecoder_Decode(u8 *dst, u8 *src, int width, int height, int texfor { case GX_TF_C4: { - for (int y=0; y -#include "BPStructs.h" - -struct TRectangle -{ - int left, top, right, bottom; -}; - -class TextureMngr -{ -public: - struct TCacheEntry - { - TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; } - - u32 texture; - u32 addr; - u32 hash; - u32 paletteHash; - u32 hashoffset; - u32 oldpixel; // used for simple cleanup - TexMode0 mode; // current filter and clamp modes that texture is set to - - int frameCount; - int w,h,fmt; - - bool isRenderTarget; // if render texture, then rendertex is filled with the direct copy of the render target - // later conversions would have to convert properly from rendertexfmt to texfmt - bool isUpsideDown; - bool isNonPow2; // if nonpow2, use GL_TEXTURE_2D, else GL_TEXTURE_RECTANGLE_NV - bool bHaveMipMaps; - - void SetTextureParameters(TexMode0& newmode); - void Destroy(); - void ConvertFromRenderTarget(u32 taddr, int twidth, int theight, int tformat, int tlutaddr, int tlutfmt); - }; 
- - struct DEPTHTARGET - { - DEPTHTARGET() : targ(0), framecount(0) {} - GLuint targ; - int framecount; - }; - -private: - typedef std::map TexCache; - - static u8 *temp; - static TexCache textures; - static std::map mapDepthTargets; - static int nTex2DEnabled, nTexRECTEnabled; - -public: - - static void Init(); - static void Cleanup(); - static void Shutdown(); - static void Invalidate(); - static TCacheEntry* Load(int texstage, u32 address, int width, int height, int format, int tlutaddr, int tlutfmt); - static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, TRectangle *source); - - static void EnableTex2D(int stage); - static void EnableTexRECT(int stage); - static void DisableStage(int stage); // sets active texture -}; - -#endif +#ifndef _TextureMngr_H +#define _TextureMngr_H + +#include +#include "BPStructs.h" + +struct TRectangle +{ + int left, top, right, bottom; +}; + +class TextureMngr +{ +public: + struct TCacheEntry + { + TCacheEntry() : texture(0), addr(0), hash(0), w(0), h(0), isRenderTarget(false), isUpsideDown(false), isNonPow2(true), bHaveMipMaps(false) { mode.hex = 0xFCFCFCFC; } + + u32 texture; + u32 addr; + u32 hash; + u32 paletteHash; + u32 hashoffset; + u32 oldpixel; // used for simple cleanup + TexMode0 mode; // current filter and clamp modes that texture is set to + + int frameCount; + int w,h,fmt; + + bool isRenderTarget; // if render texture, then rendertex is filled with the direct copy of the render target + // later conversions would have to convert properly from rendertexfmt to texfmt + bool isUpsideDown; + bool isNonPow2; // if nonpow2, use GL_TEXTURE_2D, else GL_TEXTURE_RECTANGLE_NV + bool bHaveMipMaps; + + void SetTextureParameters(TexMode0& newmode); + void Destroy(); + void ConvertFromRenderTarget(u32 taddr, int twidth, int theight, int tformat, int tlutaddr, int tlutfmt); + }; + + struct DEPTHTARGET + { + DEPTHTARGET() : targ(0), framecount(0) {} + GLuint targ; 
+ int framecount; + }; + +private: + typedef std::map TexCache; + + static u8 *temp; + static TexCache textures; + static std::map mapDepthTargets; + static int nTex2DEnabled, nTexRECTEnabled; + +public: + + static void Init(); + static void Cleanup(); + static void Shutdown(); + static void Invalidate(); + static TCacheEntry* Load(int texstage, u32 address, int width, int height, int format, int tlutaddr, int tlutfmt); + static void CopyRenderTargetToTexture(u32 address, bool bFromZBuffer, bool bIsIntensityFmt, u32 copyfmt, bool bScaleByHalf, TRectangle *source); + + static void EnableTex2D(int stage); + static void EnableTexRECT(int stage); + static void DisableStage(int stage); // sets active texture +}; + +#endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp index 5e256c7560..eadb267a86 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader.cpp @@ -21,6 +21,7 @@ #include "x64Emitter.h" +#include "Render.h" #include "VertexLoader.h" #include "BPStructs.h" #include "DataReader.h" diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h index 663afbaffe..f1b057950b 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexLoader_Color.h @@ -18,6 +18,8 @@ #ifndef _VERTEXLOADERCOLOR_H #define _VERTEXLOADERCOLOR_H +#include "LookUpTables.h" + #define RSHIFT 0 #define GSHIFT 8 #define BSHIFT 16 diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.cpp index 646f6d3768..ff9cb4bb96 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.cpp @@ -15,13 +15,13 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ - #include "Globals.h" 
#include -#include "Render.h" -#include "VertexShader.h" -#include "VertexShaderManager.h" + #include "BPStructs.h" +#include "VertexShader.h" + +// This is the tricky one to get rid off. #include "VertexLoader.h" static char text[16384]; @@ -29,9 +29,9 @@ static char text[16384]; #define LIGHTS_POS "" -char* GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha); +char *GenerateLightShader(char* p, int index, const LitChannel& chan, const char* dest, int coloralpha); -bool GenerateVertexShader(VERTEXSHADER& vs, u32 components) +char *GenerateVertexShader(u32 components, bool has_zbuffer_target) { DVSTARTPROFILE(); @@ -44,7 +44,7 @@ bool GenerateVertexShader(VERTEXSHADER& vs, u32 components) if( xfregs.nNumChans > 1 ) lightMask |= xfregs.colChans[1].color.GetFullLightMask() | xfregs.colChans[1].alpha.GetFullLightMask(); - bool bOutputZ = bpmem.ztex2.op==ZTEXTURE_ADD || Renderer::GetZBufferTarget()!=0; + bool bOutputZ = bpmem.ztex2.op==ZTEXTURE_ADD || has_zbuffer_target; int ztexcoord = -1; char *p = text; @@ -95,15 +95,15 @@ bool GenerateVertexShader(VERTEXSHADER& vs, u32 components) // if outputting Z, embed the Z coordinate in the w component of a texture coordinate // if number of tex gens occupies all the texture coordinates, use the last tex coord // otherwise use the next available tex coord - for(int i = 0; i < xfregs.numTexGens; ++i) { + for (int i = 0; i < xfregs.numTexGens; ++i) { WRITE(p," float%d tex%d : TEXCOORD%d;\n", (i==(xfregs.numTexGens-1)&&bOutputZ)?4:3, i, i); } - if( bOutputZ && xfregs.numTexGens == 0 ) { + if (bOutputZ && xfregs.numTexGens == 0) { ztexcoord = 0; WRITE(p," float4 tex%d : TEXCOORD%d;\n", ztexcoord, ztexcoord); } - else if( bOutputZ ) - ztexcoord = xfregs.numTexGens-1; + else if (bOutputZ) + ztexcoord = xfregs.numTexGens - 1; WRITE(p,"};\n"); WRITE(p,"\n"); @@ -404,7 +404,7 @@ bool GenerateVertexShader(VERTEXSHADER& vs, u32 components) WRITE(p,"return o;\n}\n\0"); - return 
VertexShaderMngr::CompileVertexShader(vs, text); + return text; } // coloralpha - 1 if color, 2 if alpha diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.h b/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.h index 1207eaec02..8865601cd1 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShader.h @@ -18,215 +18,29 @@ #ifndef GCOGL_VERTEXSHADER_H #define GCOGL_VERTEXSHADER_H +#include "XFMemory.h" -///////////// -// Lighting -///////////// -#define XF_TEXPROJ_ST 0 -#define XF_TEXPROJ_STQ 1 +// shader variables +#define I_POSNORMALMATRIX "cpnmtx" +#define I_PROJECTION "cproj" +#define I_MATERIALS "cmtrl" +#define I_LIGHTS "clights" +#define I_TEXMATRICES "ctexmtx" +#define I_TRANSFORMMATRICES "ctrmtx" +#define I_NORMALMATRICES "cnmtx" +#define I_POSTTRANSFORMMATRICES "cpostmtx" +#define I_FOGPARAMS "cfog" -#define XF_TEXINPUT_AB11 0 -#define XF_TEXINPUT_ABC1 1 +#define C_POSNORMALMATRIX 0 +#define C_PROJECTION (C_POSNORMALMATRIX+6) +#define C_MATERIALS (C_PROJECTION+4) +#define C_LIGHTS (C_MATERIALS+4) +#define C_TEXMATRICES (C_LIGHTS+40) +#define C_TRANSFORMMATRICES (C_TEXMATRICES+24) +#define C_NORMALMATRICES (C_TRANSFORMMATRICES+64) +#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES+32) +#define C_FOGPARAMS (C_POSTTRANSFORMMATRICES+64) -#define XF_TEXGEN_REGULAR 0 -#define XF_TEXGEN_EMBOSS_MAP 1 // used when bump mapping -#define XF_TEXGEN_COLOR_STRGBC0 2 -#define XF_TEXGEN_COLOR_STRGBC1 3 - -#define XF_SRCGEOM_INROW 0 // input is abc -#define XF_SRCNORMAL_INROW 1 // input is abc -#define XF_SRCCOLORS_INROW 2 -#define XF_SRCBINORMAL_T_INROW 3 // input is abc -#define XF_SRCBINORMAL_B_INROW 4 // input is abc -#define XF_SRCTEX0_INROW 5 -#define XF_SRCTEX1_INROW 6 -#define XF_SRCTEX2_INROW 7 -#define XF_SRCTEX3_INROW 8 -#define XF_SRCTEX4_INROW 9 -#define XF_SRCTEX5_INROW 10 -#define XF_SRCTEX6_INROW 11 -#define XF_SRCTEX7_INROW 12 - -struct Light -{ - u32 useless[3]; - u32 color; //rgba - float 
a0; //attenuation - float a1; - float a2; - float k0; //k stuff - float k1; - float k2; - union - { - struct { - float dpos[3]; - float ddir[3]; // specular lights only - }; - struct { - float sdir[3]; - float shalfangle[3]; // specular lights only - }; - }; -}; - -#define LIGHTDIF_NONE 0 -#define LIGHTDIF_SIGN 1 -#define LIGHTDIF_CLAMP 2 - -#define LIGHTATTN_SPEC 0 // specular attenuation -#define LIGHTATTN_SPOT 1 // distance/spotlight attenuation -#define LIGHTATTN_NONE 2 -#define LIGHTATTN_DIR 3 - -union LitChannel -{ - struct - { - unsigned matsource : 1; - unsigned enablelighting : 1; - unsigned lightMask0_3 : 4; - unsigned ambsource : 1; - unsigned diffusefunc : 2; // LIGHTDIF_X - unsigned attnfunc : 2; // LIGHTATTN_X - unsigned lightMask4_7 : 4; - }; - struct - { - u32 hex : 15; - u32 unused : 17; - }; - struct - { - u32 dummy0 : 7; - u32 lightparams : 4; - u32 dummy1 : 21; - }; - unsigned int GetFullLightMask() const - { - return enablelighting ? (lightMask0_3 | (lightMask4_7 << 4)) : 0; - } -}; - -struct ColorChannel -{ - u32 ambColor; - u32 matColor; - LitChannel color; - LitChannel alpha; -}; - - -union INVTXSPEC -{ - struct - { - unsigned numcolors : 2; - unsigned numnormals : 2; // 0 - nothing, 1 - just normal, 2 - normals and binormals - unsigned numtextures : 4; - unsigned unused : 24; - }; - u32 hex; -}; - -union TexMtxInfo -{ - struct - { - unsigned unknown : 1; - unsigned projection : 1; // XF_TEXPROJ_X - unsigned inputform : 2; // XF_TEXINPUT_X - unsigned texgentype : 3; // XF_TEXGEN_X - unsigned sourcerow : 5; // XF_SRCGEOM_X - unsigned embosssourceshift : 3; // what generated texcoord to use - unsigned embosslightshift : 3; // light index that is used - }; - u32 hex; -}; - -union PostMtxInfo -{ - struct - { - unsigned index : 6; // base row of dual transform matrix - unsigned unused : 2; - unsigned normalize : 1; // normalize before send operation - }; - u32 hex; -}; - -struct TexCoordInfo -{ - TexMtxInfo texmtxinfo; - PostMtxInfo postmtxinfo; 
-}; - -struct XFRegisters -{ - int numTexGens; - int nNumChans; - INVTXSPEC hostinfo; // number of textures,colors,normals from vertex input - ColorChannel colChans[2]; //C0A0 C1A1 - TexCoordInfo texcoords[8]; - bool bEnableDualTexTransform; -}; - -#define XFMEM_SIZE 0x8000 -#define XFMEM_POSMATRICES 0x000 -#define XFMEM_POSMATRICES_END 0x100 -#define XFMEM_NORMALMATRICES 0x400 -#define XFMEM_NORMALMATRICES_END 0x460 -#define XFMEM_POSTMATRICES 0x500 -#define XFMEM_POSTMATRICES_END 0x600 -#define XFMEM_LIGHTS 0x600 -#define XFMEM_LIGHTS_END 0x680 - -// Matrix indices -union TMatrixIndexA -{ - struct - { - unsigned PosNormalMtxIdx : 6; - unsigned Tex0MtxIdx : 6; - unsigned Tex1MtxIdx : 6; - unsigned Tex2MtxIdx : 6; - unsigned Tex3MtxIdx : 6; - }; - struct - { - u32 Hex : 30; - u32 unused : 2; - }; -}; - -union TMatrixIndexB -{ - struct - { - unsigned Tex4MtxIdx : 6; - unsigned Tex5MtxIdx : 6; - unsigned Tex6MtxIdx : 6; - unsigned Tex7MtxIdx : 6; - }; - struct - { - u32 Hex : 24; - u32 unused : 8; - }; -}; - -struct VERTEXSHADER -{ - VERTEXSHADER() : glprogid(0) {} - GLuint glprogid; // opengl program id - -#ifdef _DEBUG - std::string strprog; -#endif -}; - -bool GenerateVertexShader(VERTEXSHADER& vs, u32 components); - -extern XFRegisters xfregs; +char *GenerateVertexShader(u32 components, bool has_zbuffer_target); #endif diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp index 4efe09fbf7..096afabe08 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexShaderManager.cpp @@ -22,9 +22,9 @@ #include "Render.h" #include "VertexShader.h" #include "VertexShaderManager.h" -#include "BPStructs.h" #include "VertexLoader.h" - +#include "BPMemory.h" +#include "XFMemory.h" VertexShaderMngr::VSCache VertexShaderMngr::vshaders; VERTEXSHADER* VertexShaderMngr::pShaderLast = NULL; @@ -39,8 +39,6 @@ static int s_nMaxVertexInstructions; 
//////////////////////// // Internal Variables // //////////////////////// -XFRegisters xfregs; -static u32 xfmem[XFMEM_SIZE]; static float s_fMaterials[16]; // track changes @@ -51,6 +49,14 @@ static int nNormalMatricesChanged[2]; // min,max static int nPostTransformMatricesChanged[2]; // min,max static int nLightsChanged[2]; // min,max +void VertexShaderMngr::SetVSConstant4f(int const_number, float f1, float f2, float f3, float f4) { + glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, const_number, f1, f2, f3, f4); +} + +void VertexShaderMngr::SetVSConstant4fv(int const_number, const float *f) { + glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, const_number, f); +} + void VertexShaderMngr::Init() { nTransformMatricesChanged[0] = nTransformMatricesChanged[1] = -1; @@ -93,10 +99,11 @@ VERTEXSHADER* VertexShaderMngr::GetShader(u32 components) } VSCacheEntry& entry = vshaders[uid]; - - if (!GenerateVertexShader(entry.shader, components)) { + char *code = GenerateVertexShader(components, Renderer::GetZBufferTarget() != 0); + if (!code || !VertexShaderMngr::CompileVertexShader(entry.shader, code)) { ERROR_LOG("failed to create vertex shader\n"); - } + return NULL; + } //Make an entry in the table entry.frameCount=frameCount; @@ -210,7 +217,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs) int endn = (nTransformMatricesChanged[1]+3)/4; const float* pstart = (const float*)&xfmem[startn*4]; for(int i = startn; i < endn; ++i, pstart += 4) - glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, C_TRANSFORMMATRICES+i, pstart); + SetVSConstant4fv(C_TRANSFORMMATRICES+i, pstart); nTransformMatricesChanged[0] = nTransformMatricesChanged[1] = -1; } if (nNormalMatricesChanged[0] >= 0) { @@ -219,7 +226,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs) const float* pnstart = (const float*)&xfmem[XFMEM_NORMALMATRICES+3*startn]; for(int i = startn; i < endn; ++i, pnstart += 3) - glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, C_NORMALMATRICES+i, pnstart); + 
SetVSConstant4fv(C_NORMALMATRICES+i, pnstart); nNormalMatricesChanged[0] = nNormalMatricesChanged[1] = -1; } @@ -229,7 +236,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs) int endn = (nPostTransformMatricesChanged[1]+3)/4; const float* pstart = (const float*)&xfmem[XFMEM_POSTMATRICES+startn*4]; for(int i = startn; i < endn; ++i, pstart += 4) - glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, C_POSTTRANSFORMMATRICES+i, pstart); + SetVSConstant4fv(C_POSTTRANSFORMMATRICES+i, pstart); } if (nLightsChanged[0] >= 0) { @@ -240,16 +247,16 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs) for(int i = istart; i < iend; ++i) { u32 color = *(const u32*)(xfmemptr+3); - glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, C_LIGHTS+5*i, + SetVSConstant4f(C_LIGHTS+5*i, ((color>>24)&0xFF)/255.0f, ((color>>16)&0xFF)/255.0f, ((color>>8)&0xFF)/255.0f, ((color)&0xFF)/255.0f); xfmemptr += 4; for(int j = 0; j < 4; ++j, xfmemptr += 3) { if( j == 1 && fabs(xfmemptr[0]) < 0.00001f && fabs(xfmemptr[1]) < 0.00001f && fabs(xfmemptr[2]) < 0.00001f) { // dist atten, make sure not equal to 0!!! - glProgramEnvParameter4fARB(GL_VERTEX_PROGRAM_ARB, C_LIGHTS+5*i+j+1, 0.00001f, xfmemptr[1], xfmemptr[2], 0); + SetVSConstant4f(C_LIGHTS+5*i+j+1, 0.00001f, xfmemptr[1], xfmemptr[2], 0); } else - glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB, C_LIGHTS+5*i+j+1, xfmemptr); + SetVSConstant4fv(C_LIGHTS+5*i+j+1, xfmemptr); } } @@ -259,7 +266,7 @@ void VertexShaderMngr::SetConstants(VERTEXSHADER& vs) if (nMaterialsChanged) { for(int i = 0; i < 4; ++i) { if( nMaterialsChanged&(1<