diff --git a/plugins/zzogl-pg/opengl/GS.h b/plugins/zzogl-pg/opengl/GS.h index 87875b9ffe..fc578dd1e5 100644 --- a/plugins/zzogl-pg/opengl/GS.h +++ b/plugins/zzogl-pg/opengl/GS.h @@ -36,6 +36,7 @@ using namespace std; class GLWindow { + private: #ifdef GL_X11_WINDOW Display *glDisplay; @@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem; (((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf) // PS2 vertex + struct VertexGPU { // gained from XYZ2, XYZ3, XYZF2, XYZF3, @@ -264,6 +266,7 @@ struct VertexGPU }; // Almost same with previous, controlled by prim.fst flagf + struct Vertex { u16 x, y, f, resv0; // note: xy is 12d3 @@ -281,7 +284,8 @@ extern int ppf; // PSM values // PSM types == Texture Storage Format -enum PSM_value{ +enum PSM_value +{ PSMCT32 = 0, // 000000 PSMCT24 = 1, // 000001 PSMCT16 = 2, // 000010 @@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);} //----------------------- Data from registers ----------------------- -typedef union { +typedef union +{ s64 SD; u64 UD; s32 SL[2]; @@ -340,7 +345,9 @@ typedef union { } reg64; /* general purpose regs structs */ -typedef struct { + +typedef struct +{ int fbp; int fbw; int fbh; @@ -349,7 +356,8 @@ typedef struct { } frameInfo; // Create frame structure from known data -inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){ +inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm) +{ frameInfo frame; frame.fbp = fbp; frame.fbw = fbw; @@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){ return frame; } -typedef struct { +typedef struct +{ u16 prim; - union { - struct { + union + { + struct + { u16 iip : 1; u16 tme : 1; u16 fge : 1; @@ -380,8 +391,10 @@ typedef struct { extern primInfo *prim; -typedef union { - struct { +typedef union +{ + struct + { u32 ate : 1; u32 atst : 3; u32 aref : 8; @@ -395,13 +408,15 @@ typedef union { u32 _val; } pixTest; -typedef struct { +typedef struct +{ int bp; int bw; int psm; } bufInfo; -typedef struct { +typedef struct +{ int tbp0; int tbw; int cbp; @@ -432,13 +447,17 @@ union tex_0_info u64 csa : 5; u64 cld : 3; }; + u64 _u64; u32 _u32[2]; u16 _u16[4]; u8 _u8[8]; tex_0_info(u64 data) { _u64 = data; } + tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; } + tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; } + u32 tbw_mult() { if (tbw == 0) @@ -446,26 +465,34 @@ union tex_0_info else return ((u32)tbw << 6); } + u32 psm_fix() { - // printf ("psm %d\n", psm); - if ( psm == 9 ) return 1; + // printf ("psm %d\n", psm); + if (psm == 9) return 1; + return psm; } + u32 tw_exp() { - if (tw > 10) return (1<<10); - return (1< 10) return (1 << 10); + + return (1 << tw); } + u32 th_exp() { - if (th > 10) return (1<<10); - return (1< 10) return (1 << 10); + + return (1 << th); } + u32 cpsm_fix() { return cpsm & 0xe; } + u32 csa_fix() { if (cpsm < 2) @@ -480,7 +507,8 @@ union tex_0_info #define TEX_HIGHLIGHT 2 #define TEX_HIGHLIGHT2 3 -typedef struct { +typedef struct +{ int lcm; int mxl; int mmag; @@ -490,7 +518,8 @@ typedef struct { int k; } tex1Info; -typedef struct { +typedef struct +{ int wms; int wmt; int minu; @@ -499,24 +528,28 @@ typedef struct { int maxv; } clampInfo; -typedef struct { +typedef struct +{ int cbw; int cou; int cov; } clutInfo; -typedef struct { +typedef struct +{ int tbp[3]; int tbw[3]; } miptbpInfo; -typedef struct { +typedef struct +{ u16 aem; u8 ta[2]; float fta[2]; } texaInfo; -typedef struct { +typedef struct +{ int sx; int sy; int dx; @@ -524,9 +557,12 @@ typedef struct { int dir; } trxposInfo; -typedef struct { - union { - struct { +typedef struct +{ + union + { + struct + { u8 a : 2; u8 b : 2; u8 c : 2; @@ -538,17 +574,20 @@ typedef struct { u8 fix : 8; } alphaInfo; -typedef struct { +typedef struct +{ u16 zbp; // u16 address / 64 u8 psm; u8 zmsk; } zbufInfo; -typedef struct { +typedef struct +{ int fba; } fbaInfo; -typedef struct { +typedef struct +{ Vertex gsvertex[3]; u32 rgba; float q; @@ -593,36 +632,41 @@ extern GSinternal gs; static __forceinline u16 RGBA32to16(u32 c) { return (u16)((((c) & 0x000000f8) >> 3) | - (((c) & 0x0000f800) >> 6) | - (((c) & 0x00f80000) >> 9) | - (((c) & 0x80000000) >> 16)); + (((c) & 0x0000f800) >> 6) | + (((c) & 0x00f80000) >> 9) | + (((c) & 0x80000000) >> 16)); } static __forceinline u32 RGBA16to32(u16 c) { - return (((c) & 0x001f) << 3) | - (((c) & 0x03e0) << 6) | - (((c) & 0x7c00) << 9) | - (((c) & 0x8000) ? 0xff000000 : 0); + return (((c) & 0x001f) << 3) | + (((c) & 0x03e0) << 6) | + (((c) & 0x7c00) << 9) | + (((c) & 0x8000) ? 0xff000000 : 0); } // converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits) // f is a u16 -static __forceinline u16 Float16ToBYTE(u16 f) { +static __forceinline u16 Float16ToBYTE(u16 f) +{ //assert( !(f & 0x8000) ); - if( f & 0x8000 ) return 0; + if (f & 0x8000) return 0; + + u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15)); - u16 d = ((((f&0x3ff)|0x400)*255)>>(10-((f>>10)&0x1f)+15)); return d > 255 ? 255 : d; } -static __forceinline u16 Float16ToALPHA(u16 f) { +static __forceinline u16 Float16ToALPHA(u16 f) +{ //assert( !(f & 0x8000) ); - if( f & 0x8000 ) return 0; + if (f & 0x8000) return 0; // round up instead of down (crash and burn), too much and charlie breaks - u16 d = (((((f&0x3ff)|0x400))*255)>>(10-((f>>10)&0x1f)+15)); - d = (d)>>1; + u16 d = (((((f & 0x3ff) | 0x400)) * 255) >> (10 - ((f >> 10) & 0x1f) + 15)); + + d = (d) >> 1; + return d > 255 ? 255 : d; } @@ -650,12 +694,14 @@ static __forceinline u16 Float16ToALPHA(u16 f) { inline float Clamp(float fx, float fmin, float fmax) { - if( fx < fmin ) return fmin; + if (fx < fmin) return fmin; + return fx > fmax ? fmax : fx; } // PSMT16, 16S have shorter color per pixel, also cluted textures with half storage. -inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) { +inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) +{ if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1)) return true; else @@ -670,7 +716,7 @@ inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) { static __forceinline int ZZOglGet_tbp0_TexBits(u32 data) { //return tex_0_info(data).tbp0; - return (data ) & 0x3fff; + return (data) & 0x3fff; } // Obtain tbw -- Texture Buffer Width (Texels/64) -- from data, do not multiply to 64. Bits 14-19 @@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data) { //return text_0_info(data).tbw_mult(); int result = ZZOglGet_tbw_TexBits(data); + if (result == 0) return 64; else @@ -697,7 +744,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data) static __forceinline int ZZOglGet_psm_TexBits(u32 data) { //return tex_0_info(data).psm; - return ((data >> 20) & 0x3f); + return ((data >> 20) & 0x3f); } // Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9 @@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data) //return tex_0_info(data).psm_fix(); int result = ZZOglGet_psm_TexBits(data) ; // printf ("result %d\n", result); - if ( result == 9 ) result = 1; + + if (result == 9) result = 1; + return result; } @@ -715,7 +764,7 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data) static __forceinline u16 ZZOglGet_tw_TexBits(u32 data) { //return tex_0_info(data).tw; - return ((data >> 26) & 0xf); + return ((data >> 26) & 0xf); } // Obtain tw -- Texture Width (Width = TW) -- from data. Width could newer be more than 1024. @@ -723,8 +772,10 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data) { //return tex_0_info(data).tw_exp(); u16 result = ZZOglGet_tw_TexBits(data); + if (result > 10) result = 10; - return (1< 10) result = 10; - return (1<> 2) & 0x1); + return ((data >> 2) & 0x1); } // Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2) -- from data. Bit 4-5 @@ -759,7 +812,7 @@ static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data) static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data) { //return tex_0_info(0, data).tfx; - return ((data >> 3) & 0x3); + return ((data >> 3) & 0x3); } // Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18 @@ -767,7 +820,7 @@ static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data) static __forceinline int ZZOglGet_cbp_TexBits(u32 data) { //return tex_0_info(0, data).cbp; - return ((data >> 5) & 0x3fff); + return ((data >> 5) & 0x3fff); } // Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use. @@ -794,7 +847,7 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data) { //return tex_0_info(0, data).csa_fix(); - if ((data & 0x700000) == 0 ) // it is cpsm < 2 check + if ((data & 0x700000) == 0) // it is cpsm < 2 check return ((data >> 24) & 0xf); else return ((data >> 24) & 0x1f); @@ -805,79 +858,81 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data) static __forceinline u8 ZZOglGet_cld_TexBits(u32 data) { //return tex_0_info(0, data).cld; - return ((data >> 29) & 0x7); + return ((data >> 29) & 0x7); } //-------------------------- frames // FrameInfo bits. // Obtain fbp -- frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-15 -inline int -ZZOglGet_fbp_FrameBits(u32 data) { - return ((data ) & 0x1ff); +inline int ZZOglGet_fbp_FrameBits(u32 data) +{ + return ((data) & 0x1ff); } -// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64. -inline int -ZZOglGet_fbp_FrameBitsMult(u32 data) { +// So we got address / 64, henceby frame fbp and tex tbp have the same dimension -- "real address" is x64. +inline int ZZOglGet_fbp_FrameBitsMult(u32 data) +{ return (ZZOglGet_fbp_FrameBits(data) << 5); } // Obtain fbw -- width (Texels/64) -- from data. Bits 16-23 -inline int -ZZOglGet_fbw_FrameBits(u32 data) { +inline int ZZOglGet_fbw_FrameBits(u32 data) +{ return ((data >> 16) & 0x3f); } -inline int -ZZOglGet_fbw_FrameBitsMult(u32 data) { +inline int ZZOglGet_fbw_FrameBitsMult(u32 data) +{ return (ZZOglGet_fbw_FrameBits(data) << 6); } // Obtain psm -- Pixel Storage Format -- from data. Bits 24-29. // (data & 0x3f000000) >> 24 -inline int -ZZOglGet_psm_FrameBits(u32 data) { - return ((data >> 24) & 0x3f); +inline int ZZOglGet_psm_FrameBits(u32 data) +{ + return ((data >> 24) & 0x3f); } // Function for calculating overal height from frame data. -inline int -ZZOgl_fbh_Calc (int fbp, int fbw, int psm) { - int fbh = ( 1024 * 1024 - 64 * fbp ) / fbw; +inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm) +{ + int fbh = (1024 * 1024 - 64 * fbp) / fbw; fbh &= ~0x1f; - if (PSMT_ISHALF(psm)) - fbh *= 2; - if (fbh > 1024) - fbh = 1024; + + if (PSMT_ISHALF(psm)) fbh *= 2; + if (fbh > 1024) fbh = 1024; + return fbh ; } -inline int -ZZOgl_fbh_Calc (frameInfo frame) { + +inline int ZZOgl_fbh_Calc(frameInfo frame) +{ return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm); } // Calculate fbh from data, It does not set in register -inline int -ZZOglGet_fbh_FrameBitsCalc (u32 data) { +inline int ZZOglGet_fbh_FrameBitsCalc(u32 data) +{ int fbh = 0; int fbp = ZZOglGet_fbp_FrameBits(data); int fbw = ZZOglGet_fbw_FrameBits(data); int psm = ZZOglGet_psm_FrameBits(data); - if (fbw > 0) - fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ; + + if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ; + return fbh ; } // Obtain fbm -- frame mask -- from data. All higher word. -inline u32 -ZZOglGet_fbm_FrameBits(u32 data) { - return (data); +inline u32 ZZOglGet_fbm_FrameBits(u32 data) +{ + return (data); } // Obtain fbm -- frame mask -- from data. All higher word. Fixed from psm == PCMT24 (without alpha) -inline u32 -ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) { +inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) +{ if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1) return (dataHI | 0xff000000); else @@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) { } // obtain colormask RED -inline u32 -ZZOglGet_fbmRed_FrameBits(u32 data) { +inline u32 ZZOglGet_fbmRed_FrameBits(u32 data) +{ return (data & 0xff); } // obtain colormask Green -inline u32 -ZZOglGet_fbmGreen_FrameBits(u32 data) { +inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data) +{ return ((data >> 8) & 0xff); } // obtain colormask Blue -inline u32 -ZZOglGet_fbmBlue_FrameBits(u32 data) { +inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data) +{ return ((data >> 16) & 0xff); } // obtain colormask Alpha -inline u32 -ZZOglGet_fbmAlpha_FrameBits(u32 data) { +inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data) +{ return ((data >> 24) & 0xff); } // obtain colormask Alpha -inline u32 -ZZOglGet_fbmHighByte(u32 data) { +inline u32 ZZOglGet_fbmHighByte(u32 data) +{ return (!!(data & 0x80000000)); } - - //-------------------------- tex0 comparison // Check if old and new tex0 registers have only clut difference -inline bool -ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) { +inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex) +{ return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f))); } // Check if the CLUT registers are same, except CLD -inline bool -ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) { +inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex) +{ return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0)); } // Check if CLUT storage mode is not changed (CSA, CSM and CSPM) -inline bool -ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) { +inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex) +{ return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000)); } diff --git a/plugins/zzogl-pg/opengl/GifTransfer.h b/plugins/zzogl-pg/opengl/GifTransfer.h index a2fc86445f..52b6218f54 100644 --- a/plugins/zzogl-pg/opengl/GifTransfer.h +++ b/plugins/zzogl-pg/opengl/GifTransfer.h @@ -39,29 +39,33 @@ union GIFTag { u64 ai64[2]; u32 ai32[4]; + struct { - u32 NLOOP:15; - u32 EOP:1; - u32 _PAD1:16; - u32 _PAD2:14; - u32 PRE:1; - u32 PRIM:11; - u32 FLG:2; // enum GIF_FLG - u32 NREG:4; - u64 REGS:64; + u32 NLOOP : 15; + u32 EOP : 1; + u32 _PAD1 : 16; + u32 _PAD2 : 14; + u32 PRE : 1; + u32 PRIM : 11; + u32 FLG : 2; // enum GIF_FLG + u32 NREG : 4; + u64 REGS : 64; }; + void set(u32 *data) { - for(int i = 0; i <= 3; i++) + for (int i = 0; i <= 3; i++) { ai32[i] = data[i]; } } + GIFTag(u32 *data) { set(data); } + GIFTag(){ ai64[0] = 0; ai64[1] = 0; } }; @@ -101,13 +105,12 @@ typedef struct // Hmm.... nreg = tag.NREG << 2; if (nreg == 0) nreg = 64; - regs = tag.REGS; reg = 0; - // ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d", - // data[3], data[2], data[1], data[0], - // path->eop, path->nloop, mode, path->nreg, tag.PRE); + // ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d", + // data[3], data[2], data[1], data[0], + // path->eop, path->nloop, mode, path->nreg, tag.PRE); } u32 GetReg() @@ -124,49 +127,48 @@ typedef struct reg = 0; nloop--; - if (nloop == 0) - { - return false; - } + if (nloop == 0) return false; } + return true; } + #else - void setTag(u32 *data) - { - tag.set(data); + void setTag(u32 *data) + { + tag.set(data); - nloop = tag.NLOOP; - eop = tag.EOP; - u32 tagpre = tag.PRE; - u32 tagprim = tag.PRIM; - u32 tagflg = tag.FLG; + nloop = tag.NLOOP; + eop = tag.EOP; + u32 tagpre = tag.PRE; + u32 tagprim = tag.PRIM; + u32 tagflg = tag.FLG; - // Hmm.... - nreg = tag.NREG << 2; - if (nreg == 0) nreg = 64; + // Hmm.... + nreg = tag.NREG << 2; + if (nreg == 0) nreg = 64; - // ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d", - // data[3], data[2], data[1], data[0], - // path->eop, path->nloop, tagflg, path->nreg, tagpre); + // ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d", + // data[3], data[2], data[1], data[0], + // path->eop, path->nloop, tagflg, path->nreg, tagpre); - mode = tagflg; + mode = tagflg; - switch (mode) - { - case GIF_FLG_PACKED: - regs = *(u64 *)(data+2); - regn = 0; - if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim); + switch (mode) + { + case GIF_FLG_PACKED: + regs = *(u64 *)(data + 2); + regn = 0; + if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim); + break; - break; + case GIF_FLG_REGLIST: + regs = *(u64 *)(data + 2); + regn = 0; + break; + } + } - case GIF_FLG_REGLIST: - regs = *(u64 *)(data+2); - regn = 0; - break; - } - } #endif } pathInfo; diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 1cc7a24a64..85086602f0 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -23,9 +23,9 @@ #include // works only when base is a power of 2 -static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val)+(base-1))&~(base-1)); } -static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base-1)); } -static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base-1)); } +static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val) + (base - 1))&~(base - 1)); } +static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base - 1)); } +static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1)); } // d3d texture dims const int BLOCK_TEXWIDTH = 128; @@ -33,13 +33,12 @@ const int BLOCK_TEXHEIGHT = 512; extern PCSX2_ALIGNED16(u32 tempblock[64]); - -typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw); -typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw); +typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw); +typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw); typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw); typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw); -typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw); -typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw); +typedef u32(*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw); +typedef u32(*_readPixel_0)(const void* pmem, int x, int y, u32 bw); typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize); typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize); typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); @@ -53,6 +52,7 @@ enum Psm_Size // Both of the following structs should probably be local class variables or in a namespace, // but this works for the moment. + struct TransferData { // Signed because Visual C++ is weird. @@ -88,6 +88,7 @@ struct TransferFuncts }; // rest not visible externally + struct BLOCK { BLOCK() { memset(this, 0, sizeof(BLOCK)); } @@ -142,14 +143,14 @@ extern u32 g_pageTable4[128][128]; static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>5) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6); u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63]; return word; } static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) { - u32 basepage = ((y>>5) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6); u32 word = basepage * 2048 + g_pageTable32[y&31][x&63]; return word; } @@ -165,70 +166,70 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = bp * 128 + basepage * 4096 + g_pageTable16[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = basepage * 4096 + g_pageTable16[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = bp * 128 + basepage * 4096 + g_pageTable16S[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7); + u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7); u32 word = bp * 256 + basepage * 8192 + g_pageTable8[y&63][x&127]; return word; } static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw) { - u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7); + u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7); u32 word = basepage * 8192 + g_pageTable8[y&63][x&127]; return word; } static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7); + u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7); u32 word = bp * 512 + basepage * 16384 + g_pageTable4[y&127][x&127]; return word; } static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw) { - u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7); + u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7); u32 word = basepage * 16384 + g_pageTable4[y&127][x&127]; return word; } static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>5) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6); u32 word = bp * 64 + basepage * 2048 + g_pageTable32Z[y&31][x&63]; return word; } static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) { - u32 basepage = ((y>>5) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6); u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63]; return word; } @@ -238,28 +239,28 @@ static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = bp * 128 + basepage * 4096 + g_pageTable16Z[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = bp * 128 + basepage * 4096 + g_pageTable16SZ[y&63][x&63]; return word; } static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) { - u32 basepage = ((y>>6) * (bw>>6)) + (x>>6); + u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6); u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63]; return word; } @@ -276,9 +277,11 @@ static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { - u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32(x, y, bp, bw)]; - u8 *pix = (u8*)&pixel; - buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2]; + u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)]; + u8 *pix = (u8*) & pixel; + buf[0] = pix[0]; + buf[1] = pix[1]; + buf[2] = pix[2]; } static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) @@ -305,20 +308,21 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b { u32 addr = getPixelAddress4(x, y, bp, bw); u8 pix = ((u8*)pmem)[addr/2]; - if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4); + + if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4); else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel); } static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { - u8 *p = (u8*)pmem + 4*getPixelAddress4HL(x, y, bp, bw)+3; + u8 *p = (u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3; *p = (*p & 0xf0) | pixel; } static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { - u8 *p = (u8*)pmem + 4*getPixelAddress4HH(x, y, bp, bw)+3; - *p = (*p & 0x0f) | (pixel<<4); + u8 *p = (u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3; + *p = (*p & 0x0f) | (pixel << 4); } static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) @@ -328,9 +332,11 @@ static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { - u8 *buf = (u8*)pmem + 4*getPixelAddress32Z(x, y, bp, bw); - u8 *pix = (u8*)&pixel; - buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2]; + u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw); + u8 *pix = (u8*) & pixel; + buf[0] = pix[0]; + buf[1] = pix[1]; + buf[2] = pix[2]; } static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) @@ -380,20 +386,22 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32 { u32 addr = getPixelAddress4(x, y, bp, bw); u8 pix = ((const u8*)pmem)[addr/2]; + if (addr & 0x1) - return pix >> 4; - else return pix & 0xf; + return pix >> 4; + else + return pix & 0xf; } static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw) { - const u8 *p = (const u8*)pmem+4*getPixelAddress4HL(x, y, bp, bw)+3; + const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3; return *p & 0x0f; } static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw) { - const u8 *p = (const u8*)pmem+4*getPixelAddress4HH(x, y, bp, bw) + 3; + const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3; return *p >> 4; } @@ -430,9 +438,11 @@ static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u3 static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw) { - u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32_0(x, y, bw)]; - u8 *pix = (u8*)&pixel; - buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2]; + u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)]; + u8 *pix = (u8*) & pixel; + buf[0] = pix[0]; + buf[1] = pix[1]; + buf[2] = pix[2]; } static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw) @@ -459,20 +469,21 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 { u32 addr = getPixelAddress4_0(x, y, bw); u8 pix = ((u8*)pmem)[addr/2]; - if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4); + + if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4); else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel); } static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw) { - u8 *p = (u8*)pmem + 4*getPixelAddress4HL_0(x, y, bw)+3; + u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3; *p = (*p & 0xf0) | pixel; } static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw) { - u8 *p = (u8*)pmem + 4*getPixelAddress4HH_0(x, y, bw)+3; - *p = (*p & 0x0f) | (pixel<<4); + u8 *p = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3; + *p = (*p & 0x0f) | (pixel << 4); } static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) @@ -482,9 +493,11 @@ static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { - u8 *buf = (u8*)pmem + 4*getPixelAddress32Z_0(x, y, bw); - u8 *pix = (u8*)&pixel; - buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2]; + u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw); + u8 *pix = (u8*) & pixel; + buf[0] = pix[0]; + buf[1] = pix[1]; + buf[2] = pix[2]; } static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) @@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) { u32 addr = getPixelAddress4_0(x, y, bw); u8 pix = ((const u8*)pmem)[addr/2]; + if (addr & 0x1) return pix >> 4; else @@ -542,13 +556,13 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw) { - const u8 *p = (const u8*)pmem+4*getPixelAddress4HL_0(x, y, bw)+3; + const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3; return *p & 0x0f; } static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw) { - const u8 *p = (const u8*)pmem+4*getPixelAddress4HH_0(x, y, bw) + 3; + const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3; return *p >> 4; } diff --git a/plugins/zzogl-pg/opengl/Mem_Transmit.h b/plugins/zzogl-pg/opengl/Mem_Transmit.h index c067e4b0fd..876a020095 100644 --- a/plugins/zzogl-pg/opengl/Mem_Transmit.h +++ b/plugins/zzogl-pg/opengl/Mem_Transmit.h @@ -14,57 +14,60 @@ extern u8* pstart; template static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { - assert( (nSize%widthlimit) == 0 && widthlimit <= 4 ); - if ((gs.imageEndX-gs.trxpos.dx) % widthlimit) + assert((nSize % widthlimit) == 0 && widthlimit <= 4); + + if ((gs.imageEndX - gs.trxpos.dx) % widthlimit) { // ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM); - for(; tempY < endY; ++tempY) + for (; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1) + for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1) { /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw); + wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw); } } } - for(; tempY < endY; ++tempY) + + for (; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit) + for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit) { /* write as many pixel at one time as possible */ - if( nSize < widthlimit ) return NULL; + if (nSize < widthlimit) return NULL; - wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw); + wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw); - if( widthlimit > 1 ) + if (widthlimit > 1) { - wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw); + wp(pstart, (tempX + 1) % 2048, tempY % 2048, buf[1], gs.dstbuf.bw); - if( widthlimit > 2 ) + if (widthlimit > 2) { - wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw); + wp(pstart, (tempX + 2) % 2048, tempY % 2048, buf[2], gs.dstbuf.bw); - if( widthlimit > 3 ) + if (widthlimit > 3) { - wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw); + wp(pstart, (tempX + 3) % 2048, tempY % 2048, buf[3], gs.dstbuf.bw); } } } } - if ( tempX >= gs.imageEndX ) + if (tempX >= gs.imageEndX) { assert(tempX == gs.imageEndX); tempX = gs.trxpos.dx; } else { - assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 ); + assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0); return NULL; } } + return buf; } @@ -72,47 +75,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim template static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { - if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit)) + if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit)) { //ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM); - for(; tempY < endY; ++tempY) + for (; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3) + for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3) { - wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw); - } - - if( tempX >= gs.imageEndX ) - { - assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); - tempX = gs.trxpos.dx; - } - else - { - assert( gs.imageTransfer == -1 || nSize == 0 ); - return NULL; - } - } - } - else - { - assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 ); - for(; tempY < endY; ++tempY) - { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit) - { - if (nSize < widthlimit) return NULL; - - /* write as many pixel at one time as possible */ - - wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw); - wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw); - wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw); - wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw); + wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw); } if (tempX >= gs.imageEndX) @@ -122,18 +92,55 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl } else { - if ( nSize < 0 ) - { - /* extracted too much */ - assert( (nSize%3)==0 && nSize > -24 ); - tempX += nSize/3; - nSize = 0; - } - assert( gs.imageTransfer == -1 || nSize == 0 ); + assert(gs.imageTransfer == -1 || nSize == 0); return NULL; } } } + else + { + assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8); + + for (; tempY < endY; ++tempY) + { + for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit) + { + if (nSize < widthlimit) return NULL; + + /* write as many pixel at one time as possible */ + + wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf + 0), gs.dstbuf.bw); + wp(pstart, (tempX + 1) % 2048, tempY % 2048, *(u32*)(buf + 3), gs.dstbuf.bw); + wp(pstart, (tempX + 2) % 2048, tempY % 2048, *(u32*)(buf + 6), gs.dstbuf.bw); + wp(pstart, (tempX + 3) % 2048, tempY % 2048, *(u32*)(buf + 9), gs.dstbuf.bw); + wp(pstart, (tempX + 4) % 2048, tempY % 2048, *(u32*)(buf + 12), gs.dstbuf.bw); + wp(pstart, (tempX + 5) % 2048, tempY % 2048, *(u32*)(buf + 15), gs.dstbuf.bw); + wp(pstart, (tempX + 6) % 2048, tempY % 2048, *(u32*)(buf + 18), gs.dstbuf.bw); + wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw); + } + + if (tempX >= gs.imageEndX) + { + assert(gs.imageTransfer == -1 || tempX == gs.imageEndX); + tempX = gs.trxpos.dx; + } + else + { + if (nSize < 0) + { + /* extracted too much */ + assert((nSize % 3) == 0 && nSize > -24); + tempX += nSize / 3; + nSize = 0; + } + + assert(gs.imageTransfer == -1 || nSize == 0); + + return NULL; + } + } + } + return buf; } @@ -141,73 +148,84 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl template static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) { - for(; tempY < endY; ++tempY) + for (; tempY < endY; ++tempY) { - for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit) + for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit) { /* write as many pixel at one time as possible */ - wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw); buf++; - if ( widthlimit > 2 ) + + if (widthlimit > 2) { - wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX + 3) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw); buf++; - if( widthlimit > 4 ) + if (widthlimit > 4) { - wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + wp(pstart, (tempX + 4) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX + 5) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw); buf++; - if( widthlimit > 6 ) + if (widthlimit > 6) { - wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw); + wp(pstart, (tempX + 6) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX + 7) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw); buf++; } } } } - if ( tempX >= gs.imageEndX ) + if (tempX >= gs.imageEndX) { tempX = gs.trxpos.dx; } else { - assert( gs.imageTransfer == -1 || (nSize/32) == 0 ); + assert(gs.imageTransfer == -1 || (nSize / 32) == 0); return NULL; } } + return buf; } template - static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) - { - switch (data.psm) - { - case PSM_: return TransmitHostLocalY_(wp, widthlimit, endY, buf); - case PSM_4_: return TransmitHostLocalY_4(wp, widthlimit, endY, buf); - case PSM_24_: return TransmitHostLocalY_24(wp, widthlimit, endY, buf); - } - assert(0); - return NULL; - } +static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf) +{ + switch (data.psm) + { + case PSM_: + return TransmitHostLocalY_(wp, widthlimit, endY, buf); + + case PSM_4_: + return TransmitHostLocalY_4(wp, widthlimit, endY, buf); + + case PSM_24_: + return TransmitHostLocalY_24(wp, widthlimit, endY, buf); + } + + assert(0); + + return NULL; +} template static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) { - for(u32 tempi = 0; tempi < blockheight; ++tempi) + for (u32 tempi = 0; tempi < blockheight; ++tempi) { - for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++) + for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++) { - wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw); + wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw); } + buf += pitch - fracX; } + return buf; } @@ -215,14 +233,16 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim template static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) { - for(u32 tempi = 0; tempi < blockheight; ++tempi) + for (u32 tempi = 0; tempi < blockheight; ++tempi) { - for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3) + for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3) { - wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw); + wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw); } - buf += 3*(pitch-fracX); + + buf += 3 * (pitch - fracX); } + return buf; } @@ -230,30 +250,39 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl template static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) { - for(u32 tempi = 0; tempi < blockheight; ++tempi) + for (u32 tempi = 0; tempi < blockheight; ++tempi) { - for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++) + for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++) { - wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw); - wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw); + wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw); + wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw); } - buf += (pitch-fracX)/2; + + buf += (pitch - fracX) / 2; } + return buf; } template - static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) - { - switch (data.psm) - { - case PSM_: return TransmitHostLocalX_(wp, widthlimit, blockheight, startX, buf); - case PSM_4_: return TransmitHostLocalX_4(wp, widthlimit, blockheight, startX, buf); - case PSM_24_: return TransmitHostLocalX_24(wp, widthlimit, blockheight, startX, buf); - } - assert(0); - return NULL; - } +static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf) +{ + switch (data.psm) + { + case PSM_: + return TransmitHostLocalX_(wp, widthlimit, blockheight, startX, buf); + + case PSM_4_: + return TransmitHostLocalX_4(wp, widthlimit, blockheight, startX, buf); + + case PSM_24_: + return TransmitHostLocalX_24(wp, widthlimit, blockheight, startX, buf); + } + + assert(0); + + return NULL; +} // calculate pitch in source buffer static __forceinline u32 TransPitch(u32 pitch, u32 size) diff --git a/plugins/zzogl-pg/opengl/Util.h b/plugins/zzogl-pg/opengl/Util.h index e40cedcb69..f824a4971f 100644 --- a/plugins/zzogl-pg/opengl/Util.h +++ b/plugins/zzogl-pg/opengl/Util.h @@ -70,21 +70,22 @@ extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file // declare linux equivalents static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align) { - assert( align < 0x10000 ); - char* p = (char*)malloc(size+align); - int off = 2+align - ((int)(uptr)(p+2) % align); + assert(align < 0x10000); + char* p = (char*)malloc(size + align); + int off = 2 + align - ((int)(uptr)(p + 2) % align); p += off; - *(u16*)(p-2) = off; + *(u16*)(p - 2) = off; return p; } static __forceinline void pcsx2_aligned_free(void* pmem) { - if( pmem != NULL ) { + if (pmem != NULL) + { char* p = (char*)pmem; - free(p - (int)*(u16*)(p-2)); + free(p - (int)*(u16*)(p - 2)); } } @@ -98,7 +99,7 @@ inline unsigned long timeGetTime() timeb t; ftime(&t); - return (unsigned long)(t.time*1000+t.millitm); + return (unsigned long)(t.time*1000 + t.millitm); } struct RECT @@ -113,20 +114,24 @@ struct RECT #define min(a,b) (((a) < (b)) ? (a) : (b)) -typedef struct { +typedef struct +{ int x, y, w, h; } Rect; -typedef struct { +typedef struct +{ int x, y; } Point; -typedef struct { +typedef struct +{ int x0, y0; int x1, y1; } Rect2; -typedef struct { +typedef struct +{ int x, y, c; } PointC; @@ -145,6 +150,7 @@ typedef struct { #define GSOPTION_LOADED 0x8000 //Configuration values. + typedef struct { u8 mrtdepth; // write color in render target @@ -227,18 +233,18 @@ extern void __LogToConsole(const char *fmt, ...); namespace ZZLog { - extern void Message(const char *fmt, ...); - extern void Log(const char *fmt, ...); - extern void WriteToConsole(const char *fmt, ...); - extern void Print(const char *fmt, ...); - - extern void Greg_Log(const char *fmt, ...); - extern void Prim_Log(const char *fmt, ...); - extern void GS_Log(const char *fmt, ...); - - extern void Debug_Log(const char *fmt, ...); - extern void Warn_Log(const char *fmt, ...); - extern void Error_Log(const char *fmt, ...); +extern void Message(const char *fmt, ...); +extern void Log(const char *fmt, ...); +extern void WriteToConsole(const char *fmt, ...); +extern void Print(const char *fmt, ...); + +extern void Greg_Log(const char *fmt, ...); +extern void Prim_Log(const char *fmt, ...); +extern void GS_Log(const char *fmt, ...); + +extern void Debug_Log(const char *fmt, ...); +extern void Warn_Log(const char *fmt, ...); +extern void Error_Log(const char *fmt, ...); }; #define REG64(name) \ @@ -247,14 +253,14 @@ union name \ u64 i64; \ u32 ai32[2]; \ struct { \ - + #define REG128(name)\ union name \ { \ u64 ai64[2]; \ u32 ai32[4]; \ struct { \ - + #define REG64_(prefix, name) REG64(prefix##name) #define REG128_(prefix, name) REG128(prefix##name) @@ -266,13 +272,13 @@ union name \ { \ u64 i64; \ u32 ai32[2]; \ - + #define REG128_SET(name)\ union name \ { \ u64 ai64[2]; \ u32 ai32[4]; \ - + #define REG_SET_END }; extern void LoadConfig(); @@ -310,16 +316,18 @@ static __forceinline u64 GetTickFrequency() static __forceinline u64 GetCPUTicks() { + struct timeval t; gettimeofday(&t, NULL); - return ((u64)t.tv_sec*GetTickFrequency())+t.tv_usec; + return ((u64)t.tv_sec*GetTickFrequency()) + t.tv_usec; } + #else static __aligned16 LARGE_INTEGER lfreq; static __forceinline void InitCPUTicks() { - QueryPerformanceFrequency( &lfreq ); + QueryPerformanceFrequency(&lfreq); } static __forceinline u64 GetTickFrequency() @@ -330,42 +338,47 @@ static __forceinline u64 GetTickFrequency() static __forceinline u64 GetCPUTicks() { LARGE_INTEGER count; - QueryPerformanceCounter( &count ); + QueryPerformanceCounter(&count); return count.QuadPart; } + #endif template + class CInterfacePtr { -public: - inline CInterfacePtr() : ptr(NULL) {} - inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if ( ptr != NULL ) ptr->AddRef(); } - inline ~CInterfacePtr() { if( ptr != NULL ) ptr->Release(); } - inline T* operator* () { assert( ptr != NULL); return *ptr; } - inline T* operator->() { return ptr; } - inline T* get() { return ptr; } + public: + inline CInterfacePtr() : ptr(NULL) {} + inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); } + inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); } + inline T* operator*() { assert(ptr != NULL); return *ptr; } + inline T* operator->() { return ptr; } + inline T* get() { return ptr; } - inline void release() { - if( ptr != NULL ) { ptr->Release(); ptr = NULL; } - } + inline void release() + { + if (ptr != NULL) { ptr->Release(); ptr = NULL; } + } - inline operator T*() { return ptr; } + inline operator T*() { return ptr; } + inline bool operator==(T* rhs) { return ptr == rhs; } + inline bool operator!=(T* rhs) { return ptr != rhs; } - inline bool operator==(T* rhs) { return ptr == rhs; } - inline bool operator!=(T* rhs) { return ptr != rhs; } + inline CInterfacePtr& operator= (T* newptr) + { + if (ptr != NULL) ptr->Release(); - inline CInterfacePtr& operator= (T* newptr) { - if( ptr != NULL ) ptr->Release(); - ptr = newptr; + ptr = newptr; - if( ptr != NULL ) ptr->AddRef(); - return *this; - } + if (ptr != NULL) ptr->AddRef(); -private: - T* ptr; + return *this; + } + + private: + T* ptr; }; @@ -380,24 +393,25 @@ void DVProfClear(); // clears all the profilers class DVProfileFunc { -public: - u32 dwUserData; - DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; } - DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); } - ~DVProfileFunc() { DVProfEnd(dwUserData); } + public: + u32 dwUserData; + DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; } + DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); } + ~DVProfileFunc() { DVProfEnd(dwUserData); } }; #else class DVProfileFunc { -public: - u32 dwUserData; - static __forceinline DVProfileFunc(char* pname) {} - static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { } - ~DVProfileFunc() {} + + public: + u32 dwUserData; + static __forceinline DVProfileFunc(char* pname) {} + static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { } + ~DVProfileFunc() {} }; #endif - + #endif // UTIL_H_INCLUDED diff --git a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp index be9bb3fee3..4c331e0ffd 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCRTC.cpp @@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list& l return false; } -// First try to draw frame from targets. It's +// First try to draw frame from targets. inline bool RenderCheckForTargets(tex0Info& texframe, list& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace) { // get the start and end addresses of the buffer @@ -662,9 +662,15 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list& list // The same as the previous, but from memory. // If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia), // this is the function that does it. -inline void RenderCheckForMemory(tex0Info& texframe, list& listTargs, int interlace, int bInterlace) +inline void RenderCheckForMemory(tex0Info& texframe, list& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace) { - + // get the start and end addresses of the buffer + int bpp = RenderGetBpp(texframe.psm); + GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1; + + int start, end; + GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw); + for (list::iterator it = listTargs.begin(); it != listTargs.end(); ++it) { (*it)->Resolve(); @@ -676,34 +682,36 @@ inline void RenderCheckForMemory(tex0Info& texframe, list& listT if ((pmemtarg == NULL) || (bInterlace >= 2)) ZZLog::Error_Log("CRCR Check for memory shader fault."); + //if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil); + SetShaderCaller("RenderCheckForMemory"); SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1); - cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex); - cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory); - if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0); - - // finally render from the memory (note that the stencil buffer will keep previous regions) - Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT); - + Vector v; + // Fixme: Why is this here? // We should probably call RenderSetTargetBitTex instead. if (g_bCRTCBilinear) - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex"); + v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT); else - ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex"); + v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT); + // finally render from the memory (note that the stencil buffer will keep previous regions) + v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT); + v = RenderSetTargetBitTrans(texframe.th); v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]); Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]); + cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex); + cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory); RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]); SETPIXELSHADER(ppsCRTC[bInterlace].prog); - + GL_REPORT_ERRORD(); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } @@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace) // start from the last circuit for (int i = !PMODE->SLBG; i >= 0; --i) { - tex0Info& texframe = dispinfo[i]; if (texframe.th <= 1) continue; @@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace) // if we could not draw image from target's do it from memory if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace)) - RenderCheckForMemory(texframe, listTargs, interlace, bInterlace); + RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace); } GL_REPORT_ERRORD(); diff --git a/plugins/zzogl-pg/opengl/ZZoglVB.cpp b/plugins/zzogl-pg/opengl/ZZoglVB.cpp index 08f6250f92..e0ce410b81 100644 --- a/plugins/zzogl-pg/opengl/ZZoglVB.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglVB.cpp @@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp) } // After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made. -inline void ZeroGS::VB::CheckFrame16vs32Convesion() +inline void ZeroGS::VB::CheckFrame16vs32Conversion() { if (prndr->status & CRenderTarget::TS_NeedConvert32) { @@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp) bChanged = CheckFrameResolveRender(tbp); - CheckFrame16vs32Convesion(); + CheckFrame16vs32Conversion(); } else if (bNeedZCheck) { diff --git a/plugins/zzogl-pg/opengl/rasterfont.h b/plugins/zzogl-pg/opengl/rasterfont.h index 0ffb700c1e..2aeae72f06 100644 --- a/plugins/zzogl-pg/opengl/rasterfont.h +++ b/plugins/zzogl-pg/opengl/rasterfont.h @@ -1,22 +1,24 @@ #ifndef RasterFont_Header #define RasterFont_Header -class RasterFont { -protected: - int fontOffset; +class RasterFont +{ -public: - RasterFont(); - ~RasterFont(void); - static int debug; + protected: + int fontOffset; - // some useful constants - enum {char_width = 10}; - enum {char_height = 15}; + public: + RasterFont(); + ~RasterFont(void); + static int debug; - // and the happy helper functions - void printString(const char *s, double x, double y, double z=0.0); - void printCenteredString(const char *s, double y, int screen_width, double z=0.0); + // some useful constants + enum {char_width = 10}; + enum {char_height = 15}; + + // and the happy helper functions + void printString(const char *s, double x, double y, double z = 0.0); + void printCenteredString(const char *s, double y, int screen_width, double z = 0.0); }; #endif diff --git a/plugins/zzogl-pg/opengl/targets.h b/plugins/zzogl-pg/opengl/targets.h index 6c6ed33d2e..599953272e 100644 --- a/plugins/zzogl-pg/opengl/targets.h +++ b/plugins/zzogl-pg/opengl/targets.h @@ -22,23 +22,27 @@ #define TARGET_VIRTUAL_KEY 0x80000000 #include "PS2Edefs.h" -inline Vector DefaultOneColor( FRAGMENTSHADER ptr ) { - Vector v = Vector ( 1, 1, 1, 1 ); - cgGLSetParameter4fv( ptr.sOneColor, v); +inline Vector DefaultOneColor(FRAGMENTSHADER ptr) +{ + Vector v = Vector(1, 1, 1, 1); + cgGLSetParameter4fv(ptr.sOneColor, v); return v ; } -namespace ZeroGS { +namespace ZeroGS +{ - inline u32 GetFrameKey (int fbp, int fbw, VB& curvb); +inline u32 GetFrameKey(int fbp, int fbw, VB& curvb); - // manages render targets - class CRenderTargetMngr - { +// manages render targets + +class CRenderTargetMngr +{ public: typedef map MAPTARGETS; - enum TargetOptions { + enum TargetOptions + { TO_DepthBuffer = 1, TO_StrictHeight = 2, // height returned has to be the same as requested TO_Virtual = 4 @@ -50,16 +54,17 @@ namespace ZeroGS { static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m); CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight); - inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) { - MAPTARGETS::iterator it = mapTargets.find (GetFrameKey(fbp, fbw, curvb)); + inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) + { + MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb)); -/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end()) - { - printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ; - printf("%x %x\n", fbp, fbw); - for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1) - printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp); - }*/ + /* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end()) + { + printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ; + printf("%x %x\n", fbp, fbw); + for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1) + printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp); + }*/ return it != mapTargets.end() ? it->second : NULL; } @@ -68,8 +73,9 @@ namespace ZeroGS { // resolves all targets within a range __forceinline void Resolve(int start, int end); - __forceinline void ResolveAll() { - for(MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it ) + __forceinline void ResolveAll() + { + for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it) it->second->Resolve(); } @@ -77,12 +83,13 @@ namespace ZeroGS { void DestroyIntersecting(CRenderTarget* prndr); // promotes a target from virtual to real - inline CRenderTarget* Promote(u32 key) { - assert( !(key & TARGET_VIRTUAL_KEY) ); + inline CRenderTarget* Promote(u32 key) + { + assert(!(key & TARGET_VIRTUAL_KEY)); // promote to regular targ - CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key|TARGET_VIRTUAL_KEY); - assert( it != mapTargets.end() ); + CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key | TARGET_VIRTUAL_KEY); + assert(it != mapTargets.end()); CRenderTarget* ptarg = it->second; mapTargets.erase(it); @@ -90,31 +97,36 @@ namespace ZeroGS { DestroyIntersecting(ptarg); it = mapTargets.find(key); - if( it != mapTargets.end() ) { + + if (it != mapTargets.end()) + { DestroyTarg(it->second); it->second = ptarg; } else mapTargets[key] = ptarg; - if( g_GameSettings & GAME_RESOLVEPROMOTED ) - ptarg->status = CRenderTarget::TS_Resolved; - else - ptarg->status = CRenderTarget::TS_NeedUpdate; - return ptarg; + if (g_GameSettings & GAME_RESOLVEPROMOTED) + ptarg->status = CRenderTarget::TS_Resolved; + else + ptarg->status = CRenderTarget::TS_NeedUpdate; + + return ptarg; } static void DestroyTarg(CRenderTarget* ptarg); MAPTARGETS mapTargets, mapDummyTargs; - }; +}; + +class CMemoryTargetMngr +{ - class CMemoryTargetMngr - { public: CMemoryTargetMngr() : curstamp(0) {} + CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut - CMemoryTarget* MemoryTarget_SearchExistTarget (int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate); + CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate); CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height); void Destroy(); // destroy all targs @@ -122,26 +134,28 @@ namespace ZeroGS { void ClearRange(int starty, int endy); // set all targets to cleared void DestroyCleared(); // flush all cleared targes void DestroyOldest(); - + list listTargets, listClearedTargets; u32 curstamp; private: list::iterator DestroyTargetIter(list::iterator& it); - }; +}; - class CBitwiseTextureMngr - { +class CBitwiseTextureMngr +{ public: ~CBitwiseTextureMngr() { Destroy(); } void Destroy(); // since GetTex can delete textures to free up mem, it is dangerous if using that texture, so specify at least one other tex to save - __forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) { + __forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) + { map::iterator it = mapTextures.find(bitvalue); - if( it != mapTextures.end() ) - return it->second; + + if (it != mapTextures.end()) return it->second; + return GetTexInt(bitvalue, ptexDoNotDelete); } @@ -149,99 +163,115 @@ namespace ZeroGS { u32 GetTexInt(u32 bitvalue, u32 ptexDoNotDelete); map mapTextures; - }; +}; - // manages - class CRangeManager - { +// manages + +class CRangeManager +{ public: - CRangeManager() { + CRangeManager() + { ranges.reserve(16); } // [start, end) - struct RANGE { + + struct RANGE + { RANGE() {} + inline RANGE(int start, int end) : start(start), end(end) {} + int start, end; }; // works in semi logN void Insert(int start, int end); void RangeSanityCheck(); - inline void Clear() { + inline void Clear() + { ranges.resize(0); } vector ranges; // organized in ascending order, non-intersecting - }; +}; - extern CRenderTargetMngr s_RTs, s_DepthRTs; - extern CBitwiseTextureMngr s_BitwiseTextures; - extern CMemoryTargetMngr g_MemTargs; +extern CRenderTargetMngr s_RTs, s_DepthRTs; +extern CBitwiseTextureMngr s_BitwiseTextures; +extern CMemoryTargetMngr g_MemTargs; - extern u8 s_AAx, s_AAy, s_AAz, s_AAw; +extern u8 s_AAx, s_AAy, s_AAz, s_AAw; - // Real rendered width, depends on AA and AAneg. - inline int RW(int tbw) { - if (s_AAx >= s_AAz) - return (tbw << ( s_AAx - s_AAz )); - else - return (tbw >> ( s_AAz - s_AAx )); - } +// Real rendered width, depends on AA and AAneg. +inline int RW(int tbw) +{ + if (s_AAx >= s_AAz) + return (tbw << (s_AAx - s_AAz)); + else + return (tbw >> (s_AAz - s_AAx)); +} - // Real rendered height, depends on AA and AAneg. - inline int RH(int tbh) { - if (s_AAy >= s_AAw) - return (tbh << ( s_AAy - s_AAw )); - else - return (tbh >> ( s_AAw - s_AAy )); - } +// Real rendered height, depends on AA and AAneg. +inline int RH(int tbh) +{ + if (s_AAy >= s_AAw) + return (tbh << (s_AAy - s_AAw)); + else + return (tbh >> (s_AAw - s_AAy)); +} /* inline void CreateTargetsList(int start, int end, list& listTargs) { s_DepthRTs.GetTargs(start, end, listTargs); s_RTs.GetTargs(start, end, listTargs); }*/ - // This pattern of functions is called 3 times, so I add creating Targets list into one. - inline list CreateTargetsList(int start, int end) { - list listTargs; - s_DepthRTs.GetTargs(start, end, listTargs); - s_RTs.GetTargs(start, end, listTargs); - return listTargs; - } +// This pattern of functions is called 3 times, so I add creating Targets list into one. +inline list CreateTargetsList(int start, int end) +{ + list listTargs; + s_DepthRTs.GetTargs(start, end, listTargs); + s_RTs.GetTargs(start, end, listTargs); + return listTargs; +} - extern Vector g_vdepth; - extern int icurctx; +extern Vector g_vdepth; +extern int icurctx; - extern VERTEXSHADER pvsBitBlt; - extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; - extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; - extern GLuint vboRect; +extern VERTEXSHADER pvsBitBlt; +extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; +extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; +extern GLuint vboRect; // Unworking #define PSMPOSITION 28 // Code width and height of frame into key, that used in targetmanager // This is 3 variants of one function, Key dependant on fbp and fbw. -inline u32 GetFrameKey (const frameInfo& frame) { +inline u32 GetFrameKey(const frameInfo& frame) +{ return (((frame.fbw) << 16) | (frame.fbp)); } -inline u32 GetFrameKey ( CRenderTarget* frame ) { + +inline u32 GetFrameKey(CRenderTarget* frame) +{ return (((frame->fbw) << 16) | (frame->fbp)); } -inline u32 GetFrameKey (int fbp, int fbw, VB& curvb) { +inline u32 GetFrameKey(int fbp, int fbw, VB& curvb) +{ return (((fbw) << 16) | (fbp)); } -inline u16 ShiftHeight (int fbh, int fbp, int fbhCalc) { +inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc) +{ return fbh; } -//FIXME: this code for P4 ad KH1. It should not be such strange! +//FIXME: this code is for P4 and KH1. It should not be so strange! //Dummy targets was deleted from mapTargets, but not erased. -inline u32 GetFrameKeyDummy (const frameInfo& frame) { +inline u32 GetFrameKeyDummy(const frameInfo& frame) +{ // if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh) // printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame)); // height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be @@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) { return (((frame.fbw) << 16) | frame.fbh); } -inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) { +inline u32 GetFrameKeyDummy(CRenderTarget* frame) +{ if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300) return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm)); else diff --git a/plugins/zzogl-pg/opengl/x86.h b/plugins/zzogl-pg/opengl/x86.h index c3bc5d74b6..3cbbfc88b6 100644 --- a/plugins/zzogl-pg/opengl/x86.h +++ b/plugins/zzogl-pg/opengl/x86.h @@ -106,7 +106,7 @@ extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut); extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut); extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut); -extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters ); +extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters); #ifdef ZEROGS_SSE2 diff --git a/plugins/zzogl-pg/opengl/zerogs.h b/plugins/zzogl-pg/opengl/zerogs.h index d159645cec..205ba33c48 100644 --- a/plugins/zzogl-pg/opengl/zerogs.h +++ b/plugins/zzogl-pg/opengl/zerogs.h @@ -43,7 +43,7 @@ #include #include -inline void* wglGetProcAddress(const char* x) +inline void* wglGetProcAddress(const char* x) { return (void*)glXGetProcAddress((const GLubyte*)x); } @@ -143,11 +143,13 @@ using namespace std; extern const char* ShaderCallerName; extern const char* ShaderHandleName; -inline void SetShaderCaller(const char* Name) { +inline void SetShaderCaller(const char* Name) +{ ShaderCallerName = Name; } -inline void SetHandleName(const char* Name) { +inline void SetHandleName(const char* Name) +{ ShaderHandleName = Name; } @@ -160,14 +162,14 @@ extern void ZZcgSetParameter4fv(CGparameter param, const float* v, const char* n g_vsprog = prog; \ } \ } \ - + #define SETPIXELSHADER(prog) { \ if( (prog) != g_psprog ) { \ cgGLBindProgram(prog); \ g_psprog = prog; \ } \ } \ - + #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -180,66 +182,76 @@ const float g_filog32 = 0.999f / (32.0f * logf(2.0f)); //------------------------ Inlines ------------------------- -inline const char *error_name(int err) { - switch (err) { +inline const char *error_name(int err) +{ + switch (err) + { case GL_NO_ERROR: return "GL_NO_ERROR"; + case GL_INVALID_ENUM: return "GL_INVALID_ENUM"; + case GL_INVALID_VALUE: return "GL_INVALID_VALUE"; + case GL_INVALID_OPERATION: return "GL_INVALID_OPERATION"; + case GL_STACK_OVERFLOW: return "GL_STACK_OVERFLOW"; + case GL_STACK_UNDERFLOW: return "GL_STACK_UNDERFLOW"; + case GL_OUT_OF_MEMORY: return "GL_OUT_OF_MEMORY"; + case GL_TABLE_TOO_LARGE: return "GL_TABLE_TOO_LARGE"; + default: return "Unknown GL error"; } } -// inline for extemely ofthen used sequence +// inline for an extemely often used sequence // This is turning off all gl functions. Safe to do updates. -inline void -DisableAllgl () { +inline void DisableAllgl() +{ glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); glDisable(GL_ALPHA_TEST); glDisable(GL_DEPTH_TEST); glDepthMask(0); glDisable(GL_STENCIL_TEST); - glColorMask(1,1,1,1); + glColorMask(1, 1, 1, 1); } // Calculate maximum height for target -inline int -get_maxheight(int fbp, int fbw, int psm) +inline int get_maxheight(int fbp, int fbw, int psm) { int ret; if (fbw == 0) return 0; + if (PSMT_ISHALF(psm)) - ret = (((0x00100000 - 64 * fbp)/fbw ) & ~0x1f) * 2; + ret = (((0x00100000 - 64 * fbp) / fbw) & ~0x1f) * 2; else - ret = (((0x00100000 - 64 * fbp)/fbw ) & ~0x1f); + ret = (((0x00100000 - 64 * fbp) / fbw) & ~0x1f); return ret; } -// Does psm need Alpha test with alpha expansion -inline int -nNeedAlpha(u8 psm) { +// Does psm need Alpha test with alpha expansion? +inline int nNeedAlpha(u8 psm) +{ return (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S); } // Get color storage model psm, that is important on flush stage. -inline u8 -GetTexCPSM(const tex0Info& tex) { +inline u8 GetTexCPSM(const tex0Info& tex) +{ if (PSMT_ISCLUT(tex.psm)) return tex.cpsm; else @@ -249,19 +261,20 @@ GetTexCPSM(const tex0Info& tex) { //--------------------- Dummies #ifdef _WIN32 - extern void (__stdcall *zgsBlendEquationSeparateEXT)(GLenum, GLenum); - extern void (__stdcall *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum); +extern void (__stdcall *zgsBlendEquationSeparateEXT)(GLenum, GLenum); +extern void (__stdcall *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum); #else - extern void (APIENTRY *zgsBlendEquationSeparateEXT)(GLenum, GLenum); - extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum); +extern void (APIENTRY *zgsBlendEquationSeparateEXT)(GLenum, GLenum); +extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum); #endif // ------------------------ Types ------------------------- + struct FRAGMENTSHADER { FRAGMENTSHADER() : prog(0), sMemory(0), sFinal(0), sBitwiseANDX(0), sBitwiseANDY(0), sInterlace(0), sCLUT(0), sOneColor(0), sBitBltZ(0), - fTexAlpha2(0), fTexOffset(0), fTexDims(0), fTexBlock(0), fClampExts(0), fTexWrapMode(0), - fRealTexDims(0), fTestBlack(0), fPageOffset(0), fTexAlpha(0) {} + fTexAlpha2(0), fTexOffset(0), fTexDims(0), fTexBlock(0), fClampExts(0), fTexWrapMode(0), + fRealTexDims(0), fTestBlack(0), fPageOffset(0), fTexAlpha(0) {} CGprogram prog; CGparameter sMemory, sFinal, sBitwiseANDX, sBitwiseANDY, sInterlace, sCLUT; @@ -275,6 +288,7 @@ struct FRAGMENTSHADER { CGparameter p; p = cgGetNamedParameter(prog, name); + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p; } @@ -283,12 +297,14 @@ struct FRAGMENTSHADER CGparameter p; p = cgGetNamedParameter(prog, name); - if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) { cgGLSetTextureParameter(p, texobj); cgGLEnableTextureParameter(p); return true; } + return false; } @@ -297,11 +313,13 @@ struct FRAGMENTSHADER CGparameter p; p = cgGetNamedParameter(prog, name); - if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) { cgConnectParameter(tex, p); return true; } + return false; } @@ -310,12 +328,14 @@ struct FRAGMENTSHADER CGparameter p; p = cgGetNamedParameter(prog, name); - if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) { //cgGLEnableTextureParameter(p); tex = p; return true; } + return false; } @@ -325,11 +345,12 @@ struct FRAGMENTSHADER p = cgGetNamedParameter(prog, name); - if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) + if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) { cgGLSetParameter4fv(p, v); return true; } + return false; } }; @@ -337,9 +358,11 @@ struct FRAGMENTSHADER struct VERTEXSHADER { VERTEXSHADER() : prog(0), sBitBltPos(0), sBitBltTex(0) {} + CGprogram prog; CGparameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants }; + // ------------------------ Variables ------------------------- // all textures have this width //#define GPU_TEXWIDTH 512 @@ -458,7 +481,7 @@ enum GAME_HACK_OPTIONS GAME_32BITTARGS = 0x00200000, GAME_PATH3HACK = 0x00400000, GAME_DOPARALLELCTX = 0x00800000, // tries to parallelize both contexts so that render calls are reduced (xenosaga) - // makes the game faster, but can be buggy + // makes the game faster, but can be buggy GAME_XENOSPECHACK = 0x01000000, // xenosaga specularity hack (ignore any zmask=1 draws) GAME_PARTIALPOINTERS = 0x02000000, // whenver the texture or render target are small, tries to look for bigger ones to read from GAME_PARTIALDEPTH = 0x04000000, // tries to save depth targets as much as possible across height changes @@ -469,191 +492,191 @@ enum GAME_HACK_OPTIONS #define USEALPHATESTING (!(g_GameSettings&GAME_NOALPHATEST)) - // CRC Information - enum Title_Info - { - Unknown_Title, - MetalSlug6, - TomoyoAfter, - Clannad, - Lamune, - KyuuketsuKitanMoonties, - PiaCarroteYoukosoGPGakuenPrincess, - KazokuKeikakuKokoroNoKizuna, - DuelSaviorDestiny, - FFX, - FFX2, - FFXII, - ShadowHearts, - Okami, - MetalGearSolid3, - DBZBT2, - DBZBT3, - SFEX3, - Bully, - BullyCC, - SoTC, - OnePieceGrandAdventure, - OnePieceGrandBattle, - ICO, - GT4, - WildArms5, - Manhunt2, - CrashBandicootWoC, - ResidentEvil4, - Spartan, - AceCombat4, - Drakengard2, - Tekken5, - IkkiTousen, - GodOfWar, - GodOfWar2, - JackieChanAdv, - HarvestMoon, - NamcoXCapcom, - GiTS, - Onimusha3, - MajokkoALaMode2, - TalesOfAbyss, - SonicUnleashed, - SimpsonsGame, - Genji, - StarOcean3, - ValkyrieProfile2, - RadiataStories, - SMTNocturne, - SMTDDS1, - SMTDDS2, - RozenMaidenGebetGarden, - Xenosaga, - Espgaluda, - OkageShadowKing, - ShadowTheHedgehog, - AtelierIris1, - AtelierIris2, - AtelierIris3, - AtelierJudie, - AtelierLilie, - AtelierViorate, - ArTonelico1, - ArTonelico2, - ManaKhemia1, - ManaKhemia2, - DarkCloud1, - DarkCloud2, - GhostInTheShell, - TitleCount, - }; +enum Title_Info +{ + Unknown_Title, + MetalSlug6, + TomoyoAfter, + Clannad, + Lamune, + KyuuketsuKitanMoonties, + PiaCarroteYoukosoGPGakuenPrincess, + KazokuKeikakuKokoroNoKizuna, + DuelSaviorDestiny, + FFX, + FFX2, + FFXII, + ShadowHearts, + Okami, + MetalGearSolid3, + DBZBT2, + DBZBT3, + SFEX3, + Bully, + BullyCC, + SoTC, + OnePieceGrandAdventure, + OnePieceGrandBattle, + ICO, + GT4, + WildArms5, + Manhunt2, + CrashBandicootWoC, + ResidentEvil4, + Spartan, + AceCombat4, + Drakengard2, + Tekken5, + IkkiTousen, + GodOfWar, + GodOfWar2, + JackieChanAdv, + HarvestMoon, + NamcoXCapcom, + GiTS, + Onimusha3, + MajokkoALaMode2, + TalesOfAbyss, + SonicUnleashed, + SimpsonsGame, + Genji, + StarOcean3, + ValkyrieProfile2, + RadiataStories, + SMTNocturne, + SMTDDS1, + SMTDDS2, + RozenMaidenGebetGarden, + Xenosaga, + Espgaluda, + OkageShadowKing, + ShadowTheHedgehog, + AtelierIris1, + AtelierIris2, + AtelierIris3, + AtelierJudie, + AtelierLilie, + AtelierViorate, + ArTonelico1, + ArTonelico2, + ManaKhemia1, + ManaKhemia2, + DarkCloud1, + DarkCloud2, + GhostInTheShell, + TitleCount, +}; - enum Region_Info - { - Unknown_Region, - US, - EU, - JP, - JPUNDUB, - RU, - FR, - DE, - IT, - ES, - ASIA, - RegionCount, - }; +enum Region_Info +{ + Unknown_Region, + US, + EU, + JP, + JPUNDUB, + RU, + FR, + DE, + IT, + ES, + ASIA, + RegionCount, +}; + +struct Game_Info +{ + u32 crc; + Title_Info title; + Region_Info region; + u32 flags; + s32 v_thresh, t_thresh; +}; + +static const Game_Info crc_game_list[] = +{ + {0xA3D63039, Xenosaga, JP, GAME_DOPARALLELCTX, 64, 32}, + {0x0E7807B2, Xenosaga, US, GAME_DOPARALLELCTX, 64, 32}, + {0x7D2FE035, Espgaluda, JP, 0/*GAME_BIGVALIDATE*/, 24, -1}, + {0x21068223, Okami, US, GAME_XENOSPECHACK, -1, -1}, + {0x891f223f, Okami, FR, GAME_XENOSPECHACK, -1, -1}, + {0xC5DEFEA0, Okami, JP, GAME_XENOSPECHACK, -1, -1}, + {0xe0426fc6, OkageShadowKing, Unknown_Region, GAME_XENOSPECHACK, -1, -1}, + + {0xD6385328, GodOfWar, US, GAME_FULL16BITRES, -1, -1}, + {0xFB0E6D72, GodOfWar, EU, GAME_FULL16BITRES, -1, -1}, + {0xEB001875, GodOfWar, EU, GAME_FULL16BITRES, -1, -1}, + {0xA61A4C6D, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + {0xE23D532B, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + {0xDF1AF973, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + {0xD6385328, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + + //{0x2F123FD8, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + //{0x44A8A22A, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + //{0x4340C7C6, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + //{0xF8CD3DF6, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + //{0x0B82BFF7, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, + + {0xF0A6D880, HarvestMoon, US, GAME_NOSTENCIL, -1, -1}, + //{0x304C115C, HarvestMoon, Unknown, GAME_NOSTENCIL, -1, -1}, + {0xFB236A46, SonicUnleashed, US, GAME_FASTUPDATE | GAME_NOALPHAFAIL, -1, -1}, + {0xa5d29941, ShadowTheHedgehog, US, GAME_FASTUPDATE | GAME_NOALPHAFAIL, -1, -1}, + + {0x7acf7e03, AtelierIris1, Unknown_Region, GAME_GUSTHACK, -1, -1}, + {0xF0457CEF, AtelierIris1, Unknown_Region, GAME_GUSTHACK, -1, -1}, + {0xE3981DBB, AtelierIris1, US, GAME_GUSTHACK, -1, -1}, + {0x9AC65D6A, AtelierIris2, US, GAME_GUSTHACK, -1, -1}, + {0x4CCC9212, AtelierIris3, US, GAME_GUSTHACK, -1, -1}, + {0xCA295E61, AtelierIris3, JP, GAME_GUSTHACK, -1, -1}, + //{0x4437F4B1, ArTonelico1, US, GAME_GUSTHACK, -1, -1}, + {0xF95F37EE, ArTonelico2, US, GAME_GUSTHACK, -1, -1}, + {0xF46142D3, ArTonelico2, JPUNDUB, GAME_GUSTHACK, -1, -1}, + {0x77b0236f, ManaKhemia1, US, GAME_GUSTHACK , -1, -1}, + {0x433951e7, ManaKhemia2, US, GAME_GUSTHACK, -1, -1}, + //{0xda11c6d4, AtelierJudie, JP, GAME_GUSTHACK, -1, -1}, + //{0x3e72c085, AtelierLilie, JP, GAME_GUSTHACK, -1, -1}, + //{0x6eac076b, AtelierViorate, JP, GAME_GUSTHACK, -1, -1}, + + {0xbaa8dd8, DarkCloud1, US, GAME_NOTARGETRESOLVE, -1, -1}, + {0xA5C05C78, DarkCloud1, Unknown_Region, GAME_NOTARGETRESOLVE, -1, -1}, + //{0x1DF41F33, DarkCloud2, US, 0, -1, -1}, + {0x95cc86ef, GhostInTheShell, Unknown_Region, GAME_NOALPHAFAIL, -1, -1} + + //{0xC164550A, WildArms5, JPUNDUB, 0, -1, -1}, + //{0xC1640D2C, WildArms5, US, 0, -1, -1}, + //{0x0FCF8FE4, WildArms5, EU, 0, -1, -1}, + //{0x2294D322, WildArms5, JP, 0, -1, -1}, + //{0x565B6170, WildArms5, JP, 0, -1, -1}, + //{0xD7273511, SMTDDS1, US, 0, -1, -1}, // SMT Digital Devil Saga + //{0x1683A6BE, SMTDDS1, EU, 0, -1, -1}, // SMT Digital Devil Saga + //{0x44865CE1, SMTDDS1, JP, 0, -1, -1}, // SMT Digital Devil Saga + //{0xD382C164, SMTDDS2, US, 0, -1, -1}, // SMT Digital Devil Saga 2 + //{0xE47C1A9C, SMTDDS2, JP, 0, -1, -1}, // SMT Digital Devil Saga 2 +}; + +#define GAME_INFO_INDEX (sizeof(crc_game_list)/sizeof(Game_Info)) - struct Game_Info - { - u32 crc; - Title_Info title; - Region_Info region; - u32 flags; - s32 v_thresh, t_thresh; - }; - - // Note; all the options surrounded by /**/ are ones that were getting chosen previously because of missing break statements, and might not be appropriate. - // I'll have to check and see if they work better with or without them. - static const Game_Info crc_game_list[] = - { - {0xA3D63039, Xenosaga, JP, GAME_DOPARALLELCTX, 64, 32}, - {0x0E7807B2, Xenosaga, US, GAME_DOPARALLELCTX, 64, 32}, - {0x7D2FE035, Espgaluda, JP, 0/*GAME_BIGVALIDATE*/, 24, -1}, - {0x21068223, Okami, US, GAME_XENOSPECHACK, -1, -1}, - {0x891f223f, Okami, FR, GAME_XENOSPECHACK, -1, -1}, - {0xC5DEFEA0, Okami, JP, GAME_XENOSPECHACK, -1, -1}, - {0xe0426fc6, OkageShadowKing, Unknown_Region, GAME_XENOSPECHACK, -1, -1}, - - {0xD6385328, GodOfWar, US, GAME_FULL16BITRES, -1, -1}, - {0xFB0E6D72, GodOfWar, EU, GAME_FULL16BITRES, -1, -1}, - {0xEB001875, GodOfWar, EU, GAME_FULL16BITRES, -1, -1}, - {0xA61A4C6D, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - {0xE23D532B, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - {0xDF1AF973, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - {0xD6385328, GodOfWar, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - - //{0x2F123FD8, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - //{0x44A8A22A, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - //{0x4340C7C6, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - //{0xF8CD3DF6, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - //{0x0B82BFF7, GodOfWar2, Unknown_Region, GAME_FULL16BITRES, -1, -1}, - - {0xF0A6D880, HarvestMoon, US, GAME_NOSTENCIL, -1, -1}, - //{0x304C115C, HarvestMoon, Unknown, GAME_NOSTENCIL, -1, -1}, - {0xFB236A46, SonicUnleashed, US, GAME_FASTUPDATE | GAME_NOALPHAFAIL, -1, -1}, - {0xa5d29941, ShadowTheHedgehog, US, GAME_FASTUPDATE | GAME_NOALPHAFAIL, -1, -1}, - - {0x7acf7e03, AtelierIris1, Unknown_Region, GAME_GUSTHACK, -1, -1}, - {0xF0457CEF, AtelierIris1, Unknown_Region, GAME_GUSTHACK, -1, -1}, - {0xE3981DBB, AtelierIris1, US, GAME_GUSTHACK, -1, -1}, - {0x9AC65D6A, AtelierIris2, US, GAME_GUSTHACK, -1, -1}, - {0x4CCC9212, AtelierIris3, US, GAME_GUSTHACK, -1, -1}, - {0xCA295E61, AtelierIris3, JP, GAME_GUSTHACK, -1, -1}, - //{0x4437F4B1, ArTonelico1, US, GAME_GUSTHACK, -1, -1}, - {0xF95F37EE, ArTonelico2, US, GAME_GUSTHACK, -1, -1}, - {0xF46142D3, ArTonelico2, JPUNDUB, GAME_GUSTHACK, -1, -1}, - {0x77b0236f, ManaKhemia1, US, GAME_GUSTHACK , -1, -1}, - {0x433951e7, ManaKhemia2, US, GAME_GUSTHACK, -1, -1}, - //{0xda11c6d4, AtelierJudie, JP, GAME_GUSTHACK, -1, -1}, - //{0x3e72c085, AtelierLilie, JP, GAME_GUSTHACK, -1, -1}, - //{0x6eac076b, AtelierViorate, JP, GAME_GUSTHACK, -1, -1}, - - {0xbaa8dd8, DarkCloud1, US, GAME_NOTARGETRESOLVE, -1, -1}, - {0xA5C05C78, DarkCloud1, Unknown_Region, GAME_NOTARGETRESOLVE, -1, -1}, - //{0x1DF41F33, DarkCloud2, US, 0, -1, -1}, - {0x95cc86ef, GhostInTheShell, Unknown_Region, GAME_NOALPHAFAIL, -1, -1} - - //{0xC164550A, WildArms5, JPUNDUB, 0, -1, -1}, - //{0xC1640D2C, WildArms5, US, 0, -1, -1}, - //{0x0FCF8FE4, WildArms5, EU, 0, -1, -1}, - //{0x2294D322, WildArms5, JP, 0, -1, -1}, - //{0x565B6170, WildArms5, JP, 0, -1, -1}, - //{0xD7273511, SMTDDS1, US, 0, -1, -1}, // SMT Digital Devil Saga - //{0x1683A6BE, SMTDDS1, EU, 0, -1, -1}, // SMT Digital Devil Saga - //{0x44865CE1, SMTDDS1, JP, 0, -1, -1}, // SMT Digital Devil Saga - //{0xD382C164, SMTDDS2, US, 0, -1, -1}, // SMT Digital Devil Saga 2 - //{0xE47C1A9C, SMTDDS2, JP, 0, -1, -1}, // SMT Digital Devil Saga 2 - }; - - #define GAME_INFO_INDEX (sizeof(crc_game_list)/sizeof(Game_Info)) - extern int nBackbufferWidth, nBackbufferHeight; extern u8* g_pbyGSMemory; extern u8* g_pbyGSClut; // the temporary clut buffer extern CGparameter g_vparamPosXY[2], g_fparamFogColor; -namespace ZeroGS { +namespace ZeroGS +{ - typedef void (*DrawFn)(); +typedef void (*DrawFn)(); - enum RenderFormatType - { - RFT_byte8 = 0, // A8R8G8B8 - RFT_float16 = 1, // A32R32B32G32 - }; +enum RenderFormatType +{ + RFT_byte8 = 0, // A8R8G8B8 + RFT_float16 = 1, // A32R32B32G32 +}; + +// managers render-to-texture targets + +class CRenderTarget +{ - // managers render-to-texture targets - class CRenderTarget - { public: CRenderTarget(); virtual ~CRenderTarget(); @@ -666,8 +689,9 @@ namespace ZeroGS { void SetViewport(); // copies/creates the feedback contents - inline void CreateFeedback() { - if( ptexFeedback == 0 || !(status&TS_FeedbackReady) ) + inline void CreateFeedback() + { + if (ptexFeedback == 0 || !(status&TS_FeedbackReady)) _CreateFeedback(); } @@ -678,6 +702,7 @@ namespace ZeroGS { virtual void ConvertTo16(); // converts a psm==0 target, to a psm==2 virtual bool IsDepth() { return false; } + void SetRenderTarget(int targ); void* psys; // system data used for comparison @@ -702,7 +727,8 @@ namespace ZeroGS { // this is optionally used when feedback effects are used (render target is used as a texture when rendering to itself) u32 ptexFeedback; - enum TargetStatus { + enum TargetStatus + { TS_Resolved = 1, TS_NeedUpdate = 2, TS_Virtual = 4, // currently not mapped to memory @@ -712,14 +738,17 @@ namespace ZeroGS { }; inline Vector DefaultBitBltPos() ; inline Vector DefaultBitBltTex() ; + private: void _CreateFeedback(); - inline bool InitialiseDefaultTexture ( u32 *p_ptr, int fbw, int fbh ) ; - }; + inline bool InitialiseDefaultTexture(u32 *p_ptr, int fbw, int fbh) ; +}; + +// manages zbuffers + +class CDepthTarget : public CRenderTarget +{ - // manages zbuffers - class CDepthTarget : public CRenderTarget - { public: CDepthTarget(); virtual ~CDepthTarget(); @@ -738,16 +767,18 @@ namespace ZeroGS { u32 pdepth; // 24 bit, will contain the stencil buffer if possible u32 pstencil; // if not 0, contains the stencil buffer int icount; // internal counter - }; +}; - // manages contiguous chunks of memory (width is always 1024) - class CMemoryTarget - { +// manages contiguous chunks of memory (width is always 1024) + +class CMemoryTarget +{ public: struct TEXTURE { inline TEXTURE() : tex(0), memptr(NULL), ref(0) {} inline ~TEXTURE() { glDeleteTextures(1, &tex); _aligned_free(memptr); } + u32 tex; u8* memptr; // GPU memory used for comparison int ref; @@ -755,9 +786,12 @@ namespace ZeroGS { inline CMemoryTarget() : ptex(NULL), starty(0), height(0), realy(0), realheight(0), usedstamp(0), psm(0), cpsm(0), channels(0), clearminy(0), clearmaxy(0), validatecount(0) {} - inline CMemoryTarget(const CMemoryTarget& r) { + inline CMemoryTarget(const CMemoryTarget& r) + { ptex = r.ptex; - if( ptex != NULL ) ptex->ref++; + + if (ptex != NULL) ptex->ref++; + starty = r.starty; height = r.height; realy = r.realy; @@ -776,14 +810,15 @@ namespace ZeroGS { ~CMemoryTarget() { Destroy(); } - inline void Destroy() { - if( ptex != NULL && ptex->ref > 0 ) { - if( --ptex->ref <= 0 ) - delete ptex; + inline void Destroy() + { + if (ptex != NULL && ptex->ref > 0) + { + if (--ptex->ref <= 0) delete ptex; } ptex = NULL; - } + } // returns true if clut data is synced bool ValidateClut(const tex0Info& tex0); @@ -810,257 +845,268 @@ namespace ZeroGS { int validatecount; // count how many times has been validated, if too many, destroy vector clut; // if nonzero, texture uses CLUT - }; +}; - struct VB +struct VB +{ + VB(); + ~VB(); + + void Destroy(); + + inline bool CheckPrim() { - VB(); - ~VB(); + static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA) - void Destroy(); + if ((PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim]) + return nCount > 0; - inline bool CheckPrim() { - static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA) - if( (PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim] ) - return nCount > 0; - return false; + return false; + } + + void CheckFrame(int tbp); + + // context specific state + Point offset; + Rect2 scissor; + tex0Info tex0; + tex1Info tex1; + miptbpInfo miptbp0; + miptbpInfo miptbp1; + alphaInfo alpha; + fbaInfo fba; + clampInfo clamp; + pixTest test; + u32 ptexClamp[2]; // textures for x and y dir region clamping + +public: + void FlushTexData(); + inline int CheckFrameAddConstraints(int tbp); + inline void CheckScissors(int maxpos); + inline void CheckFrame32bitRes(int maxpos); + inline int FindMinimalMemoryConstrain(int tbp, int maxpos); + inline int FindZbufferMemoryConstrain(int tbp, int maxpos); + inline int FindMinimalHeightConstrain(int maxpos); + + inline int CheckFrameResolveRender(int tbp); + inline void CheckFrame16vs32Conversion(); + inline int CheckFrameResolveDepth(int tbp); + + inline void FlushTexUnchangedClutDontUpdate() ; + inline void FlushTexClutDontUpdate() ; + inline void FlushTexClutting() ; + inline void FlushTexSetNewVars(u32 psm) ; + + // notify VB that nVerts need to be written to pbuf + inline void NotifyWrite(int nVerts) + { + assert(pBufferData != NULL && nCount <= nNumVertices && nVerts > 0); + + if (nCount + nVerts > nNumVertices) + { + // recreate except with a bigger count + VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nNumVertices * 2, 256); + memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount); + nNumVertices *= 2; + assert(nCount + nVerts <= nNumVertices); + _aligned_free(pBufferData); + pBufferData = ptemp; + } + } + + void Init(int nVerts) + { + if (pBufferData == NULL && nVerts > 0) + { + pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nVerts, 256); + nNumVertices = nVerts; } - void CheckFrame(int tbp); + nCount = 0; + } - // context specific state - Point offset; - Rect2 scissor; - tex0Info tex0; - tex1Info tex1; - miptbpInfo miptbp0; - miptbpInfo miptbp1; - alphaInfo alpha; - fbaInfo fba; - clampInfo clamp; - pixTest test; - u32 ptexClamp[2]; // textures for x and y dir region clamping + u8 bNeedFrameCheck; + u8 bNeedZCheck; + u8 bNeedTexCheck; + u8 dummy0; - public: - void FlushTexData(); - inline int CheckFrameAddConstraints(int tbp); - inline void CheckScissors (int maxpos); - inline void CheckFrame32bitRes(int maxpos); - inline int FindMinimalMemoryConstrain(int tbp, int maxpos); - inline int FindZbufferMemoryConstrain(int tbp, int maxpos); - inline int FindMinimalHeightConstrain(int maxpos); - - inline int CheckFrameResolveRender(int tbp); - inline void CheckFrame16vs32Convesion(); - inline int CheckFrameResolveDepth(int tbp); - - inline void FlushTexUnchangedClutDontUpdate() ; - inline void FlushTexClutDontUpdate() ; - inline void FlushTexClutting() ; - inline void FlushTexSetNewVars(u32 psm) ; - - // notify VB that nVerts need to be written to pbuf - inline void NotifyWrite(int nVerts) { - assert( pBufferData != NULL && nCount <= nNumVertices && nVerts > 0 ); - - if( nCount + nVerts > nNumVertices ) { - // recreate except with a bigger count - VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU)*nNumVertices*2, 256); - memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount); - nNumVertices *= 2; - assert( nCount + nVerts <= nNumVertices ); - _aligned_free(pBufferData); - pBufferData = ptemp; - } - } - - void Init(int nVerts) { - if( pBufferData == NULL && nVerts > 0 ) { - pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU)*nVerts, 256); - nNumVertices = nVerts; - } - - nCount = 0; - } - - u8 bNeedFrameCheck; - u8 bNeedZCheck; - u8 bNeedTexCheck; - u8 dummy0; - - union { - struct { - u8 bTexConstsSync; // only pixel shader constants that context owns - u8 bVarsTexSync; // texture info - u8 bVarsSetTarg; - u8 dummy1; - }; - u32 bSyncVars; + union + { + struct + { + u8 bTexConstsSync; // only pixel shader constants that context owns + u8 bVarsTexSync; // texture info + u8 bVarsSetTarg; + u8 dummy1; }; - int ictx; - VertexGPU* pBufferData; // current allocated data - - int nNumVertices; // size of pBufferData in terms of VertexGPU objects - int nCount; - primInfo curprim; // the previous prim the current buffers are set to - - zbufInfo zbuf; - frameInfo gsfb; // the real info set by FRAME cmd - frameInfo frame; - int zprimmask; // zmask for incoming points - - u32 uCurTex0Data[2]; // current tex0 data - u32 uNextTex0Data[2]; // tex0 data that has to be applied if bNeedTexCheck is 1 - - //int nFrameHeights[8]; // frame heights for the past frame changes - int nNextFrameHeight; - - CMemoryTarget* pmemtarg; // the current mem target set - CRenderTarget* prndr; - CDepthTarget* pdepth; + u32 bSyncVars; }; - // Return, if tcc, aem or psm mode told us, than Alpha test should be used - // if tcc == 0 than no alpha used, aem used for alpha expanding and I am not sure - // that it's correct, psm -- color mode, - inline bool - IsAlphaTestExpansion(VB& curvb){ - return (curvb.tex0.tcc && gs.texa.aem && nNeedAlpha(GetTexCPSM(curvb.tex0))); - } + int ictx; + VertexGPU* pBufferData; // current allocated data - // visible members - extern DrawFn drawfn[8]; + int nNumVertices; // size of pBufferData in terms of VertexGPU objects + int nCount; + primInfo curprim; // the previous prim the current buffers are set to - // VB variables - extern VB vb[2]; - extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height - extern vector g_vboBuffers; // VBOs for all drawing commands - extern GLuint vboRect; - extern int g_nCurVBOIndex; + zbufInfo zbuf; + frameInfo gsfb; // the real info set by FRAME cmd + frameInfo frame; + int zprimmask; // zmask for incoming points - // Shaders variables - extern Vector g_vdepth; - extern Vector vlogz; - extern VERTEXSHADER pvsBitBlt; - extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; - extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; - bool LoadEffects(); - bool LoadExtraEffects(); - FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); + u32 uCurTex0Data[2]; // current tex0 data + u32 uNextTex0Data[2]; // tex0 data that has to be applied if bNeedTexCheck is 1 - extern RenderFormatType g_RenderFormatType; + //int nFrameHeights[8]; // frame heights for the past frame changes + int nNextFrameHeight; - void AddMessage(const char* pstr, u32 ms = 5000); - void DrawText(const char* pstr, int left, int top, u32 color); - void ChangeWindowSize(int nNewWidth, int nNewHeight); - void SetChangeDeviceSize(int nNewWidth, int nNewHeight); - void ChangeDeviceSize(int nNewWidth, int nNewHeight); - void SetAA(int mode); - void SetNegAA(int mode); - void SetCRC(int crc); + CMemoryTarget* pmemtarg; // the current mem target set + CRenderTarget* prndr; + CDepthTarget* pdepth; +}; - void ReloadEffects(); +// Return, if tcc, aem or psm mode told us, than Alpha test should be used +// if tcc == 0 than no alpha used, aem used for alpha expanding and I am not sure +// that it's correct, psm -- color mode, +inline bool +IsAlphaTestExpansion(VB& curvb) +{ + return (curvb.tex0.tcc && gs.texa.aem && nNeedAlpha(GetTexCPSM(curvb.tex0))); +} - // Methods // - bool IsGLExt( const char* szTargetExtension ); ///< returns true if the the opengl extension is supported - inline bool Create_Window(int _width, int _height); - bool Create(int width, int height); - void Destroy(BOOL bD3D); +// visible members +extern DrawFn drawfn[8]; - void Restore(); // call to restore device - void Reset(); // call to destroy video resources +// VB variables +extern VB vb[2]; +extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height +extern vector g_vboBuffers; // VBOs for all drawing commands +extern GLuint vboRect; +extern int g_nCurVBOIndex; - void GSStateReset(); - void HandleGLError(); +// Shaders variables +extern Vector g_vdepth; +extern Vector vlogz; +extern VERTEXSHADER pvsBitBlt; +extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne; +extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16; +bool LoadEffects(); +bool LoadExtraEffects(); +FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed); - // called on a primitive switch - void Prim(); +extern RenderFormatType g_RenderFormatType; - void SetTexFlush(); - // flush current vertices, call before setting new registers (the main render method) - void Flush(int context); +void AddMessage(const char* pstr, u32 ms = 5000); +void DrawText(const char* pstr, int left, int top, u32 color); +void ChangeWindowSize(int nNewWidth, int nNewHeight); +void SetChangeDeviceSize(int nNewWidth, int nNewHeight); +void ChangeDeviceSize(int nNewWidth, int nNewHeight); +void SetAA(int mode); +void SetNegAA(int mode); +void SetCRC(int crc); - void ExtWrite(); +void ReloadEffects(); - void SetWriteDepth(); - bool IsWriteDepth(); +// Methods // +bool IsGLExt(const char* szTargetExtension); ///< returns true if the the opengl extension is supported +inline bool Create_Window(int _width, int _height); +bool Create(int width, int height); +void Destroy(BOOL bD3D); - void SetDestAlphaTest(); - bool IsWriteDestAlphaTest(); +void Restore(); // call to restore device +void Reset(); // call to destroy video resources - void SetFogColor(u32 fog); - void SaveTex(tex0Info* ptex, int usevid); - char* NamedSaveTex(tex0Info* ptex, int usevid); +void GSStateReset(); +void HandleGLError(); - // called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf. - // Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks? - void InitTransferHostLocal(); - void TransferHostLocal(const void* pbyMem, u32 nQWordSize); +// called on a primitive switch +void Prim(); - void InitTransferLocalHost(); - void TransferLocalHost(void* pbyMem, u32 nQWordSize); - inline void TerminateLocalHost() {} +void SetTexFlush(); +// flush current vertices, call before setting new registers (the main render method) +void Flush(int context); - void TransferLocalLocal(); +void ExtWrite(); - // switches the render target to the real target, flushes the current render targets and renders the real image - void RenderCRTC(int interlace); - void ResetRenderTarget(int index); +void SetWriteDepth(); +bool IsWriteDepth(); - bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op +void SetDestAlphaTest(); +bool IsWriteDestAlphaTest(); - // call to load CLUT data (depending on CLD) - void texClutWrite(int ctx); - RenderFormatType GetRenderFormat(); - GLenum GetRenderTargetFormat(); +void SetFogColor(u32 fog); +void SaveTex(tex0Info* ptex, int usevid); +char* NamedSaveTex(tex0Info* ptex, int usevid); - int Save(s8* pbydata); - bool Load(s8* pbydata); +// called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf. +// Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks? +void InitTransferHostLocal(); +void TransferHostLocal(const void* pbyMem, u32 nQWordSize); - void SaveSnapshot(const char* filename); - bool SaveRenderTarget(const char* filename, int width, int height, int jpeg); - bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height); - bool SaveJPEG(const char* filename, int width, int height, const void* pdata, int quality); - bool SaveTGA(const char* filename, int width, int height, void* pdata); - void Stop_Avi(); +void InitTransferLocalHost(); +void TransferLocalHost(void* pbyMem, u32 nQWordSize); +inline void TerminateLocalHost() {} - // private methods - void FlushSysMem(const RECT* prc); - void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode); +void TransferLocalLocal(); - // returns the first and last addresses aligned to a page that cover - void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw); +// switches the render target to the real target, flushes the current render targets and renders the real image +void RenderCRTC(int interlace); +void ResetRenderTarget(int index); - // inits the smallest rectangle in ptexMem that covers this region in ptexMem - // returns the offset that needs to be added to the locked rect to get the beginning of the buffer - //void GetMemRect(RECT& rc, int psm, int x, int y, int w, int h, int bp, int bw); +bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op - void SetContextTarget(int context) ; +// call to load CLUT data (depending on CLD) +void texClutWrite(int ctx); +RenderFormatType GetRenderFormat(); +GLenum GetRenderTargetFormat(); - void NeedFactor(int w); - // only sets a limited amount of state (for Update) - void SetTexClamping(int context, FRAGMENTSHADER* pfragment); - void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZeroGS::CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force); +int Save(s8* pbydata); +bool Load(s8* pbydata); - void ResetAlphaVariables(); +void SaveSnapshot(const char* filename); +bool SaveRenderTarget(const char* filename, int width, int height, int jpeg); +bool SaveTexture(const char* filename, u32 textarget, u32 tex, int width, int height); +bool SaveJPEG(const char* filename, int width, int height, const void* pdata, int quality); +bool SaveTGA(const char* filename, int width, int height, void* pdata); +void Stop_Avi(); - void StartCapture(); - void StopCapture(); - void CaptureFrame(); +// private methods +void FlushSysMem(const RECT* prc); +void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode); - // Perform clutting for flushed texture. Better check if it needs a prior call. - inline void - CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx) { - tex0->cbp = ZZOglGet_cbp_TexBits(Data); - tex0->cpsm = ZZOglGet_cpsm_TexBits(Data); - tex0->csm = ZZOglGet_csm_TexBits(Data); - tex0->csa = ZZOglGet_csa_TexBits(Data); - tex0->cld = ZZOglGet_cld_TexBits(Data); +// returns the first and last addresses aligned to a page that cover +void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw); - ZeroGS::texClutWrite(ictx); - } +// inits the smallest rectangle in ptexMem that covers this region in ptexMem +// returns the offset that needs to be added to the locked rect to get the beginning of the buffer +//void GetMemRect(RECT& rc, int psm, int x, int y, int w, int h, int bp, int bw); + +void SetContextTarget(int context) ; + +void NeedFactor(int w); +// only sets a limited amount of state (for Update) +void SetTexClamping(int context, FRAGMENTSHADER* pfragment); +void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, ZeroGS::CMemoryTarget* pmemtarg, FRAGMENTSHADER* pfragment, int force); + +void ResetAlphaVariables(); + +void StartCapture(); +void StopCapture(); +void CaptureFrame(); + +// Perform clutting for flushed texture. Better check if it needs a prior call. +inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx) +{ + tex0->cbp = ZZOglGet_cbp_TexBits(Data); + tex0->cpsm = ZZOglGet_cpsm_TexBits(Data); + tex0->csm = ZZOglGet_csm_TexBits(Data); + tex0->csa = ZZOglGet_csa_TexBits(Data); + tex0->cld = ZZOglGet_cld_TexBits(Data); + + ZeroGS::texClutWrite(ictx); +} }; // GL prototypes diff --git a/plugins/zzogl-pg/opengl/zerogsmath.h b/plugins/zzogl-pg/opengl/zerogsmath.h index 8cf5392c6d..bd710b9d4f 100644 --- a/plugins/zzogl-pg/opengl/zerogsmath.h +++ b/plugins/zzogl-pg/opengl/zerogsmath.h @@ -17,6 +17,7 @@ template inline T RAD_2_DEG(T radians) { return (radians * (T)57.29577951); } class Transform; + class TransformMatrix; typedef float dReal; @@ -35,63 +36,57 @@ inline dReal* inv4(const dReal* pf, dReal* pfres); // class used for 3 and 4 dim vectors and quaternions // It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used + class Vector { -public: - dReal x, y, z, w; + public: + dReal x, y, z, w; - Vector() : x(0), y(0), z(0), w(0) {} - Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {} - Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {} - Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} - Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; } + Vector() : x(0), y(0), z(0), w(0) {} + Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {} + Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {} + Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {} + Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; } + dReal operator[](int i) const { return (&x)[i]; } + dReal& operator[](int i) { return (&x)[i]; } + + // casting operators + operator dReal*() { return &x; } + operator const dReal*() const { return (const dReal*)&x; } + + // SCALAR FUNCTIONS + inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; } + inline void normalize() { normalize4(&x, &x); } + inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; } + inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; } + inline void SetColor(u32 color) + { + x = (color & 0xff) / 255.0f; + y = ((color >> 8) & 0xff) / 255.0f; + z = ((color >> 16) & 0xff) / 255.0f; + } - dReal operator[](int i) const { return (&x)[i]; } - dReal& operator[](int i) { return (&x)[i]; } - - // casting operators - operator dReal* () { return &x; } - operator const dReal* () const { return (const dReal*)&x; } - - // SCALAR FUNCTIONS - inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; } - inline void normalize() { normalize4(&x, &x); } - - inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; } - inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; } - inline void SetColor(u32 color) - { - x = (color & 0xff) / 255.0f; - y = ((color >> 8) & 0xff) / 255.0f; - z = ((color >> 16) & 0xff) / 255.0f; - } - - // 3 dim cross product, w is not touched - /// this = this x v - inline void Cross(const Vector &v) { cross3(&x, &x, v); } - - /// this = u x v - inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); } - - inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; } - inline Vector operator+(const Vector &r) const { Vector v; v.x = x+r.x; v.y = y+r.y; v.z = z+r.z; v.w = w+r.w; return v; } - inline Vector operator-(const Vector &r) const { Vector v; v.x = x-r.x; v.y = y-r.y; v.z = z-r.z; v.w = w-r.w; return v; } - inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x*x; v.y = r.y*y; v.z = r.z*z; v.w = r.w*w; return v; } - inline Vector operator*(dReal k) const { Vector v; v.x = k*x; v.y = k*y; v.z = k*z; v.w = k*w; return v; } - - inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; } - inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; } - inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; } - - inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; } - inline Vector& operator /= (const dReal _k) { dReal k=1/_k; x *= k; y *= k; z *= k; w *= k; return *this; } - - friend Vector operator* (float f, const Vector& v); - //friend ostream& operator<<(ostream& O, const Vector& v); - //friend istream& operator>>(istream& I, Vector& v); + // 3 dim cross product, w is not touched + /// this = this x v + inline void Cross(const Vector &v) { cross3(&x, &x, v); } + /// this = u x v + inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); } + inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; } + inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; } + inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; } + inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; } + inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; } + inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; } + inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; } + inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; } + inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; } + inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; } + friend Vector operator*(float f, const Vector& v); + //friend ostream& operator<<(ostream& O, const Vector& v); + //friend istream& operator>>(istream& I, Vector& v); }; -inline Vector operator* (float f, const Vector& left) +inline Vector operator*(float f, const Vector& left) { Vector v; v.x = f * left.x; @@ -113,18 +108,22 @@ struct OBB struct TRIANGLE { TRIANGLE() {} + TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {} + ~TRIANGLE() {} Vector v1, v2, v3; //!< the vertices of the triangle const Vector& operator[](int i) const { return (&v1)[i]; } - Vector& operator[](int i) { return (&v1)[i]; } + + Vector& operator[](int i) { return (&v1)[i]; } /// assumes CCW ordering of vertices - inline Vector ComputeNormal() { + inline Vector ComputeNormal() + { Vector normal; - cross3(normal, v2-v1, v3-v1); + cross3(normal, v2 - v1, v3 - v1); return normal; } }; @@ -172,8 +171,8 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf); inline bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2x, dReal& fv2y); // Simple routines for linear algebra algorithms // -int CubicRoots (double c0, double c1, double c2, double *r0, double *r1, double *r2); -bool QLAlgorithm3 (dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag); +int CubicRoots(double c0, double c1, double c2, double *r0, double *r1, double *r2); +bool QLAlgorithm3(dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag); void EigenSymmetric3(dReal* fCovariance, dReal* eval, dReal* fAxes); @@ -182,7 +181,7 @@ void GetCovarBasisVectors(dReal fCovariance[3][3], Vector* vRight, Vector* vUp, // first root returned is always >= second, roots are defined if the quadratic doesn't have real solutions void QuadraticSolver(dReal* pfQuadratic, dReal* pfRoots); -int insideQuadrilateral(const Vector* p0,const Vector* p1, const Vector* p2,const Vector* p3); +int insideQuadrilateral(const Vector* p0, const Vector* p1, const Vector* p2, const Vector* p3); int insideTriangle(const Vector* p0, const Vector* p1, const Vector* p2); // multiplies a matrix by a scalar @@ -238,30 +237,48 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2 c = pfmat[0] * pfmat[3] - pfmat[1] * pfmat[2]; d = b * b - 4.0f * c + 1e-16f; - if( d < 0 ) return false; - if( d < 1e-16f ) { + if (d < 0) return false; + + if (d < 1e-16f) + { a = -0.5f * b; - peigs[0] = a; peigs[1] = a; - fv1x = pfmat[1]; fv1y = a - pfmat[0]; - c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y); - fv1x *= c; fv1y *= c; - fv2x = -fv1y; fv2y = fv1x; + peigs[0] = a; + peigs[1] = a; + fv1x = pfmat[1]; + fv1y = a - pfmat[0]; + c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y); + fv1x *= c; + fv1y *= c; + fv2x = -fv1y; + fv2y = fv1x; return true; } // two roots d = sqrtf(d); + a = -0.5f * (b + d); peigs[0] = a; - fv1x = pfmat[1]; fv1y = a-pfmat[0]; - c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y); - fv1x *= c; fv1y *= c; + + fv1x = pfmat[1]; + fv1y = a - pfmat[0]; + + c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y); + + fv1x *= c; + fv1y *= c; a += d; peigs[1] = a; - fv2x = pfmat[1]; fv2y = a-pfmat[0]; - c = 1 / sqrtf(fv2x*fv2x + fv2y*fv2y); - fv2x *= c; fv2y *= c; + + fv2x = pfmat[1]; + fv2y = a - pfmat[0]; + + c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y); + + fv2x *= c; + fv2y *= c; + return true; } @@ -270,62 +287,70 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2 // Functions that are replacable by ipp library funcs template inline T* _mult3(T* pfres, const T* pf1, const T* pf2) { - assert( pf1 != NULL && pf2 != NULL && pfres != NULL ); + assert(pf1 != NULL && pf2 != NULL && pfres != NULL); T* pfres2; - if( pfres == pf1 || pfres == pf2 ) pfres2 = (T*)alloca(9 * sizeof(T)); - else pfres2 = pfres; - pfres2[0*4+0] = pf1[0*4+0]*pf2[0*4+0]+pf1[0*4+1]*pf2[1*4+0]+pf1[0*4+2]*pf2[2*4+0]; - pfres2[0*4+1] = pf1[0*4+0]*pf2[0*4+1]+pf1[0*4+1]*pf2[1*4+1]+pf1[0*4+2]*pf2[2*4+1]; - pfres2[0*4+2] = pf1[0*4+0]*pf2[0*4+2]+pf1[0*4+1]*pf2[1*4+2]+pf1[0*4+2]*pf2[2*4+2]; + if (pfres == pf1 || pfres == pf2) + pfres2 = (T*)alloca(9 * sizeof(T)); + else + pfres2 = pfres; - pfres2[1*4+0] = pf1[1*4+0]*pf2[0*4+0]+pf1[1*4+1]*pf2[1*4+0]+pf1[1*4+2]*pf2[2*4+0]; - pfres2[1*4+1] = pf1[1*4+0]*pf2[0*4+1]+pf1[1*4+1]*pf2[1*4+1]+pf1[1*4+2]*pf2[2*4+1]; - pfres2[1*4+2] = pf1[1*4+0]*pf2[0*4+2]+pf1[1*4+1]*pf2[1*4+2]+pf1[1*4+2]*pf2[2*4+2]; + pfres2[0*4+0] = pf1[0*4+0] * pf2[0*4+0] + pf1[0*4+1] * pf2[1*4+0] + pf1[0*4+2] * pf2[2*4+0]; + pfres2[0*4+1] = pf1[0*4+0] * pf2[0*4+1] + pf1[0*4+1] * pf2[1*4+1] + pf1[0*4+2] * pf2[2*4+1]; + pfres2[0*4+2] = pf1[0*4+0] * pf2[0*4+2] + pf1[0*4+1] * pf2[1*4+2] + pf1[0*4+2] * pf2[2*4+2]; + + pfres2[1*4+0] = pf1[1*4+0] * pf2[0*4+0] + pf1[1*4+1] * pf2[1*4+0] + pf1[1*4+2] * pf2[2*4+0]; + pfres2[1*4+1] = pf1[1*4+0] * pf2[0*4+1] + pf1[1*4+1] * pf2[1*4+1] + pf1[1*4+2] * pf2[2*4+1]; + pfres2[1*4+2] = pf1[1*4+0] * pf2[0*4+2] + pf1[1*4+1] * pf2[1*4+2] + pf1[1*4+2] * pf2[2*4+2]; + + pfres2[2*4+0] = pf1[2*4+0] * pf2[0*4+0] + pf1[2*4+1] * pf2[1*4+0] + pf1[2*4+2] * pf2[2*4+0]; + pfres2[2*4+1] = pf1[2*4+0] * pf2[0*4+1] + pf1[2*4+1] * pf2[1*4+1] + pf1[2*4+2] * pf2[2*4+1]; + pfres2[2*4+2] = pf1[2*4+0] * pf2[0*4+2] + pf1[2*4+1] * pf2[1*4+2] + pf1[2*4+2] * pf2[2*4+2]; - pfres2[2*4+0] = pf1[2*4+0]*pf2[0*4+0]+pf1[2*4+1]*pf2[1*4+0]+pf1[2*4+2]*pf2[2*4+0]; - pfres2[2*4+1] = pf1[2*4+0]*pf2[0*4+1]+pf1[2*4+1]*pf2[1*4+1]+pf1[2*4+2]*pf2[2*4+1]; - pfres2[2*4+2] = pf1[2*4+0]*pf2[0*4+2]+pf1[2*4+1]*pf2[1*4+2]+pf1[2*4+2]*pf2[2*4+2]; - - if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T)); + if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T)); return pfres; } inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2) { return _mult3(pfres, pf1, pf2); } + inline double* mult3(double* pfres, const double* pf1, const double* pf2) { return _mult3(pfres, pf1, pf2); } template inline T* _mult4(T* pfres, const T* p1, const T* p2) { - assert( pfres != NULL && p1 != NULL && p2 != NULL ); + assert(pfres != NULL && p1 != NULL && p2 != NULL); T* pfres2; - if( pfres == p1 || pfres == p2 ) pfres2 = (T*)alloca(16 * sizeof(T)); - else pfres2 = pfres; - pfres2[0*4+0] = p1[0*4+0]*p2[0*4+0] + p1[0*4+1]*p2[1*4+0] + p1[0*4+2]*p2[2*4+0] + p1[0*4+3]*p2[3*4+0]; - pfres2[0*4+1] = p1[0*4+0]*p2[0*4+1] + p1[0*4+1]*p2[1*4+1] + p1[0*4+2]*p2[2*4+1] + p1[0*4+3]*p2[3*4+1]; - pfres2[0*4+2] = p1[0*4+0]*p2[0*4+2] + p1[0*4+1]*p2[1*4+2] + p1[0*4+2]*p2[2*4+2] + p1[0*4+3]*p2[3*4+2]; - pfres2[0*4+3] = p1[0*4+0]*p2[0*4+3] + p1[0*4+1]*p2[1*4+3] + p1[0*4+2]*p2[2*4+3] + p1[0*4+3]*p2[3*4+3]; + if (pfres == p1 || pfres == p2) + pfres2 = (T*)alloca(16 * sizeof(T)); + else + pfres2 = pfres; - pfres2[1*4+0] = p1[1*4+0]*p2[0*4+0] + p1[1*4+1]*p2[1*4+0] + p1[1*4+2]*p2[2*4+0] + p1[1*4+3]*p2[3*4+0]; - pfres2[1*4+1] = p1[1*4+0]*p2[0*4+1] + p1[1*4+1]*p2[1*4+1] + p1[1*4+2]*p2[2*4+1] + p1[1*4+3]*p2[3*4+1]; - pfres2[1*4+2] = p1[1*4+0]*p2[0*4+2] + p1[1*4+1]*p2[1*4+2] + p1[1*4+2]*p2[2*4+2] + p1[1*4+3]*p2[3*4+2]; - pfres2[1*4+3] = p1[1*4+0]*p2[0*4+3] + p1[1*4+1]*p2[1*4+3] + p1[1*4+2]*p2[2*4+3] + p1[1*4+3]*p2[3*4+3]; + pfres2[0*4+0] = p1[0*4+0] * p2[0*4+0] + p1[0*4+1] * p2[1*4+0] + p1[0*4+2] * p2[2*4+0] + p1[0*4+3] * p2[3*4+0]; + pfres2[0*4+1] = p1[0*4+0] * p2[0*4+1] + p1[0*4+1] * p2[1*4+1] + p1[0*4+2] * p2[2*4+1] + p1[0*4+3] * p2[3*4+1]; + pfres2[0*4+2] = p1[0*4+0] * p2[0*4+2] + p1[0*4+1] * p2[1*4+2] + p1[0*4+2] * p2[2*4+2] + p1[0*4+3] * p2[3*4+2]; + pfres2[0*4+3] = p1[0*4+0] * p2[0*4+3] + p1[0*4+1] * p2[1*4+3] + p1[0*4+2] * p2[2*4+3] + p1[0*4+3] * p2[3*4+3]; - pfres2[2*4+0] = p1[2*4+0]*p2[0*4+0] + p1[2*4+1]*p2[1*4+0] + p1[2*4+2]*p2[2*4+0] + p1[2*4+3]*p2[3*4+0]; - pfres2[2*4+1] = p1[2*4+0]*p2[0*4+1] + p1[2*4+1]*p2[1*4+1] + p1[2*4+2]*p2[2*4+1] + p1[2*4+3]*p2[3*4+1]; - pfres2[2*4+2] = p1[2*4+0]*p2[0*4+2] + p1[2*4+1]*p2[1*4+2] + p1[2*4+2]*p2[2*4+2] + p1[2*4+3]*p2[3*4+2]; - pfres2[2*4+3] = p1[2*4+0]*p2[0*4+3] + p1[2*4+1]*p2[1*4+3] + p1[2*4+2]*p2[2*4+3] + p1[2*4+3]*p2[3*4+3]; + pfres2[1*4+0] = p1[1*4+0] * p2[0*4+0] + p1[1*4+1] * p2[1*4+0] + p1[1*4+2] * p2[2*4+0] + p1[1*4+3] * p2[3*4+0]; + pfres2[1*4+1] = p1[1*4+0] * p2[0*4+1] + p1[1*4+1] * p2[1*4+1] + p1[1*4+2] * p2[2*4+1] + p1[1*4+3] * p2[3*4+1]; + pfres2[1*4+2] = p1[1*4+0] * p2[0*4+2] + p1[1*4+1] * p2[1*4+2] + p1[1*4+2] * p2[2*4+2] + p1[1*4+3] * p2[3*4+2]; + pfres2[1*4+3] = p1[1*4+0] * p2[0*4+3] + p1[1*4+1] * p2[1*4+3] + p1[1*4+2] * p2[2*4+3] + p1[1*4+3] * p2[3*4+3]; + + pfres2[2*4+0] = p1[2*4+0] * p2[0*4+0] + p1[2*4+1] * p2[1*4+0] + p1[2*4+2] * p2[2*4+0] + p1[2*4+3] * p2[3*4+0]; + pfres2[2*4+1] = p1[2*4+0] * p2[0*4+1] + p1[2*4+1] * p2[1*4+1] + p1[2*4+2] * p2[2*4+1] + p1[2*4+3] * p2[3*4+1]; + pfres2[2*4+2] = p1[2*4+0] * p2[0*4+2] + p1[2*4+1] * p2[1*4+2] + p1[2*4+2] * p2[2*4+2] + p1[2*4+3] * p2[3*4+2]; + pfres2[2*4+3] = p1[2*4+0] * p2[0*4+3] + p1[2*4+1] * p2[1*4+3] + p1[2*4+2] * p2[2*4+3] + p1[2*4+3] * p2[3*4+3]; + + pfres2[3*4+0] = p1[3*4+0] * p2[0*4+0] + p1[3*4+1] * p2[1*4+0] + p1[3*4+2] * p2[2*4+0] + p1[3*4+3] * p2[3*4+0]; + pfres2[3*4+1] = p1[3*4+0] * p2[0*4+1] + p1[3*4+1] * p2[1*4+1] + p1[3*4+2] * p2[2*4+1] + p1[3*4+3] * p2[3*4+1]; + pfres2[3*4+2] = p1[3*4+0] * p2[0*4+2] + p1[3*4+1] * p2[1*4+2] + p1[3*4+2] * p2[2*4+2] + p1[3*4+3] * p2[3*4+2]; + pfres2[3*4+3] = p1[3*4+0] * p2[0*4+3] + p1[3*4+1] * p2[1*4+3] + p1[3*4+2] * p2[2*4+3] + p1[3*4+3] * p2[3*4+3]; - pfres2[3*4+0] = p1[3*4+0]*p2[0*4+0] + p1[3*4+1]*p2[1*4+0] + p1[3*4+2]*p2[2*4+0] + p1[3*4+3]*p2[3*4+0]; - pfres2[3*4+1] = p1[3*4+0]*p2[0*4+1] + p1[3*4+1]*p2[1*4+1] + p1[3*4+2]*p2[2*4+1] + p1[3*4+3]*p2[3*4+1]; - pfres2[3*4+2] = p1[3*4+0]*p2[0*4+2] + p1[3*4+1]*p2[1*4+2] + p1[3*4+2]*p2[2*4+2] + p1[3*4+3]*p2[3*4+2]; - pfres2[3*4+3] = p1[3*4+0]*p2[0*4+3] + p1[3*4+1]*p2[1*4+3] + p1[3*4+2]*p2[2*4+3] + p1[3*4+3]*p2[3*4+3]; + if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16); - if( pfres != pfres2 ) memcpy(pfres, pfres2, sizeof(T)*16); return pfres; } @@ -336,22 +361,23 @@ template inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2) { T* pfres2; - if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T)); - else pfres2 = pfres; - pfres2[0] = pf1[0]*pf2[0]+pf1[3]*pf2[3]+pf1[6]*pf2[6]; - pfres2[1] = pf1[0]*pf2[1]+pf1[3]*pf2[4]+pf1[6]*pf2[7]; - pfres2[2] = pf1[0]*pf2[2]+pf1[3]*pf2[5]+pf1[6]*pf2[8]; + if (pfres == pf1) + pfres2 = (T*)alloca(9 * sizeof(T)); + else + pfres2 = pfres; - pfres2[3] = pf1[1]*pf2[0]+pf1[4]*pf2[3]+pf1[7]*pf2[6]; - pfres2[4] = pf1[1]*pf2[1]+pf1[4]*pf2[4]+pf1[7]*pf2[7]; - pfres2[5] = pf1[1]*pf2[2]+pf1[4]*pf2[5]+pf1[7]*pf2[8]; + pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6]; + pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7]; + pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8]; + pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6]; + pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7]; + pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8]; + pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6]; + pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7]; + pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8]; - pfres2[6] = pf1[2]*pf2[0]+pf1[5]*pf2[3]+pf1[8]*pf2[6]; - pfres2[7] = pf1[2]*pf2[1]+pf1[5]*pf2[4]+pf1[8]*pf2[7]; - pfres2[8] = pf1[2]*pf2[2]+pf1[5]*pf2[5]+pf1[8]*pf2[8]; - - if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T)); + if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T)); return pfres; } @@ -360,11 +386,16 @@ template inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2) { T* pfres2; - if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T)); - else pfres2 = pfres; - for(int i = 0; i < 4; ++i) { - for(int j = 0; j < 4; ++j) { + if (pfres == pf1) + pfres2 = (T*)alloca(16 * sizeof(T)); + else + pfres2 = pfres; + + for (int i = 0; i < 4; ++i) + { + for (int j = 0; j < 4; ++j) + { pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12]; } } @@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) { template inline T* _inv3(const T* pf, T* pfres, int stride) { T* pfres2; - if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T)); - else pfres2 = pfres; + + if (pfres == pf) + pfres2 = (T*)alloca(3 * stride * sizeof(T)); + else + pfres2 = pfres; // inverse = C^t / det(pf) where C is the matrix of coefficients @@ -390,29 +424,40 @@ template inline T* _inv3(const T* pf, T* pfres, int stride) pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1]; pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2]; pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1]; + pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2]; pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0]; pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2]; + pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0]; pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1]; pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0]; T fdet = pf[0*stride + 2] * pfres2[2*stride + 0] + pf[1*stride + 2] * pfres2[2*stride + 1] + - pf[2*stride + 2] * pfres2[2*stride + 2]; + pf[2*stride + 2] * pfres2[2*stride + 2]; - if( fabs(fdet) < 1e-6 ) return NULL; + if (fabs(fdet) < 1e-6) return NULL; fdet = 1 / fdet; + //if( pfdet != NULL ) *pfdet = fdet; - if( pfres != pf ) { - pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet; - pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet; - pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet; + if (pfres != pf) + { + pfres[0*stride+0] *= fdet; + pfres[0*stride+1] *= fdet; + pfres[0*stride+2] *= fdet; + pfres[1*stride+0] *= fdet; + pfres[1*stride+1] *= fdet; + pfres[1*stride+2] *= fdet; + pfres[2*stride+0] *= fdet; + pfres[2*stride+1] *= fdet; + pfres[2*stride+2] *= fdet; return pfres; } pfres[0*stride+0] = pfres2[0*stride+0] * fdet; + pfres[0*stride+1] = pfres2[0*stride+1] * fdet; pfres[0*stride+2] = pfres2[0*stride+2] * fdet; pfres[1*stride+0] = pfres2[1*stride+0] * fdet; @@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3 inline T* _inv4(const T* pf, T* pfres) { T* pfres2; - if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T)); - else pfres2 = pfres; + + if (pfres == pf) + pfres2 = (T*)alloca(16 * sizeof(T)); + else + pfres2 = pfres; // inverse = C^t / det(pf) where C is the matrix of coefficients @@ -439,7 +487,9 @@ template inline T* _inv4(const T* pf, T* pfres) // determinants of all possibel 2x2 submatrices formed by last two rows T fd0, fd1, fd2; + T f1, f2, f3; + fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0]; fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1]; fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2]; @@ -482,20 +532,24 @@ template inline T* _inv4(const T* pf, T* pfres) pfres2[3*4 + 3] = pf[2*4 + 0] * fd1 - pf[2*4 + 1] * f3 + pf[2*4 + 2] * fd0; T fdet = pf[0*4 + 3] * pfres2[3*4 + 0] + pf[1*4 + 3] * pfres2[3*4 + 1] + - pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3]; + pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3]; - if( fabs(fdet) < 1e-6) return NULL; + if (fabs(fdet) < 1e-6) return NULL; fdet = 1 / fdet; + //if( pfdet != NULL ) *pfdet = fdet; - if( pfres2 == pfres ) { + if (pfres2 == pfres) + { mult(pfres, fdet, 16); return pfres; } int i = 0; - while(i < 16) { + + while (i < 16) + { pfres[i] = pfres2[i] * fdet; ++i; } @@ -507,18 +561,26 @@ inline dReal* inv4(const dReal* pf, dReal* pfres) { return _inv4(pf, pfre template inline T* _transpose3(const T* pf, T* pfres) { - assert( pf != NULL && pfres != NULL ); + assert(pf != NULL && pfres != NULL); - if( pf == pfres ) { + if (pf == pfres) + { rswap(pfres[1], pfres[3]); rswap(pfres[2], pfres[6]); rswap(pfres[5], pfres[7]); return pfres; } - pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6]; - pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7]; - pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8]; + pfres[0] = pf[0]; + + pfres[1] = pf[3]; + pfres[2] = pf[6]; + pfres[3] = pf[1]; + pfres[4] = pf[4]; + pfres[5] = pf[7]; + pfres[6] = pf[2]; + pfres[7] = pf[5]; + pfres[8] = pf[8]; return pfres; } @@ -528,9 +590,10 @@ inline double* transpose3(const double* pf, double* pfres) { return _transpose3( template inline T* _transpose4(const T* pf, T* pfres) { - assert( pf != NULL && pfres != NULL ); + assert(pf != NULL && pfres != NULL); - if( pf == pfres ) { + if (pf == pfres) + { rswap(pfres[1], pfres[4]); rswap(pfres[2], pfres[8]); rswap(pfres[3], pfres[12]); @@ -540,10 +603,23 @@ template inline T* _transpose4(const T* pf, T* pfres) return pfres; } - pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12]; - pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13]; - pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14]; - pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15]; + pfres[0] = pf[0]; + + pfres[1] = pf[4]; + pfres[2] = pf[8]; + pfres[3] = pf[12]; + pfres[4] = pf[1]; + pfres[5] = pf[5]; + pfres[6] = pf[9]; + pfres[7] = pf[13]; + pfres[8] = pf[2]; + pfres[9] = pf[6]; + pfres[10] = pf[10]; + pfres[11] = pf[14]; + pfres[12] = pf[3]; + pfres[13] = pf[7]; + pfres[14] = pf[11]; + pfres[15] = pf[15]; return pfres; } @@ -552,37 +628,37 @@ inline double* transpose4(const double* pf, double* pfres) { return _transpose4( inline dReal dot2(const dReal* pf1, const dReal* pf2) { - assert( pf1 != NULL && pf2 != NULL ); + assert(pf1 != NULL && pf2 != NULL); return pf1[0]*pf2[0] + pf1[1]*pf2[1]; } inline dReal dot3(const dReal* pf1, const dReal* pf2) { - assert( pf1 != NULL && pf2 != NULL ); + assert(pf1 != NULL && pf2 != NULL); return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2]; } inline dReal dot4(const dReal* pf1, const dReal* pf2) { - assert( pf1 != NULL && pf2 != NULL ); + assert(pf1 != NULL && pf2 != NULL); return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2] + pf1[3] * pf2[3]; } inline dReal lengthsqr2(const dReal* pf) { - assert( pf != NULL ); + assert(pf != NULL); return pf[0] * pf[0] + pf[1] * pf[1]; } inline dReal lengthsqr3(const dReal* pf) { - assert( pf != NULL ); + assert(pf != NULL); return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2]; } inline dReal lengthsqr4(const dReal* pf) { - assert( pf != NULL ); + assert(pf != NULL); return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3]; } @@ -590,7 +666,7 @@ inline dReal* normalize2(dReal* pfout, const dReal* pf) { assert(pf != NULL); - dReal f = pf[0]*pf[0] + pf[1]*pf[1]; + dReal f = pf[0] * pf[0] + pf[1] * pf[1]; f = 1.0f / sqrtf(f); pfout[0] = pf[0] * f; pfout[1] = pf[1] * f; @@ -602,7 +678,7 @@ inline dReal* normalize3(dReal* pfout, const dReal* pf) { assert(pf != NULL); - dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2]; + dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2]; f = 1.0f / sqrtf(f); pfout[0] = pf[0] * f; @@ -616,7 +692,7 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf) { assert(pf != NULL); - dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2] + pf[3]*pf[3]; + dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3]; f = 1.0f / sqrtf(f); pfout[0] = pf[0] * f; @@ -629,22 +705,25 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf) inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2) { - assert( pfout != NULL && pf1 != NULL && pf2 != NULL ); + assert(pfout != NULL && pf1 != NULL && pf2 != NULL); dReal temp[3]; temp[0] = pf1[1] * pf2[2] - pf1[2] * pf2[1]; temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2]; temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0]; - pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2]; + pfout[0] = temp[0]; + pfout[1] = temp[1]; + pfout[2] = temp[2]; return pfout; } template inline void mult(T* pf, T fa, int r) { - assert( pf != NULL ); + assert(pf != NULL); - while(r > 0) { + while (r > 0) + { --r; pf[r] *= fa; } @@ -653,25 +732,32 @@ template inline void mult(T* pf, T fa, int r) template inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd) { - assert( pf1 != NULL && pf2 != NULL && pfres != NULL); + assert(pf1 != NULL && pf2 != NULL && pfres != NULL); int j, k; - if( !badd ) memset(pfres, 0, sizeof(S) * r1 * c2); + if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2); - while(r1 > 0) { + while (r1 > 0) + { --r1; j = 0; - while(j < c2) { + + while (j < c2) + { k = 0; - while(k < c1) { + + while (k < c1) + { pfres[j] += pf1[k] * pf2[k*c2 + j]; ++k; } + ++j; } pf1 += c1; + pfres += c2; } @@ -681,26 +767,32 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd) template inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd) { - assert( pf1 != NULL && pf2 != NULL && pfres != NULL); + assert(pf1 != NULL && pf2 != NULL && pfres != NULL); int i, j, k; - if( !badd ) memset(pfres, 0, sizeof(S) * c1 * c2); + if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2); i = 0; - while(i < c1) { + while (i < c1) + { j = 0; - while(j < c2) { + while (j < c2) + { k = 0; - while(k < r1) { + + while (k < r1) + { pfres[j] += pf1[k*c1] * pf2[k*c2 + j]; ++k; } + ++j; } pfres += c2; + ++pf1; ++i; @@ -712,25 +804,32 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd) template inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd) { - assert( pf1 != NULL && pf2 != NULL && pfres != NULL); + assert(pf1 != NULL && pf2 != NULL && pfres != NULL); int j, k; - if( !badd ) memset(pfres, 0, sizeof(S) * r1 * r2); + if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2); - while(r1 > 0) { + while (r1 > 0) + { --r1; j = 0; - while(j < r2) { + + while (j < r2) + { k = 0; - while(k < c1) { + + while (k < c1) + { pfres[j] += pf1[k] * pf2[j*c1 + k]; ++k; } + ++j; } pf1 += c1; + pfres += r2; } @@ -739,88 +838,107 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b template inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp) { - assert( pf1 != NULL && pf2 != NULL ); + assert(pf1 != NULL && pf2 != NULL); int j, k; bool bdel = false; - if( pftemp == NULL ) { + if (pftemp == NULL) + { pftemp = new T[c]; bdel = true; } - while(r > 0) { + while (r > 0) + { --r; j = 0; - while(j < c) { + + while (j < c) + { pftemp[j] = 0.0; k = 0; - while(k < c) { + + while (k < c) + { pftemp[j] += pf1[k] * pf2[k*c + j]; ++k; } + ++j; } memcpy(pf1, pftemp, c * sizeof(T)); + pf1 += c; } - if( bdel ) delete[] pftemp; + if (bdel) delete[] pftemp; return pf1; } template inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp) { - assert( pf1 != NULL && pf2 != NULL ); + assert(pf1 != NULL && pf2 != NULL); int i, j, k; bool bdel = false; - if( pftemp == NULL ) { + if (pftemp == NULL) + { pftemp = new S[r2]; bdel = true; } // do columns first j = 0; - while(j < c2) { + + while (j < c2) + { i = 0; - while(i < r2) { + + while (i < r2) + { pftemp[i] = 0.0; k = 0; - while(k < r2) { + + while (k < r2) + { pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j]; ++k; } + ++i; } i = 0; - while(i < r2) { - *(pf2+i*c2+j) = pftemp[i]; + + while (i < r2) + { + *(pf2 + i*c2 + j) = pftemp[i]; ++i; } ++j; } - if( bdel ) delete[] pftemp; + if (bdel) delete[] pftemp; return pf1; } template inline void add(T* pf1, T* pf2, int r) { - assert( pf1 != NULL && pf2 != NULL); + assert(pf1 != NULL && pf2 != NULL); - while(r > 0) { + while (r > 0) + { --r; pf1[r] += pf2[r]; } @@ -828,9 +946,10 @@ template inline void add(T* pf1, T* pf2, int r) template inline void sub(T* pf1, T* pf2, int r) { - assert( pf1 != NULL && pf2 != NULL); + assert(pf1 != NULL && pf2 != NULL); - while(r > 0) { + while (r > 0) + { --r; pf1[r] -= pf2[r]; } @@ -838,10 +957,12 @@ template inline void sub(T* pf1, T* pf2, int r) template inline T normsqr(T* pf1, int r) { - assert( pf1 != NULL ); + assert(pf1 != NULL); T d = 0.0; - while(r > 0) { + + while (r > 0) + { --r; d += pf1[r] * pf1[r]; } @@ -852,7 +973,9 @@ template inline T normsqr(T* pf1, int r) template inline T lengthsqr(T* pf1, T* pf2, int length) { T d = 0; - while(length > 0) { + + while (length > 0) + { --length; d += sqr(pf1[length] - pf2[length]); } @@ -863,7 +986,9 @@ template inline T lengthsqr(T* pf1, T* pf2, int length) template inline T dot(T* pf1, T* pf2, int length) { T d = 0; - while(length > 0) { + + while (length > 0) + { --length; d += pf1[length] * pf2[length]; } @@ -874,7 +999,9 @@ template inline T dot(T* pf1, T* pf2, int length) template inline T sum(T* pf, int length) { T d = 0; - while(length > 0) { + + while (length > 0) + { --length; d += pf[length]; } @@ -886,18 +1013,23 @@ template inline bool inv2(T* pf, T* pfres) { T fdet = pf[0] * pf[3] - pf[1] * pf[2]; - if( fabs(fdet) < 1e-16 ) return false; + if (fabs(fdet) < 1e-16) return false; fdet = 1 / fdet; + //if( pfdet != NULL ) *pfdet = fdet; - if( pfres != pf ) { - pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1]; - pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0]; + if (pfres != pf) + { + pfres[0] = fdet * pf[3]; + pfres[1] = -fdet * pf[1]; + pfres[2] = -fdet * pf[2]; + pfres[3] = fdet * pf[0]; return true; } dReal ftemp = pf[0]; + pfres[0] = pf[3] * fdet; pfres[1] *= -fdet; pfres[2] *= -fdet;