zzogl-pg: Part 2 of the re-formatting; ran AStyle over the headers.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2932 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-05-01 22:54:23 +00:00
parent a6c4df49ea
commit 12ad5308ed
12 changed files with 1453 additions and 1123 deletions

View File

@ -36,6 +36,7 @@ using namespace std;
class GLWindow
{
private:
#ifdef GL_X11_WINDOW
Display *glDisplay;
@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem;
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
// PS2 vertex
struct VertexGPU
{
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
@ -264,6 +266,7 @@ struct VertexGPU
};
// Almost the same as the previous struct, controlled by the prim.fst flag
struct Vertex
{
u16 x, y, f, resv0; // note: xy is 12d3
@ -281,7 +284,8 @@ extern int ppf;
// PSM values
// PSM types == Texture Storage Format
enum PSM_value{
enum PSM_value
{
PSMCT32 = 0, // 000000
PSMCT24 = 1, // 000001
PSMCT16 = 2, // 000010
@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}
//----------------------- Data from registers -----------------------
typedef union {
typedef union
{
s64 SD;
u64 UD;
s32 SL[2];
@ -340,7 +345,9 @@ typedef union {
} reg64;
/* general purpose regs structs */
typedef struct {
typedef struct
{
int fbp;
int fbw;
int fbh;
@ -349,7 +356,8 @@ typedef struct {
} frameInfo;
// Create frame structure from known data
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm)
{
frameInfo frame;
frame.fbp = fbp;
frame.fbw = fbw;
@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
return frame;
}
typedef struct {
typedef struct
{
u16 prim;
union {
struct {
union
{
struct
{
u16 iip : 1;
u16 tme : 1;
u16 fge : 1;
@ -380,8 +391,10 @@ typedef struct {
extern primInfo *prim;
typedef union {
struct {
typedef union
{
struct
{
u32 ate : 1;
u32 atst : 3;
u32 aref : 8;
@ -395,13 +408,15 @@ typedef union {
u32 _val;
} pixTest;
typedef struct {
typedef struct
{
int bp;
int bw;
int psm;
} bufInfo;
typedef struct {
typedef struct
{
int tbp0;
int tbw;
int cbp;
@ -432,13 +447,17 @@ union tex_0_info
u64 csa : 5;
u64 cld : 3;
};
u64 _u64;
u32 _u32[2];
u16 _u16[4];
u8 _u8[8];
tex_0_info(u64 data) { _u64 = data; }
tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }
tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }
u32 tbw_mult()
{
if (tbw == 0)
@ -446,26 +465,34 @@ union tex_0_info
else
return ((u32)tbw << 6);
}
u32 psm_fix()
{
// printf ("psm %d\n", psm);
if (psm == 9) return 1;
return psm;
}
u32 tw_exp()
{
if (tw > 10) return (1 << 10);
return (1 << tw);
}
u32 th_exp()
{
if (th > 10) return (1 << 10);
return (1 << th);
}
u32 cpsm_fix()
{
return cpsm & 0xe;
}
u32 csa_fix()
{
if (cpsm < 2)
@ -480,7 +507,8 @@ union tex_0_info
#define TEX_HIGHLIGHT 2
#define TEX_HIGHLIGHT2 3
typedef struct {
typedef struct
{
int lcm;
int mxl;
int mmag;
@ -490,7 +518,8 @@ typedef struct {
int k;
} tex1Info;
typedef struct {
typedef struct
{
int wms;
int wmt;
int minu;
@ -499,24 +528,28 @@ typedef struct {
int maxv;
} clampInfo;
typedef struct {
typedef struct
{
int cbw;
int cou;
int cov;
} clutInfo;
typedef struct {
typedef struct
{
int tbp[3];
int tbw[3];
} miptbpInfo;
typedef struct {
typedef struct
{
u16 aem;
u8 ta[2];
float fta[2];
} texaInfo;
typedef struct {
typedef struct
{
int sx;
int sy;
int dx;
@ -524,9 +557,12 @@ typedef struct {
int dir;
} trxposInfo;
typedef struct {
union {
struct {
typedef struct
{
union
{
struct
{
u8 a : 2;
u8 b : 2;
u8 c : 2;
@ -538,17 +574,20 @@ typedef struct {
u8 fix : 8;
} alphaInfo;
typedef struct {
typedef struct
{
u16 zbp; // u16 address / 64
u8 psm;
u8 zmsk;
} zbufInfo;
typedef struct {
typedef struct
{
int fba;
} fbaInfo;
typedef struct {
typedef struct
{
Vertex gsvertex[3];
u32 rgba;
float q;
@ -608,21 +647,26 @@ static __forceinline u32 RGBA16to32(u16 c)
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
// f is a u16
static __forceinline u16 Float16ToBYTE(u16 f) {
static __forceinline u16 Float16ToBYTE(u16 f)
{
//assert( !(f & 0x8000) );
if (f & 0x8000) return 0;
u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
return d > 255 ? 255 : d;
}
static __forceinline u16 Float16ToALPHA(u16 f) {
static __forceinline u16 Float16ToALPHA(u16 f)
{
//assert( !(f & 0x8000) );
if (f & 0x8000) return 0;
// round up instead of down (crash and burn), too much and charlie breaks
u16 d = (((((f & 0x3ff) | 0x400)) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
d = (d) >> 1;
return d > 255 ? 255 : d;
}
@ -651,11 +695,13 @@ static __forceinline u16 Float16ToALPHA(u16 f) {
inline float Clamp(float fx, float fmin, float fmax)
{
if (fx < fmin) return fmin;
return fx > fmax ? fmax : fx;
}
// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
{
if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
return true;
else
@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
{
//return text_0_info(data).tbw_mult();
int result = ZZOglGet_tbw_TexBits(data);
if (result == 0)
return 64;
else
@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
//return tex_0_info(data).psm_fix();
int result = ZZOglGet_psm_TexBits(data) ;
// printf ("result %d\n", result);
if (result == 9) result = 1;
return result;
}
@ -723,7 +772,9 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
{
//return tex_0_info(data).tw_exp();
u16 result = ZZOglGet_tw_TexBits(data);
if (result > 10) result = 10;
return (1 << result);
}
@ -741,7 +792,9 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
{
//return tex_0_info(dataLO, dataHI).th_exp();
u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
if (result > 10) result = 10;
return (1 << result);
}
@ -811,73 +864,75 @@ static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
//-------------------------- frames
// FrameInfo bits.
// Obtain fbp -- frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-15
inline int
ZZOglGet_fbp_FrameBits(u32 data) {
inline int ZZOglGet_fbp_FrameBits(u32 data)
{
return ((data) & 0x1ff);
}
// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64.
inline int
ZZOglGet_fbp_FrameBitsMult(u32 data) {
// So we got address / 64, henceby frame fbp and tex tbp have the same dimension -- "real address" is x64.
inline int ZZOglGet_fbp_FrameBitsMult(u32 data)
{
return (ZZOglGet_fbp_FrameBits(data) << 5);
}
// Obtain fbw -- width (Texels/64) -- from data. Bits 16-23
inline int
ZZOglGet_fbw_FrameBits(u32 data) {
inline int ZZOglGet_fbw_FrameBits(u32 data)
{
return ((data >> 16) & 0x3f);
}
inline int
ZZOglGet_fbw_FrameBitsMult(u32 data) {
inline int ZZOglGet_fbw_FrameBitsMult(u32 data)
{
return (ZZOglGet_fbw_FrameBits(data) << 6);
}
// Obtain psm -- Pixel Storage Format -- from data. Bits 24-29.
// (data & 0x3f000000) >> 24
inline int
ZZOglGet_psm_FrameBits(u32 data) {
inline int ZZOglGet_psm_FrameBits(u32 data)
{
return ((data >> 24) & 0x3f);
}
// Function for calculating overall height from frame data.
inline int
ZZOgl_fbh_Calc (int fbp, int fbw, int psm) {
inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm)
{
int fbh = (1024 * 1024 - 64 * fbp) / fbw;
fbh &= ~0x1f;
if (PSMT_ISHALF(psm))
fbh *= 2;
if (fbh > 1024)
fbh = 1024;
if (PSMT_ISHALF(psm)) fbh *= 2;
if (fbh > 1024) fbh = 1024;
return fbh ;
}
inline int
ZZOgl_fbh_Calc (frameInfo frame) {
inline int ZZOgl_fbh_Calc(frameInfo frame)
{
return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm);
}
// Calculate fbh from data, It does not set in register
inline int
ZZOglGet_fbh_FrameBitsCalc (u32 data) {
inline int ZZOglGet_fbh_FrameBitsCalc(u32 data)
{
int fbh = 0;
int fbp = ZZOglGet_fbp_FrameBits(data);
int fbw = ZZOglGet_fbw_FrameBits(data);
int psm = ZZOglGet_psm_FrameBits(data);
if (fbw > 0)
fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
return fbh ;
}
// Obtain fbm -- frame mask -- from data. All higher word.
inline u32
ZZOglGet_fbm_FrameBits(u32 data) {
inline u32 ZZOglGet_fbm_FrameBits(u32 data)
{
return (data);
}
// Obtain fbm -- frame mask -- from data. All higher word. Fixed up for psm == PSMCT24 (without alpha)
inline u32
ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI)
{
if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1)
return (dataHI | 0xff000000);
else
@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
}
// obtain colormask RED
inline u32
ZZOglGet_fbmRed_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmRed_FrameBits(u32 data)
{
return (data & 0xff);
}
// obtain colormask Green
inline u32
ZZOglGet_fbmGreen_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data)
{
return ((data >> 8) & 0xff);
}
// obtain colormask Blue
inline u32
ZZOglGet_fbmBlue_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data)
{
return ((data >> 16) & 0xff);
}
// obtain colormask Alpha
inline u32
ZZOglGet_fbmAlpha_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data)
{
return ((data >> 24) & 0xff);
}
// obtain the highest bit (bit 31) of the frame mask, as 0 or 1
inline u32
ZZOglGet_fbmHighByte(u32 data) {
inline u32 ZZOglGet_fbmHighByte(u32 data)
{
return (!!(data & 0x80000000));
}
//-------------------------- tex0 comparison
// Check if old and new tex0 registers have only clut difference
inline bool
ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) {
inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex)
{
return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f)));
}
// Check if the CLUT registers are same, except CLD
inline bool
ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) {
inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex)
{
return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0));
}
// Check if CLUT storage mode is not changed (CSA, CSM and CPSM)
inline bool
ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) {
inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex)
{
return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
}

View File

@ -39,6 +39,7 @@ union GIFTag
{
u64 ai64[2];
u32 ai32[4];
struct
{
u32 NLOOP : 15;
@ -51,6 +52,7 @@ union GIFTag
u32 NREG : 4;
u64 REGS : 64;
};
void set(u32 *data)
{
for (int i = 0; i <= 3; i++)
@ -58,10 +60,12 @@ union GIFTag
ai32[i] = data[i];
}
}
GIFTag(u32 *data)
{
set(data);
}
GIFTag(){ ai64[0] = 0; ai64[1] = 0; }
};
@ -101,7 +105,6 @@ typedef struct
// Hmm....
nreg = tag.NREG << 2;
if (nreg == 0) nreg = 64;
regs = tag.REGS;
reg = 0;
@ -124,13 +127,12 @@ typedef struct
reg = 0;
nloop--;
if (nloop == 0)
{
return false;
}
if (nloop == 0) return false;
}
return true;
}
#else
void setTag(u32 *data)
{
@ -158,7 +160,6 @@ typedef struct
regs = *(u64 *)(data + 2);
regn = 0;
if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
break;
case GIF_FLG_REGLIST:
@ -167,6 +168,7 @@ typedef struct
break;
}
}
#endif
} pathInfo;

View File

@ -33,7 +33,6 @@ const int BLOCK_TEXHEIGHT = 512;
extern PCSX2_ALIGNED16(u32 tempblock[64]);
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
@ -53,6 +52,7 @@ enum Psm_Size
// Both of the following structs should probably be local class variables or in a namespace,
// but this works for the moment.
struct TransferData
{
// Signed because Visual C++ is weird.
@ -88,6 +88,7 @@ struct TransferFuncts
};
// rest not visible externally
struct BLOCK
{
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
@ -278,7 +279,9 @@ static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32
{
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
u8 *pix = (u8*) & pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@ -305,6 +308,7 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
@ -330,7 +334,9 @@ static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32
{
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw);
u8 *pix = (u8*) & pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@ -380,9 +386,11 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
return pix >> 4;
else return pix & 0xf;
else
return pix & 0xf;
}
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
@ -432,7 +440,9 @@ static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u3
{
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
u8 *pix = (u8*) & pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@ -459,6 +469,7 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
@ -484,7 +495,9 @@ static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u
{
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
u8 *pix = (u8*) & pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
return pix >> 4;
else

View File

@ -15,6 +15,7 @@ template <class T>
static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
{
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
@ -28,6 +29,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
}
}
}
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
@ -65,6 +67,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
return NULL;
}
}
return buf;
}
@ -97,6 +100,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
else
{
assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8);
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
@ -129,11 +133,14 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
tempX += nSize / 3;
nSize = 0;
}
assert(gs.imageTransfer == -1 || nSize == 0);
return NULL;
}
}
}
return buf;
}
@ -149,6 +156,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthli
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
buf++;
if (widthlimit > 2)
{
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
@ -181,6 +189,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthli
return NULL;
}
}
return buf;
}
@ -189,11 +198,18 @@ template <class T>
{
switch (data.psm)
{
case PSM_: return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
case PSM_4_: return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
case PSM_24_: return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
case PSM_:
return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
case PSM_4_:
return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
case PSM_24_:
return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
}
assert(0);
return NULL;
}
@ -206,8 +222,10 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
{
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
}
buf += pitch - fracX;
}
return buf;
}
@ -221,8 +239,10 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
{
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
}
buf += 3 * (pitch - fracX);
}
return buf;
}
@ -237,8 +257,10 @@ static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthli
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
}
buf += (pitch - fracX) / 2;
}
return buf;
}
@ -247,11 +269,18 @@ template <class T>
{
switch (data.psm)
{
case PSM_: return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_4_: return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_24_: return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_:
return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_4_:
return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_24_:
return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
}
assert(0);
return NULL;
}

View File

@ -82,7 +82,8 @@ static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
static __forceinline void pcsx2_aligned_free(void* pmem)
{
if( pmem != NULL ) {
if (pmem != NULL)
{
char* p = (char*)pmem;
free(p - (int)*(u16*)(p - 2));
}
@ -113,20 +114,24 @@ struct RECT
#define min(a,b) (((a) < (b)) ? (a) : (b))
typedef struct {
typedef struct
{
int x, y, w, h;
} Rect;
typedef struct {
typedef struct
{
int x, y;
} Point;
typedef struct {
typedef struct
{
int x0, y0;
int x1, y1;
} Rect2;
typedef struct {
typedef struct
{
int x, y, c;
} PointC;
@ -145,6 +150,7 @@ typedef struct {
#define GSOPTION_LOADED 0x8000
//Configuration values.
typedef struct
{
u8 mrtdepth; // write color in render target
@ -310,10 +316,12 @@ static __forceinline u64 GetTickFrequency()
static __forceinline u64 GetCPUTicks()
{
struct timeval t;
gettimeofday(&t, NULL);
return ((u64)t.tv_sec*GetTickFrequency()) + t.tv_usec;
}
#else
static __aligned16 LARGE_INTEGER lfreq;
@ -333,34 +341,39 @@ static __forceinline u64 GetCPUTicks()
QueryPerformanceCounter(&count);
return count.QuadPart;
}
#endif
template <typename T>
class CInterfacePtr
{
public:
inline CInterfacePtr() : ptr(NULL) {}
inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); }
inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); }
inline T* operator*() { assert(ptr != NULL); return *ptr; }
inline T* operator->() { return ptr; }
inline T* get() { return ptr; }
inline void release() {
inline void release()
{
if (ptr != NULL) { ptr->Release(); ptr = NULL; }
}
inline operator T*() { return ptr; }
inline bool operator==(T* rhs) { return ptr == rhs; }
inline bool operator!=(T* rhs) { return ptr != rhs; }
inline CInterfacePtr& operator= (T* newptr) {
inline CInterfacePtr& operator= (T* newptr)
{
if (ptr != NULL) ptr->Release();
ptr = newptr;
if (ptr != NULL) ptr->AddRef();
return *this;
}
@ -391,6 +404,7 @@ public:
class DVProfileFunc
{
public:
u32 dwUserData;
static __forceinline DVProfileFunc(char* pname) {}

View File

@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
return false;
}
// First try to draw frame from targets. It's
// First try to draw frame from targets.
inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
{
// get the start and end addresses of the buffer
@ -662,8 +662,14 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
// The same as the previous, but from memory.
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
// this is the function that does it.
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int interlace, int bInterlace)
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
{
// get the start and end addresses of the buffer
int bpp = RenderGetBpp(texframe.psm);
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
int start, end;
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
{
@ -676,23 +682,23 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
if ((pmemtarg == NULL) || (bInterlace >= 2))
ZZLog::Error_Log("CRCR Check for memory shader fault.");
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
SetShaderCaller("RenderCheckForMemory");
SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
// finally render from the memory (note that the stencil buffer will keep previous regions)
Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
Vector v;
// Fixme: Why is this here?
// We should probably call RenderSetTargetBitTex instead.
if (g_bCRTCBilinear)
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex");
v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT);
else
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex");
v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT);
// finally render from the memory (note that the stencil buffer will keep previous regions)
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
v = RenderSetTargetBitTrans(texframe.th);
@ -700,10 +706,12 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
SETPIXELSHADER(ppsCRTC[bInterlace].prog);
GL_REPORT_ERRORD();
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace)
// start from the last circuit
for (int i = !PMODE->SLBG; i >= 0; --i)
{
tex0Info& texframe = dispinfo[i];
if (texframe.th <= 1) continue;
@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace)
// if we could not draw image from target's do it from memory
if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace))
RenderCheckForMemory(texframe, listTargs, interlace, bInterlace);
RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
}
GL_REPORT_ERRORD();

View File

@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
}
// After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made.
inline void ZeroGS::VB::CheckFrame16vs32Convesion()
inline void ZeroGS::VB::CheckFrame16vs32Conversion()
{
if (prndr->status & CRenderTarget::TS_NeedConvert32)
{
@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp)
bChanged = CheckFrameResolveRender(tbp);
CheckFrame16vs32Convesion();
CheckFrame16vs32Conversion();
}
else if (bNeedZCheck)
{

View File

@ -1,7 +1,9 @@
#ifndef RasterFont_Header
#define RasterFont_Header
class RasterFont {
class RasterFont
{
protected:
int fontOffset;

View File

@ -22,23 +22,27 @@
#define TARGET_VIRTUAL_KEY 0x80000000
#include "PS2Edefs.h"
inline Vector DefaultOneColor( FRAGMENTSHADER ptr ) {
inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
{
Vector v = Vector(1, 1, 1, 1);
cgGLSetParameter4fv(ptr.sOneColor, v);
return v ;
}
namespace ZeroGS {
namespace ZeroGS
{
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);
// manages render targets
class CRenderTargetMngr
{
public:
typedef map<u32, CRenderTarget*> MAPTARGETS;
enum TargetOptions {
enum TargetOptions
{
TO_DepthBuffer = 1,
TO_StrictHeight = 2, // height returned has to be the same as requested
TO_Virtual = 4
@ -50,7 +54,8 @@ namespace ZeroGS {
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) {
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
{
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
@ -68,7 +73,8 @@ namespace ZeroGS {
// resolves all targets within a range
__forceinline void Resolve(int start, int end);
__forceinline void ResolveAll() {
__forceinline void ResolveAll()
{
for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it)
it->second->Resolve();
}
@ -77,7 +83,8 @@ namespace ZeroGS {
void DestroyIntersecting(CRenderTarget* prndr);
// promotes a target from virtual to real
inline CRenderTarget* Promote(u32 key) {
inline CRenderTarget* Promote(u32 key)
{
assert(!(key & TARGET_VIRTUAL_KEY));
// promote to regular targ
@ -90,7 +97,9 @@ namespace ZeroGS {
DestroyIntersecting(ptarg);
it = mapTargets.find(key);
if( it != mapTargets.end() ) {
if (it != mapTargets.end())
{
DestroyTarg(it->second);
it->second = ptarg;
}
@ -101,6 +110,7 @@ namespace ZeroGS {
ptarg->status = CRenderTarget::TS_Resolved;
else
ptarg->status = CRenderTarget::TS_NeedUpdate;
return ptarg;
}
@ -111,8 +121,10 @@ namespace ZeroGS {
class CMemoryTargetMngr
{
public:
CMemoryTargetMngr() : curstamp(0) {}
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
@ -138,10 +150,12 @@ namespace ZeroGS {
void Destroy();
// since GetTex can delete textures to free up mem, it is dangerous if using that texture, so specify at least one other tex to save
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) {
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete)
{
map<u32, u32>::iterator it = mapTextures.find(bitvalue);
if( it != mapTextures.end() )
return it->second;
if (it != mapTextures.end()) return it->second;
return GetTexInt(bitvalue, ptexDoNotDelete);
}
@ -152,24 +166,31 @@ namespace ZeroGS {
};
// manages
class CRangeManager
{
public:
CRangeManager() {
CRangeManager()
{
ranges.reserve(16);
}
// [start, end)
struct RANGE {
struct RANGE
{
RANGE() {}
inline RANGE(int start, int end) : start(start), end(end) {}
int start, end;
};
// works in semi logN
void Insert(int start, int end);
void RangeSanityCheck();
inline void Clear() {
inline void Clear()
{
ranges.resize(0);
}
@ -183,7 +204,8 @@ namespace ZeroGS {
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
// Real rendered width, depends on AA and AAneg.
inline int RW(int tbw) {
inline int RW(int tbw)
{
if (s_AAx >= s_AAz)
return (tbw << (s_AAx - s_AAz));
else
@ -191,7 +213,8 @@ namespace ZeroGS {
}
// Real rendered height, depends on AA and AAneg.
inline int RH(int tbh) {
inline int RH(int tbh)
{
if (s_AAy >= s_AAw)
return (tbh << (s_AAy - s_AAw));
else
@ -204,7 +227,8 @@ namespace ZeroGS {
}*/
// This pattern of functions is called 3 times, so I add creating Targets list into one.
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end) {
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
{
list<ZeroGS::CRenderTarget*> listTargs;
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
@ -224,24 +248,30 @@ namespace ZeroGS {
// Code width and height of frame into key, that used in targetmanager
// This is 3 variants of one function, Key dependant on fbp and fbw.
inline u32 GetFrameKey (const frameInfo& frame) {
inline u32 GetFrameKey(const frameInfo& frame)
{
return (((frame.fbw) << 16) | (frame.fbp));
}
inline u32 GetFrameKey ( CRenderTarget* frame ) {
inline u32 GetFrameKey(CRenderTarget* frame)
{
return (((frame->fbw) << 16) | (frame->fbp));
}
inline u32 GetFrameKey (int fbp, int fbw, VB& curvb) {
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb)
{
return (((fbw) << 16) | (fbp));
}
inline u16 ShiftHeight (int fbh, int fbp, int fbhCalc) {
inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc)
{
return fbh;
}
//FIXME: this code for P4 ad KH1. It should not be such strange!
//FIXME: this code is for P4 and KH1. It should not be so strange!
//Dummy targets were deleted from mapTargets, but not erased.
inline u32 GetFrameKeyDummy (const frameInfo& frame) {
inline u32 GetFrameKeyDummy(const frameInfo& frame)
{
// if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh)
// printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame));
// height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be
@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) {
return (((frame.fbw) << 16) | frame.fbh);
}
inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) {
inline u32 GetFrameKeyDummy(CRenderTarget* frame)
{
if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300)
return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm));
else

View File

@ -143,11 +143,13 @@ using namespace std;
extern const char* ShaderCallerName;
extern const char* ShaderHandleName;
inline void SetShaderCaller(const char* Name) {
inline void SetShaderCaller(const char* Name)
{
ShaderCallerName = Name;
}
inline void SetHandleName(const char* Name) {
inline void SetHandleName(const char* Name)
{
ShaderHandleName = Name;
}
@ -180,33 +182,43 @@ const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
//------------------------ Inlines -------------------------
inline const char *error_name(int err) {
switch (err) {
inline const char *error_name(int err)
{
switch (err)
{
case GL_NO_ERROR:
return "GL_NO_ERROR";
case GL_INVALID_ENUM:
return "GL_INVALID_ENUM";
case GL_INVALID_VALUE:
return "GL_INVALID_VALUE";
case GL_INVALID_OPERATION:
return "GL_INVALID_OPERATION";
case GL_STACK_OVERFLOW:
return "GL_STACK_OVERFLOW";
case GL_STACK_UNDERFLOW:
return "GL_STACK_UNDERFLOW";
case GL_OUT_OF_MEMORY:
return "GL_OUT_OF_MEMORY";
case GL_TABLE_TOO_LARGE:
return "GL_TABLE_TOO_LARGE";
default:
return "Unknown GL error";
}
}
// inline for extemely ofthen used sequence
// inline for an extremely often used sequence
// This is turning off all gl functions. Safe to do updates.
inline void
DisableAllgl () {
inline void DisableAllgl()
{
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDisable(GL_ALPHA_TEST);
@ -217,12 +229,12 @@ DisableAllgl () {
}
// Calculate maximum height for target
inline int
get_maxheight(int fbp, int fbw, int psm)
inline int get_maxheight(int fbp, int fbw, int psm)
{
int ret;
if (fbw == 0) return 0;
if (PSMT_ISHALF(psm))
ret = (((0x00100000 - 64 * fbp) / fbw) & ~0x1f) * 2;
else
@ -231,15 +243,15 @@ get_maxheight(int fbp, int fbw, int psm)
return ret;
}
// Does psm need Alpha test with alpha expansion
inline int
nNeedAlpha(u8 psm) {
// Does psm need Alpha test with alpha expansion?
inline int nNeedAlpha(u8 psm)
{
return (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S);
}
// Get color storage model psm, that is important on flush stage.
inline u8
GetTexCPSM(const tex0Info& tex) {
inline u8 GetTexCPSM(const tex0Info& tex)
{
if (PSMT_ISCLUT(tex.psm))
return tex.cpsm;
else
@ -257,6 +269,7 @@ GetTexCPSM(const tex0Info& tex) {
#endif
// ------------------------ Types -------------------------
struct FRAGMENTSHADER
{
FRAGMENTSHADER() : prog(0), sMemory(0), sFinal(0), sBitwiseANDX(0), sBitwiseANDY(0), sInterlace(0), sCLUT(0), sOneColor(0), sBitBltZ(0),
@ -275,6 +288,7 @@ struct FRAGMENTSHADER
{
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p;
}
@ -283,12 +297,14 @@ struct FRAGMENTSHADER
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgGLSetTextureParameter(p, texobj);
cgGLEnableTextureParameter(p);
return true;
}
return false;
}
@ -297,11 +313,13 @@ struct FRAGMENTSHADER
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
cgConnectParameter(tex, p);
return true;
}
return false;
}
@ -310,12 +328,14 @@ struct FRAGMENTSHADER
CGparameter p;
p = cgGetNamedParameter(prog, name);
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
{
//cgGLEnableTextureParameter(p);
tex = p;
return true;
}
return false;
}
@ -330,6 +350,7 @@ struct FRAGMENTSHADER
cgGLSetParameter4fv(p, v);
return true;
}
return false;
}
};
@ -337,9 +358,11 @@ struct FRAGMENTSHADER
struct VERTEXSHADER
{
VERTEXSHADER() : prog(0), sBitBltPos(0), sBitBltTex(0) {}
CGprogram prog;
CGparameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants
};
// ------------------------ Variables -------------------------
// all textures have this width
//#define GPU_TEXWIDTH 512
@ -469,7 +492,6 @@ enum GAME_HACK_OPTIONS
#define USEALPHATESTING (!(g_GameSettings&GAME_NOALPHATEST))
// CRC Information
enum Title_Info
{
@ -571,8 +593,6 @@ enum GAME_HACK_OPTIONS
s32 v_thresh, t_thresh;
};
// Note; all the options surrounded by /**/ are ones that were getting chosen previously because of missing break statements, and might not be appropriate.
// I'll have to check and see if they work better with or without them.
static const Game_Info crc_game_list[] =
{
{0xA3D63039, Xenosaga, JP, GAME_DOPARALLELCTX, 64, 32},
@ -641,7 +661,8 @@ extern u8* g_pbyGSMemory;
extern u8* g_pbyGSClut; // the temporary clut buffer
extern CGparameter g_vparamPosXY[2], g_fparamFogColor;
namespace ZeroGS {
namespace ZeroGS
{
typedef void (*DrawFn)();
@ -652,8 +673,10 @@ namespace ZeroGS {
};
// manages render-to-texture targets
class CRenderTarget
{
public:
CRenderTarget();
virtual ~CRenderTarget();
@ -666,7 +689,8 @@ namespace ZeroGS {
void SetViewport();
// copies/creates the feedback contents
inline void CreateFeedback() {
inline void CreateFeedback()
{
if (ptexFeedback == 0 || !(status&TS_FeedbackReady))
_CreateFeedback();
}
@ -678,6 +702,7 @@ namespace ZeroGS {
virtual void ConvertTo16(); // converts a psm==0 target, to a psm==2
virtual bool IsDepth() { return false; }
void SetRenderTarget(int targ);
void* psys; // system data used for comparison
@ -702,7 +727,8 @@ namespace ZeroGS {
// this is optionally used when feedback effects are used (render target is used as a texture when rendering to itself)
u32 ptexFeedback;
enum TargetStatus {
enum TargetStatus
{
TS_Resolved = 1,
TS_NeedUpdate = 2,
TS_Virtual = 4, // currently not mapped to memory
@ -712,14 +738,17 @@ namespace ZeroGS {
};
inline Vector DefaultBitBltPos() ;
inline Vector DefaultBitBltTex() ;
private:
void _CreateFeedback();
inline bool InitialiseDefaultTexture(u32 *p_ptr, int fbw, int fbh) ;
};
// manages zbuffers
class CDepthTarget : public CRenderTarget
{
public:
CDepthTarget();
virtual ~CDepthTarget();
@ -741,6 +770,7 @@ namespace ZeroGS {
};
// manages contiguous chunks of memory (width is always 1024)
class CMemoryTarget
{
public:
@ -748,6 +778,7 @@ namespace ZeroGS {
{
inline TEXTURE() : tex(0), memptr(NULL), ref(0) {}
inline ~TEXTURE() { glDeleteTextures(1, &tex); _aligned_free(memptr); }
u32 tex;
u8* memptr; // GPU memory used for comparison
int ref;
@ -755,9 +786,12 @@ namespace ZeroGS {
inline CMemoryTarget() : ptex(NULL), starty(0), height(0), realy(0), realheight(0), usedstamp(0), psm(0), cpsm(0), channels(0), clearminy(0), clearmaxy(0), validatecount(0) {}
inline CMemoryTarget(const CMemoryTarget& r) {
inline CMemoryTarget(const CMemoryTarget& r)
{
ptex = r.ptex;
if (ptex != NULL) ptex->ref++;
starty = r.starty;
height = r.height;
realy = r.realy;
@ -776,10 +810,11 @@ namespace ZeroGS {
~CMemoryTarget() { Destroy(); }
inline void Destroy() {
if( ptex != NULL && ptex->ref > 0 ) {
if( --ptex->ref <= 0 )
delete ptex;
inline void Destroy()
{
if (ptex != NULL && ptex->ref > 0)
{
if (--ptex->ref <= 0) delete ptex;
}
ptex = NULL;
@ -820,10 +855,13 @@ namespace ZeroGS {
void Destroy();
inline bool CheckPrim() {
inline bool CheckPrim()
{
static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)
if ((PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim])
return nCount > 0;
return false;
}
@ -852,7 +890,7 @@ namespace ZeroGS {
inline int FindMinimalHeightConstrain(int maxpos);
inline int CheckFrameResolveRender(int tbp);
inline void CheckFrame16vs32Convesion();
inline void CheckFrame16vs32Conversion();
inline int CheckFrameResolveDepth(int tbp);
inline void FlushTexUnchangedClutDontUpdate() ;
@ -861,10 +899,12 @@ namespace ZeroGS {
inline void FlushTexSetNewVars(u32 psm) ;
// notify VB that nVerts need to be written to pbuf
inline void NotifyWrite(int nVerts) {
inline void NotifyWrite(int nVerts)
{
assert(pBufferData != NULL && nCount <= nNumVertices && nVerts > 0);
if( nCount + nVerts > nNumVertices ) {
if (nCount + nVerts > nNumVertices)
{
// recreate except with a bigger count
VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nNumVertices * 2, 256);
memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount);
@ -875,8 +915,10 @@ namespace ZeroGS {
}
}
void Init(int nVerts) {
if( pBufferData == NULL && nVerts > 0 ) {
void Init(int nVerts)
{
if (pBufferData == NULL && nVerts > 0)
{
pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nVerts, 256);
nNumVertices = nVerts;
}
@ -889,13 +931,16 @@ namespace ZeroGS {
u8 bNeedTexCheck;
u8 dummy0;
union {
struct {
union
{
struct
{
u8 bTexConstsSync; // only pixel shader constants that context owns
u8 bVarsTexSync; // texture info
u8 bVarsSetTarg;
u8 dummy1;
};
u32 bSyncVars;
};
@ -926,7 +971,8 @@ namespace ZeroGS {
// if tcc == 0 then no alpha used, aem used for alpha expanding and I am not sure
// that it's correct, psm -- color mode,
inline bool
IsAlphaTestExpansion(VB& curvb){
IsAlphaTestExpansion(VB& curvb)
{
return (curvb.tex0.tcc && gs.texa.aem && nNeedAlpha(GetTexCPSM(curvb.tex0)));
}
@ -1051,8 +1097,8 @@ namespace ZeroGS {
void CaptureFrame();
// Perform clutting for flushed texture. Better check if it needs a prior call.
inline void
CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx) {
inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
{
tex0->cbp = ZZOglGet_cbp_TexBits(Data);
tex0->cpsm = ZZOglGet_cpsm_TexBits(Data);
tex0->csm = ZZOglGet_csm_TexBits(Data);

View File

@ -17,6 +17,7 @@
// Converts an angle given in radians to degrees.
// The factor is 180/pi written out to full double precision so that double
// arguments are not limited by a truncated literal; it is cast to T before the
// multiplication, so integral T uses the truncated factor 57.
template <class T> inline T RAD_2_DEG(T radians) { return (radians * static_cast<T>(57.295779513082320877)); }
class Transform;
class TransformMatrix;
typedef float dReal;
@ -35,6 +36,7 @@ inline dReal* inv4(const dReal* pf, dReal* pfres);
// class used for 3 and 4 dim vectors and quaternions
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
class Vector
{
public:
@ -45,7 +47,6 @@ public:
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
dReal operator[](int i) const { return (&x)[i]; }
dReal& operator[](int i) { return (&x)[i]; }
@ -56,7 +57,6 @@ public:
// SCALAR FUNCTIONS
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
inline void normalize() { normalize4(&x, &x); }
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
inline void SetColor(u32 color)
@ -69,23 +69,18 @@ public:
// 3 dim cross product, w is not touched
/// this = this x v
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
/// this = u x v
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
friend Vector operator*(float f, const Vector& v);
//friend ostream& operator<<(ostream& O, const Vector& v);
//friend istream& operator>>(istream& I, Vector& v);
@ -113,16 +108,20 @@ struct OBB
struct TRIANGLE
{
TRIANGLE() {}
TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {}
~TRIANGLE() {}
Vector v1, v2, v3; //!< the vertices of the triangle
const Vector& operator[](int i) const { return (&v1)[i]; }
Vector& operator[](int i) { return (&v1)[i]; }
/// assumes CCW ordering of vertices
inline Vector ComputeNormal() {
inline Vector ComputeNormal()
{
Vector normal;
cross3(normal, v2 - v1, v3 - v1);
return normal;
@ -239,29 +238,47 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
d = b * b - 4.0f * c + 1e-16f;
if (d < 0) return false;
if( d < 1e-16f ) {
if (d < 1e-16f)
{
a = -0.5f * b;
peigs[0] = a; peigs[1] = a;
fv1x = pfmat[1]; fv1y = a - pfmat[0];
peigs[0] = a;
peigs[1] = a;
fv1x = pfmat[1];
fv1y = a - pfmat[0];
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
fv1x *= c; fv1y *= c;
fv2x = -fv1y; fv2y = fv1x;
fv1x *= c;
fv1y *= c;
fv2x = -fv1y;
fv2y = fv1x;
return true;
}
// two roots
d = sqrtf(d);
a = -0.5f * (b + d);
peigs[0] = a;
fv1x = pfmat[1]; fv1y = a-pfmat[0];
fv1x = pfmat[1];
fv1y = a - pfmat[0];
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
fv1x *= c; fv1y *= c;
fv1x *= c;
fv1y *= c;
a += d;
peigs[1] = a;
fv2x = pfmat[1]; fv2y = a-pfmat[0];
fv2x = pfmat[1];
fv2y = a - pfmat[0];
c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y);
fv2x *= c; fv2y *= c;
fv2x *= c;
fv2y *= c;
return true;
}
@ -273,8 +290,11 @@ template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
T* pfres2;
if( pfres == pf1 || pfres == pf2 ) pfres2 = (T*)alloca(9 * sizeof(T));
else pfres2 = pfres;
if (pfres == pf1 || pfres == pf2)
pfres2 = (T*)alloca(9 * sizeof(T));
else
pfres2 = pfres;
pfres2[0*4+0] = pf1[0*4+0] * pf2[0*4+0] + pf1[0*4+1] * pf2[1*4+0] + pf1[0*4+2] * pf2[2*4+0];
pfres2[0*4+1] = pf1[0*4+0] * pf2[0*4+1] + pf1[0*4+1] * pf2[1*4+1] + pf1[0*4+2] * pf2[2*4+1];
@ -294,6 +314,7 @@ template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
}
// Non-template entry points for the 3x3 product pfres = pf1 * pf2.
// Each overload forwards to _mult3 instantiated for its element type and
// returns whatever _mult3 returns.
inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2)
{
	dReal* const product = _mult3<dReal>(pfres, pf1, pf2);
	return product;
}

inline double* mult3(double* pfres, const double* pf1, const double* pf2)
{
	double* const product = _mult3<double>(pfres, pf1, pf2);
	return product;
}
template <class T>
@ -302,8 +323,11 @@ inline T* _mult4(T* pfres, const T* p1, const T* p2)
assert(pfres != NULL && p1 != NULL && p2 != NULL);
T* pfres2;
if( pfres == p1 || pfres == p2 ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
if (pfres == p1 || pfres == p2)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
pfres2[0*4+0] = p1[0*4+0] * p2[0*4+0] + p1[0*4+1] * p2[1*4+0] + p1[0*4+2] * p2[2*4+0] + p1[0*4+3] * p2[3*4+0];
pfres2[0*4+1] = p1[0*4+0] * p2[0*4+1] + p1[0*4+1] * p2[1*4+1] + p1[0*4+2] * p2[2*4+1] + p1[0*4+3] * p2[3*4+1];
@ -326,6 +350,7 @@ inline T* _mult4(T* pfres, const T* p1, const T* p2)
pfres2[3*4+3] = p1[3*4+0] * p2[0*4+3] + p1[3*4+1] * p2[1*4+3] + p1[3*4+2] * p2[2*4+3] + p1[3*4+3] * p2[3*4+3];
if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16);
return pfres;
}
@ -336,17 +361,18 @@ template <class T>
inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
{
T* pfres2;
if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
else pfres2 = pfres;
if (pfres == pf1)
pfres2 = (T*)alloca(9 * sizeof(T));
else
pfres2 = pfres;
pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6];
pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7];
pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8];
pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6];
pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7];
pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8];
pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6];
pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7];
pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8];
@ -360,11 +386,16 @@ template <class T>
inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
{
T* pfres2;
if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
for(int i = 0; i < 4; ++i) {
for(int j = 0; j < 4; ++j) {
if (pfres == pf1)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
for (int i = 0; i < 4; ++i)
{
for (int j = 0; j < 4; ++j)
{
pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
}
}
@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
{
T* pfres2;
if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
else pfres2 = pfres;
if (pfres == pf)
pfres2 = (T*)alloca(3 * stride * sizeof(T));
else
pfres2 = pfres;
// inverse = C^t / det(pf) where C is the matrix of coefficients
@ -390,9 +424,11 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];
@ -403,16 +439,25 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
if (fabs(fdet) < 1e-6) return NULL;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres != pf ) {
pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet;
pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet;
pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet;
if (pfres != pf)
{
pfres[0*stride+0] *= fdet;
pfres[0*stride+1] *= fdet;
pfres[0*stride+2] *= fdet;
pfres[1*stride+0] *= fdet;
pfres[1*stride+1] *= fdet;
pfres[1*stride+2] *= fdet;
pfres[2*stride+0] *= fdet;
pfres[2*stride+1] *= fdet;
pfres[2*stride+2] *= fdet;
return pfres;
}
pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3<dRe
template <class T> inline T* _inv4(const T* pf, T* pfres)
{
T* pfres2;
if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
if (pfres == pf)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
// inverse = C^t / det(pf) where C is the matrix of coefficients
@ -439,7 +487,9 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
// determinants of all possible 2x2 submatrices formed by last two rows
T fd0, fd1, fd2;
T f1, f2, f3;
fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
@ -487,15 +537,19 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
if (fabs(fdet) < 1e-6) return NULL;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres2 == pfres ) {
if (pfres2 == pfres)
{
mult(pfres, fdet, 16);
return pfres;
}
int i = 0;
while(i < 16) {
while (i < 16)
{
pfres[i] = pfres2[i] * fdet;
++i;
}
@ -509,16 +563,24 @@ template <class T> inline T* _transpose3(const T* pf, T* pfres)
{
assert(pf != NULL && pfres != NULL);
if( pf == pfres ) {
if (pf == pfres)
{
rswap(pfres[1], pfres[3]);
rswap(pfres[2], pfres[6]);
rswap(pfres[5], pfres[7]);
return pfres;
}
pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6];
pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7];
pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8];
pfres[0] = pf[0];
pfres[1] = pf[3];
pfres[2] = pf[6];
pfres[3] = pf[1];
pfres[4] = pf[4];
pfres[5] = pf[7];
pfres[6] = pf[2];
pfres[7] = pf[5];
pfres[8] = pf[8];
return pfres;
}
@ -530,7 +592,8 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
{
assert(pf != NULL && pfres != NULL);
if( pf == pfres ) {
if (pf == pfres)
{
rswap(pfres[1], pfres[4]);
rswap(pfres[2], pfres[8]);
rswap(pfres[3], pfres[12]);
@ -540,10 +603,23 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
return pfres;
}
pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12];
pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13];
pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14];
pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15];
pfres[0] = pf[0];
pfres[1] = pf[4];
pfres[2] = pf[8];
pfres[3] = pf[12];
pfres[4] = pf[1];
pfres[5] = pf[5];
pfres[6] = pf[9];
pfres[7] = pf[13];
pfres[8] = pf[2];
pfres[9] = pf[6];
pfres[10] = pf[10];
pfres[11] = pf[14];
pfres[12] = pf[3];
pfres[13] = pf[7];
pfres[14] = pf[11];
pfres[15] = pf[15];
return pfres;
}
@ -636,7 +712,9 @@ inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2)
temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];
pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
pfout[0] = temp[0];
pfout[1] = temp[1];
pfout[2] = temp[2];
return pfout;
}
@ -644,7 +722,8 @@ template <class T> inline void mult(T* pf, T fa, int r)
{
assert(pf != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf[r] *= fa;
}
@ -658,20 +737,27 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2);
while(r1 > 0) {
while (r1 > 0)
{
--r1;
j = 0;
while(j < c2) {
while (j < c2)
{
k = 0;
while(k < c1) {
while (k < c1)
{
pfres[j] += pf1[k] * pf2[k*c2 + j];
++k;
}
++j;
}
pf1 += c1;
pfres += c2;
}
@ -687,20 +773,26 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2);
i = 0;
while(i < c1) {
while (i < c1)
{
j = 0;
while(j < c2) {
while (j < c2)
{
k = 0;
while(k < r1) {
while (k < r1)
{
pfres[j] += pf1[k*c1] * pf2[k*c2 + j];
++k;
}
++j;
}
pfres += c2;
++pf1;
++i;
@ -717,20 +809,27 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b
if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2);
while(r1 > 0) {
while (r1 > 0)
{
--r1;
j = 0;
while(j < r2) {
while (j < r2)
{
k = 0;
while(k < c1) {
while (k < c1)
{
pfres[j] += pf1[k] * pf2[j*c1 + k];
++k;
}
++j;
}
pf1 += c1;
pfres += r2;
}
@ -744,28 +843,36 @@ template <class T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
int j, k;
bool bdel = false;
if( pftemp == NULL ) {
if (pftemp == NULL)
{
pftemp = new T[c];
bdel = true;
}
while(r > 0) {
while (r > 0)
{
--r;
j = 0;
while(j < c) {
while (j < c)
{
pftemp[j] = 0.0;
k = 0;
while(k < c) {
while (k < c)
{
pftemp[j] += pf1[k] * pf2[k*c + j];
++k;
}
++j;
}
memcpy(pf1, pftemp, c * sizeof(T));
pf1 += c;
}
@ -781,29 +888,39 @@ template <class T, class S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S*
int i, j, k;
bool bdel = false;
if( pftemp == NULL ) {
if (pftemp == NULL)
{
pftemp = new S[r2];
bdel = true;
}
// do columns first
j = 0;
while(j < c2) {
while (j < c2)
{
i = 0;
while(i < r2) {
while (i < r2)
{
pftemp[i] = 0.0;
k = 0;
while(k < r2) {
while (k < r2)
{
pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
++k;
}
++i;
}
i = 0;
while(i < r2) {
while (i < r2)
{
*(pf2 + i*c2 + j) = pftemp[i];
++i;
}
@ -820,7 +937,8 @@ template <class T> inline void add(T* pf1, T* pf2, int r)
{
assert(pf1 != NULL && pf2 != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf1[r] += pf2[r];
}
@ -830,7 +948,8 @@ template <class T> inline void sub(T* pf1, T* pf2, int r)
{
assert(pf1 != NULL && pf2 != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf1[r] -= pf2[r];
}
@ -841,7 +960,9 @@ template <class T> inline T normsqr(T* pf1, int r)
assert(pf1 != NULL);
T d = 0.0;
while(r > 0) {
while (r > 0)
{
--r;
d += pf1[r] * pf1[r];
}
@ -852,7 +973,9 @@ template <class T> inline T normsqr(T* pf1, int r)
template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += sqr(pf1[length] - pf2[length]);
}
@ -863,7 +986,9 @@ template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
template <class T> inline T dot(T* pf1, T* pf2, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += pf1[length] * pf2[length];
}
@ -874,7 +999,9 @@ template <class T> inline T dot(T* pf1, T* pf2, int length)
template <class T> inline T sum(T* pf, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += pf[length];
}
@ -889,15 +1016,20 @@ template <class T> inline bool inv2(T* pf, T* pfres)
if (fabs(fdet) < 1e-16) return false;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres != pf ) {
pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1];
pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0];
if (pfres != pf)
{
pfres[0] = fdet * pf[3];
pfres[1] = -fdet * pf[1];
pfres[2] = -fdet * pf[2];
pfres[3] = fdet * pf[0];
return true;
}
dReal ftemp = pf[0];
pfres[0] = pf[3] * fdet;
pfres[1] *= -fdet;
pfres[2] *= -fdet;