zzogl-pg: Part 2 of the re-formatting; ran AStyle over the headers.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2932 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2010-05-01 22:54:23 +00:00
parent a6c4df49ea
commit 12ad5308ed
12 changed files with 1453 additions and 1123 deletions

View File

@ -36,6 +36,7 @@ using namespace std;
class GLWindow
{
private:
#ifdef GL_X11_WINDOW
Display *glDisplay;
@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem;
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
// PS2 vertex
struct VertexGPU
{
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
@ -264,6 +266,7 @@ struct VertexGPU
};
// Almost same with previous, controlled by prim.fst flagf
struct Vertex
{
u16 x, y, f, resv0; // note: xy is 12d3
@ -281,7 +284,8 @@ extern int ppf;
// PSM values
// PSM types == Texture Storage Format
enum PSM_value{
enum PSM_value
{
PSMCT32 = 0, // 000000
PSMCT24 = 1, // 000001
PSMCT16 = 2, // 000010
@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}
//----------------------- Data from registers -----------------------
typedef union {
typedef union
{
s64 SD;
u64 UD;
s32 SL[2];
@ -340,7 +345,9 @@ typedef union {
} reg64;
/* general purpose regs structs */
typedef struct {
typedef struct
{
int fbp;
int fbw;
int fbh;
@ -349,7 +356,8 @@ typedef struct {
} frameInfo;
// Create frame structure from known data
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm)
{
frameInfo frame;
frame.fbp = fbp;
frame.fbw = fbw;
@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
return frame;
}
typedef struct {
typedef struct
{
u16 prim;
union {
struct {
union
{
struct
{
u16 iip : 1;
u16 tme : 1;
u16 fge : 1;
@ -380,8 +391,10 @@ typedef struct {
extern primInfo *prim;
typedef union {
struct {
typedef union
{
struct
{
u32 ate : 1;
u32 atst : 3;
u32 aref : 8;
@ -395,13 +408,15 @@ typedef union {
u32 _val;
} pixTest;
typedef struct {
typedef struct
{
int bp;
int bw;
int psm;
} bufInfo;
typedef struct {
typedef struct
{
int tbp0;
int tbw;
int cbp;
@ -432,13 +447,17 @@ union tex_0_info
u64 csa : 5;
u64 cld : 3;
};
u64 _u64;
u32 _u32[2];
u16 _u16[4];
u8 _u8[8];
tex_0_info(u64 data) { _u64 = data; }
tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }
tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }
u32 tbw_mult()
{
if (tbw == 0)
@ -446,26 +465,34 @@ union tex_0_info
else
return ((u32)tbw << 6);
}
u32 psm_fix()
{
// printf ("psm %d\n", psm);
if ( psm == 9 ) return 1;
// printf ("psm %d\n", psm);
if (psm == 9) return 1;
return psm;
}
u32 tw_exp()
{
if (tw > 10) return (1<<10);
return (1<<tw);
if (tw > 10) return (1 << 10);
return (1 << tw);
}
u32 th_exp()
{
if (th > 10) return (1<<10);
return (1<<th);
if (th > 10) return (1 << 10);
return (1 << th);
}
u32 cpsm_fix()
{
return cpsm & 0xe;
}
u32 csa_fix()
{
if (cpsm < 2)
@ -480,7 +507,8 @@ union tex_0_info
#define TEX_HIGHLIGHT 2
#define TEX_HIGHLIGHT2 3
typedef struct {
typedef struct
{
int lcm;
int mxl;
int mmag;
@ -490,7 +518,8 @@ typedef struct {
int k;
} tex1Info;
typedef struct {
typedef struct
{
int wms;
int wmt;
int minu;
@ -499,24 +528,28 @@ typedef struct {
int maxv;
} clampInfo;
typedef struct {
typedef struct
{
int cbw;
int cou;
int cov;
} clutInfo;
typedef struct {
typedef struct
{
int tbp[3];
int tbw[3];
} miptbpInfo;
typedef struct {
typedef struct
{
u16 aem;
u8 ta[2];
float fta[2];
} texaInfo;
typedef struct {
typedef struct
{
int sx;
int sy;
int dx;
@ -524,9 +557,12 @@ typedef struct {
int dir;
} trxposInfo;
typedef struct {
union {
struct {
typedef struct
{
union
{
struct
{
u8 a : 2;
u8 b : 2;
u8 c : 2;
@ -538,17 +574,20 @@ typedef struct {
u8 fix : 8;
} alphaInfo;
typedef struct {
typedef struct
{
u16 zbp; // u16 address / 64
u8 psm;
u8 zmsk;
} zbufInfo;
typedef struct {
typedef struct
{
int fba;
} fbaInfo;
typedef struct {
typedef struct
{
Vertex gsvertex[3];
u32 rgba;
float q;
@ -593,36 +632,41 @@ extern GSinternal gs;
static __forceinline u16 RGBA32to16(u32 c)
{
return (u16)((((c) & 0x000000f8) >> 3) |
(((c) & 0x0000f800) >> 6) |
(((c) & 0x00f80000) >> 9) |
(((c) & 0x80000000) >> 16));
(((c) & 0x0000f800) >> 6) |
(((c) & 0x00f80000) >> 9) |
(((c) & 0x80000000) >> 16));
}
static __forceinline u32 RGBA16to32(u16 c)
{
return (((c) & 0x001f) << 3) |
(((c) & 0x03e0) << 6) |
(((c) & 0x7c00) << 9) |
(((c) & 0x8000) ? 0xff000000 : 0);
return (((c) & 0x001f) << 3) |
(((c) & 0x03e0) << 6) |
(((c) & 0x7c00) << 9) |
(((c) & 0x8000) ? 0xff000000 : 0);
}
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
// f is a u16
static __forceinline u16 Float16ToBYTE(u16 f) {
static __forceinline u16 Float16ToBYTE(u16 f)
{
//assert( !(f & 0x8000) );
if( f & 0x8000 ) return 0;
if (f & 0x8000) return 0;
u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
u16 d = ((((f&0x3ff)|0x400)*255)>>(10-((f>>10)&0x1f)+15));
return d > 255 ? 255 : d;
}
static __forceinline u16 Float16ToALPHA(u16 f) {
static __forceinline u16 Float16ToALPHA(u16 f)
{
//assert( !(f & 0x8000) );
if( f & 0x8000 ) return 0;
if (f & 0x8000) return 0;
// round up instead of down (crash and burn), too much and charlie breaks
u16 d = (((((f&0x3ff)|0x400))*255)>>(10-((f>>10)&0x1f)+15));
d = (d)>>1;
u16 d = (((((f & 0x3ff) | 0x400)) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
d = (d) >> 1;
return d > 255 ? 255 : d;
}
@ -650,12 +694,14 @@ static __forceinline u16 Float16ToALPHA(u16 f) {
inline float Clamp(float fx, float fmin, float fmax)
{
if( fx < fmin ) return fmin;
if (fx < fmin) return fmin;
return fx > fmax ? fmax : fx;
}
// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
{
if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
return true;
else
@ -670,7 +716,7 @@ inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
static __forceinline int ZZOglGet_tbp0_TexBits(u32 data)
{
//return tex_0_info(data).tbp0;
return (data ) & 0x3fff;
return (data) & 0x3fff;
}
// Obtain tbw -- Texture Buffer Width (Texels/64) -- from data, do not multiply to 64. Bits 14-19
@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
{
//return text_0_info(data).tbw_mult();
int result = ZZOglGet_tbw_TexBits(data);
if (result == 0)
return 64;
else
@ -697,7 +744,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
static __forceinline int ZZOglGet_psm_TexBits(u32 data)
{
//return tex_0_info(data).psm;
return ((data >> 20) & 0x3f);
return ((data >> 20) & 0x3f);
}
// Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9
@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
//return tex_0_info(data).psm_fix();
int result = ZZOglGet_psm_TexBits(data) ;
// printf ("result %d\n", result);
if ( result == 9 ) result = 1;
if (result == 9) result = 1;
return result;
}
@ -715,7 +764,7 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
static __forceinline u16 ZZOglGet_tw_TexBits(u32 data)
{
//return tex_0_info(data).tw;
return ((data >> 26) & 0xf);
return ((data >> 26) & 0xf);
}
// Obtain tw -- Texture Width (Width = TW) -- from data. Width could newer be more than 1024.
@ -723,8 +772,10 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
{
//return tex_0_info(data).tw_exp();
u16 result = ZZOglGet_tw_TexBits(data);
if (result > 10) result = 10;
return (1<<result);
return (1 << result);
}
// TH set at the border of upper and higher words.
@ -741,8 +792,10 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
{
//return tex_0_info(dataLO, dataHI).th_exp();
u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
if (result > 10) result = 10;
return (1<<result);
return (1 << result);
}
// Tex0Info bits, higher word.
@ -751,7 +804,7 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
{
//return tex_0_info(0, data).tcc;
return ((data >> 2) & 0x1);
return ((data >> 2) & 0x1);
}
// Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=hilight, 3=hilight2) -- from data. Bit 4-5
@ -759,7 +812,7 @@ static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
{
//return tex_0_info(0, data).tfx;
return ((data >> 3) & 0x3);
return ((data >> 3) & 0x3);
}
// Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18
@ -767,7 +820,7 @@ static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
static __forceinline int ZZOglGet_cbp_TexBits(u32 data)
{
//return tex_0_info(0, data).cbp;
return ((data >> 5) & 0x3fff);
return ((data >> 5) & 0x3fff);
}
// Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use.
@ -794,7 +847,7 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
{
//return tex_0_info(0, data).csa_fix();
if ((data & 0x700000) == 0 ) // it is cpsm < 2 check
if ((data & 0x700000) == 0) // it is cpsm < 2 check
return ((data >> 24) & 0xf);
else
return ((data >> 24) & 0x1f);
@ -805,79 +858,81 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
{
//return tex_0_info(0, data).cld;
return ((data >> 29) & 0x7);
return ((data >> 29) & 0x7);
}
//-------------------------- frames
// FrameInfo bits.
// Obtain fbp -- frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-15
inline int
ZZOglGet_fbp_FrameBits(u32 data) {
return ((data ) & 0x1ff);
inline int ZZOglGet_fbp_FrameBits(u32 data)
{
return ((data) & 0x1ff);
}
// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64.
inline int
ZZOglGet_fbp_FrameBitsMult(u32 data) {
// So we got address / 64, henceby frame fbp and tex tbp have the same dimension -- "real address" is x64.
inline int ZZOglGet_fbp_FrameBitsMult(u32 data)
{
return (ZZOglGet_fbp_FrameBits(data) << 5);
}
// Obtain fbw -- width (Texels/64) -- from data. Bits 16-23
inline int
ZZOglGet_fbw_FrameBits(u32 data) {
inline int ZZOglGet_fbw_FrameBits(u32 data)
{
return ((data >> 16) & 0x3f);
}
inline int
ZZOglGet_fbw_FrameBitsMult(u32 data) {
inline int ZZOglGet_fbw_FrameBitsMult(u32 data)
{
return (ZZOglGet_fbw_FrameBits(data) << 6);
}
// Obtain psm -- Pixel Storage Format -- from data. Bits 24-29.
// (data & 0x3f000000) >> 24
inline int
ZZOglGet_psm_FrameBits(u32 data) {
return ((data >> 24) & 0x3f);
inline int ZZOglGet_psm_FrameBits(u32 data)
{
return ((data >> 24) & 0x3f);
}
// Function for calculating overal height from frame data.
inline int
ZZOgl_fbh_Calc (int fbp, int fbw, int psm) {
int fbh = ( 1024 * 1024 - 64 * fbp ) / fbw;
inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm)
{
int fbh = (1024 * 1024 - 64 * fbp) / fbw;
fbh &= ~0x1f;
if (PSMT_ISHALF(psm))
fbh *= 2;
if (fbh > 1024)
fbh = 1024;
if (PSMT_ISHALF(psm)) fbh *= 2;
if (fbh > 1024) fbh = 1024;
return fbh ;
}
inline int
ZZOgl_fbh_Calc (frameInfo frame) {
inline int ZZOgl_fbh_Calc(frameInfo frame)
{
return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm);
}
// Calculate fbh from data, It does not set in register
inline int
ZZOglGet_fbh_FrameBitsCalc (u32 data) {
inline int ZZOglGet_fbh_FrameBitsCalc(u32 data)
{
int fbh = 0;
int fbp = ZZOglGet_fbp_FrameBits(data);
int fbw = ZZOglGet_fbw_FrameBits(data);
int psm = ZZOglGet_psm_FrameBits(data);
if (fbw > 0)
fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
return fbh ;
}
// Obtain fbm -- frame mask -- from data. All higher word.
inline u32
ZZOglGet_fbm_FrameBits(u32 data) {
return (data);
inline u32 ZZOglGet_fbm_FrameBits(u32 data)
{
return (data);
}
// Obtain fbm -- frame mask -- from data. All higher word. Fixed from psm == PCMT24 (without alpha)
inline u32
ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI)
{
if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1)
return (dataHI | 0xff000000);
else
@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
}
// obtain colormask RED
inline u32
ZZOglGet_fbmRed_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmRed_FrameBits(u32 data)
{
return (data & 0xff);
}
// obtain colormask Green
inline u32
ZZOglGet_fbmGreen_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data)
{
return ((data >> 8) & 0xff);
}
// obtain colormask Blue
inline u32
ZZOglGet_fbmBlue_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data)
{
return ((data >> 16) & 0xff);
}
// obtain colormask Alpha
inline u32
ZZOglGet_fbmAlpha_FrameBits(u32 data) {
inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data)
{
return ((data >> 24) & 0xff);
}
// obtain colormask Alpha
inline u32
ZZOglGet_fbmHighByte(u32 data) {
inline u32 ZZOglGet_fbmHighByte(u32 data)
{
return (!!(data & 0x80000000));
}
//-------------------------- tex0 comparison
// Check if old and new tex0 registers have only clut difference
inline bool
ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) {
inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex)
{
return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f)));
}
// Check if the CLUT registers are same, except CLD
inline bool
ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) {
inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex)
{
return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0));
}
// Check if CLUT storage mode is not changed (CSA, CSM and CSPM)
inline bool
ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) {
inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex)
{
return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
}

View File

@ -39,29 +39,33 @@ union GIFTag
{
u64 ai64[2];
u32 ai32[4];
struct
{
u32 NLOOP:15;
u32 EOP:1;
u32 _PAD1:16;
u32 _PAD2:14;
u32 PRE:1;
u32 PRIM:11;
u32 FLG:2; // enum GIF_FLG
u32 NREG:4;
u64 REGS:64;
u32 NLOOP : 15;
u32 EOP : 1;
u32 _PAD1 : 16;
u32 _PAD2 : 14;
u32 PRE : 1;
u32 PRIM : 11;
u32 FLG : 2; // enum GIF_FLG
u32 NREG : 4;
u64 REGS : 64;
};
void set(u32 *data)
{
for(int i = 0; i <= 3; i++)
for (int i = 0; i <= 3; i++)
{
ai32[i] = data[i];
}
}
GIFTag(u32 *data)
{
set(data);
}
GIFTag(){ ai64[0] = 0; ai64[1] = 0; }
};
@ -101,13 +105,12 @@ typedef struct
// Hmm....
nreg = tag.NREG << 2;
if (nreg == 0) nreg = 64;
regs = tag.REGS;
reg = 0;
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
// data[3], data[2], data[1], data[0],
// path->eop, path->nloop, mode, path->nreg, tag.PRE);
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
// data[3], data[2], data[1], data[0],
// path->eop, path->nloop, mode, path->nreg, tag.PRE);
}
u32 GetReg()
@ -124,49 +127,48 @@ typedef struct
reg = 0;
nloop--;
if (nloop == 0)
{
return false;
}
if (nloop == 0) return false;
}
return true;
}
#else
void setTag(u32 *data)
{
tag.set(data);
void setTag(u32 *data)
{
tag.set(data);
nloop = tag.NLOOP;
eop = tag.EOP;
u32 tagpre = tag.PRE;
u32 tagprim = tag.PRIM;
u32 tagflg = tag.FLG;
nloop = tag.NLOOP;
eop = tag.EOP;
u32 tagpre = tag.PRE;
u32 tagprim = tag.PRIM;
u32 tagflg = tag.FLG;
// Hmm....
nreg = tag.NREG << 2;
if (nreg == 0) nreg = 64;
// Hmm....
nreg = tag.NREG << 2;
if (nreg == 0) nreg = 64;
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
// data[3], data[2], data[1], data[0],
// path->eop, path->nloop, tagflg, path->nreg, tagpre);
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
// data[3], data[2], data[1], data[0],
// path->eop, path->nloop, tagflg, path->nreg, tagpre);
mode = tagflg;
mode = tagflg;
switch (mode)
{
case GIF_FLG_PACKED:
regs = *(u64 *)(data+2);
regn = 0;
if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
switch (mode)
{
case GIF_FLG_PACKED:
regs = *(u64 *)(data + 2);
regn = 0;
if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
break;
break;
case GIF_FLG_REGLIST:
regs = *(u64 *)(data + 2);
regn = 0;
break;
}
}
case GIF_FLG_REGLIST:
regs = *(u64 *)(data+2);
regn = 0;
break;
}
}
#endif
} pathInfo;

View File

@ -23,9 +23,9 @@
#include <vector>
// works only when base is a power of 2
static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val)+(base-1))&~(base-1)); }
static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base-1)); }
static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base-1)); }
static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val) + (base - 1))&~(base - 1)); }
static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base - 1)); }
static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1)); }
// d3d texture dims
const int BLOCK_TEXWIDTH = 128;
@ -33,13 +33,12 @@ const int BLOCK_TEXHEIGHT = 512;
extern PCSX2_ALIGNED16(u32 tempblock[64]);
typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw);
typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw);
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
typedef u32(*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
typedef u32(*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
@ -53,6 +52,7 @@ enum Psm_Size
// Both of the following structs should probably be local class variables or in a namespace,
// but this works for the moment.
struct TransferData
{
// Signed because Visual C++ is weird.
@ -88,6 +88,7 @@ struct TransferFuncts
};
// rest not visible externally
struct BLOCK
{
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
@ -142,14 +143,14 @@ extern u32 g_pageTable4[128][128];
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
return word;
}
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
return word;
}
@ -165,70 +166,70 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16S[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
u32 word = bp * 256 + basepage * 8192 + g_pageTable8[y&63][x&127];
return word;
}
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * ((bw+127)>>7)) + (x>>7);
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
u32 word = basepage * 8192 + g_pageTable8[y&63][x&127];
return word;
}
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
u32 word = bp * 512 + basepage * 16384 + g_pageTable4[y&127][x&127];
return word;
}
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>7) * ((bw+127)>>7)) + (x>>7);
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
return word;
}
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
u32 word = bp * 64 + basepage * 2048 + g_pageTable32Z[y&31][x&63];
return word;
}
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>5) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
return word;
}
@ -238,28 +239,28 @@ static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16Z[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = bp * 128 + basepage * 4096 + g_pageTable16SZ[y&63][x&63];
return word;
}
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
{
u32 basepage = ((y>>6) * (bw>>6)) + (x>>6);
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
return word;
}
@ -276,9 +277,11 @@ static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
u8 *pix = (u8*) & pixel;
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@ -305,20 +308,21 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HL(x, y, bp, bw)+3;
u8 *p = (u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
*p = (*p & 0xf0) | pixel;
}
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HH(x, y, bp, bw)+3;
*p = (*p & 0x0f) | (pixel<<4);
u8 *p = (u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
*p = (*p & 0x0f) | (pixel << 4);
}
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@ -328,9 +332,11 @@ static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z(x, y, bp, bw);
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw);
u8 *pix = (u8*) & pixel;
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
@ -380,20 +386,22 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
{
u32 addr = getPixelAddress4(x, y, bp, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
return pix >> 4;
else return pix & 0xf;
return pix >> 4;
else
return pix & 0xf;
}
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL(x, y, bp, bw)+3;
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
return *p & 0x0f;
}
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH(x, y, bp, bw) + 3;
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
return *p >> 4;
}
@ -430,9 +438,11 @@ static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u3
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *buf = (u8*)&((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
u8 *pix = (u8*) & pixel;
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@ -459,20 +469,21 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((u8*)pmem)[addr/2];
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
}
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HL_0(x, y, bw)+3;
u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
*p = (*p & 0xf0) | pixel;
}
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *p = (u8*)pmem + 4*getPixelAddress4HH_0(x, y, bw)+3;
*p = (*p & 0x0f) | (pixel<<4);
u8 *p = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
*p = (*p & 0x0f) | (pixel << 4);
}
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@ -482,9 +493,11 @@ static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
u8 *buf = (u8*)pmem + 4*getPixelAddress32Z_0(x, y, bw);
u8 *pix = (u8*)&pixel;
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
u8 *pix = (u8*) & pixel;
buf[0] = pix[0];
buf[1] = pix[1];
buf[2] = pix[2];
}
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
{
u32 addr = getPixelAddress4_0(x, y, bw);
u8 pix = ((const u8*)pmem)[addr/2];
if (addr & 0x1)
return pix >> 4;
else
@ -542,13 +556,13 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HL_0(x, y, bw)+3;
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
return *p & 0x0f;
}
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
{
const u8 *p = (const u8*)pmem+4*getPixelAddress4HH_0(x, y, bw) + 3;
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
return *p >> 4;
}

View File

@ -14,57 +14,60 @@ extern u8* pstart;
template <class T>
static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
{
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
for(; tempY < endY; ++tempY)
for (; tempY < endY; ++tempY)
{
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
{
/* write as many pixel at one time as possible */
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
}
}
}
for(; tempY < endY; ++tempY)
for (; tempY < endY; ++tempY)
{
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
{
/* write as many pixel at one time as possible */
if( nSize < widthlimit ) return NULL;
if (nSize < widthlimit) return NULL;
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
if( widthlimit > 1 )
if (widthlimit > 1)
{
wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, tempY % 2048, buf[1], gs.dstbuf.bw);
if( widthlimit > 2 )
if (widthlimit > 2)
{
wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw);
wp(pstart, (tempX + 2) % 2048, tempY % 2048, buf[2], gs.dstbuf.bw);
if( widthlimit > 3 )
if (widthlimit > 3)
{
wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw);
wp(pstart, (tempX + 3) % 2048, tempY % 2048, buf[3], gs.dstbuf.bw);
}
}
}
}
if ( tempX >= gs.imageEndX )
if (tempX >= gs.imageEndX)
{
assert(tempX == gs.imageEndX);
tempX = gs.trxpos.dx;
}
else
{
assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0);
return NULL;
}
}
return buf;
}
@ -72,47 +75,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
template <class T>
static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit))
{
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
for(; tempY < endY; ++tempY)
for (; tempY < endY; ++tempY)
{
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
{
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw);
}
if( tempX >= gs.imageEndX )
{
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
tempX = gs.trxpos.dx;
}
else
{
assert( gs.imageTransfer == -1 || nSize == 0 );
return NULL;
}
}
}
else
{
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
for(; tempY < endY; ++tempY)
{
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit)
{
if (nSize < widthlimit) return NULL;
/* write as many pixel at one time as possible */
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw);
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw);
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw);
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw);
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw);
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw);
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw);
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw);
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw);
}
if (tempX >= gs.imageEndX)
@ -122,18 +92,55 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
}
else
{
if ( nSize < 0 )
{
/* extracted too much */
assert( (nSize%3)==0 && nSize > -24 );
tempX += nSize/3;
nSize = 0;
}
assert( gs.imageTransfer == -1 || nSize == 0 );
assert(gs.imageTransfer == -1 || nSize == 0);
return NULL;
}
}
}
else
{
assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8);
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
{
if (nSize < widthlimit) return NULL;
/* write as many pixel at one time as possible */
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf + 0), gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *(u32*)(buf + 3), gs.dstbuf.bw);
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *(u32*)(buf + 6), gs.dstbuf.bw);
wp(pstart, (tempX + 3) % 2048, tempY % 2048, *(u32*)(buf + 9), gs.dstbuf.bw);
wp(pstart, (tempX + 4) % 2048, tempY % 2048, *(u32*)(buf + 12), gs.dstbuf.bw);
wp(pstart, (tempX + 5) % 2048, tempY % 2048, *(u32*)(buf + 15), gs.dstbuf.bw);
wp(pstart, (tempX + 6) % 2048, tempY % 2048, *(u32*)(buf + 18), gs.dstbuf.bw);
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw);
}
if (tempX >= gs.imageEndX)
{
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
tempX = gs.trxpos.dx;
}
else
{
if (nSize < 0)
{
/* extracted too much */
assert((nSize % 3) == 0 && nSize > -24);
tempX += nSize / 3;
nSize = 0;
}
assert(gs.imageTransfer == -1 || nSize == 0);
return NULL;
}
}
}
return buf;
}
@ -141,73 +148,84 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
template <class T>
static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
for(; tempY < endY; ++tempY)
for (; tempY < endY; ++tempY)
{
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
{
/* write as many pixel at one time as possible */
wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
buf++;
if ( widthlimit > 2 )
if (widthlimit > 2)
{
wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 3) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
buf++;
if( widthlimit > 4 )
if (widthlimit > 4)
{
wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
wp(pstart, (tempX + 4) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 5) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
buf++;
if( widthlimit > 6 )
if (widthlimit > 6)
{
wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
wp(pstart, (tempX + 6) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
buf++;
}
}
}
}
if ( tempX >= gs.imageEndX )
if (tempX >= gs.imageEndX)
{
tempX = gs.trxpos.dx;
}
else
{
assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
assert(gs.imageTransfer == -1 || (nSize / 32) == 0);
return NULL;
}
}
return buf;
}
template <class T>
static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
switch (data.psm)
{
case PSM_: return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
case PSM_4_: return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
case PSM_24_: return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
}
assert(0);
return NULL;
}
static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
{
switch (data.psm)
{
case PSM_:
return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
case PSM_4_:
return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
case PSM_24_:
return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
}
assert(0);
return NULL;
}
template <class T>
static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
{
for(u32 tempi = 0; tempi < blockheight; ++tempi)
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
{
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw);
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
}
buf += pitch - fracX;
}
return buf;
}
@ -215,14 +233,16 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
template <class T>
static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
{
for(u32 tempi = 0; tempi < blockheight; ++tempi)
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
{
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw);
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
}
buf += 3*(pitch-fracX);
buf += 3 * (pitch - fracX);
}
return buf;
}
@ -230,30 +250,39 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
template <class T>
static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
{
for(u32 tempi = 0; tempi < blockheight; ++tempi)
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++)
for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++)
{
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw);
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
}
buf += (pitch-fracX)/2;
buf += (pitch - fracX) / 2;
}
return buf;
}
template <class T>
static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
{
switch (data.psm)
{
case PSM_: return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_4_: return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_24_: return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
}
assert(0);
return NULL;
}
static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
{
switch (data.psm)
{
case PSM_:
return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_4_:
return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
case PSM_24_:
return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
}
assert(0);
return NULL;
}
// calculate pitch in source buffer
static __forceinline u32 TransPitch(u32 pitch, u32 size)

View File

@ -70,21 +70,22 @@ extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file
// declare linux equivalents
static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
{
assert( align < 0x10000 );
char* p = (char*)malloc(size+align);
int off = 2+align - ((int)(uptr)(p+2) % align);
assert(align < 0x10000);
char* p = (char*)malloc(size + align);
int off = 2 + align - ((int)(uptr)(p + 2) % align);
p += off;
*(u16*)(p-2) = off;
*(u16*)(p - 2) = off;
return p;
}
static __forceinline void pcsx2_aligned_free(void* pmem)
{
if( pmem != NULL ) {
if (pmem != NULL)
{
char* p = (char*)pmem;
free(p - (int)*(u16*)(p-2));
free(p - (int)*(u16*)(p - 2));
}
}
@ -98,7 +99,7 @@ inline unsigned long timeGetTime()
timeb t;
ftime(&t);
return (unsigned long)(t.time*1000+t.millitm);
return (unsigned long)(t.time*1000 + t.millitm);
}
struct RECT
@ -113,20 +114,24 @@ struct RECT
// Function-like minimum of two values. It expands to a conditional expression,
// so the selected argument is evaluated twice -- avoid arguments with side effects.
#define min(a,b) (((a) < (b)) ? (a) : (b))
typedef struct {
typedef struct
{
int x, y, w, h;
} Rect;
typedef struct {
typedef struct
{
int x, y;
} Point;
typedef struct {
typedef struct
{
int x0, y0;
int x1, y1;
} Rect2;
typedef struct {
typedef struct
{
int x, y, c;
} PointC;
@ -145,6 +150,7 @@ typedef struct {
#define GSOPTION_LOADED 0x8000
//Configuration values.
typedef struct
{
u8 mrtdepth; // write color in render target
@ -227,18 +233,18 @@ extern void __LogToConsole(const char *fmt, ...);
namespace ZZLog
{
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void Greg_Log(const char *fmt, ...);
extern void Prim_Log(const char *fmt, ...);
extern void GS_Log(const char *fmt, ...);
extern void Debug_Log(const char *fmt, ...);
extern void Warn_Log(const char *fmt, ...);
extern void Error_Log(const char *fmt, ...);
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void Greg_Log(const char *fmt, ...);
extern void Prim_Log(const char *fmt, ...);
extern void GS_Log(const char *fmt, ...);
extern void Debug_Log(const char *fmt, ...);
extern void Warn_Log(const char *fmt, ...);
extern void Error_Log(const char *fmt, ...);
};
#define REG64(name) \
@ -247,14 +253,14 @@ union name \
u64 i64; \
u32 ai32[2]; \
struct { \
#define REG128(name)\
union name \
{ \
u64 ai64[2]; \
u32 ai32[4]; \
struct { \
#define REG64_(prefix, name) REG64(prefix##name)
#define REG128_(prefix, name) REG128(prefix##name)
@ -266,13 +272,13 @@ union name \
{ \
u64 i64; \
u32 ai32[2]; \
#define REG128_SET(name)\
union name \
{ \
u64 ai64[2]; \
u32 ai32[4]; \
#define REG_SET_END };
extern void LoadConfig();
@ -310,16 +316,18 @@ static __forceinline u64 GetTickFrequency()
static __forceinline u64 GetCPUTicks()
{
struct timeval t;
gettimeofday(&t, NULL);
return ((u64)t.tv_sec*GetTickFrequency())+t.tv_usec;
return ((u64)t.tv_sec*GetTickFrequency()) + t.tv_usec;
}
#else
static __aligned16 LARGE_INTEGER lfreq;
static __forceinline void InitCPUTicks()
{
QueryPerformanceFrequency( &lfreq );
QueryPerformanceFrequency(&lfreq);
}
static __forceinline u64 GetTickFrequency()
@ -330,42 +338,47 @@ static __forceinline u64 GetTickFrequency()
static __forceinline u64 GetCPUTicks()
{
LARGE_INTEGER count;
QueryPerformanceCounter( &count );
QueryPerformanceCounter(&count);
return count.QuadPart;
}
#endif
template <typename T>
class CInterfacePtr
{
public:
inline CInterfacePtr() : ptr(NULL) {}
inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if ( ptr != NULL ) ptr->AddRef(); }
inline ~CInterfacePtr() { if( ptr != NULL ) ptr->Release(); }
inline T* operator* () { assert( ptr != NULL); return *ptr; }
inline T* operator->() { return ptr; }
inline T* get() { return ptr; }
public:
inline CInterfacePtr() : ptr(NULL) {}
inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); }
inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); }
inline T* operator*() { assert(ptr != NULL); return *ptr; }
inline T* operator->() { return ptr; }
inline T* get() { return ptr; }
inline void release() {
if( ptr != NULL ) { ptr->Release(); ptr = NULL; }
}
inline void release()
{
if (ptr != NULL) { ptr->Release(); ptr = NULL; }
}
inline operator T*() { return ptr; }
inline operator T*() { return ptr; }
inline bool operator==(T* rhs) { return ptr == rhs; }
inline bool operator!=(T* rhs) { return ptr != rhs; }
inline bool operator==(T* rhs) { return ptr == rhs; }
inline bool operator!=(T* rhs) { return ptr != rhs; }
inline CInterfacePtr& operator= (T* newptr)
{
if (ptr != NULL) ptr->Release();
inline CInterfacePtr& operator= (T* newptr) {
if( ptr != NULL ) ptr->Release();
ptr = newptr;
ptr = newptr;
if( ptr != NULL ) ptr->AddRef();
return *this;
}
if (ptr != NULL) ptr->AddRef();
private:
T* ptr;
return *this;
}
private:
T* ptr;
};
@ -380,24 +393,25 @@ void DVProfClear(); // clears all the profilers
class DVProfileFunc
{
public:
u32 dwUserData;
DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
~DVProfileFunc() { DVProfEnd(dwUserData); }
public:
u32 dwUserData;
DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
~DVProfileFunc() { DVProfEnd(dwUserData); }
};
#else
class DVProfileFunc
{
public:
u32 dwUserData;
static __forceinline DVProfileFunc(char* pname) {}
static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
~DVProfileFunc() {}
public:
u32 dwUserData;
static __forceinline DVProfileFunc(char* pname) {}
static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
~DVProfileFunc() {}
};
#endif
#endif // UTIL_H_INCLUDED

View File

@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
return false;
}
// First try to draw frame from targets. It's
// First try to draw frame from targets.
inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
{
// get the start and end addresses of the buffer
@ -662,9 +662,15 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
// The same as the previous, but from memory.
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
// this is the function that does it.
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int interlace, int bInterlace)
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
{
// get the start and end addresses of the buffer
int bpp = RenderGetBpp(texframe.psm);
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
int start, end;
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
{
(*it)->Resolve();
@ -676,34 +682,36 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
if ((pmemtarg == NULL) || (bInterlace >= 2))
ZZLog::Error_Log("CRCR Check for memory shader fault.");
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
SetShaderCaller("RenderCheckForMemory");
SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
// finally render from the memory (note that the stencil buffer will keep previous regions)
Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
Vector v;
// Fixme: Why is this here?
// We should probably call RenderSetTargetBitTex instead.
if (g_bCRTCBilinear)
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex");
v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT);
else
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex");
v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT);
// finally render from the memory (note that the stencil buffer will keep previous regions)
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
v = RenderSetTargetBitTrans(texframe.th);
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
SETPIXELSHADER(ppsCRTC[bInterlace].prog);
GL_REPORT_ERRORD();
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace)
// start from the last circuit
for (int i = !PMODE->SLBG; i >= 0; --i)
{
tex0Info& texframe = dispinfo[i];
if (texframe.th <= 1) continue;
@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace)
// if we could not draw image from target's do it from memory
if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace))
RenderCheckForMemory(texframe, listTargs, interlace, bInterlace);
RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
}
GL_REPORT_ERRORD();

View File

@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
}
// After the frame is reset, a 16-to-32 or 32-to-16 bit color conversion may be needed.
inline void ZeroGS::VB::CheckFrame16vs32Convesion()
inline void ZeroGS::VB::CheckFrame16vs32Conversion()
{
if (prndr->status & CRenderTarget::TS_NeedConvert32)
{
@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp)
bChanged = CheckFrameResolveRender(tbp);
CheckFrame16vs32Convesion();
CheckFrame16vs32Conversion();
}
else if (bNeedZCheck)
{

View File

@ -1,22 +1,24 @@
#ifndef RasterFont_Header
#define RasterFont_Header
class RasterFont {
protected:
int fontOffset;
class RasterFont
{
public:
RasterFont();
~RasterFont(void);
static int debug;
protected:
int fontOffset;
// some useful constants
enum {char_width = 10};
enum {char_height = 15};
public:
RasterFont();
~RasterFont(void);
static int debug;
// and the happy helper functions
void printString(const char *s, double x, double y, double z=0.0);
void printCenteredString(const char *s, double y, int screen_width, double z=0.0);
// some useful constants
enum {char_width = 10};
enum {char_height = 15};
// and the happy helper functions
void printString(const char *s, double x, double y, double z = 0.0);
void printCenteredString(const char *s, double y, int screen_width, double z = 0.0);
};
#endif

View File

@ -22,23 +22,27 @@
#define TARGET_VIRTUAL_KEY 0x80000000
#include "PS2Edefs.h"
inline Vector DefaultOneColor( FRAGMENTSHADER ptr ) {
Vector v = Vector ( 1, 1, 1, 1 );
cgGLSetParameter4fv( ptr.sOneColor, v);
inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
{
Vector v = Vector(1, 1, 1, 1);
cgGLSetParameter4fv(ptr.sOneColor, v);
return v ;
}
namespace ZeroGS {
namespace ZeroGS
{
inline u32 GetFrameKey (int fbp, int fbw, VB& curvb);
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);
// manages render targets
class CRenderTargetMngr
{
// manages render targets
class CRenderTargetMngr
{
public:
typedef map<u32, CRenderTarget*> MAPTARGETS;
enum TargetOptions {
enum TargetOptions
{
TO_DepthBuffer = 1,
TO_StrictHeight = 2, // height returned has to be the same as requested
TO_Virtual = 4
@ -50,16 +54,17 @@ namespace ZeroGS {
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) {
MAPTARGETS::iterator it = mapTargets.find (GetFrameKey(fbp, fbw, curvb));
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
{
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
{
printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
printf("%x %x\n", fbp, fbw);
for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
}*/
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
{
printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
printf("%x %x\n", fbp, fbw);
for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
}*/
return it != mapTargets.end() ? it->second : NULL;
}
@ -68,8 +73,9 @@ namespace ZeroGS {
// resolves all targets within a range
__forceinline void Resolve(int start, int end);
__forceinline void ResolveAll() {
for(MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it )
__forceinline void ResolveAll()
{
for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it)
it->second->Resolve();
}
@ -77,12 +83,13 @@ namespace ZeroGS {
void DestroyIntersecting(CRenderTarget* prndr);
// promotes a target from virtual to real
inline CRenderTarget* Promote(u32 key) {
assert( !(key & TARGET_VIRTUAL_KEY) );
inline CRenderTarget* Promote(u32 key)
{
assert(!(key & TARGET_VIRTUAL_KEY));
// promote to regular targ
CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key|TARGET_VIRTUAL_KEY);
assert( it != mapTargets.end() );
CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key | TARGET_VIRTUAL_KEY);
assert(it != mapTargets.end());
CRenderTarget* ptarg = it->second;
mapTargets.erase(it);
@ -90,31 +97,36 @@ namespace ZeroGS {
DestroyIntersecting(ptarg);
it = mapTargets.find(key);
if( it != mapTargets.end() ) {
if (it != mapTargets.end())
{
DestroyTarg(it->second);
it->second = ptarg;
}
else
mapTargets[key] = ptarg;
if( g_GameSettings & GAME_RESOLVEPROMOTED )
ptarg->status = CRenderTarget::TS_Resolved;
else
ptarg->status = CRenderTarget::TS_NeedUpdate;
return ptarg;
if (g_GameSettings & GAME_RESOLVEPROMOTED)
ptarg->status = CRenderTarget::TS_Resolved;
else
ptarg->status = CRenderTarget::TS_NeedUpdate;
return ptarg;
}
static void DestroyTarg(CRenderTarget* ptarg);
MAPTARGETS mapTargets, mapDummyTargs;
};
};
class CMemoryTargetMngr
{
class CMemoryTargetMngr
{
public:
CMemoryTargetMngr() : curstamp(0) {}
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
CMemoryTarget* MemoryTarget_SearchExistTarget (int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
void Destroy(); // destroy all targs
@ -122,26 +134,28 @@ namespace ZeroGS {
void ClearRange(int starty, int endy); // set all targets to cleared
void DestroyCleared(); // flush all cleared targes
void DestroyOldest();
list<CMemoryTarget> listTargets, listClearedTargets;
u32 curstamp;
private:
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
};
};
class CBitwiseTextureMngr
{
class CBitwiseTextureMngr
{
public:
~CBitwiseTextureMngr() { Destroy(); }
void Destroy();
// GetTex can delete textures to free up memory, which is dangerous if one of them is still in use, so specify at least one texture (ptexDoNotDelete) that must be kept.
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) {
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete)
{
map<u32, u32>::iterator it = mapTextures.find(bitvalue);
if( it != mapTextures.end() )
return it->second;
if (it != mapTextures.end()) return it->second;
return GetTexInt(bitvalue, ptexDoNotDelete);
}
@ -149,99 +163,115 @@ namespace ZeroGS {
u32 GetTexInt(u32 bitvalue, u32 ptexDoNotDelete);
map<u32, u32> mapTextures;
};
};
// manages
class CRangeManager
{
// manages
class CRangeManager
{
public:
CRangeManager() {
CRangeManager()
{
ranges.reserve(16);
}
// [start, end)
struct RANGE {
struct RANGE
{
RANGE() {}
inline RANGE(int start, int end) : start(start), end(end) {}
int start, end;
};
// works in semi logN
void Insert(int start, int end);
void RangeSanityCheck();
inline void Clear() {
inline void Clear()
{
ranges.resize(0);
}
vector<RANGE> ranges; // organized in ascending order, non-intersecting
};
};
extern CRenderTargetMngr s_RTs, s_DepthRTs;
extern CBitwiseTextureMngr s_BitwiseTextures;
extern CMemoryTargetMngr g_MemTargs;
extern CRenderTargetMngr s_RTs, s_DepthRTs;
extern CBitwiseTextureMngr s_BitwiseTextures;
extern CMemoryTargetMngr g_MemTargs;
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
// Real rendered width, depends on AA and AAneg.
inline int RW(int tbw) {
if (s_AAx >= s_AAz)
return (tbw << ( s_AAx - s_AAz ));
else
return (tbw >> ( s_AAz - s_AAx ));
}
// Real rendered width, depends on AA and AAneg.
inline int RW(int tbw)
{
if (s_AAx >= s_AAz)
return (tbw << (s_AAx - s_AAz));
else
return (tbw >> (s_AAz - s_AAx));
}
// Real rendered height, depends on AA and AAneg.
inline int RH(int tbh) {
if (s_AAy >= s_AAw)
return (tbh << ( s_AAy - s_AAw ));
else
return (tbh >> ( s_AAw - s_AAy ));
}
// Real rendered height, depends on AA and AAneg.
inline int RH(int tbh)
{
if (s_AAy >= s_AAw)
return (tbh << (s_AAy - s_AAw));
else
return (tbh >> (s_AAw - s_AAy));
}
/* inline void CreateTargetsList(int start, int end, list<ZeroGS::CRenderTarget*>& listTargs) {
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
}*/
// This pattern of functions is called 3 times, so I add creating Targets list into one.
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end) {
list<ZeroGS::CRenderTarget*> listTargs;
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
return listTargs;
}
// This pattern of functions is called 3 times, so I add creating Targets list into one.
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
{
list<ZeroGS::CRenderTarget*> listTargs;
s_DepthRTs.GetTargs(start, end, listTargs);
s_RTs.GetTargs(start, end, listTargs);
return listTargs;
}
extern Vector g_vdepth;
extern int icurctx;
extern Vector g_vdepth;
extern int icurctx;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
extern GLuint vboRect;
extern VERTEXSHADER pvsBitBlt;
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
extern GLuint vboRect;
// Unworking
#define PSMPOSITION 28
// Encode the frame's buffer width (fbw) and base pointer (fbp) into the key used by the target manager.
// These are 3 variants of one function; the key depends on fbp and fbw.
inline u32 GetFrameKey (const frameInfo& frame) {
inline u32 GetFrameKey(const frameInfo& frame)
{
return (((frame.fbw) << 16) | (frame.fbp));
}
inline u32 GetFrameKey ( CRenderTarget* frame ) {
inline u32 GetFrameKey(CRenderTarget* frame)
{
return (((frame->fbw) << 16) | (frame->fbp));
}
inline u32 GetFrameKey (int fbp, int fbw, VB& curvb) {
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb)
{
return (((fbw) << 16) | (fbp));
}
inline u16 ShiftHeight (int fbh, int fbp, int fbhCalc) {
inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc)
{
return fbh;
}
//FIXME: this code for P4 ad KH1. It should not be such strange!
//FIXME: this code is for P4 and KH1. It should not be so strange!
//Dummy targets were deleted from mapTargets, but not erased.
inline u32 GetFrameKeyDummy (const frameInfo& frame) {
inline u32 GetFrameKeyDummy(const frameInfo& frame)
{
// if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh)
// printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame));
// height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be
@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) {
return (((frame.fbw) << 16) | frame.fbh);
}
inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) {
inline u32 GetFrameKeyDummy(CRenderTarget* frame)
{
if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300)
return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm));
else

View File

@ -106,7 +106,7 @@ extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut);
extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters );
extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters);
#ifdef ZEROGS_SSE2

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@
// Converts an angle from radians to degrees by multiplying by 57.29577951
// (approximately 180/pi) cast to T. For an integral T the factor truncates to 57.
template <class T>
inline T RAD_2_DEG(T radians)
{
	const T degreesPerRadian = (T)57.29577951;
	return radians * degreesPerRadian;
}
class Transform;
class TransformMatrix;
typedef float dReal;
@ -35,63 +36,57 @@ inline dReal* inv4(const dReal* pf, dReal* pfres);
// class used for 3 and 4 dim vectors and quaternions
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
class Vector
{
public:
dReal x, y, z, w;
public:
dReal x, y, z, w;
Vector() : x(0), y(0), z(0), w(0) {}
Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
Vector() : x(0), y(0), z(0), w(0) {}
Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
dReal operator[](int i) const { return (&x)[i]; }
dReal& operator[](int i) { return (&x)[i]; }
// casting operators
operator dReal*() { return &x; }
operator const dReal*() const { return (const dReal*)&x; }
// SCALAR FUNCTIONS
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
inline void normalize() { normalize4(&x, &x); }
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
inline void SetColor(u32 color)
{
x = (color & 0xff) / 255.0f;
y = ((color >> 8) & 0xff) / 255.0f;
z = ((color >> 16) & 0xff) / 255.0f;
}
dReal operator[](int i) const { return (&x)[i]; }
dReal& operator[](int i) { return (&x)[i]; }
// casting operators
operator dReal* () { return &x; }
operator const dReal* () const { return (const dReal*)&x; }
// SCALAR FUNCTIONS
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
inline void normalize() { normalize4(&x, &x); }
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
inline void SetColor(u32 color)
{
x = (color & 0xff) / 255.0f;
y = ((color >> 8) & 0xff) / 255.0f;
z = ((color >> 16) & 0xff) / 255.0f;
}
// 3 dim cross product, w is not touched
/// this = this x v
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
/// this = u x v
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
inline Vector operator+(const Vector &r) const { Vector v; v.x = x+r.x; v.y = y+r.y; v.z = z+r.z; v.w = w+r.w; return v; }
inline Vector operator-(const Vector &r) const { Vector v; v.x = x-r.x; v.y = y-r.y; v.z = z-r.z; v.w = w-r.w; return v; }
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x*x; v.y = r.y*y; v.z = r.z*z; v.w = r.w*w; return v; }
inline Vector operator*(dReal k) const { Vector v; v.x = k*x; v.y = k*y; v.z = k*z; v.w = k*w; return v; }
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
inline Vector& operator /= (const dReal _k) { dReal k=1/_k; x *= k; y *= k; z *= k; w *= k; return *this; }
friend Vector operator* (float f, const Vector& v);
//friend ostream& operator<<(ostream& O, const Vector& v);
//friend istream& operator>>(istream& I, Vector& v);
// 3 dim cross product, w is not touched
/// this = this x v
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
/// this = u x v
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
friend Vector operator*(float f, const Vector& v);
//friend ostream& operator<<(ostream& O, const Vector& v);
//friend istream& operator>>(istream& I, Vector& v);
};
inline Vector operator* (float f, const Vector& left)
inline Vector operator*(float f, const Vector& left)
{
Vector v;
v.x = f * left.x;
@ -113,18 +108,22 @@ struct OBB
struct TRIANGLE
{
TRIANGLE() {}
TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {}
~TRIANGLE() {}
Vector v1, v2, v3; //!< the vertices of the triangle
const Vector& operator[](int i) const { return (&v1)[i]; }
Vector& operator[](int i) { return (&v1)[i]; }
Vector& operator[](int i) { return (&v1)[i]; }
/// assumes CCW ordering of vertices
inline Vector ComputeNormal() {
inline Vector ComputeNormal()
{
Vector normal;
cross3(normal, v2-v1, v3-v1);
cross3(normal, v2 - v1, v3 - v1);
return normal;
}
};
@ -172,8 +171,8 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf);
inline bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2x, dReal& fv2y);
// Simple routines for linear algebra algorithms //
int CubicRoots (double c0, double c1, double c2, double *r0, double *r1, double *r2);
bool QLAlgorithm3 (dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);
int CubicRoots(double c0, double c1, double c2, double *r0, double *r1, double *r2);
bool QLAlgorithm3(dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);
void EigenSymmetric3(dReal* fCovariance, dReal* eval, dReal* fAxes);
@ -182,7 +181,7 @@ void GetCovarBasisVectors(dReal fCovariance[3][3], Vector* vRight, Vector* vUp,
// first root returned is always >= second, roots are defined if the quadratic doesn't have real solutions
void QuadraticSolver(dReal* pfQuadratic, dReal* pfRoots);
int insideQuadrilateral(const Vector* p0,const Vector* p1, const Vector* p2,const Vector* p3);
int insideQuadrilateral(const Vector* p0, const Vector* p1, const Vector* p2, const Vector* p3);
int insideTriangle(const Vector* p0, const Vector* p1, const Vector* p2);
// multiplies a matrix by a scalar
@ -238,30 +237,48 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
c = pfmat[0] * pfmat[3] - pfmat[1] * pfmat[2];
d = b * b - 4.0f * c + 1e-16f;
if( d < 0 ) return false;
if( d < 1e-16f ) {
if (d < 0) return false;
if (d < 1e-16f)
{
a = -0.5f * b;
peigs[0] = a; peigs[1] = a;
fv1x = pfmat[1]; fv1y = a - pfmat[0];
c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
fv1x *= c; fv1y *= c;
fv2x = -fv1y; fv2y = fv1x;
peigs[0] = a;
peigs[1] = a;
fv1x = pfmat[1];
fv1y = a - pfmat[0];
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
fv1x *= c;
fv1y *= c;
fv2x = -fv1y;
fv2y = fv1x;
return true;
}
// two roots
d = sqrtf(d);
a = -0.5f * (b + d);
peigs[0] = a;
fv1x = pfmat[1]; fv1y = a-pfmat[0];
c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
fv1x *= c; fv1y *= c;
fv1x = pfmat[1];
fv1y = a - pfmat[0];
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
fv1x *= c;
fv1y *= c;
a += d;
peigs[1] = a;
fv2x = pfmat[1]; fv2y = a-pfmat[0];
c = 1 / sqrtf(fv2x*fv2x + fv2y*fv2y);
fv2x *= c; fv2y *= c;
fv2x = pfmat[1];
fv2y = a - pfmat[0];
c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y);
fv2x *= c;
fv2y *= c;
return true;
}
@ -270,62 +287,70 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
// Functions that are replacable by ipp library funcs
template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
{
assert( pf1 != NULL && pf2 != NULL && pfres != NULL );
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
T* pfres2;
if( pfres == pf1 || pfres == pf2 ) pfres2 = (T*)alloca(9 * sizeof(T));
else pfres2 = pfres;
pfres2[0*4+0] = pf1[0*4+0]*pf2[0*4+0]+pf1[0*4+1]*pf2[1*4+0]+pf1[0*4+2]*pf2[2*4+0];
pfres2[0*4+1] = pf1[0*4+0]*pf2[0*4+1]+pf1[0*4+1]*pf2[1*4+1]+pf1[0*4+2]*pf2[2*4+1];
pfres2[0*4+2] = pf1[0*4+0]*pf2[0*4+2]+pf1[0*4+1]*pf2[1*4+2]+pf1[0*4+2]*pf2[2*4+2];
if (pfres == pf1 || pfres == pf2)
pfres2 = (T*)alloca(9 * sizeof(T));
else
pfres2 = pfres;
pfres2[1*4+0] = pf1[1*4+0]*pf2[0*4+0]+pf1[1*4+1]*pf2[1*4+0]+pf1[1*4+2]*pf2[2*4+0];
pfres2[1*4+1] = pf1[1*4+0]*pf2[0*4+1]+pf1[1*4+1]*pf2[1*4+1]+pf1[1*4+2]*pf2[2*4+1];
pfres2[1*4+2] = pf1[1*4+0]*pf2[0*4+2]+pf1[1*4+1]*pf2[1*4+2]+pf1[1*4+2]*pf2[2*4+2];
pfres2[0*4+0] = pf1[0*4+0] * pf2[0*4+0] + pf1[0*4+1] * pf2[1*4+0] + pf1[0*4+2] * pf2[2*4+0];
pfres2[0*4+1] = pf1[0*4+0] * pf2[0*4+1] + pf1[0*4+1] * pf2[1*4+1] + pf1[0*4+2] * pf2[2*4+1];
pfres2[0*4+2] = pf1[0*4+0] * pf2[0*4+2] + pf1[0*4+1] * pf2[1*4+2] + pf1[0*4+2] * pf2[2*4+2];
pfres2[1*4+0] = pf1[1*4+0] * pf2[0*4+0] + pf1[1*4+1] * pf2[1*4+0] + pf1[1*4+2] * pf2[2*4+0];
pfres2[1*4+1] = pf1[1*4+0] * pf2[0*4+1] + pf1[1*4+1] * pf2[1*4+1] + pf1[1*4+2] * pf2[2*4+1];
pfres2[1*4+2] = pf1[1*4+0] * pf2[0*4+2] + pf1[1*4+1] * pf2[1*4+2] + pf1[1*4+2] * pf2[2*4+2];
pfres2[2*4+0] = pf1[2*4+0] * pf2[0*4+0] + pf1[2*4+1] * pf2[1*4+0] + pf1[2*4+2] * pf2[2*4+0];
pfres2[2*4+1] = pf1[2*4+0] * pf2[0*4+1] + pf1[2*4+1] * pf2[1*4+1] + pf1[2*4+2] * pf2[2*4+1];
pfres2[2*4+2] = pf1[2*4+0] * pf2[0*4+2] + pf1[2*4+1] * pf2[1*4+2] + pf1[2*4+2] * pf2[2*4+2];
pfres2[2*4+0] = pf1[2*4+0]*pf2[0*4+0]+pf1[2*4+1]*pf2[1*4+0]+pf1[2*4+2]*pf2[2*4+0];
pfres2[2*4+1] = pf1[2*4+0]*pf2[0*4+1]+pf1[2*4+1]*pf2[1*4+1]+pf1[2*4+2]*pf2[2*4+1];
pfres2[2*4+2] = pf1[2*4+0]*pf2[0*4+2]+pf1[2*4+1]*pf2[1*4+2]+pf1[2*4+2]*pf2[2*4+2];
if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T));
return pfres;
}
inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2) { return _mult3<dReal>(pfres, pf1, pf2); }
inline double* mult3(double* pfres, const double* pf1, const double* pf2) { return _mult3<double>(pfres, pf1, pf2); }
template <class T>
inline T* _mult4(T* pfres, const T* p1, const T* p2)
{
assert( pfres != NULL && p1 != NULL && p2 != NULL );
assert(pfres != NULL && p1 != NULL && p2 != NULL);
T* pfres2;
if( pfres == p1 || pfres == p2 ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
pfres2[0*4+0] = p1[0*4+0]*p2[0*4+0] + p1[0*4+1]*p2[1*4+0] + p1[0*4+2]*p2[2*4+0] + p1[0*4+3]*p2[3*4+0];
pfres2[0*4+1] = p1[0*4+0]*p2[0*4+1] + p1[0*4+1]*p2[1*4+1] + p1[0*4+2]*p2[2*4+1] + p1[0*4+3]*p2[3*4+1];
pfres2[0*4+2] = p1[0*4+0]*p2[0*4+2] + p1[0*4+1]*p2[1*4+2] + p1[0*4+2]*p2[2*4+2] + p1[0*4+3]*p2[3*4+2];
pfres2[0*4+3] = p1[0*4+0]*p2[0*4+3] + p1[0*4+1]*p2[1*4+3] + p1[0*4+2]*p2[2*4+3] + p1[0*4+3]*p2[3*4+3];
if (pfres == p1 || pfres == p2)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
pfres2[1*4+0] = p1[1*4+0]*p2[0*4+0] + p1[1*4+1]*p2[1*4+0] + p1[1*4+2]*p2[2*4+0] + p1[1*4+3]*p2[3*4+0];
pfres2[1*4+1] = p1[1*4+0]*p2[0*4+1] + p1[1*4+1]*p2[1*4+1] + p1[1*4+2]*p2[2*4+1] + p1[1*4+3]*p2[3*4+1];
pfres2[1*4+2] = p1[1*4+0]*p2[0*4+2] + p1[1*4+1]*p2[1*4+2] + p1[1*4+2]*p2[2*4+2] + p1[1*4+3]*p2[3*4+2];
pfres2[1*4+3] = p1[1*4+0]*p2[0*4+3] + p1[1*4+1]*p2[1*4+3] + p1[1*4+2]*p2[2*4+3] + p1[1*4+3]*p2[3*4+3];
pfres2[0*4+0] = p1[0*4+0] * p2[0*4+0] + p1[0*4+1] * p2[1*4+0] + p1[0*4+2] * p2[2*4+0] + p1[0*4+3] * p2[3*4+0];
pfres2[0*4+1] = p1[0*4+0] * p2[0*4+1] + p1[0*4+1] * p2[1*4+1] + p1[0*4+2] * p2[2*4+1] + p1[0*4+3] * p2[3*4+1];
pfres2[0*4+2] = p1[0*4+0] * p2[0*4+2] + p1[0*4+1] * p2[1*4+2] + p1[0*4+2] * p2[2*4+2] + p1[0*4+3] * p2[3*4+2];
pfres2[0*4+3] = p1[0*4+0] * p2[0*4+3] + p1[0*4+1] * p2[1*4+3] + p1[0*4+2] * p2[2*4+3] + p1[0*4+3] * p2[3*4+3];
pfres2[2*4+0] = p1[2*4+0]*p2[0*4+0] + p1[2*4+1]*p2[1*4+0] + p1[2*4+2]*p2[2*4+0] + p1[2*4+3]*p2[3*4+0];
pfres2[2*4+1] = p1[2*4+0]*p2[0*4+1] + p1[2*4+1]*p2[1*4+1] + p1[2*4+2]*p2[2*4+1] + p1[2*4+3]*p2[3*4+1];
pfres2[2*4+2] = p1[2*4+0]*p2[0*4+2] + p1[2*4+1]*p2[1*4+2] + p1[2*4+2]*p2[2*4+2] + p1[2*4+3]*p2[3*4+2];
pfres2[2*4+3] = p1[2*4+0]*p2[0*4+3] + p1[2*4+1]*p2[1*4+3] + p1[2*4+2]*p2[2*4+3] + p1[2*4+3]*p2[3*4+3];
pfres2[1*4+0] = p1[1*4+0] * p2[0*4+0] + p1[1*4+1] * p2[1*4+0] + p1[1*4+2] * p2[2*4+0] + p1[1*4+3] * p2[3*4+0];
pfres2[1*4+1] = p1[1*4+0] * p2[0*4+1] + p1[1*4+1] * p2[1*4+1] + p1[1*4+2] * p2[2*4+1] + p1[1*4+3] * p2[3*4+1];
pfres2[1*4+2] = p1[1*4+0] * p2[0*4+2] + p1[1*4+1] * p2[1*4+2] + p1[1*4+2] * p2[2*4+2] + p1[1*4+3] * p2[3*4+2];
pfres2[1*4+3] = p1[1*4+0] * p2[0*4+3] + p1[1*4+1] * p2[1*4+3] + p1[1*4+2] * p2[2*4+3] + p1[1*4+3] * p2[3*4+3];
pfres2[2*4+0] = p1[2*4+0] * p2[0*4+0] + p1[2*4+1] * p2[1*4+0] + p1[2*4+2] * p2[2*4+0] + p1[2*4+3] * p2[3*4+0];
pfres2[2*4+1] = p1[2*4+0] * p2[0*4+1] + p1[2*4+1] * p2[1*4+1] + p1[2*4+2] * p2[2*4+1] + p1[2*4+3] * p2[3*4+1];
pfres2[2*4+2] = p1[2*4+0] * p2[0*4+2] + p1[2*4+1] * p2[1*4+2] + p1[2*4+2] * p2[2*4+2] + p1[2*4+3] * p2[3*4+2];
pfres2[2*4+3] = p1[2*4+0] * p2[0*4+3] + p1[2*4+1] * p2[1*4+3] + p1[2*4+2] * p2[2*4+3] + p1[2*4+3] * p2[3*4+3];
pfres2[3*4+0] = p1[3*4+0] * p2[0*4+0] + p1[3*4+1] * p2[1*4+0] + p1[3*4+2] * p2[2*4+0] + p1[3*4+3] * p2[3*4+0];
pfres2[3*4+1] = p1[3*4+0] * p2[0*4+1] + p1[3*4+1] * p2[1*4+1] + p1[3*4+2] * p2[2*4+1] + p1[3*4+3] * p2[3*4+1];
pfres2[3*4+2] = p1[3*4+0] * p2[0*4+2] + p1[3*4+1] * p2[1*4+2] + p1[3*4+2] * p2[2*4+2] + p1[3*4+3] * p2[3*4+2];
pfres2[3*4+3] = p1[3*4+0] * p2[0*4+3] + p1[3*4+1] * p2[1*4+3] + p1[3*4+2] * p2[2*4+3] + p1[3*4+3] * p2[3*4+3];
pfres2[3*4+0] = p1[3*4+0]*p2[0*4+0] + p1[3*4+1]*p2[1*4+0] + p1[3*4+2]*p2[2*4+0] + p1[3*4+3]*p2[3*4+0];
pfres2[3*4+1] = p1[3*4+0]*p2[0*4+1] + p1[3*4+1]*p2[1*4+1] + p1[3*4+2]*p2[2*4+1] + p1[3*4+3]*p2[3*4+1];
pfres2[3*4+2] = p1[3*4+0]*p2[0*4+2] + p1[3*4+1]*p2[1*4+2] + p1[3*4+2]*p2[2*4+2] + p1[3*4+3]*p2[3*4+2];
pfres2[3*4+3] = p1[3*4+0]*p2[0*4+3] + p1[3*4+1]*p2[1*4+3] + p1[3*4+2]*p2[2*4+3] + p1[3*4+3]*p2[3*4+3];
if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16);
if( pfres != pfres2 ) memcpy(pfres, pfres2, sizeof(T)*16);
return pfres;
}
@ -336,22 +361,23 @@ template <class T>
inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
{
T* pfres2;
if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
else pfres2 = pfres;
pfres2[0] = pf1[0]*pf2[0]+pf1[3]*pf2[3]+pf1[6]*pf2[6];
pfres2[1] = pf1[0]*pf2[1]+pf1[3]*pf2[4]+pf1[6]*pf2[7];
pfres2[2] = pf1[0]*pf2[2]+pf1[3]*pf2[5]+pf1[6]*pf2[8];
if (pfres == pf1)
pfres2 = (T*)alloca(9 * sizeof(T));
else
pfres2 = pfres;
pfres2[3] = pf1[1]*pf2[0]+pf1[4]*pf2[3]+pf1[7]*pf2[6];
pfres2[4] = pf1[1]*pf2[1]+pf1[4]*pf2[4]+pf1[7]*pf2[7];
pfres2[5] = pf1[1]*pf2[2]+pf1[4]*pf2[5]+pf1[7]*pf2[8];
pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6];
pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7];
pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8];
pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6];
pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7];
pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8];
pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6];
pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7];
pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8];
pfres2[6] = pf1[2]*pf2[0]+pf1[5]*pf2[3]+pf1[8]*pf2[6];
pfres2[7] = pf1[2]*pf2[1]+pf1[5]*pf2[4]+pf1[8]*pf2[7];
pfres2[8] = pf1[2]*pf2[2]+pf1[5]*pf2[5]+pf1[8]*pf2[8];
if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T));
return pfres;
}
@ -360,11 +386,16 @@ template <class T>
inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
{
T* pfres2;
if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
for(int i = 0; i < 4; ++i) {
for(int j = 0; j < 4; ++j) {
if (pfres == pf1)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
for (int i = 0; i < 4; ++i)
{
for (int j = 0; j < 4; ++j)
{
pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
}
}
@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
{
T* pfres2;
if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
else pfres2 = pfres;
if (pfres == pf)
pfres2 = (T*)alloca(3 * stride * sizeof(T));
else
pfres2 = pfres;
// inverse = C^t / det(pf) where C is the matrix of coefficients
@ -390,29 +424,40 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];
T fdet = pf[0*stride + 2] * pfres2[2*stride + 0] + pf[1*stride + 2] * pfres2[2*stride + 1] +
pf[2*stride + 2] * pfres2[2*stride + 2];
pf[2*stride + 2] * pfres2[2*stride + 2];
if( fabs(fdet) < 1e-6 ) return NULL;
if (fabs(fdet) < 1e-6) return NULL;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres != pf ) {
pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet;
pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet;
pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet;
if (pfres != pf)
{
pfres[0*stride+0] *= fdet;
pfres[0*stride+1] *= fdet;
pfres[0*stride+2] *= fdet;
pfres[1*stride+0] *= fdet;
pfres[1*stride+1] *= fdet;
pfres[1*stride+2] *= fdet;
pfres[2*stride+0] *= fdet;
pfres[2*stride+1] *= fdet;
pfres[2*stride+2] *= fdet;
return pfres;
}
pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3<dRe
template <class T> inline T* _inv4(const T* pf, T* pfres)
{
T* pfres2;
if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
else pfres2 = pfres;
if (pfres == pf)
pfres2 = (T*)alloca(16 * sizeof(T));
else
pfres2 = pfres;
// inverse = C^t / det(pf) where C is the matrix of coefficients
@ -439,7 +487,9 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
// determinants of all possibel 2x2 submatrices formed by last two rows
T fd0, fd1, fd2;
T f1, f2, f3;
fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
@ -482,20 +532,24 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
pfres2[3*4 + 3] = pf[2*4 + 0] * fd1 - pf[2*4 + 1] * f3 + pf[2*4 + 2] * fd0;
T fdet = pf[0*4 + 3] * pfres2[3*4 + 0] + pf[1*4 + 3] * pfres2[3*4 + 1] +
pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
if( fabs(fdet) < 1e-6) return NULL;
if (fabs(fdet) < 1e-6) return NULL;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres2 == pfres ) {
if (pfres2 == pfres)
{
mult(pfres, fdet, 16);
return pfres;
}
int i = 0;
while(i < 16) {
while (i < 16)
{
pfres[i] = pfres2[i] * fdet;
++i;
}
@ -507,18 +561,26 @@ inline dReal* inv4(const dReal* pf, dReal* pfres) { return _inv4<dReal>(pf, pfre
template <class T> inline T* _transpose3(const T* pf, T* pfres)
{
assert( pf != NULL && pfres != NULL );
assert(pf != NULL && pfres != NULL);
if( pf == pfres ) {
if (pf == pfres)
{
rswap(pfres[1], pfres[3]);
rswap(pfres[2], pfres[6]);
rswap(pfres[5], pfres[7]);
return pfres;
}
pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6];
pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7];
pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8];
pfres[0] = pf[0];
pfres[1] = pf[3];
pfres[2] = pf[6];
pfres[3] = pf[1];
pfres[4] = pf[4];
pfres[5] = pf[7];
pfres[6] = pf[2];
pfres[7] = pf[5];
pfres[8] = pf[8];
return pfres;
}
@ -528,9 +590,10 @@ inline double* transpose3(const double* pf, double* pfres) { return _transpose3(
template <class T> inline T* _transpose4(const T* pf, T* pfres)
{
assert( pf != NULL && pfres != NULL );
assert(pf != NULL && pfres != NULL);
if( pf == pfres ) {
if (pf == pfres)
{
rswap(pfres[1], pfres[4]);
rswap(pfres[2], pfres[8]);
rswap(pfres[3], pfres[12]);
@ -540,10 +603,23 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
return pfres;
}
pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12];
pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13];
pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14];
pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15];
pfres[0] = pf[0];
pfres[1] = pf[4];
pfres[2] = pf[8];
pfres[3] = pf[12];
pfres[4] = pf[1];
pfres[5] = pf[5];
pfres[6] = pf[9];
pfres[7] = pf[13];
pfres[8] = pf[2];
pfres[9] = pf[6];
pfres[10] = pf[10];
pfres[11] = pf[14];
pfres[12] = pf[3];
pfres[13] = pf[7];
pfres[14] = pf[11];
pfres[15] = pf[15];
return pfres;
}
@ -552,37 +628,37 @@ inline double* transpose4(const double* pf, double* pfres) { return _transpose4(
inline dReal dot2(const dReal* pf1, const dReal* pf2)
{
assert( pf1 != NULL && pf2 != NULL );
assert(pf1 != NULL && pf2 != NULL);
return pf1[0]*pf2[0] + pf1[1]*pf2[1];
}
inline dReal dot3(const dReal* pf1, const dReal* pf2)
{
assert( pf1 != NULL && pf2 != NULL );
assert(pf1 != NULL && pf2 != NULL);
return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2];
}
inline dReal dot4(const dReal* pf1, const dReal* pf2)
{
assert( pf1 != NULL && pf2 != NULL );
assert(pf1 != NULL && pf2 != NULL);
return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2] + pf1[3] * pf2[3];
}
inline dReal lengthsqr2(const dReal* pf)
{
assert( pf != NULL );
assert(pf != NULL);
return pf[0] * pf[0] + pf[1] * pf[1];
}
inline dReal lengthsqr3(const dReal* pf)
{
assert( pf != NULL );
assert(pf != NULL);
return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
}
inline dReal lengthsqr4(const dReal* pf)
{
assert( pf != NULL );
assert(pf != NULL);
return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
}
@ -590,7 +666,7 @@ inline dReal* normalize2(dReal* pfout, const dReal* pf)
{
assert(pf != NULL);
dReal f = pf[0]*pf[0] + pf[1]*pf[1];
dReal f = pf[0] * pf[0] + pf[1] * pf[1];
f = 1.0f / sqrtf(f);
pfout[0] = pf[0] * f;
pfout[1] = pf[1] * f;
@ -602,7 +678,7 @@ inline dReal* normalize3(dReal* pfout, const dReal* pf)
{
assert(pf != NULL);
dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2];
dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
f = 1.0f / sqrtf(f);
pfout[0] = pf[0] * f;
@ -616,7 +692,7 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)
{
assert(pf != NULL);
dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2] + pf[3]*pf[3];
dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
f = 1.0f / sqrtf(f);
pfout[0] = pf[0] * f;
@ -629,22 +705,25 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)
inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2)
{
assert( pfout != NULL && pf1 != NULL && pf2 != NULL );
assert(pfout != NULL && pf1 != NULL && pf2 != NULL);
dReal temp[3];
temp[0] = pf1[1] * pf2[2] - pf1[2] * pf2[1];
temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];
pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
pfout[0] = temp[0];
pfout[1] = temp[1];
pfout[2] = temp[2];
return pfout;
}
template <class T> inline void mult(T* pf, T fa, int r)
{
assert( pf != NULL );
assert(pf != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf[r] *= fa;
}
@ -653,25 +732,32 @@ template <class T> inline void mult(T* pf, T fa, int r)
template <class T, class S, class R>
inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
{
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
int j, k;
if( !badd ) memset(pfres, 0, sizeof(S) * r1 * c2);
if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2);
while(r1 > 0) {
while (r1 > 0)
{
--r1;
j = 0;
while(j < c2) {
while (j < c2)
{
k = 0;
while(k < c1) {
while (k < c1)
{
pfres[j] += pf1[k] * pf2[k*c2 + j];
++k;
}
++j;
}
pf1 += c1;
pfres += c2;
}
@ -681,26 +767,32 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
template <class T, class S, class R>
inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
{
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
int i, j, k;
if( !badd ) memset(pfres, 0, sizeof(S) * c1 * c2);
if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2);
i = 0;
while(i < c1) {
while (i < c1)
{
j = 0;
while(j < c2) {
while (j < c2)
{
k = 0;
while(k < r1) {
while (k < r1)
{
pfres[j] += pf1[k*c1] * pf2[k*c2 + j];
++k;
}
++j;
}
pfres += c2;
++pf1;
++i;
@ -712,25 +804,32 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
template <class T, class S, class R>
inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd)
{
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
int j, k;
if( !badd ) memset(pfres, 0, sizeof(S) * r1 * r2);
if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2);
while(r1 > 0) {
while (r1 > 0)
{
--r1;
j = 0;
while(j < r2) {
while (j < r2)
{
k = 0;
while(k < c1) {
while (k < c1)
{
pfres[j] += pf1[k] * pf2[j*c1 + k];
++k;
}
++j;
}
pf1 += c1;
pfres += r2;
}
@ -739,88 +838,107 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b
template <class T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
{
assert( pf1 != NULL && pf2 != NULL );
assert(pf1 != NULL && pf2 != NULL);
int j, k;
bool bdel = false;
if( pftemp == NULL ) {
if (pftemp == NULL)
{
pftemp = new T[c];
bdel = true;
}
while(r > 0) {
while (r > 0)
{
--r;
j = 0;
while(j < c) {
while (j < c)
{
pftemp[j] = 0.0;
k = 0;
while(k < c) {
while (k < c)
{
pftemp[j] += pf1[k] * pf2[k*c + j];
++k;
}
++j;
}
memcpy(pf1, pftemp, c * sizeof(T));
pf1 += c;
}
if( bdel ) delete[] pftemp;
if (bdel) delete[] pftemp;
return pf1;
}
template <class T, class S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp)
{
assert( pf1 != NULL && pf2 != NULL );
assert(pf1 != NULL && pf2 != NULL);
int i, j, k;
bool bdel = false;
if( pftemp == NULL ) {
if (pftemp == NULL)
{
pftemp = new S[r2];
bdel = true;
}
// do columns first
j = 0;
while(j < c2) {
while (j < c2)
{
i = 0;
while(i < r2) {
while (i < r2)
{
pftemp[i] = 0.0;
k = 0;
while(k < r2) {
while (k < r2)
{
pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
++k;
}
++i;
}
i = 0;
while(i < r2) {
*(pf2+i*c2+j) = pftemp[i];
while (i < r2)
{
*(pf2 + i*c2 + j) = pftemp[i];
++i;
}
++j;
}
if( bdel ) delete[] pftemp;
if (bdel) delete[] pftemp;
return pf1;
}
template <class T> inline void add(T* pf1, T* pf2, int r)
{
assert( pf1 != NULL && pf2 != NULL);
assert(pf1 != NULL && pf2 != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf1[r] += pf2[r];
}
@ -828,9 +946,10 @@ template <class T> inline void add(T* pf1, T* pf2, int r)
template <class T> inline void sub(T* pf1, T* pf2, int r)
{
assert( pf1 != NULL && pf2 != NULL);
assert(pf1 != NULL && pf2 != NULL);
while(r > 0) {
while (r > 0)
{
--r;
pf1[r] -= pf2[r];
}
@ -838,10 +957,12 @@ template <class T> inline void sub(T* pf1, T* pf2, int r)
template <class T> inline T normsqr(T* pf1, int r)
{
assert( pf1 != NULL );
assert(pf1 != NULL);
T d = 0.0;
while(r > 0) {
while (r > 0)
{
--r;
d += pf1[r] * pf1[r];
}
@ -852,7 +973,9 @@ template <class T> inline T normsqr(T* pf1, int r)
template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += sqr(pf1[length] - pf2[length]);
}
@ -863,7 +986,9 @@ template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
template <class T> inline T dot(T* pf1, T* pf2, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += pf1[length] * pf2[length];
}
@ -874,7 +999,9 @@ template <class T> inline T dot(T* pf1, T* pf2, int length)
template <class T> inline T sum(T* pf, int length)
{
T d = 0;
while(length > 0) {
while (length > 0)
{
--length;
d += pf[length];
}
@ -886,18 +1013,23 @@ template <class T> inline bool inv2(T* pf, T* pfres)
{
T fdet = pf[0] * pf[3] - pf[1] * pf[2];
if( fabs(fdet) < 1e-16 ) return false;
if (fabs(fdet) < 1e-16) return false;
fdet = 1 / fdet;
//if( pfdet != NULL ) *pfdet = fdet;
if( pfres != pf ) {
pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1];
pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0];
if (pfres != pf)
{
pfres[0] = fdet * pf[3];
pfres[1] = -fdet * pf[1];
pfres[2] = -fdet * pf[2];
pfres[3] = fdet * pf[0];
return true;
}
dReal ftemp = pf[0];
pfres[0] = pf[3] * fdet;
pfres[1] *= -fdet;
pfres[2] *= -fdet;