mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Part 2 of the re-formatting; ran AStyle over the headers.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2932 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
a6c4df49ea
commit
12ad5308ed
|
@ -36,6 +36,7 @@ using namespace std;
|
|||
|
||||
class GLWindow
|
||||
{
|
||||
|
||||
private:
|
||||
#ifdef GL_X11_WINDOW
|
||||
Display *glDisplay;
|
||||
|
@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem;
|
|||
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
|
||||
|
||||
// PS2 vertex
|
||||
|
||||
struct VertexGPU
|
||||
{
|
||||
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
|
||||
|
@ -264,6 +266,7 @@ struct VertexGPU
|
|||
};
|
||||
|
||||
// Almost the same as the previous one; which is used is controlled by the prim.fst flag
|
||||
|
||||
struct Vertex
|
||||
{
|
||||
u16 x, y, f, resv0; // note: xy is 12d3
|
||||
|
@ -281,7 +284,8 @@ extern int ppf;
|
|||
|
||||
// PSM values
|
||||
// PSM types == Texture Storage Format
|
||||
enum PSM_value{
|
||||
enum PSM_value
|
||||
{
|
||||
PSMCT32 = 0, // 000000
|
||||
PSMCT24 = 1, // 000001
|
||||
PSMCT16 = 2, // 000010
|
||||
|
@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}
|
|||
|
||||
//----------------------- Data from registers -----------------------
|
||||
|
||||
typedef union {
|
||||
typedef union
|
||||
{
|
||||
s64 SD;
|
||||
u64 UD;
|
||||
s32 SL[2];
|
||||
|
@ -340,7 +345,9 @@ typedef union {
|
|||
} reg64;
|
||||
|
||||
/* general purpose regs structs */
|
||||
typedef struct {
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int fbp;
|
||||
int fbw;
|
||||
int fbh;
|
||||
|
@ -349,7 +356,8 @@ typedef struct {
|
|||
} frameInfo;
|
||||
|
||||
// Create frame structure from known data
|
||||
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
|
||||
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm)
|
||||
{
|
||||
frameInfo frame;
|
||||
frame.fbp = fbp;
|
||||
frame.fbw = fbw;
|
||||
|
@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
|
|||
return frame;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 prim;
|
||||
|
||||
union {
|
||||
struct {
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u16 iip : 1;
|
||||
u16 tme : 1;
|
||||
u16 fge : 1;
|
||||
|
@ -380,8 +391,10 @@ typedef struct {
|
|||
|
||||
extern primInfo *prim;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 ate : 1;
|
||||
u32 atst : 3;
|
||||
u32 aref : 8;
|
||||
|
@ -395,13 +408,15 @@ typedef union {
|
|||
u32 _val;
|
||||
} pixTest;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int bp;
|
||||
int bw;
|
||||
int psm;
|
||||
} bufInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int tbp0;
|
||||
int tbw;
|
||||
int cbp;
|
||||
|
@ -432,13 +447,17 @@ union tex_0_info
|
|||
u64 csa : 5;
|
||||
u64 cld : 3;
|
||||
};
|
||||
|
||||
u64 _u64;
|
||||
u32 _u32[2];
|
||||
u16 _u16[4];
|
||||
u8 _u8[8];
|
||||
tex_0_info(u64 data) { _u64 = data; }
|
||||
|
||||
tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }
|
||||
|
||||
tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }
|
||||
|
||||
u32 tbw_mult()
|
||||
{
|
||||
if (tbw == 0)
|
||||
|
@ -446,26 +465,34 @@ union tex_0_info
|
|||
else
|
||||
return ((u32)tbw << 6);
|
||||
}
|
||||
|
||||
u32 psm_fix()
|
||||
{
|
||||
// printf ("psm %d\n", psm);
|
||||
if (psm == 9) return 1;
|
||||
|
||||
return psm;
|
||||
}
|
||||
|
||||
u32 tw_exp()
|
||||
{
|
||||
if (tw > 10) return (1 << 10);
|
||||
|
||||
return (1 << tw);
|
||||
}
|
||||
|
||||
u32 th_exp()
|
||||
{
|
||||
if (th > 10) return (1 << 10);
|
||||
|
||||
return (1 << th);
|
||||
}
|
||||
|
||||
u32 cpsm_fix()
|
||||
{
|
||||
return cpsm & 0xe;
|
||||
}
|
||||
|
||||
u32 csa_fix()
|
||||
{
|
||||
if (cpsm < 2)
|
||||
|
@ -480,7 +507,8 @@ union tex_0_info
|
|||
#define TEX_HIGHLIGHT 2
|
||||
#define TEX_HIGHLIGHT2 3
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int lcm;
|
||||
int mxl;
|
||||
int mmag;
|
||||
|
@ -490,7 +518,8 @@ typedef struct {
|
|||
int k;
|
||||
} tex1Info;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int wms;
|
||||
int wmt;
|
||||
int minu;
|
||||
|
@ -499,24 +528,28 @@ typedef struct {
|
|||
int maxv;
|
||||
} clampInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int cbw;
|
||||
int cou;
|
||||
int cov;
|
||||
} clutInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int tbp[3];
|
||||
int tbw[3];
|
||||
} miptbpInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 aem;
|
||||
u8 ta[2];
|
||||
float fta[2];
|
||||
} texaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int sx;
|
||||
int sy;
|
||||
int dx;
|
||||
|
@ -524,9 +557,12 @@ typedef struct {
|
|||
int dir;
|
||||
} trxposInfo;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 a : 2;
|
||||
u8 b : 2;
|
||||
u8 c : 2;
|
||||
|
@ -538,17 +574,20 @@ typedef struct {
|
|||
u8 fix : 8;
|
||||
} alphaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 zbp; // u16 address / 64
|
||||
u8 psm;
|
||||
u8 zmsk;
|
||||
} zbufInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int fba;
|
||||
} fbaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
Vertex gsvertex[3];
|
||||
u32 rgba;
|
||||
float q;
|
||||
|
@ -608,21 +647,26 @@ static __forceinline u32 RGBA16to32(u16 c)
|
|||
|
||||
// converts float16 [0,1] to BYTE [0,255] (negative inputs return 0; results above 255 are clamped to 255)
|
||||
// f is a u16
|
||||
static __forceinline u16 Float16ToBYTE(u16 f) {
|
||||
static __forceinline u16 Float16ToBYTE(u16 f)
|
||||
{
|
||||
//assert( !(f & 0x8000) );
|
||||
if (f & 0x8000) return 0;
|
||||
|
||||
u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
|
||||
|
||||
return d > 255 ? 255 : d;
|
||||
}
|
||||
|
||||
static __forceinline u16 Float16ToALPHA(u16 f) {
|
||||
static __forceinline u16 Float16ToALPHA(u16 f)
|
||||
{
|
||||
//assert( !(f & 0x8000) );
|
||||
if (f & 0x8000) return 0;
|
||||
|
||||
// round up instead of down (crash and burn), too much and charlie breaks
|
||||
u16 d = (((((f & 0x3ff) | 0x400)) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
|
||||
|
||||
d = (d) >> 1;
|
||||
|
||||
return d > 255 ? 255 : d;
|
||||
}
|
||||
|
||||
|
@ -651,11 +695,13 @@ static __forceinline u16 Float16ToALPHA(u16 f) {
|
|||
inline float Clamp(float fx, float fmin, float fmax)
|
||||
{
|
||||
if (fx < fmin) return fmin;
|
||||
|
||||
return fx > fmax ? fmax : fx;
|
||||
}
|
||||
|
||||
// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
|
||||
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
|
||||
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
|
||||
{
|
||||
if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
|
||||
return true;
|
||||
else
|
||||
|
@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
|
|||
{
|
||||
//return text_0_info(data).tbw_mult();
|
||||
int result = ZZOglGet_tbw_TexBits(data);
|
||||
|
||||
if (result == 0)
|
||||
return 64;
|
||||
else
|
||||
|
@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
|
|||
//return tex_0_info(data).psm_fix();
|
||||
int result = ZZOglGet_psm_TexBits(data) ;
|
||||
// printf ("result %d\n", result);
|
||||
|
||||
if (result == 9) result = 1;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -723,7 +772,9 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
|
|||
{
|
||||
//return tex_0_info(data).tw_exp();
|
||||
u16 result = ZZOglGet_tw_TexBits(data);
|
||||
|
||||
if (result > 10) result = 10;
|
||||
|
||||
return (1 << result);
|
||||
}
|
||||
|
||||
|
@ -741,7 +792,9 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
|
|||
{
|
||||
//return tex_0_info(dataLO, dataHI).th_exp();
|
||||
u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
|
||||
|
||||
if (result > 10) result = 10;
|
||||
|
||||
return (1 << result);
|
||||
}
|
||||
|
||||
|
@ -811,73 +864,75 @@ static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
|
|||
//-------------------------- frames
|
||||
// FrameInfo bits.
|
||||
// Obtain fbp -- Frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-8 (mask 0x1ff)
|
||||
inline int
|
||||
ZZOglGet_fbp_FrameBits(u32 data) {
|
||||
inline int ZZOglGet_fbp_FrameBits(u32 data)
|
||||
{
|
||||
return ((data) & 0x1ff);
|
||||
}
|
||||
|
||||
// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64.
|
||||
inline int
|
||||
ZZOglGet_fbp_FrameBitsMult(u32 data) {
|
||||
// Shifted left by 5 this gives address / 64, so the frame fbp and the texture tbp share the same units -- the "real address" is x64.
|
||||
inline int ZZOglGet_fbp_FrameBitsMult(u32 data)
|
||||
{
|
||||
return (ZZOglGet_fbp_FrameBits(data) << 5);
|
||||
}
|
||||
|
||||
// Obtain fbw -- width (Texels/64) -- from data. Bits 16-21 (mask 0x3f after shifting)
|
||||
inline int
|
||||
ZZOglGet_fbw_FrameBits(u32 data) {
|
||||
inline int ZZOglGet_fbw_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 16) & 0x3f);
|
||||
}
|
||||
|
||||
inline int
|
||||
ZZOglGet_fbw_FrameBitsMult(u32 data) {
|
||||
inline int ZZOglGet_fbw_FrameBitsMult(u32 data)
|
||||
{
|
||||
return (ZZOglGet_fbw_FrameBits(data) << 6);
|
||||
}
|
||||
|
||||
|
||||
// Obtain psm -- Pixel Storage Format -- from data. Bits 24-29.
|
||||
// (data & 0x3f000000) >> 24
|
||||
inline int
|
||||
ZZOglGet_psm_FrameBits(u32 data) {
|
||||
inline int ZZOglGet_psm_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 24) & 0x3f);
|
||||
}
|
||||
|
||||
// Function for calculating the overall height from frame data.
|
||||
inline int
|
||||
ZZOgl_fbh_Calc (int fbp, int fbw, int psm) {
|
||||
inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm)
|
||||
{
|
||||
int fbh = (1024 * 1024 - 64 * fbp) / fbw;
|
||||
fbh &= ~0x1f;
|
||||
if (PSMT_ISHALF(psm))
|
||||
fbh *= 2;
|
||||
if (fbh > 1024)
|
||||
fbh = 1024;
|
||||
|
||||
if (PSMT_ISHALF(psm)) fbh *= 2;
|
||||
if (fbh > 1024) fbh = 1024;
|
||||
|
||||
return fbh ;
|
||||
}
|
||||
inline int
|
||||
ZZOgl_fbh_Calc (frameInfo frame) {
|
||||
|
||||
inline int ZZOgl_fbh_Calc(frameInfo frame)
|
||||
{
|
||||
return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm);
|
||||
}
|
||||
|
||||
// Calculate fbh from data; fbh is derived here because it is not stored in the register.
|
||||
inline int
|
||||
ZZOglGet_fbh_FrameBitsCalc (u32 data) {
|
||||
inline int ZZOglGet_fbh_FrameBitsCalc(u32 data)
|
||||
{
|
||||
int fbh = 0;
|
||||
int fbp = ZZOglGet_fbp_FrameBits(data);
|
||||
int fbw = ZZOglGet_fbw_FrameBits(data);
|
||||
int psm = ZZOglGet_psm_FrameBits(data);
|
||||
if (fbw > 0)
|
||||
fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
|
||||
|
||||
if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
|
||||
|
||||
return fbh ;
|
||||
}
|
||||
|
||||
// Obtain fbm -- frame mask -- from data. All higher word.
|
||||
inline u32
|
||||
ZZOglGet_fbm_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbm_FrameBits(u32 data)
|
||||
{
|
||||
return (data);
|
||||
}
|
||||
|
||||
// Obtain fbm -- frame mask -- from data (the whole upper word), with the alpha byte forced on when psm == PSMCT24 (no alpha channel)
|
||||
inline u32
|
||||
ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
|
||||
inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI)
|
||||
{
|
||||
if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1)
|
||||
return (dataHI | 0xff000000);
|
||||
else
|
||||
|
@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
|
|||
}
|
||||
|
||||
// obtain colormask RED
|
||||
inline u32
|
||||
ZZOglGet_fbmRed_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmRed_FrameBits(u32 data)
|
||||
{
|
||||
return (data & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Green
|
||||
inline u32
|
||||
ZZOglGet_fbmGreen_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 8) & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Blue
|
||||
inline u32
|
||||
ZZOglGet_fbmBlue_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 16) & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Alpha
|
||||
inline u32
|
||||
ZZOglGet_fbmAlpha_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 24) & 0xff);
|
||||
}
|
||||
|
||||
// obtain the top bit (bit 31) of the frame mask as 0 or 1
|
||||
inline u32
|
||||
ZZOglGet_fbmHighByte(u32 data) {
|
||||
inline u32 ZZOglGet_fbmHighByte(u32 data)
|
||||
{
|
||||
return (!!(data & 0x80000000));
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------- tex0 comparison
|
||||
// Check if old and new tex0 registers have only clut difference
|
||||
inline bool
|
||||
ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f)));
|
||||
}
|
||||
|
||||
// Check if the CLUT registers are same, except CLD
|
||||
inline bool
|
||||
ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0));
|
||||
}
|
||||
|
||||
// Check if CLUT storage mode is not changed (CSA, CSM and CPSM)
|
||||
inline bool
|
||||
ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ union GIFTag
|
|||
{
|
||||
u64 ai64[2];
|
||||
u32 ai32[4];
|
||||
|
||||
struct
|
||||
{
|
||||
u32 NLOOP : 15;
|
||||
|
@ -51,6 +52,7 @@ union GIFTag
|
|||
u32 NREG : 4;
|
||||
u64 REGS : 64;
|
||||
};
|
||||
|
||||
void set(u32 *data)
|
||||
{
|
||||
for (int i = 0; i <= 3; i++)
|
||||
|
@ -58,10 +60,12 @@ union GIFTag
|
|||
ai32[i] = data[i];
|
||||
}
|
||||
}
|
||||
|
||||
GIFTag(u32 *data)
|
||||
{
|
||||
set(data);
|
||||
}
|
||||
|
||||
GIFTag(){ ai64[0] = 0; ai64[1] = 0; }
|
||||
};
|
||||
|
||||
|
@ -101,7 +105,6 @@ typedef struct
|
|||
// Hmm....
|
||||
nreg = tag.NREG << 2;
|
||||
if (nreg == 0) nreg = 64;
|
||||
|
||||
regs = tag.REGS;
|
||||
reg = 0;
|
||||
|
||||
|
@ -124,13 +127,12 @@ typedef struct
|
|||
reg = 0;
|
||||
nloop--;
|
||||
|
||||
if (nloop == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (nloop == 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
void setTag(u32 *data)
|
||||
{
|
||||
|
@ -158,7 +160,6 @@ typedef struct
|
|||
regs = *(u64 *)(data + 2);
|
||||
regn = 0;
|
||||
if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);
|
||||
|
||||
break;
|
||||
|
||||
case GIF_FLG_REGLIST:
|
||||
|
@ -167,6 +168,7 @@ typedef struct
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
} pathInfo;
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ const int BLOCK_TEXHEIGHT = 512;
|
|||
|
||||
extern PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
|
||||
|
||||
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
|
||||
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
|
||||
|
@ -53,6 +52,7 @@ enum Psm_Size
|
|||
|
||||
// Both of the following structs should probably be local class variables or in a namespace,
|
||||
// but this works for the moment.
|
||||
|
||||
struct TransferData
|
||||
{
|
||||
// Signed because Visual C++ is weird.
|
||||
|
@ -88,6 +88,7 @@ struct TransferFuncts
|
|||
};
|
||||
|
||||
// rest not visible externally
|
||||
|
||||
struct BLOCK
|
||||
{
|
||||
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
|
||||
|
@ -278,7 +279,9 @@ static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32
|
|||
{
|
||||
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)];
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
|
@ -305,6 +308,7 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b
|
|||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
@ -330,7 +334,9 @@ static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32
|
|||
{
|
||||
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw);
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
|
@ -380,9 +386,11 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
|
|||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)
|
||||
return pix >> 4;
|
||||
else return pix & 0xf;
|
||||
else
|
||||
return pix & 0xf;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
|
@ -432,7 +440,9 @@ static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u3
|
|||
{
|
||||
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
|
@ -459,6 +469,7 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32
|
|||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
@ -484,7 +495,9 @@ static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u
|
|||
{
|
||||
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0]; buf[1] = pix[1]; buf[2] = pix[2];
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
|
@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
|
|||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)
|
||||
return pix >> 4;
|
||||
else
|
||||
|
|
|
@ -15,6 +15,7 @@ template <class T>
|
|||
static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
|
||||
|
||||
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
|
||||
{
|
||||
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
|
@ -28,6 +29,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
|
@ -65,6 +67,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -97,6 +100,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
|
|||
else
|
||||
{
|
||||
assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8);
|
||||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
|
||||
|
@ -129,11 +133,14 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
|
|||
tempX += nSize / 3;
|
||||
nSize = 0;
|
||||
}
|
||||
|
||||
assert(gs.imageTransfer == -1 || nSize == 0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -149,6 +156,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthli
|
|||
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
|
||||
if (widthlimit > 2)
|
||||
{
|
||||
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
|
@ -181,6 +189,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthli
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -189,11 +198,18 @@ template <class T>
|
|||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_: return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
|
||||
case PSM_4_: return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
|
||||
case PSM_24_: return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
|
||||
case PSM_:
|
||||
return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
|
||||
|
||||
case PSM_4_:
|
||||
return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
|
||||
|
||||
case PSM_24_:
|
||||
return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -206,8 +222,10 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
|
|||
{
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
buf += pitch - fracX;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -221,8 +239,10 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
|
|||
{
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
buf += 3 * (pitch - fracX);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -237,8 +257,10 @@ static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthli
|
|||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
buf += (pitch - fracX) / 2;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -247,11 +269,18 @@ template <class T>
|
|||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_: return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
case PSM_4_: return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
case PSM_24_: return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
case PSM_:
|
||||
return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
|
||||
case PSM_4_:
|
||||
return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
|
||||
case PSM_24_:
|
||||
return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -82,7 +82,8 @@ static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
|
|||
|
||||
static __forceinline void pcsx2_aligned_free(void* pmem)
|
||||
{
|
||||
if( pmem != NULL ) {
|
||||
if (pmem != NULL)
|
||||
{
|
||||
char* p = (char*)pmem;
|
||||
free(p - (int)*(u16*)(p - 2));
|
||||
}
|
||||
|
@ -113,20 +114,24 @@ struct RECT
|
|||
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int x, y, w, h;
|
||||
} Rect;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int x, y;
|
||||
} Point;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int x0, y0;
|
||||
int x1, y1;
|
||||
} Rect2;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int x, y, c;
|
||||
} PointC;
|
||||
|
||||
|
@ -145,6 +150,7 @@ typedef struct {
|
|||
#define GSOPTION_LOADED 0x8000
|
||||
|
||||
//Configuration values.
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u8 mrtdepth; // write color in render target
|
||||
|
@ -310,10 +316,12 @@ static __forceinline u64 GetTickFrequency()
|
|||
|
||||
static __forceinline u64 GetCPUTicks()
|
||||
{
|
||||
|
||||
struct timeval t;
|
||||
gettimeofday(&t, NULL);
|
||||
return ((u64)t.tv_sec*GetTickFrequency()) + t.tv_usec;
|
||||
}
|
||||
|
||||
#else
|
||||
static __aligned16 LARGE_INTEGER lfreq;
|
||||
|
||||
|
@ -333,34 +341,39 @@ static __forceinline u64 GetCPUTicks()
|
|||
QueryPerformanceCounter(&count);
|
||||
return count.QuadPart;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
|
||||
class CInterfacePtr
|
||||
{
|
||||
|
||||
public:
|
||||
inline CInterfacePtr() : ptr(NULL) {}
|
||||
inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); }
|
||||
inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); }
|
||||
|
||||
inline T* operator*() { assert(ptr != NULL); return *ptr; }
|
||||
inline T* operator->() { return ptr; }
|
||||
inline T* get() { return ptr; }
|
||||
|
||||
inline void release() {
|
||||
inline void release()
|
||||
{
|
||||
if (ptr != NULL) { ptr->Release(); ptr = NULL; }
|
||||
}
|
||||
|
||||
inline operator T*() { return ptr; }
|
||||
|
||||
inline bool operator==(T* rhs) { return ptr == rhs; }
|
||||
inline bool operator!=(T* rhs) { return ptr != rhs; }
|
||||
|
||||
inline CInterfacePtr& operator= (T* newptr) {
|
||||
inline CInterfacePtr& operator= (T* newptr)
|
||||
{
|
||||
if (ptr != NULL) ptr->Release();
|
||||
|
||||
ptr = newptr;
|
||||
|
||||
if (ptr != NULL) ptr->AddRef();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -391,6 +404,7 @@ public:
|
|||
|
||||
class DVProfileFunc
|
||||
{
|
||||
|
||||
public:
|
||||
u32 dwUserData;
|
||||
static __forceinline DVProfileFunc(char* pname) {}
|
||||
|
|
|
@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
|
|||
return false;
|
||||
}
|
||||
|
||||
// First try to draw frame from targets. It's
|
||||
// First try to draw frame from targets.
|
||||
inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
{
|
||||
// get the start and end addresses of the buffer
|
||||
|
@ -662,8 +662,14 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
|
|||
// The same as the previous, but from memory.
|
||||
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
|
||||
// this is the function that does it.
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int interlace, int bInterlace)
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
{
|
||||
// get the start and end addresses of the buffer
|
||||
int bpp = RenderGetBpp(texframe.psm);
|
||||
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
|
||||
|
||||
int start, end;
|
||||
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
|
||||
|
||||
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
|
||||
{
|
||||
|
@ -676,23 +682,23 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
if ((pmemtarg == NULL) || (bInterlace >= 2))
|
||||
ZZLog::Error_Log("CRCR Check for memory shader fault.");
|
||||
|
||||
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
|
||||
|
||||
SetShaderCaller("RenderCheckForMemory");
|
||||
|
||||
SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
|
||||
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
|
||||
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
|
||||
|
||||
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
|
||||
|
||||
// finally render from the memory (note that the stencil buffer will keep previous regions)
|
||||
Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
|
||||
Vector v;
|
||||
|
||||
// Fixme: Why is this here?
|
||||
// We should probably call RenderSetTargetBitTex instead.
|
||||
if (g_bCRTCBilinear)
|
||||
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex");
|
||||
v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT);
|
||||
else
|
||||
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex");
|
||||
v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT);
|
||||
|
||||
// finally render from the memory (note that the stencil buffer will keep previous regions)
|
||||
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
|
||||
|
||||
v = RenderSetTargetBitTrans(texframe.th);
|
||||
|
||||
|
@ -700,10 +706,12 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
|
||||
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
|
||||
|
||||
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
|
||||
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
|
||||
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
|
||||
|
||||
SETPIXELSHADER(ppsCRTC[bInterlace].prog);
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
}
|
||||
|
||||
|
@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace)
|
|||
// start from the last circuit
|
||||
for (int i = !PMODE->SLBG; i >= 0; --i)
|
||||
{
|
||||
|
||||
tex0Info& texframe = dispinfo[i];
|
||||
|
||||
if (texframe.th <= 1) continue;
|
||||
|
@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace)
|
|||
|
||||
// if we could not draw the image from the targets, do it from memory
|
||||
if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace))
|
||||
RenderCheckForMemory(texframe, listTargs, interlace, bInterlace);
|
||||
RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
|
|
|
@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
|
|||
}
|
||||
|
||||
// After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made.
|
||||
inline void ZeroGS::VB::CheckFrame16vs32Convesion()
|
||||
inline void ZeroGS::VB::CheckFrame16vs32Conversion()
|
||||
{
|
||||
if (prndr->status & CRenderTarget::TS_NeedConvert32)
|
||||
{
|
||||
|
@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp)
|
|||
|
||||
bChanged = CheckFrameResolveRender(tbp);
|
||||
|
||||
CheckFrame16vs32Convesion();
|
||||
CheckFrame16vs32Conversion();
|
||||
}
|
||||
else if (bNeedZCheck)
|
||||
{
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
#ifndef RasterFont_Header
|
||||
#define RasterFont_Header
|
||||
|
||||
class RasterFont {
|
||||
class RasterFont
|
||||
{
|
||||
|
||||
protected:
|
||||
int fontOffset;
|
||||
|
||||
|
|
|
@ -22,23 +22,27 @@
|
|||
#define TARGET_VIRTUAL_KEY 0x80000000
|
||||
#include "PS2Edefs.h"
|
||||
|
||||
inline Vector DefaultOneColor( FRAGMENTSHADER ptr ) {
|
||||
inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
|
||||
{
|
||||
Vector v = Vector(1, 1, 1, 1);
|
||||
cgGLSetParameter4fv(ptr.sOneColor, v);
|
||||
return v ;
|
||||
}
|
||||
|
||||
namespace ZeroGS {
|
||||
namespace ZeroGS
|
||||
{
|
||||
|
||||
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);
|
||||
|
||||
// manages render targets
|
||||
|
||||
class CRenderTargetMngr
|
||||
{
|
||||
public:
|
||||
typedef map<u32, CRenderTarget*> MAPTARGETS;
|
||||
|
||||
enum TargetOptions {
|
||||
enum TargetOptions
|
||||
{
|
||||
TO_DepthBuffer = 1,
|
||||
TO_StrictHeight = 2, // height returned has to be the same as requested
|
||||
TO_Virtual = 4
|
||||
|
@ -50,7 +54,8 @@ namespace ZeroGS {
|
|||
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
|
||||
|
||||
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
|
||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) {
|
||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
|
||||
{
|
||||
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));
|
||||
|
||||
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
|
||||
|
@ -68,7 +73,8 @@ namespace ZeroGS {
|
|||
|
||||
// resolves all targets within a range
|
||||
__forceinline void Resolve(int start, int end);
|
||||
__forceinline void ResolveAll() {
|
||||
__forceinline void ResolveAll()
|
||||
{
|
||||
for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it)
|
||||
it->second->Resolve();
|
||||
}
|
||||
|
@ -77,7 +83,8 @@ namespace ZeroGS {
|
|||
void DestroyIntersecting(CRenderTarget* prndr);
|
||||
|
||||
// promotes a target from virtual to real
|
||||
inline CRenderTarget* Promote(u32 key) {
|
||||
inline CRenderTarget* Promote(u32 key)
|
||||
{
|
||||
assert(!(key & TARGET_VIRTUAL_KEY));
|
||||
|
||||
// promote to regular targ
|
||||
|
@ -90,7 +97,9 @@ namespace ZeroGS {
|
|||
DestroyIntersecting(ptarg);
|
||||
|
||||
it = mapTargets.find(key);
|
||||
if( it != mapTargets.end() ) {
|
||||
|
||||
if (it != mapTargets.end())
|
||||
{
|
||||
DestroyTarg(it->second);
|
||||
it->second = ptarg;
|
||||
}
|
||||
|
@ -101,6 +110,7 @@ namespace ZeroGS {
|
|||
ptarg->status = CRenderTarget::TS_Resolved;
|
||||
else
|
||||
ptarg->status = CRenderTarget::TS_NeedUpdate;
|
||||
|
||||
return ptarg;
|
||||
}
|
||||
|
||||
|
@ -111,8 +121,10 @@ namespace ZeroGS {
|
|||
|
||||
class CMemoryTargetMngr
|
||||
{
|
||||
|
||||
public:
|
||||
CMemoryTargetMngr() : curstamp(0) {}
|
||||
|
||||
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
|
||||
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
||||
|
@ -138,10 +150,12 @@ namespace ZeroGS {
|
|||
void Destroy();
|
||||
|
||||
// since GetTex can delete textures to free up mem, it is dangerous if using that texture, so specify at least one other tex to save
|
||||
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) {
|
||||
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete)
|
||||
{
|
||||
map<u32, u32>::iterator it = mapTextures.find(bitvalue);
|
||||
if( it != mapTextures.end() )
|
||||
return it->second;
|
||||
|
||||
if (it != mapTextures.end()) return it->second;
|
||||
|
||||
return GetTexInt(bitvalue, ptexDoNotDelete);
|
||||
}
|
||||
|
||||
|
@ -152,24 +166,31 @@ namespace ZeroGS {
|
|||
};
|
||||
|
||||
// manages a collection of [start, end) ranges
|
||||
|
||||
class CRangeManager
|
||||
{
|
||||
public:
|
||||
CRangeManager() {
|
||||
CRangeManager()
|
||||
{
|
||||
ranges.reserve(16);
|
||||
}
|
||||
|
||||
// [start, end)
|
||||
struct RANGE {
|
||||
|
||||
struct RANGE
|
||||
{
|
||||
RANGE() {}
|
||||
|
||||
inline RANGE(int start, int end) : start(start), end(end) {}
|
||||
|
||||
int start, end;
|
||||
};
|
||||
|
||||
// works in semi logN
|
||||
void Insert(int start, int end);
|
||||
void RangeSanityCheck();
|
||||
inline void Clear() {
|
||||
inline void Clear()
|
||||
{
|
||||
ranges.resize(0);
|
||||
}
|
||||
|
||||
|
@ -183,7 +204,8 @@ namespace ZeroGS {
|
|||
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
|
||||
|
||||
// Real rendered width, depends on AA and AAneg.
|
||||
inline int RW(int tbw) {
|
||||
inline int RW(int tbw)
|
||||
{
|
||||
if (s_AAx >= s_AAz)
|
||||
return (tbw << (s_AAx - s_AAz));
|
||||
else
|
||||
|
@ -191,7 +213,8 @@ namespace ZeroGS {
|
|||
}
|
||||
|
||||
// Real rendered height, depends on AA and AAneg.
|
||||
inline int RH(int tbh) {
|
||||
inline int RH(int tbh)
|
||||
{
|
||||
if (s_AAy >= s_AAw)
|
||||
return (tbh << (s_AAy - s_AAw));
|
||||
else
|
||||
|
@ -204,7 +227,8 @@ namespace ZeroGS {
|
|||
}*/
|
||||
|
||||
// This sequence of calls is used in 3 places, so creating the targets list is gathered into one function.
|
||||
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end) {
|
||||
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
|
||||
{
|
||||
list<ZeroGS::CRenderTarget*> listTargs;
|
||||
s_DepthRTs.GetTargs(start, end, listTargs);
|
||||
s_RTs.GetTargs(start, end, listTargs);
|
||||
|
@ -224,24 +248,30 @@ namespace ZeroGS {
|
|||
|
||||
// Encode the frame's fbw and fbp into a key that is used by the target manager
|
||||
// These are 3 variants of one function; the key depends on fbp and fbw.
|
||||
inline u32 GetFrameKey (const frameInfo& frame) {
|
||||
inline u32 GetFrameKey(const frameInfo& frame)
|
||||
{
|
||||
return (((frame.fbw) << 16) | (frame.fbp));
|
||||
}
|
||||
inline u32 GetFrameKey ( CRenderTarget* frame ) {
|
||||
|
||||
inline u32 GetFrameKey(CRenderTarget* frame)
|
||||
{
|
||||
return (((frame->fbw) << 16) | (frame->fbp));
|
||||
}
|
||||
|
||||
inline u32 GetFrameKey (int fbp, int fbw, VB& curvb) {
|
||||
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb)
|
||||
{
|
||||
return (((fbw) << 16) | (fbp));
|
||||
}
|
||||
|
||||
inline u16 ShiftHeight (int fbh, int fbp, int fbhCalc) {
|
||||
inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc)
|
||||
{
|
||||
return fbh;
|
||||
}
|
||||
|
||||
//FIXME: this code for P4 ad KH1. It should not be such strange!
|
||||
//FIXME: this code is for P4 and KH1. It should not be so strange!
|
||||
//Dummy targets were deleted from mapTargets, but not erased.
|
||||
inline u32 GetFrameKeyDummy (const frameInfo& frame) {
|
||||
inline u32 GetFrameKeyDummy(const frameInfo& frame)
|
||||
{
|
||||
// if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh)
|
||||
// printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame));
|
||||
// height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be
|
||||
|
@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) {
|
|||
return (((frame.fbw) << 16) | frame.fbh);
|
||||
}
|
||||
|
||||
inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) {
|
||||
inline u32 GetFrameKeyDummy(CRenderTarget* frame)
|
||||
{
|
||||
if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300)
|
||||
return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm));
|
||||
else
|
||||
|
|
|
@ -143,11 +143,13 @@ using namespace std;
|
|||
extern const char* ShaderCallerName;
|
||||
extern const char* ShaderHandleName;
|
||||
|
||||
inline void SetShaderCaller(const char* Name) {
|
||||
inline void SetShaderCaller(const char* Name)
|
||||
{
|
||||
ShaderCallerName = Name;
|
||||
}
|
||||
|
||||
inline void SetHandleName(const char* Name) {
|
||||
inline void SetHandleName(const char* Name)
|
||||
{
|
||||
ShaderHandleName = Name;
|
||||
}
|
||||
|
||||
|
@ -180,33 +182,43 @@ const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
|
|||
|
||||
//------------------------ Inlines -------------------------
|
||||
|
||||
inline const char *error_name(int err) {
|
||||
switch (err) {
|
||||
inline const char *error_name(int err)
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case GL_NO_ERROR:
|
||||
return "GL_NO_ERROR";
|
||||
|
||||
case GL_INVALID_ENUM:
|
||||
return "GL_INVALID_ENUM";
|
||||
|
||||
case GL_INVALID_VALUE:
|
||||
return "GL_INVALID_VALUE";
|
||||
|
||||
case GL_INVALID_OPERATION:
|
||||
return "GL_INVALID_OPERATION";
|
||||
|
||||
case GL_STACK_OVERFLOW:
|
||||
return "GL_STACK_OVERFLOW";
|
||||
|
||||
case GL_STACK_UNDERFLOW:
|
||||
return "GL_STACK_UNDERFLOW";
|
||||
|
||||
case GL_OUT_OF_MEMORY:
|
||||
return "GL_OUT_OF_MEMORY";
|
||||
|
||||
case GL_TABLE_TOO_LARGE:
|
||||
return "GL_TABLE_TOO_LARGE";
|
||||
|
||||
default:
|
||||
return "Unknown GL error";
|
||||
}
|
||||
}
|
||||
|
||||
// inline for extemely ofthen used sequence
|
||||
// inline for an extremely often used sequence
|
||||
// This is turning off all gl functions. Safe to do updates.
|
||||
inline void
|
||||
DisableAllgl () {
|
||||
inline void DisableAllgl()
|
||||
{
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_ALPHA_TEST);
|
||||
|
@ -217,12 +229,12 @@ DisableAllgl () {
|
|||
}
|
||||
|
||||
// Calculate maximum height for target
|
||||
inline int
|
||||
get_maxheight(int fbp, int fbw, int psm)
|
||||
inline int get_maxheight(int fbp, int fbw, int psm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (fbw == 0) return 0;
|
||||
|
||||
if (PSMT_ISHALF(psm))
|
||||
ret = (((0x00100000 - 64 * fbp) / fbw) & ~0x1f) * 2;
|
||||
else
|
||||
|
@ -231,15 +243,15 @@ get_maxheight(int fbp, int fbw, int psm)
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Does psm need Alpha test with alpha expansion
|
||||
inline int
|
||||
nNeedAlpha(u8 psm) {
|
||||
// Does psm need Alpha test with alpha expansion?
|
||||
inline int nNeedAlpha(u8 psm)
|
||||
{
|
||||
return (psm == PSMCT24 || psm == PSMCT16 || psm == PSMCT16S);
|
||||
}
|
||||
|
||||
// Get the color storage model (psm), which is important at the flush stage.
|
||||
inline u8
|
||||
GetTexCPSM(const tex0Info& tex) {
|
||||
inline u8 GetTexCPSM(const tex0Info& tex)
|
||||
{
|
||||
if (PSMT_ISCLUT(tex.psm))
|
||||
return tex.cpsm;
|
||||
else
|
||||
|
@ -257,6 +269,7 @@ GetTexCPSM(const tex0Info& tex) {
|
|||
#endif
|
||||
|
||||
// ------------------------ Types -------------------------
|
||||
|
||||
struct FRAGMENTSHADER
|
||||
{
|
||||
FRAGMENTSHADER() : prog(0), sMemory(0), sFinal(0), sBitwiseANDX(0), sBitwiseANDY(0), sInterlace(0), sCLUT(0), sOneColor(0), sBitBltZ(0),
|
||||
|
@ -275,6 +288,7 @@ struct FRAGMENTSHADER
|
|||
{
|
||||
CGparameter p;
|
||||
p = cgGetNamedParameter(prog, name);
|
||||
|
||||
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE) var = p;
|
||||
}
|
||||
|
||||
|
@ -283,12 +297,14 @@ struct FRAGMENTSHADER
|
|||
CGparameter p;
|
||||
|
||||
p = cgGetNamedParameter(prog, name);
|
||||
|
||||
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
|
||||
{
|
||||
cgGLSetTextureParameter(p, texobj);
|
||||
cgGLEnableTextureParameter(p);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -297,11 +313,13 @@ struct FRAGMENTSHADER
|
|||
CGparameter p;
|
||||
|
||||
p = cgGetNamedParameter(prog, name);
|
||||
|
||||
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
|
||||
{
|
||||
cgConnectParameter(tex, p);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -310,12 +328,14 @@ struct FRAGMENTSHADER
|
|||
CGparameter p;
|
||||
|
||||
p = cgGetNamedParameter(prog, name);
|
||||
|
||||
if (p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE)
|
||||
{
|
||||
//cgGLEnableTextureParameter(p);
|
||||
tex = p;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -330,6 +350,7 @@ struct FRAGMENTSHADER
|
|||
cgGLSetParameter4fv(p, v);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
@ -337,9 +358,11 @@ struct FRAGMENTSHADER
|
|||
struct VERTEXSHADER
|
||||
{
|
||||
VERTEXSHADER() : prog(0), sBitBltPos(0), sBitBltTex(0) {}
|
||||
|
||||
CGprogram prog;
|
||||
CGparameter sBitBltPos, sBitBltTex, fBitBltTrans; // vertex shader constants
|
||||
};
|
||||
|
||||
// ------------------------ Variables -------------------------
|
||||
// all textures have this width
|
||||
//#define GPU_TEXWIDTH 512
|
||||
|
@ -469,7 +492,6 @@ enum GAME_HACK_OPTIONS
|
|||
|
||||
#define USEALPHATESTING (!(g_GameSettings&GAME_NOALPHATEST))
|
||||
|
||||
|
||||
// CRC Information
|
||||
enum Title_Info
|
||||
{
|
||||
|
@ -571,8 +593,6 @@ enum GAME_HACK_OPTIONS
|
|||
s32 v_thresh, t_thresh;
|
||||
};
|
||||
|
||||
// Note; all the options surrounded by /**/ are ones that were getting chosen previously because of missing break statements, and might not be appropriate.
|
||||
// I'll have to check and see if they work better with or without them.
|
||||
static const Game_Info crc_game_list[] =
|
||||
{
|
||||
{0xA3D63039, Xenosaga, JP, GAME_DOPARALLELCTX, 64, 32},
|
||||
|
@ -641,7 +661,8 @@ extern u8* g_pbyGSMemory;
|
|||
extern u8* g_pbyGSClut; // the temporary clut buffer
|
||||
extern CGparameter g_vparamPosXY[2], g_fparamFogColor;
|
||||
|
||||
namespace ZeroGS {
|
||||
namespace ZeroGS
|
||||
{
|
||||
|
||||
typedef void (*DrawFn)();
|
||||
|
||||
|
@ -652,8 +673,10 @@ namespace ZeroGS {
|
|||
};
|
||||
|
||||
// manages render-to-texture targets
|
||||
|
||||
class CRenderTarget
|
||||
{
|
||||
|
||||
public:
|
||||
CRenderTarget();
|
||||
virtual ~CRenderTarget();
|
||||
|
@ -666,7 +689,8 @@ namespace ZeroGS {
|
|||
void SetViewport();
|
||||
|
||||
// copies/creates the feedback contents
|
||||
inline void CreateFeedback() {
|
||||
inline void CreateFeedback()
|
||||
{
|
||||
if (ptexFeedback == 0 || !(status&TS_FeedbackReady))
|
||||
_CreateFeedback();
|
||||
}
|
||||
|
@ -678,6 +702,7 @@ namespace ZeroGS {
|
|||
virtual void ConvertTo16(); // converts a psm==0 target, to a psm==2
|
||||
|
||||
virtual bool IsDepth() { return false; }
|
||||
|
||||
void SetRenderTarget(int targ);
|
||||
|
||||
void* psys; // system data used for comparison
|
||||
|
@ -702,7 +727,8 @@ namespace ZeroGS {
|
|||
// this is optionally used when feedback effects are used (render target is used as a texture when rendering to itself)
|
||||
u32 ptexFeedback;
|
||||
|
||||
enum TargetStatus {
|
||||
enum TargetStatus
|
||||
{
|
||||
TS_Resolved = 1,
|
||||
TS_NeedUpdate = 2,
|
||||
TS_Virtual = 4, // currently not mapped to memory
|
||||
|
@ -712,14 +738,17 @@ namespace ZeroGS {
|
|||
};
|
||||
inline Vector DefaultBitBltPos() ;
|
||||
inline Vector DefaultBitBltTex() ;
|
||||
|
||||
private:
|
||||
void _CreateFeedback();
|
||||
inline bool InitialiseDefaultTexture(u32 *p_ptr, int fbw, int fbh) ;
|
||||
};
|
||||
|
||||
// manages zbuffers
|
||||
|
||||
class CDepthTarget : public CRenderTarget
|
||||
{
|
||||
|
||||
public:
|
||||
CDepthTarget();
|
||||
virtual ~CDepthTarget();
|
||||
|
@ -741,6 +770,7 @@ namespace ZeroGS {
|
|||
};
|
||||
|
||||
// manages contiguous chunks of memory (width is always 1024)
|
||||
|
||||
class CMemoryTarget
|
||||
{
|
||||
public:
|
||||
|
@ -748,6 +778,7 @@ namespace ZeroGS {
|
|||
{
|
||||
inline TEXTURE() : tex(0), memptr(NULL), ref(0) {}
|
||||
inline ~TEXTURE() { glDeleteTextures(1, &tex); _aligned_free(memptr); }
|
||||
|
||||
u32 tex;
|
||||
u8* memptr; // GPU memory used for comparison
|
||||
int ref;
|
||||
|
@ -755,9 +786,12 @@ namespace ZeroGS {
|
|||
|
||||
inline CMemoryTarget() : ptex(NULL), starty(0), height(0), realy(0), realheight(0), usedstamp(0), psm(0), cpsm(0), channels(0), clearminy(0), clearmaxy(0), validatecount(0) {}
|
||||
|
||||
inline CMemoryTarget(const CMemoryTarget& r) {
|
||||
inline CMemoryTarget(const CMemoryTarget& r)
|
||||
{
|
||||
ptex = r.ptex;
|
||||
|
||||
if (ptex != NULL) ptex->ref++;
|
||||
|
||||
starty = r.starty;
|
||||
height = r.height;
|
||||
realy = r.realy;
|
||||
|
@ -776,10 +810,11 @@ namespace ZeroGS {
|
|||
|
||||
~CMemoryTarget() { Destroy(); }
|
||||
|
||||
inline void Destroy() {
|
||||
if( ptex != NULL && ptex->ref > 0 ) {
|
||||
if( --ptex->ref <= 0 )
|
||||
delete ptex;
|
||||
inline void Destroy()
|
||||
{
|
||||
if (ptex != NULL && ptex->ref > 0)
|
||||
{
|
||||
if (--ptex->ref <= 0) delete ptex;
|
||||
}
|
||||
|
||||
ptex = NULL;
|
||||
|
@ -820,10 +855,13 @@ namespace ZeroGS {
|
|||
|
||||
void Destroy();
|
||||
|
||||
inline bool CheckPrim() {
|
||||
inline bool CheckPrim()
|
||||
{
|
||||
static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)
|
||||
|
||||
if ((PRIMMASK & prim->_val) != (PRIMMASK & curprim._val) || primtype[prim->prim] != primtype[curprim.prim])
|
||||
return nCount > 0;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -852,7 +890,7 @@ namespace ZeroGS {
|
|||
inline int FindMinimalHeightConstrain(int maxpos);
|
||||
|
||||
inline int CheckFrameResolveRender(int tbp);
|
||||
inline void CheckFrame16vs32Convesion();
|
||||
inline void CheckFrame16vs32Conversion();
|
||||
inline int CheckFrameResolveDepth(int tbp);
|
||||
|
||||
inline void FlushTexUnchangedClutDontUpdate() ;
|
||||
|
@ -861,10 +899,12 @@ namespace ZeroGS {
|
|||
inline void FlushTexSetNewVars(u32 psm) ;
|
||||
|
||||
// notify VB that nVerts need to be written to pbuf
|
||||
inline void NotifyWrite(int nVerts) {
|
||||
inline void NotifyWrite(int nVerts)
|
||||
{
|
||||
assert(pBufferData != NULL && nCount <= nNumVertices && nVerts > 0);
|
||||
|
||||
if( nCount + nVerts > nNumVertices ) {
|
||||
if (nCount + nVerts > nNumVertices)
|
||||
{
|
||||
// recreate except with a bigger count
|
||||
VertexGPU* ptemp = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nNumVertices * 2, 256);
|
||||
memcpy_amd(ptemp, pBufferData, sizeof(VertexGPU) * nCount);
|
||||
|
@ -875,8 +915,10 @@ namespace ZeroGS {
|
|||
}
|
||||
}
|
||||
|
||||
void Init(int nVerts) {
|
||||
if( pBufferData == NULL && nVerts > 0 ) {
|
||||
void Init(int nVerts)
|
||||
{
|
||||
if (pBufferData == NULL && nVerts > 0)
|
||||
{
|
||||
pBufferData = (VertexGPU*)_aligned_malloc(sizeof(VertexGPU) * nVerts, 256);
|
||||
nNumVertices = nVerts;
|
||||
}
|
||||
|
@ -889,13 +931,16 @@ namespace ZeroGS {
|
|||
u8 bNeedTexCheck;
|
||||
u8 dummy0;
|
||||
|
||||
union {
|
||||
struct {
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 bTexConstsSync; // only pixel shader constants that context owns
|
||||
u8 bVarsTexSync; // texture info
|
||||
u8 bVarsSetTarg;
|
||||
u8 dummy1;
|
||||
};
|
||||
|
||||
u32 bSyncVars;
|
||||
};
|
||||
|
||||
|
@ -926,7 +971,8 @@ namespace ZeroGS {
|
|||
// if tcc == 0 then no alpha is used; aem is used for alpha expanding and I am not sure
|
||||
// that it's correct, psm -- color mode,
|
||||
inline bool
|
||||
IsAlphaTestExpansion(VB& curvb){
|
||||
IsAlphaTestExpansion(VB& curvb)
|
||||
{
|
||||
return (curvb.tex0.tcc && gs.texa.aem && nNeedAlpha(GetTexCPSM(curvb.tex0)));
|
||||
}
|
||||
|
||||
|
@ -1051,8 +1097,8 @@ namespace ZeroGS {
|
|||
void CaptureFrame();
|
||||
|
||||
// Perform clutting for flushed texture. Better check if it needs a prior call.
|
||||
inline void
|
||||
CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx) {
|
||||
inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
|
||||
{
|
||||
tex0->cbp = ZZOglGet_cbp_TexBits(Data);
|
||||
tex0->cpsm = ZZOglGet_cpsm_TexBits(Data);
|
||||
tex0->csm = ZZOglGet_csm_TexBits(Data);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
template <class T> inline T RAD_2_DEG(T radians) { return (radians * (T)57.29577951); }
|
||||
|
||||
class Transform;
|
||||
|
||||
class TransformMatrix;
|
||||
|
||||
typedef float dReal;
|
||||
|
@ -35,6 +36,7 @@ inline dReal* inv4(const dReal* pf, dReal* pfres);
|
|||
|
||||
// class used for 3 and 4 dim vectors and quaternions
|
||||
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
|
||||
|
||||
class Vector
|
||||
{
|
||||
public:
|
||||
|
@ -45,7 +47,6 @@ public:
|
|||
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
|
||||
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
|
||||
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
|
||||
|
||||
dReal operator[](int i) const { return (&x)[i]; }
|
||||
dReal& operator[](int i) { return (&x)[i]; }
|
||||
|
||||
|
@ -56,7 +57,6 @@ public:
|
|||
// SCALAR FUNCTIONS
|
||||
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
|
||||
inline void normalize() { normalize4(&x, &x); }
|
||||
|
||||
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
|
||||
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
|
||||
inline void SetColor(u32 color)
|
||||
|
@ -69,23 +69,18 @@ public:
|
|||
// 3 dim cross product, w is not touched
|
||||
/// this = this x v
|
||||
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
|
||||
|
||||
/// this = u x v
|
||||
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
|
||||
|
||||
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
|
||||
inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
|
||||
inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
|
||||
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
|
||||
inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
|
||||
|
||||
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
|
||||
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
|
||||
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
|
||||
|
||||
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
|
||||
friend Vector operator*(float f, const Vector& v);
|
||||
//friend ostream& operator<<(ostream& O, const Vector& v);
|
||||
//friend istream& operator>>(istream& I, Vector& v);
|
||||
|
@ -113,16 +108,20 @@ struct OBB
|
|||
struct TRIANGLE
|
||||
{
|
||||
TRIANGLE() {}
|
||||
|
||||
TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {}
|
||||
|
||||
~TRIANGLE() {}
|
||||
|
||||
Vector v1, v2, v3; //!< the vertices of the triangle
|
||||
|
||||
const Vector& operator[](int i) const { return (&v1)[i]; }
|
||||
|
||||
Vector& operator[](int i) { return (&v1)[i]; }
|
||||
|
||||
/// assumes CCW ordering of vertices
|
||||
inline Vector ComputeNormal() {
|
||||
inline Vector ComputeNormal()
|
||||
{
|
||||
Vector normal;
|
||||
cross3(normal, v2 - v1, v3 - v1);
|
||||
return normal;
|
||||
|
@ -239,29 +238,47 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
|
|||
d = b * b - 4.0f * c + 1e-16f;
|
||||
|
||||
if (d < 0) return false;
|
||||
if( d < 1e-16f ) {
|
||||
|
||||
if (d < 1e-16f)
|
||||
{
|
||||
a = -0.5f * b;
|
||||
peigs[0] = a; peigs[1] = a;
|
||||
fv1x = pfmat[1]; fv1y = a - pfmat[0];
|
||||
peigs[0] = a;
|
||||
peigs[1] = a;
|
||||
fv1x = pfmat[1];
|
||||
fv1y = a - pfmat[0];
|
||||
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
|
||||
fv1x *= c; fv1y *= c;
|
||||
fv2x = -fv1y; fv2y = fv1x;
|
||||
fv1x *= c;
|
||||
fv1y *= c;
|
||||
fv2x = -fv1y;
|
||||
fv2y = fv1x;
|
||||
return true;
|
||||
}
|
||||
|
||||
// two roots
|
||||
d = sqrtf(d);
|
||||
|
||||
a = -0.5f * (b + d);
|
||||
peigs[0] = a;
|
||||
fv1x = pfmat[1]; fv1y = a-pfmat[0];
|
||||
|
||||
fv1x = pfmat[1];
|
||||
fv1y = a - pfmat[0];
|
||||
|
||||
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
|
||||
fv1x *= c; fv1y *= c;
|
||||
|
||||
fv1x *= c;
|
||||
fv1y *= c;
|
||||
|
||||
a += d;
|
||||
peigs[1] = a;
|
||||
fv2x = pfmat[1]; fv2y = a-pfmat[0];
|
||||
|
||||
fv2x = pfmat[1];
|
||||
fv2y = a - pfmat[0];
|
||||
|
||||
c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y);
|
||||
fv2x *= c; fv2y *= c;
|
||||
|
||||
fv2x *= c;
|
||||
fv2y *= c;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -273,8 +290,11 @@ template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
|
|||
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
|
||||
T* pfres2;
|
||||
if( pfres == pf1 || pfres == pf2 ) pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf1 || pfres == pf2)
|
||||
pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
pfres2[0*4+0] = pf1[0*4+0] * pf2[0*4+0] + pf1[0*4+1] * pf2[1*4+0] + pf1[0*4+2] * pf2[2*4+0];
|
||||
pfres2[0*4+1] = pf1[0*4+0] * pf2[0*4+1] + pf1[0*4+1] * pf2[1*4+1] + pf1[0*4+2] * pf2[2*4+1];
|
||||
|
@ -294,6 +314,7 @@ template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
|
|||
}
|
||||
|
||||
inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2) { return _mult3<dReal>(pfres, pf1, pf2); }
|
||||
|
||||
inline double* mult3(double* pfres, const double* pf1, const double* pf2) { return _mult3<double>(pfres, pf1, pf2); }
|
||||
|
||||
template <class T>
|
||||
|
@ -302,8 +323,11 @@ inline T* _mult4(T* pfres, const T* p1, const T* p2)
|
|||
assert(pfres != NULL && p1 != NULL && p2 != NULL);
|
||||
|
||||
T* pfres2;
|
||||
if( pfres == p1 || pfres == p2 ) pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == p1 || pfres == p2)
|
||||
pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
pfres2[0*4+0] = p1[0*4+0] * p2[0*4+0] + p1[0*4+1] * p2[1*4+0] + p1[0*4+2] * p2[2*4+0] + p1[0*4+3] * p2[3*4+0];
|
||||
pfres2[0*4+1] = p1[0*4+0] * p2[0*4+1] + p1[0*4+1] * p2[1*4+1] + p1[0*4+2] * p2[2*4+1] + p1[0*4+3] * p2[3*4+1];
|
||||
|
@ -326,6 +350,7 @@ inline T* _mult4(T* pfres, const T* p1, const T* p2)
|
|||
pfres2[3*4+3] = p1[3*4+0] * p2[0*4+3] + p1[3*4+1] * p2[1*4+3] + p1[3*4+2] * p2[2*4+3] + p1[3*4+3] * p2[3*4+3];
|
||||
|
||||
if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16);
|
||||
|
||||
return pfres;
|
||||
}
|
||||
|
||||
|
@ -336,17 +361,18 @@ template <class T>
|
|||
inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf1)
|
||||
pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6];
|
||||
pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7];
|
||||
pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8];
|
||||
|
||||
pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6];
|
||||
pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7];
|
||||
pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8];
|
||||
|
||||
pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6];
|
||||
pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7];
|
||||
pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8];
|
||||
|
@ -360,11 +386,16 @@ template <class T>
|
|||
inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
for(int i = 0; i < 4; ++i) {
|
||||
for(int j = 0; j < 4; ++j) {
|
||||
if (pfres == pf1)
|
||||
pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
|
||||
}
|
||||
}
|
||||
|
@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
|
|||
template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf)
|
||||
pfres2 = (T*)alloca(3 * stride * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
// inverse = C^t / det(pf) where C is the matrix of coefficients
|
||||
|
||||
|
@ -390,9 +424,11 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
|
|||
pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
|
||||
pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
|
||||
pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
|
||||
|
||||
pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
|
||||
pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
|
||||
pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
|
||||
|
||||
pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
|
||||
pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
|
||||
pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];
|
||||
|
@ -403,16 +439,25 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
|
|||
if (fabs(fdet) < 1e-6) return NULL;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres != pf ) {
|
||||
pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet;
|
||||
pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet;
|
||||
pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet;
|
||||
if (pfres != pf)
|
||||
{
|
||||
pfres[0*stride+0] *= fdet;
|
||||
pfres[0*stride+1] *= fdet;
|
||||
pfres[0*stride+2] *= fdet;
|
||||
pfres[1*stride+0] *= fdet;
|
||||
pfres[1*stride+1] *= fdet;
|
||||
pfres[1*stride+2] *= fdet;
|
||||
pfres[2*stride+0] *= fdet;
|
||||
pfres[2*stride+1] *= fdet;
|
||||
pfres[2*stride+2] *= fdet;
|
||||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
|
||||
|
||||
pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
|
||||
pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
|
||||
pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
|
||||
|
@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3<dRe
|
|||
template <class T> inline T* _inv4(const T* pf, T* pfres)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf)
|
||||
pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
// inverse = C^t / det(pf) where C is the matrix of coefficients
|
||||
|
||||
|
@ -439,7 +487,9 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
|
|||
|
||||
// determinants of all possible 2x2 submatrices formed by last two rows
|
||||
T fd0, fd1, fd2;
|
||||
|
||||
T f1, f2, f3;
|
||||
|
||||
fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
|
||||
fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
|
||||
fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
|
||||
|
@ -487,15 +537,19 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
|
|||
if (fabs(fdet) < 1e-6) return NULL;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres2 == pfres ) {
|
||||
if (pfres2 == pfres)
|
||||
{
|
||||
mult(pfres, fdet, 16);
|
||||
return pfres;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
while(i < 16) {
|
||||
|
||||
while (i < 16)
|
||||
{
|
||||
pfres[i] = pfres2[i] * fdet;
|
||||
++i;
|
||||
}
|
||||
|
@ -509,16 +563,24 @@ template <class T> inline T* _transpose3(const T* pf, T* pfres)
|
|||
{
|
||||
assert(pf != NULL && pfres != NULL);
|
||||
|
||||
if( pf == pfres ) {
|
||||
if (pf == pfres)
|
||||
{
|
||||
rswap(pfres[1], pfres[3]);
|
||||
rswap(pfres[2], pfres[6]);
|
||||
rswap(pfres[5], pfres[7]);
|
||||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6];
|
||||
pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7];
|
||||
pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8];
|
||||
pfres[0] = pf[0];
|
||||
|
||||
pfres[1] = pf[3];
|
||||
pfres[2] = pf[6];
|
||||
pfres[3] = pf[1];
|
||||
pfres[4] = pf[4];
|
||||
pfres[5] = pf[7];
|
||||
pfres[6] = pf[2];
|
||||
pfres[7] = pf[5];
|
||||
pfres[8] = pf[8];
|
||||
|
||||
return pfres;
|
||||
}
|
||||
|
@ -530,7 +592,8 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
|
|||
{
|
||||
assert(pf != NULL && pfres != NULL);
|
||||
|
||||
if( pf == pfres ) {
|
||||
if (pf == pfres)
|
||||
{
|
||||
rswap(pfres[1], pfres[4]);
|
||||
rswap(pfres[2], pfres[8]);
|
||||
rswap(pfres[3], pfres[12]);
|
||||
|
@ -540,10 +603,23 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
|
|||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12];
|
||||
pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13];
|
||||
pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14];
|
||||
pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15];
|
||||
pfres[0] = pf[0];
|
||||
|
||||
pfres[1] = pf[4];
|
||||
pfres[2] = pf[8];
|
||||
pfres[3] = pf[12];
|
||||
pfres[4] = pf[1];
|
||||
pfres[5] = pf[5];
|
||||
pfres[6] = pf[9];
|
||||
pfres[7] = pf[13];
|
||||
pfres[8] = pf[2];
|
||||
pfres[9] = pf[6];
|
||||
pfres[10] = pf[10];
|
||||
pfres[11] = pf[14];
|
||||
pfres[12] = pf[3];
|
||||
pfres[13] = pf[7];
|
||||
pfres[14] = pf[11];
|
||||
pfres[15] = pf[15];
|
||||
return pfres;
|
||||
}
|
||||
|
||||
|
@ -636,7 +712,9 @@ inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2)
|
|||
temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
|
||||
temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];
|
||||
|
||||
pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
|
||||
pfout[0] = temp[0];
|
||||
pfout[1] = temp[1];
|
||||
pfout[2] = temp[2];
|
||||
return pfout;
|
||||
}
|
||||
|
||||
|
@ -644,7 +722,8 @@ template <class T> inline void mult(T* pf, T fa, int r)
|
|||
{
|
||||
assert(pf != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf[r] *= fa;
|
||||
}
|
||||
|
@ -658,20 +737,27 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
|||
|
||||
if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2);
|
||||
|
||||
while(r1 > 0) {
|
||||
while (r1 > 0)
|
||||
{
|
||||
--r1;
|
||||
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < c1) {
|
||||
|
||||
while (k < c1)
|
||||
{
|
||||
pfres[j] += pf1[k] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pf1 += c1;
|
||||
|
||||
pfres += c2;
|
||||
}
|
||||
|
||||
|
@ -687,20 +773,26 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
|||
if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2);
|
||||
|
||||
i = 0;
|
||||
while(i < c1) {
|
||||
|
||||
while (i < c1)
|
||||
{
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < r1) {
|
||||
|
||||
while (k < r1)
|
||||
{
|
||||
pfres[j] += pf1[k*c1] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pfres += c2;
|
||||
|
||||
++pf1;
|
||||
|
||||
++i;
|
||||
|
@ -717,20 +809,27 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b
|
|||
|
||||
if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2);
|
||||
|
||||
while(r1 > 0) {
|
||||
while (r1 > 0)
|
||||
{
|
||||
--r1;
|
||||
|
||||
j = 0;
|
||||
while(j < r2) {
|
||||
|
||||
while (j < r2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < c1) {
|
||||
|
||||
while (k < c1)
|
||||
{
|
||||
pfres[j] += pf1[k] * pf2[j*c1 + k];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pf1 += c1;
|
||||
|
||||
pfres += r2;
|
||||
}
|
||||
|
||||
|
@ -744,28 +843,36 @@ template <class T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
|
|||
int j, k;
|
||||
bool bdel = false;
|
||||
|
||||
if( pftemp == NULL ) {
|
||||
if (pftemp == NULL)
|
||||
{
|
||||
pftemp = new T[c];
|
||||
bdel = true;
|
||||
}
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
|
||||
j = 0;
|
||||
while(j < c) {
|
||||
|
||||
while (j < c)
|
||||
{
|
||||
|
||||
pftemp[j] = 0.0;
|
||||
|
||||
k = 0;
|
||||
while(k < c) {
|
||||
|
||||
while (k < c)
|
||||
{
|
||||
pftemp[j] += pf1[k] * pf2[k*c + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
memcpy(pf1, pftemp, c * sizeof(T));
|
||||
|
||||
pf1 += c;
|
||||
}
|
||||
|
||||
|
@ -781,29 +888,39 @@ template <class T, class S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S*
|
|||
int i, j, k;
|
||||
bool bdel = false;
|
||||
|
||||
if( pftemp == NULL ) {
|
||||
if (pftemp == NULL)
|
||||
{
|
||||
pftemp = new S[r2];
|
||||
bdel = true;
|
||||
}
|
||||
|
||||
// do columns first
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
i = 0;
|
||||
while(i < r2) {
|
||||
|
||||
while (i < r2)
|
||||
{
|
||||
|
||||
pftemp[i] = 0.0;
|
||||
|
||||
k = 0;
|
||||
while(k < r2) {
|
||||
|
||||
while (k < r2)
|
||||
{
|
||||
pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while(i < r2) {
|
||||
|
||||
while (i < r2)
|
||||
{
|
||||
*(pf2 + i*c2 + j) = pftemp[i];
|
||||
++i;
|
||||
}
|
||||
|
@ -820,7 +937,8 @@ template <class T> inline void add(T* pf1, T* pf2, int r)
|
|||
{
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf1[r] += pf2[r];
|
||||
}
|
||||
|
@ -830,7 +948,8 @@ template <class T> inline void sub(T* pf1, T* pf2, int r)
|
|||
{
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf1[r] -= pf2[r];
|
||||
}
|
||||
|
@ -841,7 +960,9 @@ template <class T> inline T normsqr(T* pf1, int r)
|
|||
assert(pf1 != NULL);
|
||||
|
||||
T d = 0.0;
|
||||
while(r > 0) {
|
||||
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
d += pf1[r] * pf1[r];
|
||||
}
|
||||
|
@ -852,7 +973,9 @@ template <class T> inline T normsqr(T* pf1, int r)
|
|||
template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += sqr(pf1[length] - pf2[length]);
|
||||
}
|
||||
|
@ -863,7 +986,9 @@ template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
|
|||
template <class T> inline T dot(T* pf1, T* pf2, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += pf1[length] * pf2[length];
|
||||
}
|
||||
|
@ -874,7 +999,9 @@ template <class T> inline T dot(T* pf1, T* pf2, int length)
|
|||
template <class T> inline T sum(T* pf, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += pf[length];
|
||||
}
|
||||
|
@ -889,15 +1016,20 @@ template <class T> inline bool inv2(T* pf, T* pfres)
|
|||
if (fabs(fdet) < 1e-16) return false;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres != pf ) {
|
||||
pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1];
|
||||
pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0];
|
||||
if (pfres != pf)
|
||||
{
|
||||
pfres[0] = fdet * pf[3];
|
||||
pfres[1] = -fdet * pf[1];
|
||||
pfres[2] = -fdet * pf[2];
|
||||
pfres[3] = fdet * pf[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
dReal ftemp = pf[0];
|
||||
|
||||
pfres[0] = pf[3] * fdet;
|
||||
pfres[1] *= -fdet;
|
||||
pfres[2] *= -fdet;
|
||||
|
|
Loading…
Reference in New Issue