mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Part 2 of the re-formatting; ran AStyle over the headers.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2932 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
a6c4df49ea
commit
12ad5308ed
|
@ -36,6 +36,7 @@ using namespace std;
|
|||
|
||||
class GLWindow
|
||||
{
|
||||
|
||||
private:
|
||||
#ifdef GL_X11_WINDOW
|
||||
Display *glDisplay;
|
||||
|
@ -248,6 +249,7 @@ extern u8* g_pBasePS2Mem;
|
|||
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
|
||||
|
||||
// PS2 vertex
|
||||
|
||||
struct VertexGPU
|
||||
{
|
||||
// gained from XYZ2, XYZ3, XYZF2, XYZF3,
|
||||
|
@ -264,6 +266,7 @@ struct VertexGPU
|
|||
};
|
||||
|
||||
// Almost the same as the previous one, controlled by the prim.fst flag
|
||||
|
||||
struct Vertex
|
||||
{
|
||||
u16 x, y, f, resv0; // note: xy is 12d3
|
||||
|
@ -281,7 +284,8 @@ extern int ppf;
|
|||
|
||||
// PSM values
|
||||
// PSM types == Texture Storage Format
|
||||
enum PSM_value{
|
||||
enum PSM_value
|
||||
{
|
||||
PSMCT32 = 0, // 000000
|
||||
PSMCT24 = 1, // 000001
|
||||
PSMCT16 = 2, // 000010
|
||||
|
@ -328,7 +332,8 @@ inline bool PSMT_IS32BIT(int psm) {return !!(psm <= 1);}
|
|||
|
||||
//----------------------- Data from registers -----------------------
|
||||
|
||||
typedef union {
|
||||
typedef union
|
||||
{
|
||||
s64 SD;
|
||||
u64 UD;
|
||||
s32 SL[2];
|
||||
|
@ -340,7 +345,9 @@ typedef union {
|
|||
} reg64;
|
||||
|
||||
/* general purpose regs structs */
|
||||
typedef struct {
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int fbp;
|
||||
int fbw;
|
||||
int fbh;
|
||||
|
@ -349,7 +356,8 @@ typedef struct {
|
|||
} frameInfo;
|
||||
|
||||
// Create frame structure from known data
|
||||
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
|
||||
inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm)
|
||||
{
|
||||
frameInfo frame;
|
||||
frame.fbp = fbp;
|
||||
frame.fbw = fbw;
|
||||
|
@ -359,11 +367,14 @@ inline frameInfo CreateFrame(int fbp, int fbw, int fbh, int psm, u32 fbm){
|
|||
return frame;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 prim;
|
||||
|
||||
union {
|
||||
struct {
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u16 iip : 1;
|
||||
u16 tme : 1;
|
||||
u16 fge : 1;
|
||||
|
@ -380,8 +391,10 @@ typedef struct {
|
|||
|
||||
extern primInfo *prim;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 ate : 1;
|
||||
u32 atst : 3;
|
||||
u32 aref : 8;
|
||||
|
@ -395,13 +408,15 @@ typedef union {
|
|||
u32 _val;
|
||||
} pixTest;
|
||||
|
||||
// Plain description of a buffer: three ints named bp, bw and psm.
// NOTE(review): presumably base pointer, buffer width and pixel storage
// mode -- confirm against the code that fills this struct.
typedef struct
{
	int bp;
	int bw;
	int psm;
} bufInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int tbp0;
|
||||
int tbw;
|
||||
int cbp;
|
||||
|
@ -432,13 +447,17 @@ union tex_0_info
|
|||
u64 csa : 5;
|
||||
u64 cld : 3;
|
||||
};
|
||||
|
||||
u64 _u64;
|
||||
u32 _u32[2];
|
||||
u16 _u16[4];
|
||||
u8 _u8[8];
|
||||
// Construct from a complete 64-bit value.
tex_0_info(u64 data) { _u64 = data; }

// Construct from one 32-bit word: it goes into _u32[0], _u32[1] is cleared.
tex_0_info(u32 data) { _u32[0] = data; _u32[1] = 0; }

// Construct from two 32-bit words: data0 -> _u32[0], data1 -> _u32[1].
tex_0_info(u32 data0, u32 data1) { _u32[0] = data0; _u32[1] = data1; }
|
||||
|
||||
u32 tbw_mult()
|
||||
{
|
||||
if (tbw == 0)
|
||||
|
@ -446,26 +465,34 @@ union tex_0_info
|
|||
else
|
||||
return ((u32)tbw << 6);
|
||||
}
|
||||
|
||||
// Returns psm, except that a stored value of 9 is reported as 1.
u32 psm_fix()
{
	// printf ("psm %d\n", psm);
	if (psm == 9)
		return 1;

	return psm;
}
|
||||
|
||||
// Returns 2^tw with the exponent capped at 10, so the result never exceeds 1024.
u32 tw_exp()
{
	if (tw > 10)
		return (1 << 10);

	return (1 << tw);
}
|
||||
|
||||
// Returns 2^th with the exponent capped at 10, so the result never exceeds 1024.
u32 th_exp()
{
	if (th > 10)
		return (1 << 10);

	return (1 << th);
}
|
||||
|
||||
// Returns cpsm masked with 0xe, i.e. with its lowest bit cleared.
u32 cpsm_fix()
{
	return cpsm & 0xe;
}
|
||||
|
||||
u32 csa_fix()
|
||||
{
|
||||
if (cpsm < 2)
|
||||
|
@ -480,7 +507,8 @@ union tex_0_info
|
|||
#define TEX_HIGHLIGHT 2
|
||||
#define TEX_HIGHLIGHT2 3
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int lcm;
|
||||
int mxl;
|
||||
int mmag;
|
||||
|
@ -490,7 +518,8 @@ typedef struct {
|
|||
int k;
|
||||
} tex1Info;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int wms;
|
||||
int wmt;
|
||||
int minu;
|
||||
|
@ -499,24 +528,28 @@ typedef struct {
|
|||
int maxv;
|
||||
} clampInfo;
|
||||
|
||||
// CLUT-related values: three ints named cbw, cou and cov.
// NOTE(review): presumably CLUT buffer width and the U/V offsets -- confirm
// against the register handler that fills this struct.
typedef struct
{
	int cbw;
	int cou;
	int cov;
} clutInfo;
|
||||
|
||||
// Three tbp values and three tbw values, stored as parallel arrays.
// NOTE(review): presumably one base pointer / width pair per mipmap level --
// confirm against the code that fills this struct.
typedef struct
{
	int tbp[3];
	int tbw[3];
} miptbpInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 aem;
|
||||
u8 ta[2];
|
||||
float fta[2];
|
||||
} texaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
int sx;
|
||||
int sy;
|
||||
int dx;
|
||||
|
@ -524,9 +557,12 @@ typedef struct {
|
|||
int dir;
|
||||
} trxposInfo;
|
||||
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 a : 2;
|
||||
u8 b : 2;
|
||||
u8 c : 2;
|
||||
|
@ -538,17 +574,20 @@ typedef struct {
|
|||
u8 fix : 8;
|
||||
} alphaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
u16 zbp; // u16 address / 64
|
||||
u8 psm;
|
||||
u8 zmsk;
|
||||
} zbufInfo;
|
||||
|
||||
// Wrapper around a single int named fba.
typedef struct
{
	int fba;
} fbaInfo;
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
Vertex gsvertex[3];
|
||||
u32 rgba;
|
||||
float q;
|
||||
|
@ -593,36 +632,41 @@ extern GSinternal gs;
|
|||
// Packs a 32-bit colour into 16 bits: the top five bits of each of the three
// low bytes become three 5-bit fields, and bit 31 becomes bit 15.
static __forceinline u16 RGBA32to16(u32 c)
{
	const u32 field0 = (c & 0x000000f8) >> 3;   // -> bits 0-4
	const u32 field1 = (c & 0x0000f800) >> 6;   // -> bits 5-9
	const u32 field2 = (c & 0x00f80000) >> 9;   // -> bits 10-14
	const u32 topBit = (c & 0x80000000) >> 16;  // -> bit 15

	return (u16)(field0 | field1 | field2 | topBit);
}
|
||||
|
||||
// Expands a 16-bit colour to 32 bits: each 5-bit field is moved to the top
// five bits of its byte (low three bits stay zero), and bit 15 selects a
// high byte of 0xff or 0x00.
static __forceinline u32 RGBA16to32(u16 c)
{
	const u32 highByte = (c & 0x8000) ? 0xff000000 : 0;

	return ((c & 0x001f) << 3) |
	       ((c & 0x03e0) << 6) |
	       ((c & 0x7c00) << 9) |
	       highByte;
}
|
||||
|
||||
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
|
||||
// f is a u16
|
||||
static __forceinline u16 Float16ToBYTE(u16 f) {
|
||||
static __forceinline u16 Float16ToBYTE(u16 f)
|
||||
{
|
||||
//assert( !(f & 0x8000) );
|
||||
if( f & 0x8000 ) return 0;
|
||||
if (f & 0x8000) return 0;
|
||||
|
||||
u16 d = ((((f & 0x3ff) | 0x400) * 255) >> (10 - ((f >> 10) & 0x1f) + 15));
|
||||
|
||||
u16 d = ((((f&0x3ff)|0x400)*255)>>(10-((f>>10)&0x1f)+15));
|
||||
return d > 255 ? 255 : d;
|
||||
}
|
||||
|
||||
// Same conversion as Float16ToBYTE, but the scaled value is halved before
// being clamped to 255. Inputs with the sign bit set give 0.
static __forceinline u16 Float16ToALPHA(u16 f)
{
	//assert( !(f & 0x8000) );
	if (f & 0x8000) return 0;

	// round up instead of down (crash and burn), too much and charlie breaks
	const int mantissa = (f & 0x3ff) | 0x400;
	// NOTE(review): an exponent field above 25 makes this shift count
	// negative, which is undefined behaviour; in-range inputs never get there.
	const int shift = 10 - ((f >> 10) & 0x1f) + 15;

	u16 d = (mantissa * 255) >> shift;
	d = d >> 1;

	return d > 255 ? 255 : d;
}
|
||||
|
||||
|
@ -650,12 +694,14 @@ static __forceinline u16 Float16ToALPHA(u16 f) {
|
|||
|
||||
// Limits fx to [fmin, fmax]. The lower bound is tested first, so fmin wins
// whenever fx is below it.
inline float Clamp(float fx, float fmin, float fmax)
{
	if (fx < fmin)
		return fmin;

	if (fx > fmax)
		return fmax;

	return fx;
}
|
||||
|
||||
// PSMT16, 16S have shorter color per pixel, also cluted textures with half storage.
|
||||
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
|
||||
inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0)
|
||||
{
|
||||
if (PSMT_IS16BIT(tex0.psm) || (PSMT_ISCLUT(tex0.psm) && tex0.cpsm > 1))
|
||||
return true;
|
||||
else
|
||||
|
@ -670,7 +716,7 @@ inline bool PSMT_ISHALF_STORAGE(const tex0Info& tex0) {
|
|||
// Extracts tbp0 from data: the low 14 bits (bits 0-13).
static __forceinline int ZZOglGet_tbp0_TexBits(u32 data)
{
	//return tex_0_info(data).tbp0;
	return data & 0x3fff;
}
|
||||
|
||||
// Obtain tbw -- Texture Buffer Width (Texels/64) -- from data, do not multiply to 64. Bits 14-19
|
||||
|
@ -686,6 +732,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
|
|||
{
|
||||
//return text_0_info(data).tbw_mult();
|
||||
int result = ZZOglGet_tbw_TexBits(data);
|
||||
|
||||
if (result == 0)
|
||||
return 64;
|
||||
else
|
||||
|
@ -697,7 +744,7 @@ static __forceinline int ZZOglGet_tbw_TexBitsMult(u32 data)
|
|||
// Extracts psm from data: the 6-bit field at bits 20-25.
static __forceinline int ZZOglGet_psm_TexBits(u32 data)
{
	//return tex_0_info(data).psm;
	return (data >> 20) & 0x3f;
}
|
||||
|
||||
// Obtain psm -- Pixel Storage Format -- from data. Bits 20-25. Fix incorrect psm == 9
|
||||
|
@ -706,7 +753,9 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
|
|||
//return tex_0_info(data).psm_fix();
|
||||
int result = ZZOglGet_psm_TexBits(data) ;
|
||||
// printf ("result %d\n", result);
|
||||
if ( result == 9 ) result = 1;
|
||||
|
||||
if (result == 9) result = 1;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -715,7 +764,7 @@ static __forceinline int ZZOglGet_psm_TexBitsFix(u32 data)
|
|||
// Extracts tw from data: the 4-bit field at bits 26-29.
static __forceinline u16 ZZOglGet_tw_TexBits(u32 data)
{
	//return tex_0_info(data).tw;
	return (data >> 26) & 0xf;
}
|
||||
|
||||
// Obtain tw -- Texture Width (Width = 2^TW) -- from data. Width can never be more than 1024.
|
||||
|
@ -723,8 +772,10 @@ static __forceinline u16 ZZOglGet_tw_TexBitsExp(u32 data)
|
|||
{
|
||||
//return tex_0_info(data).tw_exp();
|
||||
u16 result = ZZOglGet_tw_TexBits(data);
|
||||
|
||||
if (result > 10) result = 10;
|
||||
return (1<<result);
|
||||
|
||||
return (1 << result);
|
||||
}
|
||||
|
||||
// TH set at the border of upper and higher words.
|
||||
|
@ -741,8 +792,10 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
|
|||
{
|
||||
//return tex_0_info(dataLO, dataHI).th_exp();
|
||||
u16 result = ZZOglGet_th_TexBits(dataLO, dataHI);
|
||||
|
||||
if (result > 10) result = 10;
|
||||
return (1<<result);
|
||||
|
||||
return (1 << result);
|
||||
}
|
||||
|
||||
// Tex0Info bits, higher word.
|
||||
|
@ -751,7 +804,7 @@ static __forceinline u16 ZZOglGet_th_TexBitsExp(u32 dataLO, u32 dataHI)
|
|||
// Extracts tcc from data (the higher word): the single bit at position 2.
static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
{
	//return tex_0_info(0, data).tcc;
	return (data >> 2) & 0x1;
}
|
||||
|
||||
// Obtain tfx -- Texture Function (0=modulate, 1=decal, 2=highlight, 3=highlight2) -- from data. Bits 3-4
|
||||
|
@ -759,7 +812,7 @@ static __forceinline u8 ZZOglGet_tcc_TexBits(u32 data)
|
|||
// Extracts tfx from data (the higher word): the 2-bit field at bits 3-4.
static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
{
	//return tex_0_info(0, data).tfx;
	return (data >> 3) & 0x3;
}
|
||||
|
||||
// Obtain cbp from data -- Clut Buffer Base Pointer (Address/256) -- Bits 5-18
|
||||
|
@ -767,7 +820,7 @@ static __forceinline u8 ZZOglGet_tfx_TexBits(u32 data)
|
|||
// Extracts cbp from data (the higher word): the 14-bit field at bits 5-18.
static __forceinline int ZZOglGet_cbp_TexBits(u32 data)
{
	//return tex_0_info(0, data).cbp;
	return (data >> 5) & 0x3fff;
}
|
||||
|
||||
// Obtain cpsm from data -- Clut pixel Storage Format -- Bits 19-22. 22nd is at no use.
|
||||
|
@ -794,7 +847,7 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
|
|||
{
|
||||
//return tex_0_info(0, data).csa_fix();
|
||||
|
||||
if ((data & 0x700000) == 0 ) // it is cpsm < 2 check
|
||||
if ((data & 0x700000) == 0) // it is cpsm < 2 check
|
||||
return ((data >> 24) & 0xf);
|
||||
else
|
||||
return ((data >> 24) & 0x1f);
|
||||
|
@ -805,79 +858,81 @@ static __forceinline u8 ZZOglGet_csa_TexBits(u32 data)
|
|||
// Extracts cld from data (the higher word): the 3-bit field at bits 29-31.
static __forceinline u8 ZZOglGet_cld_TexBits(u32 data)
{
	//return tex_0_info(0, data).cld;
	return (data >> 29) & 0x7;
}
|
||||
|
||||
//-------------------------- frames
|
||||
// FrameInfo bits.
|
||||
// Obtain fbp -- frame Buffer Base Pointer (Word Address/2048) -- from data. Bits 0-15
|
||||
inline int
|
||||
ZZOglGet_fbp_FrameBits(u32 data) {
|
||||
return ((data ) & 0x1ff);
|
||||
inline int ZZOglGet_fbp_FrameBits(u32 data)
|
||||
{
|
||||
return ((data) & 0x1ff);
|
||||
}
|
||||
|
||||
// So we got adress / 64, henceby frame fbp and tex tbp have the same dimension -- "real adress" is x64.
|
||||
inline int
|
||||
ZZOglGet_fbp_FrameBitsMult(u32 data) {
|
||||
// So we got address / 64, henceby frame fbp and tex tbp have the same dimension -- "real address" is x64.
|
||||
inline int ZZOglGet_fbp_FrameBitsMult(u32 data)
|
||||
{
|
||||
return (ZZOglGet_fbp_FrameBits(data) << 5);
|
||||
}
|
||||
|
||||
// Obtain fbw -- width (Texels/64) -- from data. Bits 16-23
|
||||
inline int
|
||||
ZZOglGet_fbw_FrameBits(u32 data) {
|
||||
inline int ZZOglGet_fbw_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 16) & 0x3f);
|
||||
}
|
||||
|
||||
inline int
|
||||
ZZOglGet_fbw_FrameBitsMult(u32 data) {
|
||||
inline int ZZOglGet_fbw_FrameBitsMult(u32 data)
|
||||
{
|
||||
return (ZZOglGet_fbw_FrameBits(data) << 6);
|
||||
}
|
||||
|
||||
|
||||
// Obtain psm -- Pixel Storage Format -- from data. Bits 24-29.
|
||||
// (data & 0x3f000000) >> 24
|
||||
inline int
|
||||
ZZOglGet_psm_FrameBits(u32 data) {
|
||||
return ((data >> 24) & 0x3f);
|
||||
inline int ZZOglGet_psm_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 24) & 0x3f);
|
||||
}
|
||||
|
||||
// Function for calculating overal height from frame data.
|
||||
inline int
|
||||
ZZOgl_fbh_Calc (int fbp, int fbw, int psm) {
|
||||
int fbh = ( 1024 * 1024 - 64 * fbp ) / fbw;
|
||||
inline int ZZOgl_fbh_Calc(int fbp, int fbw, int psm)
|
||||
{
|
||||
int fbh = (1024 * 1024 - 64 * fbp) / fbw;
|
||||
fbh &= ~0x1f;
|
||||
if (PSMT_ISHALF(psm))
|
||||
fbh *= 2;
|
||||
if (fbh > 1024)
|
||||
fbh = 1024;
|
||||
|
||||
if (PSMT_ISHALF(psm)) fbh *= 2;
|
||||
if (fbh > 1024) fbh = 1024;
|
||||
|
||||
return fbh ;
|
||||
}
|
||||
inline int
|
||||
ZZOgl_fbh_Calc (frameInfo frame) {
|
||||
|
||||
inline int ZZOgl_fbh_Calc(frameInfo frame)
|
||||
{
|
||||
return ZZOgl_fbh_Calc(frame.fbp, frame.fbw, frame.psm);
|
||||
}
|
||||
|
||||
// Calculate fbh from data, It does not set in register
|
||||
inline int
|
||||
ZZOglGet_fbh_FrameBitsCalc (u32 data) {
|
||||
inline int ZZOglGet_fbh_FrameBitsCalc(u32 data)
|
||||
{
|
||||
int fbh = 0;
|
||||
int fbp = ZZOglGet_fbp_FrameBits(data);
|
||||
int fbw = ZZOglGet_fbw_FrameBits(data);
|
||||
int psm = ZZOglGet_psm_FrameBits(data);
|
||||
if (fbw > 0)
|
||||
fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
|
||||
|
||||
if (fbw > 0) fbh = ZZOgl_fbh_Calc(fbp, fbw, psm) ;
|
||||
|
||||
return fbh ;
|
||||
}
|
||||
|
||||
// Obtain fbm -- frame mask -- from data. All higher word.
|
||||
inline u32
|
||||
ZZOglGet_fbm_FrameBits(u32 data) {
|
||||
return (data);
|
||||
inline u32 ZZOglGet_fbm_FrameBits(u32 data)
|
||||
{
|
||||
return (data);
|
||||
}
|
||||
|
||||
// Obtain fbm -- frame mask -- from data. All higher word. Fixed up for psm == PSMCT24 (without alpha)
|
||||
inline u32
|
||||
ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
|
||||
inline u32 ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI)
|
||||
{
|
||||
if (PSMT_BITMODE(ZZOglGet_psm_FrameBits(dataLO)) == 1)
|
||||
return (dataHI | 0xff000000);
|
||||
else
|
||||
|
@ -885,53 +940,51 @@ ZZOglGet_fbm_FrameBitsFix(u32 dataLO, u32 dataHI) {
|
|||
}
|
||||
|
||||
// obtain colormask RED
|
||||
inline u32
|
||||
ZZOglGet_fbmRed_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmRed_FrameBits(u32 data)
|
||||
{
|
||||
return (data & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Green
|
||||
inline u32
|
||||
ZZOglGet_fbmGreen_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmGreen_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 8) & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Blue
|
||||
inline u32
|
||||
ZZOglGet_fbmBlue_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmBlue_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 16) & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Alpha
|
||||
inline u32
|
||||
ZZOglGet_fbmAlpha_FrameBits(u32 data) {
|
||||
inline u32 ZZOglGet_fbmAlpha_FrameBits(u32 data)
|
||||
{
|
||||
return ((data >> 24) & 0xff);
|
||||
}
|
||||
|
||||
// obtain colormask Alpha
|
||||
inline u32
|
||||
ZZOglGet_fbmHighByte(u32 data) {
|
||||
inline u32 ZZOglGet_fbmHighByte(u32 data)
|
||||
{
|
||||
return (!!(data & 0x80000000));
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------- tex0 comparison
|
||||
// Check if old and new tex0 registers have only clut difference
|
||||
inline bool
|
||||
ZZOglAllExceptClutIsSame( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglAllExceptClutIsSame(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[0] == newtex[0]) && ((oldtex[1] & 0x1f) == (newtex[1] & 0x1f)));
|
||||
}
|
||||
|
||||
// Check if the CLUT registers are same, except CLD
|
||||
inline bool
|
||||
ZZOglClutMinusCLDunchanged( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglClutMinusCLDunchanged(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[1] & 0x1fffffe0) == (newtex[1] & 0x1fffffe0));
|
||||
}
|
||||
|
||||
// Check if CLUT storage mode is not changed (CSA, CSM and CSPM)
|
||||
inline bool
|
||||
ZZOglClutStorageUnchanged( u32* oldtex, u32* newtex) {
|
||||
inline bool ZZOglClutStorageUnchanged(u32* oldtex, u32* newtex)
|
||||
{
|
||||
return ((oldtex[1] & 0x1ff10000) == (newtex[1] & 0x1ff10000));
|
||||
}
|
||||
|
||||
|
|
|
@ -39,29 +39,33 @@ union GIFTag
|
|||
{
|
||||
u64 ai64[2];
|
||||
u32 ai32[4];
|
||||
|
||||
struct
|
||||
{
|
||||
u32 NLOOP:15;
|
||||
u32 EOP:1;
|
||||
u32 _PAD1:16;
|
||||
u32 _PAD2:14;
|
||||
u32 PRE:1;
|
||||
u32 PRIM:11;
|
||||
u32 FLG:2; // enum GIF_FLG
|
||||
u32 NREG:4;
|
||||
u64 REGS:64;
|
||||
u32 NLOOP : 15;
|
||||
u32 EOP : 1;
|
||||
u32 _PAD1 : 16;
|
||||
u32 _PAD2 : 14;
|
||||
u32 PRE : 1;
|
||||
u32 PRIM : 11;
|
||||
u32 FLG : 2; // enum GIF_FLG
|
||||
u32 NREG : 4;
|
||||
u64 REGS : 64;
|
||||
};
|
||||
|
||||
// Copies four 32-bit words from data into ai32[0..3].
void set(u32 *data)
{
	for (int word = 0; word < 4; word++)
	{
		ai32[word] = data[word];
	}
}
|
||||
|
||||
// Constructs the tag from four 32-bit words by delegating to set().
GIFTag(u32 *data)
{
	set(data);
}
|
||||
|
||||
// Default constructor: clears both 64-bit halves of the tag.
GIFTag() { ai64[0] = 0; ai64[1] = 0; }
|
||||
};
|
||||
|
||||
|
@ -101,13 +105,12 @@ typedef struct
|
|||
// Hmm....
|
||||
nreg = tag.NREG << 2;
|
||||
if (nreg == 0) nreg = 64;
|
||||
|
||||
regs = tag.REGS;
|
||||
reg = 0;
|
||||
|
||||
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
|
||||
// data[3], data[2], data[1], data[0],
|
||||
// path->eop, path->nloop, mode, path->nreg, tag.PRE);
|
||||
// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
|
||||
// data[3], data[2], data[1], data[0],
|
||||
// path->eop, path->nloop, mode, path->nreg, tag.PRE);
|
||||
}
|
||||
|
||||
u32 GetReg()
|
||||
|
@ -124,49 +127,48 @@ typedef struct
|
|||
reg = 0;
|
||||
nloop--;
|
||||
|
||||
if (nloop == 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (nloop == 0) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#else
|
||||
// Loads a new GIF tag from data (four 32-bit words) and resets this path's
// state from it: nloop, eop, nreg and mode are taken from the tag fields.
// For the PACKED and REGLIST modes regs is reloaded from data[2..3] and regn
// is reset; in PACKED mode with PRE set, the tag's PRIM value is also passed
// to GIFRegHandlerPRIM.
void setTag(u32 *data)
{
	tag.set(data);

	nloop = tag.NLOOP;
	eop = tag.EOP;
	u32 tagpre = tag.PRE;
	u32 tagprim = tag.PRIM;
	u32 tagflg = tag.FLG;

	// Hmm....
	// NREG is scaled by 4; a resulting 0 is replaced by 64.
	nreg = tag.NREG << 2;

	if (nreg == 0) nreg = 64;

	// ZZLog::GS_Log("GIFtag: %8.8lx_%8.8lx_%8.8lx_%8.8lx: EOP=%d, NLOOP=%x, FLG=%x, NREG=%d, PRE=%d",
	//		data[3], data[2], data[1], data[0],
	//		path->eop, path->nloop, tagflg, path->nreg, tagpre);

	mode = tagflg;

	switch (mode)
	{
		case GIF_FLG_PACKED:
			regs = *(u64 *)(data + 2);
			regn = 0;

			if (tagpre) GIFRegHandlerPRIM((u32*)&tagprim);

			break;

		case GIF_FLG_REGLIST:
			regs = *(u64 *)(data + 2);
			regn = 0;
			break;
	}
}
|
||||
#endif
|
||||
} pathInfo;
|
||||
|
||||
|
|
|
@ -23,9 +23,9 @@
|
|||
#include <vector>
|
||||
|
||||
// works only when base is a power of 2
|
||||
static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val)+(base-1))&~(base-1)); }
|
||||
static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base-1)); }
|
||||
static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base-1)); }
|
||||
static __forceinline int ROUND_UPPOW2(int val, int base) { return (((val) + (base - 1))&~(base - 1)); }
|
||||
static __forceinline int ROUND_DOWNPOW2(int val, int base) { return ((val)&~(base - 1)); }
|
||||
static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1)); }
|
||||
|
||||
// d3d texture dims
|
||||
const int BLOCK_TEXWIDTH = 128;
|
||||
|
@ -33,13 +33,12 @@ const int BLOCK_TEXHEIGHT = 512;
|
|||
|
||||
extern PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
|
||||
|
||||
typedef u32 ( *_getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
typedef u32 (*_getPixelAddress_0)(int x, int y, u32 bw);
|
||||
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
|
||||
typedef void (*_writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
|
||||
typedef void (*_writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
|
||||
typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
|
||||
typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
|
||||
typedef u32(*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
|
||||
typedef u32(*_readPixel_0)(const void* pmem, int x, int y, u32 bw);
|
||||
typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
|
||||
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
|
||||
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
|
||||
|
@ -53,6 +52,7 @@ enum Psm_Size
|
|||
|
||||
// Both of the following structs should probably be local class variables or in a namespace,
|
||||
// but this works for the moment.
|
||||
|
||||
struct TransferData
|
||||
{
|
||||
// Signed because Visual C++ is weird.
|
||||
|
@ -88,6 +88,7 @@ struct TransferFuncts
|
|||
};
|
||||
|
||||
// rest not visible externally
|
||||
|
||||
struct BLOCK
|
||||
{
|
||||
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
|
||||
|
@ -142,14 +143,14 @@ extern u32 g_pageTable4[128][128];
|
|||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 32 rows (x >> 6, y >> 5); the position inside the
// page comes from g_pageTable32. Result = bp * 64 + page * 2048 + table entry.
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 5) * pagesPerRow + (x >> 6);

	return bp * 64 + page * 2048 + g_pageTable32[y & 31][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress32 but without a base pointer term:
// result = page * 2048 + g_pageTable32 entry.
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 5) * pagesPerRow + (x >> 6);

	return page * 2048 + g_pageTable32[y & 31][x & 63];
}
|
||||
|
@ -165,70 +166,70 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
|||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 64 rows (x >> 6, y >> 6); the position inside the
// page comes from g_pageTable16. Result = bp * 128 + page * 4096 + table entry.
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return bp * 128 + page * 4096 + g_pageTable16[y & 63][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress16 but without a base pointer term:
// result = page * 4096 + g_pageTable16 entry.
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return page * 4096 + g_pageTable16[y & 63][x & 63];
}
|
||||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 64 rows; the position inside the page comes from
// g_pageTable16S. Result = bp * 128 + page * 4096 + table entry.
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return bp * 128 + page * 4096 + g_pageTable16S[y & 63][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress16S but without a base pointer term:
// result = page * 4096 + g_pageTable16S entry.
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return page * 4096 + g_pageTable16S[y & 63][x & 63];
}
|
||||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 128 columns by 64 rows (x >> 7, y >> 6); the page count per row
// is bw / 128 rounded up. The position inside the page comes from
// g_pageTable8. Result = bp * 256 + page * 8192 + table entry.
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = (bw + 127) >> 7;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 7);

	return bp * 256 + page * 8192 + g_pageTable8[y & 63][x & 127];
}
|
||||
|
||||
// Same as getPixelAddress8 but without a base pointer term:
// result = page * 8192 + g_pageTable8 entry.
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = (bw + 127) >> 7;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 7);

	return page * 8192 + g_pageTable8[y & 63][x & 127];
}
|
||||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 128 columns by 128 rows (x >> 7, y >> 7); the page count per row
// is bw / 128 rounded up. The position inside the page comes from
// g_pageTable4. Result = bp * 512 + page * 16384 + table entry.
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = (bw + 127) >> 7;
	const u32 page = (y >> 7) * pagesPerRow + (x >> 7);

	return bp * 512 + page * 16384 + g_pageTable4[y & 127][x & 127];
}
|
||||
|
||||
// Same as getPixelAddress4 but without a base pointer term:
// result = page * 16384 + g_pageTable4 entry.
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = (bw + 127) >> 7;
	const u32 page = (y >> 7) * pagesPerRow + (x >> 7);

	return page * 16384 + g_pageTable4[y & 127][x & 127];
}
|
||||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 32 rows; the position inside the page comes from
// g_pageTable32Z. Result = bp * 64 + page * 2048 + table entry.
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 5) * pagesPerRow + (x >> 6);

	return bp * 64 + page * 2048 + g_pageTable32Z[y & 31][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress32Z but without a base pointer term:
// result = page * 2048 + g_pageTable32Z entry.
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 5) * pagesPerRow + (x >> 6);

	return page * 2048 + g_pageTable32Z[y & 31][x & 63];
}
|
||||
|
@ -238,28 +239,28 @@ static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
|
|||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 64 rows; the position inside the page comes from
// g_pageTable16Z. Result = bp * 128 + page * 4096 + table entry.
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return bp * 128 + page * 4096 + g_pageTable16Z[y & 63][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress16Z but without a base pointer term:
// result = page * 4096 + g_pageTable16Z entry.
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return page * 4096 + g_pageTable16Z[y & 63][x & 63];
}
|
||||
|
||||
// Address of pixel (x, y) in a buffer at base bp with width bw.
// Pages span 64 columns by 64 rows; the position inside the page comes from
// g_pageTable16SZ. Result = bp * 128 + page * 4096 + table entry.
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return bp * 128 + page * 4096 + g_pageTable16SZ[y & 63][x & 63];
}
|
||||
|
||||
// Same as getPixelAddress16SZ but without a base pointer term:
// result = page * 4096 + g_pageTable16SZ entry.
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
{
	const u32 pagesPerRow = bw >> 6;
	const u32 page = (y >> 6) * pagesPerRow + (x >> 6);

	return page * 4096 + g_pageTable16SZ[y & 63][x & 63];
}
|
||||
|
@ -276,9 +277,11 @@ static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32
|
|||
|
||||
// Writes a 24-bit pixel: locates the 32-bit slot via getPixelAddress32 and
// copies the first three bytes of pixel (in memory order) into it. The
// slot's fourth byte is left untouched.
static __forceinline void writePixel24(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
	u8 *dst = (u8*)((u32*)pmem + getPixelAddress32(x, y, bp, bw));
	const u8 *src = (const u8*)&pixel;

	for (int i = 0; i < 3; i++)
	{
		dst[i] = src[i];
	}
}
|
||||
|
||||
static __forceinline void writePixel16(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
|
@ -305,20 +308,21 @@ static __forceinline void writePixel4(void* pmem, int x, int y, u32 pixel, u32 b
|
|||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
|
||||
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
||||
// Writes a 4-bit pixel into the low nibble of byte 3 of the 32-bit slot
// located via getPixelAddress4HL; the high nibble of that byte is preserved.
// NOTE(review): pixel is OR-ed in unmasked, so callers are presumably
// expected to pass a value below 16 -- confirm.
static __forceinline void writePixel4HL(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
	u8 *target = (u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
	const u8 keptHigh = *target & 0xf0;

	*target = keptHigh | pixel;
}
|
||||
|
||||
// Writes a 4-bit pixel into the high nibble of byte 3 of the 32-bit slot
// located via getPixelAddress4HH; the low nibble of that byte is preserved.
static __forceinline void writePixel4HH(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
	u8 *target = (u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
	const u8 keptLow = *target & 0x0f;

	*target = keptLow | (pixel << 4);
}
|
||||
|
||||
static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
|
@ -328,9 +332,11 @@ static __forceinline void writePixel32Z(void* pmem, int x, int y, u32 pixel, u32
|
|||
|
||||
// Writes a 24-bit Z value: locates the 32-bit slot via getPixelAddress32Z
// and copies the first three bytes of pixel (in memory order) into it. The
// slot's fourth byte is left untouched.
static __forceinline void writePixel24Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
	u8 *dst = (u8*)pmem + 4 * getPixelAddress32Z(x, y, bp, bw);
	const u8 *src = (const u8*)&pixel;

	for (int i = 0; i < 3; i++)
	{
		dst[i] = src[i];
	}
}
|
||||
|
||||
static __forceinline void writePixel16Z(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
|
@ -380,20 +386,22 @@ static __forceinline u32 readPixel4(const void* pmem, int x, int y, u32 bp, u32
|
|||
{
|
||||
u32 addr = getPixelAddress4(x, y, bp, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)
|
||||
return pix >> 4;
|
||||
else return pix & 0xf;
|
||||
return pix >> 4;
|
||||
else
|
||||
return pix & 0xf;
|
||||
}
|
||||
|
||||
// Returns the low nibble of byte 3 of the 4-byte group that
// getPixelAddress4HL(x, y, bp, bw) selects inside pmem.
static __forceinline u32 readPixel4HL(const void* pmem, int x, int y, u32 bp, u32 bw)
{
	const u8 *src = (const u8*)pmem + 4 * getPixelAddress4HL(x, y, bp, bw) + 3;
	const u8 packed = *src;

	return packed & 0x0f;
}
|
||||
|
||||
// Returns the high nibble of byte 3 of the 4-byte group that
// getPixelAddress4HH(x, y, bp, bw) selects inside pmem.
static __forceinline u32 readPixel4HH(const void* pmem, int x, int y, u32 bp, u32 bw)
{
	const u8 *src = (const u8*)pmem + 4 * getPixelAddress4HH(x, y, bp, bw) + 3;
	const u8 packed = *src;

	return packed >> 4;
}
|
||||
|
||||
|
@ -430,9 +438,11 @@ static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u3
|
|||
|
||||
// Copies the first three bytes of pixel's in-memory representation into the
// u32 element at index getPixelAddress32_0(x, y, bw); the element's fourth
// byte is not written.
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
	u32 *slot = &((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
	u8 *dst = (u8*)slot;
	u8 *src = (u8*) & pixel;

	for (int i = 0; i < 3; ++i)
		dst[i] = src[i];
}
|
||||
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
|
@ -459,20 +469,21 @@ static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32
|
|||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((u8*)pmem)[addr/2];
|
||||
if (addr & 0x1) ((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
|
||||
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
||||
}
|
||||
|
||||
// Stores a 4-bit pixel in the low nibble of byte 3 of the 4-byte group that
// getPixelAddress4HL_0(x, y, bw) selects inside pmem. The high nibble of that
// byte is left untouched.
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
	u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;

	// Only the low 4 bits of pixel belong in this nibble; masking keeps a wider
	// value from overwriting the preserved high nibble.
	*p = (*p & 0xf0) | (pixel & 0x0f);
}
|
||||
|
||||
// Stores a pixel in the high nibble of byte 3 of the 4-byte group that
// getPixelAddress4HH_0(x, y, bw) selects inside pmem; the low nibble of that
// byte is kept.
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
	u8 *dst = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
	const u8 keptLow = *dst & 0x0f;

	*dst = keptLow | (pixel << 4);
}
|
||||
|
||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
|
@ -482,9 +493,11 @@ static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u
|
|||
|
||||
// Copies the first three bytes of pixel's in-memory representation into the
// 4-byte slot at index getPixelAddress32Z_0(x, y, bw); the slot's fourth byte
// is not written.
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
	u8 *dst = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
	u8 *src = (u8*) & pixel;

	for (int i = 0; i < 3; ++i)
		dst[i] = src[i];
}
|
||||
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
|
@ -534,6 +547,7 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
|
|||
{
|
||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
||||
u8 pix = ((const u8*)pmem)[addr/2];
|
||||
|
||||
if (addr & 0x1)
|
||||
return pix >> 4;
|
||||
else
|
||||
|
@ -542,13 +556,13 @@ static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
|
|||
|
||||
// Returns the low nibble of byte 3 of the 4-byte group that
// getPixelAddress4HL_0(x, y, bw) selects inside pmem.
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
{
	const u8 *src = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
	const u8 packed = *src;

	return packed & 0x0f;
}
|
||||
|
||||
// Returns the high nibble of byte 3 of the 4-byte group that
// getPixelAddress4HH_0(x, y, bw) selects inside pmem.
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
{
	const u8 *src = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
	const u8 packed = *src;

	return packed >> 4;
}
|
||||
|
||||
|
|
|
@ -14,57 +14,60 @@ extern u8* pstart;
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
assert( (nSize%widthlimit) == 0 && widthlimit <= 4 );
|
||||
if ((gs.imageEndX-gs.trxpos.dx) % widthlimit)
|
||||
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
|
||||
|
||||
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
|
||||
{
|
||||
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
|
||||
for(; tempY < endY; ++tempY)
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(; tempY < endY; ++tempY)
|
||||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
{
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
if( nSize < widthlimit ) return NULL;
|
||||
if (nSize < widthlimit) return NULL;
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, buf[0], gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 1 )
|
||||
if (widthlimit > 1)
|
||||
{
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, buf[1], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, tempY % 2048, buf[1], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 2 )
|
||||
if (widthlimit > 2)
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, buf[2], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 2) % 2048, tempY % 2048, buf[2], gs.dstbuf.bw);
|
||||
|
||||
if( widthlimit > 3 )
|
||||
if (widthlimit > 3)
|
||||
{
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, buf[3], gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 3) % 2048, tempY % 2048, buf[3], gs.dstbuf.bw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
if (tempX >= gs.imageEndX)
|
||||
{
|
||||
assert(tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize*sizeof(T)/4 == 0 );
|
||||
assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -72,47 +75,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, u32 widthlim
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
if (widthlimit != 8 || ((gs.imageEndX-gs.trxpos.dx)%widthlimit))
|
||||
if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit))
|
||||
{
|
||||
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
for(; tempY < endY; ++tempY)
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if( tempX >= gs.imageEndX )
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( /*(nSize%widthlimit) == 0 &&*/ widthlimit == 8 );
|
||||
for(; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3*widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return NULL;
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
||||
wp(pstart, tempX%2048, tempY%2048, *(u32*)(buf+0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *(u32*)(buf+3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *(u32*)(buf+6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *(u32*)(buf+9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *(u32*)(buf+12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *(u32*)(buf+15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *(u32*)(buf+18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *(u32*)(buf+21), gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
|
@ -122,18 +92,55 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
|
|||
}
|
||||
else
|
||||
{
|
||||
if ( nSize < 0 )
|
||||
{
|
||||
/* extracted too much */
|
||||
assert( (nSize%3)==0 && nSize > -24 );
|
||||
tempX += nSize/3;
|
||||
nSize = 0;
|
||||
}
|
||||
assert( gs.imageTransfer == -1 || nSize == 0 );
|
||||
assert(gs.imageTransfer == -1 || nSize == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(/*(nSize%widthlimit) == 0 &&*/ widthlimit == 8);
|
||||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return NULL;
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
||||
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf + 0), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *(u32*)(buf + 3), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *(u32*)(buf + 6), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 3) % 2048, tempY % 2048, *(u32*)(buf + 9), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 4) % 2048, tempY % 2048, *(u32*)(buf + 12), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 5) % 2048, tempY % 2048, *(u32*)(buf + 15), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 6) % 2048, tempY % 2048, *(u32*)(buf + 18), gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nSize < 0)
|
||||
{
|
||||
/* extracted too much */
|
||||
assert((nSize % 3) == 0 && nSize > -24);
|
||||
tempX += nSize / 3;
|
||||
nSize = 0;
|
||||
}
|
||||
|
||||
assert(gs.imageTransfer == -1 || nSize == 0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -141,73 +148,84 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, u32 widthl
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
for(; tempY < endY; ++tempY)
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for(; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
if ( widthlimit > 2 )
|
||||
|
||||
if (widthlimit > 2)
|
||||
{
|
||||
wp(pstart, (tempX+2)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+3)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 2) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 3) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
|
||||
if( widthlimit > 4 )
|
||||
if (widthlimit > 4)
|
||||
{
|
||||
wp(pstart, (tempX+4)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+5)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 4) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 5) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
|
||||
if( widthlimit > 6 )
|
||||
if (widthlimit > 6)
|
||||
{
|
||||
wp(pstart, (tempX+6)%2048, tempY%2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+7)%2048, tempY%2048, *buf>>4, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 6) % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *buf >> 4, gs.dstbuf.bw);
|
||||
buf++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( tempX >= gs.imageEndX )
|
||||
if (tempX >= gs.imageEndX)
|
||||
{
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert( gs.imageTransfer == -1 || (nSize/32) == 0 );
|
||||
assert(gs.imageTransfer == -1 || (nSize / 32) == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_: return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
|
||||
case PSM_4_: return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
|
||||
case PSM_24_: return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
|
||||
}
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
static __forceinline const T *TransmitHostLocalY(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 endY, const T *buf)
|
||||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_:
|
||||
return TransmitHostLocalY_<T>(wp, widthlimit, endY, buf);
|
||||
|
||||
case PSM_4_:
|
||||
return TransmitHostLocalY_4<T>(wp, widthlimit, endY, buf);
|
||||
|
||||
case PSM_24_:
|
||||
return TransmitHostLocalY_24<T>(wp, widthlimit, endY, buf);
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0], gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
buf += pitch - fracX;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -215,14 +233,16 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, *(u32*)buf, gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
|
||||
}
|
||||
buf += 3*(pitch-fracX);
|
||||
|
||||
buf += 3 * (pitch - fracX);
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
@ -230,30 +250,39 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
for(u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for(tempX = startX; tempX < gs.imageEndX; tempX+=2, buf++)
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++)
|
||||
{
|
||||
wp(pstart, tempX%2048, (tempY+tempi)%2048, buf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX+1)%2048, (tempY+tempi)%2048, buf[0]>>4, gs.dstbuf.bw);
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
|
||||
}
|
||||
buf += (pitch-fracX)/2;
|
||||
|
||||
buf += (pitch - fracX) / 2;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_: return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
case PSM_4_: return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
case PSM_24_: return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
}
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
static __forceinline const T *TransmitHostLocalX(TransferData data, _writePixel_0 wp, u32 widthlimit, u32 blockheight, u32 startX, const T *buf)
|
||||
{
|
||||
switch (data.psm)
|
||||
{
|
||||
case PSM_:
|
||||
return TransmitHostLocalX_<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
|
||||
case PSM_4_:
|
||||
return TransmitHostLocalX_4<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
|
||||
case PSM_24_:
|
||||
return TransmitHostLocalX_24<T>(wp, widthlimit, blockheight, startX, buf);
|
||||
}
|
||||
|
||||
assert(0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// calculate pitch in source buffer
|
||||
static __forceinline u32 TransPitch(u32 pitch, u32 size)
|
||||
|
|
|
@ -70,21 +70,22 @@ extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file
|
|||
// declare linux equivalents
|
||||
// Allocates size bytes whose address is a multiple of align.
// The distance back to the pointer returned by malloc is stored as a u16 in
// the two bytes just before the returned address, which is where
// pcsx2_aligned_free looks for it. Returns NULL when malloc fails.
static __forceinline void* pcsx2_aligned_malloc(size_t size, size_t align)
{
	// align is used as a divisor and the offset must fit in 16 bits.
	assert(align > 0 && align < 0x10000);

	// The offset computed below ranges up to align + 2, so that much slack is
	// needed; size + align alone would be two bytes short in the worst case.
	char* p = (char*)malloc(size + align + 2);

	if (p == NULL) return NULL;

	// Take the remainder on the full-width address rather than on a value
	// truncated to int.
	int off = (int)(2 + align - ((uptr)(p + 2) % align));

	p += off;
	*(u16*)(p - 2) = off;

	return p;
}
|
||||
|
||||
// Frees a pointer obtained from pcsx2_aligned_malloc; NULL is ignored.
// The u16 stored in the two bytes before pmem is the distance back to the
// address that is handed to free().
static __forceinline void pcsx2_aligned_free(void* pmem)
{
	if (pmem == NULL) return;

	char* aligned = (char*)pmem;
	const int backOffset = (int)*(u16*)(aligned - 2);

	free(aligned - backOffset);
}
|
||||
|
||||
|
@ -98,7 +99,7 @@ inline unsigned long timeGetTime()
|
|||
timeb t;
|
||||
ftime(&t);
|
||||
|
||||
return (unsigned long)(t.time*1000+t.millitm);
|
||||
return (unsigned long)(t.time*1000 + t.millitm);
|
||||
}
|
||||
|
||||
struct RECT
|
||||
|
@ -113,20 +114,24 @@ struct RECT
|
|||
#define min(a,b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
|
||||
// Four ints; presumably an origin (x, y) plus a width and height - TODO confirm.
typedef struct
{
	int x;
	int y;
	int w;
	int h;
} Rect;

// Two ints naming a position.
typedef struct
{
	int x;
	int y;
} Point;

// Four ints; presumably two corner points (x0, y0) and (x1, y1) - TODO confirm.
typedef struct
{
	int x0;
	int y0;
	int x1;
	int y1;
} Rect2;

// A position plus a third int c; the meaning of c is not visible here.
typedef struct
{
	int x;
	int y;
	int c;
} PointC;
|
||||
|
||||
|
@ -145,6 +150,7 @@ typedef struct {
|
|||
#define GSOPTION_LOADED 0x8000
|
||||
|
||||
//Configuration values.
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u8 mrtdepth; // write color in render target
|
||||
|
@ -227,18 +233,18 @@ extern void __LogToConsole(const char *fmt, ...);
|
|||
|
||||
// printf-style logging entry points; the definitions live elsewhere.
// Grouping below follows the original declaration order.
namespace ZZLog
{
	// General output.
	void Message(const char *fmt, ...);
	void Log(const char *fmt, ...);
	void WriteToConsole(const char *fmt, ...);
	void Print(const char *fmt, ...);

	// Per-subsystem logs (names suggest register, primitive and GS traffic - TODO confirm).
	void Greg_Log(const char *fmt, ...);
	void Prim_Log(const char *fmt, ...);
	void GS_Log(const char *fmt, ...);

	// Severity-named logs.
	void Debug_Log(const char *fmt, ...);
	void Warn_Log(const char *fmt, ...);
	void Error_Log(const char *fmt, ...);
}
|
||||
|
||||
#define REG64(name) \
|
||||
|
@ -247,14 +253,14 @@ union name \
|
|||
u64 i64; \
|
||||
u32 ai32[2]; \
|
||||
struct { \
|
||||
|
||||
|
||||
#define REG128(name)\
|
||||
union name \
|
||||
{ \
|
||||
u64 ai64[2]; \
|
||||
u32 ai32[4]; \
|
||||
struct { \
|
||||
|
||||
|
||||
#define REG64_(prefix, name) REG64(prefix##name)
|
||||
#define REG128_(prefix, name) REG128(prefix##name)
|
||||
|
||||
|
@ -266,13 +272,13 @@ union name \
|
|||
{ \
|
||||
u64 i64; \
|
||||
u32 ai32[2]; \
|
||||
|
||||
|
||||
#define REG128_SET(name)\
|
||||
union name \
|
||||
{ \
|
||||
u64 ai64[2]; \
|
||||
u32 ai32[4]; \
|
||||
|
||||
|
||||
#define REG_SET_END };
|
||||
|
||||
extern void LoadConfig();
|
||||
|
@ -310,16 +316,18 @@ static __forceinline u64 GetTickFrequency()
|
|||
|
||||
// Returns the current gettimeofday() reading as a single tick count:
// tv_sec scaled by GetTickFrequency(), plus tv_usec.
static __forceinline u64 GetCPUTicks()
{
	struct timeval now;
	gettimeofday(&now, NULL);

	const u64 scaledSeconds = (u64)now.tv_sec * GetTickFrequency();

	return scaledSeconds + now.tv_usec;
}
|
||||
|
||||
#else
|
||||
static __aligned16 LARGE_INTEGER lfreq;
|
||||
|
||||
// Queries the performance-counter frequency and stores it in lfreq.
static __forceinline void InitCPUTicks()
{
	QueryPerformanceFrequency(&lfreq);
}
|
||||
|
||||
static __forceinline u64 GetTickFrequency()
|
||||
|
@ -330,42 +338,47 @@ static __forceinline u64 GetTickFrequency()
|
|||
// Returns the current QueryPerformanceCounter reading.
static __forceinline u64 GetCPUTicks()
{
	LARGE_INTEGER now;
	QueryPerformanceCounter(&now);

	const u64 ticks = now.QuadPart;

	return ticks;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Smart pointer for objects that are reference counted through AddRef() and
// Release(). Every CInterfacePtr that holds a non-NULL pointer owns exactly
// one reference, which it drops in its destructor, in release(), or when it is
// re-assigned.
template <typename T>
class CInterfacePtr
{
	public:
		inline CInterfacePtr() : ptr(NULL) {}

		// Takes an additional reference on newptr (if non-NULL).
		inline explicit CInterfacePtr(T* newptr) : ptr(newptr) { if (ptr != NULL) ptr->AddRef(); }

		// Copies share the object and take their own reference; without this the
		// compiler-generated copy would Release() the same reference twice.
		inline CInterfacePtr(const CInterfacePtr& other) : ptr(other.ptr) { if (ptr != NULL) ptr->AddRef(); }

		inline ~CInterfacePtr() { if (ptr != NULL) ptr->Release(); }

		// Dereferences the held object; the pointer must not be NULL.
		inline T& operator*() { assert(ptr != NULL); return *ptr; }
		inline T* operator->() { return ptr; }
		inline T* get() { return ptr; }

		// Drops the held reference (if any) and resets the pointer to NULL.
		inline void release()
		{
			if (ptr != NULL) { ptr->Release(); ptr = NULL; }
		}

		inline operator T*() { return ptr; }

		inline bool operator==(T* rhs) { return ptr == rhs; }
		inline bool operator!=(T* rhs) { return ptr != rhs; }

		// Re-targets the pointer. The new object is referenced before the old
		// one is released, so assigning the pointer already held cannot drop
		// its last reference in between.
		inline CInterfacePtr& operator= (T* newptr)
		{
			if (newptr != NULL) newptr->AddRef();

			T* previous = ptr;
			ptr = newptr;

			if (previous != NULL) previous->Release();

			return *this;
		}

		// Copy assignment follows the same reference-first order as above.
		inline CInterfacePtr& operator= (const CInterfacePtr& other)
		{
			return (*this = other.ptr);
		}

	private:
		T* ptr;
};
|
||||
|
||||
|
||||
|
@ -380,24 +393,25 @@ void DVProfClear(); // clears all the profilers
|
|||
|
||||
class DVProfileFunc
|
||||
{
|
||||
public:
|
||||
u32 dwUserData;
|
||||
DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
|
||||
DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
|
||||
~DVProfileFunc() { DVProfEnd(dwUserData); }
|
||||
public:
|
||||
u32 dwUserData;
|
||||
DVProfileFunc(char* pname) { DVProfRegister(pname); dwUserData = 0; }
|
||||
DVProfileFunc(char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
|
||||
~DVProfileFunc() { DVProfEnd(dwUserData); }
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
class DVProfileFunc
|
||||
{
|
||||
public:
|
||||
u32 dwUserData;
|
||||
static __forceinline DVProfileFunc(char* pname) {}
|
||||
static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
|
||||
~DVProfileFunc() {}
|
||||
|
||||
public:
|
||||
u32 dwUserData;
|
||||
static __forceinline DVProfileFunc(char* pname) {}
|
||||
static __forceinline DVProfileFunc(char* pname, u32 dwUserData) { }
|
||||
~DVProfileFunc() {}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif // UTIL_H_INCLUDED
|
||||
|
|
|
@ -584,7 +584,7 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
|
|||
return false;
|
||||
}
|
||||
|
||||
// First try to draw frame from targets. It's
|
||||
// First try to draw frame from targets.
|
||||
inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
{
|
||||
// get the start and end addresses of the buffer
|
||||
|
@ -662,9 +662,15 @@ inline bool RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
|
|||
// The same as the previous, but from memory.
|
||||
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
|
||||
// this is the function that does it.
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int interlace, int bInterlace)
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
{
|
||||
|
||||
// get the start and end addresses of the buffer
|
||||
int bpp = RenderGetBpp(texframe.psm);
|
||||
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
|
||||
|
||||
int start, end;
|
||||
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
|
||||
|
||||
for (list<CRenderTarget*>::iterator it = listTargs.begin(); it != listTargs.end(); ++it)
|
||||
{
|
||||
(*it)->Resolve();
|
||||
|
@ -676,34 +682,36 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
if ((pmemtarg == NULL) || (bInterlace >= 2))
|
||||
ZZLog::Error_Log("CRCR Check for memory shader fault.");
|
||||
|
||||
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
|
||||
|
||||
SetShaderCaller("RenderCheckForMemory");
|
||||
|
||||
SetTexVariablesInt(0, g_bCRTCBilinear ? 2 : 0, texframe, pmemtarg, &ppsCRTC[bInterlace], 1);
|
||||
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
|
||||
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
|
||||
|
||||
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
|
||||
|
||||
// finally render from the memory (note that the stencil buffer will keep previous regions)
|
||||
Vector v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
|
||||
|
||||
Vector v;
|
||||
|
||||
// Fixme: Why is this here?
|
||||
// We should probably call RenderSetTargetBitTex instead.
|
||||
if (g_bCRTCBilinear)
|
||||
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(texframe.tw, texframe.th, -0.5f, -0.5f), "g_fBitBltTex");
|
||||
v = RenderSetTargetBitTex(texframe.tw, texframe.th, -0.5f, -0.5f, INTERLACE_COUNT);
|
||||
else
|
||||
ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, Vector(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th), "g_fBitBltTex");
|
||||
v = RenderSetTargetBitTex(1, 1, -0.5f / (float)texframe.tw, -0.5f / (float)texframe.th, INTERLACE_COUNT);
|
||||
|
||||
// finally render from the memory (note that the stencil buffer will keep previous regions)
|
||||
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
|
||||
|
||||
v = RenderSetTargetBitTrans(texframe.th);
|
||||
|
||||
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
|
||||
|
||||
Vector valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
|
||||
|
||||
cgGLSetTextureParameter(ppsCRTC[bInterlace].sMemory, pmemtarg->ptex->tex);
|
||||
cgGLEnableTextureParameter(ppsCRTC[bInterlace].sMemory);
|
||||
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
|
||||
|
||||
SETPIXELSHADER(ppsCRTC[bInterlace].prog);
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
}
|
||||
|
||||
|
@ -909,7 +917,6 @@ void ZeroGS::RenderCRTC(int interlace)
|
|||
// start from the last circuit
|
||||
for (int i = !PMODE->SLBG; i >= 0; --i)
|
||||
{
|
||||
|
||||
tex0Info& texframe = dispinfo[i];
|
||||
|
||||
if (texframe.th <= 1) continue;
|
||||
|
@ -928,7 +935,7 @@ void ZeroGS::RenderCRTC(int interlace)
|
|||
|
||||
// if we could not draw the image from the targets, do it from memory
|
||||
if (!RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace))
|
||||
RenderCheckForMemory(texframe, listTargs, interlace, bInterlace);
|
||||
RenderCheckForMemory(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
|
|
|
@ -337,7 +337,7 @@ inline int ZeroGS::VB::CheckFrameResolveRender(int tbp)
|
|||
}
|
||||
|
||||
// After frame resetting, it is possible that 16 to 32 or 32 to 16 (color bits) conversion should be made.
|
||||
inline void ZeroGS::VB::CheckFrame16vs32Convesion()
|
||||
inline void ZeroGS::VB::CheckFrame16vs32Conversion()
|
||||
{
|
||||
if (prndr->status & CRenderTarget::TS_NeedConvert32)
|
||||
{
|
||||
|
@ -393,7 +393,7 @@ void ZeroGS::VB::CheckFrame(int tbp)
|
|||
|
||||
bChanged = CheckFrameResolveRender(tbp);
|
||||
|
||||
CheckFrame16vs32Convesion();
|
||||
CheckFrame16vs32Conversion();
|
||||
}
|
||||
else if (bNeedZCheck)
|
||||
{
|
||||
|
|
|
@ -1,22 +1,24 @@
|
|||
#ifndef RasterFont_Header
|
||||
#define RasterFont_Header
|
||||
|
||||
// Bitmap text helper: declares the per-character cell size and two
// string-printing functions. The definitions live elsewhere.
class RasterFont
{
	protected:
		int fontOffset;

	public:
		RasterFont();
		~RasterFont();

		static int debug;

		// Character cell size.
		enum
		{
			char_width = 10,
			char_height = 15
		};

		// Prints s at (x, y, z); z defaults to 0.0.
		void printString(const char *s, double x, double y, double z = 0.0);

		// Prints s at height y given screen_width; z defaults to 0.0.
		void printCenteredString(const char *s, double y, int screen_width, double z = 0.0);
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,23 +22,27 @@
|
|||
#define TARGET_VIRTUAL_KEY 0x80000000
|
||||
#include "PS2Edefs.h"
|
||||
|
||||
// Uploads the constant vector (1, 1, 1, 1) to the shader's sOneColor
// parameter via cgGLSetParameter4fv and returns that vector.
inline Vector DefaultOneColor(FRAGMENTSHADER ptr)
{
	Vector v = Vector(1, 1, 1, 1);
	cgGLSetParameter4fv(ptr.sOneColor, v);
	return v ;
}
|
||||
|
||||
namespace ZeroGS {
|
||||
namespace ZeroGS
|
||||
{
|
||||
|
||||
inline u32 GetFrameKey (int fbp, int fbw, VB& curvb);
|
||||
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb);
|
||||
|
||||
// manages render targets
|
||||
class CRenderTargetMngr
|
||||
{
|
||||
// manages render targets
|
||||
|
||||
class CRenderTargetMngr
|
||||
{
|
||||
public:
|
||||
typedef map<u32, CRenderTarget*> MAPTARGETS;
|
||||
|
||||
enum TargetOptions {
|
||||
enum TargetOptions
|
||||
{
|
||||
TO_DepthBuffer = 1,
|
||||
TO_StrictHeight = 2, // height returned has to be the same as requested
|
||||
TO_Virtual = 4
|
||||
|
@ -50,16 +54,17 @@ namespace ZeroGS {
|
|||
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
|
||||
|
||||
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
|
||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb) {
|
||||
MAPTARGETS::iterator it = mapTargets.find (GetFrameKey(fbp, fbw, curvb));
|
||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
|
||||
{
|
||||
MAPTARGETS::iterator it = mapTargets.find(GetFrameKey(fbp, fbw, curvb));
|
||||
|
||||
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
|
||||
{
|
||||
printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
|
||||
printf("%x %x\n", fbp, fbw);
|
||||
for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
|
||||
printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
|
||||
}*/
|
||||
/* if (fbp == 0x3600 && fbw == 0x100 && it == mapTargets.end())
|
||||
{
|
||||
printf("%x\n", GetFrameKey(fbp, fbw, curvb)) ;
|
||||
printf("%x %x\n", fbp, fbw);
|
||||
for(MAPTARGETS::iterator it1 = mapTargets.begin(); it1 != mapTargets.end(); ++it1)
|
||||
printf ("\t %x %x %x %x\n", it1->second->fbw, it1->second->fbh, it1->second->psm, it1->second->fbp);
|
||||
}*/
|
||||
return it != mapTargets.end() ? it->second : NULL;
|
||||
}
|
||||
|
||||
|
@ -68,8 +73,9 @@ namespace ZeroGS {
|
|||
|
||||
// resolves all targets within a range
|
||||
__forceinline void Resolve(int start, int end);
|
||||
__forceinline void ResolveAll() {
|
||||
for(MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it )
|
||||
__forceinline void ResolveAll()
|
||||
{
|
||||
for (MAPTARGETS::iterator it = mapTargets.begin(); it != mapTargets.end(); ++it)
|
||||
it->second->Resolve();
|
||||
}
|
||||
|
||||
|
@ -77,12 +83,13 @@ namespace ZeroGS {
|
|||
void DestroyIntersecting(CRenderTarget* prndr);
|
||||
|
||||
// promotes a target from virtual to real
|
||||
inline CRenderTarget* Promote(u32 key) {
|
||||
assert( !(key & TARGET_VIRTUAL_KEY) );
|
||||
inline CRenderTarget* Promote(u32 key)
|
||||
{
|
||||
assert(!(key & TARGET_VIRTUAL_KEY));
|
||||
|
||||
// promote to regular targ
|
||||
CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key|TARGET_VIRTUAL_KEY);
|
||||
assert( it != mapTargets.end() );
|
||||
CRenderTargetMngr::MAPTARGETS::iterator it = mapTargets.find(key | TARGET_VIRTUAL_KEY);
|
||||
assert(it != mapTargets.end());
|
||||
|
||||
CRenderTarget* ptarg = it->second;
|
||||
mapTargets.erase(it);
|
||||
|
@ -90,31 +97,36 @@ namespace ZeroGS {
|
|||
DestroyIntersecting(ptarg);
|
||||
|
||||
it = mapTargets.find(key);
|
||||
if( it != mapTargets.end() ) {
|
||||
|
||||
if (it != mapTargets.end())
|
||||
{
|
||||
DestroyTarg(it->second);
|
||||
it->second = ptarg;
|
||||
}
|
||||
else
|
||||
mapTargets[key] = ptarg;
|
||||
|
||||
if( g_GameSettings & GAME_RESOLVEPROMOTED )
|
||||
ptarg->status = CRenderTarget::TS_Resolved;
|
||||
else
|
||||
ptarg->status = CRenderTarget::TS_NeedUpdate;
|
||||
return ptarg;
|
||||
if (g_GameSettings & GAME_RESOLVEPROMOTED)
|
||||
ptarg->status = CRenderTarget::TS_Resolved;
|
||||
else
|
||||
ptarg->status = CRenderTarget::TS_NeedUpdate;
|
||||
|
||||
return ptarg;
|
||||
}
|
||||
|
||||
static void DestroyTarg(CRenderTarget* ptarg);
|
||||
|
||||
MAPTARGETS mapTargets, mapDummyTargs;
|
||||
};
|
||||
};
|
||||
|
||||
class CMemoryTargetMngr
|
||||
{
|
||||
|
||||
class CMemoryTargetMngr
|
||||
{
|
||||
public:
|
||||
CMemoryTargetMngr() : curstamp(0) {}
|
||||
|
||||
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
|
||||
CMemoryTarget* MemoryTarget_SearchExistTarget (int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
||||
|
||||
void Destroy(); // destroy all targs
|
||||
|
@ -122,26 +134,28 @@ namespace ZeroGS {
|
|||
void ClearRange(int starty, int endy); // set all targets to cleared
|
||||
void DestroyCleared(); // flush all cleared targes
|
||||
void DestroyOldest();
|
||||
|
||||
|
||||
list<CMemoryTarget> listTargets, listClearedTargets;
|
||||
u32 curstamp;
|
||||
|
||||
private:
|
||||
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
|
||||
};
|
||||
};
|
||||
|
||||
class CBitwiseTextureMngr
|
||||
{
|
||||
class CBitwiseTextureMngr
|
||||
{
|
||||
public:
|
||||
~CBitwiseTextureMngr() { Destroy(); }
|
||||
|
||||
void Destroy();
|
||||
|
||||
// since GetTex can delete textures to free up mem, it is dangerous if using that texture, so specify at least one other tex to save
|
||||
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete) {
|
||||
__forceinline u32 GetTex(u32 bitvalue, u32 ptexDoNotDelete)
|
||||
{
|
||||
map<u32, u32>::iterator it = mapTextures.find(bitvalue);
|
||||
if( it != mapTextures.end() )
|
||||
return it->second;
|
||||
|
||||
if (it != mapTextures.end()) return it->second;
|
||||
|
||||
return GetTexInt(bitvalue, ptexDoNotDelete);
|
||||
}
|
||||
|
||||
|
@ -149,99 +163,115 @@ namespace ZeroGS {
|
|||
u32 GetTexInt(u32 bitvalue, u32 ptexDoNotDelete);
|
||||
|
||||
map<u32, u32> mapTextures;
|
||||
};
|
||||
};
|
||||
|
||||
// Manages a list of half-open integer ranges [start, end).
class CRangeManager
{
	public:
		// Pre-reserves room for 16 ranges so early insertions do not reallocate.
		CRangeManager()
		{
			ranges.reserve(16);
		}

		// [start, end)
		struct RANGE
		{
			// Zero-initialise so a default-constructed RANGE never carries indeterminate values.
			RANGE() : start(0), end(0) {}

			inline RANGE(int start, int end) : start(start), end(end) {}

			int start, end;
		};

		// works in semi logN
		void Insert(int start, int end);
		void RangeSanityCheck();

		// Drops every stored range (the vector's capacity is kept).
		inline void Clear()
		{
			ranges.resize(0);
		}

		std::vector<RANGE> ranges; // organized in ascending order, non-intersecting
};
|
||||
|
||||
extern CRenderTargetMngr s_RTs, s_DepthRTs;
|
||||
extern CBitwiseTextureMngr s_BitwiseTextures;
|
||||
extern CMemoryTargetMngr g_MemTargs;
|
||||
extern CRenderTargetMngr s_RTs, s_DepthRTs;
|
||||
extern CBitwiseTextureMngr s_BitwiseTextures;
|
||||
extern CMemoryTargetMngr g_MemTargs;
|
||||
|
||||
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
|
||||
extern u8 s_AAx, s_AAy, s_AAz, s_AAw;
|
||||
|
||||
// Real rendered width, depends on AA and AAneg.
|
||||
inline int RW(int tbw) {
|
||||
if (s_AAx >= s_AAz)
|
||||
return (tbw << ( s_AAx - s_AAz ));
|
||||
else
|
||||
return (tbw >> ( s_AAz - s_AAx ));
|
||||
}
|
||||
// Real rendered width, depends on AA and AAneg.
|
||||
inline int RW(int tbw)
|
||||
{
|
||||
if (s_AAx >= s_AAz)
|
||||
return (tbw << (s_AAx - s_AAz));
|
||||
else
|
||||
return (tbw >> (s_AAz - s_AAx));
|
||||
}
|
||||
|
||||
// Real rendered height, depends on AA and AAneg.
|
||||
inline int RH(int tbh) {
|
||||
if (s_AAy >= s_AAw)
|
||||
return (tbh << ( s_AAy - s_AAw ));
|
||||
else
|
||||
return (tbh >> ( s_AAw - s_AAy ));
|
||||
}
|
||||
// Real rendered height, depends on AA and AAneg.
|
||||
inline int RH(int tbh)
|
||||
{
|
||||
if (s_AAy >= s_AAw)
|
||||
return (tbh << (s_AAy - s_AAw));
|
||||
else
|
||||
return (tbh >> (s_AAw - s_AAy));
|
||||
}
|
||||
|
||||
/* inline void CreateTargetsList(int start, int end, list<ZeroGS::CRenderTarget*>& listTargs) {
|
||||
s_DepthRTs.GetTargs(start, end, listTargs);
|
||||
s_RTs.GetTargs(start, end, listTargs);
|
||||
}*/
|
||||
|
||||
// This pattern of functions is called 3 times, so I add creating Targets list into one.
|
||||
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end) {
|
||||
list<ZeroGS::CRenderTarget*> listTargs;
|
||||
s_DepthRTs.GetTargs(start, end, listTargs);
|
||||
s_RTs.GetTargs(start, end, listTargs);
|
||||
return listTargs;
|
||||
}
|
||||
// This pattern of functions is called 3 times, so I add creating Targets list into one.
|
||||
inline list<ZeroGS::CRenderTarget*> CreateTargetsList(int start, int end)
|
||||
{
|
||||
list<ZeroGS::CRenderTarget*> listTargs;
|
||||
s_DepthRTs.GetTargs(start, end, listTargs);
|
||||
s_RTs.GetTargs(start, end, listTargs);
|
||||
return listTargs;
|
||||
}
|
||||
|
||||
extern Vector g_vdepth;
|
||||
extern int icurctx;
|
||||
extern Vector g_vdepth;
|
||||
extern int icurctx;
|
||||
|
||||
extern VERTEXSHADER pvsBitBlt;
|
||||
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
|
||||
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
|
||||
extern GLuint vboRect;
|
||||
extern VERTEXSHADER pvsBitBlt;
|
||||
extern FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
|
||||
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
|
||||
extern GLuint vboRect;
|
||||
|
||||
// Unworking
|
||||
#define PSMPOSITION 28
|
||||
|
||||
// Encodes a frame's fbw and fbp into the key used by the target manager.
|
||||
// These are 3 variants of one function; the key depends on fbp and fbw.
|
||||
// Builds the lookup key for a frame: fbw shifted left by 16 bits, OR-ed with fbp.
inline u32 GetFrameKey(const frameInfo& frame)
{
	return (((frame.fbw) << 16) | (frame.fbp));
}
|
||||
// Same key as the frameInfo overload, read from a render target:
// fbw shifted left by 16 bits, OR-ed with fbp.
inline u32 GetFrameKey(CRenderTarget* frame)
{
	return (((frame->fbw) << 16) | (frame->fbp));
}
|
||||
|
||||
// Same key built from raw values: fbw shifted left by 16 bits, OR-ed with fbp.
// curvb is accepted but not used by this implementation.
inline u32 GetFrameKey(int fbp, int fbw, VB& curvb)
{
	return (((fbw) << 16) | (fbp));
}
|
||||
|
||||
// Currently a pass-through: returns fbh (converted to u16).
// fbp and fbhCalc are accepted but not used.
inline u16 ShiftHeight(int fbh, int fbp, int fbhCalc)
{
	return fbh;
}
|
||||
|
||||
//FIXME: this code for P4 ad KH1. It should not be such strange!
|
||||
//FIXME: this code is for P4 and KH1. It should not be so strange!
|
||||
//Dummy targets were deleted from mapTargets, but not erased.
|
||||
inline u32 GetFrameKeyDummy (const frameInfo& frame) {
|
||||
inline u32 GetFrameKeyDummy(const frameInfo& frame)
|
||||
{
|
||||
// if (frame.fbp > 0x2000 && ZZOgl_fbh_Calc(frame) < 0x400 && ZZOgl_fbh_Calc(frame) != frame.fbh)
|
||||
// printf ("Z %x %x %x %x\n", frame.fbh, frame.fbhCalc, frame.fbp, ZZOgl_fbh_Calc(frame));
|
||||
// height over 1024 would shrink to 1024, so dummy targets with calculated size more than 0x400 should be
|
||||
|
@ -252,7 +282,8 @@ inline u32 GetFrameKeyDummy (const frameInfo& frame) {
|
|||
return (((frame.fbw) << 16) | frame.fbh);
|
||||
}
|
||||
|
||||
inline u32 GetFrameKeyDummy ( CRenderTarget* frame ) {
|
||||
inline u32 GetFrameKeyDummy(CRenderTarget* frame)
|
||||
{
|
||||
if (/*frame->fbp > 0x2000 && */ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm) < 0x300)
|
||||
return (((frame->fbw) << 16) | ZZOgl_fbh_Calc(frame->fbp, frame->fbw, frame->psm));
|
||||
else
|
||||
|
|
|
@ -106,7 +106,7 @@ extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);
|
|||
extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut);
|
||||
extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut);
|
||||
|
||||
extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters );
|
||||
extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters);
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -17,6 +17,7 @@
|
|||
template <class T> inline T RAD_2_DEG(T radians) { return (radians * (T)57.29577951); }
|
||||
|
||||
class Transform;
|
||||
|
||||
class TransformMatrix;
|
||||
|
||||
typedef float dReal;
|
||||
|
@ -35,63 +36,57 @@ inline dReal* inv4(const dReal* pf, dReal* pfres);
|
|||
|
||||
// class used for 3 and 4 dim vectors and quaternions
|
||||
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
|
||||
|
||||
class Vector
|
||||
{
|
||||
public:
|
||||
dReal x, y, z, w;
|
||||
public:
|
||||
dReal x, y, z, w;
|
||||
|
||||
Vector() : x(0), y(0), z(0), w(0) {}
|
||||
Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
|
||||
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
|
||||
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
|
||||
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
|
||||
Vector() : x(0), y(0), z(0), w(0) {}
|
||||
Vector(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
|
||||
Vector(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
|
||||
Vector(const Vector &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
|
||||
Vector(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
|
||||
dReal operator[](int i) const { return (&x)[i]; }
|
||||
dReal& operator[](int i) { return (&x)[i]; }
|
||||
|
||||
// casting operators
|
||||
operator dReal*() { return &x; }
|
||||
operator const dReal*() const { return (const dReal*)&x; }
|
||||
|
||||
// SCALAR FUNCTIONS
|
||||
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
|
||||
inline void normalize() { normalize4(&x, &x); }
|
||||
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
|
||||
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
|
||||
inline void SetColor(u32 color)
|
||||
{
|
||||
x = (color & 0xff) / 255.0f;
|
||||
y = ((color >> 8) & 0xff) / 255.0f;
|
||||
z = ((color >> 16) & 0xff) / 255.0f;
|
||||
}
|
||||
|
||||
dReal operator[](int i) const { return (&x)[i]; }
|
||||
dReal& operator[](int i) { return (&x)[i]; }
|
||||
|
||||
// casting operators
|
||||
operator dReal* () { return &x; }
|
||||
operator const dReal* () const { return (const dReal*)&x; }
|
||||
|
||||
// SCALAR FUNCTIONS
|
||||
inline dReal dot(const Vector &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
|
||||
inline void normalize() { normalize4(&x, &x); }
|
||||
|
||||
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
|
||||
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
|
||||
inline void SetColor(u32 color)
|
||||
{
|
||||
x = (color & 0xff) / 255.0f;
|
||||
y = ((color >> 8) & 0xff) / 255.0f;
|
||||
z = ((color >> 16) & 0xff) / 255.0f;
|
||||
}
|
||||
|
||||
// 3 dim cross product, w is not touched
|
||||
/// this = this x v
|
||||
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
|
||||
|
||||
/// this = u x v
|
||||
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
|
||||
|
||||
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
|
||||
inline Vector operator+(const Vector &r) const { Vector v; v.x = x+r.x; v.y = y+r.y; v.z = z+r.z; v.w = w+r.w; return v; }
|
||||
inline Vector operator-(const Vector &r) const { Vector v; v.x = x-r.x; v.y = y-r.y; v.z = z-r.z; v.w = w-r.w; return v; }
|
||||
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x*x; v.y = r.y*y; v.z = r.z*z; v.w = r.w*w; return v; }
|
||||
inline Vector operator*(dReal k) const { Vector v; v.x = k*x; v.y = k*y; v.z = k*z; v.w = k*w; return v; }
|
||||
|
||||
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
|
||||
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
|
||||
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
|
||||
|
||||
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
inline Vector& operator /= (const dReal _k) { dReal k=1/_k; x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
|
||||
friend Vector operator* (float f, const Vector& v);
|
||||
//friend ostream& operator<<(ostream& O, const Vector& v);
|
||||
//friend istream& operator>>(istream& I, Vector& v);
|
||||
// 3 dim cross product, w is not touched
|
||||
/// this = this x v
|
||||
inline void Cross(const Vector &v) { cross3(&x, &x, v); }
|
||||
/// this = u x v
|
||||
inline void Cross(const Vector &u, const Vector &v) { cross3(&x, u, v); }
|
||||
inline Vector operator-() const { Vector v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
|
||||
inline Vector operator+(const Vector &r) const { Vector v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
|
||||
inline Vector operator-(const Vector &r) const { Vector v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
|
||||
inline Vector operator*(const Vector &r) const { Vector v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
|
||||
inline Vector operator*(dReal k) const { Vector v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
|
||||
inline Vector& operator += (const Vector& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
|
||||
inline Vector& operator -= (const Vector& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
|
||||
inline Vector& operator *= (const Vector& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
|
||||
inline Vector& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
inline Vector& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
friend Vector operator*(float f, const Vector& v);
|
||||
//friend ostream& operator<<(ostream& O, const Vector& v);
|
||||
//friend istream& operator>>(istream& I, Vector& v);
|
||||
};
|
||||
|
||||
inline Vector operator* (float f, const Vector& left)
|
||||
inline Vector operator*(float f, const Vector& left)
|
||||
{
|
||||
Vector v;
|
||||
v.x = f * left.x;
|
||||
|
@ -113,18 +108,22 @@ struct OBB
|
|||
struct TRIANGLE
|
||||
{
|
||||
TRIANGLE() {}
|
||||
|
||||
TRIANGLE(const Vector& v1, const Vector& v2, const Vector& v3) : v1(v1), v2(v2), v3(v3) {}
|
||||
|
||||
~TRIANGLE() {}
|
||||
|
||||
Vector v1, v2, v3; //!< the vertices of the triangle
|
||||
|
||||
const Vector& operator[](int i) const { return (&v1)[i]; }
|
||||
Vector& operator[](int i) { return (&v1)[i]; }
|
||||
|
||||
Vector& operator[](int i) { return (&v1)[i]; }
|
||||
|
||||
/// assumes CCW ordering of vertices
|
||||
inline Vector ComputeNormal() {
|
||||
inline Vector ComputeNormal()
|
||||
{
|
||||
Vector normal;
|
||||
cross3(normal, v2-v1, v3-v1);
|
||||
cross3(normal, v2 - v1, v3 - v1);
|
||||
return normal;
|
||||
}
|
||||
};
|
||||
|
@ -172,8 +171,8 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf);
|
|||
inline bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2x, dReal& fv2y);
|
||||
|
||||
// Simple routines for linear algebra algorithms //
|
||||
int CubicRoots (double c0, double c1, double c2, double *r0, double *r1, double *r2);
|
||||
bool QLAlgorithm3 (dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);
|
||||
int CubicRoots(double c0, double c1, double c2, double *r0, double *r1, double *r2);
|
||||
bool QLAlgorithm3(dReal* m_aafEntry, dReal* afDiag, dReal* afSubDiag);
|
||||
|
||||
void EigenSymmetric3(dReal* fCovariance, dReal* eval, dReal* fAxes);
|
||||
|
||||
|
@ -182,7 +181,7 @@ void GetCovarBasisVectors(dReal fCovariance[3][3], Vector* vRight, Vector* vUp,
|
|||
// first root returned is always >= second, roots are defined if the quadratic doesn't have real solutions
|
||||
void QuadraticSolver(dReal* pfQuadratic, dReal* pfRoots);
|
||||
|
||||
int insideQuadrilateral(const Vector* p0,const Vector* p1, const Vector* p2,const Vector* p3);
|
||||
int insideQuadrilateral(const Vector* p0, const Vector* p1, const Vector* p2, const Vector* p3);
|
||||
int insideTriangle(const Vector* p0, const Vector* p1, const Vector* p2);
|
||||
|
||||
// multiplies a matrix by a scalar
|
||||
|
@ -238,30 +237,48 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
|
|||
c = pfmat[0] * pfmat[3] - pfmat[1] * pfmat[2];
|
||||
d = b * b - 4.0f * c + 1e-16f;
|
||||
|
||||
if( d < 0 ) return false;
|
||||
if( d < 1e-16f ) {
|
||||
if (d < 0) return false;
|
||||
|
||||
if (d < 1e-16f)
|
||||
{
|
||||
a = -0.5f * b;
|
||||
peigs[0] = a; peigs[1] = a;
|
||||
fv1x = pfmat[1]; fv1y = a - pfmat[0];
|
||||
c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
|
||||
fv1x *= c; fv1y *= c;
|
||||
fv2x = -fv1y; fv2y = fv1x;
|
||||
peigs[0] = a;
|
||||
peigs[1] = a;
|
||||
fv1x = pfmat[1];
|
||||
fv1y = a - pfmat[0];
|
||||
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
|
||||
fv1x *= c;
|
||||
fv1y *= c;
|
||||
fv2x = -fv1y;
|
||||
fv2y = fv1x;
|
||||
return true;
|
||||
}
|
||||
|
||||
// two roots
|
||||
d = sqrtf(d);
|
||||
|
||||
a = -0.5f * (b + d);
|
||||
peigs[0] = a;
|
||||
fv1x = pfmat[1]; fv1y = a-pfmat[0];
|
||||
c = 1 / sqrtf(fv1x*fv1x + fv1y*fv1y);
|
||||
fv1x *= c; fv1y *= c;
|
||||
|
||||
fv1x = pfmat[1];
|
||||
fv1y = a - pfmat[0];
|
||||
|
||||
c = 1 / sqrtf(fv1x * fv1x + fv1y * fv1y);
|
||||
|
||||
fv1x *= c;
|
||||
fv1y *= c;
|
||||
|
||||
a += d;
|
||||
peigs[1] = a;
|
||||
fv2x = pfmat[1]; fv2y = a-pfmat[0];
|
||||
c = 1 / sqrtf(fv2x*fv2x + fv2y*fv2y);
|
||||
fv2x *= c; fv2y *= c;
|
||||
|
||||
fv2x = pfmat[1];
|
||||
fv2y = a - pfmat[0];
|
||||
|
||||
c = 1 / sqrtf(fv2x * fv2x + fv2y * fv2y);
|
||||
|
||||
fv2x *= c;
|
||||
fv2y *= c;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -270,62 +287,70 @@ bool eig2(const dReal* pfmat, dReal* peigs, dReal& fv1x, dReal& fv1y, dReal& fv2
|
|||
// Functions that are replaceable by ipp library funcs
|
||||
// Multiplies the 3x3 parts of two matrices: pfres = pf1 * pf2.
//
// All three matrices are row-major with a row stride of 4 elements, i.e.
// element (r, c) lives at index r*4 + c. Only indices 0-2, 4-6 and 8-10 of
// pfres are written; the padding slots (3, 7, 11, ...) are left untouched.
//
// pfres may alias pf1 and/or pf2: the product is built in a local scratch
// matrix and stored afterwards. (The previous in-place path allocated only
// 9 elements of scratch while writing up to index 10, and its 9-element
// copy-back overwrote the padding slots and never stored elements 9 and 10.)
//
// Returns pfres.
template <class T> inline T* _mult3(T* pfres, const T* pf1, const T* pf2)
{
	assert(pf1 != NULL && pf2 != NULL && pfres != NULL);

	T prod[3][3];

	for (int r = 0; r < 3; ++r)
	{
		for (int c = 0; c < 3; ++c)
		{
			prod[r][c] = pf1[r*4+0] * pf2[0*4+c] + pf1[r*4+1] * pf2[1*4+c] + pf1[r*4+2] * pf2[2*4+c];
		}
	}

	// Store only the 3x3 entries so the stride-4 padding of pfres is preserved.
	for (int r = 0; r < 3; ++r)
	{
		for (int c = 0; c < 3; ++c)
		{
			pfres[r*4+c] = prod[r][c];
		}
	}

	return pfres;
}
|
||||
|
||||
// dReal overload: forwards its arguments unchanged to _mult3<dReal> and returns its result.
inline dReal* mult3(dReal* pfres, const dReal* pf1, const dReal* pf2) { return _mult3<dReal>(pfres, pf1, pf2); }
|
||||
|
||||
// double overload: forwards its arguments unchanged to _mult3<double> and returns its result.
inline double* mult3(double* pfres, const double* pf1, const double* pf2) { return _mult3<double>(pfres, pf1, pf2); }
|
||||
|
||||
// Multiplies two 4x4 row-major matrices: pfres = p1 * p2.
//
// pfres may be the same pointer as p1 or p2; in that case the product is
// built in a local 16-element scratch array and copied back at the end.
// Otherwise the result is written straight into pfres.
// (The scratch array replaces the former alloca() call, which is not
// standard C++ and needs a platform-specific header.)
//
// Returns pfres.
template <class T>
inline T* _mult4(T* pfres, const T* p1, const T* p2)
{
	assert(pfres != NULL && p1 != NULL && p2 != NULL);

	T scratch[16];
	T* pfres2 = (pfres == p1 || pfres == p2) ? scratch : pfres;

	for (int r = 0; r < 4; ++r)
	{
		for (int c = 0; c < 4; ++c)
		{
			// same left-to-right summation order as the unrolled form
			pfres2[r*4+c] = p1[r*4+0] * p2[0*4+c] + p1[r*4+1] * p2[1*4+c] + p1[r*4+2] * p2[2*4+c] + p1[r*4+3] * p2[3*4+c];
		}
	}

	if (pfres != pfres2) memcpy(pfres, pfres2, sizeof(T)*16);

	return pfres;
}
|
||||
|
||||
|
@ -336,22 +361,23 @@ template <class T>
|
|||
inline T* _multtrans3(T* pfres, const T* pf1, const T* pf2)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf1 ) pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
pfres2[0] = pf1[0]*pf2[0]+pf1[3]*pf2[3]+pf1[6]*pf2[6];
|
||||
pfres2[1] = pf1[0]*pf2[1]+pf1[3]*pf2[4]+pf1[6]*pf2[7];
|
||||
pfres2[2] = pf1[0]*pf2[2]+pf1[3]*pf2[5]+pf1[6]*pf2[8];
|
||||
if (pfres == pf1)
|
||||
pfres2 = (T*)alloca(9 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
pfres2[3] = pf1[1]*pf2[0]+pf1[4]*pf2[3]+pf1[7]*pf2[6];
|
||||
pfres2[4] = pf1[1]*pf2[1]+pf1[4]*pf2[4]+pf1[7]*pf2[7];
|
||||
pfres2[5] = pf1[1]*pf2[2]+pf1[4]*pf2[5]+pf1[7]*pf2[8];
|
||||
pfres2[0] = pf1[0] * pf2[0] + pf1[3] * pf2[3] + pf1[6] * pf2[6];
|
||||
pfres2[1] = pf1[0] * pf2[1] + pf1[3] * pf2[4] + pf1[6] * pf2[7];
|
||||
pfres2[2] = pf1[0] * pf2[2] + pf1[3] * pf2[5] + pf1[6] * pf2[8];
|
||||
pfres2[3] = pf1[1] * pf2[0] + pf1[4] * pf2[3] + pf1[7] * pf2[6];
|
||||
pfres2[4] = pf1[1] * pf2[1] + pf1[4] * pf2[4] + pf1[7] * pf2[7];
|
||||
pfres2[5] = pf1[1] * pf2[2] + pf1[4] * pf2[5] + pf1[7] * pf2[8];
|
||||
pfres2[6] = pf1[2] * pf2[0] + pf1[5] * pf2[3] + pf1[8] * pf2[6];
|
||||
pfres2[7] = pf1[2] * pf2[1] + pf1[5] * pf2[4] + pf1[8] * pf2[7];
|
||||
pfres2[8] = pf1[2] * pf2[2] + pf1[5] * pf2[5] + pf1[8] * pf2[8];
|
||||
|
||||
pfres2[6] = pf1[2]*pf2[0]+pf1[5]*pf2[3]+pf1[8]*pf2[6];
|
||||
pfres2[7] = pf1[2]*pf2[1]+pf1[5]*pf2[4]+pf1[8]*pf2[7];
|
||||
pfres2[8] = pf1[2]*pf2[2]+pf1[5]*pf2[5]+pf1[8]*pf2[8];
|
||||
|
||||
if( pfres2 != pfres ) memcpy(pfres, pfres2, 9*sizeof(T));
|
||||
if (pfres2 != pfres) memcpy(pfres, pfres2, 9*sizeof(T));
|
||||
|
||||
return pfres;
|
||||
}
|
||||
|
@ -360,11 +386,16 @@ template <class T>
|
|||
inline T* _multtrans4(T* pfres, const T* pf1, const T* pf2)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf1 ) pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
for(int i = 0; i < 4; ++i) {
|
||||
for(int j = 0; j < 4; ++j) {
|
||||
if (pfres == pf1)
|
||||
pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
for (int j = 0; j < 4; ++j)
|
||||
{
|
||||
pfres[4*i+j] = pf1[i] * pf2[j] + pf1[i+4] * pf2[j+4] + pf1[i+8] * pf2[j+8] + pf1[i+12] * pf2[j+12];
|
||||
}
|
||||
}
|
||||
|
@ -381,8 +412,11 @@ inline double* multtrans4(double* pfres, const double* pf1, const double* pf2) {
|
|||
template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf ) pfres2 = (T*)alloca(3 * stride * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf)
|
||||
pfres2 = (T*)alloca(3 * stride * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
// inverse = C^t / det(pf) where C is the matrix of coefficients
|
||||
|
||||
|
@ -390,29 +424,40 @@ template <class T> inline T* _inv3(const T* pf, T* pfres, int stride)
|
|||
pfres2[0*stride + 0] = pf[1*stride + 1] * pf[2*stride + 2] - pf[1*stride + 2] * pf[2*stride + 1];
|
||||
pfres2[0*stride + 1] = pf[0*stride + 2] * pf[2*stride + 1] - pf[0*stride + 1] * pf[2*stride + 2];
|
||||
pfres2[0*stride + 2] = pf[0*stride + 1] * pf[1*stride + 2] - pf[0*stride + 2] * pf[1*stride + 1];
|
||||
|
||||
pfres2[1*stride + 0] = pf[1*stride + 2] * pf[2*stride + 0] - pf[1*stride + 0] * pf[2*stride + 2];
|
||||
pfres2[1*stride + 1] = pf[0*stride + 0] * pf[2*stride + 2] - pf[0*stride + 2] * pf[2*stride + 0];
|
||||
pfres2[1*stride + 2] = pf[0*stride + 2] * pf[1*stride + 0] - pf[0*stride + 0] * pf[1*stride + 2];
|
||||
|
||||
pfres2[2*stride + 0] = pf[1*stride + 0] * pf[2*stride + 1] - pf[1*stride + 1] * pf[2*stride + 0];
|
||||
pfres2[2*stride + 1] = pf[0*stride + 1] * pf[2*stride + 0] - pf[0*stride + 0] * pf[2*stride + 1];
|
||||
pfres2[2*stride + 2] = pf[0*stride + 0] * pf[1*stride + 1] - pf[0*stride + 1] * pf[1*stride + 0];
|
||||
|
||||
T fdet = pf[0*stride + 2] * pfres2[2*stride + 0] + pf[1*stride + 2] * pfres2[2*stride + 1] +
|
||||
pf[2*stride + 2] * pfres2[2*stride + 2];
|
||||
pf[2*stride + 2] * pfres2[2*stride + 2];
|
||||
|
||||
if( fabs(fdet) < 1e-6 ) return NULL;
|
||||
if (fabs(fdet) < 1e-6) return NULL;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres != pf ) {
|
||||
pfres[0*stride+0] *= fdet; pfres[0*stride+1] *= fdet; pfres[0*stride+2] *= fdet;
|
||||
pfres[1*stride+0] *= fdet; pfres[1*stride+1] *= fdet; pfres[1*stride+2] *= fdet;
|
||||
pfres[2*stride+0] *= fdet; pfres[2*stride+1] *= fdet; pfres[2*stride+2] *= fdet;
|
||||
if (pfres != pf)
|
||||
{
|
||||
pfres[0*stride+0] *= fdet;
|
||||
pfres[0*stride+1] *= fdet;
|
||||
pfres[0*stride+2] *= fdet;
|
||||
pfres[1*stride+0] *= fdet;
|
||||
pfres[1*stride+1] *= fdet;
|
||||
pfres[1*stride+2] *= fdet;
|
||||
pfres[2*stride+0] *= fdet;
|
||||
pfres[2*stride+1] *= fdet;
|
||||
pfres[2*stride+2] *= fdet;
|
||||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0*stride+0] = pfres2[0*stride+0] * fdet;
|
||||
|
||||
pfres[0*stride+1] = pfres2[0*stride+1] * fdet;
|
||||
pfres[0*stride+2] = pfres2[0*stride+2] * fdet;
|
||||
pfres[1*stride+0] = pfres2[1*stride+0] * fdet;
|
||||
|
@ -430,8 +475,11 @@ inline dReal* inv3(const dReal* pf, dReal* pfres, int stride) { return _inv3<dRe
|
|||
template <class T> inline T* _inv4(const T* pf, T* pfres)
|
||||
{
|
||||
T* pfres2;
|
||||
if( pfres == pf ) pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else pfres2 = pfres;
|
||||
|
||||
if (pfres == pf)
|
||||
pfres2 = (T*)alloca(16 * sizeof(T));
|
||||
else
|
||||
pfres2 = pfres;
|
||||
|
||||
// inverse = C^t / det(pf) where C is the matrix of coefficients
|
||||
|
||||
|
@ -439,7 +487,9 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
|
|||
|
||||
// determinants of all possibel 2x2 submatrices formed by last two rows
|
||||
T fd0, fd1, fd2;
|
||||
|
||||
T f1, f2, f3;
|
||||
|
||||
fd0 = pf[2*4 + 0] * pf[3*4 + 1] - pf[2*4 + 1] * pf[3*4 + 0];
|
||||
fd1 = pf[2*4 + 1] * pf[3*4 + 2] - pf[2*4 + 2] * pf[3*4 + 1];
|
||||
fd2 = pf[2*4 + 2] * pf[3*4 + 3] - pf[2*4 + 3] * pf[3*4 + 2];
|
||||
|
@ -482,20 +532,24 @@ template <class T> inline T* _inv4(const T* pf, T* pfres)
|
|||
pfres2[3*4 + 3] = pf[2*4 + 0] * fd1 - pf[2*4 + 1] * f3 + pf[2*4 + 2] * fd0;
|
||||
|
||||
T fdet = pf[0*4 + 3] * pfres2[3*4 + 0] + pf[1*4 + 3] * pfres2[3*4 + 1] +
|
||||
pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
|
||||
pf[2*4 + 3] * pfres2[3*4 + 2] + pf[3*4 + 3] * pfres2[3*4 + 3];
|
||||
|
||||
if( fabs(fdet) < 1e-6) return NULL;
|
||||
if (fabs(fdet) < 1e-6) return NULL;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres2 == pfres ) {
|
||||
if (pfres2 == pfres)
|
||||
{
|
||||
mult(pfres, fdet, 16);
|
||||
return pfres;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
while(i < 16) {
|
||||
|
||||
while (i < 16)
|
||||
{
|
||||
pfres[i] = pfres2[i] * fdet;
|
||||
++i;
|
||||
}
|
||||
|
@ -507,18 +561,26 @@ inline dReal* inv4(const dReal* pf, dReal* pfres) { return _inv4<dReal>(pf, pfre
|
|||
|
||||
template <class T> inline T* _transpose3(const T* pf, T* pfres)
|
||||
{
|
||||
assert( pf != NULL && pfres != NULL );
|
||||
assert(pf != NULL && pfres != NULL);
|
||||
|
||||
if( pf == pfres ) {
|
||||
if (pf == pfres)
|
||||
{
|
||||
rswap(pfres[1], pfres[3]);
|
||||
rswap(pfres[2], pfres[6]);
|
||||
rswap(pfres[5], pfres[7]);
|
||||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0] = pf[0]; pfres[1] = pf[3]; pfres[2] = pf[6];
|
||||
pfres[3] = pf[1]; pfres[4] = pf[4]; pfres[5] = pf[7];
|
||||
pfres[6] = pf[2]; pfres[7] = pf[5]; pfres[8] = pf[8];
|
||||
pfres[0] = pf[0];
|
||||
|
||||
pfres[1] = pf[3];
|
||||
pfres[2] = pf[6];
|
||||
pfres[3] = pf[1];
|
||||
pfres[4] = pf[4];
|
||||
pfres[5] = pf[7];
|
||||
pfres[6] = pf[2];
|
||||
pfres[7] = pf[5];
|
||||
pfres[8] = pf[8];
|
||||
|
||||
return pfres;
|
||||
}
|
||||
|
@ -528,9 +590,10 @@ inline double* transpose3(const double* pf, double* pfres) { return _transpose3(
|
|||
|
||||
template <class T> inline T* _transpose4(const T* pf, T* pfres)
|
||||
{
|
||||
assert( pf != NULL && pfres != NULL );
|
||||
assert(pf != NULL && pfres != NULL);
|
||||
|
||||
if( pf == pfres ) {
|
||||
if (pf == pfres)
|
||||
{
|
||||
rswap(pfres[1], pfres[4]);
|
||||
rswap(pfres[2], pfres[8]);
|
||||
rswap(pfres[3], pfres[12]);
|
||||
|
@ -540,10 +603,23 @@ template <class T> inline T* _transpose4(const T* pf, T* pfres)
|
|||
return pfres;
|
||||
}
|
||||
|
||||
pfres[0] = pf[0]; pfres[1] = pf[4]; pfres[2] = pf[8]; pfres[3] = pf[12];
|
||||
pfres[4] = pf[1]; pfres[5] = pf[5]; pfres[6] = pf[9]; pfres[7] = pf[13];
|
||||
pfres[8] = pf[2]; pfres[9] = pf[6]; pfres[10] = pf[10]; pfres[11] = pf[14];
|
||||
pfres[12] = pf[3]; pfres[13] = pf[7]; pfres[14] = pf[11]; pfres[15] = pf[15];
|
||||
pfres[0] = pf[0];
|
||||
|
||||
pfres[1] = pf[4];
|
||||
pfres[2] = pf[8];
|
||||
pfres[3] = pf[12];
|
||||
pfres[4] = pf[1];
|
||||
pfres[5] = pf[5];
|
||||
pfres[6] = pf[9];
|
||||
pfres[7] = pf[13];
|
||||
pfres[8] = pf[2];
|
||||
pfres[9] = pf[6];
|
||||
pfres[10] = pf[10];
|
||||
pfres[11] = pf[14];
|
||||
pfres[12] = pf[3];
|
||||
pfres[13] = pf[7];
|
||||
pfres[14] = pf[11];
|
||||
pfres[15] = pf[15];
|
||||
return pfres;
|
||||
}
|
||||
|
||||
|
@ -552,37 +628,37 @@ inline double* transpose4(const double* pf, double* pfres) { return _transpose4(
|
|||
|
||||
inline dReal dot2(const dReal* pf1, const dReal* pf2)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL );
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
return pf1[0]*pf2[0] + pf1[1]*pf2[1];
|
||||
}
|
||||
|
||||
inline dReal dot3(const dReal* pf1, const dReal* pf2)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL );
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2];
|
||||
}
|
||||
|
||||
inline dReal dot4(const dReal* pf1, const dReal* pf2)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL );
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
return pf1[0]*pf2[0] + pf1[1]*pf2[1] + pf1[2]*pf2[2] + pf1[3] * pf2[3];
|
||||
}
|
||||
|
||||
inline dReal lengthsqr2(const dReal* pf)
|
||||
{
|
||||
assert( pf != NULL );
|
||||
assert(pf != NULL);
|
||||
return pf[0] * pf[0] + pf[1] * pf[1];
|
||||
}
|
||||
|
||||
inline dReal lengthsqr3(const dReal* pf)
|
||||
{
|
||||
assert( pf != NULL );
|
||||
assert(pf != NULL);
|
||||
return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
|
||||
}
|
||||
|
||||
inline dReal lengthsqr4(const dReal* pf)
|
||||
{
|
||||
assert( pf != NULL );
|
||||
assert(pf != NULL);
|
||||
return pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
|
||||
}
|
||||
|
||||
|
@ -590,7 +666,7 @@ inline dReal* normalize2(dReal* pfout, const dReal* pf)
|
|||
{
|
||||
assert(pf != NULL);
|
||||
|
||||
dReal f = pf[0]*pf[0] + pf[1]*pf[1];
|
||||
dReal f = pf[0] * pf[0] + pf[1] * pf[1];
|
||||
f = 1.0f / sqrtf(f);
|
||||
pfout[0] = pf[0] * f;
|
||||
pfout[1] = pf[1] * f;
|
||||
|
@ -602,7 +678,7 @@ inline dReal* normalize3(dReal* pfout, const dReal* pf)
|
|||
{
|
||||
assert(pf != NULL);
|
||||
|
||||
dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2];
|
||||
dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2];
|
||||
|
||||
f = 1.0f / sqrtf(f);
|
||||
pfout[0] = pf[0] * f;
|
||||
|
@ -616,7 +692,7 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)
|
|||
{
|
||||
assert(pf != NULL);
|
||||
|
||||
dReal f = pf[0]*pf[0] + pf[1]*pf[1] + pf[2]*pf[2] + pf[3]*pf[3];
|
||||
dReal f = pf[0] * pf[0] + pf[1] * pf[1] + pf[2] * pf[2] + pf[3] * pf[3];
|
||||
|
||||
f = 1.0f / sqrtf(f);
|
||||
pfout[0] = pf[0] * f;
|
||||
|
@ -629,22 +705,25 @@ inline dReal* normalize4(dReal* pfout, const dReal* pf)
|
|||
|
||||
inline dReal* cross3(dReal* pfout, const dReal* pf1, const dReal* pf2)
|
||||
{
|
||||
assert( pfout != NULL && pf1 != NULL && pf2 != NULL );
|
||||
assert(pfout != NULL && pf1 != NULL && pf2 != NULL);
|
||||
|
||||
dReal temp[3];
|
||||
temp[0] = pf1[1] * pf2[2] - pf1[2] * pf2[1];
|
||||
temp[1] = pf1[2] * pf2[0] - pf1[0] * pf2[2];
|
||||
temp[2] = pf1[0] * pf2[1] - pf1[1] * pf2[0];
|
||||
|
||||
pfout[0] = temp[0]; pfout[1] = temp[1]; pfout[2] = temp[2];
|
||||
pfout[0] = temp[0];
|
||||
pfout[1] = temp[1];
|
||||
pfout[2] = temp[2];
|
||||
return pfout;
|
||||
}
|
||||
|
||||
template <class T> inline void mult(T* pf, T fa, int r)
|
||||
{
|
||||
assert( pf != NULL );
|
||||
assert(pf != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf[r] *= fa;
|
||||
}
|
||||
|
@ -653,25 +732,32 @@ template <class T> inline void mult(T* pf, T fa, int r)
|
|||
template <class T, class S, class R>
|
||||
inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
int j, k;
|
||||
|
||||
if( !badd ) memset(pfres, 0, sizeof(S) * r1 * c2);
|
||||
if (!badd) memset(pfres, 0, sizeof(S) * r1 * c2);
|
||||
|
||||
while(r1 > 0) {
|
||||
while (r1 > 0)
|
||||
{
|
||||
--r1;
|
||||
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < c1) {
|
||||
|
||||
while (k < c1)
|
||||
{
|
||||
pfres[j] += pf1[k] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pf1 += c1;
|
||||
|
||||
pfres += c2;
|
||||
}
|
||||
|
||||
|
@ -681,26 +767,32 @@ inline T* mult(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
|||
template <class T, class S, class R>
|
||||
inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
int i, j, k;
|
||||
|
||||
if( !badd ) memset(pfres, 0, sizeof(S) * c1 * c2);
|
||||
if (!badd) memset(pfres, 0, sizeof(S) * c1 * c2);
|
||||
|
||||
i = 0;
|
||||
while(i < c1) {
|
||||
|
||||
while (i < c1)
|
||||
{
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < r1) {
|
||||
|
||||
while (k < r1)
|
||||
{
|
||||
pfres[j] += pf1[k*c1] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pfres += c2;
|
||||
|
||||
++pf1;
|
||||
|
||||
++i;
|
||||
|
@ -712,25 +804,32 @@ inline T* multtrans(T* pf1, R* pf2, int r1, int c1, int c2, S* pfres, bool badd)
|
|||
template <class T, class S, class R>
|
||||
inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool badd)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
assert(pf1 != NULL && pf2 != NULL && pfres != NULL);
|
||||
int j, k;
|
||||
|
||||
if( !badd ) memset(pfres, 0, sizeof(S) * r1 * r2);
|
||||
if (!badd) memset(pfres, 0, sizeof(S) * r1 * r2);
|
||||
|
||||
while(r1 > 0) {
|
||||
while (r1 > 0)
|
||||
{
|
||||
--r1;
|
||||
|
||||
j = 0;
|
||||
while(j < r2) {
|
||||
|
||||
while (j < r2)
|
||||
{
|
||||
k = 0;
|
||||
while(k < c1) {
|
||||
|
||||
while (k < c1)
|
||||
{
|
||||
pfres[j] += pf1[k] * pf2[j*c1 + k];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
pf1 += c1;
|
||||
|
||||
pfres += r2;
|
||||
}
|
||||
|
||||
|
@ -739,88 +838,107 @@ inline T* multtrans_to2(T* pf1, R* pf2, int r1, int c1, int r2, S* pfres, bool b
|
|||
|
||||
template <class T> inline T* multto1(T* pf1, T* pf2, int r, int c, T* pftemp)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL );
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
int j, k;
|
||||
bool bdel = false;
|
||||
|
||||
if( pftemp == NULL ) {
|
||||
if (pftemp == NULL)
|
||||
{
|
||||
pftemp = new T[c];
|
||||
bdel = true;
|
||||
}
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
|
||||
j = 0;
|
||||
while(j < c) {
|
||||
|
||||
while (j < c)
|
||||
{
|
||||
|
||||
pftemp[j] = 0.0;
|
||||
|
||||
k = 0;
|
||||
while(k < c) {
|
||||
|
||||
while (k < c)
|
||||
{
|
||||
pftemp[j] += pf1[k] * pf2[k*c + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
memcpy(pf1, pftemp, c * sizeof(T));
|
||||
|
||||
pf1 += c;
|
||||
}
|
||||
|
||||
if( bdel ) delete[] pftemp;
|
||||
if (bdel) delete[] pftemp;
|
||||
|
||||
return pf1;
|
||||
}
|
||||
|
||||
template <class T, class S> inline T* multto2(T* pf1, S* pf2, int r2, int c2, S* pftemp)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL );
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
int i, j, k;
|
||||
bool bdel = false;
|
||||
|
||||
if( pftemp == NULL ) {
|
||||
if (pftemp == NULL)
|
||||
{
|
||||
pftemp = new S[r2];
|
||||
bdel = true;
|
||||
}
|
||||
|
||||
// do columns first
|
||||
j = 0;
|
||||
while(j < c2) {
|
||||
|
||||
while (j < c2)
|
||||
{
|
||||
i = 0;
|
||||
while(i < r2) {
|
||||
|
||||
while (i < r2)
|
||||
{
|
||||
|
||||
pftemp[i] = 0.0;
|
||||
|
||||
k = 0;
|
||||
while(k < r2) {
|
||||
|
||||
while (k < r2)
|
||||
{
|
||||
pftemp[i] += pf1[i*r2 + k] * pf2[k*c2 + j];
|
||||
++k;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while(i < r2) {
|
||||
*(pf2+i*c2+j) = pftemp[i];
|
||||
|
||||
while (i < r2)
|
||||
{
|
||||
*(pf2 + i*c2 + j) = pftemp[i];
|
||||
++i;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
|
||||
if( bdel ) delete[] pftemp;
|
||||
if (bdel) delete[] pftemp;
|
||||
|
||||
return pf1;
|
||||
}
|
||||
|
||||
template <class T> inline void add(T* pf1, T* pf2, int r)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL);
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf1[r] += pf2[r];
|
||||
}
|
||||
|
@ -828,9 +946,10 @@ template <class T> inline void add(T* pf1, T* pf2, int r)
|
|||
|
||||
template <class T> inline void sub(T* pf1, T* pf2, int r)
|
||||
{
|
||||
assert( pf1 != NULL && pf2 != NULL);
|
||||
assert(pf1 != NULL && pf2 != NULL);
|
||||
|
||||
while(r > 0) {
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
pf1[r] -= pf2[r];
|
||||
}
|
||||
|
@ -838,10 +957,12 @@ template <class T> inline void sub(T* pf1, T* pf2, int r)
|
|||
|
||||
template <class T> inline T normsqr(T* pf1, int r)
|
||||
{
|
||||
assert( pf1 != NULL );
|
||||
assert(pf1 != NULL);
|
||||
|
||||
T d = 0.0;
|
||||
while(r > 0) {
|
||||
|
||||
while (r > 0)
|
||||
{
|
||||
--r;
|
||||
d += pf1[r] * pf1[r];
|
||||
}
|
||||
|
@ -852,7 +973,9 @@ template <class T> inline T normsqr(T* pf1, int r)
|
|||
template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += sqr(pf1[length] - pf2[length]);
|
||||
}
|
||||
|
@ -863,7 +986,9 @@ template <class T> inline T lengthsqr(T* pf1, T* pf2, int length)
|
|||
template <class T> inline T dot(T* pf1, T* pf2, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += pf1[length] * pf2[length];
|
||||
}
|
||||
|
@ -874,7 +999,9 @@ template <class T> inline T dot(T* pf1, T* pf2, int length)
|
|||
template <class T> inline T sum(T* pf, int length)
|
||||
{
|
||||
T d = 0;
|
||||
while(length > 0) {
|
||||
|
||||
while (length > 0)
|
||||
{
|
||||
--length;
|
||||
d += pf[length];
|
||||
}
|
||||
|
@ -886,18 +1013,23 @@ template <class T> inline bool inv2(T* pf, T* pfres)
|
|||
{
|
||||
T fdet = pf[0] * pf[3] - pf[1] * pf[2];
|
||||
|
||||
if( fabs(fdet) < 1e-16 ) return false;
|
||||
if (fabs(fdet) < 1e-16) return false;
|
||||
|
||||
fdet = 1 / fdet;
|
||||
|
||||
//if( pfdet != NULL ) *pfdet = fdet;
|
||||
|
||||
if( pfres != pf ) {
|
||||
pfres[0] = fdet * pf[3]; pfres[1] = -fdet * pf[1];
|
||||
pfres[2] = -fdet * pf[2]; pfres[3] = fdet * pf[0];
|
||||
if (pfres != pf)
|
||||
{
|
||||
pfres[0] = fdet * pf[3];
|
||||
pfres[1] = -fdet * pf[1];
|
||||
pfres[2] = -fdet * pf[2];
|
||||
pfres[3] = fdet * pf[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
dReal ftemp = pf[0];
|
||||
|
||||
pfres[0] = pf[3] * fdet;
|
||||
pfres[1] *= -fdet;
|
||||
pfres[2] *= -fdet;
|
||||
|
|
Loading…
Reference in New Issue