mirror of https://github.com/PCSX2/pcsx2.git
GregMiscellaneous: zzogl-pg:
* Some boost tuning: do big loop in reverse order. * Add a function to get ns timing. Could be useful for benchmark. git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3799 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
57f1e7badb
commit
7afdf9e7c7
|
@ -87,6 +87,9 @@ static __forceinline void pcsx2_aligned_free(void* pmem)
|
||||||
#define _aligned_malloc pcsx2_aligned_malloc
|
#define _aligned_malloc pcsx2_aligned_malloc
|
||||||
#define _aligned_free pcsx2_aligned_free
|
#define _aligned_free pcsx2_aligned_free
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __LINUX__
|
||||||
#include <sys/timeb.h> // ftime(), struct timeb
|
#include <sys/timeb.h> // ftime(), struct timeb
|
||||||
|
|
||||||
inline unsigned long timeGetTime()
|
inline unsigned long timeGetTime()
|
||||||
|
@ -97,6 +100,15 @@ inline unsigned long timeGetTime()
|
||||||
return (unsigned long)(t.time*1000 + t.millitm);
|
return (unsigned long)(t.time*1000 + t.millitm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
// Returns the CPU time consumed by the current process, in nanoseconds.
// Intended for benchmarking: take two readings and subtract them.
//
// Both fields of the timespec are combined. Returning tv_nsec alone only
// yields the sub-second fraction, which wraps back to 0 every second and
// makes any interval that crosses a second boundary come out wrong.
//
// The result is reduced modulo the range of unsigned long, so where
// unsigned long is 32 bits only differences shorter than ~4.29 s are
// meaningful. Returns 0 if the clock cannot be read.
inline unsigned long timeGetPreciseTime()
{
	struct timespec t;

	// Do not read an uninitialized timespec when the clock is unavailable.
	if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t) != 0) return 0;

	// Widen before multiplying so the seconds part cannot overflow a 32-bit
	// intermediate; the final narrowing is a well-defined unsigned wrap.
	return (unsigned long)((unsigned long long)t.tv_sec * 1000000000ULL
	                       + (unsigned long long)t.tv_nsec);
}
|
||||||
|
|
||||||
struct RECT
|
struct RECT
|
||||||
{
|
{
|
||||||
int left, top;
|
int left, top;
|
||||||
|
@ -138,6 +150,7 @@ enum GSWindowDim
|
||||||
GSDim_1024,
|
GSDim_1024,
|
||||||
GSDim_1280,
|
GSDim_1280,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef union
|
typedef union
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
|
|
|
@ -2930,7 +2930,7 @@ void FlushTransferRanges(const tex0Info* ptex)
|
||||||
template <typename T, typename Tret>
|
template <typename T, typename Tret>
|
||||||
inline Tret dummy_return(T value) { return value; }
|
inline Tret dummy_return(T value) { return value; }
|
||||||
|
|
||||||
template <typename T, typename Tsrc, T (*convfn)(Tsrc)>
|
template <typename Tdst, Tdst (*convfn)(u32)>
|
||||||
inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const int psm, u32 fbm)
|
inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const int psm, u32 fbm)
|
||||||
{
|
{
|
||||||
u32 mask, imask;
|
u32 mask, imask;
|
||||||
|
@ -2947,17 +2947,21 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
|
||||||
imask = fbm;
|
imask = fbm;
|
||||||
}
|
}
|
||||||
|
|
||||||
Tsrc* src = (Tsrc*)(psrc);
|
Tdst* pPageOffset = (Tdst*)g_pbyGSMemory + fbp*(256/sizeof(Tdst));
|
||||||
T* pPageOffset = (T*)g_pbyGSMemory + fbp*(256/sizeof(T)), *dst;
|
Tdst *dst;
|
||||||
|
|
||||||
int maxfbh = (MEMORY_END-fbp*256) / (sizeof(T) * fbw);
|
int maxfbh = (MEMORY_END-fbp*256) / (sizeof(Tdst) * fbw);
|
||||||
if( maxfbh > fbh ) maxfbh = fbh;
|
if( maxfbh > fbh ) maxfbh = fbh;
|
||||||
|
ZZLog::Debug_Log("*** Resolve 32 bits: %dx%d in %x", maxfbh, fbw, psm);
|
||||||
|
|
||||||
ZZLog::Debug_Log("*** Resolve 32 bits: %dx%d in %x\n", maxfbh, fbw, psm);
|
// Start the src array at the end to reduce testing in loop
|
||||||
|
u32 raw_size = RH(Pitch(fbw))/sizeof(u32);
|
||||||
|
u32* src = (u32*)(psrc) + maxfbh*raw_size;
|
||||||
|
|
||||||
for(int i = 0; i < maxfbh; ++i) {
|
for(int i = maxfbh; i > 0; --i) {
|
||||||
for(int j = 0; j < fbw; ++j) {
|
src -= raw_size;
|
||||||
T dsrc = (T)convfn(src[RW(j)]);
|
for(int j = fbw; j > 0; --j) {
|
||||||
|
Tdst dsrc = (Tdst)convfn(src[RW(j)]);
|
||||||
// They are 3 methods to call the functions
|
// They are 3 methods to call the functions
|
||||||
// macro (compact, inline) but need a nice psm ; swich (inline) ; function pointer (compact)
|
// macro (compact, inline) but need a nice psm ; swich (inline) ; function pointer (compact)
|
||||||
// Use a switch to allow inlining of the getPixel function.
|
// Use a switch to allow inlining of the getPixel function.
|
||||||
|
@ -2994,7 +2998,6 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
|
||||||
}
|
}
|
||||||
*dst = (dsrc & mask) | (*dst & imask);
|
*dst = (dsrc & mask) | (*dst & imask);
|
||||||
}
|
}
|
||||||
src += RH(Pitch(fbw))/sizeof(Tsrc);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3018,28 +3021,28 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
|
||||||
// the psm switch in Resolve_32_Bit
|
// the psm switch in Resolve_32_Bit
|
||||||
case PSMCT32:
|
case PSMCT32:
|
||||||
case PSMCT24:
|
case PSMCT24:
|
||||||
Resolve_32_Bit<u32, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMCT32, fbm);
|
Resolve_32_Bit<u32, dummy_return >(psrc, fbp, fbw, fbh, PSMCT32, fbm);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSMCT16:
|
case PSMCT16:
|
||||||
Resolve_32_Bit<u16, u32, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16, fbm);
|
Resolve_32_Bit<u16, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16, fbm);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSMCT16S:
|
case PSMCT16S:
|
||||||
Resolve_32_Bit<u16, u32, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16S, fbm);
|
Resolve_32_Bit<u16, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16S, fbm);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSMT32Z:
|
case PSMT32Z:
|
||||||
case PSMT24Z:
|
case PSMT24Z:
|
||||||
Resolve_32_Bit<u32, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT32Z, fbm);
|
Resolve_32_Bit<u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT32Z, fbm);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSMT16Z:
|
case PSMT16Z:
|
||||||
Resolve_32_Bit<u16, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT16Z, fbm);
|
Resolve_32_Bit<u16, dummy_return >(psrc, fbp, fbw, fbh, PSMT16Z, fbm);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSMT16SZ:
|
case PSMT16SZ:
|
||||||
Resolve_32_Bit<u16, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT16SZ, fbm);
|
Resolve_32_Bit<u16, dummy_return >(psrc, fbp, fbw, fbh, PSMT16SZ, fbm);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -545,7 +545,7 @@ __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
|
||||||
|
|
||||||
int Values[100] = {0, };
|
int Values[100] = {0, };
|
||||||
|
|
||||||
void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
inline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
||||||
{
|
{
|
||||||
int index = Index;
|
int index = Index;
|
||||||
p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
|
p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
|
||||||
|
|
Loading…
Reference in New Issue