GregMiscellaneous: zzogl-pg:

* Some boost tuning: do big loop in reverse order. * Add a function to get ns timing. Could be useful for benchmark. git-svn-id: http://pcsx2.googlecode.com/svn/branches/GregMiscellaneous@3799 96395faa-99c1-11dd-bbfe-3dabce05a288
2010-09-18 16:53:32 +00:00 · 2010-09-18 16:53:32 +00:00 · 7afdf9e7c7
parent 57f1e7badb
commit 7afdf9e7c7
3 changed files with 32 additions and 16 deletions
--- a/plugins/zzogl-pg/opengl/Util.h
+++ b/plugins/zzogl-pg/opengl/Util.h
@ -87,6 +87,9 @@ static __forceinline void pcsx2_aligned_free(void* pmem)
 #define _aligned_malloc pcsx2_aligned_malloc
 #define _aligned_free pcsx2_aligned_free
 #endif
 #ifdef __LINUX__
 #include <sys/timeb.h>	// ftime(), struct timeb
 inline unsigned long timeGetTime()
@ -97,6 +100,15 @@ inline unsigned long timeGetTime()
 	return (unsigned long)(t.time*1000 + t.millitm);
 }
 #include <time.h>
 /// Returns the CPU time consumed by this process, expressed in nanoseconds.
 ///
 /// The value is read from CLOCK_PROCESS_CPUTIME_ID and combines both the
 /// seconds and the nanoseconds fields of the timespec. Returning tv_nsec
 /// alone would restart from zero every second and make the difference
 /// between two readings meaningless as soon as a second boundary is crossed.
 ///
 /// The count is truncated to unsigned long. Where that type is 32 bits wide
 /// the counter wraps roughly every 4.29 s; unsigned subtraction of two
 /// readings still yields the right interval for spans shorter than that.
 ///
 /// @return elapsed process CPU time in nanoseconds, or 0 if the clock
 ///         cannot be read.
 inline unsigned long timeGetPreciseTime()
 {
     timespec t;

     // clock_gettime() reports failure through its return value; do not use
     // an uninitialised timespec in that case.
     if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t) != 0)
         return 0;

     // Do the arithmetic in 64 bits so the multiplication cannot overflow
     // before the final (documented) truncation.
     const unsigned long long ns =
         static_cast<unsigned long long>(t.tv_sec) * 1000000000ULL +
         static_cast<unsigned long long>(t.tv_nsec);

     return static_cast<unsigned long>(ns);
 }
 struct RECT
 {
 	int left, top;
@ -138,6 +150,7 @@ enum GSWindowDim
 	GSDim_1024,
 	GSDim_1280,
 };
 typedef union 
 {
 	struct
--- a/plugins/zzogl-pg/opengl/targets.cpp
+++ b/plugins/zzogl-pg/opengl/targets.cpp
@ -2930,7 +2930,7 @@ void FlushTransferRanges(const tex0Info* ptex)
 // Pass-through converter: hands back its argument, implicitly converted from
 // T to Tret. It is supplied as the convfn template argument of Resolve_32_Bit
 // for the formats that need no colour conversion.
 template <typename T, typename Tret>
 inline Tret dummy_return(T value)
 {
     Tret converted = value;
     return converted;
 }
-template <typename T, typename Tsrc, T (*convfn)(Tsrc)>
+template <typename Tdst, Tdst (*convfn)(u32)>
 inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const int psm, u32 fbm)
 {
    u32 mask, imask;
@ -2947,17 +2947,21 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
        imask = fbm;
    }
-    Tsrc* src = (Tsrc*)(psrc);
+    Tdst* pPageOffset = (Tdst*)g_pbyGSMemory + fbp*(256/sizeof(Tdst));
-    T* pPageOffset = (T*)g_pbyGSMemory + fbp*(256/sizeof(T)), *dst;
+    Tdst *dst;
-    int maxfbh = (MEMORY_END-fbp*256) / (sizeof(T) * fbw);
+    int maxfbh = (MEMORY_END-fbp*256) / (sizeof(Tdst) * fbw);
    if( maxfbh > fbh ) maxfbh = fbh;
    ZZLog::Debug_Log("*** Resolve 32 bits: %dx%d in %x", maxfbh, fbw, psm);
-    ZZLog::Debug_Log("*** Resolve 32 bits: %dx%d in %x\n", maxfbh, fbw, psm);
+    // Start the src array at the end to reduce testing in loop
    u32 raw_size = RH(Pitch(fbw))/sizeof(u32);
    u32* src = (u32*)(psrc) + maxfbh*raw_size;
-    for(int i = 0; i < maxfbh; ++i) {
+    for(int i = maxfbh; i > 0; --i) {
-        for(int j = 0; j < fbw; ++j) {
+        src -= raw_size;
-            T dsrc = (T)convfn(src[RW(j)]);
+        for(int j = fbw; j > 0; --j) {
            Tdst dsrc = (Tdst)convfn(src[RW(j)]);
             // There are 3 ways to call the functions:
             // macro (compact, inline) but needs a nice psm ; switch (inline) ; function pointer (compact)
            // Use a switch to allow inlining of the getPixel function.
@ -2994,7 +2998,6 @@ inline void Resolve_32_Bit(const void* psrc, int fbp, int fbw, int fbh, const in
            }
            *dst = (dsrc & mask) | (*dst & imask);
        }
        src += RH(Pitch(fbw))/sizeof(Tsrc);
    }
 }
@ -3018,28 +3021,28 @@ void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, boo
        // the psm switch in Resolve_32_Bit
        case PSMCT32:
        case PSMCT24:
-            Resolve_32_Bit<u32, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMCT32, fbm);
+            Resolve_32_Bit<u32, dummy_return >(psrc, fbp, fbw, fbh, PSMCT32, fbm);
            break;
        case PSMCT16:
-            Resolve_32_Bit<u16, u32, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16, fbm);
+            Resolve_32_Bit<u16, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16, fbm);
            break;
        case PSMCT16S:
-            Resolve_32_Bit<u16, u32, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16S, fbm);
+            Resolve_32_Bit<u16, RGBA32to16 >(psrc, fbp, fbw, fbh, PSMCT16S, fbm);
            break;
        case PSMT32Z:
        case PSMT24Z:
-            Resolve_32_Bit<u32, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT32Z, fbm);
+            Resolve_32_Bit<u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT32Z, fbm);
            break;
        case PSMT16Z:
-            Resolve_32_Bit<u16, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT16Z, fbm);
+            Resolve_32_Bit<u16, dummy_return >(psrc, fbp, fbw, fbh, PSMT16Z, fbm);
            break;
        case PSMT16SZ:
-            Resolve_32_Bit<u16, u32, dummy_return >(psrc, fbp, fbw, fbh, PSMT16SZ, fbm);
+            Resolve_32_Bit<u16, dummy_return >(psrc, fbp, fbw, fbh, PSMT16SZ, fbm);
            break;
    }
--- a/plugins/zzogl-pg/opengl/zerogs.cpp
+++ b/plugins/zzogl-pg/opengl/zerogs.cpp
@ -545,7 +545,7 @@ __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
 int Values[100] = {0, };
-void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
+inline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
 {
 	int index = Index;
 	p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);