From 202f09bf436da0365ec94249c5761d17a6782be8 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 15 Jul 2010 06:46:57 +0000 Subject: [PATCH 01/16] Minor change to the custom memcpy function... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3493 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/src/Utilities/x86/MemcpyVibes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/src/Utilities/x86/MemcpyVibes.cpp b/common/src/Utilities/x86/MemcpyVibes.cpp index 11ac0c10fd..7efcd83f39 100644 --- a/common/src/Utilities/x86/MemcpyVibes.cpp +++ b/common/src/Utilities/x86/MemcpyVibes.cpp @@ -98,8 +98,8 @@ void gen_memcpy_vibes() { xADD(ecx, 256); } const xRegisterSSE xmm_t(x); - xMOVAPS(xmm_t, ptr32[edx+off]); - xMOVAPS(ptr32[ecx+off], xmm_t); + xMOVAPS (xmm_t, ptr32[edx+off]); + xMOVNTPS(ptr32[ecx+off], xmm_t); } _memcpy_vibes[0] = (_memCpyCall)xGetPtr(); From 4a23585a55311d2b35b3b3b19ee9d40397b241cd Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Thu, 15 Jul 2010 16:32:59 +0000 Subject: [PATCH 02/16] [zzogl]: asm work. Asm was totally broken. At least in release build. * use volatile keyword to avoid gcc removing the function... * Use name variable in asm code instead of %n * Fix constraint on s_clut16mask. There are input, not output... Arcum can you look at this 2 things thanks. -> code is still broken in one place. s_clut16mask & s_clut16mask2 re null in the code generated by gcc ! To fix it (do not know why), we can declare them as static. But I'm not sure on the impact and I can not test windows... -> s_clut16mask is declared as a 256bits numbers instead of 128 ! git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3494 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/x86.cpp | 96 ++++++++++++++++----------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index 581d023139..a612365267 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -433,11 +433,11 @@ WriteUnaligned: End: } #else - __asm__(".intel_syntax noprefix\n" - "movdqa xmm0, xmmword ptr [ecx]\n" - "movdqa xmm1, xmmword ptr [ecx+16]\n" - "movdqa xmm2, xmmword ptr [ecx+32]\n" - "movdqa xmm3, xmmword ptr [ecx+48]\n" + __asm__ __volatile__(".intel_syntax noprefix\n" + "movdqa xmm0, xmmword ptr [%[vm]]\n" + "movdqa xmm1, xmmword ptr [%[vm]+16]\n" + "movdqa xmm2, xmmword ptr [%[vm]+32]\n" + "movdqa xmm3, xmmword ptr [%[vm]+48]\n" // rearrange "pshuflw xmm0, xmm0, 0x88\n" @@ -457,14 +457,14 @@ End: "pxor xmm6, xmm6\n" - "test edx, 15\n" + "test %[clut], 15\n" "jnz WriteUnaligned\n" - "movdqa xmm7, [s_clut16mask]\n" // saves upper 16 bits + "movdqa xmm7, s_clut16mask\n" // saves upper 16 bits // have to save interlaced with the old data - "movdqa xmm4, [edx]\n" - "movdqa xmm5, [edx+32]\n" + "movdqa xmm4, [%[clut]]\n" + "movdqa xmm5, [%[clut]+32]\n" "movhlps xmm1, xmm0\n" "movlhps xmm0, xmm2\n"// lower 8 colors @@ -483,29 +483,29 @@ End: "punpckhwd xmm2, xmm6\n" "punpckhwd xmm3, xmm6\n" - "movdqa [edx], xmm0\n" - "movdqa [edx+32], xmm1\n" + "movdqa [%[clut]], xmm0\n" + "movdqa [%[clut]+32], xmm1\n" "movdqa xmm5, xmm7\n" - "pand xmm7, [edx+16]\n" - "pand xmm5, [edx+48]\n" + "pand xmm7, [%[clut]+16]\n" + "pand xmm5, [%[clut]+48]\n" "por xmm2, xmm7\n" "por xmm3, xmm5\n" - "movdqa [edx+16], xmm2\n" - "movdqa [edx+48], xmm3\n" + "movdqa [%[clut]+16], xmm2\n" + "movdqa [%[clut]+48], xmm3\n" "jmp WriteCLUT_T16_I4_CSM1_End\n" "WriteUnaligned:\n" - // %edx is offset by 2 - "sub edx, 2\n" + // %[clut] is offset by 2 + "sub %[clut], 2\n" - "movdqa xmm7, [[s_clut16mask2]]\n" // saves lower 16 bits + "movdqa xmm7, s_clut16mask2\n" // saves lower 16 bits // have to save interlaced with the old data - "movdqa xmm4, [edx]\n" - "movdqa xmm5, [edx+32]\n" + "movdqa xmm4, [%[clut]]\n" + "movdqa xmm5, [%[clut]+32]\n" "movhlps xmm1, xmm0\n" "movlhps xmm0, xmm2\n" // lower 8 colors @@ -528,24 +528,24 @@ End: "pslld xmm2, 16\n" "pslld xmm3, 16\n" - "movdqa [edx], xmm0\n" - "movdqa [edx+32], xmm1\n" + "movdqa [%[clut]], xmm0\n" + "movdqa [%[clut]+32], xmm1\n" "movdqa xmm5, xmm7\n" - "pand xmm7, [edx+16]\n" - "pand xmm5, [edx+48]\n" + "pand xmm7, [%[clut]+16]\n" + "pand xmm5, [%[clut]+48]\n" "por xmm2, xmm7\n" "por xmm3, xmm5\n" - "movdqa [edx+16], xmm2\n" - "movdqa [edx+48], xmm3\n" + "movdqa [%[clut]+16], xmm2\n" + "movdqa [%[clut]+48], xmm3\n" "WriteCLUT_T16_I4_CSM1_End:\n" "\n" ".att_syntax\n" - : [s_clut16mask] "=m" (s_clut16mask), [s_clut16mask2] "=m" (s_clut16mask2) - : "c" (vm), "d" (clut) - : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + : + : [vm] "r" (vm), [clut] "r" (clut), [s_clut16mask] "m" (*s_clut16mask), [s_clut16mask2] "m" (*s_clut16mask2) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); #endif // _MSC_VER } @@ -718,15 +718,15 @@ Z16Loop: } #else // _MSC_VER - __asm__(".intel_syntax\n" + __asm__ __volatile__(".intel_syntax\n" "pxor %%xmm7, %%xmm7\n" "Z16Loop:\n" // unpack 64 bytes at a time - "movdqa %%xmm0, [%0]\n" - "movdqa %%xmm2, [%0+16]\n" - "movdqa %%xmm4, [%0+32]\n" - "movdqa %%xmm6, [%0+48]\n" + "movdqa %%xmm0, [%[src]]\n" + "movdqa %%xmm2, [%[src]+16]\n" + "movdqa %%xmm4, [%[src]+32]\n" + "movdqa %%xmm6, [%[src]+48]\n" "movdqa %%xmm1, %%xmm0\n" "movdqa %%xmm3, %%xmm2\n" @@ -738,35 +738,35 @@ Z16Loop: "punpckhwd %%xmm3, %%xmm7\n" // start saving - "movdqa [%1], %%xmm0\n" - "movdqa [%1+16], %%xmm1\n" + "movdqa [%[dst]], %%xmm0\n" + "movdqa [%[dst]+16], %%xmm1\n" "punpcklwd %%xmm4, %%xmm7\n" "punpckhwd %%xmm5, %%xmm7\n" - "movdqa [%1+32], %%xmm2\n" - "movdqa [%1+48], %%xmm3\n" + "movdqa [%[dst]+32], %%xmm2\n" + "movdqa [%[dst]+48], %%xmm3\n" "movdqa %%xmm0, %%xmm6\n" "punpcklwd %%xmm6, %%xmm7\n" - "movdqa [%1+64], %%xmm4\n" - "movdqa [%1+80], %%xmm5\n" + "movdqa [%[dst]+64], %%xmm4\n" + "movdqa [%[dst]+80], %%xmm5\n" "punpckhwd %%xmm0, %%xmm7\n" - "movdqa [%1+96], %%xmm6\n" - "movdqa [%1+112], %%xmm0\n" + "movdqa [%[dst]+96], %%xmm6\n" + "movdqa [%[dst]+112], %%xmm0\n" - "add %0, 64\n" - "add %1, 128\n" - "sub %2, 1\n" + "add %[src], 64\n" + "add %[dst], 128\n" + "sub %[iters], 1\n" "jne Z16Loop\n" ".att_syntax\n" - : "=r"(src), "=r"(dst), "=r"(iters) - : "0"(src), "1"(dst), "2"(iters) - : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" + : "=&r"(src), "=&r"(dst), "=&r"(iters) + : [src] "0"(src), [dst] "1"(dst), [iters] "2"(iters) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); #endif // _MSC_VER } From 71bcec243e76d0ccac8f14dbdf9f9c8f9b75644d Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Thu, 15 Jul 2010 16:52:25 +0000 Subject: [PATCH 03/16] [zzogl]: ooups forget 2 lines. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3495 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/x86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index a612365267..28adce0aa3 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -460,7 +460,7 @@ End: "test %[clut], 15\n" "jnz WriteUnaligned\n" - "movdqa xmm7, s_clut16mask\n" // saves upper 16 bits + "movdqa xmm7, %[s_clut16mask]\n" // saves upper 16 bits // have to save interlaced with the old data "movdqa xmm4, [%[clut]]\n" @@ -501,7 +501,7 @@ End: // %[clut] is offset by 2 "sub %[clut], 2\n" - "movdqa xmm7, s_clut16mask2\n" // saves lower 16 bits + "movdqa xmm7, %[s_clut16mask2]\n" // saves lower 16 bits // have to save interlaced with the old data "movdqa xmm4, [%[clut]]\n" From 6250ec1f370e53f63fe86bf80343e4a0b92ac6ac Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Thu, 15 Jul 2010 18:07:49 +0000 Subject: [PATCH 04/16] [IPU]: properly fix gcc c++0x error. Declared coeff as s16 instead of u16. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3498 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/IPU/yuv2rgb.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pcsx2/IPU/yuv2rgb.cpp b/pcsx2/IPU/yuv2rgb.cpp index 423cacc71c..5fcda2ef18 100644 --- a/pcsx2/IPU/yuv2rgb.cpp +++ b/pcsx2/IPU/yuv2rgb.cpp @@ -63,11 +63,11 @@ struct SSE2_Tables u16 Y_mask[8]; // offset -32 u16 round_1bit[8]; // offset -16 - u16 Y_coefficients[8]; // offset 0 - u16 GCr_coefficients[8];// offset 16 - u16 GCb_coefficients[8];// offset 32 - u16 RCr_coefficients[8];// offset 48 - u16 BCb_coefficients[8];// offset 64 + s16 Y_coefficients[8]; // offset 0 + s16 GCr_coefficients[8];// offset 16 + s16 GCb_coefficients[8];// offset 32 + s16 RCr_coefficients[8];// offset 48 + s16 BCb_coefficients[8];// offset 64 }; enum From 7e6dba3b7abc3d3246887909c2c398f65bd1fd7a Mon Sep 17 00:00:00 2001 From: "gregory.hainaut" Date: Thu, 15 Jul 2010 22:24:35 +0000 Subject: [PATCH 05/16] [cmake] Add a fatal error for pure amd64 distributions. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3499 96395faa-99c1-11dd-bbfe-3dabce05a288 --- CMakeLists.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4182f0342d..2225d7fc34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,19 @@ cmake_minimum_required(VERSION 2.6) # Variable to check that people use the good file set(TOP_CMAKE_WAS_SOURCED TRUE) +# Print a clear message that 64bits is not supported +# It would avoid compilation failure later. +if(CMAKE_SIZEOF_VOID_P MATCHES "8") + message(FATAL_ERROR " + PCSX2 does not support 64bits environment. Please install a 32bits chroot or a 32bits OS. + PCSX2 have neither no plan to support the 64bits architecture in the future. + It will need a complete rewrite of the core emulator and a lots of time + + However when linux distribution will support properly multi-arch package, it will + be at least possible to easily compile and install PCSX2 witout too much hassle (the chroot environment)") +endif(CMAKE_SIZEOF_VOID_P MATCHES "8") + + # set module path set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) From f2d637a6e0a5ab0e111677a99156f437f0250a26 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 16 Jul 2010 03:48:13 +0000 Subject: [PATCH 06/16] zzogl-pg: * Removed extern "C" and applied static const to s_clut16 vars. Should be fine since the old x86.S files that needed extern "C" have been removed from zzogl. * Fixed a compilation error in Win32/Debug builds. * Changed some references of DEVBUILD ZEROGS_DEVBUILD. Not sure if all of them should be changed over or not, so I just stuck to some of the more obviously correct bits. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3500 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 2 +- plugins/zzogl-pg/opengl/ZZLog.h | 380 ++++++++++++------------- plugins/zzogl-pg/opengl/ZZoglFlush.cpp | 2 +- plugins/zzogl-pg/opengl/targets.cpp | 8 +- plugins/zzogl-pg/opengl/x86.cpp | 7 +- plugins/zzogl-pg/opengl/zerogs.h | 2 +- 6 files changed, 195 insertions(+), 206 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index d8074035c3..4c098fb82b 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -686,7 +686,7 @@ g_MemTargs.ClearRange(dststart, dstend); - #ifdef DEVBUILD + #ifdef ZEROGS_DEVBUILD if (g_bSaveTrans) { diff --git a/plugins/zzogl-pg/opengl/ZZLog.h b/plugins/zzogl-pg/opengl/ZZLog.h index da299266ed..443e71435b 100644 --- a/plugins/zzogl-pg/opengl/ZZLog.h +++ b/plugins/zzogl-pg/opengl/ZZLog.h @@ -1,197 +1,193 @@ -/* ZZ Open GL graphics plugin - * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com - * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - +/* ZZ Open GL graphics plugin + * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com + * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + #ifndef ZZLOG_H_INCLUDED #define ZZLOG_H_INCLUDED - -#include "Util.h" - - -//Logging for errors that are called often should have a time counter. -#ifdef __LINUX__ -static u32 __attribute__((unused)) lasttime = 0; -static u32 __attribute__((unused)) BigTime = 5000; -static bool __attribute__((unused)) SPAM_PASS; -#else -static u32 lasttime = 0; -static u32 BigTime = 5000; -static bool SPAM_PASS; -#endif - -#define ERROR_LOG_SPAM(text) { \ - if( timeGetTime() - lasttime > BigTime ) { \ - ZZLog::Error_Log(text); \ - lasttime = timeGetTime(); \ - } \ -} -// The same macro with one-argument substitution. -#define ERROR_LOG_SPAMA(fmt, value) { \ - if( timeGetTime() - lasttime > BigTime ) { \ - ZZLog::Error_Log(fmt, value); \ - lasttime = timeGetTime(); \ - } \ -} - -#define ERROR_LOG_SPAM_TEST(text) {\ - if( timeGetTime() - lasttime > BigTime ) { \ - ZZLog::Error_Log(text); \ - lasttime = timeGetTime(); \ - SPAM_PASS = true; \ - } \ - else \ - SPAM_PASS = false; \ -} - -#if DEBUG_PROF -#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0)) - -#define FUNCLOG {\ - static bool Was_Here = false; \ - static unsigned long int waslasttime = 0; \ - if (!Was_Here && FILE_IS_IN_CHECK) { \ - Was_Here = true;\ - ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \ - waslasttime = timeGetTime(); \ - } \ - if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \ - Was_Here = false; \ - } \ -} -#else -#define FUNCLOG -#endif - -//#define WRITE_GREG_LOGS -//#define WRITE_PRIM_LOGS -#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD) -#define ZEROGS_DEVBUILD -#endif - -#ifdef ZEROGS_DEVBUILD -//#define DEVBUILD -#endif - - -// sends a message to output window if assert fails -#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } } -#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } } -#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (##rtype); } } -#define B(x) { if( !(x) ) { ZZLog::Log(_#x"\n"); ZZLog::Log(#x"\n"); } } -#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } } -#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (##rtype); } } -#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } } - -#define GL_REPORT_ERROR() \ -{ \ - GLenum err = glGetError(); \ - if( err != GL_NO_ERROR ) \ - { \ - ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \ - ZeroGS::HandleGLError(); \ - } \ -} - -#ifdef _DEBUG -# define GL_REPORT_ERRORD() \ -{ \ - GLenum err = glGetError(); \ - if( err != GL_NO_ERROR ) \ - { \ - ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \ - ZeroGS::HandleGLError(); \ - } \ -} -#else -# define GL_REPORT_ERRORD() -#endif - - -inline const char *error_name(int err) -{ - switch (err) - { - case GL_NO_ERROR: - return "GL_NO_ERROR"; - - case GL_INVALID_ENUM: - return "GL_INVALID_ENUM"; - - case GL_INVALID_VALUE: - return "GL_INVALID_VALUE"; - - case GL_INVALID_OPERATION: - return "GL_INVALID_OPERATION"; - - case GL_STACK_OVERFLOW: - return "GL_STACK_OVERFLOW"; - - case GL_STACK_UNDERFLOW: - return "GL_STACK_UNDERFLOW"; - - case GL_OUT_OF_MEMORY: - return "GL_OUT_OF_MEMORY"; - - case GL_TABLE_TOO_LARGE: - return "GL_TABLE_TOO_LARGE"; - - case GL_INVALID_FRAMEBUFFER_OPERATION: - return "GL_INVALID_FRAMEBUFFER_OPERATION"; - - default: - return "Unknown GL error"; - } -} - -extern void __LogToConsole(const char *fmt, ...); - -// Subset of zerogs, to avoid that whole huge header. -namespace ZeroGS -{ -extern void AddMessage(const char* pstr, u32 ms); -extern void SetAA(int mode); -extern void SetNegAA(int mode); -extern bool Create(int width, int height); -extern void Destroy(bool bD3D); -extern void StartCapture(); -extern void StopCapture(); -} - -namespace ZZLog -{ -extern bool IsLogging(); -void SetDir(const char* dir); -extern bool Open(); -extern void Close(); -extern void Message(const char *fmt, ...); -extern void Log(const char *fmt, ...); -void WriteToScreen(const char* pstr, u32 ms = 5000); -extern void WriteToConsole(const char *fmt, ...); -extern void Print(const char *fmt, ...); -extern void WriteLn(const char *fmt, ...); - -extern void Greg_Log(const char *fmt, ...); -extern void Prim_Log(const char *fmt, ...); -extern void GS_Log(const char *fmt, ...); - -extern void Debug_Log(const char *fmt, ...); -extern void Warn_Log(const char *fmt, ...); -extern void Error_Log(const char *fmt, ...); + +#include "Util.h" + + +//Logging for errors that are called often should have a time counter. +#ifdef __LINUX__ +static u32 __attribute__((unused)) lasttime = 0; +static u32 __attribute__((unused)) BigTime = 5000; +static bool __attribute__((unused)) SPAM_PASS; +#else +static u32 lasttime = 0; +static u32 BigTime = 5000; +static bool SPAM_PASS; +#endif + +#define ERROR_LOG_SPAM(text) { \ + if( timeGetTime() - lasttime > BigTime ) { \ + ZZLog::Error_Log(text); \ + lasttime = timeGetTime(); \ + } \ +} +// The same macro with one-argument substitution. +#define ERROR_LOG_SPAMA(fmt, value) { \ + if( timeGetTime() - lasttime > BigTime ) { \ + ZZLog::Error_Log(fmt, value); \ + lasttime = timeGetTime(); \ + } \ +} + +#define ERROR_LOG_SPAM_TEST(text) {\ + if( timeGetTime() - lasttime > BigTime ) { \ + ZZLog::Error_Log(text); \ + lasttime = timeGetTime(); \ + SPAM_PASS = true; \ + } \ + else \ + SPAM_PASS = false; \ +} + +#if DEBUG_PROF +#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0)) + +#define FUNCLOG {\ + static bool Was_Here = false; \ + static unsigned long int waslasttime = 0; \ + if (!Was_Here && FILE_IS_IN_CHECK) { \ + Was_Here = true;\ + ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \ + waslasttime = timeGetTime(); \ + } \ + if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \ + Was_Here = false; \ + } \ +} +#else +#define FUNCLOG +#endif + +//#define WRITE_GREG_LOGS +//#define WRITE_PRIM_LOGS +#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD) +#define ZEROGS_DEVBUILD +#endif + + +// sends a message to output window if assert fails +#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } } +#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } } +#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (##rtype); } } +#define B(x) { if( !(x) ) { ZZLog::Log(_#x"\n"); ZZLog::Log(#x"\n"); } } +#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } } +#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (##rtype); } } +#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } } + +#define GL_REPORT_ERROR() \ +{ \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \ + ZeroGS::HandleGLError(); \ + } \ +} + +#ifdef _DEBUG +# define GL_REPORT_ERRORD() \ +{ \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \ + ZeroGS::HandleGLError(); \ + } \ +} +#else +# define GL_REPORT_ERRORD() +#endif + + +inline const char *error_name(int err) +{ + switch (err) + { + case GL_NO_ERROR: + return "GL_NO_ERROR"; + + case GL_INVALID_ENUM: + return "GL_INVALID_ENUM"; + + case GL_INVALID_VALUE: + return "GL_INVALID_VALUE"; + + case GL_INVALID_OPERATION: + return "GL_INVALID_OPERATION"; + + case GL_STACK_OVERFLOW: + return "GL_STACK_OVERFLOW"; + + case GL_STACK_UNDERFLOW: + return "GL_STACK_UNDERFLOW"; + + case GL_OUT_OF_MEMORY: + return "GL_OUT_OF_MEMORY"; + + case GL_TABLE_TOO_LARGE: + return "GL_TABLE_TOO_LARGE"; + + case GL_INVALID_FRAMEBUFFER_OPERATION: + return "GL_INVALID_FRAMEBUFFER_OPERATION"; + + default: + return "Unknown GL error"; + } +} + +extern void __LogToConsole(const char *fmt, ...); + +// Subset of zerogs, to avoid that whole huge header. +namespace ZeroGS +{ +extern void AddMessage(const char* pstr, u32 ms); +extern void SetAA(int mode); +extern void SetNegAA(int mode); +extern bool Create(int width, int height); +extern void Destroy(bool bD3D); +extern void StartCapture(); +extern void StopCapture(); +} + +namespace ZZLog +{ +extern bool IsLogging(); +void SetDir(const char* dir); +extern bool Open(); +extern void Close(); +extern void Message(const char *fmt, ...); +extern void Log(const char *fmt, ...); +void WriteToScreen(const char* pstr, u32 ms = 5000); +extern void WriteToConsole(const char *fmt, ...); +extern void Print(const char *fmt, ...); +extern void WriteLn(const char *fmt, ...); + +extern void Greg_Log(const char *fmt, ...); +extern void Prim_Log(const char *fmt, ...); +extern void GS_Log(const char *fmt, ...); + +extern void Debug_Log(const char *fmt, ...); +extern void Warn_Log(const char *fmt, ...); +extern void Error_Log(const char *fmt, ...); }; #endif // ZZLOG_H_INCLUDED diff --git a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp index 29ea9546e2..e4338b555a 100644 --- a/plugins/zzogl-pg/opengl/ZZoglFlush.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglFlush.cpp @@ -29,7 +29,7 @@ using namespace ZeroGS; //------------------ Defines -#ifndef DEVBUILD +#ifndef ZEROGS_DEVBUILD #define INC_GENVARS() #define INC_TEXVARS() diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 6adf5e690f..391452608c 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -42,10 +42,6 @@ extern bool g_bUpdateStencil; # define INC_RESOLVE() ++g_nResolve #endif -#ifdef DEVBUILD -//static int g_bSaveResolved = 0; -#endif - extern int s_nResolved; extern u32 g_nResolve; extern bool g_bSaveTrans; @@ -302,7 +298,7 @@ void ZeroGS::CRenderTarget::Resolve() GL_REPORT_ERRORD(); -#if defined(DEVBUILD) +#if defined(ZEROGS_DEVBUILD) if (g_bSaveResolved) { @@ -328,7 +324,7 @@ void ZeroGS::CRenderTarget::Resolve(int startrange, int endrange) // flush if necessary FlushIfNecesary(this) ; -#if defined(DEVBUILD) +#if defined(ZEROGS_DEVBUILD) if (g_bSaveResolved) { SaveTexture("resolved.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh)); diff --git a/plugins/zzogl-pg/opengl/x86.cpp b/plugins/zzogl-pg/opengl/x86.cpp index 28adce0aa3..d32718dbfb 100644 --- a/plugins/zzogl-pg/opengl/x86.cpp +++ b/plugins/zzogl-pg/opengl/x86.cpp @@ -308,13 +308,10 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut) } -extern "C" -{ - PCSX2_ALIGNED16(int s_clut16mask2[4]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; - PCSX2_ALIGNED16(int s_clut16mask[8]) = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, +static const __aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; +static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff }; -} extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut) { diff --git a/plugins/zzogl-pg/opengl/zerogs.h b/plugins/zzogl-pg/opengl/zerogs.h index 3c06df3cd5..6491297e3f 100644 --- a/plugins/zzogl-pg/opengl/zerogs.h +++ b/plugins/zzogl-pg/opengl/zerogs.h @@ -96,7 +96,7 @@ extern u32 ptexBilinearBlocks; // State parameters -#ifdef DEVBUILD +#ifdef ZEROGS_DEVBUILD extern char* EFFECT_NAME; extern char* EFFECT_DIR; extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve; From cfe4d607b3ab26ee3dbb9402000b5253a026c4d9 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 00:12:07 +0000 Subject: [PATCH 07/16] zzogl-pg: Working more on the new register code. Combined the KickVertex functions. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3506 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/NewRegs.cpp | 45 +++++++---------------------- 1 file changed, 10 insertions(+), 35 deletions(-) diff --git a/plugins/zzogl-pg/opengl/NewRegs.cpp b/plugins/zzogl-pg/opengl/NewRegs.cpp index bf670b0e31..b47d0ce938 100644 --- a/plugins/zzogl-pg/opengl/NewRegs.cpp +++ b/plugins/zzogl-pg/opengl/NewRegs.cpp @@ -125,27 +125,16 @@ void __fastcall GIFPackedRegHandlerUV(u32* data) ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V); } -void __forceinline KICK_VERTEX2() +void __forceinline KickVertex(bool adc) { FUNCLOG - - if (++gs.primC >= (int)g_primmult[prim->prim]) - { - if (NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])(); - - gs.primC -= g_primsub[prim->prim]; - } -} - -void __forceinline KICK_VERTEX3() -{ - FUNCLOG - if (++gs.primC >= (int)g_primmult[prim->prim]) { + if (!adc && NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])(); + gs.primC -= g_primsub[prim->prim]; - if (prim->prim == 5) + if (adc && prim->prim == 5) { /* tri fans need special processing */ if (gs.nTriFanVert == gs.primIndex) @@ -161,14 +150,7 @@ void __fastcall GIFPackedRegHandlerXYZF2(u32* data) gs.add_vertex(r->X, r->Y,r->Z, r->F); // Fix Vertexes up later. - if (data[3] & 0x8000) - { - KICK_VERTEX3(); - } - else - { - KICK_VERTEX2(); - } + KickVertex(!!(r->ADC)); ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); } @@ -179,14 +161,7 @@ void __fastcall GIFPackedRegHandlerXYZ2(u32* data) gs.add_vertex(r->X, r->Y,r->Z); // Fix Vertexes up later. - if (data[3] & 0x8000) - { - KICK_VERTEX3(); - } - else - { - KICK_VERTEX2(); - } + KickVertex(!!(r->ADC)); ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); } @@ -285,7 +260,7 @@ void __fastcall GIFRegHandlerXYZF2(u32* data) GIFRegXYZF* r = (GIFRegXYZF*)(data); gs.add_vertex(r->X, r->Y,r->Z, r->F); - KICK_VERTEX2(); + KickVertex(false); ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); } @@ -295,7 +270,7 @@ void __fastcall GIFRegHandlerXYZ2(u32* data) GIFRegXYZ* r = (GIFRegXYZ*)(data); gs.add_vertex(r->X, r->Y,r->Z); - KICK_VERTEX2(); + KickVertex(false); ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); } @@ -382,7 +357,7 @@ void __fastcall GIFRegHandlerXYZF3(u32* data) GIFRegXYZF* r = (GIFRegXYZF*)(data); gs.add_vertex(r->X, r->Y,r->Z, r->F); - KICK_VERTEX3(); + KickVertex(true); ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); } @@ -392,7 +367,7 @@ void __fastcall GIFRegHandlerXYZ3(u32* data) GIFRegXYZ* r = (GIFRegXYZ*)(data); gs.add_vertex(r->X, r->Y,r->Z); - KICK_VERTEX3(); + KickVertex(true); ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); } From 3143be1da1847601e4b2285e447fce0ef3990dc1 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 01:12:33 +0000 Subject: [PATCH 08/16] zzogl-pg: Converted the TransferLocalLocal defines into inlined functions. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3508 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 227 +++++++++++++------------ plugins/zzogl-pg/opengl/NewRegs.cpp | 3 +- 2 files changed, 122 insertions(+), 108 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index 4c098fb82b..caec95a8fa 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -371,6 +371,89 @@ } } +__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit) +{ + u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; + u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + + if ((gs.imageWnew & widthlimit) != 0) return; + assert(widthlimit <= 4); + + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) + { + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) + { + wp(pDstBuf, j2%2048, i2%2048, + rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + if (widthlimit > 1) + { + wp(pDstBuf, (j2+1)%2048, i2%2048, + rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + if (widthlimit > 2) + { + wp(pDstBuf, (j2+2)%2048, i2%2048, + rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + if (widthlimit > 3) + { + wp(pDstBuf, (j2+3)%2048, i2%2048, + rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + } + } + } + } + } +} + +__forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddress_0 gdp) +{ + u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; + u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + + assert((gs.imageWnew % 8) == 0); + + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) + { + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) + { + /* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ + u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw); + u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + } + } +} + // dir depends on trxpos.dirx & trxpos.diry void TransferLocalLocal() { @@ -408,98 +491,28 @@ //(*it)->status |= CRenderTarget::TS_NeedUpdate; } } - - u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; - u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; - - #define TRANSFERLOCALLOCAL(srcpsm, dstpsm, widthlimit) { \ - if( (gs.imageWnew&widthlimit)!=0 ) break; \ - assert( (gs.imageWnew&widthlimit)==0 && widthlimit <= 4); \ - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) { \ - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) { \ - \ - writePixel##dstpsm##_0(pDstBuf, j2%2048, i2%2048, \ - readPixel##srcpsm##_0(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \ - \ - if( widthlimit > 1 ) { \ - writePixel##dstpsm##_0(pDstBuf, (j2+1)%2048, i2%2048, \ - readPixel##srcpsm##_0(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \ - \ - if( widthlimit > 2 ) { \ - writePixel##dstpsm##_0(pDstBuf, (j2+2)%2048, i2%2048, \ - readPixel##srcpsm##_0(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \ - \ - if( widthlimit > 3 ) { \ - writePixel##dstpsm##_0(pDstBuf, (j2+3)%2048, i2%2048, \ - readPixel##srcpsm##_0(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \ - } \ - } \ - } \ - } \ - } \ - } \ - - #define TRANSFERLOCALLOCAL_4(srcpsm, dstpsm) { \ - assert( (gs.imageWnew%8) == 0 ); \ - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; ++i, ++i2) { \ - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=8, j2+=8) { \ - /* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ \ - u32 read = getPixelAddress##srcpsm##_0(j%2048, i%2048, gs.srcbuf.bw); \ - u32 write = getPixelAddress##dstpsm##_0(j2%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \ - \ - read = getPixelAddress##srcpsm##_0((j+1)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+1)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \ - \ - read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \ - \ - read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \ - \ - read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \ - \ - read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \ - \ - read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \ - \ - read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \ - write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \ - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \ - } \ - } \ - } \ switch (gs.srcbuf.psm) { case PSMCT32: if (gs.dstbuf.psm == PSMCT32) { - TRANSFERLOCALLOCAL(32, 32, 2); + _TransferLocalLocal(writePixel32_0, readPixel32_0, 2); } else { - TRANSFERLOCALLOCAL(32, 32Z, 2); + _TransferLocalLocal(writePixel32_0, readPixel32Z_0, 2); } break; case PSMCT24: if (gs.dstbuf.psm == PSMCT24) { - TRANSFERLOCALLOCAL(24, 24, 4); + _TransferLocalLocal(writePixel24_0, readPixel24_0, 4); } else { - TRANSFERLOCALLOCAL(24, 24Z, 4); + _TransferLocalLocal(writePixel24_0, readPixel24Z_0, 4); } break; @@ -507,19 +520,19 @@ switch (gs.dstbuf.psm) { case PSMCT16: - TRANSFERLOCALLOCAL(16, 16, 4); + _TransferLocalLocal(writePixel16_0, readPixel16_0, 4); break; case PSMCT16S: - TRANSFERLOCALLOCAL(16, 16S, 4); + _TransferLocalLocal(writePixel16_0, readPixel16S_0, 4); break; case PSMT16Z: - TRANSFERLOCALLOCAL(16, 16Z, 4); + _TransferLocalLocal(writePixel16_0, readPixel16Z_0, 4); break; case PSMT16SZ: - TRANSFERLOCALLOCAL(16, 16SZ, 4); + _TransferLocalLocal(writePixel16_0, readPixel16SZ_0, 4); break; } break; @@ -528,19 +541,19 @@ switch (gs.dstbuf.psm) { case PSMCT16: - TRANSFERLOCALLOCAL(16S, 16, 4); + _TransferLocalLocal(writePixel16S_0, readPixel16_0, 4); break; case PSMCT16S: - TRANSFERLOCALLOCAL(16S, 16S, 4); + _TransferLocalLocal(writePixel16S_0, readPixel16S_0, 4); break; case PSMT16Z: - TRANSFERLOCALLOCAL(16S, 16Z, 4); + _TransferLocalLocal(writePixel16S_0, readPixel16Z_0, 4); break; case PSMT16SZ: - TRANSFERLOCALLOCAL(16S, 16SZ, 4); + _TransferLocalLocal(writePixel16S_0, readPixel16SZ_0, 4); break; } break; @@ -548,11 +561,11 @@ case PSMT8: if (gs.dstbuf.psm == PSMT8) { - TRANSFERLOCALLOCAL(8, 8, 4); + _TransferLocalLocal(writePixel8_0, readPixel8_0, 4); } else { - TRANSFERLOCALLOCAL(8, 8H, 4); + _TransferLocalLocal(writePixel8_0, readPixel8H_0, 4); } break; @@ -561,15 +574,15 @@ { case PSMT4: - TRANSFERLOCALLOCAL_4(4, 4); + _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4_0); break; case PSMT4HL: - TRANSFERLOCALLOCAL_4(4, 4HL); + _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HL_0); break; case PSMT4HH: - TRANSFERLOCALLOCAL_4(4, 4HH); + _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HH_0); break; } break; @@ -577,11 +590,11 @@ case PSMT8H: if (gs.dstbuf.psm == PSMT8) { - TRANSFERLOCALLOCAL(8H, 8, 4); + _TransferLocalLocal(writePixel8H_0, readPixel8_0, 4); } else { - TRANSFERLOCALLOCAL(8H, 8H, 4); + _TransferLocalLocal(writePixel8H_0, readPixel8H_0, 4); } break; @@ -589,15 +602,15 @@ switch (gs.dstbuf.psm) { case PSMT4: - TRANSFERLOCALLOCAL_4(4HL, 4); + _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4_0); break; case PSMT4HL: - TRANSFERLOCALLOCAL_4(4HL, 4HL); + _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HL_0); break; case PSMT4HH: - TRANSFERLOCALLOCAL_4(4HL, 4HH); + _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HH_0); break; } break; @@ -606,15 +619,15 @@ switch (gs.dstbuf.psm) { case PSMT4: - TRANSFERLOCALLOCAL_4(4HH, 4); + _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4_0); break; case PSMT4HL: - TRANSFERLOCALLOCAL_4(4HH, 4HL); + _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HL_0); break; case PSMT4HH: - TRANSFERLOCALLOCAL_4(4HH, 4HH); + _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HH_0); break; } break; @@ -622,22 +635,22 @@ case PSMT32Z: if (gs.dstbuf.psm == PSMCT32) { - TRANSFERLOCALLOCAL(32Z, 32, 2); + _TransferLocalLocal(writePixel32Z_0, readPixel32_0, 2); } else { - TRANSFERLOCALLOCAL(32Z, 32Z, 2); + _TransferLocalLocal(writePixel32Z_0, readPixel32Z_0, 2); } break; case PSMT24Z: if (gs.dstbuf.psm == PSMCT24) { - TRANSFERLOCALLOCAL(24Z, 24, 4); + _TransferLocalLocal(writePixel24Z_0, readPixel24_0, 4); } else { - TRANSFERLOCALLOCAL(24Z, 24Z, 4); + _TransferLocalLocal(writePixel24Z_0, readPixel24Z_0, 4); } break; @@ -645,19 +658,19 @@ switch (gs.dstbuf.psm) { case PSMCT16: - TRANSFERLOCALLOCAL(16Z, 16, 4); + _TransferLocalLocal(writePixel16Z_0, readPixel16_0, 4); break; case PSMCT16S: - TRANSFERLOCALLOCAL(16Z, 16S, 4); + _TransferLocalLocal(writePixel16Z_0, readPixel16S_0, 4); break; case PSMT16Z: - TRANSFERLOCALLOCAL(16Z, 16Z, 4); + _TransferLocalLocal(writePixel16Z_0, readPixel16Z_0, 4); break; case PSMT16SZ: - TRANSFERLOCALLOCAL(16Z, 16SZ, 4); + _TransferLocalLocal(writePixel16Z_0, readPixel16SZ_0, 4); break; } break; @@ -666,19 +679,19 @@ switch (gs.dstbuf.psm) { case PSMCT16: - TRANSFERLOCALLOCAL(16SZ, 16, 4); + _TransferLocalLocal(writePixel16SZ_0, readPixel16_0, 4); break; case PSMCT16S: - TRANSFERLOCALLOCAL(16SZ, 16S, 4); + _TransferLocalLocal(writePixel16SZ_0, readPixel16S_0, 4); break; case PSMT16Z: - TRANSFERLOCALLOCAL(16SZ, 16Z, 4); + _TransferLocalLocal(writePixel16SZ_0, readPixel16Z_0, 4); break; case PSMT16SZ: - TRANSFERLOCALLOCAL(16SZ, 16SZ, 4); + _TransferLocalLocal(writePixel16SZ_0, readPixel16SZ_0, 4); break; } break; diff --git a/plugins/zzogl-pg/opengl/NewRegs.cpp b/plugins/zzogl-pg/opengl/NewRegs.cpp index b47d0ce938..2646ce066b 100644 --- a/plugins/zzogl-pg/opengl/NewRegs.cpp +++ b/plugins/zzogl-pg/opengl/NewRegs.cpp @@ -890,6 +890,7 @@ void __fastcall GIFRegHandlerTRXREG(u32* data) void __fastcall GIFRegHandlerTRXDIR(u32* data) { FUNCLOG + GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data); // Oh dear... // terminate any previous transfers @@ -909,7 +910,7 @@ void __fastcall GIFRegHandlerTRXDIR(u32* data) gs.dstbuf = gs.dstbufnew; gs.trxpos = gs.trxposnew; - gs.imageTransfer = data[0] & 0x3; + gs.imageTransfer = r->XDIR; gs.imageWnew = gs.imageWtemp; gs.imageHnew = gs.imageHtemp; From 1a3373a9a45fb414e86c925960f24d07c975265f Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 01:52:46 +0000 Subject: [PATCH 09/16] zzogl-pg: Add some helper functions, and rework the _TransferLocalLocal functions a bit. (Needs more testing) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3509 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 104 ++++++++++++++++++++++++- plugins/zzogl-pg/opengl/Mem.h | 61 +++++++++++++++ 2 files changed, 162 insertions(+), 3 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index caec95a8fa..a19402ddc4 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -371,7 +371,7 @@ } } -__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit) +/*__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit) { u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; @@ -414,6 +414,96 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres assert((gs.imageWnew % 8) == 0); + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) + { + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) + { + // NOTE: the 2 conseq 4bit values are in NOT in the same byte + u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw); + u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + + read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); + + read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); + write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); + pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); + } + } +}*/ + +__forceinline void _TransferLocalLocal() +{ + //ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); + _writePixel_0 wp = writePixelFunction_0(gs.srcbuf.psm); + _readPixel_0 rp = readPixelFunction_0(gs.dstbuf.psm); + u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; + u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + u32 widthlimit = 4; + + if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2; + if ((gs.imageWnew & widthlimit) != 0) return; + + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) + { + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) + { + wp(pDstBuf, j2%2048, i2%2048, + rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + //if (widthlimit > 1) + { + wp(pDstBuf, (j2+1)%2048, i2%2048, + rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + if (widthlimit > 2) + { + wp(pDstBuf, (j2+2)%2048, i2%2048, + rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + //if (widthlimit > 3) + { + wp(pDstBuf, (j2+3)%2048, i2%2048, + rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + } + } + } + } + } +} + +__forceinline void _TransferLocalLocal_4() +{ + //ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); + _getPixelAddress_0 gsp = getPixelFunction_0(gs.srcbuf.psm); + _getPixelAddress_0 gdp = getPixelFunction_0(gs.dstbuf.psm); + u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; + u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + + assert((gs.imageWnew % 8) == 0); + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) { for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) @@ -492,7 +582,15 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres } } - switch (gs.srcbuf.psm) + if (PSMT_BITMODE(gs.srcbuf.psm) != 4) + { + _TransferLocalLocal(); + } + else + { + _TransferLocalLocal_4(); + } + /*switch (gs.srcbuf.psm) { case PSMCT32: if (gs.dstbuf.psm == PSMCT32) @@ -695,7 +793,7 @@ __forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddres break; } break; - } + }*/ g_MemTargs.ClearRange(dststart, dstend); diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 2cc7627c58..9f7ef546c0 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -266,6 +266,27 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) return word; } +static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm) +{ + switch(psm) + { + case PSMCT32: return getPixelAddress32_0; + case PSMCT24: return getPixelAddress24_0; + case PSMCT16: return getPixelAddress16_0; + case PSMCT16S: return getPixelAddress16S_0; + case PSMT8: return getPixelAddress8_0; + case PSMT4: return getPixelAddress4_0; + case PSMT8H: return getPixelAddress8H_0; + case PSMT4HL: return getPixelAddress4HL_0; + case PSMT4HH: return getPixelAddress4HH_0; + case PSMT32Z: return getPixelAddress32Z_0; + case PSMT24Z: return getPixelAddress24Z_0; + case PSMT16Z: return getPixelAddress16Z_0; + case PSMT16SZ: return getPixelAddress16SZ_0; + default: return getPixelAddress32_0; + } +} + #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw) #define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw) @@ -511,6 +532,26 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, ((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel; } +static __forceinline _writePixel_0 writePixelFunction_0(u32 psm) +{ + switch(psm) + { + case PSMCT32: return writePixel32_0; + case PSMCT24: return writePixel24_0; + case PSMCT16: return writePixel16_0; + case PSMCT16S: return writePixel16S_0; + case PSMT8: return writePixel8_0; + case PSMT4: return writePixel4_0; + case PSMT8H: return writePixel8H_0; + case PSMT4HL: return writePixel4HL_0; + case PSMT4HH: return writePixel4HH_0; + case PSMT32Z: return writePixel32Z_0; + case PSMT24Z: return writePixel24Z_0; + case PSMT16Z: return writePixel16Z_0; + case PSMT16SZ: return writePixel16SZ_0; + default: return writePixel32_0; + } +} /////////////// @@ -589,4 +630,24 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)]; } +static __forceinline _readPixel_0 readPixelFunction_0(u32 psm) +{ + switch(psm) + { + case PSMCT32: return readPixel32_0; + case PSMCT24: return readPixel24_0; + case PSMCT16: return readPixel16_0; + case PSMCT16S: return readPixel16S_0; + case PSMT8: return readPixel8_0; + case PSMT4: return readPixel4_0; + case PSMT8H: return readPixel8H_0; + case PSMT4HL: return readPixel4HL_0; + case PSMT4HH: return readPixel4HH_0; + case PSMT32Z: return readPixel32Z_0; + case PSMT24Z: return readPixel24Z_0; + case PSMT16Z: return readPixel16Z_0; + case PSMT16SZ: return readPixel16SZ_0; + default: return readPixel32_0; + } +} #endif /* __MEM_H__ */ From ecad8b7a4fcbf639c0ae61d863189bc50ac3025d Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 06:10:39 +0000 Subject: [PATCH 10/16] zzogl-pg: Convert the helper function into arrays. Use them in other places as well. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3510 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 360 ++----------------------- plugins/zzogl-pg/opengl/Mem.h | 20 +- plugins/zzogl-pg/opengl/Mem_Tables.cpp | 50 ++++ 3 files changed, 81 insertions(+), 349 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index a19402ddc4..5aaa23ed89 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -27,7 +27,6 @@ #include "zerogs.h" #include "targets.h" - namespace ZeroGS { extern CRangeManager s_RangeMngr; // manages overwritten memory @@ -249,8 +248,10 @@ } template - void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) + void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart) { + _readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm]; + int i = x, j = y; T* pbuf = (T*)pbyMem; u32 nSize = nQWordSize * 16 / sizeof(T); @@ -275,8 +276,10 @@ } } - void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) + void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart) { + _readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm]; + int i = x, j = y; u8* pbuf = (u8*)pbyMem; u32 nSize = nQWordSize * 16 / 3; @@ -312,153 +315,28 @@ assert(gs.imageTransfer == 1); u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp; - int i = gs.imageY, j = gs.imageX; - - switch (gs.srcbuf.psm) + + switch(PSMT_BITMODE(gs.srcbuf.psm)) { - - case PSMCT32: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel32_0); - break; - - case PSMCT24: - TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0); - break; - - case PSMCT16: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16_0); - break; - - case PSMCT16S: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0); - break; - - case PSMT8: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel8_0); - break; - - case PSMT8H: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0); - break; - - case PSMT32Z: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0); - break; - - case PSMT24Z: - TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0); - break; - - case PSMT16Z: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0); - break; - - case PSMT16SZ: - TransferLocalHost(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0); - break; - - default: - assert(0); + case 0: TransferLocalHost(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break; + case 1: TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break; + case 2: TransferLocalHost(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break; + case 3: TransferLocalHost(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break; + default: assert(0); break; } - gs.imageY = i; - gs.imageX = j; - if (gs.imageY >= gs.imageEndY) { assert(gs.imageY == gs.imageEndY); gs.imageTransfer = -1; } } - -/*__forceinline void _TransferLocalLocal(_writePixel_0 wp, _readPixel_0 rp, u32 widthlimit) -{ - u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; - u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; - - if ((gs.imageWnew & widthlimit) != 0) return; - assert(widthlimit <= 4); - - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) - { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) - { - wp(pDstBuf, j2%2048, i2%2048, - rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - if (widthlimit > 1) - { - wp(pDstBuf, (j2+1)%2048, i2%2048, - rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - if (widthlimit > 2) - { - wp(pDstBuf, (j2+2)%2048, i2%2048, - rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - if (widthlimit > 3) - { - wp(pDstBuf, (j2+3)%2048, i2%2048, - rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - } - } - } - } - } -} - -__forceinline void _TransferLocalLocal_4(_getPixelAddress_0 gsp, _getPixelAddress_0 gdp) -{ - u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; - u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; - - assert((gs.imageWnew % 8) == 0); - - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) - { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) - { - // NOTE: the 2 conseq 4bit values are in NOT in the same byte - u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw); - u32 write = gdp(j2%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); - - read = gsp((j+1)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+1)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); - - read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); - - read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); - - read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); - - read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); - - read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); - - read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw); - write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw); - pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); - } - } -}*/ __forceinline void _TransferLocalLocal() { //ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _writePixel_0 wp = writePixelFunction_0(gs.srcbuf.psm); - _readPixel_0 rp = readPixelFunction_0(gs.dstbuf.psm); + _writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm]; //writePixelFunction_0(gs.srcbuf.psm); + _readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm]; //readPixelFunction_0(gs.dstbuf.psm); u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; u32 widthlimit = 4; @@ -497,8 +375,8 @@ __forceinline void _TransferLocalLocal() __forceinline void _TransferLocalLocal_4() { //ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _getPixelAddress_0 gsp = getPixelFunction_0(gs.srcbuf.psm); - _getPixelAddress_0 gdp = getPixelFunction_0(gs.dstbuf.psm); + _getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm]; //getPixelFunction_0(gs.srcbuf.psm); + _getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm]; //getPixelFunction_0(gs.dstbuf.psm); u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; @@ -590,210 +468,6 @@ __forceinline void _TransferLocalLocal_4() { _TransferLocalLocal_4(); } - /*switch (gs.srcbuf.psm) - { - case PSMCT32: - if (gs.dstbuf.psm == PSMCT32) - { - _TransferLocalLocal(writePixel32_0, readPixel32_0, 2); - } - else - { - _TransferLocalLocal(writePixel32_0, readPixel32Z_0, 2); - } - break; - - case PSMCT24: - if (gs.dstbuf.psm == PSMCT24) - { - _TransferLocalLocal(writePixel24_0, readPixel24_0, 4); - } - else - { - _TransferLocalLocal(writePixel24_0, readPixel24Z_0, 4); - } - break; - - case PSMCT16: - switch (gs.dstbuf.psm) - { - case PSMCT16: - _TransferLocalLocal(writePixel16_0, readPixel16_0, 4); - break; - - case PSMCT16S: - _TransferLocalLocal(writePixel16_0, readPixel16S_0, 4); - break; - - case PSMT16Z: - _TransferLocalLocal(writePixel16_0, readPixel16Z_0, 4); - break; - - case PSMT16SZ: - _TransferLocalLocal(writePixel16_0, readPixel16SZ_0, 4); - break; - } - break; - - case PSMCT16S: - switch (gs.dstbuf.psm) - { - case PSMCT16: - _TransferLocalLocal(writePixel16S_0, readPixel16_0, 4); - break; - - case PSMCT16S: - _TransferLocalLocal(writePixel16S_0, readPixel16S_0, 4); - break; - - case PSMT16Z: - _TransferLocalLocal(writePixel16S_0, readPixel16Z_0, 4); - break; - - case PSMT16SZ: - _TransferLocalLocal(writePixel16S_0, readPixel16SZ_0, 4); - break; - } - break; - - case PSMT8: - if (gs.dstbuf.psm == PSMT8) - { - _TransferLocalLocal(writePixel8_0, readPixel8_0, 4); - } - else - { - _TransferLocalLocal(writePixel8_0, readPixel8H_0, 4); - } - break; - - case PSMT4: - switch (gs.dstbuf.psm) - { - - case PSMT4: - _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4_0); - break; - - case PSMT4HL: - _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HL_0); - break; - - case PSMT4HH: - _TransferLocalLocal_4(getPixelAddress4_0, getPixelAddress4HH_0); - break; - } - break; - - case PSMT8H: - if (gs.dstbuf.psm == PSMT8) - { - _TransferLocalLocal(writePixel8H_0, readPixel8_0, 4); - } - else - { - _TransferLocalLocal(writePixel8H_0, readPixel8H_0, 4); - } - break; - - case PSMT4HL: - switch (gs.dstbuf.psm) - { - case PSMT4: - _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4_0); - break; - - case PSMT4HL: - _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HL_0); - break; - - case PSMT4HH: - _TransferLocalLocal_4(getPixelAddress4HL_0, getPixelAddress4HH_0); - break; - } - break; - - case PSMT4HH: - switch (gs.dstbuf.psm) - { - case PSMT4: - _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4_0); - break; - - case PSMT4HL: - _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HL_0); - break; - - case PSMT4HH: - _TransferLocalLocal_4(getPixelAddress4HH_0, getPixelAddress4HH_0); - break; - } - break; - - case PSMT32Z: - if (gs.dstbuf.psm == PSMCT32) - { - _TransferLocalLocal(writePixel32Z_0, readPixel32_0, 2); - } - else - { - _TransferLocalLocal(writePixel32Z_0, readPixel32Z_0, 2); - } - break; - - case PSMT24Z: - if (gs.dstbuf.psm == PSMCT24) - { - _TransferLocalLocal(writePixel24Z_0, readPixel24_0, 4); - } - else - { - _TransferLocalLocal(writePixel24Z_0, readPixel24Z_0, 4); - } - break; - - case PSMT16Z: - switch (gs.dstbuf.psm) - { - case PSMCT16: - _TransferLocalLocal(writePixel16Z_0, readPixel16_0, 4); - break; - - case PSMCT16S: - _TransferLocalLocal(writePixel16Z_0, readPixel16S_0, 4); - break; - - case PSMT16Z: - _TransferLocalLocal(writePixel16Z_0, readPixel16Z_0, 4); - break; - - case PSMT16SZ: - _TransferLocalLocal(writePixel16Z_0, readPixel16SZ_0, 4); - break; - } - break; - - case PSMT16SZ: - switch (gs.dstbuf.psm) - { - case PSMCT16: - _TransferLocalLocal(writePixel16SZ_0, readPixel16_0, 4); - break; - - case PSMCT16S: - _TransferLocalLocal(writePixel16SZ_0, readPixel16S_0, 4); - break; - - case PSMT16Z: - _TransferLocalLocal(writePixel16SZ_0, readPixel16Z_0, 4); - break; - - case PSMT16SZ: - _TransferLocalLocal(writePixel16SZ_0, readPixel16SZ_0, 4); - break; - } - break; - }*/ g_MemTargs.ClearRange(dststart, dstend); diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 9f7ef546c0..17cd34a1fb 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -44,6 +44,10 @@ typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize); typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize); typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); +extern _getPixelAddress_0 getPixelFun_0[64]; +extern _writePixel_0 writePixelFun_0[64]; +extern _readPixel_0 readPixelFun_0[64]; + enum Psm_Size { PSM_ = 0, @@ -268,7 +272,8 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm) { - switch(psm) + return getPixelFun_0[psm]; + /*switch(psm) { case PSMCT32: return getPixelAddress32_0; case PSMCT24: return getPixelAddress24_0; @@ -284,7 +289,7 @@ static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm) case PSMT16Z: return getPixelAddress16Z_0; case PSMT16SZ: return getPixelAddress16SZ_0; default: return getPixelAddress32_0; - } + }*/ } #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw) @@ -532,9 +537,11 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, ((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel; } + static __forceinline _writePixel_0 writePixelFunction_0(u32 psm) { - switch(psm) + return writePixelFun_0[psm]; + /*switch(psm) { case PSMCT32: return writePixel32_0; case PSMCT24: return writePixel24_0; @@ -550,7 +557,7 @@ static __forceinline _writePixel_0 writePixelFunction_0(u32 psm) case PSMT16Z: return writePixel16Z_0; case PSMT16SZ: return writePixel16SZ_0; default: return writePixel32_0; - } + }*/ } /////////////// @@ -632,7 +639,8 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) static __forceinline _readPixel_0 readPixelFunction_0(u32 psm) { - switch(psm) + return readPixelFun_0[psm]; + /*switch(psm) { case PSMCT32: return readPixel32_0; case PSMCT24: return readPixel24_0; @@ -648,6 +656,6 @@ static __forceinline _readPixel_0 readPixelFunction_0(u32 psm) case PSMT16Z: return readPixel16Z_0; case PSMT16SZ: return readPixel16SZ_0; default: return readPixel32_0; - } + }*/ } #endif /* __MEM_H__ */ diff --git a/plugins/zzogl-pg/opengl/Mem_Tables.cpp b/plugins/zzogl-pg/opengl/Mem_Tables.cpp index 620cc72090..f3c67eb8d7 100644 --- a/plugins/zzogl-pg/opengl/Mem_Tables.cpp +++ b/plugins/zzogl-pg/opengl/Mem_Tables.cpp @@ -18,6 +18,7 @@ */ #include "GS.h" +#include "Mem.h" u32 g_blockTable32[4][8] = { @@ -247,3 +248,52 @@ u32 g_pageTable16Z[64][64]; u32 g_pageTable16SZ[64][64]; u32 g_pageTable8[64][128]; u32 g_pageTable4[128][128]; + +/* PSM reference array +{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, 8, 4, NULL, NULL, NULL, + NULL, NULL, NULL, 8H, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 4HL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, 4HH, NULL, NULL, NULL, + 32Z, 24Z, 16Z, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, 16SZ, NULL, NULL, NULL, NULL, NULL }; +*/ + +_getPixelAddress_0 getPixelFun_0[64] = +{ + getPixelAddress32_0, getPixelAddress24_0, getPixelAddress16_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, getPixelAddress16S_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, getPixelAddress8_0, getPixelAddress4_0, NULL, NULL, NULL, + NULL, NULL, NULL, getPixelAddress8H_0, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, getPixelAddress4HL_0, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, getPixelAddress4HH_0, NULL, NULL, NULL, + getPixelAddress32Z_0, getPixelAddress24Z_0, getPixelAddress16Z_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, getPixelAddress16SZ_0, NULL, NULL, NULL, NULL, NULL +}; + +_writePixel_0 writePixelFun_0[64] = +{ + writePixel32_0, writePixel24_0, writePixel16_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, writePixel16S_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, writePixel8_0, writePixel4_0, NULL, NULL, NULL, + NULL, NULL, NULL, writePixel8H_0, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, writePixel4HL_0, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, writePixel4HH_0, NULL, NULL, NULL, + writePixel32Z_0, writePixel24Z_0, writePixel16Z_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, writePixel16SZ_0, NULL, NULL, NULL, NULL, NULL +}; + +_readPixel_0 readPixelFun_0[64] = +{ + readPixel32_0, readPixel24_0, readPixel16_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, readPixel16S_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, readPixel8_0, readPixel4_0, NULL, NULL, NULL, + NULL, NULL, NULL, readPixel8H_0, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, readPixel4HL_0, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, readPixel4HH_0, NULL, NULL, NULL, + readPixel32Z_0, readPixel24Z_0, readPixel16Z_0, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, readPixel16SZ_0, NULL, NULL, NULL, NULL, NULL +}; + + From 155e0a1bfafa12a80fe55c5b4547b1133edd8822 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 06:56:07 +0000 Subject: [PATCH 11/16] zzogl-pg: Cleaning up a bit after the last commit. Expand a hack to remove lines when AA is on to cover AA at x8 & x16. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3511 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/HostMemory.cpp | 48 +++++++++--------- plugins/zzogl-pg/opengl/Mem.h | 68 -------------------------- plugins/zzogl-pg/opengl/targets.cpp | 4 +- plugins/zzogl-pg/opengl/zerogs.cpp | 13 ++--- 4 files changed, 31 insertions(+), 102 deletions(-) diff --git a/plugins/zzogl-pg/opengl/HostMemory.cpp b/plugins/zzogl-pg/opengl/HostMemory.cpp index 5aaa23ed89..2f45eab73d 100644 --- a/plugins/zzogl-pg/opengl/HostMemory.cpp +++ b/plugins/zzogl-pg/opengl/HostMemory.cpp @@ -327,6 +327,7 @@ if (gs.imageY >= gs.imageEndY) { + ZZLog::Error_Log("gs.imageY >= gs.imageEndY!"); assert(gs.imageY == gs.imageEndY); gs.imageTransfer = -1; } @@ -335,38 +336,35 @@ __forceinline void _TransferLocalLocal() { //ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm]; //writePixelFunction_0(gs.srcbuf.psm); - _readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm]; //readPixelFunction_0(gs.dstbuf.psm); + _writePixel_0 wp = writePixelFun_0[gs.srcbuf.psm]; + _readPixel_0 rp = readPixelFun_0[gs.dstbuf.psm]; u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; u32 widthlimit = 4; + u32 maxX = gs.trxpos.sx + gs.imageWnew; + u32 maxY = gs.trxpos.sy + gs.imageHnew; if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2; if ((gs.imageWnew & widthlimit) != 0) return; - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++) { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit) { wp(pDstBuf, j2%2048, i2%2048, - rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + wp(pDstBuf, (j2+1)%2048, i2%2048, + rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - //if (widthlimit > 1) + if (widthlimit > 2) { - wp(pDstBuf, (j2+1)%2048, i2%2048, - rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - if (widthlimit > 2) - { - wp(pDstBuf, (j2+2)%2048, i2%2048, - rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - - //if (widthlimit > 3) - { - wp(pDstBuf, (j2+3)%2048, i2%2048, - rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); - } - } + // Then widthlimit == 4. + wp(pDstBuf, (j2+2)%2048, i2%2048, + rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); + + wp(pDstBuf, (j2+3)%2048, i2%2048, + rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); } } } @@ -375,16 +373,18 @@ __forceinline void _TransferLocalLocal() __forceinline void _TransferLocalLocal_4() { //ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm); - _getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm]; //getPixelFunction_0(gs.srcbuf.psm); - _getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm]; //getPixelFunction_0(gs.dstbuf.psm); + _getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm]; + _getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm]; u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256; + u32 maxX = gs.trxpos.sx + gs.imageWnew; + u32 maxY = gs.trxpos.sy + gs.imageHnew; assert((gs.imageWnew % 8) == 0); - for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy + gs.imageHnew; ++i, ++i2) + for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2) { - for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx + gs.imageWnew; j += 8, j2 += 8) + for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8) { /* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ u32 read = gsp(j%2048, i%2048, gs.srcbuf.bw); diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 17cd34a1fb..7b347078fc 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -270,33 +270,9 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) return word; } -static __forceinline _getPixelAddress_0 getPixelFunction_0(u32 psm) -{ - return getPixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return getPixelAddress32_0; - case PSMCT24: return getPixelAddress24_0; - case PSMCT16: return getPixelAddress16_0; - case PSMCT16S: return getPixelAddress16S_0; - case PSMT8: return getPixelAddress8_0; - case PSMT4: return getPixelAddress4_0; - case PSMT8H: return getPixelAddress8H_0; - case PSMT4HL: return getPixelAddress4HL_0; - case PSMT4HH: return getPixelAddress4HH_0; - case PSMT32Z: return getPixelAddress32Z_0; - case PSMT24Z: return getPixelAddress24Z_0; - case PSMT16Z: return getPixelAddress16Z_0; - case PSMT16SZ: return getPixelAddress16SZ_0; - default: return getPixelAddress32_0; - }*/ -} - #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw) #define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw) - - static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel; @@ -537,29 +513,6 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, ((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel; } - -static __forceinline _writePixel_0 writePixelFunction_0(u32 psm) -{ - return writePixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return writePixel32_0; - case PSMCT24: return writePixel24_0; - case PSMCT16: return writePixel16_0; - case PSMCT16S: return writePixel16S_0; - case PSMT8: return writePixel8_0; - case PSMT4: return writePixel4_0; - case PSMT8H: return writePixel8H_0; - case PSMT4HL: return writePixel4HL_0; - case PSMT4HH: return writePixel4HH_0; - case PSMT32Z: return writePixel32Z_0; - case PSMT24Z: return writePixel24Z_0; - case PSMT16Z: return writePixel16Z_0; - case PSMT16SZ: return writePixel16SZ_0; - default: return writePixel32_0; - }*/ -} - /////////////// static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) @@ -637,25 +590,4 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)]; } -static __forceinline _readPixel_0 readPixelFunction_0(u32 psm) -{ - return readPixelFun_0[psm]; - /*switch(psm) - { - case PSMCT32: return readPixel32_0; - case PSMCT24: return readPixel24_0; - case PSMCT16: return readPixel16_0; - case PSMCT16S: return readPixel16S_0; - case PSMT8: return readPixel8_0; - case PSMT4: return readPixel4_0; - case PSMT8H: return readPixel8H_0; - case PSMT4HL: return readPixel4HL_0; - case PSMT4HH: return readPixel4HH_0; - case PSMT32Z: return readPixel32Z_0; - case PSMT24Z: return readPixel24Z_0; - case PSMT16Z: return readPixel16Z_0; - case PSMT16SZ: return readPixel16SZ_0; - default: return readPixel32_0; - }*/ -} #endif /* __MEM_H__ */ diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index 391452608c..d24376eb38 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -2286,10 +2286,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info { // This is not unusual situation, when vector does not 16bit alignment, that is destructive for SSE2 // instruction movdqa [%eax], xmm0 - // The idea would be resise vector to 15 elements, that set ptxedata to aligned position. + // The idea would be resize vector to 15 elements, that set ptxedata to aligned position. // Later we would move eax by 16, so only we should verify is first element align // FIXME. As I see, texdata used only once here, it does not have any impact on other code. // Probably, usage of _aligned_maloc() would be preferable. + + // Note: this often happens when changing AA. int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15 ptexdata = &texdata[disalignment]; // Set pointer to aligned element dst = (u16*)ptexdata; diff --git a/plugins/zzogl-pg/opengl/zerogs.cpp b/plugins/zzogl-pg/opengl/zerogs.cpp index 2d04ed3e27..c311e7771b 100644 --- a/plugins/zzogl-pg/opengl/zerogs.cpp +++ b/plugins/zzogl-pg/opengl/zerogs.cpp @@ -803,15 +803,10 @@ void ZeroGS::KickSprite() int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); - - // sprite is too small and AA shows lines (tek4) - - if (s_AAx) - { - gs.gsvertex[last].x += 4; - - if (s_AAy) gs.gsvertex[last].y += 4; - } + + // sprite is too small and AA shows lines (tek4, Mana Khemia) + gs.gsvertex[last].x += (4*s_AAx); + gs.gsvertex[last].y += (4*s_AAy); // might be bad sprite (KH dialog text) //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y ) From c2804a28061337088d367472e4fb35fc4d70b00e Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 10:55:22 +0000 Subject: [PATCH 12/16] zzogl-pg: Change the messages when finding a game crc a bit. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3512 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/GSmain.cpp | 23 +++++++++++++++++------ plugins/zzogl-pg/opengl/targets.cpp | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/plugins/zzogl-pg/opengl/GSmain.cpp b/plugins/zzogl-pg/opengl/GSmain.cpp index 9e15dc7d85..83d8d34af3 100644 --- a/plugins/zzogl-pg/opengl/GSmain.cpp +++ b/plugins/zzogl-pg/opengl/GSmain.cpp @@ -141,7 +141,7 @@ void CALLBACK GSsetGameCRC(int crc, int options) g_LastCRC = crc; - ZZLog::Error_Log("CRC = %x", crc); + if (crc != 0) ZZLog::Error_Log("Current game CRC is %x.", crc); if (CRCValueChanged && (crc != 0)) { @@ -149,13 +149,24 @@ void CALLBACK GSsetGameCRC(int crc, int options) { if (crc_game_list[i].crc == crc) { - if (crc_game_list[i].v_thresh > 0) VALIDATE_THRESH = crc_game_list[i].v_thresh; - if (crc_game_list[i].t_thresh > 0) TEXDESTROY_THRESH = crc_game_list[i].t_thresh; + ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc); + + if (crc_game_list[i].v_thresh > 0) + { + VALIDATE_THRESH = crc_game_list[i].v_thresh; + ZZLog::Error_Log("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH); + } + if (crc_game_list[i].t_thresh > 0) + { + TEXDESTROY_THRESH = crc_game_list[i].t_thresh; + ZZLog::Error_Log("Setting TEXDESTROY_THRESH to %d", VALIDATE_THRESH); + } conf.def_hacks._u32 |= crc_game_list[i].flags; - - ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc); - + if (crc_game_list[i].flags != 0) + { + ZZLog::Error_Log("Enabling flags (0x%x).", crc_game_list[i].flags); + } return; } } diff --git a/plugins/zzogl-pg/opengl/targets.cpp b/plugins/zzogl-pg/opengl/targets.cpp index d24376eb38..41e0ccbabf 100644 --- a/plugins/zzogl-pg/opengl/targets.cpp +++ b/plugins/zzogl-pg/opengl/targets.cpp @@ -246,6 +246,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co } // set render states + // Bleh. I *really* need to fix this. << 3 when setting the scissors, then >> 3 when using them... --Arcum42 scissorrect.x = scissor.x0 >> 3; scissorrect.y = (scissor.y0 >> 3) + dy; scissorrect.w = (scissor.x1 >> 3) + 1; From a0225a2318634e50c12bce7ba545ca18e16477f9 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 10:56:26 +0000 Subject: [PATCH 13/16] zzogl-pg: Lets try that again. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3513 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/GSmain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/zzogl-pg/opengl/GSmain.cpp b/plugins/zzogl-pg/opengl/GSmain.cpp index 83d8d34af3..0edbbfa066 100644 --- a/plugins/zzogl-pg/opengl/GSmain.cpp +++ b/plugins/zzogl-pg/opengl/GSmain.cpp @@ -159,7 +159,7 @@ void CALLBACK GSsetGameCRC(int crc, int options) if (crc_game_list[i].t_thresh > 0) { TEXDESTROY_THRESH = crc_game_list[i].t_thresh; - ZZLog::Error_Log("Setting TEXDESTROY_THRESH to %d", VALIDATE_THRESH); + ZZLog::Error_Log("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH); } conf.def_hacks._u32 |= crc_game_list[i].flags; From 8e7a7cf5febb5b35d71e6c866b2b8665fcb18e99 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sat, 17 Jul 2010 11:39:04 +0000 Subject: [PATCH 14/16] zzogl-pg: Now it lists all the enabled hacks, and if they were manually or automatically enabled. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3514 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zzogl-pg/opengl/GSmain.cpp | 66 +++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/plugins/zzogl-pg/opengl/GSmain.cpp b/plugins/zzogl-pg/opengl/GSmain.cpp index 0edbbfa066..dd46a08ee3 100644 --- a/plugins/zzogl-pg/opengl/GSmain.cpp +++ b/plugins/zzogl-pg/opengl/GSmain.cpp @@ -126,6 +126,53 @@ void CALLBACK GSsetLogDir(const char* dir) ZZLog::SetDir(dir); } +void ReportHacks(gameHacks hacks) +{ + if (hacks.texture_targs) ZZLog::WriteLn("'Texture targs' hack enabled."); + if (hacks.auto_reset) ZZLog::WriteLn("'Auto reset' hack enabled."); + if (hacks.interlace_2x) ZZLog::WriteLn("'Interlace 2x' hack enabled."); + if (hacks.texa) ZZLog::WriteLn("'Texa' hack enabled."); + if (hacks.no_target_resolve) ZZLog::WriteLn("'No target resolve' hack enabled."); + if (hacks.exact_color) ZZLog::WriteLn("Exact color hack enabled."); + if (hacks.no_color_clamp) ZZLog::WriteLn("'No color clamp' hack enabled."); + if (hacks.no_alpha_fail) ZZLog::WriteLn("'No alpha fail' hack enabled."); + if (hacks.no_depth_update) ZZLog::WriteLn("'No depth update' hack enabled."); + if (hacks.quick_resolve_1) ZZLog::WriteLn("'Quick resolve 1' enabled."); + if (hacks.no_quick_resolve) ZZLog::WriteLn("'No Quick resolve' hack enabled."); + if (hacks.no_target_clut) ZZLog::WriteLn("'No target clut' hack enabled."); + if (hacks.vss_hack_off) ZZLog::WriteLn("VSS hack enabled."); + if (hacks.no_depth_resolve) ZZLog::WriteLn("'No depth resolve' hack enabled."); + if (hacks.full_16_bit_res) ZZLog::WriteLn("'Full 16 bit resolution' hack enabled."); + if (hacks.resolve_promoted) ZZLog::WriteLn("'Resolve promoted' hack enabled."); + if (hacks.fast_update) ZZLog::WriteLn("'Fast update' hack enabled."); + if (hacks.no_alpha_test) ZZLog::WriteLn("'No alpha test' hack enabled."); + if (hacks.disable_mrt_depth) ZZLog::WriteLn("'Disable mrt depth' hack enabled."); + if (hacks.args_32_bit) ZZLog::WriteLn("'Args 32 bit' hack enabled."); + if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled."); + if (hacks.parallel_context) ZZLog::WriteLn("'Parallel context' hack enabled."); + if (hacks.xenosaga_spec) ZZLog::WriteLn("'Xenosaga spec' hack enabled."); + if (hacks.partial_pointers) ZZLog::WriteLn("'Partial pointers' hack enabled."); + if (hacks.partial_depth) ZZLog::WriteLn("'Partial depth' hack enabled."); + if (hacks.reget) ZZLog::WriteLn("Reget hack enabled."); + if (hacks.gust) ZZLog::WriteLn("Gust hack enabled."); + if (hacks.no_logz) ZZLog::WriteLn("'No logz' hack enabled."); +} + +void ListHacks() +{ + if (conf.def_hacks._u32 != 0) + { + ZZLog::WriteLn("AutoEnabling these hacks:"); + ReportHacks(conf.def_hacks); + } + + if (conf.hacks._u32 != 0) + { + ZZLog::WriteLn("You've manually enabled these hacks:"); + ReportHacks(conf.hacks); + } +} + void CALLBACK GSsetGameCRC(int crc, int options) { // TEXDESTROY_THRESH starts out at 16. @@ -133,15 +180,15 @@ void CALLBACK GSsetGameCRC(int crc, int options) conf.mrtdepth = (conf.settings().disable_mrt_depth != 0); if (!conf.mrtdepth) - ZZLog::Error_Log("Disabling MRT depth writing."); + ZZLog::WriteLn("Disabling MRT depth writing."); else - ZZLog::Error_Log("Enabling MRT depth writing."); + ZZLog::WriteLn("Enabling MRT depth writing."); bool CRCValueChanged = (g_LastCRC != crc); g_LastCRC = crc; - if (crc != 0) ZZLog::Error_Log("Current game CRC is %x.", crc); + if (crc != 0) ZZLog::WriteLn("Current game CRC is %x.", crc); if (CRCValueChanged && (crc != 0)) { @@ -149,28 +196,27 @@ void CALLBACK GSsetGameCRC(int crc, int options) { if (crc_game_list[i].crc == crc) { - ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc); + ZZLog::WriteLn("Found CRC[%x] in crc game list.", crc); if (crc_game_list[i].v_thresh > 0) { VALIDATE_THRESH = crc_game_list[i].v_thresh; - ZZLog::Error_Log("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH); + ZZLog::WriteLn("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH); } + if (crc_game_list[i].t_thresh > 0) { TEXDESTROY_THRESH = crc_game_list[i].t_thresh; - ZZLog::Error_Log("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH); + ZZLog::WriteLn("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH); } conf.def_hacks._u32 |= crc_game_list[i].flags; - if (crc_game_list[i].flags != 0) - { - ZZLog::Error_Log("Enabling flags (0x%x).", crc_game_list[i].flags); - } + ListHacks(); return; } } } + ListHacks(); } void CALLBACK GSsetFrameSkip(int frameskip) From 6fdfdf604fc286b44346897d4dc9d8cd63c0b59a Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 17 Jul 2010 13:53:09 +0000 Subject: [PATCH 15/16] vifUnpack: Made V3_## Unpacks work the same as V4_32, this is how legacy did it to, for some reason i made it copy the 3rd vector in to the fourth >.< git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3516 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/newVif_UnpackSSE.cpp | 155 ++++++++++++++++++--------------- 1 file changed, 84 insertions(+), 71 deletions(-) diff --git a/pcsx2/x86/newVif_UnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp index 7e089b9e7b..32cc3c1f0a 100644 --- a/pcsx2/x86/newVif_UnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -98,27 +98,31 @@ void VifUnpackSSE_Base::xUPK_S_32() const { } void VifUnpackSSE_Base::xUPK_S_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (workReg); -} -else { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX16 (workReg); + } + else + { + xMOV16 (workReg, ptr32[srcIndirect]); + xPUNPCK.LWD(workReg, workReg); + xShiftR (workReg, 16); + } xPSHUF.D (destReg, workReg, _v0); } void VifUnpackSSE_Base::xUPK_S_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (workReg); -} -else { - xMOV8 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX8 (workReg); + } + else + { + xMOV8 (workReg, ptr32[srcIndirect]); + xPUNPCK.LBW(workReg, workReg); + xPUNPCK.LWD(workReg, workReg); + xShiftR (workReg, 24); + } xPSHUF.D (destReg, workReg, _v0); } @@ -133,58 +137,63 @@ void VifUnpackSSE_Base::xUPK_V2_32() const { } void VifUnpackSSE_Base::xUPK_V2_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX16 (destReg); + } + else + { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); + } xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0 } void VifUnpackSSE_Base::xUPK_V2_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV16 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX8 (destReg); + } + else + { + xMOV16 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); + } xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0 } void VifUnpackSSE_Base::xUPK_V3_32() const { xMOV128 (destReg, ptr32[srcIndirect]); - xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0 } void VifUnpackSSE_Base::xUPK_V3_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} - xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0 + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX16 (destReg); + } + else + { + xMOV64 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); + } } void VifUnpackSSE_Base::xUPK_V3_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} - xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0 + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX8 (destReg); + } + else + { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); + } } void VifUnpackSSE_Base::xUPK_V4_32() const { @@ -192,26 +201,30 @@ void VifUnpackSSE_Base::xUPK_V4_32() const { } void VifUnpackSSE_Base::xUPK_V4_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX16 (destReg); + } + else + { + xMOV64 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); + } } void VifUnpackSSE_Base::xUPK_V4_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} + if (x86caps.hasStreamingSIMD4Extensions) + { + xPMOVXX8 (destReg); + } + else + { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); + } } void VifUnpackSSE_Base::xUPK_V4_5() const { From b5f1ef528a463a7e35252545d7c6e2c32eb95843 Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Sat, 17 Jul 2010 14:04:38 +0000 Subject: [PATCH 16/16] GSDx: fixed incorrect RGB->YUV conversion when capturing video which was producing off colour results. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3517 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSCapture.cpp | 58 ++++++++++++-------------------------- 1 file changed, 18 insertions(+), 40 deletions(-) diff --git a/plugins/GSdx/GSCapture.cpp b/plugins/GSdx/GSCapture.cpp index b2207e09fb..de8ee02dea 100644 --- a/plugins/GSdx/GSCapture.cpp +++ b/plugins/GSdx/GSCapture.cpp @@ -245,53 +245,31 @@ public: { int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2; - const GSVector4 ys(0.098f, 0.504f, 0.257f, 0.0f); - const GSVector4 us(0.439f / 2, -0.291f / 2, -0.148f / 2, 0.0f); - const GSVector4 vs(-0.071f / 2, -0.368f / 2, 0.439f / 2, 0.0f); + GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f); + GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f); + GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f); const GSVector4 offset(16, 128, 16, 128); - if(rgba) + if (!rgba) + ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw(); + + for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) { - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) + uint32* s = (uint32*)src; + uint16* d = (uint16*)dst; + + for(int i = 0; i < w; i += 2) { - uint32* s = (uint32*)src; - uint16* d = (uint16*)dst; + GSVector4 c0 = GSVector4(s[i + 0]); + GSVector4 c1 = GSVector4(s[i + 1]); + GSVector4 c2 = c0 + c1; - for(int i = 0; i < w; i += 2) - { - GSVector4 c0 = GSVector4(s[i + 0]); - GSVector4 c1 = GSVector4(s[i + 1]); - GSVector4 c2 = c0 + c1; + GSVector4 lo = (c0 * ys).hadd(c2 * us); + GSVector4 hi = (c1 * ys).hadd(c2 * vs); - GSVector4 lo = (c0 * ys).hadd(c2 * vs); - GSVector4 hi = (c1 * ys).hadd(c2 * us); + GSVector4 c = lo.hadd(hi) + offset; - GSVector4 c = lo.hadd(hi) + offset; - - *((uint32*)&d[i]) = GSVector4i(c).rgba32(); - } - } - } - else - { - for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) - { - uint32* s = (uint32*)src; - uint16* d = (uint16*)dst; - - for(int i = 0; i < w; i += 2) - { - GSVector4 c0 = GSVector4(s[i + 0]).zyxw(); - GSVector4 c1 = GSVector4(s[i + 1]).zyxw(); - GSVector4 c2 = c0 + c1; - - GSVector4 lo = (c0 * ys).hadd(c2 * vs); - GSVector4 hi = (c1 * ys).hadd(c2 * us); - - GSVector4 c = lo.hadd(hi) + offset; - - *((uint32*)&d[i]) = GSVector4i(c).rgba32(); - } + *((uint32*)&d[i]) = GSVector4i(c).rgba32(); } } }