zzogl: painfully merge the zzogl-dev branch

* new memory management
* asm was replaced by intrinsic
* new GLSL backend (AMD only) Cmake is probably broken anyway with the 2 plugins...
* and lots of other stuff that I forgot about ;)


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5166 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gregory.hainaut 2012-04-19 21:22:08 +00:00
parent 8fcadb3616
commit e3c741bb2a
47 changed files with 7412 additions and 5710 deletions

View File

@ -8,6 +8,7 @@
# Use soundtouch internal lib: -DFORCE_INTERNAL_SOUNDTOUCH=TRUE
# Use zlib internal lib: -DFORCE_INTERNAL_ZLIB=TRUE
# Use sdl1.3 internal lib: -DFORCE_INTERNAL_SDL=TRUE # Not supported yet
# Use GLSL API(else NVIDIA_CG): -DGLSL_API=TRUE
### GCC optimization options
# control C flags : -DUSER_CMAKE_C_FLAGS="cflags"
@ -183,3 +184,10 @@ if(PACKAGE_MODE)
# Compile all source codes with these 2 defines
add_definitions(-DPLUGIN_DIR_COMPILATION=${PLUGIN_DIR} -DGAMEINDEX_DIR_COMPILATION=${GAMEINDEX_DIR})
endif(PACKAGE_MODE)
#-------------------------------------------------------------------------------
# Select nvidia cg shader api by default
#-------------------------------------------------------------------------------
# Fall back to the NVIDIA Cg backend only when the user did not choose a
# backend with -DGLSL_API=TRUE/FALSE. The guard has to test GLSL_API itself:
# guarding on an unrelated option would overwrite an explicit user choice.
if(NOT DEFINED GLSL_API)
    set(GLSL_API FALSE)
endif(NOT DEFINED GLSL_API)

View File

@ -53,7 +53,9 @@ if(NOT FORCE_INTERNAL_ZLIB)
endif(NOT FORCE_INTERNAL_ZLIB)
## Use pcsx2 package to find module
include(FindCg)
# The Cg toolkit is only looked up when the GLSL shader API is not selected.
if(NOT GLSL_API)
include(FindCg)
endif(NOT GLSL_API)
include(FindGlew)
include(FindLibc)
include(FindPortAudio)

View File

@ -6,11 +6,15 @@ set(msg_dep_pcsx2 "check these libraries -> wxWidgets (>=2.8.10), gtk2 (>=
set(msg_dep_cdvdiso "check these libraries -> bzip2 (>=1.0.5), gtk2 (>=2.16)")
set(msg_dep_zerogs "check these libraries -> glew (>=1.5), opengl, X11, nvidia-cg-toolkit (>=2.1)")
set(msg_dep_gsdx "check these libraries -> opengl, X11, pcsx2 SDL")
set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, nvidia-cg-toolkit (>=2.1), pcsx2 common libs")
set(msg_dep_onepad "check these libraries -> sdl (>=1.2)")
set(msg_dep_zeropad "check these libraries -> sdl (>=1.2)")
set(msg_dep_spu2x "check these libraries -> soundtouch (>=1.5), alsa, portaudio (>=1.9), pcsx2 common libs")
set(msg_dep_zerospu2 "check these libraries -> soundtouch (>=1.5), alsa")
# Dependency hint printed when zzogl is skipped. The Cg toolkit is only listed
# when the GLSL shader API is not selected.
if(GLSL_API)
    set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, pcsx2 common libs")
else(GLSL_API)
    set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, nvidia-cg-toolkit (>=2.1), pcsx2 common libs")
endif(GLSL_API)
#-------------------------------------------------------------------------------
# Pcsx2 core & common libs
@ -153,17 +157,17 @@ endif(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND)
# requires: -GLEW
# -OpenGL
# -X11
# -CG
# -CG (only with cg build)
# -JPEG
# -common_libs
#---------------------------------------
if(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
if((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
set(zzogl TRUE)
else(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
else((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
set(zzogl FALSE)
message(STATUS "Skip build of zzogl: miss some dependencies")
message(STATUS "${msg_dep_zzogl}")
endif(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
endif((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
#---------------------------------------
#---------------------------------------

View File

@ -27,7 +27,7 @@
<Project filename="plugins/zerospu2/Linux/ZeroSPU2.cbp">
<Depends filename="3rdparty/SoundTouch/SoundTouch.cbp" />
</Project>
<Project filename="plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp" />
<Project filename="plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp" active="1" />
<Project filename="plugins/GSdx/GSdx.gcc.cbp" active="1" />
<Project filename="3rdparty/SDL-1.3.0-5387/SDL-1.3/SDL-1.3.cbp" />
</Workspace>

View File

@ -45,6 +45,14 @@ if(CMAKE_BUILD_TYPE STREQUAL Release)
add_definitions(${CommonFlags} ${OptimizationFlags} -W)
endif(CMAKE_BUILD_TYPE STREQUAL Release)
# Pick the preprocessor define for the shader backend: NVIDIA_CG_API unless
# GLSL_API is enabled, in which case GLSL_API is defined instead.
if(NOT GLSL_API)
    add_definitions(-DNVIDIA_CG_API)
else(NOT GLSL_API)
    add_definitions(-DGLSL_API)
endif(NOT GLSL_API)
# zzogl sources
set(zzoglSources
GifTransfer.cpp
@ -54,17 +62,17 @@ set(zzoglSources
GSmain.cpp
HostMemory.cpp
Mem.cpp
# memcpy_amd.cpp
Mem_Swizzle.cpp
Mem_Tables.cpp
Profile.cpp
rasterfont.cpp
NewRegs.cpp
Regs.cpp
targets.cpp
x86.cpp
zerogs.cpp
zpipe.cpp
ZZDepthTargets.cpp
ZZMemoryTargets.cpp
ZZRenderTargets.cpp
ZZClut.cpp
ZZHacks.cpp
ZZKeyboard.cpp
@ -74,8 +82,10 @@ set(zzoglSources
ZZoglCRTC.cpp
ZZoglFlush.cpp
ZZoglFlushHack.cpp
ZZoglMem.cpp
ZZoglSave.cpp
ZZoglShaders.cpp
ZZoglShadersGLSL.cpp
ZZoglShoots.cpp
ZZoglVB.cpp
)
@ -94,7 +104,6 @@ set(zzoglHeaders
Mem_Transmit.h
Profile.h
rasterfont.h
NewRegs.h
Regs.h
targets.h
Util.h
@ -102,21 +111,19 @@ set(zzoglHeaders
zerogs.h
zpipe.h
ZZClut.h
ZZoglFlush.h
ZZGl.h
ZZHacks.h
ZZoglDrawing.h
ZZLog.h
ZZoglCRTC.h
ZZoglMath.h
ZZoglMem.h
ZZoglShaders.h
ZZoglShoots.h
ZZoglVB.h
)
# zzogl S sources
set(zzoglSSources
x86-32.S)
# zzogl shader sources
set(zzoglShaderSources
ctx0/ps2hw_ctx.fx
@ -131,9 +138,6 @@ set(zzoglLinuxSources
set(zzoglLinuxHeaders
Linux/Linux.h)
# change language of .S-files to c++
set_source_files_properties(${zzoglSSources} PROPERTIES LANGUAGE CXX)
# add additional include directories
include_directories(.
Linux)
@ -142,7 +146,6 @@ include_directories(.
add_library(${Output} SHARED
${zzoglSources}
${zzoglHeaders}
${zzoglSSources}
${zzoglShaderSources}
${zzoglLinuxSources}
${zzoglLinuxHeaders})
@ -154,7 +157,9 @@ set_target_properties(${Output} PROPERTIES COMPILE_DEFINITIONS USE_GSOPEN2)
target_link_libraries(${Output} Utilities)
# link target with Cg
target_link_libraries(${Output} ${CG_LIBRARIES})
# Cg is only linked when the GLSL shader API is not selected.
if(NOT GLSL_API)
    target_link_libraries(${Output} ${CG_LIBRARIES})
endif(NOT GLSL_API)
# link target with glew
target_link_libraries(${Output} ${GLEW_LIBRARY})
@ -183,10 +188,18 @@ endif(NOT USER_CMAKE_LD_FLAGS STREQUAL "")
if(PACKAGE_MODE)
install(TARGETS ${Output} DESTINATION ${PLUGIN_DIR})
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${PLUGIN_DIR})
if(GLSL_API)
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${PLUGIN_DIR})
else(GLSL_API)
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${PLUGIN_DIR})
endif(GLSL_API)
else(PACKAGE_MODE)
install(TARGETS ${Output} DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
if(GLSL_API)
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
else(GLSL_API)
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
endif(GLSL_API)
endif(PACKAGE_MODE)
################################### Replay Loader

View File

@ -198,13 +198,24 @@ void GLWindow::CreateContextGL()
GLXFBConfig *framebuffer_config = glXChooseFBConfig(glDisplay, DefaultScreen(glDisplay), NULL, &fbcount);
if (!framebuffer_config or !fbcount) return;
#if 1
// At least create a 3.0 context with compatibility profile
int attribs[] = {
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
GLX_CONTEXT_MINOR_VERSION_ARB, 0,
// GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
0
};
#else
// Create a 3.2 core context without compatibility profile
int attribs[] = {
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
GLX_CONTEXT_MINOR_VERSION_ARB, 2,
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
0
};
#endif
GLXContext context_temp = glXCreateContextAttribsARB(glDisplay, framebuffer_config[0], NULL, true, attribs);
if (context_temp) {
ZZLog::Error_Log("Create a 3.0 opengl context");

View File

@ -21,7 +21,7 @@
#define __GS_H__
#define USE_OLD_REGS
#define ZZNORMAL_MEMORY
#include "Util.h"
#include "GifTransfer.h"
@ -39,6 +39,8 @@ extern float fFPS;
extern int g_LastCRC;
#define VB_NUMBUFFERS 512 // number of vbo buffer allocated
struct Vector_16F
{
u16 x, y, z, w;
@ -132,21 +134,22 @@ extern GSconf conf;
// PSM values
// PSM types == Texture Storage Format
enum PSM_value
{
PSMCT32 = 0, // 00 0000
PSMCT24 = 1, // 00 0001
PSMCT16 = 2, // 00 0010
PSMCT16S = 10, // 00 1010
PSMT8 = 19, // 01 0011
PSMT4 = 20, // 01 0100
PSMT8H = 27, // 01 1011
PSMT4HL = 36, // 10 0100
PSMT4HH = 44, // 10 1100
PSMT32Z = 48, // 11 0000
PSMT24Z = 49, // 11 0001
PSMT16Z = 50, // 11 0010
PSMT16SZ = 58, // 11 1010
enum PSM_value{
PSMCT32 = 0, // 000000
PSMCT24 = 1, // 000001
PSMCT16 = 2, // 000010
PSMCT16S = 10, // 001010
PSMT8 = 19, // 010011
PSMT4 = 20, // 010100
PSMT8H = 27, // 011011
PSMT4HL = 36, // 100100
PSMT4HH = 44, // 101100
PSMT32Z = 48, // 110000
PSMT24Z = 49, // 110001
PSMT16Z = 50, // 110010
PSMT16SZ = 58, // 111010
PSMT_BAD_PSM = 63 // for every unknown psm.
};
// Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1
@ -461,7 +464,6 @@ typedef struct
{
u16 aem;
u8 ta[2];
float fta[2];
} texaInfo;
typedef struct
@ -503,6 +505,14 @@ typedef struct
int fba;
} fbaInfo;
// Kinds of image transfer; gs.imageTransfer is compared against these values
// by the GIF transfer code and by the TransferLocalHost/TransferLocalLocal asserts.
enum transfer_types
{
XFER_HOST_TO_LOCAL = 0, // GIF image data is passed to TransferHostLocal()
XFER_LOCAL_TO_HOST = 1, // expected by TransferLocalHost(); asserts if seen in a GIF transfer
XFER_LOCAL_TO_LOCAL = 2, // expected by TransferLocalLocal(); ignored in a GIF transfer
XFER_DEACTIVATED = 3 // ignored in a GIF transfer
};
typedef struct
{
Vertex gsvertex[4]; // circular buffer that contains the vertex
@ -537,15 +547,20 @@ typedef struct
texaInfo texa;
trxposInfo trxpos, trxposnew;
int imageWtemp, imageHtemp;
int imageTransfer;
int imageWnew, imageHnew, imageX, imageY, imageEndX, imageEndY;
bool transferring;
Point image, imageEnd;
Size imageNew, imageTemp;
pathInfo path[4];
GIFRegDIMX dimx;
GSMemory mem;
GSClut clut_buffer;
// Subject to change.
int vsync, interlace;
int primNext(int inc = 1)
{
// Note: ArraySize(gsvertex) == 2^n => modulo is replaced by an and instruction
@ -615,7 +630,7 @@ static __forceinline u32 RGBA16to32(u16 c)
(((c) & 0x8000) ? 0xff000000 : 0);
}
#if 0
#ifndef ZZNORMAL_MEMORY
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
// f is a u16
static __forceinline u16 Float16ToBYTE(u16 f)
@ -984,4 +999,39 @@ inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
#define CPSM_CSA_BITMASK 0x1f780000
#define CPSM_CSA_NOTMASK 0xe0870000
// I'll find a good place for these later.
extern PSM_value PSM_value_Table[64];
extern bool allowed_psm[256]; // in ZZoglMem.cpp
inline void FillAlowedPsnTable() {
allowed_psm[PSMCT32] = true;
allowed_psm[PSMCT24] = true;
allowed_psm[PSMCT16] = true;
allowed_psm[PSMCT16S] = true;
allowed_psm[PSMT8] = true;
allowed_psm[PSMT4] = true;
allowed_psm[PSMT8H] = true;
allowed_psm[PSMT4HH] = true;
allowed_psm[PSMT4HL] = true;
allowed_psm[PSMT32Z] = true;
allowed_psm[PSMT24Z] = true;
allowed_psm[PSMT16Z] = true;
allowed_psm[PSMT16SZ] = true;
PSM_value_Table[PSMCT32] = PSMCT32;
PSM_value_Table[PSMCT24] = PSMCT24;
PSM_value_Table[PSMCT16] = PSMCT16;
PSM_value_Table[PSMCT16S] = PSMCT16S;
PSM_value_Table[PSMT8] = PSMT8;
PSM_value_Table[PSMT4] = PSMT4;
PSM_value_Table[PSMT8H] = PSMT8H;
PSM_value_Table[PSMT4HH] = PSMT4HH;
PSM_value_Table[PSMT4HL] = PSMT4HL;
PSM_value_Table[PSMT32Z] = PSMT32Z;
PSM_value_Table[PSMT24Z] = PSMT24Z;
PSM_value_Table[PSMT16Z] = PSMT16Z;
PSM_value_Table[PSMT16SZ] = PSMT16SZ;
};
#endif

View File

@ -22,6 +22,7 @@
#include "Profile.h"
#include "GLWin.h"
#include "ZZoglFlushHack.h"
#include "ZZoglShaders.h"
using namespace std;
@ -82,7 +83,7 @@ extern int ZZSave(s8* pbydata);
extern bool ZZLoad(s8* pbydata);
// switches the render target to the real target, flushes the current render targets and renders the real image
extern void RenderCRTC(int interlace);
extern void RenderCRTC();
#if defined(_WIN32) && defined(_DEBUG)
HANDLE g_hCurrentThread = NULL;
@ -91,37 +92,37 @@ HANDLE g_hCurrentThread = NULL;
extern int VALIDATE_THRESH;
extern u32 TEXDESTROY_THRESH;
u32 CALLBACK PS2EgetLibType()
EXPORT_C_(u32) PS2EgetLibType()
{
return PS2E_LT_GS;
}
char* CALLBACK PS2EgetLibName()
EXPORT_C_(char*) PS2EgetLibName()
{
return libraryName;
}
u32 CALLBACK PS2EgetLibVersion2(u32 type)
EXPORT_C_(u32) PS2EgetLibVersion2(u32 type)
{
return (zgsversion << 16) | (zgsrevision << 8) | zgsbuild | (zgsminor << 24);
}
void CALLBACK GSsetBaseMem(void* pmem)
EXPORT_C_(void) GSsetBaseMem(void* pmem)
{
g_pBasePS2Mem = (u8*)pmem;
}
void CALLBACK GSsetSettingsDir(const char* dir)
EXPORT_C_(void) GSsetSettingsDir(const char* dir)
{
s_strIniPath = (dir == NULL) ? wxString(L"inis") : wxString(dir, wxConvFile);
}
void CALLBACK GSsetLogDir(const char* dir)
EXPORT_C_(void) GSsetLogDir(const char* dir)
{
ZZLog::SetDir(dir);
}
void CALLBACK GSsetGameCRC(int crc, int options)
EXPORT_C_(void) GSsetGameCRC(int crc, int options)
{
// build a list of function pointer for GetSkipCount (SkipDraw)
static GetSkipCount GSC_list[NUMBER_OF_TITLES];
@ -217,7 +218,7 @@ void CALLBACK GSsetGameCRC(int crc, int options)
ListHacks();
}
void CALLBACK GSsetFrameSkip(int frameskip)
EXPORT_C_(void) GSsetFrameSkip(int frameskip)
{
FUNCLOG
s_frameskipping |= frameskip;
@ -232,7 +233,7 @@ void CALLBACK GSsetFrameSkip(int frameskip)
}
}
void CALLBACK GSreset()
EXPORT_C_(void) GSreset()
{
FUNCLOG
@ -242,11 +243,11 @@ void CALLBACK GSreset()
gs.prac = 1;
prim = &gs._prim[0];
gs.imageTransfer = -1;
gs.transferring = false;
gs.q = 1;
}
void CALLBACK GSgifSoftReset(u32 mask)
EXPORT_C_(void) GSgifSoftReset(u32 mask)
{
FUNCLOG
@ -254,11 +255,11 @@ void CALLBACK GSgifSoftReset(u32 mask)
if (mask & 2) memset(&gs.path[1], 0, sizeof(gs.path[1]));
if (mask & 4) memset(&gs.path[2], 0, sizeof(gs.path[2]));
gs.imageTransfer = -1;
gs.transferring = false;
gs.q = 1;
}
s32 CALLBACK GSinit()
EXPORT_C_(s32) GSinit()
{
FUNCLOG
@ -281,7 +282,7 @@ __forceinline void InitMisc()
ResetRegs();
}
s32 CALLBACK GSopen(void *pDsp, char *Title, int multithread)
EXPORT_C_(s32) GSopen(void *pDsp, char *Title, int multithread)
{
FUNCLOG
@ -337,32 +338,34 @@ EXPORT_C_(s32) GSopen2( void* pDsp, u32 flags )
}
#endif
void CALLBACK GSshutdown()
EXPORT_C_(void) GSshutdown()
{
FUNCLOG
ZZLog::Close();
}
void CALLBACK GSclose()
EXPORT_C_(void) GSclose()
{
FUNCLOG
ZZDestroy();
GLWin.CloseWindow();
// Free allocated memory. The plugin can be closed without closing pcsx2, so we SHOULD free all allocated resources
ZZshExitCleaning();
SaveStateFile = NULL;
SaveStateExists = true; // default value
g_LastCRC = 0;
}
void CALLBACK GSirqCallback(void (*callback)())
EXPORT_C_(void) GSirqCallback(void (*callback)())
{
FUNCLOG
GSirq = callback;
}
void CALLBACK GSwriteCSR(u32 write)
EXPORT_C_(void) GSwriteCSR(u32 write)
{
FUNCLOG
@ -373,7 +376,7 @@ void CALLBACK GSwriteCSR(u32 write)
#define access _access
#endif
void CALLBACK GSchangeSaveState(int newstate, const char* filename)
EXPORT_C_(void) GSchangeSaveState(int newstate, const char* filename)
{
FUNCLOG
@ -428,7 +431,7 @@ static bool get_snapshot_filename(char *filename, char* path, const char* extens
return true;
}
void CALLBACK GSmakeSnapshot(char *path)
EXPORT_C_(void) GSmakeSnapshot(char *path)
{
FUNCLOG
@ -474,7 +477,16 @@ static __forceinline void SetGSTitle()
GLWin.SetTitle(strtitle);
}
void CALLBACK GSvsync(int interlace)
// This isn't implemented for some reason? Adding a field for it for the moment, till I get a chance to look closer.
EXPORT_C_(void) GSsetVsync(int enabled)
{
FUNCLOG
ZZLog::Debug_Log("Setting VSync to 0x%x.", enabled);
gs.vsync = enabled;
}
EXPORT_C_(void) GSvsync(int current_interlace)
{
FUNCLOG
@ -506,8 +518,9 @@ void CALLBACK GSvsync(int interlace)
g_nRealFrame++;
// !interlace? Hmmm... Fixme.
RenderCRTC(!interlace);
// The value passed seems to either be 0 or 0x2000, and we want 0 or 1. Perhaps !! would be better...
gs.interlace = !current_interlace;
RenderCRTC();
GLWin.ProcessEvents();
@ -559,7 +572,7 @@ void CALLBACK GSvsync(int interlace)
}
void CALLBACK GSreadFIFO(u64 *pMem)
EXPORT_C_(void) GSreadFIFO(u64 *pMem)
{
FUNCLOG
@ -571,7 +584,7 @@ void CALLBACK GSreadFIFO(u64 *pMem)
TransferLocalHost((u32*)pMem, 1);
}
void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
EXPORT_C_(void) GSreadFIFO2(u64 *pMem, int qwc)
{
FUNCLOG
@ -583,7 +596,7 @@ void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
TransferLocalHost((u32*)pMem, qwc);
}
int CALLBACK GSsetupRecording(int start, void* pData)
EXPORT_C_(int) GSsetupRecording(int start, void* pData)
{
FUNCLOG
@ -595,7 +608,7 @@ int CALLBACK GSsetupRecording(int start, void* pData)
return 1;
}
s32 CALLBACK GSfreeze(int mode, freezeData *data)
EXPORT_C_(s32) GSfreeze(int mode, freezeData *data)
{
FUNCLOG

View File

@ -27,7 +27,7 @@ static int path1_count = 0;
static int nPath3Hack = 0;
void CALLBACK GSgetLastTag(u64* ptag)
EXPORT_C_(void) GSgetLastTag(u64* ptag)
{
FUNCLOG
@ -165,32 +165,36 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
{
int len = min(size, path->nloop);
//ZZLog::Error_Log("GIF_FLG_IMAGE(%d)=%d", gs.imageTransfer, len);
switch (gs.imageTransfer)
if (gs.transferring)
{
case 0:
TransferHostLocal(pMem, len * 4);
break;
switch (gs.imageTransfer)
{
case XFER_HOST_TO_LOCAL:
TransferHostLocal(pMem, len * 4);
break;
case 1:
// This can't happen; downloads can not be started or performed as part of
// a GIFtag operation. They're an entirely separate process that can only be
// done through the ReverseFIFO transfer (aka ReadFIFO). --air
assert(0);
//TransferLocalHost(pMem, len);
break;
case XFER_LOCAL_TO_HOST:
// This can't happen; downloads can not be started or performed as part of
// a GIFtag operation. They're an entirely separate process that can only be
// done through the ReverseFIFO transfer (aka ReadFIFO). --air
assert(0);
//TransferLocalHost(pMem, len);
break;
case 2:
//TransferLocalLocal();
break;
case XFER_LOCAL_TO_LOCAL:
//TransferLocalLocal();
break;
case 3:
//assert(0);
break;
case XFER_DEACTIVATED:
//assert(0);
break;
default:
//assert(0);
break;
default:
//assert(0);
break;
}
}
pMem += len * 4;
@ -236,7 +240,7 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
}
}
void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr)
EXPORT_C_(void) GSgifTransfer1(u32 *pMem, u32 addr)
{
FUNCLOG
@ -250,7 +254,7 @@ void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr)
_GSgifTransfer<0>((u32*)((u8*)pMem + addr), (0x4000 - addr) / 16);
}
void CALLBACK GSgifTransfer2(u32 *pMem, u32 size)
EXPORT_C_(void) GSgifTransfer2(u32 *pMem, u32 size)
{
FUNCLOG
@ -259,7 +263,7 @@ void CALLBACK GSgifTransfer2(u32 *pMem, u32 size)
_GSgifTransfer<1>(const_cast<u32*>(pMem), size);
}
void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
EXPORT_C_(void) GSgifTransfer3(u32 *pMem, u32 size)
{
FUNCLOG
@ -268,7 +272,7 @@ void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
_GSgifTransfer<2>(const_cast<u32*>(pMem), size);
}
void CALLBACK GSgifTransfer(const u32 *pMem, u32 size)
EXPORT_C_(void) GSgifTransfer(const u32 *pMem, u32 size)
{
FUNCLOG

View File

@ -18,8 +18,6 @@
*/
#include "GS.h"
#include <Cg/cg.h>
#include <Cg/cgGL.h>
#include <stdlib.h>
#include "Mem.h"
@ -117,14 +115,14 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h
if (PSMT_ISZTEX(psm))
{
// Somehow, I doubt this code is right. I'll have to look into it. For the moment, I'm keeping it the
// way it was. --arcum42
// This still needs an eye kept on it.
const BLOCK& b = m_Blocks[psm];
bw = (bw + b.width - 1) / b.width;
start = bp * 256 + ((y / b.height) * bw + (x / b.width)) * 0x2000;
end = bp * 256 + (((y + h - 1) / b.height) * bw + (x + w + b.width - 1) / b.width) * 0x2000;
const int x2 = x + w + b.width - 1;
const int y2 = y + h - 1;
bw = bw / b.width;
start = (bp + ((y / b.height) * bw + (x / b.width)) * 0x20) * 0x100;
end = (bp + ((y2 / b.height) * bw + (x2 / b.width)) * 0x20) * 0x100;
return;
}
@ -139,46 +137,101 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h
}
else
{
// This is what it used to do, which doesn't seem right.
// Keeping it for reference, in case removing it breaks anything.
start /= 2;
end /= 2;
}
}
// Same as GetRectMemAddress, except that we know x & y are zero, so it's simplified a bit.
void GetRectMemAddressZero(int& start, int& end, int psm, int w, int h, int bp, int bw)
{
FUNCLOG
u32 bits = 0;
if (m_Blocks[psm].bpp == 0)
{
ZZLog::Error_Log("ZeroGS: Bad psm 0x%x.", psm);
start = 0;
end = MEMORY_END;
return;
}
if (PSMT_ISZTEX(psm))
{
// This still needs an eye kept on it.
const BLOCK& b = m_Blocks[psm];
const int x2 = w + b.width - 1;
const int y2 = h - 1;
bw = bw / b.width;
start = bp * 0x100;
end = (bp + ((y2 / b.height) * bw + (x2 / b.width)) * 0x20) * 0x100;
return;
}
//int newx = ((x + w - 1 + 31) & ~31) - 1;
//int newy = ((y + h - 1 + 15) & ~15) - 1;
//start = getPixelAddress4(x, y, bp, bw) / 2;
//end = (getPixelAddress4(max(newx, x), max(newy, y), bp, bw) + 2) / 2;
bits = PSMT_BITS_NUM(psm);
start = getPixelFun[psm](0, 0, bp, bw);
end = getPixelFun[psm](w - 1, h - 1, bp, bw) + 1;
if (bits > 0)
{
start *= bits;
end *= bits;
}
else
{
start /= 2;
end /= 2;
}
}
void GetRectMemAddress(int& start, int& end, int psm, Point p, Size s, int bp, int bw)
{
GetRectMemAddress(start, end, psm, p.x, p.y, s.w, s.h, bp, bw);
}
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, Size s, int bp, int bw)
{
GetRectMemAddress(start, end, psm, x, y, s.w, s.h, bp, bw);
}
void GetRectMemAddressZero(int& start, int& end, int psm, Size s, int bp, int bw)
{
GetRectMemAddressZero(start, end, psm, s.w, s.h, bp, bw);
}
void InitTransferHostLocal()
{
FUNCLOG
#if defined(_DEBUG)
// Xenosaga 1.
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0X%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
if (gs.trxpos.dx + gs.imageNew.w > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0X%x)", gs.trxpos.dx + gs.imageNew.w, gs.dstbuf.bw);
#endif
//bool bHasFlushed = false;
gs.imageX = gs.trxpos.dx;
gs.imageY = gs.trxpos.dy;
gs.image.x = gs.trxpos.dx;
gs.image.y = gs.trxpos.dy;
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
gs.imageEnd.x = gs.image.x + gs.imageNew.w;
gs.imageEnd.y = gs.image.y + gs.imageNew.h;
assert(gs.imageEndX < 2048 && gs.imageEndY < 2048);
assert(gs.imageEnd.x < 2048 && gs.imageEnd.y < 2048);
// This needs to be looked in to, since psm should *not* be 63.
// hack! viewful joe
if (gs.dstbuf.psm == 63) gs.dstbuf.psm = 0;
if (gs.dstbuf.psm == 63)
{
ZZLog::WriteLn("gs.dstbuf.psm set to 0!");
gs.dstbuf.psm = 0;
}
int start, end;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
if (end > MEMORY_END)
{
@ -189,7 +242,7 @@ void InitTransferHostLocal()
// MEMORY_END is 0x400000...
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
//gs.imageTransfer = -1;
//gs.transferring = false;
end = MEMORY_END;
}
@ -198,17 +251,18 @@ void InitTransferHostLocal()
if (vb[0].nCount > 0) Flush(0);
if (vb[1].nCount > 0) Flush(1);
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew);
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew.w, gs.imageNew.h);
}
void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
FUNCLOG
int start, end;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
int start = -1, end = -1;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.image, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
if ((start == -1) || (end == -1)) ZZLog::WriteLn("start == %d, end == %d", start, end);
assert(start < gs_imageEnd);
end = gs_imageEnd;
@ -272,8 +326,8 @@ void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tw = gs.imageNew.w;
t.th = gs.imageNew.h;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
@ -285,24 +339,24 @@ void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
void InitTransferLocalHost()
{
FUNCLOG
assert(gs.trxpos.sx + gs.imageWnew <= 2048 && gs.trxpos.sy + gs.imageHnew <= 2048);
assert(gs.trxpos.sx + gs.imageNew.w <= 2048 && gs.trxpos.sy + gs.imageNew.h <= 2048);
#if defined(_DEBUG)
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
if (gs.trxpos.sx + gs.imageNew.w > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0x%x)", gs.trxpos.sx + gs.imageNew.w, gs.srcbuf.bw);
#endif
gs.imageX = gs.trxpos.sx;
gs.imageY = gs.trxpos.sy;
gs.image.x = gs.trxpos.sx;
gs.image.y = gs.trxpos.sy;
gs.imageEndX = gs.imageX + gs.imageWnew;
gs.imageEndY = gs.imageY + gs.imageHnew;
gs.imageEnd.x = gs.image.x + gs.imageNew.w;
gs.imageEnd.y = gs.image.y + gs.imageNew.h;
s_vTransferCache.resize(0);
int start, end;
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageNew, gs.srcbuf.bp, gs.srcbuf.bw);
ResolveInRange(start, end);
}
@ -316,16 +370,16 @@ void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
T* pbuf = (T*)pbyMem;
u32 nSize = nQWordSize * 16 / sizeof(T);
for (; i < gs.imageEndY; ++i)
for (; i < gs.imageEnd.y; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
for (; j < gs.imageEnd.x && nSize > 0; ++j, --nSize)
{
*pbuf++ = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
}
if (j >= gs.imageEndX)
if (j >= gs.imageEnd.x)
{
assert(j == gs.imageEndX);
assert(j == gs.imageEnd.x);
j = gs.trxpos.sx;
}
else
@ -344,9 +398,9 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
u8* pbuf = (u8*)pbyMem;
u32 nSize = nQWordSize * 16 / 3;
for (; i < gs.imageEndY; ++i)
for (; i < gs.imageEnd.y; ++i)
{
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
for (; j < gs.imageEnd.x && nSize > 0; ++j, --nSize)
{
u32 p = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
pbuf[0] = (u8)p;
@ -355,9 +409,9 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
pbuf += 3;
}
if (j >= gs.imageEndX)
if (j >= gs.imageEnd.x)
{
assert(j == gs.imageEndX);
assert(j == gs.imageEnd.x);
j = gs.trxpos.sx;
}
else
@ -372,34 +426,34 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
void TransferLocalHost(void* pbyMem, u32 nQWordSize)
{
FUNCLOG
assert(gs.imageTransfer == 1);
assert(gs.imageTransfer == XFER_LOCAL_TO_HOST);
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
switch(PSMT_BITMODE(gs.srcbuf.psm))
{
case 0:
TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 1:
TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 2:
TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
case 3:
TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
break;
default:
assert(0);
break;
case 0:
TransferLocalHost<u32>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
break;
case 1:
TransferLocalHost_24(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
break;
case 2:
TransferLocalHost<u16>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
break;
case 3:
TransferLocalHost<u8>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
break;
default:
assert(0);
break;
}
if (gs.imageY >= gs.imageEndY)
if (gs.image.y >= gs.imageEnd.y)
{
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
assert(gs.imageY == gs.imageEndY);
gs.imageTransfer = -1;
ZZLog::Error_Log("gs.image.y >= gs.imageEnd.y!");
assert(gs.image.y == gs.imageEnd.y);
gs.transferring = false;
}
}
@ -411,11 +465,11 @@ __forceinline void _TransferLocalLocal()
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 widthlimit = 4;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
u32 maxX = gs.trxpos.sx + gs.imageNew.w;
u32 maxY = gs.trxpos.sy + gs.imageNew.h;
if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;
if ((gs.imageWnew & widthlimit) != 0) return;
if ((gs.imageNew.w & widthlimit) != 0) return;
for(u32 i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
{
@ -447,10 +501,10 @@ __forceinline void _TransferLocalLocal_4()
_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
u32 maxX = gs.trxpos.sx + gs.imageWnew;
u32 maxY = gs.trxpos.sy + gs.imageHnew;
u32 maxX = gs.trxpos.sx + gs.imageNew.w;
u32 maxY = gs.trxpos.sy + gs.imageNew.h;
assert((gs.imageWnew % 8) == 0);
assert((gs.imageNew.w % 8) == 0);
for(u32 i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
{
@ -498,21 +552,21 @@ void TransferLocalLocal()
FUNCLOG
//ZZLog::Error_Log("I'z in your code, transferring your memory...");
assert(gs.imageTransfer == 2);
assert(gs.trxpos.sx + gs.imageWnew < 2048 && gs.trxpos.sy + gs.imageHnew < 2048);
assert(gs.trxpos.dx + gs.imageWnew < 2048 && gs.trxpos.dy + gs.imageHnew < 2048);
assert(gs.imageTransfer == XFER_LOCAL_TO_LOCAL);
assert(gs.trxpos.sx + gs.imageNew.w < 2048 && gs.trxpos.sy + gs.imageNew.h < 2048);
assert(gs.trxpos.dx + gs.imageNew.w < 2048 && gs.trxpos.dy + gs.imageNew.h < 2048);
assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7));
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, src width exceeded.(0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
if (gs.trxpos.sx + gs.imageNew.w > gs.srcbuf.bw)
ZZLog::Debug_Log("Transfer error, src width exceeded.(0x%x > 0x%x)", gs.trxpos.sx + gs.imageNew.w, gs.srcbuf.bw);
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, dst width exceeded.(0x%x > 0x%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
if (gs.trxpos.dx + gs.imageNew.w > gs.dstbuf.bw)
ZZLog::Debug_Log("Transfer error, dst width exceeded.(0x%x > 0x%x)", gs.trxpos.dx + gs.imageNew.w, gs.dstbuf.bw);
int srcstart, srcend, dststart, dstend;
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageNew, gs.srcbuf.bp, gs.srcbuf.bw);
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
// resolve the targs
ResolveInRange(srcstart, srcend);
@ -547,15 +601,15 @@ void TransferLocalLocal()
{
tex0Info t;
t.tbp0 = gs.dstbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tw = gs.imageNew.w;
t.th = gs.imageNew.h;
t.tbw = gs.dstbuf.bw;
t.psm = gs.dstbuf.psm;
SaveTex(&t, 0);
t.tbp0 = gs.srcbuf.bp;
t.tw = gs.imageWnew;
t.th = gs.imageHnew;
t.tw = gs.imageNew.w;
t.th = gs.imageNew.h;
t.tbw = gs.srcbuf.bw;
t.psm = gs.srcbuf.psm;
SaveTex(&t, 0);
@ -563,16 +617,4 @@ void TransferLocalLocal()
#endif
}
__forceinline void TerminateLocalHost()
{
FUNCLOG
//ZZLog::Error_Log("Terminate Local Host!");
}
__forceinline void TerminateHostLocal()
{
FUNCLOG
gs.imageTransfer = -1;
}

View File

@ -96,7 +96,10 @@ inline u8* _MemoryAddress(int x)
}
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
extern void GetRectMemAddress(int& start, int& end, int psm, Point p, Size s, int bp, int bw);
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, Size s, int bp, int bw);
extern void GetRectMemAddressZero(int& start, int& end, int psm, int w, int h, int bp, int bw);
extern void GetRectMemAddressZero(int& start, int& end, int psm, Size s, int bp, int bw);
// called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf.
// Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks?
@ -108,7 +111,4 @@ extern void TransferLocalHost(void* pbyMem, u32 nQWordSize);
extern void TransferLocalLocal();
extern void TerminateLocalHost();
extern void TerminateHostLocal();
#endif // HOSTMEMORY_H_INCLUDED

View File

@ -35,7 +35,7 @@ extern bool THR_bCtrl;
static map<string, confOptsStruct> mapConfOpts;
static gameHacks tempHacks;
void CALLBACK GSkeyEvent(keyEvent *ev)
EXPORT_C_(void) GSkeyEvent(keyEvent *ev)
{
static bool bAlt = false;
@ -450,7 +450,7 @@ void DisplayDialog()
gtk_widget_destroy(dialog);
}
void CALLBACK GSconfigure()
EXPORT_C_(void) GSconfigure()
{
char strcurdir[256];
getcwd(strcurdir, 256);
@ -484,12 +484,12 @@ void SysMessage(const char *fmt, ...)
gtk_widget_destroy(dialog);
}
void CALLBACK GSabout()
EXPORT_C_(void) GSabout()
{
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42, gregory, and the pcsx2 development team). Based off of ZeroGS, by zerofrog.");
}
s32 CALLBACK GStest()
EXPORT_C_(s32) GStest()
{
return 0;
}

View File

@ -16,11 +16,14 @@
<Compiler>
<Add option="-Wall" />
<Add option="-g" />
<Add option="-I/opt/cg/include" />
<Add option="-L/opt/cg/lib" />
<Add option="-DZEROGS_DEVBUILD" />
<Add option="-D_DEBUG" />
</Compiler>
<Linker>
<Add library="../../../../../deps/debug/libUtilities.a" />
<Add library="Cg" />
</Linker>
</Target>
<Target title="Devel">
@ -33,12 +36,15 @@
<Compiler>
<Add option="-O2" />
<Add option="-g" />
<Add option="-I/opt/cg/include" />
<Add option="-L/opt/cg/lib" />
<Add option="-W" />
<Add option="-DZEROGS_DEVBUILD" />
<Add option="-DNDEBUG" />
</Compiler>
<Linker>
<Add library="../../../../../deps/release/libUtilities.a" />
<Add library="Cg" />
</Linker>
</Target>
<Target title="Release">
@ -50,10 +56,69 @@
<Option createStaticLib="1" />
<Compiler>
<Add option="-O2" />
<Add option="-I/opt/cg/include" />
<Add option="-L/opt/cg/lib" />
<Add option="-W" />
<Add option="-DRELEASE_TO_PUBLIC" />
<Add option="-DNDEBUG" />
</Compiler>
<Linker>
<Add option="-s" />
<Add library="../../../../../deps/release/libUtilities.a" />
<Add library="Cg" />
</Linker>
</Target>
<Target title="GLSL - Debug">
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL-dbg.so" prefix_auto="0" extension_auto="0" />
<Option object_output="obj/Debug/" />
<Option type="3" />
<Option compiler="gcc" />
<Option createDefFile="1" />
<Option createStaticLib="1" />
<Compiler>
<Add option="-Wall" />
<Add option="-g" />
<Add option="-DZEROGS_DEVBUILD" />
<Add option="-D_DEBUG" />
<Add option="-DGLSL_API" />
</Compiler>
<Linker>
<Add library="../../../../../deps/debug/libUtilities.a" />
</Linker>
</Target>
<Target title="GLSL - Devel">
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL-dev" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="3" />
<Option compiler="gcc" />
<Option createDefFile="1" />
<Option createStaticLib="1" />
<Compiler>
<Add option="-O2" />
<Add option="-g" />
<Add option="-W" />
<Add option="-DZEROGS_DEVBUILD" />
<Add option="-DNDEBUG" />
<Add option="-DGLSL_API" />
</Compiler>
<Linker>
<Add library="../../../../../deps/release/libUtilities.a" />
</Linker>
</Target>
<Target title="GLSL - Release">
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL" prefix_auto="1" extension_auto="1" />
<Option object_output="obj/Release/" />
<Option type="3" />
<Option compiler="gcc" />
<Option createDefFile="1" />
<Option createStaticLib="1" />
<Compiler>
<Add option="-O2" />
<Add option="-W" />
<Add option="-DRELEASE_TO_PUBLIC" />
<Add option="-DNDEBUG" />
<Add option="-DGLSL_API" />
</Compiler>
<Linker>
<Add option="-s" />
<Add library="../../../../../deps/release/libUtilities.a" />
@ -68,8 +133,6 @@
<Add option="-Wno-unused-value" />
<Add option="-Wunused-variable" />
<Add option="-m32" />
<Add option="-I/opt/cg/include" />
<Add option="-L/opt/cg/lib" />
<Add option="-msse2" />
<Add option="-fno-regmove" />
<Add option="-fno-strict-aliasing" />
@ -96,10 +159,10 @@
<Add library="z" />
<Add library="dl" />
<Add library="stdc++" />
<Add library="Cg" />
</Linker>
<ExtraCommands>
<Add after="cp $(PROJECT_DIR)/../../ps2hw.dat $(TARGET_OUTPUT_DIR)/" />
<Add after="cp $(PROJECT_DIR)/../../ps2hw.glsl $(TARGET_OUTPUT_DIR)/" />
<Mode after="always" />
</ExtraCommands>
<Unit filename="../../CRC.h" />
@ -121,8 +184,6 @@
<Unit filename="../../Mem_Swizzle.h" />
<Unit filename="../../Mem_Tables.cpp" />
<Unit filename="../../Mem_Transmit.h" />
<Unit filename="../../NewRegs.cpp" />
<Unit filename="../../NewRegs.h" />
<Unit filename="../../Profile.cpp" />
<Unit filename="../../Profile.h" />
<Unit filename="../../Regs.cpp" />
@ -147,24 +208,29 @@
</Unit>
<Unit filename="../../ZZClut.cpp" />
<Unit filename="../../ZZClut.h" />
<Unit filename="../../ZZDepthTargets.cpp" />
<Unit filename="../../ZZGl.h" />
<Unit filename="../../ZZHacks.cpp" />
<Unit filename="../../ZZHacks.h" />
<Unit filename="../../ZZKeyboard.cpp" />
<Unit filename="../../ZZLog.cpp" />
<Unit filename="../../ZZLog.h" />
<Unit filename="../../ZZMemoryTargets.cpp" />
<Unit filename="../../ZZRenderTargets.cpp" />
<Unit filename="../../ZZoglCRTC.cpp" />
<Unit filename="../../ZZoglCRTC.h" />
<Unit filename="../../ZZoglCreate.cpp" />
<Unit filename="../../ZZoglDrawing.cpp" />
<Unit filename="../../ZZoglDrawing.h" />
<Unit filename="../../ZZoglFlush.cpp" />
<Unit filename="../../ZZoglFlush.h" />
<Unit filename="../../ZZoglFlushHack.cpp" />
<Unit filename="../../ZZoglFlushHack.h" />
<Unit filename="../../ZZoglMath.h" />
<Unit filename="../../ZZoglSave.cpp" />
<Unit filename="../../ZZoglShaders.cpp" />
<Unit filename="../../ZZoglShaders.h" />
<Unit filename="../../ZZoglShadersGLSL.cpp" />
<Unit filename="../../ZZoglShoots.cpp" />
<Unit filename="../../ZZoglShoots.h" />
<Unit filename="../../ZZoglVB.cpp" />

View File

@ -25,9 +25,11 @@
#include "Mem_Transmit.h"
#include "Mem_Swizzle.h"
#ifdef ZEROGS_SSE2
#include <emmintrin.h>
#include <immintrin.h>
#endif
#ifdef ZZNORMAL_MEMORY
BLOCK m_Blocks[0x40]; // do so blocks are indexable
PCSX2_ALIGNED16(u32 tempblock[64]);
@ -53,41 +55,41 @@ u8* pstart;
template <class T>
static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
{
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.image.x == gs.trxpos.dx) &&
(alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));
if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit)
if ((gs.imageEnd.x - gs.trxpos.dx) % data.widthlimit)
{
/* hack */
int testwidth = (int)nSize -
(gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx)
+ (gs.imageX - gs.trxpos.dx);
(gs.imageEnd.y - gs.image.y) * (gs.imageEnd.x - gs.trxpos.dx)
+ (gs.image.x - gs.trxpos.dx);
if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit))
{
/* don't transfer */
/*ZZLog::Debug_Log("Bad texture %s: %d %d %d", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/
/*ZZLog::Debug_Log("Bad texture %s: %d %d %d", #psm, gs.trxpos.dx, gs.imageEnd.x, nQWordSize);*/
//ZZLog::Error_Log("Bad texture: testwidth = %d; data.widthlimit = %d", testwidth, data.widthlimit);
gs.imageTransfer = -1;
gs.transferring = false;
}
bCanAlign = false;
}
/* first align on block boundary */
if (MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign)
if (MOD_POW2(gs.image.y, data.blockheight) || !bCanAlign)
{
u32 transwidth;
if (!bCanAlign)
endY = gs.imageEndY; /* transfer the whole image */
endY = gs.imageEnd.y; /* transfer the whole image */
else
assert(endY < gs.imageEndY); /* part of alignment condition */
assert(endY < gs.imageEnd.y); /* part of alignment condition */
if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit))
if (((gs.imageEnd.x - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEnd.x - gs.image.x) % data.widthlimit))
{
/* transmit with a width of 1 */
transwidth = (1 + (DSTPSM == PSMT4));
transwidth = (1 + (gs.dstbuf.psm == PSMT4));
}
else
{
@ -98,7 +100,7 @@ static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFun
if (pbuf == NULL) return NULL;
if (nSize == 0 || tempY == gs.imageEndY) return NULL;
if (nSize == 0 || tempY == gs.imageEnd.y) return NULL;
}
return pbuf;
@ -112,14 +114,14 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
_SwizzleBlock swizzle;
/* can align! */
pitch = gs.imageEndX - gs.trxpos.dx;
pitch = gs.imageEnd.x - gs.trxpos.dx;
area = pitch * data.blockheight;
fracX = gs.imageEndX - alignedPt.x;
fracX = gs.imageEnd.x - alignedPt.x;
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0;
if (bAligned || ((DSTPSM == PSMCT24) || (DSTPSM == PSMT8H) || (DSTPSM == PSMT4HH) || (DSTPSM == PSMT4HL)))
if (bAligned || ((gs.dstbuf.psm == PSMCT24) || (gs.dstbuf.psm == PSMT8H) || (gs.dstbuf.psm == PSMT4HH) || (gs.dstbuf.psm == PSMT4HL)))
swizzle = (fun.Swizzle);
else
swizzle = (fun.Swizzle_u);
@ -140,7 +142,7 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
#endif
/* transfer the rest */
if (alignedPt.x < gs.imageEndX)
if (alignedPt.x < gs.imageEnd.x)
{
pbuf = TransmitHostLocalX<T>(data.psm, fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
@ -161,19 +163,19 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
{
if (tempY >= gs.imageEndY)
if (tempY >= gs.imageEnd.y)
{
assert(gs.imageTransfer == -1 || tempY == gs.imageEndY);
gs.imageTransfer = -1;
assert( gs.transferring == false || tempY == gs.imageEnd.y);
gs.transferring = false;
/*int start, end;
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
g_MemTargs.ClearRange(start, end);*/
}
else
{
/* update new params */
gs.imageY = tempY;
gs.imageX = tempX;
gs.image.y = tempY;
gs.image.x = tempX;
}
return (nSize * TransPitch(2, data.transfersize) + nLeftOver) / 2;
@ -182,23 +184,23 @@ static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
template <class T>
static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSize)
{
assert(gs.imageTransfer == 0);
assert(gs.imageTransfer == XFER_HOST_TO_LOCAL);
TransferData data = tData[psm];
TransferFuncts fun(psm);
pstart = g_pbyGSMemory + gs.dstbuf.bp * 256;
const T* pbuf = (const T*)pbyMem;
const int tp2 = TransPitch(2, data.transfersize);
int nLeftOver = (nQWordSize * 4 * 2) % tp2;
tempY = gs.imageY;
tempX = gs.imageX;
tempY = gs.image.y;
tempX = gs.image.x;
Point alignedPt;
nSize = (nQWordSize * 4 * 2) / tp2;
nSize = min(nSize, gs.imageWnew * gs.imageHnew);
nSize = min(nSize, gs.imageNew.w * gs.imageNew.h);
int endY = ROUND_UPPOW2(gs.imageY, data.blockheight);
alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);
int endY = ROUND_UPPOW2(gs.image.y, data.blockheight);
alignedPt.y = ROUND_DOWNPOW2(gs.imageEnd.y, data.blockheight);
alignedPt.x = ROUND_DOWNPOW2(gs.imageEnd.x, data.blockwidth);
pbuf = AlignOnBlockBoundry<T>(data, fun, alignedPt, endY, pbuf);
@ -210,12 +212,12 @@ static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSiz
if (TransPitch(nSize, data.transfersize) / 4 > 0)
{
pbuf = TransmitHostLocalY<T>(psm, fun.wp, data.widthlimit, gs.imageEndY, pbuf);
pbuf = TransmitHostLocalY<T>(psm, fun.wp, data.widthlimit, gs.imageEnd.y, pbuf);
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
/* sometimes wrong sizes are sent (tekken tag) */
assert(gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize) / 4 <= 2);
assert(gs.transferring == false || TransPitch(nSize, data.transfersize) / 4 <= 2);
}
return FinishTransfer(data, nLeftOver);
@ -382,3 +384,5 @@ void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, in
m_Blocks[PSMT4] = b;
m_Blocks[PSMT4].SetFun(PSMT4);
}
#endif

View File

@ -32,7 +32,13 @@ static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1))
const int BLOCK_TEXWIDTH = 128;
const int BLOCK_TEXHEIGHT = 512;
extern PCSX2_ALIGNED16(u32 tempblock[64]);
// PSM is a 6-bit (u6) value, so we MUST guarantee that we don't crash on an incorrect psm.
#define MAX_PSM 64
#define TABLE_WIDTH 8
#ifndef ZZNORMAL_MEMORY
#include "ZZoglMem.h"
#endif
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
@ -54,6 +60,7 @@ extern _SwizzleBlock swizzleBlockUnFun[64];
extern _TransferHostLocal TransferHostLocalFun[64];
extern _TransferLocalHost TransferLocalHostFun[64];
// Both of the following structs should probably be local class variables or in a namespace,
// but this works for the moment.
@ -68,6 +75,9 @@ struct TransferData
u32 psm;
};
#ifdef ZZNORMAL_MEMORY
extern PCSX2_ALIGNED16(u32 tempblock[64]);
struct TransferFuncts
{
_writePixel_0 wp;
@ -500,6 +510,8 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
///////////////
#endif
extern int TransferHostLocal32(const void* pbyMem, u32 nQWordSize);
extern int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize);
extern int TransferHostLocal24(const void* pbyMem, u32 nQWordSize);

View File

@ -21,15 +21,11 @@
#include "Mem.h"
#include "Mem_Swizzle.h"
#ifdef ZEROGS_SSE2
#include <emmintrin.h>
#include <immintrin.h>
#endif
// WARNING: an sfence instruction must be called after a SwizzleBlock sse2 function
// Current port of the ASM function to intrinsic
#define INTRINSIC_PORT_16
#define INTRINSIC_PORT_8
#define INTRINSIC_PORT_4
#ifdef ZEROGS_SSE2
static const __aligned16 u32 mask_24b_H[4] = {0xFF000000, 0x0000FFFF, 0xFF000000, 0x0000FFFF};
static const __aligned16 u32 mask_24b_L[4] = {0x00FFFFFF, 0x00000000, 0x00FFFFFF, 0x00000000};
@ -501,29 +497,17 @@ __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch)
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_16
SwizzleBlock16_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock16_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_8
SwizzleBlock8_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock8_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_4
SwizzleBlock4_sse2_I<true>(dst, src, pitch);
#else
SwizzleBlock4_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
@ -533,29 +517,17 @@ __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_16
SwizzleBlock16_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock16u_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_8
SwizzleBlock8_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock8u_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch)
{
#ifdef INTRINSIC_PORT_4
SwizzleBlock4_sse2_I<false>(dst, src, pitch);
#else
SwizzleBlock4u_sse2(dst, src, pitch);
#endif
}
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch)

View File

@ -250,6 +250,92 @@ u32 g_pageTable16SZ[64][64];
u32 g_pageTable8[64][128];
u32 g_pageTable4[128][128];
// The largest PSM with a real entry below is 58 (PSMCT16SZ), but the table has MAX_PSM rows
// (64 initializers are listed), so every 6-bit PSM value is a valid row index.
// Rows for PSM values that are not used are zero-filled.
//
// This table is used for fast access to memory storage data. Column meaning is as follows:
// 0 -- (1 << [psm][0]) is the number of pixels stored per word: 0 if 1 pixel is stored, 1 for 2 pixels (16-bit), 2 for 4 pixels (PSMT8) and 3 for 8 pixels (PSMT4)
// 1, 2 -- (1 << [psm][1]) and (1 << [psm][2]) are the dimensions of the pageTable for this psm.
//         Note that [psm][1] = 5 + ([psm][0] + 1) / 2, and [psm][2] = 6 + [psm][0] / 2.
// 3, 4 -- pageTable size - 1 for this psm: [psm][3] = (1 << [psm][1]) - 1 and [psm][4] = (1 << [psm][2]) - 1.
//         Used to clamp x, y that fall outside the boundaries.
// 5 -- 3 - [psm][0], stored precomputed just for speed.
// 6 -- pixel mask: applied to a word, it keeps only the bits belonging to one pixel of this format.
//      NOTE(review): the previous comment gave this as (1 << [psm][5]) - 1, which does not match the
//      values below (e.g. PSMCT32 has [5] == 3 but mask 0xffffffff).
// 7 -- starting bit position of the data in the word; PSMT8H, PSMT4HL and PSMT4HH do not store their data from the beginning of the word.
u32 ZZ_DT[MAX_PSM][TABLE_WIDTH] = {
{0, 5, 6, 31, 63, 3, 0xffffffff, 0}, // 0 PSMCT32
{0, 5, 6, 31, 63, 3, 0x00ffffff, 0}, // 1 PSMCT24
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 2 PSMCT16
{0, }, // 3
{0, }, // 4
{0, }, // 5
{0, }, // 6
{0, }, // 7
{0, }, // 8
{0, }, // 9
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 10 PSMCT16S
{0, }, // 11
{0, }, // 12
{0, }, // 13
{0, }, // 14
{0, }, // 15
{0, }, // 16
{0, }, // 17
{0, }, // 18
{2, 6, 7, 63, 127, 1, 0x000000ff, 0}, // 19 PSMT8
{3, 7, 7, 127, 127, 0, 0x0000000f, 0}, // 20 PSMT4
{0, }, // 21
{0, }, // 22
{0, }, // 23
{0, }, // 24
{0, }, // 25
{0, }, // 26
{0, 5, 6, 31, 63, 3, 0x000000ff, 24}, // 27 PSMT8H
{0, }, // 28
{0, }, // 29
{0, }, // 30
{0, }, // 31
{0, }, // 32
{0, }, // 33
{0, }, // 34
{0, }, // 35
{0, 5, 6, 31, 63, 3, 0x0000000f, 24}, // 36 PSMT4HL
{0, }, // 37
{0, }, // 38
{0, }, // 39
{0, }, // 40
{0, }, // 41
{0, }, // 42
{0, }, // 43
{0, 5, 6, 31, 63, 3, 0x0000000f, 28}, // 44 PSMT4HH
{0, }, // 45
{0, }, // 46
{0, }, // 47
{0, 5, 6, 31, 63, 3, 0xffffffff, 0}, // 48 PSMCT32Z
{0, 5, 6, 31, 63, 3, 0x00ffffff, 0}, // 49 PSMCT24Z
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 50 PSMCT16Z
{0, }, // 51
{0, }, // 52
{0, }, // 53
{0, }, // 54
{0, }, // 55
{0, }, // 56
{0, }, // 57
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 58 PSMCT16SZ
{0, }, // 59
{0, }, // 60
{0, }, // 61
{0, }, // 62
{0, }, // 63
};
// The largest PSM in use is 58, but these arrays are sized MAX_PSM so that any PSM value is a valid index
u32** g_pageTable[MAX_PSM] = {NULL,};
u32** g_blockTable[MAX_PSM] = {NULL, };
u32** g_columnTable[MAX_PSM] = {NULL, };
u32 g_pageTable2[MAX_PSM][127][127] = {0, };
u32** g_pageTableNew[MAX_PSM] = {NULL,};
/* PSM reference array
{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL,

View File

@ -23,7 +23,6 @@
#include "GS.h"
#include "Mem.h"
#define DSTPSM gs.dstbuf.psm
extern int tempX, tempY;
extern int pitch, area, fracX;
extern int nSize;
@ -37,13 +36,13 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
{
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
if ((gs.imageEnd.x - gs.trxpos.dx) % widthlimit)
{
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEnd.x, gs.dstbuf.psm);
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
{
/* write as many pixel at one time as possible */
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
@ -53,7 +52,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
{
/* write as many pixel at one time as possible */
@ -77,14 +76,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
}
}
if (tempX >= gs.imageEndX)
if (tempX >= gs.imageEnd.x)
{
assert(tempX == gs.imageEndX);
assert(tempX == gs.imageEnd.x);
tempX = gs.trxpos.dx;
}
else
{
assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0);
assert(gs.transferring == false || nSize*sizeof(T) / 4 == 0);
return NULL;
}
}
@ -96,24 +95,24 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
template <class T>
static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthlimit, int endY, const T *buf)
{
if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit))
if (widthlimit != 8 || ((gs.imageEnd.x - gs.trxpos.dx) % widthlimit))
{
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEnd.x, gs.dstbuf.psm);
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
{
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw);
}
if (tempX >= gs.imageEndX)
if (tempX >= gs.imageEnd.x)
{
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
assert(gs.transferring == false || tempX == gs.imageEnd.x);
tempX = gs.trxpos.dx;
}
else
{
assert(gs.imageTransfer == -1 || nSize == 0);
assert(gs.transferring == false || nSize == 0);
return NULL;
}
}
@ -124,7 +123,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
{
if (nSize < widthlimit) return NULL;
@ -140,9 +139,9 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw);
}
if (tempX >= gs.imageEndX)
if (tempX >= gs.imageEnd.x)
{
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
assert(gs.transferring == false || tempX == gs.imageEnd.x);
tempX = gs.trxpos.dx;
}
else
@ -155,7 +154,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
nSize = 0;
}
assert(gs.imageTransfer == -1 || nSize == 0);
assert(gs.transferring == false || nSize == 0);
return NULL;
}
@ -171,7 +170,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, s32 widthli
{
for (; tempY < endY; ++tempY)
{
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
{
/* write as many pixel at one time as possible */
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
@ -200,13 +199,13 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, s32 widthli
}
}
if (tempX >= gs.imageEndX)
if (tempX >= gs.imageEnd.x)
{
tempX = gs.trxpos.dx;
}
else
{
assert(gs.imageTransfer == -1 || (nSize / 32) == 0);
assert(gs.transferring == false || (nSize / 32) == 0);
return NULL;
}
}
@ -238,7 +237,7 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
{
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
for (tempX = startX; tempX < gs.imageEnd.x; tempX++, buf++)
{
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
}
@ -255,7 +254,7 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
{
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
for (tempX = startX; tempX < gs.imageEnd.x; tempX++, buf += 3)
{
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
}
@ -272,7 +271,7 @@ static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthli
{
for (u32 tempi = 0; tempi < blockheight; ++tempi)
{
for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++)
for (tempX = startX; tempX < gs.imageEnd.x; tempX += 2, buf++)
{
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);

File diff suppressed because it is too large Load Diff

View File

@ -1,973 +0,0 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef NEWREGS_H_INCLUDED
#define NEWREGS_H_INCLUDED
#ifdef USE_OLD_REGS
#include "Regs.h"
#else
// GIF register identifiers, covering 0x00-0x0f (0x0b has no enumerator).
// NOTE(review): presumably these are the register descriptors dispatched to the
// GIFPackedRegHandler* functions declared in this header — confirm.
enum GIF_REG
{
GIF_REG_PRIM = 0x00,
GIF_REG_RGBA = 0x01,
GIF_REG_STQ = 0x02,
GIF_REG_UV = 0x03,
GIF_REG_XYZF2 = 0x04,
GIF_REG_XYZ2 = 0x05,
GIF_REG_TEX0_1 = 0x06,
GIF_REG_TEX0_2 = 0x07,
GIF_REG_CLAMP_1 = 0x08,
GIF_REG_CLAMP_2 = 0x09,
GIF_REG_FOG = 0x0a,
GIF_REG_XYZF3 = 0x0c,
GIF_REG_XYZ3 = 0x0d,
GIF_REG_A_D = 0x0e,
GIF_REG_NOP = 0x0f,
};
// Register identifiers covering 0x00-0x62; the numbering is sparse (many values have no enumerator).
// The first sixteen values mirror GIF_REG, except that 0x0e (GIF_REG_A_D) has no counterpart here and
// 0x01/0x02 are named RGBAQ/ST instead of RGBA/STQ.
// Registers that exist per context come as _1/_2 pairs on consecutive values.
// NOTE(review): presumably these are the addresses used for A+D writes and dispatched to the
// GIFRegHandler* functions declared in this header — confirm.
enum GIF_A_D_REG
{
GIF_A_D_REG_PRIM = 0x00,
GIF_A_D_REG_RGBAQ = 0x01,
GIF_A_D_REG_ST = 0x02,
GIF_A_D_REG_UV = 0x03,
GIF_A_D_REG_XYZF2 = 0x04,
GIF_A_D_REG_XYZ2 = 0x05,
GIF_A_D_REG_TEX0_1 = 0x06,
GIF_A_D_REG_TEX0_2 = 0x07,
GIF_A_D_REG_CLAMP_1 = 0x08,
GIF_A_D_REG_CLAMP_2 = 0x09,
GIF_A_D_REG_FOG = 0x0a,
GIF_A_D_REG_XYZF3 = 0x0c,
GIF_A_D_REG_XYZ3 = 0x0d,
GIF_A_D_REG_NOP = 0x0f,
GIF_A_D_REG_TEX1_1 = 0x14,
GIF_A_D_REG_TEX1_2 = 0x15,
GIF_A_D_REG_TEX2_1 = 0x16,
GIF_A_D_REG_TEX2_2 = 0x17,
GIF_A_D_REG_XYOFFSET_1 = 0x18,
GIF_A_D_REG_XYOFFSET_2 = 0x19,
GIF_A_D_REG_PRMODECONT = 0x1a,
GIF_A_D_REG_PRMODE = 0x1b,
GIF_A_D_REG_TEXCLUT = 0x1c,
GIF_A_D_REG_SCANMSK = 0x22,
GIF_A_D_REG_MIPTBP1_1 = 0x34,
GIF_A_D_REG_MIPTBP1_2 = 0x35,
GIF_A_D_REG_MIPTBP2_1 = 0x36,
GIF_A_D_REG_MIPTBP2_2 = 0x37,
GIF_A_D_REG_TEXA = 0x3b,
GIF_A_D_REG_FOGCOL = 0x3d,
GIF_A_D_REG_TEXFLUSH = 0x3f,
GIF_A_D_REG_SCISSOR_1 = 0x40,
GIF_A_D_REG_SCISSOR_2 = 0x41,
GIF_A_D_REG_ALPHA_1 = 0x42,
GIF_A_D_REG_ALPHA_2 = 0x43,
GIF_A_D_REG_DIMX = 0x44,
GIF_A_D_REG_DTHE = 0x45,
GIF_A_D_REG_COLCLAMP = 0x46,
GIF_A_D_REG_TEST_1 = 0x47,
GIF_A_D_REG_TEST_2 = 0x48,
GIF_A_D_REG_PABE = 0x49,
GIF_A_D_REG_FBA_1 = 0x4a,
GIF_A_D_REG_FBA_2 = 0x4b,
GIF_A_D_REG_FRAME_1 = 0x4c,
GIF_A_D_REG_FRAME_2 = 0x4d,
GIF_A_D_REG_ZBUF_1 = 0x4e,
GIF_A_D_REG_ZBUF_2 = 0x4f,
GIF_A_D_REG_BITBLTBUF = 0x50,
GIF_A_D_REG_TRXPOS = 0x51,
GIF_A_D_REG_TRXREG = 0x52,
GIF_A_D_REG_TRXDIR = 0x53,
GIF_A_D_REG_HWREG = 0x54,
GIF_A_D_REG_SIGNAL = 0x60,
GIF_A_D_REG_FINISH = 0x61,
GIF_A_D_REG_LABEL = 0x62,
};
// In case we want to change to/from __fastcall for GIF register handlers:
#define __gifCall __fastcall
typedef void __gifCall FnType_GIFRegHandler(const u32* data);
typedef FnType_GIFRegHandler* GIFRegHandler;
extern FnType_GIFRegHandler GIFPackedRegHandlerNull;
extern FnType_GIFRegHandler GIFPackedRegHandlerRGBA;
extern FnType_GIFRegHandler GIFPackedRegHandlerSTQ;
extern FnType_GIFRegHandler GIFPackedRegHandlerUV;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZF2;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ2;
extern FnType_GIFRegHandler GIFPackedRegHandlerFOG;
extern FnType_GIFRegHandler GIFPackedRegHandlerA_D;
extern FnType_GIFRegHandler GIFPackedRegHandlerNOP;
// These are unimplemented, and fall back on the non-packed versions.
extern FnType_GIFRegHandler GIFPackedRegHandlerPRIM;
template<u32 i>
extern FnType_GIFRegHandler GIFPackedRegHandlerTEX0;
template<u32 i>
extern FnType_GIFRegHandler GIFPackedRegHandlerCLAMP;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZF3;
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ3;
extern FnType_GIFRegHandler GIFRegHandlerNull;
extern FnType_GIFRegHandler GIFRegHandlerPRIM;
extern FnType_GIFRegHandler GIFRegHandlerRGBAQ;
extern FnType_GIFRegHandler GIFRegHandlerST;
extern FnType_GIFRegHandler GIFRegHandlerUV;
extern FnType_GIFRegHandler GIFRegHandlerXYZF2;
extern FnType_GIFRegHandler GIFRegHandlerXYZ2;
template<u32 i>
extern FnType_GIFRegHandler GIFRegHandlerTEX0;
template<u32 i>
extern FnType_GIFRegHandler GIFRegHandlerCLAMP;
extern FnType_GIFRegHandler GIFRegHandlerFOG;
extern FnType_GIFRegHandler GIFRegHandlerXYZF3;
extern FnType_GIFRegHandler GIFRegHandlerXYZ3;
extern FnType_GIFRegHandler GIFRegHandlerNOP;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerTEX1;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerTEX2;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerXYOFFSET;
extern FnType_GIFRegHandler GIFRegHandlerPRMODECONT;
extern FnType_GIFRegHandler GIFRegHandlerPRMODE;
extern FnType_GIFRegHandler GIFRegHandlerTEXCLUT;
extern FnType_GIFRegHandler GIFRegHandlerSCANMSK;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP1;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP2;
extern FnType_GIFRegHandler GIFRegHandlerTEXA;
extern FnType_GIFRegHandler GIFRegHandlerFOGCOL;
extern FnType_GIFRegHandler GIFRegHandlerTEXFLUSH;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerSCISSOR;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerALPHA;
extern FnType_GIFRegHandler GIFRegHandlerDIMX;
extern FnType_GIFRegHandler GIFRegHandlerDTHE;
extern FnType_GIFRegHandler GIFRegHandlerCOLCLAMP;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerTEST;
extern FnType_GIFRegHandler GIFRegHandlerPABE;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerFBA;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerFRAME;
template <u32 i>
extern FnType_GIFRegHandler GIFRegHandlerZBUF;
extern FnType_GIFRegHandler GIFRegHandlerBITBLTBUF;
extern FnType_GIFRegHandler GIFRegHandlerTRXPOS;
extern FnType_GIFRegHandler GIFRegHandlerTRXREG;
extern FnType_GIFRegHandler GIFRegHandlerTRXDIR;
extern FnType_GIFRegHandler GIFRegHandlerHWREG;
extern FnType_GIFRegHandler GIFRegHandlerSIGNAL;
extern FnType_GIFRegHandler GIFRegHandlerFINISH;
extern FnType_GIFRegHandler GIFRegHandlerLABEL;
// GifReg & GifPackedReg structs from GSdx, slightly modified
// Comparison-method codes, 0-7: never, always, and the six relational comparisons.
// NOTE(review): presumably the encoding of the alpha-test method field — confirm against the TEST register definition.
enum GS_ATST
{
ATST_NEVER = 0,
ATST_ALWAYS = 1,
ATST_LESS = 2,
ATST_LEQUAL = 3,
ATST_EQUAL = 4,
ATST_GEQUAL = 5,
ATST_GREATER = 6,
ATST_NOTEQUAL = 7,
};
// Failure-handling codes, 0-3.
// NOTE(review): presumably selects which buffers are still written when the alpha test fails
// (nothing / frame buffer only / Z buffer only / RGB only) — confirm.
enum GS_AFAIL
{
AFAIL_KEEP = 0,
AFAIL_FB_ONLY = 1,
AFAIL_ZB_ONLY = 2,
AFAIL_RGB_ONLY = 3,
};
// GIFReg
// 64-bit ALPHA register layout: four 2-bit selectors A, B, C, D followed by 24 padding bits
// (32 bits), then the 8-bit FIX value followed by 24 padding bits (32 bits).
// NOTE(review): REG64_ and REG_END2 are macros defined elsewhere; the first REG_END2 presumably
// closes the bitfield part so that member functions can follow — confirm.
REG64_(GIFReg, ALPHA)
u32 A:2;
u32 B:2;
u32 C:2;
u32 D:2;
u32 _PAD1:24;
u32 FIX:8;
u32 _PAD2:24;
REG_END2
// opaque => output will be Cs/As
// Both overloads rely on && binding tighter than ||:
//   IsOpaque()           is ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80)
//   IsOpaque(amin, amax) is ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80)
__forceinline bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == D && C == 2 && FIX == 0x80);}
__forceinline bool IsOpaque(int amin, int amax) const {return (A == B || amax == 0) && D == 0 || A == 0 && B == D && amin == 0x80 && amax == 0x80;}
REG_END2
REG64_(GIFReg, BITBLTBUF)
u32 SBP:14;
u32 _PAD1:2;
u32 SBW:6;
u32 _PAD2:2;
u32 SPSM:6;
u32 _PAD3:2;
u32 DBP:14;
u32 _PAD4:2;
u32 DBW:6;
u32 _PAD5:2;
u32 DPSM:6;
u32 _PAD6:2;
REG_END
// CLAMP register view. Two anonymous structs overlay the same 64 bits:
//  - the first exposes WMS, WMT, MINU, MAXU (first 32 bits) and MAXV (second
//    32 bits), padding over the positions MINV occupies with _PAD1:8 + _PAD2:2;
//  - the second uses u64 bitfields so that the 10-bit MINV, which follows 24 pad
//    bits and therefore crosses the 32-bit boundary, can be declared as one field.
// Assumes bitfields are allocated in declaration order with identical packing
// for u32 and u64 members — TODO confirm for every supported compiler.
REG64_(GIFReg, CLAMP)
union
{
struct
{
u32 WMS:2;
u32 WMT:2;
u32 MINU:10;
u32 MAXU:10;
u32 _PAD1:8;
u32 _PAD2:2;
u32 MAXV:10;
u32 _PAD3:20;
};
struct
{
u64 _PAD4:24;
u64 MINV:10;
u64 _PAD5:30;
};
};
REG_END
REG64_(GIFReg, COLCLAMP)
u32 CLAMP:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, DIMX)
s32 DM00:3;
s32 _PAD00:1;
s32 DM01:3;
s32 _PAD01:1;
s32 DM02:3;
s32 _PAD02:1;
s32 DM03:3;
s32 _PAD03:1;
s32 DM10:3;
s32 _PAD10:1;
s32 DM11:3;
s32 _PAD11:1;
s32 DM12:3;
s32 _PAD12:1;
s32 DM13:3;
s32 _PAD13:1;
s32 DM20:3;
s32 _PAD20:1;
s32 DM21:3;
s32 _PAD21:1;
s32 DM22:3;
s32 _PAD22:1;
s32 DM23:3;
s32 _PAD23:1;
s32 DM30:3;
s32 _PAD30:1;
s32 DM31:3;
s32 _PAD31:1;
s32 DM32:3;
s32 _PAD32:1;
s32 DM33:3;
s32 _PAD33:1;
REG_END
REG64_(GIFReg, DTHE)
u32 DTHE:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, FBA)
u32 FBA:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, FINISH)
u32 _PAD1:32;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, FOG)
u32 _PAD1:32;
u32 _PAD2:24;
u32 F:8;
REG_END
REG64_(GIFReg, FOGCOL)
u32 FCR:8;
u32 FCG:8;
u32 FCB:8;
u32 _PAD1:8;
u32 _PAD2:32;
REG_END
// FRAME register view: FBP (9 bits), FBW (6 bits) and PSM (6 bits) with padding
// in the first 32 bits, and a full 32-bit FBMSK in the second.
REG64_(GIFReg, FRAME)
u32 FBP:9;
u32 _PAD1:7;
u32 FBW:6;
u32 _PAD2:2;
u32 PSM:6;
u32 _PAD3:2;
u32 FBMSK:32;
REG_END2
// Returns FBP shifted left by 5, i.e. FBP * 32.
// NOTE(review): presumably converts a page index to a block index — confirm.
u32 Block() const {return FBP << 5;}
REG_END2
REG64_(GIFReg, HWREG)
u32 DATA_LOWER:32;
u32 DATA_UPPER:32;
REG_END
REG64_(GIFReg, LABEL)
u32 ID:32;
u32 IDMSK:32;
REG_END
REG64_(GIFReg, MIPTBP1)
u64 TBP1:14;
u64 TBW1:6;
u64 TBP2:14;
u64 TBW2:6;
u64 TBP3:14;
u64 TBW3:6;
u64 _PAD:4;
REG_END
REG64_(GIFReg, MIPTBP2)
u64 TBP4:14;
u64 TBW4:6;
u64 TBP5:14;
u64 TBW5:6;
u64 TBP6:14;
u64 TBW6:6;
u64 _PAD:4;
REG_END
REG64_(GIFReg, NOP)
u32 _PAD1:32;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, PABE)
u32 PABE:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, PRIM)
u32 PRIM:3;
u32 IIP:1;
u32 TME:1;
u32 FGE:1;
u32 ABE:1;
u32 AA1:1;
u32 FST:1;
u32 CTXT:1;
u32 FIX:1;
u32 _PAD1:21;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, PRMODE)
u32 _PRIM:3;
u32 IIP:1;
u32 TME:1;
u32 FGE:1;
u32 ABE:1;
u32 AA1:1;
u32 FST:1;
u32 CTXT:1;
u32 FIX:1;
u32 _PAD2:21;
u32 _PAD3:32;
REG_END
REG64_(GIFReg, PRMODECONT)
u32 AC:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, RGBAQ)
u32 R:8;
u32 G:8;
u32 B:8;
u32 A:8;
float Q;
REG_END
REG64_(GIFReg, SCANMSK)
u32 MSK:2;
u32 _PAD1:30;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, SCISSOR)
u32 SCAX0:11;
u32 _PAD1:5;
u32 SCAX1:11;
u32 _PAD2:5;
u32 SCAY0:11;
u32 _PAD3:5;
u32 SCAY1:11;
u32 _PAD4:5;
REG_END
REG64_(GIFReg, SIGNAL)
u32 ID:32;
u32 IDMSK:32;
REG_END
REG64_(GIFReg, ST)
float S;
float T;
REG_END
// TEST register view: 19 used bits (ATE, ATST, AREF, AFAIL, DATE, DATM, ZTE,
// ZTST) followed by 13 pad bits; the second 32 bits are entirely padding.
// ATST holds a GS_ATST code and AFAIL a GS_AFAIL code (see the comparisons in
// the helpers below).
REG64_(GIFReg, TEST)
u32 ATE:1;
u32 ATST:3;
u32 AREF:8;
u32 AFAIL:2;
u32 DATE:1;
u32 DATM:1;
u32 ZTE:1;
u32 ZTST:2;
u32 _PAD1:13;
u32 _PAD2:32;
REG_END2
// The three helpers below only read fields but are not declared const, so they
// cannot be called through a const GIFRegTEST.
// True when ATE is clear, or when ATST is anything other than ATST_NEVER.
__forceinline bool DoFirstPass() {return !ATE || ATST != ATST_NEVER;} // not all pixels fail automatically
// True when ATE is set, ATST is not ATST_ALWAYS and AFAIL is not AFAIL_KEEP.
__forceinline bool DoSecondPass() {return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP;} // pixels may fail, write fb/z
// True when ATE is set, ATST is not ATST_ALWAYS and AFAIL is AFAIL_KEEP.
__forceinline bool NoSecondPass() {return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP;} // pixels may fail, no output
REG_END2
// TEX0 register view. Two anonymous structs overlay the same 64 bits:
//  - the first exposes every field except TH, padding over TH's positions with
//    _PAD1:2 (end of the first 32 bits) and _PAD2:2 (start of the second);
//  - the second uses u64 bitfields so that the 4-bit TH, which follows 30 pad
//    bits and therefore crosses the 32-bit boundary, can be declared as one field.
// Assumes bitfields are allocated in declaration order with identical packing
// for u32 and u64 members — TODO confirm for every supported compiler.
REG64_(GIFReg, TEX0)
union
{
struct
{
u32 TBP0:14;
u32 TBW:6;
u32 PSM:6;
u32 TW:4;
u32 _PAD1:2;
u32 _PAD2:2;
u32 TCC:1;
u32 TFX:2;
u32 CBP:14;
u32 CPSM:4;
u32 CSM:1;
u32 CSA:5;
u32 CLD:3;
};
struct
{
u64 _PAD3:30;
u64 TH:4;
u64 _PAD4:30;
};
};
REG_END2
// True when (1 << TW) is greater than (TBW << 6), i.e. 2^TW > TBW * 64.
// Not declared const although it only reads fields.
__forceinline bool IsRepeating() {return (u32)((u32)1 << TW) > (u32)(TBW << (u32)6);}
REG_END2
// TEX1 register view: LCM, MXL, MMAG, MMIN, MTBA and L (with padding) in the
// first 32 bits; a signed 12-bit K followed by 20 pad bits in the second.
REG64_(GIFReg, TEX1)
u32 LCM:1;
u32 _PAD1:1;
u32 MXL:3;
u32 MMAG:1;
u32 MMIN:3;
u32 MTBA:1;
u32 _PAD2:9;
u32 L:2;
u32 _PAD3:11;
// K is the only signed field (s32), so reads of it are sign-extended.
// NOTE(review): "1:7:4" presumably means sign:integer:fraction bits — confirm.
s32 K:12; // 1:7:4
u32 _PAD4:20;
REG_END2
// True when MMIN is 1 or has bit 2 set (MMIN values 4..7).
bool IsMinLinear() const {return (MMIN == 1) || (MMIN & 4);}
// True when the 1-bit MMAG field is set.
bool IsMagLinear() const {return MMAG;}
REG_END2
REG64_(GIFReg, TEX2)
u32 _PAD1:20;
u32 PSM:6;
u32 _PAD2:6;
u32 _PAD3:5;
u32 CBP:14;
u32 CPSM:4;
u32 CSM:1;
u32 CSA:5;
u32 CLD:3;
REG_END
REG64_(GIFReg, TEXA)
u32 TA0:8;
u32 _PAD1:7;
u32 AEM:1;
u32 _PAD2:16;
u32 TA1:8;
u32 _PAD3:24;
REG_END
REG64_(GIFReg, TEXCLUT)
u32 CBW:6;
u32 COU:6;
u32 COV:10;
u32 _PAD1:10;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, TEXFLUSH)
u32 _PAD1:32;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, TRXDIR)
u32 XDIR:2;
u32 _PAD1:30;
u32 _PAD2:32;
REG_END
REG64_(GIFReg, TRXPOS)
u32 SSAX:11;
u32 _PAD1:5;
u32 SSAY:11;
u32 _PAD2:5;
u32 DSAX:11;
u32 _PAD3:5;
u32 DSAY:11;
u32 DIRY:1;
u32 DIRX:1;
u32 _PAD4:3;
REG_END
REG64_(GIFReg, TRXREG)
u32 RRW:12;
u32 _PAD1:20;
u32 RRH:12;
u32 _PAD2:20;
REG_END
REG64_(GIFReg, UV)
u32 U:14;
u32 _PAD1:2;
u32 V:14;
u32 _PAD2:2;
u32 _PAD3:32;
REG_END
REG64_(GIFReg, XYOFFSET)
u32 OFX:16;
u32 _PAD1:16;
u32 OFY:16;
u32 _PAD2:16;
REG_END
REG64_(GIFReg, XYZ)
u32 X:16;
u32 Y:16;
u32 Z:32;
REG_END
REG64_(GIFReg, XYZF)
u32 X:16;
u32 Y:16;
u32 Z:24;
u32 F:8;
REG_END
// ZBUF register view: ZBP (9 bits), 15 pad bits, PSM and its padding in the
// first 32 bits; ZMSK (1 bit) plus 31 pad bits in the second.
REG64_(GIFReg, ZBUF)
u32 ZBP:9;
u32 _PAD1:15;
// The two commented-out lines are an alternative 4-bit PSM + 4-bit pad split of
// the same 8 bits; the active declaration below uses 6 + 2 instead.
// u32 PSM:4;
// u32 _PAD2:4;
u32 PSM:6;
u32 _PAD2:2;
u32 ZMSK:1;
u32 _PAD3:31;
REG_END2
// Returns ZBP shifted left by 5, i.e. ZBP * 32 (same scaling as GIFRegFRAME::Block()).
u32 Block() const {return ZBP << 5;}
REG_END2
REG64_SET(GIFReg)
GIFRegALPHA ALPHA;
GIFRegBITBLTBUF BITBLTBUF;
GIFRegCLAMP CLAMP;
GIFRegCOLCLAMP COLCLAMP;
GIFRegDIMX DIMX;
GIFRegDTHE DTHE;
GIFRegFBA FBA;
GIFRegFINISH FINISH;
GIFRegFOG FOG;
GIFRegFOGCOL FOGCOL;
GIFRegFRAME FRAME;
GIFRegHWREG HWREG;
GIFRegLABEL LABEL;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegNOP NOP;
GIFRegPABE PABE;
GIFRegPRIM PRIM;
GIFRegPRMODE PRMODE;
GIFRegPRMODECONT PRMODECONT;
GIFRegRGBAQ RGBAQ;
GIFRegSCANMSK SCANMSK;
GIFRegSCISSOR SCISSOR;
GIFRegSIGNAL SIGNAL;
GIFRegST ST;
GIFRegTEST TEST;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegTEX2 TEX2;
GIFRegTEXA TEXA;
GIFRegTEXCLUT TEXCLUT;
GIFRegTEXFLUSH TEXFLUSH;
GIFRegTRXDIR TRXDIR;
GIFRegTRXPOS TRXPOS;
GIFRegTRXREG TRXREG;
GIFRegUV UV;
GIFRegXYOFFSET XYOFFSET;
GIFRegXYZ XYZ;
GIFRegXYZF XYZF;
GIFRegZBUF ZBUF;
REG_SET_END
// GIFPacked
REG128_(GIFPacked, PRIM)
u32 PRIM:11;
u32 _PAD1:21;
u32 _PAD2:32;
u32 _PAD3:32;
u32 _PAD4:32;
REG_END
REG128_(GIFPacked, RGBA)
u32 R:8;
u32 _PAD1:24;
u32 G:8;
u32 _PAD2:24;
u32 B:8;
u32 _PAD3:24;
u32 A:8;
u32 _PAD4:24;
REG_END
REG128_(GIFPacked, STQ)
float S;
float T;
float Q;
u32 _PAD1:32;
REG_END
REG128_(GIFPacked, UV)
u32 U:14;
u32 _PAD1:18;
u32 V:14;
u32 _PAD2:18;
u32 _PAD3:32;
u32 _PAD4:32;
REG_END
REG128_(GIFPacked, XYZF2)
u32 X:16;
u32 _PAD1:16;
u32 Y:16;
u32 _PAD2:16;
u32 _PAD3:4;
u32 Z:24;
u32 _PAD4:4;
u32 _PAD5:4;
u32 F:8;
u32 _PAD6:3;
u32 ADC:1;
u32 _PAD7:16;
REG_END
REG128_(GIFPacked, XYZ2)
u32 X:16;
u32 _PAD1:16;
u32 Y:16;
u32 _PAD2:16;
u32 Z:32;
u32 _PAD3:15;
u32 ADC:1;
u32 _PAD4:16;
REG_END
REG128_(GIFPacked, FOG)
u32 _PAD1:32;
u32 _PAD2:32;
u32 _PAD3:32;
u32 _PAD4:4;
u32 F:8;
u32 _PAD5:20;
REG_END
REG128_(GIFPacked, A_D)
u64 DATA:64;
u32 ADDR:8; // enum GIF_A_D_REG
u32 _PAD1:24;
u32 _PAD2:32;
REG_END
REG128_(GIFPacked, NOP)
u32 _PAD1:32;
u32 _PAD2:32;
u32 _PAD3:32;
u32 _PAD4:32;
REG_END
REG128_SET(GIFPackedReg)
GIFReg r;
GIFPackedPRIM PRIM;
GIFPackedRGBA RGBA;
GIFPackedSTQ STQ;
GIFPackedUV UV;
GIFPackedXYZF2 XYZF2;
GIFPackedXYZ2 XYZ2;
GIFPackedFOG FOG;
GIFPackedA_D A_D;
GIFPackedNOP NOP;
REG_SET_END
REG64_(GSReg, BGCOLOR)
u32 R:8;
u32 G:8;
u32 B:8;
u32 _PAD1:8;
u32 _PAD2:32;
REG_END
REG64_(GSReg, BUSDIR)
u32 DIR:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
REG64_(GSReg, CSR)
u32 SIGNAL:1;
u32 FINISH:1;
u32 HSINT:1;
u32 VSINT:1;
u32 EDWINT:1;
u32 ZERO1:1;
u32 ZERO2:1;
u32 _PAD1:1;
u32 FLUSH:1;
u32 RESET:1;
u32 _PAD2:2;
u32 NFIELD:1;
u32 FIELD:1;
u32 FIFO:2;
u32 REV:8;
u32 ID:8;
u32 _PAD3:32;
REG_END
REG64_(GSReg, DISPFB) // (-1/2)
u32 FBP:9;
u32 FBW:6;
u32 PSM:5;
u32 _PAD:12;
u32 DBX:11;
u32 DBY:11;
u32 _PAD2:10;
REG_END
REG64_(GSReg, DISPLAY) // (-1/2)
u32 DX:12;
u32 DY:11;
u32 MAGH:4;
u32 MAGV:2;
u32 _PAD:3;
u32 DW:12;
u32 DH:11;
u32 _PAD2:9;
REG_END
REG64_(GSReg, EXTBUF)
u32 EXBP:14;
u32 EXBW:6;
u32 FBIN:2;
u32 WFFMD:1;
u32 EMODA:2;
u32 EMODC:2;
u32 _PAD1:5;
u32 WDX:11;
u32 WDY:11;
u32 _PAD2:10;
REG_END
REG64_(GSReg, EXTDATA)
u32 SX:12;
u32 SY:11;
u32 SMPH:4;
u32 SMPV:2;
u32 _PAD1:3;
u32 WW:12;
u32 WH:11;
u32 _PAD2:9;
REG_END
REG64_(GSReg, EXTWRITE)
u32 WRITE;
u32 _PAD2:32;
REG_END
REG64_(GSReg, IMR)
u32 _PAD1:8;
u32 SIGMSK:1;
u32 FINISHMSK:1;
u32 HSMSK:1;
u32 VSMSK:1;
u32 EDWMSK:1;
u32 _PAD2:19;
u32 _PAD3:32;
REG_END
REG64_(GSReg, PMODE)
u32 EN1:1;
u32 EN2:1;
u32 CRTMD:3;
u32 MMOD:1;
u32 AMOD:1;
u32 SLBG:1;
u32 ALP:8;
u32 _PAD:16;
u32 _PAD1:32;
REG_END
REG64_(GSReg, SIGLBLID)
u32 SIGID:32;
u32 LBLID:32;
REG_END
REG64_(GSReg, SMODE1)
u32 RC:3;
u32 LC:7;
u32 T1248:2;
u32 SLCK:1;
u32 CMOD:2;
u32 EX:1;
u32 PRST:1;
u32 SINT:1;
u32 XPCK:1;
u32 PCK2:2;
u32 SPML:4;
u32 GCONT:1;
u32 PHS:1;
u32 PVS:1;
u32 PEHS:1;
u32 PEVS:1;
u32 CLKSEL:2;
u32 NVCK:1;
u32 SLCK2:1;
u32 VCKSEL:2;
u32 VHP:1;
u32 _PAD1:27;
REG_END
REG64_(GSReg, SMODE2)
u32 INT:1;
u32 FFMD:1;
u32 DPMS:2;
u32 _PAD2:28;
u32 _PAD3:32;
REG_END
REG64_(GSReg, SIGBLID)
u32 SIGID;
u32 LBLID;
REG_END
// Typed accessors for the GS registers: each macro expands to a pointer of the
// matching GSReg* struct type located at a fixed byte offset from g_pBasePS2Mem.
// g_pBasePS2Mem is not declared in this part of the header, so it must be
// declared before any of these macros is used.
// These are object-like macros with short names (PMODE, CSR, IMR, ...): after
// this point every occurrence of those identifiers is replaced by the
// preprocessor, including member or variable names that happen to match.
#define PMODE ((GSRegPMODE*)(g_pBasePS2Mem+0x0000))
#define SMODE1 ((GSRegSMODE1*)(g_pBasePS2Mem+0x0010))
#define SMODE2 ((GSRegSMODE2*)(g_pBasePS2Mem+0x0020))
// SRFSH
// NOTE(review): GSRegSYNCH1, GSRegSYNCH2 and GSRegSYNCV are not defined in the
// visible part of this header; the three macros below only compile where used
// if those types exist elsewhere — confirm.
#define SYNCH1 ((GSRegSYNCH1*)(g_pBasePS2Mem+0x0040))
#define SYNCH2 ((GSRegSYNCH2*)(g_pBasePS2Mem+0x0050))
#define SYNCV ((GSRegSYNCV*)(g_pBasePS2Mem+0x0060))
// DISPFB1/DISPFB2 and DISPLAY1/DISPLAY2 share one struct type per pair.
#define DISPFB1 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0070))
#define DISPLAY1 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x0080))
#define DISPFB2 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0090))
#define DISPLAY2 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x00a0))
#define EXTBUF ((GSRegEXTBUF*)(g_pBasePS2Mem+0x00b0))
#define EXTDATA ((GSRegEXTDATA*)(g_pBasePS2Mem+0x00c0))
#define EXTWRITE ((GSRegEXTWRITE*)(g_pBasePS2Mem+0x00d0))
#define BGCOLOR ((GSRegBGCOLOR*)(g_pBasePS2Mem+0x00e0))
#define CSR ((GSRegCSR*)(g_pBasePS2Mem+0x1000))
#define IMR ((GSRegIMR*)(g_pBasePS2Mem+0x1010))
#define BUSDIR ((GSRegBUSDIR*)(g_pBasePS2Mem+0x1040))
// SIGLBLID points at a GSRegSIGBLID (plain u32 SIGID/LBLID members), not at the
// similarly named GSRegSIGLBLID (32-bit bitfield members) declared above.
#define SIGLBLID ((GSRegSIGBLID*)(g_pBasePS2Mem+0x1080))
//
// sps2tags.h
//
// Extracts 4-bit entry number `reg` from a tag: entries 0-7 are read from
// (tag).ai32[2] and entries 8-15 from (tag).ai32[3], four bits per entry,
// lowest entry in the lowest bits. The result is always in the range 0..15.
#define GET_GIF_REG(tag, reg) \
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
// Evaluates to 50 when bit 0 of SMODE1->CMOD is set and 60 otherwise, divided
// (integer division) by 1 when SMODE2->INT is set and by 2 when it is clear.
#define GET_GSFPS (((SMODE1->CMOD&1) ? 50 : 60) / (SMODE2->INT ? 1 : 2))
extern void WriteTempRegs();
extern void SetFrameSkip(bool skip);
extern void ResetRegs();
extern void SetTexFlush();
extern void SetFogColor(u32 fog);
extern void SetFogColor(GIFRegFOGCOL* fog);
extern bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op
// flush current vertices, call before setting new registers (the main render method)
void Flush(int context);
void FlushBoth();
// called on a primitive switch
void Prim();
#endif
#endif // NEWREGS_H_INCLUDED

View File

@ -26,7 +26,6 @@
#include "ZZoglVB.h"
#include "ZZoglDrawing.h"
#ifdef USE_OLD_REGS
#ifdef _MSC_VER
#pragma warning(disable:4244)
@ -157,6 +156,9 @@ void __gifCall GIFPackedRegHandlerFOG(const u32* data)
void __gifCall GIFPackedRegHandlerA_D(const u32* data)
{
FUNCLOG
// GIFPackedA_D* r = (GIFPackedA_D*)(data);
//
// g_GIFRegHandlers[r->ADDR](data);
if ((data[2] & 0xff) < 100)
g_GIFRegHandlers[data[2] & 0xff](data);
@ -188,21 +190,20 @@ void __gifCall GIFRegHandlerNull(const u32* data)
void __gifCall GIFRegHandlerPRIM(const u32 *data)
{
FUNCLOG
GIFRegPRIM* r = (GIFRegPRIM*)(data);
//if (data[0] & ~0x3ff)
//{
//ZZLog::Warn_Log("Warning: unknown bits in prim %8.8lx_%8.8lx", data[1], data[0]);
//}
gs.primC = 0;
u16 prim_type = (data[0]) & 0x7;
prim->prim = prim_type;
gs._prim[0].prim = prim_type;
gs._prim[1].prim = prim_type;
gs._prim[1]._val = (data[0] >> 3) & 0xff;
prim->prim = r->PRIM;
gs._prim[0].prim = r->PRIM;
gs._prim[1].prim = r->PRIM;
gs._prim[1]._val = (data[0] >> 3) & 0xff; // Setting the next 8 flags after prim at once.
gs.new_tri_fan = !(prim_type ^ PRIM_TRIANGLE_FAN);
gs.new_tri_fan = !(r->PRIM ^ PRIM_TRIANGLE_FAN);
ZZKick->DirtyValidPrevPrim();
Prim();
@ -211,6 +212,10 @@ void __gifCall GIFRegHandlerPRIM(const u32 *data)
void __gifCall GIFRegHandlerRGBAQ(const u32* data)
{
FUNCLOG
// GIFRegRGBAQ* r = (GIFRegRGBAQ*)(data);
// gs.rgba = (r->R | (r->G << 8) | (r->B << 16) | (r->A << 24));
// gs.vertexregs.rgba = gs.rgba;
// gs.vertexregs.q = r->Q;
gs.rgba = data[0];
gs.vertexregs.rgba = data[0];
*(u32*)&gs.vertexregs.q = data[1];
@ -219,6 +224,9 @@ void __gifCall GIFRegHandlerRGBAQ(const u32* data)
void __gifCall GIFRegHandlerST(const u32* data)
{
FUNCLOG
// GIFRegST* r = (GIFRegST*)(data);
// gs.vertexregs.s = r->S;
// gs.vertexregs.t = r->T;
*(u32*)&gs.vertexregs.s = data[0] & 0xffffff00;
*(u32*)&gs.vertexregs.t = data[1] & 0xffffff00;
//*(u32*)&gs.q = data[2];
@ -445,7 +453,10 @@ void __gifCall GIFRegHandlerXYOFFSET(const u32* data)
void __gifCall GIFRegHandlerPRMODECONT(const u32* data)
{
FUNCLOG
gs.prac = data[0] & 0x1;
// Turns all the text into colored blocks on the initial Mana Khemia dialog if not run.
GIFRegPRMODECONT* r = (GIFRegPRMODECONT*)(data);
// gs.prac = data[0] & 0x1;
gs.prac = r->AC;
prim = &gs._prim[gs.prac];
Prim();
@ -468,6 +479,7 @@ void __gifCall GIFRegHandlerTEXCLUT(const u32* data)
vb[0].FlushTexData();
vb[1].FlushTexData();
//Fixme
gs.clut.cbw = r->CBW << 6;
gs.clut.cou = r->COU << 4;
gs.clut.cov = r->COV;
@ -477,9 +489,6 @@ void __gifCall GIFRegHandlerSCANMSK(const u32* data)
{
FUNCLOG
GIFRegSCANMSK* r = (GIFRegSCANMSK*)(data);
// FlushBoth();
// ResolveC(&vb[0]);
// ResolveZ(&vb[0]);
gs.smask = r->MSK;
REG_LOG("Scanmsk == %d", gs.smask);
@ -534,23 +543,20 @@ void __gifCall GIFRegHandlerMIPTBP2(const u32* data)
void __gifCall GIFRegHandlerTEXA(const u32* data)
{
FUNCLOG
texaInfo newinfo;
newinfo.aem = (data[0] >> 15) & 0x1;
newinfo.ta[0] = data[0] & 0xff;
newinfo.ta[1] = data[1] & 0xff;
if (*(u32*)&newinfo != *(u32*)&gs.texa)
// Background of initial Mana Khemia dialog.
GIFRegTEXA* r = (GIFRegTEXA*)(data);
if ((r->AEM != gs.texa.aem) || (r->TA0 != gs.texa.ta[0]) || (r->TA1 != gs.texa.ta[1]))
{
FlushBoth();
*(u32*)&gs.texa = *(u32*) & newinfo;
gs.texa.fta[0] = newinfo.ta[0] / 255.0f;
gs.texa.fta[1] = newinfo.ta[1] / 255.0f;
vb[0].bTexConstsSync = false;
vb[1].bTexConstsSync = false;
}
gs.texa.aem = r->AEM;
gs.texa.ta[0] = r->TA0;
gs.texa.ta[1] = r->TA1;
}
void __gifCall GIFRegHandlerFOGCOL(const u32* data)
@ -564,6 +570,7 @@ void __gifCall GIFRegHandlerFOGCOL(const u32* data)
void __gifCall GIFRegHandlerTEXFLUSH(const u32* data)
{
FUNCLOG
// Not actually handled by GSDX.
SetTexFlush();
}
@ -597,6 +604,12 @@ void __gifCall GIFRegHandlerALPHA(const u32* data)
{
FUNCLOG
alphaInfo newalpha;
// newalpha.a = r->A;
// newalpha.b = r->B;
// newalpha.c = r->C;
// newalpha.d = r->D;
// newalpha.fix = r->FIX;
newalpha.abcd = *(u8*)data;
newalpha.fix = *(u8*)(data + 1);
@ -610,9 +623,11 @@ void __gifCall GIFRegHandlerALPHA(const u32* data)
if (newalpha.d == 3) newalpha.d = 0;
*(u16*)&vb[ctxt].alpha = *(u16*) & newalpha;
// vb[i].alpha = newalpha;
}
}
// DIMX & DTHE are both for dithering, and not currently implemented.
void __gifCall GIFRegHandlerDIMX(const u32* data)
{
FUNCLOG
@ -647,14 +662,18 @@ template <u32 ctxt>
void __gifCall GIFRegHandlerTEST(const u32* data)
{
FUNCLOG
// GIFRegTEST* r = (GIFRegTEST*)(data);
pixTest* test = &vb[ctxt].test;
if ((*(u32*)test & 0x0007ffff) == (data[0] & 0x0007ffff)) return;
//if (test->_val != r->ai32[0])
if ((*(u32*)test & 0x0007ffff) != (data[0] & 0x0007ffff))
{
Flush(ctxt);
Flush(ctxt);
*(u32*)test = data[0];
*(u32*)test = data[0];
//test->_val = r->ai32[0];
}
// test.ate = (data[0] ) & 0x1;
// test.atst = (data[0] >> 1) & 0x7;
@ -688,6 +707,48 @@ void __gifCall GIFRegHandlerFBA(const u32* data)
vb[ctxt].fba.fba = r->FBA;
}
/*
template<u32 i>
void __gifCall GIFRegHandlerFRAME(const u32* data)
{
FUNCLOG
// Affects opening dialogs, movie, and menu on Mana Khemia.
GIFRegFRAME* r = (GIFRegFRAME*)(data);
frameInfo& gsfb = vb[i].gsfb;
int fbw = r->FBW * 64;
int fbp = r->FBP * 32;
int fbh = 0;
if (gs.dthe != 0)
{
// Dither here.
//ZZLog::Error_Log("frameWrite: Dither!");
}
if ((gsfb.fbp == fbp) &&
(gsfb.fbw == fbw) &&
(gsfb.psm == r->PSM) &&
(gsfb.fbm == ZZOglGet_fbm_FrameBitsFix(data[0], data[1])))
{
return;
}
FlushBoth();
if (r->FBW > 0) fbh = ZZOgl_fbh_Calc(r->FBP, r->FBW, r->PSM);
gsfb.fbp = fbp;
gsfb.fbw = fbw;
gsfb.psm = r->PSM;
gsfb.fbh = fbh;
gsfb.fbm = ZZOglGet_fbm_FrameBitsFix(data[0], data[1]);
vb[i].bNeedFrameCheck = 1;
ZZLog::Greg_Log("FRAME_%d", i);
}*/
template <u32 ctxt>
void __gifCall GIFRegHandlerFRAME(const u32* data)
{
@ -715,6 +776,41 @@ void __gifCall GIFRegHandlerFRAME(const u32* data)
vb[ctxt].bNeedFrameCheck = 1;
}
/*
template <u32 i>
void __gifCall GIFRegHandlerZBUF(const u32* data)
{
FUNCLOG
// I'll wait a bit on this one.
GIFRegZBUF* r = (GIFRegZBUF*)(data);
ZZLog::Greg_Log("ZBUF_1");
zbufInfo& zbuf = vb[i].zbuf;
int psm = (0x30 | r->PSM);
int zbp = r->ZBP * 32;
if (zbuf.zbp == zbp &&
zbuf.psm == psm &&
zbuf.zmsk == r->ZMSK)
{
return;
}
// error detection
if (m_Blocks[psm].bpp == 0) return;
FlushBoth();
zbuf.zbp = zbp;
zbuf.psm = psm;
zbuf.zmsk = r->ZMSK;
vb[i].zprimmask = 0xffffffff;
if (zbuf.psm > 0x31) vb[i].zprimmask = 0xffff;
vb[i].bNeedZCheck = 1;
}*/
template <u32 ctxt>
void __gifCall GIFRegHandlerZBUF(const u32* data)
{
@ -758,6 +854,17 @@ void __gifCall GIFRegHandlerBITBLTBUF(const u32* data)
gs.dstbufnew.psm = r->DPSM;
if (gs.dstbufnew.bw == 0) gs.dstbufnew.bw = 64;
// GSdx does this:
/*if((gs.srcbufnew.bw & 1) && (gs.srcbufnew.psm == PSM_PSMT8 || gs.srcbufnew.psm == PSM_PSMT4))
{
gs.srcbufnew.bw &= ~1;
}
if((gs.dstbufnew.bw & 1) && (gs.dstbufnew.psm == PSM_PSMT8 || gs.dstbufnew.psm == PSM_PSMT4))
{
gs.dstbufnew.bw &= ~1; // namcoXcapcom: 5, 11, refered to as 4, 10 in TEX0.TBW later
}*/
}
void __gifCall GIFRegHandlerTRXPOS(const u32* data)
@ -777,53 +884,44 @@ void __gifCall GIFRegHandlerTRXREG(const u32* data)
{
FUNCLOG
GIFRegTRXREG* r = (GIFRegTRXREG*)(data);
gs.imageWtemp = r->RRW;
gs.imageHtemp = r->RRH;
gs.imageTemp.w = r->RRW;
gs.imageTemp.h = r->RRH;
}
void __gifCall GIFRegHandlerTRXDIR(const u32* data)
{
FUNCLOG
// terminate any previous transfers
switch (gs.imageTransfer)
{
case 0: // host->loc
TerminateHostLocal();
break;
case 1: // loc->host
TerminateLocalHost();
break;
}
GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
gs.srcbuf = gs.srcbufnew;
gs.dstbuf = gs.dstbufnew;
gs.imageNew.w = gs.imageTemp.w;
gs.imageNew.h = gs.imageTemp.h;
gs.trxpos = gs.trxposnew;
gs.imageTransfer = data[0] & 0x3;
gs.imageWnew = gs.imageWtemp;
gs.imageHnew = gs.imageHtemp;
gs.imageTransfer = r->XDIR;
gs.transferring = true;
if (gs.imageWnew > 0 && gs.imageHnew > 0)
if (gs.imageNew.w > 0 && gs.imageNew.h > 0)
{
switch (gs.imageTransfer)
{
case 0: // host->loc
case XFER_HOST_TO_LOCAL: // host->loc
InitTransferHostLocal();
break;
case 1: // loc->host
case XFER_LOCAL_TO_HOST: // loc->host
InitTransferLocalHost();
break;
case 2:
case XFER_LOCAL_TO_LOCAL:
TransferLocalLocal();
break;
case 3:
gs.imageTransfer = -1;
case XFER_DEACTIVATED:
ZZLog::WriteLn("Image Transfer = 3?");
gs.transferring = false;
break;
default:
@ -833,9 +931,9 @@ void __gifCall GIFRegHandlerTRXDIR(const u32* data)
else
{
#if defined(ZEROGS_DEVBUILD)
ZZLog::Warn_Log("Dummy transfer.");
//ZZLog::Warn_Log("Dummy transfer.");
#endif
gs.imageTransfer = -1;
gs.transferring = false;
}
}
@ -843,7 +941,7 @@ void __gifCall GIFRegHandlerHWREG(const u32* data)
{
FUNCLOG
if (gs.imageTransfer == 0)
if (gs.transferring && gs.imageTransfer == XFER_HOST_TO_LOCAL)
{
TransferHostLocal(data, 2);
}
@ -866,14 +964,9 @@ void __gifCall GIFRegHandlerSIGNAL(const u32* data)
{
SIGLBLID->SIGID = (SIGLBLID->SIGID & ~data[1]) | (data[0] & data[1]);
// if (gs.CSRw & 0x1) CSR->SIGNAL = 1;
// if (!IMR->SIGMSK && GSirq)
// GSirq();
if (gs.CSRw & 0x1)
{
CSR->SIGNAL = 1;
//gs.CSRw &= ~1;
}
if (!IMR->SIGMSK && GSirq) GSirq();
@ -889,17 +982,6 @@ void __gifCall GIFRegHandlerFINISH(const u32* data)
if (gs.CSRw & 0x2) CSR->FINISH = 1;
if (!IMR->FINISHMSK && GSirq) GSirq();
// if( gs.CSRw & 2 ) {
// //gs.CSRw &= ~2;
// //CSR->FINISH = 0;
//
//
// }
// CSR->FINISH = 1;
//
// if( !IMR->FINISHMSK && GSirq )
// GSirq();
}
}
@ -913,7 +995,6 @@ void __gifCall GIFRegHandlerLABEL(const u32* data)
}
}
void SetMultithreaded()
{
// Some older versions of PCSX2 didn't properly set the irq callback to NULL
@ -1077,4 +1158,3 @@ void SetFrameSkip(bool skip)
}
}
#endif

View File

@ -20,9 +20,6 @@
#ifndef __GSREGS_H__
#define __GSREGS_H__
#ifdef USE_OLD_REGS
enum GIF_REG
{
GIF_REG_PRIM = 0x00,
@ -193,7 +190,7 @@ enum GS_ATST
ATST_EQUAL = 4,
ATST_GEQUAL = 5,
ATST_GREATER = 6,
ATST_NOTEQUAL = 7,
ATST_NOTEQUAL = 7
};
enum GS_AFAIL
@ -201,9 +198,24 @@ enum GS_AFAIL
AFAIL_KEEP = 0,
AFAIL_FB_ONLY = 1,
AFAIL_ZB_ONLY = 2,
AFAIL_RGB_ONLY = 3,
AFAIL_RGB_ONLY = 3
};
enum GS_TFX
{
TFX_MODULATE = 0,
TFX_DECAL = 1,
TFX_HIGHLIGHT = 2,
TFX_HIGHLIGHT2 = 3
};
enum GS_CLAMP
{
CLAMP_REPEAT = 0,
CLAMP_CLAMP = 1,
CLAMP_REGION_CLAMP = 2,
CLAMP_REGION_REPEAT = 3
};
// GIFReg
REG64_(GIFReg, ALPHA)
@ -763,7 +775,8 @@ REG128_SET(GIFPackedReg)
GIFPackedNOP NOP;
REG_SET_END
// This register stores the background color. Theoretically it'd get blended with the image in some cases, but we don't appear to be
// using it. See PMODE->SLBG. GSDx *is* using it.
REG64_(GSReg, BGCOLOR)
u32 R:8;
u32 G:8;
@ -772,12 +785,15 @@ REG64_(GSReg, BGCOLOR)
u32 _PAD2:32;
REG_END
// This register switches the direction of Fifo. 0 - Host -> Local; 1 - Local -> Host. Fifo is supposed to be empty at the time.
// Unchecked by GSdx or ZZOgl.
REG64_(GSReg, BUSDIR)
u32 DIR:1;
u32 _PAD1:31;
u32 _PAD2:32;
REG_END
// Mostly looks handled by pcsx2.
REG64_(GSReg, CSR)
u32 SIGNAL:1;
u32 FINISH:1;
@ -798,6 +814,12 @@ REG64_(GSReg, CSR)
u32 _PAD3:32;
REG_END
// Settings for whichever circuit we're using. (Again, see PMODE.)
// -- FBP - Frame Buffer Pointer. address / 2048.
// -- FBW - Frame Buffer Width. width / 64.
// -- PSM - psm, but 5 bit. 0 - PSMCT32; 1 - PSMCT24; 2 - PSMCT16; 10 - PSMCT16S; 18 - PS-GPU24?
// -- DBX - Upper left x coords of rectangle.
// -- DBY - Upper left y coords of rectangle.
REG64_(GSReg, DISPFB) // (-1/2)
u32 FBP:9;
u32 FBW:6;
@ -808,6 +830,14 @@ REG64_(GSReg, DISPFB) // (-1/2)
u32 _PAD2:10;
REG_END
// Settings for whichever display we're using.
// -- DX - X position in the display area.
// -- DY - Y position in the display area.
// -- MAGH - Horizontal Magnification; x1 - x16.
// -- MAGV - Vertical Magnification; x1 - x16.
// -- DW - Display Area Width - 1.
// -- DH - Display Area Height - 1.
REG64_(GSReg, DISPLAY) // (-1/2)
u32 DX:12;
u32 DY:11;
@ -819,6 +849,16 @@ REG64_(GSReg, DISPLAY) // (-1/2)
u32 _PAD2:9;
REG_END
// This register has settings for the frame buffer when writing back. These next three registers are unused in ZZOgl & GSDx.
// -- EXBP - Base pointer of the buffer / 64.
// -- EXBW - Width of the buffer / 64.
// -- FBIN - Whether we use OUT1 or OUT2. 0 - 1; 1 - 2.
// -- WFFMD - Interlace Mode; 0 - Field; 1 - Frame.
// -- EMODA - When processing an input alpha value; 0 - write it as is; 1 - Convert from RGB to luminance value Y. 2 - Same as 1, only /2. 3 - 0.
// -- EMODC - When processing an input color value; 0 - write it as is; 1 - Convert from RGB to luminance value Y. 2 - Convert to YCbCr. 3 - Write Alpha to RGB.
// -- WDX - X coords.
// -- WDY - Y coords.
REG64_(GSReg, EXTBUF)
u32 EXBP:14;
u32 EXBW:6;
@ -832,6 +872,14 @@ REG64_(GSReg, EXTBUF)
u32 _PAD2:10;
REG_END
// Sets where you read when the write above is performed.
// -- SX - X coords.
// -- SY - Y coords.
// -- SMPH - Horiz Sampling rate.
// -- SMPV - Vert Sampling rate.
// -- WW - Rect Width - 1
// -- WH - Rect Height - 1
REG64_(GSReg, EXTDATA)
u32 SX:12;
u32 SY:11;
@ -843,11 +891,13 @@ REG64_(GSReg, EXTDATA)
u32 _PAD2:9;
REG_END
// Starts or stops the aforementioned write.
REG64_(GSReg, EXTWRITE)
u32 WRITE;
u32 _PAD2:32;
REG_END
// Pcsx2 handles this.
REG64_(GSReg, IMR)
u32 _PAD1:8;
u32 SIGMSK:1;
@ -859,6 +909,16 @@ REG64_(GSReg, IMR)
u32 _PAD3:32;
REG_END
// The fields of PMODE are:
// -- EN1 - Read Circuit 1; 0 - off, 1 - on.
// -- EN2 - Read Circuit 2; 0 - off, 1 - on.
// -- CRTMD - Always 1.
// -- MMOD - For Alpha blending, the selection is: 0 - The Alpha value of circuit 1, 1 - The ALP register value.
// -- AMOD - The OUT1 Alpha value selection: 0 - Read circuit 1, 1 - Read Circuit 2.
// -- SLBG - The Alpha blending type: 0 - blended with the output of Read circuit 1, 1 - blended with the background color.
// -- ALP - The fixed Alpha value.
//
REG64_(GSReg, PMODE)
u32 EN1:1;
u32 EN2:1;
@ -871,11 +931,13 @@ REG64_(GSReg, PMODE)
u32 _PAD1:32;
REG_END
// Pcsx2 handles this.
REG64_(GSReg, SIGLBLID)
u32 SIGID:32;
u32 LBLID:32;
REG_END
// Not sure about this one...
REG64_(GSReg, SMODE1)
u32 RC:3;
u32 LC:7;
@ -901,6 +963,11 @@ REG64_(GSReg, SMODE1)
u32 _PAD1:27;
REG_END
// The fields of SMODE2 are:
// -- INT - 0 for non-interlaced; 1 for interlaced.
// -- FFMD - 0 for field mode (read every other line); 1 for frame mode (read every line)
// -- DPMS - VESA DPMS mode setting; 0 - on, 1 - standby, 2 - suspend, 3 - off.
//
REG64_(GSReg, SMODE2)
u32 INT:1;
u32 FFMD:1;
@ -914,6 +981,8 @@ REG64_(GSReg, SIGBLID)
u32 LBLID;
REG_END
extern u8* g_pBasePS2Mem;
#define PMODE ((GSRegPMODE*)(g_pBasePS2Mem+0x0000))
#define SMODE1 ((GSRegSMODE1*)(g_pBasePS2Mem+0x0010))
#define SMODE2 ((GSRegSMODE2*)(g_pBasePS2Mem+0x0020))
@ -942,6 +1011,21 @@ REG_END
#define GET_GSFPS (((SMODE1->CMOD&1) ? 50 : 60) / (SMODE2->INT ? 1 : 2))
static __forceinline GSRegDISPLAY* Display_Reg(int circuit)
{
return (circuit) ? DISPLAY2 : DISPLAY1;
}
static __forceinline GSRegDISPFB* Dispfb_Reg(int circuit)
{
return (circuit) ? DISPFB2 : DISPFB1;
}
static __forceinline bool Circuit_Enabled(int circuit)
{
return (circuit) ? PMODE->EN2 : PMODE->EN1;
}
extern void WriteTempRegs();
extern void SetFrameSkip(bool skip);
extern void ResetRegs();
@ -958,7 +1042,4 @@ void FlushBoth();
// called on a primitive switch
void Prim();
#else
#include "NewRegs.h"
#endif
#endif

View File

@ -58,10 +58,12 @@
#include "CRC.h"
#include "ZZLog.h"
#ifdef _WIN32
// need C definitions -- no mangling please!
extern "C" u32 CALLBACK PS2EgetLibType(void);
extern "C" u32 CALLBACK PS2EgetLibVersion2(u32 type);
extern "C" char* CALLBACK PS2EgetLibName(void);
#endif
#include "ZZoglMath.h"
#include "Profile.h"

View File

@ -22,7 +22,7 @@
#include "Util.h"
#if defined(ZEROGS_SSE2)
#include <emmintrin.h>
#include <immintrin.h>
#endif
// Local Clut buffer:

View File

@ -0,0 +1,314 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <stdlib.h>
#include <math.h>
#include "GS.h"
#include "Mem.h"
#include "x86.h"
#include "targets.h"
#include "ZZoglShaders.h"
#include "ZZClut.h"
#include "ZZoglVB.h"
#ifdef ZEROGS_SSE2
#include <immintrin.h>
#endif
extern bool g_bUpdateStencil;
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode);
void SetWriteDepth();
bool IsWriteDepth();
bool IsWriteDestAlphaTest();
// 0.999 / (32 * ln 2). CDepthTarget::Update() passes this value to the
// BitBltDepth pixel shader as the w component of "g_fOneColor".
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
// Default-constructs the CRenderTarget base and zeroes the depth and stencil
// renderbuffer ids and the framebuffer-validation counter (icount).
CDepthTarget::CDepthTarget() : CRenderTarget(), pdepth(0), pstencil(0), icount(0) {}
// Releases the renderbuffers through CDepthTarget::Destroy().
CDepthTarget::~CDepthTarget()
{
FUNCLOG
Destroy();
}
// Creates the base render target, then allocates depth/stencil storage for it.
//
// A packed GL_DEPTH24_STENCIL8_EXT renderbuffer is tried first; when GL reports
// no error, pstencil simply aliases pdepth. Otherwise the same renderbuffer is
// re-specified as GL_DEPTH_COMPONENT24 and, only when g_bUpdateStencil is set,
// a separate GL_STENCIL_INDEX8_EXT renderbuffer is generated for pstencil.
//
// Returns false when CRenderTarget::Create() fails or when the separate stencil
// allocation reports a GL error. On success status becomes TS_NeedUpdate.
//
// NOTE(review): the depth-only re-specification is not followed by its own
// glGetError() check, and the failure message mentions the depth buffer although
// it is emitted after the stencil allocation — confirm this is intended.
bool CDepthTarget::Create(const frameInfo& frame)
{
	FUNCLOG

	if (!CRenderTarget::Create(frame))
		return false;

	GL_REPORT_ERROR();

	// First choice: one renderbuffer holding both depth and stencil.
	glGenRenderbuffersEXT(1, &pdepth);
	glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pdepth);
	glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, RW(fbw), RH(fbh));

	const bool packed_ok = (glGetError() == GL_NO_ERROR);

	if (packed_ok)
	{
		pstencil = pdepth;
	}
	else
	{
		// try a separate depth and stencil buffer
		glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pdepth);
		glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT24, RW(fbw), RH(fbh));

		if (!g_bUpdateStencil)
		{
			pstencil = 0;
		}
		else
		{
			glGenRenderbuffersEXT(1, &pstencil);
			glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pstencil);
			glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_STENCIL_INDEX8_EXT, RW(fbw), RH(fbh));

			const bool stencil_failed = (glGetError() != GL_NO_ERROR);

			if (stencil_failed)
			{
				ZZLog::Error_Log("Failed to create depth buffer %dx%d.", RW(fbw), RH(fbh));
				return false;
			}
		}
	}

	status = TS_NeedUpdate;

	return true;
}
// Detaches and deletes the depth/stencil renderbuffers, then destroys the base
// render target. The GL work is skipped entirely while status is zero.
void CDepthTarget::Destroy()
{
	FUNCLOG

	if (status) // In this case Framebuffer extension is off-use and lead to segfault
	{
		ResetRenderTarget(1);

		// Detach both attachment points before deleting the buffers.
		FB::Attach(GL_DEPTH_ATTACHMENT_EXT);
		FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
		GL_REPORT_ERRORD();

		// pstencil may alias pdepth (packed depth/stencil); delete it only when
		// it names a renderbuffer of its own.
		const bool own_stencil = (pstencil != 0) && (pstencil != pdepth);

		if (own_stencil) glDeleteRenderbuffersEXT(1, &pstencil);

		pstencil = 0;

		if (pdepth != 0)
		{
			glDeleteRenderbuffersEXT(1, &pdepth);
			pdepth = 0;
		}

		GL_REPORT_ERRORD();
	}

	CRenderTarget::Destroy();
}
extern int g_nDepthUsed; // > 0 if depth is used
void CDepthTarget::Resolve()
{
FUNCLOG
if (g_nDepthUsed > 0 && conf.mrtdepth && !(status & TS_Virtual) && IsWriteDepth() && !(conf.settings().no_depth_resolve))
CRenderTarget::Resolve();
else
{
// flush if necessary
FlushIfNecesary(this);
if (!(status & TS_Virtual)) status |= TS_Resolved;
}
if (!(status&TS_Virtual))
{
SetWriteDepth();
}
}
void CDepthTarget::Resolve(int startrange, int endrange)
{
FUNCLOG
if (g_nDepthUsed > 0 && conf.mrtdepth && !(status&TS_Virtual) && IsWriteDepth())
{
CRenderTarget::Resolve(startrange, endrange);
}
else
{
// flush if necessary
FlushIfNecesary(this) ;
if (!(status & TS_Virtual))
status |= TS_Resolved;
}
if (!(status&TS_Virtual))
{
SetWriteDepth();
}
}
// Redraws this depth target by running the BitBltDepth pixel shader over a
// full rectangle (vboRect) with this target's texture as color attachment 0 and
// its depth/stencil renderbuffers attached, then marks the target TS_Resolved.
//
// context selects the VB whose TEST/ZBUF state controls the depth test; nothing
// is drawn when that VB's test.zte is 0. prndr is not referenced in the body.
// Must not be called on a virtual target or with a CLUT pixel format (asserted).
void CDepthTarget::Update(int context, CRenderTarget* prndr)
{
FUNCLOG
assert(!(status & TS_Virtual));
// align the rect to the nearest page
// note that fbp is always aligned on page boundaries
tex0Info texframe;
texframe.tbp0 = fbp;
texframe.tbw = fbw;
texframe.tw = fbw;
texframe.th = fbh;
texframe.psm = psm;
// FIXME some field are not initialized...
// in particular the clut related one
assert(!PSMT_ISCLUT(psm));
DisableAllgl();
VB& curvb = vb[context];
// Early out when depth testing is disabled for this context. Note that
// DisableAllgl() above has already run by this point.
if (curvb.test.zte == 0) return;
SetShaderCaller("CDepthTarget::Update");
glEnable(GL_DEPTH_TEST);
glDepthMask(!curvb.zbuf.zmsk);
// Four-entry table indexed by test.ztst.
// assumes ztst is always in 0..3 — TODO confirm against the pixTest definition
static const u32 g_dwZCmp[] = { GL_NEVER, GL_ALWAYS, GL_GEQUAL, GL_GREATER };
glDepthFunc(g_dwZCmp[curvb.test.ztst]);
// write color and zero out stencil buf, always 0 context!
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1);
ZZshGLSetTextureParameter(ppsBitBltDepth.prog, ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth");
// Every component of v is overwritten below, so the values returned by these
// two calls are never used.
// NOTE(review): presumably both are called for their side effects — confirm
// before removing either.
float4 v = DefaultBitBltPos();
v = DefaultBitBltTex();
v.x = 1;
v.y = 2;
v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f;
v.w = g_filog32;
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sOneColor, v, "g_fOneColor");
// Start from g_vdepth and zero components according to the depth format:
// w for PSMT24Z, z and w for every format other than PSMT24Z and PSMT32Z.
float4 vdepth = g_vdepth;
if (psm == PSMT24Z)
{
vdepth.w = 0;
}
else if (psm != PSMT32Z)
{
vdepth.z = vdepth.w = 0;
}
assert(ppsBitBltDepth.sBitBltZ != 0);
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sBitBltZ, (vdepth*(255.0f / 256.0f)), "g_fBitBltZ");
assert(pdepth != 0);
//GLint w1 = 0;
//GLint h1 = 0;
// Attach this target's texture at index 0, bind depth/stencil, and call
// Attach2D for index 1 without a texture argument.
FB::Attach2D(0, ptex);
//glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_WIDTH_EXT, &w1);
//glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_HEIGHT_EXT, &h1);
SetDepthStencilSurface();
FB::Attach2D(1);
GLenum buffer = GL_COLOR_ATTACHMENT0_EXT;
//ZZLog::Error_Log("CDepthTarget::Update: w1 = 0x%x; h1 = 0x%x", w1, h1);
DrawBuffers(&buffer);
SetViewport();
// Draw filled even in wireframe mode; the line polygon mode is restored below.
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
SET_STREAM();
ZZshSetVertexShader(pvsBitBlt.prog);
ZZshSetPixelShader(ppsBitBltDepth.prog);
DrawTriangleArray();
// Plain assignment: this replaces every other status flag with TS_Resolved.
status = TS_Resolved;
if (!IsWriteDepth())
{
ResetRenderTarget(1);
}
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
glEnable(GL_SCISSOR_TEST);
#ifdef _DEBUG
// Debug builds only: dump the source texture and the resulting frame when
// g_bSaveZUpdate is set.
if (g_bSaveZUpdate)
{
SaveTex(&texframe, 1);
SaveTexture("frame1.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh));
}
#endif
}
// Attaches pdepth as the depth renderbuffer of the bound framebuffer and,
// when one exists, pstencil as its stencil renderbuffer. Without a stencil
// buffer the stencil attachment point is explicitly cleared.
void CDepthTarget::SetDepthStencilSurface()
{
FUNCLOG
FB::Attach(GL_DEPTH_ATTACHMENT_EXT, pdepth);
if (pstencil)
{
// there's a bug with attaching stencil and depth buffers
FB::Attach(GL_STENCIL_ATTACHMENT_EXT, pstencil);
// Framebuffer completeness is only verified on the first 8 calls; after
// that the combination is assumed to keep working.
if (icount++ < 8)
{
GL_REPORT_ERRORD();
if (FB::State() != GL_FRAMEBUFFER_COMPLETE_EXT)
{
// The stencil attachment made the framebuffer incomplete: detach it,
// free the renderbuffer unless it is shared with the depth buffer,
// and stop using stencil for this target.
FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
if (pstencil != pdepth) glDeleteRenderbuffersEXT(1, &pstencil);
pstencil = 0;
g_bUpdateStencil = 0;
}
}
}
else
{
FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
}
}

View File

@ -47,8 +47,11 @@ inline void* wglGetProcAddress(const char* x)
#endif
#include "Mem.h"
extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
// Defines
extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
extern GLenum s_rgbeq, s_alphaeq;
#ifndef GL_DEPTH24_STENCIL8_EXT // allows FBOs to support stencils
# define GL_DEPTH_STENCIL_EXT 0x84F9
@ -57,28 +60,50 @@ extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
# define GL_TEXTURE_STENCIL_SIZE_EXT 0x88F1
#endif
#define GL_STENCILFUNC(func, ref, mask) { \
s_stencilfunc = func; \
s_stencilref = ref; \
s_stencilmask = mask; \
glStencilFunc(func, ref, mask); \
#ifdef _WIN32
#define GL_LOADFN(name) { \
if( (*(void**)&name = (void*)wglGetProcAddress(#name)) == NULL ) { \
ZZLog::Error_Log("Failed to find %s, exiting.", #name); \
} \
}
#else
// let GLEW take care of it
#define GL_LOADFN(name)
#endif
static __forceinline void GL_STENCILFUNC(GLenum func, GLint ref, GLuint mask)
{
s_stencilfunc = func;
s_stencilref = ref;
s_stencilmask = mask;
glStencilFunc(func, ref, mask);
}
#define GL_STENCILFUNC_SET() glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask)
static __forceinline void GL_STENCILFUNC_SET()
{
glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask);
}
// sets the data stream
#define SET_STREAM() { \
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)8); \
glSecondaryColorPointerEXT(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)12); \
glTexCoordPointer(3, GL_FLOAT, sizeof(VertexGPU), (void*)16); \
glVertexPointer(4, GL_SHORT, sizeof(VertexGPU), (void*)0); \
static __forceinline void SET_STREAM()
{
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)8);
glSecondaryColorPointerEXT(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)12);
glTexCoordPointer(3, GL_FLOAT, sizeof(VertexGPU), (void*)16);
glVertexPointer(4, GL_SHORT, sizeof(VertexGPU), (void*)0);
}
// global alpha blending settings
extern GLenum g_internalRGBAFloat16Fmt;
//static __forceinline void SAFE_RELEASE_TEX(u32& x)
//{
// if (x != 0)
// {
// glDeleteTextures(1, &x);
// x = 0;
// }
//}
#define SAFE_RELEASE_TEX(x) { if( (x) != 0 ) { glDeleteTextures(1, &(x)); x = 0; } }
// inline for an extremely often used sequence
@ -104,12 +129,192 @@ extern void (APIENTRY *zgsBlendEquationSeparateEXT)(GLenum, GLenum);
extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum);
#endif
// Draws the currently bound 4-vertex quad as a triangle strip and reports
// any GL error through GL_REPORT_ERRORD.
static __forceinline void DrawTriangleArray()
{
    const GLint first_vertex = 0;
    const GLsizei vertex_count = 4;

    glDrawArrays(GL_TRIANGLE_STRIP, first_vertex, vertex_count);
    GL_REPORT_ERRORD();
}
// ------------------------ Types -------------------------
static __forceinline void DrawBuffers(GLenum *buffer)
{
if (glDrawBuffers != NULL)
{
glDrawBuffers(1, buffer);
}
/////////////////////
// graphics resources
extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
GL_REPORT_ERRORD();
}
// Thin wrappers around the EXT framebuffer-object calls, all operating on
// the single framebuffer name stored in FB::buf.
namespace FB
{
    // GL name of the framebuffer object (defined elsewhere).
    extern u32 buf;

    // Generates the framebuffer object name into buf.
    static __forceinline void Create()
    {
        glGenFramebuffersEXT(1, &buf);
    }

    // Makes buf the current framebuffer.
    static __forceinline void Bind()
    {
        glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, buf);
    }

    // Binds framebuffer 0 (the default framebuffer).
    static __forceinline void Unbind()
    {
        glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
    }

    // Returns the completeness status of the bound framebuffer.
    static __forceinline GLenum State()
    {
        return glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
    }

    // Attaches rectangle texture `id` to color attachment `attach`;
    // the default id of 0 detaches whatever is bound there.
    static __forceinline void Attach2D(int attach, int id = 0)
    {
        const GLenum attachment = GL_COLOR_ATTACHMENT0_EXT + attach;

        glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, attachment, GL_TEXTURE_RECTANGLE_NV, id, 0);
        GL_REPORT_ERRORD();
    }

    // Attaches renderbuffer `id` to attachment point `rend`;
    // the default id of 0 detaches it.
    static __forceinline void Attach(GLenum rend, GLuint id = 0)
    {
        glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, rend, GL_RENDERBUFFER_EXT, id);
    }
};
// Detaches the texture bound to color attachment `index` by attaching
// texture id 0 in its place.
static __forceinline void ResetRenderTarget(int index)
{
    FB::Attach2D(index, 0);
}
// Uploads `pixels` as mip level 0 of the texture bound to `tex_type`,
// with no border. iFormat is the internal format; format/type describe
// the layout of the client data.
static __forceinline void TextureImage(GLenum tex_type, GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(tex_type, 0, iFormat, width, height, 0, format, type, pixels);
}
// Uploads `pixels` as level 0 of the bound GL_TEXTURE_2D texture with an
// explicit width and height.
static __forceinline void Texture2D(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
    glTexImage2D(GL_TEXTURE_2D, 0, iFormat, width, height, 0, format, type, pixels);
}
// Uploads `pixels` as level 0 of the bound GL_TEXTURE_2D texture using the
// fixed BLOCK_TEXWIDTH x BLOCK_TEXHEIGHT dimensions.
static __forceinline void Texture2D(GLint iFormat, GLenum format, GLenum type, const GLvoid* pixels)
{
    glTexImage2D(GL_TEXTURE_2D, 0, iFormat, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, format, type, pixels);
}
// Uploads `pixels` as level 0 of the bound GL_TEXTURE_RECTANGLE_NV texture.
static __forceinline void TextureRect(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
    glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, iFormat, width, height, 0, format, type, pixels);
}
// Same as TextureRect, but addresses the target as GL_TEXTURE_RECTANGLE.
static __forceinline void TextureRect2(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
    glTexImage2D(GL_TEXTURE_RECTANGLE, 0, iFormat, width, height, 0, format, type, pixels);
}
// Uploads `pixels` as mip level 0 of the bound GL_TEXTURE_3D texture,
// with no border.
static __forceinline void Texture3D(GLint iFormat, GLint width, GLint height, GLint depth, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage3D(GL_TEXTURE_3D, 0, iFormat, width, height, depth, 0, format, type, pixels);
}
// Applies the same filter mode to magnification and minification of the
// bound GL_TEXTURE_2D texture.
static __forceinline void setTex2DFilters(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_MAG_FILTER, GL_TEXTURE_MIN_FILTER };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_2D, pnames[i], type);
}
// Applies the same wrap mode to the S and T axes of the bound
// GL_TEXTURE_2D texture.
static __forceinline void setTex2DWrap(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_2D, pnames[i], type);
}
// Applies the same filter mode to magnification and minification of the
// bound GL_TEXTURE_3D texture.
static __forceinline void setTex3DFilters(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_MAG_FILTER, GL_TEXTURE_MIN_FILTER };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_3D, pnames[i], type);
}
// Applies the same wrap mode to the S, T and R axes of the bound
// GL_TEXTURE_3D texture.
static __forceinline void setTex3DWrap(GLint type)
{
    const GLenum pnames[3] = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T, GL_TEXTURE_WRAP_R };

    for (int i = 0; i < 3; ++i)
        glTexParameteri(GL_TEXTURE_3D, pnames[i], type);
}
// Applies the same filter mode to magnification and minification of the
// bound GL_TEXTURE_RECTANGLE_NV texture.
static __forceinline void setRectFilters(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_MAG_FILTER, GL_TEXTURE_MIN_FILTER };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV, pnames[i], type);
}
// Applies the same wrap mode to the S and T axes of the bound
// GL_TEXTURE_RECTANGLE_NV texture.
static __forceinline void setRectWrap(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_RECTANGLE_NV, pnames[i], type);
}
// Same as setRectWrap, but addresses the target as GL_TEXTURE_RECTANGLE.
static __forceinline void setRectWrap2(GLint type)
{
    const GLenum pnames[2] = { GL_TEXTURE_WRAP_S, GL_TEXTURE_WRAP_T };

    for (int i = 0; i < 2; ++i)
        glTexParameteri(GL_TEXTURE_RECTANGLE, pnames[i], type);
}
// Re-applies the cached blend factors (s_srcrgb, s_dstrgb, s_srcalpha,
// s_dstalpha) to the GL state.
static __forceinline void GL_BLEND_SET()
{
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha);
}
// Caches new RGB blend factors and applies them together with the
// currently cached alpha factors.
static __forceinline void GL_BLEND_RGB(GLenum src, GLenum dst)
{
    s_srcrgb = src;
    s_dstrgb = dst;
    zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha);
}
// Caches new alpha blend factors and applies them together with the
// currently cached RGB factors.
static __forceinline void GL_BLEND_ALPHA(GLenum src, GLenum dst)
{
    s_srcalpha = src;
    s_dstalpha = dst;
    zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha);
}
// Caches all four blend factors and applies them to the GL state.
static __forceinline void GL_BLEND_ALL(GLenum srcrgb, GLenum dstrgb, GLenum srcalpha, GLenum dstalpha)
{
    s_srcrgb   = srcrgb;
    s_dstrgb   = dstrgb;
    s_srcalpha = srcalpha;
    s_dstalpha = dstalpha;
    zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha);
}
// Switches GL depth testing on or off.
static __forceinline void GL_ZTEST(bool enable)
{
    if (!enable)
    {
        glDisable(GL_DEPTH_TEST);
        return;
    }

    glEnable(GL_DEPTH_TEST);
}
// Switches GL alpha testing on or off.
static __forceinline void GL_ALPHATEST(bool enable)
{
    if (!enable)
    {
        glDisable(GL_ALPHA_TEST);
        return;
    }

    glEnable(GL_ALPHA_TEST);
}
// Caches a new RGB blend equation and applies it together with the
// currently cached alpha equation.
static __forceinline void GL_BLENDEQ_RGB(GLenum eq)
{
    s_rgbeq = eq;
    zgsBlendEquationSeparateEXT(eq, s_alphaeq);
}
// Caches a new alpha blend equation and applies it together with the
// currently cached RGB equation.
static __forceinline void GL_BLENDEQ_ALPHA(GLenum eq)
{
    s_alphaeq = eq;
    zgsBlendEquationSeparateEXT(s_rgbeq, eq);
}
// GL prototypes
extern PFNGLISRENDERBUFFEREXTPROC glIsRenderbufferEXT;

View File

@ -27,7 +27,7 @@ extern GSconf conf;
using namespace std;
static list<MESSAGE> listMsgs;
const char* logging_prefix = "ZZOgl-PG";
void ProcessMessages()
{
FUNCLOG
@ -130,12 +130,14 @@ void _Log(const char *str)
// Writes an already-formatted message to stderr, preceded by the plugin's
// log prefix. str is always passed as data to a "%s" conversion, never used
// as a format string.
void _WriteToConsole(const char *str)
{
fprintf(stderr,"ZZogl-PG: %s", str);
fprintf(stderr,"%s: ", logging_prefix);
fprintf(stderr,"%s", str);
}
// Writes an already-formatted message to stderr, preceded by the plugin's
// log prefix, and appends the same text to gsLog while logging is active.
void _Print(const char *str)
{
    fprintf(stderr,"ZZogl-PG: %s", str);
    fprintf(stderr,"%s: ", logging_prefix);
    fprintf(stderr,"%s", str);
    // str is message data and may contain '%'; it must never be handed to
    // fprintf as the format string.
    if (IsLogging()) fprintf(gsLog, "%s", str);
}
@ -169,7 +171,7 @@ void WriteToConsole(const char *fmt, ...)
va_start(list, fmt);
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
va_end(list);
}
@ -182,7 +184,7 @@ void Print(const char *fmt, ...)
if (IsLogging()) vfprintf(gsLog, fmt, list);
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
va_end(list);
@ -197,7 +199,7 @@ void WriteLn(const char *fmt, ...)
if (IsLogging()) vfprintf(gsLog, fmt, list);
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
va_end(list);
fprintf(stderr,"\n");
@ -237,7 +239,7 @@ void Prim_Log(const char *fmt, ...)
{
if (IsLogging()) vfprintf(gsLog, fmt, list);
fprintf(stderr, "ZZogl-PG(PRIM): ");
fprintf(stderr, "%s(PRIM): ", logging_prefix);
vfprintf(stderr, fmt, list);
vprintf(fmt, list);
@ -262,7 +264,7 @@ void GS_Log(const char *fmt, ...)
fprintf(gsLog, "\n");
}
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
fprintf(stderr, "\n");
@ -283,7 +285,7 @@ void Warn_Log(const char *fmt, ...)
fprintf(gsLog, "\n");
}
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s(Warning): ", logging_prefix);
vfprintf(stderr, fmt, list);
fprintf(stderr, "\n");
@ -304,7 +306,7 @@ void Dev_Log(const char *fmt, ...)
fprintf(gsLog, "\n");
}
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
fprintf(stderr, "\n");
@ -325,7 +327,7 @@ void Debug_Log(const char *fmt, ...)
fprintf(gsLog, "\n");
}
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
fprintf(stderr, "\n");
@ -345,7 +347,7 @@ void Error_Log(const char *fmt, ...)
fprintf(gsLog, "\n");
}
fprintf(stderr, "ZZogl-PG: ");
fprintf(stderr, "%s: ", logging_prefix);
vfprintf(stderr, fmt, list);
fprintf(stderr, "\n");

View File

@ -0,0 +1,620 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include <stdlib.h>
#include <math.h>
#include "GS.h"
#include "Mem.h"
#include "targets.h"
#include "ZZClut.h"
#ifdef ZEROGS_SSE2
#include <immintrin.h>
#endif
extern int g_TransferredToGPU;
extern int VALIDATE_THRESH;
extern u32 TEXDESTROY_THRESH;
#define FORCE_TEXDESTROY_THRESH (3) // destroy texture after FORCE_TEXDESTROY_THRESH frames
// Drops every memory target the manager owns: the live ones in listTargets
// and the recycled ones waiting in listClearedTargets.
void CMemoryTargetMngr::Destroy()
{
FUNCLOG
listTargets.clear();
listClearedTargets.clear();
}
// Checks whether the cached copy still matches GS memory over the part of
// [starttex, endtex) that overlaps the pending dirty range
// [clearminy, clearmaxy).
//
// Returns true when nothing relevant is dirty or the bytes still compare
// equal. Returns false otherwise; in that case, when bDeleteBadTex is set,
// the target is shrunk so that it no longer covers the dirty range
// (height becomes 0 when nothing usable is left). Exceeding VALIDATE_THRESH
// validations also zeroes height and returns false.
// tex0 is not referenced in this body.
bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex)
{
    FUNCLOG

    // No dirty range recorded.
    if (clearmaxy == 0) return true;

    // Intersect the requested rows with the dirty rows.
    int checkstarty = max(starttex, clearminy);
    int checkendy = min(endtex, clearmaxy);

    if (checkstarty >= checkendy) return true;

    // Too many validations in a row: give up on this target.
    if (validatecount++ > VALIDATE_THRESH)
    {
        height = 0;
        return false;
    }

    // lock and compare
    assert(ptex != NULL && ptex->memptr != NULL);

    if (memcmp_mmx(ptex->memptr + MemorySize(checkstarty-realy), MemoryAddress(checkstarty), MemorySize(checkendy-checkstarty)) == 0)
    {
        // Memory was rewritten with identical data; the copy is still good.
        clearmaxy = 0;
        return true;
    }

    if (!bDeleteBadTex) return false;

    // Cut the whole clearminy..clearmaxy range out of the target
    // (not just the checkstarty..checkendy part that was compared).
    if (clearminy > starty)
    {
        // the rows before the dirty range can be preserved
        height = clearminy - starty;
    }
    else if (clearmaxy < starty + height)
    {
        // the rows after the dirty range can be preserved
        height = starty + height - clearmaxy;
        starty = clearmaxy;
        assert(height > 0);
    }
    else
    {
        // the dirty range covers everything: destroy
        height = 0;
    }

    clearmaxy = 0;
    assert((starty >= realy) && ((starty + height) <= (realy + realheight)));

    return false;
}
#define TARGET_THRESH 0x500
extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture.
//#define SORT_TARGETS
// Retires the target at `it`: the node is moved from listTargets to the end
// of listClearedTargets so it can be recycled, and the oldest recycled
// entry is dropped once that list grows past TEXDESTROY_THRESH.
// `it` is advanced to the following element, which is also returned.
inline list<CMemoryTarget>::iterator CMemoryTargetMngr::DestroyTargetIter(list<CMemoryTarget>::iterator& it)
{
    // Step past the victim before splicing it out of listTargets.
    list<CMemoryTarget>::iterator victim = it++;

    listClearedTargets.splice(listClearedTargets.end(), listTargets, victim);

    if (listClearedTargets.size() > TEXDESTROY_THRESH) listClearedTargets.pop_front();

    return it;
}
// Compare target to current texture info
// Not same format -> 1
// Same format, not same data (clut only) -> 2
// identical -> 0
// Classifies how the cached target at `it` relates to the texture described
// by tex0 / clutsize:
//   1 - different storage format
//   2 - same format but the saved palette differs (clut formats only)
//   0 - compatible
int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize)
{
    // A paletted target can never stand in for a direct-colour one, or vice versa.
    if (PSMT_ISCLUT(it->psm) != PSMT_ISCLUT(tex0.psm))
        return 1;

    if (!PSMT_ISCLUT(tex0.psm))
    {
        // Direct colour: only the 16-bit-ness has to agree.
        if (PSMT_IS16BIT(tex0.psm) != PSMT_IS16BIT(it->psm))
            return 1;

        return 0;
    }

    // Paletted: pixel format, palette format and palette size must all agree.
    if (it->psm != tex0.psm || it->cpsm != tex0.cpsm || it->clutsize != clutsize)
        return 1;

    // Finally compare the palette saved with the target against the clut buffer.
    if (PSMT_IS32BIT(tex0.cpsm))
    {
        if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], tex0.csa, clutsize))
            return 2;
    }
    else
    {
        if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], tex0.csa, clutsize))
            return 2;
    }

    return 0;
}
// Computes the palette size in bytes for tex0 and stores it in clutsize.
// Non-paletted formats yield 0. The entry count (256 for 8-bit indices,
// otherwise 16) is clamped to what remains after the csa offset, then
// multiplied by the entry size (4 bytes for 32-bit cluts, else 2).
void CMemoryTargetMngr::GetClutVariables(int& clutsize, const tex0Info& tex0)
{
    clutsize = 0;

    if (!PSMT_ISCLUT(tex0.psm)) return;

    int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;

    if (PSMT_IS32BIT(tex0.cpsm))
    {
        clutsize = min(entries, 256 - tex0.csa * 16) * 4;
    }
    else
    {
        clutsize = min(entries, 512 - tex0.csa * 16) * 2;
    }
}
// Converts the memory range occupied by tex0 into a [start, end) range
// measured in units of 4 * GPU_TEXWIDTH. The range reported by
// GetRectMemAddressZero is capped at MEMORY_END; start is rounded down and
// end is rounded up.
void CMemoryTargetMngr::GetMemAddress(int& start, int& end, const tex0Info& tex0)
{
    int rangeBegin, rangeEnd;

    GetRectMemAddressZero(rangeBegin, rangeEnd, tex0.psm, tex0.tw, tex0.th, tex0.tbp0, tex0.tbw);
    assert(rangeBegin < rangeEnd);

    rangeEnd = min(rangeEnd, MEMORY_END);

    start = rangeBegin / (4 * GPU_TEXWIDTH);
    end = (rangeEnd + GPU_TEXWIDTH * 4 - 1) / (4 * GPU_TEXWIDTH);
    assert(start < end);
}
// Looks through listTargets for a cached target that fully covers rows
// [start, end) and is compatible with tex0 / clutsize.
//
// Returns the matching target (with usedstamp refreshed and validatecount
// reset) or NULL when none qualifies. While scanning, covering targets of
// the wrong format that have exceeded VALIDATE_THRESH, and targets whose
// validation left them with no height, are retired via DestroyTargetIter.
// forcevalidate - when non-zero, a candidate must also pass ValidateTex.
CMemoryTarget* CMemoryTargetMngr::SearchExistTarget(int start, int end, int clutsize, const tex0Info& tex0, int forcevalidate)
{
// The iterator is advanced inside the body because entries may be removed.
for (list<CMemoryTarget>::iterator it = listTargets.begin(); it != listTargets.end();)
{
// Only targets that cover the whole requested range are candidates.
if (it->starty <= start && it->starty + it->height >= end)
{
int res = CompareTarget(it, tex0, clutsize);
if (res == 1)
{
// Wrong format: count the miss and retire the target once it has
// missed too often.
if (it->validatecount++ > VALIDATE_THRESH)
{
it = DestroyTargetIter(it);
if (listTargets.size() == 0) break;
}
else
++it;
continue;
}
else if (res == 2)
{
// Right format but a different palette: leave it alone and keep looking.
++it;
continue;
}
if (forcevalidate) //&& listTargets.size() < TARGET_THRESH ) {
{
// do more validation checking. delete if not been used for a while
// (the last argument allows trimming only when the target has not been
// used for more than FORCE_TEXDESTROY_THRESH stamps)
if (!it->ValidateTex(tex0, start, end, curstamp > it->usedstamp + FORCE_TEXDESTROY_THRESH))
{
// ValidateTex sets height to 0 when nothing usable is left.
if (it->height <= 0)
{
it = DestroyTargetIter(it);
if (listTargets.size() == 0) break;
}
else
++it;
continue;
}
}
// Usable match: mark it as freshly used and hand it back.
it->usedstamp = curstamp;
it->validatecount = 0;
return &(*it);
}
#ifdef SORT_TARGETS
else if (it->starty >= end) break;
#endif
++it;
}
return NULL;
}
// Provides a target object for a new texture, placed at the back of
// listTargets. A retired target from listClearedTargets is recycled when
// its realheight, fmt, widthmult and channels all match the request and its
// psm packs `channels` pixels per word; otherwise a default-constructed
// target is appended. A recycled target gets validatecount reset to 0.
CMemoryTarget* CMemoryTargetMngr::ClearedTargetsSearch(int fmt, int widthmult, int channels, int height)
{
    // Find the first retired target that matches the requested layout.
    list<CMemoryTarget>::iterator match = listClearedTargets.begin();

    for (; match != listClearedTargets.end(); ++match)
    {
        if ((height == match->realheight) && (match->fmt == fmt) && (match->widthmult == widthmult) && (match->channels == channels)
            && (PIXELS_PER_WORD(match->psm) == channels))
        {
            break;
        }
    }

    if (match != listClearedTargets.end())
    {
        // Recycle: move the node back into the live list.
        listTargets.splice(listTargets.end(), listClearedTargets, match);

        CMemoryTarget* recycled = &listTargets.back();
        recycled->validatecount = 0;
        return recycled;
    }

    // Nothing suitable (or nothing retired at all): create a new one.
    listTargets.push_back(CMemoryTarget());
    return &listTargets.back();
}
// Returns a GPU texture holding the GS memory described by tex0, creating
// and uploading it when no usable cached target exists.
//
// tex0          - texture registers (base pointer, size, pixel/clut format).
// forcevalidate - forwarded to SearchExistTarget; non-zero forces a memory
//                 comparison of dirty cached targets.
//
// Returns the target, or NULL when the texture could not be created even
// after every other cached target was released. In that case the new,
// unusable target is removed from listTargets before returning.
CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forcevalidate)
{
    FUNCLOG
    int start, end, clutsize;

    GetClutVariables(clutsize, tex0);
    GetMemAddress(start, end, tex0);

    CMemoryTarget* it = SearchExistTarget(start, end, clutsize, tex0, forcevalidate);

    if (it != NULL) return it;

    // couldn't find so create
    CMemoryTarget* targ;

    u32 fmt;
    u32 internal_fmt;

    if (PSMT_ISHALF_STORAGE(tex0)) {
        // RGBA_5551 storage format
        fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
        internal_fmt = GL_RGB5_A1;
    } else {
        // RGBA_8888 storage format
        fmt = GL_UNSIGNED_BYTE;
        internal_fmt = GL_RGBA;
    }

    int widthmult = 1, channels = 1;

    // If our texture is too big and could not be placed in 1 GPU texture. Pretty rare in modern cards.
    if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight))
    {
        // In this rare case we made a texture of half height and place it on the screen.
        ZZLog::Debug_Log("Making a half height texture (start - end == 0x%x)", (end-start));
        widthmult = 2;
    }

    channels = PIXELS_PER_WORD(tex0.psm);

    // Either a recycled target (ptex already set) or a fresh one; in both
    // cases it is now the last element of listTargets.
    targ = ClearedTargetsSearch(fmt, widthmult, channels, end - start);

    if (targ->ptex != NULL)
    {
        assert(end - start <= targ->realheight && targ->fmt == fmt && targ->widthmult == widthmult);

        // good enough, so init
        targ->realy = targ->starty = start;
        targ->usedstamp = curstamp;
        targ->psm = tex0.psm;
        targ->cpsm = tex0.cpsm;
        targ->height = end - start;
    } else {
        // not initialized yet
        targ->fmt = fmt;
        targ->realy = targ->starty = start;
        targ->realheight = targ->height = end - start;
        targ->usedstamp = curstamp;
        targ->psm = tex0.psm;
        targ->cpsm = tex0.cpsm;
        targ->widthmult = widthmult;
        targ->channels = channels;
        targ->texH = (targ->realheight + widthmult - 1)/widthmult;
        targ->texW = GPU_TEXWIDTH * widthmult * channels;

        // alloc the mem
        targ->ptex = new CMemoryTarget::TEXTURE();
        targ->ptex->ref = 1;
    }

#if defined(ZEROGS_DEVBUILD)
    g_TransferredToGPU += MemorySize(channels * targ->height);
#endif

    // fill with data
    if (targ->ptex->memptr == NULL)
    {
        targ->ptex->memptr = (u8*)_aligned_malloc(MemorySize(targ->realheight), 16);
        assert(targ->ptex->ref > 0);
    }

    // Keep a CPU-side snapshot of the source memory; ValidateTex compares against it.
    memcpy_amd(targ->ptex->memptr, MemoryAddress(targ->realy), MemorySize(targ->height));

    // ptexdata is what gets uploaded. has_data records whether it is a
    // temporary buffer that this function must free.
    __aligned16 u8* ptexdata = NULL;
    bool has_data = false;

    if (PSMT_ISCLUT(tex0.psm))
    {
        assert(clutsize > 0);

        // Local clut parameter
        targ->cpsm = tex0.cpsm;

        // Allocate a local clut array, replacing any buffer left over from a
        // previous use of this target.
        targ->clutsize = clutsize;

        if (targ->clut != NULL) _aligned_free(targ->clut);

        targ->clut = (u8*)_aligned_malloc(clutsize, 16);

        // texture parameter
        ptexdata = (u8*)_aligned_malloc(CLUT_PIXEL_SIZE(tex0.cpsm) * targ->texH * targ->texW, 16);
        has_data = true;

        u8* psrc = (u8*)(MemoryAddress(targ->realy));

        // Fill a local clut then build the real texture
        if (PSMT_IS32BIT(tex0.cpsm))
        {
            ClutBuffer_to_Array<u32>((u32*)targ->clut, tex0.csa, clutsize);
            Build_Clut_Texture<u32>(tex0.psm, targ->height, (u32*)targ->clut, psrc, (u32*)ptexdata);
        }
        else
        {
            ClutBuffer_to_Array<u16>((u16*)targ->clut, tex0.csa, clutsize);
            Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata);
        }

        assert(targ->clutsize > 0);
    }
    else if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
    {
        ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
        has_data = true;

        // needs to be 8 bit, use xmm for unpacking
        u16* dst = (u16*)ptexdata;
        u16* src = (u16*)(MemoryAddress(targ->realy));

#ifdef ZEROGS_SSE2
        assert(((u32)(uptr)dst) % 16 == 0);

        __m128i zero_128 = _mm_setzero_si128();
        // NOTE: future performance improvement
        // SSE4.1 support uncacheable load 128bits. Maybe it can
        // avoid some cache pollution
        // NOTE2: I create multiple _n variable to mimic the previous ASM behavior
        // but I'm not sure there are real gains.
        for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
        {
            // Convert 16 bits pixels to 32bits (zero extended)
            // Batch 64 bytes (32 pixels) at once.
            __m128i pixels_1 = _mm_load_si128((__m128i*)src);
            __m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
            __m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
            __m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));

            __m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
            __m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
            __m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
            __m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);

            // Note: bypass cache
            _mm_stream_si128((__m128i*)dst, pix_low_1);
            _mm_stream_si128((__m128i*)(dst+8), pix_high_1);
            _mm_stream_si128((__m128i*)(dst+16), pix_low_2);
            _mm_stream_si128((__m128i*)(dst+24), pix_high_2);

            __m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
            __m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
            __m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
            __m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);

            // Note: bypass cache
            _mm_stream_si128((__m128i*)(dst+32), pix_low_3);
            _mm_stream_si128((__m128i*)(dst+40), pix_high_3);
            _mm_stream_si128((__m128i*)(dst+48), pix_low_4);
            _mm_stream_si128((__m128i*)(dst+56), pix_high_4);

            src += 32;
            dst += 64;
        }
        // It is advised to use a fence instruction after non-temporal move (mm_stream) instructions.
        // The store fence ensures that previous stores have finished before new ones execute.
        _mm_sfence();
#else // ZEROGS_SSE2

        for (int i = 0; i < targ->height; ++i)
        {
            for (int j = 0; j < GPU_TEXWIDTH; ++j)
            {
                dst[0] = src[0];
                dst[1] = 0;
                dst[2] = src[1];
                dst[3] = 0;
                dst += 4;
                src += 2;
            }
        }

#endif // ZEROGS_SSE2
    }
    else
    {
        ptexdata = targ->ptex->memptr;
        // We really don't want to deallocate memptr. As a reminder...
        has_data = false;
    }

    // create the texture
    GL_REPORT_ERRORD();

    assert(ptexdata != NULL);

    if (targ->ptex->tex == 0) glGenTextures(1, &targ->ptex->tex);

    glBindTexture(GL_TEXTURE_RECTANGLE_NV, targ->ptex->tex);

    TextureRect(internal_fmt, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata);

    while (glGetError() != GL_NO_ERROR)
    {
        // release resources until can create
        if (listClearedTargets.size() > 0)
        {
            listClearedTargets.pop_front();
        }
        else
        {
            // targ itself is an element of listTargets. Once it is the only
            // one left there is nothing more to release; calling
            // DestroyOldest() now would erase targ and leave the pointer
            // dangling for the accesses below. Give up instead.
            if (listTargets.size() <= 1)
            {
                ZZLog::Error_Log("Failed to create %dx%x texture.", targ->texW, targ->texH);

                if (has_data) _aligned_free(ptexdata);

                // Remove the unusable target so it is never handed out by a
                // later search.
                for (list<CMemoryTarget>::iterator self = listTargets.begin(); self != listTargets.end(); ++self)
                {
                    if (&(*self) == targ)
                    {
                        listTargets.erase(self);
                        break;
                    }
                }

                return NULL;
            }

            DestroyOldest();
        }

        TextureRect(internal_fmt, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata);
    }

    setRectWrap(GL_CLAMP);

    if (has_data) _aligned_free(ptexdata);

    assert(tex0.psm != 0xd);

    return targ;
}
// Records that the memory range [nbStartY, nbEndY) has been overwritten.
// The range is converted to units of 4 * GPU_TEXWIDTH (start rounded down,
// end rounded up) and every cached target it overlaps has its pending dirty
// range [clearminy, clearmaxy) set or widened to include the overlap.
// Nothing is destroyed here.
void CMemoryTargetMngr::ClearRange(int nbStartY, int nbEndY)
{
    FUNCLOG
    int starty = nbStartY / (4 * GPU_TEXWIDTH);
    int endy = (nbEndY + 4 * GPU_TEXWIDTH - 1) / (4 * GPU_TEXWIDTH);

    for (list<CMemoryTarget>::iterator it = listTargets.begin(); it != listTargets.end(); ++it)
    {
        // Skip targets that do not intersect the written range.
        if (it->starty >= endy || (it->starty + it->height) <= starty) continue;

        // intersects, reduce valid texture mem (or totally delete texture)
        int miny = max(it->starty, starty);
        int maxy = min(it->starty + it->height, endy);
        assert(miny < maxy);

        if (it->clearmaxy == 0)
        {
            // No dirty range yet: start one.
            it->clearminy = miny;
            it->clearmaxy = maxy;
            continue;
        }

        // Widen the existing dirty range.
        if (it->clearminy > miny) it->clearminy = miny;
        if (it->clearmaxy < maxy) it->clearmaxy = maxy;
    }
}
// Periodic housekeeping: erases recycled targets whose usedstamp is older
// than curstamp - (FORCE_TEXDESTROY_THRESH - 1) and, whenever curstamp is a
// multiple of FORCE_TEXDESTROY_THRESH, also erases live targets older than
// curstamp - FORCE_TEXDESTROY_THRESH. Advances curstamp by one at the end.
void CMemoryTargetMngr::DestroyCleared()
{
    FUNCLOG

    list<CMemoryTarget>::iterator it = listClearedTargets.begin();

    while (it != listClearedTargets.end())
    {
        if (it->usedstamp < curstamp - (FORCE_TEXDESTROY_THRESH -1))
            it = listClearedTargets.erase(it);
        else
            ++it;
    }

    if ((curstamp % FORCE_TEXDESTROY_THRESH) == 0)
    {
        // purge old targets every FORCE_TEXDESTROY_THRESH frames
        it = listTargets.begin();

        while (it != listTargets.end())
        {
            if (it->usedstamp < curstamp - FORCE_TEXDESTROY_THRESH)
                it = listTargets.erase(it);
            else
                ++it;
        }
    }

    ++curstamp;
}
void CMemoryTargetMngr::DestroyOldest()
{
FUNCLOG
if (listTargets.size() == 0)
return;
list<CMemoryTarget>::iterator it, itbest;
it = itbest = listTargets.begin();
while (it != listTargets.end())
{
if (it->usedstamp < itbest->usedstamp) itbest = it;
++it;
}
listTargets.erase(itbest);
}

File diff suppressed because it is too large Load Diff

View File

@ -21,7 +21,7 @@
// It draw picture direct on screen, so here we have interlacing and frame skipping.
//------------------ Includes
#include "Util.h"
#include "Util.h"
#include "ZZoglCRTC.h"
#include "GLWin.h"
#include "ZZoglShaders.h"
@ -58,6 +58,17 @@ extern void ZZDestroy();
extern void ChangeDeviceSize(int nNewWidth, int nNewHeight);
extern GLuint vboRect;
// I'm making this variable global for the moment in the course of fiddling with the interlace code
// to try and make it more straightforward.
int interlace_mode = 0; // 0 - not interlacing, 1 - interlacing.
bool bUsingStencil = false;
// True only while interlacing is active (interlace_mode != 0) and
// gs.interlace equals conf.interlace.
bool INTERLACE_COUNT()
{
    if (!interlace_mode) return false;

    return gs.interlace == conf.interlace;
}
// Adjusts the vertex shader BitBltPos vector v to preserve the aspect ratio. It is used to emulate 4:3 or 16:9.
void AdjustTransToAspect(float4& v)
{
@ -151,20 +162,20 @@ inline void FrameSavingHelper()
}
// Function populated tex0Info[2] array
inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
inline void FrameObtainDispinfo(tex0Info* dispinfo)
{
for (int i = 0; i < 2; ++i)
{
if (!(*(u32*)(PMODE) & (1 << i)))
if (!Circuit_Enabled(i))
{
dispinfo[i].tw = 0;
dispinfo[i].th = 0;
continue;
}
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
GSRegDISPLAY* pd = i ? DISPLAY2 : DISPLAY1;
GSRegDISPFB* pfb = Dispfb_Reg(i);
GSRegDISPLAY* pd = Display_Reg(i);
int magh = pd->MAGH + 1;
int magv = pd->MAGV + 1;
@ -177,7 +188,8 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
// hack!!
// 2 * dispinfo[i].tw / dispinfo[i].th <= 1, metal slug 4
if (bInterlace && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(conf.settings().interlace_2x))
// Note: This is what causes the double image if interlace is off on the Final Fantasy X-2 opening.
if (interlace_mode && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(conf.settings().interlace_2x))
{
dispinfo[i].th >>= 1;
}
@ -187,9 +199,9 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
extern bool s_bWriteDepth;
// Setup that must be done before rendering the picture.
inline void RenderStartHelper(u32 bInterlace)
inline void RenderStartHelper()
{
if (conf.mrtdepth && pvs[8] == NULL)
if (conf.mrtdepth && ZZshExistProgram(pvs[8]))
{
conf.mrtdepth = 0;
s_bWriteDepth = false;
@ -209,14 +221,13 @@ inline void RenderStartHelper(u32 bInterlace)
vb[0].fba.fba = 0;
vb[1].fba.fba = 0;
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
FB::Unbind(); // switch to the backbuffer
glViewport(0, 0, GLWin.backbuffer.w, GLWin.backbuffer.h);
// if interlace, only clear every other vsync
if (!bInterlace)
if (!interlace_mode)
{
//u32 color = COLOR_ARGB(0, BGCOLOR->R, BGCOLOR->G, BGCOLOR->B);
glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
}
@ -232,7 +243,7 @@ inline void RenderStartHelper(u32 bInterlace)
GL_REPORT_ERRORD();
if (bInterlace) g_PrevBitwiseTexX = -1; // reset since will be using
if (interlace_mode) g_PrevBitwiseTexX = -1; // reset since will be using
}
// Settings for interlace texture multiplied vector;
@ -240,14 +251,14 @@ inline void RenderStartHelper(u32 bInterlace)
// on image y coords. So if we write valpha.z * F + valpha.w + 0.5, it would be switching odd
// and even strings at each frame.
// valpha.x and y are used for image blending.
inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog)
inline float4 RenderGetForClip(int psm, CRTC_TYPE render_type)
{
SetShaderCaller("RenderGetForClip");
FRAGMENTSHADER* prog = curr_pps(render_type);
float4 valpha;
// first render the current render targets, then from ptexMem
if (psm == 1)
if (psm == PSMCT24)
{
valpha.x = 1;
valpha.y = 0;
@ -258,9 +269,9 @@ inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
valpha.y = 1;
}
if (bInterlace)
if (interlace_mode)
{
if (interlace == (conf.interlace & 1))
if (gs.interlace == (conf.interlace & 1))
{
// pass if odd
valpha.z = 1.0f;
@ -286,17 +297,21 @@ inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
}
// Put interlaced texture in use for shader prog.
// Note: if frame interlaced it's th is halved, so we should x2 it.
inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* prog)
// Note: if the frame is interlaced, its th is halved, so we should multiply it by 2.
inline void RenderCreateInterlaceTex(int th, CRTC_TYPE render_type)
{
if (!bInterlace) return;
int interlacetex = CreateInterlaceTex(2 * th);
FRAGMENTSHADER* prog;
int interlacetex;
if (!interlace_mode) return;
prog = curr_pps(render_type);
interlacetex = CreateInterlaceTex(2 * th);
ZZshGLSetTextureParameter(prog->prog, prog->sInterlace, interlacetex, "Interlace");
}
// Well, do blending setup prior to second pass of half-frame drawing
// Do blending setup prior to second pass of half-frame drawing.
inline void RenderSetupBlending()
{
// setup right blending
@ -305,12 +320,14 @@ inline void RenderSetupBlending()
if (PMODE->MMOD)
{
// Use the ALP register for alpha blending.
glBlendColorEXT(PMODE->ALP*(1 / 255.0f), PMODE->ALP*(1 / 255.0f), PMODE->ALP*(1 / 255.0f), 0.5f);
s_srcrgb = GL_CONSTANT_COLOR_EXT;
s_dstrgb = GL_ONE_MINUS_CONSTANT_COLOR_EXT;
}
else
{
// Use the alpha value of circuit 1 for alpha blending.
s_srcrgb = GL_SRC_ALPHA;
s_dstrgb = GL_ONE_MINUS_SRC_ALPHA;
}
@ -332,17 +349,19 @@ inline void RenderSetupBlending()
// each frame could be drawn in two stages, so blending should be different for them
inline void RenderSetupStencil(int i)
{
glStencilMask(1 << i);
s_stencilmask = 1 << i;
glStencilMask(s_stencilmask);
GL_STENCILFUNC_SET();
}
// do stencil check for each found target i -- texturing stage
inline void RenderUpdateStencil(int i, bool* bUsingStencil)
inline void RenderUpdateStencil(int i)
{
if (!(*bUsingStencil)) glClear(GL_STENCIL_BUFFER_BIT);
*bUsingStencil = 1;
if (!bUsingStencil)
{
glClear(GL_STENCIL_BUFFER_BIT);
bUsingStencil = true;
}
glEnable(GL_STENCIL_TEST);
GL_STENCILFUNC(GL_NOTEQUAL, 3, 1 << i);
@ -351,16 +370,16 @@ inline void RenderUpdateStencil(int i, bool* bUsingStencil)
}
// CRTC24 could not be rendered
inline void RenderCRTC24helper(u32 bInterlace, int interlace, int psm)
/*inline void RenderCRTC24helper(int psm)
{
ZZLog::Debug_Log("ZZogl: CRTC24!!! I'm trying to show something.");
SetShaderCaller("RenderCRTC24helper");
// assume that data is already in ptexMem (do Resolve?)
RenderGetForClip(bInterlace, interlace, psm, &ppsCRTC24[bInterlace]);
ZZshSetPixelShader(ppsCRTC24[bInterlace].prog);
RenderGetForClip(psm, CRTC_RENDER_24);
ZZshSetPixelShader(curr_ppsCRTC24()->prog);
DrawTriangleArray();
}
}*/
// Maybe this function should be made global. It calculates the bits per pixel for
// each psm. This is the only place where PSMCT16 has a different bpp.
@ -394,7 +413,7 @@ inline int RenderGetOffsets(int* dby, int* movy, tex0Info& texframe, CRenderTarg
}
// BltBit shader calculate vertex (4 coord's pixel) position at the viewport.
inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
inline float4 RenderSetTargetBitPos(int dh, int th, int movy)
{
SetShaderCaller("RenderSetTargetBitPos");
float4 v;
@ -408,7 +427,7 @@ inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
AdjustTransToAspect(v);
if (isInterlace)
if (INTERLACE_COUNT())
{
// move down by 1 pixel
v.w += 1.0f / (float)dh ;
@ -423,7 +442,7 @@ inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
// For example, use tw / X and tw / X magnify the viewport.
// Interlaced output is little out of VB, it could be seen as an evil blinking line on top
// and bottom, so we try to remove it.
inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace)
inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw)
{
SetShaderCaller("RenderSetTargetBitTex");
@ -432,7 +451,7 @@ inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool
// Incorrect Aspect ratio on interlaced frames
if (isInterlace)
if (INTERLACE_COUNT())
{
v.y -= 1.0f / conf.height;
v.w += 1.0f / conf.height;
@ -455,10 +474,11 @@ inline float4 RenderSetTargetBitTrans(int th)
// use g_fInvTexDims to store inverse texture dims
// Seems, that Targ shader does not use it
inline float4 RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog)
inline float4 RenderSetTargetInvTex(int tw, int th, CRTC_TYPE render_type)
{
SetShaderCaller("RenderSetTargetInvTex");
FRAGMENTSHADER* prog = curr_pps(render_type);
float4 v = float4(0, 0, 0, 0);
if (prog->sInvTexDims)
@ -496,17 +516,20 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
return false;
}
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace);
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit);
// First try to draw frame from targets.
inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit)
{
// get the start and end addresses of the buffer
int bpp = RenderGetBpp(texframe.psm);
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
GSRegDISPFB* pfb = Dispfb_Reg(circuit);
int start, end;
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
int tex_th = (interlace_mode) ? texframe.th * 2 : texframe.th;
//ZZLog::WriteLn("Render checking for targets, circuit %d", circuit);
GetRectMemAddressZero(start, end, texframe.psm, texframe.tw, tex_th, texframe.tbp0, texframe.tbw);
// We need share list of targets between functions
s_RTs.GetTargs(start, end, listTargs);
@ -517,10 +540,14 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
if (ptarg->fbw == texframe.tbw && !(ptarg->status&CRenderTarget::TS_NeedUpdate) && ((256 / bpp)*(texframe.tbp0 - ptarg->fbp)) % texframe.tbw == 0)
{
FRAGMENTSHADER* pps;
int dby = pfb->DBY;
int movy = 0;
if (RenderLookForABetterTarget(ptarg->fbp, texframe.tbp0, listTargs, it)) continue;
if (RenderLookForABetterTarget(ptarg->fbp, texframe.tbp0, listTargs, it))
{
continue;
}
if (g_bSaveFinalFrame) SaveTexture("frame1.tga", GL_TEXTURE_RECTANGLE_NV, ptarg->ptex, RW(ptarg->fbw), RH(ptarg->fbh));
@ -529,37 +556,42 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
if (dh >= 64)
{
if (ptarg->fbh - dby < texframe.th - movy && !(*bUsingStencil))
RenderUpdateStencil(i, bUsingStencil);
else if (ptarg->fbh - dby > 2 * ( texframe.th - movy ))
if (ptarg->fbh - dby < tex_th - movy && !bUsingStencil)
{
RenderUpdateStencil(circuit);
}
else if (ptarg->fbh - dby > 2 * ( tex_th - movy )) // I'm not sure this is needed any more.
{
// Sometimes calculated position onscreen is misaligned, ie in FFX-2 intro. In such case some part of image are out of
// border's and we should move it manually.
dby -= ((ptarg->fbh - dby) >> 2) - ((texframe.th + movy) >> 1) ;
dby -= ((ptarg->fbh - dby) >> 2) - ((tex_th + movy) >> 1);
}
SetShaderCaller("RenderCheckForTargets");
// Texture
float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT);
float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby));
// dest rect
v = RenderSetTargetBitPos(dh, texframe.th, movy, INTERLACE_COUNT);
v = RenderSetTargetBitPos(dh, texframe.th, movy);
v = RenderSetTargetBitTrans(ptarg->fbh);
v = RenderSetTargetInvTex(bInterlace, texframe.tbw, ptarg->fbh, &ppsCRTCTarg[bInterlace]) ; // FIXME. This is no use
v = RenderSetTargetInvTex(texframe.tbw, ptarg->fbh, CRTC_RENDER_TARG); // FIXME. This is no use
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]);
float4 valpha = RenderGetForClip(texframe.psm, CRTC_RENDER_TARG);
pps = curr_ppsCRTCTarg();
// inside vb[0]'s target area, so render that region only
ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].prog, ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]);
ZZshGLSetTextureParameter(pps->prog, pps->sFinal, ptarg->ptex, "CRTC target");
RenderCreateInterlaceTex(texframe.th, CRTC_RENDER_TARG);
ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog);
ZZshSetPixelShader(pps->prog);
DrawTriangleArray();
if (abs(dh - (int)texframe.th) <= 1) return;
if (abs(dh - (int)texframe.th) <= 1)
{
return;
}
if (abs(dh - (int)ptarg->fbh) <= 1)
{
@ -571,14 +603,14 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
++it;
}
RenderCheckForMemory(texframe, listTargs, i, bUsingStencil, interlace, bInterlace);
RenderCheckForMemory(texframe, listTargs, circuit);
}
// The same as the previous, but from memory.
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
// this is the function that does it.
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit)
{
float4 v;
@ -588,9 +620,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
}
// context has to be 0
if (bInterlace >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault.");
if (interlace_mode >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault.");
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
//if (!bUsingStencil) RenderUpdateStencil(i);
SetShaderCaller("RenderCheckForMemory");
@ -601,7 +633,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
h1 = texframe.th;
w2 = -0.5f;
h2 = -0.5f;
SetTexVariablesInt(0, 2, texframe, false, &ppsCRTC[bInterlace], 1);
SetTexVariablesInt(0, 2, texframe, false, curr_ppsCRTC(), 1);
}
else
{
@ -609,24 +641,24 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
h1 = 1;
w2 = -0.5f / (float)texframe.tw;
h2 = -0.5f / (float)texframe.th;
SetTexVariablesInt(0, 0, texframe, false, &ppsCRTC[bInterlace], 1);
SetTexVariablesInt(0, 0, texframe, false, curr_ppsCRTC(), 1);
}
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
// Fixme: Why is this here?
// We should probably call RenderSetTargetBitTex instead.
v = RenderSetTargetBitTex(w1, h1, w2, h2, INTERLACE_COUNT);
v = RenderSetTargetBitTex(w1, h1, w2, h2);
// finally render from the memory (note that the stencil buffer will keep previous regions)
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
v = RenderSetTargetBitPos(1, 1, 0);
v = RenderSetTargetBitTrans(texframe.th);
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
v = RenderSetTargetInvTex(texframe.tw, texframe.th, CRTC_RENDER);
float4 valpha = RenderGetForClip(texframe.psm, CRTC_RENDER);
ZZshGLSetTextureParameter(ppsCRTC[bInterlace].prog, ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
ZZshSetPixelShader(ppsCRTC[bInterlace].prog);
ZZshGLSetTextureParameter(curr_ppsCRTC()->prog, curr_ppsCRTC()->sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
RenderCreateInterlaceTex(texframe.th, CRTC_RENDER_TARG);
ZZshSetPixelShader(curr_ppsCRTC()->prog);
DrawTriangleArray();
}
@ -657,7 +689,7 @@ inline void DisplayFPS()
DrawText(str, left, top, 0xffc0ffff);
}
// SnapeShoot helper
// Snapshot helper
inline void MakeSnapshot()
{
@ -694,7 +726,7 @@ void ZZReset()
s_nLastResolveReset = 0;
icurctx = -1;
g_vsprog = g_psprog = 0;
g_vsprog = g_psprog = sZero;
ZZGSStateReset();
ZZDestroy();
@ -759,12 +791,10 @@ inline void AfterRendererUnimportantJob()
maxmin = 608;
}
extern u32 s_uFramebuffer;
// Switch framebuffers
inline void AfterRendererSwitchBackToTextures()
{
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
FB::Bind();
g_MemTargs.DestroyCleared();
@ -815,18 +845,6 @@ inline void AfterRendererAutoresetTargets()
s_RTs.ResolveAll();
return;
// s_RTs.Destroy();
// s_DepthRTs.ResolveAll();
// s_DepthRTs.Destroy();
//
// vb[0].prndr = NULL;
// vb[0].pdepth = NULL;
// vb[0].bNeedFrameCheck = 1;
// vb[0].bNeedZCheck = 1;
// vb[1].prndr = NULL;
// vb[1].pdepth = NULL;
// vb[1].bNeedFrameCheck = 1;
// vb[1].bNeedZCheck = 1;
}
}
@ -840,49 +858,59 @@ inline void AfterRendererAutoresetTargets()
}
int count = 0;
// The main renderer function
void RenderCRTC(int interlace)
void RenderCRTC()
{
if (FrameSkippingHelper()) return;
u32 bInterlace = SMODE2->INT && SMODE2->FFMD && (conf.interlace < 2);
RenderStartHelper(bInterlace);
bool bUsingStencil = false;
tex0Info dispinfo[2];
if (FrameSkippingHelper()) return;
// If we are in frame mode and interlacing, and we haven't forced interlacing off, interlace_mode is 1.
interlace_mode = SMODE2->INT && SMODE2->FFMD && (conf.interlace < 2);
bUsingStencil = false;
FrameObtainDispinfo(bInterlace, dispinfo);
RenderStartHelper();
FrameObtainDispinfo(dispinfo);
// start from the last circuit
for (int i = !PMODE->SLBG; i >= 0; --i)
{
if (!Circuit_Enabled(i)) continue;
tex0Info& texframe = dispinfo[i];
if (texframe.th <= 1) continue;
// I don't think this is necessary, now that we make sure the circuit we are working with is enabled.
/*if (texframe.th <= 1)
{
continue;
}*/
if (SMODE2->INT && SMODE2->FFMD)
{
texframe.th >>= 1;
// Final Fantasy X-2 issue here.
if (conf.interlace == 2 && texframe.th >= 512)
/*if (conf.interlace == 2 && texframe.th >= 512)
{
texframe.th >>= 1;
}*/
}
if (i == 0) RenderSetupBlending();
if (bUsingStencil) RenderSetupStencil(i);
if (texframe.psm == 0x12)
/*if (texframe.psm == 0x12) // Probably broken - 0x12 isn't a valid psm. 24 bit is 1.
{
RenderCRTC24helper(bInterlace, interlace, texframe.psm);
RenderCRTC24helper(texframe.psm);
continue;
}
}*/
// We share the target list between two functions, so declare it here.
list<CRenderTarget*> listTargs;
// if we could not draw image from target's do it from memory
RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
// if we could not draw image from target's, do it from memory
RenderCheckForTargets(texframe, listTargs, i);
}
GL_REPORT_ERRORD();

View File

@ -21,11 +21,8 @@
#define ZZOGLCRTC_H_INCLUDED
#include <stdlib.h>
#include "targets.h"
#define INTERLACE_COUNT (bInterlace && interlace == (conf.interlace))
#ifdef _WIN32
extern HDC hDC; // Private GDI Device Context
extern HGLRC hRC; // Permanent Rendering Context

View File

@ -35,42 +35,6 @@
# include "Win32.h"
#endif
//------------------ Defines
#ifdef _WIN32
#define GL_LOADFN(name) { \
if( (*(void**)&name = (void*)wglGetProcAddress(#name)) == NULL ) { \
ZZLog::Error_Log("Failed to find %s, exiting.", #name); \
} \
}
#else
// let GLEW take care of it
#define GL_LOADFN(name)
#endif
#define GL_BLEND_RGB(src, dst) { \
s_srcrgb = src; \
s_dstrgb = dst; \
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
}
#define GL_BLEND_ALPHA(src, dst) { \
s_srcalpha = src; \
s_dstalpha = dst; \
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
}
#define GL_BLEND_ALL(srcrgb, dstrgb, srcalpha, dstalpha) { \
s_srcrgb = srcrgb; \
s_dstrgb = dstrgb; \
s_srcalpha = srcalpha; \
s_dstalpha = dstalpha; \
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
}
#define GL_BLEND_SET() zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha)
#define VB_NUMBUFFERS 512
// ----------------- Types
typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
@ -81,8 +45,8 @@ extern bool ZZshLoadExtraEffects();
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
GLuint vboRect = 0;
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
int g_nCurVBOIndex = 0;
GLuint g_vboBuffers[VB_NUMBUFFERS]; // VBOs for all drawing commands
u32 g_nCurVBOIndex = 0;
inline bool CreateImportantCheck();
inline void CreateOtherCheck();
@ -125,10 +89,10 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL;
extern u8* s_lpShaderResources;
// String's for shader file in developer mode
#ifdef ZEROGS_DEVBUILD
//#ifdef ZEROGS_DEVBUILD
char* EFFECT_NAME = "";
char* EFFECT_DIR = "";
#endif
//#endif
/////////////////////
// graphics resources
@ -143,12 +107,17 @@ GLenum g_internalRGBAFloat16Fmt = GL_RGBA_FLOAT16_ATI;
u32 ptexLogo = 0;
int nLogoWidth, nLogoHeight;
u32 s_ptexInterlace = 0; // holds interlace fields
static bool vb_buffer_allocated = false;
//------------------ Global Variables
int GPU_TEXWIDTH = 512;
float g_fiGPU_TEXWIDTH = 1/512.0f;
int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096;
u32 s_uFramebuffer = 0;
namespace FB
{
u32 buf = 0;
};
RasterFont* font_p = NULL;
float g_fBlockMult = 1;
@ -157,7 +126,7 @@ float g_fBlockMult = 1;
u32 ptexBlocks = 0, ptexConv16to32 = 0; // holds information on block tiling
u32 ptexBilinearBlocks = 0;
u32 ptexConv32to16 = 0;
int g_nDepthBias = 0;
// int g_nDepthBias = 0;
extern void Delete_Avi_Capture();
extern void ZZDestroy();
@ -505,7 +474,12 @@ bool ZZCreate(int _width, int _height)
GPU_TEXWIDTH = min (g_MaxTexWidth/8, 1024);
g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH;
// FIXME: not clean maybe re integrate the function in shader files --greg
#ifndef GLSL_API
if (!CreateOpenShadersFile()) return false;
#else
if (!ZZshCreateOpenShadersFile()) return false;
#endif
GL_REPORT_ERROR();
@ -520,16 +494,16 @@ bool ZZCreate(int _width, int _height)
if (err != GL_NO_ERROR) bSuccess = false;
glGenFramebuffersEXT(1, &s_uFramebuffer);
FB::Create();
if (s_uFramebuffer == 0)
if (FB::buf == 0)
{
ZZLog::Error_Log("Failed to create the renderbuffer.");
}
GL_REPORT_ERRORD();
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
FB::Bind();
DrawBuffers(s_drawbuffers);
@ -600,14 +574,15 @@ bool ZZCreate(int _width, int _height)
g_nCurVBOIndex = 0;
g_vboBuffers.resize(VB_NUMBUFFERS);
glGenBuffers((GLsizei)g_vboBuffers.size(), &g_vboBuffers[0]);
for (int i = 0; i < (int)g_vboBuffers.size(); ++i)
{
glBindBuffer(GL_ARRAY_BUFFER, g_vboBuffers[i]);
glBufferData(GL_ARRAY_BUFFER, 0x100*sizeof(VertexGPU), NULL, GL_STREAM_DRAW);
}
if (!vb_buffer_allocated) {
glGenBuffers((GLsizei)ArraySize(g_vboBuffers), g_vboBuffers);
for (int i = 0; i < ArraySize(g_vboBuffers); ++i)
{
glBindBuffer(GL_ARRAY_BUFFER, g_vboBuffers[i]);
glBufferData(GL_ARRAY_BUFFER, 0x100*sizeof(VertexGPU), NULL, GL_STREAM_DRAW);
}
vb_buffer_allocated = true; // mark the buffer allocated
}
GL_REPORT_ERROR();
if (err != GL_NO_ERROR) bSuccess = false;
@ -616,6 +591,11 @@ bool ZZCreate(int _width, int _height)
g_fBlockMult = 1;
bool do_not_use_billinear = false;
#ifndef ZZNORMAL_MEMORY
FillAlowedPsnTable();
FillBlockTables();
#endif
vector<char> vBlockData, vBilinearData;
BLOCK::FillBlocks(vBlockData, vBilinearData, 1);
@ -781,7 +761,7 @@ bool ZZCreate(int _width, int _height)
// This was changed in SetAA - should we be changing it back?
glPointSize(1.0f);
g_nDepthBias = 0;
// g_nDepthBias = 0;
glEnable(GL_POLYGON_OFFSET_FILL);
glEnable(GL_POLYGON_OFFSET_LINE);
@ -791,7 +771,7 @@ bool ZZCreate(int _width, int _height)
vb[0].Init(VB_BUFFERSIZE);
vb[1].Init(VB_BUFFERSIZE);
g_vsprog = g_psprog = 0;
g_vsprog = g_psprog = sZero;
if (glGetError() == GL_NO_ERROR)
{
@ -823,10 +803,10 @@ void ZZDestroy()
vb[0].Destroy();
vb[1].Destroy();
if (g_vboBuffers.size() > 0)
if (vb_buffer_allocated)
{
glDeleteBuffers((GLsizei)g_vboBuffers.size(), &g_vboBuffers[0]);
g_vboBuffers.clear();
glDeleteBuffers((GLsizei)ArraySize(g_vboBuffers), g_vboBuffers);
vb_buffer_allocated = false; // mark the buffer unallocated
}
g_nCurVBOIndex = 0;
@ -864,8 +844,8 @@ void ZZDestroy()
SAFE_RELEASE_PROG(ppsCRTCTarg[1].prog);
SAFE_RELEASE_PROG(ppsCRTC[0].prog);
SAFE_RELEASE_PROG(ppsCRTC[1].prog);
SAFE_RELEASE_PROG(ppsCRTC24[0].prog);
SAFE_RELEASE_PROG(ppsCRTC24[1].prog);
// SAFE_RELEASE_PROG(ppsCRTC24[0].prog);
// SAFE_RELEASE_PROG(ppsCRTC24[1].prog);
SAFE_RELEASE_PROG(ppsOne.prog);
safe_delete(font_p);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,123 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZOGLFLUSH_H_INCLUDED
#define ZZOGLFLUSH_H_INCLUDED
// Developer-build instrumentation.
// Without ZEROGS_DEVBUILD the INC_*() counters expand to nothing and the
// g_bUpdateEffect / g_bSaveTex / g_bSaveResolved flags are the constant 0.
#ifndef ZEROGS_DEVBUILD
#define INC_GENVARS()
#define INC_TEXVARS()
#define INC_ALPHAVARS()
#define INC_RESOLVE()
#define g_bUpdateEffect 0
#define g_bSaveTex 0
#define g_bSaveResolved 0
#else // defined(ZEROGS_DEVBUILD)
// With ZEROGS_DEVBUILD each INC_*() bumps the matching g_n* counter and the
// flags are real variables defined elsewhere.
#define INC_GENVARS() ++g_nGenVars
#define INC_TEXVARS() ++g_nTexVars
#define INC_ALPHAVARS() ++g_nAlphaVars
#define INC_RESOLVE() ++g_nResolve
extern bool g_bUpdateEffect;
extern bool g_bSaveTex; // saves the current texture
extern bool g_bSaveResolved;
#endif // !defined(ZEROGS_DEVBUILD)
// Meaning of the individual stencil-buffer bits used by the renderer.
enum StencilBits
{
STENCIL_ALPHABIT = 1, // if set, dest alpha >= 0x80
STENCIL_PIXELWRITE = 2, // if set, pixel just written (reset after every Flush)
STENCIL_FBA = 4, // if set, just written pixel's alpha >= 0 (reset after every Flush)
STENCIL_SPECIAL = 8 // if set, indicates that pixel passed its alpha test (reset after every Flush)
//STENCIL_PBE = 16
};
// Mask of every bit except STENCIL_ALPHABIT. It also contains bit 16, which
// corresponds to the commented-out STENCIL_PBE value above.
#define STENCIL_CLEAR (2|4|8|16)
// One bit per colour channel; combine with | to build the mask passed to GL_COLORMASK().
enum ColorMask
{
COLORMASK_RED = 1,
COLORMASK_GREEN = 2,
COLORMASK_BLUE = 4,
COLORMASK_ALPHA = 8
};
// Calls glColorMask() with each channel enabled exactly when its COLORMASK_* bit is set in mask.
#define GL_COLORMASK(mask) glColorMask(!!((mask)&COLORMASK_RED), !!((mask)&COLORMASK_GREEN), !!((mask)&COLORMASK_BLUE), !!((mask)&COLORMASK_ALPHA))
// extern int g_nDepthBias;
extern float g_fBlockMult; // used for old cards, that do not support Alpha-32float textures. We store block data in u16 and use it.
extern u32 g_nCurVBOIndex;
extern u8* g_pbyGSClut;
extern int ppf;
extern bool s_bTexFlush;
extern vector<u32> s_vecTempTextures; // temporary textures, released at the end of every frame
extern GLuint g_vboBuffers[VB_NUMBUFFERS]; // VBOs for all drawing commands
extern CRangeManager s_RangeMngr; // manages overwritten memory // zz
#if 0
typedef union
{
struct
{
u8 _bNeedAlphaColor; // set if vAlphaBlendColor needs to be set
u8 _b2XAlphaTest; // Only valid when bNeedAlphaColor is set. if 1st bit set set, double all alpha testing values
// otherwise alpha testing needs to be done separately.
u8 _bDestAlphaColor; // set to 1 if blending with dest color (process only one tri at a time). If 2, dest alpha is always 1.
u8 _bAlphaClamping; // if first bit is set, do min; if second bit, do max
};
u32 _bAlphaState;
} g_flag_vars;
extern g_flag_vars g_vars;
#endif
//#define bNeedAlphaColor g_vars._bNeedAlphaColor
//#define b2XAlphaTest g_vars._b2XAlphaTest
//#define bDestAlphaColor g_vars._bDestAlphaColor
//#define bAlphaClamping g_vars._bAlphaClamping
void FlushTransferRanges(const tex0Info* ptex); //zz
// use to update the state
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
void SetAlphaVariables(const alphaInfo& ainfo); // zzz
//void ResetAlphaVariables();
inline void SetAlphaTestInt(pixTest curtest);
inline void RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor);
inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting);
inline void ProcessStencil(const VB& curvb);
inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor);
inline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor); // zz
void SetContextTarget(int context);
void SetWriteDepth();
bool IsWriteDepth();
void SetDestAlphaTest();
#endif // ZZOGLFLUSH_H_INCLUDED

View File

@ -212,7 +212,7 @@ typedef Vector4<float> float4;
// Reimplement, swiping a bunch of code from GSdx and adapting it. (specifically GSVector.h)
// This doesn't include more then half of the functions in there, as well as some of the structs...
#include <xmmintrin.h>
#include <immintrin.h>
#include "Pcsx2Types.h"

View File

@ -0,0 +1,564 @@
/* ZeroGS KOSMOS
* Copyright (C) 2005-2006 zerofrog@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "GS.h"
#include "Util.h"
#include "ZZoglMem.h"
#include "targets.h"
#include "x86.h"
#include "Mem_Swizzle.h"
#ifndef ZZNORMAL_MEMORY
bool allowed_psm[256] = {false, }; // Sometimes we got strange unknown psm
PSM_value PSM_value_Table[64] = {PSMT_BAD_PSM, }; // for int -> PSM_value
// Build a row-pointer view over a flat table of bh rows by bw columns.
//
// Allocates (with malloc) an array of bh pointers in which entry i points at
// &table[i * bw], so array[i][j] refers to the same element as table[i * bw + j].
// The table data itself is not copied. The caller owns the returned array and
// releases it with free().
//
// Returns NULL if the allocation fails.
inline u32** InitTable(int bh, int bw, u32* table) {
    u32** rows = (u32**)malloc(bh * sizeof(u32*));
    if (rows == NULL) {
        // Out of memory: report the failure instead of writing through a null pointer.
        return NULL;
    }
    for (int i = 0; i < bh; i++) {
        rows[i] = &table[i * bw];
    }
    return rows;
}
// initialize dynamic arrays (u32**) for each regular psm.
inline void SetTable(int psm) {
switch (psm) {
case PSMCT32:
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32[0][0]);
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
break;
case PSMCT24:
g_pageTable[psm] = g_pageTable[PSMCT32];;
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
break;
case PSMCT16:
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16[0][0]);
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16[0][0]);
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
break;
case PSMCT16S:
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16S[0][0]);
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16S[0][0]);
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
break;
case PSMT8:
g_pageTable[psm] = InitTable( 64, 128, &g_pageTable8[0][0]);
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
break;
case PSMT8H:
g_pageTable[psm] = g_pageTable[PSMCT32];
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
break;
case PSMT4:
g_pageTable[psm] = InitTable(128, 128, &g_pageTable4[0][0]);
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
break;
case PSMT4HL:
case PSMT4HH:
g_pageTable[psm] = g_pageTable[PSMCT32];
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
break;
case PSMT32Z:
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32Z[0][0]);
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
break;
case PSMT24Z:
g_pageTable[psm] = g_pageTable[PSMT32Z];
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
break;
case PSMT16Z:
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16Z[0][0]);
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16Z[0][0]);
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
break;
case PSMT16SZ:
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16SZ[0][0]);
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16SZ[0][0]);
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
break;
}
}
// Run SetTable() for every psm index in [0, MAX_PSM), in ascending order, so
// that the u32** tables of all regular psm values are allocated and filled.
void FillBlockTables() {
    int psm = 0;
    while (psm < MAX_PSM) {
        SetTable(psm);
        ++psm;
    }
}
// Release the row-pointer arrays created by FillBlockTables() / SetTable().
//
// The page tables of PSMT8H, PSMT4HL, PSMT4HH, PSMCT24 and PSMT24Z are aliases
// of another psm's page table (see SetTable), so they are not freed here; the
// owning entry frees the memory. Every slot is reset to NULL afterwards so that
// calling this function twice cannot double-free and no dangling pointer is
// left behind.
void DestroyBlockTables() {
    for (int i = 0; i < MAX_PSM; i++) {
        const bool sharedPageTable =
            (i == PSMT8H || i == PSMT4HL || i == PSMT4HH || i == PSMCT24 || i == PSMT24Z);

        // free(NULL) is a no-op, so entries that were never allocated are safe.
        if (!sharedPageTable)
            free(g_pageTable[i]);
        g_pageTable[i] = NULL;

        free(g_blockTable[i]);
        g_blockTable[i] = NULL;

        free(g_columnTable[i]);
        g_columnTable[i] = NULL;
    }
}
void FillNewPageTable() {
int k = 0;
for (int psm = 0; psm < MAX_PSM; psm ++)
if (allowed_psm[psm]) {
for (u32 i = 0; i < 127; i++)
for(u32 j = 0; j < 127; j++) {
u32 address;
u32 shift;
address = g_pageTable[psm][i & ZZ_DT[psm][3]][j & ZZ_DT[psm][4]];
shift = (((address << ZZ_DT[psm][5]) & 0x7 ) << 3)+ ZZ_DT[psm][7]; // last part is for 8H, 4HL and 4HH -- they have data from 24 and 28 byte
g_pageTable2[k][i][j] = (address >> ZZ_DT[psm][0]) + (shift << 16); // now lower 16 byte of page table is 32-bit aligned address, and upper --
// shift.
}
g_pageTableNew[psm] = InitTable( 128, 128, &g_pageTable2[k][0][0]);
k++;;
}
}
BLOCK m_Blocks[MAX_PSM]; // One BLOCK per slot in [0, MAX_PSM) so that blocks are indexable (presumably by psm -- confirm against users).
// Write pixels one at a time until column j reaches endj or the nSize budget
// is exhausted. Used for the unaligned runs at the beginning and end of a row;
// the caller must make sure all of these pixels lie inside one widthlimit span.
//
// j, k and nSize are updated in place (one step per pixel written); i is only
// read. widthlimit is accepted for symmetry with the callers and is not used.
// Returns true when nSize has dropped to exactly 0.
template <int psm>
inline bool DoOneTransmitStep(void* pstart, int& nSize, int endj, const void* pbuf, int& k, int& i, int& j, int widthlimit) {
    while (j < endj && nSize > 0) {
        writePixelMem<psm, false>((u32*)pstart, j%2048, i%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
        j++;
        k++;
        nSize -= 1;
    }

    const bool bufferConsumed = (nSize == 0);
    return bufferConsumed;
}
// FFX has PSMT8 transfers (starting intro -- sword and hair).
// Persona 4 text at start is PSMCT32 (and there is also a PSMCT16 transfer somewhere after that).
// Tekken V has PSMCT24 and PSMT4 transfers.
//
// Transfers the rows of a "Y" strip, from row i up to (but not including) endY.
// Each row is written in three parts: an unaligned head written pixel by pixel,
// a middle part written widthlimit pixels at a time, and a tail written pixel
// by pixel up to gs.imageEnd.x. This differs from Zerofrog's code, which had a
// strict fast-path condition (the transferred width and endY had to be
// multiples of widthlimit), so a typical 255-pixel block fell back to
// one-by-one transfers.
// TODO: check whether Unaligned_Start and the unaligned tail are usually 0; if
// so a fast path without the per-pixel steps could be tried.
//
// i, j, k and nSize are updated in place. Returns true as soon as one of the
// per-pixel steps reports that nSize reached 0, false once all rows up to endY
// have been processed.
template <int psm, int widthlimit>
inline bool TRANSMIT_HOSTLOCAL_Y(u32* pbuf, int& nSize, u8* pstart, int endY, int& i, int& j, int& k) {
// if (psm != PSMT8 && psm != 0 && psm != PSMT4 && psm != PSMCT24)
// ERROR_LOG("This is usable function TRANSMIT_HOSTLOCAL_Y at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);
int q = (gs.trxpos.dx - j) % widthlimit;
if (DoOneTransmitStep<psm>(pstart, nSize, q, pbuf, k, i, j, widthlimit)) return true; // Intended to leave j and dx congruent modulo widthlimit. NOTE(review): q is passed as an absolute column bound, not a count -- confirm.
int Unaligned_Start = (gs.trxpos.dx % widthlimit == 0) ? 0 : widthlimit - gs.trxpos.dx % widthlimit; // gs.trxpos.dx + Unaligned_Start is a multiple of widthlimit
for (; i < endY; ++i) {
if (DoOneTransmitStep<psm>(pstart, nSize, j + Unaligned_Start, pbuf, k, i, j, widthlimit)) return true; // Intended to leave j % widthlimit == 0.
//assert (j % widthlimit != 0);
for (; j < gs.imageEnd.x - widthlimit + 1 && nSize >= widthlimit; j += widthlimit, nSize -= widthlimit) {
writePixelsFromMemory<psm, true, widthlimit>(pstart, pbuf, k, j % 2048, i % 2048, gs.dstbuf.bw);
}
assert ( gs.imageEnd.x - j < widthlimit || nSize < widthlimit);
if (DoOneTransmitStep<psm>(pstart, nSize, gs.imageEnd.x, pbuf, k, i, j, widthlimit)) return true; // The previous loop ends for one of two reasons: 1) nSize < widthlimit, or
// 2) j >= gs.imageEnd.x - widthlimit + 1. Either way, write the remaining pixels up to
// gs.imageEnd.x one by one; that is fewer than widthlimit pixels.
j = gs.trxpos.dx;
}
return false;
}
// Transfers the "X" strip: for each of blockheight rows starting at row i,
// writes the columns from startX up to gs.imageEnd.x pixel by pixel, then
// advances the source index k by (pitch - fracX) to reach the next row.
// Seen with PSMT4 in Tekken V. Logs an error for any psm other than PSMT8 or PSMT4.
// j and k are updated in place; i and nSize are only read.
template <int psm, int widthlimit>
inline void TRANSMIT_HOSTLOCAL_X(u32* pbuf, int& nSize, u8* pstart, int& i, int& j, int& k, int blockheight, int startX, int pitch, int fracX) {
    if (!(psm == PSMT8 || psm == PSMT4)) {
        ZZLog::Error_Log("This is usable function TRANSMIT_HOSTLOCAL_X at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);
    }

    const int rowSkip = pitch - fracX;

    for (int row = 0; row < blockheight; ++row) {
        j = startX;
        while (j < gs.imageEnd.x) {
            writePixelMem<psm, false>((u32*)pstart, j%2048, (i + row)%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
            j++;
            k++;
        }
        k += rowSkip;
    }
}
// Size in bytes of `pitch` pixels in format psm:
// (bits per pixel * pitch) >> 3, i.e. the bit count divided by 8 and rounded down.
template <int psm>
inline int TRANSMIT_PITCH(int pitch) {
return (PSM_BITS_PER_PIXEL<psm>() * pitch) >> 3;
}
// ------------------------
// | Y |
// ------------------------
// | block | |
// | aligned area | X |
// | | |
// ------------------------
// | Y |
// ------------------------
// Record where a host->local transfer stopped and compute its return value.
//
// If row i is still inside the image, the current position (j, i) is saved in
// gs.image so the next call can resume from it; otherwise the transfer is
// marked as finished. Returns (nSize * TRANSMIT_PITCH<psmX>(2) + nLeftOver) / 2.
template <int psmX>
int FinishTransfer(int i, int j, int nSize, int nLeftOver)
{
    if (i < gs.imageEnd.y)
    {
        /* update new params */
        gs.image.y = i;
        gs.image.x = j;
    }
    else
    {
        assert( gs.transferring == false || i == gs.imageEnd.y );
        gs.transferring = false;
    }

    return (nSize * TRANSMIT_PITCH<psmX>(2) + nLeftOver)/2;
}
// Copies a host-supplied pixel stream into GS memory for pixel format psmX.
// Template parameters:
//   psmX        - pixel storage format constant
//   widthlimit  - width granularity; used in the modulo checks below and passed
//                 on to TRANSMIT_HOSTLOCAL_Y / TRANSMIT_HOSTLOCAL_X
//   blockbits   - factor that turns a pixel address into a byte offset for SwizzleBlock
//   blockwidth, blockheight - block dimensions used for the alignment logic
// pbyMem is the source data and nQWordSize the amount supplied
// (NOTE(review): presumably quadwords, judging by the name -- confirm with the caller).
// The current position comes from gs.image / gs.trxpos / gs.imageEnd; the final
// position is stored by FinishTransfer, whose result is returned.
template<int psmX, int widthlimit, int blockbits, int blockwidth, int blockheight>
int TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
assert( gs.imageTransfer == XFER_HOST_TO_LOCAL );
// Destination base: gs.dstbuf.bp * 256 bytes into the GS memory array.
u8* pstart = g_pbyGSMemory + gs.dstbuf.bp*256;
int i = gs.image.y, j = gs.image.x;
const u8* pbuf = (const u8*)pbyMem;
// nQWordSize*8 is split into whole TRANSMIT_PITCH<psmX>(2) units (nSize) and a remainder (nLeftOver).
int nLeftOver = (nQWordSize*4*2)%(TRANSMIT_PITCH<psmX>(2));
int nSize = nQWordSize*4*2/TRANSMIT_PITCH<psmX>(2);
// Never process more than imageNew.w * imageNew.h.
nSize = min(nSize, gs.imageNew.w * gs.imageNew.h);
int pitch, area, fracX;
// First row at or after i that lies on a block boundary.
int endY = ROUND_UPPOW2(i, blockheight);
// Lower-right corner of the block-aligned area (see the diagram above this function).
Point alignedPt;
alignedPt.x = ROUND_DOWNPOW2(gs.imageEnd.x, blockwidth);
alignedPt.y = ROUND_DOWNPOW2(gs.imageEnd.y, blockheight);
bool bAligned;
// Block transfer is possible only if the rectangle starts on a block column, we are
// at the start of a row, and the aligned area is non-empty in both directions.
bool bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (j == gs.trxpos.dx) && (alignedPt.y > endY) && alignedPt.x > gs.trxpos.dx;
if( (gs.imageEnd.x - gs.trxpos.dx) % widthlimit ) {
/* hack */
int testwidth = (int)nSize - (gs.imageEnd.y - i) * (gs.imageEnd.x - gs.trxpos.dx) + (j - gs.trxpos.dx);
if((testwidth <= widthlimit) && (testwidth >= -widthlimit)) {
/* don't transfer */
/*ZZLog::Debug_Log("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEnd.x, nQWordSize);*/
// NOTE(review): only the flag is cleared here; execution continues below.
gs.transferring = false;
}
bCanAlign = false;
}
/* first align on block boundary */
if( MOD_POW2(i, blockheight) || !bCanAlign ) {
if( !bCanAlign )
endY = gs.imageEnd.y; /* transfer the whole image */
else
assert( endY < gs.imageEnd.y); /* part of alignment condition */
// NOTE(review): `limit` is computed but never read afterwards; the call below
// uses the widthlimit template argument instead.
int limit = widthlimit;
if (((gs.imageEnd.x - gs.trxpos.dx) % widthlimit) || ((gs.imageEnd.x - j) % widthlimit))
/* transmit with a width of 1 */
limit = 1 + (gs.dstbuf.psm == PSMT4);
/*TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, limit, endY)*/
int k = 0;
// A true result from TRANSMIT_HOSTLOCAL_Y ends the transfer call immediately.
if (TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, endY, i, j, k))
return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
// Advance the source pointer by the k pixels consumed.
pbuf += TRANSMIT_PITCH<psmX>(k);
if (nSize == 0 || i == gs.imageEnd.y) return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
}
assert( MOD_POW2(i, blockheight) == 0 && j == gs.trxpos.dx);
/* can align! */
pitch = gs.imageEnd.x - gs.trxpos.dx;
area = pitch * blockheight;
fracX = gs.imageEnd.x - alignedPt.x;
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
// NOTE(review): bAligned is assigned here but not read in the rest of this function.
bAligned = !((uptr)pbuf & 0xf) && ((TRANSMIT_PITCH<psmX>(pitch)&0xf) == 0);
/* transfer aligning to blocks */
for(; i < alignedPt.y && nSize >= area; i += blockheight, nSize -= area) {
// One SwizzleBlock call per block column; the source advances by one block width each time.
for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += blockwidth, pbuf += TRANSMIT_PITCH<psmX>(blockwidth)) {
SwizzleBlock<psmX>((u32*)(pstart + getPixelAddress<psmX>(tempj, i, gs.dstbuf.bw)*blockbits/8),
(u32*)pbuf, TRANSMIT_PITCH<psmX>(pitch));
}
/* transfer the rest */
if( alignedPt.x < gs.imageEnd.x ) {
int k = 0;
TRANSMIT_HOSTLOCAL_X<psmX, widthlimit>((u32*)pbuf, nSize, pstart, i, j, k, blockheight, alignedPt.x, pitch, fracX);
pbuf += TRANSMIT_PITCH<psmX>(k - alignedPt.x + gs.trxpos.dx);
}
// No unaligned columns: skip the remaining (blockheight - 1) source rows of this block row.
else pbuf += (blockheight-1)*TRANSMIT_PITCH<psmX>(pitch);
j = gs.trxpos.dx;
}
// Whatever is left after the block-aligned rows goes through the row-wise path.
if( TRANSMIT_PITCH<psmX>(nSize)/4 > 0 ) {
int k = 0;
TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, gs.imageEnd.y, i, j, k);
pbuf += TRANSMIT_PITCH<psmX>(k);
/* sometimes wrong sizes are sent (tekken tag) */
assert( gs.transferring == false || TRANSMIT_PITCH<psmX>(nSize)/4 <= 2 );
}
return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
}
// Per-format entry points for host -> local transfers. Each one forwards to
// TransferHostLocal<psm, widthlimit, blockbits, blockwidth, blockheight> with
// the constants for its pixel format, so that it can be stored in the
// BLOCK::TransferHostLocal function pointer.

// PSMCT32: widthlimit 2, blockbits 32, 8x8 block.
inline int TransferHostLocal32(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMCT32, 2, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMT32Z: same constants as PSMCT32.
inline int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT32Z, 2, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMCT24: widthlimit 8, blockbits 32, 8x8 block.
inline int TransferHostLocal24(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMCT24, 8, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMT24Z: same constants as PSMCT24.
inline int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT24Z, 8, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMCT16: widthlimit 4, blockbits 16, 16x8 block.
inline int TransferHostLocal16(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMCT16, 4, 16, 16, 8>( pbyMem, nQWordSize);
}
// PSMCT16S: same constants as PSMCT16.
inline int TransferHostLocal16S(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMCT16S, 4, 16, 16, 8>( pbyMem, nQWordSize);
}
// PSMT16Z: same constants as PSMCT16.
inline int TransferHostLocal16Z(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT16Z, 4, 16, 16, 8>( pbyMem, nQWordSize);
}
// PSMT16SZ: same constants as PSMCT16.
inline int TransferHostLocal16SZ(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT16SZ, 4, 16, 16, 8>( pbyMem, nQWordSize);
}
// PSMT8: widthlimit 4, blockbits 8, 16x16 block.
inline int TransferHostLocal8(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT8, 4, 8, 16, 16>( pbyMem, nQWordSize);
}
// PSMT4: widthlimit 8, blockbits 4, 32x16 block.
inline int TransferHostLocal4(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT4, 8, 4, 32, 16>( pbyMem, nQWordSize);
}
// PSMT8H: widthlimit 4, blockbits 32, 8x8 block.
inline int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT8H, 4, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMT4HL: widthlimit 8, blockbits 32, 8x8 block.
inline int TransferHostLocal4HL(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT4HL, 8, 32, 8, 8>( pbyMem, nQWordSize);
}
// PSMT4HH: same constants as PSMT4HL.
inline int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize)
{
return TransferHostLocal<PSMT4HH, 8, 32, 8, 8>( pbyMem, nQWordSize);
}
// Local -> host transfer entry points, one per pixel format, matching the
// BLOCK::TransferLocalHost function-pointer signature. In this file each body
// consists solely of FUNCLOG; neither pbyMem nor nQWordSize is used.
void TransferLocalHost32(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost24(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16S(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost8(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost8H(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4HL(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4HH(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost32Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) { FUNCLOG }
// Initialises BLOCK `b` for pixel format psmX and writes that format's page
// table into the lookup-texture buffers.
//   b             - block descriptor to fill (dimensions, bpp, table pointers, shader constants)
//   floatfmt      - non-zero: entries are written to vBlockData as normalised floats and
//                   vBilinearData is filled too; zero: entries are written as u16
//   vBlockData    - raw storage of the block texture, BLOCK_TEXWIDTH entries per row
//   vBilinearData - raw storage of the float4 texture (only touched when floatfmt is set)
//   ox, oy        - position of this format's table inside the textures
// Both vectors must already be large enough; see BLOCK::FillBlocks for the resize.
inline void FILL_BLOCK(BLOCK& b, int floatfmt, vector<char>& vBlockData, vector<char>& vBilinearData, int ox, int oy, int psmX) {
// Table dimensions and the packing factor come from the ZZ_DT row of this format.
int bw = ZZ_DT[psmX][4] + 1;
int bh = ZZ_DT[psmX][3] + 1;
int mult = 1 << ZZ_DT[psmX][0];
b.vTexDims = float4 (BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)(bh), 0, 0);
b.vTexBlock = float4( (float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT);
b.width = bw;
b.height = bh;
b.colwidth = bh / 4;
b.colheight = bw / 8;
b.bpp = 32/mult;
b.pageTable = g_pageTable[psmX];
b.blockTable = g_blockTable[psmX];
b.columnTable = g_columnTable[psmX];
// This is never true.
//assert( sizeof(g_pageTable[psmX]) == bw*bh*sizeof(g_pageTable[psmX][0][0]) );
// Two views of the same buffer, offset to (ox, oy): float entries or u16 entries.
float* psrcf = (float*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
u16* psrcw = (u16*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
for(int i = 0; i < bh; ++i) {
for(int j = 0; j < bw; ++j) {
/* fill the table */
// Entry = block index * 64 * mult + column-table offset inside the block.
u32 u = g_blockTable[psmX][(i / b.colheight)][(j / b.colwidth)] * 64 * mult + g_columnTable[psmX][i%b.colheight][j%b.colwidth];
// b.pageTable aliases g_pageTable[psmX], so this also fills the global table.
b.pageTable[i][j] = u;
if( floatfmt ) {
psrcf[i*BLOCK_TEXWIDTH+j] = (float)(u) / (float)(GPU_TEXWIDTH*mult);
}
else {
psrcw[i*BLOCK_TEXWIDTH+j] = u;
}
}
}
if( floatfmt ) {
// For every entry store itself plus its right, lower and lower-right
// neighbours, wrapping around at the table edges.
float4* psrcv = (float4*)&vBilinearData[0] + ox + oy * BLOCK_TEXWIDTH;
for(int i = 0; i < bh; ++i) {
for(int j = 0; j < bw; ++j) {
float4* pv = &psrcv[i*BLOCK_TEXWIDTH+j];
pv->x = psrcf[i*BLOCK_TEXWIDTH+j];
pv->y = psrcf[i*BLOCK_TEXWIDTH+((j+1)%bw)];
pv->z = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+j];
pv->w = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+((j+1)%bw)];
}
}
}
}
// Sizes the lookup-texture buffers and fills m_Blocks with one descriptor per
// pixel format.
//   vBlockData    - resized to BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT entries of 4 bytes
//                   (floatfmt) or 2 bytes (otherwise)
//   vBilinearData - resized to BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT float4 entries,
//                   only when floatfmt is non-zero
//   floatfmt      - selects float or u16 table entries (forwarded to FILL_BLOCK)
// The local `b` is reused: formats that do not call FILL_BLOCK themselves keep
// the layout of the preceding FILL_BLOCK call and only swap the transfer
// function pointers, so the statement order below matters.
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
{
FUNCLOG
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * (floatfmt ? 4 : 2));
if (floatfmt)
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
BLOCK b;
// Formats not assigned below stay zeroed.
memset(m_Blocks, 0, sizeof(m_Blocks));
// 32
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 0, PSMCT32);
b.TransferHostLocal = TransferHostLocal32;
b.TransferLocalHost = TransferLocalHost32;
m_Blocks[PSMCT32] = b;
// 24 (same as 32 except write/readPixel are different)
b.TransferHostLocal = TransferHostLocal24;
b.TransferLocalHost = TransferLocalHost24;
m_Blocks[PSMCT24] = b;
// 8H (same as 32 except write/readPixel are different)
b.TransferHostLocal = TransferHostLocal8H;
b.TransferLocalHost = TransferLocalHost8H;
m_Blocks[PSMT8H] = b;
// 4HL (still the PSMCT32 layout)
b.TransferHostLocal = TransferHostLocal4HL;
b.TransferLocalHost = TransferLocalHost4HL;
m_Blocks[PSMT4HL] = b;
// 4HH (still the PSMCT32 layout)
b.TransferHostLocal = TransferHostLocal4HH;
b.TransferLocalHost = TransferLocalHost4HH;
m_Blocks[PSMT4HH] = b;
// 32z
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 0, PSMT32Z);
b.TransferHostLocal = TransferHostLocal32Z;
b.TransferLocalHost = TransferLocalHost32Z;
m_Blocks[PSMT32Z] = b;
// 24Z (same as 32Z except write/readPixel are different)
b.TransferHostLocal = TransferHostLocal24Z;
b.TransferLocalHost = TransferLocalHost24Z;
m_Blocks[PSMT24Z] = b;
// 16
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 32, PSMCT16);
b.TransferHostLocal = TransferHostLocal16;
b.TransferLocalHost = TransferLocalHost16;
m_Blocks[PSMCT16] = b;
// 16s
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 32, PSMCT16S);
b.TransferHostLocal = TransferHostLocal16S;
b.TransferLocalHost = TransferLocalHost16S;
m_Blocks[PSMCT16S] = b;
// 16z
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 96, PSMT16Z);
b.TransferHostLocal = TransferHostLocal16Z;
b.TransferLocalHost = TransferLocalHost16Z;
m_Blocks[PSMT16Z] = b;
// 16sz
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 96, PSMT16SZ);
b.TransferHostLocal = TransferHostLocal16SZ;
b.TransferLocalHost = TransferLocalHost16SZ;
m_Blocks[PSMT16SZ] = b;
// 8
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 160, PSMT8);
b.TransferHostLocal = TransferHostLocal8;
b.TransferLocalHost = TransferLocalHost8;
m_Blocks[PSMT8] = b;
// 4
FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 224, PSMT4);
b.TransferHostLocal = TransferHostLocal4;
b.TransferLocalHost = TransferLocalHost4;
m_Blocks[PSMT4] = b;
}
#endif

View File

@ -0,0 +1,790 @@
/* ZeroGS KOSMOS
* Copyright (C) 2005-2006 zerofrog@gmail.com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __ZZOGL_MEM_H__
#define __ZZOGL_MEM_H__
#include <assert.h>
#include <vector>
#include "GS.h"
#include "Util.h"
#include "Mem.h"
#ifndef ZZNORMAL_MEMORY
extern u32 g_blockTable32[4][8];
extern u32 g_blockTable32Z[4][8];
extern u32 g_blockTable16[8][4];
extern u32 g_blockTable16S[8][4];
extern u32 g_blockTable16Z[8][4];
extern u32 g_blockTable16SZ[8][4];
extern u32 g_blockTable8[4][8];
extern u32 g_blockTable4[8][4];
extern u32 g_columnTable32[8][8];
extern u32 g_columnTable16[8][16];
extern u32 g_columnTable8[16][16];
extern u32 g_columnTable4[16][32];
//--
extern u32 g_pageTable32[32][64];
extern u32 g_pageTable32Z[32][64];
extern u32 g_pageTable16[64][64];
extern u32 g_pageTable16S[64][64];
extern u32 g_pageTable16Z[64][64];
extern u32 g_pageTable16SZ[64][64];
extern u32 g_pageTable8[64][128];
extern u32 g_pageTable4[128][128];
//maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
// This table is used for fast access to memory storage data.
extern u32 ZZ_DT[MAX_PSM][TABLE_WIDTH];
// Maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
extern u32** g_pageTable[MAX_PSM];
extern u32** g_blockTable[MAX_PSM];
extern u32** g_columnTable[MAX_PSM];
extern u32 g_pageTable2[MAX_PSM][127][127];
extern u32** g_pageTableNew[MAX_PSM];
// rest not visible externally
// Per-pixel-format descriptor: page dimensions, lookup-table pointers, shader
// constants and the transfer routines for that format. One instance per PSM
// value is stored in m_Blocks.
struct BLOCK
{
// Zero-initialises every member, including the pointers.
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
// shader constants for this block
float4 vTexBlock;
float4 vTexDims;
int width, height; // dims of one page in pixels
int bpp; // bits per pixel
int colwidth, colheight; // divisors / moduli used when indexing the block and column tables
u32** pageTable; // offset inside each page
u32** blockTable; // per-format block table
u32** columnTable; // per-format column table
// Nobody use this, so we better remove it.
// u32 (*getPixelAddress)(int x, int y, u32 bp, u32 bw);
// u32 (*getPixelAddress_0)(int x, int y, u32 bw);
// void (*writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
// void (*writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
// u32 (*readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
// u32 (*readPixel_0)(const void* pmem, int x, int y, u32 bw);
// Host -> local transfer routine for this format.
int (*TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
// Local -> host transfer routine for this format.
void (*TransferLocalHost)(void* pbyMem, u32 nQWordSize);
// texture must be of dims BLOCK_TEXWIDTH and BLOCK_TEXHEIGHT
static void FillBlocks(std::vector<char>& vBlockData, std::vector<char>& vBilinearData, int floatfmt);
};
void FillBlockTables();
void DestroyBlockTables();
void FillNewPageTable();
extern BLOCK m_Blocks[];
extern u32 g_blockTable32[4][8];
extern u32 g_blockTable32Z[4][8];
extern u32 g_blockTable16[8][4];
extern u32 g_blockTable16S[8][4];
extern u32 g_blockTable16Z[8][4];
extern u32 g_blockTable16SZ[8][4];
extern u32 g_blockTable8[4][8];
extern u32 g_blockTable4[8][4];
extern u32 g_columnTable32[8][8];
extern u32 g_columnTable16[8][16];
extern u32 g_columnTable8[16][16];
extern u32 g_columnTable4[16][32];
extern u32 g_pageTable32[32][64];
extern u32 g_pageTable32Z[32][64];
extern u32 g_pageTable16[64][64];
extern u32 g_pageTable16S[64][64];
extern u32 g_pageTable16Z[64][64];
extern u32 g_pageTable16SZ[64][64];
extern u32 g_pageTable8[64][128];
extern u32 g_pageTable4[128][128];
extern u32** g_pageTable[MAX_PSM];
extern u32** g_blockTable[MAX_PSM];
extern u32** g_columnTable[MAX_PSM];
extern u32 ZZ_DT[MAX_PSM][TABLE_WIDTH];
extern u32** g_pageTableNew[MAX_PSM];
// Stores `pixel` into *dst, touching only the bits selected by `mask`.
// With the default all-ones mask the word is simply overwritten; otherwise the
// bits of *dst outside the mask are preserved.
static __forceinline void MaskedOR(u32* dst, u32 pixel, u32 mask = 0xffffffff) {
	if (mask != 0xffffffff) {
		const u32 preserved = *dst & (~mask);
		*dst = preserved | (pixel & mask);
		return;
	}
	*dst = pixel;
}
// These two macros may look silly, but they serve one important purpose: they turn the runtime
// variable psm (and psm2 in the second macro) into a compile-time constant, so the optimiser can pick the matching template instantiation.
// PSM_SWITCHCASE(X): switches on a variable named `psm` that must be in scope
// at the expansion site and executes statement X once with `psmC` declared as
// a const int equal to that value, so X can use psmC as a template argument.
// A psm value not listed below executes nothing.
#define PSM_SWITCHCASE(X) { \
switch (psm) { \
case PSMCT32: { \
const int psmC = PSMCT32; \
X; } \
break; \
case PSMT32Z: { \
const int psmC = PSMT32Z; \
X; } \
break; \
case PSMCT24: { \
const int psmC = PSMCT24; \
X; } \
break; \
case PSMT24Z: { \
const int psmC = PSMT24Z; \
X; } \
break; \
case PSMCT16: { \
const int psmC = PSMCT16; \
X; } \
break; \
case PSMCT16S: { \
const int psmC = PSMCT16S; \
X; } \
break; \
case PSMT16Z: { \
const int psmC = PSMT16Z; \
X; } \
break; \
case PSMT16SZ: { \
const int psmC = PSMT16SZ; \
X; } \
break; \
case PSMT8: { \
const int psmC = PSMT8; \
X; } \
break; \
case PSMT8H: { \
const int psmC = PSMT8H; \
X; } \
break; \
case PSMT4HH: { \
const int psmC = PSMT4HH; \
X; } \
break; \
case PSMT4HL: { \
const int psmC = PSMT4HL; \
X; } \
break; \
case PSMT4: { \
const int psmC = PSMT4; \
X; } \
break; \
}\
}
// PSM_SWITCHCASE_2(X): two-format variant of PSM_SWITCHCASE. Requires
// variables `psm` and `psm2` in scope at the expansion site and executes
// statement X once with const ints psmC (== psm) and psmC1 (== psm2).
// Only pairs of formats with the same pixel size are covered:
//   32 <-> 32Z, 24 <-> 24Z, 8 <-> 8H (two-way cases: any psm2 other than the
//   tested one takes the `else` branch), and the 16-bit and 4-bit families
//   (inner switch: an unlisted psm2 executes nothing).
// Fix: the PSMT32Z and PSMT8H cases previously tested psm2 against the wrong
// constant, so psmC1 ended up being the opposite of psm2.
#define PSM_SWITCHCASE_2(X) { \
	switch (psm) { \
		case PSMCT32: \
			if( psm2 == PSMCT32 ) { const int psmC = PSMCT32, psmC1 = PSMCT32; X; } \
			else { const int psmC = PSMCT32, psmC1 = PSMT32Z; X; } \
			break; \
		case PSMCT24: \
			if( psm2 == PSMCT24 ) { const int psmC = PSMCT24, psmC1 = PSMCT24; X; } \
			else { const int psmC = PSMCT24, psmC1 = PSMT24Z; X; } \
			break; \
		case PSMT32Z: \
			if( psm2 == PSMCT32 ) { const int psmC = PSMT32Z, psmC1 = PSMCT32; X; } \
			else { const int psmC = PSMT32Z, psmC1 = PSMT32Z; X; } \
			break; \
		case PSMT24Z: \
			if( psm2 == PSMCT24 ) { const int psmC = PSMT24Z, psmC1 = PSMCT24; X; } \
			else { const int psmC = PSMT24Z, psmC1 = PSMT24Z; X; } \
			break; \
		case PSMCT16: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMCT16, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMCT16, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMCT16, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMCT16, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMCT16S: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMCT16S, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMCT16S, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMCT16S, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMCT16S, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT16Z: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMT16Z, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMT16Z, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMT16Z, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMT16Z, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT16SZ: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMT16SZ, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMT16SZ, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMT16SZ, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMT16SZ, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT8: \
			if( psm2 == PSMT8 ) { const int psmC = PSMT8, psmC1 = PSMT8; X; } \
			else { const int psmC = PSMT8, psmC1 = PSMT8H; X; } \
			break; \
		case PSMT8H: \
			if( psm2 == PSMT8 ) { const int psmC = PSMT8H, psmC1 = PSMT8; X; } \
			else { const int psmC = PSMT8H, psmC1 = PSMT8H; X; } \
			break; \
		case PSMT4: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
		case PSMT4HL: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4HL, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4HL, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4HL, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
		case PSMT4HH: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4HH, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4HH, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4HH, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
	} \
}
// Helper for setPsmtConstantsX: stores one row of layout constants into the
// caller's variables (assignment order A, B, C, D, E, F, H, G).
static __forceinline void assignPsmtConstants(u8& A, u8& B, u8& C, u8& D, u8& E, u8& F, u32& G, u8& H,
		u8 a, u8 b, u8 c, u8 d, u8 e, u8 f, u8 h, u32 g) {
	A = a; B = b; C = c; D = d; E = e; F = f; H = h; G = g;
}

// Loads the memory-layout constants for pixel format psm. As used by the
// address helpers in this file:
//   A - right shift applied to y to get the page row
//   B - right shift applied to x and to bw to get the page column / pages per row
//   C - left shift applied to the page base address; also (3 - C) scales table entries
//   D - mask applied to y to index the page table
//   E - mask applied to x to index the page table
//   F - constant bit offset added to the pixel shift (non-zero for 8H / 4HL / 4HH)
//   G - bit mask of one pixel before shifting
//   H - NOTE(review): not read by the helpers visible here; looks like pixels per 32-bit word
// An unlisted psm leaves all outputs unchanged.
template <int psm>
static __forceinline void setPsmtConstantsX(u8& A, u8& B, u8& C, u8& D, u8& E, u8& F, u32& G, u8& H) {
	switch (psm) {
		case PSMCT32:
		case PSMT32Z:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 5, 6, 0, 31, 63, 0, 1, 0xffffffff);
			break;

		case PSMCT24:
		case PSMT24Z:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 5, 6, 0, 31, 63, 0, 1, 0xffffff);
			break;

		case PSMT8H:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 5, 6, 0, 31, 63, 24, 4, 0xff);
			break;

		case PSMT4HH:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 5, 6, 0, 31, 63, 28, 8, 0xf);
			break;

		case PSMT4HL:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 5, 6, 0, 31, 63, 24, 8, 0xf);
			break;

		case PSMCT16:
		case PSMT16Z:
		case PSMCT16S:
		case PSMT16SZ:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 6, 6, 1, 63, 63, 0, 2, 0xffff);
			break;

		case PSMT8:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 6, 7, 2, 63, 127, 0, 4, 0xff);
			break;

		case PSMT4:
			assignPsmtConstants(A, B, C, D, E, F, G, H, 7, 7, 3, 127, 127, 0, 8, 0xf);
			break;
	}
}
// This is where the NEW_CODE define used to be.
// ------------------------------------------ get Address functions ------------------------------------
// A single function covers every pixel format.
// Warning: bw comes before bp here (the reverse of the legacy wrappers) so that
// bp can have a default value -- this is C++, not C, so default arguments are available.
// Returns the address of pixel (x, y) for format psm, in units of pixels of
// that format: the page base (bp * 64 + page index * 2048 words, scaled by the
// format's C constant) plus the in-page offset from g_pageTable.
//   bw - buffer width, shifted right by the format's B constant to get pages per row
//   bp - base pointer; defaults to 0
template <int psm>
static __forceinline u32 getPixelAddress(int x, int y, u32 bw, u32 bp = 0) {
	u8 A = 0, B = 0, C = 0, D = 0, E = 0, F = 0, H = 0;
	u32 G = 0;
	setPsmtConstantsX<psm>(A, B, C, D, E, F, G, H);

	// Index of the page that contains (x, y).
	const u32 basepage = ((y>>A) * (bw>>B)) + (x>>B);

	return ((bp * 64 + basepage * 2048) << C) + g_pageTable[psm][y&D][x&E];
}
// It's Zerofrog's function. I need to eliminate them all! All access should be 32-bit aligned.
// Runtime-psm wrapper: dispatches to getPixelAddress<psmC> through
// PSM_SWITCHCASE. Returns 0 when psm is not one of the formats the macro handles.
static __forceinline u32 getPixelAddress(int psm, int x, int y, u32 bw, u32 bp = 0) {
PSM_SWITCHCASE(return getPixelAddress<psmC>(x, y, bw, bp) ;)
return 0;
}
// This is compatibility code, for reference,
// Legacy-style wrappers around getPixelAddress<psm>.
// Def_getPixelAddress(psmT, psmX) defines two functions:
//   getPixelAddress<psmT>(x, y, bp, bw)  - note the old (bp, bw) argument order
//   getPixelAddress<psmT>_0(x, y, bw)    - same with bp == 0
// Fix: the macro's last line no longer ends with a line-continuation
// backslash, which would splice whatever line follows into the macro body.
#define Def_getPixelAddress(psmT, psmX) \
	static __forceinline u32 getPixelAddress##psmT(int x, int y, u32 bp, u32 bw) { \
		return getPixelAddress<psmX>(x, y, bw, bp); } \
	static __forceinline u32 getPixelAddress##psmT##_0(int x, int y, u32 bw) { \
		return getPixelAddress<psmX>(x, y, bw); }

Def_getPixelAddress(32, PSMCT32)
Def_getPixelAddress(16, PSMCT16)
Def_getPixelAddress(16S, PSMCT16S)
Def_getPixelAddress(8, PSMT8)
Def_getPixelAddress(4, PSMT4)
Def_getPixelAddress(32Z, PSMT32Z)
Def_getPixelAddress(16Z, PSMT16Z)
Def_getPixelAddress(16SZ, PSMT16SZ)

// Formats that reuse the 32-bit (or 32Z) addressing.
#define getPixelAddress24 getPixelAddress32
#define getPixelAddress24_0 getPixelAddress32_0
#define getPixelAddress8H getPixelAddress32
#define getPixelAddress8H_0 getPixelAddress32_0
#define getPixelAddress4HL getPixelAddress32
#define getPixelAddress4HL_0 getPixelAddress32_0
#define getPixelAddress4HH getPixelAddress32
#define getPixelAddress4HH_0 getPixelAddress32_0
#define getPixelAddress24Z getPixelAddress32Z
#define getPixelAddress24Z_0 getPixelAddress32Z_0
// Check FFX-1 (very beginning) for PSMT8
// Check Tekken menu for PSMT4
// ZZ_DT[7] is needed only for PSMT8H, PSMT4HL and PSMT4HH -- in these cases the word contains data that does not start at the beginning.
// This function returns the offset from a 32-bit aligned address, and shift -- the position of the pixel inside that u32.
// So ((u32*)mem + getPixelAddress_Aligned32) is the exact location of the u32 where our pixel data is stored.
// Just as a reminder:
// PSMCT32, 24, 32Z, 24Z, 8H, 4HL and 4HH have ZZ_DT[psm] == 3, so shift is always 0.
// PSMCT16, 16S, 16SZ, 16Z have ZZ_DT[psm] == 2, so shift is 0 or 16.
// PSMT8 ZZ_DT[psm] == 1, shift is 0, 8, 16, 24
// PSMT4 ZZ_DT[psm] == 0, shift is 0, 4, 8, 12, 16, 20, 24, 28.
// This allows fast access to pixels in the same basepage: if x % N == 0 (N = 1, 2, 4, 8, .. 64)
// then we can guarantee that all pixels from x to x + N - 1 are in the same basepage.
// Returns a pointer to the first 32-bit word of the page that holds pixel
// (x, y): pmem advanced by bp * 64 + page index * 2048 words.
template <int psm>
static __forceinline u32* getPixelBasepage(const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
	u8 A = 0, B = 0, C = 0, D = 0, E = 0, F = 0, H = 0;
	u32 G = 0;
	setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);

	const u32 basepage = ((y>>A) * (bw>>B)) + (x>>B);
	u32* const words = (u32*)pmem;
	return words + (bp * 64 + basepage * 2048);
}
// And this is offset for this pixels.
// Locates pixel (x, y) inside a page that starts at pmem.
//   shift (out)    - bit position of the pixel inside the returned word
//   mask  (in/out) - ANDed with the pixel's bit mask moved to that position
// Returns the 32-bit word that contains the pixel.
template <int psm>
static __forceinline u32* getPixelOffset(u32& mask, u32& shift, const void* pmem, int x, int y) {
	u8 A = 0, B = 0, C = 0, D = 0, E = 0, F = 0, H = 0;
	u32 G = 0;
	setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);

	// Page-table entry scaled so that the low three bits select the 4-bit
	// slot inside a word and the remaining bits select the word.
	const u32 slot = (g_pageTable[psm][y&D][x&E] << (3 - C));

	shift = ((slot & 0x7) << 2) + F;
	mask &= G << shift;

	u32* const words = (u32*)pmem;
	return words + (slot >> 3);
}
// Returns the 32-bit word that contains pixel (x, y) of a buffer starting at
// pmem, and reports the pixel's bit position and mask through shift / mask.
// Combines the page lookup (getPixelBasepage) with the in-page lookup (getPixelOffset).
template <int psm>
static __forceinline u32* getPixelAddress_A32(u32& mask, u32& shift, const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
	u32* const page = getPixelBasepage<psm>(pmem, x, y, bw, bp);
	return getPixelOffset<psm>(mask, shift, page, x, y);
}
// Same word lookup as getPixelAddress_A32, but without reporting shift or mask:
// returns only the 32-bit word that contains pixel (x, y).
template <int psm>
static __forceinline u32* getPixelBaseAddress_A32(const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
	u8 A = 0, B = 0, C = 0, D = 0, E = 0, F = 0, H = 0;
	u32 G = 0;
	setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);

	const u32 slot = (g_pageTable[psm][y&D][x&E] << (3 - C));
	u32* const page = (u32*)getPixelBasepage<psm>(pmem, x, y, bw, bp);
	return page + (slot >> 3);
}
// Wrapper for cases, where psm is not constant, should be avoided inside cycles
// Runtime-psm overload: dispatches to getPixelAddress_A32<psmC> through
// PSM_SWITCHCASE. Returns 0 (null) when psm is not handled by the macro;
// mask and shift are left untouched in that case.
static __forceinline u32* getPixelAddress_A32(u32& mask, u32& shift, int psm, const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
PSM_SWITCHCASE( return getPixelAddress_A32<psmC>(mask, shift, pmem, x, y, bw, bp) );
return 0;
}
// Returns the start of the CLUT entry selected by tex0.csa inside pmem:
// 64 bytes per (csa & 15). When PSMT_ISHALF(tex0.cpsm) holds and csa >= 16,
// the address is moved 2 bytes further.
static __forceinline u32* getClutAddress(u8* pmem, const tex0Info& tex0) {
	u8* entry = pmem + 64 * (tex0.csa & 15);

	if (PSMT_ISHALF(tex0.cpsm) && tex0.csa >= 16)
		entry += 2;

	return (u32*)entry;
}
//--------------------------------------------- Write Pixel -----------------------------------------------------------
// Set proper mask for transfering multiple bytes per word.
// Builds a 32-bit write mask for transferring whole words of format psm.
//   Writemask - per-pixel mask supplied by the caller
// For PSMT8H / PSMT4HL / PSMT4HH a fixed mask covering the format's bit field
// is returned. For all other formats the low G bits of Writemask (G = bits per
// pixel) are replicated across the word for formats narrower than 24 bits,
// i.e. two copies for 16-bit, four for 8-bit and eight for 4-bit pixels.
// Fix: the original evaluated (1 << G) unconditionally, which is undefined
// when G is 32 (and on x86 typically yields an empty mask). A full-width
// format now returns Writemask unchanged, and the shift is done on unsigned.
template <int psm>
inline u32 HandleWritemask(u32 Writemask) {
	switch (psm) {
		case PSMT8H:		// modes with non-zero start bit should be handled differently
			return 0xff000000;
		case PSMT4HL:
			return 0x0f000000;
		case PSMT4HH:
			return 0xf0000000;
		default:
			break;
	}

	const u8 G = PSM_BITS_PER_PIXEL<psm>();
	if (G >= 32)
		return Writemask;	// every bit of the word belongs to the single pixel
	if (G == 0)
		return 0;		// no usable bits; also keeps the loop below finite

	// drop all bits in writemask that could not be used
	const u32 dmask = Writemask & ((1u << G) - 1);
	u32 mask = dmask;		// 32 (24) targets and lower

	if (G < 24) {
		// replicate the per-pixel mask into every pixel slot of the word
		for (u32 pos = G; pos < 32; pos += G)
			mask |= dmask << pos;
	}
	return mask;
}
//push pixel data at position x,y, according psm storage format. pixel do not need to be properly masked, wrong bit's would not be used
//mask should be made according PSM.
// Writes one pixel of format psm at (x, y). `pixel` is given unshifted; it is
// moved to the pixel's bit position and merged into memory under `mask`,
// which the address lookup first narrows to that pixel's bits.
template <int psm>
static __forceinline void writePixel(void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
	u32 shift;
	u32* const target = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
	const u32 positioned = pixel << shift;
	MaskedOR (target, positioned, mask);
}
// Runtime-psm overload of writePixel: dispatches to writePixel<psmC> through
// PSM_SWITCHCASE. Does nothing when psm is not handled by the macro.
static __forceinline void writePixel(int psm, void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
PSM_SWITCHCASE(writePixel<psmC>(pmem, x, y, pixel, bw, bp, mask));
}
// Put pixel data from memory. Pixel is p, memory start from pixel, and we should count pmove words and shift resulting word to shift
// 24 targets could be outside of 32-bit borders.
// Merges one pixel taken from a packed source buffer into the destination word.
//   p     - destination word
//   pixel - start of the packed source data
//   pmove - index of the source word in which the pixel starts
//   shift - destination bit position minus source bit position; positive means
//           the source word is shifted left, otherwise right by -shift
//   mask  - destination bits that may be modified
// Fix: the format test used `||`, which is always true, so the special path for
// 24-bit formats was never taken and pixels straddling two source words lost
// their upper bits. The 24-bit path now also avoids the former u64 read, which
// could be unaligned and could read one word past the data actually needed.
template <int psm>
static __forceinline void pushPixelMem(u32* p, u32* pixel, int pmove, int shift, u32 mask = 0xffffffff) {
	if (psm != PSMCT24 && psm != PSMT24Z) {
		if (shift > 0)
			MaskedOR (p, (*(pixel + pmove)) << (shift), mask);
		else
			MaskedOR (p, (*(pixel + pmove)) >> (-shift), mask);
	}
	else {	// for 24 and 24Z the source pixel may straddle two 32-bit words
		const u32* src = pixel + pmove;
		const int drop = -shift;	// low bits of src[0] that belong to earlier pixels
		u32 value;

		if (drop <= 0) {
			value = src[0] << (-drop);
		}
		else {
			value = src[0] >> drop;
			// Only 32 - drop bits are left in src[0]; fetch the rest of the
			// 24-bit pixel from the next word when that is not enough.
			if (drop > 8)
				value |= src[1] << (32 - drop);
		}
		MaskedOR(p, value, mask);
	}
}
// use it if pixel already shifted by needed number of bytes.
// offseted mean that we should skip basepage calculation, pmem is link to basepage'ed memory. Just a little quicker.
// Writes pixel number `count` of a packed source buffer to position (x, y).
//   offseted - non-zero: pmem already points at the page, so only the in-page
//              offset is computed; zero: the full address is computed from bw / bp
//   pixel    - start of the packed source buffer
//   count    - index of the pixel inside that buffer
template <int psm, int offseted>
static __forceinline void writePixelMem(const void* pmem, int x, int y, u32* pixel, int count, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
	u32 shift;
	u32* target;

	if (!offseted)
		target = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
	else
		target = getPixelOffset<psm>(mask, shift, pmem, x, y);

	// Position of the source pixel: word index and bit offset inside that word.
	const int bitsPerPixel = PSM_BITS_PER_PIXEL<psm>();
	const int bitIndex = count * bitsPerPixel;
	const int srcWord = bitIndex >> 5;
	const int srcBit = bitIndex & 31;	// if the offset leaves the word, the data comes from the next word

	pushPixelMem<psm>(target, pixel, srcWord, (int)shift - srcBit, mask);
}
// This function push several pixels. Note, that for 32, 24, 8HH, 4HL, 4HH it's simply write (and pixel should not be properly masked), 16 do push 2 pixels (and x should be even).
// 8 push 4 pixels: 0,0; 0,1; 1,0 and 1,1. 4 push 8: 0,0; 0,1; 1,0; 1,1; 2,0, 2,1; 3,0; 3,1.
// Writes a whole pre-packed 32-bit word at the location of pixel (x, y).
// `pixel` is stored as-is (no shifting) under the caller's `mask`.
// NOTE(review): the address lookup narrows maskA (a copy of mask), but the
// write below uses the original `mask`; maskA and shift are otherwise unused.
template <int psm>
static __forceinline void writePixelWord(const void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
u32 maskA = mask, shift;
u32* p = getPixelAddress_A32<psm>(maskA, shift, pmem, x, y, bw, bp);
/* if (PSM_NON_FULL_WORD<psm>())
maskA = maskA & mask;
else
maskA = mask;*/
MaskedOR (p, pixel, mask);
}
// ------------------------------------- Read Pixel ---------------------------------------
// Reads one pixel of format psm at (x, y) and returns it right-aligned:
// the containing word is masked to the pixel's bits and shifted down.
template <int psm>
static __forceinline u32 readPixel(const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
	u32 shift;
	const u32* const source = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
	const u32 isolated = *source & mask;
	return isolated >> shift;
}
// Runtime-psm overload of readPixel: dispatches to readPixel<psmC> through
// PSM_SWITCHCASE. Returns 0 when psm is not handled by the macro.
static __forceinline u32 readPixel(int psm, const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
PSM_SWITCHCASE(return readPixel<psmC>(pmem, x, y, bw, bp, mask););
return 0;
}
// Reads the 32-bit word that contains pixel (x, y) without shifting it.
// When PSM_NON_FULL_WORD<psm>() holds, the result is limited to the bits of
// that pixel (the caller's mask narrowed by the address lookup); otherwise the
// word is ANDed with the caller's mask only.
// Fix: the original read `mask` in the same expression as the call that
// modifies it through a reference, so whether the narrowed or the original
// mask was applied depended on the compiler's operand evaluation order. The
// lookup is now a separate statement, so the narrowed mask is always used.
template <int psm>
static __forceinline u32 readPixelWord(const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
	u32 shift;

	if (PSM_NON_FULL_WORD<psm>()) {
		const u32* const source = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
		return *source & mask;
	}

	// Full-word formats: the lookup narrows a scratch mask, not the caller's.
	u32 scratchMask = 0xffffffff;
	const u32* const source = getPixelAddress_A32<psm>(scratchMask, shift, pmem, x, y, bw, bp);
	return *source & mask;
}
// Reads as many consecutive pixels starting at (x, y) as fit into one 32-bit
// word, packs them, and stores the word into the linear buffer dst.
//   dst   - destination buffer
//   count - in: number of pixels already stored in dst; out: increased by
//           PSM_PIXELS_STORED_PER_WORD<psm>()
// For 24-bit formats the packed value is merged at byte offset 3 * count with
// a 0xffffff mask instead of being stored at a word index.
template <int psm>
static __forceinline void fillMemoryFromPixels(u32* dst, const void* pmem, int& count, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
u32 pixel;
u8 I = PSM_BITS_PER_PIXEL<psm>();
int K = count / PSM_PIXELS_STORED_PER_WORD<psm>(); // offset for pmem, count for 32, count / 2 for 16, etc.
pixel = readPixel<psm>(pmem, x, y, bw, bp, mask); // I prefer not to use for here. It's slow
// Each further pixel is placed I bits above the previous one.
if (I < 32) {
pixel += readPixel<psm>(pmem, x + 1, y, bw, bp, mask) << I;
if (I < 16) { // 8 and 4 targets
pixel += readPixel<psm>(pmem, x + 2, y, bw, bp, mask) << (2 * I);
pixel += readPixel<psm>(pmem, x + 3, y, bw, bp, mask) << (3 * I);
if (I < 8) { // This is for 4, 4HH and 4HL
pixel += readPixel<psm>(pmem, x + 4, y, bw, bp, mask) << (4 * I);
pixel += readPixel<psm>(pmem, x + 5, y, bw, bp, mask) << (5 * I);
pixel += readPixel<psm>(pmem, x + 6, y, bw, bp, mask) << (6 * I);
pixel += readPixel<psm>(pmem, x + 7, y, bw, bp, mask) << (7 * I);
}}}
if (I != 24) {
*(dst + K) = pixel;
}
else { // 24. should have special care.
// ERROR_LOG("special care %d\n", count);
// Only the low 24 bits are written, at a byte (not word) granular position.
MaskedOR((u32*)((u8*)dst + 3 * count), pixel, 0xffffff);
}
count += PSM_PIXELS_STORED_PER_WORD<psm>();
}
// Fill count pixels form continues memory region, starting from pmem, First pixel to read have number shift in this region.
// Read no more than count pixels. We could assert, that all this pixels would be place in the same basepage
// Shift is automaticaly increased by count (or decreased if count < 0)
// Writes up to `count` (at most 8) consecutive pixels of one row, starting at
// (x, y), from the packed source buffer pmem into dst.
//   offseted - true: the page base is looked up once and the per-pixel writes
//              only compute in-page offsets; false: every write computes the
//              full address from dst
//   shift    - in: index of the first source pixel; out: advanced by count
// The first pixel is written unconditionally, even if count is below 1.
// The writes are unrolled by hand on purpose (see the remark on the first call).
template <int psm, bool offseted, int count>
static __forceinline void writePixelsFromMemory(void* dst, const void* pmem, int& shift, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
const void* base;
if (offseted)
base = getPixelBasepage<psm>(dst, x, y, bw, bp);
else
base = (const void*)dst;
// shift is advanced first; the calls below recover the original index as shift - count.
shift += count;
writePixelMem<psm, offseted>(base, x, y, (u32*)pmem, shift - count, bw, bp, mask); // I prefer not to use for here. It's slow
if (count < 2) return;
writePixelMem<psm, offseted>(base, x + 1, y, (u32*)pmem, shift - count + 1, bw, bp, mask);
if (count < 3) return;
writePixelMem<psm, offseted>(base, x + 2, y, (u32*)pmem, shift - count + 2, bw, bp, mask);
if (count < 4) return;
writePixelMem<psm, offseted>(base, x + 3, y, (u32*)pmem, shift - count + 3, bw, bp, mask);
if (count < 5) return;
writePixelMem<psm, offseted>(base, x + 4, y, (u32*)pmem, shift - count + 4, bw, bp, mask);
if (count < 6) return;
writePixelMem<psm, offseted>(base, x + 5, y, (u32*)pmem, shift - count + 5, bw, bp, mask);
if (count < 7) return;
writePixelMem<psm, offseted>(base, x + 6, y, (u32*)pmem, shift - count + 6, bw, bp, mask);
if (count < 8) return;
writePixelMem<psm, offseted>(base, x + 7, y, (u32*)pmem, shift - count + 7, bw, bp, mask);
}
// Use it if we don't know that starting pixel is aligned for multiple-pixel write
// Runtime-count front end for writePixelsFromMemory: writes `div` pixels
// (1..7) starting at (x, y) by dispatching to the instantiation whose
// compile-time count equals div. div == 0 and any value outside 1..7 write
// nothing and leave shift unchanged.
template <int psm, bool offseted>
static __forceinline void writeUnalignedPixelsFromMemory(void* dst, int div, const void* pmem, int& shift, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
switch (div){
case 0: return; // Pixels are aligned, so we could move on
case 1: writePixelsFromMemory<psm, offseted, 1>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 2: writePixelsFromMemory<psm, offseted, 2>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 3: writePixelsFromMemory<psm, offseted, 3>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 4: writePixelsFromMemory<psm, offseted, 4>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 5: writePixelsFromMemory<psm, offseted, 5>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 6: writePixelsFromMemory<psm, offseted, 6>(dst, pmem, shift, x, y, bw, bp, mask);
return;
case 7: writePixelsFromMemory<psm, offseted, 7>(dst, pmem, shift, x, y, bw, bp, mask);
return;
}
}
// Small swizzle helper used to convert data from memory: extracts the bits of
// pixel number x from the source words and returns them moved to bit position z.
//   psm  - pixel storage format, selects the per-format constants
//   y    - word offset added to the source pointer before reading
//          (in the 24-bit branch it is applied as 4 * y bytes)
//   z    - bit position inside the returned word where the pixel bits end up
//   pmem - source data
//   x    - pixel index inside the source row
template <int psm, int y, int z>
static __forceinline u32 BitmaskinPSM(u32* pmem, u8 x) {
	u8 H = PSM_BITCOUNT<psm>();
	u8 I = PSM_BITS_PER_PIXEL<psm>() ; // length of the bitmask in bits.
	if (PSM_BITMODE<psm>() != 1) { // PSMCT24 and 24Z are handled separately (else branch), as a pixel can cross a 32-bit word.
		u8 k = (x & (H - 1)) * I; // bit offset of pixel x inside its source word; the word itself is pmem[x / H + y]
		// On the PS2 side the pixel may sit at any bit position, which is what z selects.
		// NOTE(review): "1 << I" is evaluated as int; if PSM_BITS_PER_PIXEL can
		// return 32 for some psm this shift is undefined -- confirm.
		u32 J = ((1 << I) - 1) << k; // bitmask of I bits, moved to position k
		// gcc complains repeatedly about this always being false. I'll investigate later.
		if (z > k)
			return ((*(pmem + x/H + y)) & J) << (z - k); // mask the source pixel and move it up from bit k to bit z
		else // same extraction, but the pixel has to move down from bit k to bit z
			return ((*(pmem + x/H + y)) & J) >> (k - z);
	}
	else { // only 24-bit targets
		u8* mem = ((u8*)pmem + (x * 3) + 4 * y); // pixels are 3 bytes apart and so not aligned to 32 bits; address them bytewise
		return *(u32*)mem; // reads 4 bytes starting at the pixel; masking is left to the caller
	}
}
// Limits the number of memory reads/writes: builds one complete destination
// word for coordinates x, y out of several BitmaskinPSM() extractions and
// stores it with a single writePixelWord() call.
// Only rule is that x, y should be < 8 (it automatically fills all needed pixels
// that lie in the block data but have coordinates of 8 or more).
//   dst      - destination buffer, passed to writePixelWord
//   pmem     - source data
//   pitch    - source row pitch; the secondary row is read at pmem + 2 * pitch
//   bw, bp   - forwarded to writePixelWord
//   mask     - write mask, passed through HandleWritemask<psm>() first
template <int psm>
static __forceinline void fillPixelsFromMemory(u32* dst, u32* pmem, int x, int y, int pitch, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
	u32 pixel = 0;
	const u8 H = PSM_PIXELS_PER_WORD<psm>();
	if (PSM_PIXEL_SHIFT<psm>() == 0) // The shift cannot be used directly as a template argument, hence the explicit branches.
		pixel = BitmaskinPSM<psm, 0, 0>(pmem, x); // First pixel x,y is the common part of all psm paths
	else {
		if (PSM_PIXEL_SHIFT<psm>() == 24) // 8H and 4HL have 1 pixel, but shifted to 24 bits. 4HH -- 28 bits.
			pixel = BitmaskinPSM<psm, 0, 24>(pmem, x);
		else
			pixel = BitmaskinPSM<psm, 0, 28>(pmem, x);
	}
	// Formats that pack several pixels per word need the remaining pixels ORed in.
	if (H > 1) {
		const u8 G = psm & 0x7; // Bitmode, used for a better chance of switch optimization
		int div = ( x < 4 ) ? 4 : -4; // the secondary row is shifted by +4 or -4 pixels
		switch (G) {
			case 2: // one extra pixel, placed at bit 16
				pixel |= BitmaskinPSM<psm, 4, 16>(pmem, x);
				break;
			case 3: // three extra pixels: one from this row, two from the secondary row
				pixel |= BitmaskinPSM<psm, 2, 16>(pmem, x);
				pixel |= BitmaskinPSM<psm, 0, 8>(pmem + 2 * pitch, x + div);
				pixel |= BitmaskinPSM<psm, 2, 24>(pmem + 2 * pitch, x + div);
				break;
			case 4: // seven extra pixels: three from this row, four from the secondary row
				pixel |= BitmaskinPSM<psm, 1, 8>(pmem, x);
				pixel |= BitmaskinPSM<psm, 2, 16>(pmem, x);
				pixel |= BitmaskinPSM<psm, 3, 24>(pmem, x);
				pixel |= BitmaskinPSM<psm, 0, 4>(pmem + 2 * pitch, x + div);
				pixel |= BitmaskinPSM<psm, 1, 12>(pmem + 2 * pitch, x + div);
				pixel |= BitmaskinPSM<psm, 2, 20>(pmem + 2 * pitch, x + div);
				pixel |= BitmaskinPSM<psm, 3, 28>(pmem + 2 * pitch, x + div);
				break;
		}
	}
	writePixelWord<psm>(dst, x, y, pixel, bw, bp, HandleWritemask<psm>(mask)); // use it for 32, 24, 8H, 4HL and 4HH
}
// Stores "pixel" into the word at pmem. For PSMT4HH, PSMT8H, PSMT4HL, PSMCT24
// and PSMT24Z the value is merged through MaskedOR() using "mask"; every other
// format overwrites the whole word and ignores "mask".
template <int psm>
void writeWordPixel(u32* pmem, u32 pixel, u32 mask) {
	const bool mergeWithMask =
		(psm == PSMT4HH) || (psm == PSMT8H) || (psm == PSMT4HL) ||
		(psm == PSMCT24) || (psm == PSMT24Z);

	if (!mergeWithMask) {
		*pmem = pixel;
		return;
	}

	MaskedOR(pmem, pixel, mask);
}
// Gets the pixel at (sx, sy) from src and puts it at (dx, dy) in dst by copying
// the storage word that contains it.
// Precondition: both buffers use the same psm and
//     ((sx - dx) & E) == 0  and  ((sy - dy) & D) == 0
// where D and E are the masks returned by setPsmtConstantsX<psm>(). Under that
// condition source and destination share the same in-page word index, so the
// index is looked up once (from the source coordinates) and used for both.
//   dbw, sbw - buffer widths of the destination and the source
template <int psm>
void transferPixelFast(void* dst, void* src, int dx, int dy, int sx, int sy, u32 dbw, u32 sbw ) {
	u32 Dbasepage, Sbasepage;
	u32 word;
	u8 A = 0, B = 0, C = 0 , D = 0, E = 0, F = 0; u32 G = 0; u8 H = 0;
	setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);

	// '==' binds tighter than '&', so every masked term needs its own
	// parentheses; without them the expression does not test the precondition.
	assert( (((sx - dx) & E) == 0) && (((sy - dy) & D) == 0) );

	// Page number of the destination and of the source pixel.
	Dbasepage = ((dy>>A) * (dbw>>B)) + (dx>>B);
	Sbasepage = ((sy>>A) * (sbw>>B)) + (sx>>B);

	// Word index inside the page, shared by both sides (see precondition).
	word = (g_pageTable[psm][sy&D][sx&E] >> C);

	// Pages are addressed in steps of 2048 32-bit words.
	u32* dstp = (u32*)dst + Dbasepage * 2048 + word;
	u32* srcp = (u32*)src + Sbasepage * 2048 + word;

	writeWordPixel<psm>(dstp, *srcp, G << F);
}
// if we could not guarantee, that buffer suize shared same page Table address
template <int psm>
void transferPixel(void* dst, void* src, int dx, int dy, int sx, int sy, u32 dbw, u32 sbw ) {
u32 mask = 0xffffffff, shift;
u32* dstp = getPixelAddress_A32<psm>(mask, shift, dst, dx, dy, dbw);
u32* srcp = getPixelAddress_A32<psm>(mask, shift, src, sx, sy, sbw);
writeWordPixel<psm>(dstp, *srcp, mask); // write whole word
}
// Generates four non-template wrappers around writePixel<psmX> / readPixel<psmX>
// for the format suffix psmT:
//   writePixel<psmT>(pmem, x, y, pixel, bp, bw)
//   readPixel<psmT>(pmem, x, y, bp, bw)
//   writePixel<psmT>_0(pmem, x, y, pixel, bw)   -- no bp argument is forwarded
//   readPixel<psmT>_0(pmem, x, y, bw)           -- no bp argument is forwarded
// Note the argument order: the wrappers take (bp, bw) while the templates are
// called with (bw, bp).
// NOTE(review): the 16S, 32Z, 24Z, 16Z and 16SZ wrappers below are instantiated
// with the plain PSMCT16 / PSMCT32 / PSMCT24 constants rather than format
// specific ones -- confirm that this is intended.
#define Def_getReadWrite(psmT, psmX) \
static __forceinline void writePixel##psmT(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { \
writePixel<psmX>(pmem, x, y, pixel, bw, bp); } \
static __forceinline u32 readPixel##psmT(const void* pmem, int x, int y, u32 bp, u32 bw) { \
return readPixel<psmX>(pmem, x, y, bw, bp); } \
static __forceinline void writePixel##psmT##_0(void* pmem, int x, int y, u32 pixel, u32 bw) { \
writePixel<psmX>(pmem, x, y, pixel, bw); } \
static __forceinline u32 readPixel##psmT##_0(const void* pmem, int x, int y, u32 bw) { \
return readPixel<psmX>(pmem, x, y, bw); }
Def_getReadWrite(32, PSMCT32);
Def_getReadWrite(24, PSMCT24);
Def_getReadWrite(16, PSMCT16);
Def_getReadWrite(16S, PSMCT16);
Def_getReadWrite(8, PSMT8);
Def_getReadWrite(8H, PSMT8H);
Def_getReadWrite(4, PSMT4);
Def_getReadWrite(4HH, PSMT4HH);
Def_getReadWrite(4HL, PSMT4HL);
Def_getReadWrite(32Z, PSMCT32);
Def_getReadWrite(24Z, PSMCT24);
Def_getReadWrite(16Z, PSMCT16);
Def_getReadWrite(16SZ, PSMCT16);
#endif // Zeydlitz's code
#endif /* __ZZOGL_MEM_H__ */

View File

@ -89,8 +89,7 @@ int ZZSave(s8* pbydata)
return 0;
}
extern u32 s_uFramebuffer;
extern int g_nCurVBOIndex;
extern u32 g_nCurVBOIndex;
bool ZZLoad(s8* pbydata)
{
@ -163,7 +162,7 @@ bool ZZLoad(s8* pbydata)
icurctx = -1;
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer); // switch to the backbuffer
FB::Bind(); // switch to the backbuffer
SetFogColor(gs.fogcol);
GL_REPORT_ERRORD();

View File

@ -17,7 +17,12 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
//#ifdef NVIDIA_CG_API // This code is only for NVIDIA cg-toolkit API
// By default enable nvidia cg api
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
#define NVIDIA_CG_API
#endif
#ifdef NVIDIA_CG_API // This code is only for NVIDIA cg-toolkit API
// ZZogl Shader manipulation functions.
//------------------- Includes
@ -75,7 +80,7 @@ const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex1
#endif
const char* g_pShaders[4] = { "full", "reduced", "accurate", "accurate-reduced" };
// ----------------- Global Variables
// ----------------- Global Variables
ZZshContext g_cgcontext;
ZZshProfile cgvProf, cgfProf;
@ -85,10 +90,10 @@ ZZshProgram pvs[16] = {NULL};
ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
#ifdef DEVBUILD
char* EFFECT_NAME; // All this variables used for testing and set manually
char* EFFECT_DIR;
#endif
//#ifdef DEVBUILD
extern char* EFFECT_NAME; // All this variables used for testing and set manually
extern char* EFFECT_DIR;
//#endif
bool g_bCRTCBilinear = true;
@ -96,14 +101,9 @@ float4 g_vdepth, vlogz;
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
VERTEXSHADER pvsBitBlt;
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
extern u32 ptexBilinearBlocks;
extern u32 ptexConv32to16;
inline bool LoadEffects();
extern bool s_bWriteDepth;
@ -141,14 +141,14 @@ void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata)
{
ZZLog::Error_Log("%s->%s: %s\n", ShaderCallerName, ShaderHandleName, cgGetErrorString(err));
const char* listing = cgGetLastListing(g_cgcontext);
if (listing != NULL)
if (listing != NULL)
ZZLog::Debug_Log(" last listing: %s\n", listing);
}
bool ZZshStartUsingShaders() {
cgSetErrorHandler(HandleCgError, NULL);
g_cgcontext = cgCreateContext();
cgvProf = CG_PROFILE_ARBVP1;
cgfProf = CG_PROFILE_ARBFP1;
cgGLEnableProfile(cgvProf);
@ -194,10 +194,14 @@ bool ZZshStartUsingShaders() {
ZZLog::GS_Log("Creating extra effects.");
B_G(ZZshLoadExtraEffects(), return false);
ZZLog::GS_Log("using %s shaders\n", g_pShaders[g_nPixelShaderVer]);
ZZLog::GS_Log("using %s shaders\n", g_pShaders[g_nPixelShaderVer]);
return true;
}
// Shader shutdown hook. Intentionally empty in this (Cg) implementation.
void ZZshExitCleaning() {
	// nothing to do with cg
}
// open shader file according to build target
bool ZZshCreateOpenShadersFile() {
#ifndef DEVBUILD
@ -272,7 +276,7 @@ void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name)
cgGLSetParameter4fv(param, v);
}
void ZZshSetParameter4fv(ZZshProgram prog, ZZshParameter param, const float* v, const char* name) {
void ZZshSetParameter4fv(ZZshProgram prog, ZZshParameter param, const float* v, const char* name) {
ShaderHandleName = name;
cgGLSetParameter4fv(param, v);
}
@ -313,15 +317,15 @@ void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) {
void ZZshSetVertexShader(ZZshProgram prog) {
if ((prog) != g_vsprog) {
cgGLBindProgram(prog);
g_vsprog = prog;
cgGLBindProgram(prog);
g_vsprog = prog;
}
}
void ZZshSetPixelShader(ZZshProgram prog) {
if ((prog) != g_psprog) {
cgGLBindProgram(prog);
g_psprog = prog;
cgGLBindProgram(prog);
g_psprog = prog;
}
}
@ -447,7 +451,7 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context)
vlogz = float4( 1.0f, 0.0f, 0.0f, 0.0f);
}
else {
g_vdepth = float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f);
g_vdepth = float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f);
vlogz = float4( 0.0f, 1.0f, 0.0f, 0.0f);
}
@ -458,7 +462,7 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context)
p = cgGetNamedParameter(prog, "g_fZMin"); // Switch to flat-z when needed
if( p != NULL && cgIsParameterUsed(p, prog) == CG_TRUE ) {
//ZZLog::Error_Log("Use flat-z\n");
cgGLSetParameter4fv(p, vlogz);
cgGLSetParameter4fv(p, vlogz);
}
else
ZZLog::Error_Log("Shader file version is outdated! Only log-Z is possible.");
@ -483,6 +487,48 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context)
}
#ifndef DEVBUILD
#if 0
// Function form of the vertex-shader loading step: looks up the resource
// header for Index, creates a Cg program from the embedded object code with
// the vertex profile, loads it and sets up its vertex program parameters.
// NOTE(review): this code is compiled out by the surrounding "#if 0" and would
// not build as written:
//   - it is declared void but contains "return false;"
//   - "prog" is taken by value, so the created program never reaches the caller
//   - "header" is not declared inside the function (presumably expected from
//     the enclosing scope -- confirm)
static __forceinline void LOAD_VS(int Index, ZZshProgram prog)
{
	assert(mapShaderResources.find(Index) != mapShaderResources.end());
	header = mapShaderResources[Index];
	assert((header) != NULL && (header)->index == (Index));
	// The program source is the precompiled object stored at header->offset.
	prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgvProf, NULL, NULL);
	if (!cgIsProgram(prog))
	{
		ZZLog::Error_Log("Failed to load vs %d: \n%s", Index, cgGetLastListing(g_cgcontext));
		return false;
	}
	cgGLLoadProgram(prog);
	if (cgGetError() != CG_NO_ERROR) ZZLog::Error_Log("Failed to load program %d.", Index);
	// The SH_CONTEXT1 bit of the index selects the context passed on.
	SetupVertexProgramParameters(prog, !!(Index&SH_CONTEXT1));
}
// Function form of the fragment-shader loading step: looks up the resource
// header for Index, creates a Cg program with the fragment profile (cgfProf),
// loads it and sets up its fragment program parameters. bLoadSuccess is set to
// false when loading reports a Cg error.
// NOTE(review): this code is compiled out by the surrounding "#if 0" and would
// not build as written:
//   - it is named LOAD_VS although it loads a pixel shader ("ps" in the log)
//   - it is declared void but contains "return false;"
//   - "fragment" is taken by value, so fragment.prog never reaches the caller
//   - "header" and "bLoadSuccess" are not declared inside the function
//     (presumably expected from the enclosing scope -- confirm)
static __forceinline void LOAD_VS(int Index, FRAGMENTSHADER fragment)
{
	bLoadSuccess = true;
	assert(mapShaderResources.find(Index) != mapShaderResources.end());
	header = mapShaderResources[Index];
	// The program source is the precompiled object stored at header->offset.
	fragment.prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgfProf, NULL, NULL);
	if (!cgIsProgram(fragment.prog))
	{
		ZZLog::Error_Log("Failed to load ps %d: \n%s", Index, cgGetLastListing(g_cgcontext));
		return false;
	}
	cgGLLoadProgram(fragment.prog);
	if (cgGetError() != CG_NO_ERROR)
	{
		ZZLog::Error_Log("failed to load program %d.", Index);
		bLoadSuccess = false;
	}
	// The SH_CONTEXT1 bit of the index selects the context passed on.
	SetupFragmentProgramParameters(&fragment, !!(Index&SH_CONTEXT1), 0);
}
#endif
#define LOAD_VS(Index, prog) { \
assert( mapShaderResources.find(Index) != mapShaderResources.end() ); \
@ -569,7 +615,7 @@ bool ZZshLoadExtraEffects()
// pvs[2*i+8] = pvs[2*i+8+1] = NULL;
// }
}
LOAD_VS(SH_BITBLTVS, pvsBitBlt.prog);
pvsBitBlt.sBitBltPos = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltPos");
pvsBitBlt.sBitBltTex = cgGetNamedParameter(pvsBitBlt.prog, "g_fBitBltTex");
@ -596,7 +642,7 @@ bool ZZshLoadExtraEffects()
LOAD_PS(SH_BITBLTDEPTHPS, ppsBitBltDepth);
LOAD_PS(SH_CRTCTARGPS, ppsCRTCTarg[0]);
LOAD_PS(SH_CRTCTARGINTERPS, ppsCRTCTarg[1]);
g_bCRTCBilinear = true;
LOAD_PS(SH_CRTCPS, ppsCRTC[0]);
if( !bLoadSuccess ) {
@ -611,9 +657,9 @@ bool ZZshLoadExtraEffects()
if( !bLoadSuccess )
ZZLog::Error_Log("Failed to create CRTC shaders.");
LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]);
LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]);
// LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]);
// LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]);
LOAD_PS(SH_ZEROPS, ppsOne);
LOAD_PS(SH_BASETEXTUREPS, ppsBaseTexture);
LOAD_PS(SH_CONVERT16TO32PS, ppsConvert16to32);
@ -645,13 +691,13 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
texwrap = TEXWRAP_REPEAT_CLAMP;
int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
assert( index < ArraySize(ppsTexture) );
FRAGMENTSHADER* pf = ppsTexture+index;
if( pbFailed != NULL ) *pbFailed = false;
if( pf->prog != NULL )
if( pf->prog != NULL )
return pf;
if( (g_nPixelShaderVer & SHADER_ACCURATE) && mapShaderResources.find(index+NUM_SHADERS*SHADER_ACCURATE) != mapShaderResources.end() )
@ -684,7 +730,7 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
return NULL;
}
#else // not RELEASE_TO_PUBLIC
#define LOAD_VS(name, prog, shaderver) { \
@ -793,9 +839,9 @@ bool ZZshLoadExtraEffects()
}
LOAD_PS("BitBltDepthPS", ppsBitBltDepth, cgfProf);
LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], cgfProf);
LOAD_PS("CRTCTargPS", ppsCRTCTarg[0], cgfProf);
LOAD_PS("CRTCTargInterPS", ppsCRTCTarg[1], cgfProf);
g_bCRTCBilinear = true;
LOAD_PS("CRTCPS", ppsCRTC[0], cgfProf);
if( !bLoadSuccess ) {
@ -810,8 +856,8 @@ bool ZZshLoadExtraEffects()
if( !bLoadSuccess )
ZZLog::Error_Log("Failed to create CRTC shaders.");
LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf);
// LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf);
LOAD_PS("ZeroPS", ppsOne, cgfProf);
LOAD_PS("BaseTexturePS", ppsBaseTexture, cgfProf);
LOAD_PS("Convert16to32PS", ppsConvert16to32, cgfProf);
@ -828,7 +874,7 @@ bool ZZshLoadExtraEffects()
FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed)
{
int texwrap;
assert( texfilter < NUM_FILTERS );
//assert( g_nPixelShaderVer == SHADER_30 );
if( clamp.wms == clamp.wmt ) {
@ -851,9 +897,9 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
FRAGMENTSHADER* pf = ppsTexture+index;
if( pf->prog != NULL )
if( pf->prog != NULL )
return pf;
pf->prog = LoadShaderFromType(EFFECT_DIR, EFFECT_NAME, type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, g_nPixelShaderVer, context);
if( pf->prog != NULL ) {
@ -886,4 +932,4 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
#endif // RELEASE_TO_PUBLIC
//#endif // NVIDIA_CG_API
#endif // NVIDIA_CG_API

View File

@ -33,8 +33,10 @@
#include "ZZoglMath.h"
#include "GS.h"
// For output
// By default enable nvidia cg api
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
#define NVIDIA_CG_API
#endif
// --------------------------- API abstraction level --------------------------------
#ifdef NVIDIA_CG_API // Code for NVIDIA cg-toolkit API
@ -56,9 +58,60 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
#endif // end NVIDIA cg-toolkit API
#ifdef GLSL_API
// Kind of value stored in a ZZshParamInfo entry.
enum ZZshPARAMTYPE {
	ZZ_UNDEFINED,		// entry not set up (value used by the qZero initializer)
	ZZ_TEXTURE_2D,		// 2D texture sampler
	ZZ_TEXTURE_RECT,	// rectangle texture sampler
	ZZ_TEXTURE_3D,		// 3D texture sampler
	ZZ_FLOAT4,			// four-component float value
};
// Description and current value of one shader uniform (GLSL code path).
typedef struct {
	const char* ShName;		// Name of the uniform
	ZZshPARAMTYPE type;		// Which kind of parameter this entry holds
	float fvalue[4];		// Value of a float4 parameter
	GLuint sampler;			// Number of the texture unit in the array
	GLint texid;			// Number of the texture - texid.
	bool Constant;			// Uniform could be constant, i.e. does not change during program flow
	bool Settled;			// Set once a value has been assigned to the uniform
} ZZshParamInfo;
// Handle used to refer to a shader in the GLSL code path.
typedef struct {
	void* link;			// NULL means "no shader" (sZero)
	bool isFragment;	// presumably true for fragment shaders, false for vertex shaders -- confirm
} ZZshShaderLink;
// GLSL counterparts of the ZZsh* handle types: programs, shaders and indices
// are GL object names, parameters are GLint, the rest are plain ints.
#define ZZshProgram GLuint
#define ZZshShader GLuint
#define ZZshParameter GLint
#define ZZshContext int
#define ZZshProfile int
#define ZZshError int
#define ZZshIndex GLuint
// "Empty" values for the types above. The "field: value" initializers are a
// GCC extension.
const ZZshParamInfo qZero = {ShName:"", type:ZZ_UNDEFINED, fvalue:{0}, sampler: -1, texid: 0, Constant: false, Settled: false};
#define pZero 0
const ZZshShaderLink sZero = {link: NULL, isFragment: false};
// A parameter is considered active when its value is not negative.
inline bool ZZshActiveParameter(ZZshParameter param) {return (param > -1); }
// Expands to an empty block: nothing is released in the GLSL code path.
#define SAFE_RELEASE_PROG(x) { /*don't know what to do*/ }
// ---------------------------
#endif
//const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC};
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC, ZZ_SH_NONE};
// We have "compatible" shaders, as RegularFogVS and RegularFogPS. if don't need to wory about incompatible shaders
// It used only in GLSL mode.
@ -92,6 +145,7 @@ struct FRAGMENTSHADER
string filename;
#endif
#ifdef NVIDIA_CG_API
void set_uniform_param(ZZshParameter &var, const char *name)
{
ZZshParameter p;
@ -161,6 +215,7 @@ struct FRAGMENTSHADER
return false;
}
#endif
};
struct VERTEXSHADER
@ -183,8 +238,32 @@ struct VERTEXSHADER
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
extern FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
extern int interlace_mode;
// Selects which CRTC fragment shader array curr_pps() reads from.
enum CRTC_TYPE
{
	CRTC_RENDER,		// ppsCRTC
	//CRTC_RENDER_24,	// 24-bit variant, currently disabled
	CRTC_RENDER_TARG	// ppsCRTCTarg
};
// Accessors returning the CRTC shader entry selected by interlace_mode.
// Both arrays have two entries, so interlace_mode is assumed to be 0 or 1 --
// TODO confirm.
static __forceinline FRAGMENTSHADER* curr_ppsCRTC() { return &ppsCRTC[interlace_mode]; }
//static __forceinline FRAGMENTSHADER* curr_ppsCRTC24() { return &ppsCRTC24[interlace_mode]; }
static __forceinline FRAGMENTSHADER* curr_ppsCRTCTarg() { return &ppsCRTCTarg[interlace_mode]; }
// Returns the current CRTC fragment shader for the requested render type, or
// NULL when the type is not one of the handled values. The 24-bit variant
// (CRTC_RENDER_24 / curr_ppsCRTC24) is disabled.
static __forceinline FRAGMENTSHADER* curr_pps(CRTC_TYPE render_type)
{
	if (render_type == CRTC_RENDER)
		return curr_ppsCRTC();

	if (render_type == CRTC_RENDER_TARG)
		return curr_ppsCRTCTarg();

	return NULL;
}
// ------------------------- Functions -------------------------------
#ifdef NVIDIA_CG_API
@ -192,6 +271,11 @@ inline bool ZZshExistProgram(FRAGMENTSHADER* pf) {return (pf->prog != NULL); };
inline bool ZZshExistProgram(VERTEXSHADER* pf) {return (pf->prog != NULL); };
inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog != NULL); };
#endif
#ifdef GLSL_API
// GLSL versions of the "does this shader exist" checks: a fragment or vertex
// shader exists when its Shader member is non-zero, a shader link when its
// link pointer is not NULL.
inline bool ZZshExistProgram(FRAGMENTSHADER* pf) {return (pf->Shader != 0); };
inline bool ZZshExistProgram(VERTEXSHADER* pf) {return (pf->Shader != 0); };
inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog.link != NULL); } // This is used for pvs mainly. Not NULL means that LOAD_VS was done
#endif
extern const char* ShaderCallerName;
extern const char* ShaderHandleName;
@ -222,10 +306,17 @@ extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr );
extern void ZZshSetVertexShader(ZZshShaderLink prog);
extern void ZZshSetPixelShader(ZZshShaderLink prog);
extern bool ZZshLoadExtraEffects();
extern void ZZshExitCleaning();
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
// only sets a limited amount of state (for Update)
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
// only sets a limited amount of state (for Update)
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
extern u32 ptexBilinearBlocks;
extern u32 ptexConv32to16;
#endif

View File

@ -0,0 +1,979 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009 zeydlitz@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2006
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef GLSL_API // This code is only for GLSL API
// ZZogl Shader manipulation functions.
/*
* used cg calls:
* cgGLIsProfileSupported -- don't needed
* cgGetErrorString -- later
* cgGetLastListing -- later
* cgSetErrorHandler -- later
* cgCreateContext -- think that don't need
* cgGLEnableProfile -- don't need
* cgGLSetOptimalOptions -- don't need?
* cgGLSetManageTextureParameters -- what's this?
* cgCreateParameter -- don't need
* cgGLLoadProgram void LinkProgram(uint program)
* cgGetError -- later
* cgGLDisableProfile -- don't need
* cgGLSetParameter4fv
* cgGetNamedParameter
* cgGLEnableTextureParameter
* cgIsParameterUsed
* cgGLBindProgram void UseProgram(uint program)
* cgConnectParameter
* cgIsProgram bool IsProgram(uint program)
* cgCreateProgramFromFile
*/
//------------------- Includes
#include "Util.h"
#include "ZZoglShaders.h"
#include "zpipe.h"
#include <math.h>
#include <map>
#include <fcntl.h> // this for open(). Maybe linux-specific
#include <sys/mman.h> // and this for mmap
// ----------------- Defines
#define TEXWRAP_REPEAT 0
#define TEXWRAP_CLAMP 1
#define TEXWRAP_REGION_REPEAT 2
#define TEXWRAP_REPEAT_CLAMP 3
#ifdef DEVBUILD
# define UNIFORM_ERROR_LOG ZZLog::Error_Log
#else
# define UNIFORM_ERROR_LOG
#endif
// Set it to 0 to disable context usage, 1 -- to enable. FFX-1 has a strange issue with ClampExt.
#define NOCONTEXT 0
#define NUMBER_OF_SAMPLERS 11
#define MAX_SHADER_NAME_SIZE 25
#define MAX_UNIFORM_NAME_SIZE 20
#define DEFINE_STRING_SIZE 256
//------------------ Constants
// Used in a logarithmic Z-test, as (1-o(1))/log(MAX_U32).
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
const static char* g_pShaders[4] = { "full", "reduced", "accurate", "accurate-reduced" };
// ----------------- Global Variables
ZZshContext g_cgcontext;
ZZshProfile cgvProf, cgfProf;
int g_nPixelShaderVer = 0; // default
u8* s_lpShaderResources = NULL;
ZZshShaderLink pvs[16] = {sZero}, g_vsprog = sZero, g_psprog = sZero; // 2 -- ZZ
ZZshParameter g_vparamPosXY[2] = {pZero}, g_fparamFogColor = pZero;
ZZshProgram ZZshMainProgram;
char* ZZshSource; // Shader's source data.
off_t ZZshSourceSize;
extern char* EFFECT_NAME; // All this variables used for testing and set manually
extern char* EFFECT_DIR;
bool g_bCRTCBilinear = true;
float4 g_vdepth, vlogz;
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
VERTEXSHADER pvsStore[16];
VERTEXSHADER pvsBitBlt;
inline bool LoadEffects();
extern bool s_bWriteDepth;
struct SHADERHEADER
{
unsigned int index, offset, size; // if highest bit of index is set, pixel shader
};
map<int, SHADERHEADER*> mapShaderResources;
// Debug variable, store name of the function that call the shader.
const char* ShaderCallerName = "";
const char* ShaderHandleName = "";
int NumActiveUniforms, NumGlobalUniforms;
ZZshParamInfo UniformsIndex[MAX_ACTIVE_UNIFORMS] = {qZero};
const char* ShaderNames[MAX_ACTIVE_SHADERS] = {""};
ZZshShaderType ShaderTypes[MAX_ACTIVE_SHADERS] = {ZZ_SH_NONE};
ZZshProgram CompiledPrograms[MAX_ACTIVE_SHADERS][MAX_ACTIVE_SHADERS] = {{0}};
const char* TextureUnits[NUMBER_OF_SAMPLERS] =
{"g_sMemory[0]", "g_sMemory[1]", "g_sSrcFinal", "g_sBitwiseANDX", "g_sBitwiseANDY", "g_sInterlace", \
"g_sCLUT", "g_sBlocks", "g_sBilinearBlocks", "g_sConv16to32", "g_sConv32to16"};
ZZshPARAMTYPE TextureTypes[NUMBER_OF_SAMPLERS] =
{ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, \
ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_3D} ;
//------------------ Code
// Flattens a shader configuration into one index: type, texfilter and texwrap
// form a mixed-radix number (radices NUM_TYPES, NUM_FILTERS, NUM_TEXWRAPS) and
// the remaining arguments are combined with weights 1, 2, 4, 8, 16 and 32 on
// top of it.
inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps) {
	const int variant = fog + 2*writedepth + 4*testaem + 8*exactcolor + 16*context + 32*ps;

	int index = type;
	index += texfilter*NUM_TYPES;
	index += NUM_FILTERS*NUM_TYPES*texwrap;
	index += NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*variant;
	return index;
}
// Nothing needs to be done in this (GLSL) implementation; always reports success.
bool ZZshCheckProfilesSupport() {
	return true;
}
// Error handler. Setup in ZZogl_Create once.
// In this (GLSL) implementation the body is commented out, so the function
// does nothing; the Cg calls are kept below for reference only.
void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata)
{/*
	ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err));
	const char* listing = cgGetLastListing(g_cgcontext);
	if (listing != NULL)
		ZZLog::Debug_Log(" last listing: %s", listing);
*/
}
// All-zero float4, used as the initial value of global uniforms.
float ZeroFloat4[4] = {0};

// Copies the four floats at v into f, lowest index first.
inline void SettleFloat(float* f, const float* v) {
	for (int component = 0; component < 4; ++component)
		f[component] = v[component];
}
// Builds a ZZshParamInfo from its individual fields.
//   ShName  - uniform name; only the pointer is stored, so the string must
//             outlive the returned value
//   fvalue  - at least four floats, copied into the result
// The remaining arguments are stored unchanged in the fields of the same name.
inline ZZshParamInfo ParamInfo(const char* ShName, ZZshPARAMTYPE type, const float fvalue[], GLuint sampler, GLint texid, bool Constant, bool Settled) {
	ZZshParamInfo x;
	// No buffer is allocated for the name: the pointer is stored as is. The
	// earlier version allocated one and overwrote the pointer on the very next
	// line, leaking the allocation on every call.
	x.ShName = ShName;
	x.type = type;
	SettleFloat(x.fvalue, fvalue);
	x.sampler = sampler;
	x.texid = texid;
	x.Constant = Constant;
	x.Settled = Settled;
	return x;
}
// Registers a global float4 uniform called "name" in the next free slot of
// UniformsIndex (zero value, not constant, not settled), stores the slot
// number in *param and advances NumActiveUniforms.
inline void SetGlobalUniform(ZZshParameter* param, const char* name) {
	const int slot = NumActiveUniforms++;
	*param = slot;
	UniformsIndex[slot] = ParamInfo(name, ZZ_FLOAT4, ZeroFloat4, -1, 0, false, false);
}
// Initializes the GLSL shader subsystem: loads the effects, picks the pixel
// shader version (g_nPixelShaderVer) by trying to load a sample shader,
// creates the main program object, registers the global uniforms and loads
// the extra effects. Returns false when a B_G-guarded step fails or when
// glCreateShader is not available.
bool ZZshStartUsingShaders() {
	ZZLog::Error_Log("Creating effects.");
	B_G(LoadEffects(), return false);
	// A null glCreateShader entry point means shaders cannot be created at all.
	if (!glCreateShader)
	{
		ZZLog::Error_Log("GLSL shaders is not supported, stop.");
		return false;
	}
	// create a sample shader
	clampInfo temp;
	memset(&temp, 0, sizeof(temp));
	temp.wms = 3; temp.wmt = 3;
	g_nPixelShaderVer = 0;//SHADER_ACCURATE;
	// test
	// NOTE(review): bFailed is not initialized here; this relies on
	// ZZshLoadShadeEffect always writing it -- confirm.
	bool bFailed;
	FRAGMENTSHADER* pfrag = ZZshLoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed);
	if( bFailed || pfrag == NULL ) {
		// First attempt failed: retry with the accurate+reduced version.
		g_nPixelShaderVer = SHADER_ACCURATE|SHADER_REDUCED;
		pfrag = ZZshLoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed);
		if( pfrag != NULL )
			glLinkProgram(pfrag->Shader);
		if( bFailed || pfrag == NULL || glGetError() != GL_NO_ERROR) {
			// Second attempt failed as well: fall back to the reduced version.
			g_nPixelShaderVer = SHADER_REDUCED;
			ZZLog::Error_Log("Basic shader test failed.");
		}
	}
	ZZshMainProgram = glCreateProgram();
	// Register the global uniforms; everything registered up to here counts as global.
	NumActiveUniforms = 0;
	SetGlobalUniform(&g_fparamFogColor, "g_fFogColor");
	SetGlobalUniform(&g_vparamPosXY[0], "g_fPosXY[0]");
	// With NOCONTEXT == 0 both contexts use the name "g_fPosXY[0]".
	SetGlobalUniform(&g_vparamPosXY[1], NOCONTEXT?"g_fPosXY[1]":"g_fPosXY[0]");
	NumGlobalUniforms = NumActiveUniforms;
	// Reduced shaders run with bilinear filtering switched off.
	if (g_nPixelShaderVer & SHADER_REDUCED)
		conf.bilinear = 0;
	ZZLog::Error_Log("Creating extra effects.");
	B_G(ZZshLoadExtraEffects(), return false);
	ZZLog::Error_Log("Using %s shaders.", g_pShaders[g_nPixelShaderVer]);
	return true;
}
// Opens the GLSL shader source file and maps it into memory.
// The file is looked up as "plugins/ps2hw.glsl" first and, when
// PLUGIN_DIR_COMPILATION is defined, as "<PLUGIN_DIR_COMPILATION>/ps2hw.glsl".
// On success ZZshSource points to a private, writable mapping of
// ZZshSourceSize bytes whose last byte has been replaced by a terminating 0,
// and true is returned. On any failure false is returned, no descriptor is
// left open and ZZshSource / ZZshSourceSize are not modified.
bool ZZshCreateOpenShadersFile() {
	std::string ShaderFileName("plugins/ps2hw.glsl");
	int ShaderFD = open(ShaderFileName.c_str(), O_RDONLY);
	struct stat sb;
	if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) {
		// open() may have succeeded while fstat() failed; release that
		// descriptor before looking elsewhere so that it is not leaked.
		if (ShaderFD != -1) {
			close(ShaderFD);
			ShaderFD = -1;
		}
		// Each linux distributions have his rules for path so we give them the possibility to
		// change it with compilation flags. -- Gregory
#ifdef PLUGIN_DIR_COMPILATION
#define xPLUGIN_DIR_str(s) PLUGIN_DIR_str(s)
#define PLUGIN_DIR_str(s) #s
		ShaderFileName = string(xPLUGIN_DIR_str(PLUGIN_DIR_COMPILATION)) + "/ps2hw.glsl";
		ShaderFD = open(ShaderFileName.c_str(), O_RDONLY);
#endif
		if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) {
			ZZLog::Error_Log("No source for %s: \n", ShaderFileName.c_str());
			if (ShaderFD != -1)
				close(ShaderFD);
			return false;
		}
	}

	// An empty file cannot be mapped, and terminating it would write to index -1.
	if (sb.st_size <= 0) {
		ZZLog::Error_Log("Shader source %s is empty.", ShaderFileName.c_str());
		close(ShaderFD);
		return false;
	}

	// Map the file directly into memory. The mapping is private and writable
	// so that the terminator below never reaches the file on disk.
	void* mapping = mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ShaderFD, 0);
	// The mapping stays valid after the descriptor is closed.
	close(ShaderFD);
	if (mapping == MAP_FAILED) {
		ZZLog::Error_Log("Failed to map %s into memory.", ShaderFileName.c_str());
		return false;
	}

	ZZshSourceSize = sb.st_size;
	ZZshSource = (char*)mapping;
	ZZshSource[ ZZshSourceSize - 1] = 0; // Make the source null-terminated (overwrites the last byte).
	return true;
}
// Releases the memory mapping of the shader source, if there is one.
// The globals are reset afterwards, so a second call or a call without a
// loaded source does nothing.
void ZZshExitCleaning() {
	if (ZZshSource == NULL)
		return;

	munmap(ZZshSource, ZZshSourceSize);
	ZZshSource = NULL;
	ZZshSourceSize = 0;
}
// GLSL replacement for disabling the Cg profiles: unbinds the current program
// object, which stops all shader programs from running.
void ZZshGLDisableProfile() { // This stop all other shader programs from running;
	glUseProgram(0);
}
// GLSL replacement for enabling the Cg profiles: nothing to do, intentionally empty.
void ZZshGLEnableProfile() {
}
//-------------------------------------------------------------------------------------
// Texture counterpart of ZZshSetParameter4fv: records texobj as the texture of
// uniform "param" and marks the uniform as settled. A negative param is
// ignored. "name" is only used for debugging.
void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name) {
	if (param <= -1)
		return;

	// ZZLog::Error_Log("Set texture parameter %s %d... Ok", name, texobj);
	ZZshParamInfo& uniform = UniformsIndex[param];
	uniform.texid = texobj;
	uniform.Settled = true;
}
// Same as the overload without "prog": records texobj as the texture of
// uniform "param" and marks it as settled; a negative param is ignored.
// "prog" is not used.
void ZZshGLSetTextureParameter(ZZshShaderLink prog, ZZshParameter param, GLuint texobj, const char* name) {
	if (param <= -1)
		return;

	// ZZLog::Error_Log("Set texture parameter %s %d... Ok", name, texobj);
	ZZshParamInfo& uniform = UniformsIndex[param];
	uniform.texid = texobj;
	uniform.Settled = true;
}
// Replacement for cgGLSetParameter4fv: copies the four floats at v into the
// stored value of uniform "param" and marks it as settled. A negative param
// is ignored; "prog" is not used.
// "name" can be any string and exists for debugging only -- it has to be
// supplied by the caller because the error handler cannot return a name.
void ZZshSetParameter4fv(ZZshShaderLink prog, ZZshParameter param, const float* v, const char* name) {
	if (param <= -1)
		return;

	// ZZLog::Error_Log("Set float parameter %s %f, %f, %f, %f... Ok", name, v[0], v[1], v[2], v[3]);
	ZZshParamInfo& uniform = UniformsIndex[param];
	SettleFloat(uniform.fvalue, v);
	uniform.Settled = true;
}
// Overload without a program argument: copies the four floats at v into the
// stored value of uniform "param" and marks it as settled. A negative param
// is ignored; "name" is for debugging only.
void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name) {
	if (param <= -1)
		return;

	// ZZLog::Error_Log("Set float parameter %s %f, %f, %f, %f... Ok", name, v[0], v[1], v[2], v[3]);
	ZZshParamInfo& uniform = UniformsIndex[param];
	SettleFloat(uniform.fvalue, v);
	uniform.Settled = true;
}
// Pointer-taking variant of ZZshSetParameter4fv: does nothing when param is
// NULL, otherwise sets *param for prog. In this implementation no lookup or
// retry takes place; name should be the USED name of the parameter for prog.
void ZZshSetParameter4fvWithRetry(ZZshParameter* param, ZZshShaderLink prog, const float* v, const char* name) {
	if (param == NULL)
		return;

	ZZshSetParameter4fv(prog, *param, v, name);
}
// Sets the sOneColor parameter of the given fragment shader to (1, 1, 1, 1)
// and records a handle name for debugging.
void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) {
	ShaderHandleName = "Set Default One colot";

	float4 allOnes( 1, 1, 1, 1 );
	ZZshSetParameter4fv(ptr.prog, ptr.sOneColor, allOnes, "DegaultOne");
}
//-------------------------------------------------------------------------------------
// Minimal pass-through shaders used whenever a real shader cannot be built.
const GLchar * EmptyVertex = "void main(void) {gl_Position = ftransform();}";
const GLchar * EmptyFragment = "void main(void) {gl_FragColor = gl_Color;}";

// Builds and links a program that contains only one of the pass-through
// shaders above (vertex if shaderType is GL_VERTEX_SHADER, fragment otherwise).
// `name` is only used in log messages.
// Returns the program handle, or -1 if the result is not a program object or
// GL reported an error.
inline ZZshProgram UseEmptyProgram(const char* name, GLenum shaderType) {
    const GLchar** source = (shaderType == GL_VERTEX_SHADER) ? &EmptyVertex : &EmptyFragment;

    GLuint stub = glCreateShader(shaderType);
    glShaderSource(stub, 1, source, NULL);
    glCompileShader(stub);

    ZZshProgram fallback = glCreateProgram();
    glAttachShader(fallback, stub);
    glLinkProgram(fallback);

    // glGetError() is only consulted when the handle really is a program.
    const bool usable = glIsProgram(fallback) && glGetError() == GL_NO_ERROR;
    if (!usable) {
        ZZLog::Error_Log("Failed to load empty shader for %s:", name);
        return -1;
    }

    ZZLog::Error_Log("Used Empty program for %s... Ok.",name);
    return fallback;
}
// Derives the shader family from the beginning of the shader's name.
// Prefixes are tested in table order, so the longer "…Fog" prefixes must stay
// ahead of the shorter ones they contain. Any name that matches no prefix is
// classified as ZZ_SH_CRTC.
ZZshShaderType ZZshGetShaderType(const char* name) {
    static const struct {
        const char*    prefix;
        size_t         length;
        ZZshShaderType type;
    } families[] = {
        { "TextureFog", 10, ZZ_SH_TEXTURE_FOG },
        { "Texture",     7, ZZ_SH_TEXTURE     },
        { "RegularFog", 10, ZZ_SH_REGULAR_FOG },
        { "Regular",     7, ZZ_SH_REGULAR     },
        { "Zero",        4, ZZ_SH_ZERO        },
    };

    for (size_t i = 0; i < sizeof(families) / sizeof(families[0]); ++i) {
        if (strncmp(name, families[i].prefix, families[i].length) == 0)
            return families[i].type;
    }
    return ZZ_SH_CRTC;
}
// Compiles one of the pass-through shaders (EmptyVertex for
// GL_VERTEX_SHADER, EmptyFragment otherwise) and registers it under `name`
// in ShaderNames / ShaderTypes. The compile status is not checked.
// Returns the new shader object.
inline ZZshShader UseEmptyShader(const char* name, GLenum shaderType) {
    const GLchar** source = (shaderType == GL_VERTEX_SHADER) ? &EmptyVertex : &EmptyFragment;

    GLuint stub = glCreateShader(shaderType);
    glShaderSource(stub, 1, source, NULL);
    glCompileShader(stub);

    ShaderTypes[stub] = ZZshGetShaderType(name);
    ShaderNames[stub] = name;

    ZZLog::Error_Log("Used Empty shader for %s... Ok.",name);
    return stub;
}
// Returns true if `shader` compiled successfully. Otherwise writes the
// driver's info log to the error log and returns false.
//
// Fixes over the previous version:
//  * the "length" output argument was an uninitialised `int*`, so GL wrote
//    through a wild pointer; NULL is passed now (the length is not needed);
//  * the log buffer is released after use;
//  * a zero-length info log no longer leads to printing an unterminated,
//    zero-sized buffer.
inline bool GetCompilationLog(GLuint shader) {
    GLint CompileStatus = GL_FALSE;
    glGetShaderiv(shader, GL_COMPILE_STATUS, &CompileStatus);
    if (CompileStatus == GL_TRUE)
        return true;

    GLint infologlength = 0;
    glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infologlength);

    if (infologlength > 0) {
        char* InfoLog = new char[infologlength];
        InfoLog[0] = '\0';	// stays a valid string even if GL writes nothing
        glGetShaderInfoLog(shader, infologlength, NULL, InfoLog);
        ZZLog::Error_Log("Compiling... %d:\t %s", shader, InfoLog);
        delete[] InfoLog;
    }
    else {
        ZZLog::Error_Log("Compiling... %d:\t %s", shader, "");
    }
    return false;
}
// Compiles the shared shader source (ZZshSource) prefixed with `DefineString`
// as a shader of kind `shaderType`.
// The new shader object is written to `shader` in every case. On success it is
// also registered in ShaderTypes / ShaderNames under `name` and true is
// returned; on failure the compile log is reported and false is returned.
inline bool CompileShader(ZZshProgram& shader, const char* DefineString, const char* name, GLenum shaderType) {
    // The defines come first so they can select the entry point and options.
    const GLchar* sources[2] = { (const GLchar*)DefineString, (const GLchar*)ZZshSource };

    shader = glCreateShader(shaderType);
    glShaderSource(shader, 2, sources, NULL);
    glCompileShader(shader);
    ZZLog::Debug_Log("Creating shader %d for %s", shader, name);

    const bool compiled = GetCompilationLog(shader);
    if (compiled) {
        ShaderTypes[shader] = ZZshGetShaderType(name);
        ShaderNames[shader] = name;
        GL_REPORT_ERRORD();
        return true;
    }

    ZZLog::Error_Log("Failed to compile shader for %s:", name);
    return false;
}
// Thin logging wrapper around CompileShader (the original author presumed it
// to be Linux specific). Despite the name, no file is read here: the source
// comes from CompileShader. Returns whether compilation succeeded.
inline bool LoadShaderFromFile(ZZshShader& shader, const char* DefineString, const char* name, GLenum ShaderType) {
    const bool compiled = CompileShader(shader, DefineString, name, ShaderType);

    if (compiled)
        ZZLog::Error_Log("Used shader for %s... Ok",name);
    else
        ZZLog::Error_Log("Failed to compile shader for %s: ", name);

    return compiled;
}
// Returns true if `prog` is a program object that linked successfully.
// The number of active uniforms/attributes is traced via UNIFORM_ERROR_LOG.
// On failure, DEVBUILD builds additionally print the driver's link log.
//
// Fixes over the previous version:
//  * the "length" output argument was an uninitialised `int*`, so GL wrote
//    through a wild pointer; NULL is passed now;
//  * the log buffer is released, and is no longer allocated for an empty log;
//  * `!infologlength == 0` (correct only by accident of precedence) is now a
//    plain length test.
inline bool GetLinkLog(ZZshProgram prog) {
    GLint LinkStatus = GL_FALSE;
    glGetProgramiv(prog, GL_LINK_STATUS, &LinkStatus);

    int unif = 0, atrib = 0;
    glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &unif);
    glGetProgramiv(prog, GL_ACTIVE_ATTRIBUTES, &atrib);
    UNIFORM_ERROR_LOG("Uniforms %d, attributes %d", unif, atrib);

    if (LinkStatus == GL_TRUE && glIsProgram(prog)) return true;

#ifdef DEVBUILD
    GLint infologlength = 0;
    glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &infologlength);
    if (infologlength > 0) {
        char* InfoLog = new char[infologlength];
        InfoLog[0] = '\0';	// stays a valid string even if GL writes nothing
        glGetProgramInfoLog(prog, infologlength, NULL, InfoLog);
        ZZLog::Error_Log("Linking %d... %d:\t %s", prog, infologlength, InfoLog);
        delete[] InfoLog;
    }
#endif
    return false;
}
//-------------------------------------------------------------------------------------
// Creates a program from `shader` (and `shader2` when it is non-zero), links
// it and detaches `shader` again. `name` is only used in log messages.
// If linking fails, a pass-through fragment program from UseEmptyProgram is
// returned instead.
//
// Fixes over the previous version:
//  * `shader` is detached from the program it was attached to. Previously, on
//    a link failure, glDetachShader was issued against the fallback program
//    (which never had `shader` attached), leaving a stale GL error behind;
//  * the "... Ok" message is no longer printed after a link failure.
// NOTE(review): the program that failed to link is still not deleted here.
inline ZZshProgram madeProgram(ZZshShader shader, ZZshShader shader2, char* name) {
    ZZshProgram prog = glCreateProgram();
    glAttachShader(prog, shader);
    if (shader2 != 0)
        glAttachShader(prog, shader2);
    glLinkProgram(prog);

    const bool linked = GetLinkLog(prog);
    glDetachShader(prog, shader);

    if (!linked) {
        ZZLog::Error_Log("Failed to link shader for %s: ", name);
        return UseEmptyProgram(name, GL_FRAGMENT_SHADER);
    }

    ZZLog::Error_Log("Made shader program for %s... Ok",name);
    return prog;
}
// Pushes the recorded values of uniform slots [start, finish) of UniformsIndex
// into ZZshMainProgram. float4 slots are uploaded with glUniform4fv; every
// other slot type is treated as a texture and bound to the texture unit stored
// in the slot's `sampler` field.
// Slots whose name is not found in the program, or whose type is
// ZZ_UNDEFINED, are skipped.
void PutParametersInProgam(int start, int finish) {
for (int i = start; i < finish; i++) {
// Work on a copy of the slot; the lookup is by name in the current program.
ZZshParamInfo param = UniformsIndex[i];
GLint location = glGetUniformLocation(ZZshMainProgram, param.ShName);
if (location != -1 && param.type != ZZ_UNDEFINED) {
UNIFORM_ERROR_LOG("\tTry uniform %d %d %d %s...\t\t", i, location, param.type, param.ShName);
// A value must have been set, unless the slot holds a constant.
if (!param.Settled && !param.Constant) {
UNIFORM_ERROR_LOG("\tUnsettled, non-constant uniform, could be bug: %d %s", param.type, param.ShName);
continue;
}
if (param.type == ZZ_FLOAT4) {
glUniform4fv(location, 1, param.fvalue);
}
else
{
// Texture slot: select its unit, then bind to the matching target.
glActiveTexture(GL_TEXTURE0 + param.sampler);
if (param.type == ZZ_TEXTURE_2D)
glBindTexture(GL_TEXTURE_2D, param.texid);
else if (param.type == ZZ_TEXTURE_3D)
glBindTexture(GL_TEXTURE_3D, param.texid);
else
glBindTexture(GL_TEXTURE_RECTANGLE, param.texid);
GL_REPORT_ERRORD();
}
if (glGetError() == GL_NO_ERROR)
UNIFORM_ERROR_LOG("Ok. Param name %s, location %d, type %d", param.ShName, location, param.type);
else
ZZLog::Error_Log("error in PutParametersInProgam param name %s, location %d, type %d", param.ShName, location, param.type);
// NOTE(review): the statement below is a comparison, not an assignment, so
// it has no effect and used parameters are never actually unset. Turning it
// into `= false` would change which uniforms get re-uploaded on later draws
// (see the "Unsettled" early-continue above) - verify with rendering tests
// before changing it.
if (!param.Constant) // Unset used parameters
UniformsIndex[i].Settled == false;
}
// Only reported for non-global ranges (start != 0).
else if (start != 0 && location == -1 && param.Settled) // No global variable
ZZLog::Error_Log("Warning! Unused, but set uniform %d, %s", location, param.ShName);
}
GL_REPORT_ERRORD();
}
// Assigns texture units to the sampler uniforms of ZZshMainProgram for uniform
// slots [start, finish): every slot that exists in the program and is not a
// float4 gets its `sampler` index uploaded with glUniform1i.
//
// Fix over the previous version: the GL error state is now inspected AFTER
// glUniform1i, so the "Ok"/"error!" trace reports on the call it belongs to
// instead of on whatever happened before it.
void PutSInProgam(int start, int finish) {
    for (int i = start; i < finish; i++) {
        ZZshParamInfo param = UniformsIndex[i];
        GLint location = glGetUniformLocation(ZZshMainProgram, param.ShName);

        if (location == -1 || param.type == ZZ_UNDEFINED)
            continue;
        if (param.type == ZZ_FLOAT4)
            continue;

        UNIFORM_ERROR_LOG("\tTry sampler %d %d %d %s %d...\t\t", i, location, param.type, param.ShName, param.sampler);
        glUniform1i(location, param.sampler);

        if (glGetError() == GL_NO_ERROR) {
            UNIFORM_ERROR_LOG("Ok");
        }
        else {
            UNIFORM_ERROR_LOG("error!");
        }
    }
    GL_REPORT_ERRORD();
}
// Makes sure `Prog` has been validated and returns its validation status.
// A program whose GL_VALIDATE_STATUS is already true is not validated again.
// Otherwise glValidateProgram is run, and the info log is reported only if the
// program is still invalid afterwards.
//
// Fixes over the previous version:
//  * the status is re-read after glValidateProgram, so the return value
//    reflects the validation that was just performed instead of the stale
//    status from before it;
//  * the "length" output argument was an uninitialised `int*` (wild write);
//    NULL is passed now;
//  * the log buffer is released, and the log is no longer printed as an error
//    when validation succeeded.
bool ValidateProgram(ZZshProgram Prog) {
    GLint isValid = GL_FALSE;
    glGetProgramiv(Prog, GL_VALIDATE_STATUS, &isValid);
    if (isValid)
        return true;

    glValidateProgram(Prog);
    glGetProgramiv(Prog, GL_VALIDATE_STATUS, &isValid);

    if (!isValid) {
        GLint infologlength = 0;
        glGetProgramiv(Prog, GL_INFO_LOG_LENGTH, &infologlength);
        if (infologlength > 0) {
            char* InfoLog = new char[infologlength];
            InfoLog[0] = '\0';	// stays a valid string even if GL writes nothing
            glGetProgramInfoLog(Prog, infologlength, NULL, InfoLog);
            ZZLog::Error_Log("Validation %d... %d:\t %s", Prog, infologlength, InfoLog);
            delete[] InfoLog;
        }
        else {
            ZZLog::Error_Log("Validation %d... %d:\t %s", Prog, infologlength, "");
        }
    }
    return (isValid != 0);
}
// Activates ZZshMainProgram and feeds it all recorded parameters:
// sampler units for the vertex and fragment shader ranges first, then the
// values of the global range [0, NumGlobalUniforms) and of both shader ranges.
// If the program cannot be made current, no program is left bound and nothing
// is uploaded.
void PutParametersAndRun(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
    UNIFORM_ERROR_LOG("Run program %s(%d) \t+\t%s(%d)", ShaderNames[vs->Shader], vs->Shader, ShaderNames[ps->Shader], ps->Shader);

    glUseProgram(ZZshMainProgram);
    const bool activated = (glGetError() == GL_NO_ERROR);
    if (!activated) {
        ZZLog::Error_Log("Something weird happened on Linking stage.");
        glUseProgram(0);
        return;
    }

    // Sampler -> texture unit assignments.
    PutSInProgam(vs->ParametersStart, vs->ParametersFinish);
    PutSInProgam(ps->ParametersStart, ps->ParametersFinish);

    // Uniform values and texture bindings.
    PutParametersInProgam(0, NumGlobalUniforms);
    PutParametersInProgam(vs->ParametersStart, vs->ParametersFinish);
    PutParametersInProgam(ps->ParametersStart, ps->ParametersFinish);

    ValidateProgram(ZZshMainProgram);
    GL_REPORT_ERRORD();
}
// Attaches the given vertex and fragment shaders (those with a non-zero
// handle) to ZZshMainProgram, links it and, when linking succeeds, activates
// it with all parameters via PutParametersAndRun. On a link failure the
// function only logs and returns.
void CreateAndRunMain(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
    ZZLog::Error_Log("\n---> New shader program %d, %s(%d) \t+\t%s(%d).", ZZshMainProgram, ShaderNames[vs->Shader], vs->Shader, ShaderNames[ps->Shader], ps->Shader);

    if (vs->Shader != 0)
        glAttachShader(ZZshMainProgram, vs->Shader);
    if (ps->Shader != 0)
        glAttachShader(ZZshMainProgram, ps->Shader);
    glLinkProgram(ZZshMainProgram);

    const bool linked = GetLinkLog(ZZshMainProgram);
    if (linked) {
        GL_REPORT_ERRORD();
        PutParametersAndRun(vs, ps);
        GL_REPORT_ERRORD();
        return;
    }

    ZZLog::Error_Log("Main program linkage error, don't use any shader for this stage.");
}
// Tells whether a vertex/fragment shader pair may be linked together.
//  * no vertex shader            -> not compatible
//  * vertex shader of type ZERO  -> compatible with anything (even no `ps`)
//  * otherwise                   -> `ps` must exist and share the shader type
inline bool ZZshCheckShaderCompatibility(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
    if (vs == NULL)
        return false;

    const bool matchesEverything = (vs->ShaderType == ZZ_SH_ZERO);
    if (matchesEverything)
        return true;

    return (ps != NULL) && (vs->ShaderType == ps->ShaderType);
}
// Makes the program for the (vs, ps) pair current.
// Incompatible pairs, and pairs where either shader handle is 0, are ignored.
// Programs are cached in CompiledPrograms by the two shader handles: a cached
// entry that is still a program object is reused, otherwise a new program is
// created, stored in the cache and linked through CreateAndRunMain.
void ZZshSetShader(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
    // There is no point in linking shaders that do not belong together.
    if (!ZZshCheckShaderCompatibility(vs, ps))
        return;

    const int vertexId = (vs != NULL) ? vs->Shader : 0;
    const int pixelId  = (ps != NULL) ? ps->Shader : 0;
    if (vertexId == 0 || pixelId == 0)
        return;

    const bool cached = CompiledPrograms[vertexId][pixelId] != 0 && glIsProgram(CompiledPrograms[vertexId][pixelId]);
    if (cached) {
        ZZshMainProgram = CompiledPrograms[vertexId][pixelId];
        PutParametersAndRun(vs, ps);
        return;
    }

    ZZshProgram fresh = glCreateProgram();
    ZZshMainProgram = fresh;
    CompiledPrograms[vertexId][pixelId] = fresh;
    CreateAndRunMain(vs, ps);
}
// Remembers `prog` as the current vertex shader link and re-selects the
// program for the current vertex/pixel shader pair.
void ZZshSetVertexShader(ZZshShaderLink prog) {
    g_vsprog = prog;

    VERTEXSHADER*   vertex = (VERTEXSHADER*)(g_vsprog.link);
    FRAGMENTSHADER* pixel  = (FRAGMENTSHADER*)(g_psprog.link);
    ZZshSetShader(vertex, pixel);
}
// Remembers `prog` as the current pixel shader link and re-selects the
// program for the current vertex/pixel shader pair.
void ZZshSetPixelShader(ZZshShaderLink prog) {
    g_psprog = prog;

    VERTEXSHADER*   vertex = (VERTEXSHADER*)(g_vsprog.link);
    FRAGMENTSHADER* pixel  = (FRAGMENTSHADER*)(g_psprog.link);
    ZZshSetShader(vertex, pixel);
}
//------------------------------------------------------------------------------------------------------------------
// A texture object cannot be put into a sampler directly, only through the
// GL_TEXTUREi interface, so each sampler needs to know its texture unit.
// Looks `name` up in TextureUnits; on the first exact match the slot receives
// that index as its sampler and the matching entry of TextureTypes as its
// type. A name that is not a known sampler leaves `param` untouched.
inline void SettleTextureUnit(ZZshParamInfo* param, const char* name) {
    int unit = 0;
    while (unit < NUMBER_OF_SAMPLERS && strcmp(TextureUnits[unit], name) != 0)
        ++unit;

    if (unit < NUMBER_OF_SAMPLERS) {
        param->sampler = unit;
        param->type = TextureTypes[unit];
    }
}
// Registers uniform `name` of program `prog` in UniformsIndex.
// When the program has such a uniform, a new slot is appended (float4 by
// default, switched to a texture type by SettleTextureUnit if the name is a
// known sampler), its index is stored in *param and NumActiveUniforms grows
// by one. Otherwise *param is set to -1.
// Returns the GL uniform location (-1 when the uniform does not exist).
inline int SetUniformParam(ZZshProgram prog, ZZshParameter* param, const char* name) {
    const GLint p = glGetUniformLocation(prog, name);

    if (!(p > -1)) {
        *param = -1;
        return p;
    }

    *param = NumActiveUniforms;
    // Every uniform starts out as FLOAT4 by definition.
    UniformsIndex[NumActiveUniforms] = ParamInfo(name, ZZ_FLOAT4, ZeroFloat4, -1, 0, false, false);
    SettleTextureUnit(&(UniformsIndex[NumActiveUniforms]), name);
    UNIFORM_ERROR_LOG("uniform %s \t\t%d %d", name, p, UniformsIndex[NumActiveUniforms].type);
    NumActiveUniforms++;

    return p;
}
// Registers uniform `name` and stores its slot index in pf->var.
// Expects `pf`, `prog` and a GLint `p` to be in scope at the point of use;
// `p` receives the GL location returned by SetUniformParam.
#define SET_UNIFORMPARAM(var, name) { \
p = SetUniformParam(prog, &(pf->var), name); \
}
// Registers sampler uniform `name` as a constant and assigns texture `tex` to
// it. Expects `pf`, `prog` and a GLint `p` to be in scope at the point of use;
// `p` receives the GL location returned by SetUniformParam.
//
// Fix over the previous version: when the shader has no such uniform,
// SetUniformParam yields slot -1 and the macro used to write
// UniformsIndex[-1].Constant, i.e. outside of the array. The slot is now only
// touched when it is valid.
#define INIT_SAMPLERPARAM(tex, name) { \
    ZZshParameter x; \
    p = SetUniformParam(prog, &x, name); \
    if (x > -1) { \
        (UniformsIndex[x]).Constant = true; \
        ZZshGLSetTextureParameter(pf->prog, x, tex, name); \
    } \
}
// Registers float4 uniform `name` as a constant and gives it the value `var`.
// Expects `pf`, `prog` and a GLint `p` to be in scope at the point of use;
// `p` receives the GL location returned by SetUniformParam.
//
// Fix over the previous version: when the shader has no such uniform,
// SetUniformParam yields slot -1 and the macro used to write
// UniformsIndex[-1].Constant, i.e. outside of the array. The slot is now only
// touched when it is valid.
#define INIT_UNIFORMPARAM(var, name) { \
    ZZshParameter x; \
    p = SetUniformParam(prog, &x, name); \
    if (x > -1) { \
        (UniformsIndex[x]).Constant = true; \
        ZZshSetParameter4fv(pf->prog, x, var, name); \
    } \
}
// Builds the name of one element of a per-context uniform array:
// "<name>[<context * NOCONTEXT>]".
// The string is placed in a freshly allocated buffer of
// MAX_UNIFORM_NAME_SIZE chars that this function never frees.
// NOTE(review): the formatting is not bounds-checked against the buffer size.
char* AddContextToName(const char* name, int context) {
    const int element = context * NOCONTEXT;

    char* indexed = new char[MAX_UNIFORM_NAME_SIZE];
    sprintf(indexed, "%s[%d]", name, element);
    return indexed;
}
// Registers all uniforms of fragment shader `pf` in UniformsIndex and records
// the resulting slot range in pf->ParametersStart / pf->ParametersFinish.
//   context - selects the element of the per-context uniform arrays
//             (see AddContextToName)
//   type    - selects the conversion/lookup sampler: 3 -> g_sConv16to32,
//             4 -> g_sConv32to16, anything else -> g_sBilinearBlocks
// The order of the macro invocations below determines the slot order.
void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type)
{
// uniform parameters
GLint p;
pf->prog.link = (void*)pf; // Setting autolink
pf->prog.isFragment = true; // Setting autolink
pf->ShaderType = ShaderTypes[pf->Shader];
pf->ParametersStart = NumActiveUniforms;
// A temporary program holding only this shader; it is used for the uniform
// lookups made by the macros below.
// NOTE(review): this program is not deleted before the function returns.
ZZshProgram prog = madeProgram(pf->Shader, 0, "");
glUseProgram(prog);
GL_REPORT_ERRORD();
// NOTE(review): the strings returned by AddContextToName are allocated with
// new[] and are not released here.
SET_UNIFORMPARAM(sOneColor, "g_fOneColor");
SET_UNIFORMPARAM(sBitBltZ, "g_fBitBltZ");
SET_UNIFORMPARAM(sInvTexDims, "g_fInvTexDims");
SET_UNIFORMPARAM(fTexAlpha2, AddContextToName("fTexAlpha2", context));
SET_UNIFORMPARAM(fTexOffset, AddContextToName("g_fTexOffset", context));
SET_UNIFORMPARAM(fTexDims, AddContextToName("g_fTexDims", context));
SET_UNIFORMPARAM(fTexBlock, AddContextToName("g_fTexBlock", context));
SET_UNIFORMPARAM(fClampExts, AddContextToName("g_fClampExts", context)); // FIXME: There is a bug, that lead FFX-1 to incorrect CLAMP if this uniform have context.
SET_UNIFORMPARAM(fTexWrapMode, AddContextToName("TexWrapMode", context));
SET_UNIFORMPARAM(fRealTexDims, AddContextToName("g_fRealTexDims", context));
SET_UNIFORMPARAM(fTestBlack, AddContextToName("g_fTestBlack", context));
SET_UNIFORMPARAM(fPageOffset, AddContextToName("g_fPageOffset", context));
SET_UNIFORMPARAM(fTexAlpha, AddContextToName("fTexAlpha", context));
GL_REPORT_ERRORD();
// textures
INIT_SAMPLERPARAM(ptexBlocks, "g_sBlocks");
if (type == 3)
{INIT_SAMPLERPARAM(ptexConv16to32, "g_sConv16to32");}
else if (type == 4)
{INIT_SAMPLERPARAM(ptexConv32to16, "g_sConv32to16");}
else
{INIT_SAMPLERPARAM(ptexBilinearBlocks, "g_sBilinearBlocks");}
GL_REPORT_ERRORD();
SET_UNIFORMPARAM(sMemory, AddContextToName("g_sMemory", context));
SET_UNIFORMPARAM(sFinal, "g_sSrcFinal");
SET_UNIFORMPARAM(sBitwiseANDX, "g_sBitwiseANDX");
SET_UNIFORMPARAM(sBitwiseANDY, "g_sBitwiseANDY");
SET_UNIFORMPARAM(sCLUT, "g_sCLUT");
SET_UNIFORMPARAM(sInterlace, "g_sInterlace");
GL_REPORT_ERRORD();
// set global shader constants
INIT_UNIFORMPARAM(float4(0.5f, (conf.settings().exact_color)?0.9f/256.0f:0.5f/256.0f, 0,1/255.0f), "g_fExactColor");
INIT_UNIFORMPARAM(float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f ), "g_fBilinear");
INIT_UNIFORMPARAM(float4(1.0f/256.0f, 1.0004f, 1, 0.5f), "g_fZBias");
INIT_UNIFORMPARAM(float4(0,1, 0.001f, 0.5f), "g_fc0");
INIT_UNIFORMPARAM(float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f), "g_fMult");
pf->ParametersFinish = NumActiveUniforms;
// This check runs after the slots have already been written.
if (NumActiveUniforms > MAX_ACTIVE_UNIFORMS)
ZZLog::Error_Log("Too many shader variables. You may increase the limit in source %d.", NumActiveUniforms);
glUseProgram(0);
GL_REPORT_ERRORD();
}
// Registers all uniforms of vertex shader `pf` in UniformsIndex and records
// the resulting slot range in pf->ParametersStart / pf->ParametersFinish.
// Also picks the depth constants (g_vdepth / vlogz) according to the no_logz
// setting. `context` is not used by this function.
// The order of the macro invocations below determines the slot order.
void SetupVertexProgramParameters(VERTEXSHADER* pf, int context)
{
GLint p;
pf->prog.link = (void*)pf; // Setting autolink
pf->prog.isFragment = false; // Setting autolink
pf->ShaderType = ShaderTypes[pf->Shader];
pf->ParametersStart = NumActiveUniforms;
// A temporary program holding only this shader; it is used for the uniform
// lookups made by the macros below.
// NOTE(review): this program is not deleted before the function returns.
ZZshProgram prog = madeProgram(pf->Shader, 0, "");
glUseProgram(prog);
GL_REPORT_ERRORD();
// Set Z-test, log or no log;
if (conf.settings().no_logz) {
g_vdepth = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
vlogz = float4( 1.0f, 0.0f, 0.0f, 0.0f);
}
else {
g_vdepth = float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f);
vlogz = float4( 0.0f, 1.0f, 0.0f, 0.0f);
}
INIT_UNIFORMPARAM(g_vdepth, "g_fZ");
// `p` is the location of g_fZ here; g_fZMin is only looked up when g_fZ exists.
if (p > -1) {
INIT_UNIFORMPARAM(vlogz, "g_fZMin");
// `p` now refers to g_fZMin.
if (p == -1) ZZLog::Error_Log ("Shader file version is outdated! Only log-Z is possible.");
}
GL_REPORT_ERRORD();
float4 vnorm = float4(g_filog32, 0, 0,0);
INIT_UNIFORMPARAM(vnorm, "g_fZNorm");
INIT_UNIFORMPARAM(float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f ), "g_fBilinear");
INIT_UNIFORMPARAM(float4(1.0f/256.0f, 1.0004f, 1, 0.5f), "g_fZBias") ;
INIT_UNIFORMPARAM(float4(0,1, 0.001f, 0.5f), "g_fc0");
SET_UNIFORMPARAM(sBitBltPos, "g_fBitBltPos");
SET_UNIFORMPARAM(sBitBltTex, "g_fBitBltTex");
SET_UNIFORMPARAM(fBitBltTrans, "g_fBitBltTrans");
pf->ParametersFinish = NumActiveUniforms;
// This check runs after the slots have already been written.
if (NumActiveUniforms > MAX_ACTIVE_UNIFORMS)
ZZLog::Error_Log("Too many shader variables. You may increase the limit in the source.");
glUseProgram(0);
GL_REPORT_ERRORD();
}
// GLSL version requested in the "#version" line of every shader; chosen for
// sampler2DRect support.
const int GLSL_VERSION = 130;

// GLSL shaders are always compiled from source. This writes the common header
// into `header_string`: the #version line, a #define that turns the function
// called `name` into the shader's main(), and the extra text in `depth`
// (typically further #define lines), followed by a newline.
// The caller must provide a buffer large enough for the result.
static __forceinline void GlslHeaderString(char* header_string, const char* name, const char* depth)
{
    const char* const layout = "#version %d\n#define %s main\n%s\n";
    sprintf(header_string, layout, GLSL_VERSION, name, depth);
}
// Compiles the vertex shader whose entry point is `name` for the given
// context and registers its uniforms.
//   DefineString - scratch buffer that receives the generated #define header
//   vertex       - shader record to fill in (handle, type, parameter range)
//   shaderver    - unused, kept for call-site compatibility
//   depth        - extra header text, e.g. the WRITE_DEPTH define
// Returns whether the shader compiled; the parameters are set up either way.
//
// Fix over the previous version: `vertex` used to be passed BY VALUE, so the
// compiled handle and the parameter range were stored in a temporary copy and
// lost on return, and prog.link was left pointing at that dead copy. It is now
// taken by reference, which existing call sites (all pass lvalues) satisfy
// unchanged.
static __forceinline bool LOAD_VS(char* DefineString, const char* name, VERTEXSHADER& vertex, int shaderver, ZZshProfile context, const char* depth)
{
    char temp[200];
    GlslHeaderString(temp, name, depth);
    sprintf(DefineString, "%s#define VERTEX_SHADER 1\n#define CTX %d\n", temp, context * NOCONTEXT);
    //ZZLog::WriteLn("Define for VS == '%s'", DefineString);

    const bool flag = LoadShaderFromFile(vertex.Shader, DefineString, name, GL_VERTEX_SHADER);
    SetupVertexProgramParameters(&vertex, context);
    return flag;
}
// Compiles the fragment shader whose entry point is `name` for the given
// context and registers its uniforms (with texture type 0).
//   DefineString - scratch buffer that receives the generated #define header
//   fragment     - shader record to fill in (handle, type, parameter range)
//   shaderver    - unused, kept for call-site compatibility
//   depth        - extra header text, e.g. the WRITE_DEPTH define
// Returns whether the shader compiled; the parameters are set up either way.
//
// Fix over the previous version: `fragment` used to be passed BY VALUE, so the
// compiled handle and the parameter range were stored in a temporary copy and
// lost on return, and prog.link was left pointing at that dead copy. It is now
// taken by reference, which existing call sites (all pass lvalues) satisfy
// unchanged.
static __forceinline bool LOAD_PS(char* DefineString, const char* name, FRAGMENTSHADER& fragment, int shaderver, ZZshProfile context, const char* depth)
{
    char temp[200];
    GlslHeaderString(temp, name, depth);
    sprintf(DefineString, "%s#define FRAGMENT_SHADER 1\n#define CTX %d\n", temp, context * NOCONTEXT);
    //ZZLog::WriteLn("Define for PS == '%s'", DefineString);

    const bool flag = LoadShaderFromFile(fragment.Shader, DefineString, name, GL_FRAGMENT_SHADER);
    SetupFragmentProgramParameters(&fragment, context, 0);
    return flag;
}
inline bool LoadEffects()
{
// clear the textures
for(u32 i = 0; i < ArraySize(ppsTexture); ++i) {
SAFE_RELEASE_PROG(ppsTexture[i].prog);
}
#ifndef _DEBUG
memset(ppsTexture, 0, sizeof(ppsTexture));
#endif
return true;
}
// Compiles all fixed (non texture-specific) vertex and fragment shaders:
// the 16 regular/texture vertex shader variants, the BitBlt shaders, the
// regular pixel shaders (plus their depth-writing versions when
// conf.mrtdepth is set), the CRTC shaders and the conversion shaders.
//
// bLoadSuccess is sticky: once any shader failed it stays false, so the
// fallback branches below trigger on ANY earlier failure, not only on a
// failure of the shader directly above them.
// The function always returns true; failures are only logged.
//
// Fixes over the previous version:
//  * in the nearest-filter fallback both CRTC shaders were loaded into
//    ppsCRTC[0], so the non-interlaced shader was overwritten and ppsCRTC[1]
//    was never loaded; the interlaced one now goes to ppsCRTC[1], matching
//    the bilinear branch;
//  * an unused local (`GLint p`) was removed.
bool ZZshLoadExtraEffects() {
    bool bLoadSuccess = true;
    char DefineString[DEFINE_STRING_SIZE] = "";
    const char* writedepth = "#define WRITE_DEPTH 1\n"; // should we write depth field

    // Vertex shaders: [0..7] without, [8..15] with depth writing; even
    // entries are context 0, odd entries context 1.
    const char* pvsshaders[4] = { "RegularVS", "TextureVS", "RegularFogVS", "TextureFogVS" };
    for (int i = 0; i < 4; ++i) {
        if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 * i], cgvProf, 0, "")) bLoadSuccess = false;
        if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 1 ], cgvProf, 1, "")) bLoadSuccess = false;
        if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 8 ], cgvProf, 0, writedepth)) bLoadSuccess = false;
        if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 8 + 1], cgvProf, 1, writedepth)) bLoadSuccess = false;
    }
    for (int i = 0; i < 16; ++i)
        pvs[i] = pvsStore[i].prog;

    if (!LOAD_VS(DefineString, "BitBltVS", pvsBitBlt, cgvProf, 0, "")) bLoadSuccess = false;
    GL_REPORT_ERRORD();

    if (!LOAD_PS(DefineString, "RegularPS", ppsRegular[0], cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "RegularFogPS", ppsRegular[1], cgfProf, 0, "")) bLoadSuccess = false;

    if( conf.mrtdepth ) {
        // Depth output is switched off if anything failed so far.
        if (!LOAD_PS(DefineString, "RegularPS", ppsRegular[2], cgfProf, 0, writedepth)) bLoadSuccess = false;
        if (!bLoadSuccess) conf.mrtdepth = 0;
        if (!LOAD_PS(DefineString, "RegularFogPS", ppsRegular[3], cgfProf, 0, writedepth)) bLoadSuccess = false;
        if (!bLoadSuccess) conf.mrtdepth = 0;
    }

    if (!LOAD_PS(DefineString, "BitBltPS", ppsBitBlt[0], cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "BitBltAAPS", ppsBitBlt[1], cgfProf, 0, "")) bLoadSuccess = false;
    if (!bLoadSuccess) {
        ZZLog::Error_Log("Failed to load BitBltAAPS, using BitBltPS.");
        if (!LOAD_PS(DefineString, "BitBltPS", ppsBitBlt[1], cgfProf, 0, "")) bLoadSuccess = false;
    }

    if (!LOAD_PS(DefineString, "BitBltDepthPS", ppsBitBltDepth, cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "CRTCTargPS", ppsCRTCTarg[0], cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "CRTCTargInterPS", ppsCRTCTarg[1], cgfProf, 0, "")) bLoadSuccess = false;

    g_bCRTCBilinear = true;
    if (!LOAD_PS(DefineString, "CRTCPS", ppsCRTC[0], cgfProf, 0, "")) bLoadSuccess = false;
    if( !bLoadSuccess ) {
        // switch to simpler
        g_bCRTCBilinear = false;
        if (!LOAD_PS(DefineString, "CRTCPS_Nearest", ppsCRTC[0], cgfProf, 0, "")) bLoadSuccess = false;
        if (!LOAD_PS(DefineString, "CRTCInterPS_Nearest", ppsCRTC[1], cgfProf, 0, "")) bLoadSuccess = false;
    }
    else {
        if (!LOAD_PS(DefineString, "CRTCInterPS", ppsCRTC[1], cgfProf, 0, "")) bLoadSuccess = false;
    }

    if( !bLoadSuccess )
        ZZLog::Error_Log("Failed to create CRTC shaders.");

    // if (!LOAD_PS(DefineString, "CRTC24PS", ppsCRTC24[0], cgfProf, 0, "")) bLoadSuccess = false;
    // if (!LOAD_PS(DefineString, "CRTC24InterPS", ppsCRTC24[1], cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "ZeroPS", ppsOne, cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "BaseTexturePS", ppsBaseTexture, cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "Convert16to32PS", ppsConvert16to32, cgfProf, 0, "")) bLoadSuccess = false;
    if (!LOAD_PS(DefineString, "Convert32to16PS", ppsConvert32to16, cgfProf, 0, "")) bLoadSuccess = false;

    GL_REPORT_ERRORD();
    return true;
}
// Header snippets selecting the texture wrapping code path, indexed by the
// texwrap value; the last entry selects the shader's default path.
const static char* g_pPsTexWrap[] = { "#define REPEAT 1\n", "#define CLAMP 1\n", "#define REGION_REPEAT 1\n", "" };

// Compiles the texture fragment shader for one combination of options.
// The entry point is "Texture[Fog]<texfilter>_<type name>PS"; the remaining
// options are turned into #define lines placed in front of the source.
// `srcdir` and `srcfile` are not used. The name buffer is handed over to the
// shader registry by CompileShader and therefore not freed here.
// Returns the compiled shader, or a pass-through shader from UseEmptyShader
// if compilation fails.
static ZZshShader LoadShaderFromType(const char* srcdir, const char* srcfile, int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int ps, int context) {
    assert( texwrap < NUM_TEXWRAPS);
    assert( type < NUM_TYPES );

    char* name = new char[MAX_SHADER_NAME_SIZE];
    sprintf(name, "Texture%s%d_%sPS", fog?"Fog":"", texfilter, g_pTexTypes[type]);
    ZZLog::Debug_Log("Starting shader for %s", name);

    // Common header, with the wrapping mode passed as extra header text.
    char header[200];
    GlslHeaderString(header, name, g_pPsTexWrap[texwrap]);

    const char* depthDefine    = writedepth ? "#define WRITE_DEPTH 1\n" : "";
    const char* aemDefine      = testaem ? "#define TEST_AEM 1\n" : "";
    const char* colorDefine    = exactcolor ? "#define EXACT_COLOR 1\n" : "";
    const char* accurateDefine = (ps & SHADER_ACCURATE) ? "#define ACCURATE_DECOMPRESSION 1\n" : "";

    char DefineString[DEFINE_STRING_SIZE] = "";
    sprintf(DefineString, "%s#define FRAGMENT_SHADER 1\n%s%s%s%s\n#define CTX %d\n", header, depthDefine, aemDefine, colorDefine, accurateDefine, context * NOCONTEXT);

    ZZshShader shader;
    const bool compiled = CompileShader(shader, DefineString, name, GL_FRAGMENT_SHADER);
    if (!compiled)
        return UseEmptyShader(name, GL_FRAGMENT_SHADER);

    ZZLog::Debug_Log("Used shader for type:%d filter:%d wrap:%d for:%d depth:%d aem:%d color:%d decompression:%d ctx:%d... Ok \n", type, texfilter, texwrap, fog, writedepth, testaem, exactcolor, ps, context);
    GL_REPORT_ERRORD();
    return shader;
}
// Returns the texture fragment shader for the given option combination,
// compiling it on first use and caching it in ppsTexture.
// The wrapping variant is derived from clamp.wms / clamp.wmt:
//   equal modes: 0 -> repeat, 1 or 2 -> clamp, anything else -> region repeat
//   different modes: region repeat if either is 3, else the mixed
//   repeat/clamp variant.
// *pbFailed (when given) is cleared first and set if the shader could not be
// created or GL reported an error while its parameters were set up.
// Returns NULL only when no shader exists after loading.
FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed)
{
    assert( texfilter < NUM_FILTERS );

    int texwrap;
    if( clamp.wms != clamp.wmt ) {
        if( clamp.wms==3||clamp.wmt==3)
            texwrap = TEXWRAP_REGION_REPEAT;
        else
            texwrap = TEXWRAP_REPEAT_CLAMP;
    }
    else if( clamp.wms == 0 )
        texwrap = TEXWRAP_REPEAT;
    else if( clamp.wms == 1 || clamp.wms == 2 )
        texwrap = TEXWRAP_CLAMP;
    else
        texwrap = TEXWRAP_REGION_REPEAT;

    int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);
    if( pbFailed != NULL ) *pbFailed = false;

    FRAGMENTSHADER* pf = ppsTexture+index;

    // Already compiled earlier.
    if (ZZshExistProgram(pf))
        return pf;

    pf->Shader = LoadShaderFromType(EFFECT_DIR, EFFECT_NAME, type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, g_nPixelShaderVer, context);

    if (!ZZshExistProgram(pf)) {
        ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms+clamp.wmt);
        if( pbFailed != NULL ) *pbFailed = true;
        GL_REPORT_ERRORD();
        return NULL;
    }

    SetupFragmentProgramParameters(pf, context, type);
    GL_REPORT_ERRORD();

    // The shader is returned even if GL complained; the caller is told via pbFailed.
    if( glGetError() != GL_NO_ERROR ) {
        ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms+clamp.wmt);
        if (pbFailed != NULL ) *pbFailed = true;
    }
    return pf;
}
#endif // GLSL_API

View File

@ -0,0 +1,812 @@
// Cg Shaders for PS2 GS emulation
// divides by z for every pixel, instead of in vertex shader
// fixes kh textures
// Rectangle textures (sampler2DRect) are used throughout this file.
// GLSL extension names carry the "GL_" prefix; without it the directive names
// an unknown extension and enables nothing.
#extension GL_ARB_texture_rectangle: enable
#define GL_compatibility_profile 1
// Divide the texture coordinate by z per pixel (see TEX_XY below).
#define PERSPECTIVE_CORRECT_TEX
// When writing GLSL code, the Cg semantics must be replaced by the built-in GLSL variables listed below.
// Note that the in and out variables differ!
// in POSITION set by glVertexPointer goes to gl_Vertex;
// out POSITION goes to gl_position
// in COLOR0 gl_Color
// out COLOR0 gl_FrontColor
// in TEXCOORD0 gl_MultiTexCoord0
// out TEXCOORD0 gl_TexCoord[0]
//in Fragments:
// in TEXCOORD0 gl_TexCoord[0]
// out COLOR0 gl_FragData[0]
//#define TEST_AEM // tests AEM for black pixels
//#define REGION_REPEAT // set if texture wrapping mode is region repeat
//#define WRITE_DEPTH // set if depth is also written in a MRT
//#define ACCURATE_DECOMPRESSION // set for less capable hardware ATI Radeon 9000 series
//#define EXACT_COLOR // make sure the output color is clamped to 1/255 boundaries (for alpha testing)
// TEX_XY: the 2D texture coordinate taken from a variable called `tex`;
// with PERSPECTIVE_CORRECT_TEX it is divided by tex.z.
// TEX_DECL: the type of that `tex` variable (vec4 in both configurations).
#ifdef PERSPECTIVE_CORRECT_TEX
#define TEX_XY tex.xy/tex.z
#define TEX_DECL vec4
#else
#define TEX_XY tex.xy
#define TEX_DECL vec4
#endif
// DOZWRITE(x) keeps x only when depth is written as well (WRITE_DEPTH),
// otherwise it expands to nothing.
#ifdef WRITE_DEPTH
#define DOZWRITE(x) x
#else
#define DOZWRITE(x)
#endif
// NVidia CG-data types
// (mapped onto the GLSL vector types; half and float variants are identical)
#define half2 vec2
#define half3 vec3
#define half4 vec4
#define float2 vec2
#define float3 vec3
#define float4 vec4
// Uniform interface of the shader. Arrays of size 2 are indexed with CTX,
// which the host code defines in the generated header of every shader.
// main ps2 memory, each pixel is stored in 32bit color
uniform sampler2DRect g_sMemory[2];
// per context pixel shader constants
uniform half4 fTexAlpha2[2];
uniform float4 g_fTexOffset[2]; // converts the page and block offsets into the mem addr/1024
uniform float4 g_fTexDims[2]; // mult by tex dims when accessing the block texture
uniform float4 g_fTexBlock[2];
uniform float4 g_fClampExts[2]; // if clamping the texture, use (minu, minv, maxu, maxv)
uniform float4 TexWrapMode[2]; // 0 - repeat/clamp, 1 - region rep (use fRegRepMask)
uniform float4 g_fRealTexDims[2]; // tex dims used for linear filtering (w,h,1/w,1/h)
// (alpha0, alpha1, 1 if highlight2 and tcc is rgba, 1-y)
uniform half4 g_fTestBlack[2]; // used for aem bit
uniform float4 g_fPageOffset[2];
uniform half4 fTexAlpha[2];
// vertex shader constants
uniform float4 g_fPosXY[2];
// used to get the tiled offset into a page given the linear offset
uniform sampler2DRect g_sSrcFinal;
uniform sampler2D g_sBlocks;
uniform sampler2D g_sBilinearBlocks;
uniform sampler2D g_sConv16to32;
uniform sampler3D g_sConv32to16;
uniform sampler2DRect g_sBitwiseANDX;
uniform sampler2DRect g_sBitwiseANDY;
uniform sampler2DRect g_sInterlace;
// used only on rare cases where the render target is PSMT8H
uniform sampler2D g_sCLUT;
// global pixel shader constants
uniform float4 g_fInvTexDims; // similar to g_fClutOff
uniform float4 g_fFogColor;
// used for rectblitting
uniform float4 g_fBitBltZ;
uniform half4 g_fOneColor; // col*.xxxy+.zzzw
// vertex shader constants
uniform float4 g_fBitBltPos;
uniform float4 g_fZ; // transforms d3dcolor z into float z
uniform float4 g_fZNorm;
uniform float4 g_fZMin = float4(0.0f, 1.0f, 0.0f, 0.0f);
uniform float4 g_fBitBltTex;
// pixel shader consts
// The initialisers below are only defaults: the host code uploads its own
// values for g_fExactColor, g_fBilinear, g_fZBias, g_fc0 and g_fMult.
// NOTE(review): the host uploads -0.2 for g_fBilinear.x, while the default
// here is -0.7.
// .z is used for the addressing fn
uniform half4 g_fExactColor = half4(0.5,0.5/256.0f,0,1/255.0f);
uniform float4 g_fBilinear = float4(-0.7f, -0.65f, 0.9,1/32767.0f);
uniform float4 g_fZBias = half4(1.0f/256.0f, 1.0004f, 1, 0.5); // also for vs
uniform float4 g_fc0 = float4(0,1, 0.001, 0.5f); // also for vs
uniform float4 g_fMult = float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f);
// vertex shader consts
uniform float4 g_fBitBltTrans = float4(0.5f, -0.5f, 0.5, 0.5 + 0.4/416.0f);
// given a local tex coord, returns the coord in the memory
// Steps: scale the coordinate with g_fTexDims, split it into a block part and
// a position inside the block, look the in-block offset up in g_sBlocks,
// combine both with g_fTexOffset into a linear offset, and finally turn that
// offset into a 2D coordinate with g_fPageOffset.
float2 ps2memcoord(float2 realtex)
{
float4 off;
// block off
realtex.xy = realtex.xy * g_fTexDims[CTX].xy + g_fTexDims[CTX].zw;
realtex.xy = (realtex.xy - fract(realtex.xy)) * g_fMult.zw;
float2 fblock = fract(realtex.xy);
off.xy = realtex.xy-fblock.xy;
#ifdef ACCURATE_DECOMPRESSION
// Keeps the fractional and the integer part of the offset apart while adding.
off.zw = texture(g_sBlocks, g_fTexBlock[CTX].xy*fblock + g_fTexBlock[CTX].zw).ar;
off.x = dot(off.xy, g_fTexOffset[CTX].xy);
float r = g_fTexOffset[CTX].w;
float f = fract(off.x);
float fadd = g_fTexOffset[CTX].z * off.z;
off.w = off.x + fadd + r;
off.x = fract(f + fadd + r);
off.w -= off.x ;
#else
off.z = texture(g_sBlocks, g_fTexBlock[CTX].xy*fblock + g_fTexBlock[CTX].zw).a;
// combine the two
off.x = dot(off.xyz, g_fTexOffset[CTX].xyz)+g_fTexOffset[CTX].w;
// off.x keeps the fractional part, off.w receives the integer part
off.x = modf(off.x, off.w);
#endif
off.xy = off.xw * g_fPageOffset[CTX].zy + g_fPageOffset[CTX].wx;
//off.y = off.w * g_fPageOffset[CTX].y + g_fPageOffset[CTX].x;
return off.xy;
}
// find all texcoords for bilinear filtering
// assume that orgtex are already on boundaries
// orgtex holds two coordinate pairs (xy and zw); the four memory coordinates
// for the combinations (x,y), (z,y), (x,w), (z,w) are returned in
// off0.xy, off0.zw, off1.xy and off1.zw respectively.
void ps2memcoord4(float4 orgtex, out float4 off0, out float4 off1)
{
//float4 off0, off1, off2, off3;
float4 realtex;
// block off
realtex = (orgtex * g_fTexDims[CTX].xyxy + g_fTexDims[CTX].zwzw);// * g_fMult.zwzw;
float4 fblock = fract(realtex.xyzw);
float4 ftransblock = g_fTexBlock[CTX].xyxy*fblock + g_fTexBlock[CTX].zwzw;
realtex -= fblock;
float4 transvals = g_fTexOffset[CTX].x * realtex.xzxz + g_fTexOffset[CTX].y * realtex.yyww + g_fTexOffset[CTX].w;
float4 colors;// = texture(g_sBilinearBlocks, ftransblock.xy);
// this is faster on ffx ingame
colors.x = texture(g_sBlocks, ftransblock.xy).a;
colors.y = texture(g_sBlocks, ftransblock.zy).a;
colors.z = texture(g_sBlocks, ftransblock.xw).a;
colors.w = texture(g_sBlocks, ftransblock.zw).a;
// fr: fractional part of each offset, rem: the remaining integer part
float4 fr, rem;
#ifdef ACCURATE_DECOMPRESSION
fr = fract(transvals);
float4 fadd = colors * g_fTexOffset[CTX].z;
rem = transvals + fadd;
fr = fract(fr + fadd);
rem -= fr;
#else
transvals += colors * g_fTexOffset[CTX].z;
fr = modf(transvals, rem);
#endif
rem = rem * g_fPageOffset[CTX].y + g_fPageOffset[CTX].x;
fr = fr * g_fPageOffset[CTX].z + g_fPageOffset[CTX].w;
// combine
// g_fc0.yxyx / g_fc0.xyxy act as selection masks that interleave fr and rem
// (with the host-supplied g_fc0 = (0, 1, ...)).
off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}
// Variant of ps2memcoord4 that fetches all four block offsets with a single
// lookup into g_sBilinearBlocks instead of four lookups into g_sBlocks.
// Unlike ps2memcoord4 it does not add g_fPageOffset[CTX].w to the fractional
// part.
// NOTE(review): nothing in the visible part of this file selects this
// function (see PS2MEMCOORD4).
void ps2memcoord4_fast(float4 orgtex, out float4 off0, out float4 off1)
{
float4 realtex;
realtex = (orgtex * g_fTexDims[CTX].xyxy + g_fTexDims[CTX].zwzw);// * g_fMult.zwzw;
float4 fblock = fract(realtex.xyzw);
float2 ftransblock = g_fTexBlock[CTX].xy*fblock.xy + g_fTexBlock[CTX].zw;
realtex -= fblock;
float4 transvals = g_fTexOffset[CTX].x * realtex.xzxz + g_fTexOffset[CTX].y * realtex.yyww + g_fTexOffset[CTX].w;
float4 colors = texture(g_sBilinearBlocks, ftransblock.xy);
// fr: fractional part of each offset, rem: the remaining integer part
float4 fr, rem;
#ifdef ACCURATE_DECOMPRESSION
fr = fract(transvals);
float4 fadd = colors * g_fTexOffset[CTX].z;
rem = transvals + fadd;
fr = fract(fr + fadd);
rem -= fr;
#else
transvals += colors * g_fTexOffset[CTX].z;
fr = modf(transvals, rem);
#endif
rem = rem * g_fPageOffset[CTX].y + g_fPageOffset[CTX].x;
fr = fr * g_fPageOffset[CTX].z;
off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}
// Wrapping modes
// Exactly one ps2addr() is compiled, chosen by the REPEAT / CLAMP /
// REGION_REPEAT define that the host code puts into the shader header.
// It maps a texture coordinate into the addressable range of the texture.
#if defined(REPEAT)
// plain repeat: keep only the fractional part
float2 ps2addr(float2 coord)
{
return fract(coord.xy);
}
#elif defined(CLAMP)
// clamp to the (minu, minv) .. (maxu, maxv) rectangle in g_fClampExts
float2 ps2addr(float2 coord)
{
return clamp(coord.xy, g_fClampExts[CTX].xy, g_fClampExts[CTX].zw);
}
#elif defined(REGION_REPEAT)
// computes the local tex coord along with addressing modes
// Starts from the clamped + repeated coordinate; for each axis whose
// TexWrapMode component exceeds g_fBilinear.z the result is replaced by a
// lookup in the bitwise-AND texture, scaled and offset with g_fClampExts.
float2 ps2addr(float2 coord)
{
float2 final = fract(clamp(coord.xy, g_fClampExts[CTX].xy, g_fClampExts[CTX].zw));
if( TexWrapMode[CTX].x > g_fBilinear.z ) // region repeat mode for x (umsk&x)|ufix
final.x = texture(g_sBitwiseANDX, abs(coord.x)*TexWrapMode[CTX].zx).x * g_fClampExts[CTX].x + g_fClampExts[CTX].z;
if( TexWrapMode[CTX].y > g_fBilinear.z ) // region repeat mode for x (vmsk&x)|vfix
final.y = texture(g_sBitwiseANDY, abs(coord.y)*TexWrapMode[CTX].wy).x * g_fClampExts[CTX].y + g_fClampExts[CTX].w;
return final;
}
#else
// no wrapping define given: clamp first, then repeat
float2 ps2addr(float2 coord)
{
return fract(clamp(coord.xy, g_fClampExts[CTX].xy, g_fClampExts[CTX].zw));
}
#endif
// Fetches a texel from main PS2 memory of the current context after
// translating the local texture coordinate with ps2memcoord().
half4 tex2DPS_32(float2 tex0)
{
    float2 memcoord = ps2memcoord(tex0).xy;
    return texture(g_sMemory[CTX], memcoord);
}
// use when texture is not tiled -- shader 1
// The coordinate is only scaled and offset with g_fTexDims; the fetched color
// is then scaled by g_fZBias.zzzw and offset by g_fPageOffset.w.
half4 tex2DPS_tex32(float2 tex0)
{
    float2 memcoord = g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw;
    return texture(g_sMemory[CTX], memcoord)*g_fZBias.zzzw+g_fPageOffset[CTX].w;
}
// use when texture is not tiled -- shader 2
// The alpha channel of the fetched texel (plus g_fPageOffset.w) is used as an
// index into the CLUT texture.
half4 tex2DPS_clut32(float2 tex0)
{
    float2 memcoord = g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw;
    float index = texture(g_sMemory[CTX], memcoord).a+g_fPageOffset[CTX].w;
    float2 clutcoord = index*g_fExactColor.xz+g_fExactColor.yz;
    return texture(g_sCLUT, clutcoord);
}
// Shader 3
// use when texture is not tiled and converting from 32bit to 16bit
// don't convert on the block level, only on the column level
// so every other 8 pixels, use the upper bits instead of lower
half4 tex2DPS_tex32to16(float2 tex0)
{
// set when the upper pair of channels of the fetched texel has to be used
bool upper = false;
tex0.y += g_fPageOffset[CTX].z;
float2 ffrac = mod(tex0, g_fTexOffset[CTX].xy);
tex0.xy = g_fc0.ww * (tex0.xy + ffrac);
if( ffrac.x > g_fTexOffset[CTX].z ) {
tex0.x -= g_fTexOffset[CTX].z;
upper = true;
}
if( ffrac.y >= g_fTexOffset[CTX].w ) {
tex0.y -= g_fTexOffset[CTX].w;
tex0.x += g_fc0.w;
}
half4 color = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw)*g_fZBias.zzzw+g_fPageOffset[CTX].w;
// two channels of the 32 bit texel address the 16 -> 32 conversion table
float2 uv = upper ? color.xw : color.zy;
return texture(g_sConv16to32, uv+g_fPageOffset[CTX].xy);
}
// Shader 4
// used when a 16 bit texture is used an 8h
// The fetched texel is converted through g_sConv32to16 to obtain an index,
// which is then looked up in the CLUT texture.
half4 tex2DPS_tex16to8h(float2 tex0)
{
// NOTE(review): `final` is declared but never used in this function.
float4 final;
float2 ffrac = mod(tex0+g_fPageOffset[CTX].zw, g_fTexOffset[CTX].xy);
tex0.xy = g_fPageOffset[CTX].xy * tex0.xy - ffrac * g_fc0.yw;
if( ffrac.x > g_fTexOffset[CTX].x*g_fc0.w )
tex0.x += g_fTexOffset[CTX].x*g_fc0.w;
if( tex0.x >= g_fc0.y ) tex0 += g_fTexOffset[CTX].zw;
float4 upper = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw);
// only need alpha
float index = texture(g_sConv32to16, upper.zyx-g_fc0.z).y + upper.w*g_fc0.w*g_fc0.w;
return texture(g_sCLUT, index+g_fExactColor.yz);
}
// Shader 5
// Used when a 16 bit texture is used as a 32bit one.
// Two source texels (one from g_sSrcFinal, one from g_sMemory) are converted
// through g_sConv32to16 and combined into a single result.
half4 tex2DPS_tex16to32(float2 tex0)
{
	float2 cell = mod(tex0 + g_fPageOffset[CTX].zw, g_fTexOffset[CTX].xy);
	// Earlier formula, kept for reference:
	//tex0.xy = g_fPageOffset[CTX].xy * tex0.xy - ffrac * g_fc0.yw;
	tex0.y += g_fPageOffset[CTX].y * cell.y;
	if (cell.x > g_fTexOffset[CTX].z) {
		tex0.x -= g_fTexOffset[CTX].z;
		tex0.y += g_fTexOffset[CTX].w;
	}

	float alphaScale = g_fc0.w*g_fc0.w;
	float4 lower = texture(g_sSrcFinal, g_fTexDims[CTX].xy*tex0);
	float4 upper = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0 + g_fTexDims[CTX].zw);

	float4 result;
	result.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*alphaScale;
	result.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*alphaScale;
	return result;
}
// Placeholder: ignores its argument and always returns an all-zero colour.
half4 tex2DPS_tex16to32h(float2 tex0)
{
	return vec4(0.0);
}
//half4 f;
//f.w = old.y > (127.2f/255.0f) ? 1 : 0;
//old.y -= 0.5f * f.w;
//f.xyz = fract(old.yyx*half3(2.002*255.0f/256.0f, 64.025f*255.0f/256.0f, 8.002*255.0f/256.0f));
//f.y += old.x * (0.25f*255.0f/256.0f);
////////////////////////////////
// calculates the texture color
////////////////////////////////
// Instantiates decl_ps2shade_<num> once for every texture-format suffix.
#define decl_ps2shade(num)              \
	decl_ps2shade_##num(_32)            \
	decl_ps2shade_##num(_tex32)         \
	decl_ps2shade_##num(_clut32)        \
	decl_ps2shade_##num(_tex32to16)     \
	decl_ps2shade_##num(_tex16to8h)     \
	decl_ps2shade_##num(_tex16to32h)
// nearest
// Generates ps2shade0<bit>: address the coordinate with ps2addr() and fetch
// it through tex2DPS<bit>.
#define decl_ps2shade_0(bit) \
float4 ps2shade0##bit(TEX_DECL tex) \
{ \
	float2 addr = ps2addr(TEX_XY); \
	return tex2DPS##bit(addr); \
}
// do fast memcoord4 calcs when textures behave well
// NOTE(review): both branches currently select ps2memcoord4, so REPEAT has no
// effect here. Presumably one branch was meant to pick a faster variant
// (ps2memcoord4_fast is used by BilinearBitBlt) -- confirm before changing.
#ifdef REPEAT
#define PS2MEMCOORD4 ps2memcoord4
#else
#define PS2MEMCOORD4 ps2memcoord4
#endif
// Generates BilinearFilter<bit>(tex0): a manual bilinear fetch from g_sMemory.
//   bit    - suffix appended to the generated function name
//   addrfn - addressing function applied to both corner coordinates
// The coordinate is snapped to the texel grid described by g_fRealTexDims, the
// two corner coordinates are passed through addrfn, PS2MEMCOORD4 turns them
// into four memory coordinates, and the four texels are blended with the
// fractional position as weights.
#define decl_BilinearFilter(bit, addrfn) \
half4 BilinearFilter##bit(float2 tex0) \
{ \
	float4 off0, off1; \
	float4 ftex; \
	float2 ffrac; \
	ftex.xy = tex0 + g_fBilinear.xy * g_fRealTexDims[CTX].zw; \
	ffrac = fract(ftex.xy*g_fRealTexDims[CTX].xy); \
	ftex.xy -= ffrac.xy * g_fRealTexDims[CTX].zw; \
	\
	ftex.zw = addrfn(ftex.xy + g_fRealTexDims[CTX].zw); \
	ftex.xy = addrfn(ftex.xy); \
	\
	PS2MEMCOORD4(ftex, off0, off1); \
	half4 c0 = texture(g_sMemory[CTX], off0.xy); \
	half4 c1 = texture(g_sMemory[CTX], off0.zw); \
	half4 c2 = texture(g_sMemory[CTX], off1.xy); \
	half4 c3 = texture(g_sMemory[CTX], off1.zw); \
	return mix( mix(c0, c1, vec4(ffrac.x)), mix(c2, c3, vec4(ffrac.x)), vec4(ffrac.y) ); \
}
decl_BilinearFilter(_32, ps2addr)
decl_BilinearFilter(_tex32, ps2addr)
decl_BilinearFilter(_clut32, ps2addr)
decl_BilinearFilter(_tex32to16, ps2addr)
decl_BilinearFilter(_tex16to8h, ps2addr)
decl_BilinearFilter(_tex16to32h, ps2addr)
// TODO: for mip maps, only apply when LOD >= 0
// lcm == 0, LOD = log(1/Q)*L + K, lcm == 1, LOD = K
// bilinear
// Generates ps2shade1<bit>, which forwards to BilinearFilter<bit>.
#define decl_ps2shade_1(bit) \
half4 ps2shade1##bit(TEX_DECL tex) \
{ \
	half4 blended = BilinearFilter##bit(TEX_XY); \
	return blended; \
}
// nearest, mip nearest
// Generates ps2shade2<bit>; currently the same fetch as the plain nearest case.
#define decl_ps2shade_2(bit) \
half4 ps2shade2##bit(TEX_DECL tex) \
{ \
	float2 addr = ps2addr(TEX_XY); \
	return tex2DPS##bit(addr); \
}
// nearest, mip linear
// Generates ps2shade3<bit>; currently the same fetch as the plain nearest case.
#define decl_ps2shade_3(bit) \
half4 ps2shade3##bit(TEX_DECL tex) \
{ \
	float2 addr = ps2addr(TEX_XY); \
	return tex2DPS##bit(addr); \
}
// linear, mip nearest
// Generates ps2shade4<bit>, which forwards to BilinearFilter<bit>.
#define decl_ps2shade_4(bit) \
half4 ps2shade4##bit(TEX_DECL tex) \
{ \
	half4 blended = BilinearFilter##bit(TEX_XY); \
	return blended; \
}
// linear, mip linear
// Generates ps2shade5<bit>, which forwards to BilinearFilter<bit>.
#define decl_ps2shade_5(bit) \
half4 ps2shade5##bit(TEX_DECL tex) \
{ \
	half4 blended = BilinearFilter##bit(TEX_XY); \
	return blended; \
}

// Instantiate every filter mode (0..5) for every texture-format suffix.
decl_ps2shade(0)
decl_ps2shade(1)
decl_ps2shade(2)
decl_ps2shade(3)
decl_ps2shade(4)
decl_ps2shade(5)
// Combine a fetched texel with the vertex colour.
// The texel alpha is first remapped with fTexAlpha.xy; when TEST_AEM is
// defined and the texel's dot product with g_fTestBlack is <= g_fc0.z, the
// alpha is replaced by g_fc0.x instead. The result is then modulated using the
// fTexAlpha2 / fTexAlpha coefficients.
half4 ps2CalcShade(half4 texcol, half4 color)
{
	float alpha = texcol.w * fTexAlpha[CTX].y + fTexAlpha[CTX].x;
#ifdef TEST_AEM
	if (dot(texcol.xyzw, g_fTestBlack[CTX].xyzw) <= g_fc0.z)
		alpha = g_fc0.x;
#endif
	texcol.w = alpha;

	half4 modulate = fTexAlpha2[CTX].zzzw * color + fTexAlpha2[CTX].xxxy;
	half4 additive = fTexAlpha[CTX].zzzw * color.wwww;
	return texcol * modulate + additive;
}
// final ops on the color
// Applies the g_fOneColor scale (xxxy) and bias (zzzw). With EXACT_COLOR the
// alpha channel is additionally floored and rescaled by g_fExactColor.w
// (g_fOneColor has to scale by 255 in that mode).
half4 ps2FinalColor(half4 col)
{
	half4 result = col * g_fOneColor.xxxy + g_fOneColor.zzzw;
#ifdef EXACT_COLOR
	result.w = floor(result.w)*g_fExactColor.w;
#endif
	return result;
}
#ifdef FRAGMENT_SHADER // This is code only for FRAGMENTS (pixel shader)
// Untextured pixel shader: outputs the final-colour transform of gl_Color.
void RegularPS() {
	half4 shaded = ps2FinalColor(gl_Color);
	gl_FragData[0] = shaded;
	// whenever outputting depth, make sure to mult by 255/256 and 1
	DOZWRITE(gl_FragData[1] = gl_TexCoord[0];)
}
// Generates Texture<num><bit>PS: fetch with ps2shade<num><bit>, shade against
// gl_Color and write the final colour to gl_FragData[0]. With WRITE_DEPTH the
// second texcoord set is also written to gl_FragData[1].
#ifdef WRITE_DEPTH
#define DECL_TEXPS(num, bit) \
void Texture##num##bit##PS() \
{ \
	half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
	gl_FragData[0] = ps2FinalColor(ps2CalcShade(texel, gl_Color)); \
	gl_FragData[1] = gl_TexCoord[1]; \
}
#else
#define DECL_TEXPS(num, bit) \
void Texture##num##bit##PS() \
{ \
	half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
	gl_FragData[0] = ps2FinalColor(ps2CalcShade(texel, gl_Color)); \
}
#endif
// Instantiates DECL_TEXPS for every texture-format suffix that has a pixel
// shader entry point (note: _tex16to32h is not in this list).
#define DECL_TEXPS_(num)             \
	DECL_TEXPS(num, _32)             \
	DECL_TEXPS(num, _tex32)          \
	DECL_TEXPS(num, _clut32)         \
	DECL_TEXPS(num, _tex32to16)      \
	DECL_TEXPS(num, _tex16to8h)

// One family of entry points per filter mode.
DECL_TEXPS_(0)
DECL_TEXPS_(1)
DECL_TEXPS_(2)
DECL_TEXPS_(3)
DECL_TEXPS_(4)
DECL_TEXPS_(5)
// Untextured pixel shader with fog: the RGB of gl_Color is blended towards
// g_fFogColor using gl_TexCoord[0].x as the weight; alpha is passed through.
void RegularFogPS() {
	half4 fogged;
	fogged.w = gl_Color.w;
	fogged.xyz = mix(g_fFogColor.xyz, gl_Color.xyz, vec3(gl_TexCoord[0].x));
	gl_FragData[0] = ps2FinalColor(fogged);
	DOZWRITE(gl_FragData[1] = gl_TexCoord[1];)
}
// Generates TextureFog<num><bit>PS: like Texture<num><bit>PS, but the shaded
// RGB is blended with g_fFogColor using gl_TexCoord[1].x as the weight. With
// WRITE_DEPTH, gl_TexCoord[2] is also written to gl_FragData[1].
#ifdef WRITE_DEPTH
#define DECL_TEXFOGPS(num, bit) \
void TextureFog##num##bit##PS() \
{ \
	half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
	half4 shaded = ps2CalcShade(texel, gl_Color); \
	shaded.xyz = mix(g_fFogColor.xyz, shaded.xyz, vec3(gl_TexCoord[1].x)); \
	gl_FragData[0] = ps2FinalColor(shaded); \
	gl_FragData[1] = gl_TexCoord[2]; \
}
#else
#define DECL_TEXFOGPS(num, bit) \
void TextureFog##num##bit##PS() \
{ \
	half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
	half4 shaded = ps2CalcShade(texel, gl_Color); \
	shaded.xyz = mix(g_fFogColor.xyz, shaded.xyz, vec3(gl_TexCoord[1].x)); \
	gl_FragData[0] = ps2FinalColor(shaded); \
}
#endif
// Instantiates DECL_TEXFOGPS for every texture-format suffix that has a fog
// pixel shader entry point.
#define DECL_TEXFOGPS_(num)             \
	DECL_TEXFOGPS(num, _32)             \
	DECL_TEXFOGPS(num, _tex32)          \
	DECL_TEXFOGPS(num, _clut32)         \
	DECL_TEXFOGPS(num, _tex32to16)      \
	DECL_TEXFOGPS(num, _tex16to8h)

// One family of entry points per filter mode.
DECL_TEXFOGPS_(0)
DECL_TEXFOGPS_(1)
DECL_TEXFOGPS_(2)
DECL_TEXFOGPS_(3)
DECL_TEXFOGPS_(4)
DECL_TEXFOGPS_(5)
//-------------------------------------------------------
// Techniques not related to the main primitive commands

// Manual bilinear fetch from g_sMemory: tex0 is snapped to the texel grid given
// by g_fRealTexDims, ps2memcoord4_fast() produces the four memory coordinates,
// and the four texels are blended by the fractional position.
half4 BilinearBitBlt(float2 tex0)
{
	float2 weight;
	weight.xy = fract(tex0*g_fRealTexDims[CTX].xy);

	float4 corners;
	corners.xy = tex0 - weight.xy * g_fRealTexDims[CTX].zw;
	corners.zw = corners.xy + g_fRealTexDims[CTX].zw;

	float4 off0, off1;
	ps2memcoord4_fast(corners, off0, off1);

	half4 c0 = texture(g_sMemory[CTX], off0.xy);
	half4 c1 = texture(g_sMemory[CTX], off0.zw);
	half4 c2 = texture(g_sMemory[CTX], off1.xy);
	half4 c3 = texture(g_sMemory[CTX], off1.zw);

	half4 first = mix(c0, c1, vec4(weight.x));
	half4 second = mix(c2, c3, vec4(weight.x));
	return mix(first, second, vec4(weight.y));
}
// Point-sampled blit from g_sMemory, scaled by g_fOneColor.xxxy.
void BitBltPS() {
	float2 memcoord = ps2memcoord(gl_TexCoord[0].xy).xy;
	gl_FragData[0] = texture(g_sMemory[CTX], memcoord)*g_fOneColor.xxxy;
}
// used when AA
// Same as BitBltPS, but the source is fetched through BilinearBitBlt().
void BitBltAAPS() {
	half4 blended = BilinearBitBlt(gl_TexCoord[0].xy);
	gl_FragData[0] = blended * g_fOneColor.xxxy;
}
// Depth blit: writes the fetched data (plus g_fZBias.y) as colour and derives
// gl_FragDepth from dot(data, g_fBitBltZ), mixing a logarithmic term
// (weighted by g_fZMin.y) with a linear term (weighted by g_fZMin.x).
void BitBltDepthPS() {
	vec4 data = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy));
	float z = dot(data, g_fBitBltZ);
	gl_FragData[0] = data + g_fZBias.y;
	gl_FragDepth = (log(g_fc0.y + z) * g_fOneColor.w) * g_fZMin.y + z * g_fZMin.x ;
}
// Depth blit for multiple render targets: same as BitBltDepthPS, and also
// writes g_fc0.x into the x channel of the second colour output.
void BitBltDepthMRTPS() {
	vec4 data = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy));
	float z = dot(data, g_fBitBltZ);
	gl_FragData[0] = data + g_fZBias.y;
	gl_FragData[1].x = g_fc0.x;
	gl_FragDepth = (log(g_fc0.y + z) * g_fOneColor.w) * g_fZMin.y + z * g_fZMin.x ;
}
/*static const float BlurKernel[9] = {
0.027601,
0.066213,
0.123701,
0.179952,
0.205065,
0.179952,
0.123701,
0.066213,
0.027601
};*/
// Plain fetch from g_sSrcFinal at tex0 (no manual filtering is done here).
half4 BilinearFloat16(float2 tex0)
{
	half4 texel = texture(g_sSrcFinal, tex0.xy);
	return texel;
}
// CRTC output from a render target, interlaced: the remapped alpha is
// multiplied by a factor read from g_sInterlace.
void CRTCTargInterPS() {
	float interlace = texture(g_sInterlace, gl_TexCoord[1].yy).x;
	float finter = interlace * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
	float4 pixel = BilinearFloat16(gl_TexCoord[0].xy);
	float alpha = g_fc0.w*pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha * finter;
	gl_FragData[0] = pixel;
}
// CRTC output from a render target: alpha is scaled by g_fc0.w and remapped
// with g_fOneColor.xy.
void CRTCTargPS() {
	float4 pixel = BilinearFloat16(gl_TexCoord[0].xy);
	float alpha = g_fc0.w * pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha;
	gl_FragData[0] = pixel;
}
// CRTC output from memory, interlaced, bilinear: the truncated coordinate is
// rescaled with g_fInvTexDims and fetched through BilinearBitBlt().
void CRTCInterPS() {
	float interlace = texture(g_sInterlace, gl_TexCoord[1].yy).x;
	float finter = interlace * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
	float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
	half4 pixel = BilinearBitBlt(filtcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha*finter;
	gl_FragData[0] = pixel;
}
// simpler
// CRTC output from memory, interlaced, point-sampled through ps2memcoord().
void CRTCInterPS_Nearest() {
	float interlace = texture(g_sInterlace, gl_TexCoord[1].yy).x;
	float finter = interlace * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
	float2 memcoord = ps2memcoord(gl_TexCoord[0].xy).xy;
	half4 pixel = texture(g_sMemory[CTX], memcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha*finter;
	gl_FragData[0] = pixel;
}
// CRTC output from memory, bilinear: the coordinate is rescaled with
// g_fInvTexDims and fetched through BilinearBitBlt().
void CRTCPS() {
	float2 filtcoord = gl_TexCoord[0].xy * g_fInvTexDims.xy+g_fInvTexDims.zw;
	half4 pixel = BilinearBitBlt(filtcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha;
	gl_FragData[0] = pixel;
}
// simpler
// CRTC output from memory, point-sampled through ps2memcoord().
void CRTCPS_Nearest() {
	float2 memcoord = ps2memcoord(gl_TexCoord[0].xy).xy;
	half4 pixel = texture(g_sMemory[CTX], memcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha;
	gl_FragData[0] = pixel;
}
// 24-bit CRTC output, interlaced: truncated coordinate rescaled with
// g_fInvTexDims, point-sampled through ps2memcoord().
void CRTC24InterPS() {
	float interlace = texture(g_sInterlace, gl_TexCoord[1].yy).x;
	float finter = interlace * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
	float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
	float2 memcoord = ps2memcoord(filtcoord).xy;
	half4 pixel = texture(g_sMemory[CTX], memcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha*finter;
	gl_FragData[0] = pixel;
}
// 24-bit CRTC output: truncated coordinate rescaled with g_fInvTexDims,
// point-sampled through ps2memcoord().
void CRTC24PS() {
	float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
	float2 memcoord = ps2memcoord(filtcoord).xy;
	half4 pixel = texture(g_sMemory[CTX], memcoord);
	float alpha = pixel.w * g_fOneColor.x + g_fOneColor.y;
	pixel.w = alpha;
	gl_FragData[0] = pixel;
}
// Writes the constant g_fOneColor to the first colour output.
void ZeroPS()
{
	gl_FragData[0] = g_fOneColor;
}
// Fetches g_sSrcFinal at gl_TexCoord[0] and modulates it by g_fOneColor.
void BaseTexturePS()
{
	vec4 texel = texture(g_sSrcFinal, gl_TexCoord[0].xy);
	gl_FragData[0] = texel * g_fOneColor;
}
// Converts a 16 bit source into a 32 bit output: two texels of g_sSrcFinal
// (g_fPageOffset.xy apart) are converted through g_sConv32to16 and combined
// into one output value.
void Convert16to32PS() {
	float2 cell = mod ( gl_TexCoord[0].xy + g_fTexDims[CTX].zw, g_fTexOffset[CTX].xy);
	float2 src = g_fTexDims[CTX].xy * gl_TexCoord[0].xy - cell * g_fc0.yw;

	if (cell.x > g_fTexOffset[CTX].x*g_fc0.w) {
		src.x += g_fTexOffset[CTX].x*g_fc0.w;
	}
	if (src.x >= g_fc0.y) {
		src += g_fTexOffset[CTX].zw;
	}

	float4 lower = texture(g_sSrcFinal, src);
	float4 upper = texture(g_sSrcFinal, src + g_fPageOffset[CTX].xy);

	float4 result;
	result.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*g_fPageOffset[CTX].zw;
	result.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*g_fPageOffset[CTX].zw;
	gl_FragData[0]= result;
}
// Use when the texture is not tiled and is converted from 32bit to 16bit.
// The conversion is not done on the block level, only on the column level,
// so every other 8 pixels the upper bits are used instead of the lower ones.
void Convert32to16PS() {
	float2 cell = mod(gl_TexCoord[0].xy + g_fTexDims[CTX].zw, g_fTexOffset[CTX].xy);
	float2 src = g_fc0.ww * (gl_TexCoord[0].xy + cell);

	// Decide which pair of channels is read back after the fetch.
	bool useUpper = cell.x > g_fTexOffset[CTX].z;
	if (useUpper)
		src.x -= g_fTexOffset[CTX].z;

	if (cell.y >= g_fTexOffset[CTX].w) {
		src.y -= g_fTexOffset[CTX].w;
		src.x += g_fc0.w;
	}

	half4 color = texture(g_sSrcFinal, src*g_fTexDims[CTX].xy)*g_fc0.yyyw;
	float2 uv = useUpper ? color.xw : color.zy;
	float2 convcoord = uv*g_fPageOffset[CTX].xy + g_fPageOffset[CTX].zw;
	gl_FragData[0] = texture(g_sConv16to32, convcoord)*g_fTexDims[CTX].xxxy;
}
#endif //FRAGMENT_SHADER
#ifdef VERTEX_SHADER
// Computes the output position for a vertex.
//   vertex - input vertex position; only .xy is used
// xy is scaled/offset by g_fPosXY. z is derived from gl_SecondaryColor
// (swizzled zyxw and dotted with g_fZ), mixing a logarithmic term weighted by
// g_fZMin.y with a linear term weighted by g_fZMin.x. w is set to g_fc0.y.
float4 OutPosition(float4 vertex) {
	float4 Position;
	// Use the argument rather than the gl_Vertex global so the function honours
	// its own signature; every caller in this file passes gl_Vertex.
	Position.xy = vertex.xy * g_fPosXY[CTX].xy + g_fPosXY[CTX].zw;
	Position.z = (log(g_fc0.y + dot(g_fZ, gl_SecondaryColor.zyxw)) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + dot(g_fZ, gl_SecondaryColor.zyxw) * g_fZMin.x ;
	Position.w = g_fc0.y;
	return Position;
}
// just smooth shading
// Passes the vertex colour through; with DOZWRITE the biased secondary colour
// is forwarded in gl_TexCoord[0].
void RegularVS()
{
	gl_FrontColor = gl_Color;
	gl_Position = OutPosition(gl_Vertex);
	DOZWRITE(gl_TexCoord[0] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[0].w = g_fc0.y;)
}
// diffuse texture mapping
// With PERSPECTIVE_CORRECT_TEX the xyz texture coordinate is forwarded as is;
// otherwise xy is divided by z here in the vertex shader.
void TextureVS()
{
	gl_FrontColor = gl_Color;
	gl_Position = OutPosition(gl_Vertex);
#ifdef PERSPECTIVE_CORRECT_TEX
	gl_TexCoord[0].xyz = gl_MultiTexCoord0.xyz;
#else
	gl_TexCoord[0].xy = gl_MultiTexCoord0.xy/gl_MultiTexCoord0.z;
#endif
	DOZWRITE(gl_TexCoord[1] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[1].w = g_fc0.y;)
}
// Untextured vertex shader with fog: the fog factor written to
// gl_TexCoord[0].x is the computed output z multiplied by g_fBilinear.w.
void RegularFogVS()
{
	float4 outPos = OutPosition(gl_Vertex);
	gl_FrontColor = gl_Color;
	gl_Position = outPos;
	gl_TexCoord[0].x = outPos.z * g_fBilinear.w;
	DOZWRITE(gl_TexCoord[1] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[1].w = g_fc0.y;)
}
// Textured vertex shader with fog: texture coordinates as in TextureVS, fog
// factor in gl_TexCoord[1].x.
// NOTE(review): the fog factor here comes from the raw gl_Vertex.z, while
// RegularFogVS uses the z of the computed output position -- confirm which
// one is intended.
void TextureFogVS()
{
	gl_FrontColor = gl_Color;
	gl_Position = OutPosition(gl_Vertex);
#ifdef PERSPECTIVE_CORRECT_TEX
	gl_TexCoord[0].xyz = gl_MultiTexCoord0.xyz;
#else
	gl_TexCoord[0].xy = gl_MultiTexCoord0.xy / gl_MultiTexCoord0.z;
#endif
	gl_TexCoord[1].x = gl_Vertex.z * g_fBilinear.w;
	DOZWRITE(gl_TexCoord[2] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[2].w = g_fc0.y;)
}
// Vertex shader for blits: xy is scaled/offset by g_fBitBltPos, zw is taken
// from g_fc0.xy. gl_TexCoord[0] carries the source coordinate (scaled/offset by
// g_fBitBltTex) and gl_TexCoord[1] the output position transformed by
// g_fBitBltTrans.
void BitBltVS() {
	vec4 position;
	position.xy = gl_Vertex.xy * g_fBitBltPos.xy + g_fBitBltPos.zw;
	position.zw = g_fc0.xy;
	gl_Position = position;
	gl_TexCoord[0].xy = gl_MultiTexCoord0.xy * g_fBitBltTex.xy + g_fBitBltTex.zw;
	gl_TexCoord[1].xy = position.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;
}
#endif // VERTEX_SHADER

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,9 @@
#define VB_BUFFERSIZE 0x4000
extern void FlushIfNecesary(void* ptr);
extern bool g_bSaveZUpdate;
// all textures have this width
extern int GPU_TEXWIDTH;
extern float g_fiGPU_TEXWIDTH;
@ -101,10 +104,10 @@ class CRenderTarget
TS_Virtual = 4, // currently not mapped to memory
TS_FeedbackReady = 8, // feedback effect is ready and doesn't need to be updated
TS_NeedConvert32 = 16,
TS_NeedConvert16 = 32,
TS_NeedConvert16 = 32
};
inline float4 DefaultBitBltPos();
inline float4 DefaultBitBltTex();
float4 DefaultBitBltPos();
float4 DefaultBitBltTex();
private:
void _CreateFeedback();
@ -310,9 +313,12 @@ class CRenderTargetMngr
return ptarg;
}
static void DestroyTarg(CRenderTarget* ptarg);
void DestroyTarg(CRenderTarget* ptarg);
void PrintTargets();
MAPTARGETS mapTargets, mapDummyTargs;
private:
void DestroyAllTargetsHelper(void* ptr);
};
class CMemoryTargetMngr
@ -487,108 +493,6 @@ inline u32 GetFrameKeyDummy(CRenderTarget* frame)
return GetFrameKeyDummy(frame->fbp, frame->fbw, frame->fbh, frame->psm);
}
#include "Mem.h"
static __forceinline void DrawTriangleArray()
{
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
GL_REPORT_ERRORD();
}
static __forceinline void DrawBuffers(GLenum *buffer)
{
if (glDrawBuffers != NULL)
{
glDrawBuffers(1, buffer);
}
GL_REPORT_ERRORD();
}
static __forceinline void FBTexture(int attach, int id = 0)
{
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT + attach, GL_TEXTURE_RECTANGLE_NV, id, 0);
GL_REPORT_ERRORD();
}
static __forceinline void ResetRenderTarget(int index)
{
FBTexture(index);
}
static __forceinline void Texture2D(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(GL_TEXTURE_2D, 0, iFormat, width, height, 0, format, type, pixels);
}
static __forceinline void Texture2D(GLint iFormat, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(GL_TEXTURE_2D, 0, iFormat, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, format, type, pixels);
}
static __forceinline void Texture3D(GLint iFormat, GLint width, GLint height, GLint depth, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage3D(GL_TEXTURE_3D, 0, iFormat, width, height, depth, 0, format, type, pixels);
}
static __forceinline void TextureRect(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, iFormat, width, height, 0, format, type, pixels);
}
static __forceinline void TextureRect2(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, iFormat, width, height, 0, format, type, pixels);
}
static __forceinline void TextureRect(GLenum attach, GLuint id = 0)
{
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, attach, GL_RENDERBUFFER_EXT, id);
}
static __forceinline void setTex2DFilters(GLint type)
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, type);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, type);
}
static __forceinline void setTex2DWrap(GLint type)
{
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, type);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, type);
}
static __forceinline void setTex3DFilters(GLint type)
{
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, type);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, type);
}
static __forceinline void setTex3DWrap(GLint type)
{
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, type);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, type);
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, type);
}
static __forceinline void setRectFilters(GLint type)
{
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, type);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, type);
}
static __forceinline void setRectWrap(GLint type)
{
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, type);
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, type);
}
static __forceinline void setRectWrap2(GLint type)
{
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, type);
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, type);
}
//------------------------ Inlines -------------------------
// Calculate maximum height for target
@ -604,4 +508,12 @@ inline int get_maxheight(int fbp, int fbw, int psm)
return ret;
}
// memory size for one row of texture. It depends on width of texture and number of bytes
// per pixel
inline u32 Pitch(int fbw) { return (RW(fbw) * 4) ; }
// memory size of whole texture. It is number of rows multiplied by memory size of row
inline u32 Tex_Memory_Size(int fbw, int fbh) { return (RH(fbh) * Pitch(fbw)); }
#endif

View File

@ -22,9 +22,13 @@
#include "x86.h"
#if defined(ZEROGS_SSE2)
#include <emmintrin.h>
#include <immintrin.h>
#endif
// Note: all code in this file is deprecated. It is kept for reference only.
// swizzling
//These were only used in the old version of RESOLVE_32_BITS. Keeping for reference.

View File

@ -22,153 +22,99 @@
#include "GS.h"
extern "C" void __fastcall SwizzleBlock32_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall SwizzleBlock32u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch);
extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch);
#ifndef ZZNORMAL_MEMORY
// StarOcean use 24 in logo and 4HH and 4HL in menu subfont
// Tony hawk use 16, but have a lot of trouble
// This function moves one blockwidth * blockheight data block from src to dst, on the assumption that dst stores swizzled data.
template <int psm>
inline void __fastcall SwizzleBlock(u32* dst, u32* src, int pitch, u32 WriteMask = 0xffffffff) {
u8 B = (PSM_PIXELS_PER_WORD<psm>() > 2)? 4 : 2;
// frame swizzling
assert ((pitch & 3) == 0 );
#if 0
// no AA
extern "C" void __fastcall FrameSwizzleBlock32_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall FrameSwizzleBlock16_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32Z_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16Z_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
u32* src1 = src;
u32* src2 = src + pitch / 4;
// AA 2x
extern "C" void __fastcall FrameSwizzleBlock32A2_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall FrameSwizzleBlock16A2_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32A2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32ZA2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16A2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16ZA2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
for(int j = 0; j < 4 ; j++, src1 += B * pitch / 4, src2 += B * pitch / 4)
for(int i = 0; i < 8; i++) {
fillPixelsFromMemory<psm>(dst, src1, i, B * j, pitch /4, 0, 0, WriteMask);
fillPixelsFromMemory<psm>(dst, src2, i, B * j + 1, pitch / 4 , 0, 0, WriteMask);
}
}
// AA 4x
extern "C" void __fastcall FrameSwizzleBlock32A4_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall FrameSwizzleBlock16A4_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32A4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock32ZA4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16A4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern "C" void __fastcall Frame16SwizzleBlock16ZA4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
// Simple AA multiplication. We do not use src[j << AA], but prefer to keep a more central pixel of the data.
// We do not mix neighbouring pixels, because it gives no noticeable benefit while the speed penalty is big.
template <u8 AA>
inline u32 mixed_pixel(u32* src, int j) {
if (AA == 0)
return src[j] ;
/*extern void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch);*/
if (AA == 1)
return src[(j << 1) + 1];
// no AA
extern void __fastcall FrameSwizzleBlock32_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock24_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock16_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32Z_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16Z_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
if (AA == 2)
return src[(j << 2) + 2];
}
// AA 2x
extern void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock24A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock16A2_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32A2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32ZA2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16A2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16ZA2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
// We fill the destination word for pixel number j (j < 8). For 16-bit storage the upper half of this word holds pixel j + 8,
// and the RGBA data is converted to ARGB16.
// WARNING: the floating-point storage path has never been tested.
template <int psm, bool is_float, u8 AA>
inline u32 convert_pixel(u32* src, int j) {
if (is_float) {
Vector_16F* fsrc = (Vector_16F*)src; // We use simplified code for float, it seems not
// to be used anyway.
if (PSM_ISHALF<psm>()) {
return Float16ToARGB16 ( fsrc[j << AA]) + (Float16ToARGB16(fsrc[(j + 8) << AA]) << 16);
}
else {
return Float16ToARGB ( fsrc[j << AA] );
}
}
else {
if (PSM_ISHALF<psm>()) {
return RGBA32to16(mixed_pixel<AA>(src, j)) + (RGBA32to16(mixed_pixel<AA>(src, j + 8)) << 16);
}
else {
return mixed_pixel<AA>(src, j);
}
}
}
// AA 4x
extern void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock24A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall FrameSwizzleBlock16A4_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32A4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock32ZA4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16A4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
extern void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
// Put data into the u32 destination word for pixel x, y < 8 of a swizzled block. Note that for a 16-bit target we put 2 pixels (x, y
// and x+8, y) in the same word.
template <int pix, int x, int y, int psm, bool is_float, u8 AA>
inline void SettleSwizzlePixel(u32* dst, u32* src, int srcpitch, u32 mask) {
u32 tmp = convert_pixel<psm, is_float, AA>(src + y * srcpitch, x);
MaskedOR (dst + pix, tmp, mask); // Don't forget to use mask.
}
// Write the swizzled block for src into the dst memory location. No pixel address is calculated here at all.
template <int psm, bool is_float, u8 AA>
void __fastcall FrameSwizzleBlock(u32* dst, int sj, int si, u32* src, int srcpitch, u32 WriteMask) {
u32 mask = HandleWritemask<psm>(WriteMask); // This function made correct mask for 32, 24 and 16 target's
for (int i = 0; i < 4; i++) {
SettleSwizzlePixel<0, 0, 0, psm, is_float, AA>(dst, src, srcpitch, mask); // it's possible to put one for here, but I don't know, what's faster
SettleSwizzlePixel<1, 1, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<2, 0, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<3, 1, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<4, 2, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<5, 3, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<6, 2, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<7, 3, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<8, 4, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<9, 5, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<10, 4, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<11, 5, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<12, 6, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<13, 7, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<14, 6, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
SettleSwizzlePixel<15, 7, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
src += 2 * srcpitch;
dst += 16;
}
}
#endif
extern void __fastcall SwizzleColumn32_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch);
extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch);
// extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut);
// extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa);
extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut);
extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut);
extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters);
#ifdef ZEROGS_SSE2
#define FrameSwizzleBlock32 FrameSwizzleBlock32_c
#define FrameSwizzleBlock24 FrameSwizzleBlock24_c
#define FrameSwizzleBlock16 FrameSwizzleBlock16_c
#define Frame16SwizzleBlock32 Frame16SwizzleBlock32_c
#define Frame16SwizzleBlock32Z Frame16SwizzleBlock32Z_c
#define Frame16SwizzleBlock16 Frame16SwizzleBlock16_c
#define Frame16SwizzleBlock16Z Frame16SwizzleBlock16Z_c
#define FrameSwizzleBlock32A2 FrameSwizzleBlock32A2_c
#define FrameSwizzleBlock24A2 FrameSwizzleBlock24A2_c
#define FrameSwizzleBlock16A2 FrameSwizzleBlock16A2_c
#define Frame16SwizzleBlock32A2 Frame16SwizzleBlock32A2_c
#define Frame16SwizzleBlock32ZA2 Frame16SwizzleBlock32ZA2_c
#define Frame16SwizzleBlock16A2 Frame16SwizzleBlock16A2_c
#define Frame16SwizzleBlock16ZA2 Frame16SwizzleBlock16ZA2_c
#define FrameSwizzleBlock32A4 FrameSwizzleBlock32A4_c
#define FrameSwizzleBlock24A4 FrameSwizzleBlock24A4_c
#define FrameSwizzleBlock16A4 FrameSwizzleBlock16A4_c
#define Frame16SwizzleBlock32A4 Frame16SwizzleBlock32A4_c
#define Frame16SwizzleBlock32ZA4 Frame16SwizzleBlock32ZA4_c
#define Frame16SwizzleBlock16A4 Frame16SwizzleBlock16A4_c
#define Frame16SwizzleBlock16ZA4 Frame16SwizzleBlock16ZA4_c
#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2
#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2
#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2
#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2
#else
#define FrameSwizzleBlock32 FrameSwizzleBlock32_c
#define FrameSwizzleBlock16 FrameSwizzleBlock16_c
#define Frame16SwizzleBlock32 Frame16SwizzleBlock32_c
#define Frame16SwizzleBlock32Z Frame16SwizzleBlock32Z_c
#define Frame16SwizzleBlock16 Frame16SwizzleBlock16_c
#define Frame16SwizzleBlock16Z Frame16SwizzleBlock16Z_c
#define FrameSwizzleBlock32A2 FrameSwizzleBlock32A2_c
#define FrameSwizzleBlock16A2 FrameSwizzleBlock16A2_c
#define Frame16SwizzleBlock32A2 Frame16SwizzleBlock32A2_c
#define Frame16SwizzleBlock32ZA2 Frame16SwizzleBlock32ZA2_c
#define Frame16SwizzleBlock16A2 Frame16SwizzleBlock16A2_c
#define Frame16SwizzleBlock16ZA2 Frame16SwizzleBlock16ZA2_c
#define FrameSwizzleBlock32A4 FrameSwizzleBlock32A4_c
#define FrameSwizzleBlock16A4 FrameSwizzleBlock16A4_c
#define Frame16SwizzleBlock32A4 Frame16SwizzleBlock32A4_c
#define Frame16SwizzleBlock32ZA4 Frame16SwizzleBlock32ZA4_c
#define Frame16SwizzleBlock16A4 Frame16SwizzleBlock16A4_c
#define Frame16SwizzleBlock16ZA4 Frame16SwizzleBlock16ZA4_c
#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_c
#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_c
#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_c
#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_c
#endif
#endif

View File

@ -43,7 +43,7 @@ void HandleGLError()
{
FUNCLOG
// check the error status of this framebuffer */
GLenum error = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
GLenum error = FB::State();
// if error != GL_FRAMEBUFFER_COMPLETE_EXT, there's an error of some sort
@ -210,7 +210,7 @@ void SetAA(int mode)
// GL_REPORT_ERROR();
//
// fAlpha = 1;
// glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
// FB::Unbind(); // switch to the backbuffer
//
// DisableAllgl() ;
// SetShaderCaller("RenderCustom");
@ -311,7 +311,7 @@ void ExtWrite()
// else if (PSMT_ISHALF(texframe.psm)) bpp = 2;
//
// // get the start and end addresses of the buffer
// GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
// GetRectMemAddressZero(start, end, texframe.psm, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
}
////////////