mirror of https://github.com/PCSX2/pcsx2.git
zzogl: painfully merge the zzogl-dev branch
* new memory management * asm was replaced by intrinsics * new GLSL backend (AMD only) CMake is probably broken anyway with the 2 plugins... * and lots of other stuff that I forgot about ;) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5166 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
8fcadb3616
commit
e3c741bb2a
|
@ -8,6 +8,7 @@
|
|||
# Use soundtouch internal lib: -DFORCE_INTERNAL_SOUNDTOUCH=TRUE
|
||||
# Use zlib internal lib: -DFORCE_INTERNAL_ZLIB=TRUE
|
||||
# Use sdl1.3 internal lib: -DFORCE_INTERNAL_SDL=TRUE # Not supported yet
|
||||
# Use GLSL API(else NVIDIA_CG): -DGLSL_API=TRUE
|
||||
|
||||
### GCC optimization options
|
||||
# control C flags : -DUSER_CMAKE_C_FLAGS="cflags"
|
||||
|
@ -183,3 +184,10 @@ if(PACKAGE_MODE)
|
|||
# Compile all source codes with these 2 defines
|
||||
add_definitions(-DPLUGIN_DIR_COMPILATION=${PLUGIN_DIR} -DGAMEINDEX_DIR_COMPILATION=${GAMEINDEX_DIR})
|
||||
endif(PACKAGE_MODE)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Select nvidia cg shader api by default
|
||||
#-------------------------------------------------------------------------------
|
||||
# Only fall back to Cg when the user did not pass -DGLSL_API=... themselves.
|
||||
if(NOT DEFINED GLSL_API)
|
||||
set(GLSL_API FALSE)
|
||||
endif(NOT DEFINED GLSL_API)
|
||||
|
|
|
@ -53,7 +53,9 @@ if(NOT FORCE_INTERNAL_ZLIB)
|
|||
endif(NOT FORCE_INTERNAL_ZLIB)
|
||||
|
||||
## Use pcsx2 package to find module
|
||||
include(FindCg)
|
||||
if(NOT GLSL_API)
|
||||
include(FindCg)
|
||||
endif(NOT GLSL_API)
|
||||
include(FindGlew)
|
||||
include(FindLibc)
|
||||
include(FindPortAudio)
|
||||
|
|
|
@ -6,11 +6,15 @@ set(msg_dep_pcsx2 "check these libraries -> wxWidgets (>=2.8.10), gtk2 (>=
|
|||
set(msg_dep_cdvdiso "check these libraries -> bzip2 (>=1.0.5), gtk2 (>=2.16)")
|
||||
set(msg_dep_zerogs "check these libraries -> glew (>=1.5), opengl, X11, nvidia-cg-toolkit (>=2.1)")
|
||||
set(msg_dep_gsdx "check these libraries -> opengl, X11, pcsx2 SDL")
|
||||
set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, nvidia-cg-toolkit (>=2.1), pcsx2 common libs")
|
||||
set(msg_dep_onepad "check these libraries -> sdl (>=1.2)")
|
||||
set(msg_dep_zeropad "check these libraries -> sdl (>=1.2)")
|
||||
set(msg_dep_spu2x "check these libraries -> soundtouch (>=1.5), alsa, portaudio (>=1.9), pcsx2 common libs")
|
||||
set(msg_dep_zerospu2 "check these libraries -> soundtouch (>=1.5), alsa")
|
||||
# The Cg toolkit is only searched for in the Cg build, so only list it there.
|
||||
if(GLSL_API)
|
||||
set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, pcsx2 common libs")
|
||||
else(GLSL_API)
|
||||
set(msg_dep_zzogl "check these libraries -> glew (>=1.5), jpeg (>=6.2), opengl, X11, nvidia-cg-toolkit (>=2.1), pcsx2 common libs")
|
||||
endif(GLSL_API)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Pcsx2 core & common libs
|
||||
|
@ -153,17 +157,17 @@ endif(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND)
|
|||
# requires: -GLEW
|
||||
# -OpenGL
|
||||
# -X11
|
||||
# -CG
|
||||
# -CG (only with cg build)
|
||||
# -JPEG
|
||||
# -common_libs
|
||||
#---------------------------------------
|
||||
if(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
|
||||
if((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
|
||||
set(zzogl TRUE)
|
||||
else(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
|
||||
else((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
|
||||
set(zzogl FALSE)
|
||||
message(STATUS "Skip build of zzogl: miss some dependencies")
|
||||
message(STATUS "${msg_dep_zzogl}")
|
||||
endif(GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND CG_FOUND AND JPEG_FOUND AND common_libs)
|
||||
endif((GLEW_FOUND AND OPENGL_FOUND AND X11_FOUND AND JPEG_FOUND AND common_libs) AND (CG_FOUND OR GLSL_API))
|
||||
#---------------------------------------
|
||||
|
||||
#---------------------------------------
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
<Project filename="plugins/zerospu2/Linux/ZeroSPU2.cbp">
|
||||
<Depends filename="3rdparty/SoundTouch/SoundTouch.cbp" />
|
||||
</Project>
|
||||
<Project filename="plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp" />
|
||||
<Project filename="plugins/zzogl-pg/opengl/Linux/zzogl-pg/zzogl-pg.cbp" active="1" />
|
||||
<Project filename="plugins/GSdx/GSdx.gcc.cbp" active="1" />
|
||||
<Project filename="3rdparty/SDL-1.3.0-5387/SDL-1.3/SDL-1.3.cbp" />
|
||||
</Workspace>
|
||||
|
|
|
@ -45,6 +45,14 @@ if(CMAKE_BUILD_TYPE STREQUAL Release)
|
|||
add_definitions(${CommonFlags} ${OptimizationFlags} -W)
|
||||
endif(CMAKE_BUILD_TYPE STREQUAL Release)
|
||||
|
||||
# Select the shader API
|
||||
if(GLSL_API)
|
||||
add_definitions(-DGLSL_API)
|
||||
else(GLSL_API)
|
||||
add_definitions(-DNVIDIA_CG_API)
|
||||
endif(GLSL_API)
|
||||
|
||||
|
||||
# zzogl sources
|
||||
set(zzoglSources
|
||||
GifTransfer.cpp
|
||||
|
@ -54,17 +62,17 @@ set(zzoglSources
|
|||
GSmain.cpp
|
||||
HostMemory.cpp
|
||||
Mem.cpp
|
||||
# memcpy_amd.cpp
|
||||
Mem_Swizzle.cpp
|
||||
Mem_Tables.cpp
|
||||
Profile.cpp
|
||||
rasterfont.cpp
|
||||
NewRegs.cpp
|
||||
Regs.cpp
|
||||
targets.cpp
|
||||
x86.cpp
|
||||
zerogs.cpp
|
||||
zpipe.cpp
|
||||
ZZDepthTargets.cpp
|
||||
ZZMemoryTargets.cpp
|
||||
ZZRenderTargets.cpp
|
||||
ZZClut.cpp
|
||||
ZZHacks.cpp
|
||||
ZZKeyboard.cpp
|
||||
|
@ -74,8 +82,10 @@ set(zzoglSources
|
|||
ZZoglCRTC.cpp
|
||||
ZZoglFlush.cpp
|
||||
ZZoglFlushHack.cpp
|
||||
ZZoglMem.cpp
|
||||
ZZoglSave.cpp
|
||||
ZZoglShaders.cpp
|
||||
ZZoglShadersGLSL.cpp
|
||||
ZZoglShoots.cpp
|
||||
ZZoglVB.cpp
|
||||
)
|
||||
|
@ -94,7 +104,6 @@ set(zzoglHeaders
|
|||
Mem_Transmit.h
|
||||
Profile.h
|
||||
rasterfont.h
|
||||
NewRegs.h
|
||||
Regs.h
|
||||
targets.h
|
||||
Util.h
|
||||
|
@ -102,21 +111,19 @@ set(zzoglHeaders
|
|||
zerogs.h
|
||||
zpipe.h
|
||||
ZZClut.h
|
||||
ZZoglFlush.h
|
||||
ZZGl.h
|
||||
ZZHacks.h
|
||||
ZZoglDrawing.h
|
||||
ZZLog.h
|
||||
ZZoglCRTC.h
|
||||
ZZoglMath.h
|
||||
ZZoglMem.h
|
||||
ZZoglShaders.h
|
||||
ZZoglShoots.h
|
||||
ZZoglVB.h
|
||||
)
|
||||
|
||||
# zzogl S sources
|
||||
set(zzoglSSources
|
||||
x86-32.S)
|
||||
|
||||
# zzogl shader sources
|
||||
set(zzoglShaderSources
|
||||
ctx0/ps2hw_ctx.fx
|
||||
|
@ -131,9 +138,6 @@ set(zzoglLinuxSources
|
|||
set(zzoglLinuxHeaders
|
||||
Linux/Linux.h)
|
||||
|
||||
# change language of .S-files to c++
|
||||
set_source_files_properties(${zzoglSSources} PROPERTIES LANGUAGE CXX)
|
||||
|
||||
# add additional include directories
|
||||
include_directories(.
|
||||
Linux)
|
||||
|
@ -142,7 +146,6 @@ include_directories(.
|
|||
add_library(${Output} SHARED
|
||||
${zzoglSources}
|
||||
${zzoglHeaders}
|
||||
${zzoglSSources}
|
||||
${zzoglShaderSources}
|
||||
${zzoglLinuxSources}
|
||||
${zzoglLinuxHeaders})
|
||||
|
@ -154,7 +157,9 @@ set_target_properties(${Output} PROPERTIES COMPILE_DEFINITIONS USE_GSOPEN2)
|
|||
target_link_libraries(${Output} Utilities)
|
||||
|
||||
# link target with Cg
|
||||
target_link_libraries(${Output} ${CG_LIBRARIES})
|
||||
# Cg is only needed (and only searched for) when the GLSL backend is not selected.
|
||||
if(NOT GLSL_API)
|
||||
target_link_libraries(${Output} ${CG_LIBRARIES})
|
||||
endif(NOT GLSL_API)
|
||||
|
||||
# link target with glew
|
||||
target_link_libraries(${Output} ${GLEW_LIBRARY})
|
||||
|
@ -183,10 +188,18 @@ endif(NOT USER_CMAKE_LD_FLAGS STREQUAL "")
|
|||
|
||||
if(PACKAGE_MODE)
|
||||
install(TARGETS ${Output} DESTINATION ${PLUGIN_DIR})
|
||||
if(GLSL_API)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${PLUGIN_DIR})
|
||||
else(GLSL_API)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${PLUGIN_DIR})
|
||||
endif(GLSL_API)
|
||||
else(PACKAGE_MODE)
|
||||
install(TARGETS ${Output} DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
|
||||
if(GLSL_API)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
|
||||
else(GLSL_API)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins)
|
||||
endif(GLSL_API)
|
||||
endif(PACKAGE_MODE)
|
||||
|
||||
################################### Replay Loader
|
||||
|
|
|
@ -198,13 +198,24 @@ void GLWindow::CreateContextGL()
|
|||
GLXFBConfig *framebuffer_config = glXChooseFBConfig(glDisplay, DefaultScreen(glDisplay), NULL, &fbcount);
|
||||
if (!framebuffer_config or !fbcount) return;
|
||||
|
||||
#if 1
|
||||
// At least create a 3.0 context with compatibility profile
|
||||
int attribs[] = {
|
||||
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
|
||||
GLX_CONTEXT_MINOR_VERSION_ARB, 0,
|
||||
// GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
|
||||
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
|
||||
0
|
||||
};
|
||||
#else
|
||||
// Create a 3.2 core context without compatibility profile
|
||||
int attribs[] = {
|
||||
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
|
||||
GLX_CONTEXT_MINOR_VERSION_ARB, 2,
|
||||
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
|
||||
0
|
||||
};
|
||||
#endif
|
||||
GLXContext context_temp = glXCreateContextAttribsARB(glDisplay, framebuffer_config[0], NULL, true, attribs);
|
||||
if (context_temp) {
|
||||
ZZLog::Error_Log("Create a 3.0 opengl context");
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#define __GS_H__
|
||||
|
||||
|
||||
#define USE_OLD_REGS
|
||||
#define ZZNORMAL_MEMORY
|
||||
|
||||
#include "Util.h"
|
||||
#include "GifTransfer.h"
|
||||
|
@ -39,6 +39,8 @@ extern float fFPS;
|
|||
|
||||
extern int g_LastCRC;
|
||||
|
||||
#define VB_NUMBUFFERS 512 // number of vbo buffer allocated
|
||||
|
||||
struct Vector_16F
|
||||
{
|
||||
u16 x, y, z, w;
|
||||
|
@ -132,21 +134,22 @@ extern GSconf conf;
|
|||
|
||||
// PSM values
|
||||
// PSM types == Texture Storage Format
|
||||
enum PSM_value
|
||||
{
|
||||
PSMCT32 = 0, // 00 0000
|
||||
PSMCT24 = 1, // 00 0001
|
||||
PSMCT16 = 2, // 00 0010
|
||||
PSMCT16S = 10, // 00 1010
|
||||
PSMT8 = 19, // 01 0011
|
||||
PSMT4 = 20, // 01 0100
|
||||
PSMT8H = 27, // 01 1011
|
||||
PSMT4HL = 36, // 10 0100
|
||||
PSMT4HH = 44, // 10 1100
|
||||
PSMT32Z = 48, // 11 0000
|
||||
PSMT24Z = 49, // 11 0001
|
||||
PSMT16Z = 50, // 11 0010
|
||||
PSMT16SZ = 58, // 11 1010
|
||||
enum PSM_value{
|
||||
PSMCT32 = 0, // 000000
|
||||
PSMCT24 = 1, // 000001
|
||||
PSMCT16 = 2, // 000010
|
||||
PSMCT16S = 10, // 001010
|
||||
PSMT8 = 19, // 010011
|
||||
PSMT4 = 20, // 010100
|
||||
PSMT8H = 27, // 011011
|
||||
PSMT4HL = 36, // 100100
|
||||
PSMT4HH = 44, // 101100
|
||||
PSMT32Z = 48, // 110000
|
||||
PSMT24Z = 49, // 110001
|
||||
PSMT16Z = 50, // 110010
|
||||
PSMT16SZ = 58, // 111010
|
||||
|
||||
PSMT_BAD_PSM = 63 // for every unknown psm.
|
||||
};
|
||||
|
||||
// Check target bit mode. PSMCT32 and 32Z return 0, 24 and 24Z - 1
|
||||
|
@ -461,7 +464,6 @@ typedef struct
|
|||
{
|
||||
u16 aem;
|
||||
u8 ta[2];
|
||||
float fta[2];
|
||||
} texaInfo;
|
||||
|
||||
typedef struct
|
||||
|
@ -503,6 +505,14 @@ typedef struct
|
|||
int fba;
|
||||
} fbaInfo;
|
||||
|
||||
enum transfer_types
|
||||
{
|
||||
XFER_HOST_TO_LOCAL = 0,
|
||||
XFER_LOCAL_TO_HOST = 1,
|
||||
XFER_LOCAL_TO_LOCAL = 2,
|
||||
XFER_DEACTIVATED = 3
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Vertex gsvertex[4]; // circular buffer that contains the vertex
|
||||
|
@ -537,15 +547,20 @@ typedef struct
|
|||
texaInfo texa;
|
||||
trxposInfo trxpos, trxposnew;
|
||||
|
||||
int imageWtemp, imageHtemp;
|
||||
|
||||
int imageTransfer;
|
||||
int imageWnew, imageHnew, imageX, imageY, imageEndX, imageEndY;
|
||||
bool transferring;
|
||||
|
||||
Point image, imageEnd;
|
||||
Size imageNew, imageTemp;
|
||||
|
||||
pathInfo path[4];
|
||||
GIFRegDIMX dimx;
|
||||
GSMemory mem;
|
||||
GSClut clut_buffer;
|
||||
|
||||
// Subject to change.
|
||||
int vsync, interlace;
|
||||
|
||||
int primNext(int inc = 1)
|
||||
{
|
||||
// Note: ArraySize(gsvertex) == 2^n => modulo is replaced by an and instruction
|
||||
|
@ -615,7 +630,7 @@ static __forceinline u32 RGBA16to32(u16 c)
|
|||
(((c) & 0x8000) ? 0xff000000 : 0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
|
||||
// f is a u16
|
||||
static __forceinline u16 Float16ToBYTE(u16 f)
|
||||
|
@ -984,4 +999,39 @@ inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
|
|||
#define CPSM_CSA_BITMASK 0x1f780000
|
||||
#define CPSM_CSA_NOTMASK 0xe0870000
|
||||
|
||||
// I'll find a good place for these later.
|
||||
|
||||
extern PSM_value PSM_value_Table[64];
|
||||
extern bool allowed_psm[256]; // in ZZoglMem.cpp
|
||||
inline void FillAlowedPsnTable() {
|
||||
|
||||
allowed_psm[PSMCT32] = true;
|
||||
allowed_psm[PSMCT24] = true;
|
||||
allowed_psm[PSMCT16] = true;
|
||||
allowed_psm[PSMCT16S] = true;
|
||||
allowed_psm[PSMT8] = true;
|
||||
allowed_psm[PSMT4] = true;
|
||||
allowed_psm[PSMT8H] = true;
|
||||
allowed_psm[PSMT4HH] = true;
|
||||
allowed_psm[PSMT4HL] = true;
|
||||
allowed_psm[PSMT32Z] = true;
|
||||
allowed_psm[PSMT24Z] = true;
|
||||
allowed_psm[PSMT16Z] = true;
|
||||
allowed_psm[PSMT16SZ] = true;
|
||||
|
||||
PSM_value_Table[PSMCT32] = PSMCT32;
|
||||
PSM_value_Table[PSMCT24] = PSMCT24;
|
||||
PSM_value_Table[PSMCT16] = PSMCT16;
|
||||
PSM_value_Table[PSMCT16S] = PSMCT16S;
|
||||
PSM_value_Table[PSMT8] = PSMT8;
|
||||
PSM_value_Table[PSMT4] = PSMT4;
|
||||
PSM_value_Table[PSMT8H] = PSMT8H;
|
||||
PSM_value_Table[PSMT4HH] = PSMT4HH;
|
||||
PSM_value_Table[PSMT4HL] = PSMT4HL;
|
||||
PSM_value_Table[PSMT32Z] = PSMT32Z;
|
||||
PSM_value_Table[PSMT24Z] = PSMT24Z;
|
||||
PSM_value_Table[PSMT16Z] = PSMT16Z;
|
||||
PSM_value_Table[PSMT16SZ] = PSMT16SZ;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "Profile.h"
|
||||
#include "GLWin.h"
|
||||
#include "ZZoglFlushHack.h"
|
||||
#include "ZZoglShaders.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
@ -82,7 +83,7 @@ extern int ZZSave(s8* pbydata);
|
|||
extern bool ZZLoad(s8* pbydata);
|
||||
|
||||
// switches the render target to the real target, flushes the current render targets and renders the real image
|
||||
extern void RenderCRTC(int interlace);
|
||||
extern void RenderCRTC();
|
||||
|
||||
#if defined(_WIN32) && defined(_DEBUG)
|
||||
HANDLE g_hCurrentThread = NULL;
|
||||
|
@ -91,37 +92,37 @@ HANDLE g_hCurrentThread = NULL;
|
|||
extern int VALIDATE_THRESH;
|
||||
extern u32 TEXDESTROY_THRESH;
|
||||
|
||||
u32 CALLBACK PS2EgetLibType()
|
||||
EXPORT_C_(u32) PS2EgetLibType()
|
||||
{
|
||||
return PS2E_LT_GS;
|
||||
}
|
||||
|
||||
char* CALLBACK PS2EgetLibName()
|
||||
EXPORT_C_(char*) PS2EgetLibName()
|
||||
{
|
||||
return libraryName;
|
||||
}
|
||||
|
||||
u32 CALLBACK PS2EgetLibVersion2(u32 type)
|
||||
EXPORT_C_(u32) PS2EgetLibVersion2(u32 type)
|
||||
{
|
||||
return (zgsversion << 16) | (zgsrevision << 8) | zgsbuild | (zgsminor << 24);
|
||||
}
|
||||
|
||||
void CALLBACK GSsetBaseMem(void* pmem)
|
||||
EXPORT_C_(void) GSsetBaseMem(void* pmem)
|
||||
{
|
||||
g_pBasePS2Mem = (u8*)pmem;
|
||||
}
|
||||
|
||||
void CALLBACK GSsetSettingsDir(const char* dir)
|
||||
EXPORT_C_(void) GSsetSettingsDir(const char* dir)
|
||||
{
|
||||
s_strIniPath = (dir == NULL) ? wxString(L"inis") : wxString(dir, wxConvFile);
|
||||
}
|
||||
|
||||
void CALLBACK GSsetLogDir(const char* dir)
|
||||
EXPORT_C_(void) GSsetLogDir(const char* dir)
|
||||
{
|
||||
ZZLog::SetDir(dir);
|
||||
}
|
||||
|
||||
void CALLBACK GSsetGameCRC(int crc, int options)
|
||||
EXPORT_C_(void) GSsetGameCRC(int crc, int options)
|
||||
{
|
||||
// build a list of function pointer for GetSkipCount (SkipDraw)
|
||||
static GetSkipCount GSC_list[NUMBER_OF_TITLES];
|
||||
|
@ -217,7 +218,7 @@ void CALLBACK GSsetGameCRC(int crc, int options)
|
|||
ListHacks();
|
||||
}
|
||||
|
||||
void CALLBACK GSsetFrameSkip(int frameskip)
|
||||
EXPORT_C_(void) GSsetFrameSkip(int frameskip)
|
||||
{
|
||||
FUNCLOG
|
||||
s_frameskipping |= frameskip;
|
||||
|
@ -232,7 +233,7 @@ void CALLBACK GSsetFrameSkip(int frameskip)
|
|||
}
|
||||
}
|
||||
|
||||
void CALLBACK GSreset()
|
||||
EXPORT_C_(void) GSreset()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -242,11 +243,11 @@ void CALLBACK GSreset()
|
|||
|
||||
gs.prac = 1;
|
||||
prim = &gs._prim[0];
|
||||
gs.imageTransfer = -1;
|
||||
gs.transferring = false;
|
||||
gs.q = 1;
|
||||
}
|
||||
|
||||
void CALLBACK GSgifSoftReset(u32 mask)
|
||||
EXPORT_C_(void) GSgifSoftReset(u32 mask)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -254,11 +255,11 @@ void CALLBACK GSgifSoftReset(u32 mask)
|
|||
if (mask & 2) memset(&gs.path[1], 0, sizeof(gs.path[1]));
|
||||
if (mask & 4) memset(&gs.path[2], 0, sizeof(gs.path[2]));
|
||||
|
||||
gs.imageTransfer = -1;
|
||||
gs.transferring = false;
|
||||
gs.q = 1;
|
||||
}
|
||||
|
||||
s32 CALLBACK GSinit()
|
||||
EXPORT_C_(s32) GSinit()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -281,7 +282,7 @@ __forceinline void InitMisc()
|
|||
ResetRegs();
|
||||
}
|
||||
|
||||
s32 CALLBACK GSopen(void *pDsp, char *Title, int multithread)
|
||||
EXPORT_C_(s32) GSopen(void *pDsp, char *Title, int multithread)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -337,32 +338,34 @@ EXPORT_C_(s32) GSopen2( void* pDsp, u32 flags )
|
|||
}
|
||||
#endif
|
||||
|
||||
void CALLBACK GSshutdown()
|
||||
EXPORT_C_(void) GSshutdown()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
ZZLog::Close();
|
||||
}
|
||||
void CALLBACK GSclose()
|
||||
EXPORT_C_(void) GSclose()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
ZZDestroy();
|
||||
GLWin.CloseWindow();
|
||||
|
||||
// Free allocated memory. We could close plugin without closing pcsx2, so we SHOULD free all allocated resources
|
||||
ZZshExitCleaning();
|
||||
SaveStateFile = NULL;
|
||||
SaveStateExists = true; // default value
|
||||
g_LastCRC = 0;
|
||||
}
|
||||
|
||||
void CALLBACK GSirqCallback(void (*callback)())
|
||||
EXPORT_C_(void) GSirqCallback(void (*callback)())
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
GSirq = callback;
|
||||
}
|
||||
|
||||
void CALLBACK GSwriteCSR(u32 write)
|
||||
EXPORT_C_(void) GSwriteCSR(u32 write)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -373,7 +376,7 @@ void CALLBACK GSwriteCSR(u32 write)
|
|||
#define access _access
|
||||
#endif
|
||||
|
||||
void CALLBACK GSchangeSaveState(int newstate, const char* filename)
|
||||
EXPORT_C_(void) GSchangeSaveState(int newstate, const char* filename)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -428,7 +431,7 @@ static bool get_snapshot_filename(char *filename, char* path, const char* extens
|
|||
return true;
|
||||
}
|
||||
|
||||
void CALLBACK GSmakeSnapshot(char *path)
|
||||
EXPORT_C_(void) GSmakeSnapshot(char *path)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -474,7 +477,16 @@ static __forceinline void SetGSTitle()
|
|||
GLWin.SetTitle(strtitle);
|
||||
}
|
||||
|
||||
void CALLBACK GSvsync(int interlace)
|
||||
// This isn't implemented for some reason? Adding a field for it for the moment, till I get a chance to look closer.
|
||||
EXPORT_C_(void) GSsetVsync(int enabled)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
ZZLog::Debug_Log("Setting VSync to 0x%x.", enabled);
|
||||
gs.vsync = enabled;
|
||||
}
|
||||
|
||||
EXPORT_C_(void) GSvsync(int current_interlace)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -506,8 +518,9 @@ void CALLBACK GSvsync(int interlace)
|
|||
|
||||
g_nRealFrame++;
|
||||
|
||||
// !interlace? Hmmm... Fixme.
|
||||
RenderCRTC(!interlace);
|
||||
// The value passed seems to either be 0 or 0x2000, and we want 0 or 1. Perhaps !! would be better...
|
||||
gs.interlace = !current_interlace;
|
||||
RenderCRTC();
|
||||
|
||||
GLWin.ProcessEvents();
|
||||
|
||||
|
@ -559,7 +572,7 @@ void CALLBACK GSvsync(int interlace)
|
|||
|
||||
}
|
||||
|
||||
void CALLBACK GSreadFIFO(u64 *pMem)
|
||||
EXPORT_C_(void) GSreadFIFO(u64 *pMem)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -571,7 +584,7 @@ void CALLBACK GSreadFIFO(u64 *pMem)
|
|||
TransferLocalHost((u32*)pMem, 1);
|
||||
}
|
||||
|
||||
void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
|
||||
EXPORT_C_(void) GSreadFIFO2(u64 *pMem, int qwc)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -583,7 +596,7 @@ void CALLBACK GSreadFIFO2(u64 *pMem, int qwc)
|
|||
TransferLocalHost((u32*)pMem, qwc);
|
||||
}
|
||||
|
||||
int CALLBACK GSsetupRecording(int start, void* pData)
|
||||
EXPORT_C_(int) GSsetupRecording(int start, void* pData)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -595,7 +608,7 @@ int CALLBACK GSsetupRecording(int start, void* pData)
|
|||
return 1;
|
||||
}
|
||||
|
||||
s32 CALLBACK GSfreeze(int mode, freezeData *data)
|
||||
EXPORT_C_(s32) GSfreeze(int mode, freezeData *data)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ static int path1_count = 0;
|
|||
|
||||
static int nPath3Hack = 0;
|
||||
|
||||
void CALLBACK GSgetLastTag(u64* ptag)
|
||||
EXPORT_C_(void) GSgetLastTag(u64* ptag)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -166,13 +166,15 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
|||
int len = min(size, path->nloop);
|
||||
//ZZLog::Error_Log("GIF_FLG_IMAGE(%d)=%d", gs.imageTransfer, len);
|
||||
|
||||
if (gs.transferring)
|
||||
{
|
||||
switch (gs.imageTransfer)
|
||||
{
|
||||
case 0:
|
||||
case XFER_HOST_TO_LOCAL:
|
||||
TransferHostLocal(pMem, len * 4);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
case XFER_LOCAL_TO_HOST:
|
||||
// This can't happen; downloads can not be started or performed as part of
|
||||
// a GIFtag operation. They're an entirely separate process that can only be
|
||||
// done through the ReverseFIFO transfer (aka ReadFIFO). --air
|
||||
|
@ -180,11 +182,11 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
|||
//TransferLocalHost(pMem, len);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
case XFER_LOCAL_TO_LOCAL:
|
||||
//TransferLocalLocal();
|
||||
break;
|
||||
|
||||
case 3:
|
||||
case XFER_DEACTIVATED:
|
||||
//assert(0);
|
||||
break;
|
||||
|
||||
|
@ -193,6 +195,8 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
|||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
pMem += len * 4;
|
||||
|
||||
path->nloop -= len;
|
||||
|
@ -236,7 +240,7 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
|||
}
|
||||
}
|
||||
|
||||
void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr)
|
||||
EXPORT_C_(void) GSgifTransfer1(u32 *pMem, u32 addr)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -250,7 +254,7 @@ void CALLBACK GSgifTransfer1(u32 *pMem, u32 addr)
|
|||
_GSgifTransfer<0>((u32*)((u8*)pMem + addr), (0x4000 - addr) / 16);
|
||||
}
|
||||
|
||||
void CALLBACK GSgifTransfer2(u32 *pMem, u32 size)
|
||||
EXPORT_C_(void) GSgifTransfer2(u32 *pMem, u32 size)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -259,7 +263,7 @@ void CALLBACK GSgifTransfer2(u32 *pMem, u32 size)
|
|||
_GSgifTransfer<1>(const_cast<u32*>(pMem), size);
|
||||
}
|
||||
|
||||
void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
|
||||
EXPORT_C_(void) GSgifTransfer3(u32 *pMem, u32 size)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
@ -268,7 +272,7 @@ void CALLBACK GSgifTransfer3(u32 *pMem, u32 size)
|
|||
_GSgifTransfer<2>(const_cast<u32*>(pMem), size);
|
||||
}
|
||||
|
||||
void CALLBACK GSgifTransfer(const u32 *pMem, u32 size)
|
||||
EXPORT_C_(void) GSgifTransfer(const u32 *pMem, u32 size)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
|
|
|
@ -18,8 +18,6 @@
|
|||
*/
|
||||
|
||||
#include "GS.h"
|
||||
#include <Cg/cg.h>
|
||||
#include <Cg/cgGL.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "Mem.h"
|
||||
|
@ -117,14 +115,14 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h
|
|||
|
||||
if (PSMT_ISZTEX(psm))
|
||||
{
|
||||
// Somehow, I doubt this code is right. I'll have to look into it. For the moment, I'm keeping it the
|
||||
// way it was. --arcum42
|
||||
|
||||
// This still needs an eye kept on it.
|
||||
const BLOCK& b = m_Blocks[psm];
|
||||
const int x2 = x + w + b.width - 1;
|
||||
const int y2 = y + h - 1;
|
||||
bw = bw / b.width;
|
||||
|
||||
bw = (bw + b.width - 1) / b.width;
|
||||
start = bp * 256 + ((y / b.height) * bw + (x / b.width)) * 0x2000;
|
||||
end = bp * 256 + (((y + h - 1) / b.height) * bw + (x + w + b.width - 1) / b.width) * 0x2000;
|
||||
start = (bp + ((y / b.height) * bw + (x / b.width)) * 0x20) * 0x100;
|
||||
end = (bp + ((y2 / b.height) * bw + (x2 / b.width)) * 0x20) * 0x100;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -139,46 +137,101 @@ void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h
|
|||
}
|
||||
else
|
||||
{
|
||||
// This is what it used to do, which doesn't seem right.
|
||||
// Keeping it for reference, in case removing it breaks anything.
|
||||
|
||||
//int newx = ((x + w - 1 + 31) & ~31) - 1;
|
||||
//int newy = ((y + h - 1 + 15) & ~15) - 1;
|
||||
//start = getPixelAddress4(x, y, bp, bw) / 2;
|
||||
//end = (getPixelAddress4(max(newx, x), max(newy, y), bp, bw) + 2) / 2;
|
||||
|
||||
start /= 2;
|
||||
end /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Same as GetRectMemAddress, except that we know x & y are zero, so it's simplified a bit.
|
||||
void GetRectMemAddressZero(int& start, int& end, int psm, int w, int h, int bp, int bw)
|
||||
{
|
||||
FUNCLOG
|
||||
u32 bits = 0;
|
||||
|
||||
if (m_Blocks[psm].bpp == 0)
|
||||
{
|
||||
ZZLog::Error_Log("ZeroGS: Bad psm 0x%x.", psm);
|
||||
start = 0;
|
||||
end = MEMORY_END;
|
||||
return;
|
||||
}
|
||||
|
||||
if (PSMT_ISZTEX(psm))
|
||||
{
|
||||
// This still needs an eye kept on it.
|
||||
const BLOCK& b = m_Blocks[psm];
|
||||
const int x2 = w + b.width - 1;
|
||||
const int y2 = h - 1;
|
||||
bw = bw / b.width;
|
||||
|
||||
start = bp * 0x100;
|
||||
end = (bp + ((y2 / b.height) * bw + (x2 / b.width)) * 0x20) * 0x100;
|
||||
return;
|
||||
}
|
||||
|
||||
bits = PSMT_BITS_NUM(psm);
|
||||
start = getPixelFun[psm](0, 0, bp, bw);
|
||||
end = getPixelFun[psm](w - 1, h - 1, bp, bw) + 1;
|
||||
|
||||
if (bits > 0)
|
||||
{
|
||||
start *= bits;
|
||||
end *= bits;
|
||||
}
|
||||
else
|
||||
{
|
||||
start /= 2;
|
||||
end /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GetRectMemAddress(int& start, int& end, int psm, Point p, Size s, int bp, int bw)
|
||||
{
|
||||
GetRectMemAddress(start, end, psm, p.x, p.y, s.w, s.h, bp, bw);
|
||||
}
|
||||
|
||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, Size s, int bp, int bw)
|
||||
{
|
||||
GetRectMemAddress(start, end, psm, x, y, s.w, s.h, bp, bw);
|
||||
}
|
||||
|
||||
void GetRectMemAddressZero(int& start, int& end, int psm, Size s, int bp, int bw)
|
||||
{
|
||||
GetRectMemAddressZero(start, end, psm, s.w, s.h, bp, bw);
|
||||
}
|
||||
|
||||
void InitTransferHostLocal()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
#if defined(_DEBUG)
|
||||
// Xenosaga 1.
|
||||
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0X%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
|
||||
if (gs.trxpos.dx + gs.imageNew.w > gs.dstbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0X%x)", gs.trxpos.dx + gs.imageNew.w, gs.dstbuf.bw);
|
||||
#endif
|
||||
|
||||
//bool bHasFlushed = false;
|
||||
|
||||
gs.imageX = gs.trxpos.dx;
|
||||
gs.imageY = gs.trxpos.dy;
|
||||
gs.image.x = gs.trxpos.dx;
|
||||
gs.image.y = gs.trxpos.dy;
|
||||
|
||||
gs.imageEndX = gs.imageX + gs.imageWnew;
|
||||
gs.imageEndY = gs.imageY + gs.imageHnew;
|
||||
gs.imageEnd.x = gs.image.x + gs.imageNew.w;
|
||||
gs.imageEnd.y = gs.image.y + gs.imageNew.h;
|
||||
|
||||
assert(gs.imageEndX < 2048 && gs.imageEndY < 2048);
|
||||
assert(gs.imageEnd.x < 2048 && gs.imageEnd.y < 2048);
|
||||
|
||||
// This needs to be looked in to, since psm should *not* be 63.
|
||||
// hack! viewful joe
|
||||
if (gs.dstbuf.psm == 63) gs.dstbuf.psm = 0;
|
||||
if (gs.dstbuf.psm == 63)
|
||||
{
|
||||
ZZLog::WriteLn("gs.dstbuf.psm set to 0!");
|
||||
gs.dstbuf.psm = 0;
|
||||
}
|
||||
|
||||
int start, end;
|
||||
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
|
||||
if (end > MEMORY_END)
|
||||
{
|
||||
|
@ -189,7 +242,7 @@ void InitTransferHostLocal()
|
|||
// MEMORY_END is 0x400000...
|
||||
|
||||
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
|
||||
//gs.imageTransfer = -1;
|
||||
//gs.transferring = false;
|
||||
end = MEMORY_END;
|
||||
}
|
||||
|
||||
|
@ -198,17 +251,18 @@ void InitTransferHostLocal()
|
|||
if (vb[0].nCount > 0) Flush(0);
|
||||
if (vb[1].nCount > 0) Flush(1);
|
||||
|
||||
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew);
|
||||
//ZZLog::Prim_Log("trans: bp:%x x:%x y:%x w:%x h:%x\n", gs.dstbuf.bp, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew.w, gs.imageNew.h);
|
||||
}
|
||||
|
||||
void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
int start, end;
|
||||
int start = -1, end = -1;
|
||||
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.image, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
|
||||
if ((start == -1) || (end == -1)) ZZLog::WriteLn("start == %d, end == %d", start, end);
|
||||
assert(start < gs_imageEnd);
|
||||
end = gs_imageEnd;
|
||||
|
||||
|
@ -272,8 +326,8 @@ void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
|
|||
{
|
||||
tex0Info t;
|
||||
t.tbp0 = gs.dstbuf.bp;
|
||||
t.tw = gs.imageWnew;
|
||||
t.th = gs.imageHnew;
|
||||
t.tw = gs.imageNew.w;
|
||||
t.th = gs.imageNew.h;
|
||||
t.tbw = gs.dstbuf.bw;
|
||||
t.psm = gs.dstbuf.psm;
|
||||
SaveTex(&t, 0);
|
||||
|
@ -285,24 +339,24 @@ void TransferHostLocal(const void* pbyMem, u32 nQWordSize)
|
|||
void InitTransferLocalHost()
|
||||
{
|
||||
FUNCLOG
|
||||
assert(gs.trxpos.sx + gs.imageWnew <= 2048 && gs.trxpos.sy + gs.imageHnew <= 2048);
|
||||
assert(gs.trxpos.sx + gs.imageNew.w <= 2048 && gs.trxpos.sy + gs.imageNew.h <= 2048);
|
||||
|
||||
#if defined(_DEBUG)
|
||||
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
|
||||
if (gs.trxpos.sx + gs.imageNew.w > gs.srcbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, width exceeded. (0x%x > 0x%x)", gs.trxpos.sx + gs.imageNew.w, gs.srcbuf.bw);
|
||||
#endif
|
||||
|
||||
gs.imageX = gs.trxpos.sx;
|
||||
gs.imageY = gs.trxpos.sy;
|
||||
gs.image.x = gs.trxpos.sx;
|
||||
gs.image.y = gs.trxpos.sy;
|
||||
|
||||
gs.imageEndX = gs.imageX + gs.imageWnew;
|
||||
gs.imageEndY = gs.imageY + gs.imageHnew;
|
||||
gs.imageEnd.x = gs.image.x + gs.imageNew.w;
|
||||
gs.imageEnd.y = gs.image.y + gs.imageNew.h;
|
||||
|
||||
s_vTransferCache.resize(0);
|
||||
|
||||
int start, end;
|
||||
|
||||
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
|
||||
GetRectMemAddress(start, end, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageNew, gs.srcbuf.bp, gs.srcbuf.bw);
|
||||
|
||||
ResolveInRange(start, end);
|
||||
}
|
||||
|
@ -316,16 +370,16 @@ void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
|
|||
T* pbuf = (T*)pbyMem;
|
||||
u32 nSize = nQWordSize * 16 / sizeof(T);
|
||||
|
||||
for (; i < gs.imageEndY; ++i)
|
||||
for (; i < gs.imageEnd.y; ++i)
|
||||
{
|
||||
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
|
||||
for (; j < gs.imageEnd.x && nSize > 0; ++j, --nSize)
|
||||
{
|
||||
*pbuf++ = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
|
||||
}
|
||||
|
||||
if (j >= gs.imageEndX)
|
||||
if (j >= gs.imageEnd.x)
|
||||
{
|
||||
assert(j == gs.imageEndX);
|
||||
assert(j == gs.imageEnd.x);
|
||||
j = gs.trxpos.sx;
|
||||
}
|
||||
else
|
||||
|
@ -344,9 +398,9 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
|
|||
u8* pbuf = (u8*)pbyMem;
|
||||
u32 nSize = nQWordSize * 16 / 3;
|
||||
|
||||
for (; i < gs.imageEndY; ++i)
|
||||
for (; i < gs.imageEnd.y; ++i)
|
||||
{
|
||||
for (; j < gs.imageEndX && nSize > 0; ++j, --nSize)
|
||||
for (; j < gs.imageEnd.x && nSize > 0; ++j, --nSize)
|
||||
{
|
||||
u32 p = rp(pstart, j % 2048, i % 2048, gs.srcbuf.bw);
|
||||
pbuf[0] = (u8)p;
|
||||
|
@ -355,9 +409,9 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
|
|||
pbuf += 3;
|
||||
}
|
||||
|
||||
if (j >= gs.imageEndX)
|
||||
if (j >= gs.imageEnd.x)
|
||||
{
|
||||
assert(j == gs.imageEndX);
|
||||
assert(j == gs.imageEnd.x);
|
||||
j = gs.trxpos.sx;
|
||||
}
|
||||
else
|
||||
|
@ -372,34 +426,34 @@ void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *psta
|
|||
void TransferLocalHost(void* pbyMem, u32 nQWordSize)
|
||||
{
|
||||
FUNCLOG
|
||||
assert(gs.imageTransfer == 1);
|
||||
assert(gs.imageTransfer == XFER_LOCAL_TO_HOST);
|
||||
|
||||
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
|
||||
|
||||
switch(PSMT_BITMODE(gs.srcbuf.psm))
|
||||
{
|
||||
case 0:
|
||||
TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
|
||||
TransferLocalHost<u32>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
|
||||
break;
|
||||
case 1:
|
||||
TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
|
||||
TransferLocalHost_24(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
|
||||
break;
|
||||
case 2:
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
|
||||
break;
|
||||
case 3:
|
||||
TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart);
|
||||
TransferLocalHost<u8>(pbyMem, nQWordSize, gs.image.y, gs.image.x, pstart);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (gs.imageY >= gs.imageEndY)
|
||||
if (gs.image.y >= gs.imageEnd.y)
|
||||
{
|
||||
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
|
||||
assert(gs.imageY == gs.imageEndY);
|
||||
gs.imageTransfer = -1;
|
||||
ZZLog::Error_Log("gs.image.y >= gs.imageEnd.y!");
|
||||
assert(gs.image.y == gs.imageEnd.y);
|
||||
gs.transferring = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -411,11 +465,11 @@ __forceinline void _TransferLocalLocal()
|
|||
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
|
||||
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
|
||||
u32 widthlimit = 4;
|
||||
u32 maxX = gs.trxpos.sx + gs.imageWnew;
|
||||
u32 maxY = gs.trxpos.sy + gs.imageHnew;
|
||||
u32 maxX = gs.trxpos.sx + gs.imageNew.w;
|
||||
u32 maxY = gs.trxpos.sy + gs.imageNew.h;
|
||||
|
||||
if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;
|
||||
if ((gs.imageWnew & widthlimit) != 0) return;
|
||||
if ((gs.imageNew.w & widthlimit) != 0) return;
|
||||
|
||||
for(u32 i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
|
||||
{
|
||||
|
@ -447,10 +501,10 @@ __forceinline void _TransferLocalLocal_4()
|
|||
_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];
|
||||
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
|
||||
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
|
||||
u32 maxX = gs.trxpos.sx + gs.imageWnew;
|
||||
u32 maxY = gs.trxpos.sy + gs.imageHnew;
|
||||
u32 maxX = gs.trxpos.sx + gs.imageNew.w;
|
||||
u32 maxY = gs.trxpos.sy + gs.imageNew.h;
|
||||
|
||||
assert((gs.imageWnew % 8) == 0);
|
||||
assert((gs.imageNew.w % 8) == 0);
|
||||
|
||||
for(u32 i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
|
||||
{
|
||||
|
@ -498,21 +552,21 @@ void TransferLocalLocal()
|
|||
FUNCLOG
|
||||
|
||||
//ZZLog::Error_Log("I'z in your code, transferring your memory...");
|
||||
assert(gs.imageTransfer == 2);
|
||||
assert(gs.trxpos.sx + gs.imageWnew < 2048 && gs.trxpos.sy + gs.imageHnew < 2048);
|
||||
assert(gs.trxpos.dx + gs.imageWnew < 2048 && gs.trxpos.dy + gs.imageHnew < 2048);
|
||||
assert(gs.imageTransfer == XFER_LOCAL_TO_LOCAL);
|
||||
assert(gs.trxpos.sx + gs.imageNew.w < 2048 && gs.trxpos.sy + gs.imageNew.h < 2048);
|
||||
assert(gs.trxpos.dx + gs.imageNew.w < 2048 && gs.trxpos.dy + gs.imageNew.h < 2048);
|
||||
assert((gs.srcbuf.psm&0x7) == (gs.dstbuf.psm&0x7));
|
||||
|
||||
if (gs.trxpos.sx + gs.imageWnew > gs.srcbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, src width exceeded.(0x%x > 0x%x)", gs.trxpos.sx + gs.imageWnew, gs.srcbuf.bw);
|
||||
if (gs.trxpos.sx + gs.imageNew.w > gs.srcbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, src width exceeded.(0x%x > 0x%x)", gs.trxpos.sx + gs.imageNew.w, gs.srcbuf.bw);
|
||||
|
||||
if (gs.trxpos.dx + gs.imageWnew > gs.dstbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, dst width exceeded.(0x%x > 0x%x)", gs.trxpos.dx + gs.imageWnew, gs.dstbuf.bw);
|
||||
if (gs.trxpos.dx + gs.imageNew.w > gs.dstbuf.bw)
|
||||
ZZLog::Debug_Log("Transfer error, dst width exceeded.(0x%x > 0x%x)", gs.trxpos.dx + gs.imageNew.w, gs.dstbuf.bw);
|
||||
|
||||
int srcstart, srcend, dststart, dstend;
|
||||
|
||||
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageWnew, gs.imageHnew, gs.srcbuf.bp, gs.srcbuf.bw);
|
||||
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
GetRectMemAddress(srcstart, srcend, gs.srcbuf.psm, gs.trxpos.sx, gs.trxpos.sy, gs.imageNew, gs.srcbuf.bp, gs.srcbuf.bw);
|
||||
GetRectMemAddress(dststart, dstend, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
|
||||
// resolve the targs
|
||||
ResolveInRange(srcstart, srcend);
|
||||
|
@ -547,15 +601,15 @@ void TransferLocalLocal()
|
|||
{
|
||||
tex0Info t;
|
||||
t.tbp0 = gs.dstbuf.bp;
|
||||
t.tw = gs.imageWnew;
|
||||
t.th = gs.imageHnew;
|
||||
t.tw = gs.imageNew.w;
|
||||
t.th = gs.imageNew.h;
|
||||
t.tbw = gs.dstbuf.bw;
|
||||
t.psm = gs.dstbuf.psm;
|
||||
SaveTex(&t, 0);
|
||||
|
||||
t.tbp0 = gs.srcbuf.bp;
|
||||
t.tw = gs.imageWnew;
|
||||
t.th = gs.imageHnew;
|
||||
t.tw = gs.imageNew.w;
|
||||
t.th = gs.imageNew.h;
|
||||
t.tbw = gs.srcbuf.bw;
|
||||
t.psm = gs.srcbuf.psm;
|
||||
SaveTex(&t, 0);
|
||||
|
@ -564,15 +618,3 @@ void TransferLocalLocal()
|
|||
#endif
|
||||
}
|
||||
|
||||
__forceinline void TerminateLocalHost()
|
||||
{
|
||||
FUNCLOG
|
||||
//ZZLog::Error_Log("Terminate Local Host!");
|
||||
}
|
||||
|
||||
__forceinline void TerminateHostLocal()
|
||||
{
|
||||
FUNCLOG
|
||||
gs.imageTransfer = -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -96,7 +96,10 @@ inline u8* _MemoryAddress(int x)
|
|||
}
|
||||
|
||||
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
|
||||
|
||||
extern void GetRectMemAddress(int& start, int& end, int psm, Point p, Size s, int bp, int bw);
|
||||
extern void GetRectMemAddress(int& start, int& end, int psm, int x, int y, Size s, int bp, int bw);
|
||||
extern void GetRectMemAddressZero(int& start, int& end, int psm, int w, int h, int bp, int bw);
|
||||
extern void GetRectMemAddressZero(int& start, int& end, int psm, Size s, int bp, int bw);
|
||||
|
||||
// called when trxdir is accessed. If host is involved, transfers memory to temp buffer byTransferBuf.
|
||||
// Otherwise performs the transfer. TODO: Perhaps divide the transfers into chunks?
|
||||
|
@ -108,7 +111,4 @@ extern void TransferLocalHost(void* pbyMem, u32 nQWordSize);
|
|||
|
||||
extern void TransferLocalLocal();
|
||||
|
||||
extern void TerminateLocalHost();
|
||||
extern void TerminateHostLocal();
|
||||
|
||||
#endif // HOSTMEMORY_H_INCLUDED
|
||||
|
|
|
@ -35,7 +35,7 @@ extern bool THR_bCtrl;
|
|||
static map<string, confOptsStruct> mapConfOpts;
|
||||
static gameHacks tempHacks;
|
||||
|
||||
void CALLBACK GSkeyEvent(keyEvent *ev)
|
||||
EXPORT_C_(void) GSkeyEvent(keyEvent *ev)
|
||||
{
|
||||
static bool bAlt = false;
|
||||
|
||||
|
@ -450,7 +450,7 @@ void DisplayDialog()
|
|||
gtk_widget_destroy(dialog);
|
||||
}
|
||||
|
||||
void CALLBACK GSconfigure()
|
||||
EXPORT_C_(void) GSconfigure()
|
||||
{
|
||||
char strcurdir[256];
|
||||
getcwd(strcurdir, 256);
|
||||
|
@ -484,12 +484,12 @@ void SysMessage(const char *fmt, ...)
|
|||
gtk_widget_destroy(dialog);
|
||||
}
|
||||
|
||||
void CALLBACK GSabout()
|
||||
EXPORT_C_(void) GSabout()
|
||||
{
|
||||
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42, gregory, and the pcsx2 development team). Based off of ZeroGS, by zerofrog.");
|
||||
}
|
||||
|
||||
s32 CALLBACK GStest()
|
||||
EXPORT_C_(s32) GStest()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -16,11 +16,14 @@
|
|||
<Compiler>
|
||||
<Add option="-Wall" />
|
||||
<Add option="-g" />
|
||||
<Add option="-I/opt/cg/include" />
|
||||
<Add option="-L/opt/cg/lib" />
|
||||
<Add option="-DZEROGS_DEVBUILD" />
|
||||
<Add option="-D_DEBUG" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add library="../../../../../deps/debug/libUtilities.a" />
|
||||
<Add library="Cg" />
|
||||
</Linker>
|
||||
</Target>
|
||||
<Target title="Devel">
|
||||
|
@ -33,12 +36,15 @@
|
|||
<Compiler>
|
||||
<Add option="-O2" />
|
||||
<Add option="-g" />
|
||||
<Add option="-I/opt/cg/include" />
|
||||
<Add option="-L/opt/cg/lib" />
|
||||
<Add option="-W" />
|
||||
<Add option="-DZEROGS_DEVBUILD" />
|
||||
<Add option="-DNDEBUG" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add library="../../../../../deps/release/libUtilities.a" />
|
||||
<Add library="Cg" />
|
||||
</Linker>
|
||||
</Target>
|
||||
<Target title="Release">
|
||||
|
@ -50,10 +56,69 @@
|
|||
<Option createStaticLib="1" />
|
||||
<Compiler>
|
||||
<Add option="-O2" />
|
||||
<Add option="-I/opt/cg/include" />
|
||||
<Add option="-L/opt/cg/lib" />
|
||||
<Add option="-W" />
|
||||
<Add option="-DRELEASE_TO_PUBLIC" />
|
||||
<Add option="-DNDEBUG" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add option="-s" />
|
||||
<Add library="../../../../../deps/release/libUtilities.a" />
|
||||
<Add library="Cg" />
|
||||
</Linker>
|
||||
</Target>
|
||||
<Target title="GLSL - Debug">
|
||||
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL-dbg.so" prefix_auto="0" extension_auto="0" />
|
||||
<Option object_output="obj/Debug/" />
|
||||
<Option type="3" />
|
||||
<Option compiler="gcc" />
|
||||
<Option createDefFile="1" />
|
||||
<Option createStaticLib="1" />
|
||||
<Compiler>
|
||||
<Add option="-Wall" />
|
||||
<Add option="-g" />
|
||||
<Add option="-DZEROGS_DEVBUILD" />
|
||||
<Add option="-D_DEBUG" />
|
||||
<Add option="-DGLSL_API" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add library="../../../../../deps/debug/libUtilities.a" />
|
||||
</Linker>
|
||||
</Target>
|
||||
<Target title="GLSL - Devel">
|
||||
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL-dev" prefix_auto="1" extension_auto="1" />
|
||||
<Option object_output="obj/Release/" />
|
||||
<Option type="3" />
|
||||
<Option compiler="gcc" />
|
||||
<Option createDefFile="1" />
|
||||
<Option createStaticLib="1" />
|
||||
<Compiler>
|
||||
<Add option="-O2" />
|
||||
<Add option="-g" />
|
||||
<Add option="-W" />
|
||||
<Add option="-DZEROGS_DEVBUILD" />
|
||||
<Add option="-DNDEBUG" />
|
||||
<Add option="-DGLSL_API" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add library="../../../../../deps/release/libUtilities.a" />
|
||||
</Linker>
|
||||
</Target>
|
||||
<Target title="GLSL - Release">
|
||||
<Option output="../../../../../bin/plugins/ZZOgl-PG-GLSL" prefix_auto="1" extension_auto="1" />
|
||||
<Option object_output="obj/Release/" />
|
||||
<Option type="3" />
|
||||
<Option compiler="gcc" />
|
||||
<Option createDefFile="1" />
|
||||
<Option createStaticLib="1" />
|
||||
<Compiler>
|
||||
<Add option="-O2" />
|
||||
<Add option="-W" />
|
||||
<Add option="-DRELEASE_TO_PUBLIC" />
|
||||
<Add option="-DNDEBUG" />
|
||||
<Add option="-DGLSL_API" />
|
||||
</Compiler>
|
||||
<Linker>
|
||||
<Add option="-s" />
|
||||
<Add library="../../../../../deps/release/libUtilities.a" />
|
||||
|
@ -68,8 +133,6 @@
|
|||
<Add option="-Wno-unused-value" />
|
||||
<Add option="-Wunused-variable" />
|
||||
<Add option="-m32" />
|
||||
<Add option="-I/opt/cg/include" />
|
||||
<Add option="-L/opt/cg/lib" />
|
||||
<Add option="-msse2" />
|
||||
<Add option="-fno-regmove" />
|
||||
<Add option="-fno-strict-aliasing" />
|
||||
|
@ -96,10 +159,10 @@
|
|||
<Add library="z" />
|
||||
<Add library="dl" />
|
||||
<Add library="stdc++" />
|
||||
<Add library="Cg" />
|
||||
</Linker>
|
||||
<ExtraCommands>
|
||||
<Add after="cp $(PROJECT_DIR)/../../ps2hw.dat $(TARGET_OUTPUT_DIR)/" />
|
||||
<Add after="cp $(PROJECT_DIR)/../../ps2hw.glsl $(TARGET_OUTPUT_DIR)/" />
|
||||
<Mode after="always" />
|
||||
</ExtraCommands>
|
||||
<Unit filename="../../CRC.h" />
|
||||
|
@ -121,8 +184,6 @@
|
|||
<Unit filename="../../Mem_Swizzle.h" />
|
||||
<Unit filename="../../Mem_Tables.cpp" />
|
||||
<Unit filename="../../Mem_Transmit.h" />
|
||||
<Unit filename="../../NewRegs.cpp" />
|
||||
<Unit filename="../../NewRegs.h" />
|
||||
<Unit filename="../../Profile.cpp" />
|
||||
<Unit filename="../../Profile.h" />
|
||||
<Unit filename="../../Regs.cpp" />
|
||||
|
@ -147,24 +208,29 @@
|
|||
</Unit>
|
||||
<Unit filename="../../ZZClut.cpp" />
|
||||
<Unit filename="../../ZZClut.h" />
|
||||
<Unit filename="../../ZZDepthTargets.cpp" />
|
||||
<Unit filename="../../ZZGl.h" />
|
||||
<Unit filename="../../ZZHacks.cpp" />
|
||||
<Unit filename="../../ZZHacks.h" />
|
||||
<Unit filename="../../ZZKeyboard.cpp" />
|
||||
<Unit filename="../../ZZLog.cpp" />
|
||||
<Unit filename="../../ZZLog.h" />
|
||||
<Unit filename="../../ZZMemoryTargets.cpp" />
|
||||
<Unit filename="../../ZZRenderTargets.cpp" />
|
||||
<Unit filename="../../ZZoglCRTC.cpp" />
|
||||
<Unit filename="../../ZZoglCRTC.h" />
|
||||
<Unit filename="../../ZZoglCreate.cpp" />
|
||||
<Unit filename="../../ZZoglDrawing.cpp" />
|
||||
<Unit filename="../../ZZoglDrawing.h" />
|
||||
<Unit filename="../../ZZoglFlush.cpp" />
|
||||
<Unit filename="../../ZZoglFlush.h" />
|
||||
<Unit filename="../../ZZoglFlushHack.cpp" />
|
||||
<Unit filename="../../ZZoglFlushHack.h" />
|
||||
<Unit filename="../../ZZoglMath.h" />
|
||||
<Unit filename="../../ZZoglSave.cpp" />
|
||||
<Unit filename="../../ZZoglShaders.cpp" />
|
||||
<Unit filename="../../ZZoglShaders.h" />
|
||||
<Unit filename="../../ZZoglShadersGLSL.cpp" />
|
||||
<Unit filename="../../ZZoglShoots.cpp" />
|
||||
<Unit filename="../../ZZoglShoots.h" />
|
||||
<Unit filename="../../ZZoglVB.cpp" />
|
||||
|
|
|
@ -25,9 +25,11 @@
|
|||
#include "Mem_Transmit.h"
|
||||
#include "Mem_Swizzle.h"
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ZZNORMAL_MEMORY
|
||||
|
||||
BLOCK m_Blocks[0x40]; // do so blocks are indexable
|
||||
|
||||
PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
|
@ -53,41 +55,41 @@ u8* pstart;
|
|||
template <class T>
|
||||
static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
|
||||
{
|
||||
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
|
||||
bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.image.x == gs.trxpos.dx) &&
|
||||
(alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));
|
||||
|
||||
if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit)
|
||||
if ((gs.imageEnd.x - gs.trxpos.dx) % data.widthlimit)
|
||||
{
|
||||
/* hack */
|
||||
int testwidth = (int)nSize -
|
||||
(gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx)
|
||||
+ (gs.imageX - gs.trxpos.dx);
|
||||
(gs.imageEnd.y - gs.image.y) * (gs.imageEnd.x - gs.trxpos.dx)
|
||||
+ (gs.image.x - gs.trxpos.dx);
|
||||
|
||||
if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit))
|
||||
{
|
||||
/* don't transfer */
|
||||
/*ZZLog::Debug_Log("Bad texture %s: %d %d %d", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/
|
||||
/*ZZLog::Debug_Log("Bad texture %s: %d %d %d", #psm, gs.trxpos.dx, gs.imageEnd.x, nQWordSize);*/
|
||||
//ZZLog::Error_Log("Bad texture: testwidth = %d; data.widthlimit = %d", testwidth, data.widthlimit);
|
||||
gs.imageTransfer = -1;
|
||||
gs.transferring = false;
|
||||
}
|
||||
|
||||
bCanAlign = false;
|
||||
}
|
||||
|
||||
/* first align on block boundary */
|
||||
if (MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign)
|
||||
if (MOD_POW2(gs.image.y, data.blockheight) || !bCanAlign)
|
||||
{
|
||||
u32 transwidth;
|
||||
|
||||
if (!bCanAlign)
|
||||
endY = gs.imageEndY; /* transfer the whole image */
|
||||
endY = gs.imageEnd.y; /* transfer the whole image */
|
||||
else
|
||||
assert(endY < gs.imageEndY); /* part of alignment condition */
|
||||
assert(endY < gs.imageEnd.y); /* part of alignment condition */
|
||||
|
||||
if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit))
|
||||
if (((gs.imageEnd.x - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEnd.x - gs.image.x) % data.widthlimit))
|
||||
{
|
||||
/* transmit with a width of 1 */
|
||||
transwidth = (1 + (DSTPSM == PSMT4));
|
||||
transwidth = (1 + (gs.dstbuf.psm == PSMT4));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -98,7 +100,7 @@ static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFun
|
|||
|
||||
if (pbuf == NULL) return NULL;
|
||||
|
||||
if (nSize == 0 || tempY == gs.imageEndY) return NULL;
|
||||
if (nSize == 0 || tempY == gs.imageEnd.y) return NULL;
|
||||
}
|
||||
|
||||
return pbuf;
|
||||
|
@ -112,14 +114,14 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
|
|||
_SwizzleBlock swizzle;
|
||||
|
||||
/* can align! */
|
||||
pitch = gs.imageEndX - gs.trxpos.dx;
|
||||
pitch = gs.imageEnd.x - gs.trxpos.dx;
|
||||
area = pitch * data.blockheight;
|
||||
fracX = gs.imageEndX - alignedPt.x;
|
||||
fracX = gs.imageEnd.x - alignedPt.x;
|
||||
|
||||
/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
|
||||
bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0;
|
||||
|
||||
if (bAligned || ((DSTPSM == PSMCT24) || (DSTPSM == PSMT8H) || (DSTPSM == PSMT4HH) || (DSTPSM == PSMT4HL)))
|
||||
if (bAligned || ((gs.dstbuf.psm == PSMCT24) || (gs.dstbuf.psm == PSMT8H) || (gs.dstbuf.psm == PSMT4HH) || (gs.dstbuf.psm == PSMT4HL)))
|
||||
swizzle = (fun.Swizzle);
|
||||
else
|
||||
swizzle = (fun.Swizzle_u);
|
||||
|
@ -140,7 +142,7 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
|
|||
#endif
|
||||
|
||||
/* transfer the rest */
|
||||
if (alignedPt.x < gs.imageEndX)
|
||||
if (alignedPt.x < gs.imageEnd.x)
|
||||
{
|
||||
pbuf = TransmitHostLocalX<T>(data.psm, fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);
|
||||
|
||||
|
@ -161,19 +163,19 @@ static __forceinline const T* TransferAligningToBlocks(TransferData data, Transf
|
|||
|
||||
static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
|
||||
{
|
||||
if (tempY >= gs.imageEndY)
|
||||
if (tempY >= gs.imageEnd.y)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempY == gs.imageEndY);
|
||||
gs.imageTransfer = -1;
|
||||
assert( gs.transferring == false || tempY == gs.imageEnd.y);
|
||||
gs.transferring = false;
|
||||
/*int start, end;
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageNew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
g_MemTargs.ClearRange(start, end);*/
|
||||
}
|
||||
else
|
||||
{
|
||||
/* update new params */
|
||||
gs.imageY = tempY;
|
||||
gs.imageX = tempX;
|
||||
gs.image.y = tempY;
|
||||
gs.image.x = tempX;
|
||||
}
|
||||
|
||||
return (nSize * TransPitch(2, data.transfersize) + nLeftOver) / 2;
|
||||
|
@ -182,23 +184,23 @@ static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
|
|||
template <class T>
|
||||
static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSize)
|
||||
{
|
||||
assert(gs.imageTransfer == 0);
|
||||
assert(gs.imageTransfer == XFER_HOST_TO_LOCAL);
|
||||
TransferData data = tData[psm];
|
||||
TransferFuncts fun(psm);
|
||||
pstart = g_pbyGSMemory + gs.dstbuf.bp * 256;
|
||||
const T* pbuf = (const T*)pbyMem;
|
||||
const int tp2 = TransPitch(2, data.transfersize);
|
||||
int nLeftOver = (nQWordSize * 4 * 2) % tp2;
|
||||
tempY = gs.imageY;
|
||||
tempX = gs.imageX;
|
||||
tempY = gs.image.y;
|
||||
tempX = gs.image.x;
|
||||
Point alignedPt;
|
||||
|
||||
nSize = (nQWordSize * 4 * 2) / tp2;
|
||||
nSize = min(nSize, gs.imageWnew * gs.imageHnew);
|
||||
nSize = min(nSize, gs.imageNew.w * gs.imageNew.h);
|
||||
|
||||
int endY = ROUND_UPPOW2(gs.imageY, data.blockheight);
|
||||
alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
|
||||
alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);
|
||||
int endY = ROUND_UPPOW2(gs.image.y, data.blockheight);
|
||||
alignedPt.y = ROUND_DOWNPOW2(gs.imageEnd.y, data.blockheight);
|
||||
alignedPt.x = ROUND_DOWNPOW2(gs.imageEnd.x, data.blockwidth);
|
||||
|
||||
pbuf = AlignOnBlockBoundry<T>(data, fun, alignedPt, endY, pbuf);
|
||||
|
||||
|
@ -210,12 +212,12 @@ static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSiz
|
|||
|
||||
if (TransPitch(nSize, data.transfersize) / 4 > 0)
|
||||
{
|
||||
pbuf = TransmitHostLocalY<T>(psm, fun.wp, data.widthlimit, gs.imageEndY, pbuf);
|
||||
pbuf = TransmitHostLocalY<T>(psm, fun.wp, data.widthlimit, gs.imageEnd.y, pbuf);
|
||||
|
||||
if (pbuf == NULL) return FinishTransfer(data, nLeftOver);
|
||||
|
||||
/* sometimes wrong sizes are sent (tekken tag) */
|
||||
assert(gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize) / 4 <= 2);
|
||||
assert(gs.transferring == false || TransPitch(nSize, data.transfersize) / 4 <= 2);
|
||||
}
|
||||
|
||||
return FinishTransfer(data, nLeftOver);
|
||||
|
@ -382,3 +384,5 @@ void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, in
|
|||
m_Blocks[PSMT4] = b;
|
||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -32,7 +32,13 @@ static __forceinline int MOD_POW2(int val, int base) { return ((val)&(base - 1))
|
|||
const int BLOCK_TEXWIDTH = 128;
|
||||
const int BLOCK_TEXHEIGHT = 512;
|
||||
|
||||
extern PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
// PSM is a u6 value, so we MUST guarantee that we don't crash on an incorrect psm.
|
||||
#define MAX_PSM 64
|
||||
#define TABLE_WIDTH 8
|
||||
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
#include "ZZoglMem.h"
|
||||
#endif
|
||||
|
||||
typedef u32(*_getPixelAddress)(int x, int y, u32 bp, u32 bw);
|
||||
typedef u32(*_getPixelAddress_0)(int x, int y, u32 bw);
|
||||
|
@ -54,6 +60,7 @@ extern _SwizzleBlock swizzleBlockUnFun[64];
|
|||
extern _TransferHostLocal TransferHostLocalFun[64];
|
||||
extern _TransferLocalHost TransferLocalHostFun[64];
|
||||
|
||||
|
||||
// Both of the following structs should probably be local class variables or in a namespace,
|
||||
// but this works for the moment.
|
||||
|
||||
|
@ -68,6 +75,9 @@ struct TransferData
|
|||
u32 psm;
|
||||
};
|
||||
|
||||
#ifdef ZZNORMAL_MEMORY
|
||||
extern PCSX2_ALIGNED16(u32 tempblock[64]);
|
||||
|
||||
struct TransferFuncts
|
||||
{
|
||||
_writePixel_0 wp;
|
||||
|
@ -500,6 +510,8 @@ static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
|
|||
|
||||
///////////////
|
||||
|
||||
#endif
|
||||
|
||||
extern int TransferHostLocal32(const void* pbyMem, u32 nQWordSize);
|
||||
extern int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize);
|
||||
extern int TransferHostLocal24(const void* pbyMem, u32 nQWordSize);
|
||||
|
|
|
@ -21,15 +21,11 @@
|
|||
#include "Mem.h"
|
||||
#include "Mem_Swizzle.h"
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
// WARNING: an sfence instruction must be called after a SwizzleBlock sse2 function
|
||||
|
||||
// Current port of the ASM function to intrinsic
|
||||
#define INTRINSIC_PORT_16
|
||||
#define INTRINSIC_PORT_8
|
||||
#define INTRINSIC_PORT_4
|
||||
#ifdef ZEROGS_SSE2
|
||||
static const __aligned16 u32 mask_24b_H[4] = {0xFF000000, 0x0000FFFF, 0xFF000000, 0x0000FFFF};
|
||||
static const __aligned16 u32 mask_24b_L[4] = {0x00FFFFFF, 0x00000000, 0x00FFFFFF, 0x00000000};
|
||||
|
@ -501,29 +497,17 @@ __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch)
|
|||
|
||||
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_16
|
||||
SwizzleBlock16_sse2_I<true>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock16_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_8
|
||||
SwizzleBlock8_sse2_I<true>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock8_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_4
|
||||
SwizzleBlock4_sse2_I<true>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock4_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
|
||||
|
@ -533,29 +517,17 @@ __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch)
|
|||
|
||||
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_16
|
||||
SwizzleBlock16_sse2_I<false>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock16u_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_8
|
||||
SwizzleBlock8_sse2_I<false>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock8u_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch)
|
||||
{
|
||||
#ifdef INTRINSIC_PORT_4
|
||||
SwizzleBlock4_sse2_I<false>(dst, src, pitch);
|
||||
#else
|
||||
SwizzleBlock4u_sse2(dst, src, pitch);
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch)
|
||||
|
|
|
@ -250,6 +250,92 @@ u32 g_pageTable16SZ[64][64];
|
|||
u32 g_pageTable8[64][128];
|
||||
u32 g_pageTable4[128][128];
|
||||
|
||||
|
||||
//maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
|
||||
|
||||
// This table is used for fast access to memory storage data. The fields have the following meaning:
|
||||
// 0 -- the number (1 << [psm][0]) is number of pixels per storage format. It's 0 if stored 1 pixel, 1 for 2 pixels (16-bit), 2 for 4 pixels (PSMT8) and 3 for 8 (PSMT4)
|
||||
// 5 -- is 3 - [psm][0]. Just for speed
|
||||
// 3, 4 -- size-1 of pageTable for psm. It is used to clamp x, y that fall outside the boundaries.
|
||||
// 1, 2 -- the numbers (1 << [psm][1]) and (1 << [psm][2]) are also the sizes of pageTable. So [psm][3] = (1 << [psm][1]) - 1
|
||||
// Also note, that [psm][1] = 5 + ([psm][0] + 1) / 2, and [psm][2] = 6 + [psm][0] / 2.
|
||||
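// 6 -- pixel mask, (1 << [psm][5]) - 1; when applied to a word, it leaves only the bits of the pixel format (NOTE: the formula does not match the table values below, e.g. 0xffffffff for PSMCT32 -- verify)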
|
||||
// 7 -- starting position of the data in the word; PSMT8H, 4HL and 4HH data is not stored from the beginning.
|
||||
u32 ZZ_DT[MAX_PSM][TABLE_WIDTH] = {
|
||||
{0, 5, 6, 31, 63, 3, 0xffffffff, 0}, // 0 PSMCT32
|
||||
{0, 5, 6, 31, 63, 3, 0x00ffffff, 0}, // 1 PSMCT24
|
||||
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 2 PSMCT16
|
||||
{0, }, // 3
|
||||
{0, }, // 4
|
||||
{0, }, // 5
|
||||
{0, }, // 6
|
||||
{0, }, // 7
|
||||
{0, }, // 8
|
||||
{0, }, // 9
|
||||
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 10 PSMCT16S
|
||||
{0, }, // 11
|
||||
{0, }, // 12
|
||||
{0, }, // 13
|
||||
{0, }, // 14
|
||||
{0, }, // 15
|
||||
{0, }, // 16
|
||||
{0, }, // 17
|
||||
{0, }, // 18
|
||||
{2, 6, 7, 63, 127, 1, 0x000000ff, 0}, // 19 PSMT8
|
||||
{3, 7, 7, 127, 127, 0, 0x0000000f, 0}, // 20 PSMT4
|
||||
{0, }, // 21
|
||||
{0, }, // 22
|
||||
{0, }, // 23
|
||||
{0, }, // 24
|
||||
{0, }, // 25
|
||||
{0, }, // 26
|
||||
{0, 5, 6, 31, 63, 3, 0x000000ff, 24}, // 27 PSMT8H
|
||||
{0, }, // 28
|
||||
{0, }, // 29
|
||||
{0, }, // 30
|
||||
{0, }, // 31
|
||||
{0, }, // 32
|
||||
{0, }, // 33
|
||||
{0, }, // 34
|
||||
{0, }, // 35
|
||||
{0, 5, 6, 31, 63, 3, 0x0000000f, 24}, // 36 PSMT4HL
|
||||
{0, }, // 37
|
||||
{0, }, // 38
|
||||
{0, }, // 39
|
||||
{0, }, // 40
|
||||
{0, }, // 41
|
||||
{0, }, // 42
|
||||
{0, }, // 43
|
||||
{0, 5, 6, 31, 63, 3, 0x0000000f, 28}, // 44 PSMT4HH
|
||||
{0, }, // 45
|
||||
{0, }, // 46
|
||||
{0, }, // 47
|
||||
{0, 5, 6, 31, 63, 3, 0xffffffff, 0}, // 48 PSMCT32Z
|
||||
{0, 5, 6, 31, 63, 3, 0x00ffffff, 0}, // 49 PSMCT24Z
|
||||
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 50 PSMCT16Z
|
||||
{0, }, // 51
|
||||
{0, }, // 52
|
||||
{0, }, // 53
|
||||
{0, }, // 54
|
||||
{0, }, // 55
|
||||
{0, }, // 56
|
||||
{0, }, // 57
|
||||
{1, 6, 6, 63, 63, 2, 0x0000ffff, 0}, // 58 PSMCT16SZ
|
||||
{0, }, // 59
|
||||
{0, }, // 60
|
||||
{0, }, // 61
|
||||
{0, }, // 62
|
||||
{0, }, // 63
|
||||
};
|
||||
|
||||
|
||||
//maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
|
||||
u32** g_pageTable[MAX_PSM] = {NULL,};
|
||||
u32** g_blockTable[MAX_PSM] = {NULL, };
|
||||
u32** g_columnTable[MAX_PSM] = {NULL, };
|
||||
u32 g_pageTable2[MAX_PSM][127][127] = {0, };
|
||||
u32** g_pageTableNew[MAX_PSM] = {NULL,};
|
||||
|
||||
/* PSM reference array
|
||||
{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL,
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
#define DSTPSM gs.dstbuf.psm
|
||||
extern int tempX, tempY;
|
||||
extern int pitch, area, fracX;
|
||||
extern int nSize;
|
||||
|
@ -37,13 +36,13 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
|
|||
{
|
||||
assert((nSize % widthlimit) == 0 && widthlimit <= 4);
|
||||
|
||||
if ((gs.imageEndX - gs.trxpos.dx) % widthlimit)
|
||||
if ((gs.imageEnd.x - gs.trxpos.dx) % widthlimit)
|
||||
{
|
||||
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
// ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEnd.x, gs.dstbuf.psm);
|
||||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
|
||||
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += 1, nSize -= 1, buf += 1)
|
||||
{
|
||||
/* write as many pixel at one time as possible */
|
||||
wp(pstart, tempX % 2048, tempY % 2048, buf[0], gs.dstbuf.bw);
|
||||
|
@ -53,7 +52,7 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
|
|||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += widthlimit)
|
||||
{
|
||||
|
||||
/* write as many pixel at one time as possible */
|
||||
|
@ -77,14 +76,14 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
|
|||
}
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
if (tempX >= gs.imageEnd.x)
|
||||
{
|
||||
assert(tempX == gs.imageEndX);
|
||||
assert(tempX == gs.imageEnd.x);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || nSize*sizeof(T) / 4 == 0);
|
||||
assert(gs.transferring == false || nSize*sizeof(T) / 4 == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -96,24 +95,24 @@ static __forceinline const T *TransmitHostLocalY_(_writePixel_0 wp, s32 widthlim
|
|||
template <class T>
|
||||
static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthlimit, int endY, const T *buf)
|
||||
{
|
||||
if (widthlimit != 8 || ((gs.imageEndX - gs.trxpos.dx) % widthlimit))
|
||||
if (widthlimit != 8 || ((gs.imageEnd.x - gs.trxpos.dx) % widthlimit))
|
||||
{
|
||||
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEndX, DSTPSM);
|
||||
//ZZLog::GS_Log("Bad Transmission! %d %d, psm: %d", gs.trxpos.dx, gs.imageEnd.x, gs.dstbuf.psm);
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
|
||||
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += 1, nSize -= 1, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX % 2048, tempY % 2048, *(u32*)(buf), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
if (tempX >= gs.imageEnd.x)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
assert(gs.transferring == false || tempX == gs.imageEnd.x);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || nSize == 0);
|
||||
assert(gs.transferring == false || nSize == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -124,7 +123,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
|
|||
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
|
||||
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit, buf += 3 * widthlimit)
|
||||
{
|
||||
if (nSize < widthlimit) return NULL;
|
||||
|
||||
|
@ -140,9 +139,9 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
|
|||
wp(pstart, (tempX + 7) % 2048, tempY % 2048, *(u32*)(buf + 21), gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
if (tempX >= gs.imageEnd.x)
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || tempX == gs.imageEndX);
|
||||
assert(gs.transferring == false || tempX == gs.imageEnd.x);
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
|
@ -155,7 +154,7 @@ static __forceinline const T *TransmitHostLocalY_24(_writePixel_0 wp, s32 widthl
|
|||
nSize = 0;
|
||||
}
|
||||
|
||||
assert(gs.imageTransfer == -1 || nSize == 0);
|
||||
assert(gs.transferring == false || nSize == 0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -171,7 +170,7 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, s32 widthli
|
|||
{
|
||||
for (; tempY < endY; ++tempY)
|
||||
{
|
||||
for (; tempX < gs.imageEndX && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
for (; tempX < gs.imageEnd.x && nSize > 0; tempX += widthlimit, nSize -= widthlimit)
|
||||
{
|
||||
/* write as many pixels at one time as possible */
|
||||
wp(pstart, tempX % 2048, tempY % 2048, *buf&0x0f, gs.dstbuf.bw);
|
||||
|
@ -200,13 +199,13 @@ static __forceinline const T *TransmitHostLocalY_4(_writePixel_0 wp, s32 widthli
|
|||
}
|
||||
}
|
||||
|
||||
if (tempX >= gs.imageEndX)
|
||||
if (tempX >= gs.imageEnd.x)
|
||||
{
|
||||
tempX = gs.trxpos.dx;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(gs.imageTransfer == -1 || (nSize / 32) == 0);
|
||||
assert(gs.transferring == false || (nSize / 32) == 0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -238,7 +237,7 @@ static __forceinline const T *TransmitHostLocalX_(_writePixel_0 wp, u32 widthlim
|
|||
{
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf++)
|
||||
for (tempX = startX; tempX < gs.imageEnd.x; tempX++, buf++)
|
||||
{
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0], gs.dstbuf.bw);
|
||||
}
|
||||
|
@ -255,7 +254,7 @@ static __forceinline const T *TransmitHostLocalX_24(_writePixel_0 wp, u32 widthl
|
|||
{
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX++, buf += 3)
|
||||
for (tempX = startX; tempX < gs.imageEnd.x; tempX++, buf += 3)
|
||||
{
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, *(u32*)buf, gs.dstbuf.bw);
|
||||
}
|
||||
|
@ -272,7 +271,7 @@ static __forceinline const T *TransmitHostLocalX_4(_writePixel_0 wp, u32 widthli
|
|||
{
|
||||
for (u32 tempi = 0; tempi < blockheight; ++tempi)
|
||||
{
|
||||
for (tempX = startX; tempX < gs.imageEndX; tempX += 2, buf++)
|
||||
for (tempX = startX; tempX < gs.imageEnd.x; tempX += 2, buf++)
|
||||
{
|
||||
wp(pstart, tempX % 2048, (tempY + tempi) % 2048, buf[0]&0x0f, gs.dstbuf.bw);
|
||||
wp(pstart, (tempX + 1) % 2048, (tempY + tempi) % 2048, buf[0] >> 4, gs.dstbuf.bw);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,973 +0,0 @@
|
|||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef NEWREGS_H_INCLUDED
|
||||
#define NEWREGS_H_INCLUDED
|
||||
|
||||
#ifdef USE_OLD_REGS
|
||||
#include "Regs.h"
|
||||
#else
|
||||
|
||||
enum GIF_REG
|
||||
{
|
||||
GIF_REG_PRIM = 0x00,
|
||||
GIF_REG_RGBA = 0x01,
|
||||
GIF_REG_STQ = 0x02,
|
||||
GIF_REG_UV = 0x03,
|
||||
GIF_REG_XYZF2 = 0x04,
|
||||
GIF_REG_XYZ2 = 0x05,
|
||||
GIF_REG_TEX0_1 = 0x06,
|
||||
GIF_REG_TEX0_2 = 0x07,
|
||||
GIF_REG_CLAMP_1 = 0x08,
|
||||
GIF_REG_CLAMP_2 = 0x09,
|
||||
GIF_REG_FOG = 0x0a,
|
||||
GIF_REG_XYZF3 = 0x0c,
|
||||
GIF_REG_XYZ3 = 0x0d,
|
||||
GIF_REG_A_D = 0x0e,
|
||||
GIF_REG_NOP = 0x0f,
|
||||
};
|
||||
|
||||
enum GIF_A_D_REG
|
||||
{
|
||||
GIF_A_D_REG_PRIM = 0x00,
|
||||
GIF_A_D_REG_RGBAQ = 0x01,
|
||||
GIF_A_D_REG_ST = 0x02,
|
||||
GIF_A_D_REG_UV = 0x03,
|
||||
GIF_A_D_REG_XYZF2 = 0x04,
|
||||
GIF_A_D_REG_XYZ2 = 0x05,
|
||||
GIF_A_D_REG_TEX0_1 = 0x06,
|
||||
GIF_A_D_REG_TEX0_2 = 0x07,
|
||||
GIF_A_D_REG_CLAMP_1 = 0x08,
|
||||
GIF_A_D_REG_CLAMP_2 = 0x09,
|
||||
GIF_A_D_REG_FOG = 0x0a,
|
||||
GIF_A_D_REG_XYZF3 = 0x0c,
|
||||
GIF_A_D_REG_XYZ3 = 0x0d,
|
||||
GIF_A_D_REG_NOP = 0x0f,
|
||||
GIF_A_D_REG_TEX1_1 = 0x14,
|
||||
GIF_A_D_REG_TEX1_2 = 0x15,
|
||||
GIF_A_D_REG_TEX2_1 = 0x16,
|
||||
GIF_A_D_REG_TEX2_2 = 0x17,
|
||||
GIF_A_D_REG_XYOFFSET_1 = 0x18,
|
||||
GIF_A_D_REG_XYOFFSET_2 = 0x19,
|
||||
GIF_A_D_REG_PRMODECONT = 0x1a,
|
||||
GIF_A_D_REG_PRMODE = 0x1b,
|
||||
GIF_A_D_REG_TEXCLUT = 0x1c,
|
||||
GIF_A_D_REG_SCANMSK = 0x22,
|
||||
GIF_A_D_REG_MIPTBP1_1 = 0x34,
|
||||
GIF_A_D_REG_MIPTBP1_2 = 0x35,
|
||||
GIF_A_D_REG_MIPTBP2_1 = 0x36,
|
||||
GIF_A_D_REG_MIPTBP2_2 = 0x37,
|
||||
GIF_A_D_REG_TEXA = 0x3b,
|
||||
GIF_A_D_REG_FOGCOL = 0x3d,
|
||||
GIF_A_D_REG_TEXFLUSH = 0x3f,
|
||||
GIF_A_D_REG_SCISSOR_1 = 0x40,
|
||||
GIF_A_D_REG_SCISSOR_2 = 0x41,
|
||||
GIF_A_D_REG_ALPHA_1 = 0x42,
|
||||
GIF_A_D_REG_ALPHA_2 = 0x43,
|
||||
GIF_A_D_REG_DIMX = 0x44,
|
||||
GIF_A_D_REG_DTHE = 0x45,
|
||||
GIF_A_D_REG_COLCLAMP = 0x46,
|
||||
GIF_A_D_REG_TEST_1 = 0x47,
|
||||
GIF_A_D_REG_TEST_2 = 0x48,
|
||||
GIF_A_D_REG_PABE = 0x49,
|
||||
GIF_A_D_REG_FBA_1 = 0x4a,
|
||||
GIF_A_D_REG_FBA_2 = 0x4b,
|
||||
GIF_A_D_REG_FRAME_1 = 0x4c,
|
||||
GIF_A_D_REG_FRAME_2 = 0x4d,
|
||||
GIF_A_D_REG_ZBUF_1 = 0x4e,
|
||||
GIF_A_D_REG_ZBUF_2 = 0x4f,
|
||||
GIF_A_D_REG_BITBLTBUF = 0x50,
|
||||
GIF_A_D_REG_TRXPOS = 0x51,
|
||||
GIF_A_D_REG_TRXREG = 0x52,
|
||||
GIF_A_D_REG_TRXDIR = 0x53,
|
||||
GIF_A_D_REG_HWREG = 0x54,
|
||||
GIF_A_D_REG_SIGNAL = 0x60,
|
||||
GIF_A_D_REG_FINISH = 0x61,
|
||||
GIF_A_D_REG_LABEL = 0x62,
|
||||
};
|
||||
// In case we want to change to/from __fastcall for GIF register handlers:
|
||||
#define __gifCall __fastcall
|
||||
|
||||
typedef void __gifCall FnType_GIFRegHandler(const u32* data);
|
||||
typedef FnType_GIFRegHandler* GIFRegHandler;
|
||||
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerNull;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerRGBA;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerSTQ;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerUV;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZF2;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ2;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerFOG;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerA_D;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerNOP;
|
||||
|
||||
// These are unimplemented, and fall back on the non-packed versions.
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerPRIM;
|
||||
|
||||
template<u32 i>
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerTEX0;
|
||||
|
||||
template<u32 i>
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerCLAMP;
|
||||
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZF3;
|
||||
extern FnType_GIFRegHandler GIFPackedRegHandlerXYZ3;
|
||||
|
||||
extern FnType_GIFRegHandler GIFRegHandlerNull;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerPRIM;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerRGBAQ;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerST;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerUV;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerXYZF2;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerXYZ2;
|
||||
|
||||
template<u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEX0;
|
||||
|
||||
template<u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerCLAMP;
|
||||
|
||||
extern FnType_GIFRegHandler GIFRegHandlerFOG;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerXYZF3;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerXYZ3;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerNOP;
|
||||
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEX1;
|
||||
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEX2;
|
||||
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerXYOFFSET;
|
||||
|
||||
extern FnType_GIFRegHandler GIFRegHandlerPRMODECONT;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerPRMODE;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEXCLUT;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerSCANMSK;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP1;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerMIPTBP2;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEXA;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerFOGCOL;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEXFLUSH;
|
||||
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerSCISSOR;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerALPHA;
|
||||
|
||||
extern FnType_GIFRegHandler GIFRegHandlerDIMX;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerDTHE;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerCOLCLAMP;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTEST;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerPABE;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerFBA;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerFRAME;
|
||||
template <u32 i>
|
||||
extern FnType_GIFRegHandler GIFRegHandlerZBUF;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerBITBLTBUF;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTRXPOS;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTRXREG;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerTRXDIR;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerHWREG;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerSIGNAL;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerFINISH;
|
||||
extern FnType_GIFRegHandler GIFRegHandlerLABEL;
|
||||
|
||||
// GifReg & GifPackedReg structs from GSdx, slightly modified
|
||||
|
||||
enum GS_ATST
|
||||
{
|
||||
ATST_NEVER = 0,
|
||||
ATST_ALWAYS = 1,
|
||||
ATST_LESS = 2,
|
||||
ATST_LEQUAL = 3,
|
||||
ATST_EQUAL = 4,
|
||||
ATST_GEQUAL = 5,
|
||||
ATST_GREATER = 6,
|
||||
ATST_NOTEQUAL = 7,
|
||||
};
|
||||
|
||||
enum GS_AFAIL
|
||||
{
|
||||
AFAIL_KEEP = 0,
|
||||
AFAIL_FB_ONLY = 1,
|
||||
AFAIL_ZB_ONLY = 2,
|
||||
AFAIL_RGB_ONLY = 3,
|
||||
};
|
||||
|
||||
// GIFReg
|
||||
|
||||
REG64_(GIFReg, ALPHA)
|
||||
u32 A:2;
|
||||
u32 B:2;
|
||||
u32 C:2;
|
||||
u32 D:2;
|
||||
u32 _PAD1:24;
|
||||
u32 FIX:8;
|
||||
u32 _PAD2:24;
|
||||
REG_END2
|
||||
// opaque => output will be Cs/As
|
||||
__forceinline bool IsOpaque() const {return (A == B || C == 2 && FIX == 0) && D == 0 || (A == 0 && B == D && C == 2 && FIX == 0x80);}
|
||||
__forceinline bool IsOpaque(int amin, int amax) const {return (A == B || amax == 0) && D == 0 || A == 0 && B == D && amin == 0x80 && amax == 0x80;}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, BITBLTBUF)
|
||||
u32 SBP:14;
|
||||
u32 _PAD1:2;
|
||||
u32 SBW:6;
|
||||
u32 _PAD2:2;
|
||||
u32 SPSM:6;
|
||||
u32 _PAD3:2;
|
||||
u32 DBP:14;
|
||||
u32 _PAD4:2;
|
||||
u32 DBW:6;
|
||||
u32 _PAD5:2;
|
||||
u32 DPSM:6;
|
||||
u32 _PAD6:2;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, CLAMP)
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 WMS:2;
|
||||
u32 WMT:2;
|
||||
u32 MINU:10;
|
||||
u32 MAXU:10;
|
||||
u32 _PAD1:8;
|
||||
u32 _PAD2:2;
|
||||
u32 MAXV:10;
|
||||
u32 _PAD3:20;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u64 _PAD4:24;
|
||||
u64 MINV:10;
|
||||
u64 _PAD5:30;
|
||||
};
|
||||
};
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, COLCLAMP)
|
||||
u32 CLAMP:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, DIMX)
|
||||
s32 DM00:3;
|
||||
s32 _PAD00:1;
|
||||
s32 DM01:3;
|
||||
s32 _PAD01:1;
|
||||
s32 DM02:3;
|
||||
s32 _PAD02:1;
|
||||
s32 DM03:3;
|
||||
s32 _PAD03:1;
|
||||
s32 DM10:3;
|
||||
s32 _PAD10:1;
|
||||
s32 DM11:3;
|
||||
s32 _PAD11:1;
|
||||
s32 DM12:3;
|
||||
s32 _PAD12:1;
|
||||
s32 DM13:3;
|
||||
s32 _PAD13:1;
|
||||
s32 DM20:3;
|
||||
s32 _PAD20:1;
|
||||
s32 DM21:3;
|
||||
s32 _PAD21:1;
|
||||
s32 DM22:3;
|
||||
s32 _PAD22:1;
|
||||
s32 DM23:3;
|
||||
s32 _PAD23:1;
|
||||
s32 DM30:3;
|
||||
s32 _PAD30:1;
|
||||
s32 DM31:3;
|
||||
s32 _PAD31:1;
|
||||
s32 DM32:3;
|
||||
s32 _PAD32:1;
|
||||
s32 DM33:3;
|
||||
s32 _PAD33:1;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, DTHE)
|
||||
u32 DTHE:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FBA)
|
||||
u32 FBA:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FINISH)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FOG)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:24;
|
||||
u32 F:8;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FOGCOL)
|
||||
u32 FCR:8;
|
||||
u32 FCG:8;
|
||||
u32 FCB:8;
|
||||
u32 _PAD1:8;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, FRAME)
|
||||
u32 FBP:9;
|
||||
u32 _PAD1:7;
|
||||
u32 FBW:6;
|
||||
u32 _PAD2:2;
|
||||
u32 PSM:6;
|
||||
u32 _PAD3:2;
|
||||
u32 FBMSK:32;
|
||||
REG_END2
|
||||
u32 Block() const {return FBP << 5;}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, HWREG)
|
||||
u32 DATA_LOWER:32;
|
||||
u32 DATA_UPPER:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, LABEL)
|
||||
u32 ID:32;
|
||||
u32 IDMSK:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, MIPTBP1)
|
||||
u64 TBP1:14;
|
||||
u64 TBW1:6;
|
||||
u64 TBP2:14;
|
||||
u64 TBW2:6;
|
||||
u64 TBP3:14;
|
||||
u64 TBW3:6;
|
||||
u64 _PAD:4;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, MIPTBP2)
|
||||
u64 TBP4:14;
|
||||
u64 TBW4:6;
|
||||
u64 TBP5:14;
|
||||
u64 TBW5:6;
|
||||
u64 TBP6:14;
|
||||
u64 TBW6:6;
|
||||
u64 _PAD:4;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, NOP)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, PABE)
|
||||
u32 PABE:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, PRIM)
|
||||
u32 PRIM:3;
|
||||
u32 IIP:1;
|
||||
u32 TME:1;
|
||||
u32 FGE:1;
|
||||
u32 ABE:1;
|
||||
u32 AA1:1;
|
||||
u32 FST:1;
|
||||
u32 CTXT:1;
|
||||
u32 FIX:1;
|
||||
u32 _PAD1:21;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, PRMODE)
|
||||
u32 _PRIM:3;
|
||||
u32 IIP:1;
|
||||
u32 TME:1;
|
||||
u32 FGE:1;
|
||||
u32 ABE:1;
|
||||
u32 AA1:1;
|
||||
u32 FST:1;
|
||||
u32 CTXT:1;
|
||||
u32 FIX:1;
|
||||
u32 _PAD2:21;
|
||||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, PRMODECONT)
|
||||
u32 AC:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, RGBAQ)
|
||||
u32 R:8;
|
||||
u32 G:8;
|
||||
u32 B:8;
|
||||
u32 A:8;
|
||||
float Q;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, SCANMSK)
|
||||
u32 MSK:2;
|
||||
u32 _PAD1:30;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, SCISSOR)
|
||||
u32 SCAX0:11;
|
||||
u32 _PAD1:5;
|
||||
u32 SCAX1:11;
|
||||
u32 _PAD2:5;
|
||||
u32 SCAY0:11;
|
||||
u32 _PAD3:5;
|
||||
u32 SCAY1:11;
|
||||
u32 _PAD4:5;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, SIGNAL)
|
||||
u32 ID:32;
|
||||
u32 IDMSK:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, ST)
|
||||
float S;
|
||||
float T;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TEST)
|
||||
u32 ATE:1;
|
||||
u32 ATST:3;
|
||||
u32 AREF:8;
|
||||
u32 AFAIL:2;
|
||||
u32 DATE:1;
|
||||
u32 DATM:1;
|
||||
u32 ZTE:1;
|
||||
u32 ZTST:2;
|
||||
u32 _PAD1:13;
|
||||
u32 _PAD2:32;
|
||||
REG_END2
|
||||
__forceinline bool DoFirstPass() {return !ATE || ATST != ATST_NEVER;} // not all pixels fail automatically
|
||||
__forceinline bool DoSecondPass() {return ATE && ATST != ATST_ALWAYS && AFAIL != AFAIL_KEEP;} // pixels may fail, write fb/z
|
||||
__forceinline bool NoSecondPass() {return ATE && ATST != ATST_ALWAYS && AFAIL == AFAIL_KEEP;} // pixels may fail, no output
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, TEX0)
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 TBP0:14;
|
||||
u32 TBW:6;
|
||||
u32 PSM:6;
|
||||
u32 TW:4;
|
||||
u32 _PAD1:2;
|
||||
u32 _PAD2:2;
|
||||
u32 TCC:1;
|
||||
u32 TFX:2;
|
||||
u32 CBP:14;
|
||||
u32 CPSM:4;
|
||||
u32 CSM:1;
|
||||
u32 CSA:5;
|
||||
u32 CLD:3;
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
u64 _PAD3:30;
|
||||
u64 TH:4;
|
||||
u64 _PAD4:30;
|
||||
};
|
||||
};
|
||||
REG_END2
|
||||
__forceinline bool IsRepeating() {return (u32)((u32)1 << TW) > (u32)(TBW << (u32)6);}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, TEX1)
|
||||
u32 LCM:1;
|
||||
u32 _PAD1:1;
|
||||
u32 MXL:3;
|
||||
u32 MMAG:1;
|
||||
u32 MMIN:3;
|
||||
u32 MTBA:1;
|
||||
u32 _PAD2:9;
|
||||
u32 L:2;
|
||||
u32 _PAD3:11;
|
||||
s32 K:12; // 1:7:4
|
||||
u32 _PAD4:20;
|
||||
REG_END2
|
||||
bool IsMinLinear() const {return (MMIN == 1) || (MMIN & 4);}
|
||||
bool IsMagLinear() const {return MMAG;}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, TEX2)
|
||||
u32 _PAD1:20;
|
||||
u32 PSM:6;
|
||||
u32 _PAD2:6;
|
||||
u32 _PAD3:5;
|
||||
u32 CBP:14;
|
||||
u32 CPSM:4;
|
||||
u32 CSM:1;
|
||||
u32 CSA:5;
|
||||
u32 CLD:3;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TEXA)
|
||||
u32 TA0:8;
|
||||
u32 _PAD1:7;
|
||||
u32 AEM:1;
|
||||
u32 _PAD2:16;
|
||||
u32 TA1:8;
|
||||
u32 _PAD3:24;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TEXCLUT)
|
||||
u32 CBW:6;
|
||||
u32 COU:6;
|
||||
u32 COV:10;
|
||||
u32 _PAD1:10;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TEXFLUSH)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TRXDIR)
|
||||
u32 XDIR:2;
|
||||
u32 _PAD1:30;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TRXPOS)
|
||||
u32 SSAX:11;
|
||||
u32 _PAD1:5;
|
||||
u32 SSAY:11;
|
||||
u32 _PAD2:5;
|
||||
u32 DSAX:11;
|
||||
u32 _PAD3:5;
|
||||
u32 DSAY:11;
|
||||
u32 DIRY:1;
|
||||
u32 DIRX:1;
|
||||
u32 _PAD4:3;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, TRXREG)
|
||||
u32 RRW:12;
|
||||
u32 _PAD1:20;
|
||||
u32 RRH:12;
|
||||
u32 _PAD2:20;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, UV)
|
||||
u32 U:14;
|
||||
u32 _PAD1:2;
|
||||
u32 V:14;
|
||||
u32 _PAD2:2;
|
||||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, XYOFFSET)
|
||||
u32 OFX:16;
|
||||
u32 _PAD1:16;
|
||||
u32 OFY:16;
|
||||
u32 _PAD2:16;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, XYZ)
|
||||
u32 X:16;
|
||||
u32 Y:16;
|
||||
u32 Z:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, XYZF)
|
||||
u32 X:16;
|
||||
u32 Y:16;
|
||||
u32 Z:24;
|
||||
u32 F:8;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, ZBUF)
|
||||
u32 ZBP:9;
|
||||
u32 _PAD1:15;
|
||||
// u32 PSM:4;
|
||||
// u32 _PAD2:4;
|
||||
u32 PSM:6;
|
||||
u32 _PAD2:2;
|
||||
u32 ZMSK:1;
|
||||
u32 _PAD3:31;
|
||||
REG_END2
|
||||
u32 Block() const {return ZBP << 5;}
|
||||
REG_END2
|
||||
|
||||
REG64_SET(GIFReg)
|
||||
GIFRegALPHA ALPHA;
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
GIFRegCLAMP CLAMP;
|
||||
GIFRegCOLCLAMP COLCLAMP;
|
||||
GIFRegDIMX DIMX;
|
||||
GIFRegDTHE DTHE;
|
||||
GIFRegFBA FBA;
|
||||
GIFRegFINISH FINISH;
|
||||
GIFRegFOG FOG;
|
||||
GIFRegFOGCOL FOGCOL;
|
||||
GIFRegFRAME FRAME;
|
||||
GIFRegHWREG HWREG;
|
||||
GIFRegLABEL LABEL;
|
||||
GIFRegMIPTBP1 MIPTBP1;
|
||||
GIFRegMIPTBP2 MIPTBP2;
|
||||
GIFRegNOP NOP;
|
||||
GIFRegPABE PABE;
|
||||
GIFRegPRIM PRIM;
|
||||
GIFRegPRMODE PRMODE;
|
||||
GIFRegPRMODECONT PRMODECONT;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegSCANMSK SCANMSK;
|
||||
GIFRegSCISSOR SCISSOR;
|
||||
GIFRegSIGNAL SIGNAL;
|
||||
GIFRegST ST;
|
||||
GIFRegTEST TEST;
|
||||
GIFRegTEX0 TEX0;
|
||||
GIFRegTEX1 TEX1;
|
||||
GIFRegTEX2 TEX2;
|
||||
GIFRegTEXA TEXA;
|
||||
GIFRegTEXCLUT TEXCLUT;
|
||||
GIFRegTEXFLUSH TEXFLUSH;
|
||||
GIFRegTRXDIR TRXDIR;
|
||||
GIFRegTRXPOS TRXPOS;
|
||||
GIFRegTRXREG TRXREG;
|
||||
GIFRegUV UV;
|
||||
GIFRegXYOFFSET XYOFFSET;
|
||||
GIFRegXYZ XYZ;
|
||||
GIFRegXYZF XYZF;
|
||||
GIFRegZBUF ZBUF;
|
||||
REG_SET_END
|
||||
|
||||
// GIFPacked
|
||||
|
||||
REG128_(GIFPacked, PRIM)
|
||||
u32 PRIM:11;
|
||||
u32 _PAD1:21;
|
||||
u32 _PAD2:32;
|
||||
u32 _PAD3:32;
|
||||
u32 _PAD4:32;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, RGBA)
|
||||
u32 R:8;
|
||||
u32 _PAD1:24;
|
||||
u32 G:8;
|
||||
u32 _PAD2:24;
|
||||
u32 B:8;
|
||||
u32 _PAD3:24;
|
||||
u32 A:8;
|
||||
u32 _PAD4:24;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, STQ)
|
||||
float S;
|
||||
float T;
|
||||
float Q;
|
||||
u32 _PAD1:32;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, UV)
|
||||
u32 U:14;
|
||||
u32 _PAD1:18;
|
||||
u32 V:14;
|
||||
u32 _PAD2:18;
|
||||
u32 _PAD3:32;
|
||||
u32 _PAD4:32;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, XYZF2)
|
||||
u32 X:16;
|
||||
u32 _PAD1:16;
|
||||
u32 Y:16;
|
||||
u32 _PAD2:16;
|
||||
u32 _PAD3:4;
|
||||
u32 Z:24;
|
||||
u32 _PAD4:4;
|
||||
u32 _PAD5:4;
|
||||
u32 F:8;
|
||||
u32 _PAD6:3;
|
||||
u32 ADC:1;
|
||||
u32 _PAD7:16;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, XYZ2)
|
||||
u32 X:16;
|
||||
u32 _PAD1:16;
|
||||
u32 Y:16;
|
||||
u32 _PAD2:16;
|
||||
u32 Z:32;
|
||||
u32 _PAD3:15;
|
||||
u32 ADC:1;
|
||||
u32 _PAD4:16;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, FOG)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:32;
|
||||
u32 _PAD3:32;
|
||||
u32 _PAD4:4;
|
||||
u32 F:8;
|
||||
u32 _PAD5:20;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, A_D)
|
||||
u64 DATA:64;
|
||||
u32 ADDR:8; // enum GIF_A_D_REG
|
||||
u32 _PAD1:24;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG128_(GIFPacked, NOP)
|
||||
u32 _PAD1:32;
|
||||
u32 _PAD2:32;
|
||||
u32 _PAD3:32;
|
||||
u32 _PAD4:32;
|
||||
REG_END
|
||||
|
||||
REG128_SET(GIFPackedReg)
|
||||
GIFReg r;
|
||||
GIFPackedPRIM PRIM;
|
||||
GIFPackedRGBA RGBA;
|
||||
GIFPackedSTQ STQ;
|
||||
GIFPackedUV UV;
|
||||
GIFPackedXYZF2 XYZF2;
|
||||
GIFPackedXYZ2 XYZ2;
|
||||
GIFPackedFOG FOG;
|
||||
GIFPackedA_D A_D;
|
||||
GIFPackedNOP NOP;
|
||||
REG_SET_END
|
||||
|
||||
REG64_(GSReg, BGCOLOR)
|
||||
u32 R:8;
|
||||
u32 G:8;
|
||||
u32 B:8;
|
||||
u32 _PAD1:8;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, BUSDIR)
|
||||
u32 DIR:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, CSR)
|
||||
u32 SIGNAL:1;
|
||||
u32 FINISH:1;
|
||||
u32 HSINT:1;
|
||||
u32 VSINT:1;
|
||||
u32 EDWINT:1;
|
||||
u32 ZERO1:1;
|
||||
u32 ZERO2:1;
|
||||
u32 _PAD1:1;
|
||||
u32 FLUSH:1;
|
||||
u32 RESET:1;
|
||||
u32 _PAD2:2;
|
||||
u32 NFIELD:1;
|
||||
u32 FIELD:1;
|
||||
u32 FIFO:2;
|
||||
u32 REV:8;
|
||||
u32 ID:8;
|
||||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, DISPFB) // (-1/2)
|
||||
u32 FBP:9;
|
||||
u32 FBW:6;
|
||||
u32 PSM:5;
|
||||
u32 _PAD:12;
|
||||
u32 DBX:11;
|
||||
u32 DBY:11;
|
||||
u32 _PAD2:10;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, DISPLAY) // (-1/2)
|
||||
u32 DX:12;
|
||||
u32 DY:11;
|
||||
u32 MAGH:4;
|
||||
u32 MAGV:2;
|
||||
u32 _PAD:3;
|
||||
u32 DW:12;
|
||||
u32 DH:11;
|
||||
u32 _PAD2:9;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, EXTBUF)
|
||||
u32 EXBP:14;
|
||||
u32 EXBW:6;
|
||||
u32 FBIN:2;
|
||||
u32 WFFMD:1;
|
||||
u32 EMODA:2;
|
||||
u32 EMODC:2;
|
||||
u32 _PAD1:5;
|
||||
u32 WDX:11;
|
||||
u32 WDY:11;
|
||||
u32 _PAD2:10;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, EXTDATA)
|
||||
u32 SX:12;
|
||||
u32 SY:11;
|
||||
u32 SMPH:4;
|
||||
u32 SMPV:2;
|
||||
u32 _PAD1:3;
|
||||
u32 WW:12;
|
||||
u32 WH:11;
|
||||
u32 _PAD2:9;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, EXTWRITE)
|
||||
u32 WRITE;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, IMR)
|
||||
u32 _PAD1:8;
|
||||
u32 SIGMSK:1;
|
||||
u32 FINISHMSK:1;
|
||||
u32 HSMSK:1;
|
||||
u32 VSMSK:1;
|
||||
u32 EDWMSK:1;
|
||||
u32 _PAD2:19;
|
||||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, PMODE)
|
||||
u32 EN1:1;
|
||||
u32 EN2:1;
|
||||
u32 CRTMD:3;
|
||||
u32 MMOD:1;
|
||||
u32 AMOD:1;
|
||||
u32 SLBG:1;
|
||||
u32 ALP:8;
|
||||
u32 _PAD:16;
|
||||
u32 _PAD1:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, SIGLBLID)
|
||||
u32 SIGID:32;
|
||||
u32 LBLID:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, SMODE1)
|
||||
u32 RC:3;
|
||||
u32 LC:7;
|
||||
u32 T1248:2;
|
||||
u32 SLCK:1;
|
||||
u32 CMOD:2;
|
||||
u32 EX:1;
|
||||
u32 PRST:1;
|
||||
u32 SINT:1;
|
||||
u32 XPCK:1;
|
||||
u32 PCK2:2;
|
||||
u32 SPML:4;
|
||||
u32 GCONT:1;
|
||||
u32 PHS:1;
|
||||
u32 PVS:1;
|
||||
u32 PEHS:1;
|
||||
u32 PEVS:1;
|
||||
u32 CLKSEL:2;
|
||||
u32 NVCK:1;
|
||||
u32 SLCK2:1;
|
||||
u32 VCKSEL:2;
|
||||
u32 VHP:1;
|
||||
u32 _PAD1:27;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, SMODE2)
|
||||
u32 INT:1;
|
||||
u32 FFMD:1;
|
||||
u32 DPMS:2;
|
||||
u32 _PAD2:28;
|
||||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
REG64_(GSReg, SIGBLID)
|
||||
u32 SIGID;
|
||||
u32 LBLID;
|
||||
REG_END
|
||||
|
||||
#define PMODE ((GSRegPMODE*)(g_pBasePS2Mem+0x0000))
|
||||
#define SMODE1 ((GSRegSMODE1*)(g_pBasePS2Mem+0x0010))
|
||||
#define SMODE2 ((GSRegSMODE2*)(g_pBasePS2Mem+0x0020))
|
||||
// SRFSH
|
||||
#define SYNCH1 ((GSRegSYNCH1*)(g_pBasePS2Mem+0x0040))
|
||||
#define SYNCH2 ((GSRegSYNCH2*)(g_pBasePS2Mem+0x0050))
|
||||
#define SYNCV ((GSRegSYNCV*)(g_pBasePS2Mem+0x0060))
|
||||
#define DISPFB1 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0070))
|
||||
#define DISPLAY1 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x0080))
|
||||
#define DISPFB2 ((GSRegDISPFB*)(g_pBasePS2Mem+0x0090))
|
||||
#define DISPLAY2 ((GSRegDISPLAY*)(g_pBasePS2Mem+0x00a0))
|
||||
#define EXTBUF ((GSRegEXTBUF*)(g_pBasePS2Mem+0x00b0))
|
||||
#define EXTDATA ((GSRegEXTDATA*)(g_pBasePS2Mem+0x00c0))
|
||||
#define EXTWRITE ((GSRegEXTWRITE*)(g_pBasePS2Mem+0x00d0))
|
||||
#define BGCOLOR ((GSRegBGCOLOR*)(g_pBasePS2Mem+0x00e0))
|
||||
#define CSR ((GSRegCSR*)(g_pBasePS2Mem+0x1000))
|
||||
#define IMR ((GSRegIMR*)(g_pBasePS2Mem+0x1010))
|
||||
#define BUSDIR ((GSRegBUSDIR*)(g_pBasePS2Mem+0x1040))
|
||||
#define SIGLBLID ((GSRegSIGBLID*)(g_pBasePS2Mem+0x1080))
|
||||
|
||||
//
|
||||
// sps2tags.h
|
||||
//
|
||||
#define GET_GIF_REG(tag, reg) \
|
||||
(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
|
||||
|
||||
#define GET_GSFPS (((SMODE1->CMOD&1) ? 50 : 60) / (SMODE2->INT ? 1 : 2))
|
||||
|
||||
extern void WriteTempRegs();
|
||||
extern void SetFrameSkip(bool skip);
|
||||
extern void ResetRegs();
|
||||
|
||||
extern void SetTexFlush();
|
||||
extern void SetFogColor(u32 fog);
|
||||
extern void SetFogColor(GIFRegFOGCOL* fog);
|
||||
extern bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will change after this tex0 op
|
||||
|
||||
// flush current vertices, call before setting new registers (the main render method)
|
||||
void Flush(int context);
|
||||
void FlushBoth();
|
||||
|
||||
// called on a primitive switch
|
||||
void Prim();
|
||||
|
||||
#endif
|
||||
|
||||
#endif // NEWREGS_H_INCLUDED
|
|
@ -26,7 +26,6 @@
|
|||
#include "ZZoglVB.h"
|
||||
#include "ZZoglDrawing.h"
|
||||
|
||||
#ifdef USE_OLD_REGS
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244)
|
||||
|
@ -157,6 +156,9 @@ void __gifCall GIFPackedRegHandlerFOG(const u32* data)
|
|||
void __gifCall GIFPackedRegHandlerA_D(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// GIFPackedA_D* r = (GIFPackedA_D*)(data);
|
||||
//
|
||||
// g_GIFRegHandlers[r->ADDR](data);
|
||||
|
||||
if ((data[2] & 0xff) < 100)
|
||||
g_GIFRegHandlers[data[2] & 0xff](data);
|
||||
|
@ -188,21 +190,20 @@ void __gifCall GIFRegHandlerNull(const u32* data)
|
|||
void __gifCall GIFRegHandlerPRIM(const u32 *data)
|
||||
{
|
||||
FUNCLOG
|
||||
GIFRegPRIM* r = (GIFRegPRIM*)(data);
|
||||
|
||||
//if (data[0] & ~0x3ff)
|
||||
//{
|
||||
//ZZLog::Warn_Log("Warning: unknown bits in prim %8.8lx_%8.8lx", data[1], data[0]);
|
||||
//}
|
||||
|
||||
|
||||
gs.primC = 0;
|
||||
u16 prim_type = (data[0]) & 0x7;
|
||||
prim->prim = prim_type;
|
||||
gs._prim[0].prim = prim_type;
|
||||
gs._prim[1].prim = prim_type;
|
||||
gs._prim[1]._val = (data[0] >> 3) & 0xff;
|
||||
prim->prim = r->PRIM;
|
||||
gs._prim[0].prim = r->PRIM;
|
||||
gs._prim[1].prim = r->PRIM;
|
||||
gs._prim[1]._val = (data[0] >> 3) & 0xff; // Setting the next 8 flags after prim at once.
|
||||
|
||||
gs.new_tri_fan = !(prim_type ^ PRIM_TRIANGLE_FAN);
|
||||
gs.new_tri_fan = !(r->PRIM ^ PRIM_TRIANGLE_FAN);
|
||||
ZZKick->DirtyValidPrevPrim();
|
||||
|
||||
Prim();
|
||||
|
@ -211,6 +212,10 @@ void __gifCall GIFRegHandlerPRIM(const u32 *data)
|
|||
void __gifCall GIFRegHandlerRGBAQ(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// GIFRegRGBAQ* r = (GIFRegRGBAQ*)(data);
|
||||
// gs.rgba = (r->R | (r->G << 8) | (r->B << 16) | (r->A << 24));
|
||||
// gs.vertexregs.rgba = gs.rgba;
|
||||
// gs.vertexregs.q = r->Q;
|
||||
gs.rgba = data[0];
|
||||
gs.vertexregs.rgba = data[0];
|
||||
*(u32*)&gs.vertexregs.q = data[1];
|
||||
|
@ -219,6 +224,9 @@ void __gifCall GIFRegHandlerRGBAQ(const u32* data)
|
|||
void __gifCall GIFRegHandlerST(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// GIFRegST* r = (GIFRegST*)(data);
|
||||
// gs.vertexregs.s = r->S;
|
||||
// gs.vertexregs.t = r->T;
|
||||
*(u32*)&gs.vertexregs.s = data[0] & 0xffffff00;
|
||||
*(u32*)&gs.vertexregs.t = data[1] & 0xffffff00;
|
||||
//*(u32*)&gs.q = data[2];
|
||||
|
@ -445,7 +453,10 @@ void __gifCall GIFRegHandlerXYOFFSET(const u32* data)
|
|||
void __gifCall GIFRegHandlerPRMODECONT(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
gs.prac = data[0] & 0x1;
|
||||
// Turns all the text into colored blocks on the initial Mana Khemia dialog if not run.
|
||||
GIFRegPRMODECONT* r = (GIFRegPRMODECONT*)(data);
|
||||
// gs.prac = data[0] & 0x1;
|
||||
gs.prac = r->AC;
|
||||
prim = &gs._prim[gs.prac];
|
||||
|
||||
Prim();
|
||||
|
@ -468,6 +479,7 @@ void __gifCall GIFRegHandlerTEXCLUT(const u32* data)
|
|||
vb[0].FlushTexData();
|
||||
vb[1].FlushTexData();
|
||||
|
||||
//Fixme
|
||||
gs.clut.cbw = r->CBW << 6;
|
||||
gs.clut.cou = r->COU << 4;
|
||||
gs.clut.cov = r->COV;
|
||||
|
@ -477,9 +489,6 @@ void __gifCall GIFRegHandlerSCANMSK(const u32* data)
|
|||
{
|
||||
FUNCLOG
|
||||
GIFRegSCANMSK* r = (GIFRegSCANMSK*)(data);
|
||||
// FlushBoth();
|
||||
// ResolveC(&vb[0]);
|
||||
// ResolveZ(&vb[0]);
|
||||
|
||||
gs.smask = r->MSK;
|
||||
REG_LOG("Scanmsk == %d", gs.smask);
|
||||
|
@ -534,23 +543,20 @@ void __gifCall GIFRegHandlerMIPTBP2(const u32* data)
|
|||
void __gifCall GIFRegHandlerTEXA(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
texaInfo newinfo;
|
||||
newinfo.aem = (data[0] >> 15) & 0x1;
|
||||
newinfo.ta[0] = data[0] & 0xff;
|
||||
newinfo.ta[1] = data[1] & 0xff;
|
||||
// Background of initial Mana Khemia dialog.
|
||||
GIFRegTEXA* r = (GIFRegTEXA*)(data);
|
||||
|
||||
if (*(u32*)&newinfo != *(u32*)&gs.texa)
|
||||
if ((r->AEM != gs.texa.aem) || (r->TA0 != gs.texa.ta[0]) || (r->TA1 != gs.texa.ta[1]))
|
||||
{
|
||||
FlushBoth();
|
||||
|
||||
*(u32*)&gs.texa = *(u32*) & newinfo;
|
||||
|
||||
gs.texa.fta[0] = newinfo.ta[0] / 255.0f;
|
||||
gs.texa.fta[1] = newinfo.ta[1] / 255.0f;
|
||||
|
||||
vb[0].bTexConstsSync = false;
|
||||
vb[1].bTexConstsSync = false;
|
||||
}
|
||||
|
||||
gs.texa.aem = r->AEM;
|
||||
gs.texa.ta[0] = r->TA0;
|
||||
gs.texa.ta[1] = r->TA1;
|
||||
}
|
||||
|
||||
void __gifCall GIFRegHandlerFOGCOL(const u32* data)
|
||||
|
@ -564,6 +570,7 @@ void __gifCall GIFRegHandlerFOGCOL(const u32* data)
|
|||
void __gifCall GIFRegHandlerTEXFLUSH(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// Not actually handled by GSDX.
|
||||
SetTexFlush();
|
||||
}
|
||||
|
||||
|
@ -597,6 +604,12 @@ void __gifCall GIFRegHandlerALPHA(const u32* data)
|
|||
{
|
||||
FUNCLOG
|
||||
alphaInfo newalpha;
|
||||
// newalpha.a = r->A;
|
||||
// newalpha.b = r->B;
|
||||
// newalpha.c = r->C;
|
||||
// newalpha.d = r->D;
|
||||
// newalpha.fix = r->FIX;
|
||||
|
||||
newalpha.abcd = *(u8*)data;
|
||||
newalpha.fix = *(u8*)(data + 1);
|
||||
|
||||
|
@ -610,9 +623,11 @@ void __gifCall GIFRegHandlerALPHA(const u32* data)
|
|||
if (newalpha.d == 3) newalpha.d = 0;
|
||||
|
||||
*(u16*)&vb[ctxt].alpha = *(u16*) & newalpha;
|
||||
// vb[i].alpha = newalpha;
|
||||
}
|
||||
}
|
||||
|
||||
// DIMX & DTHE are both for dithering, and not currently implemented.
|
||||
void __gifCall GIFRegHandlerDIMX(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
|
@ -647,14 +662,18 @@ template <u32 ctxt>
|
|||
void __gifCall GIFRegHandlerTEST(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// GIFRegTEST* r = (GIFRegTEST*)(data);
|
||||
|
||||
pixTest* test = &vb[ctxt].test;
|
||||
|
||||
if ((*(u32*)test & 0x0007ffff) == (data[0] & 0x0007ffff)) return;
|
||||
|
||||
//if (test->_val != r->ai32[0])
|
||||
if ((*(u32*)test & 0x0007ffff) != (data[0] & 0x0007ffff))
|
||||
{
|
||||
Flush(ctxt);
|
||||
|
||||
*(u32*)test = data[0];
|
||||
//test->_val = r->ai32[0];
|
||||
}
|
||||
|
||||
// test.ate = (data[0] ) & 0x1;
|
||||
// test.atst = (data[0] >> 1) & 0x7;
|
||||
|
@ -688,6 +707,48 @@ void __gifCall GIFRegHandlerFBA(const u32* data)
|
|||
vb[ctxt].fba.fba = r->FBA;
|
||||
}
|
||||
|
||||
/*
|
||||
template<u32 i>
|
||||
void __gifCall GIFRegHandlerFRAME(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// Affects opening dialogs, movie, and menu on Mana Khemia.
|
||||
|
||||
GIFRegFRAME* r = (GIFRegFRAME*)(data);
|
||||
frameInfo& gsfb = vb[i].gsfb;
|
||||
|
||||
int fbw = r->FBW * 64;
|
||||
int fbp = r->FBP * 32;
|
||||
int fbh = 0;
|
||||
|
||||
if (gs.dthe != 0)
|
||||
{
|
||||
// Dither here.
|
||||
//ZZLog::Error_Log("frameWrite: Dither!");
|
||||
}
|
||||
|
||||
if ((gsfb.fbp == fbp) &&
|
||||
(gsfb.fbw == fbw) &&
|
||||
(gsfb.psm == r->PSM) &&
|
||||
(gsfb.fbm == ZZOglGet_fbm_FrameBitsFix(data[0], data[1])))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
FlushBoth();
|
||||
if (r->FBW > 0) fbh = ZZOgl_fbh_Calc(r->FBP, r->FBW, r->PSM);
|
||||
|
||||
gsfb.fbp = fbp;
|
||||
gsfb.fbw = fbw;
|
||||
gsfb.psm = r->PSM;
|
||||
gsfb.fbh = fbh;
|
||||
gsfb.fbm = ZZOglGet_fbm_FrameBitsFix(data[0], data[1]);
|
||||
|
||||
|
||||
vb[i].bNeedFrameCheck = 1;
|
||||
ZZLog::Greg_Log("FRAME_%d", i);
|
||||
}*/
|
||||
|
||||
template <u32 ctxt>
|
||||
void __gifCall GIFRegHandlerFRAME(const u32* data)
|
||||
{
|
||||
|
@ -715,6 +776,41 @@ void __gifCall GIFRegHandlerFRAME(const u32* data)
|
|||
vb[ctxt].bNeedFrameCheck = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
template <u32 i>
|
||||
void __gifCall GIFRegHandlerZBUF(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// I'll wait a bit on this one.
|
||||
GIFRegZBUF* r = (GIFRegZBUF*)(data);
|
||||
ZZLog::Greg_Log("ZBUF_1");
|
||||
|
||||
zbufInfo& zbuf = vb[i].zbuf;
|
||||
int psm = (0x30 | r->PSM);
|
||||
int zbp = r->ZBP * 32;
|
||||
|
||||
if (zbuf.zbp == zbp &&
|
||||
zbuf.psm == psm &&
|
||||
zbuf.zmsk == r->ZMSK)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// error detection
|
||||
if (m_Blocks[psm].bpp == 0) return;
|
||||
|
||||
FlushBoth();
|
||||
|
||||
zbuf.zbp = zbp;
|
||||
zbuf.psm = psm;
|
||||
zbuf.zmsk = r->ZMSK;
|
||||
|
||||
vb[i].zprimmask = 0xffffffff;
|
||||
|
||||
if (zbuf.psm > 0x31) vb[i].zprimmask = 0xffff;
|
||||
|
||||
vb[i].bNeedZCheck = 1;
|
||||
}*/
|
||||
template <u32 ctxt>
|
||||
void __gifCall GIFRegHandlerZBUF(const u32* data)
|
||||
{
|
||||
|
@ -758,6 +854,17 @@ void __gifCall GIFRegHandlerBITBLTBUF(const u32* data)
|
|||
gs.dstbufnew.psm = r->DPSM;
|
||||
|
||||
if (gs.dstbufnew.bw == 0) gs.dstbufnew.bw = 64;
|
||||
// GSdx does this:
|
||||
|
||||
/*if((gs.srcbufnew.bw & 1) && (gs.srcbufnew.psm == PSM_PSMT8 || gs.srcbufnew.psm == PSM_PSMT4))
|
||||
{
|
||||
gs.srcbufnew.bw &= ~1;
|
||||
}
|
||||
|
||||
if((gs.dstbufnew.bw & 1) && (gs.dstbufnew.psm == PSM_PSMT8 || gs.dstbufnew.psm == PSM_PSMT4))
|
||||
{
|
||||
gs.dstbufnew.bw &= ~1; // namcoXcapcom: 5, 11, refered to as 4, 10 in TEX0.TBW later
|
||||
}*/
|
||||
}
|
||||
|
||||
void __gifCall GIFRegHandlerTRXPOS(const u32* data)
|
||||
|
@ -777,53 +884,44 @@ void __gifCall GIFRegHandlerTRXREG(const u32* data)
|
|||
{
|
||||
FUNCLOG
|
||||
GIFRegTRXREG* r = (GIFRegTRXREG*)(data);
|
||||
gs.imageWtemp = r->RRW;
|
||||
gs.imageHtemp = r->RRH;
|
||||
gs.imageTemp.w = r->RRW;
|
||||
gs.imageTemp.h = r->RRH;
|
||||
}
|
||||
|
||||
void __gifCall GIFRegHandlerTRXDIR(const u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
// terminate any previous transfers
|
||||
|
||||
switch (gs.imageTransfer)
|
||||
{
|
||||
|
||||
case 0: // host->loc
|
||||
TerminateHostLocal();
|
||||
break;
|
||||
|
||||
case 1: // loc->host
|
||||
TerminateLocalHost();
|
||||
break;
|
||||
}
|
||||
GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
|
||||
|
||||
gs.srcbuf = gs.srcbufnew;
|
||||
|
||||
gs.dstbuf = gs.dstbufnew;
|
||||
gs.trxpos = gs.trxposnew;
|
||||
gs.imageTransfer = data[0] & 0x3;
|
||||
gs.imageWnew = gs.imageWtemp;
|
||||
gs.imageHnew = gs.imageHtemp;
|
||||
|
||||
if (gs.imageWnew > 0 && gs.imageHnew > 0)
|
||||
gs.imageNew.w = gs.imageTemp.w;
|
||||
gs.imageNew.h = gs.imageTemp.h;
|
||||
|
||||
gs.trxpos = gs.trxposnew;
|
||||
gs.imageTransfer = r->XDIR;
|
||||
gs.transferring = true;
|
||||
|
||||
if (gs.imageNew.w > 0 && gs.imageNew.h > 0)
|
||||
{
|
||||
switch (gs.imageTransfer)
|
||||
{
|
||||
case 0: // host->loc
|
||||
case XFER_HOST_TO_LOCAL: // host->loc
|
||||
InitTransferHostLocal();
|
||||
break;
|
||||
|
||||
case 1: // loc->host
|
||||
case XFER_LOCAL_TO_HOST: // loc->host
|
||||
InitTransferLocalHost();
|
||||
break;
|
||||
|
||||
case 2:
|
||||
case XFER_LOCAL_TO_LOCAL:
|
||||
TransferLocalLocal();
|
||||
break;
|
||||
|
||||
case 3:
|
||||
gs.imageTransfer = -1;
|
||||
case XFER_DEACTIVATED:
|
||||
ZZLog::WriteLn("Image Transfer = 3?");
|
||||
gs.transferring = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -833,9 +931,9 @@ void __gifCall GIFRegHandlerTRXDIR(const u32* data)
|
|||
else
|
||||
{
|
||||
#if defined(ZEROGS_DEVBUILD)
|
||||
ZZLog::Warn_Log("Dummy transfer.");
|
||||
//ZZLog::Warn_Log("Dummy transfer.");
|
||||
#endif
|
||||
gs.imageTransfer = -1;
|
||||
gs.transferring = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -843,7 +941,7 @@ void __gifCall GIFRegHandlerHWREG(const u32* data)
|
|||
{
|
||||
FUNCLOG
|
||||
|
||||
if (gs.imageTransfer == 0)
|
||||
if (gs.transferring && gs.imageTransfer == XFER_HOST_TO_LOCAL)
|
||||
{
|
||||
TransferHostLocal(data, 2);
|
||||
}
|
||||
|
@ -866,14 +964,9 @@ void __gifCall GIFRegHandlerSIGNAL(const u32* data)
|
|||
{
|
||||
SIGLBLID->SIGID = (SIGLBLID->SIGID & ~data[1]) | (data[0] & data[1]);
|
||||
|
||||
// if (gs.CSRw & 0x1) CSR->SIGNAL = 1;
|
||||
// if (!IMR->SIGMSK && GSirq)
|
||||
// GSirq();
|
||||
|
||||
if (gs.CSRw & 0x1)
|
||||
{
|
||||
CSR->SIGNAL = 1;
|
||||
//gs.CSRw &= ~1;
|
||||
}
|
||||
|
||||
if (!IMR->SIGMSK && GSirq) GSirq();
|
||||
|
@ -889,17 +982,6 @@ void __gifCall GIFRegHandlerFINISH(const u32* data)
|
|||
if (gs.CSRw & 0x2) CSR->FINISH = 1;
|
||||
|
||||
if (!IMR->FINISHMSK && GSirq) GSirq();
|
||||
|
||||
// if( gs.CSRw & 2 ) {
|
||||
// //gs.CSRw &= ~2;
|
||||
// //CSR->FINISH = 0;
|
||||
//
|
||||
//
|
||||
// }
|
||||
// CSR->FINISH = 1;
|
||||
//
|
||||
// if( !IMR->FINISHMSK && GSirq )
|
||||
// GSirq();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -913,7 +995,6 @@ void __gifCall GIFRegHandlerLABEL(const u32* data)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void SetMultithreaded()
|
||||
{
|
||||
// Some older versions of PCSX2 didn't properly set the irq callback to NULL
|
||||
|
@ -1077,4 +1158,3 @@ void SetFrameSkip(bool skip)
|
|||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,9 +20,6 @@
|
|||
#ifndef __GSREGS_H__
|
||||
#define __GSREGS_H__
|
||||
|
||||
|
||||
#ifdef USE_OLD_REGS
|
||||
|
||||
enum GIF_REG
|
||||
{
|
||||
GIF_REG_PRIM = 0x00,
|
||||
|
@ -193,7 +190,7 @@ enum GS_ATST
|
|||
ATST_EQUAL = 4,
|
||||
ATST_GEQUAL = 5,
|
||||
ATST_GREATER = 6,
|
||||
ATST_NOTEQUAL = 7,
|
||||
ATST_NOTEQUAL = 7
|
||||
};
|
||||
|
||||
enum GS_AFAIL
|
||||
|
@ -201,9 +198,24 @@ enum GS_AFAIL
|
|||
AFAIL_KEEP = 0,
|
||||
AFAIL_FB_ONLY = 1,
|
||||
AFAIL_ZB_ONLY = 2,
|
||||
AFAIL_RGB_ONLY = 3,
|
||||
AFAIL_RGB_ONLY = 3
|
||||
};
|
||||
|
||||
// Four named selector values, 0 through 3.
// NOTE(review): presumably the texture function field of the TEX0 register -- confirm against the register definition.
enum GS_TFX
{
	TFX_MODULATE   = 0,
	TFX_DECAL      = 1,
	TFX_HIGHLIGHT  = 2,
	TFX_HIGHLIGHT2 = 3
};
|
||||
|
||||
// Four named selector values, 0 through 3.
// NOTE(review): presumably the wrap mode fields of the CLAMP register -- confirm against the register definition.
enum GS_CLAMP
{
	CLAMP_REPEAT        = 0,
	CLAMP_CLAMP         = 1,
	CLAMP_REGION_CLAMP  = 2,
	CLAMP_REGION_REPEAT = 3
};
|
||||
// GIFReg
|
||||
|
||||
REG64_(GIFReg, ALPHA)
|
||||
|
@ -763,7 +775,8 @@ REG128_SET(GIFPackedReg)
|
|||
GIFPackedNOP NOP;
|
||||
REG_SET_END
|
||||
|
||||
|
||||
// This register stores the background color. Theoretically it'd get blended with the image in some cases, but we don't appear to be
|
||||
// using it. See PMODE->SLBG. GSDx *is* using it.
|
||||
REG64_(GSReg, BGCOLOR)
|
||||
u32 R:8;
|
||||
u32 G:8;
|
||||
|
@ -772,12 +785,15 @@ REG64_(GSReg, BGCOLOR)
|
|||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
// This register switches the direction of Fifo. 0 - Host -> Local; 1 - Local -> Host. Fifo is supposed to be empty at the time.
|
||||
// Unchecked by GSdx or ZZOgl.
|
||||
REG64_(GSReg, BUSDIR)
|
||||
u32 DIR:1;
|
||||
u32 _PAD1:31;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
// Mostly looks handled by pcsx2.
|
||||
REG64_(GSReg, CSR)
|
||||
u32 SIGNAL:1;
|
||||
u32 FINISH:1;
|
||||
|
@ -798,6 +814,12 @@ REG64_(GSReg, CSR)
|
|||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
// Settings for whichever circuit we're using. (Again, see PMODE.)
|
||||
// -- FBP - Frame Buffer Pointer. address / 2048.
|
||||
// -- FBW - Frame Buffer Width. width / 64.
|
||||
// -- PSM - psm, but 5 bit. 0 - PSMCT32; 1 - PSMCT24; 2 - PSMCT16; 10 - PSMCT16S; 18 - PS-GPU24?
|
||||
// -- DBX - Upper left x coords of rectangle.
|
||||
// -- DBY - Upper left y coords of rectangle.
|
||||
REG64_(GSReg, DISPFB) // (-1/2)
|
||||
u32 FBP:9;
|
||||
u32 FBW:6;
|
||||
|
@ -808,6 +830,14 @@ REG64_(GSReg, DISPFB) // (-1/2)
|
|||
u32 _PAD2:10;
|
||||
REG_END
|
||||
|
||||
// Settings for whichever display we're using.
|
||||
// -- DX - X position in the display area.
|
||||
// -- DY - Y position in the display area.
|
||||
// -- MAGH - Horizontal Magnification; x1 - x16.
|
||||
// -- MAGV - Vertical Magnification; x1 - x16.
|
||||
// -- DW - Display Area Width - 1.
|
||||
// -- DH - Display Area Height - 1.
|
||||
|
||||
REG64_(GSReg, DISPLAY) // (-1/2)
|
||||
u32 DX:12;
|
||||
u32 DY:11;
|
||||
|
@ -819,6 +849,16 @@ REG64_(GSReg, DISPLAY) // (-1/2)
|
|||
u32 _PAD2:9;
|
||||
REG_END
|
||||
|
||||
// This register has settings for the frame buffer when writing back. These next three registers are unused in ZZOgl & GSDx.
|
||||
// -- EXBP - Base pointer of the buffer / 64.
|
||||
// -- EXBW - Width of the buffer / 64.
|
||||
// -- FBIN - Whether we use OUT1 or OUT2. 0 - 1; 1 - 2.
|
||||
// -- WFFMD - Interlace Mode; 0 - Field; 1 - Frame.
|
||||
// -- EMODA - When processing an input alpha value; 0 - write it as is; 1 - Convert from RGB to luminance value Y. 2 - Same as 1, only /2. 3 - 0.
|
||||
// -- EMODC - When processing an input color value; 0 - write it as is; 1 - Convert from RGB to luminance value Y. 2 - Convert to YCbCr. 3 - Write Alpha to RGB.
|
||||
// -- WDX - X coords.
|
||||
// -- WDY - Y coords.
|
||||
|
||||
REG64_(GSReg, EXTBUF)
|
||||
u32 EXBP:14;
|
||||
u32 EXBW:6;
|
||||
|
@ -832,6 +872,14 @@ REG64_(GSReg, EXTBUF)
|
|||
u32 _PAD2:10;
|
||||
REG_END
|
||||
|
||||
// Sets where you read when the write above is performed.
|
||||
// -- SX - X coords.
|
||||
// -- SY - Y coords.
|
||||
// -- SMPH - Horiz Sampling rate.
|
||||
// -- SMPV - Vert Sampling rate.
|
||||
// -- WW - Rect Width - 1
|
||||
// -- WH - Rect Height - 1
|
||||
|
||||
REG64_(GSReg, EXTDATA)
|
||||
u32 SX:12;
|
||||
u32 SY:11;
|
||||
|
@ -843,11 +891,13 @@ REG64_(GSReg, EXTDATA)
|
|||
u32 _PAD2:9;
|
||||
REG_END
|
||||
|
||||
// Starts or stops the aforementioned write.
|
||||
REG64_(GSReg, EXTWRITE)
|
||||
u32 WRITE;
|
||||
u32 _PAD2:32;
|
||||
REG_END
|
||||
|
||||
// Pcsx2 handles this.
|
||||
REG64_(GSReg, IMR)
|
||||
u32 _PAD1:8;
|
||||
u32 SIGMSK:1;
|
||||
|
@ -859,6 +909,16 @@ REG64_(GSReg, IMR)
|
|||
u32 _PAD3:32;
|
||||
REG_END
|
||||
|
||||
// The fields of PMODE are:
|
||||
// -- EN1 - Read Circuit 1; 0 - off, 1 - on.
|
||||
// -- EN2 - Read Circuit 2; 0 - off, 1 - on.
|
||||
// -- CRTMD - Always 1.
|
||||
// -- MMOD - For Alpha blending, the selection is: 0 - The Alpha value of circuit 1, 1 - The ALP register value.
|
||||
// -- AMOD - The OUT1 Alpha value selection: 0 - Read circuit 1, 1 - Read Circuit 2.
|
||||
// -- SLBG - The Alpha blending type: 0 - blended with the output of Read circuit 1, 1 - blended with the background color.
|
||||
// -- ALP - The fixed Alpha value.
|
||||
//
|
||||
|
||||
REG64_(GSReg, PMODE)
|
||||
u32 EN1:1;
|
||||
u32 EN2:1;
|
||||
|
@ -871,11 +931,13 @@ REG64_(GSReg, PMODE)
|
|||
u32 _PAD1:32;
|
||||
REG_END
|
||||
|
||||
// Pcsx2 handles this.
|
||||
REG64_(GSReg, SIGLBLID)
|
||||
u32 SIGID:32;
|
||||
u32 LBLID:32;
|
||||
REG_END
|
||||
|
||||
// Not sure about this one...
|
||||
REG64_(GSReg, SMODE1)
|
||||
u32 RC:3;
|
||||
u32 LC:7;
|
||||
|
@ -901,6 +963,11 @@ REG64_(GSReg, SMODE1)
|
|||
u32 _PAD1:27;
|
||||
REG_END
|
||||
|
||||
// The fields of SMODE2 are:
|
||||
// -- INT - 0 for non-interlaced; 1 for interlaced.
|
||||
// -- FFMD - 0 for field mode (read every other line); 1 for frame mode (read every line)
|
||||
// -- DPMS - VESA DPMS mode setting; 0 - on, 1 - standby, 2 - suspend, 3 - off.
|
||||
//
|
||||
REG64_(GSReg, SMODE2)
|
||||
u32 INT:1;
|
||||
u32 FFMD:1;
|
||||
|
@ -914,6 +981,8 @@ REG64_(GSReg, SIGBLID)
|
|||
u32 LBLID;
|
||||
REG_END
|
||||
|
||||
extern u8* g_pBasePS2Mem;
|
||||
|
||||
#define PMODE ((GSRegPMODE*)(g_pBasePS2Mem+0x0000))
|
||||
#define SMODE1 ((GSRegSMODE1*)(g_pBasePS2Mem+0x0010))
|
||||
#define SMODE2 ((GSRegSMODE2*)(g_pBasePS2Mem+0x0020))
|
||||
|
@ -942,6 +1011,21 @@ REG_END
|
|||
|
||||
#define GET_GSFPS (((SMODE1->CMOD&1) ? 50 : 60) / (SMODE2->INT ? 1 : 2))
|
||||
|
||||
// Returns the DISPLAY register block for the given circuit:
// DISPLAY1 when circuit is zero, DISPLAY2 for any non-zero value.
static __forceinline GSRegDISPLAY* Display_Reg(int circuit)
{
	if (circuit)
	{
		return DISPLAY2;
	}

	return DISPLAY1;
}
|
||||
|
||||
// Returns the DISPFB register block for the given circuit:
// DISPFB1 when circuit is zero, DISPFB2 for any non-zero value.
static __forceinline GSRegDISPFB* Dispfb_Reg(int circuit)
{
	if (circuit)
	{
		return DISPFB2;
	}

	return DISPFB1;
}
|
||||
|
||||
// Reports the PMODE enable bit of a read circuit:
// PMODE->EN1 when circuit is zero, PMODE->EN2 for any non-zero value.
static __forceinline bool Circuit_Enabled(int circuit)
{
	if (circuit)
	{
		return PMODE->EN2;
	}

	return PMODE->EN1;
}
|
||||
|
||||
extern void WriteTempRegs();
|
||||
extern void SetFrameSkip(bool skip);
|
||||
extern void ResetRegs();
|
||||
|
@ -958,7 +1042,4 @@ void FlushBoth();
|
|||
// called on a primitive switch
|
||||
void Prim();
|
||||
|
||||
#else
|
||||
#include "NewRegs.h"
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -58,10 +58,12 @@
|
|||
#include "CRC.h"
|
||||
#include "ZZLog.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
// need C definitions -- no mangling please!
|
||||
extern "C" u32 CALLBACK PS2EgetLibType(void);
|
||||
extern "C" u32 CALLBACK PS2EgetLibVersion2(u32 type);
|
||||
extern "C" char* CALLBACK PS2EgetLibName(void);
|
||||
#endif
|
||||
|
||||
#include "ZZoglMath.h"
|
||||
#include "Profile.h"
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#include "Util.h"
|
||||
|
||||
#if defined(ZEROGS_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
// Local Clut buffer:
|
||||
|
|
|
@ -0,0 +1,314 @@
|
|||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
#include "x86.h"
|
||||
#include "targets.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#include "ZZClut.h"
|
||||
#include "ZZoglVB.h"
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
extern bool g_bUpdateStencil;
|
||||
|
||||
void _Resolve(const void* psrc, int fbp, int fbw, int fbh, int psm, u32 fbm, bool mode);
|
||||
void SetWriteDepth();
|
||||
bool IsWriteDepth();
|
||||
bool IsWriteDestAlphaTest();
|
||||
|
||||
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
|
||||
|
||||
// Starts with no depth/stencil renderbuffers and a zeroed attach counter.
CDepthTarget::CDepthTarget()
	: CRenderTarget()
	, pdepth(0)
	, pstencil(0)
	, icount(0)
{
}
|
||||
|
||||
// Releases the renderbuffers through Destroy().
CDepthTarget::~CDepthTarget()
{
	FUNCLOG

	Destroy();
}
|
||||
|
||||
bool CDepthTarget::Create(const frameInfo& frame)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (!CRenderTarget::Create(frame)) return false;
|
||||
|
||||
GL_REPORT_ERROR();
|
||||
|
||||
glGenRenderbuffersEXT(1, &pdepth);
|
||||
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pdepth);
|
||||
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH24_STENCIL8_EXT, RW(fbw), RH(fbh));
|
||||
|
||||
if (glGetError() != GL_NO_ERROR)
|
||||
{
|
||||
// try a separate depth and stencil buffer
|
||||
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pdepth);
|
||||
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT24, RW(fbw), RH(fbh));
|
||||
|
||||
if (g_bUpdateStencil)
|
||||
{
|
||||
glGenRenderbuffersEXT(1, &pstencil);
|
||||
glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, pstencil);
|
||||
glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_STENCIL_INDEX8_EXT, RW(fbw), RH(fbh));
|
||||
|
||||
if (glGetError() != GL_NO_ERROR)
|
||||
{
|
||||
ZZLog::Error_Log("Failed to create depth buffer %dx%d.", RW(fbw), RH(fbh));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pstencil = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pstencil = pdepth;
|
||||
}
|
||||
|
||||
status = TS_NeedUpdate;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CDepthTarget::Destroy()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (status) // In this case Framebuffer extension is off-use and lead to segfault
|
||||
{
|
||||
ResetRenderTarget(1);
|
||||
FB::Attach(GL_DEPTH_ATTACHMENT_EXT);
|
||||
FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
if (pstencil != 0)
|
||||
{
|
||||
if (pstencil != pdepth) glDeleteRenderbuffersEXT(1, &pstencil);
|
||||
pstencil = 0;
|
||||
}
|
||||
|
||||
if (pdepth != 0)
|
||||
{
|
||||
glDeleteRenderbuffersEXT(1, &pdepth);
|
||||
pdepth = 0;
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
CRenderTarget::Destroy();
|
||||
}
|
||||
|
||||
|
||||
extern int g_nDepthUsed; // > 0 if depth is used
|
||||
|
||||
void CDepthTarget::Resolve()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (g_nDepthUsed > 0 && conf.mrtdepth && !(status & TS_Virtual) && IsWriteDepth() && !(conf.settings().no_depth_resolve))
|
||||
CRenderTarget::Resolve();
|
||||
else
|
||||
{
|
||||
// flush if necessary
|
||||
FlushIfNecesary(this);
|
||||
|
||||
if (!(status & TS_Virtual)) status |= TS_Resolved;
|
||||
}
|
||||
|
||||
if (!(status&TS_Virtual))
|
||||
{
|
||||
SetWriteDepth();
|
||||
}
|
||||
}
|
||||
|
||||
void CDepthTarget::Resolve(int startrange, int endrange)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (g_nDepthUsed > 0 && conf.mrtdepth && !(status&TS_Virtual) && IsWriteDepth())
|
||||
{
|
||||
CRenderTarget::Resolve(startrange, endrange);
|
||||
}
|
||||
else
|
||||
{
|
||||
// flush if necessary
|
||||
FlushIfNecesary(this) ;
|
||||
|
||||
if (!(status & TS_Virtual))
|
||||
status |= TS_Resolved;
|
||||
}
|
||||
|
||||
if (!(status&TS_Virtual))
|
||||
{
|
||||
SetWriteDepth();
|
||||
}
|
||||
}
|
||||
|
||||
void CDepthTarget::Update(int context, CRenderTarget* prndr)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
assert(!(status & TS_Virtual));
|
||||
|
||||
// align the rect to the nearest page
|
||||
// note that fbp is always aligned on page boundaries
|
||||
tex0Info texframe;
|
||||
texframe.tbp0 = fbp;
|
||||
texframe.tbw = fbw;
|
||||
texframe.tw = fbw;
|
||||
texframe.th = fbh;
|
||||
texframe.psm = psm;
|
||||
// FIXME some field are not initialized...
|
||||
// in particular the clut related one
|
||||
assert(!PSMT_ISCLUT(psm));
|
||||
|
||||
DisableAllgl();
|
||||
|
||||
VB& curvb = vb[context];
|
||||
|
||||
if (curvb.test.zte == 0) return;
|
||||
|
||||
SetShaderCaller("CDepthTarget::Update");
|
||||
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
glDepthMask(!curvb.zbuf.zmsk);
|
||||
|
||||
static const u32 g_dwZCmp[] = { GL_NEVER, GL_ALWAYS, GL_GEQUAL, GL_GREATER };
|
||||
|
||||
glDepthFunc(g_dwZCmp[curvb.test.ztst]);
|
||||
|
||||
// write color and zero out stencil buf, always 0 context!
|
||||
SetTexVariablesInt(0, 0, texframe, false, &ppsBitBltDepth, 1);
|
||||
ZZshGLSetTextureParameter(ppsBitBltDepth.prog, ppsBitBltDepth.sMemory, vb[0].pmemtarg->ptex->tex, "BitBltDepth");
|
||||
|
||||
float4 v = DefaultBitBltPos();
|
||||
|
||||
v = DefaultBitBltTex();
|
||||
|
||||
v.x = 1;
|
||||
v.y = 2;
|
||||
v.z = PSMT_IS16Z(psm) ? 1.0f : 0.0f;
|
||||
v.w = g_filog32;
|
||||
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sOneColor, v, "g_fOneColor");
|
||||
|
||||
float4 vdepth = g_vdepth;
|
||||
|
||||
if (psm == PSMT24Z)
|
||||
{
|
||||
vdepth.w = 0;
|
||||
}
|
||||
else if (psm != PSMT32Z)
|
||||
{
|
||||
vdepth.z = vdepth.w = 0;
|
||||
}
|
||||
|
||||
assert(ppsBitBltDepth.sBitBltZ != 0);
|
||||
|
||||
ZZshSetParameter4fv(ppsBitBltDepth.prog, ppsBitBltDepth.sBitBltZ, (vdepth*(255.0f / 256.0f)), "g_fBitBltZ");
|
||||
|
||||
assert(pdepth != 0);
|
||||
//GLint w1 = 0;
|
||||
//GLint h1 = 0;
|
||||
|
||||
FB::Attach2D(0, ptex);
|
||||
//glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_WIDTH_EXT, &w1);
|
||||
//glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_HEIGHT_EXT, &h1);
|
||||
SetDepthStencilSurface();
|
||||
|
||||
FB::Attach2D(1);
|
||||
|
||||
GLenum buffer = GL_COLOR_ATTACHMENT0_EXT;
|
||||
|
||||
//ZZLog::Error_Log("CDepthTarget::Update: w1 = 0x%x; h1 = 0x%x", w1, h1);
|
||||
DrawBuffers(&buffer);
|
||||
|
||||
SetViewport();
|
||||
|
||||
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vboRect);
|
||||
|
||||
SET_STREAM();
|
||||
ZZshSetVertexShader(pvsBitBlt.prog);
|
||||
ZZshSetPixelShader(ppsBitBltDepth.prog);
|
||||
|
||||
DrawTriangleArray();
|
||||
|
||||
status = TS_Resolved;
|
||||
|
||||
if (!IsWriteDepth())
|
||||
{
|
||||
ResetRenderTarget(1);
|
||||
}
|
||||
|
||||
if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
||||
#ifdef _DEBUG
|
||||
if (g_bSaveZUpdate)
|
||||
{
|
||||
SaveTex(&texframe, 1);
|
||||
SaveTexture("frame1.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void CDepthTarget::SetDepthStencilSurface()
|
||||
{
|
||||
FUNCLOG
|
||||
FB::Attach(GL_DEPTH_ATTACHMENT_EXT, pdepth);
|
||||
|
||||
if (pstencil)
|
||||
{
|
||||
// there's a bug with attaching stencil and depth buffers
|
||||
FB::Attach(GL_STENCIL_ATTACHMENT_EXT, pstencil);
|
||||
|
||||
if (icount++ < 8) // not going to fail if succeeded 4 times
|
||||
{
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
if (FB::State() != GL_FRAMEBUFFER_COMPLETE_EXT)
|
||||
{
|
||||
FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
|
||||
|
||||
if (pstencil != pdepth) glDeleteRenderbuffersEXT(1, &pstencil);
|
||||
|
||||
pstencil = 0;
|
||||
g_bUpdateStencil = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
FB::Attach(GL_STENCIL_ATTACHMENT_EXT);
|
||||
}
|
||||
}
|
||||
|
|
@ -47,8 +47,11 @@ inline void* wglGetProcAddress(const char* x)
|
|||
|
||||
#endif
|
||||
|
||||
#include "Mem.h"
|
||||
|
||||
extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
|
||||
// Defines
|
||||
extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
|
||||
extern GLenum s_rgbeq, s_alphaeq;
|
||||
|
||||
#ifndef GL_DEPTH24_STENCIL8_EXT // allows FBOs to support stencils
|
||||
# define GL_DEPTH_STENCIL_EXT 0x84F9
|
||||
|
@ -57,28 +60,50 @@ extern u32 s_stencilfunc, s_stencilref, s_stencilmask;
|
|||
# define GL_TEXTURE_STENCIL_SIZE_EXT 0x88F1
|
||||
#endif
|
||||
|
||||
#define GL_STENCILFUNC(func, ref, mask) { \
|
||||
s_stencilfunc = func; \
|
||||
s_stencilref = ref; \
|
||||
s_stencilmask = mask; \
|
||||
glStencilFunc(func, ref, mask); \
|
||||
#ifdef _WIN32
|
||||
#define GL_LOADFN(name) { \
|
||||
if( (*(void**)&name = (void*)wglGetProcAddress(#name)) == NULL ) { \
|
||||
ZZLog::Error_Log("Failed to find %s, exiting.", #name); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// let GLEW take care of it
|
||||
#define GL_LOADFN(name)
|
||||
#endif
|
||||
|
||||
// Records the stencil function, reference and mask in the s_stencil* globals and
// applies them with glStencilFunc().
static __forceinline void GL_STENCILFUNC(GLenum func, GLint ref, GLuint mask)
{
	s_stencilfunc = func;
	s_stencilref  = ref;
	s_stencilmask = mask;

	glStencilFunc(func, ref, mask);
}
|
||||
|
||||
#define GL_STENCILFUNC_SET() glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask)
|
||||
|
||||
// Re-applies the stencil function currently stored in the s_stencil* globals.
static __forceinline void GL_STENCILFUNC_SET()
{
	glStencilFunc(s_stencilfunc, s_stencilref, s_stencilmask);
}
|
||||
|
||||
// sets the data stream
|
||||
#define SET_STREAM() { \
|
||||
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)8); \
|
||||
glSecondaryColorPointerEXT(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)12); \
|
||||
glTexCoordPointer(3, GL_FLOAT, sizeof(VertexGPU), (void*)16); \
|
||||
glVertexPointer(4, GL_SHORT, sizeof(VertexGPU), (void*)0); \
|
||||
static __forceinline void SET_STREAM()
|
||||
{
|
||||
glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)8);
|
||||
glSecondaryColorPointerEXT(4, GL_UNSIGNED_BYTE, sizeof(VertexGPU), (void*)12);
|
||||
glTexCoordPointer(3, GL_FLOAT, sizeof(VertexGPU), (void*)16);
|
||||
glVertexPointer(4, GL_SHORT, sizeof(VertexGPU), (void*)0);
|
||||
}
|
||||
|
||||
|
||||
// global alpha blending settings
|
||||
extern GLenum g_internalRGBAFloat16Fmt;
|
||||
|
||||
//static __forceinline void SAFE_RELEASE_TEX(u32& x)
|
||||
//{
|
||||
// if (x != 0)
|
||||
// {
|
||||
// glDeleteTextures(1, &x);
|
||||
// x = 0;
|
||||
// }
|
||||
//}
|
||||
#define SAFE_RELEASE_TEX(x) { if( (x) != 0 ) { glDeleteTextures(1, &(x)); x = 0; } }
|
||||
|
||||
// inline for an extremely often used sequence
|
||||
|
@ -104,12 +129,192 @@ extern void (APIENTRY *zgsBlendEquationSeparateEXT)(GLenum, GLenum);
|
|||
extern void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum);
|
||||
#endif
|
||||
|
||||
// Draws the first four vertices of the bound arrays as a triangle strip,
// then runs the debug GL error check.
static __forceinline void DrawTriangleArray()
{
	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

	GL_REPORT_ERRORD();
}
|
||||
|
||||
// ------------------------ Types -------------------------
|
||||
static __forceinline void DrawBuffers(GLenum *buffer)
|
||||
{
|
||||
if (glDrawBuffers != NULL)
|
||||
{
|
||||
glDrawBuffers(1, buffer);
|
||||
}
|
||||
|
||||
/////////////////////
|
||||
// graphics resources
|
||||
extern GLenum s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha; // set by zgsBlendFuncSeparateEXT
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
|
||||
namespace FB
|
||||
{
|
||||
extern u32 buf;
|
||||
|
||||
static __forceinline void Create()
|
||||
{
|
||||
glGenFramebuffersEXT(1, &buf);
|
||||
}
|
||||
|
||||
static __forceinline void Bind()
|
||||
{
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, buf);
|
||||
}
|
||||
|
||||
static __forceinline void Unbind()
|
||||
{
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
|
||||
}
|
||||
|
||||
static __forceinline GLenum State()
|
||||
{
|
||||
return glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
|
||||
}
|
||||
|
||||
static __forceinline void Attach2D(int attach, int id = 0)
|
||||
{
|
||||
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT + attach, GL_TEXTURE_RECTANGLE_NV, id, 0);
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
static __forceinline void Attach(GLenum rend, GLuint id = 0)
|
||||
{
|
||||
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, rend, GL_RENDERBUFFER_EXT, id);
|
||||
}
|
||||
};
|
||||
|
||||
// Detaches the texture from colour attachment `index` by attaching texture 0.
static __forceinline void ResetRenderTarget(int index)
{
	FB::Attach2D(index);
}
|
||||
|
||||
// Specifies mip level 0 of a two-dimensional image for target `tex_type`, with no border.
static __forceinline void TextureImage(GLenum tex_type, GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
	glTexImage2D(
		tex_type,
		0,          // mip level
		iFormat,
		width,
		height,
		0,          // border
		format,
		type,
		pixels);
}
|
||||
|
||||
// TextureImage() for the GL_TEXTURE_2D target with an explicit size.
static __forceinline void Texture2D(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
	TextureImage(
		GL_TEXTURE_2D,
		iFormat,
		width,
		height,
		format,
		type,
		pixels);
}
|
||||
|
||||
// TextureImage() for the GL_TEXTURE_2D target, sized BLOCK_TEXWIDTH x BLOCK_TEXHEIGHT.
static __forceinline void Texture2D(GLint iFormat, GLenum format, GLenum type, const GLvoid* pixels)
{
	const GLenum target = GL_TEXTURE_2D;

	TextureImage(target, iFormat, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, format, type, pixels);
}
|
||||
|
||||
// TextureImage() for the GL_TEXTURE_RECTANGLE_NV target.
static __forceinline void TextureRect(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
	const GLenum target = GL_TEXTURE_RECTANGLE_NV;

	TextureImage(target, iFormat, width, height, format, type, pixels);
}
|
||||
|
||||
// TextureImage() for the GL_TEXTURE_RECTANGLE target (TextureRect uses the _NV enum instead).
static __forceinline void TextureRect2(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
{
	const GLenum target = GL_TEXTURE_RECTANGLE;

	TextureImage(target, iFormat, width, height, format, type, pixels);
}
|
||||
|
||||
// Uploads `pixels` with glTexImage3D to GL_TEXTURE_3D, mip level 0, border 0.
static __forceinline void Texture3D(GLint iFormat, GLint width, GLint height, GLint depth, GLenum format, GLenum type, const GLvoid* pixels)
{
	glTexImage3D(GL_TEXTURE_3D,
				 0,			// level
				 iFormat,
				 width,
				 height,
				 depth,
				 0,			// border
				 format,
				 type,
				 pixels);
}
|
||||
|
||||
// Sets both the magnification and minification filter of GL_TEXTURE_2D to `type`.
static __forceinline void setTex2DFilters(GLint type)
{
	const GLenum target = GL_TEXTURE_2D;

	glTexParameteri(target, GL_TEXTURE_MAG_FILTER, type);
	glTexParameteri(target, GL_TEXTURE_MIN_FILTER, type);
}
|
||||
|
||||
// Sets the S and T wrap modes of GL_TEXTURE_2D to `type`.
static __forceinline void setTex2DWrap(GLint type)
{
	const GLenum target = GL_TEXTURE_2D;

	glTexParameteri(target, GL_TEXTURE_WRAP_S, type);
	glTexParameteri(target, GL_TEXTURE_WRAP_T, type);
}
|
||||
|
||||
// Sets both the magnification and minification filter of GL_TEXTURE_3D to `type`.
static __forceinline void setTex3DFilters(GLint type)
{
	const GLenum target = GL_TEXTURE_3D;

	glTexParameteri(target, GL_TEXTURE_MAG_FILTER, type);
	glTexParameteri(target, GL_TEXTURE_MIN_FILTER, type);
}
|
||||
|
||||
// Sets the S, T and R wrap modes of GL_TEXTURE_3D to `type`.
static __forceinline void setTex3DWrap(GLint type)
{
	const GLenum target = GL_TEXTURE_3D;

	glTexParameteri(target, GL_TEXTURE_WRAP_S, type);
	glTexParameteri(target, GL_TEXTURE_WRAP_T, type);
	glTexParameteri(target, GL_TEXTURE_WRAP_R, type);
}
|
||||
|
||||
// Sets both the magnification and minification filter of GL_TEXTURE_RECTANGLE_NV to `type`.
static __forceinline void setRectFilters(GLint type)
{
	const GLenum target = GL_TEXTURE_RECTANGLE_NV;

	glTexParameteri(target, GL_TEXTURE_MAG_FILTER, type);
	glTexParameteri(target, GL_TEXTURE_MIN_FILTER, type);
}
|
||||
|
||||
// Sets the S and T wrap modes of GL_TEXTURE_RECTANGLE_NV to `type`.
static __forceinline void setRectWrap(GLint type)
{
	const GLenum target = GL_TEXTURE_RECTANGLE_NV;

	glTexParameteri(target, GL_TEXTURE_WRAP_S, type);
	glTexParameteri(target, GL_TEXTURE_WRAP_T, type);
}
|
||||
|
||||
// Sets the S and T wrap modes of GL_TEXTURE_RECTANGLE to `type`
// (setRectWrap uses the _NV enum instead).
static __forceinline void setRectWrap2(GLint type)
{
	const GLenum target = GL_TEXTURE_RECTANGLE;

	glTexParameteri(target, GL_TEXTURE_WRAP_S, type);
	glTexParameteri(target, GL_TEXTURE_WRAP_T, type);
}
|
||||
|
||||
// Pushes the cached blend factors (s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha)
// to zgsBlendFuncSeparateEXT.
static __forceinline void GL_BLEND_SET()
{
	zgsBlendFuncSeparateEXT(s_srcrgb,
							s_dstrgb,
							s_srcalpha,
							s_dstalpha);
}
|
||||
|
||||
// Caches new RGB blend factors and re-applies the full blend function;
// the cached alpha factors are left as they were.
static __forceinline void GL_BLEND_RGB(GLenum src, GLenum dst)
{
	s_srcrgb = src;
	s_dstrgb = dst;

	GL_BLEND_SET();
}
|
||||
|
||||
// Caches new alpha blend factors and re-applies the full blend function;
// the cached RGB factors are left as they were.
static __forceinline void GL_BLEND_ALPHA(GLenum src, GLenum dst)
{
	s_srcalpha = src;
	s_dstalpha = dst;

	GL_BLEND_SET();
}
|
||||
|
||||
// Caches all four blend factors and applies them in one call.
static __forceinline void GL_BLEND_ALL(GLenum srcrgb, GLenum dstrgb, GLenum srcalpha, GLenum dstalpha)
{
	// colour factors
	s_srcrgb = srcrgb;
	s_dstrgb = dstrgb;

	// alpha factors
	s_srcalpha = srcalpha;
	s_dstalpha = dstalpha;

	GL_BLEND_SET();
}
|
||||
|
||||
// Enables or disables GL_DEPTH_TEST according to `enable`.
static __forceinline void GL_ZTEST(bool enable)
{
	if (!enable)
	{
		glDisable(GL_DEPTH_TEST);
		return;
	}

	glEnable(GL_DEPTH_TEST);
}
|
||||
|
||||
// Enables or disables GL_ALPHA_TEST according to `enable`.
static __forceinline void GL_ALPHATEST(bool enable)
{
	if (!enable)
	{
		glDisable(GL_ALPHA_TEST);
		return;
	}

	glEnable(GL_ALPHA_TEST);
}
|
||||
|
||||
// Caches a new RGB blend equation in s_rgbeq and re-applies both equations
// (the alpha one comes from the cached s_alphaeq).
static __forceinline void GL_BLENDEQ_RGB(GLenum eq)
{
	s_rgbeq = eq;

	zgsBlendEquationSeparateEXT(s_rgbeq, s_alphaeq);
}
|
||||
|
||||
// Caches a new alpha blend equation in s_alphaeq and re-applies both equations
// (the RGB one comes from the cached s_rgbeq).
static __forceinline void GL_BLENDEQ_ALPHA(GLenum eq)
{
	s_alphaeq = eq;

	zgsBlendEquationSeparateEXT(s_rgbeq, s_alphaeq);
}
|
||||
|
||||
// GL prototypes
|
||||
extern PFNGLISRENDERBUFFEREXTPROC glIsRenderbufferEXT;
|
||||
|
|
|
@ -27,7 +27,7 @@ extern GSconf conf;
|
|||
using namespace std;
|
||||
|
||||
static list<MESSAGE> listMsgs;
|
||||
|
||||
const char* logging_prefix = "ZZOgl-PG";
|
||||
void ProcessMessages()
|
||||
{
|
||||
FUNCLOG
|
||||
|
@ -130,12 +130,14 @@ void _Log(const char *str)
|
|||
|
||||
void _WriteToConsole(const char *str)
|
||||
{
|
||||
fprintf(stderr,"ZZogl-PG: %s", str);
|
||||
fprintf(stderr,"%s: ", logging_prefix);
|
||||
fprintf(stderr,"%s", str);
|
||||
}
|
||||
|
||||
void _Print(const char *str)
|
||||
{
|
||||
fprintf(stderr,"ZZogl-PG: %s", str);
|
||||
fprintf(stderr,"%s: ", logging_prefix);
|
||||
fprintf(stderr,"%s", str);
|
||||
|
||||
if (IsLogging()) fprintf(gsLog, str);
|
||||
}
|
||||
|
@ -169,7 +171,7 @@ void WriteToConsole(const char *fmt, ...)
|
|||
|
||||
va_start(list, fmt);
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
va_end(list);
|
||||
}
|
||||
|
@ -182,7 +184,7 @@ void Print(const char *fmt, ...)
|
|||
|
||||
if (IsLogging()) vfprintf(gsLog, fmt, list);
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
|
||||
va_end(list);
|
||||
|
@ -197,7 +199,7 @@ void WriteLn(const char *fmt, ...)
|
|||
|
||||
if (IsLogging()) vfprintf(gsLog, fmt, list);
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
va_end(list);
|
||||
fprintf(stderr,"\n");
|
||||
|
@ -237,7 +239,7 @@ void Prim_Log(const char *fmt, ...)
|
|||
{
|
||||
if (IsLogging()) vfprintf(gsLog, fmt, list);
|
||||
|
||||
fprintf(stderr, "ZZogl-PG(PRIM): ");
|
||||
fprintf(stderr, "%s(PRIM): ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
|
||||
vprintf(fmt, list);
|
||||
|
@ -262,7 +264,7 @@ void GS_Log(const char *fmt, ...)
|
|||
fprintf(gsLog, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
@ -283,7 +285,7 @@ void Warn_Log(const char *fmt, ...)
|
|||
fprintf(gsLog, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s(Warning): ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
@ -304,7 +306,7 @@ void Dev_Log(const char *fmt, ...)
|
|||
fprintf(gsLog, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
@ -325,7 +327,7 @@ void Debug_Log(const char *fmt, ...)
|
|||
fprintf(gsLog, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
@ -345,7 +347,7 @@ void Error_Log(const char *fmt, ...)
|
|||
fprintf(gsLog, "\n");
|
||||
}
|
||||
|
||||
fprintf(stderr, "ZZogl-PG: ");
|
||||
fprintf(stderr, "%s: ", logging_prefix);
|
||||
vfprintf(stderr, fmt, list);
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
|
|
|
@ -0,0 +1,620 @@
|
|||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
#include "targets.h"
|
||||
#include "ZZClut.h"
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
extern int g_TransferredToGPU;
|
||||
|
||||
extern int VALIDATE_THRESH;
|
||||
extern u32 TEXDESTROY_THRESH;
|
||||
#define FORCE_TEXDESTROY_THRESH (3) // destroy texture after FORCE_TEXDESTROY_THRESH frames
|
||||
|
||||
void CMemoryTargetMngr::Destroy()
|
||||
{
|
||||
FUNCLOG
|
||||
listTargets.clear();
|
||||
listClearedTargets.clear();
|
||||
}
|
||||
|
||||
bool CMemoryTarget::ValidateTex(const tex0Info& tex0, int starttex, int endtex, bool bDeleteBadTex)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (clearmaxy == 0) return true;
|
||||
|
||||
int checkstarty = max(starttex, clearminy);
|
||||
int checkendy = min(endtex, clearmaxy);
|
||||
|
||||
if (checkstarty >= checkendy) return true;
|
||||
|
||||
if (validatecount++ > VALIDATE_THRESH)
|
||||
{
|
||||
height = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
// lock and compare
|
||||
assert(ptex != NULL && ptex->memptr != NULL);
|
||||
|
||||
int result = memcmp_mmx(ptex->memptr + MemorySize(checkstarty-realy), MemoryAddress(checkstarty), MemorySize(checkendy-checkstarty));
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
clearmaxy = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!bDeleteBadTex) return false;
|
||||
|
||||
// delete clearminy, clearmaxy range (not the checkstarty, checkendy range)
|
||||
//int newstarty = 0;
|
||||
if (clearminy <= starty)
|
||||
{
|
||||
if (clearmaxy < starty + height)
|
||||
{
|
||||
// preserve end
|
||||
height = starty + height - clearmaxy;
|
||||
starty = clearmaxy;
|
||||
assert(height > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// destroy
|
||||
height = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// beginning can be preserved
|
||||
height = clearminy - starty;
|
||||
}
|
||||
|
||||
clearmaxy = 0;
|
||||
|
||||
assert((starty >= realy) && ((starty + height) <= (realy + realheight)));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#define TARGET_THRESH 0x500
|
||||
|
||||
extern int g_MaxTexWidth, g_MaxTexHeight; // Maximum height & width of supported texture.
|
||||
|
||||
//#define SORT_TARGETS
|
||||
// Moves the target at `it` from listTargets to the back of listClearedTargets
// and advances `it` to the following element of listTargets. When the cleared
// list grows past TEXDESTROY_THRESH entries its front entry is dropped.
// Returns the advanced iterator (the same value left in `it`).
inline list<CMemoryTarget>::iterator CMemoryTargetMngr::DestroyTargetIter(list<CMemoryTarget>::iterator& it)
{
	// Step past the victim before splicing it away.
	list<CMemoryTarget>::iterator victim = it++;

	listClearedTargets.splice(listClearedTargets.end(), listTargets, victim);

	if (listClearedTargets.size() > TEXDESTROY_THRESH)
		listClearedTargets.pop_front();

	return it;
}
|
||||
|
||||
// Compare target to current texture info
|
||||
// Not same format -> 1
|
||||
// Same format, not same data (clut only) -> 2
|
||||
// identical -> 0
|
||||
int CMemoryTargetMngr::CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize)
|
||||
{
|
||||
if (PSMT_ISCLUT(it->psm) != PSMT_ISCLUT(tex0.psm))
|
||||
return 1;
|
||||
|
||||
if (PSMT_ISCLUT(tex0.psm)) {
|
||||
if (it->psm != tex0.psm || it->cpsm != tex0.cpsm || it->clutsize != clutsize)
|
||||
return 1;
|
||||
|
||||
if (PSMT_IS32BIT(tex0.cpsm)) {
|
||||
if (Cmp_ClutBuffer_SavedClut<u32>((u32*)&it->clut[0], tex0.csa, clutsize))
|
||||
return 2;
|
||||
} else {
|
||||
if (Cmp_ClutBuffer_SavedClut<u16>((u16*)&it->clut[0], tex0.csa, clutsize))
|
||||
return 2;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (PSMT_IS16BIT(tex0.psm) != PSMT_IS16BIT(it->psm))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CMemoryTargetMngr::GetClutVariables(int& clutsize, const tex0Info& tex0)
|
||||
{
|
||||
clutsize = 0;
|
||||
|
||||
if (PSMT_ISCLUT(tex0.psm))
|
||||
{
|
||||
int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
|
||||
|
||||
if (PSMT_IS32BIT(tex0.cpsm))
|
||||
clutsize = min(entries, 256 - tex0.csa * 16) * 4;
|
||||
else
|
||||
clutsize = min(entries, 512 - tex0.csa * 16) * 2;
|
||||
}
|
||||
}
|
||||
|
||||
void CMemoryTargetMngr::GetMemAddress(int& start, int& end, const tex0Info& tex0)
|
||||
{
|
||||
int nbStart, nbEnd;
|
||||
GetRectMemAddressZero(nbStart, nbEnd, tex0.psm, tex0.tw, tex0.th, tex0.tbp0, tex0.tbw);
|
||||
assert(nbStart < nbEnd);
|
||||
nbEnd = min(nbEnd, MEMORY_END);
|
||||
|
||||
start = nbStart / (4 * GPU_TEXWIDTH);
|
||||
end = (nbEnd + GPU_TEXWIDTH * 4 - 1) / (4 * GPU_TEXWIDTH);
|
||||
assert(start < end);
|
||||
|
||||
}
|
||||
|
||||
// Looks through listTargets for a target that covers rows [start, end) and
// matches tex0 (see CompareTarget). Stale candidates met on the way are moved
// to the cleared list. On success the target's usedstamp is refreshed, its
// validatecount reset, and a pointer to it returned; otherwise returns NULL.
CMemoryTarget* CMemoryTargetMngr::SearchExistTarget(int start, int end, int clutsize, const tex0Info& tex0, int forcevalidate)
{
	list<CMemoryTarget>::iterator it = listTargets.begin();

	while (it != listTargets.end())
	{
		const bool coversRange = (it->starty <= start && it->starty + it->height >= end);

		if (!coversRange)
		{
#ifdef SORT_TARGETS
			if (it->starty >= end) break;
#endif
			++it;
			continue;
		}

		const int res = CompareTarget(it, tex0, clutsize);

		if (res == 1)
		{
			// Wrong format: retire the target once it has mismatched too often.
			if (it->validatecount++ > VALIDATE_THRESH)
			{
				it = DestroyTargetIter(it);

				if (listTargets.size() == 0) break;
			}
			else
			{
				++it;
			}

			continue;
		}

		if (res == 2)
		{
			// Same format, different CLUT contents: keep it, look further.
			++it;
			continue;
		}

		if (forcevalidate)
		{
			// do more validation checking. delete if not been used for a while
			const bool deleteBad = curstamp > it->usedstamp + FORCE_TEXDESTROY_THRESH;

			if (!it->ValidateTex(tex0, start, end, deleteBad))
			{
				if (it->height <= 0)
				{
					it = DestroyTargetIter(it);

					if (listTargets.size() == 0) break;
				}
				else
				{
					++it;
				}

				continue;
			}
		}

		// Usable target found.
		it->usedstamp = curstamp;
		it->validatecount = 0;

		return &(*it);
	}

	return NULL;
}
|
||||
|
||||
// Provides a target at the back of listTargets for the caller to fill in.
// A cleared target is recycled when its realheight, fmt, widthmult and channels
// all equal the request and PIXELS_PER_WORD(psm) equals `channels`; its
// validatecount is reset. Otherwise a default-constructed target is appended.
CMemoryTarget* CMemoryTargetMngr::ClearedTargetsSearch(int fmt, int widthmult, int channels, int height)
{
	list<CMemoryTarget>::iterator candidate = listClearedTargets.begin();

	for (; candidate != listClearedTargets.end(); ++candidate)
	{
		const bool sameLayout = (height == candidate->realheight) && (candidate->fmt == fmt) && (candidate->widthmult == widthmult) && (candidate->channels == channels);

		// check channels
		if (sameLayout && PIXELS_PER_WORD(candidate->psm) == channels) break;
	}

	if (candidate != listClearedTargets.end())
	{
		// Recycle: move the match back into the active list.
		listTargets.splice(listTargets.end(), listClearedTargets, candidate);

		CMemoryTarget* recycled = &listTargets.back();
		recycled->validatecount = 0;
		return recycled;
	}

	// Nothing reusable (or the cleared list is empty): create a new one.
	listTargets.push_back(CMemoryTarget());
	return &listTargets.back();
}
|
||||
|
||||
// Returns the memory target (GPU texture copy of a GS memory range) for tex0.
//
// An existing target is returned when SearchExistTarget() finds one. Otherwise
// a target is recycled or created through ClearedTargetsSearch(), the GS
// memory rows are copied into its CPU-side buffer, converted when needed
// (CLUT lookup, or 16 -> 32 bit expansion for PSMT16Z / PSMT16SZ), and uploaded
// into a rectangle texture.
//
// forcevalidate is forwarded to SearchExistTarget().
// Returns NULL when the texture cannot be created even after every other
// cached target has been released.
CMemoryTarget* CMemoryTargetMngr::GetMemoryTarget(const tex0Info& tex0, int forcevalidate)
{
	FUNCLOG
	int start, end, clutsize;

	GetClutVariables(clutsize, tex0);
	GetMemAddress(start, end, tex0);

	CMemoryTarget* it = SearchExistTarget(start, end, clutsize, tex0, forcevalidate);

	if (it != NULL) return it;

	// couldn't find so create
	CMemoryTarget* targ;

	u32 fmt;
	u32 internal_fmt;
	if (PSMT_ISHALF_STORAGE(tex0)) {
		// RGBA_5551 storage format
		fmt = GL_UNSIGNED_SHORT_1_5_5_5_REV;
		internal_fmt = GL_RGB5_A1;
	} else {
		// RGBA_8888 storage format
		fmt = GL_UNSIGNED_BYTE;
		internal_fmt = GL_RGBA;
	}

	int widthmult = 1, channels = 1;

	// If our texture is too big and could not be placed in 1 GPU texture. Pretty rare in modern cards.
	if ((g_MaxTexHeight < 4096) && (end - start > g_MaxTexHeight))
	{
		// In this rare case we made a texture of half height and place it on the screen.
		ZZLog::Debug_Log("Making a half height texture (end - start == 0x%x)", (end-start));
		widthmult = 2;
	}

	channels = PIXELS_PER_WORD(tex0.psm);

	// ClearedTargetsSearch always leaves the returned target at the back of listTargets.
	targ = ClearedTargetsSearch(fmt, widthmult, channels, end - start);

	if (targ->ptex != NULL)
	{
		assert(end - start <= targ->realheight && targ->fmt == fmt && targ->widthmult == widthmult);

		// good enough, so init
		targ->realy = targ->starty = start;
		targ->usedstamp = curstamp;
		targ->psm = tex0.psm;
		targ->cpsm = tex0.cpsm;
		targ->height = end - start;
	} else {
		// not initialized yet
		targ->fmt = fmt;
		targ->realy = targ->starty = start;
		targ->realheight = targ->height = end - start;
		targ->usedstamp = curstamp;
		targ->psm = tex0.psm;
		targ->cpsm = tex0.cpsm;
		targ->widthmult = widthmult;
		targ->channels = channels;
		targ->texH = (targ->realheight + widthmult - 1)/widthmult;
		targ->texW = GPU_TEXWIDTH * widthmult * channels;

		// alloc the mem
		targ->ptex = new CMemoryTarget::TEXTURE();
		targ->ptex->ref = 1;
	}

#if defined(ZEROGS_DEVBUILD)
	g_TransferredToGPU += MemorySize(channels * targ->height);
#endif

	// fill with data
	if (targ->ptex->memptr == NULL)
	{
		targ->ptex->memptr = (u8*)_aligned_malloc(MemorySize(targ->realheight), 16);
		assert(targ->ptex->ref > 0);
	}

	memcpy_amd(targ->ptex->memptr, MemoryAddress(targ->realy), MemorySize(targ->height));

	__aligned16 u8* ptexdata = NULL;
	// true when ptexdata is a temporary buffer owned by this function
	bool has_data = false;

	if (PSMT_ISCLUT(tex0.psm))
	{
		assert(clutsize > 0);

		// Local clut parameter
		targ->cpsm = tex0.cpsm;

		// Allocate a local clut array, replacing any array left over from a
		// previous use of this target (its size may differ).
		targ->clutsize = clutsize;
		if (targ->clut != NULL)
			_aligned_free(targ->clut);
		targ->clut = (u8*)_aligned_malloc(clutsize, 16);

		// texture parameter
		ptexdata = (u8*)_aligned_malloc(CLUT_PIXEL_SIZE(tex0.cpsm) * targ->texH * targ->texW, 16);
		has_data = true;

		u8* psrc = (u8*)(MemoryAddress(targ->realy));

		// Fill a local clut then build the real texture
		if (PSMT_IS32BIT(tex0.cpsm))
		{
			ClutBuffer_to_Array<u32>((u32*)targ->clut, tex0.csa, clutsize);
			Build_Clut_Texture<u32>(tex0.psm, targ->height, (u32*)targ->clut, psrc, (u32*)ptexdata);
		}
		else
		{
			ClutBuffer_to_Array<u16>((u16*)targ->clut, tex0.csa, clutsize);
			Build_Clut_Texture<u16>(tex0.psm, targ->height, (u16*)targ->clut, psrc, (u16*)ptexdata);
		}

		assert(targ->clutsize > 0);
	}
	else if (tex0.psm == PSMT16Z || tex0.psm == PSMT16SZ)
	{
		ptexdata = (u8*)_aligned_malloc(4 * targ->texH * targ->texW, 16);
		has_data = true;

		// needs to be 8 bit, use xmm for unpacking
		u16* dst = (u16*)ptexdata;
		u16* src = (u16*)(MemoryAddress(targ->realy));

#ifdef ZEROGS_SSE2
		assert(((u32)(uptr)dst) % 16 == 0);

		__m128i zero_128 = _mm_setzero_si128();
		// NOTE: future performance improvement
		// SSE4.1 support uncacheable load 128bits. Maybe it can
		// avoid some cache pollution
		// NOTE2: I create multiple _n variable to mimic the previous ASM behavior
		// but I'm not sure there are real gains.
		for (int i = targ->height * GPU_TEXWIDTH/16 ; i > 0 ; --i)
		{
			// Convert 16 bits pixels to 32bits (zero extended)
			// Batch 64 bytes (32 pixels) at once.
			__m128i pixels_1 = _mm_load_si128((__m128i*)src);
			__m128i pixels_2 = _mm_load_si128((__m128i*)(src+8));
			__m128i pixels_3 = _mm_load_si128((__m128i*)(src+16));
			__m128i pixels_4 = _mm_load_si128((__m128i*)(src+24));

			__m128i pix_low_1 = _mm_unpacklo_epi16(pixels_1, zero_128);
			__m128i pix_high_1 = _mm_unpackhi_epi16(pixels_1, zero_128);
			__m128i pix_low_2 = _mm_unpacklo_epi16(pixels_2, zero_128);
			__m128i pix_high_2 = _mm_unpackhi_epi16(pixels_2, zero_128);

			// Note: bypass cache
			_mm_stream_si128((__m128i*)dst, pix_low_1);
			_mm_stream_si128((__m128i*)(dst+8), pix_high_1);
			_mm_stream_si128((__m128i*)(dst+16), pix_low_2);
			_mm_stream_si128((__m128i*)(dst+24), pix_high_2);

			__m128i pix_low_3 = _mm_unpacklo_epi16(pixels_3, zero_128);
			__m128i pix_high_3 = _mm_unpackhi_epi16(pixels_3, zero_128);
			__m128i pix_low_4 = _mm_unpacklo_epi16(pixels_4, zero_128);
			__m128i pix_high_4 = _mm_unpackhi_epi16(pixels_4, zero_128);

			// Note: bypass cache
			_mm_stream_si128((__m128i*)(dst+32), pix_low_3);
			_mm_stream_si128((__m128i*)(dst+40), pix_high_3);
			_mm_stream_si128((__m128i*)(dst+48), pix_low_4);
			_mm_stream_si128((__m128i*)(dst+56), pix_high_4);

			src += 32;
			dst += 64;
		}
		// A fence is advised after non-temporal (_mm_stream) stores: the store
		// fence ensures the previous stores are finished before new ones execute.
		_mm_sfence();
#else // ZEROGS_SSE2

		for (int i = 0; i < targ->height; ++i)
		{
			for (int j = 0; j < GPU_TEXWIDTH; ++j)
			{
				dst[0] = src[0];
				dst[1] = 0;
				dst[2] = src[1];
				dst[3] = 0;
				dst += 4;
				src += 2;
			}
		}

#endif // ZEROGS_SSE2
	}
	else
	{
		ptexdata = targ->ptex->memptr;
		// We really don't want to deallocate memptr. As a reminder...
		has_data = false;
	}

	// create the texture
	GL_REPORT_ERRORD();

	assert(ptexdata != NULL);

	if (targ->ptex->tex == 0) glGenTextures(1, &targ->ptex->tex);

	glBindTexture(GL_TEXTURE_RECTANGLE_NV, targ->ptex->tex);

	TextureRect(internal_fmt, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata);

	while (glGetError() != GL_NO_ERROR)
	{
		// release resources until can create
		if (listClearedTargets.size() > 0)
		{
			listClearedTargets.pop_front();
		}
		else
		{
			// targ itself is an element of listTargets, so a size of one means
			// there is nothing else left to release. Calling DestroyOldest()
			// here would erase targ and leave the pointer dangling for the
			// retry and the error message below.
			if (listTargets.size() <= 1)
			{
				// texW/texH are presumably plain ints - TODO confirm against the
				// CMemoryTarget declaration.
				ZZLog::Error_Log("Failed to create %dx%d texture.", targ->texW, targ->texH);
				if (has_data) _aligned_free(ptexdata);

				// Drop the unusable target (it is the back element), leaving the
				// list empty as the caller of a failed request expects.
				listTargets.pop_back();
				return NULL;
			}

			// DestroyOldest() takes the first entry with the smallest usedstamp.
			// targ was just stamped with curstamp and sits at the back, so it
			// should not be selected while other entries remain (assumes no
			// entry carries a stamp newer than curstamp).
			DestroyOldest();
		}

		TextureRect(internal_fmt, targ->texW, targ->texH, GL_RGBA, fmt, ptexdata);
	}

	setRectWrap(GL_CLAMP);
	if (has_data) _aligned_free(ptexdata);

	assert(tex0.psm != 0xd);

	return targ;
}
|
||||
|
||||
void CMemoryTargetMngr::ClearRange(int nbStartY, int nbEndY)
|
||||
{
|
||||
FUNCLOG
|
||||
int starty = nbStartY / (4 * GPU_TEXWIDTH);
|
||||
int endy = (nbEndY + 4 * GPU_TEXWIDTH - 1) / (4 * GPU_TEXWIDTH);
|
||||
|
||||
for (list<CMemoryTarget>::iterator it = listTargets.begin(); it != listTargets.end();)
|
||||
{
|
||||
|
||||
if (it->starty < endy && (it->starty + it->height) > starty)
|
||||
{
|
||||
|
||||
// intersects, reduce valid texture mem (or totally delete texture)
|
||||
// there are 4 cases
|
||||
int miny = max(it->starty, starty);
|
||||
int maxy = min(it->starty + it->height, endy);
|
||||
assert(miny < maxy);
|
||||
|
||||
if (it->clearmaxy == 0)
|
||||
{
|
||||
it->clearminy = miny;
|
||||
it->clearmaxy = maxy;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (it->clearminy > miny) it->clearminy = miny;
|
||||
if (it->clearmaxy < maxy) it->clearmaxy = maxy;
|
||||
}
|
||||
}
|
||||
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
void CMemoryTargetMngr::DestroyCleared()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
for (list<CMemoryTarget>::iterator it = listClearedTargets.begin(); it != listClearedTargets.end();)
|
||||
{
|
||||
if (it->usedstamp < curstamp - (FORCE_TEXDESTROY_THRESH -1))
|
||||
{
|
||||
it = listClearedTargets.erase(it);
|
||||
continue;
|
||||
}
|
||||
|
||||
++it;
|
||||
}
|
||||
|
||||
if ((curstamp % FORCE_TEXDESTROY_THRESH) == 0)
|
||||
{
|
||||
// purge old targets every FORCE_TEXDESTROY_THRESH frames
|
||||
for (list<CMemoryTarget>::iterator it = listTargets.begin(); it != listTargets.end();)
|
||||
{
|
||||
if (it->usedstamp < curstamp - FORCE_TEXDESTROY_THRESH)
|
||||
{
|
||||
it = listTargets.erase(it);
|
||||
continue;
|
||||
}
|
||||
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
++curstamp;
|
||||
}
|
||||
|
||||
void CMemoryTargetMngr::DestroyOldest()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (listTargets.size() == 0)
|
||||
return;
|
||||
|
||||
list<CMemoryTarget>::iterator it, itbest;
|
||||
|
||||
it = itbest = listTargets.begin();
|
||||
|
||||
while (it != listTargets.end())
|
||||
{
|
||||
if (it->usedstamp < itbest->usedstamp) itbest = it;
|
||||
++it;
|
||||
}
|
||||
|
||||
listTargets.erase(itbest);
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -21,7 +21,7 @@
|
|||
// It draw picture direct on screen, so here we have interlacing and frame skipping.
|
||||
|
||||
//------------------ Includes
|
||||
#include "Util.h"
|
||||
#include "Util.h"
|
||||
#include "ZZoglCRTC.h"
|
||||
#include "GLWin.h"
|
||||
#include "ZZoglShaders.h"
|
||||
|
@ -58,6 +58,17 @@ extern void ZZDestroy();
|
|||
extern void ChangeDeviceSize(int nNewWidth, int nNewHeight);
|
||||
|
||||
extern GLuint vboRect;
|
||||
|
||||
// I'm making this variable global for the moment in the course of fiddling with the interlace code
|
||||
// to try and make it more straightforward.
|
||||
int interlace_mode = 0; // 0 - not interlacing, 1 - interlacing.
|
||||
bool bUsingStencil = false;
|
||||
|
||||
bool INTERLACE_COUNT()
|
||||
{
|
||||
return (interlace_mode && (gs.interlace == conf.interlace));
|
||||
}
|
||||
|
||||
// Adjusts vertex shader BitBltPos vector v to preserve aspect ratio. It used to emulate 4:3 or 16:9.
|
||||
void AdjustTransToAspect(float4& v)
|
||||
{
|
||||
|
@ -151,20 +162,20 @@ inline void FrameSavingHelper()
|
|||
}
|
||||
|
||||
// Function populated tex0Info[2] array
|
||||
inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
|
||||
inline void FrameObtainDispinfo(tex0Info* dispinfo)
|
||||
{
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
|
||||
if (!(*(u32*)(PMODE) & (1 << i)))
|
||||
if (!Circuit_Enabled(i))
|
||||
{
|
||||
dispinfo[i].tw = 0;
|
||||
dispinfo[i].th = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
|
||||
GSRegDISPLAY* pd = i ? DISPLAY2 : DISPLAY1;
|
||||
GSRegDISPFB* pfb = Dispfb_Reg(i);
|
||||
GSRegDISPLAY* pd = Display_Reg(i);
|
||||
|
||||
int magh = pd->MAGH + 1;
|
||||
int magv = pd->MAGV + 1;
|
||||
|
||||
|
@ -177,7 +188,8 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
|
|||
// hack!!
|
||||
// 2 * dispinfo[i].tw / dispinfo[i].th <= 1, metal slug 4
|
||||
|
||||
if (bInterlace && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(conf.settings().interlace_2x))
|
||||
// Note: This is what causes the double image if interlace is off on the Final Fantasy X-2 opening.
|
||||
if (interlace_mode && 2 * dispinfo[i].tw / dispinfo[i].th <= 1 && !(conf.settings().interlace_2x))
|
||||
{
|
||||
dispinfo[i].th >>= 1;
|
||||
}
|
||||
|
@ -187,9 +199,9 @@ inline void FrameObtainDispinfo(u32 bInterlace, tex0Info* dispinfo)
|
|||
extern bool s_bWriteDepth;
|
||||
|
||||
// Something should be done before Renderering the picture.
|
||||
inline void RenderStartHelper(u32 bInterlace)
|
||||
inline void RenderStartHelper()
|
||||
{
|
||||
if (conf.mrtdepth && pvs[8] == NULL)
|
||||
if (conf.mrtdepth && ZZshExistProgram(pvs[8]))
|
||||
{
|
||||
conf.mrtdepth = 0;
|
||||
s_bWriteDepth = false;
|
||||
|
@ -209,14 +221,13 @@ inline void RenderStartHelper(u32 bInterlace)
|
|||
vb[0].fba.fba = 0;
|
||||
vb[1].fba.fba = 0;
|
||||
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
|
||||
FB::Unbind(); // switch to the backbuffer
|
||||
|
||||
glViewport(0, 0, GLWin.backbuffer.w, GLWin.backbuffer.h);
|
||||
|
||||
// if interlace, only clear every other vsync
|
||||
if (!bInterlace)
|
||||
if (!interlace_mode)
|
||||
{
|
||||
//u32 color = COLOR_ARGB(0, BGCOLOR->R, BGCOLOR->G, BGCOLOR->B);
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
|
||||
}
|
||||
|
||||
|
@ -232,7 +243,7 @@ inline void RenderStartHelper(u32 bInterlace)
|
|||
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
if (bInterlace) g_PrevBitwiseTexX = -1; // reset since will be using
|
||||
if (interlace_mode) g_PrevBitwiseTexX = -1; // reset since will be using
|
||||
}
|
||||
|
||||
// Settings for interlace texture multiplied vector;
|
||||
|
@ -240,14 +251,14 @@ inline void RenderStartHelper(u32 bInterlace)
|
|||
// on image y coords. So if we write valpha.z * F + valpha.w + 0.5, it would be switching odd
|
||||
// and even strings at each frame.
|
||||
// valpha.x and y are used for image blending.
|
||||
inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTSHADER* prog)
|
||||
inline float4 RenderGetForClip(int psm, CRTC_TYPE render_type)
|
||||
{
|
||||
SetShaderCaller("RenderGetForClip");
|
||||
|
||||
FRAGMENTSHADER* prog = curr_pps(render_type);
|
||||
float4 valpha;
|
||||
// first render the current render targets, then from ptexMem
|
||||
|
||||
if (psm == 1)
|
||||
if (psm == PSMCT24)
|
||||
{
|
||||
valpha.x = 1;
|
||||
valpha.y = 0;
|
||||
|
@ -258,9 +269,9 @@ inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
|
|||
valpha.y = 1;
|
||||
}
|
||||
|
||||
if (bInterlace)
|
||||
if (interlace_mode)
|
||||
{
|
||||
if (interlace == (conf.interlace & 1))
|
||||
if (gs.interlace == (conf.interlace & 1))
|
||||
{
|
||||
// pass if odd
|
||||
valpha.z = 1.0f;
|
||||
|
@ -286,17 +297,21 @@ inline float4 RenderGetForClip(u32 bInterlace, int interlace, int psm, FRAGMENTS
|
|||
}
|
||||
|
||||
// Put interlaced texture in use for shader prog.
|
||||
// Note: if frame interlaced it's th is halved, so we should x2 it.
|
||||
inline void RenderCreateInterlaceTex(u32 bInterlace, int th, FRAGMENTSHADER* prog)
|
||||
// Note: if the frame is interlaced, its th is halved, so we should multiply it by 2.
|
||||
inline void RenderCreateInterlaceTex(int th, CRTC_TYPE render_type)
|
||||
{
|
||||
if (!bInterlace) return;
|
||||
FRAGMENTSHADER* prog;
|
||||
int interlacetex;
|
||||
|
||||
int interlacetex = CreateInterlaceTex(2 * th);
|
||||
if (!interlace_mode) return;
|
||||
|
||||
prog = curr_pps(render_type);
|
||||
interlacetex = CreateInterlaceTex(2 * th);
|
||||
|
||||
ZZshGLSetTextureParameter(prog->prog, prog->sInterlace, interlacetex, "Interlace");
|
||||
}
|
||||
|
||||
// Well, do blending setup prior to second pass of half-frame drawing
|
||||
// Do blending setup prior to second pass of half-frame drawing.
|
||||
inline void RenderSetupBlending()
|
||||
{
|
||||
// setup right blending
|
||||
|
@ -305,12 +320,14 @@ inline void RenderSetupBlending()
|
|||
|
||||
if (PMODE->MMOD)
|
||||
{
|
||||
// Use the ALP register for alpha blending.
|
||||
glBlendColorEXT(PMODE->ALP*(1 / 255.0f), PMODE->ALP*(1 / 255.0f), PMODE->ALP*(1 / 255.0f), 0.5f);
|
||||
s_srcrgb = GL_CONSTANT_COLOR_EXT;
|
||||
s_dstrgb = GL_ONE_MINUS_CONSTANT_COLOR_EXT;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use the alpha value of circuit 1 for alpha blending.
|
||||
s_srcrgb = GL_SRC_ALPHA;
|
||||
s_dstrgb = GL_ONE_MINUS_SRC_ALPHA;
|
||||
}
|
||||
|
@ -332,17 +349,19 @@ inline void RenderSetupBlending()
|
|||
// each frame could be drawn in two stages, so blending should be different for them
|
||||
inline void RenderSetupStencil(int i)
|
||||
{
|
||||
glStencilMask(1 << i);
|
||||
s_stencilmask = 1 << i;
|
||||
glStencilMask(s_stencilmask);
|
||||
GL_STENCILFUNC_SET();
|
||||
}
|
||||
|
||||
// do stencil check for each found target i -- texturing stage
|
||||
inline void RenderUpdateStencil(int i, bool* bUsingStencil)
|
||||
inline void RenderUpdateStencil(int i)
|
||||
{
|
||||
if (!(*bUsingStencil)) glClear(GL_STENCIL_BUFFER_BIT);
|
||||
|
||||
*bUsingStencil = 1;
|
||||
if (!bUsingStencil)
|
||||
{
|
||||
glClear(GL_STENCIL_BUFFER_BIT);
|
||||
bUsingStencil = true;
|
||||
}
|
||||
|
||||
glEnable(GL_STENCIL_TEST);
|
||||
GL_STENCILFUNC(GL_NOTEQUAL, 3, 1 << i);
|
||||
|
@ -351,16 +370,16 @@ inline void RenderUpdateStencil(int i, bool* bUsingStencil)
|
|||
}
|
||||
|
||||
// CRTC24 could not be rendered
|
||||
inline void RenderCRTC24helper(u32 bInterlace, int interlace, int psm)
|
||||
/*inline void RenderCRTC24helper(int psm)
|
||||
{
|
||||
ZZLog::Debug_Log("ZZogl: CRTC24!!! I'm trying to show something.");
|
||||
SetShaderCaller("RenderCRTC24helper");
|
||||
// assume that data is already in ptexMem (do Resolve?)
|
||||
RenderGetForClip(bInterlace, interlace, psm, &ppsCRTC24[bInterlace]);
|
||||
ZZshSetPixelShader(ppsCRTC24[bInterlace].prog);
|
||||
RenderGetForClip(psm, CRTC_RENDER_24);
|
||||
ZZshSetPixelShader(curr_ppsCRTC24()->prog);
|
||||
|
||||
DrawTriangleArray();
|
||||
}
|
||||
}*/
|
||||
|
||||
// Maybe I do this function global-defined. Calculate bits per pixel for
|
||||
// each psm. It's the only place with PSMCT16 which have a different bpp.
|
||||
|
@ -394,7 +413,7 @@ inline int RenderGetOffsets(int* dby, int* movy, tex0Info& texframe, CRenderTarg
|
|||
}
|
||||
|
||||
// BltBit shader calculate vertex (4 coord's pixel) position at the viewport.
|
||||
inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
|
||||
inline float4 RenderSetTargetBitPos(int dh, int th, int movy)
|
||||
{
|
||||
SetShaderCaller("RenderSetTargetBitPos");
|
||||
float4 v;
|
||||
|
@ -408,7 +427,7 @@ inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
|
|||
|
||||
AdjustTransToAspect(v);
|
||||
|
||||
if (isInterlace)
|
||||
if (INTERLACE_COUNT())
|
||||
{
|
||||
// move down by 1 pixel
|
||||
v.w += 1.0f / (float)dh ;
|
||||
|
@ -423,7 +442,7 @@ inline float4 RenderSetTargetBitPos(int dh, int th, int movy, bool isInterlace)
|
|||
// For example, use tw / X and tw / X magnify the viewport.
|
||||
// Interlaced output is a little out of VB; it can be seen as an evil blinking line on top
|
||||
// and bottom, so we try to remove it.
|
||||
inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool isInterlace)
|
||||
inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw)
|
||||
{
|
||||
SetShaderCaller("RenderSetTargetBitTex");
|
||||
|
||||
|
@ -432,7 +451,7 @@ inline float4 RenderSetTargetBitTex(float th, float tw, float dh, float dw, bool
|
|||
|
||||
// Incorrect Aspect ratio on interlaced frames
|
||||
|
||||
if (isInterlace)
|
||||
if (INTERLACE_COUNT())
|
||||
{
|
||||
v.y -= 1.0f / conf.height;
|
||||
v.w += 1.0f / conf.height;
|
||||
|
@ -455,10 +474,11 @@ inline float4 RenderSetTargetBitTrans(int th)
|
|||
|
||||
// use g_fInvTexDims to store inverse texture dims
|
||||
// It seems that the Targ shader does not use it.
|
||||
inline float4 RenderSetTargetInvTex(int bInterlace, int tw, int th, FRAGMENTSHADER* prog)
|
||||
inline float4 RenderSetTargetInvTex(int tw, int th, CRTC_TYPE render_type)
|
||||
{
|
||||
SetShaderCaller("RenderSetTargetInvTex");
|
||||
|
||||
FRAGMENTSHADER* prog = curr_pps(render_type);
|
||||
float4 v = float4(0, 0, 0, 0);
|
||||
|
||||
if (prog->sInvTexDims)
|
||||
|
@ -496,17 +516,20 @@ inline bool RenderLookForABetterTarget(int fbp, int tbp, list<CRenderTarget*>& l
|
|||
return false;
|
||||
}
|
||||
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace);
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit);
|
||||
|
||||
// First try to draw frame from targets.
|
||||
inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit)
|
||||
{
|
||||
// get the start and end addresses of the buffer
|
||||
int bpp = RenderGetBpp(texframe.psm);
|
||||
GSRegDISPFB* pfb = i ? DISPFB2 : DISPFB1;
|
||||
GSRegDISPFB* pfb = Dispfb_Reg(circuit);
|
||||
|
||||
int start, end;
|
||||
GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
|
||||
int tex_th = (interlace_mode) ? texframe.th * 2 : texframe.th;
|
||||
|
||||
//ZZLog::WriteLn("Render checking for targets, circuit %d", circuit);
|
||||
GetRectMemAddressZero(start, end, texframe.psm, texframe.tw, tex_th, texframe.tbp0, texframe.tbw);
|
||||
|
||||
// We need to share the list of targets between functions.
|
||||
s_RTs.GetTargs(start, end, listTargs);
|
||||
|
@ -517,10 +540,14 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
|
|||
|
||||
if (ptarg->fbw == texframe.tbw && !(ptarg->status&CRenderTarget::TS_NeedUpdate) && ((256 / bpp)*(texframe.tbp0 - ptarg->fbp)) % texframe.tbw == 0)
|
||||
{
|
||||
FRAGMENTSHADER* pps;
|
||||
int dby = pfb->DBY;
|
||||
int movy = 0;
|
||||
|
||||
if (RenderLookForABetterTarget(ptarg->fbp, texframe.tbp0, listTargs, it)) continue;
|
||||
if (RenderLookForABetterTarget(ptarg->fbp, texframe.tbp0, listTargs, it))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (g_bSaveFinalFrame) SaveTexture("frame1.tga", GL_TEXTURE_RECTANGLE_NV, ptarg->ptex, RW(ptarg->fbw), RH(ptarg->fbh));
|
||||
|
||||
|
@ -529,37 +556,42 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
|
|||
|
||||
if (dh >= 64)
|
||||
{
|
||||
|
||||
if (ptarg->fbh - dby < texframe.th - movy && !(*bUsingStencil))
|
||||
RenderUpdateStencil(i, bUsingStencil);
|
||||
else if (ptarg->fbh - dby > 2 * ( texframe.th - movy ))
|
||||
if (ptarg->fbh - dby < tex_th - movy && !bUsingStencil)
|
||||
{
|
||||
RenderUpdateStencil(circuit);
|
||||
}
|
||||
else if (ptarg->fbh - dby > 2 * ( tex_th - movy )) // I'm not sure this is needed any more.
|
||||
{
|
||||
// Sometimes the calculated onscreen position is misaligned, e.g. in the FFX-2 intro. In such cases some parts of the image are outside the
|
||||
// borders and we should move it manually.
|
||||
dby -= ((ptarg->fbh - dby) >> 2) - ((texframe.th + movy) >> 1) ;
|
||||
dby -= ((ptarg->fbh - dby) >> 2) - ((tex_th + movy) >> 1);
|
||||
}
|
||||
|
||||
SetShaderCaller("RenderCheckForTargets");
|
||||
|
||||
// Texture
|
||||
float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby), INTERLACE_COUNT);
|
||||
float4 v = RenderSetTargetBitTex((float)RW(texframe.tw), (float)RH(dh), (float)RW(pfb->DBX), (float)RH(dby));
|
||||
|
||||
// dest rect
|
||||
v = RenderSetTargetBitPos(dh, texframe.th, movy, INTERLACE_COUNT);
|
||||
v = RenderSetTargetBitPos(dh, texframe.th, movy);
|
||||
v = RenderSetTargetBitTrans(ptarg->fbh);
|
||||
v = RenderSetTargetInvTex(bInterlace, texframe.tbw, ptarg->fbh, &ppsCRTCTarg[bInterlace]) ; // FIXME. This is no use
|
||||
v = RenderSetTargetInvTex(texframe.tbw, ptarg->fbh, CRTC_RENDER_TARG); // FIXME. This is no use
|
||||
|
||||
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTCTarg[bInterlace]);
|
||||
float4 valpha = RenderGetForClip(texframe.psm, CRTC_RENDER_TARG);
|
||||
pps = curr_ppsCRTCTarg();
|
||||
|
||||
// inside vb[0]'s target area, so render that region only
|
||||
ZZshGLSetTextureParameter(ppsCRTCTarg[bInterlace].prog, ppsCRTCTarg[bInterlace].sFinal, ptarg->ptex, "CRTC target");
|
||||
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTCTarg[bInterlace]);
|
||||
ZZshGLSetTextureParameter(pps->prog, pps->sFinal, ptarg->ptex, "CRTC target");
|
||||
RenderCreateInterlaceTex(texframe.th, CRTC_RENDER_TARG);
|
||||
|
||||
ZZshSetPixelShader(ppsCRTCTarg[bInterlace].prog);
|
||||
ZZshSetPixelShader(pps->prog);
|
||||
|
||||
DrawTriangleArray();
|
||||
|
||||
if (abs(dh - (int)texframe.th) <= 1) return;
|
||||
if (abs(dh - (int)texframe.th) <= 1)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (abs(dh - (int)ptarg->fbh) <= 1)
|
||||
{
|
||||
|
@ -571,14 +603,14 @@ inline void RenderCheckForTargets(tex0Info& texframe, list<CRenderTarget*>& list
|
|||
|
||||
++it;
|
||||
}
|
||||
RenderCheckForMemory(texframe, listTargs, i, bUsingStencil, interlace, bInterlace);
|
||||
RenderCheckForMemory(texframe, listTargs, circuit);
|
||||
}
|
||||
|
||||
|
||||
// The same as the previous, but from memory.
|
||||
// If you ever wondered why a picture from a minute ago suddenly flashes on the screen (say, in Mana Khemia),
|
||||
// this is the function that does it.
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int i, bool* bUsingStencil, int interlace, int bInterlace)
|
||||
inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listTargs, int circuit)
|
||||
{
|
||||
float4 v;
|
||||
|
||||
|
@ -588,9 +620,9 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
}
|
||||
|
||||
// context has to be 0
|
||||
if (bInterlace >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault.");
|
||||
if (interlace_mode >= 2) ZZLog::Error_Log("CRCR Check for memory shader fault.");
|
||||
|
||||
//if (!(*bUsingStencil)) RenderUpdateStencil(i, bUsingStencil);
|
||||
//if (!bUsingStencil) RenderUpdateStencil(i);
|
||||
|
||||
SetShaderCaller("RenderCheckForMemory");
|
||||
|
||||
|
@ -601,7 +633,7 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
h1 = texframe.th;
|
||||
w2 = -0.5f;
|
||||
h2 = -0.5f;
|
||||
SetTexVariablesInt(0, 2, texframe, false, &ppsCRTC[bInterlace], 1);
|
||||
SetTexVariablesInt(0, 2, texframe, false, curr_ppsCRTC(), 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -609,24 +641,24 @@ inline void RenderCheckForMemory(tex0Info& texframe, list<CRenderTarget*>& listT
|
|||
h1 = 1;
|
||||
w2 = -0.5f / (float)texframe.tw;
|
||||
h2 = -0.5f / (float)texframe.th;
|
||||
SetTexVariablesInt(0, 0, texframe, false, &ppsCRTC[bInterlace], 1);
|
||||
SetTexVariablesInt(0, 0, texframe, false, curr_ppsCRTC(), 1);
|
||||
}
|
||||
|
||||
if (g_bSaveFinalFrame) SaveTex(&texframe, g_bSaveFinalFrame - 1 > 0);
|
||||
|
||||
// Fixme: Why is this here?
|
||||
// We should probably call RenderSetTargetBitTex instead.
|
||||
v = RenderSetTargetBitTex(w1, h1, w2, h2, INTERLACE_COUNT);
|
||||
v = RenderSetTargetBitTex(w1, h1, w2, h2);
|
||||
|
||||
// finally render from the memory (note that the stencil buffer will keep previous regions)
|
||||
v = RenderSetTargetBitPos(1, 1, 0, INTERLACE_COUNT);
|
||||
v = RenderSetTargetBitPos(1, 1, 0);
|
||||
v = RenderSetTargetBitTrans(texframe.th);
|
||||
v = RenderSetTargetInvTex(bInterlace, texframe.tw, texframe.th, &ppsCRTC[bInterlace]);
|
||||
float4 valpha = RenderGetForClip(bInterlace, interlace, texframe.psm, &ppsCRTC[bInterlace]);
|
||||
v = RenderSetTargetInvTex(texframe.tw, texframe.th, CRTC_RENDER);
|
||||
float4 valpha = RenderGetForClip(texframe.psm, CRTC_RENDER);
|
||||
|
||||
ZZshGLSetTextureParameter(ppsCRTC[bInterlace].prog, ppsCRTC[bInterlace].sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
|
||||
RenderCreateInterlaceTex(bInterlace, texframe.th, &ppsCRTC[bInterlace]);
|
||||
ZZshSetPixelShader(ppsCRTC[bInterlace].prog);
|
||||
ZZshGLSetTextureParameter(curr_ppsCRTC()->prog, curr_ppsCRTC()->sMemory, vb[0].pmemtarg->ptex->tex, "CRTC memory");
|
||||
RenderCreateInterlaceTex(texframe.th, CRTC_RENDER_TARG);
|
||||
ZZshSetPixelShader(curr_ppsCRTC()->prog);
|
||||
|
||||
DrawTriangleArray();
|
||||
}
|
||||
|
@ -657,7 +689,7 @@ inline void DisplayFPS()
|
|||
DrawText(str, left, top, 0xffc0ffff);
|
||||
}
|
||||
|
||||
// SnapeShoot helper
|
||||
// Snapshot helper
|
||||
inline void MakeSnapshot()
|
||||
{
|
||||
|
||||
|
@ -694,7 +726,7 @@ void ZZReset()
|
|||
s_nLastResolveReset = 0;
|
||||
|
||||
icurctx = -1;
|
||||
g_vsprog = g_psprog = 0;
|
||||
g_vsprog = g_psprog = sZero;
|
||||
|
||||
ZZGSStateReset();
|
||||
ZZDestroy();
|
||||
|
@ -759,12 +791,10 @@ inline void AfterRendererUnimportantJob()
|
|||
maxmin = 608;
|
||||
}
|
||||
|
||||
extern u32 s_uFramebuffer;
|
||||
|
||||
// Switch framebuffers
|
||||
inline void AfterRendererSwitchBackToTextures()
|
||||
{
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
|
||||
FB::Bind();
|
||||
|
||||
g_MemTargs.DestroyCleared();
|
||||
|
||||
|
@ -815,18 +845,6 @@ inline void AfterRendererAutoresetTargets()
|
|||
|
||||
s_RTs.ResolveAll();
|
||||
return;
|
||||
// s_RTs.Destroy();
|
||||
// s_DepthRTs.ResolveAll();
|
||||
// s_DepthRTs.Destroy();
|
||||
//
|
||||
// vb[0].prndr = NULL;
|
||||
// vb[0].pdepth = NULL;
|
||||
// vb[0].bNeedFrameCheck = 1;
|
||||
// vb[0].bNeedZCheck = 1;
|
||||
// vb[1].prndr = NULL;
|
||||
// vb[1].pdepth = NULL;
|
||||
// vb[1].bNeedFrameCheck = 1;
|
||||
// vb[1].bNeedZCheck = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -840,49 +858,59 @@ inline void AfterRendererAutoresetTargets()
|
|||
}
|
||||
|
||||
int count = 0;
|
||||
|
||||
// The main renderer function
|
||||
void RenderCRTC(int interlace)
|
||||
void RenderCRTC()
|
||||
{
|
||||
if (FrameSkippingHelper()) return;
|
||||
|
||||
u32 bInterlace = SMODE2->INT && SMODE2->FFMD && (conf.interlace < 2);
|
||||
|
||||
RenderStartHelper(bInterlace);
|
||||
|
||||
bool bUsingStencil = false;
|
||||
tex0Info dispinfo[2];
|
||||
|
||||
FrameObtainDispinfo(bInterlace, dispinfo);
|
||||
if (FrameSkippingHelper()) return;
|
||||
|
||||
// If we are in frame mode and interlacing, and we haven't forced interlacing off, interlace_mode is 1.
|
||||
interlace_mode = SMODE2->INT && SMODE2->FFMD && (conf.interlace < 2);
|
||||
bUsingStencil = false;
|
||||
|
||||
RenderStartHelper();
|
||||
|
||||
FrameObtainDispinfo(dispinfo);
|
||||
|
||||
// start from the last circuit
|
||||
for (int i = !PMODE->SLBG; i >= 0; --i)
|
||||
{
|
||||
if (!Circuit_Enabled(i)) continue;
|
||||
tex0Info& texframe = dispinfo[i];
|
||||
|
||||
if (texframe.th <= 1) continue;
|
||||
// I don't think this is necessary, now that we make sure the circuit we are working with is enabled.
|
||||
/*if (texframe.th <= 1)
|
||||
{
|
||||
continue;
|
||||
}*/
|
||||
|
||||
if (SMODE2->INT && SMODE2->FFMD)
|
||||
{
|
||||
texframe.th >>= 1;
|
||||
|
||||
// Final Fantasy X-2 issue here.
|
||||
if (conf.interlace == 2 && texframe.th >= 512)
|
||||
/*if (conf.interlace == 2 && texframe.th >= 512)
|
||||
{
|
||||
texframe.th >>= 1;
|
||||
}*/
|
||||
}
|
||||
|
||||
if (i == 0) RenderSetupBlending();
|
||||
if (bUsingStencil) RenderSetupStencil(i);
|
||||
|
||||
if (texframe.psm == 0x12)
|
||||
/*if (texframe.psm == 0x12) // Probably broken - 0x12 isn't a valid psm. 24 bit is 1.
|
||||
{
|
||||
RenderCRTC24helper(bInterlace, interlace, texframe.psm);
|
||||
RenderCRTC24helper(texframe.psm);
|
||||
continue;
|
||||
}
|
||||
}*/
|
||||
|
||||
// We share the target list between two functions, so declare it here.
|
||||
list<CRenderTarget*> listTargs;
|
||||
|
||||
// if we could not draw image from target's do it from memory
|
||||
RenderCheckForTargets(texframe, listTargs, i, &bUsingStencil, interlace, bInterlace);
|
||||
// If we could not draw the image from the targets, do it from memory.
|
||||
RenderCheckForTargets(texframe, listTargs, i);
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
|
|
|
@ -21,11 +21,8 @@
|
|||
#define ZZOGLCRTC_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "targets.h"
|
||||
|
||||
#define INTERLACE_COUNT (bInterlace && interlace == (conf.interlace))
|
||||
|
||||
#ifdef _WIN32
|
||||
extern HDC hDC; // Private GDI Device Context
|
||||
extern HGLRC hRC; // Permanent Rendering Context
|
||||
|
|
|
@ -35,42 +35,6 @@
|
|||
# include "Win32.h"
|
||||
#endif
|
||||
|
||||
//------------------ Defines
|
||||
|
||||
#ifdef _WIN32
|
||||
#define GL_LOADFN(name) { \
|
||||
if( (*(void**)&name = (void*)wglGetProcAddress(#name)) == NULL ) { \
|
||||
ZZLog::Error_Log("Failed to find %s, exiting.", #name); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
// let GLEW take care of it
|
||||
#define GL_LOADFN(name)
|
||||
#endif
|
||||
|
||||
#define GL_BLEND_RGB(src, dst) { \
|
||||
s_srcrgb = src; \
|
||||
s_dstrgb = dst; \
|
||||
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
|
||||
}
|
||||
|
||||
#define GL_BLEND_ALPHA(src, dst) { \
|
||||
s_srcalpha = src; \
|
||||
s_dstalpha = dst; \
|
||||
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
|
||||
}
|
||||
|
||||
#define GL_BLEND_ALL(srcrgb, dstrgb, srcalpha, dstalpha) { \
|
||||
s_srcrgb = srcrgb; \
|
||||
s_dstrgb = dstrgb; \
|
||||
s_srcalpha = srcalpha; \
|
||||
s_dstalpha = dstalpha; \
|
||||
zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha); \
|
||||
}
|
||||
|
||||
#define GL_BLEND_SET() zgsBlendFuncSeparateEXT(s_srcrgb, s_dstrgb, s_srcalpha, s_dstalpha)
|
||||
#define VB_NUMBUFFERS 512
|
||||
|
||||
// ----------------- Types
|
||||
typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
|
||||
|
||||
|
@ -81,8 +45,8 @@ extern bool ZZshLoadExtraEffects();
|
|||
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
||||
|
||||
GLuint vboRect = 0;
|
||||
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
||||
int g_nCurVBOIndex = 0;
|
||||
GLuint g_vboBuffers[VB_NUMBUFFERS]; // VBOs for all drawing commands
|
||||
u32 g_nCurVBOIndex = 0;
|
||||
|
||||
inline bool CreateImportantCheck();
|
||||
inline void CreateOtherCheck();
|
||||
|
@ -125,10 +89,10 @@ void (APIENTRY *zgsBlendFuncSeparateEXT)(GLenum, GLenum, GLenum, GLenum) = NULL;
|
|||
extern u8* s_lpShaderResources;
|
||||
|
||||
// Strings for the shader file in developer mode
|
||||
#ifdef ZEROGS_DEVBUILD
|
||||
//#ifdef ZEROGS_DEVBUILD
|
||||
char* EFFECT_NAME = "";
|
||||
char* EFFECT_DIR = "";
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
/////////////////////
|
||||
// graphics resources
|
||||
|
@ -143,12 +107,17 @@ GLenum g_internalRGBAFloat16Fmt = GL_RGBA_FLOAT16_ATI;
|
|||
u32 ptexLogo = 0;
|
||||
int nLogoWidth, nLogoHeight;
|
||||
u32 s_ptexInterlace = 0; // holds interlace fields
|
||||
static bool vb_buffer_allocated = false;
|
||||
|
||||
//------------------ Global Variables
|
||||
int GPU_TEXWIDTH = 512;
|
||||
float g_fiGPU_TEXWIDTH = 1/512.0f;
|
||||
int g_MaxTexWidth = 4096, g_MaxTexHeight = 4096;
|
||||
u32 s_uFramebuffer = 0;
|
||||
|
||||
namespace FB
|
||||
{
|
||||
u32 buf = 0;
|
||||
};
|
||||
|
||||
RasterFont* font_p = NULL;
|
||||
float g_fBlockMult = 1;
|
||||
|
@ -157,7 +126,7 @@ float g_fBlockMult = 1;
|
|||
u32 ptexBlocks = 0, ptexConv16to32 = 0; // holds information on block tiling
|
||||
u32 ptexBilinearBlocks = 0;
|
||||
u32 ptexConv32to16 = 0;
|
||||
int g_nDepthBias = 0;
|
||||
// int g_nDepthBias = 0;
|
||||
|
||||
extern void Delete_Avi_Capture();
|
||||
extern void ZZDestroy();
|
||||
|
@ -505,7 +474,12 @@ bool ZZCreate(int _width, int _height)
|
|||
GPU_TEXWIDTH = min (g_MaxTexWidth/8, 1024);
|
||||
g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH;
|
||||
|
||||
// FIXME: not clean; maybe reintegrate the function into the shader files --greg
|
||||
#ifndef GLSL_API
|
||||
if (!CreateOpenShadersFile()) return false;
|
||||
#else
|
||||
if (!ZZshCreateOpenShadersFile()) return false;
|
||||
#endif
|
||||
|
||||
GL_REPORT_ERROR();
|
||||
|
||||
|
@ -520,16 +494,16 @@ bool ZZCreate(int _width, int _height)
|
|||
|
||||
if (err != GL_NO_ERROR) bSuccess = false;
|
||||
|
||||
glGenFramebuffersEXT(1, &s_uFramebuffer);
|
||||
FB::Create();
|
||||
|
||||
if (s_uFramebuffer == 0)
|
||||
if (FB::buf == 0)
|
||||
{
|
||||
ZZLog::Error_Log("Failed to create the renderbuffer.");
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer);
|
||||
FB::Bind();
|
||||
|
||||
DrawBuffers(s_drawbuffers);
|
||||
|
||||
|
@ -600,14 +574,15 @@ bool ZZCreate(int _width, int _height)
|
|||
|
||||
g_nCurVBOIndex = 0;
|
||||
|
||||
g_vboBuffers.resize(VB_NUMBUFFERS);
|
||||
glGenBuffers((GLsizei)g_vboBuffers.size(), &g_vboBuffers[0]);
|
||||
|
||||
for (int i = 0; i < (int)g_vboBuffers.size(); ++i)
|
||||
if (!vb_buffer_allocated) {
|
||||
glGenBuffers((GLsizei)ArraySize(g_vboBuffers), g_vboBuffers);
|
||||
for (int i = 0; i < ArraySize(g_vboBuffers); ++i)
|
||||
{
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_vboBuffers[i]);
|
||||
glBufferData(GL_ARRAY_BUFFER, 0x100*sizeof(VertexGPU), NULL, GL_STREAM_DRAW);
|
||||
}
|
||||
vb_buffer_allocated = true; // mark the buffer allocated
|
||||
}
|
||||
|
||||
GL_REPORT_ERROR();
|
||||
if (err != GL_NO_ERROR) bSuccess = false;
|
||||
|
@ -616,6 +591,11 @@ bool ZZCreate(int _width, int _height)
|
|||
g_fBlockMult = 1;
|
||||
bool do_not_use_billinear = false;
|
||||
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
FillAlowedPsnTable();
|
||||
FillBlockTables();
|
||||
#endif
|
||||
|
||||
vector<char> vBlockData, vBilinearData;
|
||||
BLOCK::FillBlocks(vBlockData, vBilinearData, 1);
|
||||
|
||||
|
@ -781,7 +761,7 @@ bool ZZCreate(int _width, int _height)
|
|||
// This was changed in SetAA - should we be changing it back?
|
||||
glPointSize(1.0f);
|
||||
|
||||
g_nDepthBias = 0;
|
||||
// g_nDepthBias = 0;
|
||||
|
||||
glEnable(GL_POLYGON_OFFSET_FILL);
|
||||
glEnable(GL_POLYGON_OFFSET_LINE);
|
||||
|
@ -791,7 +771,7 @@ bool ZZCreate(int _width, int _height)
|
|||
vb[0].Init(VB_BUFFERSIZE);
|
||||
vb[1].Init(VB_BUFFERSIZE);
|
||||
|
||||
g_vsprog = g_psprog = 0;
|
||||
g_vsprog = g_psprog = sZero;
|
||||
|
||||
if (glGetError() == GL_NO_ERROR)
|
||||
{
|
||||
|
@ -823,10 +803,10 @@ void ZZDestroy()
|
|||
vb[0].Destroy();
|
||||
vb[1].Destroy();
|
||||
|
||||
if (g_vboBuffers.size() > 0)
|
||||
if (vb_buffer_allocated)
|
||||
{
|
||||
glDeleteBuffers((GLsizei)g_vboBuffers.size(), &g_vboBuffers[0]);
|
||||
g_vboBuffers.clear();
|
||||
glDeleteBuffers((GLsizei)ArraySize(g_vboBuffers), g_vboBuffers);
|
||||
vb_buffer_allocated = false; // mark the buffer unallocated
|
||||
}
|
||||
|
||||
g_nCurVBOIndex = 0;
|
||||
|
@ -864,8 +844,8 @@ void ZZDestroy()
|
|||
SAFE_RELEASE_PROG(ppsCRTCTarg[1].prog);
|
||||
SAFE_RELEASE_PROG(ppsCRTC[0].prog);
|
||||
SAFE_RELEASE_PROG(ppsCRTC[1].prog);
|
||||
SAFE_RELEASE_PROG(ppsCRTC24[0].prog);
|
||||
SAFE_RELEASE_PROG(ppsCRTC24[1].prog);
|
||||
// SAFE_RELEASE_PROG(ppsCRTC24[0].prog);
|
||||
// SAFE_RELEASE_PROG(ppsCRTC24[1].prog);
|
||||
SAFE_RELEASE_PROG(ppsOne.prog);
|
||||
|
||||
safe_delete(font_p);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,123 @@
|
|||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef ZZOGLFLUSH_H_INCLUDED
|
||||
#define ZZOGLFLUSH_H_INCLUDED
|
||||
|
||||
#ifndef ZEROGS_DEVBUILD
|
||||
|
||||
#define INC_GENVARS()
|
||||
#define INC_TEXVARS()
|
||||
#define INC_ALPHAVARS()
|
||||
#define INC_RESOLVE()
|
||||
|
||||
#define g_bUpdateEffect 0
|
||||
#define g_bSaveTex 0
|
||||
#define g_bSaveResolved 0
|
||||
|
||||
#else // defined(ZEROGS_DEVBUILD)
|
||||
|
||||
#define INC_GENVARS() ++g_nGenVars
|
||||
#define INC_TEXVARS() ++g_nTexVars
|
||||
#define INC_ALPHAVARS() ++g_nAlphaVars
|
||||
#define INC_RESOLVE() ++g_nResolve
|
||||
|
||||
extern bool g_bUpdateEffect;
|
||||
extern bool g_bSaveTex; // saves the current texture
|
||||
extern bool g_bSaveResolved;
|
||||
#endif // !defined(ZEROGS_DEVBUILD)
|
||||
|
||||
// Single-bit flags (1, 2, 4, 8), so values can be combined with bitwise OR.
// NOTE(review): the names suggest these are bits kept in the stencil buffer -- confirm against the users of this enum.
enum StencilBits
|
||||
{
|
||||
STENCIL_ALPHABIT = 1, // if set, dest alpha >= 0x80
|
||||
STENCIL_PIXELWRITE = 2, // if set, pixel just written (reset after every Flush)
|
||||
STENCIL_FBA = 4, // if set, just written pixel's alpha >= 0 (reset after every Flush)
|
||||
STENCIL_SPECIAL = 8 // if set, indicates that pixel passed its alpha test (reset after every Flush)
|
||||
// Bit 16 has no enumerator while the entry below stays commented out.
//STENCIL_PBE = 16
|
||||
};
|
||||
#define STENCIL_CLEAR (2|4|8|16)
|
||||
|
||||
// One bit per color channel; the GL_COLORMASK(mask) macro tests each bit
// and passes the four results to glColorMask.
enum ColorMask
|
||||
{
|
||||
COLORMASK_RED = 1,
|
||||
COLORMASK_GREEN = 2,
|
||||
COLORMASK_BLUE = 4,
|
||||
COLORMASK_ALPHA = 8
|
||||
|
||||
};
|
||||
#define GL_COLORMASK(mask) glColorMask(!!((mask)&COLORMASK_RED), !!((mask)&COLORMASK_GREEN), !!((mask)&COLORMASK_BLUE), !!((mask)&COLORMASK_ALPHA))
|
||||
|
||||
// extern int g_nDepthBias;
|
||||
extern float g_fBlockMult; // used for old cards, that do not support Alpha-32float textures. We store block data in u16 and use it.
|
||||
extern u32 g_nCurVBOIndex;
|
||||
extern u8* g_pbyGSClut;
|
||||
extern int ppf;
|
||||
|
||||
extern bool s_bTexFlush;
|
||||
|
||||
extern vector<u32> s_vecTempTextures; // temporary textures, released at the end of every frame
|
||||
extern GLuint g_vboBuffers[VB_NUMBUFFERS]; // VBOs for all drawing commands
|
||||
extern CRangeManager s_RangeMngr; // manages overwritten memory // zz
|
||||
|
||||
#if 0
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
u8 _bNeedAlphaColor; // set if vAlphaBlendColor needs to be set
|
||||
u8 _b2XAlphaTest; // Only valid when bNeedAlphaColor is set. if 1st bit set set, double all alpha testing values
|
||||
// otherwise alpha testing needs to be done separately.
|
||||
u8 _bDestAlphaColor; // set to 1 if blending with dest color (process only one tri at a time). If 2, dest alpha is always 1.
|
||||
u8 _bAlphaClamping; // if first bit is set, do min; if second bit, do max
|
||||
};
|
||||
|
||||
u32 _bAlphaState;
|
||||
} g_flag_vars;
|
||||
|
||||
extern g_flag_vars g_vars;
|
||||
#endif
|
||||
|
||||
//#define bNeedAlphaColor g_vars._bNeedAlphaColor
|
||||
//#define b2XAlphaTest g_vars._b2XAlphaTest
|
||||
//#define bDestAlphaColor g_vars._bDestAlphaColor
|
||||
//#define bAlphaClamping g_vars._bAlphaClamping
|
||||
|
||||
void FlushTransferRanges(const tex0Info* ptex); //zz
|
||||
|
||||
// use to update the state
|
||||
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
|
||||
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
|
||||
void SetAlphaVariables(const alphaInfo& ainfo); // zzz
|
||||
//void ResetAlphaVariables();
|
||||
|
||||
inline void SetAlphaTestInt(pixTest curtest);
|
||||
|
||||
inline void RenderAlphaTest(const VB& curvb, ZZshParameter sOneColor);
|
||||
inline void RenderStencil(const VB& curvb, u32 dwUsingSpecialTesting);
|
||||
inline void ProcessStencil(const VB& curvb);
|
||||
inline void RenderFBA(const VB& curvb, ZZshParameter sOneColor);
|
||||
inline void ProcessFBA(const VB& curvb, ZZshParameter sOneColor); // zz
|
||||
|
||||
void SetContextTarget(int context);
|
||||
|
||||
void SetWriteDepth();
|
||||
bool IsWriteDepth();
|
||||
void SetDestAlphaTest();
|
||||
|
||||
#endif // ZZOGLFLUSH_H_INCLUDED
|
|
@ -212,7 +212,7 @@ typedef Vector4<float> float4;
|
|||
|
||||
// Reimplement, swiping a bunch of code from GSdx and adapting it. (specifically GSVector.h)
|
||||
// This doesn't include more than half of the functions in there, as well as some of the structs...
|
||||
#include <xmmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "Pcsx2Types.h"
|
||||
|
||||
|
|
|
@ -0,0 +1,564 @@
|
|||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include "GS.h"
|
||||
#include "Util.h"
|
||||
#include "ZZoglMem.h"
|
||||
#include "targets.h"
|
||||
#include "x86.h"
|
||||
|
||||
#include "Mem_Swizzle.h"
|
||||
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
|
||||
bool allowed_psm[256] = {false, }; // Sometimes we got strange unknown psm
|
||||
PSM_value PSM_value_Table[64] = {PSMT_BAD_PSM, }; // for int -> PSM_value
|
||||
|
||||
// Returns an array of pointers to the rows of the table.
|
||||
// We SHOULD allocate memory for the u32** -- otherwise we have a lot of trouble!
|
||||
// If bh and bw are set correctly as the dimensions of the table, then the array holds pointers
|
||||
// to the table rows, so array[i][j] == table[i][j].
|
||||
// Builds a row-pointer view over a flat table: allocates bh pointers with malloc
// and points entry i at &table[i * bw]. The table data itself is not copied.
// The malloc result is not checked here, and the pointer array is returned to the caller unfreed.
inline u32** InitTable(int bh, int bw, u32* table) {
|
||||
u32** array = (u32**)malloc(bh * sizeof(u32*));
|
||||
for (int i = 0; i < bh; i++) {
|
||||
// Row i begins i * bw elements into the flat table.
array[i] = &table[i * bw];
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
// initialize dynamic arrays (u32**) for each regular psm.
|
||||
inline void SetTable(int psm) {
|
||||
switch (psm) {
|
||||
case PSMCT32:
|
||||
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32[0][0]);
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
||||
break;
|
||||
|
||||
case PSMCT24:
|
||||
g_pageTable[psm] = g_pageTable[PSMCT32];;
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
||||
break;
|
||||
|
||||
case PSMCT16:
|
||||
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16[0][0]);
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16S[0][0]);
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16S[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
g_pageTable[psm] = InitTable( 64, 128, &g_pageTable8[0][0]);
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
|
||||
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT8H:
|
||||
g_pageTable[psm] = g_pageTable[PSMCT32];
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable8[0][0]);
|
||||
g_columnTable[psm] = InitTable( 16, 16, &g_columnTable8[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT4:
|
||||
g_pageTable[psm] = InitTable(128, 128, &g_pageTable4[0][0]);
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
|
||||
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
case PSMT4HH:
|
||||
g_pageTable[psm] = g_pageTable[PSMCT32];
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable4[0][0]);
|
||||
g_columnTable[psm] = InitTable( 16, 32, &g_columnTable4[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT32Z:
|
||||
g_pageTable[psm] = InitTable( 32, 64, &g_pageTable32Z[0][0]);
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT24Z:
|
||||
g_pageTable[psm] = g_pageTable[PSMT32Z];
|
||||
g_blockTable[psm] = InitTable( 4, 8, &g_blockTable32Z[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 8, &g_columnTable32[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16Z[0][0]);
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16Z[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
g_pageTable[psm] = InitTable( 64, 64, &g_pageTable16SZ[0][0]);
|
||||
g_blockTable[psm] = InitTable( 8, 4, &g_blockTable16SZ[0][0]);
|
||||
g_columnTable[psm] = InitTable( 8, 16, &g_columnTable16[0][0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// After this, the function arrays with u32** have memory set and filled.
|
||||
void FillBlockTables() {
|
||||
for (int i = 0; i < MAX_PSM; i++)
|
||||
SetTable(i);
|
||||
}
|
||||
|
||||
// Deallocate memory for u32** arrays.
|
||||
void DestroyBlockTables() {
|
||||
for (int i = 0; i < MAX_PSM; i++) {
|
||||
if (g_pageTable[i] != NULL && (i != PSMT8H && i != PSMT4HL && i != PSMT4HH && i != PSMCT24 && i != PSMT24Z))
|
||||
free(g_pageTable[i]);
|
||||
|
||||
if (g_blockTable[i] != NULL)
|
||||
free(g_blockTable[i]);
|
||||
|
||||
if (g_columnTable[i] != NULL)
|
||||
free(g_columnTable[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void FillNewPageTable() {
|
||||
int k = 0;
|
||||
for (int psm = 0; psm < MAX_PSM; psm ++)
|
||||
if (allowed_psm[psm]) {
|
||||
for (u32 i = 0; i < 127; i++)
|
||||
for(u32 j = 0; j < 127; j++) {
|
||||
u32 address;
|
||||
u32 shift;
|
||||
|
||||
address = g_pageTable[psm][i & ZZ_DT[psm][3]][j & ZZ_DT[psm][4]];
|
||||
shift = (((address << ZZ_DT[psm][5]) & 0x7 ) << 3)+ ZZ_DT[psm][7]; // last part is for 8H, 4HL and 4HH -- they have data from 24 and 28 byte
|
||||
g_pageTable2[k][i][j] = (address >> ZZ_DT[psm][0]) + (shift << 16); // now lower 16 byte of page table is 32-bit aligned address, and upper --
|
||||
// shift.
|
||||
}
|
||||
g_pageTableNew[psm] = InitTable( 128, 128, &g_pageTable2[k][0][0]);
|
||||
k++;;
|
||||
}
|
||||
}
|
||||
|
||||
BLOCK m_Blocks[MAX_PSM]; // Do so that blocks are indexable.
|
||||
|
||||
// At the begining and the end of each string we should made unaligned writes, with nSize checks. We should be sure that all
|
||||
// these pixels are inside one widthlimit space.
|
||||
template <int psm>
|
||||
inline bool DoOneTransmitStep(void* pstart, int& nSize, int endj, const void* pbuf, int& k, int& i, int& j, int widthlimit) {
|
||||
for (; j < endj && nSize > 0; j++, k++, nSize -= 1) {
|
||||
writePixelMem<psm, false>((u32*)pstart, j%2048, i%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
|
||||
}
|
||||
|
||||
return (nSize == 0);
|
||||
}
|
||||
|
||||
// FFX has PSMT8 transmit (starting intro -- sword and hairs).
|
||||
// Persona 4 texts at start are PSMCT32 (and there is also PSMCT16 transmit somewhere after that).
|
||||
// Tekken V has PSMCT24 and PSMT4 transfers
|
||||
|
||||
// This function transfers "Y" block pixels. I use little another code than Zerofrog. My code often uses widthmult != 1 addition (Zerofrog's code
|
||||
// have an strict condition for fast path: width of transferred data should be widthlimit multiplied by j; EndY also should be multiplied. But
|
||||
// the usual data block of 255 pixels becomes transfered by 1.
|
||||
// I should check, maybe Unaligned_Start and Unaligned_End often == 0, and I could try a fastpath -- with this block off.
|
||||
// Transfer rows of a host->local upload row by row, from the current position
// (i, j) up to (but not including) row endY.
//
// Each row is handled in three steps: a per-pixel head up to the next
// widthlimit-aligned column, a run of widthlimit-wide writes, and a per-pixel
// tail up to gs.imageEnd.x. After each row j is reset to gs.trxpos.dx.
//
//   pbuf    source pixel data
//   nSize   pixels still to transfer; decremented in place
//   pstart  destination base pointer
//   i, j    current row / column; updated in place
//   k       index of the next source pixel; updated in place
//
// Returns true as soon as nSize reaches zero inside one of the per-pixel
// steps; returns false when all rows up to endY were processed first.
template <int psm, int widthlimit>
inline bool TRANSMIT_HOSTLOCAL_Y(u32* pbuf, int& nSize, u8* pstart, int endY, int& i, int& j, int& k) {
	// if (psm != PSMT8 && psm != 0 && psm != PSMT4 && psm != PSMCT24)
	// 	ERROR_LOG("This is usable function TRANSMIT_HOSTLOCAL_Y at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);

	// NOTE(review): q is a remainder, yet it is passed below as the end *column*
	// (endj). For j >= gs.trxpos.dx it is <= 0, so this first step writes
	// nothing -- confirm that this is intended.
	int q = (gs.trxpos.dx - j) % widthlimit;
	if (DoOneTransmitStep<psm>(pstart, nSize, q, pbuf, k, i, j, widthlimit)) return true;		// After this j and dx are congruent modulo widthlimit

	int Unaligned_Start = (gs.trxpos.dx % widthlimit == 0) ? 0 : widthlimit - gs.trxpos.dx % widthlimit;	// gs.trxpos.dx + Unaligned_Start is a multiple of widthlimit
	for (; i < endY; ++i) {
		// Head: single pixels up to the aligned column.
		if (DoOneTransmitStep<psm>(pstart, nSize, j + Unaligned_Start, pbuf, k, i, j, widthlimit)) return true;		// This operation made j % widthlimit == 0.
		//assert (j % widthlimit != 0);

		// Body: widthlimit pixels per call. k is not advanced in this loop header;
		// presumably writePixelsFromMemory advances it through a reference -- TODO confirm.
		for (; j < gs.imageEnd.x - widthlimit + 1 && nSize >= widthlimit; j += widthlimit, nSize -= widthlimit) {
			writePixelsFromMemory<psm, true, widthlimit>(pstart, pbuf, k, j % 2048, i % 2048, gs.dstbuf.bw);
		}

		assert ( gs.imageEnd.x - j < widthlimit || nSize < widthlimit);
		// Tail: the loop above stops for one of two reasons:
		//   1) nSize < widthlimit
		//   2) j > gs.imageEnd.x - widthlimit + 1
		// Either way fewer than widthlimit pixels remain before gs.imageEnd.x (or
		// in total), so they are written one by one.
		if (DoOneTransmitStep<psm>(pstart, nSize, gs.imageEnd.x, pbuf, k, i, j, widthlimit)) return true;
		j = gs.trxpos.dx;
	}

	return false;
}
|
||||
|
||||
// PSMT4 -- Tekken V
|
||||
template <int psm, int widthlimit>
|
||||
inline void TRANSMIT_HOSTLOCAL_X(u32* pbuf, int& nSize, u8* pstart, int& i, int& j, int& k, int blockheight, int startX, int pitch, int fracX) {
|
||||
if (psm != PSMT8 && psm != PSMT4)
|
||||
ZZLog::Error_Log("This is usable function TRANSMIT_HOSTLOCAL_X at ZZoglMem.cpp %d %d %d %d %d\n", psm, widthlimit, i, j, nSize);
|
||||
|
||||
for(int tempi = 0; tempi < blockheight; ++tempi) {
|
||||
for(j = startX; j < gs.imageEnd.x; j++, k++) {
|
||||
writePixelMem<psm, false>((u32*)pstart, j%2048, (i + tempi)%2048, (u32*)(pbuf), k, gs.dstbuf.bw);
|
||||
}
|
||||
k += ( pitch - fracX );
|
||||
}
|
||||
}
|
||||
|
||||
template <int psm>
|
||||
inline int TRANSMIT_PITCH(int pitch) {
|
||||
return (PSM_BITS_PER_PIXEL<psm>() * pitch) >> 3;
|
||||
}
|
||||
|
||||
// ------------------------
|
||||
// | Y |
|
||||
// ------------------------
|
||||
// | block | |
|
||||
// | aligned area | X |
|
||||
// | | |
|
||||
// ------------------------
|
||||
// | Y |
|
||||
// ------------------------
|
||||
|
||||
|
||||
template <int psmX>
|
||||
int FinishTransfer(int i, int j, int nSize, int nLeftOver)
|
||||
{
|
||||
if( i >= gs.imageEnd.y )
|
||||
{
|
||||
assert( gs.transferring == false || i == gs.imageEnd.y );
|
||||
gs.transferring = false;
|
||||
}
|
||||
else {
|
||||
/* update new params */
|
||||
gs.image.y = i;
|
||||
gs.image.x = j;
|
||||
}
|
||||
|
||||
return (nSize * TRANSMIT_PITCH<psmX>(2) + nLeftOver)/2;
|
||||
}
|
||||
|
||||
// Host -> local memory upload for one pixel format.
//
// Template parameters:
//   psmX         pixel storage format
//   widthlimit   pixel granularity used by the row-wise transfer helpers
//   blockbits    multiplier applied to getPixelAddress() (then divided by 8) to
//                form the byte offset of a block
//   blockwidth / blockheight   block dimensions in pixels (powers of two; used
//                with the *_POW2 helpers)
//
// pbyMem points at the source data and nQWordSize is its size; the resume
// position is read from gs.image and the target rectangle from gs.trxpos /
// gs.imageEnd.
//
// The transfer runs in up to three phases:
//   1. row-wise (TRANSMIT_HOSTLOCAL_Y) until i sits on a block boundary -- or
//      for the whole image when block alignment is not possible;
//   2. block-wise (SwizzleBlock) over the aligned area, with the strip right of
//      it handled by TRANSMIT_HOSTLOCAL_X;
//   3. row-wise again for whatever is left.
// Returns the value computed by FinishTransfer().
template<int psmX, int widthlimit, int blockbits, int blockwidth, int blockheight>
int TransferHostLocal(const void* pbyMem, u32 nQWordSize)
{
	assert( gs.imageTransfer == XFER_HOST_TO_LOCAL );
	u8* pstart = g_pbyGSMemory + gs.dstbuf.bp*256;

	// Resume from where the previous call stopped.
	int i = gs.image.y, j = gs.image.x;

	const u8* pbuf = (const u8*)pbyMem;
	// The input is measured against the size of two pixels (TRANSMIT_PITCH<psmX>(2)):
	// nSize is the whole number of such units, nLeftOver the remainder.
	int nLeftOver = (nQWordSize*4*2)%(TRANSMIT_PITCH<psmX>(2));
	int nSize = nQWordSize*4*2/TRANSMIT_PITCH<psmX>(2);
	// Never transfer more than the image area.
	nSize = min(nSize, gs.imageNew.w * gs.imageNew.h);

	int pitch, area, fracX;
	int endY = ROUND_UPPOW2(i, blockheight);
	Point alignedPt;

	alignedPt.x = ROUND_DOWNPOW2(gs.imageEnd.x, blockwidth);
	alignedPt.y = ROUND_DOWNPOW2(gs.imageEnd.y, blockheight);

	// NOTE(review): bAligned is assigned further down but never read in this function.
	bool bAligned;
	// Block-wise transfer needs: dx on a block boundary, j at the start of a
	// row, and at least one whole block row / block column inside the image.
	bool bCanAlign = MOD_POW2(gs.trxpos.dx, blockwidth) == 0 && (j == gs.trxpos.dx) && (alignedPt.y > endY) && alignedPt.x > gs.trxpos.dx;

	if( (gs.imageEnd.x - gs.trxpos.dx) % widthlimit ) {
		/* hack */
		int testwidth = (int)nSize - (gs.imageEnd.y - i) * (gs.imageEnd.x - gs.trxpos.dx) + (j - gs.trxpos.dx);
		if((testwidth <= widthlimit) && (testwidth >= -widthlimit)) {
			/* don't transfer */
			/*ZZLog::Debug_Log("bad texture %s: %d %d %d\n", #psm, gs.trxpos.dx, gs.imageEnd.x, nQWordSize);*/
			gs.transferring = false;
		}
		// Row width is not a multiple of widthlimit: always use the row-wise path.
		bCanAlign = false;
	}

	/* first align on block boundary */
	if( MOD_POW2(i, blockheight) || !bCanAlign ) {

		if( !bCanAlign )
			endY = gs.imageEnd.y; /* transfer the whole image */
		else
			assert( endY < gs.imageEnd.y); /* part of alignment condition */

		// NOTE(review): limit is computed but not used; the call below passes
		// widthlimit as a template argument instead.
		int limit = widthlimit;
		if (((gs.imageEnd.x - gs.trxpos.dx) % widthlimit) || ((gs.imageEnd.x - j) % widthlimit))
			/* transmit with a width of 1 */
			limit = 1 + (gs.dstbuf.psm == PSMT4);
		/*TRANSMIT_HOSTLOCAL_Y##TransSfx(psm, T, limit, endY)*/
		int k = 0;

		if (TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, endY, i, j, k))
			return FinishTransfer<psmX>(i, j, nSize, nLeftOver);

		// Skip the k source pixels consumed above.
		pbuf += TRANSMIT_PITCH<psmX>(k);

		if (nSize == 0 || i == gs.imageEnd.y) return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
	}

	assert( MOD_POW2(i, blockheight) == 0 && j == gs.trxpos.dx);

	/* can align! */
	pitch = gs.imageEnd.x - gs.trxpos.dx;	// pixels per source row
	area = pitch * blockheight;				// pixels in one full row of blocks
	fracX = gs.imageEnd.x - alignedPt.x;	// width of the strip right of the aligned area

	/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
	bAligned = !((uptr)pbuf & 0xf) && ((TRANSMIT_PITCH<psmX>(pitch)&0xf) == 0);

	/* transfer aligning to blocks */
	for(; i < alignedPt.y && nSize >= area; i += blockheight, nSize -= area) {

		// One block per iteration; pbuf moves right by one block width each time.
		for(int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += blockwidth, pbuf += TRANSMIT_PITCH<psmX>(blockwidth)) {
			SwizzleBlock<psmX>((u32*)(pstart + getPixelAddress<psmX>(tempj, i, gs.dstbuf.bw)*blockbits/8),
								(u32*)pbuf, TRANSMIT_PITCH<psmX>(pitch));
		}

		/* transfer the rest */
		if( alignedPt.x < gs.imageEnd.x ) {
			int k = 0;
			TRANSMIT_HOSTLOCAL_X<psmX, widthlimit>((u32*)pbuf, nSize, pstart, i, j, k, blockheight, alignedPt.x, pitch, fracX);
			// Advance pbuf to the first pixel of the next row of blocks.
			pbuf += TRANSMIT_PITCH<psmX>(k - alignedPt.x + gs.trxpos.dx);
		}
		// No strip: the inner loop advanced pbuf by one source row, skip the other blockheight-1.
		else pbuf += (blockheight-1)*TRANSMIT_PITCH<psmX>(pitch);
		j = gs.trxpos.dx;
	}

	// Anything left (at least 4 bytes worth) goes through the row-wise path.
	if( TRANSMIT_PITCH<psmX>(nSize)/4 > 0 ) {
		int k = 0;
		TRANSMIT_HOSTLOCAL_Y<psmX, widthlimit>((u32*)pbuf, nSize, pstart, gs.imageEnd.y, i, j, k);
		pbuf += TRANSMIT_PITCH<psmX>(k);
		/* sometimes wrong sizes are sent (tekken tag) */
		assert( gs.transferring == false || TRANSMIT_PITCH<psmX>(nSize)/4 <= 2 );
	}

	return FinishTransfer<psmX>(i, j, nSize, nLeftOver);
}
|
||||
|
||||
// Non-template entry points for TransferHostLocal, one per pixel format, so
// they can be stored in BLOCK::TransferHostLocal function pointers.
// Template arguments are <psm, widthlimit, blockbits, blockwidth, blockheight>.

// PSMCT32: widthlimit 2, 32 block bits, 8x8 blocks.
inline int TransferHostLocal32(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMCT32, 2, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMT32Z: same parameters as PSMCT32.
inline int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT32Z, 2, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMCT24: widthlimit 8, 32 block bits, 8x8 blocks.
inline int TransferHostLocal24(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMCT24, 8, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMT24Z: same parameters as PSMCT24.
inline int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT24Z, 8, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMCT16: widthlimit 4, 16 block bits, 16x8 blocks.
inline int TransferHostLocal16(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMCT16, 4, 16, 16, 8>( pbyMem, nQWordSize);
}

// PSMCT16S: same parameters as PSMCT16.
inline int TransferHostLocal16S(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMCT16S, 4, 16, 16, 8>( pbyMem, nQWordSize);
}

// PSMT16Z: same parameters as PSMCT16.
inline int TransferHostLocal16Z(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT16Z, 4, 16, 16, 8>( pbyMem, nQWordSize);
}

// PSMT16SZ: same parameters as PSMCT16.
inline int TransferHostLocal16SZ(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT16SZ, 4, 16, 16, 8>( pbyMem, nQWordSize);
}

// PSMT8: widthlimit 4, 8 block bits, 16x16 blocks.
inline int TransferHostLocal8(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT8, 4, 8, 16, 16>( pbyMem, nQWordSize);
}

// PSMT4: widthlimit 8, 4 block bits, 32x16 blocks.
inline int TransferHostLocal4(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT4, 8, 4, 32, 16>( pbyMem, nQWordSize);
}

// PSMT8H: widthlimit 4, with the 32-bit block geometry (32 block bits, 8x8 blocks).
inline int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT8H, 4, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMT4HL: widthlimit 8, with the 32-bit block geometry.
inline int TransferHostLocal4HL(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT4HL, 8, 32, 8, 8>( pbyMem, nQWordSize);
}

// PSMT4HH: widthlimit 8, with the 32-bit block geometry.
inline int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize)
{
	return TransferHostLocal<PSMT4HH, 8, 32, 8, 8>( pbyMem, nQWordSize);
}
|
||||
|
||||
// Local -> host transfer entry points, one per pixel format, matching the
// BLOCK::TransferLocalHost function-pointer signature.
// They are placeholders: each body consists of the FUNCLOG macro only and
// neither reads nor writes pbyMem.
void TransferLocalHost32(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost24(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16S(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost8(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost8H(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4HL(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost4HH(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost32Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) { FUNCLOG }
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) { FUNCLOG }
|
||||
|
||||
// Fill the BLOCK descriptor b for format psmX and write that format's page
// layout into the block texture data.
//
//   b              descriptor to fill (dimensions, shader constants, table pointers)
//   floatfmt       non-zero: vBlockData holds floats and vBilinearData is filled
//                  too; zero: vBlockData holds u16 values and vBilinearData is
//                  left alone
//   vBlockData     texel storage, BLOCK_TEXWIDTH texels per row
//   vBilinearData  float4 storage with the same row pitch (floatfmt only)
//   ox, oy         top-left texel of this format's region inside the texture
//   psmX           pixel storage format; indexes ZZ_DT and the g_*Table arrays
//
// Side effect: the computed offsets are also written into g_pageTable[psmX]
// (through b.pageTable).
inline void FILL_BLOCK(BLOCK& b, int floatfmt, vector<char>& vBlockData, vector<char>& vBilinearData, int ox, int oy, int psmX) {
	// Page width/height are the column/row masks from ZZ_DT plus one.
	int bw = ZZ_DT[psmX][4] + 1;
	int bh = ZZ_DT[psmX][3] + 1;
	int mult = 1 << ZZ_DT[psmX][0];

	b.vTexDims = float4 (BLOCK_TEXWIDTH/(float)(bw), BLOCK_TEXHEIGHT/(float)(bh), 0, 0);
	b.vTexBlock = float4( (float)bw/BLOCK_TEXWIDTH, (float)bh/BLOCK_TEXHEIGHT, ((float)ox+0.2f)/BLOCK_TEXWIDTH, ((float)oy+0.05f)/BLOCK_TEXHEIGHT);
	b.width = bw;
	b.height = bh;
	// Note the cross-over: colwidth derives from the height and colheight from the width.
	b.colwidth = bh / 4;
	b.colheight = bw / 8;
	b.bpp = 32/mult;

	b.pageTable = g_pageTable[psmX];
	b.blockTable = g_blockTable[psmX];
	b.columnTable = g_columnTable[psmX];

	// This is never true.
	//assert( sizeof(g_pageTable[psmX]) == bw*bh*sizeof(g_pageTable[psmX][0][0]) );
	// Two typed views of the same buffer, both starting at texel (ox, oy);
	// only the one matching floatfmt is written below.
	float* psrcf = (float*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
	u16* psrcw = (u16*)&vBlockData[0] + ox + oy * BLOCK_TEXWIDTH;
	for(int i = 0; i < bh; ++i) {
		for(int j = 0; j < bw; ++j) {
			/* fill the table */
			// Offset inside the page = block index * (64 * mult) + offset inside the block.
			u32 u = g_blockTable[psmX][(i / b.colheight)][(j / b.colwidth)] * 64 * mult + g_columnTable[psmX][i%b.colheight][j%b.colwidth];
			b.pageTable[i][j] = u;
			if( floatfmt ) {
				psrcf[i*BLOCK_TEXWIDTH+j] = (float)(u) / (float)(GPU_TEXWIDTH*mult);
			}
			else {
				// Stored as 16 bits; the upper bits of u are dropped by the conversion.
				psrcw[i*BLOCK_TEXWIDTH+j] = u;
			}
		}
	}

	if( floatfmt ) {
		// For every texel store it together with its right, lower and lower-right
		// neighbours, wrapping around at the page edges.
		float4* psrcv = (float4*)&vBilinearData[0] + ox + oy * BLOCK_TEXWIDTH;
		for(int i = 0; i < bh; ++i) {
			for(int j = 0; j < bw; ++j) {
				float4* pv = &psrcv[i*BLOCK_TEXWIDTH+j];
				pv->x = psrcf[i*BLOCK_TEXWIDTH+j];
				pv->y = psrcf[i*BLOCK_TEXWIDTH+((j+1)%bw)];
				pv->z = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+j];
				pv->w = psrcf[((i+1)%bh)*BLOCK_TEXWIDTH+((j+1)%bw)];
			}
		}
	}
}
|
||||
|
||||
// Build the block texture data and the m_Blocks descriptor table.
//
// vBlockData is resized to BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT texels of 4 bytes
// (floatfmt) or 2 bytes each; vBilinearData is resized only when floatfmt is
// set. m_Blocks is cleared and then one descriptor per supported format is
// stored. Formats sharing a layout (24/8H/4HL/4HH with 32, and 24Z with 32Z)
// reuse the descriptor of the base format and differ only in their transfer
// function pointers.
//
// The g_pageTable / g_blockTable / g_columnTable views are read by FILL_BLOCK,
// so they must already have been set up.
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
{
	FUNCLOG
	vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * (floatfmt ? 4 : 2));

	if (floatfmt)
		vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));

	// Scratch descriptor, refilled for every layout and copied into m_Blocks.
	BLOCK b;

	memset(m_Blocks, 0, sizeof(m_Blocks));

	// 32 -- region at (0, 0)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 0, PSMCT32);
	b.TransferHostLocal = TransferHostLocal32;
	b.TransferLocalHost = TransferLocalHost32;
	m_Blocks[PSMCT32] = b;

	// 24 (same as 32 except write/readPixel are different)
	b.TransferHostLocal = TransferHostLocal24;
	b.TransferLocalHost = TransferLocalHost24;
	m_Blocks[PSMCT24] = b;

	// 8H (same as 32 except write/readPixel are different)
	b.TransferHostLocal = TransferHostLocal8H;
	b.TransferLocalHost = TransferLocalHost8H;
	m_Blocks[PSMT8H] = b;

	// 4HL (still the 32 descriptor)
	b.TransferHostLocal = TransferHostLocal4HL;
	b.TransferLocalHost = TransferLocalHost4HL;
	m_Blocks[PSMT4HL] = b;

	// 4HH (still the 32 descriptor)
	b.TransferHostLocal = TransferHostLocal4HH;
	b.TransferLocalHost = TransferLocalHost4HH;
	m_Blocks[PSMT4HH] = b;

	// 32z -- region at (64, 0)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 0, PSMT32Z);
	b.TransferHostLocal = TransferHostLocal32Z;
	b.TransferLocalHost = TransferLocalHost32Z;
	m_Blocks[PSMT32Z] = b;

	// 24Z (same as 32Z except write/readPixel are different)
	b.TransferHostLocal = TransferHostLocal24Z;
	b.TransferLocalHost = TransferLocalHost24Z;
	m_Blocks[PSMT24Z] = b;

	// 16 -- region at (0, 32)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 32, PSMCT16);
	b.TransferHostLocal = TransferHostLocal16;
	b.TransferLocalHost = TransferLocalHost16;
	m_Blocks[PSMCT16] = b;

	// 16s -- region at (64, 32)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 32, PSMCT16S);
	b.TransferHostLocal = TransferHostLocal16S;
	b.TransferLocalHost = TransferLocalHost16S;
	m_Blocks[PSMCT16S] = b;

	// 16z -- region at (0, 96)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 96, PSMT16Z);
	b.TransferHostLocal = TransferHostLocal16Z;
	b.TransferLocalHost = TransferLocalHost16Z;
	m_Blocks[PSMT16Z] = b;

	// 16sz -- region at (64, 96)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 64, 96, PSMT16SZ);
	b.TransferHostLocal = TransferHostLocal16SZ;
	b.TransferLocalHost = TransferLocalHost16SZ;
	m_Blocks[PSMT16SZ] = b;

	// 8 -- region at (0, 160)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 160, PSMT8);
	b.TransferHostLocal = TransferHostLocal8;
	b.TransferLocalHost = TransferLocalHost8;
	m_Blocks[PSMT8] = b;

	// 4 -- region at (0, 224)
	FILL_BLOCK(b, floatfmt, vBlockData, vBilinearData, 0, 224, PSMT4);
	b.TransferHostLocal = TransferHostLocal4;
	b.TransferLocalHost = TransferLocalHost4;
	m_Blocks[PSMT4] = b;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,790 @@
|
|||
/* ZeroGS KOSMOS
|
||||
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef __ZZOGL_MEM_H__
|
||||
#define __ZZOGL_MEM_H__
|
||||
|
||||
#include <assert.h>
|
||||
#include <vector>
|
||||
#include "GS.h"
|
||||
#include "Util.h"
|
||||
#include "Mem.h"
|
||||
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
|
||||
extern u32 g_blockTable32[4][8];
|
||||
extern u32 g_blockTable32Z[4][8];
|
||||
extern u32 g_blockTable16[8][4];
|
||||
extern u32 g_blockTable16S[8][4];
|
||||
|
||||
extern u32 g_blockTable16Z[8][4];
|
||||
|
||||
extern u32 g_blockTable16SZ[8][4];
|
||||
|
||||
extern u32 g_blockTable8[4][8];
|
||||
extern u32 g_blockTable4[8][4];
|
||||
|
||||
extern u32 g_columnTable32[8][8];
|
||||
extern u32 g_columnTable16[8][16];
|
||||
extern u32 g_columnTable8[16][16];
|
||||
extern u32 g_columnTable4[16][32];
|
||||
|
||||
//--
|
||||
|
||||
extern u32 g_pageTable32[32][64];
|
||||
extern u32 g_pageTable32Z[32][64];
|
||||
extern u32 g_pageTable16[64][64];
|
||||
extern u32 g_pageTable16S[64][64];
|
||||
extern u32 g_pageTable16Z[64][64];
|
||||
extern u32 g_pageTable16SZ[64][64];
|
||||
extern u32 g_pageTable8[64][128];
|
||||
extern u32 g_pageTable4[128][128];
|
||||
|
||||
|
||||
//maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
|
||||
|
||||
// This table is used for fast access to memory storage data.
|
||||
extern u32 ZZ_DT[MAX_PSM][TABLE_WIDTH];
|
||||
|
||||
|
||||
//maximum PSM is 58, so our arrays have 58 + 1 = 59 elements
|
||||
extern u32** g_pageTable[MAX_PSM];
|
||||
extern u32** g_blockTable[MAX_PSM];
|
||||
extern u32** g_columnTable[MAX_PSM];
|
||||
extern u32 g_pageTable2[MAX_PSM][127][127];
|
||||
extern u32** g_pageTableNew[MAX_PSM];
|
||||
|
||||
// rest not visible externally

// Per-format description of how pixels are laid out inside one page, plus the
// transfer routines for that format. One instance per psm is kept in m_Blocks.
struct BLOCK
{
	// Zero-fills the whole object, so every member starts as 0 / NULL.
	BLOCK() { memset(this, 0, sizeof(BLOCK)); }

	// shader constants for this block
	float4 vTexBlock;
	float4 vTexDims;
	int width, height; // dims of one page in pixels
	int bpp;			// bits per pixel
	int colwidth, colheight;
	u32** pageTable;	// offset inside each page
	u32** blockTable;	// row-pointer view of the format's block table
	u32** columnTable;	// row-pointer view of the format's column table

	// Nobody uses these, so they are better removed.
//	u32 (*getPixelAddress)(int x, int y, u32 bp, u32 bw);
//	u32 (*getPixelAddress_0)(int x, int y, u32 bw);
//	void (*writePixel)(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw);
//	void (*writePixel_0)(void* pmem, int x, int y, u32 pixel, u32 bw);
//	u32 (*readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw);
//	u32 (*readPixel_0)(const void* pmem, int x, int y, u32 bw);

	// Host -> local upload routine for this format.
	int (*TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
	// Local -> host download routine for this format.
	void (*TransferLocalHost)(void* pbyMem, u32 nQWordSize);

	// texture must be of dims BLOCK_TEXWIDTH and BLOCK_TEXHEIGHT
	static void FillBlocks(std::vector<char>& vBlockData, std::vector<char>& vBilinearData, int floatfmt);
};
|
||||
|
||||
void FillBlockTables();
|
||||
void DestroyBlockTables();
|
||||
void FillNewPageTable();
|
||||
|
||||
extern BLOCK m_Blocks[];
|
||||
|
||||
extern u32 g_blockTable32[4][8];
|
||||
extern u32 g_blockTable32Z[4][8];
|
||||
extern u32 g_blockTable16[8][4];
|
||||
extern u32 g_blockTable16S[8][4];
|
||||
extern u32 g_blockTable16Z[8][4];
|
||||
extern u32 g_blockTable16SZ[8][4];
|
||||
extern u32 g_blockTable8[4][8];
|
||||
extern u32 g_blockTable4[8][4];
|
||||
|
||||
extern u32 g_columnTable32[8][8];
|
||||
extern u32 g_columnTable16[8][16];
|
||||
extern u32 g_columnTable8[16][16];
|
||||
extern u32 g_columnTable4[16][32];
|
||||
|
||||
extern u32 g_pageTable32[32][64];
|
||||
extern u32 g_pageTable32Z[32][64];
|
||||
extern u32 g_pageTable16[64][64];
|
||||
extern u32 g_pageTable16S[64][64];
|
||||
extern u32 g_pageTable16Z[64][64];
|
||||
extern u32 g_pageTable16SZ[64][64];
|
||||
extern u32 g_pageTable8[64][128];
|
||||
extern u32 g_pageTable4[128][128];
|
||||
|
||||
|
||||
extern u32** g_pageTable[MAX_PSM];
|
||||
extern u32** g_blockTable[MAX_PSM];
|
||||
extern u32** g_columnTable[MAX_PSM];
|
||||
extern u32 ZZ_DT[MAX_PSM][TABLE_WIDTH];
|
||||
extern u32** g_pageTableNew[MAX_PSM];
|
||||
|
||||
// Store `pixel` into *dst, touching only the bits selected by `mask`.
// Bits outside the mask keep their previous value. With the default full mask
// the destination is simply overwritten and is not read first.
static __forceinline void MaskedOR(u32* dst, u32 pixel, u32 mask = 0xffffffff) {
	if (mask != 0xffffffff)
		pixel = (*dst & (~mask)) | (pixel & mask);

	*dst = pixel;
}
|
||||
|
||||
// These two defines may look like pointless code, but they serve one important
// purpose: they turn the run-time variable psm (and psm2 in the second macro)
// into a compile-time constant, so it can be used as a template argument and
// the optimiser can select the proper specialised function.
//
// PSM_SWITCHCASE(X) expects an integer `psm` in scope. For each supported
// format it declares `const int psmC` equal to that format and then expands X,
// which may use psmC wherever a constant expression is needed. Nothing is
// expanded when psm is not one of the listed formats.
#define PSM_SWITCHCASE(X) { \
	switch (psm) { \
		case PSMCT32: { \
			const int psmC = PSMCT32; \
			X; } \
			break; \
		case PSMT32Z: { \
			const int psmC = PSMT32Z; \
			X; } \
			break; \
		case PSMCT24: { \
			const int psmC = PSMCT24; \
			X; } \
			break; \
		case PSMT24Z: { \
			const int psmC = PSMT24Z; \
			X; } \
			break; \
		case PSMCT16: { \
			const int psmC = PSMCT16; \
			X; } \
			break; \
		case PSMCT16S: { \
			const int psmC = PSMCT16S; \
			X; } \
			break; \
		case PSMT16Z: { \
			const int psmC = PSMT16Z; \
			X; } \
			break; \
		case PSMT16SZ: { \
			const int psmC = PSMT16SZ; \
			X; } \
			break; \
		case PSMT8: { \
			const int psmC = PSMT8; \
			X; } \
			break; \
		case PSMT8H: { \
			const int psmC = PSMT8H; \
			X; } \
			break; \
		case PSMT4HH: { \
			const int psmC = PSMT4HH; \
			X; } \
			break; \
		case PSMT4HL: { \
			const int psmC = PSMT4HL; \
			X; } \
			break; \
		case PSMT4: { \
			const int psmC = PSMT4; \
			X; } \
			break; \
	}\
}
|
||||
|
||||
// Two-format variant of the constant-dispatch macro. Expects integers `psm`
// and `psm2` in scope. For each supported pair it declares the constants
// `psmC` (equal to psm) and `psmC1` (equal to psm2) and then expands X, which
// may use both as template arguments.
//
// Only pairs inside the same family are dispatched:
//   32-bit : PSMCT32 / PSMT32Z
//   24-bit : PSMCT24 / PSMT24Z
//   16-bit : PSMCT16 / PSMCT16S / PSMT16Z / PSMT16SZ
//   8-bit  : PSMT8 / PSMT8H
//   4-bit  : PSMT4 / PSMT4HL / PSMT4HH
// In the two-member families any psm2 other than the first member is treated
// as the second member; in the switch-based families an unlisted psm2 expands
// nothing.
//
// The PSMT32Z and PSMT8H arms test for the colour / base format (PSMCT32,
// PSMT8) just like the PSMT24Z and PSMT8 arms do, so psmC1 really mirrors
// psm2. (They used to test for PSMT32Z / PSMT8H, which selected the opposite
// constant.)
#define PSM_SWITCHCASE_2(X) { \
	switch (psm) { \
		case PSMCT32: \
			if( psm2 == PSMCT32 ) { const int psmC = PSMCT32, psmC1 = PSMCT32; X; } \
			else { const int psmC = PSMCT32, psmC1 = PSMT32Z; X; } \
			break; \
		case PSMCT24: \
			if( psm2 == PSMCT24 ) { const int psmC = PSMCT24, psmC1 = PSMCT24; X; } \
			else { const int psmC = PSMCT24, psmC1 = PSMT24Z; X; } \
			break; \
		case PSMT32Z: \
			if( psm2 == PSMCT32 ) { const int psmC = PSMT32Z, psmC1 = PSMCT32; X; } \
			else { const int psmC = PSMT32Z, psmC1 = PSMT32Z; X; } \
			break; \
		case PSMT24Z: \
			if( psm2 == PSMCT24 ) { const int psmC = PSMT24Z, psmC1 = PSMCT24; X; } \
			else { const int psmC = PSMT24Z, psmC1 = PSMT24Z; X; } \
			break; \
		case PSMCT16: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMCT16, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMCT16, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMCT16, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMCT16, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMCT16S: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMCT16S, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMCT16S, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMCT16S, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMCT16S, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT16Z: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMT16Z, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMT16Z, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMT16Z, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMT16Z, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT16SZ: \
			switch(psm2) { \
				case PSMCT16: { const int psmC = PSMT16SZ, psmC1 = PSMCT16; X; } break; \
				case PSMCT16S: { const int psmC = PSMT16SZ, psmC1 = PSMCT16S; X; } break; \
				case PSMT16Z: { const int psmC = PSMT16SZ, psmC1 = PSMT16Z; X; } break; \
				case PSMT16SZ: { const int psmC = PSMT16SZ, psmC1 = PSMT16SZ; X; } break; \
			} \
			break; \
		case PSMT8: \
			if( psm2 == PSMT8 ) { const int psmC = PSMT8, psmC1 = PSMT8; X; } \
			else { const int psmC = PSMT8, psmC1 = PSMT8H; X; } \
			break; \
		case PSMT8H: \
			if( psm2 == PSMT8 ) { const int psmC = PSMT8H, psmC1 = PSMT8; X; } \
			else { const int psmC = PSMT8H, psmC1 = PSMT8H; X; } \
			break; \
		case PSMT4: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
		case PSMT4HL: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4HL, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4HL, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4HL, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
		case PSMT4HH: \
			switch(psm2) { \
				case PSMT4: { const int psmC = PSMT4HH, psmC1 = PSMT4; X; } break; \
				case PSMT4HL: { const int psmC = PSMT4HH, psmC1 = PSMT4HL; X; } break; \
				case PSMT4HH: { const int psmC = PSMT4HH, psmC1 = PSMT4HH; X; } break; \
			} \
			break; \
	} \
}
|
||||
|
||||
template <int psm>
|
||||
static __forceinline void setPsmtConstantsX(u8& A, u8& B, u8& C, u8& D, u8& E, u8& F, u32& G, u8& H) {
|
||||
switch (psm) {
|
||||
case PSMCT32:
|
||||
case PSMT32Z:
|
||||
A = 5; B = 6; C = 0; D = 31; E = 63; F = 0; H = 1; G = 0xffffffff;
|
||||
break;
|
||||
|
||||
case PSMCT24:
|
||||
case PSMT24Z:
|
||||
A = 5; B = 6; C = 0; D = 31; E = 63; F = 0; H = 1; G = 0xffffff;
|
||||
break;
|
||||
|
||||
case PSMT8H:
|
||||
A = 5; B = 6; C = 0; D = 31; E = 63; F = 24; H = 4; G = 0xff;
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
A = 5; B = 6; C = 0; D = 31; E = 63; F = 28; H = 8; G = 0xf;
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
A = 5; B = 6; C = 0; D = 31; E = 63; F = 24; H = 8; G = 0xf;
|
||||
break;
|
||||
|
||||
case PSMCT16:
|
||||
case PSMT16Z:
|
||||
case PSMCT16S:
|
||||
case PSMT16SZ:
|
||||
A = 6; B = 6; C = 1; D = 63; E = 63; F = 0; H = 2; G = 0xffff;
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
A = 6; B = 7; C = 2; D = 63; E = 127; F = 0; H = 4; G = 0xff;
|
||||
break;
|
||||
|
||||
case PSMT4:
|
||||
A = 7; B = 7; C = 3; D = 127; E = 127; F = 0; H = 8; G = 0xf;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// This is where the NEW_CODE define used to be.
|
||||
|
||||
// ------------------------------------------ get Address functions ------------------------------------
|
||||
// Yes, only 1 function to all cases of life!
|
||||
// Warning! We switch bp and bw for usage of default value, so be warned! It's
|
||||
// not C, it's C++, so not it.
|
||||
template <int psm>
|
||||
static __forceinline u32 getPixelAddress(int x, int y, u32 bw, u32 bp = 0) {
|
||||
u32 basepage;
|
||||
u32 word;
|
||||
|
||||
u8 A = 0, B = 0, C = 0, D = 0, E = 0, F = 0; u32 G = 0; u8 H= 0;
|
||||
setPsmtConstantsX<psm>(A, B, C, D, E, F, G, H);
|
||||
basepage = ((y>>A) * (bw>>B)) + (x>>B);
|
||||
word = ((bp * 64 + basepage * 2048) << C) + g_pageTable[psm][y&D][x&E];
|
||||
|
||||
return word;
|
||||
}
|
||||
|
||||
// It's Zerofrog's function. I need to eliminate them all! All access should be 32-bit aligned.
|
||||
static __forceinline u32 getPixelAddress(int psm, int x, int y, u32 bw, u32 bp = 0) {
|
||||
PSM_SWITCHCASE(return getPixelAddress<psmC>(x, y, bw, bp) ;)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This is compatibility code, for reference,
|
||||
#define Def_getPixelAddress(psmT, psmX) \
|
||||
static __forceinline u32 getPixelAddress##psmT(int x, int y, u32 bp, u32 bw) { \
|
||||
return getPixelAddress<psmX>(x, y, bw, bp); } \
|
||||
static __forceinline u32 getPixelAddress##psmT##_0(int x, int y, u32 bw) { \
|
||||
return getPixelAddress<psmX>(x, y, bw); } \
|
||||
|
||||
Def_getPixelAddress(32, PSMCT32)
|
||||
Def_getPixelAddress(16, PSMCT16)
|
||||
Def_getPixelAddress(16S, PSMCT16S)
|
||||
Def_getPixelAddress(8, PSMT8)
|
||||
Def_getPixelAddress(4, PSMT4)
|
||||
Def_getPixelAddress(32Z, PSMT32Z)
|
||||
Def_getPixelAddress(16Z, PSMT16Z)
|
||||
Def_getPixelAddress(16SZ, PSMT16SZ)
|
||||
|
||||
#define getPixelAddress24 getPixelAddress32
|
||||
#define getPixelAddress24_0 getPixelAddress32_0
|
||||
#define getPixelAddress8H getPixelAddress32
|
||||
#define getPixelAddress8H_0 getPixelAddress32_0
|
||||
#define getPixelAddress4HL getPixelAddress32
|
||||
#define getPixelAddress4HL_0 getPixelAddress32_0
|
||||
#define getPixelAddress4HH getPixelAddress32
|
||||
#define getPixelAddress4HH_0 getPixelAddress32_0
|
||||
#define getPixelAddress24Z getPixelAddress32Z
|
||||
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
||||
|
||||
// Check FFX-1 (very begining) for PSMT8
|
||||
// Check Tekken menu for PSMT4
|
||||
// ZZ_DT[7] is needed only for PSMT8H, PSMT4HL and PSMT4HH -- in these cases the pixel data does not start at the beginning of the word.
|
||||
|
||||
// This function return shift from 32-bit aligned address and shift -- number of byte in u32 order.
|
||||
// so if ((u32*)mem + getPixelAddress_Aligned32) is exact location of u32, where our pixel data stored.
|
||||
// Just for remember:
|
||||
// PSMCT32, 24, 32Z, 24Z, 8H, 4HL and 4HH have ZZ_DT[psm] == 3, so shift is always 0.
|
||||
// PSMCT16, 16S, 16SZ, 16Z have ZZ_DT[psm] == 2, so shift is 0 or 16.
|
||||
// PSMT8 ZZ_DT[psm] == 1, shift is 0, 8, 16, 24
|
||||
// PSMT4 ZZ_DT[psm] == 0, shift is 0, 4, 8, 12, 16, 20, 24, 28.
|
||||
|
||||
// It allow us to made a fast access to pixels in the same basepage: if x % N == 0 (N = 1, 2, 4, 8, .. 64)
|
||||
// then we can guarantee that all pixels from x to x + N - 1 are in the same basepage.
|
||||
template <int psm>
|
||||
static __forceinline u32* getPixelBasepage(const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
|
||||
u32 basepage;
|
||||
u8 A = 0, B = 0, C = 0 , D = 0, E = 0, F = 0; u32 G = 0; u8 H = 0;
|
||||
setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);
|
||||
basepage = ((y>>A) * (bw>>B)) + (x>>B);
|
||||
return ((u32*)pmem + (bp * 64 + basepage * 2048));
|
||||
}
|
||||
|
||||
// And this is offset for this pixels.
|
||||
template <int psm>
|
||||
static __forceinline u32* getPixelOffset(u32& mask, u32& shift, const void* pmem, int x, int y) {
|
||||
u32 word;
|
||||
|
||||
u8 A = 0, B = 0, C = 0 , D = 0, E = 0, F = 0; u32 G = 0; u8 H = 0;
|
||||
setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);
|
||||
|
||||
word = (g_pageTable[psm][y&D][x&E] << (3 - C));
|
||||
shift = ((word & 0x7) << 2) + F;
|
||||
mask &= G << shift;
|
||||
|
||||
return ((u32*)pmem + ((word & ~0x7) >> 3));
|
||||
}
|
||||
|
||||
|
||||
template <int psm>
|
||||
static __forceinline u32* getPixelAddress_A32(u32& mask, u32& shift, const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
|
||||
return getPixelOffset<psm>(mask, shift, getPixelBasepage<psm>(pmem, x, y, bw, bp), x, y);
|
||||
|
||||
}
|
||||
|
||||
template <int psm>
|
||||
static __forceinline u32* getPixelBaseAddress_A32(const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
|
||||
u32 word;
|
||||
|
||||
u8 A = 0, B = 0, C = 0 , D = 0, E = 0, F = 0; u32 G = 0; u8 H = 0;
|
||||
setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);
|
||||
|
||||
word = (g_pageTable[psm][y&D][x&E] << (3 - C));
|
||||
return ((u32*)getPixelBasepage<psm>(pmem, x, y, bw, bp) + ((word & ~0x7) >> 3));
|
||||
}
|
||||
|
||||
// Wrapper for cases, where psm is not constant, should be avoided inside cycles
|
||||
static __forceinline u32* getPixelAddress_A32(u32& mask, u32& shift, int psm, const void* pmem, int x, int y, u32 bw, u32 bp = 0) {
|
||||
PSM_SWITCHCASE( return getPixelAddress_A32<psmC>(mask, shift, pmem, x, y, bw, bp) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the address of the CLUT entry selected by tex0.csa inside pmem.
// Entries are 64 bytes apart and indexed by the low 4 bits of csa; for
// half-size CLUT formats (PSMT_ISHALF) a csa of 16 or more adds 2 bytes.
static __forceinline u32* getClutAddress(u8* pmem, const tex0Info& tex0) {
	u8* entry = pmem + 64 * (tex0.csa & 15);

	if (PSMT_ISHALF(tex0.cpsm) && tex0.csa >= 16)
		entry += 2;

	return (u32*)entry;
}
|
||||
|
||||
//--------------------------------------------- Write Pixel -----------------------------------------------------------
|
||||
// Set proper mask for transfering multiple bytes per word.
|
||||
template <int psm>
|
||||
inline u32 HandleWritemask(u32 Writemask) {
|
||||
u8 G = PSM_BITS_PER_PIXEL<psm>();
|
||||
u32 dmask = Writemask & ((1 << G) - 1); // drop all bits in writemask, that could not be used
|
||||
u32 mask;
|
||||
|
||||
switch (psm) {
|
||||
case PSMT8H: // modes with non-zero start bit should be handled differently
|
||||
return 0xff000000;
|
||||
case PSMT4HL:
|
||||
return 0x0f000000;
|
||||
case PSMT4HH:
|
||||
return 0xf0000000;
|
||||
default:
|
||||
mask = dmask; // 32 targets and lower
|
||||
|
||||
if (G < 24) {
|
||||
mask |= dmask << G; // 16 targets and lower
|
||||
if (G < 16) {
|
||||
mask |= dmask << (2 * G); // 8 targets and lower
|
||||
mask |= dmask << (3 * G);
|
||||
if (G < 8) {
|
||||
mask |= dmask << (4 * G); // 4 targets
|
||||
mask |= dmask << (5 * G);
|
||||
mask |= dmask << (6 * G);
|
||||
mask |= dmask << (7 * G);
|
||||
}}}
|
||||
return mask;
|
||||
}
|
||||
}
|
||||
|
||||
//push pixel data at position x,y, according psm storage format. pixel do not need to be properly masked, wrong bit's would not be used
|
||||
//mask should be made according PSM.
|
||||
template <int psm>
|
||||
static __forceinline void writePixel(void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 shift;
|
||||
u32* p = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
|
||||
|
||||
MaskedOR (p, pixel << shift, mask);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel(int psm, void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
PSM_SWITCHCASE(writePixel<psmC>(pmem, x, y, pixel, bw, bp, mask));
|
||||
}
|
||||
|
||||
// Put pixel data from memory. Pixel is p, memory start from pixel, and we should count pmove words and shift resulting word to shift
|
||||
// 24 targets could be outside of 32-bit borders.
|
||||
template <int psm>
|
||||
static __forceinline void pushPixelMem(u32* p, u32* pixel, int pmove, int shift, u32 mask = 0xffffffff) {
|
||||
if (psm != PSMCT24 || psm != PSMT24Z) {
|
||||
if (shift > 0)
|
||||
MaskedOR (p, (*(pixel + pmove)) << (shift), mask);
|
||||
else
|
||||
MaskedOR (p, (*(pixel + pmove)) >> (-shift), mask);
|
||||
}
|
||||
else { // for 24 and 24Z psm data could be not-aligned by 32. Merde!
|
||||
u64 pixel64 = (*(u64*)(pixel + pmove) ) >> (-shift); // we read more data, but for 24 targets shift always negative and resulting data is u32
|
||||
MaskedOR(p, (u32)pixel64, mask); // drop upper part, we don't need it. all data is stored in lower part of u64 after shift
|
||||
|
||||
// MaskedOR(p, (u32)((u8*)pixel + count * 3), mask);
|
||||
}
|
||||
}
|
||||
|
||||
// use it if pixel already shifted by needed number of bytes.
|
||||
// offseted mean that we should skip basepage calculation, pmem is link to basepage'ed memory. Just a little quicker.
|
||||
template <int psm, int offseted>
|
||||
static __forceinline void writePixelMem(const void* pmem, int x, int y, u32* pixel, int count, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 shift;
|
||||
u32* p;
|
||||
|
||||
if (offseted)
|
||||
p = getPixelOffset<psm>(mask, shift, pmem, x, y);
|
||||
else
|
||||
p = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
|
||||
|
||||
int A = PSM_BITS_PER_PIXEL<psm>();
|
||||
|
||||
int pmove = (count * A) >> 5;
|
||||
int pshift = (count * A) & 31; // we assume, that if shift outside word, than user want next pixel data
|
||||
|
||||
pushPixelMem<psm>(p, pixel, pmove, (int)shift - pshift, mask);
|
||||
}
|
||||
|
||||
|
||||
// This function push several pixels. Note, that for 32, 24, 8HH, 4HL, 4HH it's simply write (and pixel should not be properly masked), 16 do push 2 pixels (and x should be even).
|
||||
// 8 push 4 pixels: 0,0; 0,1; 1,0 and 1,1. 4 push 8: 0,0; 0,1; 1,0; 1,1; 2,0, 2,1; 3,0; 3,1.
|
||||
template <int psm>
|
||||
static __forceinline void writePixelWord(const void* pmem, int x, int y, u32 pixel, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 maskA = mask, shift;
|
||||
u32* p = getPixelAddress_A32<psm>(maskA, shift, pmem, x, y, bw, bp);
|
||||
|
||||
/* if (PSM_NON_FULL_WORD<psm>())
|
||||
maskA = maskA & mask;
|
||||
else
|
||||
maskA = mask;*/
|
||||
|
||||
MaskedOR (p, pixel, mask);
|
||||
}
|
||||
|
||||
// ------------------------------------- Read Pixel ---------------------------------------
|
||||
template <int psm>
|
||||
static __forceinline u32 readPixel(const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 shift;
|
||||
u32* p = getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp);
|
||||
|
||||
return ((*p & mask) >> shift);
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel(int psm, const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
PSM_SWITCHCASE(return readPixel<psmC>(pmem, x, y, bw, bp, mask););
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <int psm>
|
||||
static __forceinline u32 readPixelWord(const void* pmem, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 maskA = 0xffffffff, shift;
|
||||
if (PSM_NON_FULL_WORD<psm>())
|
||||
return *getPixelAddress_A32<psm>(mask, shift, pmem, x, y, bw, bp) & mask;
|
||||
else
|
||||
return *getPixelAddress_A32<psm>(maskA, shift, pmem, x, y, bw, bp) & mask;
|
||||
}
|
||||
|
||||
template <int psm>
|
||||
static __forceinline void fillMemoryFromPixels(u32* dst, const void* pmem, int& count, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 pixel;
|
||||
|
||||
u8 I = PSM_BITS_PER_PIXEL<psm>();
|
||||
int K = count / PSM_PIXELS_STORED_PER_WORD<psm>(); // offset for pmem, count for 32, count / 2 for 16, etc.
|
||||
|
||||
pixel = readPixel<psm>(pmem, x, y, bw, bp, mask); // I prefer not to use for here. It's slow
|
||||
if (I < 32) {
|
||||
pixel += readPixel<psm>(pmem, x + 1, y, bw, bp, mask) << I;
|
||||
if (I < 16) { // 8 and 4 targets
|
||||
pixel += readPixel<psm>(pmem, x + 2, y, bw, bp, mask) << (2 * I);
|
||||
pixel += readPixel<psm>(pmem, x + 3, y, bw, bp, mask) << (3 * I);
|
||||
if (I < 8) { // This is for 4, 4HH and 4HL
|
||||
pixel += readPixel<psm>(pmem, x + 4, y, bw, bp, mask) << (4 * I);
|
||||
pixel += readPixel<psm>(pmem, x + 5, y, bw, bp, mask) << (5 * I);
|
||||
pixel += readPixel<psm>(pmem, x + 6, y, bw, bp, mask) << (6 * I);
|
||||
pixel += readPixel<psm>(pmem, x + 7, y, bw, bp, mask) << (7 * I);
|
||||
}}}
|
||||
|
||||
if (I != 24) {
|
||||
*(dst + K) = pixel;
|
||||
}
|
||||
else { // 24. should have special care.
|
||||
// ERROR_LOG("special care %d\n", count);
|
||||
MaskedOR((u32*)((u8*)dst + 3 * count), pixel, 0xffffff);
|
||||
}
|
||||
count += PSM_PIXELS_STORED_PER_WORD<psm>();
|
||||
}
|
||||
|
||||
|
||||
// Fill count pixels form continues memory region, starting from pmem, First pixel to read have number shift in this region.
|
||||
// Read no more than count pixels. We could assert, that all this pixels would be place in the same basepage
|
||||
// Shift is automatically increased by count (or decreased if count < 0)
|
||||
template <int psm, bool offseted, int count>
|
||||
static __forceinline void writePixelsFromMemory(void* dst, const void* pmem, int& shift, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
const void* base;
|
||||
if (offseted)
|
||||
base = getPixelBasepage<psm>(dst, x, y, bw, bp);
|
||||
else
|
||||
base = (const void*)dst;
|
||||
|
||||
shift += count;
|
||||
writePixelMem<psm, offseted>(base, x, y, (u32*)pmem, shift - count, bw, bp, mask); // I prefer not to use for here. It's slow
|
||||
if (count < 2) return;
|
||||
writePixelMem<psm, offseted>(base, x + 1, y, (u32*)pmem, shift - count + 1, bw, bp, mask);
|
||||
if (count < 3) return;
|
||||
writePixelMem<psm, offseted>(base, x + 2, y, (u32*)pmem, shift - count + 2, bw, bp, mask);
|
||||
if (count < 4) return;
|
||||
writePixelMem<psm, offseted>(base, x + 3, y, (u32*)pmem, shift - count + 3, bw, bp, mask);
|
||||
if (count < 5) return;
|
||||
writePixelMem<psm, offseted>(base, x + 4, y, (u32*)pmem, shift - count + 4, bw, bp, mask);
|
||||
if (count < 6) return;
|
||||
writePixelMem<psm, offseted>(base, x + 5, y, (u32*)pmem, shift - count + 5, bw, bp, mask);
|
||||
if (count < 7) return;
|
||||
writePixelMem<psm, offseted>(base, x + 6, y, (u32*)pmem, shift - count + 6, bw, bp, mask);
|
||||
if (count < 8) return;
|
||||
writePixelMem<psm, offseted>(base, x + 7, y, (u32*)pmem, shift - count + 7, bw, bp, mask);
|
||||
}
|
||||
|
||||
// Use it if we don't know that starting pixel is aligned for multiple-pixel write
|
||||
template <int psm, bool offseted>
|
||||
static __forceinline void writeUnalignedPixelsFromMemory(void* dst, int div, const void* pmem, int& shift, int x, int y, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
switch (div){
|
||||
case 0: return; // Pixels are aligned, so we could move on
|
||||
case 1: writePixelsFromMemory<psm, offseted, 1>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 2: writePixelsFromMemory<psm, offseted, 2>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 3: writePixelsFromMemory<psm, offseted, 3>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 4: writePixelsFromMemory<psm, offseted, 4>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 5: writePixelsFromMemory<psm, offseted, 5>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 6: writePixelsFromMemory<psm, offseted, 6>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
case 7: writePixelsFromMemory<psm, offseted, 7>(dst, pmem, shift, x, y, bw, bp, mask);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// This little swizzle function is used to convert data from memory. z is the first bit in the destination block, and y is the number of the word in which we look for data.
|
||||
// s is shift by number of pixels, that should be used in masking
|
||||
template <int psm, int y, int z>
|
||||
static __forceinline u32 BitmaskinPSM(u32* pmem, u8 x) {
|
||||
|
||||
u8 H = PSM_BITCOUNT<psm>();
|
||||
u8 I = PSM_BITS_PER_PIXEL<psm>() ; // length of bitmask in bits.
|
||||
|
||||
|
||||
if (PSM_BITMODE<psm>() != 1) { // PSMCT24 and 24Z should be handle separated, as it could pass 32-bit storage.
|
||||
u8 k = (x & (H - 1)) * I; // shift of PC data -- in PC we use pixels from constant position: x / H word and k is shift: x = ( x % H ) * H + k / I
|
||||
// in PS2 we use all bit position from 0 by I pixels.
|
||||
|
||||
u32 J = ((1 << I) - 1) << k; // bitmask (of length ) & mask, moved by position k
|
||||
|
||||
// gcc complains repeatedly about this always being false. I'll investigate later.
|
||||
if (z > k)
|
||||
return ((*(pmem + x/H + y)) & J) << (z - k); // we use PX data from *mem + and properly shift
|
||||
else // This formula loo little swizzled.
|
||||
return ((*(pmem + x/H + y)) & J) >> (k - z);
|
||||
}
|
||||
else { // only 24 targets
|
||||
u8* mem = ((u8*)pmem + (x * 3) + 4 * y); // Our pixel's is disaligned on 32-bit. So just use u8*.
|
||||
return *(u32*)mem; // Mask would be handled later
|
||||
}
|
||||
}
|
||||
|
||||
// We use this function to limit number of memory R/W. This function fill all pixels for data with coordindates x, y. inside block data.
|
||||
// Only rule is x, y should be < 8 (it automatically fill all needed pixels, that lie in blockdata, but have coords more than 8).
|
||||
template <int psm>
|
||||
static __forceinline void fillPixelsFromMemory(u32* dst, u32* pmem, int x, int y, int pitch, u32 bw, u32 bp = 0, u32 mask = 0xffffffff) {
|
||||
u32 pixel = 0;
|
||||
const u8 H = PSM_PIXELS_PER_WORD<psm>();
|
||||
|
||||
if (PSM_PIXEL_SHIFT<psm>() == 0) // We could not use calculated constants as templated parameters.
|
||||
pixel = BitmaskinPSM<psm, 0, 0>(pmem, x); // First pixel x,y is the common part of all psmt path's
|
||||
else {
|
||||
if (PSM_PIXEL_SHIFT<psm>() == 24) // 8H and 4HL have 1 pixel, but shifted to 24 bits. 4HH -- 28 bits.
|
||||
pixel = BitmaskinPSM<psm, 0, 24>(pmem, x);
|
||||
else
|
||||
pixel = BitmaskinPSM<psm, 0, 28>(pmem, x);
|
||||
}
|
||||
if (H > 1) {
|
||||
const u8 G = psm & 0x7; // Bitmode, we use it for better chance of switch optimization
|
||||
int div = ( x < 4 ) ? 4 : -4; // secondary row have shift by +4 or -4 pixels
|
||||
|
||||
switch (G) {
|
||||
case 2:
|
||||
pixel |= BitmaskinPSM<psm, 4, 16>(pmem, x);
|
||||
break;
|
||||
case 3:
|
||||
pixel |= BitmaskinPSM<psm, 2, 16>(pmem, x);
|
||||
pixel |= BitmaskinPSM<psm, 0, 8>(pmem + 2 * pitch, x + div);
|
||||
pixel |= BitmaskinPSM<psm, 2, 24>(pmem + 2 * pitch, x + div);
|
||||
break;
|
||||
case 4:
|
||||
pixel |= BitmaskinPSM<psm, 1, 8>(pmem, x);
|
||||
pixel |= BitmaskinPSM<psm, 2, 16>(pmem, x);
|
||||
pixel |= BitmaskinPSM<psm, 3, 24>(pmem, x);
|
||||
|
||||
pixel |= BitmaskinPSM<psm, 0, 4>(pmem + 2 * pitch, x + div);
|
||||
pixel |= BitmaskinPSM<psm, 1, 12>(pmem + 2 * pitch, x + div);
|
||||
pixel |= BitmaskinPSM<psm, 2, 20>(pmem + 2 * pitch, x + div);
|
||||
pixel |= BitmaskinPSM<psm, 3, 28>(pmem + 2 * pitch, x + div);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
writePixelWord<psm>(dst, x, y, pixel, bw, bp, HandleWritemask<psm>(mask)); // use it for 32, 24, 8H, 4HL and 4HH
|
||||
}
|
||||
|
||||
template <int psm>
|
||||
void writeWordPixel(u32* pmem, u32 pixel, u32 mask) {
|
||||
if (psm == PSMT4HH || psm == PSMT8H || psm == PSMT4HL || psm == PSMCT24 || psm == PSMT24Z)
|
||||
MaskedOR(pmem, pixel, mask);
|
||||
else
|
||||
*pmem = pixel;
|
||||
}
|
||||
|
||||
// Get a pixel from src and put it in dst. We assume that the psm of both buffers is the same and that ((sx - dx) & E) == ((sy - dy) & D) == 0;
|
||||
// Also in this case we could transfer the whole word
|
||||
template <int psm>
|
||||
void transferPixelFast(void* dst, void* src, int dx, int dy, int sx, int sy, u32 dbw, u32 sbw ) {
|
||||
u32 Dbasepage, Sbasepage;
|
||||
u32 word, mask = 0xffffffff;
|
||||
|
||||
u8 A = 0, B = 0, C = 0 , D = 0, E = 0, F = 0; u32 G = 0; u8 H = 0;
|
||||
setPsmtConstantsX<psm> (A, B, C, D, E, F, G, H);
|
||||
assert ( ((sx-dx) & E == (sy - dy) & D) && ((sy - dy) & D == 0) );
|
||||
|
||||
Dbasepage = ((dy>>A) * (dbw>>B)) + (dx>>B);
|
||||
Sbasepage = ((sy>>A) * (sbw>>B)) + (sx>>B);
|
||||
|
||||
word = (g_pageTable[psm][sy&D][sx&E] >> C);
|
||||
|
||||
u32* dstp = (u32*)dst + Dbasepage * 2048 + word;
|
||||
u32* srcp = (u32*)src + Sbasepage * 2048 + word;
|
||||
|
||||
writeWordPixel<psm>(dstp, *srcp, G << F);
|
||||
}
|
||||
|
||||
// Use this if we cannot guarantee that both buffers share the same page-table address
|
||||
template <int psm>
|
||||
void transferPixel(void* dst, void* src, int dx, int dy, int sx, int sy, u32 dbw, u32 sbw ) {
|
||||
u32 mask = 0xffffffff, shift;
|
||||
u32* dstp = getPixelAddress_A32<psm>(mask, shift, dst, dx, dy, dbw);
|
||||
u32* srcp = getPixelAddress_A32<psm>(mask, shift, src, sx, sy, sbw);
|
||||
writeWordPixel<psm>(dstp, *srcp, mask); // write whole word
|
||||
}
|
||||
|
||||
#define Def_getReadWrite(psmT, psmX) \
|
||||
static __forceinline void writePixel##psmT(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) { \
|
||||
writePixel<psmX>(pmem, x, y, pixel, bw, bp); } \
|
||||
static __forceinline u32 readPixel##psmT(const void* pmem, int x, int y, u32 bp, u32 bw) { \
|
||||
return readPixel<psmX>(pmem, x, y, bw, bp); } \
|
||||
static __forceinline void writePixel##psmT##_0(void* pmem, int x, int y, u32 pixel, u32 bw) { \
|
||||
writePixel<psmX>(pmem, x, y, pixel, bw); } \
|
||||
static __forceinline u32 readPixel##psmT##_0(const void* pmem, int x, int y, u32 bw) { \
|
||||
return readPixel<psmX>(pmem, x, y, bw); }
|
||||
|
||||
// Named read/write accessors for every storage format. Each suffix is bound
// to its own format constant, matching the Def_getPixelAddress list, so the
// 16S and depth (Z) accessors index their own g_pageTable entry instead of
// the colour-format one.
Def_getReadWrite(32, PSMCT32)
Def_getReadWrite(24, PSMCT24)
Def_getReadWrite(16, PSMCT16)
Def_getReadWrite(16S, PSMCT16S)
Def_getReadWrite(8, PSMT8)
Def_getReadWrite(8H, PSMT8H)
Def_getReadWrite(4, PSMT4)
Def_getReadWrite(4HH, PSMT4HH)
Def_getReadWrite(4HL, PSMT4HL)
Def_getReadWrite(32Z, PSMT32Z)
Def_getReadWrite(24Z, PSMT24Z)
Def_getReadWrite(16Z, PSMT16Z)
Def_getReadWrite(16SZ, PSMT16SZ)
|
||||
|
||||
#endif // Zeydlitz's code
|
||||
|
||||
#endif /* __ZZOGL_MEM_H__ */
|
|
@ -89,8 +89,7 @@ int ZZSave(s8* pbydata)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern u32 s_uFramebuffer;
|
||||
extern int g_nCurVBOIndex;
|
||||
extern u32 g_nCurVBOIndex;
|
||||
|
||||
bool ZZLoad(s8* pbydata)
|
||||
{
|
||||
|
@ -163,7 +162,7 @@ bool ZZLoad(s8* pbydata)
|
|||
|
||||
icurctx = -1;
|
||||
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, s_uFramebuffer); // switch to the backbuffer
|
||||
FB::Bind(); // switch to the backbuffer
|
||||
SetFogColor(gs.fogcol);
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
|
|
|
@ -17,7 +17,12 @@
|
|||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
//#ifdef NVIDIA_CG_API // This code is only for NVIDIA cg-toolkit API
|
||||
// By default enable nvidia cg api
|
||||
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
|
||||
#define NVIDIA_CG_API
|
||||
#endif
|
||||
|
||||
#ifdef NVIDIA_CG_API // This code is only for NVIDIA cg-toolkit API
|
||||
// ZZogl Shader manipulation functions.
|
||||
|
||||
//------------------- Includes
|
||||
|
@ -85,10 +90,10 @@ ZZshProgram pvs[16] = {NULL};
|
|||
ZZshProgram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
|
||||
ZZshParameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
|
||||
|
||||
#ifdef DEVBUILD
|
||||
char* EFFECT_NAME; // All this variables used for testing and set manually
|
||||
char* EFFECT_DIR;
|
||||
#endif
|
||||
//#ifdef DEVBUILD
|
||||
extern char* EFFECT_NAME; // All this variables used for testing and set manually
|
||||
extern char* EFFECT_DIR;
|
||||
//#endif
|
||||
|
||||
bool g_bCRTCBilinear = true;
|
||||
|
||||
|
@ -96,14 +101,9 @@ float4 g_vdepth, vlogz;
|
|||
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
|
||||
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
|
||||
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
|
||||
FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
|
||||
FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
|
||||
VERTEXSHADER pvsBitBlt;
|
||||
|
||||
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
|
||||
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
|
||||
extern u32 ptexBilinearBlocks;
|
||||
extern u32 ptexConv32to16;
|
||||
|
||||
inline bool LoadEffects();
|
||||
extern bool s_bWriteDepth;
|
||||
|
||||
|
@ -198,6 +198,10 @@ bool ZZshStartUsingShaders() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void ZZshExitCleaning() {
|
||||
// nothing to do with cg
|
||||
}
|
||||
|
||||
// open shader file according to build target
|
||||
bool ZZshCreateOpenShadersFile() {
|
||||
#ifndef DEVBUILD
|
||||
|
@ -483,6 +487,48 @@ void SetupVertexProgramParameters(ZZshProgram prog, int context)
|
|||
}
|
||||
|
||||
#ifndef DEVBUILD
|
||||
#if 0
|
||||
static __forceinline void LOAD_VS(int Index, ZZshProgram prog)
|
||||
{
|
||||
assert(mapShaderResources.find(Index) != mapShaderResources.end());
|
||||
header = mapShaderResources[Index];
|
||||
assert((header) != NULL && (header)->index == (Index));
|
||||
prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgvProf, NULL, NULL);
|
||||
if (!cgIsProgram(prog))
|
||||
{
|
||||
ZZLog::Error_Log("Failed to load vs %d: \n%s", Index, cgGetLastListing(g_cgcontext));
|
||||
return false;
|
||||
}
|
||||
cgGLLoadProgram(prog);
|
||||
|
||||
if (cgGetError() != CG_NO_ERROR) ZZLog::Error_Log("Failed to load program %d.", Index);
|
||||
SetupVertexProgramParameters(prog, !!(Index&SH_CONTEXT1));
|
||||
}
|
||||
|
||||
|
||||
static __forceinline void LOAD_VS(int Index, FRAGMENTSHADER fragment)
|
||||
{
|
||||
bLoadSuccess = true;
|
||||
assert(mapShaderResources.find(Index) != mapShaderResources.end());
|
||||
header = mapShaderResources[Index];
|
||||
fragment.prog = cgCreateProgram(g_cgcontext, CG_OBJECT, (char*)(s_lpShaderResources + (header)->offset), cgfProf, NULL, NULL);
|
||||
if (!cgIsProgram(fragment.prog))
|
||||
{
|
||||
ZZLog::Error_Log("Failed to load ps %d: \n%s", Index, cgGetLastListing(g_cgcontext));
|
||||
return false;
|
||||
}
|
||||
|
||||
cgGLLoadProgram(fragment.prog);
|
||||
|
||||
if (cgGetError() != CG_NO_ERROR)
|
||||
{
|
||||
ZZLog::Error_Log("failed to load program %d.", Index);
|
||||
bLoadSuccess = false;
|
||||
}
|
||||
|
||||
SetupFragmentProgramParameters(&fragment, !!(Index&SH_CONTEXT1), 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define LOAD_VS(Index, prog) { \
|
||||
assert( mapShaderResources.find(Index) != mapShaderResources.end() ); \
|
||||
|
@ -612,8 +658,8 @@ bool ZZshLoadExtraEffects()
|
|||
if( !bLoadSuccess )
|
||||
ZZLog::Error_Log("Failed to create CRTC shaders.");
|
||||
|
||||
LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]);
|
||||
LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]);
|
||||
// LOAD_PS(SH_CRTC24PS, ppsCRTC24[0]);
|
||||
// LOAD_PS(SH_CRTC24INTERPS, ppsCRTC24[1]);
|
||||
LOAD_PS(SH_ZEROPS, ppsOne);
|
||||
LOAD_PS(SH_BASETEXTUREPS, ppsBaseTexture);
|
||||
LOAD_PS(SH_CONVERT16TO32PS, ppsConvert16to32);
|
||||
|
@ -811,7 +857,7 @@ bool ZZshLoadExtraEffects()
|
|||
if( !bLoadSuccess )
|
||||
ZZLog::Error_Log("Failed to create CRTC shaders.");
|
||||
|
||||
LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf);
|
||||
// LOAD_PS("CRTC24PS", ppsCRTC24[0], cgfProf); LOAD_PS("CRTC24InterPS", ppsCRTC24[1], cgfProf);
|
||||
LOAD_PS("ZeroPS", ppsOne, cgfProf);
|
||||
LOAD_PS("BaseTexturePS", ppsBaseTexture, cgfProf);
|
||||
LOAD_PS("Convert16to32PS", ppsConvert16to32, cgfProf);
|
||||
|
@ -886,4 +932,4 @@ FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testae
|
|||
|
||||
#endif // RELEASE_TO_PUBLIC
|
||||
|
||||
//#endif // NVIDIA_CG_API
|
||||
#endif // NVIDIA_CG_API
|
||||
|
|
|
@ -33,8 +33,10 @@
|
|||
#include "ZZoglMath.h"
|
||||
#include "GS.h"
|
||||
|
||||
// For output
|
||||
// By default enable nvidia cg api
|
||||
#if !defined(GLSL_API) && !defined(NVIDIA_CG_API)
|
||||
#define NVIDIA_CG_API
|
||||
#endif
|
||||
// --------------------------- API abstraction level --------------------------------
|
||||
|
||||
#ifdef NVIDIA_CG_API // Code for NVIDIA cg-toolkit API
|
||||
|
@ -56,9 +58,60 @@ inline bool ZZshActiveParameter(ZZshParameter param) {return (param !=NULL); }
|
|||
|
||||
#endif // end NVIDIA cg-toolkit API
|
||||
|
||||
#ifdef GLSL_API
|
||||
|
||||
enum ZZshPARAMTYPE {
|
||||
ZZ_UNDEFINED,
|
||||
ZZ_TEXTURE_2D,
|
||||
ZZ_TEXTURE_RECT,
|
||||
ZZ_TEXTURE_3D,
|
||||
ZZ_FLOAT4,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
const char* ShName; // Name of uniform
|
||||
ZZshPARAMTYPE type; // Choose between parameter type
|
||||
|
||||
float fvalue[4];
|
||||
GLuint sampler; // Number of texture unit in array
|
||||
GLint texid; // Number of texture - texid.
|
||||
|
||||
bool Constant; // Uniform could be constants, does not change at program flow
|
||||
bool Settled; // Check if Uniform value was set.
|
||||
} ZZshParamInfo;
|
||||
|
||||
typedef struct {
|
||||
void* link;
|
||||
bool isFragment;
|
||||
} ZZshShaderLink;
|
||||
|
||||
#define ZZshProgram GLuint
|
||||
#define ZZshShader GLuint
|
||||
#define ZZshParameter GLint
|
||||
#define ZZshContext int
|
||||
#define ZZshProfile int
|
||||
#define ZZshError int
|
||||
#define ZZshIndex GLuint
|
||||
|
||||
const ZZshParamInfo qZero = {ShName:"", type:ZZ_UNDEFINED, fvalue:{0}, sampler: -1, texid: 0, Constant: false, Settled: false};
|
||||
|
||||
#define pZero 0
|
||||
|
||||
const ZZshShaderLink sZero = {link: NULL, isFragment: false};
|
||||
|
||||
inline bool ZZshActiveParameter(ZZshParameter param) {return (param > -1); }
|
||||
#define SAFE_RELEASE_PROG(x) { /*don't know what to do*/ }
|
||||
|
||||
// ---------------------------
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
//const static char* g_pPsTexWrap[] = { "-DREPEAT", "-DCLAMP", "-DREGION_REPEAT", NULL };
|
||||
|
||||
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC};
|
||||
enum ZZshShaderType {ZZ_SH_ZERO, ZZ_SH_REGULAR, ZZ_SH_REGULAR_FOG, ZZ_SH_TEXTURE, ZZ_SH_TEXTURE_FOG, ZZ_SH_CRTC, ZZ_SH_NONE};
|
||||
// We have "compatible" shaders, as RegularFogVS and RegularFogPS. if don't need to wory about incompatible shaders
|
||||
// It used only in GLSL mode.
|
||||
|
||||
|
@ -92,6 +145,7 @@ struct FRAGMENTSHADER
|
|||
string filename;
|
||||
#endif
|
||||
|
||||
#ifdef NVIDIA_CG_API
|
||||
void set_uniform_param(ZZshParameter &var, const char *name)
|
||||
{
|
||||
ZZshParameter p;
|
||||
|
@ -161,6 +215,7 @@ struct FRAGMENTSHADER
|
|||
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
struct VERTEXSHADER
|
||||
|
@ -183,8 +238,32 @@ struct VERTEXSHADER
|
|||
extern FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
|
||||
|
||||
extern FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
|
||||
extern FRAGMENTSHADER ppsCRTC[2], ppsCRTC24[2], ppsCRTCTarg[2];
|
||||
extern FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
|
||||
|
||||
extern int interlace_mode;
|
||||
|
||||
enum CRTC_TYPE
|
||||
{
|
||||
CRTC_RENDER,
|
||||
//CRTC_RENDER_24,
|
||||
CRTC_RENDER_TARG
|
||||
};
|
||||
|
||||
static __forceinline FRAGMENTSHADER* curr_ppsCRTC() { return &ppsCRTC[interlace_mode]; }
|
||||
//static __forceinline FRAGMENTSHADER* curr_ppsCRTC24() { return &ppsCRTC24[interlace_mode]; }
|
||||
static __forceinline FRAGMENTSHADER* curr_ppsCRTCTarg() { return &ppsCRTCTarg[interlace_mode]; }
|
||||
|
||||
// Pick the CRTC fragment shader for the requested render type.
// Delegates to curr_ppsCRTC() / curr_ppsCRTCTarg(); any other value yields NULL.
static __forceinline FRAGMENTSHADER* curr_pps(CRTC_TYPE render_type)
{
	if (render_type == CRTC_RENDER)
		return curr_ppsCRTC();

	if (render_type == CRTC_RENDER_TARG)
		return curr_ppsCRTCTarg();

	return NULL;
}
|
||||
// ------------------------- Functions -------------------------------
|
||||
|
||||
#ifdef NVIDIA_CG_API
|
||||
|
@ -192,6 +271,11 @@ inline bool ZZshExistProgram(FRAGMENTSHADER* pf) {return (pf->prog != NULL); };
|
|||
inline bool ZZshExistProgram(VERTEXSHADER* pf) {return (pf->prog != NULL); };
|
||||
inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog != NULL); };
|
||||
#endif
|
||||
#ifdef GLSL_API
|
||||
inline bool ZZshExistProgram(FRAGMENTSHADER* pf) {return (pf->Shader != 0); };
|
||||
inline bool ZZshExistProgram(VERTEXSHADER* pf) {return (pf->Shader != 0); };
|
||||
inline bool ZZshExistProgram(ZZshShaderLink prog) {return (prog.link != NULL); } // This is used for pvs mainly. No NULL means that we do LOAD_VS
|
||||
#endif
|
||||
|
||||
extern const char* ShaderCallerName;
|
||||
extern const char* ShaderHandleName;
|
||||
|
@ -222,10 +306,17 @@ extern void ZZshDefaultOneColor( FRAGMENTSHADER ptr );
|
|||
extern void ZZshSetVertexShader(ZZshShaderLink prog);
|
||||
extern void ZZshSetPixelShader(ZZshShaderLink prog);
|
||||
extern bool ZZshLoadExtraEffects();
|
||||
extern void ZZshExitCleaning();
|
||||
|
||||
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
||||
|
||||
// only sets a limited amount of state (for Update)
|
||||
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
|
||||
// only sets a limited amount of state (for Update)
|
||||
void SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0, bool CheckVB, FRAGMENTSHADER* pfragment, int force);
|
||||
|
||||
extern u32 ptexBlocks; // holds information on block tiling. It's texture number in OpenGL -- if 0 than such texture
|
||||
extern u32 ptexConv16to32; // does not exists. This textures should be created on start and released on finish.
|
||||
extern u32 ptexBilinearBlocks;
|
||||
extern u32 ptexConv32to16;
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,979 @@
|
|||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009 zeydlitz@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2006
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifdef GLSL_API // This code is only for GLSL API
|
||||
// ZZogl Shader manipulation functions.
|
||||
|
||||
/*
|
||||
* used cg calls:
|
||||
* cgGLIsProfileSupported -- don't needed
|
||||
* cgGetErrorString -- later
|
||||
* cgGetLastListing -- later
|
||||
* cgSetErrorHandler -- later
|
||||
* cgCreateContext -- think that don't need
|
||||
* cgGLEnableProfile -- don't need
|
||||
* cgGLSetOptimalOptions -- don't need?
|
||||
* cgGLSetManageTextureParameters -- what's this?
|
||||
* cgCreateParameter -- don't need
|
||||
* cgGLLoadProgram void LinkProgram(uint program)
|
||||
* cgGetError -- later
|
||||
* cgGLDisableProfile -- don't need
|
||||
* cgGLSetParameter4fv
|
||||
* cgGetNamedParameter
|
||||
* cgGLEnableTextureParameter
|
||||
* cgIsParameterUsed
|
||||
* cgGLBindProgram void UseProgram(uint program)
|
||||
* cgConnectParameter
|
||||
* cgIsProgram bool IsProgram(uint program)
|
||||
* cgCreateProgramFromFile
|
||||
*/
|
||||
|
||||
//------------------- Includes
|
||||
#include "Util.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#include "zpipe.h"
|
||||
#include <math.h>
|
||||
#include <map>
|
||||
#include <fcntl.h> // this for open(). Maybe linux-specific
|
||||
#include <sys/mman.h> // and this for mmap
|
||||
|
||||
// ----------------- Defines
|
||||
|
||||
#define TEXWRAP_REPEAT 0
|
||||
#define TEXWRAP_CLAMP 1
|
||||
#define TEXWRAP_REGION_REPEAT 2
|
||||
#define TEXWRAP_REPEAT_CLAMP 3
|
||||
|
||||
#ifdef DEVBUILD
|
||||
# define UNIFORM_ERROR_LOG ZZLog::Error_Log
|
||||
#else
|
||||
# define UNIFORM_ERROR_LOG
|
||||
#endif
|
||||
|
||||
// Set it to 0 to disable context usage, 1 to enable. FFX-1 has a strange issue with ClampExt.
|
||||
#define NOCONTEXT 0
|
||||
#define NUMBER_OF_SAMPLERS 11
|
||||
#define MAX_SHADER_NAME_SIZE 25
|
||||
#define MAX_UNIFORM_NAME_SIZE 20
|
||||
#define DEFINE_STRING_SIZE 256
|
||||
//------------------ Constants
|
||||
|
||||
// Used in a logarithmic Z-test, as (1-o(1))/log(MAX_U32).
|
||||
const float g_filog32 = 0.999f / (32.0f * logf(2.0f));
|
||||
|
||||
const static char* g_pTexTypes[] = { "32", "tex32", "clut32", "tex32to16", "tex16to8h" };
|
||||
const static char* g_pShaders[4] = { "full", "reduced", "accurate", "accurate-reduced" };
|
||||
|
||||
// ----------------- Global Variables
|
||||
|
||||
ZZshContext g_cgcontext;
|
||||
ZZshProfile cgvProf, cgfProf;
|
||||
int g_nPixelShaderVer = 0; // default
|
||||
u8* s_lpShaderResources = NULL;
|
||||
ZZshShaderLink pvs[16] = {sZero}, g_vsprog = sZero, g_psprog = sZero; // 2 -- ZZ
|
||||
ZZshParameter g_vparamPosXY[2] = {pZero}, g_fparamFogColor = pZero;
|
||||
|
||||
ZZshProgram ZZshMainProgram;
|
||||
char* ZZshSource; // Shader's source data.
|
||||
off_t ZZshSourceSize;
|
||||
|
||||
extern char* EFFECT_NAME; // All this variables used for testing and set manually
|
||||
extern char* EFFECT_DIR;
|
||||
|
||||
bool g_bCRTCBilinear = true;
|
||||
|
||||
float4 g_vdepth, vlogz;
|
||||
FRAGMENTSHADER ppsBitBlt[2], ppsBitBltDepth, ppsOne;
|
||||
FRAGMENTSHADER ppsBaseTexture, ppsConvert16to32, ppsConvert32to16;
|
||||
FRAGMENTSHADER ppsRegular[4], ppsTexture[NUM_SHADERS];
|
||||
FRAGMENTSHADER ppsCRTC[2], /*ppsCRTC24[2],*/ ppsCRTCTarg[2];
|
||||
VERTEXSHADER pvsStore[16];
|
||||
VERTEXSHADER pvsBitBlt;
|
||||
|
||||
inline bool LoadEffects();
|
||||
extern bool s_bWriteDepth;
|
||||
|
||||
struct SHADERHEADER
|
||||
{
|
||||
unsigned int index, offset, size; // if highest bit of index is set, pixel shader
|
||||
};
|
||||
map<int, SHADERHEADER*> mapShaderResources;
|
||||
|
||||
// Debug variable, store name of the function that call the shader.
|
||||
const char* ShaderCallerName = "";
|
||||
const char* ShaderHandleName = "";
|
||||
|
||||
int NumActiveUniforms, NumGlobalUniforms;
|
||||
ZZshParamInfo UniformsIndex[MAX_ACTIVE_UNIFORMS] = {qZero};
|
||||
const char* ShaderNames[MAX_ACTIVE_SHADERS] = {""};
|
||||
ZZshShaderType ShaderTypes[MAX_ACTIVE_SHADERS] = {ZZ_SH_NONE};
|
||||
|
||||
ZZshProgram CompiledPrograms[MAX_ACTIVE_SHADERS][MAX_ACTIVE_SHADERS] = {{0}};
|
||||
const char* TextureUnits[NUMBER_OF_SAMPLERS] =
|
||||
{"g_sMemory[0]", "g_sMemory[1]", "g_sSrcFinal", "g_sBitwiseANDX", "g_sBitwiseANDY", "g_sInterlace", \
|
||||
"g_sCLUT", "g_sBlocks", "g_sBilinearBlocks", "g_sConv16to32", "g_sConv32to16"};
|
||||
ZZshPARAMTYPE TextureTypes[NUMBER_OF_SAMPLERS] =
|
||||
{ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, ZZ_TEXTURE_RECT, \
|
||||
ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_2D, ZZ_TEXTURE_3D} ;
|
||||
|
||||
//------------------ Code
|
||||
|
||||
inline int GET_SHADER_INDEX(int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int context, int ps) {
|
||||
return type + texfilter*NUM_TYPES + NUM_FILTERS*NUM_TYPES*texwrap + NUM_TEXWRAPS*NUM_FILTERS*NUM_TYPES*(fog+2*writedepth+4*testaem+8*exactcolor+16*context+32*ps) ;
|
||||
}
|
||||
|
||||
// Nothing needs to be checked for the GLSL backend; always reports success.
bool ZZshCheckProfilesSupport() {
	return true;
}
|
||||
|
||||
// Error handler. Setup in ZZogl_Create once.
|
||||
void HandleCgError(ZZshContext ctx, ZZshError err, void* appdata)
|
||||
{/*
|
||||
ZZLog::Error_Log("%s->%s: %s", ShaderCallerName, ShaderHandleName, cgGetErrorString(err));
|
||||
const char* listing = cgGetLastListing(g_cgcontext);
|
||||
if (listing != NULL)
|
||||
ZZLog::Debug_Log(" last listing: %s", listing);
|
||||
*/
|
||||
}
|
||||
|
||||
float ZeroFloat4[4] = {0};
|
||||
|
||||
// Copy four consecutive floats from v into f, lowest index first.
inline void SettleFloat(float* f, const float* v) {
	for (int component = 0; component < 4; ++component)
		f[component] = v[component];
}
|
||||
|
||||
// Build a ZZshParamInfo record from its individual fields.
//
// ShName is stored by pointer, not copied, so the caller must keep the string alive for as
// long as the record is in use. fvalue must point to at least four floats.
//
// The earlier version allocated MAX_UNIFORM_NAME_SIZE bytes for ShName and then immediately
// overwrote the pointer with the argument, leaking that buffer on every call; the allocation
// is gone, the stored pointer is unchanged.
inline ZZshParamInfo ParamInfo(const char* ShName, ZZshPARAMTYPE type, const float fvalue[], GLuint sampler, GLint texid, bool Constant, bool Settled) {
	ZZshParamInfo x;
	x.ShName = ShName;
	x.type = type;
	SettleFloat(x.fvalue, fvalue);
	x.sampler = sampler;
	x.texid = texid;
	x.Constant = Constant;
	x.Settled = Settled;
	return x;
}
|
||||
|
||||
// Append a float4 uniform called `name` to UniformsIndex and hand its slot back through *param.
// The new entry starts zeroed, non-constant and not settled.
inline void SetGlobalUniform(ZZshParameter* param, const char* name) {
	const int slot = NumActiveUniforms++;
	*param = slot;
	UniformsIndex[slot] = ParamInfo(name, ZZ_FLOAT4, ZeroFloat4, -1, 0, false, false);
}
|
||||
|
||||
bool ZZshStartUsingShaders() {
|
||||
|
||||
ZZLog::Error_Log("Creating effects.");
|
||||
B_G(LoadEffects(), return false);
|
||||
if (!glCreateShader)
|
||||
{
|
||||
ZZLog::Error_Log("GLSL shaders is not supported, stop.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// create a sample shader
|
||||
clampInfo temp;
|
||||
memset(&temp, 0, sizeof(temp));
|
||||
temp.wms = 3; temp.wmt = 3;
|
||||
|
||||
g_nPixelShaderVer = 0;//SHADER_ACCURATE;
|
||||
// test
|
||||
bool bFailed;
|
||||
FRAGMENTSHADER* pfrag = ZZshLoadShadeEffect(0, 1, 1, 1, 1, temp, 0, &bFailed);
|
||||
if( bFailed || pfrag == NULL ) {
|
||||
g_nPixelShaderVer = SHADER_ACCURATE|SHADER_REDUCED;
|
||||
|
||||
pfrag = ZZshLoadShadeEffect(0, 0, 1, 1, 0, temp, 0, &bFailed);
|
||||
if( pfrag != NULL )
|
||||
glLinkProgram(pfrag->Shader);
|
||||
if( bFailed || pfrag == NULL || glGetError() != GL_NO_ERROR) {
|
||||
g_nPixelShaderVer = SHADER_REDUCED;
|
||||
ZZLog::Error_Log("Basic shader test failed.");
|
||||
}
|
||||
}
|
||||
ZZshMainProgram = glCreateProgram();
|
||||
NumActiveUniforms = 0;
|
||||
SetGlobalUniform(&g_fparamFogColor, "g_fFogColor");
|
||||
SetGlobalUniform(&g_vparamPosXY[0], "g_fPosXY[0]");
|
||||
SetGlobalUniform(&g_vparamPosXY[1], NOCONTEXT?"g_fPosXY[1]":"g_fPosXY[0]");
|
||||
NumGlobalUniforms = NumActiveUniforms;
|
||||
|
||||
if (g_nPixelShaderVer & SHADER_REDUCED)
|
||||
conf.bilinear = 0;
|
||||
|
||||
ZZLog::Error_Log("Creating extra effects.");
|
||||
B_G(ZZshLoadExtraEffects(), return false);
|
||||
|
||||
ZZLog::Error_Log("Using %s shaders.", g_pShaders[g_nPixelShaderVer]);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// open shader file according to build target
|
||||
bool ZZshCreateOpenShadersFile() {
|
||||
std::string ShaderFileName("plugins/ps2hw.glsl");
|
||||
int ShaderFD = open(ShaderFileName.c_str(), O_RDONLY);
|
||||
struct stat sb;
|
||||
if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) {
|
||||
// Each linux distributions have his rules for path so we give them the possibility to
|
||||
// change it with compilation flags. -- Gregory
|
||||
#ifdef PLUGIN_DIR_COMPILATION
|
||||
#define xPLUGIN_DIR_str(s) PLUGIN_DIR_str(s)
|
||||
#define PLUGIN_DIR_str(s) #s
|
||||
ShaderFileName = string(xPLUGIN_DIR_str(PLUGIN_DIR_COMPILATION)) + "/ps2hw.glsl";
|
||||
ShaderFD = open(ShaderFileName.c_str(), O_RDONLY);
|
||||
#endif
|
||||
if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) {
|
||||
ZZLog::Error_Log("No source for %s: \n", ShaderFileName.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
ZZshSourceSize = sb.st_size;
|
||||
ZZshSource = (char*)mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ShaderFD, 0); // This function directly maped file into memory.
|
||||
ZZshSource[ ZZshSourceSize - 1] = 0; // Made source null-terminated.
|
||||
|
||||
close(ShaderFD);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ZZshExitCleaning() {
|
||||
munmap(ZZshSource, ZZshSourceSize);
|
||||
}
|
||||
|
||||
// Disable CG
|
||||
void ZZshGLDisableProfile() { // This stop all other shader programs from running;
|
||||
glUseProgram(0);
|
||||
}
|
||||
// GLSL counterpart of enabling the Cg profile. Deliberately does nothing.
void ZZshGLEnableProfile() {
}
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
// The same function for texture, also to cgGLEnable
|
||||
void ZZshGLSetTextureParameter(ZZshParameter param, GLuint texobj, const char* name) {
|
||||
if (param > -1) {
|
||||
// ZZLog::Error_Log("Set texture parameter %s %d... Ok", name, texobj);
|
||||
UniformsIndex[param].texid = texobj;
|
||||
UniformsIndex[param].Settled = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Overload taking a shader link. `prog` and `name` are unused: the texture id is stored in
// the uniform table slot `param` and the slot is marked settled. Negative slots are ignored.
void ZZshGLSetTextureParameter(ZZshShaderLink prog, ZZshParameter param, GLuint texobj, const char* name) {
	if (param <= -1)
		return;

	ZZshParamInfo& slot = UniformsIndex[param];
	slot.texid = texobj;
	slot.Settled = true;
}
|
||||
|
||||
// This is helper of cgGLSetParameter4fv, made for debug purpose.
|
||||
// Name could be any string. We must use it on compilation time, because erroneus handler does not
|
||||
// return name
|
||||
// Store the four floats at `v` as the pending value of uniform slot `param` and mark the
// slot settled. Negative slots are ignored. `prog` and `name` are unused here.
void ZZshSetParameter4fv(ZZshShaderLink prog, ZZshParameter param, const float* v, const char* name) {
	if (param <= -1)
		return;

	ZZshParamInfo& slot = UniformsIndex[param];
	SettleFloat(slot.fvalue, v);
	slot.Settled = true;
}
|
||||
|
||||
// Store the four floats at `v` as the pending value of uniform slot `param` and mark the
// slot settled. Negative slots are ignored. `name` is unused here.
void ZZshSetParameter4fv(ZZshParameter param, const float* v, const char* name) {
	if (param <= -1)
		return;

	ZZshParamInfo& slot = UniformsIndex[param];
	SettleFloat(slot.fvalue, v);
	slot.Settled = true;
}
|
||||
|
||||
// The same stuff, but also with retry of param, name should be USED name of param for prog.
|
||||
void ZZshSetParameter4fvWithRetry(ZZshParameter* param, ZZshShaderLink prog, const float* v, const char* name) {
|
||||
if (param != NULL)
|
||||
ZZshSetParameter4fv(prog, *param, v, name);
|
||||
}
|
||||
|
||||
// Used sometimes for color 1.
|
||||
void ZZshDefaultOneColor( FRAGMENTSHADER ptr ) {
|
||||
// return;
|
||||
ShaderHandleName = "Set Default One colot";
|
||||
float4 v = float4 ( 1, 1, 1, 1 );
|
||||
ZZshSetParameter4fv(ptr.prog, ptr.sOneColor, v, "DegaultOne");
|
||||
}
|
||||
//-------------------------------------------------------------------------------------
|
||||
|
||||
const GLchar * EmptyVertex = "void main(void) {gl_Position = ftransform();}";
|
||||
const GLchar * EmptyFragment = "void main(void) {gl_FragColor = gl_Color;}";
|
||||
|
||||
// Build a program containing only a trivial pass-through shader of the requested stage
// (EmptyVertex for GL_VERTEX_SHADER, EmptyFragment otherwise).
// Returns the program id, or -1 converted to ZZshProgram if the result is not a usable program.
inline ZZshProgram UseEmptyProgram(const char* name, GLenum shaderType) {
	const GLchar** source = (shaderType == GL_VERTEX_SHADER) ? &EmptyVertex : &EmptyFragment;

	GLuint shader = glCreateShader(shaderType);
	glShaderSource(shader, 1, source, NULL);
	glCompileShader(shader);

	ZZshProgram prog = glCreateProgram();
	glAttachShader(prog, shader);
	glLinkProgram(prog);

	const bool usable = glIsProgram(prog) && glGetError() == GL_NO_ERROR;
	if (!usable) {
		ZZLog::Error_Log("Failed to load empty shader for %s:", name);
		return -1;
	}

	ZZLog::Error_Log("Used Empty program for %s... Ok.",name);
	return prog;
}
|
||||
|
||||
// Classify a shader by the prefix of its name. Prefixes are tested in table order, so the
// longer "TextureFog"/"RegularFog" entries must precede "Texture"/"Regular".
// Names matching no prefix are treated as CRTC shaders.
ZZshShaderType ZZshGetShaderType(const char* name) {
	static const struct { const char* prefix; ZZshShaderType type; } prefixTable[] = {
		{ "TextureFog", ZZ_SH_TEXTURE_FOG },
		{ "Texture",    ZZ_SH_TEXTURE },
		{ "RegularFog", ZZ_SH_REGULAR_FOG },
		{ "Regular",    ZZ_SH_REGULAR },
		{ "Zero",       ZZ_SH_ZERO },
	};
	const size_t entries = sizeof(prefixTable) / sizeof(prefixTable[0]);

	for (size_t i = 0; i < entries; ++i) {
		if (strncmp(name, prefixTable[i].prefix, strlen(prefixTable[i].prefix)) == 0)
			return prefixTable[i].type;
	}
	return ZZ_SH_CRTC;
}
|
||||
|
||||
// Create and compile a trivial pass-through shader of the requested stage and register its
// name and type in ShaderNames / ShaderTypes under the GL shader id.
// Returns the shader id. `name` is stored by pointer and must outlive the table entry.
inline ZZshShader UseEmptyShader(const char* name, GLenum shaderType) {
	GLuint shader = glCreateShader(shaderType);
	if (shaderType == GL_VERTEX_SHADER)
		glShaderSource(shader, 1, &EmptyVertex, NULL);
	else
		glShaderSource(shader, 1, &EmptyFragment, NULL);

	glCompileShader(shader);

	// The tables are indexed by GL object id; an id beyond their size must not be written.
	if (shader < (GLuint)MAX_ACTIVE_SHADERS) {
		ShaderNames[shader] = name;
		ShaderTypes[shader] = ZZshGetShaderType(name);
	}
	else
		ZZLog::Error_Log("Shader id %d for %s exceeds MAX_ACTIVE_SHADERS, name and type are not recorded.", shader, name);

	ZZLog::Error_Log("Used Empty shader for %s... Ok.",name);
	return shader;
}
|
||||
|
||||
// Report whether `shader` compiled. On failure the GL info log is written to the error log
// and false is returned.
//
// Two defects of the earlier version are fixed: an uninitialised int* was handed to
// glGetShaderInfoLog as the "length written" out-parameter (GL would write through it), and
// the log buffer was never freed. NULL is passed for the length, which GL accepts.
inline bool GetCompilationLog(GLuint shader) {
	GLint CompileStatus = GL_FALSE;
	glGetShaderiv(shader, GL_COMPILE_STATUS, &CompileStatus);
	if (CompileStatus == GL_TRUE)
		return true;

	GLint infologlength = 0;
	glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infologlength);

	// Always keep room for a terminator so the %s below is safe even with an empty log.
	const GLsizei bufSize = (infologlength > 0) ? infologlength : 1;
	char* InfoLog = new char[bufSize];
	InfoLog[0] = 0;
	glGetShaderInfoLog(shader, bufSize, NULL, InfoLog);
	ZZLog::Error_Log("Compiling... %d:\t %s", shader, InfoLog);
	delete[] InfoLog;

	return false;
}
|
||||
|
||||
// Compile one shader stage from DefineString followed by the mapped source (ZZshSource).
//
// shader       receives the new GL shader id (also on failure).
// DefineString text placed before the common source.
// name         shader name; stored by pointer in ShaderNames, also selects the ShaderTypes entry.
// shaderType   GL stage enum passed to glCreateShader.
// Returns true when compilation succeeded and the shader was registered in the tables.
inline bool CompileShader(ZZshProgram& shader, const char* DefineString, const char* name, GLenum shaderType) {
	const GLchar* ShaderSource[2] = { (const GLchar*)DefineString, (const GLchar*)ZZshSource };

	shader = glCreateShader(shaderType);
	glShaderSource(shader, 2, &ShaderSource[0], NULL);
	glCompileShader(shader);
	ZZLog::Debug_Log("Creating shader %d for %s", shader, name);

	if (!GetCompilationLog(shader)) {
		ZZLog::Error_Log("Failed to compile shader for %s:", name);
		return false;
	}

	// ShaderTypes/ShaderNames are indexed by GL object id. An id past the end of the tables
	// used to be written out of bounds; treat it as a failure instead, since the rest of the
	// file looks shaders up through these tables.
	if (shader >= (GLuint)MAX_ACTIVE_SHADERS) {
		ZZLog::Error_Log("Shader id %d for %s exceeds MAX_ACTIVE_SHADERS.", shader, name);
		return false;
	}

	ShaderTypes[shader] = ZZshGetShaderType(name);
	ShaderNames[shader] = name;

	GL_REPORT_ERRORD();
	return true;
}
|
||||
|
||||
// Thin wrapper around CompileShader that logs the outcome.
// Returns CompileShader's result; `shader` receives the GL shader id.
inline bool LoadShaderFromFile(ZZshShader& shader, const char* DefineString, const char* name, GLenum ShaderType) { // Linux specific, as I presume
	const bool compiled = CompileShader(shader, DefineString, name, ShaderType);
	if (compiled) {
		ZZLog::Error_Log("Used shader for %s... Ok",name);
		return true;
	}

	ZZLog::Error_Log("Failed to compile shader for %s: ", name);
	return false;
}
|
||||
|
||||
// Report whether `prog` is a successfully linked program. In DEVBUILD a non-empty GL info
// log is written to the error log on failure.
//
// Fixes over the earlier version: the "length written" out-parameter was an uninitialised
// int* (GL would write through it) and the log buffer leaked; the counters are now
// zero-initialised in case the queries fail and leave them untouched.
inline bool GetLinkLog(ZZshProgram prog) {
	GLint LinkStatus = GL_FALSE;
	glGetProgramiv(prog, GL_LINK_STATUS, &LinkStatus);

	int unif = 0, atrib = 0;
	glGetProgramiv(prog, GL_ACTIVE_UNIFORMS, &unif);
	glGetProgramiv(prog, GL_ACTIVE_ATTRIBUTES, &atrib);
	UNIFORM_ERROR_LOG("Uniforms %d, attributes %d", unif, atrib);

	if (LinkStatus == GL_TRUE && glIsProgram(prog)) return true;

#ifdef DEVBUILD
	GLint infologlength = 0;
	glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &infologlength);
	if (infologlength > 0) {
		char* InfoLog = new char[infologlength];
		InfoLog[0] = 0;
		glGetProgramInfoLog(prog, infologlength, NULL, InfoLog);
		ZZLog::Error_Log("Linking %d... %d:\t %s", prog, infologlength, InfoLog);
		delete[] InfoLog;
	}
#endif

	return false;
}
|
||||
|
||||
//-------------------------------------------------------------------------------------
|
||||
// Link `shader` (and `shader2` when non-zero) into a new program and return it.
// `shader` is detached again after linking. If linking fails, the failed program is deleted
// and the result of UseEmptyProgram(name, GL_FRAGMENT_SHADER) is returned instead.
//
// The earlier version replaced `prog` with the fallback first and then called glDetachShader
// on the fallback (which never had `shader` attached), leaked the failed program and logged
// "Ok" even on failure.
inline ZZshProgram madeProgram(ZZshShader shader, ZZshShader shader2, char* name) {
	ZZshProgram prog = glCreateProgram();
	glAttachShader(prog, shader);
	if (shader2 != 0)
		glAttachShader(prog, shader2);
	glLinkProgram(prog);

	const bool linked = GetLinkLog(prog);

	// Detach from the program the shader was actually attached to.
	glDetachShader(prog, shader);

	if (!linked) {
		ZZLog::Error_Log("Failed to link shader for %s: ", name);
		glDeleteProgram(prog);
		return UseEmptyProgram(name, GL_FRAGMENT_SHADER);
	}

	ZZLog::Error_Log("Made shader program for %s... Ok",name);
	return prog;
}
|
||||
|
||||
void PutParametersInProgam(int start, int finish) {
|
||||
for (int i = start; i < finish; i++) {
|
||||
ZZshParamInfo param = UniformsIndex[i];
|
||||
GLint location = glGetUniformLocation(ZZshMainProgram, param.ShName);
|
||||
|
||||
if (location != -1 && param.type != ZZ_UNDEFINED) {
|
||||
UNIFORM_ERROR_LOG("\tTry uniform %d %d %d %s...\t\t", i, location, param.type, param.ShName);
|
||||
|
||||
if (!param.Settled && !param.Constant) {
|
||||
UNIFORM_ERROR_LOG("\tUnsettled, non-constant uniform, could be bug: %d %s", param.type, param.ShName);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (param.type == ZZ_FLOAT4) {
|
||||
glUniform4fv(location, 1, param.fvalue);
|
||||
}
|
||||
else
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0 + param.sampler);
|
||||
if (param.type == ZZ_TEXTURE_2D)
|
||||
glBindTexture(GL_TEXTURE_2D, param.texid);
|
||||
else if (param.type == ZZ_TEXTURE_3D)
|
||||
glBindTexture(GL_TEXTURE_3D, param.texid);
|
||||
else
|
||||
glBindTexture(GL_TEXTURE_RECTANGLE, param.texid);
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
if (glGetError() == GL_NO_ERROR)
|
||||
UNIFORM_ERROR_LOG("Ok. Param name %s, location %d, type %d", param.ShName, location, param.type);
|
||||
else
|
||||
ZZLog::Error_Log("error in PutParametersInProgam param name %s, location %d, type %d", param.ShName, location, param.type);
|
||||
|
||||
if (!param.Constant) // Unset used parameters
|
||||
UniformsIndex[i].Settled == false;
|
||||
}
|
||||
else if (start != 0 && location == -1 && param.Settled) // No global variable
|
||||
ZZLog::Error_Log("Warning! Unused, but set uniform %d, %s", location, param.ShName);
|
||||
}
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
void PutSInProgam(int start, int finish) {
|
||||
for (int i = start; i < finish; i++) {
|
||||
ZZshParamInfo param = UniformsIndex[i];
|
||||
GLint location = glGetUniformLocation(ZZshMainProgram, param.ShName);
|
||||
|
||||
if (location != -1 && param.type != ZZ_UNDEFINED) {
|
||||
if (param.type != ZZ_FLOAT4) {
|
||||
UNIFORM_ERROR_LOG("\tTry sampler %d %d %d %s %d...\t\t", i, location, param.type, param.ShName, param.sampler);
|
||||
if (glGetError() == GL_NO_ERROR)
|
||||
UNIFORM_ERROR_LOG("Ok");
|
||||
else
|
||||
UNIFORM_ERROR_LOG("error!");
|
||||
glUniform1i(location, param.sampler);
|
||||
}
|
||||
}
|
||||
}
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
// Return the program's GL_VALIDATE_STATUS as queried on entry. When that status is false,
// glValidateProgram is run and the resulting info log is written to the error log; the
// return value still reflects the status read before that validation.
//
// Fixes over the earlier version: the "length written" out-parameter was an uninitialised
// int* (GL would write through it), the log buffer leaked, and isValid was left
// indeterminate if the query failed.
bool ValidateProgram(ZZshProgram Prog) {
	GLint isValid = GL_FALSE;
	glGetProgramiv(Prog, GL_VALIDATE_STATUS, &isValid);

	if (!isValid) {
		glValidateProgram(Prog);

		GLint infologlength = 0;
		glGetProgramiv(Prog, GL_INFO_LOG_LENGTH, &infologlength);

		// Always keep room for a terminator so the %s below is safe even with an empty log.
		const GLsizei bufSize = (infologlength > 0) ? infologlength : 1;
		char* InfoLog = new char[bufSize];
		InfoLog[0] = 0;
		glGetProgramInfoLog(Prog, bufSize, NULL, InfoLog);
		ZZLog::Error_Log("Validation %d... %d:\t %s", Prog, infologlength, InfoLog);
		delete[] InfoLog;
	}
	return (isValid != 0);
}
|
||||
|
||||
// Make ZZshMainProgram current and feed it its inputs: sampler unit assignments for the
// vertex and fragment shader ranges first, then the values of the global, vertex and
// fragment uniform ranges, then a validation pass.
// If glUseProgram raises a GL error the program is unbound and nothing is uploaded.
void PutParametersAndRun(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
	UNIFORM_ERROR_LOG("Run program %s(%d) \t+\t%s(%d)", ShaderNames[vs->Shader], vs->Shader, ShaderNames[ps->Shader], ps->Shader);

	glUseProgram(ZZshMainProgram);
	const bool bound = (glGetError() == GL_NO_ERROR);
	if (!bound) {
		ZZLog::Error_Log("Something weird happened on Linking stage.");
		glUseProgram(0);
		return;
	}

	// Sampler -> texture unit mapping.
	PutSInProgam(vs->ParametersStart, vs->ParametersFinish);
	PutSInProgam(ps->ParametersStart, ps->ParametersFinish);

	// Uniform values and texture bindings.
	PutParametersInProgam(0, NumGlobalUniforms);
	PutParametersInProgam(vs->ParametersStart, vs->ParametersFinish);
	PutParametersInProgam(ps->ParametersStart, ps->ParametersFinish);

	ValidateProgram(ZZshMainProgram);
	GL_REPORT_ERRORD();
}
|
||||
|
||||
// Attach the given vertex and fragment shaders (those with a non-zero id) to
// ZZshMainProgram, link it and, if linking succeeded, upload parameters and make it current
// via PutParametersAndRun. On a link failure only an error is logged.
void CreateAndRunMain(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
	ZZLog::Error_Log("\n---> New shader program %d, %s(%d) \t+\t%s(%d).", ZZshMainProgram, ShaderNames[vs->Shader], vs->Shader, ShaderNames[ps->Shader], ps->Shader);

	if (vs->Shader != 0)
		glAttachShader(ZZshMainProgram, vs->Shader);
	if (ps->Shader != 0)
		glAttachShader(ZZshMainProgram, ps->Shader);

	glLinkProgram(ZZshMainProgram);
	const bool linked = GetLinkLog(ZZshMainProgram);
	if (linked) {
		GL_REPORT_ERRORD();

		PutParametersAndRun(vs, ps);
		GL_REPORT_ERRORD();
		return;
	}

	ZZLog::Error_Log("Main program linkage error, don't use any shader for this stage.");
}
|
||||
|
||||
// Decide whether a vertex/fragment shader pair may be linked together.
// A NULL vertex shader is never compatible; a vertex shader of type ZZ_SH_ZERO is compatible
// with anything (including a NULL fragment shader); otherwise the fragment shader must exist
// and have the same ShaderType.
inline bool ZZshCheckShaderCompatibility(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
	return vs != NULL
		&& (vs->ShaderType == ZZ_SH_ZERO
			|| (ps != NULL && vs->ShaderType == ps->ShaderType));
}
|
||||
|
||||
// Make the program for the (vs, ps) pair current.
//
// Incompatible pairs and pairs where either shader id is 0 are ignored. Linked programs are
// cached in CompiledPrograms[vs id][ps id]; a cache hit only re-uploads parameters, a miss
// creates, links and runs a new program.
//
// CompiledPrograms is indexed by GL object ids, so ids outside the table are now rejected
// with an error instead of reading and writing out of bounds.
void ZZshSetShader(VERTEXSHADER* vs, FRAGMENTSHADER* ps) {
	if (!ZZshCheckShaderCompatibility(vs, ps)) 			// We don't need to link uncompatible shaders
		return;

	const unsigned int vss = (vs!=NULL)?vs->Shader:0;
	const unsigned int pss = (ps!=NULL)?ps->Shader:0;
	if (vss == 0 || pss == 0)
		return;

	if (vss >= (unsigned int)MAX_ACTIVE_SHADERS || pss >= (unsigned int)MAX_ACTIVE_SHADERS) {
		ZZLog::Error_Log("Shader ids %d/%d exceed MAX_ACTIVE_SHADERS, program is not set.", vss, pss);
		return;
	}

	ZZshProgram& cached = CompiledPrograms[vss][pss];
	if (cached != 0 && glIsProgram(cached)) {
		ZZshMainProgram = cached;
		PutParametersAndRun(vs, ps);
	}
	else {
		cached = glCreateProgram();
		ZZshMainProgram = cached;
		CreateAndRunMain(vs, ps);
	}
}
|
||||
|
||||
// Remember `prog` as the current vertex shader link and re-evaluate the active program
// together with the currently remembered pixel shader link.
void ZZshSetVertexShader(ZZshShaderLink prog) {
	g_vsprog = prog;

	VERTEXSHADER* vertex = (VERTEXSHADER*)(g_vsprog.link);
	FRAGMENTSHADER* fragment = (FRAGMENTSHADER*)(g_psprog.link);
	ZZshSetShader(vertex, fragment);
}
|
||||
|
||||
// Remember `prog` as the current pixel shader link and re-evaluate the active program
// together with the currently remembered vertex shader link.
void ZZshSetPixelShader(ZZshShaderLink prog) {
	g_psprog = prog;

	VERTEXSHADER* vertex = (VERTEXSHADER*)(g_vsprog.link);
	FRAGMENTSHADER* fragment = (FRAGMENTSHADER*)(g_psprog.link);
	ZZshSetShader(vertex, fragment);
}
|
||||
|
||||
//------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// For several reason texobj could not be put in sampler directly, only though GL_TEXTUREi interface. So we need to check correct sampler for each one.
|
||||
inline void SettleTextureUnit(ZZshParamInfo* param, const char* name) {
|
||||
for (int i = 0; i < NUMBER_OF_SAMPLERS; i++) {
|
||||
if (strcmp(TextureUnits[i], name) == 0) {
|
||||
param->sampler = i;
|
||||
param->type = TextureTypes[i];
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Register uniform `name` of `prog` in the UniformsIndex table.
//
// If the program has the uniform, a new slot is appended (float4 by default, switched to a
// texture type by SettleTextureUnit when the name is a known sampler) and its index is stored
// in *param. If the uniform is absent, or the table is already full, *param is set to -1.
// Returns the GL uniform location (-1 when absent). `name` is stored by pointer.
inline int SetUniformParam(ZZshProgram prog, ZZshParameter* param, const char* name) {
	GLint p = glGetUniformLocation(prog, name);
	if (p <= -1) {
		*param = -1;
		return p;
	}

	// The table has a fixed size; appending past it used to write out of bounds.
	if (NumActiveUniforms >= MAX_ACTIVE_UNIFORMS) {
		ZZLog::Error_Log("Too many shader variables, uniform %s is dropped. You may increase the limit in source %d.", name, NumActiveUniforms);
		*param = -1;
		return p;
	}

	*param = NumActiveUniforms;
	UniformsIndex[NumActiveUniforms] = ParamInfo(name, ZZ_FLOAT4, ZeroFloat4, -1, 0, false, false);	// By define Uniform is FLOAT4

	SettleTextureUnit(&(UniformsIndex[NumActiveUniforms]), name);
	UNIFORM_ERROR_LOG("uniform %s \t\t%d %d", name, p, UniformsIndex[NumActiveUniforms].type);

	NumActiveUniforms++;
	return p;
}
|
||||
|
||||
// Register uniform `name` and store its table slot in pf->var.
// Expects `prog`, `pf` and `p` to exist in the expanding scope; `p` receives the GL location.
#define SET_UNIFORMPARAM(var, name) { \
	p = SetUniformParam(prog, &(pf->var), name); \
}
|
||||
|
||||
// Register sampler uniform `name`, mark its slot constant and bind texture `tex` to it.
// Expects `prog`, `pf` and `p` to exist in the expanding scope; `p` receives the GL location.
// SetUniformParam leaves x == -1 when the uniform is absent, so the table write is guarded:
// the unguarded form wrote to UniformsIndex[-1].
#define INIT_SAMPLERPARAM(tex, name) { \
	ZZshParameter x; \
	p = SetUniformParam(prog, &x, name); \
	if (x > -1) \
		(UniformsIndex[x]).Constant = true; \
	ZZshGLSetTextureParameter(pf->prog, x, tex, name); \
}
|
||||
|
||||
// Register float4 uniform `name`, mark its slot constant and give it the value `var`.
// Expects `prog`, `pf` and `p` to exist in the expanding scope; `p` receives the GL location.
// SetUniformParam leaves x == -1 when the uniform is absent, so the table write is guarded:
// the unguarded form wrote to UniformsIndex[-1].
#define INIT_UNIFORMPARAM(var, name) { \
	ZZshParameter x; \
	p = SetUniformParam(prog, &x, name); \
	if (x > -1) \
		(UniformsIndex[x]).Constant = true; \
	ZZshSetParameter4fv(pf->prog, x, var, name); \
}
|
||||
|
||||
char* AddContextToName(const char* name, int context) {
|
||||
char* newname = new char[MAX_UNIFORM_NAME_SIZE];
|
||||
sprintf(newname, "%s[%d]", name, context * NOCONTEXT);
|
||||
return newname;
|
||||
}
|
||||
|
||||
// Register every uniform a fragment shader may use.
//
// A temporary program is linked from pf->Shader alone and queried for uniform locations; each
// uniform found gets a slot in UniformsIndex. pf->ParametersStart / ParametersFinish delimit
// the slots added here, so the order of the statements below determines slot numbers.
// `context` selects the array element for per-context uniforms, `type` selects which
// conversion/bilinear block texture is bound (3: Conv16to32, 4: Conv32to16, else bilinear).
void SetupFragmentProgramParameters(FRAGMENTSHADER* pf, int context, int type)
{
	GLint p;	// written by the *_UNIFORMPARAM / INIT_* macros

	// Setting autolink: the shader link points back at its own shader record.
	pf->prog.link = (void*)pf;
	pf->prog.isFragment = true;
	pf->ShaderType = ShaderTypes[pf->Shader];

	pf->ParametersStart = NumActiveUniforms;
	ZZshProgram prog = madeProgram(pf->Shader, 0, "");
	glUseProgram(prog);
	GL_REPORT_ERRORD();

	// uniform parameters
	SET_UNIFORMPARAM(sOneColor, "g_fOneColor");
	SET_UNIFORMPARAM(sBitBltZ, "g_fBitBltZ");
	SET_UNIFORMPARAM(sInvTexDims, "g_fInvTexDims");
	SET_UNIFORMPARAM(fTexAlpha2, AddContextToName("fTexAlpha2", context));
	SET_UNIFORMPARAM(fTexOffset, AddContextToName("g_fTexOffset", context));
	SET_UNIFORMPARAM(fTexDims, AddContextToName("g_fTexDims", context));
	SET_UNIFORMPARAM(fTexBlock, AddContextToName("g_fTexBlock", context));
	SET_UNIFORMPARAM(fClampExts, AddContextToName("g_fClampExts", context));		// FIXME: There is a bug, that lead FFX-1 to incorrect CLAMP if this uniform have context.
	SET_UNIFORMPARAM(fTexWrapMode, AddContextToName("TexWrapMode", context));
	SET_UNIFORMPARAM(fRealTexDims, AddContextToName("g_fRealTexDims", context));
	SET_UNIFORMPARAM(fTestBlack, AddContextToName("g_fTestBlack", context));
	SET_UNIFORMPARAM(fPageOffset, AddContextToName("g_fPageOffset", context));
	SET_UNIFORMPARAM(fTexAlpha, AddContextToName("fTexAlpha", context));
	GL_REPORT_ERRORD();

	// textures
	INIT_SAMPLERPARAM(ptexBlocks, "g_sBlocks");
	switch (type)
	{
		case 3:
			INIT_SAMPLERPARAM(ptexConv16to32, "g_sConv16to32");
			break;
		case 4:
			INIT_SAMPLERPARAM(ptexConv32to16, "g_sConv32to16");
			break;
		default:
			INIT_SAMPLERPARAM(ptexBilinearBlocks, "g_sBilinearBlocks");
			break;
	}
	GL_REPORT_ERRORD();

	SET_UNIFORMPARAM(sMemory, AddContextToName("g_sMemory", context));
	SET_UNIFORMPARAM(sFinal, "g_sSrcFinal");
	SET_UNIFORMPARAM(sBitwiseANDX, "g_sBitwiseANDX");
	SET_UNIFORMPARAM(sBitwiseANDY, "g_sBitwiseANDY");
	SET_UNIFORMPARAM(sCLUT, "g_sCLUT");
	SET_UNIFORMPARAM(sInterlace, "g_sInterlace");
	GL_REPORT_ERRORD();

	// set global shader constants
	INIT_UNIFORMPARAM(float4(0.5f, (conf.settings().exact_color)?0.9f/256.0f:0.5f/256.0f, 0,1/255.0f), "g_fExactColor");
	INIT_UNIFORMPARAM(float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f ), "g_fBilinear");
	INIT_UNIFORMPARAM(float4(1.0f/256.0f, 1.0004f, 1, 0.5f), "g_fZBias");
	INIT_UNIFORMPARAM(float4(0,1, 0.001f, 0.5f), "g_fc0");
	INIT_UNIFORMPARAM(float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f), "g_fMult");

	pf->ParametersFinish = NumActiveUniforms;
	if (NumActiveUniforms > MAX_ACTIVE_UNIFORMS)
		ZZLog::Error_Log("Too many shader variables. You may increase the limit in source %d.", NumActiveUniforms);

	glUseProgram(0);
	GL_REPORT_ERRORD();
}
|
||||
|
||||
// Binds a vertex shader's program, uploads the depth-conversion and shared
// constants, registers the BitBlt uniforms, and records the range of
// NumActiveUniforms consumed in pf->ParametersStart / pf->ParametersFinish.
//
// pf      - vertex shader record to set up (modified in place).
// context - passed through for the uniform macros.
//
// NOTE(review): `p` is only ever assigned by the INIT_UNIFORMPARAM macro
// (defined outside this block) — confirm it always writes `p` before the
// `p > -1` test below.
void SetupVertexProgramParameters(VERTEXSHADER* pf, int context)
{
    GLint p;

    // Autolink: the program record points back at its owning shader.
    pf->prog.link = (void*)pf;
    pf->prog.isFragment = false;
    pf->ShaderType = ShaderTypes[pf->Shader];

    pf->ParametersStart = NumActiveUniforms;

    ZZshProgram prog = madeProgram(pf->Shader, 0, "");
    glUseProgram(prog);

    GL_REPORT_ERRORD();

    // Pick the Z constants depending on whether log-Z is disabled.
    const bool noLogZ = conf.settings().no_logz;
    g_vdepth = noLogZ
        ? float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f))
        : float4( 256.0f*65536.0f, 65536.0f, 256.0f, 65536.0f*65536.0f);
    vlogz = noLogZ
        ? float4( 1.0f, 0.0f, 0.0f, 0.0f)
        : float4( 0.0f, 1.0f, 0.0f, 0.0f);

    INIT_UNIFORMPARAM(g_vdepth, "g_fZ");
    if (p > -1) {
        INIT_UNIFORMPARAM(vlogz, "g_fZMin");
        if (p == -1) {
            ZZLog::Error_Log ("Shader file version is outdated! Only log-Z is possible.");
        }
    }
    GL_REPORT_ERRORD();

    // Constants shared with the fragment stage.
    float4 zNorm = float4(g_filog32, 0, 0, 0);
    INIT_UNIFORMPARAM(zNorm, "g_fZNorm");
    INIT_UNIFORMPARAM(float4(-0.2f, -0.65f, 0.9f, 1.0f / 32767.0f ), "g_fBilinear");
    INIT_UNIFORMPARAM(float4(1.0f/256.0f, 1.0004f, 1, 0.5f), "g_fZBias");
    INIT_UNIFORMPARAM(float4(0,1, 0.001f, 0.5f), "g_fc0");

    SET_UNIFORMPARAM(sBitBltPos, "g_fBitBltPos");
    SET_UNIFORMPARAM(sBitBltTex, "g_fBitBltTex");
    SET_UNIFORMPARAM(fBitBltTrans, "g_fBitBltTrans");

    pf->ParametersFinish = NumActiveUniforms;
    if (NumActiveUniforms > MAX_ACTIVE_UNIFORMS) {
        ZZLog::Error_Log("Too many shader variables. You may increase the limit in the source.");
    }

    glUseProgram(0);
    GL_REPORT_ERRORD();
}
|
||||
|
||||
const int GLSL_VERSION = 130; // NOTE(review): sampler2DRect is core only from GLSL 1.40; under 1.30 it relies on GL_ARB_texture_rectangle — confirm the intended version
|
||||
|
||||
// GLSL shaders are always compiled from source.
|
||||
// Writes the common shader preamble into header_string: the #version line,
// a #define that renames the entry point `name` to `main`, then `depth`
// (an extra block of defines, possibly empty) followed by a newline.
// The caller must supply a buffer large enough for the result; no bound is
// checked here.
static __forceinline void GlslHeaderString(char* header_string, const char* name, const char* depth)
{
    static const char* const kHeaderFormat = "#version %d\n#define %s main\n%s\n";
    sprintf(header_string, kHeaderFormat, GLSL_VERSION, name, depth);
}
|
||||
|
||||
// Builds the define header for vertex shader `name`, loads it from file into
// `vertex.Shader`, and sets up its program parameters.
//
// DefineString - caller-owned scratch buffer that receives the define header.
// name         - entry point name inside the shader source.
// vertex       - shader record to fill in. Taken by reference: the previous
//                by-value signature made LoadShaderFromFile and
//                SetupVertexProgramParameters operate on a temporary copy
//                (and stored that copy's address in prog.link), so the
//                caller's record never received the result.
// shaderver    - unused here.
// context      - context index; emitted as `CTX` scaled by NOCONTEXT.
// depth        - extra defines appended to the header (may be "").
//
// Returns the result of LoadShaderFromFile. Parameter setup runs regardless
// of that result.
static __forceinline bool LOAD_VS(char* DefineString, const char* name, VERTEXSHADER& vertex, int shaderver, ZZshProfile context, const char* depth)
{
    char header[200];
    GlslHeaderString(header, name, depth);
    sprintf(DefineString, "%s#define VERTEX_SHADER 1\n#define CTX %d\n", header, context * NOCONTEXT);

    const bool loaded = LoadShaderFromFile(vertex.Shader, DefineString, name, GL_VERTEX_SHADER);
    SetupVertexProgramParameters(&vertex, context);
    return loaded;
}
|
||||
|
||||
// Builds the define header for fragment shader `name`, loads it from file
// into `fragment.Shader`, and sets up its program parameters (type 0).
//
// DefineString - caller-owned scratch buffer that receives the define header.
// name         - entry point name inside the shader source.
// fragment     - shader record to fill in. Taken by reference: the previous
//                by-value signature made LoadShaderFromFile and
//                SetupFragmentProgramParameters operate on a temporary copy,
//                so the caller's record never received the result.
// shaderver    - unused here.
// context      - context index; emitted as `CTX` scaled by NOCONTEXT.
// depth        - extra defines appended to the header (may be "").
//
// Returns the result of LoadShaderFromFile. Parameter setup runs regardless
// of that result.
static __forceinline bool LOAD_PS(char* DefineString, const char* name, FRAGMENTSHADER& fragment, int shaderver, ZZshProfile context, const char* depth)
{
    char header[200];
    GlslHeaderString(header, name, depth);
    sprintf(DefineString, "%s#define FRAGMENT_SHADER 1\n#define CTX %d\n", header, context * NOCONTEXT);

    const bool loaded = LoadShaderFromFile(fragment.Shader, DefineString, name, GL_FRAGMENT_SHADER);
    SetupFragmentProgramParameters(&fragment, context, 0);
    return loaded;
}
|
||||
|
||||
inline bool LoadEffects()
|
||||
{
|
||||
// clear the textures
|
||||
for(u32 i = 0; i < ArraySize(ppsTexture); ++i) {
|
||||
SAFE_RELEASE_PROG(ppsTexture[i].prog);
|
||||
}
|
||||
|
||||
#ifndef _DEBUG
|
||||
memset(ppsTexture, 0, sizeof(ppsTexture));
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ZZshLoadExtraEffects() {
|
||||
bool bLoadSuccess = true;
|
||||
char DefineString[DEFINE_STRING_SIZE] = "";
|
||||
const char* writedepth = "#define WRITE_DEPTH 1\n"; // should we write depth field
|
||||
|
||||
|
||||
const char* pvsshaders[4] = { "RegularVS", "TextureVS", "RegularFogVS", "TextureFogVS" };
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 * i], cgvProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 1 ], cgvProf, 1, "")) bLoadSuccess = false;
|
||||
if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 8 ], cgvProf, 0, writedepth)) bLoadSuccess = false;
|
||||
if (!LOAD_VS(DefineString, pvsshaders[i], pvsStore[2 *i + 8 + 1], cgvProf, 1, writedepth)) bLoadSuccess = false;
|
||||
}
|
||||
for (int i = 0; i < 16; ++i)
|
||||
pvs[i] = pvsStore[i].prog;
|
||||
|
||||
if (!LOAD_VS(DefineString, "BitBltVS", pvsBitBlt, cgvProf, 0, "")) bLoadSuccess = false;
|
||||
GLint p;
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
if (!LOAD_PS(DefineString, "RegularPS", ppsRegular[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "RegularFogPS", ppsRegular[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
|
||||
if( conf.mrtdepth ) {
|
||||
if (!LOAD_PS(DefineString, "RegularPS", ppsRegular[2], cgfProf, 0, writedepth)) bLoadSuccess = false;
|
||||
if (!bLoadSuccess) conf.mrtdepth = 0;
|
||||
|
||||
if (!LOAD_PS(DefineString, "RegularFogPS", ppsRegular[3], cgfProf, 0, writedepth)) bLoadSuccess = false;
|
||||
if (!bLoadSuccess) conf.mrtdepth = 0;
|
||||
}
|
||||
|
||||
if (!LOAD_PS(DefineString, "BitBltPS", ppsBitBlt[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "BitBltAAPS", ppsBitBlt[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!bLoadSuccess) {
|
||||
ZZLog::Error_Log("Failed to load BitBltAAPS, using BitBltPS.");
|
||||
if (!LOAD_PS(DefineString, "BitBltPS", ppsBitBlt[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
}
|
||||
|
||||
if (!LOAD_PS(DefineString, "BitBltDepthPS", ppsBitBltDepth, cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "CRTCTargPS", ppsCRTCTarg[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "CRTCTargInterPS", ppsCRTCTarg[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
|
||||
g_bCRTCBilinear = true;
|
||||
if (!LOAD_PS(DefineString, "CRTCPS", ppsCRTC[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if( !bLoadSuccess ) {
|
||||
// switch to simpler
|
||||
g_bCRTCBilinear = false;
|
||||
if (!LOAD_PS(DefineString, "CRTCPS_Nearest", ppsCRTC[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "CRTCInterPS_Nearest", ppsCRTC[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
}
|
||||
else {
|
||||
if (!LOAD_PS(DefineString, "CRTCInterPS", ppsCRTC[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
}
|
||||
|
||||
if( !bLoadSuccess )
|
||||
ZZLog::Error_Log("Failed to create CRTC shaders.");
|
||||
|
||||
// if (!LOAD_PS(DefineString, "CRTC24PS", ppsCRTC24[0], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
// if (!LOAD_PS(DefineString, "CRTC24InterPS", ppsCRTC24[1], cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "ZeroPS", ppsOne, cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "BaseTexturePS", ppsBaseTexture, cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "Convert16to32PS", ppsConvert16to32, cgfProf, 0, "")) bLoadSuccess = false;
|
||||
if (!LOAD_PS(DefineString, "Convert32to16PS", ppsConvert32to16, cgfProf, 0, "")) bLoadSuccess = false;
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Preprocessor define injected into a texture fragment shader for each wrap
// mode; indexed by the `texwrap` value in LoadShaderFromType. The last entry
// adds no define.
const static char* g_pPsTexWrap[] = { "#define REPEAT 1\n", "#define CLAMP 1\n", "#define REGION_REPEAT 1\n", "" };
|
||||
|
||||
// Compiles the texture fragment shader variant selected by the arguments.
//
// The entry point name is "Texture[Fog]<texfilter>_<g_pTexTypes[type]>PS";
// texwrap, writedepth, testaem, exactcolor and the SHADER_ACCURATE bit of `ps`
// each contribute a #define to the header, and `context` becomes CTX.
// srcdir and srcfile are not used by this backend.
//
// Returns the compiled shader, or the result of UseEmptyShader() when
// compilation fails.
//
// The name buffer now lives on the stack; it used to be allocated with
// new[] and never freed, leaking on every call. An unused ZZshProgram local
// was dropped as well.
// NOTE(review): assumes CompileShader/UseEmptyShader do not keep the `name`
// pointer after returning — confirm.
static ZZshShader LoadShaderFromType(const char* srcdir, const char* srcfile, int type, int texfilter, int texwrap, int fog, int writedepth, int testaem, int exactcolor, int ps, int context) {

    assert( texwrap < NUM_TEXWRAPS);
    assert( type < NUM_TYPES );

    char name[MAX_SHADER_NAME_SIZE];
    sprintf(name, "Texture%s%d_%sPS", fog?"Fog":"", texfilter, g_pTexTypes[type]);

    ZZLog::Debug_Log("Starting shader for %s", name);

    const char* AddWrap = g_pPsTexWrap[texwrap];
    const char* AddDepth = writedepth?"#define WRITE_DEPTH 1\n":"";
    const char* AddAEM = testaem?"#define TEST_AEM 1\n":"";
    const char* AddExcolor = exactcolor?"#define EXACT_COLOR 1\n":"";
    const char* AddAccurate = (ps & SHADER_ACCURATE)?"#define ACCURATE_DECOMPRESSION 1\n":"";

    char DefineString[DEFINE_STRING_SIZE] = "";
    char temp[200];
    GlslHeaderString(temp, name, AddWrap);
    sprintf(DefineString, "%s#define FRAGMENT_SHADER 1\n%s%s%s%s\n#define CTX %d\n", temp, AddDepth, AddAEM, AddExcolor, AddAccurate, context * NOCONTEXT);

    ZZshShader shader;
    if (!CompileShader(shader, DefineString, name, GL_FRAGMENT_SHADER))
        return UseEmptyShader(name, GL_FRAGMENT_SHADER);

    ZZLog::Debug_Log("Used shader for type:%d filter:%d wrap:%d for:%d depth:%d aem:%d color:%d decompression:%d ctx:%d... Ok \n", type, texfilter, texwrap, fog, writedepth, testaem, exactcolor, ps, context);

    GL_REPORT_ERRORD();
    return shader;
}
|
||||
|
||||
// Returns the texture fragment shader for the given combination, compiling
// it on first use.
//
// The wrap mode is derived from clamp.wms / clamp.wmt. The slot inside
// ppsTexture is chosen by GET_SHADER_INDEX. If the slot already holds a
// program it is returned as is.
//
// pbFailed (optional) is cleared on entry and set when shader creation fails
// or when GL reports an error after parameter setup.
// Returns NULL only when the shader could not be created.
FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed)
{
    assert( texfilter < NUM_FILTERS );

    int texwrap;
    if (clamp.wms != clamp.wmt) {
        // Mixed modes: region repeat wins if either axis asks for it.
        if (clamp.wms == 3 || clamp.wmt == 3)
            texwrap = TEXWRAP_REGION_REPEAT;
        else
            texwrap = TEXWRAP_REPEAT_CLAMP;
    }
    else if (clamp.wms == 0) {
        texwrap = TEXWRAP_REPEAT;
    }
    else if (clamp.wms == 1 || clamp.wms == 2) {
        texwrap = TEXWRAP_CLAMP;
    }
    else {
        texwrap = TEXWRAP_REGION_REPEAT;
    }

    const int index = GET_SHADER_INDEX(type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, context, 0);

    if (pbFailed != NULL) *pbFailed = false;

    FRAGMENTSHADER* pf = ppsTexture + index;
    if (ZZshExistProgram(pf)) {
        return pf; // already built
    }

    pf->Shader = LoadShaderFromType(EFFECT_DIR, EFFECT_NAME, type, texfilter, texwrap, fog, s_bWriteDepth, testaem, exactcolor, g_nPixelShaderVer, context);

    if (!(ZZshExistProgram(pf))) {
        ZZLog::Error_Log("Failed to create shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms+clamp.wmt);
        if (pbFailed != NULL) *pbFailed = true;

        GL_REPORT_ERRORD();
        return NULL;
    }

    SetupFragmentProgramParameters(pf, context, type);
    GL_REPORT_ERRORD();

    if (glGetError() != GL_NO_ERROR) {
        ZZLog::Error_Log("Failed to load shader %d,%d,%d,%d.", type, fog, texfilter, 4*clamp.wms+clamp.wmt);
        if (pbFailed != NULL) *pbFailed = true;
    }

    return pf;
}
|
||||
|
||||
#endif // GLSL_API
|
|
@ -0,0 +1,812 @@
|
|||
// Cg Shaders for PS2 GS emulation
|
||||
|
||||
// divides by z for every pixel, instead of in vertex shader
|
||||
// fixes kh textures
|
||||
|
||||
// Rectangle textures (sampler2DRect) are used throughout this file.
// GLSL extension names carry the GL_ prefix; without it the directive names
// an unknown extension and does not enable anything.
#extension GL_ARB_texture_rectangle : enable
|
||||
#define GL_compatibility_profile 1
|
||||
#define PERSPECTIVE_CORRECT_TEX
|
||||
|
||||
// When writing GLSL code, the Cg-style variables must be renamed according to the table below.
|
||||
// Note that the in and out variables differ!
|
||||
// in POSITION set by glVertexPointer goes to gl_Vertex;
|
||||
// out POSITION goes to gl_position
|
||||
// in COLOR0 gl_Color
|
||||
// out COLOR0 gl_FrontColor
|
||||
// in TEXCOORD0 gl_MultiTexCoord0
|
||||
// out TEXCOORD0 gl_TexCoord[0]
|
||||
|
||||
//in Fragments:
|
||||
// in TEXCOORD0 gl_TexCoord[0]
|
||||
// out COLOR0 gl_FragData[0]
|
||||
|
||||
//#define TEST_AEM // tests AEM for black pixels
|
||||
//#define REGION_REPEAT // set if texture wrapping mode is region repeat
|
||||
//#define WRITE_DEPTH // set if depth is also written in a MRT
|
||||
//#define ACCURATE_DECOMPRESSION // set for less capable hardware ATI Radeon 9000 series
|
||||
//#define EXACT_COLOR // make sure the output color is clamped to 1/255 boundaries (for alpha testing)
|
||||
|
||||
// The texture coordinate argument is a vec4 either way; with perspective
// correction the xy pair is divided by z per pixel.
#define TEX_DECL vec4

#if defined(PERSPECTIVE_CORRECT_TEX)
#define TEX_XY tex.xy/tex.z
#else
#define TEX_XY tex.xy
#endif
|
||||
|
||||
// DOZWRITE(x) keeps statement x only when WRITE_DEPTH is defined.
#ifndef WRITE_DEPTH
#define DOZWRITE(x)
#else
#define DOZWRITE(x) x
#endif
|
||||
|
||||
// NVidia CG-data types
|
||||
// Cg vector type names mapped onto GLSL vectors. The half and float
// flavours map to the same GLSL type.
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define half2 vec2
#define half3 vec3
#define half4 vec4
|
||||
|
||||
// main ps2 memory, each pixel is stored in 32bit color
|
||||
// All arrays of size 2 below are indexed with CTX throughout this file.
uniform sampler2DRect g_sMemory[2];

// per context pixel shader constants
uniform half4 fTexAlpha2[2];

uniform float4 g_fTexOffset[2]; // converts the page and block offsets into the mem addr/1024
uniform float4 g_fTexDims[2]; // mult by tex dims when accessing the block texture
uniform float4 g_fTexBlock[2];

uniform float4 g_fClampExts[2]; // if clamping the texture, use (minu, minv, maxu, maxv)
uniform float4 TexWrapMode[2]; // 0 - repeat/clamp, 1 - region rep (use fRegRepMask)

uniform float4 g_fRealTexDims[2]; // tex dims used for linear filtering (w,h,1/w,1/h)

// (alpha0, alpha1, 1 if highlight2 and tcc is rgba, 1-y)
uniform half4 g_fTestBlack[2]; // used for aem bit

uniform float4 g_fPageOffset[2];

uniform half4 fTexAlpha[2];

// vertex shader constants
uniform float4 g_fPosXY[2];

// used to get the tiled offset into a page given the linear offset
uniform sampler2DRect g_sSrcFinal;
uniform sampler2D g_sBlocks;
uniform sampler2D g_sBilinearBlocks;
uniform sampler2D g_sConv16to32;
uniform sampler3D g_sConv32to16;
uniform sampler2DRect g_sBitwiseANDX;
uniform sampler2DRect g_sBitwiseANDY;
uniform sampler2DRect g_sInterlace;

// used only on rare cases where the render target is PSMT8H
uniform sampler2D g_sCLUT;

// global pixel shader constants
uniform float4 g_fInvTexDims; // similar to g_fClutOff
uniform float4 g_fFogColor;

// used for rectblitting
uniform float4 g_fBitBltZ;

uniform half4 g_fOneColor; // col*.xxxy+.zzzw

// vertex shader constants
uniform float4 g_fBitBltPos;
uniform float4 g_fZ; // transforms d3dcolor z into float z
uniform float4 g_fZNorm;
uniform float4 g_fZMin = float4(0.0f, 1.0f, 0.0f, 0.0f);
uniform float4 g_fBitBltTex;

// pixel shader consts
// .z is used for the addressing fn
// The initializers below are defaults only: the host code uploads its own
// values for g_fExactColor, g_fBilinear, g_fZBias, g_fc0 and g_fMult
// (INIT_UNIFORMPARAM calls in the C++ backend).
// NOTE(review): the g_fBilinear default here (-0.7 in .x) differs from the
// value the host uploads (-0.2) — confirm which one is intended.
uniform half4 g_fExactColor = half4(0.5,0.5/256.0f,0,1/255.0f);
uniform float4 g_fBilinear = float4(-0.7f, -0.65f, 0.9,1/32767.0f);
uniform float4 g_fZBias = half4(1.0f/256.0f, 1.0004f, 1, 0.5); // also for vs
uniform float4 g_fc0 = float4(0,1, 0.001, 0.5f); // also for vs
uniform float4 g_fMult = float4(1/1024.0f, 0.2f/1024.0f, 1/128.0f, 1/512.0f);

// vertex shader consts
uniform float4 g_fBitBltTrans = float4(0.5f, -0.5f, 0.5, 0.5 + 0.4/416.0f);
|
||||
|
||||
// given a local tex coord, returns the coord in the memory
|
||||
// Maps a local texture coordinate to the coordinate used to sample
// g_sMemory[CTX]. The per-block offset is looked up in g_sBlocks and combined
// with the page/block offsets from g_fTexOffset and g_fPageOffset.
float2 ps2memcoord(float2 realtex)
{
    float4 off;

    // block off
    realtex.xy = realtex.xy * g_fTexDims[CTX].xy + g_fTexDims[CTX].zw;
    realtex.xy = (realtex.xy - fract(realtex.xy)) * g_fMult.zw;
    float2 inBlock = fract(realtex.xy);
    off.xy = realtex.xy - inBlock.xy;

    // Where to sample the block lookup texture.
    float2 blockUV = g_fTexBlock[CTX].xy*inBlock + g_fTexBlock[CTX].zw;

#ifdef ACCURATE_DECOMPRESSION
    off.zw = texture(g_sBlocks, blockUV).ar;
    off.x = dot(off.xy, g_fTexOffset[CTX].xy);
    float bias = g_fTexOffset[CTX].w;
    float fracPart = fract(off.x);
    float blockAdd = g_fTexOffset[CTX].z * off.z;
    off.w = off.x + blockAdd + bias;
    off.x = fract(fracPart + blockAdd + bias);
    off.w -= off.x;
#else
    off.z = texture(g_sBlocks, blockUV).a;

    // combine the two, then split into fractional (x) and whole (w) parts
    off.x = dot(off.xyz, g_fTexOffset[CTX].xyz) + g_fTexOffset[CTX].w;
    off.x = modf(off.x, off.w);
#endif

    return off.xw * g_fPageOffset[CTX].zy + g_fPageOffset[CTX].wx;
}
|
||||
|
||||
// find all texcoords for bilinear filtering
|
||||
// assume that orgtex are already on boundaries
|
||||
// Computes the four memory coordinates needed for bilinear filtering.
// orgtex holds two local coordinates (xy and zw); the results come back as
// two (x,y) pairs in off0 and two in off1. One g_sBlocks lookup is made per
// corner.
void ps2memcoord4(float4 orgtex, out float4 off0, out float4 off1)
{
    // block off
    float4 texel = (orgtex * g_fTexDims[CTX].xyxy + g_fTexDims[CTX].zwzw);
    float4 inBlock = fract(texel.xyzw);
    float4 blockUV = g_fTexBlock[CTX].xyxy*inBlock + g_fTexBlock[CTX].zwzw;
    texel -= inBlock;

    float4 transvals = g_fTexOffset[CTX].x * texel.xzxz + g_fTexOffset[CTX].y * texel.yyww + g_fTexOffset[CTX].w;

    // Four separate lookups; the original notes this is faster on ffx ingame
    // than a single g_sBilinearBlocks fetch.
    float4 blockOff;
    blockOff.x = texture(g_sBlocks, blockUV.xy).a;
    blockOff.y = texture(g_sBlocks, blockUV.zy).a;
    blockOff.z = texture(g_sBlocks, blockUV.xw).a;
    blockOff.w = texture(g_sBlocks, blockUV.zw).a;

    float4 fr, rem;

#ifdef ACCURATE_DECOMPRESSION
    fr = fract(transvals);
    float4 fadd = blockOff * g_fTexOffset[CTX].z;
    rem = transvals + fadd;
    fr = fract(fr + fadd);
    rem -= fr;
#else
    transvals += blockOff * g_fTexOffset[CTX].z;

    fr = modf(transvals, rem);
#endif

    rem = rem * g_fPageOffset[CTX].y + g_fPageOffset[CTX].x;
    fr = fr * g_fPageOffset[CTX].z + g_fPageOffset[CTX].w;

    // combine: interleave fractional parts (x slots) with whole parts (y slots)
    off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
    off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}
|
||||
|
||||
// Variant of ps2memcoord4 that fetches all four block offsets with a single
// g_sBilinearBlocks lookup. Unlike ps2memcoord4, the fractional part is only
// scaled by g_fPageOffset[CTX].z (no .w bias is added).
void ps2memcoord4_fast(float4 orgtex, out float4 off0, out float4 off1)
{
    float4 texel = (orgtex * g_fTexDims[CTX].xyxy + g_fTexDims[CTX].zwzw);
    float4 inBlock = fract(texel.xyzw);
    float2 blockUV = g_fTexBlock[CTX].xy*inBlock.xy + g_fTexBlock[CTX].zw;
    texel -= inBlock;

    float4 transvals = g_fTexOffset[CTX].x * texel.xzxz + g_fTexOffset[CTX].y * texel.yyww + g_fTexOffset[CTX].w;

    float4 blockOff = texture(g_sBilinearBlocks, blockUV.xy);
    float4 fr, rem;

#ifdef ACCURATE_DECOMPRESSION
    fr = fract(transvals);
    float4 fadd = blockOff * g_fTexOffset[CTX].z;
    rem = transvals + fadd;
    fr = fract(fr + fadd);
    rem -= fr;
#else
    transvals += blockOff * g_fTexOffset[CTX].z;

    fr = modf(transvals, rem);
#endif

    rem = rem * g_fPageOffset[CTX].y + g_fPageOffset[CTX].x;
    fr = fr * g_fPageOffset[CTX].z;

    off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;
    off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;
}
|
||||
|
||||
// Wrapping modes
|
||||
// ps2addr(): applies the texture wrapping mode to a local coordinate.
// Exactly one definition is compiled, chosen by the REPEAT / CLAMP /
// REGION_REPEAT define supplied by the host; with none of them defined the
// coordinate is clamped and then wrapped.
#if defined(REPEAT)

float2 ps2addr(float2 coord)
{
    float2 wrapped = fract(coord.xy);
    return wrapped;
}

#elif defined(CLAMP)

float2 ps2addr(float2 coord)
{
    float2 lo = g_fClampExts[CTX].xy;
    float2 hi = g_fClampExts[CTX].zw;
    return clamp(coord.xy, lo, hi);
}

#elif defined(REGION_REPEAT)

float2 ps2addr(float2 coord)
{
    float2 wrapped = fract(clamp(coord.xy, g_fClampExts[CTX].xy, g_fClampExts[CTX].zw));

    // region repeat mode for x: (umsk&x)|ufix
    if( TexWrapMode[CTX].x > g_fBilinear.z ) {
        wrapped.x = texture(g_sBitwiseANDX, abs(coord.x)*TexWrapMode[CTX].zx).x * g_fClampExts[CTX].x + g_fClampExts[CTX].z;
    }
    // region repeat mode for y: (vmsk&y)|vfix
    if( TexWrapMode[CTX].y > g_fBilinear.z ) {
        wrapped.y = texture(g_sBitwiseANDY, abs(coord.y)*TexWrapMode[CTX].wy).x * g_fClampExts[CTX].y + g_fClampExts[CTX].w;
    }

    return wrapped;
}

#else

float2 ps2addr(float2 coord)
{
    float2 clamped = clamp(coord.xy, g_fClampExts[CTX].xy, g_fClampExts[CTX].zw);
    return fract(clamped);
}

#endif
|
||||
|
||||
// Samples main memory at the tiled address computed by ps2memcoord.
half4 tex2DPS_32(float2 tex0)
{
    float2 memAddr = ps2memcoord(tex0).xy;
    return texture(g_sMemory[CTX], memAddr);
}
|
||||
|
||||
// use when texture is not tiled -- shader 1
|
||||
// Non-tiled fetch: scales/biases the coordinate with g_fTexDims, then applies
// the g_fZBias.zzzw scale and the g_fPageOffset.w bias to the sampled color.
half4 tex2DPS_tex32(float2 tex0)
{
    half4 texel = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw);
    return texel*g_fZBias.zzzw+g_fPageOffset[CTX].w;
}
|
||||
|
||||
// use when texture is not tiled -- shader 2
|
||||
// Non-tiled paletted fetch: the alpha channel of the memory texel (plus the
// g_fPageOffset.w bias) is used as the index into g_sCLUT.
half4 tex2DPS_clut32(float2 tex0)
{
    float2 memUV = g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw;
    float index = texture(g_sMemory[CTX], memUV).a+g_fPageOffset[CTX].w;
    float2 clutUV = index*g_fExactColor.xz+g_fExactColor.yz;
    return texture(g_sCLUT, clutUV);
}
|
||||
|
||||
// Shader 3
|
||||
// use when texture is not tiled and converting from 32bit to 16bit
|
||||
// don't convert on the block level, only on the column level
|
||||
// so every other 8 pixels, use the upper bits instead of lower
|
||||
// Non-tiled fetch of a 32-bit texel that is reinterpreted as 16-bit data:
// depending on the position inside the column, either the (x,w) or the (z,y)
// channel pair is passed through g_sConv16to32.
half4 tex2DPS_tex32to16(float2 tex0)
{
    tex0.y += g_fPageOffset[CTX].z;
    float2 ffrac = mod(tex0, g_fTexOffset[CTX].xy);
    tex0.xy = g_fc0.ww * (tex0.xy + ffrac);

    // Past the column threshold the upper channel pair is used.
    bool upper = ffrac.x > g_fTexOffset[CTX].z;
    if( upper ) {
        tex0.x -= g_fTexOffset[CTX].z;
    }
    if( ffrac.y >= g_fTexOffset[CTX].w ) {
        tex0.y -= g_fTexOffset[CTX].w;
        tex0.x += g_fc0.w;
    }

    half4 color = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw)*g_fZBias.zzzw+g_fPageOffset[CTX].w;

    float2 uv;
    if( upper )
        uv = color.xw;
    else
        uv = color.zy;

    return texture(g_sConv16to32, uv+g_fPageOffset[CTX].xy);
}
|
||||
|
||||
// Shader 4
|
||||
// used when a 16 bit texture is used as an 8h one
|
||||
// Fetches a 16-bit texel that is read as 8H data: the memory texel is
// converted through g_sConv32to16, combined with its alpha, and the result
// indexes g_sCLUT. (An unused local was removed.)
half4 tex2DPS_tex16to8h(float2 tex0)
{
    float2 ffrac = mod(tex0+g_fPageOffset[CTX].zw, g_fTexOffset[CTX].xy);
    tex0.xy = g_fPageOffset[CTX].xy * tex0.xy - ffrac * g_fc0.yw;

    if( ffrac.x > g_fTexOffset[CTX].x*g_fc0.w )
        tex0.x += g_fTexOffset[CTX].x*g_fc0.w;
    if( tex0.x >= g_fc0.y ) tex0 += g_fTexOffset[CTX].zw;

    float4 upper = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw);

    // only need alpha
    float index = texture(g_sConv32to16, upper.zyx-g_fc0.z).y + upper.w*g_fc0.w*g_fc0.w;
    return texture(g_sCLUT, index+g_fExactColor.yz);
}
|
||||
|
||||
// Shader 5
|
||||
// used when a 16 bit texture is used as a 32bit one
|
||||
// Builds one 32-bit color out of two 16-bit sources: the texel from
// g_sSrcFinal fills the (z,y) channels and the texel from g_sMemory fills
// (x,w), each converted through g_sConv32to16.
half4 tex2DPS_tex16to32(float2 tex0)
{
    float2 ffrac = mod(tex0+g_fPageOffset[CTX].zw, g_fTexOffset[CTX].xy);
    tex0.y += g_fPageOffset[CTX].y * ffrac.y;

    if( ffrac.x > g_fTexOffset[CTX].z ) {
        tex0.x -= g_fTexOffset[CTX].z;
        tex0.y += g_fTexOffset[CTX].w;
    }

    float fconst = g_fc0.w*g_fc0.w;
    float4 lower = texture(g_sSrcFinal, g_fTexDims[CTX].xy*tex0);
    float4 upper = texture(g_sMemory[CTX], g_fTexDims[CTX].xy*tex0+g_fTexDims[CTX].zw);

    float4 combined;
    combined.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*fconst;
    combined.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*fconst;
    return combined;
}
|
||||
|
||||
// Placeholder: ignores its argument and returns an all-zero color.
half4 tex2DPS_tex16to32h(float2 tex0)
{
    return vec4(0.0);
}
|
||||
|
||||
//half4 f;
|
||||
//f.w = old.y > (127.2f/255.0f) ? 1 : 0;
|
||||
//old.y -= 0.5f * f.w;
|
||||
//f.xyz = fract(old.yyx*half3(2.002*255.0f/256.0f, 64.025f*255.0f/256.0f, 8.002*255.0f/256.0f));
|
||||
//f.y += old.x * (0.25f*255.0f/256.0f);
|
||||
|
||||
////////////////////////////////
|
||||
// calculates the texture color
|
||||
////////////////////////////////
|
||||
|
||||
// decl_ps2shade(num) instantiates decl_ps2shade_<num> once per texture
// format suffix, producing the ps2shade<num><suffix>() family.
#define decl_ps2shade(num) \
    decl_ps2shade_##num(_32) \
    decl_ps2shade_##num(_tex32) \
    decl_ps2shade_##num(_clut32) \
    decl_ps2shade_##num(_tex32to16) \
    decl_ps2shade_##num(_tex16to8h) \
    decl_ps2shade_##num(_tex16to32h)

// Filter 0 (nearest): wrap the coordinate, then fetch a single texel.
#define decl_ps2shade_0(bit) \
    float4 ps2shade0##bit(TEX_DECL tex) \
    { \
        return tex2DPS##bit(ps2addr(TEX_XY)); \
    }
|
||||
|
||||
// do fast memcoord4 calcs when textures behave well
|
||||
// Address function used by the bilinear filter. The original selected it
// with #ifdef REPEAT, but both branches named ps2memcoord4, so a single
// definition is equivalent (ps2memcoord4_fast is not used here).
#define PS2MEMCOORD4 ps2memcoord4
|
||||
|
||||
|
||||
// decl_BilinearFilter(bit, addrfn) defines BilinearFilter<bit>(): it snaps
// the coordinate to a texel boundary, wraps both corners with ps2addr,
// resolves the four memory addresses with PS2MEMCOORD4 and blends the four
// g_sMemory samples by the fractional position. The addrfn argument is not
// referenced by the body.
#define decl_BilinearFilter(bit, addrfn) \
    half4 BilinearFilter##bit(float2 tex0) \
    { \
        float4 off0, off1; \
        float4 ftex; \
        float2 ffrac; \
        ftex.xy = tex0 + g_fBilinear.xy * g_fRealTexDims[CTX].zw; \
        ffrac = fract(ftex.xy*g_fRealTexDims[CTX].xy); \
        ftex.xy -= ffrac.xy * g_fRealTexDims[CTX].zw; \
        \
        ftex.zw = ps2addr(ftex.xy + g_fRealTexDims[CTX].zw); \
        ftex.xy = ps2addr(ftex.xy); \
        \
        PS2MEMCOORD4(ftex, off0, off1); \
        half4 c0 = texture(g_sMemory[CTX], off0.xy); \
        half4 c1 = texture(g_sMemory[CTX], off0.zw); \
        half4 c2 = texture(g_sMemory[CTX], off1.xy); \
        half4 c3 = texture(g_sMemory[CTX], off1.zw); \
        half4 top = mix(c0, c1, vec4(ffrac.x)); \
        half4 bottom = mix(c2, c3, vec4(ffrac.x)); \
        return mix(top, bottom, vec4(ffrac.y)); \
    }

decl_BilinearFilter(_32, ps2addr)
decl_BilinearFilter(_tex32, ps2addr)
decl_BilinearFilter(_clut32, ps2addr)
decl_BilinearFilter(_tex32to16, ps2addr)
decl_BilinearFilter(_tex16to8h, ps2addr)
decl_BilinearFilter(_tex16to32h, ps2addr)
|
||||
|
||||
//TODO! For mip maps, only apply when LOD >= 0
|
||||
// lcm == 0, LOD = log(1/Q)*L + K, lcm == 1, LOD = K
|
||||
|
||||
// bilinear
|
||||
// Filters 1..5. No mip-mapping is implemented: the "mip" variants reduce to
// either the nearest fetch (2, 3) or the bilinear filter (1, 4, 5).

// 1: bilinear
#define decl_ps2shade_1(bit) \
    half4 ps2shade1##bit(TEX_DECL tex) \
    { \
        return BilinearFilter##bit(TEX_XY); \
    }

// 2: nearest, mip nearest
#define decl_ps2shade_2(bit) \
    half4 ps2shade2##bit(TEX_DECL tex) \
    { \
        return tex2DPS##bit(ps2addr(TEX_XY)); \
    }

// 3: nearest, mip linear
#define decl_ps2shade_3(bit) \
    half4 ps2shade3##bit(TEX_DECL tex) \
    { \
        return tex2DPS##bit(ps2addr(TEX_XY)); \
    }

// 4: linear, mip nearest
#define decl_ps2shade_4(bit) \
    half4 ps2shade4##bit(TEX_DECL tex) \
    { \
        return BilinearFilter##bit(TEX_XY); \
    }

// 5: linear, mip linear
#define decl_ps2shade_5(bit) \
    half4 ps2shade5##bit(TEX_DECL tex) \
    { \
        return BilinearFilter##bit(TEX_XY); \
    }

// Instantiate every filter for every texture format.
decl_ps2shade(0)
decl_ps2shade(1)
decl_ps2shade(2)
decl_ps2shade(3)
decl_ps2shade(4)
decl_ps2shade(5)
|
||||
|
||||
|
||||
// Combines a sampled texel with the vertex color. The texel alpha is first
// remapped through fTexAlpha (or, with TEST_AEM, forced to g_fc0.x when the
// g_fTestBlack test identifies the texel), then the color is modulated and
// offset using fTexAlpha2 / fTexAlpha.
half4 ps2CalcShade(half4 texcol, half4 color)
{
    float alpha = texcol.w * fTexAlpha[CTX].y + fTexAlpha[CTX].x;
#ifdef TEST_AEM
    if( dot(texcol.xyzw, g_fTestBlack[CTX].xyzw) <= g_fc0.z ) {
        alpha = g_fc0.x;
    }
#endif
    texcol.w = alpha;

    return texcol * (fTexAlpha2[CTX].zzzw * color + fTexAlpha2[CTX].xxxy) + fTexAlpha[CTX].zzzw * color.wwww;
}
|
||||
|
||||
// final ops on the color
|
||||
// Final color transform: scale and bias by g_fOneColor. With EXACT_COLOR the
// alpha is additionally floored and rescaled by g_fExactColor.w (the host is
// expected to scale g_fOneColor by 255 in that mode).
half4 ps2FinalColor(half4 col)
{
    half4 result = col * g_fOneColor.xxxy + g_fOneColor.zzzw;
#ifdef EXACT_COLOR
    result.w = floor(result.w)*g_fExactColor.w;
#endif
    return result;
}
|
||||
|
||||
#ifdef FRAGMENT_SHADER // This is code only for FRAGMENTS (pixel shader)
|
||||
|
||||
// Untextured primitive: outputs the transformed vertex color and, when
// WRITE_DEPTH is defined, gl_TexCoord[0] to the second render target.
void RegularPS()
{
    // whenever outputting depth, make sure to mult by 255/256 and 1
    half4 shaded = ps2FinalColor(gl_Color);
    gl_FragData[0] = shaded;
    DOZWRITE(gl_FragData[1] = gl_TexCoord[0];)
}
|
||||
|
||||
// DECL_TEXPS(num, bit) defines the textured entry point
// Texture<num><bit>PS(). With WRITE_DEPTH it also writes gl_TexCoord[1] to
// the second render target.
#ifndef WRITE_DEPTH

#define DECL_TEXPS(num, bit) \
    void Texture##num##bit##PS() \
    { \
        half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
        gl_FragData[0] = ps2FinalColor(ps2CalcShade(texel, gl_Color)); \
    }

#else

#define DECL_TEXPS(num, bit) \
    void Texture##num##bit##PS() \
    { \
        half4 texel = ps2shade##num##bit(gl_TexCoord[0]); \
        gl_FragData[0] = ps2FinalColor(ps2CalcShade(texel, gl_Color)); \
        gl_FragData[1] = gl_TexCoord[1]; \
    }

#endif

// One entry point per texture format for a given filter number.
#define DECL_TEXPS_(num) \
    DECL_TEXPS(num, _32) \
    DECL_TEXPS(num, _tex32) \
    DECL_TEXPS(num, _clut32) \
    DECL_TEXPS(num, _tex32to16) \
    DECL_TEXPS(num, _tex16to8h)

DECL_TEXPS_(0)
DECL_TEXPS_(1)
DECL_TEXPS_(2)
DECL_TEXPS_(3)
DECL_TEXPS_(4)
DECL_TEXPS_(5)
|
||||
|
||||
// Untextured primitive with fog: blends the vertex color towards
// g_fFogColor using gl_TexCoord[0].x as the factor; alpha is passed through.
// With WRITE_DEPTH, gl_TexCoord[1] goes to the second render target.
void RegularFogPS()
{
    half4 fogged;
    fogged.xyz = mix(g_fFogColor.xyz, gl_Color.xyz, vec3(gl_TexCoord[0].x));
    fogged.w = gl_Color.w;

    gl_FragData[0] = ps2FinalColor(fogged);
    DOZWRITE(gl_FragData[1] = gl_TexCoord[1];)
}
|
||||
|
||||
// DECL_TEXFOGPS(num, bit) defines the textured + fog entry point
// TextureFog<num><bit>PS(). The fog factor comes from gl_TexCoord[1].x; with
// WRITE_DEPTH, gl_TexCoord[2] is written to the second render target.
#ifndef WRITE_DEPTH

#define DECL_TEXFOGPS(num, bit) \
    void TextureFog##num##bit##PS() \
    { \
        half4 c = ps2CalcShade(ps2shade##num##bit(gl_TexCoord[0]), gl_Color); \
        c.xyz = mix(g_fFogColor.xyz, c.xyz, vec3(gl_TexCoord[1].x)); \
        gl_FragData[0] = ps2FinalColor(c); \
    }

#else

#define DECL_TEXFOGPS(num, bit) \
    void TextureFog##num##bit##PS() \
    { \
        half4 c = ps2CalcShade(ps2shade##num##bit(gl_TexCoord[0]), gl_Color); \
        c.xyz = mix(g_fFogColor.xyz, c.xyz, vec3(gl_TexCoord[1].x)); \
        gl_FragData[0] = ps2FinalColor(c); \
        gl_FragData[1] = gl_TexCoord[2]; \
    }

#endif

// One entry point per texture format for a given filter number.
#define DECL_TEXFOGPS_(num) \
    DECL_TEXFOGPS(num, _32) \
    DECL_TEXFOGPS(num, _tex32) \
    DECL_TEXFOGPS(num, _clut32) \
    DECL_TEXFOGPS(num, _tex32to16) \
    DECL_TEXFOGPS(num, _tex16to8h)

DECL_TEXFOGPS_(0)
DECL_TEXFOGPS_(1)
DECL_TEXFOGPS_(2)
DECL_TEXFOGPS_(3)
DECL_TEXFOGPS_(4)
DECL_TEXFOGPS_(5)
|
||||
|
||||
//-------------------------------------------------------
|
||||
// Techniques not related to the main primitive commands
|
||||
// Manual bilinear filter over g_sMemory[CTX].
// tex0: coordinate to sample. Returns the blend of four texels around it.
// ps2memcoord4_fast() is defined elsewhere; it receives the two corner coordinates
// packed in ftex and fills off0/off1, whose .xy/.zw pairs are used as the four
// lookup coordinates.
half4 BilinearBitBlt(float2 tex0)
|
||||
{
|
||||
float4 ftex;
|
||||
float2 ffrac;
|
||||
|
||||
// fractional part of tex0 scaled by g_fRealTexDims[CTX].xy: the blend weights
ffrac.xy = fract(tex0*g_fRealTexDims[CTX].xy);
|
||||
// ftex.xy: tex0 moved back by that fraction; ftex.zw: one g_fRealTexDims[CTX].zw step further
ftex.xy = tex0 - ffrac.xy * g_fRealTexDims[CTX].zw;
|
||||
ftex.zw = ftex.xy + g_fRealTexDims[CTX].zw;
|
||||
|
||||
float4 off0, off1;
|
||||
ps2memcoord4_fast(ftex, off0, off1);
|
||||
half4 c0 = texture(g_sMemory[CTX], off0.xy);
|
||||
half4 c1 = texture(g_sMemory[CTX], off0.zw);
|
||||
half4 c2 = texture(g_sMemory[CTX], off1.xy);
|
||||
half4 c3 = texture(g_sMemory[CTX], off1.zw);
|
||||
|
||||
// blend c0/c1 and c2/c3 by ffrac.x, then blend the two results by ffrac.y
return mix( mix(c0, c1, vec4(ffrac.x)), mix(c2, c3, vec4(ffrac.x)), vec4(ffrac.y) );
|
||||
}
|
||||
|
||||
// Point-sampled blit: fetches one texel from g_sMemory[CTX] at the coordinate
// returned by ps2memcoord() for gl_TexCoord[0].xy and writes it to gl_FragData[0],
// scaling rgb by g_fOneColor.x and alpha by g_fOneColor.y.
void BitBltPS() {
|
||||
gl_FragData[0] = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy).xy)*g_fOneColor.xxxy;
|
||||
}
|
||||
|
||||
// used when AA
|
||||
// Filtered blit: same scaling as BitBltPS (rgb by g_fOneColor.x, alpha by
// g_fOneColor.y) but the source colour comes from BilinearBitBlt() instead of a
// single texel fetch.
void BitBltAAPS() {
|
||||
gl_FragData[0] = BilinearBitBlt(gl_TexCoord[0].xy) * g_fOneColor.xxxy;
|
||||
}
|
||||
|
||||
// Blit that also writes depth.
// Fetches one texel from g_sMemory[CTX], writes it (offset by g_fZBias.y) to
// gl_FragData[0], and derives gl_FragDepth from the texel: z = dot(data, g_fBitBltZ)
// is combined as log(g_fc0.y + z) * g_fOneColor.w * g_fZMin.y + z * g_fZMin.x.
// NOTE(review): g_fZMin.x / g_fZMin.y look like weights selecting between the
// linear and the logarithmic depth term - confirm against the host code.
void BitBltDepthPS() {
|
||||
vec4 data;
|
||||
// .xy added for consistency with every other ps2memcoord() call site in this file
data = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy).xy);
|
||||
gl_FragData[0] = data + g_fZBias.y;
|
||||
// the depth value is needed by both terms below; compute it once
float z = dot(data, g_fBitBltZ);
gl_FragDepth = (log(g_fc0.y + z) * g_fOneColor.w) * g_fZMin.y + z * g_fZMin.x ;
|
||||
}
|
||||
|
||||
// Same as BitBltDepthPS, but for a multiple-render-target setup: in addition to
// gl_FragData[0] and gl_FragDepth it writes g_fc0.x to gl_FragData[1].x.
// Depth is z = dot(data, g_fBitBltZ) combined as
// log(g_fc0.y + z) * g_fOneColor.w * g_fZMin.y + z * g_fZMin.x.
void BitBltDepthMRTPS() {
|
||||
vec4 data;
|
||||
// .xy added for consistency with every other ps2memcoord() call site in this file
data = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy).xy);
|
||||
gl_FragData[0] = data + g_fZBias.y;
|
||||
gl_FragData[1].x = g_fc0.x;
|
||||
// the depth value is needed by both terms below; compute it once
float z = dot(data, g_fBitBltZ);
gl_FragDepth = (log(g_fc0.y + z) * g_fOneColor.w) * g_fZMin.y + z * g_fZMin.x ;
|
||||
}
|
||||
|
||||
/*static const float BlurKernel[9] = {
|
||||
0.027601,
|
||||
0.066213,
|
||||
0.123701,
|
||||
0.179952,
|
||||
0.205065,
|
||||
0.179952,
|
||||
0.123701,
|
||||
0.066213,
|
||||
0.027601
|
||||
};*/
|
||||
|
||||
// Returns the texel of g_sSrcFinal at tex0. Despite the name, no filtering is done
// in the shader itself; any filtering would have to come from the sampler state
// (not visible here).
half4 BilinearFloat16(float2 tex0)
|
||||
{
|
||||
return texture(g_sSrcFinal, tex0.xy);
|
||||
}
|
||||
|
||||
// CRTC output from g_sSrcFinal with an interlace factor.
// finter is read from g_sInterlace (indexed by gl_TexCoord[1].y), scaled by
// g_fOneColor.z and offset by g_fOneColor.w + g_fc0.w. The colour is taken from
// BilinearFloat16(); its alpha becomes
// (g_fc0.w * alpha * g_fOneColor.x + g_fOneColor.y) * finter.
void CRTCTargInterPS() {
|
||||
float finter = texture(g_sInterlace, gl_TexCoord[1].yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
|
||||
float4 c = BilinearFloat16(gl_TexCoord[0].xy);
|
||||
c.w = ( g_fc0.w*c.w * g_fOneColor.x + g_fOneColor.y ) * finter;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// CRTC output from g_sSrcFinal without interlacing: colour from BilinearFloat16(),
// alpha replaced by g_fc0.w * alpha * g_fOneColor.x + g_fOneColor.y.
void CRTCTargPS() {
|
||||
float4 c = BilinearFloat16(gl_TexCoord[0].xy);
|
||||
c.w = g_fc0.w * c.w * g_fOneColor.x + g_fOneColor.y;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// CRTC output from g_sMemory with an interlace factor and bilinear filtering.
// The truncated texture coordinate is rescaled by g_fInvTexDims (xy scale, zw
// offset) before being passed to BilinearBitBlt(); alpha becomes
// (alpha * g_fOneColor.x + g_fOneColor.y) * finter, with finter read from g_sInterlace.
void CRTCInterPS() {
|
||||
float finter = texture(g_sInterlace, gl_TexCoord[1].yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
|
||||
float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
|
||||
half4 c = BilinearBitBlt(filtcoord);
|
||||
c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// simpler
|
||||
// Interlaced CRTC output with a single texel fetch: the colour is read from
// g_sMemory[CTX] at ps2memcoord(gl_TexCoord[0].xy) instead of going through
// BilinearBitBlt(); alpha becomes (alpha * g_fOneColor.x + g_fOneColor.y) * finter.
void CRTCInterPS_Nearest() {
|
||||
float finter = texture(g_sInterlace, gl_TexCoord[1].yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
|
||||
half4 c = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy).xy);
|
||||
c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// Non-interlaced CRTC output with bilinear filtering. Unlike CRTCInterPS the
// texture coordinate is not truncated before it is rescaled by g_fInvTexDims.
// Alpha becomes alpha * g_fOneColor.x + g_fOneColor.y.
void CRTCPS() {
|
||||
float2 filtcoord = gl_TexCoord[0].xy * g_fInvTexDims.xy+g_fInvTexDims.zw;
|
||||
half4 c = BilinearBitBlt(filtcoord);
|
||||
c.w = c.w * g_fOneColor.x + g_fOneColor.y;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// simpler
|
||||
// Non-interlaced CRTC output with a single texel fetch from g_sMemory[CTX] at
// ps2memcoord(gl_TexCoord[0].xy); alpha becomes alpha * g_fOneColor.x + g_fOneColor.y.
void CRTCPS_Nearest() {
|
||||
half4 c = texture(g_sMemory[CTX], ps2memcoord(gl_TexCoord[0].xy).xy);
|
||||
c.w = c.w * g_fOneColor.x + g_fOneColor.y;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// Interlaced CRTC output, "24" variant (presumably for 24-bit frame buffers - confirm).
// The truncated coordinate is rescaled by g_fInvTexDims, mapped through
// ps2memcoord() and fetched from g_sMemory[CTX] without filtering; alpha becomes
// (alpha * g_fOneColor.x + g_fOneColor.y) * finter.
void CRTC24InterPS() {
|
||||
float finter = texture(g_sInterlace, gl_TexCoord[1].yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;
|
||||
float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
|
||||
|
||||
half4 c = texture(g_sMemory[CTX], ps2memcoord(filtcoord).xy);
|
||||
c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// Non-interlaced counterpart of CRTC24InterPS: truncated coordinate rescaled by
// g_fInvTexDims, mapped through ps2memcoord(), one unfiltered fetch from
// g_sMemory[CTX]; alpha becomes alpha * g_fOneColor.x + g_fOneColor.y.
void CRTC24PS() {
|
||||
float2 filtcoord = trunc(gl_TexCoord[0].xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;
|
||||
half4 c = texture(g_sMemory[CTX], ps2memcoord(filtcoord).xy);
|
||||
c.w = c.w * g_fOneColor.x + g_fOneColor.y;
|
||||
gl_FragData[0] = c;
|
||||
}
|
||||
|
||||
// Writes the constant g_fOneColor to gl_FragData[0] for every fragment
// (no texture is sampled).
void ZeroPS() {
|
||||
gl_FragData[0] = g_fOneColor;
|
||||
}
|
||||
|
||||
// Samples g_sSrcFinal at gl_TexCoord[0].xy and modulates all four channels by
// g_fOneColor before writing to gl_FragData[0].
void BaseTexturePS() {
|
||||
gl_FragData[0] = texture(g_sSrcFinal, gl_TexCoord[0].xy) * g_fOneColor;
|
||||
}
|
||||
|
||||
// Format-conversion pass: reads two texels of g_sSrcFinal (tex0 and
// tex0 + g_fPageOffset[CTX].xy), maps each through the g_sConv32to16 lookup
// texture and packs the two results into one output texel (lower -> .zy, upper -> .xw).
// NOTE(review): the function name says 16->32 while the lookup sampler is named
// g_sConv32to16 - confirm the intended direction against the host code that binds it.
void Convert16to32PS() {
|
||||
float4 final;
|
||||
// position inside the repeating cell of size g_fTexOffset[CTX].xy
float2 ffrac = mod ( gl_TexCoord[0].xy + g_fTexDims[CTX].zw, g_fTexOffset[CTX].xy);
|
||||
float2 tex0 = g_fTexDims[CTX].xy * gl_TexCoord[0].xy - ffrac * g_fc0.yw;
|
||||
|
||||
// NOTE: the two ifs below are independent single-statement conditionals
if (ffrac.x > g_fTexOffset[CTX].x*g_fc0.w)
|
||||
tex0.x += g_fTexOffset[CTX].x*g_fc0.w;
|
||||
if (tex0.x >= g_fc0.y)
|
||||
tex0 += g_fTexOffset[CTX].zw;
|
||||
|
||||
float4 lower = texture(g_sSrcFinal, tex0);
|
||||
float4 upper = texture(g_sSrcFinal, tex0 + g_fPageOffset[CTX].xy);
|
||||
|
||||
// lookup is indexed by the .zyx channels; the source alpha is added scaled by g_fPageOffset[CTX].zw
final.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*g_fPageOffset[CTX].zw;
|
||||
final.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*g_fPageOffset[CTX].zw;
|
||||
|
||||
gl_FragData[0]= final;
|
||||
}
|
||||
|
||||
// use when texture is not tiled and converting from 32bit to 16bit
|
||||
// don't convert on the block level, only on the column level
|
||||
// so every other 8 pixels, use the upper bits instead of lower
|
||||
// Format-conversion pass: reads one texel of g_sSrcFinal, picks either its
// (x, w) or its (z, y) channel pair depending on where the fragment falls inside
// the repeating cell, and uses that pair to index the g_sConv16to32 lookup texture.
// NOTE(review): the function name says 32->16 while the lookup sampler is named
// g_sConv16to32 - confirm the intended direction against the host code that binds it.
void Convert32to16PS() {
|
||||
bool upper = false;
|
||||
// position inside the repeating cell of size g_fTexOffset[CTX].xy
float2 ffrac = mod(gl_TexCoord[0].xy + g_fTexDims[CTX].zw, g_fTexOffset[CTX].xy);
|
||||
float2 tex0 = g_fc0.ww * (gl_TexCoord[0].xy + ffrac);
|
||||
// past the horizontal threshold: step back and take the (x, w) pair later
if( ffrac.x > g_fTexOffset[CTX].z ) {
|
||||
tex0.x -= g_fTexOffset[CTX].z;
|
||||
upper = true;
|
||||
}
|
||||
// past the vertical threshold: step back in y and shift x by g_fc0.w
if( ffrac.y >= g_fTexOffset[CTX].w ) {
|
||||
tex0.y -= g_fTexOffset[CTX].w;
|
||||
tex0.x += g_fc0.w;
|
||||
}
|
||||
|
||||
half4 color = texture(g_sSrcFinal, tex0*g_fTexDims[CTX].xy)*g_fc0.yyyw;
|
||||
float2 uv = upper ? color.xw : color.zy;
|
||||
gl_FragData[0] = texture(g_sConv16to32, uv*g_fPageOffset[CTX].xy+g_fPageOffset[CTX].zw)*g_fTexDims[CTX].xxxy;
|
||||
}
|
||||
#endif //FRAGMENT_SHADER
|
||||
|
||||
#ifdef VERTEX_SHADER
|
||||
|
||||
// Computes the clip-space position for a vertex.
// vertex: input position; only .xy is used (scaled by g_fPosXY[CTX].xy and offset
//         by g_fPosXY[CTX].zw). Every caller in this file passes gl_Vertex.
// Depth does not come from `vertex`: it is z = dot(g_fZ, gl_SecondaryColor.zyxw),
// combined as (log(g_fc0.y + z) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + z * g_fZMin.x.
// The returned w is g_fc0.y.
float4 OutPosition(float4 vertex) {
|
||||
float4 Position;
|
||||
// use the parameter rather than reading gl_Vertex behind the caller's back
Position.xy = vertex.xy * g_fPosXY[CTX].xy + g_fPosXY[CTX].zw;
|
||||
// the depth value feeds both the logarithmic and the linear term; compute it once
float z = dot(g_fZ, gl_SecondaryColor.zyxw);
Position.z = (log(g_fc0.y + z) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + z * g_fZMin.x ;
|
||||
Position.w = g_fc0.y;
|
||||
return Position;
|
||||
}
|
||||
|
||||
// just smooth shading
|
||||
// Vertex shader for untextured primitives: sets gl_Position via OutPosition() and
// forwards the vertex colour. Inside DOZWRITE (defined outside this view;
// presumably active only when depth output is enabled - confirm) gl_TexCoord[0]
// receives gl_SecondaryColor * g_fZBias.x + g_fZBias.y with w forced to g_fc0.y.
void RegularVS() {
|
||||
gl_Position = OutPosition(gl_Vertex);
|
||||
gl_FrontColor = gl_Color;
|
||||
DOZWRITE(gl_TexCoord[0] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[0].w = g_fc0.y;)
|
||||
}
|
||||
|
||||
// diffuse texture mapping
|
||||
// Vertex shader for textured primitives: sets gl_Position via OutPosition(),
// forwards the vertex colour and outputs the texture coordinate in gl_TexCoord[0].
// The depth-related value goes to gl_TexCoord[1] inside DOZWRITE (defined outside
// this view).
void TextureVS() {
|
||||
gl_Position = OutPosition(gl_Vertex);
|
||||
gl_FrontColor = gl_Color;
|
||||
#ifdef PERSPECTIVE_CORRECT_TEX
|
||||
// pass xyz through unchanged; the division by z is not done in this shader
gl_TexCoord[0].xyz = gl_MultiTexCoord0.xyz;
|
||||
#else
|
||||
// divide by z per vertex
gl_TexCoord[0].xy = gl_MultiTexCoord0.xy/gl_MultiTexCoord0.z;
|
||||
#endif
|
||||
DOZWRITE(gl_TexCoord[1] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[1].w = g_fc0.y;)
|
||||
}
|
||||
|
||||
// Vertex shader for untextured primitives with fog. The fog factor written to
// gl_TexCoord[0].x is the z of the position returned by OutPosition() multiplied
// by g_fBilinear.w; the depth-related value goes to gl_TexCoord[1] inside DOZWRITE.
void RegularFogVS() {
|
||||
float4 position = OutPosition(gl_Vertex);
|
||||
gl_Position = position;
|
||||
gl_FrontColor = gl_Color;
|
||||
gl_TexCoord[0].x = position.z * g_fBilinear.w;
|
||||
DOZWRITE(gl_TexCoord[1] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[1].w = g_fc0.y;)
|
||||
}
|
||||
|
||||
// Vertex shader for textured primitives with fog: texture coordinate in
// gl_TexCoord[0], fog factor in gl_TexCoord[1].x, depth-related value in
// gl_TexCoord[2] (inside DOZWRITE, defined outside this view).
void TextureFogVS() {
|
||||
gl_Position = OutPosition(gl_Vertex);
|
||||
gl_FrontColor = gl_Color;
|
||||
#ifdef PERSPECTIVE_CORRECT_TEX
|
||||
gl_TexCoord[0].xyz = gl_MultiTexCoord0.xyz;
|
||||
#else
|
||||
gl_TexCoord[0].xy = gl_MultiTexCoord0.xy / gl_MultiTexCoord0.z;
|
||||
#endif
|
||||
// NOTE(review): the fog factor here uses the raw input gl_Vertex.z, whereas
// RegularFogVS uses the z of the transformed position - confirm which is intended.
gl_TexCoord[1].x = gl_Vertex.z * g_fBilinear.w;
|
||||
DOZWRITE(gl_TexCoord[2] = gl_SecondaryColor * g_fZBias.x + g_fZBias.y; gl_TexCoord[2].w = g_fc0.y;)
|
||||
}
|
||||
|
||||
// Vertex shader for the blit/CRTC passes.
// Position: gl_Vertex.xy scaled/offset by g_fBitBltPos, with z and w taken from g_fc0.xy.
// gl_TexCoord[0].xy: gl_MultiTexCoord0.xy scaled/offset by g_fBitBltTex.
// gl_TexCoord[1].xy: the output position scaled/offset by g_fBitBltTrans (the
// fragment shaders above index g_sInterlace with its y component).
void BitBltVS() {
|
||||
vec4 position;
|
||||
position.xy = gl_Vertex.xy * g_fBitBltPos.xy + g_fBitBltPos.zw;
|
||||
position.zw = g_fc0.xy;
|
||||
gl_Position = position;
|
||||
|
||||
gl_TexCoord[0].xy = gl_MultiTexCoord0.xy * g_fBitBltTex.xy + g_fBitBltTex.zw;
|
||||
gl_TexCoord[1].xy = position.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;
|
||||
}
|
||||
|
||||
#endif // VERTEX_SHADER
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -34,6 +34,9 @@
|
|||
|
||||
#define VB_BUFFERSIZE 0x4000
|
||||
|
||||
extern void FlushIfNecesary(void* ptr);
|
||||
extern bool g_bSaveZUpdate;
|
||||
|
||||
// all textures have this width
|
||||
extern int GPU_TEXWIDTH;
|
||||
extern float g_fiGPU_TEXWIDTH;
|
||||
|
@ -101,10 +104,10 @@ class CRenderTarget
|
|||
TS_Virtual = 4, // currently not mapped to memory
|
||||
TS_FeedbackReady = 8, // feedback effect is ready and doesn't need to be updated
|
||||
TS_NeedConvert32 = 16,
|
||||
TS_NeedConvert16 = 32,
|
||||
TS_NeedConvert16 = 32
|
||||
};
|
||||
inline float4 DefaultBitBltPos();
|
||||
inline float4 DefaultBitBltTex();
|
||||
float4 DefaultBitBltPos();
|
||||
float4 DefaultBitBltTex();
|
||||
|
||||
private:
|
||||
void _CreateFeedback();
|
||||
|
@ -310,9 +313,12 @@ class CRenderTargetMngr
|
|||
return ptarg;
|
||||
}
|
||||
|
||||
static void DestroyTarg(CRenderTarget* ptarg);
|
||||
void DestroyTarg(CRenderTarget* ptarg);
|
||||
void PrintTargets();
|
||||
MAPTARGETS mapTargets, mapDummyTargs;
|
||||
private:
|
||||
|
||||
void DestroyAllTargetsHelper(void* ptr);
|
||||
};
|
||||
|
||||
class CMemoryTargetMngr
|
||||
|
@ -487,108 +493,6 @@ inline u32 GetFrameKeyDummy(CRenderTarget* frame)
|
|||
return GetFrameKeyDummy(frame->fbp, frame->fbw, frame->fbh, frame->psm);
|
||||
}
|
||||
|
||||
#include "Mem.h"
|
||||
|
||||
static __forceinline void DrawTriangleArray()
|
||||
{
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
static __forceinline void DrawBuffers(GLenum *buffer)
|
||||
{
|
||||
if (glDrawBuffers != NULL)
|
||||
{
|
||||
glDrawBuffers(1, buffer);
|
||||
}
|
||||
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
static __forceinline void FBTexture(int attach, int id = 0)
|
||||
{
|
||||
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT + attach, GL_TEXTURE_RECTANGLE_NV, id, 0);
|
||||
GL_REPORT_ERRORD();
|
||||
}
|
||||
|
||||
static __forceinline void ResetRenderTarget(int index)
|
||||
{
|
||||
FBTexture(index);
|
||||
}
|
||||
|
||||
static __forceinline void Texture2D(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
|
||||
{
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, iFormat, width, height, 0, format, type, pixels);
|
||||
}
|
||||
|
||||
static __forceinline void Texture2D(GLint iFormat, GLenum format, GLenum type, const GLvoid* pixels)
|
||||
{
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, iFormat, BLOCK_TEXWIDTH, BLOCK_TEXHEIGHT, 0, format, type, pixels);
|
||||
}
|
||||
|
||||
static __forceinline void Texture3D(GLint iFormat, GLint width, GLint height, GLint depth, GLenum format, GLenum type, const GLvoid* pixels)
|
||||
{
|
||||
glTexImage3D(GL_TEXTURE_3D, 0, iFormat, width, height, depth, 0, format, type, pixels);
|
||||
}
|
||||
|
||||
static __forceinline void TextureRect(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
|
||||
{
|
||||
glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, iFormat, width, height, 0, format, type, pixels);
|
||||
}
|
||||
|
||||
static __forceinline void TextureRect2(GLint iFormat, GLint width, GLint height, GLenum format, GLenum type, const GLvoid* pixels)
|
||||
{
|
||||
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, iFormat, width, height, 0, format, type, pixels);
|
||||
}
|
||||
|
||||
static __forceinline void TextureRect(GLenum attach, GLuint id = 0)
|
||||
{
|
||||
glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, attach, GL_RENDERBUFFER_EXT, id);
|
||||
}
|
||||
|
||||
static __forceinline void setTex2DFilters(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, type);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, type);
|
||||
}
|
||||
|
||||
static __forceinline void setTex2DWrap(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, type);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, type);
|
||||
}
|
||||
|
||||
static __forceinline void setTex3DFilters(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, type);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, type);
|
||||
}
|
||||
|
||||
static __forceinline void setTex3DWrap(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, type);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, type);
|
||||
glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, type);
|
||||
}
|
||||
|
||||
static __forceinline void setRectFilters(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, type);
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, type);
|
||||
}
|
||||
|
||||
static __forceinline void setRectWrap(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, type);
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, type);
|
||||
}
|
||||
|
||||
static __forceinline void setRectWrap2(GLint type)
|
||||
{
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, type);
|
||||
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, type);
|
||||
}
|
||||
|
||||
//------------------------ Inlines -------------------------
|
||||
|
||||
// Calculate maximum height for target
|
||||
|
@ -604,4 +508,12 @@ inline int get_maxheight(int fbp, int fbw, int psm)
|
|||
return ret;
|
||||
}
|
||||
|
||||
// memory size for one row of texture. It depends on width of texture and number of bytes
|
||||
// per pixel
|
||||
inline u32 Pitch(int fbw) { return (RW(fbw) * 4) ; }
|
||||
|
||||
// memory size of whole texture. It is number of rows multiplied by memory size of row
|
||||
inline u32 Tex_Memory_Size(int fbw, int fbh) { return (RH(fbh) * Pitch(fbw)); }
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,9 +22,13 @@
|
|||
#include "x86.h"
|
||||
|
||||
#if defined(ZEROGS_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
// Note: all code in this file is deprecated. Kept for reference.
|
||||
|
||||
|
||||
// swizzling
|
||||
|
||||
//These were only used in the old version of RESOLVE_32_BITS. Keeping for reference.
|
||||
|
|
|
@ -22,153 +22,99 @@
|
|||
|
||||
#include "GS.h"
|
||||
|
||||
extern "C" void __fastcall SwizzleBlock32_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
|
||||
extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch);
|
||||
extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch);
|
||||
extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch);
|
||||
extern "C" void __fastcall SwizzleBlock32u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
|
||||
extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch);
|
||||
extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch);
|
||||
extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch);
|
||||
#ifndef ZZNORMAL_MEMORY
|
||||
// StarOcean use 24 in logo and 4HH and 4HL in menu subfont
|
||||
// Tony hawk use 16, but have a lot of trouble
|
||||
// This function moves one blockwidth * blockheight data block from src to dst, assuming that dst stores swizzled data.
|
||||
template <int psm>
|
||||
inline void __fastcall SwizzleBlock(u32* dst, u32* src, int pitch, u32 WriteMask = 0xffffffff) {
|
||||
u8 B = (PSM_PIXELS_PER_WORD<psm>() > 2)? 4 : 2;
|
||||
|
||||
// frame swizzling
|
||||
assert ((pitch & 3) == 0 );
|
||||
|
||||
#if 0
|
||||
// no AA
|
||||
extern "C" void __fastcall FrameSwizzleBlock32_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall FrameSwizzleBlock16_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32Z_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16Z_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
u32* src1 = src;
|
||||
u32* src2 = src + pitch / 4;
|
||||
|
||||
// AA 2x
|
||||
extern "C" void __fastcall FrameSwizzleBlock32A2_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall FrameSwizzleBlock16A2_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32A2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32ZA2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16A2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16ZA2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
for(int j = 0; j < 4 ; j++, src1 += B * pitch / 4, src2 += B * pitch / 4)
|
||||
for(int i = 0; i < 8; i++) {
|
||||
fillPixelsFromMemory<psm>(dst, src1, i, B * j, pitch /4, 0, 0, WriteMask);
|
||||
fillPixelsFromMemory<psm>(dst, src2, i, B * j + 1, pitch / 4 , 0, 0, WriteMask);
|
||||
}
|
||||
}
|
||||
|
||||
// AA 4x
|
||||
extern "C" void __fastcall FrameSwizzleBlock32A4_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall FrameSwizzleBlock16A4_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32A4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock32ZA4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16A4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern "C" void __fastcall Frame16SwizzleBlock16ZA4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
// Simple AA multiplication. We do not use src[j << AA]; we prefer to keep a more central pixel in the data.
|
||||
// We do not mix neighbouring pixels, because it gives no noticeable benefit while the speed penalty is big.
|
||||
template <u8 AA>
|
||||
inline u32 mixed_pixel(u32* src, int j) {
|
||||
if (AA == 0)
|
||||
return src[j] ;
|
||||
|
||||
/*extern void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
|
||||
extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch);
|
||||
extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch);
|
||||
extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch);*/
|
||||
if (AA == 1)
|
||||
return src[(j << 1) + 1];
|
||||
|
||||
// no AA
|
||||
extern void __fastcall FrameSwizzleBlock32_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock24_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock16_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32Z_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16Z_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
if (AA == 2)
|
||||
return src[(j << 2) + 2];
|
||||
}
|
||||
|
||||
// AA 2x
|
||||
extern void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock24A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock16A2_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32A2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32ZA2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16A2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16ZA2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
// We fill destination word for pixel number j (j < 8). For 16-bit storage upper size of this word is pixel of j + 8,
|
||||
// and RGBA data should be convert to ARGB16.
|
||||
// WARNING: floating-point storage has never been tested
|
||||
template <int psm, bool is_float, u8 AA>
|
||||
inline u32 convert_pixel(u32* src, int j) {
|
||||
if (is_float) {
|
||||
Vector_16F* fsrc = (Vector_16F*)src; // We use simplified code for float, it seems not
|
||||
// to be used anyway.
|
||||
if (PSM_ISHALF<psm>()) {
|
||||
return Float16ToARGB16 ( fsrc[j << AA]) + (Float16ToARGB16(fsrc[(j + 8) << AA]) << 16);
|
||||
}
|
||||
else {
|
||||
return Float16ToARGB ( fsrc[j << AA] );
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (PSM_ISHALF<psm>()) {
|
||||
return RGBA32to16(mixed_pixel<AA>(src, j)) + (RGBA32to16(mixed_pixel<AA>(src, j + 8)) << 16);
|
||||
}
|
||||
else {
|
||||
return mixed_pixel<AA>(src, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AA 4x
|
||||
extern void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock24A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall FrameSwizzleBlock16A4_c(u16* dst, u32* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32A4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock32ZA4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16A4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
extern void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask);
|
||||
// put data in u32 destination word for pixel x, y < 8 in swizzled block. Note, that in 16-bit target we put 2 pixels (x,y
|
||||
// and x+8, y) in the same word.
|
||||
template <int pix, int x, int y, int psm, bool is_float, u8 AA>
|
||||
inline void SettleSwizzlePixel(u32* dst, u32* src, int srcpitch, u32 mask) {
|
||||
u32 tmp = convert_pixel<psm, is_float, AA>(src + y * srcpitch, x);
|
||||
MaskedOR (dst + pix, tmp, mask); // Don't forget to use mask.
|
||||
}
|
||||
|
||||
// Write the swizzled block for src to the dst memory location. We do not calculate pixel addresses here at all.
|
||||
template <int psm, bool is_float, u8 AA>
|
||||
void __fastcall FrameSwizzleBlock(u32* dst, int sj, int si, u32* src, int srcpitch, u32 WriteMask) {
|
||||
u32 mask = HandleWritemask<psm>(WriteMask); // This function made correct mask for 32, 24 and 16 target's
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
SettleSwizzlePixel<0, 0, 0, psm, is_float, AA>(dst, src, srcpitch, mask); // it's possible to put one for here, but I don't know, what's faster
|
||||
SettleSwizzlePixel<1, 1, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<2, 0, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<3, 1, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<4, 2, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<5, 3, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<6, 2, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<7, 3, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<8, 4, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<9, 5, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<10, 4, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<11, 5, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<12, 6, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<13, 7, 0, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<14, 6, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
SettleSwizzlePixel<15, 7, 1, psm, is_float, AA>(dst, src, srcpitch, mask);
|
||||
|
||||
src += 2 * srcpitch;
|
||||
dst += 16;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
extern void __fastcall SwizzleColumn32_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff);
|
||||
extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch);
|
||||
extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch);
|
||||
extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch);
|
||||
|
||||
// extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut);
|
||||
extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32 csa);
|
||||
extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut);
|
||||
// extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut);
|
||||
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32 csa);
|
||||
extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut);
|
||||
extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut);
|
||||
extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut);
|
||||
|
||||
extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut);
|
||||
extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut);
|
||||
|
||||
extern void SSE2_UnswizzleZ16Target(u16* dst, u16* src, int iters);
|
||||
|
||||
#ifdef ZEROGS_SSE2
|
||||
|
||||
#define FrameSwizzleBlock32 FrameSwizzleBlock32_c
|
||||
#define FrameSwizzleBlock24 FrameSwizzleBlock24_c
|
||||
#define FrameSwizzleBlock16 FrameSwizzleBlock16_c
|
||||
#define Frame16SwizzleBlock32 Frame16SwizzleBlock32_c
|
||||
#define Frame16SwizzleBlock32Z Frame16SwizzleBlock32Z_c
|
||||
#define Frame16SwizzleBlock16 Frame16SwizzleBlock16_c
|
||||
#define Frame16SwizzleBlock16Z Frame16SwizzleBlock16Z_c
|
||||
|
||||
#define FrameSwizzleBlock32A2 FrameSwizzleBlock32A2_c
|
||||
#define FrameSwizzleBlock24A2 FrameSwizzleBlock24A2_c
|
||||
#define FrameSwizzleBlock16A2 FrameSwizzleBlock16A2_c
|
||||
#define Frame16SwizzleBlock32A2 Frame16SwizzleBlock32A2_c
|
||||
#define Frame16SwizzleBlock32ZA2 Frame16SwizzleBlock32ZA2_c
|
||||
#define Frame16SwizzleBlock16A2 Frame16SwizzleBlock16A2_c
|
||||
#define Frame16SwizzleBlock16ZA2 Frame16SwizzleBlock16ZA2_c
|
||||
|
||||
#define FrameSwizzleBlock32A4 FrameSwizzleBlock32A4_c
|
||||
#define FrameSwizzleBlock24A4 FrameSwizzleBlock24A4_c
|
||||
#define FrameSwizzleBlock16A4 FrameSwizzleBlock16A4_c
|
||||
#define Frame16SwizzleBlock32A4 Frame16SwizzleBlock32A4_c
|
||||
#define Frame16SwizzleBlock32ZA4 Frame16SwizzleBlock32ZA4_c
|
||||
#define Frame16SwizzleBlock16A4 Frame16SwizzleBlock16A4_c
|
||||
#define Frame16SwizzleBlock16ZA4 Frame16SwizzleBlock16ZA4_c
|
||||
|
||||
#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2
|
||||
#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2
|
||||
#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2
|
||||
#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2
|
||||
|
||||
#else
|
||||
|
||||
#define FrameSwizzleBlock32 FrameSwizzleBlock32_c
|
||||
#define FrameSwizzleBlock16 FrameSwizzleBlock16_c
|
||||
#define Frame16SwizzleBlock32 Frame16SwizzleBlock32_c
|
||||
#define Frame16SwizzleBlock32Z Frame16SwizzleBlock32Z_c
|
||||
#define Frame16SwizzleBlock16 Frame16SwizzleBlock16_c
|
||||
#define Frame16SwizzleBlock16Z Frame16SwizzleBlock16Z_c
|
||||
|
||||
#define FrameSwizzleBlock32A2 FrameSwizzleBlock32A2_c
|
||||
#define FrameSwizzleBlock16A2 FrameSwizzleBlock16A2_c
|
||||
#define Frame16SwizzleBlock32A2 Frame16SwizzleBlock32A2_c
|
||||
#define Frame16SwizzleBlock32ZA2 Frame16SwizzleBlock32ZA2_c
|
||||
#define Frame16SwizzleBlock16A2 Frame16SwizzleBlock16A2_c
|
||||
#define Frame16SwizzleBlock16ZA2 Frame16SwizzleBlock16ZA2_c
|
||||
|
||||
#define FrameSwizzleBlock32A4 FrameSwizzleBlock32A4_c
|
||||
#define FrameSwizzleBlock16A4 FrameSwizzleBlock16A4_c
|
||||
#define Frame16SwizzleBlock32A4 Frame16SwizzleBlock32A4_c
|
||||
#define Frame16SwizzleBlock32ZA4 Frame16SwizzleBlock32ZA4_c
|
||||
#define Frame16SwizzleBlock16A4 Frame16SwizzleBlock16A4_c
|
||||
#define Frame16SwizzleBlock16ZA4 Frame16SwizzleBlock16ZA4_c
|
||||
|
||||
#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_c
|
||||
#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_c
|
||||
#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_c
|
||||
#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_c
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -43,7 +43,7 @@ void HandleGLError()
|
|||
{
|
||||
FUNCLOG
|
||||
// check the error status of this framebuffer */
|
||||
GLenum error = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
|
||||
GLenum error = FB::State();
|
||||
|
||||
// if error != GL_FRAMEBUFFER_COMPLETE_EXT, there's an error of some sort
|
||||
|
||||
|
@ -210,7 +210,7 @@ void SetAA(int mode)
|
|||
// GL_REPORT_ERROR();
|
||||
//
|
||||
// fAlpha = 1;
|
||||
// glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
|
||||
// FB::Unbind(); // switch to the backbuffer
|
||||
//
|
||||
// DisableAllgl() ;
|
||||
// SetShaderCaller("RenderCustom");
|
||||
|
@ -311,7 +311,7 @@ void ExtWrite()
|
|||
// else if (PSMT_ISHALF(texframe.psm)) bpp = 2;
|
||||
//
|
||||
// // get the start and end addresses of the buffer
|
||||
// GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
|
||||
// GetRectMemAddressZero(start, end, texframe.psm, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
|
||||
}
|
||||
|
||||
////////////
|
||||
|
|
Loading…
Reference in New Issue