mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Merge back GregMiscellaneous branch (3867)
* Various cleanups * Replace ASM with intrinsics (much more portable) * Various performance tuning (expect a 10%-20% speedup ^_^ ) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3868 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c875caec15
commit
c7a929a530
|
@ -205,6 +205,8 @@ inline bool PSMT_HAS_SHARED_BITS (int fpsm, int tpsm) {
|
||||||
return (SUM == 0x15 || SUM == 0x1D || SUM == 0x2C || SUM == 0x30);
|
return (SUM == 0x15 || SUM == 0x1D || SUM == 0x2C || SUM == 0x30);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A CLUT entry is 4 bytes when the CLUT is in 32-bit color and 2 bytes when it is in 16-bit color.
|
||||||
|
// Returns the size in bytes of one CLUT entry: 4 when cpsm <= 1, otherwise 2.
inline int CLUT_PIXEL_SIZE(int cpsm)
{
	if (cpsm > 1) return 2;
	return 4;
}
|
||||||
|
|
||||||
//----------------------- Data from registers -----------------------
|
//----------------------- Data from registers -----------------------
|
||||||
|
|
||||||
|
@ -542,7 +544,9 @@ typedef struct
|
||||||
|
|
||||||
extern GSinternal gs;
|
extern GSinternal gs;
|
||||||
|
|
||||||
static __forceinline u16 RGBA32to16(u32 c)
|
// Note the function is used in a template parameter so it must be declared extern
|
||||||
|
// Note2: In this case extern is not compatible with __forceinline so just inline it...
|
||||||
|
extern inline u16 RGBA32to16(u32 c)
|
||||||
{
|
{
|
||||||
return (u16)((((c) & 0x000000f8) >> 3) |
|
return (u16)((((c) & 0x000000f8) >> 3) |
|
||||||
(((c) & 0x0000f800) >> 6) |
|
(((c) & 0x0000f800) >> 6) |
|
||||||
|
@ -558,6 +562,7 @@ static __forceinline u32 RGBA16to32(u16 c)
|
||||||
(((c) & 0x8000) ? 0xff000000 : 0);
|
(((c) & 0x8000) ? 0xff000000 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
|
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
|
||||||
// f is a u16
|
// f is a u16
|
||||||
static __forceinline u16 Float16ToBYTE(u16 f)
|
static __forceinline u16 Float16ToBYTE(u16 f)
|
||||||
|
@ -603,6 +608,7 @@ static __forceinline u16 Float16ToALPHA(u16 f)
|
||||||
// used for Z values
|
// used for Z values
|
||||||
#define Float16ToARGB_Z(f) COLOR_ARGB((u32)Float16ToBYTE_2(f.w), Float16ToBYTE_2(f.x), Float16ToBYTE_2(f.y), Float16ToBYTE_2(f.z))
|
#define Float16ToARGB_Z(f) COLOR_ARGB((u32)Float16ToBYTE_2(f.w), Float16ToBYTE_2(f.x), Float16ToBYTE_2(f.y), Float16ToBYTE_2(f.z))
|
||||||
#define Float16ToARGB16_Z(f) ((Float16ToBYTE_2(f.y)<<8)|Float16ToBYTE_2(f.z))
|
#define Float16ToARGB16_Z(f) ((Float16ToBYTE_2(f.y)<<8)|Float16ToBYTE_2(f.z))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
inline float Clamp(float fx, float fmin, float fmax)
|
inline float Clamp(float fx, float fmin, float fmax)
|
||||||
|
|
|
@ -38,6 +38,7 @@ using namespace std;
|
||||||
#include "targets.h"
|
#include "targets.h"
|
||||||
#include "ZZoglShaders.h"
|
#include "ZZoglShaders.h"
|
||||||
#include "ZZoglFlushHack.h"
|
#include "ZZoglFlushHack.h"
|
||||||
|
#include "ZZoglFlushHack.h"
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma warning(disable:4244)
|
#pragma warning(disable:4244)
|
||||||
|
@ -68,7 +69,7 @@ extern const char* pbilinear[];
|
||||||
// statistics
|
// statistics
|
||||||
u32 g_nGenVars = 0, g_nTexVars = 0, g_nAlphaVars = 0, g_nResolve = 0;
|
u32 g_nGenVars = 0, g_nTexVars = 0, g_nAlphaVars = 0, g_nResolve = 0;
|
||||||
|
|
||||||
#define VER 2
|
#define VER 3
|
||||||
const unsigned char zgsversion = PS2E_GS_VERSION;
|
const unsigned char zgsversion = PS2E_GS_VERSION;
|
||||||
unsigned char zgsrevision = 0; // revision and build gives plugin version
|
unsigned char zgsrevision = 0; // revision and build gives plugin version
|
||||||
unsigned char zgsbuild = VER;
|
unsigned char zgsbuild = VER;
|
||||||
|
@ -143,6 +144,7 @@ void ReportHacks(gameHacks hacks)
|
||||||
if (hacks.quick_resolve_1) ZZLog::WriteLn("'Quick resolve 1' enabled.");
|
if (hacks.quick_resolve_1) ZZLog::WriteLn("'Quick resolve 1' enabled.");
|
||||||
if (hacks.no_quick_resolve) ZZLog::WriteLn("'No Quick resolve' hack enabled.");
|
if (hacks.no_quick_resolve) ZZLog::WriteLn("'No Quick resolve' hack enabled.");
|
||||||
if (hacks.no_target_clut) ZZLog::WriteLn("'No target clut' hack enabled.");
|
if (hacks.no_target_clut) ZZLog::WriteLn("'No target clut' hack enabled.");
|
||||||
|
if (hacks.no_stencil) ZZLog::WriteLn("'No stencil' hack enabled.");
|
||||||
if (hacks.vss_hack_off) ZZLog::WriteLn("VSS hack enabled.");
|
if (hacks.vss_hack_off) ZZLog::WriteLn("VSS hack enabled.");
|
||||||
if (hacks.no_depth_resolve) ZZLog::WriteLn("'No depth resolve' hack enabled.");
|
if (hacks.no_depth_resolve) ZZLog::WriteLn("'No depth resolve' hack enabled.");
|
||||||
if (hacks.full_16_bit_res) ZZLog::WriteLn("'Full 16 bit resolution' hack enabled.");
|
if (hacks.full_16_bit_res) ZZLog::WriteLn("'Full 16 bit resolution' hack enabled.");
|
||||||
|
@ -151,7 +153,7 @@ void ReportHacks(gameHacks hacks)
|
||||||
if (hacks.no_alpha_test) ZZLog::WriteLn("'No alpha test' hack enabled.");
|
if (hacks.no_alpha_test) ZZLog::WriteLn("'No alpha test' hack enabled.");
|
||||||
if (hacks.disable_mrt_depth) ZZLog::WriteLn("'Disable mrt depth' hack enabled.");
|
if (hacks.disable_mrt_depth) ZZLog::WriteLn("'Disable mrt depth' hack enabled.");
|
||||||
if (hacks.args_32_bit) ZZLog::WriteLn("'Args 32 bit' hack enabled.");
|
if (hacks.args_32_bit) ZZLog::WriteLn("'Args 32 bit' hack enabled.");
|
||||||
if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled.");
|
//if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled.");
|
||||||
if (hacks.parallel_context) ZZLog::WriteLn("'Parallel context' hack enabled.");
|
if (hacks.parallel_context) ZZLog::WriteLn("'Parallel context' hack enabled.");
|
||||||
if (hacks.xenosaga_spec) ZZLog::WriteLn("'Xenosaga spec' hack enabled.");
|
if (hacks.xenosaga_spec) ZZLog::WriteLn("'Xenosaga spec' hack enabled.");
|
||||||
if (hacks.partial_pointers) ZZLog::WriteLn("'Partial pointers' hack enabled.");
|
if (hacks.partial_pointers) ZZLog::WriteLn("'Partial pointers' hack enabled.");
|
||||||
|
@ -382,6 +384,7 @@ void CALLBACK GSclose()
|
||||||
|
|
||||||
SaveStateFile = NULL;
|
SaveStateFile = NULL;
|
||||||
SaveStateExists = true; // default value
|
SaveStateExists = true; // default value
|
||||||
|
g_LastCRC = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CALLBACK GSirqCallback(void (*callback)())
|
void CALLBACK GSirqCallback(void (*callback)())
|
||||||
|
|
|
@ -88,8 +88,6 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
||||||
pMem += 4;
|
pMem += 4;
|
||||||
size--;
|
size--;
|
||||||
|
|
||||||
if ((conf.settings().path3) && (index == 2) && path->eop) nPath3Hack = 1;
|
|
||||||
|
|
||||||
// eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and
|
// eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and
|
||||||
// values other than the EOP field are disregarded."
|
// values other than the EOP field are disregarded."
|
||||||
if (path->nloop > 0)
|
if (path->nloop > 0)
|
||||||
|
|
|
@ -79,6 +79,11 @@
|
||||||
static vector<u8> s_vTempBuffer, s_vTransferCache;
|
static vector<u8> s_vTempBuffer, s_vTransferCache;
|
||||||
static int gs_imageEnd = 0;
|
static int gs_imageEnd = 0;
|
||||||
|
|
||||||
|
// From the start of monster labs. In all 3 cases, psm == 0.
|
||||||
|
// ZZogl-PG: GetRectMemAddress(0x3f4000, 0x404000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f40, 0x100);
|
||||||
|
// ZZogl-PG: GetRectMemAddress(0x3f8000, 0x408000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f80, 0x100);
|
||||||
|
// ZZogl-PG: GetRectMemAddress(0x3fc000, 0x40c000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3fc0, 0x100);
|
||||||
|
|
||||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
|
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
|
||||||
{
|
{
|
||||||
FUNCLOG
|
FUNCLOG
|
||||||
|
@ -158,7 +163,7 @@
|
||||||
|
|
||||||
if (end > MEMORY_END)
|
if (end > MEMORY_END)
|
||||||
{
|
{
|
||||||
ZZLog::Warn_Log("Host local out of bounds!");
|
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
|
||||||
//gs.imageTransfer = -1;
|
//gs.imageTransfer = -1;
|
||||||
end = MEMORY_END;
|
end = MEMORY_END;
|
||||||
}
|
}
|
||||||
|
@ -180,7 +185,6 @@
|
||||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||||
|
|
||||||
assert(start < gs_imageEnd);
|
assert(start < gs_imageEnd);
|
||||||
|
|
||||||
end = gs_imageEnd;
|
end = gs_imageEnd;
|
||||||
|
|
||||||
// sometimes games can decompress to alpha channel of render target only, in this case
|
// sometimes games can decompress to alpha channel of render target only, in this case
|
||||||
|
@ -434,20 +438,20 @@ __forceinline void _TransferLocalLocal_4()
|
||||||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
||||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||||
|
|
||||||
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
|
read = gsp((j+4)%2048, i%2048, gs.srcbuf.bw);
|
||||||
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
|
write = gdp((j2+4)%2048, i2%2048, gs.dstbuf.bw);
|
||||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
||||||
|
|
||||||
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
|
read = gsp((j+5)%2048, i%2048, gs.srcbuf.bw);
|
||||||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
write = gdp((j2+5)%2048, i2%2048, gs.dstbuf.bw);
|
||||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||||
|
|
||||||
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
|
read = gsp((j+6)%2048, i%2048, gs.srcbuf.bw);
|
||||||
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
|
write = gdp((j2+6)%2048, i2%2048, gs.dstbuf.bw);
|
||||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
||||||
|
|
||||||
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
|
read = gsp((j+7)%2048, i%2048, gs.srcbuf.bw);
|
||||||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
write = gdp((j2+7)%2048, i2%2048, gs.dstbuf.bw);
|
||||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -132,28 +132,35 @@ void CreateGameHackTable(GtkWidget *treeview, gameHacks hacks)
|
||||||
mapConfOpts.clear();
|
mapConfOpts.clear();
|
||||||
|
|
||||||
add_map_entry(GAME_TEXTURETARGS, "00000001", "Tex Target checking - 00000001\nLego Racers");
|
add_map_entry(GAME_TEXTURETARGS, "00000001", "Tex Target checking - 00000001\nLego Racers");
|
||||||
add_map_entry(GAME_AUTORESET, "00000002", "Auto reset targs - 00000002\nShadow Hearts, Samurai Warriors. Use when game is slow and toggling AA fixes it.");
|
add_map_entry(GAME_AUTORESET, "00000002", "Auto reset targs - 00000002\nUse when game is slow and toggling AA fixes it. Samurai Warriors. (Automatically on for Shadow Hearts)");
|
||||||
add_map_entry(GAME_NOTARGETRESOLVE, "00000010", "No target resolves - 00000010\nStops all resolving of targets. Try this first for really slow games. Dark Cloud 1");
|
add_map_entry(GAME_INTERLACE2X, "00000004", "Interlace 2X - 00000004\nFixes 2x bigger screen. Gradius 3.");
|
||||||
add_map_entry(GAME_EXACTCOLOR, "00000020", "Exact color testing - 00000020\nFixes overbright or shadow/black artifacts (Crash 'n Burn).");
|
//GAME_TEXAHACK (still implemented)
|
||||||
add_map_entry(GAME_NOCOLORCLAMP, "00000040", "No color clamping - 00000040\nSpeeds up games, but might be too bright or too dim.");
|
add_map_entry(GAME_NOTARGETRESOLVE, "00000010", "No target resolves - 00000010\nStops all resolving of targets. Try this first for really slow games. (Automatically on for Dark Cloud 1.)");
|
||||||
add_map_entry(GAME_NOALPHAFAIL, "00000100", "Alpha Fail hack - 00000100\nFor Sonic Unleashed, Shadow the Hedgehog, Ghost in the Shell. Remove vertical stripes or other coloring artefacts. Break Persona 4 and MGS3");
|
add_map_entry(GAME_EXACTCOLOR, "00000020", "Exact color testing - 00000020\nFixes overbright or shadow/black artifacts. Crash 'n Burn.");
|
||||||
|
//add_map_entry(GAME_NOCOLORCLAMP, "00000040", "No color clamping - 00000040\nSpeeds up games, but might be too bright or too dim.");
|
||||||
|
//GAME_FFXHACK
|
||||||
|
add_map_entry(GAME_NOALPHAFAIL, "00000100", "Alpha Fail hack - 00000100\nRemove vertical stripes or other coloring artifacts. Breaks Persona 4 and MGS3. (Automatically on for Sonic Unleashed, Shadow the Hedgehog, & Ghost in the Shell.)");
|
||||||
add_map_entry(GAME_NODEPTHUPDATE, "00000200", "Disable depth updates - 00000200");
|
add_map_entry(GAME_NODEPTHUPDATE, "00000200", "Disable depth updates - 00000200");
|
||||||
add_map_entry(GAME_QUICKRESOLVE1, "00000400", "Resolve Hack #1 - 00000400\nKingdom Hearts. Speeds some games.");
|
add_map_entry(GAME_QUICKRESOLVE1, "00000400", "Resolve Hack #1 - 00000400\n Speeds some games. Kingdom Hearts.");
|
||||||
add_map_entry(GAME_NOQUICKRESOLVE, "00000800", "Resolve Hack #2 - 00000800\nShadow Hearts, Urbz. Destroy FFX");
|
add_map_entry(GAME_NOQUICKRESOLVE, "00000800", "Resolve Hack #2 - 00000800\nShadow Hearts, Urbz. Destroys FFX.");
|
||||||
add_map_entry(GAME_NOTARGETCLUT, "00001000", "No target CLUT - 00001000\nResident Evil 4, or foggy scenes.");
|
add_map_entry(GAME_NOTARGETCLUT, "00001000", "No target CLUT - 00001000\nResident Evil 4, or foggy scenes.");
|
||||||
add_map_entry(GAME_NOSTENCIL, "00002000", "Disable stencil buffer - 00002000\nUsually safe to do for simple scenes. Harvest Moon");
|
add_map_entry(GAME_NOSTENCIL, "00002000", "Disable stencil buffer - 00002000\nUsually safe to do for simple scenes. Harvest Moon.");
|
||||||
|
//GAME_VSSHACKOFF (still implemented)
|
||||||
add_map_entry(GAME_NODEPTHRESOLVE, "00008000", "No depth resolve - 00008000\nMight give z buffer artifacts.");
|
add_map_entry(GAME_NODEPTHRESOLVE, "00008000", "No depth resolve - 00008000\nMight give z buffer artifacts.");
|
||||||
add_map_entry(GAME_FULL16BITRES, "00010000", "Full 16 bit resolution - 00010000\nUse when half the screen is missing.");
|
add_map_entry(GAME_FULL16BITRES, "00010000", "Full 16 bit resolution - 00010000\nUse when half the screen is missing.");
|
||||||
add_map_entry(GAME_RESOLVEPROMOTED, "00020000", "Resolve Hack #3 - 00020000\nNeopets");
|
add_map_entry(GAME_RESOLVEPROMOTED, "00020000", "Resolve Hack #3 - 00020000\nNeopets");
|
||||||
add_map_entry(GAME_FASTUPDATE, "00040000", "Fast Update - 00040000\nOkami. Speeds some games. Needs for Sonic Unleashed");
|
add_map_entry(GAME_FASTUPDATE, "00040000", "Fast Update - 00040000\n Speeds some games. Needed for Sonic Unleashed. Okami.");
|
||||||
add_map_entry(GAME_NOALPHATEST, "00080000", "Disable alpha testing - 00080000");
|
add_map_entry(GAME_NOALPHATEST, "00080000", "Disable alpha testing - 00080000");
|
||||||
add_map_entry(GAME_DISABLEMRTDEPTH, "00100000", "Enable Multiple RTs - 00100000");
|
add_map_entry(GAME_DISABLEMRTDEPTH, "00100000", "Enable Multiple RTs - 00100000");
|
||||||
add_map_entry(GAME_XENOSPECHACK, "01000000", "Specular Highlights - 01000000\nMakes Xenosaga and Okage graphics faster by removing highlights");
|
//GAME_32BITTARGS
|
||||||
add_map_entry(GAME_PARTIALPOINTERS, "02000000", "Partial targets - 02000000");
|
//GAME_PATH3HACK
|
||||||
|
//GAME_DOPARALLELCTX
|
||||||
|
add_map_entry(GAME_XENOSPECHACK, "01000000", "Specular Highlights - 01000000\nMakes graphics faster by removing highlights. (Automatically on for Xenosaga, Okami, & Okage.)");
|
||||||
|
//add_map_entry(GAME_PARTIALPOINTERS, "02000000", "Partial targets - 02000000");
|
||||||
add_map_entry(GAME_PARTIALDEPTH, "04000000", "Partial depth - 04000000");
|
add_map_entry(GAME_PARTIALDEPTH, "04000000", "Partial depth - 04000000");
|
||||||
add_map_entry(GAME_GUSTHACK, "10000000", "Gust fix, made gustgame more clean and fast - 10000000");
|
//GAME_REGETHACK (commented out in code)
|
||||||
add_map_entry(GAME_NOLOGZ, "20000000", "No logarithmic Z, could decrease number of Z-artefacts - 20000000");
|
add_map_entry(GAME_GUSTHACK, "10000000", "Gust fix - 10000000. Makes gust games cleaner and faster. (Automatically on for most Gust games)");
|
||||||
add_map_entry(GAME_INTERLACE2X, "00000004", "Interlace 2X - 00000004\nFixes 2x bigger screen (Gradius 3).");
|
add_map_entry(GAME_NOLOGZ, "20000000", "No logarithmic Z - 20000000. Could decrease number of Z-artifacts.");
|
||||||
add_map_entry(GAME_AUTOSKIPDRAW, "40000000", "Remove blur effect on some games\nSlow games.");
|
add_map_entry(GAME_AUTOSKIPDRAW, "40000000", "Remove blur effect on some games\nSlow games.");
|
||||||
|
|
||||||
for (map<string, confOptsStruct>::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it)
|
for (map<string, confOptsStruct>::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it)
|
||||||
|
@ -255,7 +262,7 @@ void DisplayDialog()
|
||||||
GtkWidget *option_frame, *option_box;
|
GtkWidget *option_frame, *option_box;
|
||||||
GtkWidget *log_check;
|
GtkWidget *log_check;
|
||||||
GtkWidget *int_label, *int_box, *int_holder;
|
GtkWidget *int_label, *int_box, *int_holder;
|
||||||
GtkWidget *bilinear_check;
|
GtkWidget *bilinear_label, *bilinear_box, *bilinear_holder;
|
||||||
GtkWidget *aa_label, *aa_box, *aa_holder;
|
GtkWidget *aa_label, *aa_box, *aa_holder;
|
||||||
GtkWidget *snap_label, *snap_box, *snap_holder;
|
GtkWidget *snap_label, *snap_box, *snap_holder;
|
||||||
GtkWidget *fullscreen_label, *widescreen_check;
|
GtkWidget *fullscreen_label, *widescreen_check;
|
||||||
|
@ -293,9 +300,17 @@ void DisplayDialog()
|
||||||
gtk_box_pack_start(GTK_BOX(int_holder), int_label, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(int_holder), int_label, false, false, 2);
|
||||||
gtk_box_pack_start(GTK_BOX(int_holder), int_box, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(int_holder), int_box, false, false, 2);
|
||||||
|
|
||||||
|
bilinear_label = gtk_label_new("Bilinear Filtering:");
|
||||||
|
bilinear_box = gtk_combo_box_new_text();
|
||||||
|
|
||||||
bilinear_check = gtk_check_button_new_with_label("Bilinear Filtering");
|
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Off");
|
||||||
gtk_widget_set_tooltip_text(bilinear_check, "Best quality is off. Turn on for speed. Toggled by pressing Shift + F5 when running.");
|
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Normal");
|
||||||
|
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Forced");
|
||||||
|
gtk_combo_box_set_active(GTK_COMBO_BOX(bilinear_box), conf.bilinear);
|
||||||
|
gtk_widget_set_tooltip_text(bilinear_box, "Best quality is off. Turn on for speed. Toggled by pressing Shift + F5 when running.");
|
||||||
|
bilinear_holder = gtk_hbox_new(false, 5);
|
||||||
|
gtk_box_pack_start(GTK_BOX(bilinear_holder), bilinear_label, false, false, 2);
|
||||||
|
gtk_box_pack_start(GTK_BOX(bilinear_holder), bilinear_box, false, false, 2);
|
||||||
|
|
||||||
aa_label = gtk_label_new("Anti-Aliasing:");
|
aa_label = gtk_label_new("Anti-Aliasing:");
|
||||||
aa_box = gtk_combo_box_new_text();
|
aa_box = gtk_combo_box_new_text();
|
||||||
|
@ -352,7 +367,7 @@ void DisplayDialog()
|
||||||
gtk_frame_set_shadow_type(GTK_FRAME(option_frame), GTK_SHADOW_NONE);
|
gtk_frame_set_shadow_type(GTK_FRAME(option_frame), GTK_SHADOW_NONE);
|
||||||
|
|
||||||
gtk_box_pack_start(GTK_BOX(option_box), log_check, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(option_box), log_check, false, false, 2);
|
||||||
gtk_box_pack_start(GTK_BOX(option_box), bilinear_check, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(option_box), bilinear_holder, false, false, 2);
|
||||||
gtk_box_pack_start(GTK_BOX(option_box), int_holder, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(option_box), int_holder, false, false, 2);
|
||||||
gtk_box_pack_start(GTK_BOX(option_box), aa_holder, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(option_box), aa_holder, false, false, 2);
|
||||||
gtk_box_pack_start(GTK_BOX(option_box), snap_holder, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(option_box), snap_holder, false, false, 2);
|
||||||
|
@ -370,7 +385,6 @@ void DisplayDialog()
|
||||||
gtk_box_pack_start(GTK_BOX(main_box), option_frame, false, false, 2);
|
gtk_box_pack_start(GTK_BOX(main_box), option_frame, false, false, 2);
|
||||||
|
|
||||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(log_check), conf.log);
|
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(log_check), conf.log);
|
||||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bilinear_check), conf.bilinear);
|
|
||||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(widescreen_check), (conf.widescreen()));
|
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(widescreen_check), (conf.widescreen()));
|
||||||
|
|
||||||
gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), main_frame);
|
gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), main_frame);
|
||||||
|
@ -390,8 +404,10 @@ void DisplayDialog()
|
||||||
if (gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)) != -1)
|
if (gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)) != -1)
|
||||||
conf.aa = gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box));
|
conf.aa = gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box));
|
||||||
|
|
||||||
|
if (gtk_combo_box_get_active(GTK_COMBO_BOX(bilinear_box)) != -1)
|
||||||
|
conf.bilinear = gtk_combo_box_get_active(GTK_COMBO_BOX(bilinear_box));
|
||||||
|
|
||||||
conf.log = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(log_check));
|
conf.log = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(log_check));
|
||||||
conf.bilinear = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(bilinear_check));
|
|
||||||
fake_options.widescreen = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widescreen_check));
|
fake_options.widescreen = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widescreen_check));
|
||||||
fake_options.tga_snap = gtk_combo_box_get_active(GTK_COMBO_BOX(snap_box));
|
fake_options.tga_snap = gtk_combo_box_get_active(GTK_COMBO_BOX(snap_box));
|
||||||
|
|
||||||
|
@ -445,7 +461,7 @@ void SysMessage(const char *fmt, ...)
|
||||||
|
|
||||||
void CALLBACK GSabout()
|
void CALLBACK GSabout()
|
||||||
{
|
{
|
||||||
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42). Based off of ZeroGS, by zerofrog.");
|
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42, gregory, and the pcsx2 development team). Based off of ZeroGS, by zerofrog.");
|
||||||
}
|
}
|
||||||
|
|
||||||
s32 CALLBACK GStest()
|
s32 CALLBACK GStest()
|
||||||
|
|
|
@ -152,6 +152,7 @@
|
||||||
<Unit filename="../../ZZoglFlush.cpp" />
|
<Unit filename="../../ZZoglFlush.cpp" />
|
||||||
<Unit filename="../../ZZoglFlushHack.cpp" />
|
<Unit filename="../../ZZoglFlushHack.cpp" />
|
||||||
<Unit filename="../../ZZoglFlushHack.h" />
|
<Unit filename="../../ZZoglFlushHack.h" />
|
||||||
|
<Unit filename="../../ZZoglMath.h" />
|
||||||
<Unit filename="../../ZZoglSave.cpp" />
|
<Unit filename="../../ZZoglSave.cpp" />
|
||||||
<Unit filename="../../ZZoglShaders.cpp" />
|
<Unit filename="../../ZZoglShaders.cpp" />
|
||||||
<Unit filename="../../ZZoglShaders.h" />
|
<Unit filename="../../ZZoglShaders.h" />
|
||||||
|
@ -171,7 +172,6 @@
|
||||||
<Unit filename="../../x86.h" />
|
<Unit filename="../../x86.h" />
|
||||||
<Unit filename="../../zerogs.cpp" />
|
<Unit filename="../../zerogs.cpp" />
|
||||||
<Unit filename="../../zerogs.h" />
|
<Unit filename="../../zerogs.h" />
|
||||||
<Unit filename="../../zerogsmath.h" />
|
|
||||||
<Unit filename="../../zpipe.cpp" />
|
<Unit filename="../../zpipe.cpp" />
|
||||||
<Unit filename="../../zpipe.h" />
|
<Unit filename="../../zpipe.h" />
|
||||||
<Extensions>
|
<Extensions>
|
||||||
|
|
|
@ -241,237 +241,136 @@ void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
||||||
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
||||||
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
||||||
|
|
||||||
#define FILL_BLOCK(psm, psmcol) \
|
void fill_block(BLOCK b, vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
||||||
{ \
|
|
||||||
b.pageTable = &g_pageTable##psm[0][0]; \
|
|
||||||
b.blockTable = &g_blockTable##psm[0][0]; \
|
|
||||||
b.columnTable = &g_columnTable##psmcol[0][0]; \
|
|
||||||
\
|
|
||||||
assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \
|
|
||||||
\
|
|
||||||
psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
|
||||||
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
|
||||||
\
|
|
||||||
for(i = 0; i < b.height; ++i) \
|
|
||||||
{ \
|
|
||||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
|
||||||
for(j = 0; j < b.width; ++j) \
|
|
||||||
{ \
|
|
||||||
/* fill the table */ \
|
|
||||||
u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \
|
|
||||||
b.pageTable[i * b.width + j] = u; \
|
|
||||||
psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
|
||||||
\
|
|
||||||
for(i = 0; i < b.height; ++i) \
|
|
||||||
{ \
|
|
||||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
|
||||||
u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH; \
|
|
||||||
for(j = 0; j < b.width; ++j) \
|
|
||||||
{ \
|
|
||||||
u32 temp = ((j + 1) % b.width); \
|
|
||||||
float4* pv = &psrcv[i_width + j]; \
|
|
||||||
pv->x = psrcf[i_width + j]; \
|
|
||||||
pv->y = psrcf[i_width + temp]; \
|
|
||||||
pv->z = psrcf[i_width2 + j]; \
|
|
||||||
pv->w = psrcf[i_width2 + temp]; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define FILL_BLOCK_NF(psm, psmcol) \
|
|
||||||
{ \
|
|
||||||
b.pageTable = &g_pageTable##psm[0][0]; \
|
|
||||||
b.blockTable = &g_blockTable##psm[0][0]; \
|
|
||||||
b.columnTable = &g_columnTable##psmcol[0][0]; \
|
|
||||||
\
|
|
||||||
assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \
|
|
||||||
\
|
|
||||||
psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
|
||||||
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
|
||||||
\
|
|
||||||
for(i = 0; i < b.height; ++i) \
|
|
||||||
{ \
|
|
||||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
|
||||||
for(j = 0; j < b.width; ++j) \
|
|
||||||
{ \
|
|
||||||
/* fill the table */ \
|
|
||||||
u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \
|
|
||||||
b.pageTable[i * b.width + j] = u; \
|
|
||||||
psrcw[i_width + j] = u; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
void FillBlocksNF(vector<char>& vBlockData, vector<char>& vBilinearData)
|
|
||||||
{
|
{
|
||||||
FUNCLOG
|
float* psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
|
|
||||||
|
|
||||||
int i, j;
|
|
||||||
BLOCK b;
|
|
||||||
float* psrcf = NULL;
|
|
||||||
u16* psrcw = NULL;
|
u16* psrcw = NULL;
|
||||||
|
if (!floatfmt)
|
||||||
|
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||||
|
|
||||||
memset(m_Blocks, 0, sizeof(m_Blocks));
|
for(int i = 0; i < b.height; ++i)
|
||||||
|
{
|
||||||
|
u32 i_width = i*BLOCK_TEXWIDTH;
|
||||||
|
for(int j = 0; j < b.width; ++j)
|
||||||
|
{
|
||||||
|
/* fill the table */
|
||||||
|
u32 bt = b.blockTable[(i / b.colheight)*(b.width/b.colwidth) + (j / b.colwidth)];
|
||||||
|
u32 ct = b.columnTable[(i%b.colheight)*b.colwidth + (j%b.colwidth)];
|
||||||
|
u32 u = bt * 64 * b.mult + ct;
|
||||||
|
b.pageTable[i * b.width + j] = u;
|
||||||
|
if (floatfmt)
|
||||||
|
psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult);
|
||||||
|
else
|
||||||
|
psrcw[i_width + j] = u;
|
||||||
|
|
||||||
// 32
|
}
|
||||||
b.SetDim(64, 32, 0, 0, 1);
|
|
||||||
FILL_BLOCK_NF(32, 32);
|
|
||||||
m_Blocks[PSMCT32] = b;
|
|
||||||
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
|
||||||
|
|
||||||
// 24 (same as 32 except write/readPixel are different)
|
|
||||||
m_Blocks[PSMCT24] = b;
|
|
||||||
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
|
||||||
|
|
||||||
// 8H (same as 32 except write/readPixel are different)
|
|
||||||
m_Blocks[PSMT8H] = b;
|
|
||||||
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
|
||||||
|
|
||||||
m_Blocks[PSMT4HL] = b;
|
|
||||||
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
|
||||||
|
|
||||||
m_Blocks[PSMT4HH] = b;
|
|
||||||
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
|
||||||
|
|
||||||
// 32z
|
|
||||||
b.SetDim(64, 32, 64, 0, 1);
|
|
||||||
FILL_BLOCK_NF(32Z, 32);
|
|
||||||
m_Blocks[PSMT32Z] = b;
|
|
||||||
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
|
||||||
|
|
||||||
// 24Z (same as 32Z except write/readPixel are different)
|
|
||||||
m_Blocks[PSMT24Z] = b;
|
|
||||||
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
|
||||||
|
|
||||||
// 16
|
|
||||||
b.SetDim(64, 64, 0, 32, 2);
|
|
||||||
FILL_BLOCK_NF(16, 16);
|
|
||||||
m_Blocks[PSMCT16] = b;
|
|
||||||
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
|
||||||
|
|
||||||
// 16s
|
|
||||||
b.SetDim(64, 64, 64, 32, 2);
|
|
||||||
FILL_BLOCK_NF(16S, 16);
|
|
||||||
m_Blocks[PSMCT16S] = b;
|
|
||||||
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
|
||||||
|
|
||||||
// 16z
|
|
||||||
b.SetDim(64, 64, 0, 96, 2);
|
|
||||||
FILL_BLOCK_NF(16Z, 16);
|
|
||||||
m_Blocks[PSMT16Z] = b;
|
|
||||||
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
|
||||||
|
|
||||||
// 16sz
|
|
||||||
b.SetDim(64, 64, 64, 96, 2);
|
|
||||||
FILL_BLOCK_NF(16SZ, 16);
|
|
||||||
m_Blocks[PSMT16SZ] = b;
|
|
||||||
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
|
||||||
|
|
||||||
// 8
|
|
||||||
b.SetDim(128, 64, 0, 160, 4);
|
|
||||||
FILL_BLOCK_NF(8, 8);
|
|
||||||
m_Blocks[PSMT8] = b;
|
|
||||||
m_Blocks[PSMT8].SetFun(PSMT8);
|
|
||||||
|
|
||||||
// 4
|
|
||||||
b.SetDim(128, 128, 0, 224, 8);
|
|
||||||
FILL_BLOCK_NF(4, 4);
|
|
||||||
m_Blocks[PSMT4] = b;
|
|
||||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (floatfmt) {
|
||||||
|
float4* psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||||
|
|
||||||
void FillBlocksF(vector<char>& vBlockData, vector<char>& vBilinearData)
|
for(int i = 0; i < b.height; ++i)
|
||||||
{
|
{
|
||||||
FUNCLOG
|
u32 i_width = i*BLOCK_TEXWIDTH;
|
||||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
|
u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH;
|
||||||
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
|
for(int j = 0; j < b.width; ++j)
|
||||||
|
{
|
||||||
int i, j;
|
u32 temp = ((j + 1) % b.width);
|
||||||
BLOCK b;
|
float4* pv = &psrcv[i_width + j];
|
||||||
float* psrcf = NULL;
|
pv->x = psrcf[i_width + j];
|
||||||
u16* psrcw = NULL;
|
pv->y = psrcf[i_width + temp];
|
||||||
float4* psrcv = NULL;
|
pv->z = psrcf[i_width2 + j];
|
||||||
|
pv->w = psrcf[i_width2 + temp];
|
||||||
memset(m_Blocks, 0, sizeof(m_Blocks));
|
}
|
||||||
|
}
|
||||||
// 32
|
}
|
||||||
b.SetDim(64, 32, 0, 0, 1);
|
|
||||||
FILL_BLOCK(32, 32);
|
|
||||||
m_Blocks[PSMCT32] = b;
|
|
||||||
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
|
||||||
|
|
||||||
// 24 (same as 32 except write/readPixel are different)
|
|
||||||
m_Blocks[PSMCT24] = b;
|
|
||||||
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
|
||||||
|
|
||||||
// 8H (same as 32 except write/readPixel are different)
|
|
||||||
m_Blocks[PSMT8H] = b;
|
|
||||||
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
|
||||||
|
|
||||||
m_Blocks[PSMT4HL] = b;
|
|
||||||
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
|
||||||
|
|
||||||
m_Blocks[PSMT4HH] = b;
|
|
||||||
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
|
||||||
|
|
||||||
// 32z
|
|
||||||
b.SetDim(64, 32, 64, 0, 1);
|
|
||||||
FILL_BLOCK(32Z, 32);
|
|
||||||
m_Blocks[PSMT32Z] = b;
|
|
||||||
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
|
||||||
|
|
||||||
// 24Z (same as 32Z except write/readPixel are different)
|
|
||||||
m_Blocks[PSMT24Z] = b;
|
|
||||||
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
|
||||||
|
|
||||||
// 16
|
|
||||||
b.SetDim(64, 64, 0, 32, 2);
|
|
||||||
FILL_BLOCK(16, 16);
|
|
||||||
m_Blocks[PSMCT16] = b;
|
|
||||||
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
|
||||||
|
|
||||||
// 16s
|
|
||||||
b.SetDim(64, 64, 64, 32, 2);
|
|
||||||
FILL_BLOCK(16S, 16);
|
|
||||||
m_Blocks[PSMCT16S] = b;
|
|
||||||
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
|
||||||
|
|
||||||
// 16z
|
|
||||||
b.SetDim(64, 64, 0, 96, 2);
|
|
||||||
FILL_BLOCK(16Z, 16);
|
|
||||||
m_Blocks[PSMT16Z] = b;
|
|
||||||
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
|
||||||
|
|
||||||
// 16sz
|
|
||||||
b.SetDim(64, 64, 64, 96, 2);
|
|
||||||
FILL_BLOCK(16SZ, 16);
|
|
||||||
m_Blocks[PSMT16SZ] = b;
|
|
||||||
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
|
||||||
|
|
||||||
// 8
|
|
||||||
b.SetDim(128, 64, 0, 160, 4);
|
|
||||||
FILL_BLOCK(8, 8);
|
|
||||||
m_Blocks[PSMT8] = b;
|
|
||||||
m_Blocks[PSMT8].SetFun(PSMT8);
|
|
||||||
|
|
||||||
// 4
|
|
||||||
b.SetDim(128, 128, 0, 224, 8);
|
|
||||||
FILL_BLOCK(4, 4);
|
|
||||||
m_Blocks[PSMT4] = b;
|
|
||||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
||||||
{
|
{
|
||||||
FUNCLOG
|
FUNCLOG
|
||||||
if (floatfmt)
|
if (floatfmt) {
|
||||||
FillBlocksF(vBlockData, vBilinearData);
|
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
|
||||||
else
|
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
|
||||||
FillBlocksNF(vBlockData, vBilinearData);
|
} else {
|
||||||
|
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
BLOCK b;
|
||||||
|
|
||||||
|
memset(m_Blocks, 0, sizeof(m_Blocks));
|
||||||
|
|
||||||
|
// 32
|
||||||
|
b.SetDim(64, 32, 0, 0, 1);
|
||||||
|
b.SetTable(PSMCT32);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMCT32] = b;
|
||||||
|
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
||||||
|
|
||||||
|
// 24 (same as 32 except write/readPixel are different)
|
||||||
|
m_Blocks[PSMCT24] = b;
|
||||||
|
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
||||||
|
|
||||||
|
// 8H (same as 32 except write/readPixel are different)
|
||||||
|
m_Blocks[PSMT8H] = b;
|
||||||
|
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
||||||
|
|
||||||
|
m_Blocks[PSMT4HL] = b;
|
||||||
|
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
||||||
|
|
||||||
|
m_Blocks[PSMT4HH] = b;
|
||||||
|
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
||||||
|
|
||||||
|
// 32z
|
||||||
|
b.SetDim(64, 32, 64, 0, 1);
|
||||||
|
b.SetTable(PSMT32Z);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMT32Z] = b;
|
||||||
|
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
||||||
|
|
||||||
|
// 24Z (same as 32Z except write/readPixel are different)
|
||||||
|
m_Blocks[PSMT24Z] = b;
|
||||||
|
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
||||||
|
|
||||||
|
// 16
|
||||||
|
b.SetDim(64, 64, 0, 32, 2);
|
||||||
|
b.SetTable(PSMCT16);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMCT16] = b;
|
||||||
|
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
||||||
|
|
||||||
|
// 16s
|
||||||
|
b.SetDim(64, 64, 64, 32, 2);
|
||||||
|
b.SetTable(PSMCT16S);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMCT16S] = b;
|
||||||
|
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
||||||
|
|
||||||
|
// 16z
|
||||||
|
b.SetDim(64, 64, 0, 96, 2);
|
||||||
|
b.SetTable(PSMT16Z);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMT16Z] = b;
|
||||||
|
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
||||||
|
|
||||||
|
// 16sz
|
||||||
|
b.SetDim(64, 64, 64, 96, 2);
|
||||||
|
b.SetTable(PSMT16SZ);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMT16SZ] = b;
|
||||||
|
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
||||||
|
|
||||||
|
// 8
|
||||||
|
b.SetDim(128, 64, 0, 160, 4);
|
||||||
|
b.SetTable(PSMT8);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMT8] = b;
|
||||||
|
m_Blocks[PSMT8].SetFun(PSMT8);
|
||||||
|
|
||||||
|
// 4
|
||||||
|
b.SetDim(128, 128, 0, 224, 8);
|
||||||
|
b.SetTable(PSMT4);
|
||||||
|
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||||
|
m_Blocks[PSMT4] = b;
|
||||||
|
m_Blocks[PSMT4].SetFun(PSMT4);
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,6 +92,29 @@ struct TransferFuncts
|
||||||
extern TransferData tData[64];
|
extern TransferData tData[64];
|
||||||
// rest not visible externally
|
// rest not visible externally
|
||||||
|
|
||||||
|
extern u32 g_blockTable32[4][8];
|
||||||
|
extern u32 g_blockTable32Z[4][8];
|
||||||
|
extern u32 g_blockTable16[8][4];
|
||||||
|
extern u32 g_blockTable16S[8][4];
|
||||||
|
extern u32 g_blockTable16Z[8][4];
|
||||||
|
extern u32 g_blockTable16SZ[8][4];
|
||||||
|
extern u32 g_blockTable8[4][8];
|
||||||
|
extern u32 g_blockTable4[8][4];
|
||||||
|
|
||||||
|
extern u32 g_columnTable32[8][8];
|
||||||
|
extern u32 g_columnTable16[8][16];
|
||||||
|
extern u32 g_columnTable8[16][16];
|
||||||
|
extern u32 g_columnTable4[16][32];
|
||||||
|
|
||||||
|
extern u32 g_pageTable32[32][64];
|
||||||
|
extern u32 g_pageTable32Z[32][64];
|
||||||
|
extern u32 g_pageTable16[64][64];
|
||||||
|
extern u32 g_pageTable16S[64][64];
|
||||||
|
extern u32 g_pageTable16Z[64][64];
|
||||||
|
extern u32 g_pageTable16SZ[64][64];
|
||||||
|
extern u32 g_pageTable8[64][128];
|
||||||
|
extern u32 g_pageTable4[128][128];
|
||||||
|
|
||||||
struct BLOCK
|
struct BLOCK
|
||||||
{
|
{
|
||||||
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
|
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
|
||||||
|
@ -142,47 +165,69 @@ struct BLOCK
|
||||||
TransferHostLocal = TransferHostLocalFun[psm];
|
TransferHostLocal = TransferHostLocalFun[psm];
|
||||||
TransferLocalHost = TransferLocalHostFun[psm];
|
TransferLocalHost = TransferLocalHostFun[psm];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetTable(u32 psm)
|
||||||
|
{
|
||||||
|
switch (psm) {
|
||||||
|
case PSMCT32:
|
||||||
|
assert( sizeof(g_pageTable32) == width * height * sizeof(g_pageTable32[0][0]) );
|
||||||
|
pageTable = &g_pageTable32[0][0];
|
||||||
|
blockTable = &g_blockTable32[0][0];
|
||||||
|
columnTable = &g_columnTable32[0][0];
|
||||||
|
break;
|
||||||
|
case PSMT32Z:
|
||||||
|
assert( sizeof(g_pageTable32Z) == width * height * sizeof(g_pageTable32Z[0][0]) );
|
||||||
|
pageTable = &g_pageTable32Z[0][0];
|
||||||
|
blockTable = &g_blockTable32Z[0][0];
|
||||||
|
columnTable = &g_columnTable32[0][0];
|
||||||
|
break;
|
||||||
|
case PSMCT16:
|
||||||
|
assert( sizeof(g_pageTable16) == width * height * sizeof(g_pageTable16[0][0]) );
|
||||||
|
pageTable = &g_pageTable16[0][0];
|
||||||
|
blockTable = &g_blockTable16[0][0];
|
||||||
|
columnTable = &g_columnTable16[0][0];
|
||||||
|
break;
|
||||||
|
case PSMCT16S:
|
||||||
|
assert( sizeof(g_pageTable16S) == width * height * sizeof(g_pageTable16S[0][0]) );
|
||||||
|
pageTable = &g_pageTable16S[0][0];
|
||||||
|
blockTable = &g_blockTable16S[0][0];
|
||||||
|
columnTable = &g_columnTable16[0][0];
|
||||||
|
break;
|
||||||
|
case PSMT16Z:
|
||||||
|
assert( sizeof(g_pageTable16Z) == width * height * sizeof(g_pageTable16Z[0][0]) );
|
||||||
|
pageTable = &g_pageTable16Z[0][0];
|
||||||
|
blockTable = &g_blockTable16Z[0][0];
|
||||||
|
columnTable = &g_columnTable16[0][0];
|
||||||
|
break;
|
||||||
|
case PSMT16SZ:
|
||||||
|
assert( sizeof(g_pageTable16SZ) == width * height * sizeof(g_pageTable16SZ[0][0]) );
|
||||||
|
pageTable = &g_pageTable16SZ[0][0];
|
||||||
|
blockTable = &g_blockTable16SZ[0][0];
|
||||||
|
columnTable = &g_columnTable16[0][0];
|
||||||
|
break;
|
||||||
|
case PSMT8:
|
||||||
|
assert( sizeof(g_pageTable8) == width * height * sizeof(g_pageTable8[0][0]) );
|
||||||
|
pageTable = &g_pageTable8[0][0];
|
||||||
|
blockTable = &g_blockTable8[0][0];
|
||||||
|
columnTable = &g_columnTable8[0][0];
|
||||||
|
break;
|
||||||
|
case PSMT4:
|
||||||
|
assert( sizeof(g_pageTable4) == width * height * sizeof(g_pageTable4[0][0]) );
|
||||||
|
pageTable = &g_pageTable4[0][0];
|
||||||
|
blockTable = &g_blockTable4[0][0];
|
||||||
|
columnTable = &g_columnTable4[0][0];
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pageTable = NULL;
|
||||||
|
blockTable = NULL;
|
||||||
|
columnTable = NULL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
extern BLOCK m_Blocks[];
|
extern BLOCK m_Blocks[];
|
||||||
|
|
||||||
extern u32 g_blockTable32[4][8];
|
|
||||||
extern u32 g_blockTable32Z[4][8];
|
|
||||||
extern u32 g_blockTable16[8][4];
|
|
||||||
extern u32 g_blockTable16S[8][4];
|
|
||||||
extern u32 g_blockTable16Z[8][4];
|
|
||||||
extern u32 g_blockTable16SZ[8][4];
|
|
||||||
extern u32 g_blockTable8[4][8];
|
|
||||||
extern u32 g_blockTable4[8][4];
|
|
||||||
|
|
||||||
extern u32 g_columnTable32[8][8];
|
|
||||||
extern u32 g_columnTable16[8][16];
|
|
||||||
extern u32 g_columnTable8[16][16];
|
|
||||||
extern u32 g_columnTable4[16][32];
|
|
||||||
|
|
||||||
extern u32 g_pageTable32[32][64];
|
|
||||||
extern u32 g_pageTable32Z[32][64];
|
|
||||||
extern u32 g_pageTable16[64][64];
|
|
||||||
extern u32 g_pageTable16S[64][64];
|
|
||||||
extern u32 g_pageTable16Z[64][64];
|
|
||||||
extern u32 g_pageTable16SZ[64][64];
|
|
||||||
extern u32 g_pageTable8[64][128];
|
|
||||||
extern u32 g_pageTable4[128][128];
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define getPixelAddress24 getPixelAddress32
|
#define getPixelAddress24 getPixelAddress32
|
||||||
#define getPixelAddress24_0 getPixelAddress32_0
|
#define getPixelAddress24_0 getPixelAddress32_0
|
||||||
#define getPixelAddress8H getPixelAddress32
|
#define getPixelAddress8H getPixelAddress32
|
||||||
|
@ -191,6 +236,15 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
||||||
#define getPixelAddress4HL_0 getPixelAddress32_0
|
#define getPixelAddress4HL_0 getPixelAddress32_0
|
||||||
#define getPixelAddress4HH getPixelAddress32
|
#define getPixelAddress4HH getPixelAddress32
|
||||||
#define getPixelAddress4HH_0 getPixelAddress32_0
|
#define getPixelAddress4HH_0 getPixelAddress32_0
|
||||||
|
#define getPixelAddress24Z getPixelAddress32Z
|
||||||
|
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
||||||
|
|
||||||
|
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
|
||||||
|
{
|
||||||
|
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||||
|
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
|
@ -199,13 +253,6 @@ static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||||
|
@ -213,13 +260,6 @@ static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
|
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
|
||||||
|
@ -227,13 +267,6 @@ static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
|
|
||||||
u32 word = basepage * 8192 + g_pageTable8[y&63][x&127];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
|
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
|
||||||
|
@ -241,13 +274,6 @@ static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
|
|
||||||
u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||||
|
@ -255,16 +281,6 @@ static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define getPixelAddress24Z getPixelAddress32Z
|
|
||||||
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||||
|
@ -272,13 +288,6 @@ static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||||
|
@ -286,15 +295,7 @@ static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
|
///////////////
|
||||||
{
|
|
||||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
|
||||||
u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
|
|
||||||
return word;
|
|
||||||
}
|
|
||||||
|
|
||||||
//#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
|
|
||||||
//#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
|
|
||||||
|
|
||||||
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||||
{
|
{
|
||||||
|
@ -375,7 +376,6 @@ static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u3
|
||||||
((u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)] = pixel;
|
((u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)] = pixel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////
|
///////////////
|
||||||
|
|
||||||
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw)
|
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||||
|
@ -457,161 +457,48 @@ static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u
|
||||||
// Functions that take 0 bps //
|
// Functions that take 0 bps //
|
||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
|
|
||||||
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) { return getPixelAddress32(x, y, 0, bw); }
|
||||||
{
|
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw) { return getPixelAddress16(x, y, 0, bw); }
|
||||||
((u32*)pmem)[getPixelAddress32_0(x, y, bw)] = pixel;
|
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw) { return getPixelAddress16S(x, y, 0, bw); }
|
||||||
}
|
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw) { return getPixelAddress8(x, y, 0, bw); }
|
||||||
|
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw) { return getPixelAddress4(x, y, 0, bw); }
|
||||||
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) { return getPixelAddress32Z(x, y, 0, bw); }
|
||||||
{
|
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw) { return getPixelAddress16Z(x, y, 0, bw); }
|
||||||
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) { return getPixelAddress16SZ(x, y, 0, bw); }
|
||||||
u8 *pix = (u8*) & pixel;
|
|
||||||
buf[0] = pix[0];
|
|
||||||
buf[1] = pix[1];
|
|
||||||
buf[2] = pix[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u16*)pmem)[getPixelAddress16_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u16*)pmem)[getPixelAddress16S_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u8*)pmem)[getPixelAddress8_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u8*)pmem)[4*getPixelAddress32_0(x, y, bw)+3] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
|
||||||
u8 pix = ((u8*)pmem)[addr/2];
|
|
||||||
|
|
||||||
if (addr & 0x1)((u8*)pmem)[addr/2] = (pix & 0x0f) | (pixel << 4);
|
|
||||||
else ((u8*)pmem)[addr/2] = (pix & 0xf0) | (pixel);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
|
|
||||||
*p = (*p & 0xf0) | pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
u8 *p = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
|
|
||||||
*p = (*p & 0x0f) | (pixel << 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
|
|
||||||
u8 *pix = (u8*) & pixel;
|
|
||||||
buf[0] = pix[0];
|
|
||||||
buf[1] = pix[1];
|
|
||||||
buf[2] = pix[2];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u16*)pmem)[getPixelAddress16Z_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
|
||||||
{
|
|
||||||
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////
|
///////////////
|
||||||
|
|
||||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)
|
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel32(pmem, x, y, pixel, 0, bw); }
|
||||||
{
|
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel24(pmem, x, y, pixel, 0, bw); }
|
||||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16(pmem, x, y, pixel, 0, bw); }
|
||||||
}
|
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16S(pmem, x, y, pixel, 0, bw); }
|
||||||
|
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel8(pmem, x, y, pixel, 0, bw); }
|
||||||
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw)
|
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel8H(pmem, x, y, pixel, 0, bw); }
|
||||||
{
|
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4(pmem, x, y, pixel, 0, bw); }
|
||||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)] & 0xffffff;
|
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4HL(pmem, x, y, pixel, 0, bw); }
|
||||||
}
|
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4HH(pmem, x, y, pixel, 0, bw); }
|
||||||
|
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel32Z(pmem, x, y, pixel, 0, bw); }
|
||||||
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw)
|
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel24Z(pmem, x, y, pixel, 0, bw); }
|
||||||
{
|
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16Z(pmem, x, y, pixel, 0, bw); }
|
||||||
return ((const u16*)pmem)[getPixelAddress16_0(x, y, bw)];
|
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16SZ(pmem, x, y, pixel, 0, bw); }
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
return ((const u16*)pmem)[getPixelAddress16S_0(x, y, bw)];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
return ((const u8*)pmem)[getPixelAddress8_0(x, y, bw)];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
return ((const u8*)pmem)[4*getPixelAddress32_0(x, y, bw) + 3];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
u32 addr = getPixelAddress4_0(x, y, bw);
|
|
||||||
u8 pix = ((const u8*)pmem)[addr/2];
|
|
||||||
|
|
||||||
if (addr & 0x1)
|
|
||||||
return pix >> 4;
|
|
||||||
else
|
|
||||||
return pix & 0xf;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
|
|
||||||
return *p & 0x0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
|
|
||||||
return *p >> 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////
|
///////////////
|
||||||
|
|
||||||
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw)
|
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) { return readPixel32(pmem, x, y, 0, bw); }
|
||||||
{
|
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw) { return readPixel24(pmem, x, y, 0, bw); }
|
||||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)];
|
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw) { return readPixel16(pmem, x, y, 0, bw); }
|
||||||
}
|
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw) { return readPixel16S(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw) { return readPixel8(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw) { return readPixel8H(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) { return readPixel4(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw) { return readPixel4HL(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw) { return readPixel4HH(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel32Z(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel24Z(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel16Z(pmem, x, y, 0, bw); }
|
||||||
|
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) { return readPixel16SZ(pmem, x, y, 0, bw); }
|
||||||
|
|
||||||
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw)
|
///////////////
|
||||||
{
|
|
||||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] & 0xffffff;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
return ((const u16*)pmem)[getPixelAddress16Z_0(x, y, bw)];
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
|
|
||||||
{
|
|
||||||
return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)];
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int TransferHostLocal32(const void* pbyMem, u32 nQWordSize);
|
extern int TransferHostLocal32(const void* pbyMem, u32 nQWordSize);
|
||||||
extern int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize);
|
extern int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize);
|
||||||
|
|
|
@ -638,7 +638,7 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data)
|
||||||
Flush();
|
Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
m_env.CTXT[i].SCISSOR = (Vector4i)r->SCISSOR;
|
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
|
||||||
|
|
||||||
m_env.CTXT[i].UpdateScissor();*/
|
m_env.CTXT[i].UpdateScissor();*/
|
||||||
ZZLog::Greg_Log("SCISSOR%d", i);
|
ZZLog::Greg_Log("SCISSOR%d", i);
|
||||||
|
|
|
@ -56,6 +56,7 @@ extern "C" char* CALLBACK PS2EgetLibName(void);
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file path
|
extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file path
|
||||||
|
|
||||||
|
@ -87,6 +88,9 @@ static __forceinline void pcsx2_aligned_free(void* pmem)
|
||||||
#define _aligned_malloc pcsx2_aligned_malloc
|
#define _aligned_malloc pcsx2_aligned_malloc
|
||||||
#define _aligned_free pcsx2_aligned_free
|
#define _aligned_free pcsx2_aligned_free
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __LINUX__
|
||||||
#include <sys/timeb.h> // ftime(), struct timeb
|
#include <sys/timeb.h> // ftime(), struct timeb
|
||||||
|
|
||||||
inline unsigned long timeGetTime()
|
inline unsigned long timeGetTime()
|
||||||
|
@ -97,6 +101,15 @@ inline unsigned long timeGetTime()
|
||||||
return (unsigned long)(t.time*1000 + t.millitm);
|
return (unsigned long)(t.time*1000 + t.millitm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
inline unsigned long timeGetPreciseTime()
|
||||||
|
{
|
||||||
|
timespec t;
|
||||||
|
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);
|
||||||
|
|
||||||
|
return t.tv_nsec;
|
||||||
|
}
|
||||||
|
|
||||||
struct RECT
|
struct RECT
|
||||||
{
|
{
|
||||||
int left, top;
|
int left, top;
|
||||||
|
@ -138,6 +151,7 @@ enum GSWindowDim
|
||||||
GSDim_1024,
|
GSDim_1024,
|
||||||
GSDim_1280,
|
GSDim_1280,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef union
|
typedef union
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
|
@ -217,7 +231,7 @@ typedef struct
|
||||||
gameHacks settings()
|
gameHacks settings()
|
||||||
{
|
{
|
||||||
gameHacks tempHack;
|
gameHacks tempHack;
|
||||||
tempHack._u32 = (hacks._u32 | def_hacks._u32 | GAME_PATH3HACK);
|
tempHack._u32 = (hacks._u32 | def_hacks._u32);
|
||||||
return tempHack;
|
return tempHack;
|
||||||
}
|
}
|
||||||
bool fullscreen() { return !!(zz_options.fullscreen); }
|
bool fullscreen() { return !!(zz_options.fullscreen); }
|
||||||
|
|
|
@ -25,6 +25,8 @@ void SaveConfig()
|
||||||
WritePrivateProfileString("Settings", "Width", szValue, iniFile.c_str());
|
WritePrivateProfileString("Settings", "Width", szValue, iniFile.c_str());
|
||||||
sprintf(szValue, "%u", conf.height);
|
sprintf(szValue, "%u", conf.height);
|
||||||
WritePrivateProfileString("Settings", "Height", szValue, iniFile.c_str());
|
WritePrivateProfileString("Settings", "Height", szValue, iniFile.c_str());
|
||||||
|
sprintf(szValue, "%u", conf.SkipDraw);
|
||||||
|
WritePrivateProfileString("Settings", "SkipDraw", szValue, iniFile.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoadConfig()
|
void LoadConfig()
|
||||||
|
@ -40,6 +42,7 @@ void LoadConfig()
|
||||||
conf.bilinear = 1;
|
conf.bilinear = 1;
|
||||||
conf.width = 640;
|
conf.width = 640;
|
||||||
conf.height = 480;
|
conf.height = 480;
|
||||||
|
conf.SkipDraw = 0;
|
||||||
|
|
||||||
FILE *fp = fopen(iniFile.c_str(), "rt");
|
FILE *fp = fopen(iniFile.c_str(), "rt");
|
||||||
|
|
||||||
|
@ -67,6 +70,8 @@ void LoadConfig()
|
||||||
conf.width = strtoul(szValue, NULL, 10);
|
conf.width = strtoul(szValue, NULL, 10);
|
||||||
GetPrivateProfileString("Settings", "Height", NULL, szValue, 20, iniFile.c_str());
|
GetPrivateProfileString("Settings", "Height", NULL, szValue, 20, iniFile.c_str());
|
||||||
conf.height = strtoul(szValue, NULL, 10);
|
conf.height = strtoul(szValue, NULL, 10);
|
||||||
|
GetPrivateProfileString("Settings", "SkipDraw", NULL, szValue, 20, iniFile.c_str());
|
||||||
|
conf.SkipDraw = strtoul(szValue, NULL, 10);
|
||||||
|
|
||||||
if (conf.aa < 0 || conf.aa > 4) conf.aa = 0;
|
if (conf.aa < 0 || conf.aa > 4) conf.aa = 0;
|
||||||
|
|
||||||
|
|
|
@ -116,7 +116,7 @@ typedef struct GameHackStruct
|
||||||
u32 HackMask;
|
u32 HackMask;
|
||||||
} GameHack;
|
} GameHack;
|
||||||
|
|
||||||
#define HACK_NUMBER 30
|
#define HACK_NUMBER 25
|
||||||
|
|
||||||
GameHack HackinshTable[HACK_NUMBER] =
|
GameHack HackinshTable[HACK_NUMBER] =
|
||||||
{
|
{
|
||||||
|
@ -127,30 +127,31 @@ GameHack HackinshTable[HACK_NUMBER] =
|
||||||
{"*** 4 TexA hack", GAME_TEXAHACK},
|
{"*** 4 TexA hack", GAME_TEXAHACK},
|
||||||
{"*** 5 No Target Resolve", GAME_NOTARGETRESOLVE},
|
{"*** 5 No Target Resolve", GAME_NOTARGETRESOLVE},
|
||||||
{"*** 6 Exact color", GAME_EXACTCOLOR},
|
{"*** 6 Exact color", GAME_EXACTCOLOR},
|
||||||
{"*** 7 No color clamp", GAME_NOCOLORCLAMP},
|
//{"***xx No color clamp", GAME_NOCOLORCLAMP},
|
||||||
{"*** 8 FFX hack", GAME_FFXHACK},
|
//{"***xx FFX hack", GAME_FFXHACK},
|
||||||
{"*** 9 No Alpha Fail", GAME_NOALPHAFAIL},
|
{"*** 7 No Alpha Fail", GAME_NOALPHAFAIL},
|
||||||
{"***10 No Depth Update", GAME_NODEPTHUPDATE},
|
{"*** 8 No Depth Update", GAME_NODEPTHUPDATE},
|
||||||
{"***11 Quick Resolve 1", GAME_QUICKRESOLVE1},
|
{"*** 9 Quick Resolve 1", GAME_QUICKRESOLVE1},
|
||||||
{"***12 No quick resolve", GAME_NOQUICKRESOLVE},
|
{"***10 No quick resolve", GAME_NOQUICKRESOLVE},
|
||||||
{"***13 Notaget clut", GAME_NOTARGETCLUT},
|
{"***11 Notaget clut", GAME_NOTARGETCLUT},
|
||||||
{"***14 No Stencil", GAME_NOSTENCIL},
|
{"***12 No Stencil", GAME_NOSTENCIL},
|
||||||
{"***15 No Depth resolve", GAME_NODEPTHRESOLVE},
|
{"***13 No Depth resolve", GAME_NODEPTHRESOLVE},
|
||||||
{"***16 Full 16 bit", GAME_FULL16BITRES},
|
{"***14 Full 16 bit", GAME_FULL16BITRES},
|
||||||
{"***17 Resolve promoted", GAME_RESOLVEPROMOTED},
|
{"***15 Resolve promoted", GAME_RESOLVEPROMOTED},
|
||||||
{"***18 Fast Update", GAME_FASTUPDATE},
|
{"***16 Fast Update", GAME_FASTUPDATE},
|
||||||
{"***19 No Alpha Test", GAME_NOALPHATEST},
|
{"***17 No Alpha Test", GAME_NOALPHATEST},
|
||||||
{"***20 Disable MRT deprh", GAME_DISABLEMRTDEPTH},
|
{"***18 Disable MRT depth", GAME_DISABLEMRTDEPTH},
|
||||||
{"***21 32 bit targes", GAME_32BITTARGS},
|
//{"***xx 32 bit targs", GAME_32BITTARGS},
|
||||||
{"***22 path 3 hack", GAME_PATH3HACK},
|
//{"***xx Path 3 hack", GAME_PATH3HACK},
|
||||||
{"***23 parallelise calls", GAME_DOPARALLELCTX},
|
//{"***xx Parallel calls", GAME_DOPARALLELCTX},
|
||||||
{"***24 specular highligths", GAME_XENOSPECHACK},
|
{"***19 Specular highlights", GAME_XENOSPECHACK},
|
||||||
{"***25 partial pointers", GAME_PARTIALPOINTERS},
|
//{"***xx Partial pointers", GAME_PARTIALPOINTERS},
|
||||||
{"***26 partial depth", GAME_PARTIALDEPTH},
|
{"***20 Partial depth", GAME_PARTIALDEPTH},
|
||||||
{"***27 reget hack", GAME_REGETHACK},
|
{"***21 Reget hack", GAME_REGETHACK},
|
||||||
|
|
||||||
{"***28 gust hack", GAME_GUSTHACK},
|
{"***22 Gust hack", GAME_GUSTHACK},
|
||||||
{"***29 log-Z", GAME_NOLOGZ}
|
{"***23 Log-Z", GAME_NOLOGZ},
|
||||||
|
{"***24 Auto skipdraw", GAME_AUTOSKIPDRAW}
|
||||||
};
|
};
|
||||||
|
|
||||||
int CurrentHackSetting = 0;
|
int CurrentHackSetting = 0;
|
||||||
|
@ -172,7 +173,7 @@ void ProcessHackSetting(bool reverse)
|
||||||
{
|
{
|
||||||
CurrentHackSetting++;
|
CurrentHackSetting++;
|
||||||
|
|
||||||
if (CurrentHackSetting == HACK_NUMBER) CurrentHackSetting = 0;
|
if (CurrentHackSetting >= HACK_NUMBER) CurrentHackSetting = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
conf.hacks._u32 |= HackinshTable[CurrentHackSetting].HackMask;
|
conf.hacks._u32 |= HackinshTable[CurrentHackSetting].HackMask;
|
||||||
|
|
|
@ -244,6 +244,27 @@ void Warn_Log(const char *fmt, ...)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Developer-build log channel.
//
// When ZEROGS_DEVBUILD is defined, formats the printf-style message once into
// gsLog (only if IsLogging() is true) and once onto stderr, prefixed with
// "ZZogl-PG: "; each copy is terminated by a newline.
// Without ZEROGS_DEVBUILD the function compiles to a no-op.
//
// fmt: printf-style format string, followed by its arguments.
void Dev_Log(const char *fmt, ...)
{
#ifdef ZEROGS_DEVBUILD
	va_list list;

	if (IsLogging())
	{
		// A va_list is indeterminate after vfprintf() has consumed it, so each
		// output gets its own va_start/va_end pair instead of sharing one.
		va_start(list, fmt);
		vfprintf(gsLog, fmt, list);
		va_end(list);

		fprintf(gsLog, "\n");
	}

	fprintf(stderr, "ZZogl-PG: ");

	va_start(list, fmt);
	vfprintf(stderr, fmt, list);
	va_end(list);

	fprintf(stderr, "\n");
#endif
}
|
||||||
|
|
||||||
void Debug_Log(const char *fmt, ...)
|
void Debug_Log(const char *fmt, ...)
|
||||||
{
|
{
|
||||||
#if _DEBUG
|
#if _DEBUG
|
||||||
|
|
|
@ -185,6 +185,7 @@ extern void Prim_Log(const char *fmt, ...);
|
||||||
extern void GS_Log(const char *fmt, ...);
|
extern void GS_Log(const char *fmt, ...);
|
||||||
|
|
||||||
extern void Debug_Log(const char *fmt, ...);
|
extern void Debug_Log(const char *fmt, ...);
|
||||||
|
extern void Dev_Log(const char *fmt, ...);
|
||||||
extern void Warn_Log(const char *fmt, ...);
|
extern void Warn_Log(const char *fmt, ...);
|
||||||
extern void Error_Log(const char *fmt, ...);
|
extern void Error_Log(const char *fmt, ...);
|
||||||
};
|
};
|
||||||
|
|
|
@ -54,6 +54,7 @@ void ZeroGS::AdjustTransToAspect(float4& v)
|
||||||
{
|
{
|
||||||
double temp;
|
double temp;
|
||||||
float f;
|
float f;
|
||||||
|
const float mult = 1 / 32767.0f;
|
||||||
|
|
||||||
if (conf.width * nBackbufferHeight > conf.height * nBackbufferWidth) // limited by width
|
if (conf.width * nBackbufferHeight > conf.height * nBackbufferWidth) // limited by width
|
||||||
{
|
{
|
||||||
|
@ -74,7 +75,7 @@ void ZeroGS::AdjustTransToAspect(float4& v)
|
||||||
v.z *= f;
|
v.z *= f;
|
||||||
}
|
}
|
||||||
|
|
||||||
v *= 1 / 32767.0f;
|
v *= mult;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool FrameSkippingHelper()
|
inline bool FrameSkippingHelper()
|
||||||
|
|
|
@ -89,7 +89,7 @@ inline u32 CreateInterlaceTex(int width)
|
||||||
|
|
||||||
glGenTextures(1, &s_ptexInterlace);
|
glGenTextures(1, &s_ptexInterlace);
|
||||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, s_ptexInterlace);
|
glBindTexture(GL_TEXTURE_RECTANGLE_NV, s_ptexInterlace);
|
||||||
TextureRect(4, width, 1, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
|
TextureRect(GL_RGBA, width, 1, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
|
||||||
setRectFilters(GL_NEAREST);
|
setRectFilters(GL_NEAREST);
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
||||||
|
|
|
@ -75,8 +75,6 @@ map<string, GLbyte> mapGLExtensions;
|
||||||
|
|
||||||
namespace ZeroGS
|
namespace ZeroGS
|
||||||
{
|
{
|
||||||
RenderFormatType g_RenderFormatType = RFT_float16;
|
|
||||||
|
|
||||||
extern void KickPoint();
|
extern void KickPoint();
|
||||||
extern void KickLine();
|
extern void KickLine();
|
||||||
extern void KickTriangle();
|
extern void KickTriangle();
|
||||||
|
@ -84,8 +82,8 @@ extern void KickTriangleFan();
|
||||||
extern void KickSprite();
|
extern void KickSprite();
|
||||||
extern void KickDummy();
|
extern void KickDummy();
|
||||||
extern bool LoadEffects();
|
extern bool LoadEffects();
|
||||||
extern bool LoadExtraEffects();
|
extern bool ZZshLoadExtraEffects();
|
||||||
extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
||||||
|
|
||||||
GLuint vboRect = 0;
|
GLuint vboRect = 0;
|
||||||
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
||||||
|
@ -270,19 +268,6 @@ inline void ZeroGS::CreateOtherCheck()
|
||||||
if (Max_Texture_Size_NV < 1024)
|
if (Max_Texture_Size_NV < 1024)
|
||||||
ZZLog::Error_Log("Could not properly make bitmasks, so some textures will be missed.");
|
ZZLog::Error_Log("Could not properly make bitmasks, so some textures will be missed.");
|
||||||
|
|
||||||
/* Zeydlitz: we don't support 128-bit targets yet. they are slow and weirdo
|
|
||||||
if( conf.settings() & GAME_32BITTARGS ) {
|
|
||||||
g_RenderFormatType = RFT_byte8;
|
|
||||||
ZZLog::Error_Log("Setting 32 bit render target.");
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
if( !IsGLExt("GL_NV_float_buffer") && !IsGLExt("GL_ARB_color_buffer_float") && !IsGLExt("ATI_pixel_format_float") ) {
|
|
||||||
ZZLog::Error_Log("******\nZZogl: GS WARNING: Floating point render targets not supported, switching to 32bit\nZZogl: *********");
|
|
||||||
g_RenderFormatType = RFT_byte8;
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
g_RenderFormatType = RFT_byte8;
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
if (IsGLExt("WGL_EXT_swap_control") || IsGLExt("EXT_swap_control"))
|
if (IsGLExt("WGL_EXT_swap_control") || IsGLExt("EXT_swap_control"))
|
||||||
wglSwapIntervalEXT(0);
|
wglSwapIntervalEXT(0);
|
||||||
|
@ -469,8 +454,6 @@ bool ZeroGS::Create(int _width, int _height)
|
||||||
Destroy(1);
|
Destroy(1);
|
||||||
GSStateReset();
|
GSStateReset();
|
||||||
|
|
||||||
g_RenderFormatType = RFT_float16;
|
|
||||||
|
|
||||||
if (!Create_Window(_width, _height)) return false;
|
if (!Create_Window(_width, _height)) return false;
|
||||||
if (!CreateFillExtensionsMap()) return false;
|
if (!CreateFillExtensionsMap()) return false;
|
||||||
if (!CreateImportantCheck()) return false;
|
if (!CreateImportantCheck()) return false;
|
||||||
|
@ -574,7 +557,7 @@ bool ZeroGS::Create(int _width, int _height)
|
||||||
PBITMAPINFO pinfo = (PBITMAPINFO)LockResource(hBitmapGlob);
|
PBITMAPINFO pinfo = (PBITMAPINFO)LockResource(hBitmapGlob);
|
||||||
|
|
||||||
GLenum tempFmt = (pinfo->bmiHeader.biBitCount == 32) ? GL_RGBA : GL_RGB;
|
GLenum tempFmt = (pinfo->bmiHeader.biBitCount == 32) ? GL_RGBA : GL_RGB;
|
||||||
TextureRect(4, pinfo->bmiHeader.biWidth, pinfo->bmiHeader.biHeight, tempFmt, GL_UNSIGNED_BYTE, (u8*)pinfo + pinfo->bmiHeader.biSize);
|
TextureRect(GL_RGBA, pinfo->bmiHeader.biWidth, pinfo->bmiHeader.biHeight, tempFmt, GL_UNSIGNED_BYTE, (u8*)pinfo + pinfo->bmiHeader.biSize);
|
||||||
|
|
||||||
nLogoWidth = pinfo->bmiHeader.biWidth;
|
nLogoWidth = pinfo->bmiHeader.biWidth;
|
||||||
nLogoHeight = pinfo->bmiHeader.biHeight;
|
nLogoHeight = pinfo->bmiHeader.biHeight;
|
||||||
|
|
|
@ -207,8 +207,6 @@ int icurctx = -1;
|
||||||
extern CRangeManager s_RangeMngr; // manages overwritten memory // zz
|
extern CRangeManager s_RangeMngr; // manages overwritten memory // zz
|
||||||
void FlushTransferRanges(const tex0Info* ptex); //zz
|
void FlushTransferRanges(const tex0Info* ptex); //zz
|
||||||
|
|
||||||
RenderFormatType GetRenderFormat() { return g_RenderFormatType; } //zz
|
|
||||||
|
|
||||||
// use to update the state
|
// use to update the state
|
||||||
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
|
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
|
||||||
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
|
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
|
||||||
|
@ -859,7 +857,7 @@ inline float4 FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRen
|
||||||
// zoe2
|
// zoe2
|
||||||
if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f;
|
if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f;
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
|
ZZshSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
|
||||||
|
|
||||||
return vpageoffset;
|
return vpageoffset;
|
||||||
}
|
}
|
||||||
|
@ -877,7 +875,7 @@ inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
|
||||||
v.y = 16.0f / (float)curvb.tex0.th;
|
v.y = 16.0f / (float)curvb.tex0.th;
|
||||||
v.z = 0.5f * v.x;
|
v.z = 0.5f * v.x;
|
||||||
v.w = 0.5f * v.y;
|
v.w = 0.5f * v.y;
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||||
}
|
}
|
||||||
else if (shadertype == 4)
|
else if (shadertype == 4)
|
||||||
{
|
{
|
||||||
|
@ -886,7 +884,7 @@ inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
|
||||||
v.y = 16.0f / (float)ptextarg->fbh;
|
v.y = 16.0f / (float)ptextarg->fbh;
|
||||||
v.z = -1;
|
v.z = -1;
|
||||||
v.w = 8.0f / (float)ptextarg->fbh;
|
v.w = 8.0f / (float)ptextarg->fbh;
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||||
}
|
}
|
||||||
|
|
||||||
return v;
|
return v;
|
||||||
|
@ -920,7 +918,7 @@ inline float4 FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& cu
|
||||||
if (shadertype == 4)
|
if (shadertype == 4)
|
||||||
vTexDims.z += 8.0f;
|
vTexDims.z += 8.0f;
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
|
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||||
|
|
||||||
return vTexDims;
|
return vTexDims;
|
||||||
}
|
}
|
||||||
|
@ -970,7 +968,7 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext
|
||||||
float4 vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
|
float4 vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
|
||||||
|
|
||||||
if (pfragment->sCLUT != NULL && ptexclut != 0)
|
if (pfragment->sCLUT != NULL && ptexclut != 0)
|
||||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sCLUT, ptexclut, "CLUT");
|
ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT");
|
||||||
|
|
||||||
FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context);
|
FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context);
|
||||||
|
|
||||||
|
@ -1016,13 +1014,13 @@ inline void FlushSetTexture(VB& curvb, FRAGMENTSHADER* pfragment, CRenderTarget*
|
||||||
|
|
||||||
// have to enable the texture parameters(curtest.atst)
|
// have to enable the texture parameters(curtest.atst)
|
||||||
if( curvb.ptexClamp[0] != 0 )
|
if( curvb.ptexClamp[0] != 0 )
|
||||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
|
ZZshGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
|
||||||
|
|
||||||
if( curvb.ptexClamp[1] != 0 )
|
if( curvb.ptexClamp[1] != 0 )
|
||||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
|
ZZshGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
|
||||||
|
|
||||||
if( pfragment->sMemory != NULL && s_ptexCurSet[context] != 0)
|
if( pfragment->sMemory != NULL && s_ptexCurSet[context] != 0)
|
||||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
|
ZZshGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1170,13 +1168,13 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf
|
||||||
v.w *= 255;
|
v.w *= 255;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// not using blending so set to defaults
|
// not using blending so set to defaults
|
||||||
float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0);
|
float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0);
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1267,7 +1265,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor)
|
||||||
|
|
||||||
if (exactcolor) v.y *= 255;
|
if (exactcolor) v.y *= 255;
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
|
|
||||||
Draw(curvb);
|
Draw(curvb);
|
||||||
|
|
||||||
|
@ -1336,7 +1334,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
|
||||||
|
|
||||||
if (exactcolor) { v.y *= 255; v.w *= 255; }
|
if (exactcolor) { v.y *= 255; v.w *= 255; }
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
|
|
||||||
glEnable(GL_BLEND);
|
glEnable(GL_BLEND);
|
||||||
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
|
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
|
||||||
|
@ -1360,7 +1358,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
|
||||||
|
|
||||||
if (exactcolor) v.y *= 255;
|
if (exactcolor) v.y *= 255;
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
|
|
||||||
Draw(curvb);
|
Draw(curvb);
|
||||||
|
|
||||||
|
@ -1412,7 +1410,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin
|
||||||
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
|
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
|
||||||
|
|
||||||
float4 v = float4(0, exactcolor ? 510.0f : 2.0f, 0, 0);
|
float4 v = float4(0, exactcolor ? 510.0f : 2.0f, 0, 0);
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||||
Draw(curvb);
|
Draw(curvb);
|
||||||
|
|
||||||
// don't need to restore
|
// don't need to restore
|
||||||
|
@ -1468,66 +1466,6 @@ inline void AlphaSaveTarget(VB& curvb)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void AlphaColorClamping(VB& curvb, const pixTest curtest)
|
|
||||||
{
|
|
||||||
// clamp the final colors, when enabled ffx2 credits mess up
|
|
||||||
//if (gs.colclamp) ZZLog::Error_Log("ColClamp!");
|
|
||||||
if ((curvb.curprim.abe && bAlphaClamping) && (GetRenderFormat() != RFT_byte8) && !(conf.settings().no_color_clamp)) // if !colclamp, skip
|
|
||||||
{
|
|
||||||
//ZZLog::Error_Log("Clamped.");
|
|
||||||
ResetAlphaVariables();
|
|
||||||
|
|
||||||
// if processing the clamping case, make sure can write to the front buffer
|
|
||||||
glDisable(GL_STENCIL_TEST);
|
|
||||||
glEnable(GL_BLEND);
|
|
||||||
glDisable(GL_ALPHA_TEST);
|
|
||||||
glDisable(GL_DEPTH_TEST);
|
|
||||||
glDepthMask(0);
|
|
||||||
glColorMask(1, 1, 1, 0);
|
|
||||||
|
|
||||||
if (s_bWriteDepth) ResetRenderTarget(1);
|
|
||||||
|
|
||||||
SetShaderCaller("AlphaColorClamping");
|
|
||||||
|
|
||||||
ZZshSetPixelShader(ppsOne.prog);
|
|
||||||
GL_BLEND_RGB(GL_ONE, GL_ONE);
|
|
||||||
|
|
||||||
float f;
|
|
||||||
|
|
||||||
if (bAlphaClamping & 1) // min
|
|
||||||
{
|
|
||||||
f = 0;
|
|
||||||
ZZshSetParameter4fv(ppsOne.prog, ppsOne.sOneColor, &f, "g_fOneColor");
|
|
||||||
GL_BLENDEQ_RGB(GL_MAX_EXT);
|
|
||||||
Draw(curvb);
|
|
||||||
}
|
|
||||||
|
|
||||||
// bios shows white screen
|
|
||||||
if (bAlphaClamping & 2) // max
|
|
||||||
{
|
|
||||||
f = 1;
|
|
||||||
ZZshSetParameter4fv(ppsOne.prog, ppsOne.sOneColor, &f, "g_fOneColor");
|
|
||||||
GL_BLENDEQ_RGB(GL_MIN_EXT);
|
|
||||||
Draw(curvb);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!curvb.zbuf.zmsk)
|
|
||||||
{
|
|
||||||
glDepthMask(1);
|
|
||||||
|
|
||||||
if (s_bWriteDepth)
|
|
||||||
{
|
|
||||||
assert(curvb.pdepth != NULL);
|
|
||||||
curvb.pdepth->SetRenderTarget(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (curvb.test.ate && USEALPHATESTING) glEnable(GL_ALPHA_TEST);
|
|
||||||
|
|
||||||
GL_ZTEST(curtest.zte);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void FlushUndoFiter(u32 dwFilterOpts)
|
inline void FlushUndoFiter(u32 dwFilterOpts)
|
||||||
{
|
{
|
||||||
if (dwFilterOpts)
|
if (dwFilterOpts)
|
||||||
|
@ -1585,7 +1523,6 @@ void ZeroGS::Flush(int context)
|
||||||
|
|
||||||
GL_REPORT_ERRORD();
|
GL_REPORT_ERRORD();
|
||||||
|
|
||||||
AlphaColorClamping(curvb, curtest);
|
|
||||||
FlushUndoFiter(dwFilterOpts);
|
FlushUndoFiter(dwFilterOpts);
|
||||||
|
|
||||||
ppf += curvb.nCount + 0x100000;
|
ppf += curvb.nCount + 0x100000;
|
||||||
|
@ -1988,7 +1925,7 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
|
||||||
}
|
}
|
||||||
|
|
||||||
// clamp relies on texture width
|
// clamp relies on texture width
|
||||||
inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
|
||||||
{
|
{
|
||||||
FUNCLOG
|
FUNCLOG
|
||||||
SetShaderCaller("SetTexClamping");
|
SetShaderCaller("SetTexClamping");
|
||||||
|
@ -2004,14 +1941,19 @@ inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
||||||
switch (pclamp->wms)
|
switch (pclamp->wms)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
v2.x = -1e10; v2.z = 1e10;
|
v2.x = -1e10;
|
||||||
|
v2.z = 1e10;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1: // pclamp
|
case 1: // pclamp
|
||||||
// suikoden5 movie text
|
// suikoden5 movie text
|
||||||
v2.x = 0; v2.z = 1-0.5f/fw;
|
v2.x = 0;
|
||||||
|
v2.z = 1 - 0.5f / fw;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2: // reg pclamp
|
case 2: // reg pclamp
|
||||||
v2.x = (pclamp->minu+0.5f)/fw; v2.z = (pclamp->maxu-0.5f)/fw;
|
v2.x = (pclamp->minu + 0.5f) / fw;
|
||||||
|
v2.z = (pclamp->maxu - 0.5f) / fw;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3: // region rep x
|
case 3: // region rep x
|
||||||
|
@ -2026,20 +1968,27 @@ inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
||||||
g_PrevBitwiseTexX = correctMinu;
|
g_PrevBitwiseTexX = correctMinu;
|
||||||
ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(correctMinu, 0);
|
ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(correctMinu, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (pclamp->wmt)
|
switch (pclamp->wmt)
|
||||||
{
|
{
|
||||||
|
|
||||||
case 0:
|
case 0:
|
||||||
v2.y = -1e10; v2.w = 1e10;
|
v2.y = -1e10;
|
||||||
|
v2.w = 1e10;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1: // pclamp
|
case 1: // pclamp
|
||||||
// suikoden5 movie text
|
// suikoden5 movie text
|
||||||
v2.y = 0; v2.w = 1-0.5f/fh;
|
v2.y = 0;
|
||||||
|
v2.w = 1 - 0.5f / fh;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2: // reg pclamp
|
case 2: // reg pclamp
|
||||||
v2.y = (pclamp->minv+0.5f)/fh; v2.w = (pclamp->maxv-0.5f)/fh;
|
v2.y = (pclamp->minv + 0.5f) / fh;
|
||||||
|
v2.w = (pclamp->maxv - 0.5f) / fh;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3: // region rep y
|
case 3: // region rep y
|
||||||
|
@ -2049,17 +1998,21 @@ inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
||||||
v2.w = pclamp->maxv / fh;
|
v2.w = pclamp->maxv / fh;
|
||||||
int correctMinv = pclamp->minv & (~pclamp->maxv); // (A && B) || C == (A && (B && !C)) + C
|
int correctMinv = pclamp->minv & (~pclamp->maxv); // (A && B) || C == (A && (B && !C)) + C
|
||||||
|
|
||||||
if (correctMinv != g_PrevBitwiseTexY) {
|
if (correctMinv != g_PrevBitwiseTexY)
|
||||||
|
{
|
||||||
g_PrevBitwiseTexY = correctMinv;
|
g_PrevBitwiseTexY = correctMinv;
|
||||||
ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
|
ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ZZshActiveParameter(pfragment->fTexWrapMode))
|
if (pfragment->fTexWrapMode != 0)
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexWrapMode, v, "g_fTexWrapMode");
|
ZZshSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode");
|
||||||
if (ZZshActiveParameter( pfragment->fClampExts))
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fClampExts, v2, "g_fClampExts");
|
if (pfragment->fClampExts != 0)
|
||||||
|
ZZshSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fixme should be in float4 lib
|
// Fixme should be in float4 lib
|
||||||
|
@ -2230,11 +2183,11 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
|
||||||
|
|
||||||
// Test;*/
|
// Test;*/
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha, valpha, "g_fTexAlpha");
|
ZZshSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha");
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
|
ZZshSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
|
||||||
|
|
||||||
if (IsAlphaTestExpansion(tex0))
|
if (IsAlphaTestExpansion(tex0))
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTestBlack, vblack, "g_fTestBlack");
|
ZZshSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack");
|
||||||
|
|
||||||
SetTexClamping(context, pfragment);
|
SetTexClamping(context, pfragment);
|
||||||
|
|
||||||
|
@ -2280,7 +2233,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
|
||||||
v.w = 1.0f / (float)fh;
|
v.w = 1.0f / (float)fh;
|
||||||
|
|
||||||
if (pfragment->fRealTexDims)
|
if (pfragment->fRealTexDims)
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fRealTexDims, v, "g_fRealTexDims");
|
ZZshSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims");
|
||||||
else
|
else
|
||||||
ZZshSetParameter4fv(cgGetNamedParameter(pfragment->prog,"g_fRealTexDims"),v, "g_fRealTexDims");
|
ZZshSetParameter4fv(cgGetNamedParameter(pfragment->prog,"g_fRealTexDims"),v, "g_fRealTexDims");
|
||||||
}
|
}
|
||||||
|
@ -2336,15 +2289,15 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
|
||||||
v.z *= b.bpp * (1 / 32.0f);
|
v.z *= b.bpp * (1 / 32.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
|
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||||
|
|
||||||
// ZZshSetParameter4fv(pfragment->prog, pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
|
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
|
ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
|
||||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||||
|
|
||||||
// get hardware texture dims
|
// get hardware texture dims
|
||||||
//int texheight = (pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult;
|
//int texheight = pmemtarg->texH;
|
||||||
int texwidth = GPU_TEXWIDTH * pmemtarg->widthmult * pmemtarg->channels;
|
int texwidth = pmemtarg->texW;
|
||||||
|
|
||||||
v.y = 1.0f;
|
v.y = 1.0f;
|
||||||
v.x = (fpageint - (float)pmemtarg->realy / (float)pmemtarg->widthmult + 0.5f);//*v.y;
|
v.x = (fpageint - (float)pmemtarg->realy / (float)pmemtarg->widthmult + 0.5f);//*v.y;
|
||||||
|
|
|
@ -1,83 +1,493 @@
|
||||||
/* ZeroGS KOSMOS
|
/* ZZ Open GL graphics plugin
|
||||||
|
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||||
|
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||||
*
|
*
|
||||||
* Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
*
|
*
|
||||||
* Zerofrog forgot to write any copyright notice after releasing the plugin into GPLv2
|
* This program is distributed in the hope that it will be useful,
|
||||||
* If someone can contact him successfully to clarify this matter that would be great.
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Now that it's down to 82 lines, and most of it's fairly obvious, perhaps it'd be easier to
|
|
||||||
// just reimplement it... -arcum42
|
|
||||||
|
|
||||||
#ifndef ZZOGLMATH_H_INCLUDED
|
#ifndef ZZOGLMATH_H_INCLUDED
|
||||||
#define ZZOGLMATH_H_INCLUDED
|
#define ZZOGLMATH_H_INCLUDED
|
||||||
|
|
||||||
|
//Remind me to check and see if this is necessary, and what uses it. --arcum42
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
#include <alloca.h>
|
#include <alloca.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
typedef float dReal;
|
//#define ZZ_MMATH
|
||||||
|
|
||||||
// class used for 3 and 4 dim vectors and quaternions
|
#ifndef ZZ_MMATH
|
||||||
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
|
|
||||||
|
|
||||||
class float4
|
template <class T>
|
||||||
|
class Vector4
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
dReal x, y, z, w;
|
T x, y, z, w;
|
||||||
|
|
||||||
float4() : x(0), y(0), z(0), w(0) {}
|
Vector4(T x1 = 0, T y1 = 0, T z1 = 0, T w1 = 0)
|
||||||
float4(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
|
{
|
||||||
float4(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
|
x = x1;
|
||||||
float4(const float4 &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
|
y = y1;
|
||||||
float4(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
|
z = z1;
|
||||||
dReal operator[](int i) const { return (&x)[i]; }
|
w = w1;
|
||||||
dReal& operator[](int i) { return (&x)[i]; }
|
}
|
||||||
|
|
||||||
// casting operators
|
Vector4(Vector4<T> &f)
|
||||||
operator dReal*() { return &x; }
|
{
|
||||||
operator const dReal*() const { return (const dReal*)&x; }
|
x = f.x;
|
||||||
|
y = f.y;
|
||||||
|
z = f.z;
|
||||||
|
w = f.w;
|
||||||
|
}
|
||||||
|
|
||||||
// SCALAR FUNCTIONS
|
Vector4(T* f)
|
||||||
inline dReal dot(const float4 &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
|
{
|
||||||
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
|
x = f[0];
|
||||||
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
|
y = f[1];
|
||||||
inline void SetColor(u32 color)
|
z = f[2];
|
||||||
|
w = f[3]; // For some reason, the old code set this to 0.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Indexed component access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// An out-of-range index trips the assert in debug builds. The original switch
// fell off the end of a reference-returning function once NDEBUG compiled the
// assert away (undefined behaviour); now every path returns a valid reference,
// with out-of-range indices falling back to w in release builds.
T& operator[](int i)
{
	assert(i >= 0 && i < 4);

	switch(i)
	{
		case 0: return x;
		case 1: return y;
		case 2: return z;
		default: return w;
	}
}
|
||||||
|
|
||||||
|
// Implicit conversion to a writable T*: the address of this object
// reinterpreted as a pointer to T.
operator T*()
{
	return reinterpret_cast<T*>(this);
}
|
||||||
|
|
||||||
|
// Read-only counterpart of the T* conversion: the address of this object
// reinterpreted as a pointer to const T.
operator const T*() const
{
	return reinterpret_cast<const T*>(this);
}
|
||||||
|
|
||||||
|
// Copies all four components from v. Returns *this so assignments can be chained.
Vector4<T>& operator =(const Vector4<T>& v)
{
	x = v.x; y = v.y;
	z = v.z; w = v.w;

	return *this;
}
|
||||||
|
|
||||||
|
bool operator ==(const Vector4<T>& v)
|
||||||
|
{
|
||||||
|
return !!( x == v.x &&
|
||||||
|
y == v.y &&
|
||||||
|
z == v.z &&
|
||||||
|
w == v.w );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Component-wise sum of *this and v; neither operand is modified.
Vector4<T> operator +(const Vector4<T>& v) const
{
	const T sx = x + v.x;
	const T sy = y + v.y;
	const T sz = z + v.z;
	const T sw = w + v.w;

	return Vector4<T>(sx, sy, sz, sw);
}
|
||||||
|
|
||||||
|
// Component-wise difference (*this minus v); neither operand is modified.
Vector4<T> operator -(const Vector4<T>& v) const
{
	const T dx = x - v.x;
	const T dy = y - v.y;
	const T dz = z - v.z;
	const T dw = w - v.w;

	return Vector4<T>(dx, dy, dz, dw);
}
|
||||||
|
|
||||||
|
// Component-wise product of *this and v (not a dot or cross product).
Vector4<T> operator *(const Vector4<T>& v) const
{
	const T px = x * v.x;
	const T py = y * v.y;
	const T pz = z * v.z;
	const T pw = w * v.w;

	return Vector4<T>(px, py, pz, pw);
}
|
||||||
|
|
||||||
|
// Component-wise quotient (*this divided by v). No zero check is made on v's components.
Vector4<T> operator /(const Vector4<T>& v) const
{
	const T qx = x / v.x;
	const T qy = y / v.y;
	const T qz = z / v.z;
	const T qw = w / v.w;

	return Vector4<T>(qx, qy, qz, qw);
}
|
||||||
|
// Returns a copy of *this with val added to every component.
Vector4<T> operator +(T val) const
{
	const T sx = x + val;
	const T sy = y + val;
	const T sz = z + val;
	const T sw = w + val;

	return Vector4<T>(sx, sy, sz, sw);
}
|
||||||
|
|
||||||
|
// Returns a copy of *this with val subtracted from every component.
Vector4<T> operator -(T val) const
{
	const T dx = x - val;
	const T dy = y - val;
	const T dz = z - val;
	const T dw = w - val;

	return Vector4<T>(dx, dy, dz, dw);
}
|
||||||
|
|
||||||
|
// Returns a copy of *this with every component multiplied by val.
Vector4<T> operator *(T val) const
{
	const T px = x * val;
	const T py = y * val;
	const T pz = z * val;
	const T pw = w * val;

	return Vector4<T>(px, py, pz, pw);
}
|
||||||
|
|
||||||
|
// Returns a copy of *this with every component divided by val. No zero check is made.
Vector4<T> operator /(T val) const
{
	const T qx = x / val;
	const T qy = y / val;
	const T qz = z / val;
	const T qw = w / val;

	return Vector4<T>(qx, qy, qz, qw);
}
|
||||||
|
|
||||||
|
// Adds v to *this component by component and returns *this.
Vector4<T>& operator +=(const Vector4<T>& v)
{
	x += v.x;
	y += v.y;
	z += v.z;
	w += v.w;
	return *this;
}
|
||||||
|
|
||||||
|
// Subtracts v from *this component by component and returns *this.
Vector4<T>& operator -=(const Vector4<T>& v)
{
	x -= v.x;
	y -= v.y;
	z -= v.z;
	w -= v.w;
	return *this;
}
|
||||||
|
|
||||||
|
// Multiplies *this by v component by component and returns *this.
Vector4<T>& operator *=(const Vector4<T>& v)
{
	x *= v.x;
	y *= v.y;
	z *= v.z;
	w *= v.w;
	return *this;
}
|
||||||
|
|
||||||
|
// Divides *this by v component by component and returns *this.
// Fix: the original body was a copy of operator-= (`*this - v`), so `a /= b`
// silently subtracted instead of dividing. It now uses operator/ like the
// sibling compound operators use their matching binary operator.
// No zero check is made on v's components.
Vector4<T>& operator /=(const Vector4<T>& v)
{
	*this = *this / v;
	return *this;
}
|
||||||
|
|
||||||
|
// Adds val to every component of *this and returns *this.
Vector4<T>& operator +=(T val)
{
	x += val;
	y += val;
	z += val;
	w += val;
	return *this;
}
|
||||||
|
|
||||||
|
// Subtracts val from every component of *this and returns *this.
Vector4<T>& operator -=(T val)
{
	x -= val;
	y -= val;
	z -= val;
	w -= val;
	return *this;
}
|
||||||
|
|
||||||
|
// Multiplies every component of *this by val and returns *this.
Vector4<T>& operator *=(T val)
{
	x *= val;
	y *= val;
	z *= val;
	w *= val;
	return *this;
}
|
||||||
|
|
||||||
|
// Divides every component of *this by val and returns *this. No zero check is made.
Vector4<T>& operator /=(T val)
{
	x /= val;
	y /= val;
	z /= val;
	w /= val;
	return *this;
}
|
||||||
|
|
||||||
|
// Probably doesn't belong here, but I'll leave it in for the moment.
|
||||||
|
void SetColor(u32 color)
|
||||||
{
|
{
|
||||||
x = (color & 0xff) / 255.0f;
|
x = (color & 0xff) / 255.0f;
|
||||||
y = ((color >> 8) & 0xff) / 255.0f;
|
y = ((color >> 8) & 0xff) / 255.0f;
|
||||||
z = ((color >> 16) & 0xff) / 255.0f;
|
z = ((color >> 16) & 0xff) / 255.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3 dim cross product, w is not touched
|
|
||||||
/// this = this x v
|
|
||||||
/// this = u x v
|
|
||||||
inline float4 operator-() const { float4 v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
|
|
||||||
inline float4 operator+(const float4 &r) const { float4 v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
|
|
||||||
inline float4 operator-(const float4 &r) const { float4 v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
|
|
||||||
inline float4 operator*(const float4 &r) const { float4 v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
|
|
||||||
inline float4 operator*(dReal k) const { float4 v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
|
|
||||||
inline float4& operator += (const float4& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
|
|
||||||
inline float4& operator -= (const float4& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
|
|
||||||
inline float4& operator *= (const float4& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
|
|
||||||
inline float4& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
|
|
||||||
inline float4& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
|
|
||||||
friend float4 operator*(float f, const float4& v);
|
|
||||||
//friend ostream& operator<<(ostream& O, const float4& v);
|
|
||||||
//friend istream& operator>>(istream& I, float4& v);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
inline float4 operator*(float f, const float4& left)
|
typedef Vector4<float> float4;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Reimplement, swiping a bunch of code from GSdx and adapting it. (specifically GSVector.h)
|
||||||
|
// This doesn't include more than half of the functions in there, as well as some of the structs...
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
|
#include "Pcsx2Types.h"
|
||||||
|
|
||||||
|
class float4
|
||||||
{
|
{
|
||||||
float4 v;
|
public:
|
||||||
v.x = f * left.x;
|
union
|
||||||
v.y = f * left.y;
|
{
|
||||||
v.z = f * left.z;
|
struct {float x, y, z, w;};
|
||||||
return v;
|
struct {float r, g, b, a;};
|
||||||
|
struct {float left, top, right, bottom;};
|
||||||
|
float v[4];
|
||||||
|
float f32[4];
|
||||||
|
s8 _s8[16];
|
||||||
|
s16 _s16[8];
|
||||||
|
s32 _s32[4];
|
||||||
|
s64 _s64[2];
|
||||||
|
u8 _u8[16];
|
||||||
|
u16 _u16[8];
|
||||||
|
u32 _u32[4];
|
||||||
|
u64 _u64[2];
|
||||||
|
__m128 m;
|
||||||
|
};
|
||||||
|
|
||||||
|
float4()
|
||||||
|
{
|
||||||
|
m = _mm_setzero_ps();
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // ZZOGLMATH_H_INCLUDED
|
float4(float x, float y, float z, float w = 0)
|
||||||
|
{
|
||||||
|
m = _mm_set_ps(w, z, y, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
float4(float4 &f)
|
||||||
|
{
|
||||||
|
m = f.m;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4(float x, float y)
|
||||||
|
{
|
||||||
|
m = _mm_unpacklo_ps(_mm_load_ss(&x), _mm_load_ss(&y));
|
||||||
|
}
|
||||||
|
|
||||||
|
float4(int x, int y)
|
||||||
|
{
|
||||||
|
m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y)));
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit float4(float f)
|
||||||
|
{
|
||||||
|
m = _mm_set1_ps(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
explicit float4(__m128 m)
|
||||||
|
{
|
||||||
|
this->m = m;
|
||||||
|
}
|
||||||
|
|
||||||
|
float4(float* f)
|
||||||
|
{
|
||||||
|
x = f[0];
|
||||||
|
y = f[1];
|
||||||
|
z = f[2];
|
||||||
|
w = f[3]; // For some reason, the old code set this to 0.
|
||||||
|
}
|
||||||
|
|
||||||
|
float& operator[](int i)
|
||||||
|
{
|
||||||
|
switch(i)
|
||||||
|
{
|
||||||
|
case 0: return x;
|
||||||
|
case 1: return y;
|
||||||
|
case 2: return z;
|
||||||
|
case 3: return w;
|
||||||
|
default: assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
operator float*()
|
||||||
|
{
|
||||||
|
return (float*) this;
|
||||||
|
}
|
||||||
|
|
||||||
|
operator const float*() const
|
||||||
|
{
|
||||||
|
return (const float*) this;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator = (float f)
|
||||||
|
{
|
||||||
|
m = _mm_set1_ps(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator = (__m128 m)
|
||||||
|
{
|
||||||
|
this->m = m;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void operator += (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_add_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator -= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_sub_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator *= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_mul_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator /= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_div_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator += (float f)
|
||||||
|
{
|
||||||
|
*this += float4(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator -= (float f)
|
||||||
|
{
|
||||||
|
*this -= float4(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator *= (float f)
|
||||||
|
{
|
||||||
|
*this *= float4(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator /= (float f)
|
||||||
|
{
|
||||||
|
*this /= float4(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator &= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_and_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator |= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_or_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator ^= (const float4& v)
|
||||||
|
{
|
||||||
|
m = _mm_xor_ps(m, v.m);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Lane-wise arithmetic on two vectors; operands are left untouched. ----

friend float4 operator + (const float4& v1, const float4& v2)
{
	const __m128 sum = _mm_add_ps(v1.m, v2.m);
	return float4(sum);
}

friend float4 operator - (const float4& v1, const float4& v2)
{
	const __m128 difference = _mm_sub_ps(v1.m, v2.m);
	return float4(difference);
}

friend float4 operator * (const float4& v1, const float4& v2)
{
	const __m128 product = _mm_mul_ps(v1.m, v2.m);
	return float4(product);
}

friend float4 operator / (const float4& v1, const float4& v2)
{
	const __m128 quotient = _mm_div_ps(v1.m, v2.m);
	return float4(quotient);
}

// ---- Vector (op) scalar: f is broadcast to all four lanes, then the vector form is used. ----

friend float4 operator + (const float4& v, float f)
{
	const float4 splat(f);
	return v + splat;
}

friend float4 operator - (const float4& v, float f)
{
	const float4 splat(f);
	return v - splat;
}

friend float4 operator * (const float4& v, float f)
{
	const float4 splat(f);
	return v * splat;
}

friend float4 operator / (const float4& v, float f)
{
	const float4 splat(f);
	return v / splat;
}

// ---- Bitwise operations on the raw lane bits. ----

friend float4 operator & (const float4& v1, const float4& v2)
{
	const __m128 bits = _mm_and_ps(v1.m, v2.m);
	return float4(bits);
}

friend float4 operator | (const float4& v1, const float4& v2)
{
	const __m128 bits = _mm_or_ps(v1.m, v2.m);
	return float4(bits);
}

friend float4 operator ^ (const float4& v1, const float4& v2)
{
	const __m128 bits = _mm_xor_ps(v1.m, v2.m);
	return float4(bits);
}

// ---- Comparisons. These return a float4 holding the per-lane result of the
// ---- SSE compare intrinsic (a lane mask), not a bool.

friend float4 operator == (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmpeq_ps(v1.m, v2.m);
	return float4(mask);
}

friend float4 operator != (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmpneq_ps(v1.m, v2.m);
	return float4(mask);
}

friend float4 operator > (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmpgt_ps(v1.m, v2.m);
	return float4(mask);
}

friend float4 operator < (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmplt_ps(v1.m, v2.m);
	return float4(mask);
}

friend float4 operator >= (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmpge_ps(v1.m, v2.m);
	return float4(mask);
}

friend float4 operator <= (const float4& v1, const float4& v2)
{
	const __m128 mask = _mm_cmple_ps(v1.m, v2.m);
	return float4(mask);
}
|
||||||
|
|
||||||
|
// This looked interesting, so I thought I'd include it...
|
||||||
|
|
||||||
|
// Returns a vector whose four lanes are all copies of lane i of *this
// (i is a compile-time lane index, 0..3).
template<int i> float4 shuffle() const
{
	const __m128 splat = _mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i));
	return float4(splat);
}
|
||||||
|
|
||||||
|
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
||||||
|
float4 xs##ys##zs##ws() const {return float4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||||
|
float4 xs##ys##zs##ws(const float4& v) const {return float4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||||
|
|
||||||
|
#define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
|
||||||
|
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
|
||||||
|
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \
|
||||||
|
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \
|
||||||
|
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \
|
||||||
|
|
||||||
|
#define VECTOR4_SHUFFLE_2(xs, xn, ys, yn) \
|
||||||
|
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, x, 0) \
|
||||||
|
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, y, 1) \
|
||||||
|
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, z, 2) \
|
||||||
|
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, w, 3) \
|
||||||
|
|
||||||
|
#define VECTOR4_SHUFFLE_1(xs, xn) \
|
||||||
|
float4 xs##4() const {return float4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(xn, xn, xn, xn)));} \
|
||||||
|
float4 xs##4(const float4& v) const {return float4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(xn, xn, xn, xn)));} \
|
||||||
|
VECTOR4_SHUFFLE_2(xs, xn, x, 0) \
|
||||||
|
VECTOR4_SHUFFLE_2(xs, xn, y, 1) \
|
||||||
|
VECTOR4_SHUFFLE_2(xs, xn, z, 2) \
|
||||||
|
VECTOR4_SHUFFLE_2(xs, xn, w, 3) \
|
||||||
|
|
||||||
|
VECTOR4_SHUFFLE_1(x, 0)
|
||||||
|
VECTOR4_SHUFFLE_1(y, 1)
|
||||||
|
VECTOR4_SHUFFLE_1(z, 2)
|
||||||
|
VECTOR4_SHUFFLE_1(w, 3)
|
||||||
|
|
||||||
|
// Probably doesn't belong here, but I'll leave it in for the moment.
|
||||||
|
// Unpacks the three low bytes of a packed 32-bit colour into x, y and z, each
// scaled from [0, 255] to [0, 1]. Bits 24-31 are ignored and w is left unchanged.
void SetColor(u32 color)
{
	const u32 byte0 = color & 0xff;
	const u32 byte1 = (color >> 8) & 0xff;
	const u32 byte2 = (color >> 16) & 0xff;

	x = byte0 / 255.0f;
	y = byte1 / 255.0f;
	z = byte2 / 255.0f;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
|
@ -392,16 +392,16 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
||||||
assert(pmemtarg != NULL);
|
assert(pmemtarg != NULL);
|
||||||
|
|
||||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
|
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
|
||||||
srcdata.resize(pmemtarg->realheight * GPU_TEXWIDTH * pmemtarg->widthmult * 4 * 8); // max of 8 cannels
|
srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH);
|
||||||
|
|
||||||
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
||||||
|
|
||||||
u32 offset = pmemtarg->realy * 4 * GPU_TEXWIDTH;
|
u32 offset = MemorySize(pmemtarg->realy);
|
||||||
|
|
||||||
if (ptex->psm == PSMT8)
|
if (ptex->psm == PSMT8)
|
||||||
offset *= PSMT_IS32BIT(ptex->cpsm) ? 4 : 2;
|
offset *= CLUT_PIXEL_SIZE(ptex->cpsm);
|
||||||
else if (ptex->psm == PSMT4)
|
else if (ptex->psm == PSMT4)
|
||||||
offset *= PSMT_IS32BIT(ptex->cpsm) ? 8 : 4;
|
offset *= CLUT_PIXEL_SIZE(ptex->cpsm) * 2;
|
||||||
|
|
||||||
psrc = &srcdata[0] - offset;
|
psrc = &srcdata[0] - offset;
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -51,6 +51,8 @@ class CRenderTargetMngr
|
||||||
void Destroy();
|
void Destroy();
|
||||||
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
|
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
|
||||||
|
|
||||||
|
bool isFound(const frameInfo& frame, MAPTARGETS::iterator& it, u32 opts, u32 key, int maxposheight);
|
||||||
|
|
||||||
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
|
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
|
||||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
|
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
|
||||||
{
|
{
|
||||||
|
@ -119,13 +121,13 @@ class CRenderTargetMngr
|
||||||
|
|
||||||
class CMemoryTargetMngr
|
class CMemoryTargetMngr
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CMemoryTargetMngr() : curstamp(0) {}
|
CMemoryTargetMngr() : curstamp(0) {}
|
||||||
|
|
||||||
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
|
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
|
||||||
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
CMemoryTarget* SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||||
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
CMemoryTarget* ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
||||||
|
int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize, int nClutOffset);
|
||||||
|
|
||||||
void Destroy(); // destroy all targs
|
void Destroy(); // destroy all targs
|
||||||
|
|
||||||
|
@ -138,6 +140,8 @@ class CMemoryTargetMngr
|
||||||
|
|
||||||
private:
|
private:
|
||||||
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
|
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
|
||||||
|
void GetClutVariables(int& nClutOffset, int& clutsize, const tex0Info& tex0);
|
||||||
|
void GetMemAddress(int& start, int& end, const tex0Info& tex0);
|
||||||
};
|
};
|
||||||
|
|
||||||
class CBitwiseTextureMngr
|
class CBitwiseTextureMngr
|
||||||
|
|
|
@ -20,6 +20,11 @@
|
||||||
|
|
||||||
#ifdef ZEROGS_SSE2
|
#ifdef ZEROGS_SSE2
|
||||||
// SSE2 extensions
|
// SSE2 extensions
|
||||||
|
|
||||||
|
// Note: pshufd 0xe4 <=> movdqa !!!
|
||||||
|
// What the function does is
|
||||||
|
// Interleave s1 and sd0 -> d1 (high) & sd0 (low)
|
||||||
|
// Interleave s3 and sd2 -> d3 (high) & sd2 (low)
|
||||||
#define punpck(op, sd0, sd2, s1, s3, d1, d3) \
|
#define punpck(op, sd0, sd2, s1, s3, d1, d3) \
|
||||||
movdqa %xmm##d1, %xmm##sd0; \
|
movdqa %xmm##d1, %xmm##sd0; \
|
||||||
pshufd %xmm##d3, %xmm##sd2, 0xe4; \
|
pshufd %xmm##d3, %xmm##sd2, 0xe4; \
|
||||||
|
@ -29,6 +34,15 @@
|
||||||
punpckh##op %xmm##d3, %xmm##s3; \
|
punpckh##op %xmm##d3, %xmm##s3; \
|
||||||
|
|
||||||
|
|
||||||
|
// Input xmm7 == 0x0F0F0F0F 0x0F0F0F0F 0x0F0F0F0F 0x0F0F0F0F
|
||||||
|
// DATA xmm[0-3]
|
||||||
|
// This function does a 4-bits interleaving of 4 xmm registers
|
||||||
|
//
|
||||||
|
// ARG Can not put comment in the middle of the define...
|
||||||
|
// After the first por
|
||||||
|
// low 32bits (4bits packed) == 1.6 0.6 1.4 0.4 1.2 0.2 1.0 0.0
|
||||||
|
// After the second one
|
||||||
|
// low 32bits (4bits packed) == 1.7 0.7 1.5 0.5 1.3 0.3 1.1 0.1
|
||||||
#define punpcknb \
|
#define punpcknb \
|
||||||
movdqa %xmm4, %xmm0; \
|
movdqa %xmm4, %xmm0; \
|
||||||
pshufd %xmm5, %xmm1, 0xe4; \
|
pshufd %xmm5, %xmm1, 0xe4; \
|
||||||
|
@ -48,6 +62,7 @@
|
||||||
\
|
\
|
||||||
movdqa %xmm1, %xmm4; \
|
movdqa %xmm1, %xmm4; \
|
||||||
\
|
\
|
||||||
|
\
|
||||||
movdqa %xmm4, %xmm2; \
|
movdqa %xmm4, %xmm2; \
|
||||||
pshufd %xmm5, %xmm3, 0xe4; \
|
pshufd %xmm5, %xmm3, 0xe4; \
|
||||||
\
|
\
|
||||||
|
@ -68,6 +83,12 @@
|
||||||
\
|
\
|
||||||
punpck(bw, 0, 2, 1, 3, 4, 6);\
|
punpck(bw, 0, 2, 1, 3, 4, 6);\
|
||||||
|
|
||||||
|
// output
|
||||||
|
// low 32 bits 0 (4 bits packed) == 1.3 0.3 1.2 0.2 1.1 0.1 1.0 0.0
|
||||||
|
// low 32 bits 4 (4 bits packed) == 1.19 0.19 1.18 0.18 1.17 0.17 1.16 0.16
|
||||||
|
// low 32 bits 2 (4 bits packed) == 3.3 2.3 3.2 2.2 3.1 2.1 3.0 2.0
|
||||||
|
// low 32 bits 6 (4 bits packed) == 3.19 2.19 3.18 2.18 3.17 2.17 3.16 2.16
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// swizzling
|
// swizzling
|
||||||
|
@ -84,11 +105,15 @@ SwizzleBlock32_sse2:
|
||||||
push %esi
|
push %esi
|
||||||
push %edi
|
push %edi
|
||||||
|
|
||||||
|
// save dst
|
||||||
mov %edi, %ecx
|
mov %edi, %ecx
|
||||||
|
// save src
|
||||||
mov %esi, %edx
|
mov %esi, %edx
|
||||||
|
// get pitch
|
||||||
mov %edx, [%esp+4+8]
|
mov %edx, [%esp+4+8]
|
||||||
mov %ecx, 4
|
mov %ecx, 4
|
||||||
|
|
||||||
|
// get WriteMask
|
||||||
mov %eax, [%esp+8+8]
|
mov %eax, [%esp+8+8]
|
||||||
cmp %eax, 0xffffffff
|
cmp %eax, 0xffffffff
|
||||||
jne SwizzleBlock32_sse2_2
|
jne SwizzleBlock32_sse2_2
|
||||||
|
@ -100,6 +125,8 @@ SwizzleBlock32_sse2_1:
|
||||||
movdqa %xmm1, [%esi+%edx]
|
movdqa %xmm1, [%esi+%edx]
|
||||||
movdqa %xmm5, [%esi+%edx+16]
|
movdqa %xmm5, [%esi+%edx+16]
|
||||||
|
|
||||||
|
// 64bits interleave 1&0 -> 2&0
|
||||||
|
// 64bits interleave 5&4 -> 6&4
|
||||||
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
||||||
|
|
||||||
movntps [%edi+16*0], %xmm0
|
movntps [%edi+16*0], %xmm0
|
||||||
|
@ -107,6 +134,7 @@ SwizzleBlock32_sse2_1:
|
||||||
movntps [%edi+16*2], %xmm4
|
movntps [%edi+16*2], %xmm4
|
||||||
movntps [%edi+16*3], %xmm6
|
movntps [%edi+16*3], %xmm6
|
||||||
|
|
||||||
|
// update ptr
|
||||||
lea %esi, [%esi+%edx*2]
|
lea %esi, [%esi+%edx*2]
|
||||||
add %edi, 64
|
add %edi, 64
|
||||||
|
|
||||||
|
@ -120,6 +148,7 @@ SwizzleBlock32_sse2_1:
|
||||||
|
|
||||||
SwizzleBlock32_sse2_2:
|
SwizzleBlock32_sse2_2:
|
||||||
|
|
||||||
|
// WriteMask: 32bits to 4*32bits
|
||||||
movd %xmm7, %eax
|
movd %xmm7, %eax
|
||||||
pshufd %xmm7, %xmm7, 0
|
pshufd %xmm7, %xmm7, 0
|
||||||
|
|
||||||
|
@ -130,13 +159,19 @@ SwizzleBlock32_sse2_3:
|
||||||
movdqa %xmm1, [%esi+%edx]
|
movdqa %xmm1, [%esi+%edx]
|
||||||
movdqa %xmm5, [%esi+%edx+16]
|
movdqa %xmm5, [%esi+%edx+16]
|
||||||
|
|
||||||
|
// 64bits interleave 1&0 -> 2&0
|
||||||
|
// 64bits interleave 5&4 -> 6&4
|
||||||
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
||||||
|
|
||||||
|
// save a mask copy
|
||||||
movdqa %xmm3, %xmm7
|
movdqa %xmm3, %xmm7
|
||||||
pshufd %xmm5, %xmm7, 0xe4
|
pshufd %xmm5, %xmm7, 0xe4
|
||||||
|
|
||||||
|
// *dst & ~WriteMask
|
||||||
pandn %xmm3, [%edi+16*0]
|
pandn %xmm3, [%edi+16*0]
|
||||||
|
// *src & WriteMask
|
||||||
pand %xmm0, %xmm7
|
pand %xmm0, %xmm7
|
||||||
|
// Final value to save
|
||||||
por %xmm0, %xmm3
|
por %xmm0, %xmm3
|
||||||
movntps [%edi+16*0], %xmm0
|
movntps [%edi+16*0], %xmm0
|
||||||
|
|
||||||
|
@ -158,6 +193,7 @@ SwizzleBlock32_sse2_3:
|
||||||
por %xmm6, %xmm5
|
por %xmm6, %xmm5
|
||||||
movntps [%edi+16*3], %xmm6
|
movntps [%edi+16*3], %xmm6
|
||||||
|
|
||||||
|
// update ptr
|
||||||
lea %esi, [%esi+%edx*2]
|
lea %esi, [%esi+%edx*2]
|
||||||
add %edi, 64
|
add %edi, 64
|
||||||
|
|
||||||
|
@ -179,6 +215,7 @@ SwizzleBlock16_sse2:
|
||||||
|
|
||||||
push %ebx
|
push %ebx
|
||||||
|
|
||||||
|
// srcpitch
|
||||||
mov %ebx, [%esp+4+4]
|
mov %ebx, [%esp+4+4]
|
||||||
mov %eax, 4
|
mov %eax, 4
|
||||||
|
|
||||||
|
@ -189,7 +226,11 @@ SwizzleBlock16_sse2_1:
|
||||||
movdqa %xmm2, [%edx+%ebx]
|
movdqa %xmm2, [%edx+%ebx]
|
||||||
movdqa %xmm3, [%edx+%ebx+16]
|
movdqa %xmm3, [%edx+%ebx+16]
|
||||||
|
|
||||||
|
// 16bits interleave 1&0 -> 4&0
|
||||||
|
// 16bits interleave 3&2 -> 6&2
|
||||||
punpck(wd, 0, 2, 1, 3, 4, 6)
|
punpck(wd, 0, 2, 1, 3, 4, 6)
|
||||||
|
// 64bits interleave 2&0 -> 1&0
|
||||||
|
// 64bits interleave 6&4 -> 5&4
|
||||||
punpck(qdq, 0, 4, 2, 6, 1, 5)
|
punpck(qdq, 0, 4, 2, 6, 1, 5)
|
||||||
|
|
||||||
movntps [%ecx+16*0], %xmm0
|
movntps [%ecx+16*0], %xmm0
|
||||||
|
@ -197,6 +238,7 @@ SwizzleBlock16_sse2_1:
|
||||||
movntps [%ecx+16*2], %xmm4
|
movntps [%ecx+16*2], %xmm4
|
||||||
movntps [%ecx+16*3], %xmm5
|
movntps [%ecx+16*3], %xmm5
|
||||||
|
|
||||||
|
// update ptr
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
add %ecx, 64
|
add %ecx, 64
|
||||||
|
|
||||||
|
@ -217,7 +259,9 @@ SwizzleBlock8_sse2:
|
||||||
|
|
||||||
push %ebx
|
push %ebx
|
||||||
|
|
||||||
|
// load srcpitch
|
||||||
mov %ebx, [%esp+4+4]
|
mov %ebx, [%esp+4+4]
|
||||||
|
// basic counter
|
||||||
mov %eax, 2
|
mov %eax, 2
|
||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
|
@ -226,14 +270,23 @@ SwizzleBlock8_sse2_1:
|
||||||
|
|
||||||
movdqa %xmm0, [%edx]
|
movdqa %xmm0, [%edx]
|
||||||
movdqa %xmm2, [%edx+%ebx]
|
movdqa %xmm2, [%edx+%ebx]
|
||||||
|
// update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
|
// 2 3 0 1
|
||||||
pshufd %xmm1, [%edx], 0xb1
|
pshufd %xmm1, [%edx], 0xb1
|
||||||
pshufd %xmm3, [%edx+%ebx], 0xb1
|
pshufd %xmm3, [%edx+%ebx], 0xb1
|
||||||
|
// update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
|
// 8bits interleave 1&0 -> 4&0
|
||||||
|
// 8bits interleave 3&2 -> 6&2
|
||||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||||
|
// 16bits interleave 4&0 -> 1&0
|
||||||
|
// 16bits interleave 6&2 -> 3&2
|
||||||
punpck(wd, 0, 2, 4, 6, 1, 3)
|
punpck(wd, 0, 2, 4, 6, 1, 3)
|
||||||
|
// 64bits interleave 2&0 -> 4&0
|
||||||
|
// 64bits interleave 3&1 -> 5&1
|
||||||
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
||||||
|
|
||||||
movntps [%ecx+16*0], %xmm0
|
movntps [%ecx+16*0], %xmm0
|
||||||
|
@ -241,18 +294,27 @@ SwizzleBlock8_sse2_1:
|
||||||
movntps [%ecx+16*2], %xmm1
|
movntps [%ecx+16*2], %xmm1
|
||||||
movntps [%ecx+16*3], %xmm5
|
movntps [%ecx+16*3], %xmm5
|
||||||
|
|
||||||
// col 1, 3
|
// col 1, 3 (same as previous column)
|
||||||
|
|
||||||
|
// 2 3 0 1
|
||||||
pshufd %xmm0, [%edx], 0xb1
|
pshufd %xmm0, [%edx], 0xb1
|
||||||
pshufd %xmm2, [%edx+%ebx], 0xb1
|
pshufd %xmm2, [%edx+%ebx], 0xb1
|
||||||
|
// update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
movdqa %xmm1, [%edx]
|
movdqa %xmm1, [%edx]
|
||||||
movdqa %xmm3, [%edx+%ebx]
|
movdqa %xmm3, [%edx+%ebx]
|
||||||
|
// update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
|
// 8bits interleave 1&0 -> 4&0
|
||||||
|
// 8bits interleave 3&2 -> 6&2
|
||||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||||
|
// 16bits interleave 4&0 -> 1&0
|
||||||
|
// 16bits interleave 6&2 -> 3&2
|
||||||
punpck(wd, 0, 2, 4, 6, 1, 3)
|
punpck(wd, 0, 2, 4, 6, 1, 3)
|
||||||
|
// 64bits interleave 2&0 -> 4&0
|
||||||
|
// 64bits interleave 3&1 -> 5&1
|
||||||
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
||||||
|
|
||||||
movntps [%ecx+16*4], %xmm0
|
movntps [%ecx+16*4], %xmm0
|
||||||
|
@ -260,6 +322,7 @@ SwizzleBlock8_sse2_1:
|
||||||
movntps [%ecx+16*6], %xmm1
|
movntps [%ecx+16*6], %xmm1
|
||||||
movntps [%ecx+16*7], %xmm5
|
movntps [%ecx+16*7], %xmm5
|
||||||
|
|
||||||
|
// update dst pointer
|
||||||
add %ecx, 128
|
add %ecx, 128
|
||||||
|
|
||||||
dec %eax
|
dec %eax
|
||||||
|
@ -279,10 +342,12 @@ SwizzleBlock4_sse2:
|
||||||
|
|
||||||
push %ebx
|
push %ebx
|
||||||
|
|
||||||
|
// load 4 0x0F0F0F0F
|
||||||
mov %eax, 0xf0f0f0f
|
mov %eax, 0xf0f0f0f
|
||||||
movd %xmm7, %eax
|
movd %xmm7, %eax
|
||||||
pshufd %xmm7, %xmm7, 0
|
pshufd %xmm7, %xmm7, 0
|
||||||
|
|
||||||
|
// load srcpitch
|
||||||
mov %ebx, [%esp+4+4]
|
mov %ebx, [%esp+4+4]
|
||||||
mov %eax, 2
|
mov %eax, 2
|
||||||
|
|
||||||
|
@ -292,20 +357,32 @@ SwizzleBlock4_sse2_1:
|
||||||
|
|
||||||
movdqa %xmm0, [%edx]
|
movdqa %xmm0, [%edx]
|
||||||
movdqa %xmm2, [%edx+%ebx]
|
movdqa %xmm2, [%edx+%ebx]
|
||||||
|
//update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
movdqa %xmm1, [%edx]
|
movdqa %xmm1, [%edx]
|
||||||
movdqa %xmm3, [%edx+%ebx]
|
movdqa %xmm3, [%edx+%ebx]
|
||||||
|
// update src pointer
|
||||||
lea %edx, [%edx+%ebx*2]
|
lea %edx, [%edx+%ebx*2]
|
||||||
|
|
||||||
|
// - - - - 2 3 0 1
|
||||||
pshuflw %xmm1, %xmm1, 0xb1
|
pshuflw %xmm1, %xmm1, 0xb1
|
||||||
pshuflw %xmm3, %xmm3, 0xb1
|
pshuflw %xmm3, %xmm3, 0xb1
|
||||||
|
// 6 7 4 5 - - - -
|
||||||
pshufhw %xmm1, %xmm1, 0xb1
|
pshufhw %xmm1, %xmm1, 0xb1
|
||||||
pshufhw %xmm3, %xmm3, 0xb1
|
pshufhw %xmm3, %xmm3, 0xb1
|
||||||
|
|
||||||
|
// 4bits interleave 1&0 -> 4&0
|
||||||
|
// 4bits interleave 3&2 -> 6&2
|
||||||
punpcknb
|
punpcknb
|
||||||
|
// 8bits interleave 4&0 -> 1&0
|
||||||
|
// 8bits interleave 6&2 -> 3&2
|
||||||
punpck(bw, 0, 2, 4, 6, 1, 3)
|
punpck(bw, 0, 2, 4, 6, 1, 3)
|
||||||
|
// 8bits interleave 1&0 -> 4&0
|
||||||
|
// 8bits interleave 3&2 -> 6&2
|
||||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||||
|
// 64bits interleave 2&0 -> 1&0
|
||||||
|
// 64bits interleave 6&4 -> 3&4
|
||||||
punpck(qdq, 0, 4, 2, 6, 1, 3)
|
punpck(qdq, 0, 4, 2, 6, 1, 3)
|
||||||
|
|
||||||
movntps [%ecx+16*0], %xmm0
|
movntps [%ecx+16*0], %xmm0
|
||||||
|
@ -313,7 +390,7 @@ SwizzleBlock4_sse2_1:
|
||||||
movntps [%ecx+16*2], %xmm4
|
movntps [%ecx+16*2], %xmm4
|
||||||
movntps [%ecx+16*3], %xmm3
|
movntps [%ecx+16*3], %xmm3
|
||||||
|
|
||||||
// col 1, 3
|
// col 1, 3 (same as previous column)
|
||||||
|
|
||||||
movdqa %xmm0, [%edx]
|
movdqa %xmm0, [%edx]
|
||||||
movdqa %xmm2, [%edx+%ebx]
|
movdqa %xmm2, [%edx+%ebx]
|
||||||
|
@ -349,6 +426,9 @@ SwizzleBlock4_sse2_1:
|
||||||
|
|
||||||
//
|
//
|
||||||
// swizzling with unaligned reads
|
// swizzling with unaligned reads
|
||||||
|
// Same functions as above with movdqu instead of movdqa for the reads
|
||||||
|
// Movdqu is as fast as movdqa with aligned address... So do not bother, directly
|
||||||
|
// use movdqu
|
||||||
//
|
//
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
|
|
||||||
#if defined(ZEROGS_SSE2)
|
#if defined(ZEROGS_SSE2)
|
||||||
#include <xmmintrin.h>
|
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -64,23 +63,17 @@ void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 Wr
|
||||||
{
|
{
|
||||||
u32* d = &g_columnTable32[0][0];
|
u32* d = &g_columnTable32[0][0];
|
||||||
|
|
||||||
if( WriteMask == 0xffffffff )
|
if( WriteMask == 0xffffffff ) {
|
||||||
{
|
for(int i = 0; i < 8; ++i, d += 8) {
|
||||||
for(int i = 0; i < 8; ++i, d += 8)
|
for(int j = 0; j < 8; ++j) {
|
||||||
{
|
|
||||||
for(int j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
dst[d[j]] = ((src[2*j] + src[2*j+1]) >> 1);
|
dst[d[j]] = ((src[2*j] + src[2*j+1]) >> 1);
|
||||||
}
|
}
|
||||||
src += srcpitch;
|
src += srcpitch;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
{
|
for(int i = 0; i < 8; ++i, d += 8) {
|
||||||
for(int i = 0; i < 8; ++i, d += 8)
|
for(int j = 0; j < 8; ++j) {
|
||||||
{
|
|
||||||
for(int j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
dst[d[j]] = (((src[2*j] + src[2*j+1]) >> 1)&WriteMask)|(dst[d[j]]&~WriteMask);
|
dst[d[j]] = (((src[2*j] + src[2*j+1]) >> 1)&WriteMask)|(dst[d[j]]&~WriteMask);
|
||||||
}
|
}
|
||||||
src += srcpitch;
|
src += srcpitch;
|
||||||
|
@ -92,23 +85,17 @@ void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 Wr
|
||||||
{
|
{
|
||||||
u32* d = &g_columnTable32[0][0];
|
u32* d = &g_columnTable32[0][0];
|
||||||
|
|
||||||
if( WriteMask == 0xffffffff )
|
if( WriteMask == 0xffffffff ) {
|
||||||
{
|
for(int i = 0; i < 8; ++i, d += 8) {
|
||||||
for(int i = 0; i < 8; ++i, d += 8)
|
for(int j = 0; j < 8; ++j) {
|
||||||
{
|
|
||||||
for(int j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
dst[d[j]] = ((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2);
|
dst[d[j]] = ((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2);
|
||||||
}
|
}
|
||||||
src += srcpitch << 1;
|
src += srcpitch << 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
{
|
for(int i = 0; i < 8; ++i, d += 8) {
|
||||||
for(int i = 0; i < 8; ++i, d += 8)
|
for(int j = 0; j < 8; ++j) {
|
||||||
{
|
|
||||||
for(int j = 0; j < 8; ++j)
|
|
||||||
{
|
|
||||||
dst[d[j]] = (((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2)&WriteMask)|(dst[d[j]]&~WriteMask);
|
dst[d[j]] = (((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2)&WriteMask)|(dst[d[j]]&~WriteMask);
|
||||||
}
|
}
|
||||||
src += srcpitch << 1;
|
src += srcpitch << 1;
|
||||||
|
@ -663,6 +650,120 @@ static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0
|
||||||
|
|
||||||
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
|
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
|
||||||
{
|
{
|
||||||
|
#define YET_ANOTHER_INTRINSIC
|
||||||
|
#ifdef YET_ANOTHER_INTRINSIC
|
||||||
|
__m128i vm0 = _mm_load_si128((__m128i*)vm);
|
||||||
|
__m128i vm1 = _mm_load_si128((__m128i*)vm+1);
|
||||||
|
__m128i vm2 = _mm_load_si128((__m128i*)vm+2);
|
||||||
|
__m128i vm3 = _mm_load_si128((__m128i*)vm+3);
|
||||||
|
|
||||||
|
// rearrange 16bits words
|
||||||
|
vm0 = _mm_shufflehi_epi16(vm0, 0x88);
|
||||||
|
vm0 = _mm_shufflelo_epi16(vm0, 0x88); // 6 4 6 4 2 0 2 0
|
||||||
|
vm1 = _mm_shufflehi_epi16(vm1, 0x88);
|
||||||
|
vm1 = _mm_shufflelo_epi16(vm1, 0x88); // 14 12 14 12 10 8 10 8
|
||||||
|
|
||||||
|
// Note: MSVC complains about direct c-cast...
|
||||||
|
// vm0 = (__m128i)_mm_shuffle_ps((__m128)vm0, (__m128)vm1, 0x88); // 14 12 10 8 6 4 2 0
|
||||||
|
__m128 vm0_f = (_mm_shuffle_ps((__m128&)vm0, (__m128&)vm1, 0x88)); // 14 12 10 8 6 4 2 0
|
||||||
|
vm0 = (__m128i&)vm0_f;
|
||||||
|
vm0 = _mm_shuffle_epi32(vm0, 0xD8); // 14 12 6 4 10 8 2 0
|
||||||
|
|
||||||
|
// *** Same jobs for vm2 and vm3
|
||||||
|
vm2 = _mm_shufflehi_epi16(vm2, 0x88);
|
||||||
|
vm2 = _mm_shufflelo_epi16(vm2, 0x88);
|
||||||
|
vm3 = _mm_shufflehi_epi16(vm3, 0x88);
|
||||||
|
vm3 = _mm_shufflelo_epi16(vm3, 0x88);
|
||||||
|
|
||||||
|
// Note: MSVC complains about direct c-cast...
|
||||||
|
// vm2 = (__m128i)_mm_shuffle_ps((__m128)vm2, (__m128)vm3, 0x88);
|
||||||
|
__m128 vm2_f = (_mm_shuffle_ps((__m128&)vm2, (__m128&)vm3, 0x88)); // 14 12 10 8 6 4 2 0
|
||||||
|
vm2 = (__m128i&)vm2_f;
|
||||||
|
vm2 = _mm_shuffle_epi32(vm2, 0xD8);
|
||||||
|
|
||||||
|
// Create a zero register.
|
||||||
|
__m128i zero_128 = _mm_setzero_si128();
|
||||||
|
|
||||||
|
if ((u32)clut & 0x0F) {
|
||||||
|
// Unaligned write.
|
||||||
|
|
||||||
|
u16* clut_word_ptr = (u16*)clut;
|
||||||
|
__m128i clut_mask = _mm_load_si128((__m128i*)s_clut16mask2);
|
||||||
|
|
||||||
|
// Load previous data and clear high 16 bits of double words
|
||||||
|
__m128i clut_0 = _mm_load_si128((__m128i*)(clut_word_ptr-1)); // 6 5 4 3 2 1 0 x
|
||||||
|
__m128i clut_2 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+2); // 22 21 20 19 18 17 16 15
|
||||||
|
clut_0 = _mm_and_si128(clut_0, clut_mask); // - 5 - 3 - 1 - x
|
||||||
|
clut_2 = _mm_and_si128(clut_2, clut_mask); // - 21 - 19 - 17 - 15
|
||||||
|
|
||||||
|
// Convert vm0 from 16 bits to 32 bits (zero extended)
|
||||||
|
__m128i vm0_low = _mm_unpacklo_epi16(vm0, zero_128); // - 10 - 8 - 2 - 0
|
||||||
|
__m128i vm0_high = _mm_unpackhi_epi16(vm0, zero_128); // - 14 - 12 - 6 - 4
|
||||||
|
|
||||||
|
// Shift the values to align them with the clut
|
||||||
|
vm0_low = _mm_slli_epi32(vm0_low, 16); // 10 - 8 - 2 - 0 -
|
||||||
|
vm0_high = _mm_slli_epi32(vm0_high, 16); // 14 - 12 - 6 - 4 -
|
||||||
|
|
||||||
|
// Interlace old and new data
|
||||||
|
clut_0 = _mm_or_si128(clut_0, vm0_low); // 10 5 8 3 2 1 0 x
|
||||||
|
clut_2 = _mm_or_si128(clut_2, vm0_high); // 14 21 12 19 6 17 4 15
|
||||||
|
|
||||||
|
// Save the result
|
||||||
|
_mm_store_si128((__m128i*)(clut_word_ptr-1), clut_0);
|
||||||
|
_mm_store_si128((__m128i*)(clut_word_ptr-1)+2, clut_2);
|
||||||
|
|
||||||
|
// *** Same jobs for clut_1 and clut_3
|
||||||
|
__m128i clut_1 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+1);
|
||||||
|
__m128i clut_3 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+3);
|
||||||
|
clut_1 = _mm_and_si128(clut_1, clut_mask);
|
||||||
|
clut_3 = _mm_and_si128(clut_3, clut_mask);
|
||||||
|
|
||||||
|
__m128i vm2_low = _mm_unpacklo_epi16(vm2, zero_128);
|
||||||
|
__m128i vm2_high = _mm_unpackhi_epi16(vm2, zero_128);
|
||||||
|
vm2_low = _mm_slli_epi32(vm2_low, 16);
|
||||||
|
vm2_high = _mm_slli_epi32(vm2_high, 16);
|
||||||
|
|
||||||
|
clut_1 = _mm_or_si128(clut_1, vm2_low);
|
||||||
|
clut_3 = _mm_or_si128(clut_3, vm2_high);
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i*)(clut_word_ptr-1)+1, clut_1);
|
||||||
|
_mm_store_si128((__m128i*)(clut_word_ptr-1)+3, clut_3);
|
||||||
|
} else {
|
||||||
|
// Standard write
|
||||||
|
|
||||||
|
__m128i clut_mask = _mm_load_si128((__m128i*)s_clut16mask);
|
||||||
|
|
||||||
|
// Load previous data and clear low 16 bits of double words
|
||||||
|
__m128i clut_0 = _mm_and_si128(_mm_load_si128((__m128i*)clut), clut_mask); // 7 - 5 - 3 - 1 -
|
||||||
|
__m128i clut_2 = _mm_and_si128(_mm_load_si128((__m128i*)clut+2), clut_mask); // 23 - 21 - 19 - 17 -
|
||||||
|
|
||||||
|
// Convert vm0 from 16 bits to 32 bits (zero extended)
|
||||||
|
__m128i vm0_low = _mm_unpacklo_epi16(vm0, zero_128); // - 10 - 8 - 2 - 0
|
||||||
|
__m128i vm0_high = _mm_unpackhi_epi16(vm0, zero_128); // - 14 - 12 - 6 - 4
|
||||||
|
|
||||||
|
// Interlace old and new data
|
||||||
|
clut_0 = _mm_or_si128(clut_0, vm0_low); // 7 10 5 8 3 2 1 0
|
||||||
|
clut_2 = _mm_or_si128(clut_2, vm0_high); // 23 14 21 12 19 6 17 4
|
||||||
|
|
||||||
|
// Save the result
|
||||||
|
_mm_store_si128((__m128i*)clut, clut_0);
|
||||||
|
_mm_store_si128((__m128i*)clut+2, clut_2);
|
||||||
|
|
||||||
|
// *** Same jobs for clut_1 and clut_3
|
||||||
|
__m128i clut_1 = _mm_and_si128(_mm_load_si128((__m128i*)clut+1), clut_mask);
|
||||||
|
__m128i clut_3 = _mm_and_si128(_mm_load_si128((__m128i*)clut+3), clut_mask);
|
||||||
|
|
||||||
|
__m128i vm2_low = _mm_unpacklo_epi16(vm2, zero_128);
|
||||||
|
__m128i vm2_high = _mm_unpackhi_epi16(vm2, zero_128);
|
||||||
|
|
||||||
|
clut_1 = _mm_or_si128(clut_1, vm2_low);
|
||||||
|
clut_3 = _mm_or_si128(clut_3, vm2_high);
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i*)clut+1, clut_1);
|
||||||
|
_mm_store_si128((__m128i*)clut+3, clut_3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
__asm
|
__asm
|
||||||
{
|
{
|
||||||
|
@ -893,6 +994,7 @@ End:
|
||||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
|
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
|
||||||
);
|
);
|
||||||
#endif // _MSC_VER
|
#endif // _MSC_VER
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // ZEROGS_SSE2
|
#endif // ZEROGS_SSE2
|
||||||
|
@ -1115,3 +1217,4 @@ Z16Loop:
|
||||||
);
|
);
|
||||||
#endif // _MSC_VER
|
#endif // _MSC_VER
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,9 @@
|
||||||
#include "targets.h"
|
#include "targets.h"
|
||||||
#include "GLWin.h"
|
#include "GLWin.h"
|
||||||
#include "ZZoglShaders.h"
|
#include "ZZoglShaders.h"
|
||||||
|
#ifdef ZEROGS_SSE2
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
//----------------------- Defines
|
//----------------------- Defines
|
||||||
|
|
||||||
|
@ -95,7 +98,6 @@ namespace ZeroGS
|
||||||
// float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
|
// float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
|
||||||
|
|
||||||
extern CRangeManager s_RangeMngr; // manages overwritten memory
|
extern CRangeManager s_RangeMngr; // manages overwritten memory
|
||||||
GLenum GetRenderTargetFormat() { return GetRenderFormat() == RFT_byte8 ? 4 : g_internalRGBAFloat16Fmt; }
|
|
||||||
|
|
||||||
// returns the first and last addresses aligned to a page that cover
|
// returns the first and last addresses aligned to a page that cover
|
||||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
|
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
|
||||||
|
@ -541,7 +543,7 @@ __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
|
||||||
|
|
||||||
int Values[100] = {0, };
|
int Values[100] = {0, };
|
||||||
|
|
||||||
void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
inline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
||||||
{
|
{
|
||||||
int index = Index;
|
int index = Index;
|
||||||
p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
|
p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
|
||||||
|
@ -852,6 +854,55 @@ bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
|
||||||
|
|
||||||
bool bRet = false;
|
bool bRet = false;
|
||||||
|
|
||||||
|
// FIXME code generated by intrinsics is the same as the linux asm.
|
||||||
|
// However there is no "cmp %%esi, 0x90" equivalent in the windows asm !!!
|
||||||
|
// So the control flow must be checked
|
||||||
|
#define TEST_THIS
|
||||||
|
#ifdef TEST_THIS
|
||||||
|
while(entries != 0) {
|
||||||
|
#ifdef ZEROGS_SSE2
|
||||||
|
__m128i result = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src), _mm_load_si128((__m128i*)dst));
|
||||||
|
|
||||||
|
__m128i result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+1), _mm_load_si128((__m128i*)dst+1));
|
||||||
|
result = _mm_and_si128(result, result_tmp);
|
||||||
|
|
||||||
|
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+2), _mm_load_si128((__m128i*)dst+2));
|
||||||
|
result = _mm_and_si128(result, result_tmp);
|
||||||
|
|
||||||
|
result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+3), _mm_load_si128((__m128i*)dst+3));
|
||||||
|
result = _mm_and_si128(result, result_tmp);
|
||||||
|
|
||||||
|
u32 result_int = _mm_movemask_epi8(result);
|
||||||
|
if (result_int != 0xFF) {
|
||||||
|
bRet = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// I see no point in keeping an MMX version. The SSE2 version is probably faster.
|
||||||
|
// Keep a slow portable C version for reference/debug
|
||||||
|
for (int i=0; i < 16 ; i++) {
|
||||||
|
if (*((u32*)src+i) != *((u32*)dst+i)) {
|
||||||
|
bRet = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (entries & 0x10) {
|
||||||
|
src -= 56; // go back and down one column
|
||||||
|
}
|
||||||
|
|
||||||
|
src += 32; // go to the right block
|
||||||
|
|
||||||
|
if (entries == 0x90) {
|
||||||
|
src += 32; // skip whole block
|
||||||
|
}
|
||||||
|
|
||||||
|
dst += 8;
|
||||||
|
entries -= 16;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
// do a fast test with MMX
|
// do a fast test with MMX
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
int storeebx;
|
int storeebx;
|
||||||
|
@ -978,6 +1029,7 @@ Return:
|
||||||
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
|
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
|
||||||
|
|
||||||
#endif // _WIN32
|
#endif // _WIN32
|
||||||
|
#endif
|
||||||
return bRet;
|
return bRet;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
#include "ZZGl.h"
|
#include "ZZGl.h"
|
||||||
#include "GS.h"
|
#include "GS.h"
|
||||||
|
@ -100,12 +101,6 @@ namespace ZeroGS
|
||||||
|
|
||||||
typedef void (*DrawFn)();
|
typedef void (*DrawFn)();
|
||||||
|
|
||||||
enum RenderFormatType
|
|
||||||
{
|
|
||||||
RFT_byte8 = 0, // A8R8G8B8
|
|
||||||
RFT_float16 = 1, // A32R32B32G32
|
|
||||||
};
|
|
||||||
|
|
||||||
// managers render-to-texture targets
|
// managers render-to-texture targets
|
||||||
|
|
||||||
class CRenderTarget
|
class CRenderTarget
|
||||||
|
@ -237,6 +232,8 @@ class CMemoryTarget
|
||||||
clearminy = r.clearminy;
|
clearminy = r.clearminy;
|
||||||
clearmaxy = r.clearmaxy;
|
clearmaxy = r.clearmaxy;
|
||||||
widthmult = r.widthmult;
|
widthmult = r.widthmult;
|
||||||
|
texH = r.texH;
|
||||||
|
texW = r.texW;
|
||||||
channels = r.channels;
|
channels = r.channels;
|
||||||
validatecount = r.validatecount;
|
validatecount = r.validatecount;
|
||||||
fmt = r.fmt;
|
fmt = r.fmt;
|
||||||
|
@ -267,13 +264,19 @@ class CMemoryTarget
|
||||||
|
|
||||||
int starty, height; // assert(starty >= realy)
|
int starty, height; // assert(starty >= realy)
|
||||||
int realy, realheight; // this is never touched once allocated
|
int realy, realheight; // this is never touched once allocated
|
||||||
|
// realy is start pointer of data in 4M data block (start) and size (end-start).
|
||||||
|
|
||||||
u32 usedstamp;
|
u32 usedstamp;
|
||||||
u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters
|
u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters
|
||||||
|
|
||||||
u32 fmt;
|
u32 fmt;
|
||||||
|
|
||||||
int widthmult;
|
int widthmult; // Either 1 or 2.
|
||||||
int channels;
|
int channels; // The number of pixels per PSM format word. channels == PIXELS_PER_WORD(psm)
|
||||||
|
// This is the real drawing size in pixels of the texture in renderbuffer.
|
||||||
|
int texW; // (realheight + widthmult - 1)/widthmult == realheight or [(realheight+1)/2]
|
||||||
|
int texH; // GPU_TEXWIDTH *widthmult * channels;
|
||||||
|
|
||||||
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
|
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
|
||||||
|
|
||||||
int validatecount; // count how many times has been validated, if too many, destroy
|
int validatecount; // count how many times has been validated, if too many, destroy
|
||||||
|
@ -415,7 +418,6 @@ extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
|
||||||
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
||||||
extern GLuint vboRect;
|
extern GLuint vboRect;
|
||||||
extern int g_nCurVBOIndex;
|
extern int g_nCurVBOIndex;
|
||||||
extern RenderFormatType g_RenderFormatType;
|
|
||||||
|
|
||||||
void AddMessage(const char* pstr, u32 ms = 5000);
|
void AddMessage(const char* pstr, u32 ms = 5000);
|
||||||
void DrawText(const char* pstr, int left, int top, u32 color);
|
void DrawText(const char* pstr, int left, int top, u32 color);
|
||||||
|
@ -479,8 +481,6 @@ bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will cha
|
||||||
|
|
||||||
// call to load CLUT data (depending on CLD)
|
// call to load CLUT data (depending on CLD)
|
||||||
void texClutWrite(int ctx);
|
void texClutWrite(int ctx);
|
||||||
RenderFormatType GetRenderFormat();
|
|
||||||
GLenum GetRenderTargetFormat();
|
|
||||||
|
|
||||||
int Save(s8* pbydata);
|
int Save(s8* pbydata);
|
||||||
bool Load(s8* pbydata);
|
bool Load(s8* pbydata);
|
||||||
|
@ -523,7 +523,25 @@ inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
|
||||||
tex0->cld = ZZOglGet_cld_TexBits(Data);
|
tex0->cld = ZZOglGet_cld_TexBits(Data);
|
||||||
|
|
||||||
ZeroGS::texClutWrite(ictx);
|
ZeroGS::texClutWrite(ictx);
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the size in bytes of x "strings" of texture memory, computed as 4 * GPU_TEXWIDTH * x.
// NOTE(review): "string" presumably means one row of GPU_TEXWIDTH 4-byte words -- confirm against callers.
|
||||||
|
inline int MemorySize(int x)
|
||||||
|
{
|
||||||
|
return 4 * GPU_TEXWIDTH * x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the address of the data block for string x: g_pbyGSMemory advanced by MemorySize(x).
|
||||||
|
inline u8* MemoryAddress(int x)
|
||||||
|
{
|
||||||
|
return g_pbyGSMemory + MemorySize(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns g_pbyGSMemory advanced by mult * x, where the multiplier mult is a template parameter.
// NOTE(review): presumably the unit size (mult) is chosen by the caller per pixel format -- confirm.
template <u32 mult>
|
||||||
|
inline u8* _MemoryAddress(int x)
|
||||||
|
{
|
||||||
|
return g_pbyGSMemory + mult * x;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue