mirror of https://github.com/PCSX2/pcsx2.git
zzogl-pg: Merge back GregMiscellaneous branch (3867)
* Various cleanups * Replace ASM with intrinsics (much more portable) * Various performance tuning (expect a 10%-20% speedup ^_^ ) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3868 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
c875caec15
commit
c7a929a530
|
@ -205,6 +205,8 @@ inline bool PSMT_HAS_SHARED_BITS (int fpsm, int tpsm) {
|
|||
return (SUM == 0x15 || SUM == 0x1D || SUM == 0x2C || SUM == 0x30);
|
||||
}
|
||||
|
||||
// Each pixel of a CLUT stored in 32-bit color takes 4 bytes; each pixel of a 16-bit CLUT takes 2 bytes.
|
||||
// Returns the byte size of one CLUT pixel for the given CLUT storage mode:
// 4 when cpsm is 1 or lower, 2 for every other value.
inline int CLUT_PIXEL_SIZE(int cpsm)
{
	if (cpsm > 1)
		return 2;

	return 4;
}
|
||||
|
||||
//----------------------- Data from registers -----------------------
|
||||
|
||||
|
@ -542,7 +544,9 @@ typedef struct
|
|||
|
||||
extern GSinternal gs;
|
||||
|
||||
static __forceinline u16 RGBA32to16(u32 c)
|
||||
// Note the function is used in a template parameter so it must be declared extern
|
||||
// Note2: In this case extern is not compatible with __forceinline so just inline it...
|
||||
extern inline u16 RGBA32to16(u32 c)
|
||||
{
|
||||
return (u16)((((c) & 0x000000f8) >> 3) |
|
||||
(((c) & 0x0000f800) >> 6) |
|
||||
|
@ -558,6 +562,7 @@ static __forceinline u32 RGBA16to32(u16 c)
|
|||
(((c) & 0x8000) ? 0xff000000 : 0);
|
||||
}
|
||||
|
||||
#if 0
|
||||
// converts float16 [0,1] to BYTE [0,255] (assumes value is in range, otherwise will take lower 8bits)
|
||||
// f is a u16
|
||||
static __forceinline u16 Float16ToBYTE(u16 f)
|
||||
|
@ -603,6 +608,7 @@ static __forceinline u16 Float16ToALPHA(u16 f)
|
|||
// used for Z values
|
||||
#define Float16ToARGB_Z(f) COLOR_ARGB((u32)Float16ToBYTE_2(f.w), Float16ToBYTE_2(f.x), Float16ToBYTE_2(f.y), Float16ToBYTE_2(f.z))
|
||||
#define Float16ToARGB16_Z(f) ((Float16ToBYTE_2(f.y)<<8)|Float16ToBYTE_2(f.z))
|
||||
#endif
|
||||
|
||||
|
||||
inline float Clamp(float fx, float fmin, float fmax)
|
||||
|
|
|
@ -38,6 +38,7 @@ using namespace std;
|
|||
#include "targets.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#include "ZZoglFlushHack.h"
|
||||
#include "ZZoglFlushHack.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable:4244)
|
||||
|
@ -68,7 +69,7 @@ extern const char* pbilinear[];
|
|||
// statistics
|
||||
u32 g_nGenVars = 0, g_nTexVars = 0, g_nAlphaVars = 0, g_nResolve = 0;
|
||||
|
||||
#define VER 2
|
||||
#define VER 3
|
||||
const unsigned char zgsversion = PS2E_GS_VERSION;
|
||||
unsigned char zgsrevision = 0; // revision and build gives plugin version
|
||||
unsigned char zgsbuild = VER;
|
||||
|
@ -143,6 +144,7 @@ void ReportHacks(gameHacks hacks)
|
|||
if (hacks.quick_resolve_1) ZZLog::WriteLn("'Quick resolve 1' enabled.");
|
||||
if (hacks.no_quick_resolve) ZZLog::WriteLn("'No Quick resolve' hack enabled.");
|
||||
if (hacks.no_target_clut) ZZLog::WriteLn("'No target clut' hack enabled.");
|
||||
if (hacks.no_stencil) ZZLog::WriteLn("'No stencil' hack enabled.");
|
||||
if (hacks.vss_hack_off) ZZLog::WriteLn("VSS hack enabled.");
|
||||
if (hacks.no_depth_resolve) ZZLog::WriteLn("'No depth resolve' hack enabled.");
|
||||
if (hacks.full_16_bit_res) ZZLog::WriteLn("'Full 16 bit resolution' hack enabled.");
|
||||
|
@ -151,7 +153,7 @@ void ReportHacks(gameHacks hacks)
|
|||
if (hacks.no_alpha_test) ZZLog::WriteLn("'No alpha test' hack enabled.");
|
||||
if (hacks.disable_mrt_depth) ZZLog::WriteLn("'Disable mrt depth' hack enabled.");
|
||||
if (hacks.args_32_bit) ZZLog::WriteLn("'Args 32 bit' hack enabled.");
|
||||
if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled.");
|
||||
//if (hacks.path3) ZZLog::WriteLn("'Path3' hack enabled.");
|
||||
if (hacks.parallel_context) ZZLog::WriteLn("'Parallel context' hack enabled.");
|
||||
if (hacks.xenosaga_spec) ZZLog::WriteLn("'Xenosaga spec' hack enabled.");
|
||||
if (hacks.partial_pointers) ZZLog::WriteLn("'Partial pointers' hack enabled.");
|
||||
|
@ -382,6 +384,7 @@ void CALLBACK GSclose()
|
|||
|
||||
SaveStateFile = NULL;
|
||||
SaveStateExists = true; // default value
|
||||
g_LastCRC = 0;
|
||||
}
|
||||
|
||||
void CALLBACK GSirqCallback(void (*callback)())
|
||||
|
|
|
@ -87,9 +87,7 @@ template<int index> void _GSgifTransfer(const u32 *pMem, u32 size)
|
|||
path->setTag(pMem);
|
||||
pMem += 4;
|
||||
size--;
|
||||
|
||||
if ((conf.settings().path3) && (index == 2) && path->eop) nPath3Hack = 1;
|
||||
|
||||
|
||||
// eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and
|
||||
// values other than the EOP field are disregarded."
|
||||
if (path->nloop > 0)
|
||||
|
|
|
@ -78,7 +78,12 @@
|
|||
|
||||
static vector<u8> s_vTempBuffer, s_vTransferCache;
|
||||
static int gs_imageEnd = 0;
|
||||
|
||||
|
||||
// From the start of monster labs. In all 3 cases, psm == 0.
|
||||
// ZZogl-PG: GetRectMemAddress(0x3f4000, 0x404000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f40, 0x100);
|
||||
// ZZogl-PG: GetRectMemAddress(0x3f8000, 0x408000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3f80, 0x100);
|
||||
// ZZogl-PG: GetRectMemAddress(0x3fc000, 0x40c000, 0x0, 0x0, 0x0, 0x100, 0x40, 0x3fc0, 0x100);
|
||||
|
||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw)
|
||||
{
|
||||
FUNCLOG
|
||||
|
@ -108,7 +113,7 @@
|
|||
bits = PSMT_BITS_NUM(psm);
|
||||
start = getPixelFun[psm](x, y, bp, bw);
|
||||
end = getPixelFun[psm](x + w - 1, y + h - 1, bp, bw) + 1;
|
||||
|
||||
|
||||
if (bits > 0)
|
||||
{
|
||||
start *= bits;
|
||||
|
@ -158,7 +163,7 @@
|
|||
|
||||
if (end > MEMORY_END)
|
||||
{
|
||||
ZZLog::Warn_Log("Host local out of bounds!");
|
||||
ZZLog::Warn_Log("Init host local out of bounds! (end == 0x%x)", end);
|
||||
//gs.imageTransfer = -1;
|
||||
end = MEMORY_END;
|
||||
}
|
||||
|
@ -178,9 +183,8 @@
|
|||
int start, end;
|
||||
|
||||
GetRectMemAddress(start, end, gs.dstbuf.psm, gs.imageX, gs.imageY, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
|
||||
|
||||
|
||||
assert(start < gs_imageEnd);
|
||||
|
||||
end = gs_imageEnd;
|
||||
|
||||
// sometimes games can decompress to alpha channel of render target only, in this case
|
||||
|
@ -434,20 +438,20 @@ __forceinline void _TransferLocalLocal_4()
|
|||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||
|
||||
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
|
||||
read = gsp((j+4)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+4)%2048, i2%2048, gs.dstbuf.bw);
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
||||
|
||||
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
||||
read = gsp((j+5)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+5)%2048, i2%2048, gs.dstbuf.bw);
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||
|
||||
read = gsp((j+2)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+2)%2048, i2%2048, gs.dstbuf.bw);
|
||||
read = gsp((j+6)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+6)%2048, i2%2048, gs.dstbuf.bw);
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f);
|
||||
|
||||
read = gsp((j+3)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+3)%2048, i2%2048, gs.dstbuf.bw);
|
||||
read = gsp((j+7)%2048, i%2048, gs.srcbuf.bw);
|
||||
write = gdp((j2+7)%2048, i2%2048, gs.dstbuf.bw);
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -132,28 +132,35 @@ void CreateGameHackTable(GtkWidget *treeview, gameHacks hacks)
|
|||
mapConfOpts.clear();
|
||||
|
||||
add_map_entry(GAME_TEXTURETARGS, "00000001", "Tex Target checking - 00000001\nLego Racers");
|
||||
add_map_entry(GAME_AUTORESET, "00000002", "Auto reset targs - 00000002\nShadow Hearts, Samurai Warriors. Use when game is slow and toggling AA fixes it.");
|
||||
add_map_entry(GAME_NOTARGETRESOLVE, "00000010", "No target resolves - 00000010\nStops all resolving of targets. Try this first for really slow games. Dark Cloud 1");
|
||||
add_map_entry(GAME_EXACTCOLOR, "00000020", "Exact color testing - 00000020\nFixes overbright or shadow/black artifacts (Crash 'n Burn).");
|
||||
add_map_entry(GAME_NOCOLORCLAMP, "00000040", "No color clamping - 00000040\nSpeeds up games, but might be too bright or too dim.");
|
||||
add_map_entry(GAME_NOALPHAFAIL, "00000100", "Alpha Fail hack - 00000100\nFor Sonic Unleashed, Shadow the Hedgehog, Ghost in the Shell. Remove vertical stripes or other coloring artefacts. Break Persona 4 and MGS3");
|
||||
add_map_entry(GAME_AUTORESET, "00000002", "Auto reset targs - 00000002\nUse when game is slow and toggling AA fixes it. Samurai Warriors. (Automatically on for Shadow Hearts)");
|
||||
add_map_entry(GAME_INTERLACE2X, "00000004", "Interlace 2X - 00000004\nFixes 2x bigger screen. Gradius 3.");
|
||||
//GAME_TEXAHACK (still implemented)
|
||||
add_map_entry(GAME_NOTARGETRESOLVE, "00000010", "No target resolves - 00000010\nStops all resolving of targets. Try this first for really slow games. (Automatically on for Dark Cloud 1.)");
|
||||
add_map_entry(GAME_EXACTCOLOR, "00000020", "Exact color testing - 00000020\nFixes overbright or shadow/black artifacts. Crash 'n Burn.");
|
||||
//add_map_entry(GAME_NOCOLORCLAMP, "00000040", "No color clamping - 00000040\nSpeeds up games, but might be too bright or too dim.");
|
||||
//GAME_FFXHACK
|
||||
add_map_entry(GAME_NOALPHAFAIL, "00000100", "Alpha Fail hack - 00000100\nRemove vertical stripes or other coloring artifacts. Breaks Persona 4 and MGS3. (Automatically on for Sonic Unleashed, Shadow the Hedgehog, & Ghost in the Shell.)");
|
||||
add_map_entry(GAME_NODEPTHUPDATE, "00000200", "Disable depth updates - 00000200");
|
||||
add_map_entry(GAME_QUICKRESOLVE1, "00000400", "Resolve Hack #1 - 00000400\nKingdom Hearts. Speeds some games.");
|
||||
add_map_entry(GAME_NOQUICKRESOLVE, "00000800", "Resolve Hack #2 - 00000800\nShadow Hearts, Urbz. Destroy FFX");
|
||||
add_map_entry(GAME_QUICKRESOLVE1, "00000400", "Resolve Hack #1 - 00000400\n Speeds some games. Kingdom Hearts.");
|
||||
add_map_entry(GAME_NOQUICKRESOLVE, "00000800", "Resolve Hack #2 - 00000800\nShadow Hearts, Urbz. Destroys FFX.");
|
||||
add_map_entry(GAME_NOTARGETCLUT, "00001000", "No target CLUT - 00001000\nResident Evil 4, or foggy scenes.");
|
||||
add_map_entry(GAME_NOSTENCIL, "00002000", "Disable stencil buffer - 00002000\nUsually safe to do for simple scenes. Harvest Moon");
|
||||
add_map_entry(GAME_NOSTENCIL, "00002000", "Disable stencil buffer - 00002000\nUsually safe to do for simple scenes. Harvest Moon.");
|
||||
//GAME_VSSHACKOFF (still implemented)
|
||||
add_map_entry(GAME_NODEPTHRESOLVE, "00008000", "No depth resolve - 00008000\nMight give z buffer artifacts.");
|
||||
add_map_entry(GAME_FULL16BITRES, "00010000", "Full 16 bit resolution - 00010000\nUse when half the screen is missing.");
|
||||
add_map_entry(GAME_RESOLVEPROMOTED, "00020000", "Resolve Hack #3 - 00020000\nNeopets");
|
||||
add_map_entry(GAME_FASTUPDATE, "00040000", "Fast Update - 00040000\nOkami. Speeds some games. Needs for Sonic Unleashed");
|
||||
add_map_entry(GAME_FASTUPDATE, "00040000", "Fast Update - 00040000\n Speeds some games. Needed for Sonic Unleashed. Okami.");
|
||||
add_map_entry(GAME_NOALPHATEST, "00080000", "Disable alpha testing - 00080000");
|
||||
add_map_entry(GAME_DISABLEMRTDEPTH, "00100000", "Enable Multiple RTs - 00100000");
|
||||
add_map_entry(GAME_XENOSPECHACK, "01000000", "Specular Highlights - 01000000\nMakes Xenosaga and Okage graphics faster by removing highlights");
|
||||
add_map_entry(GAME_PARTIALPOINTERS, "02000000", "Partial targets - 02000000");
|
||||
//GAME_32BITTARGS
|
||||
//GAME_PATH3HACK
|
||||
//GAME_DOPARALLELCTX
|
||||
add_map_entry(GAME_XENOSPECHACK, "01000000", "Specular Highlights - 01000000\nMakes graphics faster by removing highlights. (Automatically on for Xenosaga, Okami, & Okage.)");
|
||||
//add_map_entry(GAME_PARTIALPOINTERS, "02000000", "Partial targets - 02000000");
|
||||
add_map_entry(GAME_PARTIALDEPTH, "04000000", "Partial depth - 04000000");
|
||||
add_map_entry(GAME_GUSTHACK, "10000000", "Gust fix, made gustgame more clean and fast - 10000000");
|
||||
add_map_entry(GAME_NOLOGZ, "20000000", "No logarithmic Z, could decrease number of Z-artefacts - 20000000");
|
||||
add_map_entry(GAME_INTERLACE2X, "00000004", "Interlace 2X - 00000004\nFixes 2x bigger screen (Gradius 3).");
|
||||
//GAME_REGETHACK (commented out in code)
|
||||
add_map_entry(GAME_GUSTHACK, "10000000", "Gust fix - 10000000. Makes gust games cleaner and faster. (Automatically on for most Gust games)");
|
||||
add_map_entry(GAME_NOLOGZ, "20000000", "No logarithmic Z - 20000000. Could decrease number of Z-artifacts.");
|
||||
add_map_entry(GAME_AUTOSKIPDRAW, "40000000", "Remove blur effect on some games\nSlow games.");
|
||||
|
||||
for (map<string, confOptsStruct>::iterator it = mapConfOpts.begin(); it != mapConfOpts.end(); ++it)
|
||||
|
@ -255,7 +262,7 @@ void DisplayDialog()
|
|||
GtkWidget *option_frame, *option_box;
|
||||
GtkWidget *log_check;
|
||||
GtkWidget *int_label, *int_box, *int_holder;
|
||||
GtkWidget *bilinear_check;
|
||||
GtkWidget *bilinear_label, *bilinear_box, *bilinear_holder;
|
||||
GtkWidget *aa_label, *aa_box, *aa_holder;
|
||||
GtkWidget *snap_label, *snap_box, *snap_holder;
|
||||
GtkWidget *fullscreen_label, *widescreen_check;
|
||||
|
@ -293,10 +300,18 @@ void DisplayDialog()
|
|||
gtk_box_pack_start(GTK_BOX(int_holder), int_label, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(int_holder), int_box, false, false, 2);
|
||||
|
||||
|
||||
bilinear_check = gtk_check_button_new_with_label("Bilinear Filtering");
|
||||
gtk_widget_set_tooltip_text(bilinear_check, "Best quality is off. Turn on for speed. Toggled by pressing Shift + F5 when running.");
|
||||
|
||||
bilinear_label = gtk_label_new("Bilinear Filtering:");
|
||||
bilinear_box = gtk_combo_box_new_text();
|
||||
|
||||
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Off");
|
||||
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Normal");
|
||||
gtk_combo_box_append_text(GTK_COMBO_BOX(bilinear_box), "Forced");
|
||||
gtk_combo_box_set_active(GTK_COMBO_BOX(bilinear_box), conf.bilinear);
|
||||
gtk_widget_set_tooltip_text(bilinear_box, "Best quality is off. Turn on for speed. Toggled by pressing Shift + F5 when running.");
|
||||
bilinear_holder = gtk_hbox_new(false, 5);
|
||||
gtk_box_pack_start(GTK_BOX(bilinear_holder), bilinear_label, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(bilinear_holder), bilinear_box, false, false, 2);
|
||||
|
||||
aa_label = gtk_label_new("Anti-Aliasing:");
|
||||
aa_box = gtk_combo_box_new_text();
|
||||
|
||||
|
@ -352,7 +367,7 @@ void DisplayDialog()
|
|||
gtk_frame_set_shadow_type(GTK_FRAME(option_frame), GTK_SHADOW_NONE);
|
||||
|
||||
gtk_box_pack_start(GTK_BOX(option_box), log_check, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(option_box), bilinear_check, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(option_box), bilinear_holder, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(option_box), int_holder, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(option_box), aa_holder, false, false, 2);
|
||||
gtk_box_pack_start(GTK_BOX(option_box), snap_holder, false, false, 2);
|
||||
|
@ -370,7 +385,6 @@ void DisplayDialog()
|
|||
gtk_box_pack_start(GTK_BOX(main_box), option_frame, false, false, 2);
|
||||
|
||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(log_check), conf.log);
|
||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bilinear_check), conf.bilinear);
|
||||
gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(widescreen_check), (conf.widescreen()));
|
||||
|
||||
gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), main_frame);
|
||||
|
@ -389,9 +403,11 @@ void DisplayDialog()
|
|||
|
||||
if (gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box)) != -1)
|
||||
conf.aa = gtk_combo_box_get_active(GTK_COMBO_BOX(aa_box));
|
||||
|
||||
if (gtk_combo_box_get_active(GTK_COMBO_BOX(bilinear_box)) != -1)
|
||||
conf.bilinear = gtk_combo_box_get_active(GTK_COMBO_BOX(bilinear_box));
|
||||
|
||||
conf.log = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(log_check));
|
||||
conf.bilinear = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(bilinear_check));
|
||||
fake_options.widescreen = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widescreen_check));
|
||||
fake_options.tga_snap = gtk_combo_box_get_active(GTK_COMBO_BOX(snap_box));
|
||||
|
||||
|
@ -445,7 +461,7 @@ void SysMessage(const char *fmt, ...)
|
|||
|
||||
void CALLBACK GSabout()
|
||||
{
|
||||
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42). Based off of ZeroGS, by zerofrog.");
|
||||
SysMessage("ZZOgl PG: by Zeydlitz (PG version worked on by arcum42, gregory, and the pcsx2 development team). Based off of ZeroGS, by zerofrog.");
|
||||
}
|
||||
|
||||
s32 CALLBACK GStest()
|
||||
|
|
|
@ -152,6 +152,7 @@
|
|||
<Unit filename="../../ZZoglFlush.cpp" />
|
||||
<Unit filename="../../ZZoglFlushHack.cpp" />
|
||||
<Unit filename="../../ZZoglFlushHack.h" />
|
||||
<Unit filename="../../ZZoglMath.h" />
|
||||
<Unit filename="../../ZZoglSave.cpp" />
|
||||
<Unit filename="../../ZZoglShaders.cpp" />
|
||||
<Unit filename="../../ZZoglShaders.h" />
|
||||
|
@ -171,7 +172,6 @@
|
|||
<Unit filename="../../x86.h" />
|
||||
<Unit filename="../../zerogs.cpp" />
|
||||
<Unit filename="../../zerogs.h" />
|
||||
<Unit filename="../../zerogsmath.h" />
|
||||
<Unit filename="../../zpipe.cpp" />
|
||||
<Unit filename="../../zpipe.h" />
|
||||
<Extensions>
|
||||
|
|
|
@ -184,7 +184,7 @@ static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSiz
|
|||
tempY = gs.imageY;
|
||||
tempX = gs.imageX;
|
||||
Point alignedPt;
|
||||
|
||||
|
||||
nSize = (nQWordSize * 4 * 2) / tp2;
|
||||
nSize = min(nSize, gs.imageWnew * gs.imageHnew);
|
||||
|
||||
|
@ -241,237 +241,136 @@ void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
|||
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
||||
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) {FUNCLOG}
|
||||
|
||||
#define FILL_BLOCK(psm, psmcol) \
|
||||
{ \
|
||||
b.pageTable = &g_pageTable##psm[0][0]; \
|
||||
b.blockTable = &g_blockTable##psm[0][0]; \
|
||||
b.columnTable = &g_columnTable##psmcol[0][0]; \
|
||||
\
|
||||
assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \
|
||||
\
|
||||
psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
||||
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
||||
\
|
||||
for(i = 0; i < b.height; ++i) \
|
||||
{ \
|
||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
||||
for(j = 0; j < b.width; ++j) \
|
||||
{ \
|
||||
/* fill the table */ \
|
||||
u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \
|
||||
b.pageTable[i * b.width + j] = u; \
|
||||
psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
||||
\
|
||||
for(i = 0; i < b.height; ++i) \
|
||||
{ \
|
||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
||||
u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH; \
|
||||
for(j = 0; j < b.width; ++j) \
|
||||
{ \
|
||||
u32 temp = ((j + 1) % b.width); \
|
||||
float4* pv = &psrcv[i_width + j]; \
|
||||
pv->x = psrcf[i_width + j]; \
|
||||
pv->y = psrcf[i_width + temp]; \
|
||||
pv->z = psrcf[i_width2 + j]; \
|
||||
pv->w = psrcf[i_width2 + temp]; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define FILL_BLOCK_NF(psm, psmcol) \
|
||||
{ \
|
||||
b.pageTable = &g_pageTable##psm[0][0]; \
|
||||
b.blockTable = &g_blockTable##psm[0][0]; \
|
||||
b.columnTable = &g_columnTable##psmcol[0][0]; \
|
||||
\
|
||||
assert( sizeof(g_pageTable##psm) == b.width * b.height * sizeof(g_pageTable##psm[0][0]) ); \
|
||||
\
|
||||
psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
||||
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH; \
|
||||
\
|
||||
for(i = 0; i < b.height; ++i) \
|
||||
{ \
|
||||
u32 i_width = i*BLOCK_TEXWIDTH; \
|
||||
for(j = 0; j < b.width; ++j) \
|
||||
{ \
|
||||
/* fill the table */ \
|
||||
u32 u = g_blockTable##psm[(i / b.colheight)][(j / b.colwidth)] * 64 * b.mult + g_columnTable##psmcol[i%b.colheight][j%b.colwidth]; \
|
||||
b.pageTable[i * b.width + j] = u; \
|
||||
psrcw[i_width + j] = u; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
void FillBlocksNF(vector<char>& vBlockData, vector<char>& vBilinearData)
|
||||
void fill_block(BLOCK b, vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
||||
{
|
||||
FUNCLOG
|
||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
|
||||
float* psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||
u16* psrcw = NULL;
|
||||
if (!floatfmt)
|
||||
psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||
|
||||
int i, j;
|
||||
BLOCK b;
|
||||
float* psrcf = NULL;
|
||||
u16* psrcw = NULL;
|
||||
for(int i = 0; i < b.height; ++i)
|
||||
{
|
||||
u32 i_width = i*BLOCK_TEXWIDTH;
|
||||
for(int j = 0; j < b.width; ++j)
|
||||
{
|
||||
/* fill the table */
|
||||
u32 bt = b.blockTable[(i / b.colheight)*(b.width/b.colwidth) + (j / b.colwidth)];
|
||||
u32 ct = b.columnTable[(i%b.colheight)*b.colwidth + (j%b.colwidth)];
|
||||
u32 u = bt * 64 * b.mult + ct;
|
||||
b.pageTable[i * b.width + j] = u;
|
||||
if (floatfmt)
|
||||
psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult);
|
||||
else
|
||||
psrcw[i_width + j] = u;
|
||||
|
||||
memset(m_Blocks, 0, sizeof(m_Blocks));
|
||||
}
|
||||
}
|
||||
|
||||
// 32
|
||||
b.SetDim(64, 32, 0, 0, 1);
|
||||
FILL_BLOCK_NF(32, 32);
|
||||
m_Blocks[PSMCT32] = b;
|
||||
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
||||
if (floatfmt) {
|
||||
float4* psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
|
||||
|
||||
// 24 (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMCT24] = b;
|
||||
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
||||
|
||||
// 8H (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMT8H] = b;
|
||||
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
||||
|
||||
m_Blocks[PSMT4HL] = b;
|
||||
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
||||
|
||||
m_Blocks[PSMT4HH] = b;
|
||||
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
||||
|
||||
// 32z
|
||||
b.SetDim(64, 32, 64, 0, 1);
|
||||
FILL_BLOCK_NF(32Z, 32);
|
||||
m_Blocks[PSMT32Z] = b;
|
||||
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
||||
|
||||
// 24Z (same as 32Z except write/readPixel are different)
|
||||
m_Blocks[PSMT24Z] = b;
|
||||
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
||||
|
||||
// 16
|
||||
b.SetDim(64, 64, 0, 32, 2);
|
||||
FILL_BLOCK_NF(16, 16);
|
||||
m_Blocks[PSMCT16] = b;
|
||||
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
||||
|
||||
// 16s
|
||||
b.SetDim(64, 64, 64, 32, 2);
|
||||
FILL_BLOCK_NF(16S, 16);
|
||||
m_Blocks[PSMCT16S] = b;
|
||||
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
||||
|
||||
// 16z
|
||||
b.SetDim(64, 64, 0, 96, 2);
|
||||
FILL_BLOCK_NF(16Z, 16);
|
||||
m_Blocks[PSMT16Z] = b;
|
||||
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
||||
|
||||
// 16sz
|
||||
b.SetDim(64, 64, 64, 96, 2);
|
||||
FILL_BLOCK_NF(16SZ, 16);
|
||||
m_Blocks[PSMT16SZ] = b;
|
||||
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
||||
|
||||
// 8
|
||||
b.SetDim(128, 64, 0, 160, 4);
|
||||
FILL_BLOCK_NF(8, 8);
|
||||
m_Blocks[PSMT8] = b;
|
||||
m_Blocks[PSMT8].SetFun(PSMT8);
|
||||
|
||||
// 4
|
||||
b.SetDim(128, 128, 0, 224, 8);
|
||||
FILL_BLOCK_NF(4, 4);
|
||||
m_Blocks[PSMT4] = b;
|
||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
||||
}
|
||||
|
||||
|
||||
void FillBlocksF(vector<char>& vBlockData, vector<char>& vBilinearData)
|
||||
{
|
||||
FUNCLOG
|
||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
|
||||
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
|
||||
|
||||
int i, j;
|
||||
BLOCK b;
|
||||
float* psrcf = NULL;
|
||||
u16* psrcw = NULL;
|
||||
float4* psrcv = NULL;
|
||||
|
||||
memset(m_Blocks, 0, sizeof(m_Blocks));
|
||||
|
||||
// 32
|
||||
b.SetDim(64, 32, 0, 0, 1);
|
||||
FILL_BLOCK(32, 32);
|
||||
m_Blocks[PSMCT32] = b;
|
||||
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
||||
|
||||
// 24 (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMCT24] = b;
|
||||
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
||||
|
||||
// 8H (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMT8H] = b;
|
||||
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
||||
|
||||
m_Blocks[PSMT4HL] = b;
|
||||
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
||||
|
||||
m_Blocks[PSMT4HH] = b;
|
||||
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
||||
|
||||
// 32z
|
||||
b.SetDim(64, 32, 64, 0, 1);
|
||||
FILL_BLOCK(32Z, 32);
|
||||
m_Blocks[PSMT32Z] = b;
|
||||
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
||||
|
||||
// 24Z (same as 32Z except write/readPixel are different)
|
||||
m_Blocks[PSMT24Z] = b;
|
||||
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
||||
|
||||
// 16
|
||||
b.SetDim(64, 64, 0, 32, 2);
|
||||
FILL_BLOCK(16, 16);
|
||||
m_Blocks[PSMCT16] = b;
|
||||
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
||||
|
||||
// 16s
|
||||
b.SetDim(64, 64, 64, 32, 2);
|
||||
FILL_BLOCK(16S, 16);
|
||||
m_Blocks[PSMCT16S] = b;
|
||||
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
||||
|
||||
// 16z
|
||||
b.SetDim(64, 64, 0, 96, 2);
|
||||
FILL_BLOCK(16Z, 16);
|
||||
m_Blocks[PSMT16Z] = b;
|
||||
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
||||
|
||||
// 16sz
|
||||
b.SetDim(64, 64, 64, 96, 2);
|
||||
FILL_BLOCK(16SZ, 16);
|
||||
m_Blocks[PSMT16SZ] = b;
|
||||
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
||||
|
||||
// 8
|
||||
b.SetDim(128, 64, 0, 160, 4);
|
||||
FILL_BLOCK(8, 8);
|
||||
m_Blocks[PSMT8] = b;
|
||||
m_Blocks[PSMT8].SetFun(PSMT8);
|
||||
|
||||
// 4
|
||||
b.SetDim(128, 128, 0, 224, 8);
|
||||
FILL_BLOCK(4, 4);
|
||||
m_Blocks[PSMT4] = b;
|
||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
||||
for(int i = 0; i < b.height; ++i)
|
||||
{
|
||||
u32 i_width = i*BLOCK_TEXWIDTH;
|
||||
u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH;
|
||||
for(int j = 0; j < b.width; ++j)
|
||||
{
|
||||
u32 temp = ((j + 1) % b.width);
|
||||
float4* pv = &psrcv[i_width + j];
|
||||
pv->x = psrcf[i_width + j];
|
||||
pv->y = psrcf[i_width + temp];
|
||||
pv->z = psrcf[i_width2 + j];
|
||||
pv->w = psrcf[i_width2 + temp];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
|
||||
{
|
||||
FUNCLOG
|
||||
if (floatfmt)
|
||||
FillBlocksF(vBlockData, vBilinearData);
|
||||
else
|
||||
FillBlocksNF(vBlockData, vBilinearData);
|
||||
if (floatfmt) {
|
||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
|
||||
vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
|
||||
} else {
|
||||
vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
|
||||
}
|
||||
|
||||
BLOCK b;
|
||||
|
||||
memset(m_Blocks, 0, sizeof(m_Blocks));
|
||||
|
||||
// 32
|
||||
b.SetDim(64, 32, 0, 0, 1);
|
||||
b.SetTable(PSMCT32);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMCT32] = b;
|
||||
m_Blocks[PSMCT32].SetFun(PSMCT32);
|
||||
|
||||
// 24 (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMCT24] = b;
|
||||
m_Blocks[PSMCT24].SetFun(PSMCT24);
|
||||
|
||||
// 8H (same as 32 except write/readPixel are different)
|
||||
m_Blocks[PSMT8H] = b;
|
||||
m_Blocks[PSMT8H].SetFun(PSMT8H);
|
||||
|
||||
m_Blocks[PSMT4HL] = b;
|
||||
m_Blocks[PSMT4HL].SetFun(PSMT4HL);
|
||||
|
||||
m_Blocks[PSMT4HH] = b;
|
||||
m_Blocks[PSMT4HH].SetFun(PSMT4HH);
|
||||
|
||||
// 32z
|
||||
b.SetDim(64, 32, 64, 0, 1);
|
||||
b.SetTable(PSMT32Z);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMT32Z] = b;
|
||||
m_Blocks[PSMT32Z].SetFun(PSMT32Z);
|
||||
|
||||
// 24Z (same as 32Z except write/readPixel are different)
|
||||
m_Blocks[PSMT24Z] = b;
|
||||
m_Blocks[PSMT24Z].SetFun(PSMT24Z);
|
||||
|
||||
// 16
|
||||
b.SetDim(64, 64, 0, 32, 2);
|
||||
b.SetTable(PSMCT16);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMCT16] = b;
|
||||
m_Blocks[PSMCT16].SetFun(PSMCT16);
|
||||
|
||||
// 16s
|
||||
b.SetDim(64, 64, 64, 32, 2);
|
||||
b.SetTable(PSMCT16S);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMCT16S] = b;
|
||||
m_Blocks[PSMCT16S].SetFun(PSMCT16S);
|
||||
|
||||
// 16z
|
||||
b.SetDim(64, 64, 0, 96, 2);
|
||||
b.SetTable(PSMT16Z);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMT16Z] = b;
|
||||
m_Blocks[PSMT16Z].SetFun(PSMT16Z);
|
||||
|
||||
// 16sz
|
||||
b.SetDim(64, 64, 64, 96, 2);
|
||||
b.SetTable(PSMT16SZ);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMT16SZ] = b;
|
||||
m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
|
||||
|
||||
// 8
|
||||
b.SetDim(128, 64, 0, 160, 4);
|
||||
b.SetTable(PSMT8);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMT8] = b;
|
||||
m_Blocks[PSMT8].SetFun(PSMT8);
|
||||
|
||||
// 4
|
||||
b.SetDim(128, 128, 0, 224, 8);
|
||||
b.SetTable(PSMT4);
|
||||
fill_block(b, vBlockData, vBilinearData, floatfmt);
|
||||
m_Blocks[PSMT4] = b;
|
||||
m_Blocks[PSMT4].SetFun(PSMT4);
|
||||
}
|
||||
|
|
|
@ -92,6 +92,29 @@ struct TransferFuncts
|
|||
extern TransferData tData[64];
|
||||
// rest not visible externally
|
||||
|
||||
extern u32 g_blockTable32[4][8];
|
||||
extern u32 g_blockTable32Z[4][8];
|
||||
extern u32 g_blockTable16[8][4];
|
||||
extern u32 g_blockTable16S[8][4];
|
||||
extern u32 g_blockTable16Z[8][4];
|
||||
extern u32 g_blockTable16SZ[8][4];
|
||||
extern u32 g_blockTable8[4][8];
|
||||
extern u32 g_blockTable4[8][4];
|
||||
|
||||
extern u32 g_columnTable32[8][8];
|
||||
extern u32 g_columnTable16[8][16];
|
||||
extern u32 g_columnTable8[16][16];
|
||||
extern u32 g_columnTable4[16][32];
|
||||
|
||||
extern u32 g_pageTable32[32][64];
|
||||
extern u32 g_pageTable32Z[32][64];
|
||||
extern u32 g_pageTable16[64][64];
|
||||
extern u32 g_pageTable16S[64][64];
|
||||
extern u32 g_pageTable16Z[64][64];
|
||||
extern u32 g_pageTable16SZ[64][64];
|
||||
extern u32 g_pageTable8[64][128];
|
||||
extern u32 g_pageTable4[128][128];
|
||||
|
||||
struct BLOCK
|
||||
{
|
||||
BLOCK() { memset(this, 0, sizeof(BLOCK)); }
|
||||
|
@ -142,47 +165,69 @@ struct BLOCK
|
|||
TransferHostLocal = TransferHostLocalFun[psm];
|
||||
TransferLocalHost = TransferLocalHostFun[psm];
|
||||
}
|
||||
|
||||
// Selects the page, block and column lookup tables that belong to the pixel
// storage mode `psm` and stores pointers to their first elements in
// pageTable, blockTable and columnTable.
//
// Every known mode asserts that its page table holds exactly width * height
// entries, so the block dimensions are expected to be set before this call.
// An unknown `psm` leaves all three pointers NULL.
void SetTable(u32 psm)
{
    switch (psm)
    {
        case PSMCT32:
            columnTable = &g_columnTable32[0][0];
            blockTable  = &g_blockTable32[0][0];
            pageTable   = &g_pageTable32[0][0];
            assert( sizeof(g_pageTable32) == width * height * sizeof(g_pageTable32[0][0]) );
            break;

        case PSMT32Z:
            // The 32-bit Z format shares the 32-bit column table.
            columnTable = &g_columnTable32[0][0];
            blockTable  = &g_blockTable32Z[0][0];
            pageTable   = &g_pageTable32Z[0][0];
            assert( sizeof(g_pageTable32Z) == width * height * sizeof(g_pageTable32Z[0][0]) );
            break;

        case PSMCT16:
            columnTable = &g_columnTable16[0][0];
            blockTable  = &g_blockTable16[0][0];
            pageTable   = &g_pageTable16[0][0];
            assert( sizeof(g_pageTable16) == width * height * sizeof(g_pageTable16[0][0]) );
            break;

        // The remaining 16-bit formats all reuse the plain 16-bit column table.
        case PSMCT16S:
            columnTable = &g_columnTable16[0][0];
            blockTable  = &g_blockTable16S[0][0];
            pageTable   = &g_pageTable16S[0][0];
            assert( sizeof(g_pageTable16S) == width * height * sizeof(g_pageTable16S[0][0]) );
            break;

        case PSMT16Z:
            columnTable = &g_columnTable16[0][0];
            blockTable  = &g_blockTable16Z[0][0];
            pageTable   = &g_pageTable16Z[0][0];
            assert( sizeof(g_pageTable16Z) == width * height * sizeof(g_pageTable16Z[0][0]) );
            break;

        case PSMT16SZ:
            columnTable = &g_columnTable16[0][0];
            blockTable  = &g_blockTable16SZ[0][0];
            pageTable   = &g_pageTable16SZ[0][0];
            assert( sizeof(g_pageTable16SZ) == width * height * sizeof(g_pageTable16SZ[0][0]) );
            break;

        case PSMT8:
            columnTable = &g_columnTable8[0][0];
            blockTable  = &g_blockTable8[0][0];
            pageTable   = &g_pageTable8[0][0];
            assert( sizeof(g_pageTable8) == width * height * sizeof(g_pageTable8[0][0]) );
            break;

        case PSMT4:
            columnTable = &g_columnTable4[0][0];
            blockTable  = &g_blockTable4[0][0];
            pageTable   = &g_pageTable4[0][0];
            assert( sizeof(g_pageTable4) == width * height * sizeof(g_pageTable4[0][0]) );
            break;

        default:
            // No tables are known for this mode.
            columnTable = NULL;
            blockTable  = NULL;
            pageTable   = NULL;
            break;
    }
}
|
||||
};
|
||||
|
||||
extern BLOCK m_Blocks[];
|
||||
|
||||
extern u32 g_blockTable32[4][8];
|
||||
extern u32 g_blockTable32Z[4][8];
|
||||
extern u32 g_blockTable16[8][4];
|
||||
extern u32 g_blockTable16S[8][4];
|
||||
extern u32 g_blockTable16Z[8][4];
|
||||
extern u32 g_blockTable16SZ[8][4];
|
||||
extern u32 g_blockTable8[4][8];
|
||||
extern u32 g_blockTable4[8][4];
|
||||
|
||||
extern u32 g_columnTable32[8][8];
|
||||
extern u32 g_columnTable16[8][16];
|
||||
extern u32 g_columnTable8[16][16];
|
||||
extern u32 g_columnTable4[16][32];
|
||||
|
||||
extern u32 g_pageTable32[32][64];
|
||||
extern u32 g_pageTable32Z[32][64];
|
||||
extern u32 g_pageTable16[64][64];
|
||||
extern u32 g_pageTable16S[64][64];
|
||||
extern u32 g_pageTable16Z[64][64];
|
||||
extern u32 g_pageTable16SZ[64][64];
|
||||
extern u32 g_pageTable8[64][128];
|
||||
extern u32 g_pageTable4[128][128];
|
||||
|
||||
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = bp * 64 + basepage * 2048 + g_pageTable32[y&31][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 2048 + g_pageTable32[y&31][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
#define getPixelAddress24 getPixelAddress32
|
||||
#define getPixelAddress24_0 getPixelAddress32_0
|
||||
#define getPixelAddress8H getPixelAddress32
|
||||
|
@ -191,6 +236,15 @@ static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw)
|
|||
#define getPixelAddress4HL_0 getPixelAddress32_0
|
||||
#define getPixelAddress4HH getPixelAddress32
|
||||
#define getPixelAddress4HH_0 getPixelAddress32_0
|
||||
#define getPixelAddress24Z getPixelAddress32Z
|
||||
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
||||
|
||||
// Returns the index of pixel (x, y) for the 32-bit layout.
// A page spans 64 x 32 pixels (2048 table entries); the position inside the
// page comes from g_pageTable32, and `bp` contributes an offset of bp * 64.
// NOTE(review): bw is presumably the buffer width in pixels (bw >> 6 pages
// per row) -- confirm against callers.
static __forceinline u32 getPixelAddress32(int x, int y, u32 bp, u32 bw)
{
    const u32 pagesPerRow = bw >> 6;
    const u32 pageIndex = ((y >> 5) * pagesPerRow) + (x >> 6);

    return bp * 64 + pageIndex * 2048 + g_pageTable32[y & 31][x & 63];
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
|
@ -199,13 +253,6 @@ static __forceinline u32 getPixelAddress16(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 4096 + g_pageTable16[y&63][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
|
@ -213,13 +260,6 @@ static __forceinline u32 getPixelAddress16S(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 4096 + g_pageTable16S[y&63][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * ((bw + 127) >> 7)) + (x >> 7);
|
||||
|
@ -227,13 +267,6 @@ static __forceinline u32 getPixelAddress8(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
// Returns the index of pixel (x, y) for the 8-bit layout with no base offset.
// A page spans 128 x 64 pixels (8192 table entries); the number of pages per
// row is bw rounded up to a multiple of 128, and the position inside the page
// comes from g_pageTable8.
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw)
{
    const u32 pagesPerRow = (bw + 127) >> 7;
    const u32 pageIndex = ((y >> 6) * pagesPerRow) + (x >> 7);

    return pageIndex * 8192 + g_pageTable8[y & 63][x & 127];
}
|
||||
|
||||
static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
|
||||
|
@ -241,13 +274,6 @@ static __forceinline u32 getPixelAddress4(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 7) * ((bw + 127) >> 7)) + (x >> 7);
|
||||
u32 word = basepage * 16384 + g_pageTable4[y&127][x&127];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||
|
@ -255,16 +281,6 @@ static __forceinline u32 getPixelAddress32Z(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 5) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 2048 + g_pageTable32Z[y&31][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
#define getPixelAddress24Z getPixelAddress32Z
|
||||
#define getPixelAddress24Z_0 getPixelAddress32Z_0
|
||||
|
||||
static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
|
@ -272,13 +288,6 @@ static __forceinline u32 getPixelAddress16Z(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 4096 + g_pageTable16Z[y&63][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
|
@ -286,15 +295,7 @@ static __forceinline u32 getPixelAddress16SZ(int x, int y, u32 bp, u32 bw)
|
|||
return word;
|
||||
}
|
||||
|
||||
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
|
||||
{
|
||||
u32 basepage = ((y >> 6) * (bw >> 6)) + (x >> 6);
|
||||
u32 word = basepage * 4096 + g_pageTable16SZ[y&63][x&63];
|
||||
return word;
|
||||
}
|
||||
|
||||
//#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
|
||||
//#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
|
||||
///////////////
|
||||
|
||||
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
|
@ -375,7 +376,6 @@ static __forceinline void writePixel16SZ(void* pmem, int x, int y, u32 pixel, u3
|
|||
((u16*)pmem)[getPixelAddress16SZ(x, y, bp, bw)] = pixel;
|
||||
}
|
||||
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32(const void* pmem, int x, int y, u32 bp, u32 bw)
|
||||
|
@ -457,161 +457,48 @@ static __forceinline u32 readPixel16SZ(const void* pmem, int x, int y, u32 bp, u
|
|||
// Functions that take 0 bps //
|
||||
///////////////////////////////
|
||||
|
||||
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*) & ((u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16S_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[getPixelAddress8_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u8*)pmem)[4*getPixelAddress32_0(x, y, bw)+3] = pixel;
|
||||
}
|
||||
|
||||
// Stores a 4-bit pixel at (x, y) in `pmem`, with no base offset.
// getPixelAddress4_0 yields a nibble index: the byte is index / 2, an odd
// index selects the high nibble and an even index the low nibble. The other
// nibble of the byte is preserved.
// NOTE: `pixel` is not masked to 4 bits here; on the even path any higher
// bits would be OR-ed into the neighbouring nibble.
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw)
{
    const u32 nibbleIndex = getPixelAddress4_0(x, y, bw);
    u8* const cell = (u8*)pmem + (nibbleIndex >> 1);
    const u8 previous = *cell;

    *cell = (nibbleIndex & 0x1)
        ? (u8)((previous & 0x0f) | (pixel << 4))
        : (u8)((previous & 0xf0) | (pixel));
}
|
||||
|
||||
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
|
||||
*p = (*p & 0xf0) | pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *p = (u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
|
||||
*p = (*p & 0x0f) | (pixel << 4);
|
||||
}
|
||||
|
||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
u8 *buf = (u8*)pmem + 4 * getPixelAddress32Z_0(x, y, bw);
|
||||
u8 *pix = (u8*) & pixel;
|
||||
buf[0] = pix[0];
|
||||
buf[1] = pix[1];
|
||||
buf[2] = pix[2];
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16Z_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw)
|
||||
{
|
||||
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
|
||||
}
|
||||
static __forceinline u32 getPixelAddress32_0(int x, int y, u32 bw) { return getPixelAddress32(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress16_0(int x, int y, u32 bw) { return getPixelAddress16(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress16S_0(int x, int y, u32 bw) { return getPixelAddress16S(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress8_0(int x, int y, u32 bw) { return getPixelAddress8(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress4_0(int x, int y, u32 bw) { return getPixelAddress4(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress32Z_0(int x, int y, u32 bw) { return getPixelAddress32Z(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress16Z_0(int x, int y, u32 bw) { return getPixelAddress16Z(x, y, 0, bw); }
|
||||
static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw) { return getPixelAddress16SZ(x, y, 0, bw); }
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32_0(x, y, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16S_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[getPixelAddress8_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u8*)pmem)[4*getPixelAddress32_0(x, y, bw) + 3];
|
||||
}
|
||||
|
||||
// Fetches the 4-bit pixel at (x, y) from `pmem`, with no base offset.
// getPixelAddress4_0 yields a nibble index: the byte is index / 2, an odd
// index selects the high nibble and an even index the low nibble.
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw)
{
    const u32 nibbleIndex = getPixelAddress4_0(x, y, bw);
    const u8 packed = *((const u8*)pmem + (nibbleIndex >> 1));
    const int shift = (nibbleIndex & 0x1) ? 4 : 0;

    return (packed >> shift) & 0xf;
}
|
||||
|
||||
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HL_0(x, y, bw) + 3;
|
||||
return *p & 0x0f;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
const u8 *p = (const u8*)pmem + 4 * getPixelAddress4HH_0(x, y, bw) + 3;
|
||||
return *p >> 4;
|
||||
}
|
||||
static __forceinline void writePixel32_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel32(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel24_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel24(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel16_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel16S_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16S(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel8_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel8(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel8H_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel8H(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel4_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel4HL_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4HL(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel4HH_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel4HH(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel32Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel32Z(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel24Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel24Z(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel16Z_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16Z(pmem, x, y, pixel, 0, bw); }
|
||||
static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel, u32 bw) { writePixel16SZ(pmem, x, y, pixel, 0, bw); }
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)];
|
||||
}
|
||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) { return readPixel32(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel24_0(const void* pmem, int x, int y, u32 bw) { return readPixel24(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel16_0(const void* pmem, int x, int y, u32 bw) { return readPixel16(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel16S_0(const void* pmem, int x, int y, u32 bw) { return readPixel16S(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel8_0(const void* pmem, int x, int y, u32 bw) { return readPixel8(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel8H_0(const void* pmem, int x, int y, u32 bw) { return readPixel8H(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel4_0(const void* pmem, int x, int y, u32 bw) { return readPixel4(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel4HL_0(const void* pmem, int x, int y, u32 bw) { return readPixel4HL(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel4HH_0(const void* pmem, int x, int y, u32 bw) { return readPixel4HH(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel32Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel32Z(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel24Z(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw) { return readPixel16Z(pmem, x, y, 0, bw); }
|
||||
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw) { return readPixel16SZ(pmem, x, y, 0, bw); }
|
||||
|
||||
static __forceinline u32 readPixel24Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u32*)pmem)[getPixelAddress32Z_0(x, y, bw)] & 0xffffff;
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16Z_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16Z_0(x, y, bw)];
|
||||
}
|
||||
|
||||
static __forceinline u32 readPixel16SZ_0(const void* pmem, int x, int y, u32 bw)
|
||||
{
|
||||
return ((const u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)];
|
||||
}
|
||||
///////////////
|
||||
|
||||
extern int TransferHostLocal32(const void* pbyMem, u32 nQWordSize);
|
||||
extern int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize);
|
||||
|
|
|
@ -120,9 +120,9 @@ u32 g_columnTable32[8][8] =
|
|||
u32 g_columnTable16[8][16] =
|
||||
{
|
||||
{ 0, 2, 8, 10, 16, 18, 24, 26,
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
1, 3, 9, 11, 17, 19, 25, 27 },
|
||||
{ 4, 6, 12, 14, 20, 22, 28, 30,
|
||||
5, 7, 13, 15, 21, 23, 29, 31 },
|
||||
5, 7, 13, 15, 21, 23, 29, 31 },
|
||||
{ 32, 34, 40, 42, 48, 50, 56, 58,
|
||||
33, 35, 41, 43, 49, 51, 57, 59 },
|
||||
{ 36, 38, 44, 46, 52, 54, 60, 62,
|
||||
|
@ -139,15 +139,15 @@ u32 g_columnTable16[8][16] =
|
|||
|
||||
u32 g_columnTable8[16][16] =
|
||||
{
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 0, 4, 16, 20, 32, 36, 48, 52, // column 0
|
||||
2, 6, 18, 22, 34, 38, 50, 54 },
|
||||
{ 8, 12, 24, 28, 40, 44, 56, 60,
|
||||
10, 14, 26, 30, 42, 46, 58, 62 },
|
||||
10, 14, 26, 30, 42, 46, 58, 62 },
|
||||
{ 33, 37, 49, 53, 1, 5, 17, 21,
|
||||
35, 39, 51, 55, 3, 7, 19, 23 },
|
||||
{ 41, 45, 57, 61, 9, 13, 25, 29,
|
||||
43, 47, 59, 63, 11, 15, 27, 31 },
|
||||
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
|
||||
{ 96, 100, 112, 116, 64, 68, 80, 84, // column 1
|
||||
98, 102, 114, 118, 66, 70, 82, 86 },
|
||||
{ 104, 108, 120, 124, 72, 76, 88, 92,
|
||||
106, 110, 122, 126, 74, 78, 90, 94 },
|
||||
|
@ -155,7 +155,7 @@ u32 g_columnTable8[16][16] =
|
|||
67, 71, 83, 87, 99, 103, 115, 119 },
|
||||
{ 73, 77, 89, 93, 105, 109, 121, 125,
|
||||
75, 79, 91, 95, 107, 111, 123, 127 },
|
||||
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
|
||||
{ 128, 132, 144, 148, 160, 164, 176, 180, // column 2
|
||||
130, 134, 146, 150, 162, 166, 178, 182 },
|
||||
{ 136, 140, 152, 156, 168, 172, 184, 188,
|
||||
138, 142, 154, 158, 170, 174, 186, 190 },
|
||||
|
@ -163,7 +163,7 @@ u32 g_columnTable8[16][16] =
|
|||
163, 167, 179, 183, 131, 135, 147, 151 },
|
||||
{ 169, 173, 185, 189, 137, 141, 153, 157,
|
||||
171, 175, 187, 191, 139, 143, 155, 159 },
|
||||
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
|
||||
{ 224, 228, 240, 244, 192, 196, 208, 212, // column 3
|
||||
226, 230, 242, 246, 194, 198, 210, 214 },
|
||||
{ 232, 236, 248, 252, 200, 204, 216, 220,
|
||||
234, 238, 250, 254, 202, 206, 218, 222 },
|
||||
|
@ -175,10 +175,10 @@ u32 g_columnTable8[16][16] =
|
|||
|
||||
u32 g_columnTable4[16][32] =
|
||||
{
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
6, 14, 38, 46, 70, 78, 102, 110 },
|
||||
{ 0, 8, 32, 40, 64, 72, 96, 104, // column 0
|
||||
2, 10, 34, 42, 66, 74, 98, 106,
|
||||
4, 12, 36, 44, 68, 76, 100, 108,
|
||||
6, 14, 38, 46, 70, 78, 102, 110 },
|
||||
{ 16, 24, 48, 56, 80, 88, 112, 120,
|
||||
18, 26, 50, 58, 82, 90, 114, 122,
|
||||
20, 28, 52, 60, 84, 92, 116, 124,
|
||||
|
@ -191,7 +191,7 @@ u32 g_columnTable4[16][32] =
|
|||
83, 91, 115, 123, 19, 27, 51, 59,
|
||||
85, 93, 117, 125, 21, 29, 53, 61,
|
||||
87, 95, 119, 127, 23, 31, 55, 63 },
|
||||
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
|
||||
{ 192, 200, 224, 232, 128, 136, 160, 168, // column 1
|
||||
194, 202, 226, 234, 130, 138, 162, 170,
|
||||
196, 204, 228, 236, 132, 140, 164, 172,
|
||||
198, 206, 230, 238, 134, 142, 166, 174 },
|
||||
|
@ -207,7 +207,7 @@ u32 g_columnTable4[16][32] =
|
|||
147, 155, 179, 187, 211, 219, 243, 251,
|
||||
149, 157, 181, 189, 213, 221, 245, 253,
|
||||
151, 159, 183, 191, 215, 223, 247, 255 },
|
||||
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
|
||||
{ 256, 264, 288, 296, 320, 328, 352, 360, // column 2
|
||||
258, 266, 290, 298, 322, 330, 354, 362,
|
||||
260, 268, 292, 300, 324, 332, 356, 364,
|
||||
262, 270, 294, 302, 326, 334, 358, 366 },
|
||||
|
@ -223,7 +223,7 @@ u32 g_columnTable4[16][32] =
|
|||
339, 347, 371, 379, 275, 283, 307, 315,
|
||||
341, 349, 373, 381, 277, 285, 309, 317,
|
||||
343, 351, 375, 383, 279, 287, 311, 319 },
|
||||
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
|
||||
{ 448, 456, 480, 488, 384, 392, 416, 424, // column 3
|
||||
450, 458, 482, 490, 386, 394, 418, 426,
|
||||
452, 460, 484, 492, 388, 396, 420, 428,
|
||||
454, 462, 486, 494, 390, 398, 422, 430 },
|
||||
|
|
|
@ -638,7 +638,7 @@ void __gifCall GIFRegHandlerSCISSOR(const u32* data)
|
|||
Flush();
|
||||
}
|
||||
|
||||
m_env.CTXT[i].SCISSOR = (Vector4i)r->SCISSOR;
|
||||
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
|
||||
|
||||
m_env.CTXT[i].UpdateScissor();*/
|
||||
ZZLog::Greg_Log("SCISSOR%d", i);
|
||||
|
|
|
@ -56,6 +56,7 @@ extern "C" char* CALLBACK PS2EgetLibName(void);
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
|
||||
extern std::string s_strIniPath; // Air's new (r2361) new constant for ini file path
|
||||
|
||||
|
@ -87,6 +88,9 @@ static __forceinline void pcsx2_aligned_free(void* pmem)
|
|||
#define _aligned_malloc pcsx2_aligned_malloc
|
||||
#define _aligned_free pcsx2_aligned_free
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __LINUX__
|
||||
#include <sys/timeb.h> // ftime(), struct timeb
|
||||
|
||||
inline unsigned long timeGetTime()
|
||||
|
@ -97,6 +101,15 @@ inline unsigned long timeGetTime()
|
|||
return (unsigned long)(t.time*1000 + t.millitm);
|
||||
}
|
||||
|
||||
#include <time.h>
|
||||
// Returns the CPU time consumed by this process, in nanoseconds.
//
// The result combines the seconds and nanoseconds fields of the clock reading,
// so the difference of two calls stays meaningful when the readings fall in
// different seconds (returning tv_nsec alone restarts from 0 every second).
// Arithmetic is unsigned and wraps modulo the range of unsigned long; callers
// should only rely on differences between nearby readings.
// If clock_gettime() fails, the zero-initialised reading yields 0.
inline unsigned long timeGetPreciseTime()
{
    timespec t = { 0, 0 };
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t);

    return (unsigned long)t.tv_sec * 1000000000UL + (unsigned long)t.tv_nsec;
}
|
||||
|
||||
struct RECT
|
||||
{
|
||||
int left, top;
|
||||
|
@ -138,6 +151,7 @@ enum GSWindowDim
|
|||
GSDim_1024,
|
||||
GSDim_1280,
|
||||
};
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
|
@ -217,7 +231,7 @@ typedef struct
|
|||
gameHacks settings()
|
||||
{
|
||||
gameHacks tempHack;
|
||||
tempHack._u32 = (hacks._u32 | def_hacks._u32 | GAME_PATH3HACK);
|
||||
tempHack._u32 = (hacks._u32 | def_hacks._u32);
|
||||
return tempHack;
|
||||
}
|
||||
bool fullscreen() { return !!(zz_options.fullscreen); }
|
||||
|
|
|
@ -25,6 +25,8 @@ void SaveConfig()
|
|||
WritePrivateProfileString("Settings", "Width", szValue, iniFile.c_str());
|
||||
sprintf(szValue, "%u", conf.height);
|
||||
WritePrivateProfileString("Settings", "Height", szValue, iniFile.c_str());
|
||||
sprintf(szValue, "%u", conf.SkipDraw);
|
||||
WritePrivateProfileString("Settings", "SkipDraw", szValue, iniFile.c_str());
|
||||
}
|
||||
|
||||
void LoadConfig()
|
||||
|
@ -40,6 +42,7 @@ void LoadConfig()
|
|||
conf.bilinear = 1;
|
||||
conf.width = 640;
|
||||
conf.height = 480;
|
||||
conf.SkipDraw = 0;
|
||||
|
||||
FILE *fp = fopen(iniFile.c_str(), "rt");
|
||||
|
||||
|
@ -67,6 +70,8 @@ void LoadConfig()
|
|||
conf.width = strtoul(szValue, NULL, 10);
|
||||
GetPrivateProfileString("Settings", "Height", NULL, szValue, 20, iniFile.c_str());
|
||||
conf.height = strtoul(szValue, NULL, 10);
|
||||
GetPrivateProfileString("Settings", "SkipDraw", NULL, szValue, 20, iniFile.c_str());
|
||||
conf.SkipDraw = strtoul(szValue, NULL, 10);
|
||||
|
||||
if (conf.aa < 0 || conf.aa > 4) conf.aa = 0;
|
||||
|
||||
|
|
|
@ -116,7 +116,7 @@ typedef struct GameHackStruct
|
|||
u32 HackMask;
|
||||
} GameHack;
|
||||
|
||||
#define HACK_NUMBER 30
|
||||
#define HACK_NUMBER 25
|
||||
|
||||
GameHack HackinshTable[HACK_NUMBER] =
|
||||
{
|
||||
|
@ -127,30 +127,31 @@ GameHack HackinshTable[HACK_NUMBER] =
|
|||
{"*** 4 TexA hack", GAME_TEXAHACK},
|
||||
{"*** 5 No Target Resolve", GAME_NOTARGETRESOLVE},
|
||||
{"*** 6 Exact color", GAME_EXACTCOLOR},
|
||||
{"*** 7 No color clamp", GAME_NOCOLORCLAMP},
|
||||
{"*** 8 FFX hack", GAME_FFXHACK},
|
||||
{"*** 9 No Alpha Fail", GAME_NOALPHAFAIL},
|
||||
{"***10 No Depth Update", GAME_NODEPTHUPDATE},
|
||||
{"***11 Quick Resolve 1", GAME_QUICKRESOLVE1},
|
||||
{"***12 No quick resolve", GAME_NOQUICKRESOLVE},
|
||||
{"***13 Notaget clut", GAME_NOTARGETCLUT},
|
||||
{"***14 No Stencil", GAME_NOSTENCIL},
|
||||
{"***15 No Depth resolve", GAME_NODEPTHRESOLVE},
|
||||
{"***16 Full 16 bit", GAME_FULL16BITRES},
|
||||
{"***17 Resolve promoted", GAME_RESOLVEPROMOTED},
|
||||
{"***18 Fast Update", GAME_FASTUPDATE},
|
||||
{"***19 No Alpha Test", GAME_NOALPHATEST},
|
||||
{"***20 Disable MRT deprh", GAME_DISABLEMRTDEPTH},
|
||||
{"***21 32 bit targes", GAME_32BITTARGS},
|
||||
{"***22 path 3 hack", GAME_PATH3HACK},
|
||||
{"***23 parallelise calls", GAME_DOPARALLELCTX},
|
||||
{"***24 specular highligths", GAME_XENOSPECHACK},
|
||||
{"***25 partial pointers", GAME_PARTIALPOINTERS},
|
||||
{"***26 partial depth", GAME_PARTIALDEPTH},
|
||||
{"***27 reget hack", GAME_REGETHACK},
|
||||
//{"***xx No color clamp", GAME_NOCOLORCLAMP},
|
||||
//{"***xx FFX hack", GAME_FFXHACK},
|
||||
{"*** 7 No Alpha Fail", GAME_NOALPHAFAIL},
|
||||
{"*** 8 No Depth Update", GAME_NODEPTHUPDATE},
|
||||
{"*** 9 Quick Resolve 1", GAME_QUICKRESOLVE1},
|
||||
{"***10 No quick resolve", GAME_NOQUICKRESOLVE},
|
||||
{"***11 Notaget clut", GAME_NOTARGETCLUT},
|
||||
{"***12 No Stencil", GAME_NOSTENCIL},
|
||||
{"***13 No Depth resolve", GAME_NODEPTHRESOLVE},
|
||||
{"***14 Full 16 bit", GAME_FULL16BITRES},
|
||||
{"***15 Resolve promoted", GAME_RESOLVEPROMOTED},
|
||||
{"***16 Fast Update", GAME_FASTUPDATE},
|
||||
{"***17 No Alpha Test", GAME_NOALPHATEST},
|
||||
{"***18 Disable MRT depth", GAME_DISABLEMRTDEPTH},
|
||||
//{"***xx 32 bit targs", GAME_32BITTARGS},
|
||||
//{"***xx Path 3 hack", GAME_PATH3HACK},
|
||||
//{"***xx Parallel calls", GAME_DOPARALLELCTX},
|
||||
{"***19 Specular highlights", GAME_XENOSPECHACK},
|
||||
//{"***xx Partial pointers", GAME_PARTIALPOINTERS},
|
||||
{"***20 Partial depth", GAME_PARTIALDEPTH},
|
||||
{"***21 Reget hack", GAME_REGETHACK},
|
||||
|
||||
{"***28 gust hack", GAME_GUSTHACK},
|
||||
{"***29 log-Z", GAME_NOLOGZ}
|
||||
{"***22 Gust hack", GAME_GUSTHACK},
|
||||
{"***23 Log-Z", GAME_NOLOGZ},
|
||||
{"***24 Auto skipdraw", GAME_AUTOSKIPDRAW}
|
||||
};
|
||||
|
||||
int CurrentHackSetting = 0;
|
||||
|
@ -172,7 +173,7 @@ void ProcessHackSetting(bool reverse)
|
|||
{
|
||||
CurrentHackSetting++;
|
||||
|
||||
if (CurrentHackSetting == HACK_NUMBER) CurrentHackSetting = 0;
|
||||
if (CurrentHackSetting >= HACK_NUMBER) CurrentHackSetting = 0;
|
||||
}
|
||||
|
||||
conf.hacks._u32 |= HackinshTable[CurrentHackSetting].HackMask;
|
||||
|
|
|
@ -244,6 +244,27 @@ void Warn_Log(const char *fmt, ...)
|
|||
#endif
|
||||
}
|
||||
|
||||
// printf-style log call that is active only in ZEROGS_DEVBUILD builds; in any
// other build it compiles to an empty function.
//
// The formatted message plus a newline is written to gsLog when IsLogging()
// is true, and always to stderr with a "ZZogl-PG: " prefix.
//
// The argument list is started and ended separately for each vfprintf call:
// a va_list is indeterminate after vfprintf has consumed it, so it must not
// be handed to a second vfprintf without being re-initialised.
void Dev_Log(const char *fmt, ...)
{
#ifdef ZEROGS_DEVBUILD
    va_list list;

    if (IsLogging())
    {
        va_start(list, fmt);
        vfprintf(gsLog, fmt, list);
        va_end(list);
        fprintf(gsLog, "\n");
    }

    fprintf(stderr, "ZZogl-PG: ");
    va_start(list, fmt);
    vfprintf(stderr, fmt, list);
    va_end(list);
    fprintf(stderr, "\n");
#endif
}
|
||||
|
||||
void Debug_Log(const char *fmt, ...)
|
||||
{
|
||||
#if _DEBUG
|
||||
|
|
|
@ -185,6 +185,7 @@ extern void Prim_Log(const char *fmt, ...);
|
|||
extern void GS_Log(const char *fmt, ...);
|
||||
|
||||
extern void Debug_Log(const char *fmt, ...);
|
||||
extern void Dev_Log(const char *fmt, ...);
|
||||
extern void Warn_Log(const char *fmt, ...);
|
||||
extern void Error_Log(const char *fmt, ...);
|
||||
};
|
||||
|
|
|
@ -54,6 +54,7 @@ void ZeroGS::AdjustTransToAspect(float4& v)
|
|||
{
|
||||
double temp;
|
||||
float f;
|
||||
const float mult = 1 / 32767.0f;
|
||||
|
||||
if (conf.width * nBackbufferHeight > conf.height * nBackbufferWidth) // limited by width
|
||||
{
|
||||
|
@ -74,7 +75,7 @@ void ZeroGS::AdjustTransToAspect(float4& v)
|
|||
v.z *= f;
|
||||
}
|
||||
|
||||
v *= 1 / 32767.0f;
|
||||
v *= mult;
|
||||
}
|
||||
|
||||
inline bool FrameSkippingHelper()
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef ZZOGLCRTC_H_INCLUDED
|
||||
#define ZZOGLCRTC_H_INCLUDED
|
||||
#ifndef ZZOGLCRTC_H_INCLUDED
|
||||
#define ZZOGLCRTC_H_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "zerogs.h"
|
||||
#include "targets.h"
|
||||
#include "targets.h"
|
||||
|
||||
#define INTERLACE_COUNT (bInterlace && interlace == (conf.interlace))
|
||||
|
||||
|
@ -89,12 +89,12 @@ inline u32 CreateInterlaceTex(int width)
|
|||
|
||||
glGenTextures(1, &s_ptexInterlace);
|
||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, s_ptexInterlace);
|
||||
TextureRect(4, width, 1, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
|
||||
TextureRect(GL_RGBA, width, 1, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
|
||||
setRectFilters(GL_NEAREST);
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
return s_ptexInterlace;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ZZOGLCRTC_H_INCLUDED
|
||||
}
|
||||
|
||||
#endif // ZZOGLCRTC_H_INCLUDED
|
||||
|
|
|
@ -75,8 +75,6 @@ map<string, GLbyte> mapGLExtensions;
|
|||
|
||||
namespace ZeroGS
|
||||
{
|
||||
RenderFormatType g_RenderFormatType = RFT_float16;
|
||||
|
||||
extern void KickPoint();
|
||||
extern void KickLine();
|
||||
extern void KickTriangle();
|
||||
|
@ -84,8 +82,8 @@ extern void KickTriangleFan();
|
|||
extern void KickSprite();
|
||||
extern void KickDummy();
|
||||
extern bool LoadEffects();
|
||||
extern bool LoadExtraEffects();
|
||||
extern FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
||||
extern bool ZZshLoadExtraEffects();
|
||||
extern FRAGMENTSHADER* ZZshLoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
|
||||
|
||||
GLuint vboRect = 0;
|
||||
vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
||||
|
@ -270,19 +268,6 @@ inline void ZeroGS::CreateOtherCheck()
|
|||
if (Max_Texture_Size_NV < 1024)
|
||||
ZZLog::Error_Log("Could not properly make bitmasks, so some textures will be missed.");
|
||||
|
||||
/* Zeydlitz: we don't support 128-bit targets yet. they are slow and weirdo
|
||||
if( conf.settings() & GAME_32BITTARGS ) {
|
||||
g_RenderFormatType = RFT_byte8;
|
||||
ZZLog::Error_Log("Setting 32 bit render target.");
|
||||
}
|
||||
else {
|
||||
if( !IsGLExt("GL_NV_float_buffer") && !IsGLExt("GL_ARB_color_buffer_float") && !IsGLExt("ATI_pixel_format_float") ) {
|
||||
ZZLog::Error_Log("******\nZZogl: GS WARNING: Floating point render targets not supported, switching to 32bit\nZZogl: *********");
|
||||
g_RenderFormatType = RFT_byte8;
|
||||
}
|
||||
}*/
|
||||
g_RenderFormatType = RFT_byte8;
|
||||
|
||||
#ifdef _WIN32
|
||||
if (IsGLExt("WGL_EXT_swap_control") || IsGLExt("EXT_swap_control"))
|
||||
wglSwapIntervalEXT(0);
|
||||
|
@ -469,8 +454,6 @@ bool ZeroGS::Create(int _width, int _height)
|
|||
Destroy(1);
|
||||
GSStateReset();
|
||||
|
||||
g_RenderFormatType = RFT_float16;
|
||||
|
||||
if (!Create_Window(_width, _height)) return false;
|
||||
if (!CreateFillExtensionsMap()) return false;
|
||||
if (!CreateImportantCheck()) return false;
|
||||
|
@ -574,7 +557,7 @@ bool ZeroGS::Create(int _width, int _height)
|
|||
PBITMAPINFO pinfo = (PBITMAPINFO)LockResource(hBitmapGlob);
|
||||
|
||||
GLenum tempFmt = (pinfo->bmiHeader.biBitCount == 32) ? GL_RGBA : GL_RGB;
|
||||
TextureRect(4, pinfo->bmiHeader.biWidth, pinfo->bmiHeader.biHeight, tempFmt, GL_UNSIGNED_BYTE, (u8*)pinfo + pinfo->bmiHeader.biSize);
|
||||
TextureRect(GL_RGBA, pinfo->bmiHeader.biWidth, pinfo->bmiHeader.biHeight, tempFmt, GL_UNSIGNED_BYTE, (u8*)pinfo + pinfo->bmiHeader.biSize);
|
||||
|
||||
nLogoWidth = pinfo->bmiHeader.biWidth;
|
||||
nLogoHeight = pinfo->bmiHeader.biHeight;
|
||||
|
|
|
@ -207,8 +207,6 @@ int icurctx = -1;
|
|||
extern CRangeManager s_RangeMngr; // manages overwritten memory // zz
|
||||
void FlushTransferRanges(const tex0Info* ptex); //zz
|
||||
|
||||
RenderFormatType GetRenderFormat() { return g_RenderFormatType; } //zz
|
||||
|
||||
// use to update the state
|
||||
void SetTexVariables(int context, FRAGMENTSHADER* pfragment); // zz
|
||||
void SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint); // zz
|
||||
|
@ -859,7 +857,7 @@ inline float4 FlushSetPageOffset(FRAGMENTSHADER* pfragment, int shadertype, CRen
|
|||
// zoe2
|
||||
if (PSMT_ISZTEX(ptextarg->psm)) vpageoffset.w = -1.0f;
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
|
||||
ZZshSetParameter4fv(pfragment->fPageOffset, vpageoffset, "g_fPageOffset");
|
||||
|
||||
return vpageoffset;
|
||||
}
|
||||
|
@ -877,7 +875,7 @@ inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
|
|||
v.y = 16.0f / (float)curvb.tex0.th;
|
||||
v.z = 0.5f * v.x;
|
||||
v.w = 0.5f * v.y;
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
}
|
||||
else if (shadertype == 4)
|
||||
{
|
||||
|
@ -886,7 +884,7 @@ inline float4 FlushSetTexOffset(FRAGMENTSHADER* pfragment, int shadertype, VB& c
|
|||
v.y = 16.0f / (float)ptextarg->fbh;
|
||||
v.z = -1;
|
||||
v.w = 8.0f / (float)ptextarg->fbh;
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
}
|
||||
|
||||
return v;
|
||||
|
@ -920,7 +918,7 @@ inline float4 FlushTextureDims(FRAGMENTSHADER* pfragment, int shadertype, VB& cu
|
|||
if (shadertype == 4)
|
||||
vTexDims.z += 8.0f;
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||
|
||||
return vTexDims;
|
||||
}
|
||||
|
@ -970,7 +968,7 @@ inline FRAGMENTSHADER* FlushUseExistRenderTarget(VB& curvb, CRenderTarget* ptext
|
|||
float4 vTexDims = FlushTextureDims(pfragment, shadertype, curvb, ptextarg);
|
||||
|
||||
if (pfragment->sCLUT != NULL && ptexclut != 0)
|
||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sCLUT, ptexclut, "CLUT");
|
||||
ZZshGLSetTextureParameter(pfragment->sCLUT, ptexclut, "CLUT");
|
||||
|
||||
FlushApplyResizeFilter(curvb, dwFilterOpts, ptextarg, context);
|
||||
|
||||
|
@ -1016,13 +1014,13 @@ inline void FlushSetTexture(VB& curvb, FRAGMENTSHADER* pfragment, CRenderTarget*
|
|||
|
||||
// have to enable the texture parameters(curtest.atst)
|
||||
if( curvb.ptexClamp[0] != 0 )
|
||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
|
||||
ZZshGLSetTextureParameter(pfragment->sBitwiseANDX, curvb.ptexClamp[0], "Clamp 0");
|
||||
|
||||
if( curvb.ptexClamp[1] != 0 )
|
||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
|
||||
ZZshGLSetTextureParameter(pfragment->sBitwiseANDY, curvb.ptexClamp[1], "Clamp 1");
|
||||
|
||||
if( pfragment->sMemory != NULL && s_ptexCurSet[context] != 0)
|
||||
ZZshGLSetTextureParameter(pfragment->prog, pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
|
||||
ZZshGLSetTextureParameter(pfragment->sMemory, s_ptexCurSet[context], "Clamp memory");
|
||||
|
||||
}
|
||||
|
||||
|
@ -1170,13 +1168,13 @@ inline u32 AlphaRenderAlpha(VB& curvb, const pixTest curtest, FRAGMENTSHADER* pf
|
|||
v.w *= 255;
|
||||
}
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
}
|
||||
else
|
||||
{
|
||||
// not using blending so set to defaults
|
||||
float4 v = exactcolor ? float4(1, 510 * 255.0f / 256.0f, 0, 0) : float4(1, 2 * 255.0f / 256.0f, 0, 0);
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
|
||||
}
|
||||
|
||||
|
@ -1267,7 +1265,7 @@ inline void AlphaPabe(VB& curvb, FRAGMENTSHADER* pfragment, int exactcolor)
|
|||
|
||||
if (exactcolor) v.y *= 255;
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
|
||||
Draw(curvb);
|
||||
|
||||
|
@ -1336,7 +1334,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
|
|||
|
||||
if (exactcolor) { v.y *= 255; v.w *= 255; }
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
GL_STENCILFUNC(GL_EQUAL, s_stencilref | STENCIL_FBA, s_stencilmask | STENCIL_FBA);
|
||||
|
@ -1360,7 +1358,7 @@ inline void AlphaFailureTestJob(VB& curvb, const pixTest curtest, FRAGMENTSHADE
|
|||
|
||||
if (exactcolor) v.y *= 255;
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
|
||||
Draw(curvb);
|
||||
|
||||
|
@ -1412,7 +1410,7 @@ inline void AlphaSpecialTesting(VB& curvb, FRAGMENTSHADER* pfragment, u32 dwUsin
|
|||
glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
|
||||
|
||||
float4 v = float4(0, exactcolor ? 510.0f : 2.0f, 0, 0);
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->sOneColor, v, "g_fOneColor");
|
||||
ZZshSetParameter4fv(pfragment->sOneColor, v, "g_fOneColor");
|
||||
Draw(curvb);
|
||||
|
||||
// don't need to restore
|
||||
|
@ -1468,66 +1466,6 @@ inline void AlphaSaveTarget(VB& curvb)
|
|||
#endif
|
||||
}
|
||||
|
||||
inline void AlphaColorClamping(VB& curvb, const pixTest curtest)
|
||||
{
|
||||
// clamp the final colors, when enabled ffx2 credits mess up
|
||||
//if (gs.colclamp) ZZLog::Error_Log("ColClamp!");
|
||||
if ((curvb.curprim.abe && bAlphaClamping) && (GetRenderFormat() != RFT_byte8) && !(conf.settings().no_color_clamp)) // if !colclamp, skip
|
||||
{
|
||||
//ZZLog::Error_Log("Clamped.");
|
||||
ResetAlphaVariables();
|
||||
|
||||
// if processing the clamping case, make sure can write to the front buffer
|
||||
glDisable(GL_STENCIL_TEST);
|
||||
glEnable(GL_BLEND);
|
||||
glDisable(GL_ALPHA_TEST);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glDepthMask(0);
|
||||
glColorMask(1, 1, 1, 0);
|
||||
|
||||
if (s_bWriteDepth) ResetRenderTarget(1);
|
||||
|
||||
SetShaderCaller("AlphaColorClamping");
|
||||
|
||||
ZZshSetPixelShader(ppsOne.prog);
|
||||
GL_BLEND_RGB(GL_ONE, GL_ONE);
|
||||
|
||||
float f;
|
||||
|
||||
if (bAlphaClamping & 1) // min
|
||||
{
|
||||
f = 0;
|
||||
ZZshSetParameter4fv(ppsOne.prog, ppsOne.sOneColor, &f, "g_fOneColor");
|
||||
GL_BLENDEQ_RGB(GL_MAX_EXT);
|
||||
Draw(curvb);
|
||||
}
|
||||
|
||||
// bios shows white screen
|
||||
if (bAlphaClamping & 2) // max
|
||||
{
|
||||
f = 1;
|
||||
ZZshSetParameter4fv(ppsOne.prog, ppsOne.sOneColor, &f, "g_fOneColor");
|
||||
GL_BLENDEQ_RGB(GL_MIN_EXT);
|
||||
Draw(curvb);
|
||||
}
|
||||
|
||||
if (!curvb.zbuf.zmsk)
|
||||
{
|
||||
glDepthMask(1);
|
||||
|
||||
if (s_bWriteDepth)
|
||||
{
|
||||
assert(curvb.pdepth != NULL);
|
||||
curvb.pdepth->SetRenderTarget(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (curvb.test.ate && USEALPHATESTING) glEnable(GL_ALPHA_TEST);
|
||||
|
||||
GL_ZTEST(curtest.zte);
|
||||
}
|
||||
}
|
||||
|
||||
inline void FlushUndoFiter(u32 dwFilterOpts)
|
||||
{
|
||||
if (dwFilterOpts)
|
||||
|
@ -1585,7 +1523,6 @@ void ZeroGS::Flush(int context)
|
|||
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
AlphaColorClamping(curvb, curtest);
|
||||
FlushUndoFiter(dwFilterOpts);
|
||||
|
||||
ppf += curvb.nCount + 0x100000;
|
||||
|
@ -1988,7 +1925,7 @@ void ZeroGS::SetTexInt(int context, FRAGMENTSHADER* pfragment, int settexint)
|
|||
}
|
||||
|
||||
// clamp relies on texture width
|
||||
inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
||||
void SetTexClamping(int context, FRAGMENTSHADER* pfragment)
|
||||
{
|
||||
FUNCLOG
|
||||
SetShaderCaller("SetTexClamping");
|
||||
|
@ -1998,68 +1935,84 @@ inline void SetTexClamping(int context, FRAGMENTSHADER* pfragment )
|
|||
u32* ptex = ZeroGS::vb[context].ptexClamp;
|
||||
ptex[0] = ptex[1] = 0;
|
||||
|
||||
float fw = ZeroGS::vb[context].tex0.tw;
|
||||
float fh = ZeroGS::vb[context].tex0.th;
|
||||
float fw = ZeroGS::vb[context].tex0.tw ;
|
||||
float fh = ZeroGS::vb[context].tex0.th ;
|
||||
|
||||
switch(pclamp->wms)
|
||||
switch (pclamp->wms)
|
||||
{
|
||||
case 0:
|
||||
v2.x = -1e10; v2.z = 1e10;
|
||||
v2.x = -1e10;
|
||||
v2.z = 1e10;
|
||||
break;
|
||||
|
||||
case 1: // pclamp
|
||||
// suikoden5 movie text
|
||||
v2.x = 0; v2.z = 1-0.5f/fw;
|
||||
v2.x = 0;
|
||||
v2.z = 1 - 0.5f / fw;
|
||||
break;
|
||||
|
||||
case 2: // reg pclamp
|
||||
v2.x = (pclamp->minu+0.5f)/fw; v2.z = (pclamp->maxu-0.5f)/fw;
|
||||
v2.x = (pclamp->minu + 0.5f) / fw;
|
||||
v2.z = (pclamp->maxu - 0.5f) / fw;
|
||||
break;
|
||||
|
||||
case 3: // region rep x
|
||||
v.x = 0.9999f;
|
||||
v.z = (float)fw ;
|
||||
v.z = (float)fw;
|
||||
v2.x = (float)GPU_TEXMASKWIDTH / fw;
|
||||
v2.z = pclamp->maxu / fw;
|
||||
int correctMinu = pclamp->minu & (~pclamp->maxu); // (A && B) || C == (A && (B && !C)) + C
|
||||
|
||||
if (correctMinu != g_PrevBitwiseTexX)
|
||||
if (correctMinu != g_PrevBitwiseTexX)
|
||||
{
|
||||
g_PrevBitwiseTexX = correctMinu;
|
||||
ptex[0] = ZeroGS::s_BitwiseTextures.GetTex(correctMinu, 0);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
switch(pclamp->wmt)
|
||||
switch (pclamp->wmt)
|
||||
{
|
||||
|
||||
case 0:
|
||||
v2.y = -1e10; v2.w = 1e10;
|
||||
v2.y = -1e10;
|
||||
v2.w = 1e10;
|
||||
break;
|
||||
|
||||
case 1: // pclamp
|
||||
// suikoden5 movie text
|
||||
v2.y = 0; v2.w = 1-0.5f/fh;
|
||||
v2.y = 0;
|
||||
v2.w = 1 - 0.5f / fh;
|
||||
break;
|
||||
|
||||
case 2: // reg pclamp
|
||||
v2.y = (pclamp->minv+0.5f)/fh; v2.w = (pclamp->maxv-0.5f)/fh;
|
||||
v2.y = (pclamp->minv + 0.5f) / fh;
|
||||
v2.w = (pclamp->maxv - 0.5f) / fh;
|
||||
break;
|
||||
|
||||
case 3: // region rep y
|
||||
v.y = 0.9999f;
|
||||
v.w = (float)fh ;
|
||||
v.w = (float)fh;
|
||||
v2.y = (float)GPU_TEXMASKWIDTH / fh;
|
||||
v2.w = pclamp->maxv / fh;
|
||||
int correctMinv = pclamp->minv & (~pclamp->maxv); // (A && B) || C == (A && (B && !C)) + C
|
||||
|
||||
if (correctMinv != g_PrevBitwiseTexY) {
|
||||
if (correctMinv != g_PrevBitwiseTexY)
|
||||
{
|
||||
g_PrevBitwiseTexY = correctMinv;
|
||||
ptex[1] = ZeroGS::s_BitwiseTextures.GetTex(correctMinv, ptex[0]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pfragment->fTexWrapMode != 0)
|
||||
ZZshSetParameter4fv(pfragment->fTexWrapMode, v, "g_fTexWrapMode");
|
||||
|
||||
if (pfragment->fClampExts != 0)
|
||||
ZZshSetParameter4fv(pfragment->fClampExts, v2, "g_fClampExts");
|
||||
|
||||
|
||||
if (ZZshActiveParameter(pfragment->fTexWrapMode))
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexWrapMode, v, "g_fTexWrapMode");
|
||||
if (ZZshActiveParameter( pfragment->fClampExts))
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fClampExts, v2, "g_fClampExts");
|
||||
}
|
||||
|
||||
// Fixme should be in float4 lib
|
||||
|
@ -2230,11 +2183,11 @@ void ZeroGS::SetTexVariables(int context, FRAGMENTSHADER* pfragment)
|
|||
|
||||
// Test;*/
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha, valpha, "g_fTexAlpha");
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
|
||||
ZZshSetParameter4fv(pfragment->fTexAlpha, valpha, "g_fTexAlpha");
|
||||
ZZshSetParameter4fv(pfragment->fTexAlpha2, valpha2, "g_fTexAlpha2");
|
||||
|
||||
if (IsAlphaTestExpansion(tex0))
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTestBlack, vblack, "g_fTestBlack");
|
||||
ZZshSetParameter4fv(pfragment->fTestBlack, vblack, "g_fTestBlack");
|
||||
|
||||
SetTexClamping(context, pfragment);
|
||||
|
||||
|
@ -2280,7 +2233,7 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
|
|||
v.w = 1.0f / (float)fh;
|
||||
|
||||
if (pfragment->fRealTexDims)
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fRealTexDims, v, "g_fRealTexDims");
|
||||
ZZshSetParameter4fv(pfragment->fRealTexDims, v, "g_fRealTexDims");
|
||||
else
|
||||
ZZshSetParameter4fv(cgGetNamedParameter(pfragment->prog,"g_fRealTexDims"),v, "g_fRealTexDims");
|
||||
}
|
||||
|
@ -2336,15 +2289,15 @@ void ZeroGS::SetTexVariablesInt(int context, int bilinear, const tex0Info& tex0,
|
|||
v.z *= b.bpp * (1 / 32.0f);
|
||||
}
|
||||
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||
ZZshSetParameter4fv(pfragment->fTexDims, vTexDims, "g_fTexDims");
|
||||
|
||||
// ZZshSetParameter4fv(pfragment->prog, pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
|
||||
ZZshSetParameter4fv(pfragment->prog, pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
// ZZshSetParameter4fv(pfragment->fTexBlock, b.vTexBlock, "g_fTexBlock"); // I change it, and it's working. Seems casting from float4 to float[4] is ok.
|
||||
ZZshSetParameter4fv(pfragment->fTexBlock, &b.vTexBlock.x, "g_fTexBlock");
|
||||
ZZshSetParameter4fv(pfragment->fTexOffset, v, "g_fTexOffset");
|
||||
|
||||
// get hardware texture dims
|
||||
//int texheight = (pmemtarg->realheight+pmemtarg->widthmult-1)/pmemtarg->widthmult;
|
||||
int texwidth = GPU_TEXWIDTH * pmemtarg->widthmult * pmemtarg->channels;
|
||||
//int texheight = pmemtarg->texH;
|
||||
int texwidth = pmemtarg->texW;
|
||||
|
||||
v.y = 1.0f;
|
||||
v.x = (fpageint - (float)pmemtarg->realy / (float)pmemtarg->widthmult + 0.5f);//*v.y;
|
||||
|
|
|
@ -1,83 +1,493 @@
|
|||
/* ZeroGS KOSMOS
|
||||
*
|
||||
* Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* Zerofrog forgot to write any copyright notice after releasing the plugin into GPLv2
|
||||
* If someone can contact him successfully to clarify this matter that would be great.
|
||||
*/
|
||||
/* ZZ Open GL graphics plugin
|
||||
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
|
||||
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
// Now that it's down to 82 lines, and most of it's fairly obvious, perhaps it'd be easier to
|
||||
// just reimplement it... -arcum42
|
||||
|
||||
#ifndef ZZOGLMATH_H_INCLUDED
|
||||
#define ZZOGLMATH_H_INCLUDED
|
||||
#ifndef ZZOGLMATH_H_INCLUDED
|
||||
#define ZZOGLMATH_H_INCLUDED
|
||||
|
||||
//Remind me to check and see if this is necessary, and what uses it. --arcum42
|
||||
#ifndef _WIN32
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef float dReal;
|
||||
//#define ZZ_MMATH
|
||||
|
||||
// class used for 3 and 4 dim vectors and quaternions
|
||||
// It is better to use this for a 3 dim vector because it is 16byte aligned and SIMD instructions can be used
|
||||
#ifndef ZZ_MMATH
|
||||
|
||||
class float4
|
||||
template <class T>
|
||||
class Vector4
|
||||
{
|
||||
public:
|
||||
dReal x, y, z, w;
|
||||
|
||||
float4() : x(0), y(0), z(0), w(0) {}
|
||||
float4(dReal x, dReal y, dReal z) : x(x), y(y), z(z), w(0) {}
|
||||
float4(dReal x, dReal y, dReal z, dReal w) : x(x), y(y), z(z), w(w) {}
|
||||
float4(const float4 &vec) : x(vec.x), y(vec.y), z(vec.z), w(vec.w) {}
|
||||
float4(const dReal* pf) { assert(pf != NULL); x = pf[0]; y = pf[1]; z = pf[2]; w = 0; }
|
||||
dReal operator[](int i) const { return (&x)[i]; }
|
||||
dReal& operator[](int i) { return (&x)[i]; }
|
||||
T x, y, z, w;
|
||||
|
||||
// casting operators
|
||||
operator dReal*() { return &x; }
|
||||
operator const dReal*() const { return (const dReal*)&x; }
|
||||
// Builds a vector from up to four components; every omitted component defaults to 0.
Vector4(T x1 = 0, T y1 = 0, T z1 = 0, T w1 = 0)
	: x(x1), y(y1), z(z1), w(w1)
{
}
|
||||
|
||||
// SCALAR FUNCTIONS
|
||||
inline dReal dot(const float4 &v) const { return x*v.x + y*v.y + z*v.z + w*v.w; }
|
||||
inline void Set3(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; }
|
||||
inline void Set4(const float* pvals) { x = pvals[0]; y = pvals[1]; z = pvals[2]; w = pvals[3]; }
|
||||
inline void SetColor(u32 color)
|
||||
Vector4(Vector4<T> &f)
|
||||
{
|
||||
x = f.x;
|
||||
y = f.y;
|
||||
z = f.z;
|
||||
w = f.w;
|
||||
}
|
||||
|
||||
// Loads all four components from the array f; f[0] through f[3] are read.
// For some reason, the old code set w to 0 instead of reading f[3].
Vector4(T* f)
	: x(f[0]), y(f[1]), z(f[2]), w(f[3])
{
}
|
||||
|
||||
// Indexed component access: 0 -> x, 1 -> y, 2 -> z, 3 -> w.
// Any other index is a programming error and trips the assert. When asserts are
// compiled out (NDEBUG) the function still has to return a valid reference, so it
// returns w rather than running off the end of a non-void function.
T& operator[](int i)
{
	switch(i)
	{
		case 0: return x;
		case 1: return y;
		case 2: return z;
		case 3: return w;
		default:
			assert(0);
			return w;
	}
}
|
||||
|
||||
// Exposes the vector as a mutable array of components, starting at x.
operator T*()
{
	return &x;
}
|
||||
|
||||
// Exposes the vector as a read-only array of components, starting at x.
operator const T*() const
{
	return &x;
}
|
||||
|
||||
// Component-wise assignment; returns *this so assignments can be chained.
Vector4<T>& operator =(const Vector4<T>& v)
{
	if (this != &v)
	{
		x = v.x;
		y = v.y;
		z = v.z;
		w = v.w;
	}

	return *this;
}
|
||||
|
||||
bool operator ==(const Vector4<T>& v)
|
||||
{
|
||||
return !!( x == v.x &&
|
||||
y == v.y &&
|
||||
z == v.z &&
|
||||
w == v.w );
|
||||
}
|
||||
|
||||
// Component-wise sum of two vectors.
Vector4<T> operator +(const Vector4<T>& v) const
{
	return Vector4<T>(
		x + v.x,
		y + v.y,
		z + v.z,
		w + v.w);
}

// Component-wise difference of two vectors.
Vector4<T> operator -(const Vector4<T>& v) const
{
	return Vector4<T>(
		x - v.x,
		y - v.y,
		z - v.z,
		w - v.w);
}

// Component-wise product of two vectors (not a dot or cross product).
Vector4<T> operator *(const Vector4<T>& v) const
{
	return Vector4<T>(
		x * v.x,
		y * v.y,
		z * v.z,
		w * v.w);
}

// Component-wise quotient of two vectors.
Vector4<T> operator /(const Vector4<T>& v) const
{
	return Vector4<T>(
		x / v.x,
		y / v.y,
		z / v.z,
		w / v.w);
}
|
||||
// Adds the scalar val to every component.
Vector4<T> operator +(T val) const
{
	return Vector4<T>(
		x + val,
		y + val,
		z + val,
		w + val);
}

// Subtracts the scalar val from every component.
Vector4<T> operator -(T val) const
{
	return Vector4<T>(
		x - val,
		y - val,
		z - val,
		w - val);
}

// Multiplies every component by the scalar val.
Vector4<T> operator *(T val) const
{
	return Vector4<T>(
		x * val,
		y * val,
		z * val,
		w * val);
}

// Divides every component by the scalar val.
Vector4<T> operator /(T val) const
{
	return Vector4<T>(
		x / val,
		y / val,
		z / val,
		w / val);
}
|
||||
|
||||
// Adds v to this vector, component by component.
Vector4<T>& operator +=(const Vector4<T>& v)
{
	x += v.x;
	y += v.y;
	z += v.z;
	w += v.w;
	return *this;
}

// Subtracts v from this vector, component by component.
Vector4<T>& operator -=(const Vector4<T>& v)
{
	x -= v.x;
	y -= v.y;
	z -= v.z;
	w -= v.w;
	return *this;
}

// Multiplies this vector by v, component by component.
Vector4<T>& operator *=(const Vector4<T>& v)
{
	x *= v.x;
	y *= v.y;
	z *= v.z;
	w *= v.w;
	return *this;
}
|
||||
|
||||
// Divides this vector by v, component by component.
// (The previous body subtracted v instead of dividing by it.)
Vector4<T>& operator /=(const Vector4<T>& v)
{
	*this = *this / v;
	return *this;
}
|
||||
|
||||
// Adds the scalar val to every component in place.
Vector4<T>& operator +=(T val)
{
	x += val;
	y += val;
	z += val;
	w += val;
	return *this;
}

// Subtracts the scalar val from every component in place.
Vector4<T>& operator -=(T val)
{
	x -= val;
	y -= val;
	z -= val;
	w -= val;
	return *this;
}

// Multiplies every component by the scalar val in place.
Vector4<T>& operator *=(T val)
{
	x *= val;
	y *= val;
	z *= val;
	w *= val;
	return *this;
}

// Divides every component by the scalar val in place.
Vector4<T>& operator /=(T val)
{
	x /= val;
	y /= val;
	z /= val;
	w /= val;
	return *this;
}
|
||||
|
||||
// Probably doesn't belong here, but I'll leave it in for the moment.
|
||||
// Unpacks the three low bytes of color into x, y and z, each scaled to [0, 1]:
// bits 0-7 go to x, bits 8-15 to y and bits 16-23 to z. w is left unchanged.
void SetColor(u32 color)
{
	const u32 byte0 = color & 0xff;
	const u32 byte1 = (color >> 8) & 0xff;
	const u32 byte2 = (color >> 16) & 0xff;

	x = byte0 / 255.0f;
	y = byte1 / 255.0f;
	z = byte2 / 255.0f;
}
|
||||
|
||||
// 3 dim cross product, w is not touched
|
||||
/// this = this x v
|
||||
/// this = u x v
|
||||
inline float4 operator-() const { float4 v; v.x = -x; v.y = -y; v.z = -z; v.w = -w; return v; }
|
||||
inline float4 operator+(const float4 &r) const { float4 v; v.x = x + r.x; v.y = y + r.y; v.z = z + r.z; v.w = w + r.w; return v; }
|
||||
inline float4 operator-(const float4 &r) const { float4 v; v.x = x - r.x; v.y = y - r.y; v.z = z - r.z; v.w = w - r.w; return v; }
|
||||
inline float4 operator*(const float4 &r) const { float4 v; v.x = r.x * x; v.y = r.y * y; v.z = r.z * z; v.w = r.w * w; return v; }
|
||||
inline float4 operator*(dReal k) const { float4 v; v.x = k * x; v.y = k * y; v.z = k * z; v.w = k * w; return v; }
|
||||
inline float4& operator += (const float4& r) { x += r.x; y += r.y; z += r.z; w += r.w; return *this; }
|
||||
inline float4& operator -= (const float4& r) { x -= r.x; y -= r.y; z -= r.z; w -= r.w; return *this; }
|
||||
inline float4& operator *= (const float4& r) { x *= r.x; y *= r.y; z *= r.z; w *= r.w; return *this; }
|
||||
inline float4& operator *= (const dReal k) { x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
inline float4& operator /= (const dReal _k) { dReal k = 1 / _k; x *= k; y *= k; z *= k; w *= k; return *this; }
|
||||
friend float4 operator*(float f, const float4& v);
|
||||
//friend ostream& operator<<(ostream& O, const float4& v);
|
||||
//friend istream& operator>>(istream& I, float4& v);
|
||||
};
|
||||
|
||||
inline float4 operator*(float f, const float4& left)
|
||||
typedef Vector4<float> float4;
|
||||
|
||||
#else
|
||||
|
||||
// Reimplement, swiping a bunch of code from GSdx and adapting it. (specifically GSVector.h)
|
||||
// This doesn't include more than half of the functions in there, as well as some of the structs...
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include "Pcsx2Types.h"
|
||||
|
||||
class float4
|
||||
{
|
||||
float4 v;
|
||||
v.x = f * left.x;
|
||||
v.y = f * left.y;
|
||||
v.z = f * left.z;
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // ZZOGLMATH_H_INCLUDED
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct {float x, y, z, w;};
|
||||
struct {float r, g, b, a;};
|
||||
struct {float left, top, right, bottom;};
|
||||
float v[4];
|
||||
float f32[4];
|
||||
s8 _s8[16];
|
||||
s16 _s16[8];
|
||||
s32 _s32[4];
|
||||
s64 _s64[2];
|
||||
u8 _u8[16];
|
||||
u16 _u16[8];
|
||||
u32 _u32[4];
|
||||
u64 _u64[2];
|
||||
__m128 m;
|
||||
};
|
||||
|
||||
float4()
|
||||
{
|
||||
m = _mm_setzero_ps();
|
||||
}
|
||||
|
||||
float4(float x, float y, float z, float w = 0)
|
||||
{
|
||||
m = _mm_set_ps(w, z, y, x);
|
||||
}
|
||||
|
||||
float4(float4 &f)
|
||||
{
|
||||
m = f.m;
|
||||
}
|
||||
|
||||
float4(float x, float y)
|
||||
{
|
||||
m = _mm_unpacklo_ps(_mm_load_ss(&x), _mm_load_ss(&y));
|
||||
}
|
||||
|
||||
float4(int x, int y)
|
||||
{
|
||||
m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y)));
|
||||
}
|
||||
|
||||
explicit float4(float f)
|
||||
{
|
||||
m = _mm_set1_ps(f);
|
||||
}
|
||||
|
||||
explicit float4(__m128 m)
|
||||
{
|
||||
this->m = m;
|
||||
}
|
||||
|
||||
float4(float* f)
|
||||
{
|
||||
x = f[0];
|
||||
y = f[1];
|
||||
z = f[2];
|
||||
w = f[3]; // For some reason, the old code set this to 0.
|
||||
}
|
||||
|
||||
float& operator[](int i)
|
||||
{
|
||||
switch(i)
|
||||
{
|
||||
case 0: return x;
|
||||
case 1: return y;
|
||||
case 2: return z;
|
||||
case 3: return w;
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
operator float*()
|
||||
{
|
||||
return (float*) this;
|
||||
}
|
||||
|
||||
operator const float*() const
|
||||
{
|
||||
return (const float*) this;
|
||||
}
|
||||
|
||||
void operator = (float f)
|
||||
{
|
||||
m = _mm_set1_ps(f);
|
||||
}
|
||||
|
||||
void operator = (__m128 m)
|
||||
{
|
||||
this->m = m;
|
||||
}
|
||||
|
||||
|
||||
void operator += (const float4& v)
|
||||
{
|
||||
m = _mm_add_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator -= (const float4& v)
|
||||
{
|
||||
m = _mm_sub_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator *= (const float4& v)
|
||||
{
|
||||
m = _mm_mul_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator /= (const float4& v)
|
||||
{
|
||||
m = _mm_div_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator += (float f)
|
||||
{
|
||||
*this += float4(f);
|
||||
}
|
||||
|
||||
void operator -= (float f)
|
||||
{
|
||||
*this -= float4(f);
|
||||
}
|
||||
|
||||
void operator *= (float f)
|
||||
{
|
||||
*this *= float4(f);
|
||||
}
|
||||
|
||||
void operator /= (float f)
|
||||
{
|
||||
*this /= float4(f);
|
||||
}
|
||||
|
||||
void operator &= (const float4& v)
|
||||
{
|
||||
m = _mm_and_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator |= (const float4& v)
|
||||
{
|
||||
m = _mm_or_ps(m, v.m);
|
||||
}
|
||||
|
||||
void operator ^= (const float4& v)
|
||||
{
|
||||
m = _mm_xor_ps(m, v.m);
|
||||
}
|
||||
|
||||
friend float4 operator + (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_add_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator - (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_sub_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator * (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_mul_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator / (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_div_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator + (const float4& v, float f)
|
||||
{
|
||||
return v + float4(f);
|
||||
}
|
||||
|
||||
friend float4 operator - (const float4& v, float f)
|
||||
{
|
||||
return v - float4(f);
|
||||
}
|
||||
|
||||
friend float4 operator * (const float4& v, float f)
|
||||
{
|
||||
return v * float4(f);
|
||||
}
|
||||
|
||||
friend float4 operator / (const float4& v, float f)
|
||||
{
|
||||
return v / float4(f);
|
||||
}
|
||||
|
||||
friend float4 operator & (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_and_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator | (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_or_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator ^ (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_xor_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator == (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmpeq_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator != (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmpneq_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator > (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmpgt_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator < (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmplt_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator >= (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmpge_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
friend float4 operator <= (const float4& v1, const float4& v2)
|
||||
{
|
||||
return float4(_mm_cmple_ps(v1.m, v2.m));
|
||||
}
|
||||
|
||||
// This looked interesting, so I thought I'd include it...
|
||||
|
||||
template<int i> float4 shuffle() const
|
||||
{
|
||||
return float4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(i, i, i, i)));
|
||||
}
|
||||
|
||||
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
|
||||
float4 xs##ys##zs##ws() const {return float4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
float4 xs##ys##zs##ws(const float4& v) const {return float4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(wn, zn, yn, xn)));} \
|
||||
|
||||
#define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
|
||||
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
|
||||
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \
|
||||
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \
|
||||
VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \
|
||||
|
||||
#define VECTOR4_SHUFFLE_2(xs, xn, ys, yn) \
|
||||
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, x, 0) \
|
||||
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, y, 1) \
|
||||
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, z, 2) \
|
||||
VECTOR4_SHUFFLE_3(xs, xn, ys, yn, w, 3) \
|
||||
|
||||
#define VECTOR4_SHUFFLE_1(xs, xn) \
|
||||
float4 xs##4() const {return float4(_mm_shuffle_ps(m, m, _MM_SHUFFLE(xn, xn, xn, xn)));} \
|
||||
float4 xs##4(const float4& v) const {return float4(_mm_shuffle_ps(m, v.m, _MM_SHUFFLE(xn, xn, xn, xn)));} \
|
||||
VECTOR4_SHUFFLE_2(xs, xn, x, 0) \
|
||||
VECTOR4_SHUFFLE_2(xs, xn, y, 1) \
|
||||
VECTOR4_SHUFFLE_2(xs, xn, z, 2) \
|
||||
VECTOR4_SHUFFLE_2(xs, xn, w, 3) \
|
||||
|
||||
VECTOR4_SHUFFLE_1(x, 0)
|
||||
VECTOR4_SHUFFLE_1(y, 1)
|
||||
VECTOR4_SHUFFLE_1(z, 2)
|
||||
VECTOR4_SHUFFLE_1(w, 3)
|
||||
|
||||
// Probably doesn't belong here, but I'll leave it in for the moment.
|
||||
void SetColor(u32 color)
{
	// Unpack the three low bytes of color (bits 0-7 -> x, 8-15 -> y, 16-23 -> z)
	// and scale each from [0,255] to [0,1].
	// Bits 24-31 are ignored and w is left untouched.
	const float maxChannel = 255.0f;

	const u32 byte0 = color & 0xff;
	const u32 byte1 = (color >> 8) & 0xff;
	const u32 byte2 = (color >> 16) & 0xff;

	x = byte0 / maxChannel;
	y = byte1 / maxChannel;
	z = byte2 / maxChannel;
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -392,16 +392,16 @@ ZeroGS::SaveTex(tex0Info* ptex, int usevid)
|
|||
assert(pmemtarg != NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_RECTANGLE_NV, pmemtarg->ptex->tex);
|
||||
srcdata.resize(pmemtarg->realheight * GPU_TEXWIDTH * pmemtarg->widthmult * 4 * 8); // max of 8 cannels
|
||||
srcdata.resize(4 * pmemtarg->texW * pmemtarg->texH);
|
||||
|
||||
glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, pmemtarg->fmt, &srcdata[0]);
|
||||
|
||||
u32 offset = pmemtarg->realy * 4 * GPU_TEXWIDTH;
|
||||
u32 offset = MemorySize(pmemtarg->realy);
|
||||
|
||||
if (ptex->psm == PSMT8)
|
||||
offset *= PSMT_IS32BIT(ptex->cpsm) ? 4 : 2;
|
||||
offset *= CLUT_PIXEL_SIZE(ptex->cpsm);
|
||||
else if (ptex->psm == PSMT4)
|
||||
offset *= PSMT_IS32BIT(ptex->cpsm) ? 8 : 4;
|
||||
offset *= CLUT_PIXEL_SIZE(ptex->cpsm) * 2;
|
||||
|
||||
psrc = &srcdata[0] - offset;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -50,7 +50,9 @@ class CRenderTargetMngr
|
|||
|
||||
void Destroy();
|
||||
static MAPTARGETS::iterator GetOldestTarg(MAPTARGETS& m);
|
||||
|
||||
|
||||
bool isFound(const frameInfo& frame, MAPTARGETS::iterator& it, u32 opts, u32 key, int maxposheight);
|
||||
|
||||
CRenderTarget* GetTarg(const frameInfo& frame, u32 Options, int maxposheight);
|
||||
inline CRenderTarget* GetTarg(int fbp, int fbw, VB& curvb)
|
||||
{
|
||||
|
@ -119,13 +121,13 @@ class CRenderTargetMngr
|
|||
|
||||
class CMemoryTargetMngr
|
||||
{
|
||||
|
||||
public:
|
||||
CMemoryTargetMngr() : curstamp(0) {}
|
||||
|
||||
CMemoryTarget* GetMemoryTarget(const tex0Info& tex0, int forcevalidate); // pcbp is pointer to start of clut
|
||||
CMemoryTarget* MemoryTarget_SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||
CMemoryTarget* MemoryTarget_ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
||||
CMemoryTarget* SearchExistTarget(int start, int end, int nClutOffset, int clutsize, const tex0Info& tex0, int forcevalidate);
|
||||
CMemoryTarget* ClearedTargetsSearch(int fmt, int widthmult, int channels, int height);
|
||||
int CompareTarget(list<CMemoryTarget>::iterator& it, const tex0Info& tex0, int clutsize, int nClutOffset);
|
||||
|
||||
void Destroy(); // destroy all targs
|
||||
|
||||
|
@ -138,6 +140,8 @@ class CMemoryTargetMngr
|
|||
|
||||
private:
|
||||
list<CMemoryTarget>::iterator DestroyTargetIter(list<CMemoryTarget>::iterator& it);
|
||||
void GetClutVariables(int& nClutOffset, int& clutsize, const tex0Info& tex0);
|
||||
void GetMemAddress(int& start, int& end, const tex0Info& tex0);
|
||||
};
|
||||
|
||||
class CBitwiseTextureMngr
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
#
|
||||
# This Program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Make see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
# http://www.gnu.org/copyleft/gpl.html
|
||||
#
|
||||
#
|
||||
|
@ -20,6 +20,11 @@
|
|||
|
||||
#ifdef ZEROGS_SSE2
|
||||
// SSE2 extensions
|
||||
|
||||
// Note: pshufd with immediate 0xe4 (the identity shuffle) <=> movdqa !!!
|
||||
// What the function does is
|
||||
// Interleave s1 and sd0 -> d1 (high) & sd0 (low)
|
||||
// Interleave s3 and sd2 -> d3 (high) & sd2 (low)
|
||||
#define punpck(op, sd0, sd2, s1, s3, d1, d3) \
|
||||
movdqa %xmm##d1, %xmm##sd0; \
|
||||
pshufd %xmm##d3, %xmm##sd2, 0xe4; \
|
||||
|
@ -28,7 +33,16 @@
|
|||
punpckl##op %xmm##sd2, %xmm##s3; \
|
||||
punpckh##op %xmm##d3, %xmm##s3; \
|
||||
|
||||
|
||||
|
||||
// Input xmm7 == 0x0F0F0F0F 0x0F0F0F0F 0x0F0F0F0F 0x0F0F0F0F
|
||||
// DATA xmm[0-3]
|
||||
// This function does a 4-bits interleaving of 4 xmm registers
|
||||
//
|
||||
// ARG Can not put comment in the middle of the define...
|
||||
// After the first por
|
||||
// low 32bits (4bits packed) == 1.6 0.6 1.4 0.4 1.2 0.2 1.0 0.0
|
||||
// After the second one
|
||||
// low 32bits (4bits packed) == 1.7 0.7 1.5 0.5 1.3 0.3 1.1 0.1
|
||||
#define punpcknb \
|
||||
movdqa %xmm4, %xmm0; \
|
||||
pshufd %xmm5, %xmm1, 0xe4; \
|
||||
|
@ -48,6 +62,7 @@
|
|||
\
|
||||
movdqa %xmm1, %xmm4; \
|
||||
\
|
||||
\
|
||||
movdqa %xmm4, %xmm2; \
|
||||
pshufd %xmm5, %xmm3, 0xe4; \
|
||||
\
|
||||
|
@ -66,7 +81,13 @@
|
|||
\
|
||||
movdqa %xmm3, %xmm4; \
|
||||
\
|
||||
punpck(bw, 0, 2, 1, 3, 4, 6); \
|
||||
punpck(bw, 0, 2, 1, 3, 4, 6);\
|
||||
|
||||
// output
|
||||
// low 32 bits 0 (4 bits packed) == 1.3 0.3 1.2 0.2 1.1 0.1 1.0 0.0
|
||||
// low 32 bits 4 (4 bits packed) == 1.19 0.19 1.18 0.18 1.17 0.17 1.16 0.16
|
||||
// low 32 bits 2 (4 bits packed) == 3.3 2.3 3.2 2.2 3.1 2.1 3.0 2.0
|
||||
// low 32 bits 6 (4 bits packed) == 3.19 2.19 3.18 2.18 3.17 2.17 3.16 2.16
|
||||
|
||||
|
||||
//
|
||||
|
@ -84,11 +105,15 @@ SwizzleBlock32_sse2:
|
|||
push %esi
|
||||
push %edi
|
||||
|
||||
// save dst
|
||||
mov %edi, %ecx
|
||||
// save src
|
||||
mov %esi, %edx
|
||||
// get pitch
|
||||
mov %edx, [%esp+4+8]
|
||||
mov %ecx, 4
|
||||
|
||||
// get WriteMask
|
||||
mov %eax, [%esp+8+8]
|
||||
cmp %eax, 0xffffffff
|
||||
jne SwizzleBlock32_sse2_2
|
||||
|
@ -100,6 +125,8 @@ SwizzleBlock32_sse2_1:
|
|||
movdqa %xmm1, [%esi+%edx]
|
||||
movdqa %xmm5, [%esi+%edx+16]
|
||||
|
||||
// 64bits interleave 1&0 -> 2&0
|
||||
// 64bits interleave 5&4 -> 6&4
|
||||
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
||||
|
||||
movntps [%edi+16*0], %xmm0
|
||||
|
@ -107,6 +134,7 @@ SwizzleBlock32_sse2_1:
|
|||
movntps [%edi+16*2], %xmm4
|
||||
movntps [%edi+16*3], %xmm6
|
||||
|
||||
// update ptr
|
||||
lea %esi, [%esi+%edx*2]
|
||||
add %edi, 64
|
||||
|
||||
|
@ -120,9 +148,10 @@ SwizzleBlock32_sse2_1:
|
|||
|
||||
SwizzleBlock32_sse2_2:
|
||||
|
||||
// WriteMask: 32bits to 4*32bits
|
||||
movd %xmm7, %eax
|
||||
pshufd %xmm7, %xmm7, 0
|
||||
|
||||
|
||||
.align 16
|
||||
SwizzleBlock32_sse2_3:
|
||||
movdqa %xmm0, [%esi]
|
||||
|
@ -130,13 +159,19 @@ SwizzleBlock32_sse2_3:
|
|||
movdqa %xmm1, [%esi+%edx]
|
||||
movdqa %xmm5, [%esi+%edx+16]
|
||||
|
||||
// 64bits interleave 1&0 -> 2&0
|
||||
// 64bits interleave 5&4 -> 6&4
|
||||
punpck(qdq, 0, 4, 1, 5, 2, 6)
|
||||
|
||||
// save a mask copy
|
||||
movdqa %xmm3, %xmm7
|
||||
pshufd %xmm5, %xmm7, 0xe4
|
||||
|
||||
// *dst & ~WriteMask
|
||||
pandn %xmm3, [%edi+16*0]
|
||||
// *src & WriteMask
|
||||
pand %xmm0, %xmm7
|
||||
// Final value to save
|
||||
por %xmm0, %xmm3
|
||||
movntps [%edi+16*0], %xmm0
|
||||
|
||||
|
@ -158,6 +193,7 @@ SwizzleBlock32_sse2_3:
|
|||
por %xmm6, %xmm5
|
||||
movntps [%edi+16*3], %xmm6
|
||||
|
||||
// update ptr
|
||||
lea %esi, [%esi+%edx*2]
|
||||
add %edi, 64
|
||||
|
||||
|
@ -179,6 +215,7 @@ SwizzleBlock16_sse2:
|
|||
|
||||
push %ebx
|
||||
|
||||
// srcpitch
|
||||
mov %ebx, [%esp+4+4]
|
||||
mov %eax, 4
|
||||
|
||||
|
@ -189,7 +226,11 @@ SwizzleBlock16_sse2_1:
|
|||
movdqa %xmm2, [%edx+%ebx]
|
||||
movdqa %xmm3, [%edx+%ebx+16]
|
||||
|
||||
// 16bits interleave 1&0 -> 4&0
|
||||
// 16bits interleave 3&2 -> 6&2
|
||||
punpck(wd, 0, 2, 1, 3, 4, 6)
|
||||
// 64bits interleave 2&0 -> 1&0
|
||||
// 64bits interleave 6&4 -> 5&4
|
||||
punpck(qdq, 0, 4, 2, 6, 1, 5)
|
||||
|
||||
movntps [%ecx+16*0], %xmm0
|
||||
|
@ -197,6 +238,7 @@ SwizzleBlock16_sse2_1:
|
|||
movntps [%ecx+16*2], %xmm4
|
||||
movntps [%ecx+16*3], %xmm5
|
||||
|
||||
// update ptr
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
add %ecx, 64
|
||||
|
||||
|
@ -217,7 +259,9 @@ SwizzleBlock8_sse2:
|
|||
|
||||
push %ebx
|
||||
|
||||
// load srcpitch
|
||||
mov %ebx, [%esp+4+4]
|
||||
// basic counter
|
||||
mov %eax, 2
|
||||
|
||||
.align 16
|
||||
|
@ -226,14 +270,23 @@ SwizzleBlock8_sse2_1:
|
|||
|
||||
movdqa %xmm0, [%edx]
|
||||
movdqa %xmm2, [%edx+%ebx]
|
||||
// update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
// 2 3 0 1
|
||||
pshufd %xmm1, [%edx], 0xb1
|
||||
pshufd %xmm3, [%edx+%ebx], 0xb1
|
||||
// update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
// 8bits interleave 1&0 -> 4&0
|
||||
// 8bits interleave 3&2 -> 6&2
|
||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||
// 16bits interleave 4&0 -> 1&0
|
||||
// 16bits interleave 6&2 -> 3&2
|
||||
punpck(wd, 0, 2, 4, 6, 1, 3)
|
||||
// 64bits interleave 2&0 -> 4&0
|
||||
// 64bits interleave 3&1 -> 5&1
|
||||
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
||||
|
||||
movntps [%ecx+16*0], %xmm0
|
||||
|
@ -241,18 +294,27 @@ SwizzleBlock8_sse2_1:
|
|||
movntps [%ecx+16*2], %xmm1
|
||||
movntps [%ecx+16*3], %xmm5
|
||||
|
||||
// col 1, 3
|
||||
// col 1, 3 (same as previous column)
|
||||
|
||||
// 2 3 0 1
|
||||
pshufd %xmm0, [%edx], 0xb1
|
||||
pshufd %xmm2, [%edx+%ebx], 0xb1
|
||||
// update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
movdqa %xmm1, [%edx]
|
||||
movdqa %xmm3, [%edx+%ebx]
|
||||
// update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
// 8bits interleave 1&0 -> 4&0
|
||||
// 8bits interleave 3&2 -> 6&2
|
||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||
// 16bits interleave 4&0 -> 1&0
|
||||
// 16bits interleave 6&2 -> 3&2
|
||||
punpck(wd, 0, 2, 4, 6, 1, 3)
|
||||
// 64bits interleave 2&0 -> 4&0
|
||||
// 64bits interleave 3&1 -> 5&1
|
||||
punpck(qdq, 0, 1, 2, 3, 4, 5)
|
||||
|
||||
movntps [%ecx+16*4], %xmm0
|
||||
|
@ -260,6 +322,7 @@ SwizzleBlock8_sse2_1:
|
|||
movntps [%ecx+16*6], %xmm1
|
||||
movntps [%ecx+16*7], %xmm5
|
||||
|
||||
// update dst pointer
|
||||
add %ecx, 128
|
||||
|
||||
dec %eax
|
||||
|
@ -278,11 +341,13 @@ SwizzleBlock8_sse2_1:
|
|||
SwizzleBlock4_sse2:
|
||||
|
||||
push %ebx
|
||||
|
||||
|
||||
// load 4 0x0F0F0F0F
|
||||
mov %eax, 0xf0f0f0f
|
||||
movd %xmm7, %eax
|
||||
movd %xmm7, %eax
|
||||
pshufd %xmm7, %xmm7, 0
|
||||
|
||||
// load srcpitch
|
||||
mov %ebx, [%esp+4+4]
|
||||
mov %eax, 2
|
||||
|
||||
|
@ -292,20 +357,32 @@ SwizzleBlock4_sse2_1:
|
|||
|
||||
movdqa %xmm0, [%edx]
|
||||
movdqa %xmm2, [%edx+%ebx]
|
||||
//update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
movdqa %xmm1, [%edx]
|
||||
movdqa %xmm3, [%edx+%ebx]
|
||||
// update src pointer
|
||||
lea %edx, [%edx+%ebx*2]
|
||||
|
||||
// - - - - 2 3 0 1
|
||||
pshuflw %xmm1, %xmm1, 0xb1
|
||||
pshuflw %xmm3, %xmm3, 0xb1
|
||||
// 6 7 4 5 - - - -
|
||||
pshufhw %xmm1, %xmm1, 0xb1
|
||||
pshufhw %xmm3, %xmm3, 0xb1
|
||||
|
||||
// 4bits interleave 1&0 -> 4&0
|
||||
// 4bits interleave 3&2 -> 6&2
|
||||
punpcknb
|
||||
// 8bits interleave 4&0 -> 1&0
|
||||
// 8bits interleave 6&2 -> 3&2
|
||||
punpck(bw, 0, 2, 4, 6, 1, 3)
|
||||
// 8bits interleave 1&0 -> 4&0
|
||||
// 8bits interleave 3&2 -> 6&2
|
||||
punpck(bw, 0, 2, 1, 3, 4, 6)
|
||||
// 64bits interleave 2&0 -> 1&0
|
||||
// 64bits interleave 6&4 -> 3&4
|
||||
punpck(qdq, 0, 4, 2, 6, 1, 3)
|
||||
|
||||
movntps [%ecx+16*0], %xmm0
|
||||
|
@ -313,7 +390,7 @@ SwizzleBlock4_sse2_1:
|
|||
movntps [%ecx+16*2], %xmm4
|
||||
movntps [%ecx+16*3], %xmm3
|
||||
|
||||
// col 1, 3
|
||||
// col 1, 3 (same as previous column)
|
||||
|
||||
movdqa %xmm0, [%edx]
|
||||
movdqa %xmm2, [%edx+%ebx]
|
||||
|
@ -349,6 +426,9 @@ SwizzleBlock4_sse2_1:
|
|||
|
||||
//
|
||||
// swizzling with unaligned reads
|
||||
// Same functions as above, with movdqu instead of movdqa for the reads
|
||||
// Movdqu is as fast as movdqa with aligned address... So do not bother, directly
|
||||
// use movdqu
|
||||
//
|
||||
|
||||
//
|
||||
|
@ -400,7 +480,7 @@ SwizzleBlock32u_sse2_2:
|
|||
|
||||
movd %xmm7, %eax
|
||||
pshufd %xmm7, %xmm7, 0
|
||||
|
||||
|
||||
.align 16
|
||||
SwizzleBlock32u_sse2_3:
|
||||
movdqu %xmm0, [%esi]
|
||||
|
@ -480,7 +560,7 @@ SwizzleBlock16u_sse2_1:
|
|||
|
||||
dec %eax
|
||||
jnz SwizzleBlock16u_sse2_1
|
||||
|
||||
|
||||
pop %ebx
|
||||
|
||||
ret 4
|
||||
|
@ -560,9 +640,9 @@ SwizzleBlock8u_sse2_1:
|
|||
SwizzleBlock4u_sse2:
|
||||
|
||||
push %ebx
|
||||
|
||||
|
||||
mov %eax, 0xf0f0f0f
|
||||
movd %xmm7, %eax
|
||||
movd %xmm7, %eax
|
||||
pshufd %xmm7, %xmm7, 0
|
||||
|
||||
mov %ebx, [%esp+4+4]
|
||||
|
@ -628,7 +708,7 @@ SwizzleBlock4u_sse2_1:
|
|||
pop %ebx
|
||||
|
||||
ret 4
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "x86.h"
|
||||
|
||||
#if defined(ZEROGS_SSE2)
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
|
@ -64,23 +63,17 @@ void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 Wr
|
|||
{
|
||||
u32* d = &g_columnTable32[0][0];
|
||||
|
||||
if( WriteMask == 0xffffffff )
|
||||
{
|
||||
for(int i = 0; i < 8; ++i, d += 8)
|
||||
{
|
||||
for(int j = 0; j < 8; ++j)
|
||||
{
|
||||
if( WriteMask == 0xffffffff ) {
|
||||
for(int i = 0; i < 8; ++i, d += 8) {
|
||||
for(int j = 0; j < 8; ++j) {
|
||||
dst[d[j]] = ((src[2*j] + src[2*j+1]) >> 1);
|
||||
}
|
||||
src += srcpitch;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 8; ++i, d += 8)
|
||||
{
|
||||
for(int j = 0; j < 8; ++j)
|
||||
{
|
||||
else {
|
||||
for(int i = 0; i < 8; ++i, d += 8) {
|
||||
for(int j = 0; j < 8; ++j) {
|
||||
dst[d[j]] = (((src[2*j] + src[2*j+1]) >> 1)&WriteMask)|(dst[d[j]]&~WriteMask);
|
||||
}
|
||||
src += srcpitch;
|
||||
|
@ -92,23 +85,17 @@ void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 Wr
|
|||
{
|
||||
u32* d = &g_columnTable32[0][0];
|
||||
|
||||
if( WriteMask == 0xffffffff )
|
||||
{
|
||||
for(int i = 0; i < 8; ++i, d += 8)
|
||||
{
|
||||
for(int j = 0; j < 8; ++j)
|
||||
{
|
||||
if( WriteMask == 0xffffffff ) {
|
||||
for(int i = 0; i < 8; ++i, d += 8) {
|
||||
for(int j = 0; j < 8; ++j) {
|
||||
dst[d[j]] = ((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2);
|
||||
}
|
||||
src += srcpitch << 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 8; ++i, d += 8)
|
||||
{
|
||||
for(int j = 0; j < 8; ++j)
|
||||
{
|
||||
else {
|
||||
for(int i = 0; i < 8; ++i, d += 8) {
|
||||
for(int j = 0; j < 8; ++j) {
|
||||
dst[d[j]] = (((src[2*j] + src[2*j+1] + src[2*j+srcpitch] + src[2*j+srcpitch+1]) >> 2)&WriteMask)|(dst[d[j]]&~WriteMask);
|
||||
}
|
||||
src += srcpitch << 1;
|
||||
|
@ -663,6 +650,120 @@ static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0
|
|||
|
||||
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
|
||||
{
|
||||
#define YET_ANOTHER_INTRINSIC
|
||||
#ifdef YET_ANOTHER_INTRINSIC
|
||||
__m128i vm0 = _mm_load_si128((__m128i*)vm);
|
||||
__m128i vm1 = _mm_load_si128((__m128i*)vm+1);
|
||||
__m128i vm2 = _mm_load_si128((__m128i*)vm+2);
|
||||
__m128i vm3 = _mm_load_si128((__m128i*)vm+3);
|
||||
|
||||
// rearrange 16bits words
|
||||
vm0 = _mm_shufflehi_epi16(vm0, 0x88);
|
||||
vm0 = _mm_shufflelo_epi16(vm0, 0x88); // 6 4 6 4 2 0 2 0
|
||||
vm1 = _mm_shufflehi_epi16(vm1, 0x88);
|
||||
vm1 = _mm_shufflelo_epi16(vm1, 0x88); // 14 12 14 12 10 8 10 8
|
||||
|
||||
// Note: MSVC complains about direct c-cast...
|
||||
// vm0 = (__m128i)_mm_shuffle_ps((__m128)vm0, (__m128)vm1, 0x88); // 14 12 10 8 6 4 2 0
|
||||
__m128 vm0_f = (_mm_shuffle_ps((__m128&)vm0, (__m128&)vm1, 0x88)); // 14 12 10 8 6 4 2 0
|
||||
vm0 = (__m128i&)vm0_f;
|
||||
vm0 = _mm_shuffle_epi32(vm0, 0xD8); // 14 12 6 4 10 8 2 0
|
||||
|
||||
// *** Same jobs for vm2 and vm3
|
||||
vm2 = _mm_shufflehi_epi16(vm2, 0x88);
|
||||
vm2 = _mm_shufflelo_epi16(vm2, 0x88);
|
||||
vm3 = _mm_shufflehi_epi16(vm3, 0x88);
|
||||
vm3 = _mm_shufflelo_epi16(vm3, 0x88);
|
||||
|
||||
// Note: MSVC complains about direct c-cast...
|
||||
// vm2 = (__m128i)_mm_shuffle_ps((__m128)vm2, (__m128)vm3, 0x88);
|
||||
__m128 vm2_f = (_mm_shuffle_ps((__m128&)vm2, (__m128&)vm3, 0x88)); // 14 12 10 8 6 4 2 0
|
||||
vm2 = (__m128i&)vm2_f;
|
||||
vm2 = _mm_shuffle_epi32(vm2, 0xD8);
|
||||
|
||||
// Create a zero register.
|
||||
__m128i zero_128 = _mm_setzero_si128();
|
||||
|
||||
if ((u32)clut & 0x0F) {
|
||||
// Unaligned write.
|
||||
|
||||
u16* clut_word_ptr = (u16*)clut;
|
||||
__m128i clut_mask = _mm_load_si128((__m128i*)s_clut16mask2);
|
||||
|
||||
// Load previous data and clear high 16 bits of double words
|
||||
__m128i clut_0 = _mm_load_si128((__m128i*)(clut_word_ptr-1)); // 6 5 4 3 2 1 0 x
|
||||
__m128i clut_2 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+2); // 22 21 20 19 18 17 16 15
|
||||
clut_0 = _mm_and_si128(clut_0, clut_mask); // - 5 - 3 - 1 - x
|
||||
clut_2 = _mm_and_si128(clut_2, clut_mask); // - 21 - 19 - 17 - 15
|
||||
|
||||
// Convert vm0 from 16 bits to 32 bits (zero-extended)
|
||||
__m128i vm0_low = _mm_unpacklo_epi16(vm0, zero_128); // - 10 - 8 - 2 - 0
|
||||
__m128i vm0_high = _mm_unpackhi_epi16(vm0, zero_128); // - 14 - 12 - 6 - 4
|
||||
|
||||
// Shift the values to align them with the clut
|
||||
vm0_low = _mm_slli_epi32(vm0_low, 16); // 10 - 8 - 2 - 0 -
|
||||
vm0_high = _mm_slli_epi32(vm0_high, 16); // 14 - 12 - 6 - 4 -
|
||||
|
||||
// Interlace old and new data
|
||||
clut_0 = _mm_or_si128(clut_0, vm0_low); // 10 5 8 3 2 1 0 x
|
||||
clut_2 = _mm_or_si128(clut_2, vm0_high); // 14 21 12 19 6 17 4 15
|
||||
|
||||
// Save the result
|
||||
_mm_store_si128((__m128i*)(clut_word_ptr-1), clut_0);
|
||||
_mm_store_si128((__m128i*)(clut_word_ptr-1)+2, clut_2);
|
||||
|
||||
// *** Same jobs for clut_1 and clut_3
|
||||
__m128i clut_1 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+1);
|
||||
__m128i clut_3 = _mm_load_si128((__m128i*)(clut_word_ptr-1)+3);
|
||||
clut_1 = _mm_and_si128(clut_1, clut_mask);
|
||||
clut_3 = _mm_and_si128(clut_3, clut_mask);
|
||||
|
||||
__m128i vm2_low = _mm_unpacklo_epi16(vm2, zero_128);
|
||||
__m128i vm2_high = _mm_unpackhi_epi16(vm2, zero_128);
|
||||
vm2_low = _mm_slli_epi32(vm2_low, 16);
|
||||
vm2_high = _mm_slli_epi32(vm2_high, 16);
|
||||
|
||||
clut_1 = _mm_or_si128(clut_1, vm2_low);
|
||||
clut_3 = _mm_or_si128(clut_3, vm2_high);
|
||||
|
||||
_mm_store_si128((__m128i*)(clut_word_ptr-1)+1, clut_1);
|
||||
_mm_store_si128((__m128i*)(clut_word_ptr-1)+3, clut_3);
|
||||
} else {
|
||||
// Standard write
|
||||
|
||||
__m128i clut_mask = _mm_load_si128((__m128i*)s_clut16mask);
|
||||
|
||||
// Load previous data and clear low 16 bits of double words
|
||||
__m128i clut_0 = _mm_and_si128(_mm_load_si128((__m128i*)clut), clut_mask); // 7 - 5 - 3 - 1 -
|
||||
__m128i clut_2 = _mm_and_si128(_mm_load_si128((__m128i*)clut+2), clut_mask); // 23 - 21 - 19 - 17 -
|
||||
|
||||
// Convert vm0 from 16 bits to 32 bits (zero-extended)
|
||||
__m128i vm0_low = _mm_unpacklo_epi16(vm0, zero_128); // - 10 - 8 - 2 - 0
|
||||
__m128i vm0_high = _mm_unpackhi_epi16(vm0, zero_128); // - 14 - 12 - 6 - 4
|
||||
|
||||
// Interlace old and new data
|
||||
clut_0 = _mm_or_si128(clut_0, vm0_low); // 7 10 5 8 3 2 1 0
|
||||
clut_2 = _mm_or_si128(clut_2, vm0_high); // 23 14 21 12 19 6 17 4
|
||||
|
||||
// Save the result
|
||||
_mm_store_si128((__m128i*)clut, clut_0);
|
||||
_mm_store_si128((__m128i*)clut+2, clut_2);
|
||||
|
||||
// *** Same jobs for clut_1 and clut_3
|
||||
__m128i clut_1 = _mm_and_si128(_mm_load_si128((__m128i*)clut+1), clut_mask);
|
||||
__m128i clut_3 = _mm_and_si128(_mm_load_si128((__m128i*)clut+3), clut_mask);
|
||||
|
||||
__m128i vm2_low = _mm_unpacklo_epi16(vm2, zero_128);
|
||||
__m128i vm2_high = _mm_unpackhi_epi16(vm2, zero_128);
|
||||
|
||||
clut_1 = _mm_or_si128(clut_1, vm2_low);
|
||||
clut_3 = _mm_or_si128(clut_3, vm2_high);
|
||||
|
||||
_mm_store_si128((__m128i*)clut+1, clut_1);
|
||||
_mm_store_si128((__m128i*)clut+3, clut_3);
|
||||
}
|
||||
|
||||
#else
|
||||
#if defined(_MSC_VER)
|
||||
__asm
|
||||
{
|
||||
|
@ -893,6 +994,7 @@ End:
|
|||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
|
||||
);
|
||||
#endif // _MSC_VER
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // ZEROGS_SSE2
|
||||
|
@ -1115,3 +1217,4 @@ Z16Loop:
|
|||
);
|
||||
#endif // _MSC_VER
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@
|
|||
#include "targets.h"
|
||||
#include "GLWin.h"
|
||||
#include "ZZoglShaders.h"
|
||||
#ifdef ZEROGS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
//----------------------- Defines
|
||||
|
||||
|
@ -95,7 +98,6 @@ namespace ZeroGS
|
|||
// float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
|
||||
|
||||
extern CRangeManager s_RangeMngr; // manages overwritten memory
|
||||
GLenum GetRenderTargetFormat() { return GetRenderFormat() == RFT_byte8 ? 4 : g_internalRGBAFloat16Fmt; }
|
||||
|
||||
// returns the first and last addresses aligned to a page that cover
|
||||
void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
|
||||
|
@ -541,7 +543,7 @@ __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
|
|||
|
||||
int Values[100] = {0, };
|
||||
|
||||
void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
||||
inline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
|
||||
{
|
||||
int index = Index;
|
||||
p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
|
||||
|
@ -852,6 +854,55 @@ bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
|
|||
|
||||
bool bRet = false;
|
||||
|
||||
// FIXME code generated by intrinsics is the same as the linux asm.
|
||||
// However there is no "cmp %%esi, 0x90" equivalent in the windows asm !!!
|
||||
// So the control flow must be checked.
|
||||
#define TEST_THIS
|
||||
#ifdef TEST_THIS
|
||||
// Compare src against dst in chunks of 64 bytes (16 dwords) until `entries`
// reaches zero; bRet becomes true as soon as one chunk differs.
while(entries != 0) {
#ifdef ZEROGS_SSE2
	// Compare four 128-bit vectors dword by dword and AND the results together:
	// a lane stays all-ones only if it matched in every one of the four vectors.
	__m128i result = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src), _mm_load_si128((__m128i*)dst));

	__m128i result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+1), _mm_load_si128((__m128i*)dst+1));
	result = _mm_and_si128(result, result_tmp);

	result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+2), _mm_load_si128((__m128i*)dst+2));
	result = _mm_and_si128(result, result_tmp);

	result_tmp = _mm_cmpeq_epi32(_mm_load_si128((__m128i*)src+3), _mm_load_si128((__m128i*)dst+3));
	result = _mm_and_si128(result, result_tmp);

	// _mm_movemask_epi8 collects one bit per byte, i.e. 16 bits for a 128-bit
	// register. "Everything equal" is therefore 0xFFFF; testing against 0xFF
	// reported identical data as dirty and missed differences located in the
	// upper 8 bytes.
	u32 result_int = _mm_movemask_epi8(result);
	if (result_int != 0xFFFF) {
		bRet = true;
		break;
	}
#else
	// I see no point in keeping an MMX version. The SSE2 version is probably faster.
	// Keep a slow portable C version for reference/debug
	for (int i=0; i < 16 ; i++) {
		if (*((u32*)src+i) != *((u32*)dst+i)) {
			bRet = true;
			break;
		}
	}

	// The break above only leaves the inner for loop; stop scanning here as
	// the SSE2 path does, since the answer is already known.
	if (bRet) {
		break;
	}
#endif

	if (entries & 0x10) {
		src -= 56; // go back and down one column
	}

	src += 32; // go to the right block

	if (entries == 0x90) {
		src += 32; // skip whole block
	}

	dst += 8;
	entries -= 16;
}
|
||||
#else
|
||||
|
||||
// do a fast test with MMX
|
||||
#ifdef _MSC_VER
|
||||
int storeebx;
|
||||
|
@ -978,6 +1029,7 @@ Return:
|
|||
".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
|
||||
|
||||
#endif // _WIN32
|
||||
#endif
|
||||
return bRet;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <math.h>
|
||||
|
||||
#include "ZZGl.h"
|
||||
#include "GS.h"
|
||||
|
@ -100,12 +101,6 @@ namespace ZeroGS
|
|||
|
||||
typedef void (*DrawFn)();
|
||||
|
||||
enum RenderFormatType
|
||||
{
|
||||
RFT_byte8 = 0, // A8R8G8B8
|
||||
RFT_float16 = 1, // A32R32B32G32
|
||||
};
|
||||
|
||||
// managers render-to-texture targets
|
||||
|
||||
class CRenderTarget
|
||||
|
@ -237,6 +232,8 @@ class CMemoryTarget
|
|||
clearminy = r.clearminy;
|
||||
clearmaxy = r.clearmaxy;
|
||||
widthmult = r.widthmult;
|
||||
texH = r.texH;
|
||||
texW = r.texW;
|
||||
channels = r.channels;
|
||||
validatecount = r.validatecount;
|
||||
fmt = r.fmt;
|
||||
|
@ -267,14 +264,20 @@ class CMemoryTarget
|
|||
|
||||
int starty, height; // assert(starty >= realy)
|
||||
int realy, realheight; // this is never touched once allocated
|
||||
// realy is start pointer of data in 4M data block (start) and size (end-start).
|
||||
|
||||
u32 usedstamp;
|
||||
u8 psm, cpsm; // texture and clut format. For psm, only 16bit/32bit differentiation matters
|
||||
|
||||
u32 fmt;
|
||||
|
||||
int widthmult;
|
||||
int channels;
|
||||
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
|
||||
int widthmult; // Either 1 or 2.
|
||||
int channels; // The number of pixels per PSM format word. channels == PIXELS_PER_WORD(psm)
|
||||
// This is the real drawing size in pixels of the texture in renderbuffer.
|
||||
int texW; // (realheight + widthmult - 1)/widthmult == realheight or [(realheight+1)/2]
|
||||
int texH; // GPU_TEXWIDTH *widthmult * channels;
|
||||
|
||||
int clearminy, clearmaxy; // when maxy > 0, need to check for clearing
|
||||
|
||||
int validatecount; // count how many times has been validated, if too many, destroy
|
||||
|
||||
|
@ -415,7 +418,6 @@ extern float fiTexWidth[2], fiTexHeight[2]; // current tex width and height
|
|||
extern vector<GLuint> g_vboBuffers; // VBOs for all drawing commands
|
||||
extern GLuint vboRect;
|
||||
extern int g_nCurVBOIndex;
|
||||
extern RenderFormatType g_RenderFormatType;
|
||||
|
||||
void AddMessage(const char* pstr, u32 ms = 5000);
|
||||
void DrawText(const char* pstr, int left, int top, u32 color);
|
||||
|
@ -479,8 +481,6 @@ bool CheckChangeInClut(u32 highdword, u32 psm); // returns true if clut will cha
|
|||
|
||||
// call to load CLUT data (depending on CLD)
|
||||
void texClutWrite(int ctx);
|
||||
RenderFormatType GetRenderFormat();
|
||||
GLenum GetRenderTargetFormat();
|
||||
|
||||
int Save(s8* pbydata);
|
||||
bool Load(s8* pbydata);
|
||||
|
@ -523,7 +523,25 @@ inline void CluttingForFlushedTex(tex0Info* tex0, u32 Data, int ictx)
|
|||
tex0->cld = ZZOglGet_cld_TexBits(Data);
|
||||
|
||||
ZeroGS::texClutWrite(ictx);
|
||||
};
|
||||
|
||||
// The size in bytes of x strings (of texture).
|
||||
inline int MemorySize(int x)
|
||||
{
|
||||
return 4 * GPU_TEXWIDTH * x;
|
||||
}
|
||||
};
|
||||
|
||||
// Return the address in memory of data block for string x.
|
||||
inline u8* MemoryAddress(int x)
|
||||
{
|
||||
return g_pbyGSMemory + MemorySize(x);
|
||||
}
|
||||
|
||||
template <u32 mult>
|
||||
inline u8* _MemoryAddress(int x)
|
||||
{
|
||||
return g_pbyGSMemory + mult * x;
|
||||
}
|
||||
|
||||
};
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue