ReorderingMTGS: Sync with trunk!

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3523 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-17 22:42:18 +00:00
commit 9ba58e1398
16 changed files with 613 additions and 753 deletions

View File

@ -7,6 +7,19 @@ cmake_minimum_required(VERSION 2.6)
# Variable to check that people use the good file # Variable to check that people use the good file
set(TOP_CMAKE_WAS_SOURCED TRUE) set(TOP_CMAKE_WAS_SOURCED TRUE)
# PCSX2 only builds for 32-bit targets. Abort at configure time with a clear
# explanation rather than letting the compilation fail later with obscure errors.
# CMAKE_SIZEOF_VOID_P is the pointer size in bytes, so 8 means a 64-bit target.
# EQUAL is a numeric comparison; MATCHES would be a regex substring test.
# NOTE(review): CMAKE_SIZEOF_VOID_P is only populated once a language has been
# enabled (project()/enable_language()) - confirm this check runs after that.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    message(FATAL_ERROR "
    PCSX2 does not support 64-bit environments. Please install a 32-bit chroot or a 32-bit OS.
    There is currently no plan to support the 64-bit architecture in the future:
    it would need a complete rewrite of the core emulator and a lot of time.
    However, once Linux distributions properly support multi-arch packages, it will
    at least be possible to compile and install PCSX2 easily, without the hassle of a chroot environment.")
endif()
# set module path # set module path
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

View File

@ -98,8 +98,8 @@ void gen_memcpy_vibes() {
xADD(ecx, 256); xADD(ecx, 256);
} }
const xRegisterSSE xmm_t(x); const xRegisterSSE xmm_t(x);
xMOVAPS(xmm_t, ptr32[edx+off]); xMOVAPS (xmm_t, ptr32[edx+off]);
xMOVAPS(ptr32[ecx+off], xmm_t); xMOVNTPS(ptr32[ecx+off], xmm_t);
} }
_memcpy_vibes[0] = (_memCpyCall)xGetPtr(); _memcpy_vibes[0] = (_memCpyCall)xGetPtr();

View File

@ -63,11 +63,11 @@ struct SSE2_Tables
u16 Y_mask[8]; // offset -32 u16 Y_mask[8]; // offset -32
u16 round_1bit[8]; // offset -16 u16 round_1bit[8]; // offset -16
u16 Y_coefficients[8]; // offset 0 s16 Y_coefficients[8]; // offset 0
u16 GCr_coefficients[8];// offset 16 s16 GCr_coefficients[8];// offset 16
u16 GCb_coefficients[8];// offset 32 s16 GCb_coefficients[8];// offset 32
u16 RCr_coefficients[8];// offset 48 s16 RCr_coefficients[8];// offset 48
u16 BCb_coefficients[8];// offset 64 s16 BCb_coefficients[8];// offset 64
}; };
enum enum

View File

@ -98,27 +98,31 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
} }
void VifUnpackSSE_Base::xUPK_S_16() const { void VifUnpackSSE_Base::xUPK_S_16() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (workReg); xPMOVXX16 (workReg);
} }
else { else
{
xMOV16 (workReg, ptr32[srcIndirect]); xMOV16 (workReg, ptr32[srcIndirect]);
xPUNPCK.LWD(workReg, workReg); xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 16); xShiftR (workReg, 16);
} }
xPSHUF.D (destReg, workReg, _v0); xPSHUF.D (destReg, workReg, _v0);
} }
void VifUnpackSSE_Base::xUPK_S_8() const { void VifUnpackSSE_Base::xUPK_S_8() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (workReg); xPMOVXX8 (workReg);
} }
else { else
{
xMOV8 (workReg, ptr32[srcIndirect]); xMOV8 (workReg, ptr32[srcIndirect]);
xPUNPCK.LBW(workReg, workReg); xPUNPCK.LBW(workReg, workReg);
xPUNPCK.LWD(workReg, workReg); xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 24); xShiftR (workReg, 24);
} }
xPSHUF.D (destReg, workReg, _v0); xPSHUF.D (destReg, workReg, _v0);
} }
@ -133,58 +137,63 @@ void VifUnpackSSE_Base::xUPK_V2_32() const {
} }
void VifUnpackSSE_Base::xUPK_V2_16() const { void VifUnpackSSE_Base::xUPK_V2_16() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg); xPMOVXX16 (destReg);
} }
else { else
{
xMOV32 (destReg, ptr32[srcIndirect]); xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16); xShiftR (destReg, 16);
} }
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0 xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
} }
void VifUnpackSSE_Base::xUPK_V2_8() const { void VifUnpackSSE_Base::xUPK_V2_8() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg); xPMOVXX8 (destReg);
} }
else { else
{
xMOV16 (destReg, ptr32[srcIndirect]); xMOV16 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg); xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24); xShiftR (destReg, 24);
} }
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0 xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
} }
void VifUnpackSSE_Base::xUPK_V3_32() const { void VifUnpackSSE_Base::xUPK_V3_32() const {
xMOV128 (destReg, ptr32[srcIndirect]); xMOV128 (destReg, ptr32[srcIndirect]);
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
} }
void VifUnpackSSE_Base::xUPK_V3_16() const { void VifUnpackSSE_Base::xUPK_V3_16() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg); xPMOVXX16 (destReg);
} }
else { else
{
xMOV64 (destReg, ptr32[srcIndirect]); xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16); xShiftR (destReg, 16);
} }
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
} }
void VifUnpackSSE_Base::xUPK_V3_8() const { void VifUnpackSSE_Base::xUPK_V3_8() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg); xPMOVXX8 (destReg);
} }
else { else
{
xMOV32 (destReg, ptr32[srcIndirect]); xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg); xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24); xShiftR (destReg, 24);
} }
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
} }
void VifUnpackSSE_Base::xUPK_V4_32() const { void VifUnpackSSE_Base::xUPK_V4_32() const {
@ -192,26 +201,30 @@ void VifUnpackSSE_Base::xUPK_V4_32() const {
} }
void VifUnpackSSE_Base::xUPK_V4_16() const { void VifUnpackSSE_Base::xUPK_V4_16() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg); xPMOVXX16 (destReg);
} }
else { else
{
xMOV64 (destReg, ptr32[srcIndirect]); xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16); xShiftR (destReg, 16);
} }
} }
void VifUnpackSSE_Base::xUPK_V4_8() const { void VifUnpackSSE_Base::xUPK_V4_8() const {
if (x86caps.hasStreamingSIMD4Extensions) { if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg); xPMOVXX8 (destReg);
} }
else { else
{
xMOV32 (destReg, ptr32[srcIndirect]); xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg); xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg); xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24); xShiftR (destReg, 24);
} }
} }
void VifUnpackSSE_Base::xUPK_V4_5() const { void VifUnpackSSE_Base::xUPK_V4_5() const {

View File

@ -245,13 +245,14 @@ public:
{ {
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2; int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2;
const GSVector4 ys(0.098f, 0.504f, 0.257f, 0.0f); GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
const GSVector4 us(0.439f / 2, -0.291f / 2, -0.148f / 2, 0.0f); GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
const GSVector4 vs(-0.071f / 2, -0.368f / 2, 0.439f / 2, 0.0f); GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
const GSVector4 offset(16, 128, 16, 128); const GSVector4 offset(16, 128, 16, 128);
if(rgba) if (!rgba)
{ ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch) for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{ {
uint32* s = (uint32*)src; uint32* s = (uint32*)src;
@ -263,8 +264,8 @@ public:
GSVector4 c1 = GSVector4(s[i + 1]); GSVector4 c1 = GSVector4(s[i + 1]);
GSVector4 c2 = c0 + c1; GSVector4 c2 = c0 + c1;
GSVector4 lo = (c0 * ys).hadd(c2 * vs); GSVector4 lo = (c0 * ys).hadd(c2 * us);
GSVector4 hi = (c1 * ys).hadd(c2 * us); GSVector4 hi = (c1 * ys).hadd(c2 * vs);
GSVector4 c = lo.hadd(hi) + offset; GSVector4 c = lo.hadd(hi) + offset;
@ -272,29 +273,6 @@ public:
} }
} }
} }
else
{
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
uint32* s = (uint32*)src;
uint16* d = (uint16*)dst;
for(int i = 0; i < w; i += 2)
{
GSVector4 c0 = GSVector4(s[i + 0]).zyxw();
GSVector4 c1 = GSVector4(s[i + 1]).zyxw();
GSVector4 c2 = c0 + c1;
GSVector4 lo = (c0 * ys).hadd(c2 * vs);
GSVector4 hi = (c1 * ys).hadd(c2 * us);
GSVector4 c = lo.hadd(hi) + offset;
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
}
}
}
}
else if(mt.subtype == MEDIASUBTYPE_RGB32) else if(mt.subtype == MEDIASUBTYPE_RGB32)
{ {
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4; int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4;

View File

@ -126,6 +126,53 @@ void CALLBACK GSsetLogDir(const char* dir)
ZZLog::SetDir(dir); ZZLog::SetDir(dir);
} }
// Writes one log line (via ZZLog::WriteLn) for every hack flag that is set in
// 'hacks'. Flags are reported in a fixed order; unset flags produce no output.
void ReportHacks(gameHacks hacks)
{
	// Snapshot each flag next to the message that announces it, so the
	// reporting itself is a single loop.
	const struct
	{
		bool enabled;
		const char* message;
	} report[] =
	{
		{ hacks.texture_targs != 0,     "'Texture targs' hack enabled." },
		{ hacks.auto_reset != 0,        "'Auto reset' hack enabled." },
		{ hacks.interlace_2x != 0,      "'Interlace 2x' hack enabled." },
		{ hacks.texa != 0,              "'Texa' hack enabled." },
		{ hacks.no_target_resolve != 0, "'No target resolve' hack enabled." },
		{ hacks.exact_color != 0,       "Exact color hack enabled." },
		{ hacks.no_color_clamp != 0,    "'No color clamp' hack enabled." },
		{ hacks.no_alpha_fail != 0,     "'No alpha fail' hack enabled." },
		{ hacks.no_depth_update != 0,   "'No depth update' hack enabled." },
		{ hacks.quick_resolve_1 != 0,   "'Quick resolve 1' enabled." },
		{ hacks.no_quick_resolve != 0,  "'No Quick resolve' hack enabled." },
		{ hacks.no_target_clut != 0,    "'No target clut' hack enabled." },
		{ hacks.vss_hack_off != 0,      "VSS hack enabled." },
		{ hacks.no_depth_resolve != 0,  "'No depth resolve' hack enabled." },
		{ hacks.full_16_bit_res != 0,   "'Full 16 bit resolution' hack enabled." },
		{ hacks.resolve_promoted != 0,  "'Resolve promoted' hack enabled." },
		{ hacks.fast_update != 0,       "'Fast update' hack enabled." },
		{ hacks.no_alpha_test != 0,     "'No alpha test' hack enabled." },
		{ hacks.disable_mrt_depth != 0, "'Disable mrt depth' hack enabled." },
		{ hacks.args_32_bit != 0,       "'Args 32 bit' hack enabled." },
		{ hacks.path3 != 0,             "'Path3' hack enabled." },
		{ hacks.parallel_context != 0,  "'Parallel context' hack enabled." },
		{ hacks.xenosaga_spec != 0,     "'Xenosaga spec' hack enabled." },
		{ hacks.partial_pointers != 0,  "'Partial pointers' hack enabled." },
		{ hacks.partial_depth != 0,     "'Partial depth' hack enabled." },
		{ hacks.reget != 0,             "Reget hack enabled." },
		{ hacks.gust != 0,              "Gust hack enabled." },
		{ hacks.no_logz != 0,           "'No logz' hack enabled." },
	};

	const unsigned int count = sizeof(report) / sizeof(report[0]);

	for (unsigned int n = 0; n < count; ++n)
	{
		if (report[n].enabled) ZZLog::WriteLn(report[n].message);
	}
}
// Logs the currently active hacks in two groups: first the ones held in
// conf.def_hacks, then the ones held in conf.hacks. A group whose bitmask
// is zero is skipped entirely, heading included.
void ListHacks()
{
	const bool anyAutoHacks = (conf.def_hacks._u32 != 0);

	if (anyAutoHacks)
	{
		ZZLog::WriteLn("AutoEnabling these hacks:");
		ReportHacks(conf.def_hacks);
	}

	const bool anyManualHacks = (conf.hacks._u32 != 0);

	if (anyManualHacks)
	{
		ZZLog::WriteLn("You've manually enabled these hacks:");
		ReportHacks(conf.hacks);
	}
}
void CALLBACK GSsetGameCRC(int crc, int options) void CALLBACK GSsetGameCRC(int crc, int options)
{ {
// TEXDESTROY_THRESH starts out at 16. // TEXDESTROY_THRESH starts out at 16.
@ -133,15 +180,15 @@ void CALLBACK GSsetGameCRC(int crc, int options)
conf.mrtdepth = (conf.settings().disable_mrt_depth != 0); conf.mrtdepth = (conf.settings().disable_mrt_depth != 0);
if (!conf.mrtdepth) if (!conf.mrtdepth)
ZZLog::Error_Log("Disabling MRT depth writing."); ZZLog::WriteLn("Disabling MRT depth writing.");
else else
ZZLog::Error_Log("Enabling MRT depth writing."); ZZLog::WriteLn("Enabling MRT depth writing.");
bool CRCValueChanged = (g_LastCRC != crc); bool CRCValueChanged = (g_LastCRC != crc);
g_LastCRC = crc; g_LastCRC = crc;
ZZLog::Error_Log("CRC = %x", crc); if (crc != 0) ZZLog::WriteLn("Current game CRC is %x.", crc);
if (CRCValueChanged && (crc != 0)) if (CRCValueChanged && (crc != 0))
{ {
@ -149,17 +196,27 @@ void CALLBACK GSsetGameCRC(int crc, int options)
{ {
if (crc_game_list[i].crc == crc) if (crc_game_list[i].crc == crc)
{ {
if (crc_game_list[i].v_thresh > 0) VALIDATE_THRESH = crc_game_list[i].v_thresh; ZZLog::WriteLn("Found CRC[%x] in crc game list.", crc);
if (crc_game_list[i].t_thresh > 0) TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
if (crc_game_list[i].v_thresh > 0)
{
VALIDATE_THRESH = crc_game_list[i].v_thresh;
ZZLog::WriteLn("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH);
}
if (crc_game_list[i].t_thresh > 0)
{
TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
ZZLog::WriteLn("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH);
}
conf.def_hacks._u32 |= crc_game_list[i].flags; conf.def_hacks._u32 |= crc_game_list[i].flags;
ListHacks();
ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc);
return; return;
} }
} }
} }
ListHacks();
} }
void CALLBACK GSsetFrameSkip(int frameskip) void CALLBACK GSsetFrameSkip(int frameskip)

View File

@ -27,7 +27,6 @@
#include "zerogs.h" #include "zerogs.h"
#include "targets.h" #include "targets.h"
namespace ZeroGS namespace ZeroGS
{ {
extern CRangeManager s_RangeMngr; // manages overwritten memory extern CRangeManager s_RangeMngr; // manages overwritten memory
@ -249,8 +248,10 @@
} }
template <class T> template <class T>
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{ {
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
int i = x, j = y; int i = x, j = y;
T* pbuf = (T*)pbyMem; T* pbuf = (T*)pbyMem;
u32 nSize = nQWordSize * 16 / sizeof(T); u32 nSize = nQWordSize * 16 / sizeof(T);
@ -275,8 +276,10 @@
} }
} }
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp) void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{ {
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
int i = x, j = y; int i = x, j = y;
u8* pbuf = (u8*)pbyMem; u8* pbuf = (u8*)pbyMem;
u32 nSize = nQWordSize * 16 / 3; u32 nSize = nQWordSize * 16 / 3;
@ -312,65 +315,113 @@
assert(gs.imageTransfer == 1); assert(gs.imageTransfer == 1);
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp; u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
int i = gs.imageY, j = gs.imageX;
switch (gs.srcbuf.psm) switch(PSMT_BITMODE(gs.srcbuf.psm))
{ {
case 0: TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case PSMCT32: case 1: TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32_0); case 2: TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
break; case 3: TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
default: assert(0); break;
case PSMCT24:
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0);
break;
case PSMCT16:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16_0);
break;
case PSMCT16S:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0);
break;
case PSMT8:
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8_0);
break;
case PSMT8H:
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0);
break;
case PSMT32Z:
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0);
break;
case PSMT24Z:
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0);
break;
case PSMT16Z:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0);
break;
case PSMT16SZ:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0);
break;
default:
assert(0);
} }
gs.imageY = i;
gs.imageX = j;
if (gs.imageY >= gs.imageEndY) if (gs.imageY >= gs.imageEndY)
{ {
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
assert(gs.imageY == gs.imageEndY); assert(gs.imageY == gs.imageEndY);
gs.imageTransfer = -1; gs.imageTransfer = -1;
} }
} }
// Copies the rectangle described by gs.trxpos (sx/sy -> dx/dy) and
// gs.imageWnew x gs.imageHnew from the source buffer to the destination
// buffer, one pixel at a time, through per-format accessor functions.
//
// Pixels are READ with the accessor of the source format and WRITTEN with the
// accessor of the destination format. Coordinates wrap at 2048.
__forceinline void _TransferLocalLocal()
{
	//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
	_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
	_writePixel_0 wp = writePixelFun_0[gs.dstbuf.psm];

	// Formats without an accessor have an empty table slot; there is nothing
	// sensible to copy in that case, so do nothing instead of calling through
	// a null function pointer.
	if (!rp || !wp) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;

	// Number of pixels handled per inner-loop step: 2 when
	// PSMT_BITMODE(source format) is 0, otherwise 4.
	u32 widthlimit = 4;
	if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;

	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	// NOTE(review): this tests a single bit of the width, not
	// "width % widthlimit"; kept as-is because existing behavior depends on
	// it - confirm whether (widthlimit - 1) was intended as the mask.
	if ((gs.imageWnew & widthlimit) != 0) return;

	for (int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
	{
		for (int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit)
		{
			// Copy 'widthlimit' consecutive pixels of this row, left to right.
			for (int k = 0; k < (int)widthlimit; k++)
			{
				wp(pDstBuf, (j2 + k) % 2048, i2 % 2048,
					rp(pSrcBuf, (j + k) % 2048, i % 2048, gs.srcbuf.bw), gs.dstbuf.bw);
			}
		}
	}
}
// Variant of the local-to-local copy that moves data one nibble at a time:
// each step copies either the low or the high nibble of a source byte into the
// same nibble of a destination byte, leaving the other nibble untouched.
//
// The rectangle comes from gs.trxpos (sx/sy -> dx/dy) and
// gs.imageWnew x gs.imageHnew; the width must be a multiple of 8 because
// pixels are processed in groups of eight. Coordinates wrap at 2048.
__forceinline void _TransferLocalLocal_4()
{
	//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
	_getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm];
	_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];

	// Formats without an address function have an empty table slot; bail out
	// instead of calling through a null function pointer.
	if (!gsp || !gdp) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	assert((gs.imageWnew % 8) == 0);

	for (int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
	{
		for (int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8)
		{
			/* NOTE: two consecutive 4-bit values are NOT in the same byte */
			// Visit all eight pixels of the group. Even offsets carry their
			// value in the low nibble, odd offsets in the high nibble.
			for (int k = 0; k < 8; ++k)
			{
				const u8 take = (k & 1) ? 0xf0 : 0x0f;
				const u8 keep = 0xff ^ take;

				u32 read = gsp((j + k) % 2048, i % 2048, gs.srcbuf.bw);
				u32 write = gdp((j2 + k) % 2048, i2 % 2048, gs.dstbuf.bw);
				pDstBuf[write] = (pDstBuf[write] & keep) | (pSrcBuf[read] & take);
			}
		}
	}
}
// dir depends on trxpos.dirx & trxpos.diry // dir depends on trxpos.dirx & trxpos.diry
void TransferLocalLocal() void TransferLocalLocal()
{ {
@ -409,284 +460,18 @@
} }
} }
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256; if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
#define TRANSFERLOCALLOCAL(srcpsm, dstpsm, widthlimit) { \
if( (gs.imageWnew&widthlimit)!=0 ) break; \
assert( (gs.imageWnew&widthlimit)==0 && widthlimit <= 4); \
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) { \
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) { \
\
writePixel##dstpsm##_0(pDstBuf, j2%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 1 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+1)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 2 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+2)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 3 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+3)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
} \
} \
} \
} \
} \
} \
#define TRANSFERLOCALLOCAL_4(srcpsm, dstpsm) { \
assert( (gs.imageWnew%8) == 0 ); \
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; ++i, ++i2) { \
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=8, j2+=8) { \
/* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ \
u32 read = getPixelAddress##srcpsm##_0(j%2048, i%2048, gs.srcbuf.bw); \
u32 write = getPixelAddress##dstpsm##_0(j2%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+1)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+1)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
} \
} \
} \
switch (gs.srcbuf.psm)
{ {
case PSMCT32: _TransferLocalLocal();
if (gs.dstbuf.psm == PSMCT32)
{
TRANSFERLOCALLOCAL(32, 32, 2);
} }
else else
{ {
TRANSFERLOCALLOCAL(32, 32Z, 2); _TransferLocalLocal_4();
}
break;
case PSMCT24:
if (gs.dstbuf.psm == PSMCT24)
{
TRANSFERLOCALLOCAL(24, 24, 4);
}
else
{
TRANSFERLOCALLOCAL(24, 24Z, 4);
}
break;
case PSMCT16:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16, 16SZ, 4);
break;
}
break;
case PSMCT16S:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16S, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16S, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16S, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16S, 16SZ, 4);
break;
}
break;
case PSMT8:
if (gs.dstbuf.psm == PSMT8)
{
TRANSFERLOCALLOCAL(8, 8, 4);
}
else
{
TRANSFERLOCALLOCAL(8, 8H, 4);
}
break;
case PSMT4:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4, 4HH);
break;
}
break;
case PSMT8H:
if (gs.dstbuf.psm == PSMT8)
{
TRANSFERLOCALLOCAL(8H, 8, 4);
}
else
{
TRANSFERLOCALLOCAL(8H, 8H, 4);
}
break;
case PSMT4HL:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4HL, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4HL, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4HL, 4HH);
break;
}
break;
case PSMT4HH:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4HH, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4HH, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4HH, 4HH);
break;
}
break;
case PSMT32Z:
if (gs.dstbuf.psm == PSMCT32)
{
TRANSFERLOCALLOCAL(32Z, 32, 2);
}
else
{
TRANSFERLOCALLOCAL(32Z, 32Z, 2);
}
break;
case PSMT24Z:
if (gs.dstbuf.psm == PSMCT24)
{
TRANSFERLOCALLOCAL(24Z, 24, 4);
}
else
{
TRANSFERLOCALLOCAL(24Z, 24Z, 4);
}
break;
case PSMT16Z:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16Z, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16Z, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16Z, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16Z, 16SZ, 4);
break;
}
break;
case PSMT16SZ:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16SZ, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16SZ, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16SZ, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16SZ, 16SZ, 4);
break;
}
break;
} }
g_MemTargs.ClearRange(dststart, dstend); g_MemTargs.ClearRange(dststart, dstend);
#ifdef DEVBUILD #ifdef ZEROGS_DEVBUILD
if (g_bSaveTrans) if (g_bSaveTrans)
{ {

View File

@ -44,6 +44,10 @@ typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize); typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
extern _getPixelAddress_0 getPixelFun_0[64];
extern _writePixel_0 writePixelFun_0[64];
extern _readPixel_0 readPixelFun_0[64];
enum Psm_Size enum Psm_Size
{ {
PSM_ = 0, PSM_ = 0,
@ -269,8 +273,6 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw) #define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw) #define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw) static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{ {
((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel; ((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel;
@ -511,7 +513,6 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel,
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel; ((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
} }
/////////////// ///////////////
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw) static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)

View File

@ -18,6 +18,7 @@
*/ */
#include "GS.h" #include "GS.h"
#include "Mem.h"
u32 g_blockTable32[4][8] = u32 g_blockTable32[4][8] =
{ {
@ -247,3 +248,52 @@ u32 g_pageTable16Z[64][64];
u32 g_pageTable16SZ[64][64]; u32 g_pageTable16SZ[64][64];
u32 g_pageTable8[64][128]; u32 g_pageTable8[64][128];
u32 g_pageTable4[128][128]; u32 g_pageTable4[128][128];
/* PSM reference array
{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, 8, 4, NULL, NULL, NULL,
NULL, NULL, NULL, 8H, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, 4HL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, 4HH, NULL, NULL, NULL,
32Z, 24Z, 16Z, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, 16SZ, NULL, NULL, NULL, NULL, NULL };
*/
// Lookup table of pixel-address functions, indexed by pixel storage format
// value (callers index it with gs.srcbuf.psm / gs.dstbuf.psm).
// Laid out as 8 rows of 8 entries; the populated slots are 0-2, 10, 19, 20,
// 27, 36, 44, 48-50 and 58. Every other slot is NULL, so a caller must check
// the entry before calling through it.
_getPixelAddress_0 getPixelFun_0[64] =
{
getPixelAddress32_0, getPixelAddress24_0, getPixelAddress16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, getPixelAddress16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, getPixelAddress8_0, getPixelAddress4_0, NULL, NULL, NULL,
NULL, NULL, NULL, getPixelAddress8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, getPixelAddress4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, getPixelAddress4HH_0, NULL, NULL, NULL,
getPixelAddress32Z_0, getPixelAddress24Z_0, getPixelAddress16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, getPixelAddress16SZ_0, NULL, NULL, NULL, NULL, NULL
};
// Lookup table of pixel-write functions, indexed by pixel storage format
// value. Same 8x8 layout and same populated slots as the other accessor
// tables in this file (0-2, 10, 19, 20, 27, 36, 44, 48-50, 58); every other
// slot is NULL and must not be called.
_writePixel_0 writePixelFun_0[64] =
{
writePixel32_0, writePixel24_0, writePixel16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, writePixel16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, writePixel8_0, writePixel4_0, NULL, NULL, NULL,
NULL, NULL, NULL, writePixel8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, writePixel4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, writePixel4HH_0, NULL, NULL, NULL,
writePixel32Z_0, writePixel24Z_0, writePixel16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, writePixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};
// Lookup table of pixel-read functions, indexed by pixel storage format
// value (e.g. readPixelFun_0[gs.srcbuf.psm]). Same 8x8 layout and same
// populated slots as the other accessor tables in this file (0-2, 10, 19, 20,
// 27, 36, 44, 48-50, 58); every other slot is NULL and must not be called.
_readPixel_0 readPixelFun_0[64] =
{
readPixel32_0, readPixel24_0, readPixel16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, readPixel16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, readPixel8_0, readPixel4_0, NULL, NULL, NULL,
NULL, NULL, NULL, readPixel8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, readPixel4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, readPixel4HH_0, NULL, NULL, NULL,
readPixel32Z_0, readPixel24Z_0, readPixel16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, readPixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};

View File

@ -125,27 +125,16 @@ void __fastcall GIFPackedRegHandlerUV(u32* data)
ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V); ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V);
} }
void __forceinline KICK_VERTEX2() void __forceinline KickVertex(bool adc)
{ {
FUNCLOG FUNCLOG
if (++gs.primC >= (int)g_primmult[prim->prim]) if (++gs.primC >= (int)g_primmult[prim->prim])
{ {
if (NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])(); if (!adc && NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])();
gs.primC -= g_primsub[prim->prim]; gs.primC -= g_primsub[prim->prim];
}
}
void __forceinline KICK_VERTEX3() if (adc && prim->prim == 5)
{
FUNCLOG
if (++gs.primC >= (int)g_primmult[prim->prim])
{
gs.primC -= g_primsub[prim->prim];
if (prim->prim == 5)
{ {
/* tri fans need special processing */ /* tri fans need special processing */
if (gs.nTriFanVert == gs.primIndex) if (gs.nTriFanVert == gs.primIndex)
@ -161,14 +150,7 @@ void __fastcall GIFPackedRegHandlerXYZF2(u32* data)
gs.add_vertex(r->X, r->Y,r->Z, r->F); gs.add_vertex(r->X, r->Y,r->Z, r->F);
// Fix Vertexes up later. // Fix Vertexes up later.
if (data[3] & 0x8000) KickVertex(!!(r->ADC));
{
KICK_VERTEX3();
}
else
{
KICK_VERTEX2();
}
ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
} }
@ -179,14 +161,7 @@ void __fastcall GIFPackedRegHandlerXYZ2(u32* data)
gs.add_vertex(r->X, r->Y,r->Z); gs.add_vertex(r->X, r->Y,r->Z);
// Fix Vertexes up later. // Fix Vertexes up later.
if (data[3] & 0x8000) KickVertex(!!(r->ADC));
{
KICK_VERTEX3();
}
else
{
KICK_VERTEX2();
}
ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
} }
@ -285,7 +260,7 @@ void __fastcall GIFRegHandlerXYZF2(u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data); GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F); gs.add_vertex(r->X, r->Y,r->Z, r->F);
KICK_VERTEX2(); KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
} }
@ -295,7 +270,7 @@ void __fastcall GIFRegHandlerXYZ2(u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data); GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z); gs.add_vertex(r->X, r->Y,r->Z);
KICK_VERTEX2(); KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
} }
@ -382,7 +357,7 @@ void __fastcall GIFRegHandlerXYZF3(u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data); GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F); gs.add_vertex(r->X, r->Y,r->Z, r->F);
KICK_VERTEX3(); KickVertex(true);
ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F); ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
} }
@ -392,7 +367,7 @@ void __fastcall GIFRegHandlerXYZ3(u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data); GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z); gs.add_vertex(r->X, r->Y,r->Z);
KICK_VERTEX3(); KickVertex(true);
ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z); ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
} }
@ -915,6 +890,7 @@ void __fastcall GIFRegHandlerTRXREG(u32* data)
void __fastcall GIFRegHandlerTRXDIR(u32* data) void __fastcall GIFRegHandlerTRXDIR(u32* data)
{ {
FUNCLOG FUNCLOG
GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
// Oh dear... // Oh dear...
// terminate any previous transfers // terminate any previous transfers
@ -934,7 +910,7 @@ void __fastcall GIFRegHandlerTRXDIR(u32* data)
gs.dstbuf = gs.dstbufnew; gs.dstbuf = gs.dstbufnew;
gs.trxpos = gs.trxposnew; gs.trxpos = gs.trxposnew;
gs.imageTransfer = data[0] & 0x3; gs.imageTransfer = r->XDIR;
gs.imageWnew = gs.imageWtemp; gs.imageWnew = gs.imageWtemp;
gs.imageHnew = gs.imageHtemp; gs.imageHnew = gs.imageHtemp;

View File

@ -83,10 +83,6 @@ static bool SPAM_PASS;
#define ZEROGS_DEVBUILD #define ZEROGS_DEVBUILD
#endif #endif
#ifdef ZEROGS_DEVBUILD
//#define DEVBUILD
#endif
// sends a message to output window if assert fails // sends a message to output window if assert fails
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } } #define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }

View File

@ -29,7 +29,7 @@
using namespace ZeroGS; using namespace ZeroGS;
//------------------ Defines //------------------ Defines
#ifndef DEVBUILD #ifndef ZEROGS_DEVBUILD
#define INC_GENVARS() #define INC_GENVARS()
#define INC_TEXVARS() #define INC_TEXVARS()

View File

@ -42,10 +42,6 @@ extern bool g_bUpdateStencil;
# define INC_RESOLVE() ++g_nResolve # define INC_RESOLVE() ++g_nResolve
#endif #endif
#ifdef DEVBUILD
//static int g_bSaveResolved = 0;
#endif
extern int s_nResolved; extern int s_nResolved;
extern u32 g_nResolve; extern u32 g_nResolve;
extern bool g_bSaveTrans; extern bool g_bSaveTrans;
@ -250,6 +246,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co
} }
// set render states // set render states
// Bleh. I *really* need to fix this. << 3 when setting the scissors, then >> 3 when using them... --Arcum42
scissorrect.x = scissor.x0 >> 3; scissorrect.x = scissor.x0 >> 3;
scissorrect.y = (scissor.y0 >> 3) + dy; scissorrect.y = (scissor.y0 >> 3) + dy;
scissorrect.w = (scissor.x1 >> 3) + 1; scissorrect.w = (scissor.x1 >> 3) + 1;
@ -302,7 +299,7 @@ void ZeroGS::CRenderTarget::Resolve()
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
#if defined(DEVBUILD) #if defined(ZEROGS_DEVBUILD)
if (g_bSaveResolved) if (g_bSaveResolved)
{ {
@ -328,7 +325,7 @@ void ZeroGS::CRenderTarget::Resolve(int startrange, int endrange)
// flush if necessary // flush if necessary
FlushIfNecesary(this) ; FlushIfNecesary(this) ;
#if defined(DEVBUILD) #if defined(ZEROGS_DEVBUILD)
if (g_bSaveResolved) if (g_bSaveResolved)
{ {
SaveTexture("resolved.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh)); SaveTexture("resolved.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh));
@ -2290,10 +2287,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
{ {
// This is not an unusual situation: the vector<u8> data is not guaranteed to be 16-byte aligned, which is destructive for SSE2 // This is not an unusual situation: the vector<u8> data is not guaranteed to be 16-byte aligned, which is destructive for SSE2
// instruction movdqa [%eax], xmm0 // instruction movdqa [%eax], xmm0
// The idea would be resise vector to 15 elements, that set ptxedata to aligned position. // The idea would be resize vector to 15 elements, that set ptxedata to aligned position.
// Later we advance eax by 16 at a time, so we only need to verify that the first element is aligned // Later we advance eax by 16 at a time, so we only need to verify that the first element is aligned
// FIXME: As far as I can see, texdata is used only once here, so it has no impact on other code. // FIXME: As far as I can see, texdata is used only once here, so it has no impact on other code.
// Using _aligned_malloc() would probably be preferable. // Using _aligned_malloc() would probably be preferable.
// Note: this often happens when changing AA.
int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15 int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15
ptexdata = &texdata[disalignment]; // Set pointer to aligned element ptexdata = &texdata[disalignment]; // Set pointer to aligned element
dst = (u16*)ptexdata; dst = (u16*)ptexdata;

View File

@ -308,13 +308,10 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
} }
extern "C" static const __aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
{ static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
PCSX2_ALIGNED16(int s_clut16mask2[4]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
PCSX2_ALIGNED16(int s_clut16mask[8]) = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff
}; };
}
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut) extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
{ {
@ -433,11 +430,11 @@ WriteUnaligned:
End: End:
} }
#else #else
__asm__(".intel_syntax noprefix\n" __asm__ __volatile__(".intel_syntax noprefix\n"
"movdqa xmm0, xmmword ptr [ecx]\n" "movdqa xmm0, xmmword ptr [%[vm]]\n"
"movdqa xmm1, xmmword ptr [ecx+16]\n" "movdqa xmm1, xmmword ptr [%[vm]+16]\n"
"movdqa xmm2, xmmword ptr [ecx+32]\n" "movdqa xmm2, xmmword ptr [%[vm]+32]\n"
"movdqa xmm3, xmmword ptr [ecx+48]\n" "movdqa xmm3, xmmword ptr [%[vm]+48]\n"
// rearrange // rearrange
"pshuflw xmm0, xmm0, 0x88\n" "pshuflw xmm0, xmm0, 0x88\n"
@ -457,14 +454,14 @@ End:
"pxor xmm6, xmm6\n" "pxor xmm6, xmm6\n"
"test edx, 15\n" "test %[clut], 15\n"
"jnz WriteUnaligned\n" "jnz WriteUnaligned\n"
"movdqa xmm7, [s_clut16mask]\n" // saves upper 16 bits "movdqa xmm7, %[s_clut16mask]\n" // saves upper 16 bits
// have to save interlaced with the old data // have to save interlaced with the old data
"movdqa xmm4, [edx]\n" "movdqa xmm4, [%[clut]]\n"
"movdqa xmm5, [edx+32]\n" "movdqa xmm5, [%[clut]+32]\n"
"movhlps xmm1, xmm0\n" "movhlps xmm1, xmm0\n"
"movlhps xmm0, xmm2\n"// lower 8 colors "movlhps xmm0, xmm2\n"// lower 8 colors
@ -483,29 +480,29 @@ End:
"punpckhwd xmm2, xmm6\n" "punpckhwd xmm2, xmm6\n"
"punpckhwd xmm3, xmm6\n" "punpckhwd xmm3, xmm6\n"
"movdqa [edx], xmm0\n" "movdqa [%[clut]], xmm0\n"
"movdqa [edx+32], xmm1\n" "movdqa [%[clut]+32], xmm1\n"
"movdqa xmm5, xmm7\n" "movdqa xmm5, xmm7\n"
"pand xmm7, [edx+16]\n" "pand xmm7, [%[clut]+16]\n"
"pand xmm5, [edx+48]\n" "pand xmm5, [%[clut]+48]\n"
"por xmm2, xmm7\n" "por xmm2, xmm7\n"
"por xmm3, xmm5\n" "por xmm3, xmm5\n"
"movdqa [edx+16], xmm2\n" "movdqa [%[clut]+16], xmm2\n"
"movdqa [edx+48], xmm3\n" "movdqa [%[clut]+48], xmm3\n"
"jmp WriteCLUT_T16_I4_CSM1_End\n" "jmp WriteCLUT_T16_I4_CSM1_End\n"
"WriteUnaligned:\n" "WriteUnaligned:\n"
// %edx is offset by 2 // %[clut] is offset by 2
"sub edx, 2\n" "sub %[clut], 2\n"
"movdqa xmm7, [[s_clut16mask2]]\n" // saves lower 16 bits "movdqa xmm7, %[s_clut16mask2]\n" // saves lower 16 bits
// have to save interlaced with the old data // have to save interlaced with the old data
"movdqa xmm4, [edx]\n" "movdqa xmm4, [%[clut]]\n"
"movdqa xmm5, [edx+32]\n" "movdqa xmm5, [%[clut]+32]\n"
"movhlps xmm1, xmm0\n" "movhlps xmm1, xmm0\n"
"movlhps xmm0, xmm2\n" // lower 8 colors "movlhps xmm0, xmm2\n" // lower 8 colors
@ -528,24 +525,24 @@ End:
"pslld xmm2, 16\n" "pslld xmm2, 16\n"
"pslld xmm3, 16\n" "pslld xmm3, 16\n"
"movdqa [edx], xmm0\n" "movdqa [%[clut]], xmm0\n"
"movdqa [edx+32], xmm1\n" "movdqa [%[clut]+32], xmm1\n"
"movdqa xmm5, xmm7\n" "movdqa xmm5, xmm7\n"
"pand xmm7, [edx+16]\n" "pand xmm7, [%[clut]+16]\n"
"pand xmm5, [edx+48]\n" "pand xmm5, [%[clut]+48]\n"
"por xmm2, xmm7\n" "por xmm2, xmm7\n"
"por xmm3, xmm5\n" "por xmm3, xmm5\n"
"movdqa [edx+16], xmm2\n" "movdqa [%[clut]+16], xmm2\n"
"movdqa [edx+48], xmm3\n" "movdqa [%[clut]+48], xmm3\n"
"WriteCLUT_T16_I4_CSM1_End:\n" "WriteCLUT_T16_I4_CSM1_End:\n"
"\n" "\n"
".att_syntax\n" ".att_syntax\n"
: [s_clut16mask] "=m" (s_clut16mask), [s_clut16mask2] "=m" (s_clut16mask2) :
: "c" (vm), "d" (clut) : [vm] "r" (vm), [clut] "r" (clut), [s_clut16mask] "m" (*s_clut16mask), [s_clut16mask2] "m" (*s_clut16mask2)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
); );
#endif // _MSC_VER #endif // _MSC_VER
} }
@ -718,15 +715,15 @@ Z16Loop:
} }
#else // _MSC_VER #else // _MSC_VER
__asm__(".intel_syntax\n" __asm__ __volatile__(".intel_syntax\n"
"pxor %%xmm7, %%xmm7\n" "pxor %%xmm7, %%xmm7\n"
"Z16Loop:\n" "Z16Loop:\n"
// unpack 64 bytes at a time // unpack 64 bytes at a time
"movdqa %%xmm0, [%0]\n" "movdqa %%xmm0, [%[src]]\n"
"movdqa %%xmm2, [%0+16]\n" "movdqa %%xmm2, [%[src]+16]\n"
"movdqa %%xmm4, [%0+32]\n" "movdqa %%xmm4, [%[src]+32]\n"
"movdqa %%xmm6, [%0+48]\n" "movdqa %%xmm6, [%[src]+48]\n"
"movdqa %%xmm1, %%xmm0\n" "movdqa %%xmm1, %%xmm0\n"
"movdqa %%xmm3, %%xmm2\n" "movdqa %%xmm3, %%xmm2\n"
@ -738,35 +735,35 @@ Z16Loop:
"punpckhwd %%xmm3, %%xmm7\n" "punpckhwd %%xmm3, %%xmm7\n"
// start saving // start saving
"movdqa [%1], %%xmm0\n" "movdqa [%[dst]], %%xmm0\n"
"movdqa [%1+16], %%xmm1\n" "movdqa [%[dst]+16], %%xmm1\n"
"punpcklwd %%xmm4, %%xmm7\n" "punpcklwd %%xmm4, %%xmm7\n"
"punpckhwd %%xmm5, %%xmm7\n" "punpckhwd %%xmm5, %%xmm7\n"
"movdqa [%1+32], %%xmm2\n" "movdqa [%[dst]+32], %%xmm2\n"
"movdqa [%1+48], %%xmm3\n" "movdqa [%[dst]+48], %%xmm3\n"
"movdqa %%xmm0, %%xmm6\n" "movdqa %%xmm0, %%xmm6\n"
"punpcklwd %%xmm6, %%xmm7\n" "punpcklwd %%xmm6, %%xmm7\n"
"movdqa [%1+64], %%xmm4\n" "movdqa [%[dst]+64], %%xmm4\n"
"movdqa [%1+80], %%xmm5\n" "movdqa [%[dst]+80], %%xmm5\n"
"punpckhwd %%xmm0, %%xmm7\n" "punpckhwd %%xmm0, %%xmm7\n"
"movdqa [%1+96], %%xmm6\n" "movdqa [%[dst]+96], %%xmm6\n"
"movdqa [%1+112], %%xmm0\n" "movdqa [%[dst]+112], %%xmm0\n"
"add %0, 64\n" "add %[src], 64\n"
"add %1, 128\n" "add %[dst], 128\n"
"sub %2, 1\n" "sub %[iters], 1\n"
"jne Z16Loop\n" "jne Z16Loop\n"
".att_syntax\n" ".att_syntax\n"
: "=r"(src), "=r"(dst), "=r"(iters) : "=&r"(src), "=&r"(dst), "=&r"(iters)
: "0"(src), "1"(dst), "2"(iters) : [src] "0"(src), [dst] "1"(dst), [iters] "2"(iters)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
); );
#endif // _MSC_VER #endif // _MSC_VER
} }

View File

@ -804,14 +804,9 @@ void ZeroGS::KickSprite()
int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex); int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex); int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
// sprite is too small and AA shows lines (tek4) // sprite is too small and AA shows lines (tek4, Mana Khemia)
gs.gsvertex[last].x += (4*s_AAx);
if (s_AAx) gs.gsvertex[last].y += (4*s_AAy);
{
gs.gsvertex[last].x += 4;
if (s_AAy) gs.gsvertex[last].y += 4;
}
// might be bad sprite (KH dialog text) // might be bad sprite (KH dialog text)
//if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y ) //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )

View File

@ -96,7 +96,7 @@ extern u32 ptexBilinearBlocks;
// State parameters // State parameters
#ifdef DEVBUILD #ifdef ZEROGS_DEVBUILD
extern char* EFFECT_NAME; extern char* EFFECT_NAME;
extern char* EFFECT_DIR; extern char* EFFECT_DIR;
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve; extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;