ReorderingMTGS: Sync with trunk!

git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3523 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-07-17 22:42:18 +00:00
commit 9ba58e1398
16 changed files with 613 additions and 753 deletions

View File

@ -7,6 +7,19 @@ cmake_minimum_required(VERSION 2.6)
# Variable to check that people use the good file
set(TOP_CMAKE_WAS_SOURCED TRUE)
# PCSX2 only builds for 32-bit targets. Stop the configure step with an
# explicit explanation on 64-bit toolchains instead of letting the build fail
# later with an obscure compilation error.
# The pointer size is an integer, so it is compared numerically (EQUAL)
# rather than with a regular expression (MATCHES).
# NOTE(review): CMAKE_SIZEOF_VOID_P is only filled in once a language has been
# enabled - confirm this check runs after project()/enable_language().
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
message(FATAL_ERROR "
PCSX2 does not support 64bits environment. Please install a 32bits chroot or a 32bits OS.
PCSX2 have neither no plan to support the 64bits architecture in the future.
It will need a complete rewrite of the core emulator and a lots of time
However when linux distribution will support properly multi-arch package, it will
be at least possible to easily compile and install PCSX2 witout too much hassle (the chroot environment)")
endif()
# set module path
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)

View File

@ -98,8 +98,8 @@ void gen_memcpy_vibes() {
xADD(ecx, 256);
}
const xRegisterSSE xmm_t(x);
xMOVAPS(xmm_t, ptr32[edx+off]);
xMOVAPS(ptr32[ecx+off], xmm_t);
xMOVAPS (xmm_t, ptr32[edx+off]);
xMOVNTPS(ptr32[ecx+off], xmm_t);
}
_memcpy_vibes[0] = (_memCpyCall)xGetPtr();

View File

@ -63,11 +63,11 @@ struct SSE2_Tables
u16 Y_mask[8]; // offset -32
u16 round_1bit[8]; // offset -16
u16 Y_coefficients[8]; // offset 0
u16 GCr_coefficients[8];// offset 16
u16 GCb_coefficients[8];// offset 32
u16 RCr_coefficients[8];// offset 48
u16 BCb_coefficients[8];// offset 64
s16 Y_coefficients[8]; // offset 0
s16 GCr_coefficients[8];// offset 16
s16 GCb_coefficients[8];// offset 32
s16 RCr_coefficients[8];// offset 48
s16 BCb_coefficients[8];// offset 64
};
enum

View File

@ -98,27 +98,31 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
}
void VifUnpackSSE_Base::xUPK_S_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (workReg);
}
else {
xMOV16 (workReg, ptr32[srcIndirect]);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 16);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (workReg);
}
else
{
xMOV16 (workReg, ptr32[srcIndirect]);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 16);
}
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_S_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (workReg);
}
else {
xMOV8 (workReg, ptr32[srcIndirect]);
xPUNPCK.LBW(workReg, workReg);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 24);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (workReg);
}
else
{
xMOV8 (workReg, ptr32[srcIndirect]);
xPUNPCK.LBW(workReg, workReg);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 24);
}
xPSHUF.D (destReg, workReg, _v0);
}
@ -133,58 +137,63 @@ void VifUnpackSSE_Base::xUPK_V2_32() const {
}
void VifUnpackSSE_Base::xUPK_V2_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg);
}
else
{
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
}
void VifUnpackSSE_Base::xUPK_V2_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV16 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg);
}
else
{
xMOV16 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
}
void VifUnpackSSE_Base::xUPK_V3_32() const {
xMOV128 (destReg, ptr32[srcIndirect]);
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
}
void VifUnpackSSE_Base::xUPK_V3_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg);
}
else
{
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
}
void VifUnpackSSE_Base::xUPK_V3_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg);
}
else
{
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
}
void VifUnpackSSE_Base::xUPK_V4_32() const {
@ -192,26 +201,30 @@ void VifUnpackSSE_Base::xUPK_V4_32() const {
}
void VifUnpackSSE_Base::xUPK_V4_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX16 (destReg);
}
else
{
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
}
void VifUnpackSSE_Base::xUPK_V4_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
if (x86caps.hasStreamingSIMD4Extensions)
{
xPMOVXX8 (destReg);
}
else
{
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
}
void VifUnpackSSE_Base::xUPK_V4_5() const {

View File

@ -245,53 +245,31 @@ public:
{
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2;
const GSVector4 ys(0.098f, 0.504f, 0.257f, 0.0f);
const GSVector4 us(0.439f / 2, -0.291f / 2, -0.148f / 2, 0.0f);
const GSVector4 vs(-0.071f / 2, -0.368f / 2, 0.439f / 2, 0.0f);
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
const GSVector4 offset(16, 128, 16, 128);
if(rgba)
if (!rgba)
ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
uint32* s = (uint32*)src;
uint16* d = (uint16*)dst;
for(int i = 0; i < w; i += 2)
{
uint32* s = (uint32*)src;
uint16* d = (uint16*)dst;
GSVector4 c0 = GSVector4(s[i + 0]);
GSVector4 c1 = GSVector4(s[i + 1]);
GSVector4 c2 = c0 + c1;
for(int i = 0; i < w; i += 2)
{
GSVector4 c0 = GSVector4(s[i + 0]);
GSVector4 c1 = GSVector4(s[i + 1]);
GSVector4 c2 = c0 + c1;
GSVector4 lo = (c0 * ys).hadd(c2 * us);
GSVector4 hi = (c1 * ys).hadd(c2 * vs);
GSVector4 lo = (c0 * ys).hadd(c2 * vs);
GSVector4 hi = (c1 * ys).hadd(c2 * us);
GSVector4 c = lo.hadd(hi) + offset;
GSVector4 c = lo.hadd(hi) + offset;
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
}
}
}
else
{
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
uint32* s = (uint32*)src;
uint16* d = (uint16*)dst;
for(int i = 0; i < w; i += 2)
{
GSVector4 c0 = GSVector4(s[i + 0]).zyxw();
GSVector4 c1 = GSVector4(s[i + 1]).zyxw();
GSVector4 c2 = c0 + c1;
GSVector4 lo = (c0 * ys).hadd(c2 * vs);
GSVector4 hi = (c1 * ys).hadd(c2 * us);
GSVector4 c = lo.hadd(hi) + offset;
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
}
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
}
}
}

View File

@ -126,6 +126,53 @@ void CALLBACK GSsetLogDir(const char* dir)
ZZLog::SetDir(dir);
}
// Emits one ZZLog::WriteLn line for `text` when the named flag of `hacks` is set.
// Local to ReportHacks; undefined again right after it.
#define ZZ_REPORT_HACK(flag, text) \
	do { if (hacks.flag) ZZLog::WriteLn(text); } while (0)

// Writes one log line for every hack flag that is set in `hacks`, in the
// fixed order listed below. Flags that are clear produce no output.
void ReportHacks(gameHacks hacks)
{
	ZZ_REPORT_HACK(texture_targs,     "'Texture targs' hack enabled.");
	ZZ_REPORT_HACK(auto_reset,        "'Auto reset' hack enabled.");
	ZZ_REPORT_HACK(interlace_2x,      "'Interlace 2x' hack enabled.");
	ZZ_REPORT_HACK(texa,              "'Texa' hack enabled.");
	ZZ_REPORT_HACK(no_target_resolve, "'No target resolve' hack enabled.");
	ZZ_REPORT_HACK(exact_color,       "Exact color hack enabled.");
	ZZ_REPORT_HACK(no_color_clamp,    "'No color clamp' hack enabled.");
	ZZ_REPORT_HACK(no_alpha_fail,     "'No alpha fail' hack enabled.");
	ZZ_REPORT_HACK(no_depth_update,   "'No depth update' hack enabled.");
	ZZ_REPORT_HACK(quick_resolve_1,   "'Quick resolve 1' enabled.");
	ZZ_REPORT_HACK(no_quick_resolve,  "'No Quick resolve' hack enabled.");
	ZZ_REPORT_HACK(no_target_clut,    "'No target clut' hack enabled.");
	ZZ_REPORT_HACK(vss_hack_off,      "VSS hack enabled.");
	ZZ_REPORT_HACK(no_depth_resolve,  "'No depth resolve' hack enabled.");
	ZZ_REPORT_HACK(full_16_bit_res,   "'Full 16 bit resolution' hack enabled.");
	ZZ_REPORT_HACK(resolve_promoted,  "'Resolve promoted' hack enabled.");
	ZZ_REPORT_HACK(fast_update,       "'Fast update' hack enabled.");
	ZZ_REPORT_HACK(no_alpha_test,     "'No alpha test' hack enabled.");
	ZZ_REPORT_HACK(disable_mrt_depth, "'Disable mrt depth' hack enabled.");
	ZZ_REPORT_HACK(args_32_bit,       "'Args 32 bit' hack enabled.");
	ZZ_REPORT_HACK(path3,             "'Path3' hack enabled.");
	ZZ_REPORT_HACK(parallel_context,  "'Parallel context' hack enabled.");
	ZZ_REPORT_HACK(xenosaga_spec,     "'Xenosaga spec' hack enabled.");
	ZZ_REPORT_HACK(partial_pointers,  "'Partial pointers' hack enabled.");
	ZZ_REPORT_HACK(partial_depth,     "'Partial depth' hack enabled.");
	ZZ_REPORT_HACK(reget,             "Reget hack enabled.");
	ZZ_REPORT_HACK(gust,              "Gust hack enabled.");
	ZZ_REPORT_HACK(no_logz,           "'No logz' hack enabled.");
}

#undef ZZ_REPORT_HACK
// Logs the active hacks in two groups: first the set held in conf.def_hacks,
// then the set held in conf.hacks. A group whose bitmask (_u32) is zero is
// skipped entirely, heading included.
void ListHacks()
{
	const bool anyDefaultHacks = (conf.def_hacks._u32 != 0);

	if (anyDefaultHacks)
	{
		ZZLog::WriteLn("AutoEnabling these hacks:");
		ReportHacks(conf.def_hacks);
	}

	const bool anyManualHacks = (conf.hacks._u32 != 0);

	if (anyManualHacks)
	{
		ZZLog::WriteLn("You've manually enabled these hacks:");
		ReportHacks(conf.hacks);
	}
}
void CALLBACK GSsetGameCRC(int crc, int options)
{
// TEXDESTROY_THRESH starts out at 16.
@ -133,15 +180,15 @@ void CALLBACK GSsetGameCRC(int crc, int options)
conf.mrtdepth = (conf.settings().disable_mrt_depth != 0);
if (!conf.mrtdepth)
ZZLog::Error_Log("Disabling MRT depth writing.");
ZZLog::WriteLn("Disabling MRT depth writing.");
else
ZZLog::Error_Log("Enabling MRT depth writing.");
ZZLog::WriteLn("Enabling MRT depth writing.");
bool CRCValueChanged = (g_LastCRC != crc);
g_LastCRC = crc;
ZZLog::Error_Log("CRC = %x", crc);
if (crc != 0) ZZLog::WriteLn("Current game CRC is %x.", crc);
if (CRCValueChanged && (crc != 0))
{
@ -149,17 +196,27 @@ void CALLBACK GSsetGameCRC(int crc, int options)
{
if (crc_game_list[i].crc == crc)
{
if (crc_game_list[i].v_thresh > 0) VALIDATE_THRESH = crc_game_list[i].v_thresh;
if (crc_game_list[i].t_thresh > 0) TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
ZZLog::WriteLn("Found CRC[%x] in crc game list.", crc);
if (crc_game_list[i].v_thresh > 0)
{
VALIDATE_THRESH = crc_game_list[i].v_thresh;
ZZLog::WriteLn("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH);
}
if (crc_game_list[i].t_thresh > 0)
{
TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
ZZLog::WriteLn("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH);
}
conf.def_hacks._u32 |= crc_game_list[i].flags;
ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc);
ListHacks();
return;
}
}
}
ListHacks();
}
void CALLBACK GSsetFrameSkip(int frameskip)

View File

@ -27,7 +27,6 @@
#include "zerogs.h"
#include "targets.h"
namespace ZeroGS
{
extern CRangeManager s_RangeMngr; // manages overwritten memory
@ -249,8 +248,10 @@
}
template <class T>
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
int i = x, j = y;
T* pbuf = (T*)pbyMem;
u32 nSize = nQWordSize * 16 / sizeof(T);
@ -275,8 +276,10 @@
}
}
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
{
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
int i = x, j = y;
u8* pbuf = (u8*)pbyMem;
u32 nSize = nQWordSize * 16 / 3;
@ -312,65 +315,113 @@
assert(gs.imageTransfer == 1);
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
int i = gs.imageY, j = gs.imageX;
switch (gs.srcbuf.psm)
switch(PSMT_BITMODE(gs.srcbuf.psm))
{
case PSMCT32:
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32_0);
break;
case PSMCT24:
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0);
break;
case PSMCT16:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16_0);
break;
case PSMCT16S:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0);
break;
case PSMT8:
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8_0);
break;
case PSMT8H:
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0);
break;
case PSMT32Z:
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0);
break;
case PSMT24Z:
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0);
break;
case PSMT16Z:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0);
break;
case PSMT16SZ:
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0);
break;
default:
assert(0);
case 0: TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 1: TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 2: TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
case 3: TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
default: assert(0); break;
}
gs.imageY = i;
gs.imageX = j;
if (gs.imageY >= gs.imageEndY)
{
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
assert(gs.imageY == gs.imageEndY);
gs.imageTransfer = -1;
}
}
// Copies the gs.imageWnew x gs.imageHnew rectangle that starts at
// (gs.trxpos.sx, gs.trxpos.sy) in the source buffer to
// (gs.trxpos.dx, gs.trxpos.dy) in the destination buffer, pixel by pixel.
// Each pixel is read with the accessor for the SOURCE storage format and
// stored with the accessor for the DESTINATION storage format; coordinates
// wrap at 2048.
__forceinline void _TransferLocalLocal()
{
	//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);

	// The reader must match the buffer it reads (source) and the writer the
	// buffer it writes (destination); indexing them the other way round
	// decodes/encodes pixels with the wrong format whenever the two differ.
	_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
	_writePixel_0 wp = writePixelFun_0[gs.dstbuf.psm];

	// The accessor tables hold NULL for format codes without an accessor;
	// do nothing rather than call through a null pointer.
	if (rp == NULL || wp == NULL) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;

	// Pixels handled per inner-loop step: 2 when PSMT_BITMODE of the source
	// format is 0, otherwise 4.
	u32 widthlimit = 4;
	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;

	// NOTE(review): this tests a single bit of the width, not
	// (width % widthlimit); kept unchanged - confirm the intended alignment rule.
	if ((gs.imageWnew & widthlimit) != 0) return;

	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
	{
		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit)
		{
			wp(pDstBuf, j2%2048, i2%2048,
				rp(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);

			wp(pDstBuf, (j2+1)%2048, i2%2048,
				rp(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);

			if (widthlimit > 2)
			{
				// Then widthlimit == 4.
				wp(pDstBuf, (j2+2)%2048, i2%2048,
					rp(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);

				wp(pDstBuf, (j2+3)%2048, i2%2048,
					rp(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
			}
		}
	}
}
// Variant of the local-to-local copy that moves one nibble per pixel.
// Copies the gs.imageWnew x gs.imageHnew rectangle from
// (gs.trxpos.sx, gs.trxpos.sy) in the source buffer to
// (gs.trxpos.dx, gs.trxpos.dy) in the destination buffer, eight pixels per
// inner-loop step; the width is asserted to be a multiple of 8 and
// coordinates wrap at 2048.
__forceinline void _TransferLocalLocal_4()
{
	//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
	_getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm];
	_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];

	// The address table holds NULL for format codes without an entry;
	// do nothing rather than call through a null pointer.
	if (gsp == NULL || gdp == NULL) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	assert((gs.imageWnew % 8) == 0);

	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
	{
		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8)
		{
			/* NOTE: the 2 conseq 4bit values are in NOT in the same byte */
			// Transfer all eight pixels of this step. The unrolled version
			// repeated offsets 2 and 3 and never reached offsets 4..7, leaving
			// half of every 8-pixel group uncopied.
			for (int k = 0; k < 8; ++k)
			{
				// Even offsets replace the low nibble, odd offsets the high one.
				const u8 take = (k & 1) ? 0xf0 : 0x0f;

				const u32 read = gsp((j+k)%2048, i%2048, gs.srcbuf.bw);
				const u32 write = gdp((j2+k)%2048, i2%2048, gs.dstbuf.bw);

				pDstBuf[write] = (pDstBuf[write] & ~take) | (pSrcBuf[read] & take);
			}
		}
	}
}
// dir depends on trxpos.dirx & trxpos.diry
void TransferLocalLocal()
{
@ -408,285 +459,19 @@
//(*it)->status |= CRenderTarget::TS_NeedUpdate;
}
}
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
#define TRANSFERLOCALLOCAL(srcpsm, dstpsm, widthlimit) { \
if( (gs.imageWnew&widthlimit)!=0 ) break; \
assert( (gs.imageWnew&widthlimit)==0 && widthlimit <= 4); \
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) { \
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) { \
\
writePixel##dstpsm##_0(pDstBuf, j2%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 1 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+1)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 2 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+2)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
\
if( widthlimit > 3 ) { \
writePixel##dstpsm##_0(pDstBuf, (j2+3)%2048, i2%2048, \
readPixel##srcpsm##_0(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
} \
} \
} \
} \
} \
} \
#define TRANSFERLOCALLOCAL_4(srcpsm, dstpsm) { \
assert( (gs.imageWnew%8) == 0 ); \
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; ++i, ++i2) { \
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=8, j2+=8) { \
/* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ \
u32 read = getPixelAddress##srcpsm##_0(j%2048, i%2048, gs.srcbuf.bw); \
u32 write = getPixelAddress##dstpsm##_0(j2%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+1)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+1)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
\
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
\
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
} \
} \
} \
switch (gs.srcbuf.psm)
if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
{
case PSMCT32:
if (gs.dstbuf.psm == PSMCT32)
{
TRANSFERLOCALLOCAL(32, 32, 2);
}
else
{
TRANSFERLOCALLOCAL(32, 32Z, 2);
}
break;
case PSMCT24:
if (gs.dstbuf.psm == PSMCT24)
{
TRANSFERLOCALLOCAL(24, 24, 4);
}
else
{
TRANSFERLOCALLOCAL(24, 24Z, 4);
}
break;
case PSMCT16:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16, 16SZ, 4);
break;
}
break;
case PSMCT16S:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16S, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16S, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16S, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16S, 16SZ, 4);
break;
}
break;
case PSMT8:
if (gs.dstbuf.psm == PSMT8)
{
TRANSFERLOCALLOCAL(8, 8, 4);
}
else
{
TRANSFERLOCALLOCAL(8, 8H, 4);
}
break;
case PSMT4:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4, 4HH);
break;
}
break;
case PSMT8H:
if (gs.dstbuf.psm == PSMT8)
{
TRANSFERLOCALLOCAL(8H, 8, 4);
}
else
{
TRANSFERLOCALLOCAL(8H, 8H, 4);
}
break;
case PSMT4HL:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4HL, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4HL, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4HL, 4HH);
break;
}
break;
case PSMT4HH:
switch (gs.dstbuf.psm)
{
case PSMT4:
TRANSFERLOCALLOCAL_4(4HH, 4);
break;
case PSMT4HL:
TRANSFERLOCALLOCAL_4(4HH, 4HL);
break;
case PSMT4HH:
TRANSFERLOCALLOCAL_4(4HH, 4HH);
break;
}
break;
case PSMT32Z:
if (gs.dstbuf.psm == PSMCT32)
{
TRANSFERLOCALLOCAL(32Z, 32, 2);
}
else
{
TRANSFERLOCALLOCAL(32Z, 32Z, 2);
}
break;
case PSMT24Z:
if (gs.dstbuf.psm == PSMCT24)
{
TRANSFERLOCALLOCAL(24Z, 24, 4);
}
else
{
TRANSFERLOCALLOCAL(24Z, 24Z, 4);
}
break;
case PSMT16Z:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16Z, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16Z, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16Z, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16Z, 16SZ, 4);
break;
}
break;
case PSMT16SZ:
switch (gs.dstbuf.psm)
{
case PSMCT16:
TRANSFERLOCALLOCAL(16SZ, 16, 4);
break;
case PSMCT16S:
TRANSFERLOCALLOCAL(16SZ, 16S, 4);
break;
case PSMT16Z:
TRANSFERLOCALLOCAL(16SZ, 16Z, 4);
break;
case PSMT16SZ:
TRANSFERLOCALLOCAL(16SZ, 16SZ, 4);
break;
}
break;
_TransferLocalLocal();
}
else
{
_TransferLocalLocal_4();
}
g_MemTargs.ClearRange(dststart, dstend);
#ifdef DEVBUILD
#ifdef ZEROGS_DEVBUILD
if (g_bSaveTrans)
{

View File

@ -44,6 +44,10 @@ typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
extern _getPixelAddress_0 getPixelFun_0[64];
extern _writePixel_0 writePixelFun_0[64];
extern _readPixel_0 readPixelFun_0[64];
enum Psm_Size
{
PSM_ = 0,
@ -269,8 +273,6 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
{
((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel;
@ -511,7 +513,6 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel,
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
}
///////////////
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)

View File

@ -18,6 +18,7 @@
*/
#include "GS.h"
#include "Mem.h"
u32 g_blockTable32[4][8] =
{
@ -247,3 +248,52 @@ u32 g_pageTable16Z[64][64];
u32 g_pageTable16SZ[64][64];
u32 g_pageTable8[64][128];
u32 g_pageTable4[128][128];
/* PSM reference array
{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, 8, 4, NULL, NULL, NULL,
NULL, NULL, NULL, 8H, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, 4HL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, 4HH, NULL, NULL, NULL,
32Z, 24Z, 16Z, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, 16SZ, NULL, NULL, NULL, NULL, NULL };
*/
// Pixel-address functions, one slot per PSM code (the table is indexed
// directly with a buffer's psm value, 0..63). Slots for codes that have no
// address function are NULL, so callers must not invoke an entry without
// knowing the code is one of the populated ones.
_getPixelAddress_0 getPixelFun_0[64] =
{
getPixelAddress32_0, getPixelAddress24_0, getPixelAddress16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, getPixelAddress16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, getPixelAddress8_0, getPixelAddress4_0, NULL, NULL, NULL,
NULL, NULL, NULL, getPixelAddress8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, getPixelAddress4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, getPixelAddress4HH_0, NULL, NULL, NULL,
getPixelAddress32Z_0, getPixelAddress24Z_0, getPixelAddress16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, getPixelAddress16SZ_0, NULL, NULL, NULL, NULL, NULL
};
// Pixel-write functions, one slot per PSM code (the table is indexed
// directly with a buffer's psm value, 0..63). Slots for codes that have no
// writer are NULL, so callers must not invoke an entry without knowing the
// code is one of the populated ones.
_writePixel_0 writePixelFun_0[64] =
{
writePixel32_0, writePixel24_0, writePixel16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, writePixel16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, writePixel8_0, writePixel4_0, NULL, NULL, NULL,
NULL, NULL, NULL, writePixel8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, writePixel4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, writePixel4HH_0, NULL, NULL, NULL,
writePixel32Z_0, writePixel24Z_0, writePixel16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, writePixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};
// Pixel-read functions, one slot per PSM code (the table is indexed
// directly with a buffer's psm value, 0..63). Slots for codes that have no
// reader are NULL, so callers must not invoke an entry without knowing the
// code is one of the populated ones.
_readPixel_0 readPixelFun_0[64] =
{
readPixel32_0, readPixel24_0, readPixel16_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, readPixel16S_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, readPixel8_0, readPixel4_0, NULL, NULL, NULL,
NULL, NULL, NULL, readPixel8H_0, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, readPixel4HL_0, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, readPixel4HH_0, NULL, NULL, NULL,
readPixel32Z_0, readPixel24Z_0, readPixel16Z_0, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, readPixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};

View File

@ -125,27 +125,16 @@ void __fastcall GIFPackedRegHandlerUV(u32* data)
ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V);
}
void __forceinline KICK_VERTEX2()
void __forceinline KickVertex(bool adc)
{
FUNCLOG
if (++gs.primC >= (int)g_primmult[prim->prim])
{
if (NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])();
gs.primC -= g_primsub[prim->prim];
}
}
void __forceinline KICK_VERTEX3()
{
FUNCLOG
if (++gs.primC >= (int)g_primmult[prim->prim])
{
if (!adc && NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])();
gs.primC -= g_primsub[prim->prim];
if (prim->prim == 5)
if (adc && prim->prim == 5)
{
/* tri fans need special processing */
if (gs.nTriFanVert == gs.primIndex)
@ -161,14 +150,7 @@ void __fastcall GIFPackedRegHandlerXYZF2(u32* data)
gs.add_vertex(r->X, r->Y,r->Z, r->F);
// Fix Vertexes up later.
if (data[3] & 0x8000)
{
KICK_VERTEX3();
}
else
{
KICK_VERTEX2();
}
KickVertex(!!(r->ADC));
ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -179,14 +161,7 @@ void __fastcall GIFPackedRegHandlerXYZ2(u32* data)
gs.add_vertex(r->X, r->Y,r->Z);
// Fix Vertexes up later.
if (data[3] & 0x8000)
{
KICK_VERTEX3();
}
else
{
KICK_VERTEX2();
}
KickVertex(!!(r->ADC));
ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -285,7 +260,7 @@ void __fastcall GIFRegHandlerXYZF2(u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F);
KICK_VERTEX2();
KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -295,7 +270,7 @@ void __fastcall GIFRegHandlerXYZ2(u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z);
KICK_VERTEX2();
KickVertex(false);
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -382,7 +357,7 @@ void __fastcall GIFRegHandlerXYZF3(u32* data)
GIFRegXYZF* r = (GIFRegXYZF*)(data);
gs.add_vertex(r->X, r->Y,r->Z, r->F);
KICK_VERTEX3();
KickVertex(true);
ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
}
@ -392,7 +367,7 @@ void __fastcall GIFRegHandlerXYZ3(u32* data)
GIFRegXYZ* r = (GIFRegXYZ*)(data);
gs.add_vertex(r->X, r->Y,r->Z);
KICK_VERTEX3();
KickVertex(true);
ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
}
@ -915,6 +890,7 @@ void __fastcall GIFRegHandlerTRXREG(u32* data)
void __fastcall GIFRegHandlerTRXDIR(u32* data)
{
FUNCLOG
GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
// Oh dear...
// terminate any previous transfers
@ -934,7 +910,7 @@ void __fastcall GIFRegHandlerTRXDIR(u32* data)
gs.dstbuf = gs.dstbufnew;
gs.trxpos = gs.trxposnew;
gs.imageTransfer = data[0] & 0x3;
gs.imageTransfer = r->XDIR;
gs.imageWnew = gs.imageWtemp;
gs.imageHnew = gs.imageHtemp;

View File

@ -1,197 +1,193 @@
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
/* ZZ Open GL graphics plugin
* Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
* Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef ZZLOG_H_INCLUDED
#define ZZLOG_H_INCLUDED
#include "Util.h"
// State for the rate-limited logging macros below: errors that fire very
// often are only logged once per BigTime interval.
//   lasttime  - timeGetTime() value recorded at the last throttled log.
//   BigTime   - minimum timeGetTime() difference between two throttled logs
//               (presumably milliseconds - TODO confirm for the non-Windows timeGetTime).
//   SPAM_PASS - set by ERROR_LOG_SPAM_TEST: true if it logged, false if it was throttled.
// These are file-scope statics defined in a header, so every translation unit
// that includes it gets its own independent copies. The __LINUX__ variants
// only add __attribute__((unused)) to silence unused-variable warnings.
#ifdef __LINUX__
static u32 __attribute__((unused)) lasttime = 0;
static u32 __attribute__((unused)) BigTime = 5000;
static bool __attribute__((unused)) SPAM_PASS;
#else
static u32 lasttime = 0;
static u32 BigTime = 5000;
static bool SPAM_PASS;
#endif
// Rate-limited ZZLog::Error_Log: logs `text` only if more than BigTime has
// elapsed (per timeGetTime) since `lasttime`, then refreshes `lasttime`.
// All three macros in this group share the same `lasttime`, so a message
// logged through one of them also delays the others.
#define ERROR_LOG_SPAM(text) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
} \
}
// The same macro with one-argument substitution: `fmt` is the format string
// and `value` its single argument.
#define ERROR_LOG_SPAMA(fmt, value) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(fmt, value); \
lasttime = timeGetTime(); \
} \
}
// Like ERROR_LOG_SPAM, but additionally records the outcome in SPAM_PASS:
// true when the message was logged, false when it was suppressed.
#define ERROR_LOG_SPAM_TEST(text) {\
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
SPAM_PASS = true; \
} \
else \
SPAM_PASS = false; \
}
// FUNCLOG: function-entry trace. It is only active when DEBUG_PROF is
// non-zero; otherwise it expands to nothing.
// When active, each expansion site keeps its own static state and logs
// "file:line function" through ZZLog::Error_Log the first time it is reached.
// It then stays silent until a pass that happens more than BigTime after that
// log re-arms it; the pass after the re-arming logs again.
// FILE_IS_IN_CHECK limits tracing to files whose __FILE__ string is exactly
// "targets.cpp" or "ZZoglFlush.cpp".
// NOTE(review): __FILE__ usually carries the path given to the compiler, so
// the exact strcmp match presumably only succeeds for bare file names - confirm.
#if DEBUG_PROF
#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0))
#define FUNCLOG {\
static bool Was_Here = false; \
static unsigned long int waslasttime = 0; \
if (!Was_Here && FILE_IS_IN_CHECK) { \
Was_Here = true;\
ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \
waslasttime = timeGetTime(); \
} \
if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \
Was_Here = false; \
} \
}
#else
#define FUNCLOG
#endif
// Compile-time logging switches, both currently disabled.
// NOTE(review): presumably these gate the Greg_Log / Prim_Log output - confirm in the .cpp files.
//#define WRITE_GREG_LOGS
//#define WRITE_PRIM_LOGS
// A debug build (_DEBUG) turns on ZEROGS_DEVBUILD unless it was already defined.
#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD)
#define ZEROGS_DEVBUILD
#endif
// The DEVBUILD define is commented out, so this conditional currently has no effect.
#ifdef ZEROGS_DEVBUILD
//#define DEVBUILD
#endif
// Lightweight assertion helpers: each macro evaluates the condition `x` once and, when it
// is false, reports through ZZLog and optionally leaves the calling function.
//
//   BMSG(x, str)                - log `str` (used directly as the format string).
//   BMSG_RETURN(x, str)         - log `str`, then `return;`.
//   BMSG_RETURNX(x, str, rtype) - log `str`, then `return (rtype);` (`rtype` is the value).
//   B(x)                        - log the source text of the failed expression.
//   B_RETURN(x)                 - log file:line plus the expression text, then `return;`.
//   B_RETURNX(x, rtype)         - same as B_RETURN, then `return (rtype);`.
//   B_G(x, action)              - same as B_RETURN, then execute `action`.
//
// All of them expand to a bare { } block, so mind un-braced if/else around a use.
// NOTE(review): BMSG*/B call ZZLog::Log twice with identical text; this looks like a
// leftover from two distinct sinks - confirm before collapsing to a single call.
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }
#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } }
// `rtype` is substituted as-is: pasting it onto "(" with ## does not yield a valid
// preprocessing token and is rejected by GCC/Clang.
#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (rtype); } }
// The expression text is passed through "%s" so that a '%' inside the expression
// (e.g. a modulo) is never interpreted as a conversion specifier.
#define B(x) { if( !(x) ) { ZZLog::Log("%s\n", #x); ZZLog::Log("%s\n", #x); } }
#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } }
#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (rtype); } }
#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } }
// Polls glGetError() once. For any result other than GL_NO_ERROR it logs the file, line,
// symbolic name (via error_name) and hexadecimal code through ZZLog::Error_Log, then calls
// ZeroGS::HandleGLError(). Only one error is fetched per use. The local `err` lives inside
// the macro's own braces, so it cannot clash with the caller's variables.
#define GL_REPORT_ERROR() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
} \
}
// Debug-only flavour of GL_REPORT_ERROR: when _DEBUG is defined it performs the same
// glGetError() check, log and ZeroGS::HandleGLError() call; in other builds it expands
// to nothing. The message text differs from GL_REPORT_ERROR only by a space before "(0x".
#ifdef _DEBUG
# define GL_REPORT_ERRORD() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
} \
}
#else
# define GL_REPORT_ERRORD()
#endif
// Maps an OpenGL error code to the spelling of its GL_* constant so that log
// messages are readable. Codes that are not in the table yield "Unknown GL error".
inline const char *error_name(int err)
{
	struct NamedCode
	{
		int code;
		const char *label;
	};

	static const NamedCode known_codes[] =
	{
		{ GL_NO_ERROR,                      "GL_NO_ERROR" },
		{ GL_INVALID_ENUM,                  "GL_INVALID_ENUM" },
		{ GL_INVALID_VALUE,                 "GL_INVALID_VALUE" },
		{ GL_INVALID_OPERATION,             "GL_INVALID_OPERATION" },
		{ GL_STACK_OVERFLOW,                "GL_STACK_OVERFLOW" },
		{ GL_STACK_UNDERFLOW,               "GL_STACK_UNDERFLOW" },
		{ GL_OUT_OF_MEMORY,                 "GL_OUT_OF_MEMORY" },
		{ GL_TABLE_TOO_LARGE,               "GL_TABLE_TOO_LARGE" },
		{ GL_INVALID_FRAMEBUFFER_OPERATION, "GL_INVALID_FRAMEBUFFER_OPERATION" },
	};

	const unsigned int entries = sizeof(known_codes) / sizeof(known_codes[0]);

	for (unsigned int idx = 0; idx < entries; ++idx)
	{
		if (known_codes[idx].code == err)
			return known_codes[idx].label;
	}

	return "Unknown GL error";
}
extern void __LogToConsole(const char *fmt, ...);
// Forward declarations for the handful of ZeroGS entry points needed alongside the
// logging helpers, so the full (much larger) zerogs header does not have to be included.
namespace ZeroGS
{
	void AddMessage(const char* pstr, u32 ms);

	void SetAA(int mode);
	void SetNegAA(int mode);

	bool Create(int width, int height);
	void Destroy(bool bD3D);

	void StartCapture();
	void StopCapture();
}
namespace ZZLog
{
extern bool IsLogging();
void SetDir(const char* dir);
extern bool Open();
extern void Close();
extern void Message(const char *fmt, ...);
extern void Log(const char *fmt, ...);
void WriteToScreen(const char* pstr, u32 ms = 5000);
extern void WriteToConsole(const char *fmt, ...);
extern void Print(const char *fmt, ...);
extern void WriteLn(const char *fmt, ...);
extern void Greg_Log(const char *fmt, ...);
extern void Prim_Log(const char *fmt, ...);
extern void GS_Log(const char *fmt, ...);
extern void Debug_Log(const char *fmt, ...);
extern void Warn_Log(const char *fmt, ...);
extern void Error_Log(const char *fmt, ...);
#include "Util.h"
// Logging for errors that are raised often should be rate-limited with a time counter.
// These file-scope statics are the state used by the ERROR_LOG_SPAM* / FUNCLOG macros:
//   lasttime  - timeGetTime() value stored the last time a rate-limited message was emitted.
//   BigTime   - gap that must be exceeded between two rate-limited messages
//               (compared against timeGetTime() differences; presumably milliseconds - confirm).
//   SPAM_PASS - written by ERROR_LOG_SPAM_TEST: true if the message was emitted, false otherwise.
// Being `static` in a header, every translation unit that includes it gets its own copies.
// The __LINUX__ branch tags them unused so files that never use the macros do not warn.
#ifdef __LINUX__
static u32 __attribute__((unused)) lasttime = 0;
static u32 __attribute__((unused)) BigTime = 5000;
static bool __attribute__((unused)) SPAM_PASS;
#else
static u32 lasttime = 0;
static u32 BigTime = 5000;
static bool SPAM_PASS;
#endif
// Rate-limited error log. `text` is forwarded to ZZLog::Error_Log only when more than
// BigTime has elapsed (measured with timeGetTime()) since `lasttime`; `lasttime` is then
// refreshed. `text` is used as the format string itself, so it must not contain a stray '%'.
// `lasttime` is shared by every ERROR_LOG_SPAM* use in the same translation unit, so one
// noisy call site also silences the others for that interval.
// The macro expands to a bare { } block (not do { } while (0)); take care when it is the
// body of an un-braced if that has an else.
#define ERROR_LOG_SPAM(text) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
} \
}
// Same rate limiting as ERROR_LOG_SPAM, but with one substitution argument:
// `fmt` is the format string and `value` the single argument passed along to
// ZZLog::Error_Log. Uses and updates the same `lasttime` / `BigTime` statics.
#define ERROR_LOG_SPAMA(fmt, value) { \
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(fmt, value); \
lasttime = timeGetTime(); \
} \
}
// Rate-limited error log that also reports whether it fired: SPAM_PASS is set to true
// when `text` was logged (and `lasttime` refreshed), and to false when the message was
// suppressed because no more than BigTime has passed. Callers can read SPAM_PASS right
// after the macro to decide whether to emit follow-up detail.
#define ERROR_LOG_SPAM_TEST(text) {\
if( timeGetTime() - lasttime > BigTime ) { \
ZZLog::Error_Log(text); \
lasttime = timeGetTime(); \
SPAM_PASS = true; \
} \
else \
SPAM_PASS = false; \
}
// FUNCLOG: function tracing, compiled in only when DEBUG_PROF is non-zero; otherwise it
// expands to nothing.
// FILE_IS_IN_CHECK is true only when __FILE__ compares equal to "targets.cpp" or
// "ZZoglFlush.cpp".
// NOTE(review): __FILE__ frequently carries a directory prefix, in which case these
// strcmp() calls never match - confirm against the actual build.
// Every expansion owns its own static Was_Here / waslasttime pair: the first pass logs
// "file:line function" through ZZLog::Error_Log and records the time; the flag is cleared
// again once more than BigTime has elapsed, so the site logs at most once per interval.
#if DEBUG_PROF
#define FILE_IS_IN_CHECK ((strcmp(__FILE__, "targets.cpp") == 0) || (strcmp(__FILE__, "ZZoglFlush.cpp") == 0))
#define FUNCLOG {\
static bool Was_Here = false; \
static unsigned long int waslasttime = 0; \
if (!Was_Here && FILE_IS_IN_CHECK) { \
Was_Here = true;\
ZZLog::Error_Log("%s:%d %s", __FILE__, __LINE__, __func__); \
waslasttime = timeGetTime(); \
} \
if (FILE_IS_IN_CHECK && (timeGetTime() - waslasttime > BigTime )) { \
Was_Here = false; \
} \
}
#else
#define FUNCLOG
#endif
// Compile-time logging switches, both currently disabled.
// NOTE(review): presumably these gate the Greg_Log / Prim_Log output - confirm in the .cpp files.
//#define WRITE_GREG_LOGS
//#define WRITE_PRIM_LOGS
// A debug build (_DEBUG) turns on ZEROGS_DEVBUILD unless it was already defined.
#if defined(_DEBUG) && !defined(ZEROGS_DEVBUILD)
#define ZEROGS_DEVBUILD
#endif
// Lightweight assertion helpers: each macro evaluates the condition `x` once and, when it
// is false, reports through ZZLog and optionally leaves the calling function.
//
//   BMSG(x, str)                - log `str` (used directly as the format string).
//   BMSG_RETURN(x, str)         - log `str`, then `return;`.
//   BMSG_RETURNX(x, str, rtype) - log `str`, then `return (rtype);` (`rtype` is the value).
//   B(x)                        - log the source text of the failed expression.
//   B_RETURN(x)                 - log file:line plus the expression text, then `return;`.
//   B_RETURNX(x, rtype)         - same as B_RETURN, then `return (rtype);`.
//   B_G(x, action)              - same as B_RETURN, then execute `action`.
//
// All of them expand to a bare { } block, so mind un-braced if/else around a use.
// NOTE(review): BMSG*/B call ZZLog::Log twice with identical text; this looks like a
// leftover from two distinct sinks - confirm before collapsing to a single call.
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }
#define BMSG_RETURN(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return; } }
// `rtype` is substituted as-is: pasting it onto "(" with ## does not yield a valid
// preprocessing token and is rejected by GCC/Clang.
#define BMSG_RETURNX(x, str, rtype) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); return (rtype); } }
// The expression text is passed through "%s" so that a '%' inside the expression
// (e.g. a modulo) is never interpreted as a conversion specifier.
#define B(x) { if( !(x) ) { ZZLog::Log("%s\n", #x); ZZLog::Log("%s\n", #x); } }
#define B_RETURN(x) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return; } }
#define B_RETURNX(x, rtype) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); return (rtype); } }
#define B_G(x, action) { if( !(x) ) { ZZLog::Error_Log("%s:%d: %s", __FILE__, (u32)__LINE__, #x); action; } }
// Polls glGetError() once. For any result other than GL_NO_ERROR it logs the file, line,
// symbolic name (via error_name) and hexadecimal code through ZZLog::Error_Log, then calls
// ZeroGS::HandleGLError(). Only one error is fetched per use. The local `err` lives inside
// the macro's own braces, so it cannot clash with the caller's variables.
#define GL_REPORT_ERROR() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s(0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
} \
}
// Debug-only flavour of GL_REPORT_ERROR: when _DEBUG is defined it performs the same
// glGetError() check, log and ZeroGS::HandleGLError() call; in other builds it expands
// to nothing. The message text differs from GL_REPORT_ERROR only by a space before "(0x".
#ifdef _DEBUG
# define GL_REPORT_ERRORD() \
{ \
GLenum err = glGetError(); \
if( err != GL_NO_ERROR ) \
{ \
ZZLog::Error_Log("%s:%d: gl error %s (0x%x)", __FILE__, (int)__LINE__, error_name(err), err); \
ZeroGS::HandleGLError(); \
} \
}
#else
# define GL_REPORT_ERRORD()
#endif
// Returns the textual name of an OpenGL error code for use in log output.
// Any code without a matching GL_* constant below is reported as "Unknown GL error".
inline const char *error_name(int err)
{
	if (err == GL_NO_ERROR)                      return "GL_NO_ERROR";
	if (err == GL_INVALID_ENUM)                  return "GL_INVALID_ENUM";
	if (err == GL_INVALID_VALUE)                 return "GL_INVALID_VALUE";
	if (err == GL_INVALID_OPERATION)             return "GL_INVALID_OPERATION";
	if (err == GL_STACK_OVERFLOW)                return "GL_STACK_OVERFLOW";
	if (err == GL_STACK_UNDERFLOW)               return "GL_STACK_UNDERFLOW";
	if (err == GL_OUT_OF_MEMORY)                 return "GL_OUT_OF_MEMORY";
	if (err == GL_TABLE_TOO_LARGE)               return "GL_TABLE_TOO_LARGE";
	if (err == GL_INVALID_FRAMEBUFFER_OPERATION) return "GL_INVALID_FRAMEBUFFER_OPERATION";

	return "Unknown GL error";
}
extern void __LogToConsole(const char *fmt, ...);
// Forward declarations for the handful of ZeroGS entry points needed alongside the
// logging helpers, so the full (much larger) zerogs header does not have to be included.
namespace ZeroGS
{
	void AddMessage(const char* pstr, u32 ms);

	void SetAA(int mode);
	void SetNegAA(int mode);

	bool Create(int width, int height);
	void Destroy(bool bD3D);

	void StartCapture();
	void StopCapture();
}
// Public logging interface of the plugin. Every function taking (fmt, ...) receives a
// format string followed by the arguments it refers to, as the *_Log callers in this
// header do (e.g. "%s:%d: %s").
namespace ZZLog
{
	bool IsLogging();
	void SetDir(const char* dir);
	bool Open();
	void Close();

	void Message(const char *fmt, ...);
	void Log(const char *fmt, ...);

	// `ms` defaults to 5000 when the caller does not supply it.
	void WriteToScreen(const char* pstr, u32 ms = 5000);
	void WriteToConsole(const char *fmt, ...);
	void Print(const char *fmt, ...);
	void WriteLn(const char *fmt, ...);

	void Greg_Log(const char *fmt, ...);
	void Prim_Log(const char *fmt, ...);
	void GS_Log(const char *fmt, ...);
	void Debug_Log(const char *fmt, ...);
	void Warn_Log(const char *fmt, ...);
	void Error_Log(const char *fmt, ...);
};
#endif // ZZLOG_H_INCLUDED

View File

@ -29,7 +29,7 @@
using namespace ZeroGS;
//------------------ Defines
#ifndef DEVBUILD
#ifndef ZEROGS_DEVBUILD
#define INC_GENVARS()
#define INC_TEXVARS()

View File

@ -42,10 +42,6 @@ extern bool g_bUpdateStencil;
# define INC_RESOLVE() ++g_nResolve
#endif
#ifdef DEVBUILD
//static int g_bSaveResolved = 0;
#endif
extern int s_nResolved;
extern u32 g_nResolve;
extern bool g_bSaveTrans;
@ -250,6 +246,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co
}
// set render states
// Bleh. I *really* need to fix this. << 3 when setting the scissors, then >> 3 when using them... --Arcum42
scissorrect.x = scissor.x0 >> 3;
scissorrect.y = (scissor.y0 >> 3) + dy;
scissorrect.w = (scissor.x1 >> 3) + 1;
@ -302,7 +299,7 @@ void ZeroGS::CRenderTarget::Resolve()
GL_REPORT_ERRORD();
#if defined(DEVBUILD)
#if defined(ZEROGS_DEVBUILD)
if (g_bSaveResolved)
{
@ -328,7 +325,7 @@ void ZeroGS::CRenderTarget::Resolve(int startrange, int endrange)
// flush if necessary
FlushIfNecesary(this) ;
#if defined(DEVBUILD)
#if defined(ZEROGS_DEVBUILD)
if (g_bSaveResolved)
{
SaveTexture("resolved.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh));
@ -2290,10 +2287,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
{
// It is not unusual for a vector<u8> buffer to lack 16-byte alignment, which is destructive for the SSE2
// instruction movdqa [%eax], xmm0
// The idea would be resise vector to 15 elements, that set ptxedata to aligned position.
// The idea would be resize vector to 15 elements, that set ptxedata to aligned position.
// Later we advance eax by 16 at a time, so we only need to verify that the first element is aligned
// FIXME. As I see, texdata used only once here, it does not have any impact on other code.
// Probably, usage of _aligned_malloc() would be preferable.
// Note: this often happens when changing AA.
int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
dst = (u16*)ptexdata;

View File

@ -308,13 +308,10 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
}
extern "C"
{
PCSX2_ALIGNED16(int s_clut16mask2[4]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
PCSX2_ALIGNED16(int s_clut16mask[8]) = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
static const __aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff
};
}
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
{
@ -433,11 +430,11 @@ WriteUnaligned:
End:
}
#else
__asm__(".intel_syntax noprefix\n"
"movdqa xmm0, xmmword ptr [ecx]\n"
"movdqa xmm1, xmmword ptr [ecx+16]\n"
"movdqa xmm2, xmmword ptr [ecx+32]\n"
"movdqa xmm3, xmmword ptr [ecx+48]\n"
__asm__ __volatile__(".intel_syntax noprefix\n"
"movdqa xmm0, xmmword ptr [%[vm]]\n"
"movdqa xmm1, xmmword ptr [%[vm]+16]\n"
"movdqa xmm2, xmmword ptr [%[vm]+32]\n"
"movdqa xmm3, xmmword ptr [%[vm]+48]\n"
// rearrange
"pshuflw xmm0, xmm0, 0x88\n"
@ -457,14 +454,14 @@ End:
"pxor xmm6, xmm6\n"
"test edx, 15\n"
"test %[clut], 15\n"
"jnz WriteUnaligned\n"
"movdqa xmm7, [s_clut16mask]\n" // saves upper 16 bits
"movdqa xmm7, %[s_clut16mask]\n" // saves upper 16 bits
// have to save interlaced with the old data
"movdqa xmm4, [edx]\n"
"movdqa xmm5, [edx+32]\n"
"movdqa xmm4, [%[clut]]\n"
"movdqa xmm5, [%[clut]+32]\n"
"movhlps xmm1, xmm0\n"
"movlhps xmm0, xmm2\n"// lower 8 colors
@ -483,29 +480,29 @@ End:
"punpckhwd xmm2, xmm6\n"
"punpckhwd xmm3, xmm6\n"
"movdqa [edx], xmm0\n"
"movdqa [edx+32], xmm1\n"
"movdqa [%[clut]], xmm0\n"
"movdqa [%[clut]+32], xmm1\n"
"movdqa xmm5, xmm7\n"
"pand xmm7, [edx+16]\n"
"pand xmm5, [edx+48]\n"
"pand xmm7, [%[clut]+16]\n"
"pand xmm5, [%[clut]+48]\n"
"por xmm2, xmm7\n"
"por xmm3, xmm5\n"
"movdqa [edx+16], xmm2\n"
"movdqa [edx+48], xmm3\n"
"movdqa [%[clut]+16], xmm2\n"
"movdqa [%[clut]+48], xmm3\n"
"jmp WriteCLUT_T16_I4_CSM1_End\n"
"WriteUnaligned:\n"
// %edx is offset by 2
"sub edx, 2\n"
// %[clut] is offset by 2
"sub %[clut], 2\n"
"movdqa xmm7, [[s_clut16mask2]]\n" // saves lower 16 bits
"movdqa xmm7, %[s_clut16mask2]\n" // saves lower 16 bits
// have to save interlaced with the old data
"movdqa xmm4, [edx]\n"
"movdqa xmm5, [edx+32]\n"
"movdqa xmm4, [%[clut]]\n"
"movdqa xmm5, [%[clut]+32]\n"
"movhlps xmm1, xmm0\n"
"movlhps xmm0, xmm2\n" // lower 8 colors
@ -528,24 +525,24 @@ End:
"pslld xmm2, 16\n"
"pslld xmm3, 16\n"
"movdqa [edx], xmm0\n"
"movdqa [edx+32], xmm1\n"
"movdqa [%[clut]], xmm0\n"
"movdqa [%[clut]+32], xmm1\n"
"movdqa xmm5, xmm7\n"
"pand xmm7, [edx+16]\n"
"pand xmm5, [edx+48]\n"
"pand xmm7, [%[clut]+16]\n"
"pand xmm5, [%[clut]+48]\n"
"por xmm2, xmm7\n"
"por xmm3, xmm5\n"
"movdqa [edx+16], xmm2\n"
"movdqa [edx+48], xmm3\n"
"movdqa [%[clut]+16], xmm2\n"
"movdqa [%[clut]+48], xmm3\n"
"WriteCLUT_T16_I4_CSM1_End:\n"
"\n"
".att_syntax\n"
: [s_clut16mask] "=m" (s_clut16mask), [s_clut16mask2] "=m" (s_clut16mask2)
: "c" (vm), "d" (clut)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
:
: [vm] "r" (vm), [clut] "r" (clut), [s_clut16mask] "m" (*s_clut16mask), [s_clut16mask2] "m" (*s_clut16mask2)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
);
#endif // _MSC_VER
}
@ -718,15 +715,15 @@ Z16Loop:
}
#else // _MSC_VER
__asm__(".intel_syntax\n"
__asm__ __volatile__(".intel_syntax\n"
"pxor %%xmm7, %%xmm7\n"
"Z16Loop:\n"
// unpack 64 bytes at a time
"movdqa %%xmm0, [%0]\n"
"movdqa %%xmm2, [%0+16]\n"
"movdqa %%xmm4, [%0+32]\n"
"movdqa %%xmm6, [%0+48]\n"
"movdqa %%xmm0, [%[src]]\n"
"movdqa %%xmm2, [%[src]+16]\n"
"movdqa %%xmm4, [%[src]+32]\n"
"movdqa %%xmm6, [%[src]+48]\n"
"movdqa %%xmm1, %%xmm0\n"
"movdqa %%xmm3, %%xmm2\n"
@ -738,35 +735,35 @@ Z16Loop:
"punpckhwd %%xmm3, %%xmm7\n"
// start saving
"movdqa [%1], %%xmm0\n"
"movdqa [%1+16], %%xmm1\n"
"movdqa [%[dst]], %%xmm0\n"
"movdqa [%[dst]+16], %%xmm1\n"
"punpcklwd %%xmm4, %%xmm7\n"
"punpckhwd %%xmm5, %%xmm7\n"
"movdqa [%1+32], %%xmm2\n"
"movdqa [%1+48], %%xmm3\n"
"movdqa [%[dst]+32], %%xmm2\n"
"movdqa [%[dst]+48], %%xmm3\n"
"movdqa %%xmm0, %%xmm6\n"
"punpcklwd %%xmm6, %%xmm7\n"
"movdqa [%1+64], %%xmm4\n"
"movdqa [%1+80], %%xmm5\n"
"movdqa [%[dst]+64], %%xmm4\n"
"movdqa [%[dst]+80], %%xmm5\n"
"punpckhwd %%xmm0, %%xmm7\n"
"movdqa [%1+96], %%xmm6\n"
"movdqa [%1+112], %%xmm0\n"
"movdqa [%[dst]+96], %%xmm6\n"
"movdqa [%[dst]+112], %%xmm0\n"
"add %0, 64\n"
"add %1, 128\n"
"sub %2, 1\n"
"add %[src], 64\n"
"add %[dst], 128\n"
"sub %[iters], 1\n"
"jne Z16Loop\n"
".att_syntax\n"
: "=r"(src), "=r"(dst), "=r"(iters)
: "0"(src), "1"(dst), "2"(iters)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
: "=&r"(src), "=&r"(dst), "=&r"(iters)
: [src] "0"(src), [dst] "1"(dst), [iters] "2"(iters)
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
);
#endif // _MSC_VER
}

View File

@ -803,15 +803,10 @@ void ZeroGS::KickSprite()
int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
// sprite is too small and AA shows lines (tek4)
if (s_AAx)
{
gs.gsvertex[last].x += 4;
if (s_AAy) gs.gsvertex[last].y += 4;
}
// sprite is too small and AA shows lines (tek4, Mana Khemia)
gs.gsvertex[last].x += (4*s_AAx);
gs.gsvertex[last].y += (4*s_AAy);
// might be bad sprite (KH dialog text)
//if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )

View File

@ -96,7 +96,7 @@ extern u32 ptexBilinearBlocks;
// State parameters
#ifdef DEVBUILD
#ifdef ZEROGS_DEVBUILD
extern char* EFFECT_NAME;
extern char* EFFECT_DIR;
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;