mirror of https://github.com/PCSX2/pcsx2.git
ReorderingMTGS: Sync with trunk!
git-svn-id: http://pcsx2.googlecode.com/svn/branches/ReorderingMTGS@3523 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
commit
9ba58e1398
|
@ -7,6 +7,19 @@ cmake_minimum_required(VERSION 2.6)
|
|||
# Variable to check that people use the good file
|
||||
set(TOP_CMAKE_WAS_SOURCED TRUE)
|
||||
|
||||
# PCSX2 only builds as a 32-bit program. Stop the configuration with a clear
# message on a 64-bit toolchain instead of letting the compilation fail later.
# CMAKE_SIZEOF_VOID_P is the size of a pointer in bytes, so compare it
# numerically rather than with a regular expression.
# NOTE(review): CMAKE_SIZEOF_VOID_P is only populated once a language has been
# enabled (project()/enable_language()) -- confirm this check runs after that.
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    message(FATAL_ERROR "
    PCSX2 does not support 64-bit environments. Please install a 32-bit chroot or a 32-bit OS.
    There are no plans to support the 64-bit architecture in the future either.
    It would need a complete rewrite of the core emulator and a lot of time.

    However, once Linux distributions properly support multi-arch packages, it will
    at least be possible to easily compile and install PCSX2 without too much hassle (the chroot environment).")
endif()
|
||||
|
||||
|
||||
# set module path
|
||||
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ void gen_memcpy_vibes() {
|
|||
}
|
||||
const xRegisterSSE xmm_t(x);
|
||||
xMOVAPS (xmm_t, ptr32[edx+off]);
|
||||
xMOVAPS(ptr32[ecx+off], xmm_t);
|
||||
xMOVNTPS(ptr32[ecx+off], xmm_t);
|
||||
}
|
||||
|
||||
_memcpy_vibes[0] = (_memCpyCall)xGetPtr();
|
||||
|
|
|
@ -63,11 +63,11 @@ struct SSE2_Tables
|
|||
u16 Y_mask[8]; // offset -32
|
||||
u16 round_1bit[8]; // offset -16
|
||||
|
||||
u16 Y_coefficients[8]; // offset 0
|
||||
u16 GCr_coefficients[8];// offset 16
|
||||
u16 GCb_coefficients[8];// offset 32
|
||||
u16 RCr_coefficients[8];// offset 48
|
||||
u16 BCb_coefficients[8];// offset 64
|
||||
s16 Y_coefficients[8]; // offset 0
|
||||
s16 GCr_coefficients[8];// offset 16
|
||||
s16 GCb_coefficients[8];// offset 32
|
||||
s16 RCr_coefficients[8];// offset 48
|
||||
s16 BCb_coefficients[8];// offset 64
|
||||
};
|
||||
|
||||
enum
|
||||
|
|
|
@ -98,10 +98,12 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (workReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
|
@ -110,10 +112,12 @@ else {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (workReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV8 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
|
@ -133,10 +137,12 @@ void VifUnpackSSE_Base::xUPK_V2_32() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
|
@ -145,10 +151,12 @@ else {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV16 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
|
@ -159,32 +167,33 @@ else {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_V3_32() const {
|
||||
xMOV128 (destReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
xPSHUF.D (destReg, destReg, 0xA4); //v2v2v1v0
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_32() const {
|
||||
|
@ -192,10 +201,12 @@ void VifUnpackSSE_Base::xUPK_V4_32() const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
|
@ -203,10 +214,12 @@ else {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
|
|
|
@ -245,13 +245,14 @@ public:
|
|||
{
|
||||
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 2;
|
||||
|
||||
const GSVector4 ys(0.098f, 0.504f, 0.257f, 0.0f);
|
||||
const GSVector4 us(0.439f / 2, -0.291f / 2, -0.148f / 2, 0.0f);
|
||||
const GSVector4 vs(-0.071f / 2, -0.368f / 2, 0.439f / 2, 0.0f);
|
||||
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
|
||||
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
|
||||
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
|
||||
const GSVector4 offset(16, 128, 16, 128);
|
||||
|
||||
if(rgba)
|
||||
{
|
||||
if (!rgba)
|
||||
ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
|
||||
|
||||
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
||||
{
|
||||
uint32* s = (uint32*)src;
|
||||
|
@ -263,8 +264,8 @@ public:
|
|||
GSVector4 c1 = GSVector4(s[i + 1]);
|
||||
GSVector4 c2 = c0 + c1;
|
||||
|
||||
GSVector4 lo = (c0 * ys).hadd(c2 * vs);
|
||||
GSVector4 hi = (c1 * ys).hadd(c2 * us);
|
||||
GSVector4 lo = (c0 * ys).hadd(c2 * us);
|
||||
GSVector4 hi = (c1 * ys).hadd(c2 * vs);
|
||||
|
||||
GSVector4 c = lo.hadd(hi) + offset;
|
||||
|
||||
|
@ -272,29 +273,6 @@ public:
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
||||
{
|
||||
uint32* s = (uint32*)src;
|
||||
uint16* d = (uint16*)dst;
|
||||
|
||||
for(int i = 0; i < w; i += 2)
|
||||
{
|
||||
GSVector4 c0 = GSVector4(s[i + 0]).zyxw();
|
||||
GSVector4 c1 = GSVector4(s[i + 1]).zyxw();
|
||||
GSVector4 c2 = c0 + c1;
|
||||
|
||||
GSVector4 lo = (c0 * ys).hadd(c2 * vs);
|
||||
GSVector4 hi = (c1 * ys).hadd(c2 * us);
|
||||
|
||||
GSVector4 c = lo.hadd(hi) + offset;
|
||||
|
||||
*((uint32*)&d[i]) = GSVector4i(c).rgba32();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(mt.subtype == MEDIASUBTYPE_RGB32)
|
||||
{
|
||||
int dstpitch = ((VIDEOINFOHEADER*)mt.Format())->bmiHeader.biWidth * 4;
|
||||
|
|
|
@ -126,6 +126,53 @@ void CALLBACK GSsetLogDir(const char* dir)
|
|||
ZZLog::SetDir(dir);
|
||||
}
|
||||
|
||||
// Writes one log line through ZZLog::WriteLn for every flag that is set in
// the given hack set; flags that are clear produce no output. The flags are
// checked in a fixed order, so the log lines always appear in that order.
void ReportHacks(gameHacks hacks)
{
	// Local shorthand: log `text` when member `flag` of `hacks` is non-zero.
	// The message remains a string literal at every use.
#define REPORT_HACK(flag, text) \
	do { if (hacks.flag) ZZLog::WriteLn(text); } while (0)

	REPORT_HACK(texture_targs,     "'Texture targs' hack enabled.");
	REPORT_HACK(auto_reset,        "'Auto reset' hack enabled.");
	REPORT_HACK(interlace_2x,      "'Interlace 2x' hack enabled.");
	REPORT_HACK(texa,              "'Texa' hack enabled.");
	REPORT_HACK(no_target_resolve, "'No target resolve' hack enabled.");
	REPORT_HACK(exact_color,       "Exact color hack enabled.");
	REPORT_HACK(no_color_clamp,    "'No color clamp' hack enabled.");
	REPORT_HACK(no_alpha_fail,     "'No alpha fail' hack enabled.");
	REPORT_HACK(no_depth_update,   "'No depth update' hack enabled.");
	REPORT_HACK(quick_resolve_1,   "'Quick resolve 1' enabled.");
	REPORT_HACK(no_quick_resolve,  "'No Quick resolve' hack enabled.");
	REPORT_HACK(no_target_clut,    "'No target clut' hack enabled.");
	// NOTE(review): the flag is named vss_hack_off but the message says
	// "enabled" -- confirm the intended wording.
	REPORT_HACK(vss_hack_off,      "VSS hack enabled.");
	REPORT_HACK(no_depth_resolve,  "'No depth resolve' hack enabled.");
	REPORT_HACK(full_16_bit_res,   "'Full 16 bit resolution' hack enabled.");
	REPORT_HACK(resolve_promoted,  "'Resolve promoted' hack enabled.");
	REPORT_HACK(fast_update,       "'Fast update' hack enabled.");
	REPORT_HACK(no_alpha_test,     "'No alpha test' hack enabled.");
	REPORT_HACK(disable_mrt_depth, "'Disable mrt depth' hack enabled.");
	REPORT_HACK(args_32_bit,       "'Args 32 bit' hack enabled.");
	REPORT_HACK(path3,             "'Path3' hack enabled.");
	REPORT_HACK(parallel_context,  "'Parallel context' hack enabled.");
	REPORT_HACK(xenosaga_spec,     "'Xenosaga spec' hack enabled.");
	REPORT_HACK(partial_pointers,  "'Partial pointers' hack enabled.");
	REPORT_HACK(partial_depth,     "'Partial depth' hack enabled.");
	REPORT_HACK(reget,             "Reget hack enabled.");
	REPORT_HACK(gust,              "Gust hack enabled.");
	REPORT_HACK(no_logz,           "'No logz' hack enabled.");

#undef REPORT_HACK
}
|
||||
|
||||
void ListHacks()
|
||||
{
|
||||
if (conf.def_hacks._u32 != 0)
|
||||
{
|
||||
ZZLog::WriteLn("AutoEnabling these hacks:");
|
||||
ReportHacks(conf.def_hacks);
|
||||
}
|
||||
|
||||
if (conf.hacks._u32 != 0)
|
||||
{
|
||||
ZZLog::WriteLn("You've manually enabled these hacks:");
|
||||
ReportHacks(conf.hacks);
|
||||
}
|
||||
}
|
||||
|
||||
void CALLBACK GSsetGameCRC(int crc, int options)
|
||||
{
|
||||
// TEXDESTROY_THRESH starts out at 16.
|
||||
|
@ -133,15 +180,15 @@ void CALLBACK GSsetGameCRC(int crc, int options)
|
|||
conf.mrtdepth = (conf.settings().disable_mrt_depth != 0);
|
||||
|
||||
if (!conf.mrtdepth)
|
||||
ZZLog::Error_Log("Disabling MRT depth writing.");
|
||||
ZZLog::WriteLn("Disabling MRT depth writing.");
|
||||
else
|
||||
ZZLog::Error_Log("Enabling MRT depth writing.");
|
||||
ZZLog::WriteLn("Enabling MRT depth writing.");
|
||||
|
||||
bool CRCValueChanged = (g_LastCRC != crc);
|
||||
|
||||
g_LastCRC = crc;
|
||||
|
||||
ZZLog::Error_Log("CRC = %x", crc);
|
||||
if (crc != 0) ZZLog::WriteLn("Current game CRC is %x.", crc);
|
||||
|
||||
if (CRCValueChanged && (crc != 0))
|
||||
{
|
||||
|
@ -149,17 +196,27 @@ void CALLBACK GSsetGameCRC(int crc, int options)
|
|||
{
|
||||
if (crc_game_list[i].crc == crc)
|
||||
{
|
||||
if (crc_game_list[i].v_thresh > 0) VALIDATE_THRESH = crc_game_list[i].v_thresh;
|
||||
if (crc_game_list[i].t_thresh > 0) TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
|
||||
ZZLog::WriteLn("Found CRC[%x] in crc game list.", crc);
|
||||
|
||||
if (crc_game_list[i].v_thresh > 0)
|
||||
{
|
||||
VALIDATE_THRESH = crc_game_list[i].v_thresh;
|
||||
ZZLog::WriteLn("Setting VALIDATE_THRESH to %d", VALIDATE_THRESH);
|
||||
}
|
||||
|
||||
if (crc_game_list[i].t_thresh > 0)
|
||||
{
|
||||
TEXDESTROY_THRESH = crc_game_list[i].t_thresh;
|
||||
ZZLog::WriteLn("Setting TEXDESTROY_THRESH to %d", TEXDESTROY_THRESH);
|
||||
}
|
||||
|
||||
conf.def_hacks._u32 |= crc_game_list[i].flags;
|
||||
|
||||
ZZLog::Error_Log("Found CRC[%x] in crc game list.", crc);
|
||||
|
||||
ListHacks();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
ListHacks();
|
||||
}
|
||||
|
||||
void CALLBACK GSsetFrameSkip(int frameskip)
|
||||
|
|
|
@ -27,7 +27,6 @@
|
|||
#include "zerogs.h"
|
||||
#include "targets.h"
|
||||
|
||||
|
||||
namespace ZeroGS
|
||||
{
|
||||
extern CRangeManager s_RangeMngr; // manages overwritten memory
|
||||
|
@ -249,8 +248,10 @@
|
|||
}
|
||||
|
||||
template <class T>
|
||||
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
|
||||
void TransferLocalHost(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
|
||||
{
|
||||
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
|
||||
|
||||
int i = x, j = y;
|
||||
T* pbuf = (T*)pbyMem;
|
||||
u32 nSize = nQWordSize * 16 / sizeof(T);
|
||||
|
@ -275,8 +276,10 @@
|
|||
}
|
||||
}
|
||||
|
||||
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart, _readPixel_0 rp)
|
||||
void TransferLocalHost_24(void* pbyMem, u32 nQWordSize, int& x, int& y, u8 *pstart)
|
||||
{
|
||||
_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
|
||||
|
||||
int i = x, j = y;
|
||||
u8* pbuf = (u8*)pbyMem;
|
||||
u32 nSize = nQWordSize * 16 / 3;
|
||||
|
@ -312,65 +315,113 @@
|
|||
assert(gs.imageTransfer == 1);
|
||||
|
||||
u8* pstart = g_pbyGSMemory + 256 * gs.srcbuf.bp;
|
||||
int i = gs.imageY, j = gs.imageX;
|
||||
|
||||
switch (gs.srcbuf.psm)
|
||||
switch(PSMT_BITMODE(gs.srcbuf.psm))
|
||||
{
|
||||
|
||||
case PSMCT32:
|
||||
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32_0);
|
||||
break;
|
||||
|
||||
case PSMCT24:
|
||||
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24_0);
|
||||
break;
|
||||
|
||||
case PSMCT16:
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16_0);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16S_0);
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8_0);
|
||||
break;
|
||||
|
||||
case PSMT8H:
|
||||
TransferLocalHost<u8>(pbyMem, nQWordSize, i, j, pstart, readPixel8H_0);
|
||||
break;
|
||||
|
||||
case PSMT32Z:
|
||||
TransferLocalHost<u32>(pbyMem, nQWordSize, i, j, pstart, readPixel32Z_0);
|
||||
break;
|
||||
|
||||
case PSMT24Z:
|
||||
TransferLocalHost_24(pbyMem, nQWordSize, i, j, pstart, readPixel24Z_0);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16Z_0);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
TransferLocalHost<u16>(pbyMem, nQWordSize, i, j, pstart, readPixel16SZ_0);
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
case 0: TransferLocalHost<u32>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
|
||||
case 1: TransferLocalHost_24(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
|
||||
case 2: TransferLocalHost<u16>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
|
||||
case 3: TransferLocalHost<u8>(pbyMem, nQWordSize, gs.imageY, gs.imageX, pstart); break;
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
gs.imageY = i;
|
||||
gs.imageX = j;
|
||||
|
||||
if (gs.imageY >= gs.imageEndY)
|
||||
{
|
||||
ZZLog::Error_Log("gs.imageY >= gs.imageEndY!");
|
||||
assert(gs.imageY == gs.imageEndY);
|
||||
gs.imageTransfer = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies a gs.imageWnew x gs.imageHnew rectangle pixel by pixel from the
// source buffer (origin gs.trxpos.sx/sy) to the destination buffer (origin
// gs.trxpos.dx/dy), using the per-format read/write accessors. Coordinates
// wrap at 2048 in both directions.
__forceinline void _TransferLocalLocal()
{
	//ZZLog::Error_Log("TransferLocalLocal(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);

	// Pixels are read from the source buffer and written to the destination
	// buffer, so the reader has to match the source format and the writer the
	// destination format.
	_readPixel_0 rp = readPixelFun_0[gs.srcbuf.psm];
	_writePixel_0 wp = writePixelFun_0[gs.dstbuf.psm];

	// The accessor tables contain empty slots for format codes that have no
	// accessor; there is nothing that can be copied in that case.
	if (!rp || !wp) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
	u32 widthlimit = 4;
	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	// Formats with bit mode 0 are copied two pixels per step, all others four.
	if (PSMT_BITMODE(gs.srcbuf.psm) == 0) widthlimit = 2;

	// Width check carried over unchanged from the macro this function replaces.
	// NOTE(review): this tests a single bit of the width, not "multiple of
	// widthlimit" -- confirm that is the intended restriction.
	if ((gs.imageWnew & widthlimit) != 0) return;

	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; i++, i2++)
	{
		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += widthlimit, j2 += widthlimit)
		{
			// Copy the widthlimit (2 or 4) pixels of this step.
			for(int k = 0; k < (int)widthlimit; k++)
			{
				wp(pDstBuf, (j2+k)%2048, i2%2048,
					rp(pSrcBuf, (j+k)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw);
			}
		}
	}
}
|
||||
|
||||
// Copies a gs.imageWnew x gs.imageHnew rectangle of 4-bit pixels from the
// source buffer (origin gs.trxpos.sx/sy) to the destination buffer (origin
// gs.trxpos.dx/dy). Pixels are handled in groups of eight, so the width must
// be a multiple of 8. Coordinates wrap at 2048 in both directions.
__forceinline void _TransferLocalLocal_4()
{
	//ZZLog::Error_Log("TransferLocalLocal_4(0x%x, 0x%x)", gs.srcbuf.psm, gs.dstbuf.psm);
	_getPixelAddress_0 gsp = getPixelFun_0[gs.srcbuf.psm];
	_getPixelAddress_0 gdp = getPixelFun_0[gs.dstbuf.psm];

	// The address table contains empty slots for format codes that have no
	// helper; there is nothing that can be copied in that case.
	if (!gsp || !gdp) return;

	u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
	u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
	u32 maxX = gs.trxpos.sx + gs.imageWnew;
	u32 maxY = gs.trxpos.sy + gs.imageHnew;

	assert((gs.imageWnew % 8) == 0);

	for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < maxY; ++i, ++i2)
	{
		for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < maxX; j += 8, j2 += 8)
		{
			/* NOTE: the 2 consecutive 4-bit values are NOT in the same byte */
			// NOTE(review): the buffers are indexed directly with the value
			// returned by the address helper -- confirm it is a byte offset
			// for the 4-bit formats.

			// Visit all eight pixels of the group (offsets 0..7). Even offsets
			// replace the low nibble of the destination byte, odd offsets the
			// high nibble; the other nibble of the destination is preserved.
			for(int k = 0; k < 8; ++k)
			{
				const u8 take = (k & 1) ? 0xf0 : 0x0f;
				const u8 keep = (k & 1) ? 0x0f : 0xf0;

				u32 read = gsp((j+k)%2048, i%2048, gs.srcbuf.bw);
				u32 write = gdp((j2+k)%2048, i2%2048, gs.dstbuf.bw);
				pDstBuf[write] = (pDstBuf[write]&keep)|(pSrcBuf[read]&take);
			}
		}
	}
}
|
||||
|
||||
// dir depends on trxpos.dirx & trxpos.diry
|
||||
void TransferLocalLocal()
|
||||
{
|
||||
|
@ -409,284 +460,18 @@
|
|||
}
|
||||
}
|
||||
|
||||
u8* pSrcBuf = g_pbyGSMemory + gs.srcbuf.bp * 256;
|
||||
u8* pDstBuf = g_pbyGSMemory + gs.dstbuf.bp * 256;
|
||||
|
||||
#define TRANSFERLOCALLOCAL(srcpsm, dstpsm, widthlimit) { \
|
||||
if( (gs.imageWnew&widthlimit)!=0 ) break; \
|
||||
assert( (gs.imageWnew&widthlimit)==0 && widthlimit <= 4); \
|
||||
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; i++, i2++) { \
|
||||
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=widthlimit, j2+=widthlimit) { \
|
||||
\
|
||||
writePixel##dstpsm##_0(pDstBuf, j2%2048, i2%2048, \
|
||||
readPixel##srcpsm##_0(pSrcBuf, j%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 1 ) { \
|
||||
writePixel##dstpsm##_0(pDstBuf, (j2+1)%2048, i2%2048, \
|
||||
readPixel##srcpsm##_0(pSrcBuf, (j+1)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 2 ) { \
|
||||
writePixel##dstpsm##_0(pDstBuf, (j2+2)%2048, i2%2048, \
|
||||
readPixel##srcpsm##_0(pSrcBuf, (j+2)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
|
||||
\
|
||||
if( widthlimit > 3 ) { \
|
||||
writePixel##dstpsm##_0(pDstBuf, (j2+3)%2048, i2%2048, \
|
||||
readPixel##srcpsm##_0(pSrcBuf, (j+3)%2048, i%2048, gs.srcbuf.bw), gs.dstbuf.bw); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define TRANSFERLOCALLOCAL_4(srcpsm, dstpsm) { \
|
||||
assert( (gs.imageWnew%8) == 0 ); \
|
||||
for(int i = gs.trxpos.sy, i2 = gs.trxpos.dy; i < gs.trxpos.sy+gs.imageHnew; ++i, ++i2) { \
|
||||
for(int j = gs.trxpos.sx, j2 = gs.trxpos.dx; j < gs.trxpos.sx+gs.imageWnew; j+=8, j2+=8) { \
|
||||
/* NOTE: the 2 conseq 4bit values are in NOT in the same byte */ \
|
||||
u32 read = getPixelAddress##srcpsm##_0(j%2048, i%2048, gs.srcbuf.bw); \
|
||||
u32 write = getPixelAddress##dstpsm##_0(j2%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+1)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+1)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+2)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+2)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0xf0)|(pSrcBuf[read]&0x0f); \
|
||||
\
|
||||
read = getPixelAddress##srcpsm##_0((j+3)%2048, i%2048, gs.srcbuf.bw); \
|
||||
write = getPixelAddress##dstpsm##_0((j2+3)%2048, i2%2048, gs.dstbuf.bw); \
|
||||
pDstBuf[write] = (pDstBuf[write]&0x0f)|(pSrcBuf[read]&0xf0); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
|
||||
switch (gs.srcbuf.psm)
|
||||
if (PSMT_BITMODE(gs.srcbuf.psm) != 4)
|
||||
{
|
||||
case PSMCT32:
|
||||
if (gs.dstbuf.psm == PSMCT32)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(32, 32, 2);
|
||||
_TransferLocalLocal();
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(32, 32Z, 2);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMCT24:
|
||||
if (gs.dstbuf.psm == PSMCT24)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(24, 24, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(24, 24Z, 4);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMCT16:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMCT16:
|
||||
TRANSFERLOCALLOCAL(16, 16, 4);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
TRANSFERLOCALLOCAL(16, 16S, 4);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
TRANSFERLOCALLOCAL(16, 16Z, 4);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
TRANSFERLOCALLOCAL(16, 16SZ, 4);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMCT16:
|
||||
TRANSFERLOCALLOCAL(16S, 16, 4);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
TRANSFERLOCALLOCAL(16S, 16S, 4);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
TRANSFERLOCALLOCAL(16S, 16Z, 4);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
TRANSFERLOCALLOCAL(16S, 16SZ, 4);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT8:
|
||||
if (gs.dstbuf.psm == PSMT8)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(8, 8, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(8, 8H, 4);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
|
||||
case PSMT4:
|
||||
TRANSFERLOCALLOCAL_4(4, 4);
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
TRANSFERLOCALLOCAL_4(4, 4HL);
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
TRANSFERLOCALLOCAL_4(4, 4HH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT8H:
|
||||
if (gs.dstbuf.psm == PSMT8)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(8H, 8, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(8H, 8H, 4);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMT4:
|
||||
TRANSFERLOCALLOCAL_4(4HL, 4);
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
TRANSFERLOCALLOCAL_4(4HL, 4HL);
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
TRANSFERLOCALLOCAL_4(4HL, 4HH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMT4:
|
||||
TRANSFERLOCALLOCAL_4(4HH, 4);
|
||||
break;
|
||||
|
||||
case PSMT4HL:
|
||||
TRANSFERLOCALLOCAL_4(4HH, 4HL);
|
||||
break;
|
||||
|
||||
case PSMT4HH:
|
||||
TRANSFERLOCALLOCAL_4(4HH, 4HH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT32Z:
|
||||
if (gs.dstbuf.psm == PSMCT32)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(32Z, 32, 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(32Z, 32Z, 2);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT24Z:
|
||||
if (gs.dstbuf.psm == PSMCT24)
|
||||
{
|
||||
TRANSFERLOCALLOCAL(24Z, 24, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
TRANSFERLOCALLOCAL(24Z, 24Z, 4);
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMCT16:
|
||||
TRANSFERLOCALLOCAL(16Z, 16, 4);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
TRANSFERLOCALLOCAL(16Z, 16S, 4);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
TRANSFERLOCALLOCAL(16Z, 16Z, 4);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
TRANSFERLOCALLOCAL(16Z, 16SZ, 4);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
switch (gs.dstbuf.psm)
|
||||
{
|
||||
case PSMCT16:
|
||||
TRANSFERLOCALLOCAL(16SZ, 16, 4);
|
||||
break;
|
||||
|
||||
case PSMCT16S:
|
||||
TRANSFERLOCALLOCAL(16SZ, 16S, 4);
|
||||
break;
|
||||
|
||||
case PSMT16Z:
|
||||
TRANSFERLOCALLOCAL(16SZ, 16Z, 4);
|
||||
break;
|
||||
|
||||
case PSMT16SZ:
|
||||
TRANSFERLOCALLOCAL(16SZ, 16SZ, 4);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
_TransferLocalLocal_4();
|
||||
}
|
||||
|
||||
g_MemTargs.ClearRange(dststart, dstend);
|
||||
|
||||
#ifdef DEVBUILD
|
||||
#ifdef ZEROGS_DEVBUILD
|
||||
|
||||
if (g_bSaveTrans)
|
||||
{
|
||||
|
|
|
@ -44,6 +44,10 @@ typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize);
|
|||
typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize);
|
||||
typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask);
|
||||
|
||||
extern _getPixelAddress_0 getPixelFun_0[64];
|
||||
extern _writePixel_0 writePixelFun_0[64];
|
||||
extern _readPixel_0 readPixelFun_0[64];
|
||||
|
||||
enum Psm_Size
|
||||
{
|
||||
PSM_ = 0,
|
||||
|
@ -269,8 +273,6 @@ static __forceinline u32 getPixelAddress16SZ_0(int x, int y, u32 bw)
|
|||
#define getPixelAddress_0(psm,x,y,bw) getPixelAddress##psm##_0(x,y,bw)
|
||||
#define getPixelAddress(psm,x,y,bp,bw) getPixelAddress##psm##(x,y,bp,bw)
|
||||
|
||||
|
||||
|
||||
static __forceinline void writePixel32(void* pmem, int x, int y, u32 pixel, u32 bp, u32 bw)
|
||||
{
|
||||
((u32*)pmem)[getPixelAddress32(x, y, bp, bw)] = pixel;
|
||||
|
@ -511,7 +513,6 @@ static __forceinline void writePixel16SZ_0(void* pmem, int x, int y, u32 pixel,
|
|||
((u16*)pmem)[getPixelAddress16SZ_0(x, y, bw)] = pixel;
|
||||
}
|
||||
|
||||
|
||||
///////////////
|
||||
|
||||
static __forceinline u32 readPixel32_0(const void* pmem, int x, int y, u32 bw)
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
|
||||
#include "GS.h"
|
||||
#include "Mem.h"
|
||||
|
||||
u32 g_blockTable32[4][8] =
|
||||
{
|
||||
|
@ -247,3 +248,52 @@ u32 g_pageTable16Z[64][64];
|
|||
u32 g_pageTable16SZ[64][64];
|
||||
u32 g_pageTable8[64][128];
|
||||
u32 g_pageTable4[128][128];
|
||||
|
||||
/* PSM reference array
|
||||
{ 32, 24, 16, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, 16S, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, 8, 4, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, 8H, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, 4HL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, 4HH, NULL, NULL, NULL,
|
||||
32Z, 24Z, 16Z, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, 16SZ, NULL, NULL, NULL, NULL, NULL };
|
||||
*/
|
||||
|
||||
// Pixel-address helpers, looked up by pixel storage format code (the table is
// indexed with gs.srcbuf.psm / gs.dstbuf.psm). Slots for codes without a
// helper are NULL. The comment at the start of each row is the index of the
// row's first entry.
_getPixelAddress_0 getPixelFun_0[64] =
{
	/* 0x00 */ getPixelAddress32_0, getPixelAddress24_0, getPixelAddress16_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x08 */ NULL, NULL, getPixelAddress16S_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x10 */ NULL, NULL, NULL, getPixelAddress8_0, getPixelAddress4_0, NULL, NULL, NULL,
	/* 0x18 */ NULL, NULL, NULL, getPixelAddress8H_0, NULL, NULL, NULL, NULL,
	/* 0x20 */ NULL, NULL, NULL, NULL, getPixelAddress4HL_0, NULL, NULL, NULL,
	/* 0x28 */ NULL, NULL, NULL, NULL, getPixelAddress4HH_0, NULL, NULL, NULL,
	/* 0x30 */ getPixelAddress32Z_0, getPixelAddress24Z_0, getPixelAddress16Z_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x38 */ NULL, NULL, getPixelAddress16SZ_0, NULL, NULL, NULL, NULL, NULL
};
|
||||
|
||||
// Pixel writers, looked up by pixel storage format code. Slots for codes
// without a writer are NULL. The comment at the start of each row is the
// index of the row's first entry.
_writePixel_0 writePixelFun_0[64] =
{
	/* 0x00 */ writePixel32_0, writePixel24_0, writePixel16_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x08 */ NULL, NULL, writePixel16S_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x10 */ NULL, NULL, NULL, writePixel8_0, writePixel4_0, NULL, NULL, NULL,
	/* 0x18 */ NULL, NULL, NULL, writePixel8H_0, NULL, NULL, NULL, NULL,
	/* 0x20 */ NULL, NULL, NULL, NULL, writePixel4HL_0, NULL, NULL, NULL,
	/* 0x28 */ NULL, NULL, NULL, NULL, writePixel4HH_0, NULL, NULL, NULL,
	/* 0x30 */ writePixel32Z_0, writePixel24Z_0, writePixel16Z_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x38 */ NULL, NULL, writePixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};
|
||||
|
||||
// Pixel readers, looked up by pixel storage format code. Slots for codes
// without a reader are NULL. The comment at the start of each row is the
// index of the row's first entry.
_readPixel_0 readPixelFun_0[64] =
{
	/* 0x00 */ readPixel32_0, readPixel24_0, readPixel16_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x08 */ NULL, NULL, readPixel16S_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x10 */ NULL, NULL, NULL, readPixel8_0, readPixel4_0, NULL, NULL, NULL,
	/* 0x18 */ NULL, NULL, NULL, readPixel8H_0, NULL, NULL, NULL, NULL,
	/* 0x20 */ NULL, NULL, NULL, NULL, readPixel4HL_0, NULL, NULL, NULL,
	/* 0x28 */ NULL, NULL, NULL, NULL, readPixel4HH_0, NULL, NULL, NULL,
	/* 0x30 */ readPixel32Z_0, readPixel24Z_0, readPixel16Z_0, NULL, NULL, NULL, NULL, NULL,
	/* 0x38 */ NULL, NULL, readPixel16SZ_0, NULL, NULL, NULL, NULL, NULL
};
|
||||
|
||||
|
||||
|
|
|
@ -125,27 +125,16 @@ void __fastcall GIFPackedRegHandlerUV(u32* data)
|
|||
ZZLog::Greg_Log("Packed UV: 0x%x, 0x%x", r->U, r->V);
|
||||
}
|
||||
|
||||
void __forceinline KICK_VERTEX2()
|
||||
void __forceinline KickVertex(bool adc)
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (++gs.primC >= (int)g_primmult[prim->prim])
|
||||
{
|
||||
if (NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])();
|
||||
if (!adc && NoHighlights(prim->ctxt)) (*ZeroGS::drawfn[prim->prim])();
|
||||
|
||||
gs.primC -= g_primsub[prim->prim];
|
||||
}
|
||||
}
|
||||
|
||||
void __forceinline KICK_VERTEX3()
|
||||
{
|
||||
FUNCLOG
|
||||
|
||||
if (++gs.primC >= (int)g_primmult[prim->prim])
|
||||
{
|
||||
gs.primC -= g_primsub[prim->prim];
|
||||
|
||||
if (prim->prim == 5)
|
||||
if (adc && prim->prim == 5)
|
||||
{
|
||||
/* tri fans need special processing */
|
||||
if (gs.nTriFanVert == gs.primIndex)
|
||||
|
@ -161,14 +150,7 @@ void __fastcall GIFPackedRegHandlerXYZF2(u32* data)
|
|||
gs.add_vertex(r->X, r->Y,r->Z, r->F);
|
||||
|
||||
// Fix Vertexes up later.
|
||||
if (data[3] & 0x8000)
|
||||
{
|
||||
KICK_VERTEX3();
|
||||
}
|
||||
else
|
||||
{
|
||||
KICK_VERTEX2();
|
||||
}
|
||||
KickVertex(!!(r->ADC));
|
||||
ZZLog::Greg_Log("Packed XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
|
||||
}
|
||||
|
||||
|
@ -179,14 +161,7 @@ void __fastcall GIFPackedRegHandlerXYZ2(u32* data)
|
|||
gs.add_vertex(r->X, r->Y,r->Z);
|
||||
|
||||
// Fix Vertexes up later.
|
||||
if (data[3] & 0x8000)
|
||||
{
|
||||
KICK_VERTEX3();
|
||||
}
|
||||
else
|
||||
{
|
||||
KICK_VERTEX2();
|
||||
}
|
||||
KickVertex(!!(r->ADC));
|
||||
ZZLog::Greg_Log("Packed XYZ2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
|
||||
}
|
||||
|
||||
|
@ -285,7 +260,7 @@ void __fastcall GIFRegHandlerXYZF2(u32* data)
|
|||
GIFRegXYZF* r = (GIFRegXYZF*)(data);
|
||||
gs.add_vertex(r->X, r->Y,r->Z, r->F);
|
||||
|
||||
KICK_VERTEX2();
|
||||
KickVertex(false);
|
||||
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
|
||||
}
|
||||
|
||||
|
@ -295,7 +270,7 @@ void __fastcall GIFRegHandlerXYZ2(u32* data)
|
|||
GIFRegXYZ* r = (GIFRegXYZ*)(data);
|
||||
gs.add_vertex(r->X, r->Y,r->Z);
|
||||
|
||||
KICK_VERTEX2();
|
||||
KickVertex(false);
|
||||
ZZLog::Greg_Log("XYZF2: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
|
||||
}
|
||||
|
||||
|
@ -382,7 +357,7 @@ void __fastcall GIFRegHandlerXYZF3(u32* data)
|
|||
GIFRegXYZF* r = (GIFRegXYZF*)(data);
|
||||
gs.add_vertex(r->X, r->Y,r->Z, r->F);
|
||||
|
||||
KICK_VERTEX3();
|
||||
KickVertex(true);
|
||||
ZZLog::Greg_Log("XYZF3: 0x%x, 0x%x, 0x%x, %f", r->X, r->Y, r->Z, r->F);
|
||||
}
|
||||
|
||||
|
@ -392,7 +367,7 @@ void __fastcall GIFRegHandlerXYZ3(u32* data)
|
|||
GIFRegXYZ* r = (GIFRegXYZ*)(data);
|
||||
gs.add_vertex(r->X, r->Y,r->Z);
|
||||
|
||||
KICK_VERTEX3();
|
||||
KickVertex(true);
|
||||
ZZLog::Greg_Log("XYZ3: 0x%x, 0x%x, 0x%x", r->X, r->Y, r->Z);
|
||||
}
|
||||
|
||||
|
@ -915,6 +890,7 @@ void __fastcall GIFRegHandlerTRXREG(u32* data)
|
|||
void __fastcall GIFRegHandlerTRXDIR(u32* data)
|
||||
{
|
||||
FUNCLOG
|
||||
GIFRegTRXDIR* r = (GIFRegTRXDIR*)(data);
|
||||
// Oh dear...
|
||||
|
||||
// terminate any previous transfers
|
||||
|
@ -934,7 +910,7 @@ void __fastcall GIFRegHandlerTRXDIR(u32* data)
|
|||
gs.dstbuf = gs.dstbufnew;
|
||||
gs.trxpos = gs.trxposnew;
|
||||
|
||||
gs.imageTransfer = data[0] & 0x3;
|
||||
gs.imageTransfer = r->XDIR;
|
||||
gs.imageWnew = gs.imageWtemp;
|
||||
gs.imageHnew = gs.imageHtemp;
|
||||
|
||||
|
|
|
@ -83,10 +83,6 @@ static bool SPAM_PASS;
|
|||
#define ZEROGS_DEVBUILD
|
||||
#endif
|
||||
|
||||
#ifdef ZEROGS_DEVBUILD
|
||||
//#define DEVBUILD
|
||||
#endif
|
||||
|
||||
|
||||
// sends a message to output window if assert fails
|
||||
#define BMSG(x, str) { if( !(x) ) { ZZLog::Log(str); ZZLog::Log(str); } }
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
using namespace ZeroGS;
|
||||
|
||||
//------------------ Defines
|
||||
#ifndef DEVBUILD
|
||||
#ifndef ZEROGS_DEVBUILD
|
||||
|
||||
#define INC_GENVARS()
|
||||
#define INC_TEXVARS()
|
||||
|
|
|
@ -42,10 +42,6 @@ extern bool g_bUpdateStencil;
|
|||
# define INC_RESOLVE() ++g_nResolve
|
||||
#endif
|
||||
|
||||
#ifdef DEVBUILD
|
||||
//static int g_bSaveResolved = 0;
|
||||
#endif
|
||||
|
||||
extern int s_nResolved;
|
||||
extern u32 g_nResolve;
|
||||
extern bool g_bSaveTrans;
|
||||
|
@ -250,6 +246,7 @@ void ZeroGS::CRenderTarget::SetTarget(int fbplocal, const Rect2& scissor, int co
|
|||
}
|
||||
|
||||
// set render states
|
||||
// Bleh. I *really* need to fix this. << 3 when setting the scissors, then >> 3 when using them... --Arcum42
|
||||
scissorrect.x = scissor.x0 >> 3;
|
||||
scissorrect.y = (scissor.y0 >> 3) + dy;
|
||||
scissorrect.w = (scissor.x1 >> 3) + 1;
|
||||
|
@ -302,7 +299,7 @@ void ZeroGS::CRenderTarget::Resolve()
|
|||
|
||||
GL_REPORT_ERRORD();
|
||||
|
||||
#if defined(DEVBUILD)
|
||||
#if defined(ZEROGS_DEVBUILD)
|
||||
|
||||
if (g_bSaveResolved)
|
||||
{
|
||||
|
@ -328,7 +325,7 @@ void ZeroGS::CRenderTarget::Resolve(int startrange, int endrange)
|
|||
// flush if necessary
|
||||
FlushIfNecesary(this) ;
|
||||
|
||||
#if defined(DEVBUILD)
|
||||
#if defined(ZEROGS_DEVBUILD)
|
||||
if (g_bSaveResolved)
|
||||
{
|
||||
SaveTexture("resolved.tga", GL_TEXTURE_RECTANGLE_NV, ptex, RW(fbw), RH(fbh));
|
||||
|
@ -2290,10 +2287,12 @@ ZeroGS::CMemoryTarget* ZeroGS::CMemoryTargetMngr::GetMemoryTarget(const tex0Info
|
|||
{
|
||||
// It is not unusual for vector<u8> storage to lack 16-byte alignment, which is destructive for the SSE2
|
||||
// instruction movdqa [%eax], xmm0
|
||||
// The idea would be resise vector to 15 elements, that set ptxedata to aligned position.
|
||||
// The idea would be to resize the vector to 15 elements, which sets ptexdata to an aligned position.
|
||||
// Later we would move eax by 16, so we only need to verify that the first element is aligned
|
||||
// FIXME: As far as I can see, texdata is used only once here, so it has no impact on other code.
|
||||
// Using _aligned_malloc() would probably be preferable.
|
||||
|
||||
// Note: this often happens when changing AA.
|
||||
int disalignment = 16 - ((u32)(uptr)dst) % 16; // This is value of shift. It could be 0 < disalignment <= 15
|
||||
ptexdata = &texdata[disalignment]; // Set pointer to aligned element
|
||||
dst = (u16*)ptexdata;
|
||||
|
|
|
@ -308,13 +308,10 @@ extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut)
|
|||
}
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
PCSX2_ALIGNED16(int s_clut16mask2[4]) = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
|
||||
PCSX2_ALIGNED16(int s_clut16mask[8]) = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
|
||||
static const __aligned16 int s_clut16mask2[4] = { 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff };
|
||||
static const __aligned16 int s_clut16mask[8] = { 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
|
||||
0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff
|
||||
};
|
||||
}
|
||||
|
||||
extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut)
|
||||
{
|
||||
|
@ -433,11 +430,11 @@ WriteUnaligned:
|
|||
End:
|
||||
}
|
||||
#else
|
||||
__asm__(".intel_syntax noprefix\n"
|
||||
"movdqa xmm0, xmmword ptr [ecx]\n"
|
||||
"movdqa xmm1, xmmword ptr [ecx+16]\n"
|
||||
"movdqa xmm2, xmmword ptr [ecx+32]\n"
|
||||
"movdqa xmm3, xmmword ptr [ecx+48]\n"
|
||||
__asm__ __volatile__(".intel_syntax noprefix\n"
|
||||
"movdqa xmm0, xmmword ptr [%[vm]]\n"
|
||||
"movdqa xmm1, xmmword ptr [%[vm]+16]\n"
|
||||
"movdqa xmm2, xmmword ptr [%[vm]+32]\n"
|
||||
"movdqa xmm3, xmmword ptr [%[vm]+48]\n"
|
||||
|
||||
// rearrange
|
||||
"pshuflw xmm0, xmm0, 0x88\n"
|
||||
|
@ -457,14 +454,14 @@ End:
|
|||
|
||||
"pxor xmm6, xmm6\n"
|
||||
|
||||
"test edx, 15\n"
|
||||
"test %[clut], 15\n"
|
||||
"jnz WriteUnaligned\n"
|
||||
|
||||
"movdqa xmm7, [s_clut16mask]\n" // saves upper 16 bits
|
||||
"movdqa xmm7, %[s_clut16mask]\n" // saves upper 16 bits
|
||||
|
||||
// have to save interlaced with the old data
|
||||
"movdqa xmm4, [edx]\n"
|
||||
"movdqa xmm5, [edx+32]\n"
|
||||
"movdqa xmm4, [%[clut]]\n"
|
||||
"movdqa xmm5, [%[clut]+32]\n"
|
||||
"movhlps xmm1, xmm0\n"
|
||||
"movlhps xmm0, xmm2\n"// lower 8 colors
|
||||
|
||||
|
@ -483,29 +480,29 @@ End:
|
|||
"punpckhwd xmm2, xmm6\n"
|
||||
"punpckhwd xmm3, xmm6\n"
|
||||
|
||||
"movdqa [edx], xmm0\n"
|
||||
"movdqa [edx+32], xmm1\n"
|
||||
"movdqa [%[clut]], xmm0\n"
|
||||
"movdqa [%[clut]+32], xmm1\n"
|
||||
|
||||
"movdqa xmm5, xmm7\n"
|
||||
"pand xmm7, [edx+16]\n"
|
||||
"pand xmm5, [edx+48]\n"
|
||||
"pand xmm7, [%[clut]+16]\n"
|
||||
"pand xmm5, [%[clut]+48]\n"
|
||||
|
||||
"por xmm2, xmm7\n"
|
||||
"por xmm3, xmm5\n"
|
||||
|
||||
"movdqa [edx+16], xmm2\n"
|
||||
"movdqa [edx+48], xmm3\n"
|
||||
"movdqa [%[clut]+16], xmm2\n"
|
||||
"movdqa [%[clut]+48], xmm3\n"
|
||||
"jmp WriteCLUT_T16_I4_CSM1_End\n"
|
||||
|
||||
"WriteUnaligned:\n"
|
||||
// %edx is offset by 2
|
||||
"sub edx, 2\n"
|
||||
// %[clut] is offset by 2
|
||||
"sub %[clut], 2\n"
|
||||
|
||||
"movdqa xmm7, [[s_clut16mask2]]\n" // saves lower 16 bits
|
||||
"movdqa xmm7, %[s_clut16mask2]\n" // saves lower 16 bits
|
||||
|
||||
// have to save interlaced with the old data
|
||||
"movdqa xmm4, [edx]\n"
|
||||
"movdqa xmm5, [edx+32]\n"
|
||||
"movdqa xmm4, [%[clut]]\n"
|
||||
"movdqa xmm5, [%[clut]+32]\n"
|
||||
"movhlps xmm1, xmm0\n"
|
||||
"movlhps xmm0, xmm2\n" // lower 8 colors
|
||||
|
||||
|
@ -528,24 +525,24 @@ End:
|
|||
"pslld xmm2, 16\n"
|
||||
"pslld xmm3, 16\n"
|
||||
|
||||
"movdqa [edx], xmm0\n"
|
||||
"movdqa [edx+32], xmm1\n"
|
||||
"movdqa [%[clut]], xmm0\n"
|
||||
"movdqa [%[clut]+32], xmm1\n"
|
||||
|
||||
"movdqa xmm5, xmm7\n"
|
||||
"pand xmm7, [edx+16]\n"
|
||||
"pand xmm5, [edx+48]\n"
|
||||
"pand xmm7, [%[clut]+16]\n"
|
||||
"pand xmm5, [%[clut]+48]\n"
|
||||
|
||||
"por xmm2, xmm7\n"
|
||||
"por xmm3, xmm5\n"
|
||||
|
||||
"movdqa [edx+16], xmm2\n"
|
||||
"movdqa [edx+48], xmm3\n"
|
||||
"movdqa [%[clut]+16], xmm2\n"
|
||||
"movdqa [%[clut]+48], xmm3\n"
|
||||
"WriteCLUT_T16_I4_CSM1_End:\n"
|
||||
"\n"
|
||||
".att_syntax\n"
|
||||
: [s_clut16mask] "=m" (s_clut16mask), [s_clut16mask2] "=m" (s_clut16mask2)
|
||||
: "c" (vm), "d" (clut)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
:
|
||||
: [vm] "r" (vm), [clut] "r" (clut), [s_clut16mask] "m" (*s_clut16mask), [s_clut16mask2] "m" (*s_clut16mask2)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
|
||||
);
|
||||
#endif // _MSC_VER
|
||||
}
|
||||
|
@ -718,15 +715,15 @@ Z16Loop:
|
|||
}
|
||||
#else // _MSC_VER
|
||||
|
||||
__asm__(".intel_syntax\n"
|
||||
__asm__ __volatile__(".intel_syntax\n"
|
||||
"pxor %%xmm7, %%xmm7\n"
|
||||
|
||||
"Z16Loop:\n"
|
||||
// unpack 64 bytes at a time
|
||||
"movdqa %%xmm0, [%0]\n"
|
||||
"movdqa %%xmm2, [%0+16]\n"
|
||||
"movdqa %%xmm4, [%0+32]\n"
|
||||
"movdqa %%xmm6, [%0+48]\n"
|
||||
"movdqa %%xmm0, [%[src]]\n"
|
||||
"movdqa %%xmm2, [%[src]+16]\n"
|
||||
"movdqa %%xmm4, [%[src]+32]\n"
|
||||
"movdqa %%xmm6, [%[src]+48]\n"
|
||||
|
||||
"movdqa %%xmm1, %%xmm0\n"
|
||||
"movdqa %%xmm3, %%xmm2\n"
|
||||
|
@ -738,35 +735,35 @@ Z16Loop:
|
|||
"punpckhwd %%xmm3, %%xmm7\n"
|
||||
|
||||
// start saving
|
||||
"movdqa [%1], %%xmm0\n"
|
||||
"movdqa [%1+16], %%xmm1\n"
|
||||
"movdqa [%[dst]], %%xmm0\n"
|
||||
"movdqa [%[dst]+16], %%xmm1\n"
|
||||
|
||||
"punpcklwd %%xmm4, %%xmm7\n"
|
||||
"punpckhwd %%xmm5, %%xmm7\n"
|
||||
|
||||
"movdqa [%1+32], %%xmm2\n"
|
||||
"movdqa [%1+48], %%xmm3\n"
|
||||
"movdqa [%[dst]+32], %%xmm2\n"
|
||||
"movdqa [%[dst]+48], %%xmm3\n"
|
||||
|
||||
"movdqa %%xmm0, %%xmm6\n"
|
||||
"punpcklwd %%xmm6, %%xmm7\n"
|
||||
|
||||
"movdqa [%1+64], %%xmm4\n"
|
||||
"movdqa [%1+80], %%xmm5\n"
|
||||
"movdqa [%[dst]+64], %%xmm4\n"
|
||||
"movdqa [%[dst]+80], %%xmm5\n"
|
||||
|
||||
"punpckhwd %%xmm0, %%xmm7\n"
|
||||
|
||||
"movdqa [%1+96], %%xmm6\n"
|
||||
"movdqa [%1+112], %%xmm0\n"
|
||||
"movdqa [%[dst]+96], %%xmm6\n"
|
||||
"movdqa [%[dst]+112], %%xmm0\n"
|
||||
|
||||
"add %0, 64\n"
|
||||
"add %1, 128\n"
|
||||
"sub %2, 1\n"
|
||||
"add %[src], 64\n"
|
||||
"add %[dst], 128\n"
|
||||
"sub %[iters], 1\n"
|
||||
"jne Z16Loop\n"
|
||||
|
||||
".att_syntax\n"
|
||||
: "=r"(src), "=r"(dst), "=r"(iters)
|
||||
: "0"(src), "1"(dst), "2"(iters)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
: "=&r"(src), "=&r"(dst), "=&r"(iters)
|
||||
: [src] "0"(src), [dst] "1"(dst), [iters] "2"(iters)
|
||||
: "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory"
|
||||
);
|
||||
#endif // _MSC_VER
|
||||
}
|
||||
|
|
|
@ -804,14 +804,9 @@ void ZeroGS::KickSprite()
|
|||
int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
|
||||
int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
|
||||
|
||||
// sprite is too small and AA shows lines (tek4)
|
||||
|
||||
if (s_AAx)
|
||||
{
|
||||
gs.gsvertex[last].x += 4;
|
||||
|
||||
if (s_AAy) gs.gsvertex[last].y += 4;
|
||||
}
|
||||
// sprite is too small and AA shows lines (tek4, Mana Khemia)
|
||||
gs.gsvertex[last].x += (4*s_AAx);
|
||||
gs.gsvertex[last].y += (4*s_AAy);
|
||||
|
||||
// might be bad sprite (KH dialog text)
|
||||
//if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )
|
||||
|
|
|
@ -96,7 +96,7 @@ extern u32 ptexBilinearBlocks;
|
|||
// State parameters
|
||||
|
||||
|
||||
#ifdef DEVBUILD
|
||||
#ifdef ZEROGS_DEVBUILD
|
||||
extern char* EFFECT_NAME;
|
||||
extern char* EFFECT_DIR;
|
||||
extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
|
||||
|
|
Loading…
Reference in New Issue