pcsx2/pcsx2/GS/GSState.cpp

3340 lines
92 KiB
C++

/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "GSState.h"
#include "GSGL.h"
#include "GSUtil.h"
#include <algorithm> // clamp
#include <cfloat> // FLT_MAX
// Definition of the static member GSState::s_n; the GSState constructor sets it back to 0.
int GSState::s_n = 0;
// Constructs the GS state: initialises members, reads renderer/debug options from the
// configuration, grows the vertex buffer, computes m_sssize, and finally calls Reset()
// and ResetHandlers().
GSState::GSState()
: m_version(7)
, m_gsc(NULL)
, m_skip(0)
, m_skip_offset(0)
, m_q(1.0f)
, m_scanmask_used(false)
, m_vt(this)
, m_regs(NULL)
, m_crc(0)
, m_options(0)
, m_frameskip(0)
{
// m_nativeres seems to be a hack. Unfortunately it impacts draw call number which make debug painful in the replayer.
// Let's keep it disabled to ease debug.
// NOTE(review): the assignment below enables m_nativeres whenever the upscale multiplier is 1,
// which does not match "keep it disabled" above - confirm which one is intended.
m_nativeres = GSConfig.UpscaleMultiplier == 1;
m_mipmap = theApp.GetConfigB("mipmap");
m_NTSC_Saturation = theApp.GetConfigB("NTSC_Saturation");
// The individual user hacks are only read when the master "UserHacks" option is set;
// otherwise they are forced off.
if (theApp.GetConfigB("UserHacks"))
{
m_userhacks_auto_flush = theApp.GetConfigB("UserHacks_AutoFlush");
m_userhacks_wildhack = theApp.GetConfigB("UserHacks_WildHack");
}
else
{
m_userhacks_auto_flush = false;
m_userhacks_wildhack = false;
}
// Debug dump/save options read from the configuration.
s_n = 0;
s_dump = theApp.GetConfigB("dump");
s_save = theApp.GetConfigB("save");
s_savet = theApp.GetConfigB("savet");
s_savez = theApp.GetConfigB("savez");
s_savef = theApp.GetConfigB("savef");
s_saven = theApp.GetConfigI("saven");
s_savel = theApp.GetConfigI("savel");
m_dump_root = "";
// On unix builds, create the dump directories when dumping is enabled
// (root_hw / root_sw are declared outside this view).
#if defined(__unix__)
if (s_dump)
{
GSmkdir(root_hw.c_str());
GSmkdir(root_sw.c_str());
}
#endif
// Resolve the "Automatic" CRC hack level to the level recommended for the configured renderer.
m_crc_hack_level = GSConfig.CRCHack;
if (m_crc_hack_level == CRCHackLevel::Automatic)
m_crc_hack_level = GSUtil::GetRecommendedCRCHackLevel(GSConfig.Renderer);
// Zero the current vertex and the vertex/index buffer bookkeeping before GrowVertexBuffer().
memset(&m_v, 0, sizeof(m_v));
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
// NOTE(review): Reset(), called at the end of this constructor, memsets m_v to zero again,
// so this Q value does not survive construction - confirm whether that is intended.
m_v.RGBAQ.Q = 1.0f;
GrowVertexBuffer();
// m_sssize is the sum of the sizes of every field listed below
// (presumably the size of a serialized state snapshot - TODO confirm).
// Entries marked "obsolete" are still counted.
m_sssize = 0;
m_sssize += sizeof(m_version);
m_sssize += sizeof(m_env.PRIM);
m_sssize += sizeof(m_env.PRMODECONT);
m_sssize += sizeof(m_env.TEXCLUT);
m_sssize += sizeof(m_env.SCANMSK);
m_sssize += sizeof(m_env.TEXA);
m_sssize += sizeof(m_env.FOGCOL);
m_sssize += sizeof(m_env.DIMX);
m_sssize += sizeof(m_env.DTHE);
m_sssize += sizeof(m_env.COLCLAMP);
m_sssize += sizeof(m_env.PABE);
m_sssize += sizeof(m_env.BITBLTBUF);
m_sssize += sizeof(m_env.TRXDIR);
m_sssize += sizeof(m_env.TRXPOS);
m_sssize += sizeof(m_env.TRXREG);
m_sssize += sizeof(m_env.TRXREG); // obsolete
// Per-context registers, counted once for each of the two drawing contexts.
for (int i = 0; i < 2; i++)
{
m_sssize += sizeof(m_env.CTXT[i].XYOFFSET);
m_sssize += sizeof(m_env.CTXT[i].TEX0);
m_sssize += sizeof(m_env.CTXT[i].TEX1);
m_sssize += sizeof(m_env.CTXT[i].CLAMP);
m_sssize += sizeof(m_env.CTXT[i].MIPTBP1);
m_sssize += sizeof(m_env.CTXT[i].MIPTBP2);
m_sssize += sizeof(m_env.CTXT[i].SCISSOR);
m_sssize += sizeof(m_env.CTXT[i].ALPHA);
m_sssize += sizeof(m_env.CTXT[i].TEST);
m_sssize += sizeof(m_env.CTXT[i].FBA);
m_sssize += sizeof(m_env.CTXT[i].FRAME);
m_sssize += sizeof(m_env.CTXT[i].ZBUF);
}
// Current vertex registers.
m_sssize += sizeof(m_v.RGBAQ);
m_sssize += sizeof(m_v.ST);
m_sssize += sizeof(m_v.UV);
m_sssize += sizeof(m_v.FOG);
m_sssize += sizeof(m_v.XYZ);
m_sssize += sizeof(GIFReg); // obsolete
m_sssize += sizeof(m_tr.x);
m_sssize += sizeof(m_tr.y);
// m_mem.m_vmsize bytes are added as-is (not a sizeof).
m_sssize += m_mem.m_vmsize;
// One tag + reg pair for every entry of m_path.
m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].reg)) * std::size(m_path);
m_sssize += sizeof(m_q);
PRIM = &m_env.PRIM;
//CSR->rREV = 0x20;
m_env.PRMODECONT.AC = 1;
tex_flushed = true;
// Reset() clears the path/vertex state and the environment; ResetHandlers() fills the
// register handler tables.
Reset();
ResetHandlers();
}
// Frees the vertex and index buffers if they have been allocated.
GSState::~GSState()
{
	if (m_vertex.buff != nullptr)
	{
		_aligned_free(m_vertex.buff);
	}

	if (m_index.buff != nullptr)
	{
		_aligned_free(m_index.buff);
	}
}
// Changes the frame-skip setting. Nothing happens when the value is unchanged.
// With a non-zero skip, every XYZ* (vertex position) register handler - packed, A+D and
// batched - is replaced by a NOP handler. With skip == 0, UpdateVertexKick() is called
// instead (presumably to reinstall the real handlers - defined outside this view).
void GSState::SetFrameSkip(int skip)
{
	if (skip == m_frameskip)
		return;

	m_frameskip = skip;

	if (!skip)
	{
		UpdateVertexKick();
		return;
	}

	// Packed register handlers.
	m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
	m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
	m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
	m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;

	// A+D register handlers.
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;

	// Batched STQ/RGBA/XYZ handlers.
	m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = &GSState::GIFPackedRegHandlerNOP;
	m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = &GSState::GIFPackedRegHandlerNOP;
}
// Resets the GIF paths, the current vertex, the drawing environment and the vertex/index
// queue positions. Local memory is deliberately left untouched (see the FIXME).
void GSState::Reset()
{
	// FIXME: bios logo not shown cut in half after reset, missing graphics in GoW after first FMV
	// memset(m_mem.m_vm8, 0, m_mem.m_vmsize);

	memset(&m_path, 0, sizeof(m_path));
	memset(&m_v, 0, sizeof(m_v));

	m_env.Reset();
	PRIM = &m_env.PRIM;

	UpdateContext();
	UpdateVertexKick();

	m_env.UpdateDIMX();

	// Recompute the derived per-context data (scissor and memory offsets) for both contexts.
	for (size_t i = 0; i < 2; i++)
	{
		auto& ctx = m_env.CTXT[i];

		ctx.UpdateScissor();

		ctx.offset.fb = m_mem.GetOffset(ctx.FRAME.Block(), ctx.FRAME.FBW, ctx.FRAME.PSM);
		ctx.offset.zb = m_mem.GetOffset(ctx.ZBUF.Block(), ctx.FRAME.FBW, ctx.ZBUF.PSM);
		ctx.offset.tex = m_mem.GetOffset(ctx.TEX0.TBP0, ctx.TEX0.TBW, ctx.TEX0.PSM);
		ctx.offset.fzb = m_mem.GetPixelOffset(ctx.FRAME, ctx.ZBUF);
		ctx.offset.fzb4 = m_mem.GetPixelOffset4(ctx.FRAME, ctx.ZBUF);
	}

	UpdateScissor();

	// Empty the vertex and index queues.
	m_vertex.head = 0;
	m_vertex.tail = 0;
	m_vertex.next = 0;
	m_index.tail = 0;

	m_scanmask_used = false;
}
// Fills the per-primitive XYZ handler tables with the handler instantiations that match the
// requested auto_flush / index_swap template arguments.
// For every primitive P the tables receive:
//   [P][0] XYZF2 (adc = 0), [P][1] XYZF2 (adc = 1), [P][2] XYZ2 (adc = 0), [P][3] XYZ2 (adc = 1)
// for both the packed and the A+D register paths, plus the two batched STQ/RGBA/XYZ handlers.
template<bool auto_flush, bool index_swap>
void GSState::SetPrimHandlers()
{
// Helper macro: installs all ten handlers for primitive P. It is #undef'd at the end of the function.
#define SetHandlerXYZ(P, auto_flush, index_swap) \
m_fpGIFPackedRegHandlerXYZ[P][0] = &GSState::GIFPackedRegHandlerXYZF2<P, 0, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][1] = &GSState::GIFPackedRegHandlerXYZF2<P, 1, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][2] = &GSState::GIFPackedRegHandlerXYZ2<P, 0, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerXYZ[P][3] = &GSState::GIFPackedRegHandlerXYZ2<P, 1, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][0] = &GSState::GIFRegHandlerXYZF2<P, 0, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0, auto_flush, index_swap>; \
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2<P, auto_flush, index_swap>; \
m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2<P, auto_flush, index_swap>;
// Point lists ignore the template arguments: auto_flush is forced on and index_swap off.
SetHandlerXYZ(GS_POINTLIST, true, false);
// Lines and triangles use both template arguments as given.
SetHandlerXYZ(GS_LINELIST, auto_flush, index_swap);
SetHandlerXYZ(GS_LINESTRIP, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLELIST, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLESTRIP, auto_flush, index_swap);
SetHandlerXYZ(GS_TRIANGLEFAN, auto_flush, index_swap);
// Sprites and the invalid primitive never swap indices.
SetHandlerXYZ(GS_SPRITE, auto_flush, false);
SetHandlerXYZ(GS_INVALID, auto_flush, false);
#undef SetHandlerXYZ
}
// Rebuilds every GIF register handler table: the packed-register table, the per-primitive
// XYZ tables (through SetPrimHandlers) and the A+D register table. Slots that are not
// assigned explicitly keep the Null handler.
void GSState::ResetHandlers()
{
	// ---- Packed register handlers ----
	std::fill(std::begin(m_fpGIFPackedRegHandlers), std::end(m_fpGIFPackedRegHandlers), &GSState::GIFPackedRegHandlerNull);

	m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerPRIM;
	m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
	m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
	m_fpGIFPackedRegHandlers[GIF_REG_UV] = m_userhacks_wildhack ? &GSState::GIFPackedRegHandlerUV_Hack : &GSState::GIFPackedRegHandlerUV;
	m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>;
	m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>;
	m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
	m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
	m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
	m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
	m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;

	// ---- Per-primitive XYZ handlers ----
	// swap first/last indices when the provoking vertex is the first (D3D/Vulkan)
	const bool index_swap = GSConfig.UseHardwareRenderer() && !g_gs_device->Features().provoking_vertex_last;

	if (m_userhacks_auto_flush)
	{
		if (index_swap)
			SetPrimHandlers<true, true>();
		else
			SetPrimHandlers<true, false>();
	}
	else
	{
		if (index_swap)
			SetPrimHandlers<false, true>();
		else
			SetPrimHandlers<false, false>();
	}

	// ---- A+D register handlers ----
	std::fill(std::begin(m_fpGIFRegHandlers), std::end(m_fpGIFRegHandlers), &GSState::GIFRegHandlerNull);

	// Vertex attribute registers.
	m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
	m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
	m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ + 0x10] = &GSState::GIFRegHandlerRGBAQ;
	m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
	m_fpGIFRegHandlers[GIF_A_D_REG_UV] = m_userhacks_wildhack ? &GSState::GIFRegHandlerUV_Hack : &GSState::GIFRegHandlerUV;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
	m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;

	// Texture and drawing environment registers.
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
	m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT;
	m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK;
	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA;
	m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH;
	m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX;
	m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE;
	m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE;
	m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>;
	m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>;
	m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>;

	// Transfer registers.
	m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF;
	m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS;
	m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG;
	m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR;
	m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG;

	// SIGNAL / FINISH / LABEL are explicitly routed to the Null handler.
	m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerNull;
	m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerNull;
	m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerNull;
}
// Returns true when the SMODE2.INT bit of the privileged registers is set.
bool GSState::isinterlaced()
{
	return m_regs->SMODE2.INT != 0;
}
// Derives the current video mode from the SMODE1 (CMOD, LC) and SMODE2 (INT) registers.
GSVideoMode GSState::GetVideoMode()
{
	// TODO: Get confirmation of videomode from SYSCALL ? not necessary but would be nice.
	// Other videomodes can't be detected on our side without the help of the data from core
	// You can only identify a limited number of video modes based on the info from CRTC registers.

	const u8 colorburst = m_regs->SMODE1.CMOD; // Subcarrier frequency
	const u8 pll_divider = m_regs->SMODE1.LC; // Phased lock loop divider

	switch (colorburst)
	{
		case 0:
		{
			const bool interlaced = isinterlaced();

			if (pll_divider == 22)
				return interlaced ? GSVideoMode::HDTV_1080I : GSVideoMode::HDTV_720P;

			// TODO: 576P will also be reported as 480P, find some way to differentiate.
			if (pll_divider == 32 && !interlaced)
				return GSVideoMode::SDTV_480P;

			return GSVideoMode::VESA;
		}
		case 2:
			return GSVideoMode::NTSC;
		case 3:
			return GSVideoMode::PAL;
		default:
			return GSVideoMode::Unknown;
	}

	__assume(0); // unreachable
}
// The PS2 appears to saturate the output circuit size when a game requests, through the DISP
// registers, a value larger than the current video mode supports. This function emulates that.
// NOTE: there is no documentation on how output dimensions saturate, so everything here is
// hacky and based on educated guesses.
//
// r is the display rectangle; only r.bottom is ever modified.
void GSState::SaturateOutputSize(GSVector4i& r)
{
	const GSVideoMode videomode = GetVideoMode();
	const bool ntsc = (videomode == GSVideoMode::NTSC);
	const bool pal = (videomode == GSVideoMode::PAL);

	// Some games (such as Pool Paradise) use alternate line reading and provide a massive
	// height which is really half.
	if ((ntsc || pal) && r.height() > 640)
	{
		r.bottom = r.top + (r.height() / 2);
		return;
	}

	// Limit games to standard NTSC resolutions. Games with 512x512 (PAL resolution) on NTSC
	// video mode produce a black border on the bottom. 512x448 is the resolution generally
	// used by NTSC, and saturating the height seems to get rid of the black borders.
	// It is quite a bad hack though, as it affects binaries which are patched to run on a
	// non-native video mode.
	if (!m_NTSC_Saturation || !ntsc)
		return;

	const auto& SMODE2 = m_regs->SMODE2;
	const auto& PMODE = m_regs->PMODE;

	const bool interlaced_field = SMODE2.INT && !SMODE2.FFMD;
	const bool single_frame_output = SMODE2.INT && SMODE2.FFMD && (PMODE.EN1 ^ PMODE.EN2);
	if (!interlaced_field && !single_frame_output)
		return;

	const bool unsupported_output_size = r.height() > 448 && r.width() < 640;
	if (unsupported_output_size)
	{
		r.bottom = r.top + 448;
	}
}
// Returns the display rectangle of output circuit i, computed from its DISPLAY register and
// passed through SaturateOutputSize(). With i == -1 the rectangles of both circuits are
// merged when both are enabled; otherwise the single enabled circuit is used.
// Returns a default-constructed rectangle when neither circuit is enabled.
GSVector4i GSState::GetDisplayRect(int i)
{
	const bool circuit0_on = IsEnabled(0);
	const bool circuit1_on = IsEnabled(1);

	if (!circuit0_on && !circuit1_on)
		return {};

	// If no specific context is requested then pass the merged rectangle as return value
	if (i == -1)
	{
		if (circuit0_on && circuit1_on)
		{
			const GSVector4i rect0 = GetDisplayRect(0);
			const GSVector4i rect1 = GetDisplayRect(1);

			const GSVector4i overlap = rect0.rintersect(rect1);
			const GSVector4i merged = rect0.runion_ordered(rect1);

			// The merged rectangle is only used when the two rectangles do not overlap at
			// all, or when the overlap equals the merged rectangle. Otherwise the rectangle
			// with the bigger area is returned (the second one on a tie).
			const bool can_be_merged =
				overlap.width() == 0 ||
				overlap.height() == 0 ||
				overlap.xyxy().eq(merged.xyxy());

			if (can_be_merged)
				return merged;

			return (rect0.rarea() > rect1.rarea()) ? rect0 : rect1;
		}

		// Only one circuit is enabled: EN2 set selects circuit 1, otherwise circuit 0.
		i = m_regs->PMODE.EN2;
	}

	const auto& display = m_regs->DISP[i].DISPLAY;

	// The register fields store value - 1 for the sizes and magnifications.
	const u32 disp_w = display.DW + 1;
	const u32 disp_h = display.DH + 1;
	const u32 disp_x = display.DX;
	const u32 disp_y = display.DY;
	const u32 mag_h = display.MAGH + 1;
	const u32 mag_v = display.MAGV + 1;

	const int width = disp_w / mag_h;
	const int height = disp_h / mag_v;

	// Set up the display rectangle based on the values obtained from DISPLAY registers
	GSVector4i rect;
	rect.left = disp_x / mag_h;
	rect.top = disp_y / mag_v;
	rect.right = rect.left + width;
	rect.bottom = rect.top + height;

	SaturateOutputSize(rect);

	return rect;
}
// Returns the rectangle that output circuit i reads from the frame buffer: the size comes
// from GetDisplayRect(i) (height halved when interlaced with SMODE2.FFMD set) and the origin
// from DISPFB.DBX / DBY. With i == -1 the union of both circuits' rectangles is returned.
GSVector4i GSState::GetFrameRect(int i)
{
	// If no specific context is requested then pass the merged rectangle as return value
	if (i == -1)
		return GetFrameRect(0).runion(GetFrameRect(1));

	GSVector4i frame_rect = GetDisplayRect(i);

	const int w = frame_rect.width();
	int h = frame_rect.height();

	if (isinterlaced() && m_regs->SMODE2.FFMD && h > 1)
		h >>= 1;

	const u32 origin_x = m_regs->DISP[i].DISPFB.DBX;
	const u32 origin_y = m_regs->DISP[i].DISPFB.DBY;

	frame_rect.left = origin_x;
	frame_rect.top = origin_y;
	frame_rect.right = frame_rect.left + w;
	frame_rect.bottom = frame_rect.top + h;

	return frame_rect;
}
int GSState::GetFramebufferHeight()
{
// Framebuffer height is 11 bits max
constexpr int height_limit = (1 << 11);
const GSVector4i disp1_rect = GetFrameRect(0);
const GSVector4i disp2_rect = GetFrameRect(1);
const GSVector4i combined = disp1_rect.runion(disp2_rect);
// DBY isn't an offset to the frame memory but rather an offset to read output circuit inside
// the frame memory, hence the top offset should also be calculated for the total height of the
// frame memory. Also we need to wrap the value only when we're dealing with values with range of the
// frame memory (offset + read output circuit height, IOW bottom of merged_output)
const int max_height = std::max(disp1_rect.height(), disp2_rect.height());
const int frame_memory_height = std::max(max_height, combined.bottom % height_limit);
if (frame_memory_height > 1024)
GL_PERF("Massive framebuffer height detected! (height:%d)", frame_memory_height);
return frame_memory_height;
}
// Returns true when output circuit i (0 or 1) is switched on in PMODE (EN1 / EN2) and its
// DISPLAY register has both DW and DH non-zero.
bool GSState::IsEnabled(int i)
{
	ASSERT(i >= 0 && i < 2);

	const bool circuit_on =
		(i == 0 && m_regs->PMODE.EN1) ||
		(i == 1 && m_regs->PMODE.EN2);

	if (!circuit_on)
		return false;

	const auto& display = m_regs->DISP[i].DISPLAY;
	return display.DW && display.DH;
}
float GSState::GetTvRefreshRate()
{
const GSVideoMode videomode = GetVideoMode();
//TODO: Check vertical frequencies for VESA video modes, old ones were untested.
switch (videomode)
{
case GSVideoMode::NTSC:
case GSVideoMode::SDTV_480P:
return (60 / 1.001f);
case GSVideoMode::PAL:
return 50;
case GSVideoMode::HDTV_720P:
case GSVideoMode::HDTV_1080I:
return 60;
default:
Console.Error("GS: Unknown video mode. Please report: https://github.com/PCSX2/pcsx2/issues");
return 0;
}
__assume(0); // unreachable
}
// Packed-register handler that does nothing; ResetHandlers() fills the packed handler table
// with it before installing the real handlers.
void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
{
}
// Packed RGBA: gathers bytes 0, 4, 8 and 12 of the packed register into one 32-bit value
// stored in m_v.RGBAQ.U32[0], and copies the latched m_q into m_v.RGBAQ.Q.
void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
{
	// Byte-shuffle selector: picks source bytes 0x00, 0x04, 0x08, 0x0c.
	const GSVector4i byte_select = GSVector4i::load(0x0c080400);
	const GSVector4i rgba = GSVector4i::load<false>(r).shuffle8(byte_select);

	m_v.RGBAQ.U32[0] = (u32)GSVector4i::store(rgba);
	m_v.RGBAQ.Q = m_q;
}
void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
{
const GSVector4i st = GSVector4i::loadl(&r->U64[0]);
GSVector4i q = GSVector4i::loadl(&r->U64[1]);
GSVector4i::storel(&m_v.ST, st);
// Vexx (character shadow)
// q = 0 (st also 0 on the first 16 vertices), setting it to 1.0f to avoid div by zero later
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero());
// Suikoden 4
// creates some nan for Q. Let's avoid undefined behavior (See GIFRegHandlerRGBAQ)
q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max));
GSVector4::store(&m_q, GSVector4::cast(q));
// hide behind a define for now to avoid spam in the above cases for users
#if defined(PCSX2_DEVBUILD) || defined(_DEBUG)
if (std::isnan(m_v.ST.S) || std::isnan(m_v.ST.T))
Console.Warning("S or T is nan");
#endif
}
// Packed UV: masks the loaded values with GSVector4i::x00003fff(), packs them with ps32 and
// stores the resulting 32-bit word in m_v.UV.
void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r)
{
	const GSVector4i masked = GSVector4i::loadl(r) & GSVector4i::x00003fff();
	const GSVector4i packed = masked.ps32(masked);

	m_v.UV = (u32)GSVector4i::store(packed);
}
// Variant of the packed UV handler installed when the wild hack is enabled: stores UV the
// same way and additionally sets m_isPackedUV_HackFlag to mark that UV came from the packed path.
void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r)
{
	const GSVector4i masked = GSVector4i::loadl(r) & GSVector4i::x00003fff();
	const GSVector4i packed = masked.ps32(masked);

	m_v.UV = (u32)GSVector4i::store(packed);
	m_isPackedUV_HackFlag = true;
}
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r)
{
GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
GSVector4i zf = GSVector4i::loadl(&r->U64[1]);
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf);
VertexKick<prim, auto_flush, index_swap>(adc ? 1 : r->XYZF2.Skip());
}
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r)
{
const GSVector4i xy = GSVector4i::loadl(&r->U64[0]);
const GSVector4i z = GSVector4i::loadl(&r->U64[1]);
const GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV));
VertexKick<prim, auto_flush, index_swap>(adc ? 1 : r->XYZ2.Skip());
}
// Packed FOG: copies the F field of the packed register into the current vertex (m_v.FOG).
void GSState::GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r)
{
m_v.FOG = r->FOG.F;
}
// Packed A+D: forwards the embedded register data to the A+D handler selected by the low
// 7 bits of the address field.
void GSState::GIFPackedRegHandlerA_D(const GIFPackedReg* RESTRICT r)
{
	const auto handler = m_fpGIFRegHandlers[r->A_D.ADDR & 0x7F];

	(this->*handler)(&r->r);
}
// Packed-register handler that intentionally does nothing. It is installed for GIF_REG_NOP
// and, by SetFrameSkip(), for the XYZ* registers while frames are skipped.
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
{
}
template <u32 prim, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, u32 size)
{
ASSERT(size > 0 && size % 3 == 0);
const GIFPackedReg* RESTRICT r_end = r + size;
while (r < r_end)
{
GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
GSVector4i zf = GSVector4i::loadl(&r[2].U64[1]);
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::load((int)m_v.UV));
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
VertexKick<prim, auto_flush, index_swap>(r[2].XYZF2.Skip());
r += 3;
}
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
}
template <u32 prim, bool auto_flush, bool index_swap>
void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, u32 size)
{
ASSERT(size > 0 && size % 3 == 0);
const GIFPackedReg* RESTRICT r_end = r + size;
while (r < r_end)
{
GSVector4i st = GSVector4i::loadl(&r[0].U64[0]);
GSVector4i q = GSVector4i::loadl(&r[0].U64[1]);
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
q = q.blend8(GSVector4i::cast(GSVector4::m_one), q == GSVector4i::zero()); // see GIFPackedRegHandlerSTQ
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
GSVector4i xy = GSVector4i::loadl(&r[2].U64[0]);
GSVector4i z = GSVector4i::loadl(&r[2].U64[1]);
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
VertexKick<prim, auto_flush, index_swap>(r[2].XYZ2.Skip());
r += 3;
}
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
}
// Batched (register run) overload of the NOP handler: ignores the whole run.
// SetFrameSkip() installs it in m_fpGIFPackedRegHandlersC while frames are skipped.
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, u32 size)
{
}
// A+D register handler that does nothing; ResetHandlers() uses it as the default entry of
// the A+D handler table and for SIGNAL / FINISH / LABEL.
void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
{
}
// Applies a new PRIM register value.
// Pending geometry is flushed when the primitive class changes, or - with PRMODECONT.AC == 1 -
// when any of the compared PRIM attribute bits differ. The new value is then stored (the
// whole register when AC == 1, only the primitive type otherwise), the vertex kick handlers
// are refreshed and unused vertices are dropped from the end of the vertex buffer.
__forceinline void GSState::ApplyPRIM(u32 prim)
{
	const bool same_class = GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7); // NOTE: assume strips/fans are converted to lists

	if (!same_class)
	{
		Flush();
	}
	else
	{
		// 0x7f8 covers all fields except PRIM; on the hardware renderer bit 0x80 (AA1) is
		// additionally ignored for triangles, leaving 0x778.
		const bool hw_triangle = GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(prim & 7) == GS_TRIANGLE_CLASS;
		const u32 prim_mask = hw_triangle ? 0x778 : 0x7f8;

		const bool attributes_differ = ((m_env.PRIM.U32[0] ^ prim) & prim_mask) != 0;

		if (m_env.PRMODECONT.AC == 1 && attributes_differ)
		{
			Flush();
		}
	}

	if (m_env.PRMODECONT.AC != 1)
	{
		m_env.PRIM.PRIM = prim & 0x7;
	}
	else
	{
		m_env.PRIM.U32[0] = prim;

		UpdateContext();
	}

	UpdateVertexKick();

	ASSERT(m_index.tail == 0 || !g_gs_device->Features().provoking_vertex_last || m_index.buff[m_index.tail - 1] + 1 == m_vertex.next);

	if (m_index.tail == 0)
	{
		m_vertex.next = 0;
	}

	// remove unused vertices from the end of the vertex buffer
	m_vertex.tail = m_vertex.next;
	m_vertex.head = m_vertex.tail;
}
// A+D PRIM: forwards the 32-bit register value to ApplyPRIM().
void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
{
// ALIGN_STACK is a project macro defined outside this view; presumably it aligns the stack
// to 32 bytes for the vector code reached from here - TODO confirm.
ALIGN_STACK(32);
ApplyPRIM(r->PRIM.U32[0]);
}
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
{
GSVector4i rgbaq = (GSVector4i)r->RGBAQ;
GSVector4i q = rgbaq.blend8(GSVector4i::cast(GSVector4::m_one), rgbaq == GSVector4i::zero()).yyyy(); // see GIFPackedRegHandlerSTQ
// Silent Hill output a nan in Q to emulate the flash light. Unfortunately it
// breaks GSVertexTrace code that rely on min/max.
q = GSVector4i::cast(GSVector4::cast(q).replace_nan(GSVector4::m_max));
m_v.RGBAQ = rgbaq.upl32(q);
}
// A+D ST: copies the register into m_v.ST. Dev/debug builds warn when S or T is NaN.
void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r)
{
	m_v.ST = (GSVector4i)r->ST;

#if defined(PCSX2_DEVBUILD) || defined(_DEBUG)
	const bool st_has_nan = std::isnan(m_v.ST.S) || std::isnan(m_v.ST.T);
	if (st_has_nan)
	{
		Console.Warning("S or T is nan");
	}
#endif
}
void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r)
{
m_v.UV = r->UV.U32[0] & 0x3fff3fff;
}
void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r)
{
m_v.UV = r->UV.U32[0] & 0x3fff3fff;
m_isPackedUV_HackFlag = false;
}
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZF2(const GIFReg* RESTRICT r)
{
GSVector4i xyzf = GSVector4i::loadl(&r->XYZF);
GSVector4i xyz = xyzf & (GSVector4i::xffffffff().upl32(GSVector4i::x00ffffff()));
GSVector4i uvf = GSVector4i::load((int)m_v.UV).upl32(xyzf.srl32(24).srl<4>());
m_v.m[1] = xyz.upl64(uvf);
VertexKick<prim, auto_flush, index_swap>(adc);
}
template <u32 prim, u32 adc, bool auto_flush, bool index_swap>
void GSState::GIFRegHandlerXYZ2(const GIFReg* RESTRICT r)
{
m_v.m[1] = GSVector4i::load(&r->XYZ, &m_v.UV);
VertexKick<prim, auto_flush, index_swap>(adc);
}
// Applies a TEX0 value to drawing context i. It is called by the TEX0 and TEX2 register handlers.
// Steps, in order: flush pending draws if required, mask CPSM, refresh the cached texture
// offset when TBP0/TBW/PSM changed, store the register, and - when the CLUT write test
// passed - invalidate the CLUT source area in local memory and load the CLUT.
// TEX0 is taken by non-const reference because its CPSM field is masked in place.
template <int i>
void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
{
// TODO: Paletted Formats
// 8-bit and 4 bit formats need to be addressed with a buffer width divisible 2.
// However, not doing so is possible and does have a behavior on the GS.
// When implementing such code care must be taken not to apply it unless it is
// used for a draw. Galaxy Angel will send TEX0 with a PSM of T8 and a TBW of 7
// only to immediately update it to CT32 with TEX2. The old code used to apply a
// correction on the TEX0 setting which caused the game to draw the CT32 texture
// with an incorrect buffer width.
//
// Bouken Jidai Katsugeki Goemon apparently uses a TBW of 1 but this game is currently
// extremely broken for the same reasons as MLB Power Pros in that it spams TEX0 with
// complete garbage making for a nice 1G heap of GSOffset.
GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.U32[1], TEX0.U32[0]);
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);
// clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this)
constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA
// && binds tighter than ||: flush when the CLUT write test passed, or when context i is the
// active context and one of the masked fields differs from the stored TEX0.
if (wt || PRIM->CTXT == i && ((TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask))
Flush();
TEX0.CPSM &= 0xa; // 1010b
// Only recompute the cached texture offset when TBP0, TBW or PSM actually changed.
if ((TEX0.U32[0] ^ m_env.CTXT[i].TEX0.U32[0]) & 0x3ffffff) // TBP0 TBW PSM
m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
m_env.CTXT[i].TEX0 = (GSVector4i)TEX0;
if (wt)
{
// Only SBP, SBW and SPSM of this BITBLTBUF are assigned below; the other fields are left
// uninitialised.
GIFRegBITBLTBUF BITBLTBUF;
GSVector4i r;
if (TEX0.CSM == 0)
{
// CSM == 0: invalidate one block-sized rectangle per consecutive block starting at CBP.
BITBLTBUF.SBP = TEX0.CBP;
BITBLTBUF.SBW = 1;
// NOTE(review): SPSM is assigned from CSM (which is 0 in this branch) rather than from a
// pixel storage format such as CPSM - confirm this is intended.
BITBLTBUF.SPSM = TEX0.CSM;
r.left = 0;
r.top = 0;
r.right = GSLocalMemory::m_psm[TEX0.CPSM].bs.x;
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].bs.y;
// Start from 4 blocks; halve for a 16 bpp CLUT format and halve again for a 4 bpp texture format.
int blocks = 4;
if (GSLocalMemory::m_psm[TEX0.CPSM].bpp == 16)
blocks >>= 1;
if (GSLocalMemory::m_psm[TEX0.PSM].bpp == 4)
blocks >>= 1;
for (int j = 0; j < blocks; j++, BITBLTBUF.SBP++)
InvalidateLocalMem(BITBLTBUF, r, true);
}
else
{
// CSM != 0: invalidate a single one-pixel-high row that starts at the TEXCLUT offset
// (COU, COV) and is m_psm[CPSM].pal pixels wide.
BITBLTBUF.SBP = TEX0.CBP;
BITBLTBUF.SBW = m_env.TEXCLUT.CBW;
BITBLTBUF.SPSM = TEX0.CSM;
r.left = m_env.TEXCLUT.COU;
r.top = m_env.TEXCLUT.COV;
r.right = r.left + GSLocalMemory::m_psm[TEX0.CPSM].pal;
r.bottom = r.top + 1;
InvalidateLocalMem(BITBLTBUF, r, true);
}
// Load the CLUT using the TEX0 value that was just stored in the context.
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
}
}
// A+D TEX0 for drawing context i: clamps TW/TH to the spec maximum, applies the register
// through ApplyTEX0() and, when TEX1.MTBA is set and a reload is due, derives the base
// pointers and widths of mip levels 1-3 (MIPTBP1) from the base texture.
// tex_flushed is always cleared on exit.
template <int i>
void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
{
	GL_REG("TEX0_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);

	auto& ctx = m_env.CTXT[i];

	GIFRegTEX0 TEX0 = r->TEX0;

	// Must be evaluated before ApplyTEX0() overwrites the context's TEX0.
	const bool MTBAreload = (tex_flushed && (i == m_env.PRIM.CTXT)) || (r->TEX0.TBP0 != ctx.TEX0.TBP0);

	// Spec max is 10
	//
	// Yakuza (minimap)
	// Sets TW/TH to 0
	// Drawn using solid colors, the texture is really a 1x1 white texel,
	// modulated by the vertex color. Cannot change the dimension because S/T are normalized.
	//
	// Tokyo Xtreme Racer Drift 2 (text)
	// Sets TW/TH to 0
	// there used to be a case to force this to 10
	// but GetSizeFixedTEX0 sorts this now
	TEX0.TW = std::clamp<u32>(TEX0.TW, 0, 10);
	TEX0.TH = std::clamp<u32>(TEX0.TH, 0, 10);

	ApplyTEX0<i>(TEX0);

	// When the texture cache reloads, the mip levels need to be updated too, so do that here.
	// This is essentially triggered by a texture page change: either TEXFLUSH was issued
	// without a draw following it, or TBP0 changed.
	// Textures must be of equal width/height and a minimum of 32x32
	const bool square_and_large_enough = ctx.TEX0.TW == ctx.TEX0.TH && ctx.TEX0.TW >= 5;

	if (MTBAreload && ctx.TEX1.MTBA && square_and_large_enough)
	{
		// NOTE 1: TEX1.MXL must not be automatically set to 3 here.
		// NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. (common for PSM_PSMT4)
		// NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width)

		u32 bp = ctx.TEX0.TBP0;
		u32 bw = ctx.TEX0.TBW;
		u32 w = 1u << ctx.TEX0.TW;
		u32 h = 1u << ctx.TEX0.TH;

		const u32 minwidth = ctx.TEX1.MMIN >= 4 ? 8 : 1;
		const u32 bpp = GSLocalMemory::m_psm[ctx.TEX0.PSM].bpp;

		// Moves bp past the current level (its byte size shifted right by 8) and, while the
		// level is still wider than minwidth, halves the buffer width and the dimensions
		// (never below 1). After the last call w and h are no longer used.
		const auto advance_level = [&]() {
			bp += (int)((w * h * ((float)bpp / 8))) >> 8;

			if (w > minwidth)
			{
				bw = std::max<u32>(bw >> 1, 1);
				w = std::max<u32>(w >> 1, 1);
				h = std::max<u32>(h >> 1, 1);
			}
		};

		advance_level();
		ctx.MIPTBP1.TBP1 = bp;
		ctx.MIPTBP1.TBW1 = bw;

		advance_level();
		ctx.MIPTBP1.TBP2 = bp;
		ctx.MIPTBP1.TBW2 = bw;

		advance_level();
		ctx.MIPTBP1.TBP3 = bp;
		ctx.MIPTBP1.TBW3 = bw;
	}

	tex_flushed = false;
}
// A+D CLAMP for drawing context i: flushes pending draws when context i is active and the
// value changes, then stores the register.
template <int i>
void GSState::GIFRegHandlerCLAMP(const GIFReg* RESTRICT r)
{
	GL_REG("CLAMP_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);

	auto& current = m_env.CTXT[i].CLAMP;

	if (PRIM->CTXT == i && r->CLAMP != current)
	{
		Flush();
	}

	current = (GSVector4i)r->CLAMP;
}
// Handles a write to the FOG register: copies its F field into the vertex
// currently being assembled (m_v). No flush is performed.
void GSState::GIFRegHandlerFOG(const GIFReg* RESTRICT r)
{
	const auto& fog = r->FOG;
	m_v.FOG = fog.F;
}
void GSState::GIFRegHandlerNOP(const GIFReg* RESTRICT r)
{
}
template <int i>
void GSState::GIFRegHandlerTEX1(const GIFReg* RESTRICT r)
{
GL_REG("TEX1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->TEX1 != m_env.CTXT[i].TEX1)
Flush();
m_env.CTXT[i].TEX1 = (GSVector4i)r->TEX1;
}
template <int i>
void GSState::GIFRegHandlerTEX2(const GIFReg* RESTRICT r)
{
GL_REG("TEX2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
// TEX2 is a masked write to TEX0, for performing CLUT swaps (palette swaps).
// It only applies the following fields:
// CLD, CSA, CSM, CPSM, CBP, PSM.
// It ignores these fields (uses existing values in the context):
// TFX, TCC, TH, TW, TBW, and TBP0
constexpr u64 mask = 0xFFFFFFE003F00000ull; // TEX2 bits
GIFRegTEX0 TEX0;
TEX0.U64 = (m_env.CTXT[i].TEX0.U64 & ~mask) | (r->U64 & mask);
ApplyTEX0<i>(TEX0);
}
template <int i>
void GSState::GIFRegHandlerXYOFFSET(const GIFReg* RESTRICT r)
{
GL_REG("XYOFFSET_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
const GSVector4i o = (GSVector4i)r->XYOFFSET & GSVector4i::x0000ffff();
if (!o.eq(m_env.CTXT[i].XYOFFSET))
Flush();
m_env.CTXT[i].XYOFFSET = o;
m_env.CTXT[i].UpdateScissor();
UpdateScissor();
}
// Handles a write to PRMODECONT: only the AC field is stored.
void GSState::GIFRegHandlerPRMODECONT(const GIFReg* RESTRICT r)
{
	GL_REG("PRMODECONT = 0x%x_%x", r->U32[1], r->U32[0]);

	const auto& incoming = r->PRMODECONT;
	m_env.PRMODECONT.AC = incoming.AC;
}
void GSState::GIFRegHandlerPRMODE(const GIFReg* RESTRICT r)
{
GL_REG("PRMODE = 0x%x_%x", r->U32[1], r->U32[0]);
if (!m_env.PRMODECONT.AC)
{
u32 prim_mask = 0x7f8;
if (GSConfig.UseHardwareRenderer() && GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GS_TRIANGLE_CLASS)
prim_mask &= ~0x80; // Mask out AA1.
if ((m_env.PRIM.U32[0] ^ r->PRMODE.U32[0]) & prim_mask)
Flush();
}
else
{
return;
}
const u32 _PRIM = m_env.PRIM.PRIM;
m_env.PRIM = (GSVector4i)r->PRMODE;
m_env.PRIM.PRIM = _PRIM;
UpdateContext();
}
void GSState::GIFRegHandlerTEXCLUT(const GIFReg* RESTRICT r)
{
GL_REG("TEXCLUT = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->TEXCLUT != m_env.TEXCLUT)
Flush();
m_env.TEXCLUT = (GSVector4i)r->TEXCLUT;
}
void GSState::GIFRegHandlerSCANMSK(const GIFReg* RESTRICT r)
{
if (r->SCANMSK != m_env.SCANMSK)
Flush();
m_env.SCANMSK = (GSVector4i)r->SCANMSK;
if (m_env.SCANMSK.MSK & 2)
m_scanmask_used = true;
}
template <int i>
void GSState::GIFRegHandlerMIPTBP1(const GIFReg* RESTRICT r)
{
GL_REG("MIPTBP1_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->MIPTBP1 != m_env.CTXT[i].MIPTBP1)
Flush();
m_env.CTXT[i].MIPTBP1 = (GSVector4i)r->MIPTBP1;
}
template <int i>
void GSState::GIFRegHandlerMIPTBP2(const GIFReg* RESTRICT r)
{
GL_REG("MIPTBP2_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
if (PRIM->CTXT == i && r->MIPTBP2 != m_env.CTXT[i].MIPTBP2)
Flush();
m_env.CTXT[i].MIPTBP2 = (GSVector4i)r->MIPTBP2;
}
void GSState::GIFRegHandlerTEXA(const GIFReg* RESTRICT r)
{
GL_REG("TEXA = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->TEXA != m_env.TEXA)
Flush();
m_env.TEXA = (GSVector4i)r->TEXA;
}
void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
{
GL_REG("FOGCOL = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->FOGCOL != m_env.FOGCOL)
Flush();
m_env.FOGCOL = (GSVector4i)r->FOGCOL;
}
// Handles a write to TEXFLUSH: records that a texture flush was requested
// and, when the auto-flush user hack is enabled, flushes pending work.
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
{
	GL_REG("TEXFLUSH = 0x%x_%x PRIM TME %x", r->U32[1], r->U32[0], PRIM->TME);

	tex_flushed = true;

	if (!m_userhacks_auto_flush)
		return;

	// Some games draw a single sprite onto itself, flush the texture cache and
	// then sample that texture again. The page-crossing based autoflush logic
	// does not catch this pattern, so it is handled here.
	Flush();
}
template <int i>
void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
{
if (PRIM->CTXT == i && r->SCISSOR != m_env.CTXT[i].SCISSOR)
Flush();
m_env.CTXT[i].SCISSOR = (GSVector4i)r->SCISSOR;
m_env.CTXT[i].UpdateScissor();
UpdateScissor();
}
// Handles a write to the ALPHA register of drawing context `i`.
//
// The A/B/C/D selectors are normalised before anything else: a value of 3 is
// not allowed by the spec and acts like 2 on real hardware, so each selector
// is clamped to [0, 2].
//
// The flush decision is made against the normalised value. The stored
// register always holds clamped selectors, so comparing the raw incoming
// value (as was done previously) forced a flush on every write that used a
// reserved selector, even when the effective blend state did not change.
// This mirrors how the FRAME and ZBUF handlers adjust their value before
// comparing it with the stored one.
template <int i>
void GSState::GIFRegHandlerALPHA(const GIFReg* RESTRICT r)
{
	GL_REG("ALPHA = 0x%x_%x", r->U32[1], r->U32[0]);

	auto ALPHA = r->ALPHA;

	// value of 3 is not allowed by the spec
	// acts like 2 on real hw, so just clamp it
	ALPHA.A = std::clamp<u32>(r->ALPHA.A, 0, 2);
	ALPHA.B = std::clamp<u32>(r->ALPHA.B, 0, 2);
	ALPHA.C = std::clamp<u32>(r->ALPHA.C, 0, 2);
	ALPHA.D = std::clamp<u32>(r->ALPHA.D, 0, 2);

	if (PRIM->CTXT == i && ALPHA != m_env.CTXT[i].ALPHA)
		Flush();

	m_env.CTXT[i].ALPHA = (GSVector4i)ALPHA;
}
void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r)
{
bool update = false;
if (r->DIMX != m_env.DIMX)
{
Flush();
update = true;
}
m_env.DIMX = (GSVector4i)r->DIMX;
if (update)
m_env.UpdateDIMX();
}
void GSState::GIFRegHandlerDTHE(const GIFReg* RESTRICT r)
{
if (r->DTHE != m_env.DTHE)
Flush();
m_env.DTHE = (GSVector4i)r->DTHE;
}
void GSState::GIFRegHandlerCOLCLAMP(const GIFReg* RESTRICT r)
{
if (r->COLCLAMP != m_env.COLCLAMP)
Flush();
m_env.COLCLAMP = (GSVector4i)r->COLCLAMP;
}
template <int i>
void GSState::GIFRegHandlerTEST(const GIFReg* RESTRICT r)
{
if (PRIM->CTXT == i && r->TEST != m_env.CTXT[i].TEST)
Flush();
m_env.CTXT[i].TEST = (GSVector4i)r->TEST;
}
void GSState::GIFRegHandlerPABE(const GIFReg* RESTRICT r)
{
if (r->PABE != m_env.PABE)
Flush();
m_env.PABE = (GSVector4i)r->PABE;
}
template <int i>
void GSState::GIFRegHandlerFBA(const GIFReg* RESTRICT r)
{
if (PRIM->CTXT == i && r->FBA != m_env.CTXT[i].FBA)
Flush();
m_env.CTXT[i].FBA = (GSVector4i)r->FBA;
}
// Handles a write to the FRAME register of drawing context `i`.
//
// Steps, in order (the order matters because later steps read state written
// by earlier ones):
//   1. Copy the incoming value and clamp FBW to [1, 32].
//   2. Flush if context `i` is active and the clamped value differs from the
//      stored FRAME.
//   3. Adjust the stored ZBUF.PSM bits 0x30 depending on whether the new
//      frame PSM has both of those bits set.
//   4. If any of FBP/FBW/PSM changed, recompute the cached fb/zb/fzb/fzb4
//      offsets from the new frame and the (possibly adjusted) ZBUF.
//   5. Store the new FRAME, then rewrite a few PSM values to PSM_PSMCT32 with
//      a matching FBMSK.
template <int i>
void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
{
GL_REG("FRAME_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
GIFRegFRAME NewFrame = r->FRAME;
// FBW is clamped between 1 and 32, however this is wrong, FBW of 0 *should* work and does on Dobiestation
// However there are some issues, so even software mode is incorrect on PCSX2, but this works better.
NewFrame.FBW = std::clamp(NewFrame.FBW, 1U, 32U);
// The comparison uses the clamped value, so a write that only differs in an
// out-of-range FBW does not force a flush.
if (PRIM->CTXT == i && NewFrame != m_env.CTXT[i].FRAME)
Flush();
// Keep ZBUF.PSM bits 0x30 complementary to the frame format: cleared when
// the frame PSM has both bits set, set otherwise. The ZBUF handler applies
// the same rule using the stored FRAME.PSM.
if ((NewFrame.PSM & 0x30) == 0x30)
m_env.CTXT[i].ZBUF.PSM &= ~0x30;
else
m_env.CTXT[i].ZBUF.PSM |= 0x30;
// Only rebuild the cached offsets when a field that feeds them changed.
if ((m_env.CTXT[i].FRAME.U32[0] ^ NewFrame.U32[0]) & 0x3f3f01ff) // FBP FBW PSM
{
m_env.CTXT[i].offset.fb = m_mem.GetOffset(NewFrame.Block(), NewFrame.FBW, NewFrame.PSM);
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), NewFrame.FBW, m_env.CTXT[i].ZBUF.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(NewFrame, m_env.CTXT[i].ZBUF);
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(NewFrame, m_env.CTXT[i].ZBUF);
}
m_env.CTXT[i].FRAME = (GSVector4i)NewFrame;
// Frame formats that only touch part of a 32-bit pixel are replaced by
// PSM_PSMCT32 plus a write mask. Note this rewrites the *stored* FRAME, so
// the next write of the same raw value will compare as different above.
switch (m_env.CTXT[i].FRAME.PSM)
{
case PSM_PSMT8H:
// Berserk uses the format to only update the alpha channel
GL_INS("CORRECT FRAME FORMAT replaces PSM_PSMT8H by PSM_PSMCT32/0x00FF_FFFF");
m_env.CTXT[i].FRAME.PSM = PSM_PSMCT32;
m_env.CTXT[i].FRAME.FBMSK = 0x00FFFFFF;
break;
case PSM_PSMT4HH: // Not tested. Based on PSM_PSMT8H behavior
GL_INS("CORRECT FRAME FORMAT replaces PSM_PSMT4HH by PSM_PSMCT32/0x0FFF_FFFF");
m_env.CTXT[i].FRAME.PSM = PSM_PSMCT32;
m_env.CTXT[i].FRAME.FBMSK = 0x0FFFFFFF;
break;
case PSM_PSMT4HL: // Not tested. Based on PSM_PSMT8H behavior
GL_INS("CORRECT FRAME FORMAT replaces PSM_PSMT4HL by PSM_PSMCT32/0xF0FF_FFFF");
m_env.CTXT[i].FRAME.PSM = PSM_PSMCT32;
m_env.CTXT[i].FRAME.FBMSK = 0xF0FFFFFF;
break;
default:
break;
}
}
template <int i>
void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
{
GL_REG("ZBUF_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);
GIFRegZBUF ZBUF = r->ZBUF;
// We tested this on the PS2 and it seems to be that when the FRAME is a Z format,
// the Z buffer is forced to use color swizzling.
// Powerdrome relies on this behavior to clear the z buffer by drawing 32 pixel wide strips, skipping 32,
// causing the FRAME to do one strip and the Z to do the other 32 due to the block arrangement.
// Other games listed here also hit this Color/Z swap behaviour without masking Z so could be problematic:
// Black, Driver Parallel Lines, Driv3r, Dropship, DT Racer, Scarface, The Simpsons, THP8
if ((m_env.CTXT[i].FRAME.PSM & 0x30) == 0x30)
ZBUF.PSM &= ~0x30;
else
ZBUF.PSM |= 0x30;
if (PRIM->CTXT == i && ZBUF != m_env.CTXT[i].ZBUF)
Flush();
if ((m_env.CTXT[i].ZBUF.U32[0] ^ ZBUF.U32[0]) & 0x3f0001ff) // ZBP PSM
{
m_env.CTXT[i].offset.zb = m_mem.GetOffset(ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, ZBUF.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, ZBUF);
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, ZBUF);
}
m_env.CTXT[i].ZBUF = (GSVector4i)ZBUF;
}
void GSState::GIFRegHandlerBITBLTBUF(const GIFReg* RESTRICT r)
{
// TODO: Paletted formats
// There is a memory bug on the GS as it relates to the transfering of
// 8-bit and 4-bit formats needing an even buffer width due to the
// second half of the page being addressed by TBW/2
//
// namcoXcapcom: Apparently uses DBW of 5 and 11 (and refers to them
// in TEX0 later as 4 and 10 respectively). However I can find no
// documentation on this problem, nothing in the game to suggest
// it is broken and the code here for it was likely incorrect to begin with.
GL_REG("BITBLTBUF = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->BITBLTBUF != m_env.BITBLTBUF)
FlushWrite();
m_env.BITBLTBUF = (GSVector4i)r->BITBLTBUF;
}
void GSState::GIFRegHandlerTRXPOS(const GIFReg* RESTRICT r)
{
GL_REG("TRXPOS = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->TRXPOS != m_env.TRXPOS)
FlushWrite();
m_env.TRXPOS = (GSVector4i)r->TRXPOS;
}
void GSState::GIFRegHandlerTRXREG(const GIFReg* RESTRICT r)
{
GL_REG("TRXREG = 0x%x_%x", r->U32[1], r->U32[0]);
if (r->TRXREG != m_env.TRXREG)
FlushWrite();
m_env.TRXREG = (GSVector4i)r->TRXREG;
}
void GSState::GIFRegHandlerTRXDIR(const GIFReg* RESTRICT r)
{
GL_REG("TRXDIR = 0x%x_%x", r->U32[1], r->U32[0]);
Flush();
m_env.TRXDIR = (GSVector4i)r->TRXDIR;
switch (m_env.TRXDIR.XDIR)
{
case 0: // host -> local
m_tr.Init(m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, m_env.BITBLTBUF);
break;
case 1: // local -> host
m_tr.Init(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.BITBLTBUF);
break;
case 2: // local -> local
Move();
break;
default: // 3 prohibited, behavior unknown
Console.Warning("Invalid guest transfer direction. Please report: https://github.com/PCSX2/pcsx2/issues");
break;
}
}
void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
{
GL_REG("HWREG = 0x%x_%x", r->U32[1], r->U32[0]);
// don't bother if not host -> local
// real hw ignores
if (m_env.TRXDIR.XDIR != 0)
return;
Write(reinterpret_cast<const u8*>(r), 8); // haunting ground
}
// Flushes everything that is pending: first buffered transfer data
// (FlushWrite), then queued primitives (FlushPrim).
void GSState::Flush()
{
	FlushWrite();

	FlushPrim();
}
void GSState::FlushWrite()
{
const int len = m_tr.end - m_tr.start;
if (len <= 0)
return;
GSVector4i r;
r.left = m_env.TRXPOS.DSAX;
r.top = m_env.TRXPOS.DSAY;
r.right = r.left + m_env.TRXREG.RRW;
r.bottom = r.top + m_env.TRXREG.RRH;
InvalidateVideoMem(m_env.BITBLTBUF, r);
const GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi;
(m_mem.*wi)(m_tr.x, m_tr.y, &m_tr.buff[m_tr.start], len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
m_tr.start += len;
g_perfmon.Put(GSPerfMon::Swizzle, len);
}
// Submits the queued primitives (if any) through Draw() and resets the
// vertex/index queues.
//
// Vertices between m_vertex.head and m_vertex.tail that do not yet form a
// complete primitive are saved before the draw and copied back to the start
// of the vertex buffer afterwards, so an in-progress primitive survives the
// flush. Nothing happens when the index queue is empty.
void GSState::FlushPrim()
{
if (m_index.tail > 0)
{
GL_REG("FlushPrim ctxt %d", PRIM->CTXT);
// internal frame rate detection based on sprite blits to the display framebuffer
{
const u32 FRAME_FBP = m_context->FRAME.FBP;
if ((m_regs->DISP[0].DISPFB.FBP == FRAME_FBP && m_regs->PMODE.EN1) ||
(m_regs->DISP[1].DISPFB.FBP == FRAME_FBP && m_regs->PMODE.EN2))
{
g_perfmon.AddDisplayFramebufferSpriteBlit();
}
}
// Scratch space for the vertices carried over to the next batch (at most 2,
// see the assert below).
GSVertex buff[2];
s_n++;
// Snapshot the queue positions; they are needed after the draw to rebuild
// the queue.
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
size_t unused = 0;
if (tail > head)
{
switch (PRIM->PRIM)
{
case GS_POINTLIST:
ASSERT(0);
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
// Carry over every leftover vertex as-is.
unused = tail - head;
memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused);
break;
case GS_TRIANGLEFAN:
// Carry over the vertex at `head` and, if there is more than one leftover
// vertex, the most recent one as well.
buff[0] = m_vertex.buff[head];
unused = 1;
if (tail - 1 > head)
{
buff[1] = m_vertex.buff[tail - 1];
unused = 2;
}
break;
case GS_INVALID:
break;
default:
__assume(0);
}
// The leftover count must be smaller than a full primitive.
ASSERT((int)unused < GSUtil::GetVertexCount(PRIM->PRIM));
}
// If the PSM format of Z is invalid, but it is masked (no write) and ZTST is set to ALWAYS pass (no test, just allow)
// we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
const bool ignoreZ = m_context->ZBUF.ZMSK && m_context->TEST.ZTST == 1;
// Only a warning is emitted; the draw is still attempted.
if (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt >= 3 || (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt >= 3 && !ignoreZ))
{
Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM);
}
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// SaveReg()/RestoreReg() bracket the draw on the current context.
m_context->SaveReg();
try
{
Draw();
}
catch (GSRecoverableError&)
{
// could be an unsupported draw call
}
catch (const std::bad_alloc&)
{
// Texture Out Of Memory
PurgePool();
Console.Error("GS: Memory allocation failure.");
}
m_context->RestoreReg();
g_perfmon.Put(GSPerfMon::Draw, 1);
g_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
// Reset the queues, then put the carried-over vertices back at the front.
m_index.tail = 0;
m_vertex.head = 0;
if (unused > 0)
{
memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused);
m_vertex.tail = unused;
// Rebase `next` relative to the old head (0 if it was not past it).
m_vertex.next = next > head ? next - head : 0;
}
else
{
m_vertex.tail = 0;
m_vertex.next = 0;
}
}
}
void GSState::Write(const u8* mem, int len)
{
int w = m_env.TRXREG.RRW;
int h = m_env.TRXREG.RRH;
GIFRegBITBLTBUF& blit = m_tr.m_blit;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[blit.DPSM];
// The game uses a resolution of 512x244. RT is located at 0x700 and depth at 0x0
//
// #Bug number 1. (bad top bar)
// The game saves the depth buffer in the EE but with a resolution of
// 512x255. So it is ending to 0x7F8, ouch it saves the top of the RT too.
//
// #Bug number 2. (darker screen)
// The game will restore the previously saved buffer at position 0x0 to
// 0x7F8. Because of the extra RT pixels, GS will partialy invalidate
// the texture located at 0x700. Next access will generate a cache miss
//
// The no-solution: instead to handle garbage (aka RT) at the end of the
// depth buffer. Let's reduce the size of the transfer
if (m_game.title == CRC::SMTNocturne) // TODO: hack
{
if (blit.DBP == 0 && blit.DPSM == PSM_PSMZ32 && w == 512 && h > 224)
{
h = 224;
m_env.TRXREG.RRH = 224;
}
}
if (!m_tr.Update(w, h, psm.trbpp, len))
return;
GL_CACHE("Write! ... => 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)",
blit.DBP, blit.DBW, psm_str(blit.DPSM),
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h);
if (PRIM->TME && (blit.DBP == m_context->TEX0.TBP0 || blit.DBP == m_context->TEX0.CBP)) // TODO: hmmmm
FlushPrim();
if (m_tr.end == 0 && len >= m_tr.total)
{
// received all data in one piece, no need to buffer it
GSVector4i r;
r.left = m_env.TRXPOS.DSAX;
r.top = m_env.TRXPOS.DSAY;
r.right = r.left + m_env.TRXREG.RRW;
r.bottom = r.top + m_env.TRXREG.RRH;
InvalidateVideoMem(blit, r);
(m_mem.*psm.wi)(m_tr.x, m_tr.y, mem, m_tr.total, blit, m_env.TRXPOS, m_env.TRXREG);
m_tr.start = m_tr.end = m_tr.total;
g_perfmon.Put(GSPerfMon::Swizzle, len);
}
else
{
memcpy(&m_tr.buff[m_tr.end], mem, len);
m_tr.end += len;
if (m_tr.end >= m_tr.total)
FlushWrite();
}
m_mem.m_clut.Invalidate();
}
// Prepares a local -> host read of `len` bytes.
// After updating the transfer state, local memory for the source rectangle
// is invalidated, but only when the transfer position is still at the
// rectangle's origin (SSAX, SSAY). `mem` is not used here.
void GSState::InitReadFIFO(u8* mem, int len)
{
	if (len <= 0)
		return;

	const int src_x = m_env.TRXPOS.SSAX;
	const int src_y = m_env.TRXPOS.SSAY;
	const int width = m_env.TRXREG.RRW;
	const int height = m_env.TRXREG.RRH;

	const u16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;

	if (!m_tr.Update(width, height, bpp, len))
		return;

	const bool at_origin = (m_tr.x == src_x && m_tr.y == src_y);
	if (at_origin)
	{
		InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(src_x, src_y, src_x + width, src_y + height));
	}
}
// NOTE: called from outside MTGS
//
// Reads `len` bytes of the current local -> host transfer into `mem`.
// When dumping is enabled (s_dump && s_save) and the draw counter has reached
// s_saven, the source buffer is additionally saved as a BMP under m_dump_root.
void GSState::Read(u8* mem, int len)
{
	if (len <= 0)
		return;

	const int src_x = m_env.TRXPOS.SSAX;
	const int src_y = m_env.TRXPOS.SSAY;
	const int width = m_env.TRXREG.RRW;
	const int height = m_env.TRXREG.RRH;

	const u16 bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp;

	if (!m_tr.Update(width, height, bpp, len))
		return;

	m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);

	const bool want_dump = s_dump && s_save && s_n >= s_saven;
	if (!want_dump)
		return;

	const GSVector4i r(src_x, src_y, src_x + width, src_y + height);

	std::string s = m_dump_root + format(
		"%05d_read_%05x_%d_%d_%d_%d_%d_%d.bmp",
		s_n, (int)m_env.BITBLTBUF.SBP, (int)m_env.BITBLTBUF.SBW, (int)m_env.BITBLTBUF.SPSM,
		r.left, r.top, r.right, r.bottom);

	m_mem.SaveBMP(s, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, r.right, r.bottom);
}
// Performs a local -> local transfer: copies the RRW x RRH rectangle at
// (SSAX, SSAY) in the source buffer to (DSAX, DSAY) in the destination
// buffer, as described by BITBLTBUF / TRXPOS / TRXREG.
//
// DIRX / DIRY select the traversal direction: when set, copying starts at the
// last column / row and walks backwards.
//
// The pixel copy routine is chosen by format:
//   - same transfer bpp of 32, 24 or 16: direct copy through typed pointers
//     (24-bit preserves the destination's top byte)
//   - PSMT8 -> PSMT8: direct byte copy
//   - PSMT4 -> PSMT4: ReadPixel4 / WritePixel4 by address
//   - anything else: the per-format rpa / wpa accessors
void GSState::Move()
{
// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
// guitar hero copies the far end of the board to do a similar blend too
int sx = m_env.TRXPOS.SSAX;
int sy = m_env.TRXPOS.SSAY;
int dx = m_env.TRXPOS.DSAX;
int dy = m_env.TRXPOS.DSAY;
const int w = m_env.TRXREG.RRW;
const int h = m_env.TRXREG.RRH;
GL_CACHE("Move! 0x%x W:%d F:%s => 0x%x W:%d F:%s (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)",
m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, psm_str(m_env.BITBLTBUF.SPSM),
m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, psm_str(m_env.BITBLTBUF.DPSM),
m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY,
sx, sy, dx, dy, w, h);
// Invalidate both rectangles before touching memory (uses the unadjusted
// top-left coordinates).
InvalidateLocalMem(m_env.BITBLTBUF, GSVector4i(sx, sy, sx + w, sy + h));
InvalidateVideoMem(m_env.BITBLTBUF, GSVector4i(dx, dy, dx + w, dy + h));
int xinc = 1;
int yinc = 1;
// For a reversed direction, start from the last column/row and step by -1.
if (m_env.TRXPOS.DIRX)
{
sx += w - 1;
dx += w - 1;
xinc = -1;
}
if (m_env.TRXPOS.DIRY)
{
sy += h - 1;
dy += h - 1;
yinc = -1;
}
const GSLocalMemory::psm_t& spsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM];
const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM];
// TODO: unroll inner loops (width has special size requirement, must be multiples of 1 << n, depending on the format)
int sbp = m_env.BITBLTBUF.SBP;
int sbw = m_env.BITBLTBUF.SBW;
int dbp = m_env.BITBLTBUF.DBP;
int dbw = m_env.BITBLTBUF.DBW;
GSOffset spo = m_mem.GetOffset(sbp, sbw, m_env.BITBLTBUF.SPSM);
GSOffset dpo = m_mem.GetOffset(dbp, dbw, m_env.BITBLTBUF.DPSM);
// Row/column walker shared by all formats.
//   getPAHelper(offset, x, y) -> per-row helper for the row starting at (x, y)
//   pxCopyFn(d, s, x)         -> copies the pixel at horizontal offset x
// The two branches differ only in the sign of the horizontal offset passed
// to pxCopyFn. Captures by value, so it uses the adjusted sx/sy/dx/dy.
auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn)
{
int _sy = sy, _dy = dy; // Faster with local copied variables, compiler optimizations are dumb
if (xinc > 0)
{
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
{
auto s = getPAHelper(spo, sx, _sy);
auto d = getPAHelper(dpo, dx, _dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d, s, x);
}
}
}
else
{
for (int y = 0; y < h; y++, _sy += yinc, _dy += yinc)
{
auto s = getPAHelper(spo, sx, _sy);
auto d = getPAHelper(dpo, dx, _dy);
for (int x = 0; x < w; x++)
{
pxCopyFn(d, s, -x);
}
}
}
};
// Address-based copy: pxCopyFn receives whatever value(x) yields from the
// address-only helpers (the callers below take them as u32 offsets).
auto copy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
{
genericCopy(dpo, spo,
[](const GSOffset& o, int x, int y) { return o.paMulti(x, y); },
[=](const GSOffset::PAHelper& d, const GSOffset::PAHelper& s, int x)
{
return pxCopyFn(d.value(x), s.value(x));
});
};
// Pointer-based copy: the helpers are built on top of `vm`, and pxCopyFn
// receives whatever value(x) yields (the callers below take typed pointers).
auto copyFast = [=](auto* vm, const GSOffset& dpo, const GSOffset& spo, auto&& pxCopyFn)
{
genericCopy(dpo, spo,
[=](const GSOffset& o, int x, int y) { return o.paMulti(vm, x, y); },
[=](const auto& d, const auto& s, int x)
{
return pxCopyFn(d.value(x), s.value(x));
});
};
if (spsm.trbpp == dpsm.trbpp && spsm.trbpp >= 16)
{
if (spsm.trbpp == 32)
{
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
{
*d = *s;
});
}
else if (spsm.trbpp == 24)
{
// Only the low 24 bits are transferred; the destination keeps its top byte.
copyFast(m_mem.m_vm32, dpo.assertSizesMatch(GSLocalMemory::swizzle32), spo.assertSizesMatch(GSLocalMemory::swizzle32), [](u32* d, u32* s)
{
*d = (*d & 0xff000000) | (*s & 0x00ffffff);
});
}
else // if(spsm.trbpp == 16)
{
copyFast(m_mem.m_vm16, dpo.assertSizesMatch(GSLocalMemory::swizzle16), spo.assertSizesMatch(GSLocalMemory::swizzle16), [](u16* d, u16* s)
{
*d = *s;
});
}
}
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT8 && m_env.BITBLTBUF.DPSM == PSM_PSMT8)
{
copyFast(m_mem.m_vm8, GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT8), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT8), [](u8* d, u8* s)
{
*d = *s;
});
}
else if (m_env.BITBLTBUF.SPSM == PSM_PSMT4 && m_env.BITBLTBUF.DPSM == PSM_PSMT4)
{
copy(GSOffset::fromKnownPSM(dbp, dbw, PSM_PSMT4), GSOffset::fromKnownPSM(sbp, sbw, PSM_PSMT4), [&](u32 doff, u32 soff)
{
m_mem.WritePixel4(doff, m_mem.ReadPixel4(soff));
});
}
else
{
// Generic fallback: read with the source format's accessor, write with the
// destination format's accessor.
copy(dpo, spo, [&](u32 doff, u32 soff)
{
(m_mem.*dpsm.wpa)(doff, (m_mem.*spsm.rpa)(soff));
});
}
}
// Clears GIF path state selected by `mask` and resets the transfer direction
// and Q.
//   mask bit 0 -> paths 0 and 3
//   mask bit 1 -> path 1
//   mask bit 2 -> path 2
// Regardless of the mask, TRXDIR.XDIR is set to the invalid value 3 and m_q
// to 1.0.
void GSState::SoftReset(u32 mask)
{
	const auto clear_path = [this](size_t n) {
		memset(&m_path[n], 0, sizeof(GIFPath));
	};

	if (mask & 1)
	{
		clear_path(0);
		clear_path(3);
	}

	if (mask & 2)
	{
		clear_path(1);
	}

	if (mask & 4)
	{
		clear_path(2);
	}

	m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value

	m_q = 1.0f;
}
// Services a FIFO read: flushes pending work, then reads `size` 16-byte
// units into `mem`. The byte count is also reported to the dump recorder
// when one is active.
void GSState::ReadFIFO(u8* mem, int size)
{
	GSPerfMonAutoTimer pmat(&g_perfmon);

	Flush();

	const int byte_count = size * 16;

	Read(mem, byte_count);

	if (m_dump)
	{
		m_dump->ReadFIFO(byte_count);
	}
}
template void GSState::Transfer<0>(const u8* mem, u32 size);
template void GSState::Transfer<1>(const u8* mem, u32 size);
template void GSState::Transfer<2>(const u8* mem, u32 size);
template void GSState::Transfer<3>(const u8* mem, u32 size);

// Parses GIF data arriving on path `index` and dispatches it to the register
// handlers / transfer routines.
//
// `mem` points at the data and `size` is its length in 16-byte units (every
// consumed GIFTag / GIFPackedReg decrements it by one, image data is passed
// on as `len * 16` bytes). Parsing state is kept in m_path[index], so a packet
// may be split over several calls.
//
// For path 0 only: parsing stops at the end of a packet that has EOP set, and
// a packet still incomplete once the data runs out is dropped (see the hackfix
// at the end).
template <int index>
void GSState::Transfer(const u8* mem, u32 size)
{
	GSPerfMonAutoTimer pmat(&g_perfmon);

	const u8* start = mem;

	GIFPath& path = m_path[index];

	while (size > 0)
	{
		if (path.nloop == 0)
		{
			// No packet in progress: the next qword is a GIFtag.
			path.SetTag(mem);

			mem += sizeof(GIFTag);
			size--;

			// eeuser 7.2.2. GIFtag:
			// "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded."
			if (path.nloop > 0)
			{
				m_q = 1.0f;

				// ASSERT(!(path.tag.PRE && path.tag.FLG == GIF_FLG_REGLIST)); // kingdom hearts

				if (path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
					ApplyPRIM(path.tag.PRIM);
			}
		}
		else
		{
			u32 total;

			switch (path.tag.FLG)
			{
				case GIF_FLG_PACKED:
					// get to the start of the loop
					if (path.reg != 0)
					{
						do
						{
							(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);

							mem += sizeof(GIFPackedReg);
							size--;
						} while (path.StepReg() && size > 0 && path.reg != 0);

						// The catch-up loop above can consume the rest of the input or
						// finish the packet. Continuing in either state is unsafe:
						//  - nloop == 0 gives total == 0, and the `do { } while (--total > 0)`
						//    loops below would wrap around and read far past the buffer;
						//  - size == 0 would let the partial-data loop below read one
						//    register beyond the input and wrap `size`.
						if (size == 0 || path.nloop == 0)
							break;
					}

					// all data available? usually is
					total = path.nloop * path.nreg;

					if (size >= total)
					{
						size -= total;

						switch (path.type)
						{
							case GIFPath::TYPE_UNKNOWN:
							{
								u32 reg = 0;

								do
								{
									(this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem);

									mem += sizeof(GIFPackedReg);

									reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg
								} while (--total > 0);
							}
							break;
							case GIFPath::TYPE_ADONLY: // very common
								do
								{
									(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR & 0x7F])(&((GIFPackedReg*)mem)->r);

									mem += sizeof(GIFPackedReg);
								} while (--total > 0);
								break;
							case GIFPath::TYPE_STQRGBAXYZF2: // majority of the vertices are formatted like this
								(this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2])((GIFPackedReg*)mem, total);

								mem += total * sizeof(GIFPackedReg);
								break;
							case GIFPath::TYPE_STQRGBAXYZ2:
								(this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2])((GIFPackedReg*)mem, total);

								mem += total * sizeof(GIFPackedReg);
								break;
							default:
								__assume(0);
						}

						path.nloop = 0;
					}
					else
					{
						// Only part of the packet is available: process register by
						// register until the input runs out.
						do
						{
							(this->*m_fpGIFPackedRegHandlers[path.GetReg()])((GIFPackedReg*)mem);

							mem += sizeof(GIFPackedReg);
							size--;
						} while (path.StepReg() && size > 0);
					}

					break;

				case GIF_FLG_REGLIST:
					// TODO: do it similar to packed operation

					// REGLIST data is handled in GIFReg-sized units, two per qword.
					size *= 2;

					do
					{
						(this->*m_fpGIFRegHandlers[path.GetReg() & 0x7F])((GIFReg*)mem);

						mem += sizeof(GIFReg);
						size--;
					} while (path.StepReg() && size > 0);

					// Skip the unused half of the last qword.
					if (size & 1)
						mem += sizeof(GIFReg);

					size /= 2;

					break;

				case GIF_FLG_IMAGE2:
					// hmmm
					// Fall through here fixes a crash in Wallace and Gromit Project Zoo
					// and according to Pseudonym we shouldn't even land in this code. So hmm indeed. (rama)

				case GIF_FLG_IMAGE:
				{
					int len = (int)std::min(size, path.nloop);

					switch (m_env.TRXDIR.XDIR)
					{
						case 0:
							Write(mem, len * 16);
							break;
						case 2:
							Move();
							break;
						default: // 1 and 3
							// 1 is invalid because downloads can only be done
							// with a reverse fifo operation (vif)
							// 3 is spec prohibited, its behavior is not known
							// let's do nothing for now
							break;
					}

					mem += len * 16;
					path.nloop -= len;
					size -= len;

					break;
				}
				default:
					__assume(0);
			}
		}

		if (index == 0)
		{
			if (path.tag.EOP && path.nloop == 0)
				break;
		}
	}

	if (m_dump && mem > start)
		m_dump->Transfer(index, start, mem - start);

	if (index == 0)
	{
		if (size == 0 && path.nloop > 0)
		{
			// Hackfix for BIOS, which sends an incomplete packet when it does an XGKICK without
			// having an EOP specified anywhere in VU1 memory. Needed until PCSX2 is fixed to
			// handle it more properly (ie, without looping infinitely).

			path.nloop = 0;
		}
	}
}
// Serialisation helper: copies `len` bytes (by default sizeof(T)) from `src`
// to the write cursor `dst` and advances the cursor past them.
template <class T>
static void WriteState(u8*& dst, T* src, size_t len = sizeof(T))
{
	// memcpy returns its destination pointer, i.e. the cursor before the write.
	dst = static_cast<u8*>(memcpy(dst, src, len)) + len;
}
template <class T>
static void ReadState(T* dst, u8*& src, size_t len = sizeof(T))
{
memcpy(dst, src, len);
src += len;
}
// Serialises the GS state into a savestate buffer.
//
// With `sizeonly` set, only fd->size is filled in with the required buffer
// size. Otherwise the state is written to fd->data, which must be at least
// m_sssize bytes.
//
// Returns 0 on success and -1 when `fd` is null, has no data buffer, or the
// buffer is too small.
//
// The field order below is the savestate format; Defrost() reads the fields
// back in the same order.
int GSState::Freeze(freezeData* fd, bool sizeonly)
{
	// Reject a null descriptor up front, as Defrost() does.
	if (!fd)
		return -1;

	if (sizeonly)
	{
		fd->size = m_sssize;
		return 0;
	}

	if (!fd->data || fd->size < m_sssize)
		return -1;

	// Make sure nothing is pending before taking the snapshot.
	Flush();

	u8* data = fd->data;

	WriteState(data, &m_version);
	WriteState(data, &m_env.PRIM);
	WriteState(data, &m_env.PRMODECONT);
	WriteState(data, &m_env.TEXCLUT);
	WriteState(data, &m_env.SCANMSK);
	WriteState(data, &m_env.TEXA);
	WriteState(data, &m_env.FOGCOL);
	WriteState(data, &m_env.DIMX);
	WriteState(data, &m_env.DTHE);
	WriteState(data, &m_env.COLCLAMP);
	WriteState(data, &m_env.PABE);
	WriteState(data, &m_env.BITBLTBUF);
	WriteState(data, &m_env.TRXDIR);
	WriteState(data, &m_env.TRXPOS);
	WriteState(data, &m_env.TRXREG);
	WriteState(data, &m_env.TRXREG); // obsolete

	for (int i = 0; i < 2; i++)
	{
		WriteState(data, &m_env.CTXT[i].XYOFFSET);
		WriteState(data, &m_env.CTXT[i].TEX0);
		WriteState(data, &m_env.CTXT[i].TEX1);
		WriteState(data, &m_env.CTXT[i].CLAMP);
		WriteState(data, &m_env.CTXT[i].MIPTBP1);
		WriteState(data, &m_env.CTXT[i].MIPTBP2);
		WriteState(data, &m_env.CTXT[i].SCISSOR);
		WriteState(data, &m_env.CTXT[i].ALPHA);
		WriteState(data, &m_env.CTXT[i].TEST);
		WriteState(data, &m_env.CTXT[i].FBA);
		WriteState(data, &m_env.CTXT[i].FRAME);
		WriteState(data, &m_env.CTXT[i].ZBUF);
	}

	WriteState(data, &m_v.RGBAQ);
	WriteState(data, &m_v.ST);
	WriteState(data, &m_v.UV);
	WriteState(data, &m_v.FOG);
	WriteState(data, &m_v.XYZ);

	// Obsolete slot kept for format compatibility; Defrost() skips it. It is
	// zero-filled so the savestate does not contain whatever happened to be in
	// the caller's buffer.
	memset(data, 0, sizeof(GIFReg));
	data += sizeof(GIFReg); // obsolete

	WriteState(data, &m_tr.x);
	WriteState(data, &m_tr.y);
	WriteState(data, m_mem.m_vm8, m_mem.m_vmsize);

	for (GIFPath& path : m_path)
	{
		// Fold the expanded path state back into the tag so that
		// path.SetTag() can rebuild it on load.
		path.tag.NREG = path.nreg;
		path.tag.NLOOP = path.nloop;
		path.tag.REGS = 0;

		// Pack the register descriptors back into the tag, 4 bits each.
		for (size_t j = 0; j < std::size(path.regs.U8); j++)
		{
			path.tag.U32[2 + (j >> 3)] |= path.regs.U8[j] << ((j & 7) << 2);
		}

		WriteState(data, &path.tag);
		WriteState(data, &path.reg);
	}

	WriteState(data, &m_q);

	return 0;
}
// Restores the GS state from a savestate buffer written by Freeze().
//
// Returns 0 on success and -1 when the descriptor is null/empty, the buffer
// is smaller than m_sssize, or the stored version is newer than m_version.
//
// Fields are read in the order Freeze() writes them. Older versions contain
// extra fields, which are skipped:
//   version <= 6 : a GIFRegPRMODE after PRIM and a GIFRegTEX2 after each TEX1
//   version <= 4 : 7 extra u32 values after each context
// After loading, all derived state (context pointer, vertex kick handlers,
// DIMX tables, scissors and cached offsets) is rebuilt.
int GSState::Defrost(const freezeData* fd)
{
if (!fd || !fd->data || fd->size == 0)
return -1;
if (fd->size < m_sssize)
return -1;
u8* data = fd->data;
int version;
ReadState(&version, data);
if (version > m_version)
{
Console.Error("GS: Savestate version is incompatible. Load aborted.");
return -1;
}
// Drop any pending work and start from a clean state before overwriting it.
Flush();
Reset();
ReadState(&m_env.PRIM, data);
if (version <= 6)
data += sizeof(GIFRegPRMODE);
ReadState(&m_env.PRMODECONT, data);
ReadState(&m_env.TEXCLUT, data);
ReadState(&m_env.SCANMSK, data);
ReadState(&m_env.TEXA, data);
ReadState(&m_env.FOGCOL, data);
ReadState(&m_env.DIMX, data);
ReadState(&m_env.DTHE, data);
ReadState(&m_env.COLCLAMP, data);
ReadState(&m_env.PABE, data);
ReadState(&m_env.BITBLTBUF, data);
ReadState(&m_env.TRXDIR, data);
ReadState(&m_env.TRXPOS, data);
ReadState(&m_env.TRXREG, data);
ReadState(&m_env.TRXREG, data); // obsolete
// Technically this value ought to be saved like m_tr.x/y (break
// compatibility) but so far only a single game (Motocross Mania) really
// depends on this value (i.e != BITBLTBUF) Savestates are likely done at
// VSYNC, so not in the middle of a texture transfer, therefore register
// will be set again properly
m_tr.m_blit = m_env.BITBLTBUF;
for (int i = 0; i < 2; i++)
{
ReadState(&m_env.CTXT[i].XYOFFSET, data);
ReadState(&m_env.CTXT[i].TEX0, data);
ReadState(&m_env.CTXT[i].TEX1, data);
if (version <= 6)
data += sizeof(GIFRegTEX2);
ReadState(&m_env.CTXT[i].CLAMP, data);
ReadState(&m_env.CTXT[i].MIPTBP1, data);
ReadState(&m_env.CTXT[i].MIPTBP2, data);
ReadState(&m_env.CTXT[i].SCISSOR, data);
ReadState(&m_env.CTXT[i].ALPHA, data);
ReadState(&m_env.CTXT[i].TEST, data);
ReadState(&m_env.CTXT[i].FBA, data);
ReadState(&m_env.CTXT[i].FRAME, data);
ReadState(&m_env.CTXT[i].ZBUF, data);
// Apply the same 16-bit masking that the XYOFFSET register handler applies.
m_env.CTXT[i].XYOFFSET.OFX &= 0xffff;
m_env.CTXT[i].XYOFFSET.OFY &= 0xffff;
if (version <= 4)
data += sizeof(u32) * 7; // skip
}
ReadState(&m_v.RGBAQ, data);
ReadState(&m_v.ST, data);
ReadState(&m_v.UV, data);
ReadState(&m_v.FOG, data);
ReadState(&m_v.XYZ, data);
data += sizeof(GIFReg); // obsolete
ReadState(&m_tr.x, data);
ReadState(&m_tr.y, data);
ReadState(m_mem.m_vm8, data, m_mem.m_vmsize);
m_tr.total = 0; // TODO: restore transfer state
for (GIFPath& path : m_path)
{
ReadState(&path.tag, data);
ReadState(&path.reg, data);
path.SetTag(&path.tag); // expand regs
}
ReadState(&m_q, data);
// Rebuild everything that is derived from the registers just loaded.
PRIM = &m_env.PRIM;
UpdateContext();
UpdateVertexKick();
m_env.UpdateDIMX();
for (size_t i = 0; i < 2; i++)
{
m_env.CTXT[i].UpdateScissor();
m_env.CTXT[i].offset.fb = m_mem.GetOffset(m_env.CTXT[i].FRAME.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].FRAME.PSM);
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
m_env.CTXT[i].offset.tex = m_mem.GetOffset(m_env.CTXT[i].TEX0.TBP0, m_env.CTXT[i].TEX0.TBW, m_env.CTXT[i].TEX0.PSM);
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
}
UpdateScissor();
g_perfmon.SetFrame(5000);
return 0;
}
// Records the game CRC and options, looks up the matching game entry and
// sets up the CRC hacks. When the CRC hack level is Off, the lookup is done
// with CRC 0 instead of the real one.
void GSState::SetGameCRC(u32 crc, int options)
{
	m_crc = crc;
	m_options = options;

	const bool hacks_disabled = (m_crc_hack_level == CRCHackLevel::Off);
	const u32 lookup_crc = hacks_disabled ? 0 : crc;

	m_game = CRC::Lookup(lookup_crc);

	SetupCrcHack();
}
//
// Points m_context at the drawing context selected by PRIM->CTXT (logging
// when the selection changes) and refreshes the cached scissor from it.
void GSState::UpdateContext()
{
	auto* const selected = &m_env.CTXT[PRIM->CTXT];

	const bool ctx_switch = (m_context != selected);
	if (ctx_switch)
	{
		GL_REG("Context Switch %d", PRIM->CTXT);
	}

	m_context = selected;

	UpdateScissor();
}
// Caches the current context's scissor rectangle (ex) and offset (ofxy) in
// m_scissor / m_ofxy.
void GSState::UpdateScissor()
{
	const auto& scissor = m_context->scissor;

	m_scissor = scissor.ex;
	m_ofxy = scissor.ofxy;
}
// Installs the XYZ/XYZF register handlers that match the current primitive
// type (PRIM->PRIM) into the dispatch tables. Does nothing while m_frameskip
// is set.
void GSState::UpdateVertexKick()
{
	if (m_frameskip)
		return;

	const u32 prim = PRIM->PRIM;

	// Per-primitive handler rows; column order is XYZF2, XYZF3, XYZ2, XYZ3.
	const auto& packed_xyz = m_fpGIFPackedRegHandlerXYZ[prim];
	const auto& reg_xyz = m_fpGIFRegHandlerXYZ[prim];

	// PACKED format handlers.
	m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = packed_xyz[0];
	m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = packed_xyz[1];
	m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = packed_xyz[2];
	m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = packed_xyz[3];

	// A+D / REGLIST handlers.
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = reg_xyz[0];
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = reg_xyz[1];
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = reg_xyz[2];
	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = reg_xyz[3];

	// Batched handlers for the common vertex layouts.
	m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
	m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim];
}
void GSState::GrowVertexBuffer()
{
const size_t maxcount = std::max<size_t>(m_vertex.maxcount * 3 / 2, 10000);
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 3, 32); // worst case is slightly less than vertex number * 3
if (vertex == NULL || index == NULL)
{
const size_t vert_byte_count = sizeof(GSVertex) * maxcount;
const size_t idx_byte_count = sizeof(u32) * maxcount * 3;
Console.Error("GS: failed to allocate %zu bytes for verticles and %zu for indices.",
vert_byte_count, idx_byte_count);
throw GSError();
}
if (m_vertex.buff != NULL)
{
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
_aligned_free(m_vertex.buff);
}
if (m_index.buff != NULL)
{
memcpy(index, m_index.buff, sizeof(u32) * m_index.tail);
_aligned_free(m_index.buff);
}
m_vertex.buff = vertex;
m_vertex.maxcount = maxcount - 3; // -3 to have some space at the end of the buffer before DrawingKick can grow it
m_index.buff = index;
}
/// Reports whether the sprites queued for the current draw overlap one another.
///
/// Returns PRIM_OVERLAP_NO when fewer than 4 vertices are queued, and
/// PRIM_OVERLAP_UNKNOW for any primitive class other than sprites. For sprites,
/// m_drawlist is rebuilt so that each entry is the number of consecutive sprites
/// that were found not to intersect the bounding box accumulated so far; the
/// result is PRIM_OVERLAP_YES as soon as one intersection was seen.
GSState::PRIM_OVERLAP GSState::PrimitiveOverlap()
{
	// Either 1 triangle or 1 line or 3 POINTs
	// It is bad for the POINTs but low probability that they overlap
	if (m_vertex.next < 4)
		return PRIM_OVERLAP_NO;

	// Only sprite-vs-sprite intersection is analysed; anything else: maybe, maybe not
	if (m_vt.m_primclass != GS_SPRITE_CLASS)
		return PRIM_OVERLAP_UNKNOW;

	const size_t count = m_vertex.next;
	const GSVertex* v = m_vertex.buff;

	// Builds the ordered rectangle of the sprite made of vertices idx and idx + 1:
	// .x = min(v[idx].XYZ.X, v[idx+1].XYZ.X)
	// .y = min(v[idx].XYZ.Y, v[idx+1].XYZ.Y)
	// .z = max(v[idx].XYZ.X, v[idx+1].XYZ.X)
	// .w = max(v[idx].XYZ.Y, v[idx+1].XYZ.Y)
	const auto sprite_rect = [v](size_t idx) -> GSVector4i {
		GSVector4i raw = GSVector4i(v[idx].m[1]).upl16(GSVector4i(v[idx + 1].m[1])).upl16().xzyw();
		return raw.xyxy().blend(raw.zwzw(), raw > raw.zwxy());
	};

	PRIM_OVERLAP overlap = PRIM_OVERLAP_NO;
	m_drawlist.clear();

	for (size_t first = 0; first < count;)
	{
		// A bounding box is accumulated over the run instead of comparing every pair
		// of sprites. It removes a loop so the code is much faster (check game virtua
		// fighter). Besides, it allows checking the Y order properly.
		GSVector4i all = sprite_rect(first);

		size_t j = first + 2;
		for (; j < count; j += 2)
		{
			GSVector4i sprite = sprite_rect(j);

			// Be sure to get vertex in good order, otherwise .r* function doesn't
			// work as expected.
			ASSERT(sprite.x <= sprite.z);
			ASSERT(sprite.y <= sprite.w);
			ASSERT(all.x <= all.z);
			ASSERT(all.y <= all.w);

			if (!all.rintersect(sprite).rempty())
			{
				// This sprite touches the accumulated box: it starts the next run.
				overlap = PRIM_OVERLAP_YES;
				break;
			}

			all = all.runion_ordered(sprite);
		}

		m_drawlist.push_back((j - first) >> 1); // Sprite count
		first = j;
	}

#if 0
	// Old algo: less constraint but O(n^2) instead of O(n) as above

	// You have no guarantee on the sprite order, first vertex can be either top-left or bottom-left
	// There is a high probability that the draw call will uses same ordering for all vertices.
	// In order to keep a small performance impact only the first sprite will be checked
	//
	// Some safe-guard will be added in the outer-loop to avoid corruption with a limited perf impact
	if (v[1].XYZ.Y < v[0].XYZ.Y) {
		// First vertex is Top-Left
		for (size_t i = 0; i < count; i += 2) {
			if (v[i + 1].XYZ.Y > v[i].XYZ.Y) {
				return PRIM_OVERLAP_UNKNOW;
			}
			GSVector4i vi(v[i].XYZ.X, v[i + 1].XYZ.Y, v[i + 1].XYZ.X, v[i].XYZ.Y);
			for (size_t j = i + 2; j < count; j += 2) {
				GSVector4i vj(v[j].XYZ.X, v[j + 1].XYZ.Y, v[j + 1].XYZ.X, v[j].XYZ.Y);
				GSVector4i inter = vi.rintersect(vj);
				if (!inter.rempty()) {
					return PRIM_OVERLAP_YES;
				}
			}
		}
	}
	else {
		// First vertex is Bottom-Left
		for (size_t i = 0; i < count; i += 2) {
			if (v[i + 1].XYZ.Y < v[i].XYZ.Y) {
				return PRIM_OVERLAP_UNKNOW;
			}
			GSVector4i vi(v[i].XYZ.X, v[i].XYZ.Y, v[i + 1].XYZ.X, v[i + 1].XYZ.Y);
			for (size_t j = i + 2; j < count; j += 2) {
				GSVector4i vj(v[j].XYZ.X, v[j].XYZ.Y, v[j + 1].XYZ.X, v[j + 1].XYZ.Y);
				GSVector4i inter = vi.rintersect(vj);
				if (!inter.rempty()) {
					return PRIM_OVERLAP_YES;
				}
			}
		}
	}
#endif

	// fprintf(stderr, "%d: Yes, code can be optimized (draw of %d vertices)\n", s_n, count);
	return overlap;
}
/// Decides whether the primitives queued so far must be flushed before the vertex
/// currently held in m_v is added, because the draw samples from the same memory
/// it renders to (TEX0.TBP0 equals the FRAME or ZBUF block).
/// Calls Flush() when the texture area about to be read intersects the area
/// covered by the already-queued vertices; otherwise does nothing.
__forceinline void GSState::HandleAutoFlush()
{
// The texture lives at the frame buffer address, unless the alpha test is ATST == 0 with AFAIL == 2.
const bool frame_hit = (m_context->FRAME.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2);
// There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd.
const bool zbuf_hit = (m_context->ZBUF.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL != 2) && !m_context->ZBUF.ZMSK;
// To briefly explain what's going on here, what we are checking for is draws over a texture when the source and destination are themselves.
// Because one page of the texture gets buffered in the Texture Cache (the PS2's one) if any of those pixels are overwritten, you still read the old data.
// So we need to calculate if a page boundary is being crossed for the format it is in and if the same part of the texture being written and read inside the draw.
// The PSM comparison ignores bits 4 and 5 (mask ~0x30); texturing must be enabled and the frame must not be fully write-masked.
if (((frame_hit && ((m_context->TEX0.PSM ^ m_context->FRAME.PSM) & ~0x30) == 0) || (zbuf_hit && ((m_context->TEX0.PSM ^ m_context->ZBUF.PSM) & ~0x30) == 0)) && PRIM->TME && (m_context->FRAME.FBMSK != 0xFFFFFFFF))
{
// Masks that round a texel coordinate down to the start of its page for the texture's format.
const int page_mask_x = ~(GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.x - 1);
const int page_mask_y = ~(GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.y - 1);
const GSVector4i page_mask = { page_mask_x, page_mask_y, page_mask_x, page_mask_y };
// n = number of vertices per primitive for the current primitive class.
size_t n = 1;
switch (GSUtil::GetPrimClass(PRIM->PRIM))
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
GSVector4i tex_coord;
// Prepare the currently processed vertex.
// FST: integer UV with the low 4 bits dropped; otherwise S/Q and T/Q scaled by the texture size.
if (PRIM->FST)
{
tex_coord.x = m_v.U >> 4;
tex_coord.y = m_v.V >> 4;
}
else
{
tex_coord.x = (int)((1 << m_context->TEX0.TW) * (m_v.ST.S / m_v.RGBAQ.Q));
tex_coord.y = (int)((1 << m_context->TEX0.TH) * (m_v.ST.T / m_v.RGBAQ.Q));
}
// tex_rect: last accepted (min x, min y, max x, max y) of the texture reads; next_rect: candidate being grown.
GSVector4i tex_rect = tex_coord.xyxy();
GSVector4i next_rect = tex_rect;
// NOTE(review): when m_index.tail is a multiple of n this evaluates to m_index.tail - 1,
// so the loop below runs exactly once (on the last index) - confirm that is the intent.
const int current_tex_end = (int)(m_index.tail - (m_index.tail % n)) - 1;
bool page_crossed = false;
// Check previous texture co-ordinates to see if we have changed page
for (int i = m_index.tail - 1; i >= current_tex_end; i--)
{
const GSVertex* v = &m_vertex.buff[m_index.buff[i]];
if (PRIM->FST)
{
tex_coord.x = v->U >> 4;
tex_coord.y = v->V >> 4;
}
else
{
tex_coord.x = (int)((1 << m_context->TEX0.TW) * (v->ST.S / v->RGBAQ.Q));
tex_coord.y = (int)((1 << m_context->TEX0.TH) * (v->ST.T / v->RGBAQ.Q));
}
// Grow the candidate rectangle to include this vertex.
next_rect.x = std::min(next_rect.x, tex_coord.x);
next_rect.z = std::max(next_rect.z, tex_coord.x);
next_rect.y = std::min(next_rect.y, tex_coord.y);
next_rect.w = std::max(next_rect.w, tex_coord.y);
const GSVector4i pages = next_rect & page_mask;
// We have changed page, so ignore the old textures co-ordinates.
// (The min and max corners of the candidate no longer fall in the same page.)
if (!pages.xyxy().eq(pages.zwzw()))
{
page_crossed = true;
break;
}
tex_rect = next_rect;
}
tex_rect += GSVector4i(0, 0, 1, 1); // Intersect goes on space inside the rect
if(page_crossed)
{
// Make sure the format matches, otherwise the coordinates aren't gonna match, so the draws won't intersect.
if (((frame_hit && (m_context->TEX0.PSM == m_context->FRAME.PSM)) || (zbuf_hit && (m_context->TEX0.PSM == m_context->ZBUF.PSM)))
&& (m_context->FRAME.FBW == m_context->TEX0.TBW))
{
// Update the vertex trace, scissor it (important for Jak 3!) and intersect with the current texture.
// The trace is only refreshed when the loop above started exactly at current_tex_end.
if ((m_index.tail - 1) == current_tex_end)
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail - m_vertex.head, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
GSVector4i area_out = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
if (!area_out.rintersect(tex_rect).rempty())
{
Flush();
}
}
else // Storage of the TEX and FRAME/Z is different, so uhh, just fall back to flushing each page. It's slower, sorry.
{
if (m_context->FRAME.FBW == m_context->TEX0.TBW)
{
//We know we've changed page, so let's set the dimension to cover the page they're in (for different pixel orders)
tex_rect = tex_rect & page_mask;
tex_rect += GSVector4i(0, 0, 1, 1); // Intersect goes on space inside the rect
tex_rect.z += GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.x;
tex_rect.w += GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.y;
if ((m_index.tail - 1) == current_tex_end)
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail - m_vertex.head, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
// Same page-granular expansion for the area already drawn, then compare whole pages.
GSVector4i area_out = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(m_context->scissor.in));
area_out = area_out & page_mask;
area_out += GSVector4i(0, 0, 1, 1); // Intersect goes on space inside the rect
area_out.z += GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.x;
area_out.w += GSLocalMemory::m_psm[m_context->TEX0.PSM].pgs.y;
if (!area_out.rintersect(tex_rect).rempty())
{
Flush();
}
}
else // Page width is different, so it's much more difficult to calculate where it's modifying.
Flush();
}
}
}
}
/// Appends the vertex currently held in m_v to the vertex buffer and, once enough
/// vertices for one primitive of type `prim` are queued, either discards the
/// primitive or emits its indices into the index buffer.
///
/// Template parameters:
///   prim        - GS primitive type being assembled.
///   auto_flush  - when true, HandleAutoFlush() is run first (only if at least n indices are queued).
///   index_swap  - when true, the first and last index of each emitted line/triangle are exchanged.
/// Parameter:
///   skip        - non-zero forces the primitive to be discarded; it is also set here
///                 by the scissor / degenerate-primitive tests.
template <u32 prim, bool auto_flush, bool index_swap>
__forceinline void GSState::VertexKick(u32 skip)
{
// n = number of vertices needed to form one primitive of this type.
size_t n = 0;
switch (prim)
{
case GS_POINTLIST:
case GS_INVALID:
n = 1;
break;
case GS_LINELIST:
case GS_SPRITE:
case GS_LINESTRIP:
n = 2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
n = 3;
break;
}
// The frame buffer is writable, so invalidate the CLUT for the frame block.
if (m_context->FRAME.FBMSK != 0xFFFFFFFF)
m_mem.m_clut.Invalidate(m_context->FRAME.Block());
if (auto_flush && m_index.tail >= n)
HandleAutoFlush();
ASSERT(m_vertex.tail < m_vertex.maxcount + 3);
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
size_t xy_tail = m_vertex.xy_tail;
// callers should write XYZUVF to m_v.m[1] in one piece to have this load store-forwarded, either by the cpu or the compiler when this function is inlined
GSVector4i v0(m_v.m[0]);
GSVector4i v1(m_v.m[1]);
// Copy the vertex (two vector-sized halves) into the slot at tail.
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail];
tailptr[0] = v0;
tailptr[1] = v1;
// Keep a compact copy of the offset-corrected XY in the 4-entry ring m_vertex.xy (indexed with & 3);
// it feeds the scissor / degenerate tests below.
const GSVector4i xy = v1.xxxx().u16to32().sub32(m_ofxy);
GSVector4i::storel(&m_vertex.xy[xy_tail & 3], xy.blend16<0xf0>(xy.sra32(4)).ps32());
m_vertex.tail = ++tail;
m_vertex.xy_tail = ++xy_tail;
// m = vertices queued since head; not enough for a primitive yet -> wait for more.
size_t m = tail - head;
if (m < n)
return;
if (skip == 0 && (prim != GS_TRIANGLEFAN || m <= 4)) // m_vertex.xy only knows about the last 4 vertices, head could be far behind for fan
{
GSVector4i v0, v1, v2, v3, pmin, pmax;
v0 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 1) & 3]); // T-3
v1 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 2) & 3]); // T-2
v2 = GSVector4i::loadl(&m_vertex.xy[(xy_tail + 3) & 3]); // T-1
v3 = GSVector4i::loadl(&m_vertex.xy[(xy_tail - m) & 3]); // H
// Bounding box (pmin/pmax) of the vertices that form this primitive.
switch (prim)
{
case GS_POINTLIST:
pmin = v2;
pmax = v2;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
pmin = v2.min_i16(v1);
pmax = v2.max_i16(v1);
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
pmin = v2.min_i16(v1.min_i16(v0));
pmax = v2.max_i16(v1.max_i16(v0));
break;
case GS_TRIANGLEFAN:
pmin = v2.min_i16(v1.min_i16(v3));
pmax = v2.max_i16(v1.max_i16(v3));
break;
default:
break;
}
// Bounding box entirely on one side of the scissor rectangle -> primitive is not visible.
GSVector4i test = pmax.lt16(m_scissor) | pmin.gt16(m_scissor.zwzwl());
switch (prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
// FIXME: GREG I don't understand the purpose of the m_nativeres check
// It impacts badly the number of draw call in the HW renderer.
// Also reject primitives whose bounding box has collapsed (pmin equal to pmax).
test |= m_nativeres ? pmin.eq16(pmax).zwzwl() : pmin.eq16(pmax);
break;
default:
break;
}
switch (prim)
{
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
// TODO: any way to do a 16-bit integer cross product?
// cross product is zero most of the time because either of the vertices are the same
// Note: == binds tighter than |, so each comparison is evaluated before being OR-ed in.
test = (test | v0 == v1) | (v1 == v2 | v0 == v2);
break;
case GS_TRIANGLEFAN:
test = (test | v3 == v1) | (v1 == v2 | v3 == v2);
break;
default:
break;
}
skip |= test.mask() & 15;
}
if (skip != 0)
{
// Primitive is discarded: rewind or advance the vertex queue without emitting indices.
switch (prim)
{
case GS_POINTLIST:
case GS_LINELIST:
case GS_TRIANGLELIST:
case GS_SPRITE:
case GS_INVALID:
m_vertex.tail = head; // no need to check or grow the buffer length
break;
case GS_LINESTRIP:
case GS_TRIANGLESTRIP:
// Strips keep their most recent vertices: only the oldest one is dropped.
m_vertex.head = head + 1;
[[fallthrough]];
case GS_TRIANGLEFAN:
if (tail >= m_vertex.maxcount)
GrowVertexBuffer(); // in case too many vertices were skipped
break;
default:
__assume(0);
}
return;
}
if (tail >= m_vertex.maxcount)
GrowVertexBuffer();
// Emit the indices of the new primitive at the end of the index buffer.
u32* RESTRICT buff = &m_index.buff[m_index.tail];
switch (prim)
{
case GS_POINTLIST:
buff[0] = head + 0;
m_vertex.head = head + 1;
m_vertex.next = head + 1;
m_index.tail += 1;
break;
case GS_LINELIST:
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
break;
case GS_LINESTRIP:
// Skipped vertices left a gap between next and head: move the live vertices down to close it.
if (next < head)
{
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
head = next;
m_vertex.tail = next + 2;
}
buff[0] = head + (index_swap ? 1 : 0);
buff[1] = head + (index_swap ? 0 : 1);
m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2;
break;
case GS_TRIANGLELIST:
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + (index_swap ? 0 : 2);
m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3;
break;
case GS_TRIANGLESTRIP:
// Same gap compaction as for line strips, with three live vertices.
if (next < head)
{
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
m_vertex.buff[next + 2] = m_vertex.buff[head + 2];
head = next;
m_vertex.tail = next + 3;
}
buff[0] = head + (index_swap ? 2 : 0);
buff[1] = head + 1;
buff[2] = head + (index_swap ? 0 : 2);
m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3;
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
// The fan always references head plus the two newest vertices; head itself is not advanced.
buff[0] = index_swap ? (tail - 1) : (head + 0);
buff[1] = tail - 2;
buff[2] = index_swap ? (head + 0) : (tail - 1);
m_vertex.next = tail;
m_index.tail += 3;
break;
case GS_SPRITE:
buff[0] = head + 0;
buff[1] = head + 1;
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
break;
case GS_INVALID:
// Invalid primitive: drop the vertex, emit nothing.
m_vertex.tail = head;
break;
default:
__assume(0);
}
// A textured primitive was queued, so clear the tex_flushed flag.
if (PRIM->TME)
tex_flushed = false;
}
/// Checks if region repeat is used (applying it does something to at least one of the values in min...max)
/// Also calculates the real min and max values seen after applying the region repeat to all values in min...max
///
/// Region repeat maps a coordinate `v` to `(v & msk) | fix`.
///
/// @param fix      Bits that are forced to 1 in every result.
/// @param msk      Bits of the input coordinate that are allowed through.
/// @param min      Smallest input coordinate (inclusive).
/// @param max      Largest input coordinate (inclusive).
/// @param min_out  Receives the smallest mapped coordinate.
/// @param max_out  Receives the largest mapped coordinate plus one (exclusive end).
/// @return true when the mapping changes at least one value in min...max. Always true when the
///         range crosses zero, in which case the full `fix ... (fix | msk)` range is reported.
static bool UsesRegionRepeat(int fix, int msk, int min, int max, int* min_out, int* max_out)
{
	if ((min < 0) != (max < 0))
	{
		// Algorithm doesn't work properly if bits overflow when incrementing (happens on the -1 → 0 crossing)
		// Conveniently, crossing zero guarantees you use the full range
		*min_out = fix;
		*max_out = (fix | msk) + 1;
		return true;
	}

	const int cleared_bits = ~msk & ~fix; // Bits that are always cleared by applying msk and fix
	const int set_bits = fix; // Bits that are always set by applying msk and fix
	unsigned long msb;

	// min and max have the same sign here, so bit 31 of `min ^ max` is clear and every
	// msb found below is at most 30: none of the `1 << msb` shifts can overflow.
	int variable_bits = min ^ max;
	if (_BitScanReverse(&msb, variable_bits))
		variable_bits |= (1 << msb) - 1; // Fill in all lower bits

	const int always_set = min & ~variable_bits; // Bits that are set in every value in min...max
	const int sometimes_set = min | variable_bits; // Bits that are set in at least one value in min...max

	const bool sets_bits = (set_bits | always_set) != always_set; // At least one bit in min...max is set by applying msk and fix
	const bool clears_bits = (cleared_bits & sometimes_set) != 0; // At least one bit in min...max is cleared by applying msk and fix

	const int overwritten_variable_bits = (cleared_bits | set_bits) & variable_bits;

	// A variable bit that's `0` in `min` will at some point switch to a `1` (because it's variable)
	// When it does, all bits below it will switch to a `0` (that's how incrementing works)
	// If the 0 to 1 switch is reflected in the final output (not masked and not replaced by a fixed value),
	// the final value would be larger than the previous. Otherwise, the final value will be less.
	// The true minimum value is `min` with all bits below the most significant replaced variable `0` bit cleared
	const int min_overwritten_variable_zeros = ~min & overwritten_variable_bits;
	if (_BitScanReverse(&msb, min_overwritten_variable_zeros))
		min &= ~((1 << msb) - 1); // Clear the bits below msb without left-shifting a negative value (UB before C++20)

	// Similar thing for max, but the first masked `1` bit
	const int max_overwritten_variable_ones = max & overwritten_variable_bits;
	if (_BitScanReverse(&msb, max_overwritten_variable_ones))
		max |= (1 << msb) - 1;

	*min_out = (msk & min) | fix;
	*max_out = ((msk & max) | fix) + 1;

	return sets_bits || clears_bits;
}
/// Computes the rectangle of the texture (in texels, z/w exclusive) that the current
/// draw can read, given the texture size in TEX0, the wrap modes/regions in CLAMP and
/// the texture coordinate range recorded in m_vt.
///
/// @param TEX0    Supplies TW/TH (log2 of the texture width/height).
/// @param CLAMP   Supplies WMS/WMT and MINU/MAXU/MINV/MAXV.
/// @param linear  When true the coordinate range is widened by half a texel on each
///                side before being floored.
/// @return The rectangle together with a `uses_border` bit mask describing which
///         edges of the allowed range the coordinates reach or exceed.
GSState::TextureMinMaxResult GSState::GetTextureMinMax(const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear)
{
// TODO: some of the +1s can be removed if linear == false
const int tw = TEX0.TW;
const int th = TEX0.TH;
const int w = 1 << tw;
const int h = 1 << th;
const int tw_mask = w - 1;
const int th_mask = h - 1;
// tr = the whole texture; vr = the range the wrap mode allows (refined below).
GSVector4i tr(0, 0, w, h);
const int wms = CLAMP.WMS;
const int wmt = CLAMP.WMT;
const int minu = (int)CLAMP.MINU;
const int minv = (int)CLAMP.MINV;
const int maxu = (int)CLAMP.MAXU;
const int maxv = (int)CLAMP.MAXV;
GSVector4i vr = tr;
// Horizontal range permitted by the wrap mode alone.
switch (wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if (vr.x < minu)
vr.x = minu;
if (vr.z > maxu + 1)
vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// In this mode MAXU is used as the fixed bits and MINU as the mask
// (see the UsesRegionRepeat(maxu, minu, ...) call further down).
vr.x = maxu;
vr.z = (maxu | minu) + 1;
break;
default:
__assume(0);
}
// Vertical range permitted by the wrap mode alone.
switch (wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if (vr.y < minv)
vr.y = minv;
if (vr.w > maxv + 1)
vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = (maxv | minv) + 1;
break;
default:
__assume(0);
}
u8 uses_border = 0;
if (m_vt.m_max.t.x >= FLT_MAX || m_vt.m_min.t.x <= -FLT_MAX ||
m_vt.m_max.t.y >= FLT_MAX || m_vt.m_min.t.y <= -FLT_MAX)
{
// If any of the min/max values are +-FLT_MAX we can't rely on them
// so just assume full texture.
uses_border = 0xF;
}
else
{
// Optimisation aims to reduce the amount of texture loaded to only the bit which will be read
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if (linear)
st += GSVector4(-0.5f, 0.5f).xxyy();
GSVector4i uv = GSVector4i(st.floor());
// One bit per edge: min coordinates below vr's start, max coordinates at or beyond vr's end.
uses_border = GSVector4::cast((uv < vr).blend32<0xc>(uv >= vr)).mask();
// Roughly cut out the min/max of the read (Clamp)
switch (wms)
{
case CLAMP_REPEAT:
// Only narrow when min and max fall in the same repetition of the texture.
if ((uv.x & ~tw_mask) == (uv.z & ~tw_mask))
{
vr.x = std::max(vr.x, uv.x & tw_mask);
vr.z = std::min(vr.z, (uv.z & tw_mask) + 1);
}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if (vr.x < uv.x)
vr.x = uv.x;
if (vr.z > (uv.z + 1))
vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT:
// NOTE(review): `tw` is TEX0.TW, i.e. log2 of the width (w = 1 << tw), while maxu is a
// texel coordinate. Possibly `w` was intended - confirm before changing behaviour.
if (UsesRegionRepeat(maxu, minu, uv.x, uv.z, &vr.x, &vr.z) || maxu >= tw)
uses_border |= TextureMinMaxResult::USES_BOUNDARY_U;
break;
}
switch (wmt)
{
case CLAMP_REPEAT:
if ((uv.y & ~th_mask) == (uv.w & ~th_mask))
{
vr.y = std::max(vr.y, uv.y & th_mask);
vr.w = std::min(vr.w, (uv.w & th_mask) + 1);
}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if (vr.y < uv.y)
vr.y = uv.y;
if (vr.w > (uv.w + 1))
vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT:
// NOTE(review): same log2-vs-texel comparison as for U above (`th` is TEX0.TH) - confirm.
if (UsesRegionRepeat(maxv, minv, uv.y, uv.w, &vr.y, &vr.w) || maxv >= th)
uses_border |= TextureMinMaxResult::USES_BOUNDARY_V;
break;
}
}
// Never report anything outside the texture itself.
vr = vr.rintersect(tr);
// This really shouldn't happen now except with the clamping region set entirely outside the texture,
// special handling should be written for that case.
if (vr.rempty())
{
// NOTE: this can happen when texcoords are all outside the texture or clamping area is zero, but we can't
// let the texture cache update nothing, the sampler will still need a single texel from the border somewhere
// examples:
// - THPS (no visible problems)
// - NFSMW (strange rectangles on screen, might be unrelated)
// - Lupin 3rd (huge problems, textures sizes seem to be randomly specified)
// Widen the empty rectangle by one texel on every side and clip it to the texture again.
vr = (vr + GSVector4i(-1, +1).xxyy()).rintersect(tr);
}
return { vr, uses_border };
}
void GSState::CalcAlphaMinMax()
{
if (m_vt.m_alpha.valid)
return;
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if (PRIM->TME && context->TEX0.TCC)
{
switch (GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
case 0:
a.y = 0;
a.w = 0xff;
break;
case 1:
a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
a.w = env.TEXA.TA0;
break;
case 2:
a.y = env.TEXA.AEM ? 0 : std::min(env.TEXA.TA0, env.TEXA.TA1);
a.w = std::max(env.TEXA.TA0, env.TEXA.TA1);
break;
case 3:
m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
break;
default:
__assume(0);
}
switch (context->TEX0.TFX)
{
case TFX_MODULATE:
a.x = (a.x * a.y) >> 7;
a.z = (a.z * a.w) >> 7;
if (a.x > 0xff)
a.x = 0xff;
if (a.z > 0xff)
a.z = 0xff;
break;
case TFX_DECAL:
a.x = a.y;
a.z = a.w;
break;
case TFX_HIGHLIGHT:
a.x = a.x + a.y;
a.z = a.z + a.w;
if (a.x > 0xff)
a.x = 0xff;
if (a.z > 0xff)
a.z = 0xff;
break;
case TFX_HIGHLIGHT2:
a.x = a.y;
a.z = a.w;
break;
default:
__assume(0);
}
}
m_vt.m_alpha.min = a.x;
m_vt.m_alpha.max = a.z;
m_vt.m_alpha.valid = true;
}
/// Tries to resolve the alpha test statically for the whole draw.
///
/// @param fm  Frame buffer write mask; bits may be added when the test always fails.
/// @param zm  Depth write mask; set to all ones when the test always fails and the
///            AFAIL mode forbids depth writes.
/// @return true when the outcome is known for every pixel (fm/zm have been updated
///         to reflect it, so the test itself can be dropped); false when some pixels
///         may pass and others fail.
bool GSState::TryAlphaTest(u32& fm, u32& zm)
{
	const auto& test = m_context->TEST;

	// Shortcut for the easy case
	if (test.ATST == ATST_ALWAYS)
		return true;

	// Alpha test can only control the write of some channels. If channels are already masked
	// the alpha test is therefore a nop.
	const bool depth_masked = (zm == 0xFFFFFFFF);
	const bool frame_masked = (fm == 0xFFFFFFFF);

	switch (test.AFAIL)
	{
		case AFAIL_KEEP:
			break;
		case AFAIL_FB_ONLY:
			if (depth_masked)
				return true;
			break;
		case AFAIL_ZB_ONLY:
			if (frame_masked)
				return true;
			break;
		case AFAIL_RGB_ONLY:
			if (depth_masked && ((fm & 0xFF000000) == 0xFF000000 || GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt == 1))
				return true;
			break;
		default:
			__assume(0);
	}

	// Outcome over the whole alpha range of the draw.
	bool always_pass = false;
	bool always_fail = false;

	if (test.ATST == ATST_NEVER)
	{
		always_fail = true; // Shortcut to avoid GetAlphaMinMax below
	}
	else
	{
		const int amin = GetAlphaMinMax().min;
		const int amax = GetAlphaMinMax().max;
		const int aref = test.AREF;

		switch (test.ATST)
		{
			case ATST_NEVER:
				always_fail = true;
				break;
			case ATST_ALWAYS:
				always_pass = true;
				break;
			case ATST_LESS:
				always_pass = amax < aref;
				always_fail = amin >= aref;
				break;
			case ATST_LEQUAL:
				always_pass = amax <= aref;
				always_fail = amin > aref;
				break;
			case ATST_EQUAL:
				always_pass = (amin == aref && amax == aref);
				always_fail = (amin > aref || amax < aref);
				break;
			case ATST_GEQUAL:
				always_pass = amin >= aref;
				always_fail = amax < aref;
				break;
			case ATST_GREATER:
				always_pass = amin > aref;
				always_fail = amax <= aref;
				break;
			case ATST_NOTEQUAL:
				always_pass = (amin > aref || amax < aref);
				always_fail = (amin == aref && amax == aref);
				break;
			default:
				__assume(0);
		}

		// The alpha range straddles the reference value: the result differs per pixel.
		if (!always_pass && !always_fail)
			return false;
	}

	if (!always_pass)
	{
		// Every pixel fails: fold the AFAIL behaviour into the write masks.
		switch (test.AFAIL)
		{
			case AFAIL_KEEP:
				fm = zm = 0xffffffff;
				break;
			case AFAIL_FB_ONLY:
				zm = 0xffffffff;
				break;
			case AFAIL_ZB_ONLY:
				fm = 0xffffffff;
				break;
			case AFAIL_RGB_ONLY:
				fm |= 0xff000000;
				zm = 0xffffffff;
				break;
			default:
				__assume(0);
		}
	}

	return true;
}
/// Tells whether the current draw can be treated as opaque.
///
/// Returns false when PRIM->AA1 is set and true when blending (PRIM->ABE) is off.
/// Otherwise the alpha range relevant to the blend equation's C term is gathered
/// and the decision is delegated to ALPHA.IsOpaque(amin, amax).
bool GSState::IsOpaque()
{
	if (PRIM->AA1)
		return false;

	if (!PRIM->ABE)
		return true;

	const GSDrawingContext* context = m_context;

	// Default: full range, i.e. nothing is known about the alpha.
	int amin = 0;
	int amax = 0xff;

	if (context->ALPHA.A != context->ALPHA.B)
	{
		switch (context->ALPHA.C)
		{
			case 0:
				amin = GetAlphaMinMax().min;
				amax = GetAlphaMinMax().max;
				break;
			case 1:
				// Only the 24-bit frame formats pin the value to 0x80 here.
				if (context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
					amin = amax = 0x80;
				break;
			case 2:
				amin = amax = context->ALPHA.FIX;
				break;
			default:
				break;
		}
	}

	return context->ALPHA.IsOpaque(amin, amax);
}
/// Returns true when the current draw's registers request mipmapping: TEX1.MXL is
/// above 0, TEX1.MMIN is in 2..5, the traced LOD (m_vt.m_lod.y) is above 0, and
/// either TEX1.MTBA is clear or the texture is square (TEX0.TH == TEX0.TW).
bool GSState::IsMipMapDraw()
{
	const auto& tex1 = m_context->TEX1;

	const bool has_mip_levels = tex1.MXL > 0;
	const bool mip_min_filter = tex1.MMIN >= 2 && tex1.MMIN <= 5;
	const bool lod_above_base = m_vt.m_lod.y > 0;
	const bool layout_ok = !tex1.MTBA || m_context->TEX0.TH == m_context->TEX0.TW;

	return has_mip_levels && mip_min_filter && lod_above_base && layout_ok;
}
/// Returns true when mipmapping is enabled in the configuration (m_mipmap) and the
/// current draw requests it (IsMipMapDraw()).
bool GSState::IsMipMapActive()
{
	if (!m_mipmap)
		return false;

	return IsMipMapDraw();
}
/// Builds the TEX0 register value describing mip level `lod` of the current texture.
///
/// Level 0 is the current TEX0 unchanged. For levels 1..6 the base pointer and
/// buffer width come from MIPTBP1 (levels 1-3) or MIPTBP2 (levels 4-6), and TW/TH
/// are reduced by `lod`, stopping at 0. Any other level logs an error, keeps the
/// level-0 address and still reduces TW/TH.
GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
{
	GIFRegTEX0 TEX0 = m_context->TEX0;

	// Shortcut
	if (lod == 0)
		return TEX0;

	const auto& mip1 = m_context->MIPTBP1;
	const auto& mip2 = m_context->MIPTBP2;

	switch (lod)
	{
		case 1:
			TEX0.TBP0 = mip1.TBP1;
			TEX0.TBW = mip1.TBW1;
			break;
		case 2:
			TEX0.TBP0 = mip1.TBP2;
			TEX0.TBW = mip1.TBW2;
			break;
		case 3:
			TEX0.TBP0 = mip1.TBP3;
			TEX0.TBW = mip1.TBW3;
			break;
		case 4:
			TEX0.TBP0 = mip2.TBP4;
			TEX0.TBW = mip2.TBW4;
			break;
		case 5:
			TEX0.TBP0 = mip2.TBP5;
			TEX0.TBW = mip2.TBW5;
			break;
		case 6:
			TEX0.TBP0 = mip2.TBP6;
			TEX0.TBW = mip2.TBW6;
			break;
		default:
			Console.Error("GS: Invalid guest lod setting. Please report: https://github.com/PCSX2/pcsx2/issues");
	}

	// Correct the texture size: each level halves both dimensions, never going below 2^0
	TEX0.TH = (TEX0.TH <= lod) ? 0 : TEX0.TH - lod;
	TEX0.TW = (TEX0.TW <= lod) ? 0 : TEX0.TW - lod;

	return TEX0;
}
// GSTransferBuffer

/// Allocates the 4 MiB, 32-byte aligned staging buffer and resets the transfer
/// position, byte counters and overflow flag.
GSState::GSTransferBuffer::GSTransferBuffer()
{
	constexpr size_t alloc_size = 1024 * 1024 * 4;
	buff = reinterpret_cast<u8*>(_aligned_malloc(alloc_size, 32));

	x = 0;
	y = 0;

	start = 0;
	end = 0;
	total = 0;

	overflow = false;
}
/// Releases the staging buffer obtained with _aligned_malloc in the constructor.
GSState::GSTransferBuffer::~GSTransferBuffer()
{
_aligned_free(buff);
}
/// Starts a new transfer: remembers the BITBLTBUF register and the starting
/// position (tx, ty), and clears `total` so that the next Update() call
/// recomputes the transfer size.
void GSState::GSTransferBuffer::Init(int tx, int ty, const GIFRegBITBLTBUF& blit)
{
	m_blit = blit;

	x = tx;
	y = ty;

	total = 0;
}
/// Clamps an incoming chunk of `len` bytes to the space left in the current transfer.
///
/// On the first call after Init() (total == 0) the transfer size is computed from
/// the rectangle (tw x th at bpp bits per pixel), capped at 4 MiB, and the
/// start/end cursors and the overflow flag are reset.
///
/// @param len  In: bytes offered. Out: bytes that still fit (total - end) when the
///             offer was too large; the overflow flag is set in that case.
/// @return true when there is at least one byte to consume.
bool GSState::GSTransferBuffer::Update(int tw, int th, int bpp, int& len)
{
	if (total == 0)
	{
		const int transfer_bytes = (tw * bpp >> 3) * th;
		total = std::min<int>(transfer_bytes, 1024 * 1024 * 4);

		start = 0;
		end = 0;
		overflow = false;
	}

	const int room = total - end;

	if (len > room)
	{
		// Only the first overflow of a transfer is reported.
#if defined(PCSX2_DEVBUILD) || defined(_DEBUG)
		if (!overflow)
			Console.Warning("GS transfer buffer overflow");
#endif
		overflow = true;
		len = room;
	}

	return len > 0;
}