330 lines
11 KiB
C++
330 lines
11 KiB
C++
#include "Common/CommonTypes.h"
|
|
|
|
#include "VideoCommon/BPStructs.h"
|
|
#include "VideoCommon/Debugger.h"
|
|
#include "VideoCommon/GeometryShaderManager.h"
|
|
#include "VideoCommon/IndexGenerator.h"
|
|
#include "VideoCommon/MainBase.h"
|
|
#include "VideoCommon/NativeVertexFormat.h"
|
|
#include "VideoCommon/OpcodeDecoding.h"
|
|
#include "VideoCommon/PerfQueryBase.h"
|
|
#include "VideoCommon/PixelShaderManager.h"
|
|
#include "VideoCommon/RenderBase.h"
|
|
#include "VideoCommon/Statistics.h"
|
|
#include "VideoCommon/TextureCacheBase.h"
|
|
#include "VideoCommon/VertexLoaderManager.h"
|
|
#include "VideoCommon/VertexManagerBase.h"
|
|
#include "VideoCommon/VertexShaderManager.h"
|
|
#include "VideoCommon/VideoConfig.h"
|
|
#include "VideoCommon/XFMemory.h"
|
|
|
|
VertexManager *g_vertex_manager;
|
|
|
|
u8 *VertexManager::s_pCurBufferPointer;
|
|
u8 *VertexManager::s_pBaseBufferPointer;
|
|
u8 *VertexManager::s_pEndBufferPointer;
|
|
|
|
PrimitiveType VertexManager::current_primitive_type;
|
|
|
|
Slope VertexManager::ZSlope;
|
|
|
|
bool VertexManager::IsFlushed;
|
|
|
|
// Maps a GX draw primitive (used as the array index) to the host primitive
// type it is batched as. Every quad/triangle variant collapses to triangles
// and both line variants collapse to lines.
static const PrimitiveType primitive_from_gx[8] = {
	// Quad and triangle primitives
	PRIMITIVE_TRIANGLES,  // GX_DRAW_QUADS
	PRIMITIVE_TRIANGLES,  // GX_DRAW_QUADS_2
	PRIMITIVE_TRIANGLES,  // GX_DRAW_TRIANGLES
	PRIMITIVE_TRIANGLES,  // GX_DRAW_TRIANGLE_STRIP
	PRIMITIVE_TRIANGLES,  // GX_DRAW_TRIANGLE_FAN

	// Line primitives
	PRIMITIVE_LINES,      // GX_DRAW_LINES
	PRIMITIVE_LINES,      // GX_DRAW_LINE_STRIP

	// Point primitives
	PRIMITIVE_POINTS,     // GX_DRAW_POINTS
};
|
|
|
|
// Starts out in the "flushed" state so that the first call to
// PrepareForAdditionalData() requests a fresh buffer via ResetBuffer().
VertexManager::VertexManager()
{
	VertexManager::IsFlushed = true;
}
|
|
|
|
// Nothing to release at this level.
VertexManager::~VertexManager() = default;
|
|
|
|
// Returns the number of bytes between the current write cursor and the end
// of the vertex buffer.
u32 VertexManager::GetRemainingSize()
{
	const auto bytes_left = s_pEndBufferPointer - s_pCurBufferPointer;
	return static_cast<u32>(bytes_left);
}
|
|
|
|
// Makes room for `count` vertices of `stride` bytes each of the given GX
// primitive and returns a DataReader spanning the writable part of the
// vertex buffer (current cursor to buffer end).
//
// Pending data is flushed first when the host primitive type changes or when
// the index/vertex buffers cannot hold the new vertices. If the request still
// does not fit after flushing, an error is logged and execution continues.
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
	// The SSE vertex loader can write up to 4 bytes past the end
	u32 const needed_vertex_bytes = count * stride + 4;

	// We can't merge different kinds of primitives, so we have to flush here
	const PrimitiveType wanted_type = primitive_from_gx[primitive];
	if (current_primitive_type != wanted_type)
	{
		Flush();
	}
	current_primitive_type = wanted_type;

	// Check for size in buffer, if the buffer gets full, call Flush()
	const bool out_of_room =
		!IsFlushed &&
		(count > IndexGenerator::GetRemainingIndices() ||
		 count > GetRemainingIndices(primitive) ||
		 needed_vertex_bytes > GetRemainingSize());

	if (out_of_room)
	{
		Flush();

		// Re-check each limit individually so the log names the culprit.
		if (count > IndexGenerator::GetRemainingIndices())
		{
			ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
		}

		if (count > GetRemainingIndices(primitive))
		{
			ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
				"Increase MAXIBUFFERSIZE or we need primitive breaking after all.");
		}

		if (needed_vertex_bytes > GetRemainingSize())
		{
			ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
				"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
		}
	}

	// need to alloc new buffer
	if (IsFlushed)
	{
		g_vertex_manager->ResetBuffer(stride);
		IsFlushed = false;
	}

	return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
}
|
|
|
|
// Advances the vertex write cursor past `count` vertices of `stride` bytes.
void VertexManager::FlushData(u32 count, u32 stride)
{
	const u32 bytes_consumed = count * stride;
	s_pCurBufferPointer += bytes_consumed;
}
|
|
|
|
// Returns how many more vertices of the given GX primitive can be accepted
// before the index buffer is exhausted.
//
// `index_len` is the number of free index slots
// (MAXIBUFFERSIZE - IndexGenerator::GetIndexLen()). The per-primitive factors
// convert free slots to a vertex count; they differ depending on whether the
// backend supports primitive restart.
// NOTE(review): the ratios presumably mirror how IndexGenerator expands each
// primitive into indices (e.g. 4 quad vertices -> 5 or 6 indices) - confirm
// against IndexGenerator.
//
// Unknown primitive values yield 0.
u32 VertexManager::GetRemainingIndices(int primitive)
{
	u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();

	if (g_Config.backend_info.bSupportsPrimitiveRestart)
	{
		switch (primitive)
		{
		case GX_DRAW_QUADS:
		case GX_DRAW_QUADS_2:
			return index_len / 5 * 4;
		case GX_DRAW_TRIANGLES:
			return index_len / 4 * 3;
		case GX_DRAW_TRIANGLE_STRIP:
			// One slot is held back from the free count. Guard the
			// subtraction: with a full index buffer (index_len == 0) the
			// unsigned "index_len - 1" would wrap around to 0xFFFFFFFF and
			// report almost unlimited room instead of none.
			return index_len > 0 ? index_len - 1 : 0;
		case GX_DRAW_TRIANGLE_FAN:
			return index_len / 6 * 4 + 1;

		case GX_DRAW_LINES:
			return index_len;
		case GX_DRAW_LINE_STRIP:
			return index_len / 2 + 1;

		case GX_DRAW_POINTS:
			return index_len;

		default:
			return 0;
		}
	}
	else
	{
		switch (primitive)
		{
		case GX_DRAW_QUADS:
		case GX_DRAW_QUADS_2:
			return index_len / 6 * 4;
		case GX_DRAW_TRIANGLES:
			return index_len;
		case GX_DRAW_TRIANGLE_STRIP:
			return index_len / 3 + 2;
		case GX_DRAW_TRIANGLE_FAN:
			return index_len / 3 + 2;

		case GX_DRAW_LINES:
			return index_len;
		case GX_DRAW_LINE_STRIP:
			return index_len / 2 + 1;

		case GX_DRAW_POINTS:
			return index_len;

		default:
			return 0;
		}
	}
}
|
|
|
|
void VertexManager::Flush()
|
|
{
|
|
if (IsFlushed)
|
|
return;
|
|
|
|
// loading a state will invalidate BP, so check for it
|
|
g_video_backend->CheckInvalidState();
|
|
|
|
VideoFifo_CheckEFBAccess();
|
|
|
|
#if defined(_DEBUG) || defined(DEBUGFAST)
|
|
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens,
|
|
xfmem.numChan.numColorChans, xfmem.dualTexTrans.enabled, bpmem.ztex2.op,
|
|
(int)bpmem.blendmode.colorupdate, (int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
|
|
|
|
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
|
|
{
|
|
LitChannel* ch = &xfmem.color[i];
|
|
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
|
|
ch = &xfmem.alpha[i];
|
|
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
|
|
}
|
|
|
|
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
|
|
{
|
|
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
|
|
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) tinfo.hex &= 0x7ff;
|
|
if (tinfo.texgentype != XF_TEXGEN_REGULAR) tinfo.projection = 0;
|
|
|
|
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, postmtx=%d, postnorm=%d",
|
|
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow, tinfo.embosssourceshift, tinfo.embosslightshift,
|
|
xfmem.postMtxInfo[i].index, xfmem.postMtxInfo[i].normalize);
|
|
}
|
|
|
|
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x", (int)bpmem.genMode.numtevstages+1, (int)bpmem.genMode.numindstages,
|
|
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
|
|
#endif
|
|
|
|
BitSet32 usedtextures;
|
|
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
|
if (bpmem.tevorders[i / 2].getEnable(i & 1))
|
|
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
|
|
|
|
if (bpmem.genMode.numindstages > 0)
|
|
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
|
|
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
|
|
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
|
|
|
|
for (unsigned int i : usedtextures)
|
|
{
|
|
g_renderer->SetSamplerState(i & 3, i >> 2);
|
|
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
|
|
|
|
if (tentry)
|
|
{
|
|
// 0s are probably for no manual wrapping needed.
|
|
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
|
|
}
|
|
else
|
|
ERROR_LOG(VIDEO, "error loading texture");
|
|
}
|
|
|
|
// set global constants
|
|
VertexShaderManager::SetConstants();
|
|
GeometryShaderManager::SetConstants();
|
|
PixelShaderManager::SetConstants();
|
|
|
|
// Calculate ZSlope for zfreeze
|
|
if (!bpmem.genMode.zfreeze)
|
|
{
|
|
// Must be done after VertexShaderManager::SetConstants()
|
|
CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
|
|
}
|
|
else if (ZSlope.dirty) // or apply any dirty ZSlopes
|
|
{
|
|
PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
|
|
ZSlope.dirty = false;
|
|
}
|
|
|
|
// If cull mode is CULL_ALL, we shouldn't render any triangles/quads (points and lines don't get culled)
|
|
// vertex loader has already converted any quads into triangles, so we just check for triangles.
|
|
// TODO: These culled primites need to get this far through the pipeline to be used as zfreeze refrence
|
|
// planes. But currently we apply excessive processing and store the vertices in buffers on the
|
|
// video card, which is a waste of bandwidth.
|
|
if (bpmem.genMode.cullmode == GenMode::CULL_ALL && current_primitive_type == PRIMITIVE_TRIANGLES)
|
|
{
|
|
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
|
IsFlushed = true;
|
|
return;
|
|
}
|
|
|
|
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
|
|
bpmem.dstalpha.enable &&
|
|
bpmem.blendmode.alphaupdate &&
|
|
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
|
|
|
|
if (PerfQueryBase::ShouldEmulate())
|
|
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
|
g_vertex_manager->vFlush(useDstAlpha);
|
|
if (PerfQueryBase::ShouldEmulate())
|
|
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
|
|
|
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
|
|
|
if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
|
|
ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
|
|
|
|
IsFlushed = true;
|
|
}
|
|
|
|
// Savestate hook: runs the shared ZSlope through the PointerWrap first, then
// lets the active backend handle its own state via vDoState().
void VertexManager::DoState(PointerWrap& p)
{
	// Common state owned by this base class.
	p.Do(VertexManager::ZSlope);

	// Backend-specific state.
	g_vertex_manager->vDoState(p);
}
|
|
|
|
// Recomputes the screen-space depth plane (ZSlope) from the last three
// vertices written to the vertex buffer.
//
// The three vertices are software-transformed to clip space, projected to
// screen space using the XF viewport and the BP scissor offset, and the plane
// through them is stored as ZSlope.dfdx / ZSlope.dfdy / ZSlope.f0 with
// ZSlope.dirty set. ZSlope is left untouched when fewer than three vertices
// are available or when the triangle is degenerate in screen space.
//
// format: vertex format describing stride and attribute offsets of the
//         vertices currently in the buffer.
void VertexManager::CalculateZSlope(NativeVertexFormat *format)
{
	float vtx[9];
	float out[12];
	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};

	// Global matrix ID.
	u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
	PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
	size_t posOff = vert_decl.position.offset;
	size_t mtxOff = vert_decl.posmtx.offset;

	// Make sure the buffer contains at least 3 vertices. The loop below reads
	// backwards from the write cursor, so with fewer vertices queued (e.g. a
	// single point or line) it would read memory in front of the data.
	// NOTE(review): assumes s_pBaseBufferPointer marks the start of the data
	// written since the last ResetBuffer() - confirm against the backends.
	const size_t bytes_available = static_cast<size_t>(s_pCurBufferPointer - s_pBaseBufferPointer);
	const size_t bytes_needed = static_cast<size_t>(vert_decl.stride) * 3;
	if (bytes_available < bytes_needed)
		return;

	// Lookup vertices of the last rendered triangle and software-transform them
	// This allows us to determine the depth slope, which will be used if z-freeze
	// is enabled in the following flush.
	for (unsigned int i = 0; i < 3; ++i)
	{
		// i == 0 is the third-to-last vertex, i == 2 the last one.
		u8* vtx_ptr = s_pCurBufferPointer - vert_decl.stride * (3 - i);
		vtx[0 + i * 3] = ((float*)(vtx_ptr + posOff))[0];
		vtx[1 + i * 3] = ((float*)(vtx_ptr + posOff))[1];
		vtx[2 + i * 3] = ((float*)(vtx_ptr + posOff))[2];

		// If this vertex format has per-vertex position matrix IDs, look it up.
		if (vert_decl.posmtx.enable)
			mtxIdx = *((u32*)(vtx_ptr + mtxOff));

		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4], mtxIdx);

		// Transform to Screenspace
		float inv_w = 1.0f / out[3 + i * 4];

		out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
		out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
		out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
	}

	// Edge deltas in screen space (x, y) and depth (DF) between the vertices.
	float dx31 = out[8] - out[0];
	float dx12 = out[0] - out[4];
	float dy12 = out[1] - out[5];
	float dy31 = out[9] - out[1];

	float DF31 = out[10] - out[2];
	float DF21 = out[6] - out[2];
	float a = DF31 * -dy12 - DF21 * dy31;
	float b = dx31 * DF21 + dx12 * DF31;
	float c = -dx12 * dy31 - dx31 * -dy12;

	// Sometimes we process degenerate triangles. Stop any divide by zeros
	if (c == 0)
		return;

	ZSlope.dfdx = -a / c;
	ZSlope.dfdy = -b / c;
	// Depth at the screen origin, extrapolated from the first vertex.
	ZSlope.f0 = out[2] - (out[0] * ZSlope.dfdx + out[1] * ZSlope.dfdy);
	ZSlope.dirty = true;
}
|