#include "Common/CommonTypes.h"

#include "VideoCommon/BPStructs.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/MainBase.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"

VertexManager *g_vertex_manager;

// Write cursor, start and end of the vertex buffer currently being filled.
u8 *VertexManager::s_pCurBufferPointer;
u8 *VertexManager::s_pBaseBufferPointer;
u8 *VertexManager::s_pEndBufferPointer;

// Primitive class of the batch being accumulated; a change forces a flush.
PrimitiveType VertexManager::current_primitive_type;

// True when no batch is pending (nothing to draw until new data is prepared).
bool VertexManager::IsFlushed;

// Maps a GX draw primitive index (0..7) to the primitive class it is batched as.
static const PrimitiveType primitive_from_gx[8] = {
    PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
    PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS_2
    PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLES
    PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_STRIP
    PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_FAN
    PRIMITIVE_LINES,     // GX_DRAW_LINES
    PRIMITIVE_LINES,     // GX_DRAW_LINE_STRIP
    PRIMITIVE_POINTS,    // GX_DRAW_POINTS
};

// Starts out in the flushed state: there is no pending batch.
VertexManager::VertexManager()
{
    IsFlushed = true;
}

VertexManager::~VertexManager()
{
}

// Returns the number of bytes left between the write cursor and the end of
// the current vertex buffer.
u32 VertexManager::GetRemainingSize()
{
    return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}

// Makes sure the current batch can take `count` more vertices of `stride`
// bytes each for the GX primitive `primitive` (an index into
// primitive_from_gx, so it must be in 0..7), flushing the pending batch when
// the primitive class changes or when the vertex/index space would run out.
// Returns a DataReader spanning from the write cursor to the end of the
// vertex buffer.
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
    // The SSE vertex loader can write up to 4 bytes past the end
    u32 const needed_vertex_bytes = count * stride + 4;

    // We can't merge different kinds of primitives, so we have to flush here
    const PrimitiveType wanted_type = primitive_from_gx[primitive];
    if (current_primitive_type != wanted_type)
        Flush();
    current_primitive_type = wanted_type;

    // Check for size in buffer, if the buffer gets full, call Flush()
    if (!IsFlushed &&
        (count > IndexGenerator::GetRemainingIndices() ||
         count > GetRemainingIndices(primitive) ||
         needed_vertex_bytes > GetRemainingSize()))
    {
        Flush();

        // These checks are re-evaluated after the flush; they report a
        // request that still does not fit.
        if (count > IndexGenerator::GetRemainingIndices())
            ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
        if (count > GetRemainingIndices(primitive))
            ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
                "Increase MAXIBUFFERSIZE or we need primitive breaking after all.");
        if (needed_vertex_bytes > GetRemainingSize())
            ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
                "Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
    }

    // need to alloc new buffer
    if (IsFlushed)
    {
        g_vertex_manager->ResetBuffer(stride);
        IsFlushed = false;
    }

    return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
}

// Advances the write cursor past `count` vertices of `stride` bytes each.
void VertexManager::FlushData(u32 count, u32 stride)
{
    s_pCurBufferPointer += count * stride;
}

// Returns how many more vertices of the GX primitive `primitive` can be
// accepted given the index slots that remain
// (MAXIBUFFERSIZE - IndexGenerator::GetIndexLen()). The per-primitive ratios
// differ depending on whether the backend supports primitive restart.
// Unknown primitives yield 0.
u32 VertexManager::GetRemainingIndices(int primitive)
{
    u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();

    if (g_Config.backend_info.bSupportsPrimitiveRestart)
    {
        switch (primitive)
        {
        case GX_DRAW_QUADS:
        case GX_DRAW_QUADS_2:
            return index_len / 5 * 4;
        case GX_DRAW_TRIANGLES:
            return index_len / 4 * 3;
        case GX_DRAW_TRIANGLE_STRIP:
            // index_len is unsigned: with a completely full index buffer
            // (index_len == 0) a plain "index_len - 1" would wrap around to
            // 0xFFFFFFFF and report practically unlimited space.
            return index_len > 0 ? index_len - 1 : 0;
        case GX_DRAW_TRIANGLE_FAN:
            return index_len / 6 * 4 + 1;

        case GX_DRAW_LINES:
            return index_len;
        case GX_DRAW_LINE_STRIP:
            return index_len / 2 + 1;

        case GX_DRAW_POINTS:
            return index_len;

        default:
            return 0;
        }
    }
    else
    {
        switch (primitive)
        {
        case GX_DRAW_QUADS:
        case GX_DRAW_QUADS_2:
            return index_len / 6 * 4;
        case GX_DRAW_TRIANGLES:
            return index_len;
        case GX_DRAW_TRIANGLE_STRIP:
            return index_len / 3 + 2;
        case GX_DRAW_TRIANGLE_FAN:
            return index_len / 3 + 2;

        case GX_DRAW_LINES:
            return index_len;
        case GX_DRAW_LINE_STRIP:
            return index_len / 2 + 1;

        case GX_DRAW_POINTS:
            return index_len;

        default:
            return 0;
        }
    }
}

// Submits the pending batch, if any: loads every texture referenced by the
// active TEV stages, uploads the shader constants, hands the batch to the
// backend through vFlush() and finally marks the manager as flushed.
// Does nothing when no batch is pending.
void VertexManager::Flush()
{
    if (IsFlushed)
        return;

    // loading a state will invalidate BP, so check for it
    g_video_backend->CheckInvalidState();

    VideoFifo_CheckEFBAccess();

#if defined(_DEBUG) || defined(DEBUGFAST)
    PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens,
        xfmem.numChan.numColorChans, xfmem.dualTexTrans.enabled, bpmem.ztex2.op,
        (int)bpmem.blendmode.colorupdate, (int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);

    for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
    {
        LitChannel* ch = &xfmem.color[i];
        PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
        ch = &xfmem.alpha[i];
        PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
    }

    for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
    {
        // Work on a copy so that masking fields for the log does not touch xfmem.
        TexMtxInfo tinfo = xfmem.texMtxInfo[i];
        if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
            tinfo.hex &= 0x7ff;
        if (tinfo.texgentype != XF_TEXGEN_REGULAR)
            tinfo.projection = 0;

        PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, postmtx=%d, postnorm=%d",
            i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow, tinfo.embosssourceshift, tinfo.embosslightshift,
            xfmem.postMtxInfo[i].index, xfmem.postMtxInfo[i].normalize);
    }

    PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x", (int)bpmem.genMode.numtevstages+1, (int)bpmem.genMode.numindstages,
        (int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
#endif

    // Collect the texture maps referenced by the enabled TEV stages...
    BitSet32 usedtextures;
    for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
        if (bpmem.tevorders[i / 2].getEnable(i & 1))
            usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;

    // ...and by the active indirect stages, when there are any.
    if (bpmem.genMode.numindstages > 0)
        for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
            if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
                usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;

    for (unsigned int i : usedtextures)
    {
        g_renderer->SetSamplerState(i & 3, i >> 2);
        const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);

        if (tentry)
        {
            // 0s are probably for no manual wrapping needed.
            PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
        }
        else
        {
            ERROR_LOG(VIDEO, "error loading texture");
        }
    }

    // set global constants
    VertexShaderManager::SetConstants();
    GeometryShaderManager::SetConstants();
    PixelShaderManager::SetConstants();

    bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
                       bpmem.dstalpha.enable &&
                       bpmem.blendmode.alphaupdate &&
                       bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

    // The perf query, when emulated, brackets only the backend draw call.
    if (PerfQueryBase::ShouldEmulate())
        g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
    g_vertex_manager->vFlush(useDstAlpha);
    if (PerfQueryBase::ShouldEmulate())
        g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);

    GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

    if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
        ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());

    IsFlushed = true;
}

// Forwards savestate (de)serialisation to the active backend's vDoState().
void VertexManager::DoState(PointerWrap& p)
{
    g_vertex_manager->vDoState(p);
}

// Derives the depth plane of the last triangle written to the vertex buffer
// and passes it to PixelShaderManager::SetZSlope().
//
// Precondition: at least three vertices of `stride` bytes lie directly before
// s_pCurBufferPointer, and each vertex starts with three floats that are read
// as its position.
//
// If the triangle is degenerate in screen space (zero area), no slope can be
// derived; the function then returns without calling SetZSlope(), leaving the
// previously set slope in place.
void VertexManager::CalculateZSlope(u32 stride)
{
    float vtx[9];
    float out[12];

    // Lookup vertices of the last rendered triangle and software-transform them
    // This allows us to determine the depth slope, which will be used if zfreeze
    // is enabled in the following flush.
    for (unsigned int i = 0; i < 3; ++i)
    {
        u8* vtx_ptr = s_pCurBufferPointer - stride * (3 - i);
        vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
        vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
        vtx[2 + i * 3] = ((float*)vtx_ptr)[2];

        VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);

        // Transform to Screenspace
        // NOTE(review): 342 is presumably the hardware viewport origin bias - confirm.
        out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd + (xfmem.viewport.xOrig - 342);
        out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht + (xfmem.viewport.yOrig - 342);
        out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
    }

    float dx31 = out[8] - out[0];
    float dx12 = out[0] - out[4];
    float dy12 = out[1] - out[5];
    float dy31 = out[9] - out[1];

    float DF31 = out[10] - out[2];
    float DF21 = out[6] - out[2];
    float a = DF31 * -dy12 - DF21 * dy31;
    float b = dx31 * DF21 + dx12 * DF31;
    float c = -dx12 * dy31 - dx31 * -dy12;

    // c is the common divisor of both slopes; it is zero for a degenerate
    // triangle. Dividing by it would feed Inf/NaN slopes to the pixel shader
    // constants.
    if (c == 0)
        return;

    float slope_dfdx = -a / c;
    float slope_dfdy = -b / c;
    float slope_f0 = out[2] - (out[0] * slope_dfdx + out[1] * slope_dfdy);

    PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0);
}