From 38765dc89a5a7a155804d6e036b56e48fa150e38 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 31 Jan 2022 20:40:09 +0100 Subject: [PATCH] pvr: fix multipass. naomi2: constant color shading. pvr: link multipass TA contexts. Each pass has its own. pvr: get rid of context pool naomi2: constant color shading. Use model diffuse and specular selection. Don't use vtx color for offset color. naomi2: pseudo-parallel lights fix naomi2: pass-through TA data needs forced list type. Terminate with end-of-list enable RTT copy to vram for Beach Spikers --- core/emulator.cpp | 7 +- core/hw/pvr/Renderer_if.cpp | 19 ++++- core/hw/pvr/elan.cpp | 162 ++++++++++++++++-------------------- core/hw/pvr/elan_struct.h | 7 +- core/hw/pvr/pvr_regs.cpp | 2 +- core/hw/pvr/ta.cpp | 12 +-- core/hw/pvr/ta.h | 1 - core/hw/pvr/ta_ctx.cpp | 113 +++++++------------------ core/hw/pvr/ta_ctx.h | 40 ++++----- core/hw/pvr/ta_vtx.cpp | 45 +++++++--- core/rend/gl4/gl4.h | 10 ++- core/rend/gl4/gl4naomi2.cpp | 3 +- core/rend/gl4/gl4naomi2.h | 2 +- core/rend/gl4/gldraw.cpp | 3 +- core/rend/gl4/gles.cpp | 4 +- core/rend/gles/gldraw.cpp | 3 +- core/rend/gles/gles.cpp | 4 +- core/rend/gles/gles.h | 10 ++- core/rend/gles/naomi2.cpp | 25 ++++-- core/rend/gles/naomi2.h | 15 +++- core/serialize.h | 3 +- 21 files changed, 241 insertions(+), 249 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index b6a1f6340..43c795447 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -96,7 +96,7 @@ static void loadSpecialSettings() // JSR (EU) || prod_id == "MK-5105850") { - INFO_LOG(BOOT, "Enabling render to texture buffer for game %s", prod_id.c_str()); + INFO_LOG(BOOT, "Enabling RTT Copy to VRAM for game %s", prod_id.c_str()); config::RenderToTextureBuffer.override(true); } if (prod_id == "HDR-0176" || prod_id == "RDC-0057") @@ -230,6 +230,11 @@ static void loadSpecialSettings() INFO_LOG(BOOT, "Enabling translucent depth multipass for game %s", naomi_game_id); config::TranslucentPolygonDepthMask.override(true); } + if (!strcmp(naomi_game_id, "BEACH SPIKERS JAPAN")) + { + INFO_LOG(BOOT, "Enabling RTT Copy to VRAM for game %s", naomi_game_id); + config::RenderToTextureBuffer.override(true); + } // Input configuration settings.input.JammaSetup = JVS::Default; if (!strcmp("DYNAMIC GOLF", naomi_game_id) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index e8688231b..79c9bbe2c 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -195,8 +195,22 @@ void rend_start_render(TA_context *ctx) pend_rend = false; if (ctx == nullptr) { - u32 ta_ol_base = getTAContextAddress(); - ctx = tactx_Pop(ta_ol_base); + u32 addresses[MAX_PASSES]; + int count = getTAContextAddresses(addresses); + if (count > 0) + { + ctx = tactx_Pop(addresses[0]); + if (ctx != nullptr) + { + TA_context *linkedCtx = ctx; + for (int i = 1; i < count; i++) + { + linkedCtx->nextContext = tactx_Pop(addresses[i]); + if (linkedCtx->nextContext != nullptr) + linkedCtx = linkedCtx->nextContext; + } + } + } } // No end of render interrupt when rendering the framebuffer @@ -258,7 +272,6 @@ void rend_vblank() ctx.Alloc(); ctx.rend.isRenderFramebuffer = true; rend_start_render(&ctx); - ctx.Free(); fb_dirty = false; } render_called = false; diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index 0ec8af409..ab8245c6c 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -115,7 +115,7 @@ T DYNACALL read_elanreg(u32 paddr) return (T)0xe1ad0000; case 4: // revision return 0x1; // 1 or x10 - // 10 breaks vstriker? + // TODO 10 breaks vstriker, vf4 case 0xc: // command queue size // loops until < 2 (v1) or 3 (v10) @@ -412,17 +412,18 @@ struct State if (plight->pcw.parallelLight) { ParallelLight *light = (ParallelLight *)plight; - DEBUG_LOG(PVR, " Parallel light %d: col %d %d %d dir %d %d %d", light->lightId, light->red, light->green, light->blue, + DEBUG_LOG(PVR, " Parallel light %d: [%x] col %d %d %d dir %d %d %d", light->lightId, plight->pcw.full, + light->red, light->green, light->blue, light->dirX, light->dirY, light->dirZ); } else { - DEBUG_LOG(PVR, " Point light %d: dattenmode %d col %d %d %d dir %d %d %d pos %f %f %f routing %d dist %f %f angle %f %f", - plight->lightId, plight->dattenmode, + DEBUG_LOG(PVR, " Point light %d: [%x] routing %d dmode %d smode %d col %d %d %d dir %d %d %d pos %f %f %f dist %f %f angle %f %f", + plight->lightId, plight->pcw.full, plight->routing, plight->dmode, plight->smode, plight->red, plight->green, plight->blue, plight->dirX, plight->dirY, plight->dirZ, plight->posX, plight->posY, plight->posZ, - plight->routing, plight->attnMinDistance(), plight->attnMaxDistance(), + plight->attnMinDistance(), plight->attnMaxDistance(), plight->attnMinAngle(), plight->attnMaxAngle()); } elan::curLights[lightId] = plight; @@ -565,6 +566,21 @@ void setNormal(Vertex& vd, const T& vs) vd.nz = normal.z; } +static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1) +{ + if (curGmp != nullptr) + { + if (curGmp->paramSelect.d0) + baseCol0 += unpackColor(curGmp->diffuse0); + if (curGmp->paramSelect.s0) + offsetCol0 += unpackColor(curGmp->specular0); + if (curGmp->paramSelect.d1) + baseCol1 += unpackColor(curGmp->diffuse1); + if (curGmp->paramSelect.s1) + offsetCol1 += unpackColor(curGmp->specular1); + } +} + template static void convertVertex(const T& vs, Vertex& vd); @@ -574,35 +590,16 @@ void convertVertex(const N2_VERTEX& vs, Vertex& vd) setCoords(vd, vs.x, vs.y, vs.z); setNormal(vd, vs); SetEnvMapUV(vd); - glm::vec4 baseCol0; - glm::vec4 offsetCol0; - glm::vec4 baseCol1; - glm::vec4 offsetCol1; - if (curGmp != nullptr) - { - baseCol0 = unpackColor(curGmp->diffuse0); - offsetCol0 = unpackColor(curGmp->specular0); - baseCol1 = unpackColor(curGmp->diffuse1); - offsetCol1 = unpackColor(curGmp->specular1); - if (state.listType == 2) - { - // FIXME - baseCol0.a = 0; - offsetCol0.a = 1; - baseCol1.a = 0; - offsetCol1.a = 1; - } - } - else - { - baseCol0 = glm::vec4(0); - offsetCol0 = glm::vec4(0); - baseCol1 = glm::vec4(0); - offsetCol1 = glm::vec4(0); - } - // non-textured vertices have no offset color - *(u32 *)vd.col = packColor(baseCol0 + offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1 + offsetCol1); + glm::vec4 baseCol0(0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1(0); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); } template<> @@ -612,20 +609,14 @@ void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd) setNormal(vd, vs); SetEnvMapUV(vd); glm::vec4 baseCol0 = unpackColor(vs.rgb.argb0); - glm::vec4 offsetCol0 = baseCol0; + glm::vec4 offsetCol0(0); glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); - glm::vec4 offsetCol1 = baseCol1; - if (curGmp != nullptr) - { - // Not sure about offset but vf4 needs base addition - baseCol0 += unpackColor(curGmp->diffuse0); - offsetCol0 += unpackColor(curGmp->specular0); - baseCol1 += unpackColor(curGmp->diffuse1); - offsetCol1 += unpackColor(curGmp->specular1); - } - // non-textured vertices have no offset color - *(u32 *)vd.col = packColor(baseCol0 + offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1 + offsetCol1); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); } template<> @@ -634,24 +625,11 @@ void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd) setCoords(vd, vs.x, vs.y, vs.z); setNormal(vd, vs); setUV(vs, vd); - glm::vec4 baseCol0; - glm::vec4 offsetCol0; - glm::vec4 baseCol1; - glm::vec4 offsetCol1; - if (curGmp != nullptr) - { - baseCol0 = unpackColor(curGmp->diffuse0); - offsetCol0 = unpackColor(curGmp->specular0); - baseCol1 = unpackColor(curGmp->diffuse1); - offsetCol1 = unpackColor(curGmp->specular1); - } - else - { - baseCol0 = glm::vec4(0); - offsetCol0 = glm::vec4(0); - baseCol1 = glm::vec4(0); - offsetCol1 = glm::vec4(0); - } + glm::vec4 baseCol0(0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1(0); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); *(u32 *)vd.col1 = packColor(baseCol1); @@ -665,17 +643,10 @@ void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd) setNormal(vd, vs); setUV(vs, vd); glm::vec4 baseCol0 = unpackColor(vs.rgb.argb0); - glm::vec4 offsetCol0 = baseCol0; + glm::vec4 offsetCol0(0); glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); - glm::vec4 offsetCol1 = baseCol1; - if (curGmp != nullptr) - { - // Not sure about offset but vf4 needs base addition - baseCol0 += unpackColor(curGmp->diffuse0); - offsetCol0 += unpackColor(curGmp->specular0); - baseCol1 += unpackColor(curGmp->diffuse1); - offsetCol1 += unpackColor(curGmp->specular1); - } + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); *(u32 *)vd.col1 = packColor(baseCol1); @@ -924,14 +895,14 @@ static void sendLights() light.diffuse = diffuse; light.specular = specular; light.parallel = curLights[i]->pcw.parallelLight; - if (light.parallel != 0) + if (light.parallel) { ParallelLight *plight = (ParallelLight *)curLights[i]; memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); light.routing = plight->routing; light.dmode = plight->dmode; light.smode = N2_LMETHOD_SINGLE_SIDED; - memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, (int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), sizeof(light.direction)); } else @@ -941,14 +912,27 @@ static void sendLights() light.routing = plight->routing; light.dmode = plight->dmode; light.smode = plight->smode; - memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position)); - memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4((int8_t)plight->dirX, (int8_t)plight->dirY, (int8_t)plight->dirZ, 0))), - sizeof(light.direction)); - light.distAttnMode = plight->dattenmode; - light.attnDistA = plight->distA(); - light.attnDistB = plight->distB(); - light.attnAngleA = plight->angleA(); - light.attnAngleB = plight->angleB(); + if (plight->posX == 0 && plight->posY == 0 && plight->posZ == 0 + && plight->_distA == 0 && plight->_distB == 0 + && plight->_angleA == 0 && plight->_angleB == 0) + { + // Lights not using distance or angle attenuation are converted into parallel lights on the CPU side? + DEBUG_LOG(PVR, "Point -> parallel light[%d] dir %d %d %d", i, -(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ); + light.parallel = true; + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), + sizeof(light.direction)); + } + else + { + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4((int8_t)plight->dirX, (int8_t)plight->dirY, (int8_t)plight->dirZ, 0))), + sizeof(light.direction)); + memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position)); + light.distAttnMode = plight->dattenmode; + light.attnDistA = plight->distA(); + light.attnDistB = plight->distB(); + light.attnAngleA = plight->angleA(); + light.attnAngleB = plight->angleB(); + } } usingAlphaLight = usingAlphaLight || light.routing == N2_LFUNC_ALPHADIFF_SUB; model.lightCount++; @@ -969,6 +953,9 @@ static void setStateParams(PolyParam& pp) { pp.glossCoef0 = curGmp->gloss.getCoef0(); pp.glossCoef1 = curGmp->gloss.getCoef1(); + pp.constantColor = curGmp->paramSelect.b0; + pp.diffuseColor = curGmp->paramSelect.d0; + pp.specularColor = curGmp->paramSelect.s0; } // FIXME hack ScrInstr condition fixes lens flares in vf4 if (state.listType == 2 && usingAlphaLight && pp.tsp.SrcInstr == 1) @@ -1348,11 +1335,10 @@ static void executeCommand(u8 *data, int size) } else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked? { - // FIXME this matches TA polys such as a2000009 - // no possible disambiguation since 80000000 is a valid OP poly pcw (poly type 0 / vtx 0) DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); + state.listType = (pcw >> 24) & 0xf; size -= 32; - ta_add_ta_data((u32 *)(data + 32), size - 32); + ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32); size = 32; } else if (pcw == 0x20000000) diff --git a/core/hw/pvr/elan_struct.h b/core/hw/pvr/elan_struct.h index 98fe558f6..72a48db9a 100644 --- a/core/hw/pvr/elan_struct.h +++ b/core/hw/pvr/elan_struct.h @@ -161,8 +161,8 @@ struct GMP : public ElanBase struct { u32 d0:1; // diffuse u32 s0:1; // specular - u32 a0:1; // ambient? alpha? - u32 f0:1; // fog + u32 a0:1; // alpha? + u32 f0:1; // fog? u32 d1:1; u32 s1:1; @@ -170,7 +170,7 @@ struct GMP : public ElanBase u32 f1:1; u32 vol1UsesVol0UV:1; - u32 b0:1; // bump mapping?? TODO check + u32 b0:1; // constant color u32 b1:1; u32 e0:1; // environmental mapping u32 e1:1; @@ -191,6 +191,7 @@ struct GMP : public ElanBase // 11110 1111 1111 (everything! except v1uv0, rt66, vtx type2 (vtx only)) // 00110 0000 0000 (b0 and b1, vf4) // 00000 1010 1010 specular and fog? soul surfer + // 00010 0010 0010 b0, s0 s1 (initd, headlights) u32 diffuse0; u32 specular0; diff --git a/core/hw/pvr/pvr_regs.cpp b/core/hw/pvr/pvr_regs.cpp index 7a0b570b9..261b5babd 100644 --- a/core/hw/pvr/pvr_regs.cpp +++ b/core/hw/pvr/pvr_regs.cpp @@ -150,7 +150,7 @@ void pvr_WriteReg(u32 paddr,u32 data) case TA_LIST_CONT_addr: //a write of anything works ? - ta_vtx_ListCont(); + ta_vtx_ListInit(); break; case SPG_CONTROL_addr: diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 69d097aa8..176dd94d2 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -282,23 +282,13 @@ static void markObjectListBlocks() } } -void ta_vtx_ListCont() -{ - SetCurrentTARC(TA_OL_BASE); - ta_tad.Continue(); - ta_ctx->rend.newRenderPass(); - markObjectListBlocks(); - - ta_cur_state=TAS_NS; - ta_fsm_cl = 7; -} void ta_vtx_ListInit() { SetCurrentTARC(TA_OL_BASE); ta_tad.ClearPartial(); markObjectListBlocks(); - ta_cur_state=TAS_NS; + ta_cur_state = TAS_NS; ta_fsm_cl = 7; } void ta_vtx_SoftReset() diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index bfa2d2353..2f2b540a0 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -5,7 +5,6 @@ struct TA_context; -void ta_vtx_ListCont(); void ta_vtx_ListInit(); void ta_vtx_SoftReset(); diff --git a/core/hw/pvr/ta_ctx.cpp b/core/hw/pvr/ta_ctx.cpp index 6d756aa20..621ed844e 100644 --- a/core/hw/pvr/ta_ctx.cpp +++ b/core/hw/pvr/ta_ctx.cpp @@ -11,7 +11,7 @@ static int RenderCount; TA_context* ta_ctx; tad_context ta_tad; -static void tactx_Recycle(TA_context* poped_ctx); +static void tactx_Recycle(TA_context* ctx); static TA_context *tactx_Find(u32 addr, bool allocnew = false); void SetCurrentTARC(u32 addr) @@ -101,69 +101,40 @@ void FinishRender(TA_context* ctx) frame_finished.Set(); } -static std::mutex mtx_pool; - -static std::vector ctx_pool; static std::vector ctx_list; -TA_context* tactx_Alloc() +static TA_context *tactx_Alloc() { - TA_context* rv = 0; + TA_context *ctx = new TA_context(); + ctx->Alloc(); - mtx_pool.lock(); - if (!ctx_pool.empty()) - { - rv = ctx_pool[ctx_pool.size()-1]; - ctx_pool.pop_back(); - } - mtx_pool.unlock(); - - if (!rv) - { - rv = new TA_context(); - rv->Alloc(); - } - - return rv; + return ctx; } -static void tactx_Recycle(TA_context* poped_ctx) +static void tactx_Recycle(TA_context* ctx) { - if (poped_ctx->rend.isRenderFramebuffer) + if (ctx->rend.isRenderFramebuffer) return; - mtx_pool.lock(); - { - if (ctx_pool.size()>2) - { - poped_ctx->Free(); - delete poped_ctx; - } - else - { - poped_ctx->Reset(); - ctx_pool.push_back(poped_ctx); - } - } - mtx_pool.unlock(); + if (ctx->nextContext != nullptr) + tactx_Recycle(ctx->nextContext); + delete ctx; } -TA_context* tactx_Find(u32 addr, bool allocnew) +static TA_context *tactx_Find(u32 addr, bool allocnew) { - for (size_t i=0; iAddress==addr) - return ctx_list[i]; - } + for (TA_context *ctx : ctx_list) + if (ctx->Address == addr) + return ctx; if (allocnew) { - TA_context* rv = tactx_Alloc(); - rv->Address=addr; - ctx_list.push_back(rv); + TA_context *ctx = tactx_Alloc(); + ctx->Address = addr; + ctx_list.push_back(ctx); - return rv; + return ctx; } - return 0; + return nullptr; } TA_context* tactx_Pop(u32 addr) @@ -190,22 +161,9 @@ void tactx_Term() if (ta_ctx != nullptr) SetCurrentTARC(TACTX_NONE); - for (size_t i = 0; i < ctx_list.size(); i++) - { - ctx_list[i]->Free(); - delete ctx_list[i]; - } + for (TA_context *ctx : ctx_list) + delete ctx; ctx_list.clear(); - mtx_pool.lock(); - { - for (size_t i = 0; i < ctx_pool.size(); i++) - { - ctx_pool[i]->Free(); - delete ctx_pool[i]; - } - } - ctx_pool.clear(); - mtx_pool.unlock(); } const u32 NULL_CONTEXT = ~0u; @@ -214,8 +172,8 @@ static void serializeContext(Serializer& ser, const TA_context *ctx) { if (ser.dryrun()) { - // Maximum size: address, size, data, render pass count, render passes - ser.skip(4 + 4 + TA_DATA_SIZE + 4 + ARRAY_SIZE(tad_context::render_passes) * 4); + // Maximum size: address, size, data + ser.skip(4 + 4 + TA_DATA_SIZE); return; } if (ctx == nullptr) @@ -228,12 +186,6 @@ static void serializeContext(Serializer& ser, const TA_context *ctx) const u32 taSize = tad.thd_data - tad.thd_root; ser << taSize; ser.serialize(tad.thd_root, taSize); - ser << tad.render_pass_count; - for (u32 i = 0; i < tad.render_pass_count; i++) - { - u32 offset = (u32)(tad.render_passes[i] - tad.thd_root); - ser << offset; - } } static void deserializeContext(Deserializer& deser, TA_context **pctx) @@ -251,19 +203,12 @@ static void deserializeContext(Deserializer& deser, TA_context **pctx) tad_context& tad = (*pctx)->tad; deser.deserialize(tad.thd_root, size); tad.thd_data = tad.thd_root + size; - if (deser.version() >= Deserializer::V12 || (deser.version() >= Deserializer::V12_LIBRETRO && deser.version() < Deserializer::V5)) + if ((deser.version() >= Deserializer::V12 && deser.version() < Deserializer::V26) + || (deser.version() >= Deserializer::V12_LIBRETRO && deser.version() < Deserializer::V5)) { - deser >> tad.render_pass_count; - for (u32 i = 0; i < tad.render_pass_count; i++) - { - u32 offset; - deser >> offset; - tad.render_passes[i] = tad.thd_root + offset; - } - } - else - { - tad.render_pass_count = 0; + u32 render_pass_count; + deser >> render_pass_count; + deser.skip(sizeof(u32) * render_pass_count); } } @@ -273,7 +218,7 @@ void SerializeTAContext(Serializer& ser) int curCtx = -1; for (const auto& ctx : ctx_list) { - if (ctx == ta_ctx) + if (ctx == ::ta_ctx) curCtx = (int)(&ctx - &ctx_list[0]); serializeContext(ser, ctx); } diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 799bb2b93..49fadc55f 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -56,6 +56,9 @@ struct PolyParam float glossCoef1; N2LightModel *lightModel; bool envMapping; + bool constantColor; + bool diffuseColor; + bool specularColor; bool equivalent(const PolyParam& other) const { @@ -73,7 +76,10 @@ struct PolyParam && glossCoef0 == other.glossCoef0 && glossCoef1 == other.glossCoef1 && lightModel == other.lightModel - && envMapping == other.envMapping; + && envMapping == other.envMapping + && constantColor == other.constantColor + && diffuseColor == other.diffuseColor + && specularColor == other.specularColor; } bool isNaomi2() const { return projMatrix != nullptr; } }; @@ -95,18 +101,17 @@ struct ModTriangle f32 x0,y0,z0,x1,y1,z1,x2,y2,z2; }; +constexpr size_t MAX_PASSES = 10; + struct tad_context { u8* thd_data; u8* thd_root; u8* thd_old_data; - u8 *render_passes[10]; - u32 render_pass_count; void Clear() { thd_old_data = thd_data = thd_root; - render_pass_count = 0; } void ClearPartial() @@ -115,13 +120,6 @@ struct tad_context thd_data = thd_root; } - void Continue() - { - render_passes[render_pass_count] = End(); - if (render_pass_count < sizeof(render_passes) / sizeof(u8*) - 1) - render_pass_count++; - } - u8* End() { return thd_data == thd_root ? thd_old_data : thd_data; @@ -244,15 +242,13 @@ struct rend_context struct TA_context { u32 Address; - u32 LastUsed; - std::mutex thd_inuse; std::mutex rend_inuse; tad_context tad; rend_context rend; - + TA_context *nextContext = nullptr; /* Dreamcast games use up to 20k vtx, 30k idx, 1k (in total) parameters. at 30 fps, thats 600kvtx (900 stripped) @@ -269,12 +265,10 @@ struct TA_context sa2: idx: 36094, vtx: 24520, op: 1330, pt: 10, tr: 177, mvo: 39, modt: 360, ov: 0 */ - void MarkRend(u32 render_pass) + void MarkRend() { - verify(render_pass <= tad.render_pass_count); - - rend.proc_start = render_pass == 0 ? tad.thd_root : tad.render_passes[render_pass - 1]; - rend.proc_end = render_pass == tad.render_pass_count ? tad.End() : tad.render_passes[render_pass]; + rend.proc_start = tad.thd_root; + rend.proc_end = tad.End(); } void Alloc() @@ -291,7 +285,7 @@ struct TA_context rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); - rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes + rend.render_passes.Init(sizeof(RenderPass) * MAX_PASSES, &rend.Overrun, "render_passes"); // 10 render passes rend.matrices.Init(2000, &rend.Overrun, "matrices"); rend.lightModels.Init(100, &rend.Overrun, "lightModels"); @@ -308,7 +302,7 @@ struct TA_context rend_inuse.unlock(); } - void Free() + ~TA_context() { verify(tad.End() - tad.thd_root <= TA_DATA_SIZE); freeAligned(tad.thd_root); @@ -357,5 +351,5 @@ void ta_add_vertex(const Vertex& vtx); void ta_add_triangle(const ModTriangle& tri); float* ta_add_matrix(const float *matrix); N2LightModel *ta_add_light(const N2LightModel& light); -void ta_add_ta_data(u32 *data, u32 size); -u32 getTAContextAddress(); +void ta_add_ta_data(int listType, u32 *data, u32 size); +int getTAContextAddresses(u32 *addresses); diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 164dfdfff..3ddde7461 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -70,6 +70,7 @@ const u32 ListType_None = -1; const u32 SZ32 = 1; const u32 SZ64 = 2; static bool fetchTextures = true; +static u32 forcedListType = ListType_None; #include "ta_structs.h" @@ -98,7 +99,7 @@ class FifoSplitter verify(CurrentList==ListType_None); //verify(ListIsFinished[new_list]==false); //printf("Starting list %d\n",new_list); - CurrentList=new_list; + CurrentList = forcedListType != ListType_None ? forcedListType : new_list; StartList(CurrentList); } @@ -1567,11 +1568,12 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) bgpp->envMapping = false; } - for (u32 pass = 0; pass <= ctx->tad.render_pass_count; pass++) + TA_context *childCtx = ctx; + while (childCtx != nullptr) { - ctx->MarkRend(pass); - vd_rc.proc_start = ctx->rend.proc_start; - vd_rc.proc_end = ctx->rend.proc_end; + childCtx->MarkRend(); + vd_rc.proc_start = childCtx->rend.proc_start; + vd_rc.proc_end = childCtx->rend.proc_end; Ta_Dma* ta_data = (Ta_Dma *)vd_rc.proc_start; Ta_Dma* ta_data_end = (Ta_Dma *)vd_rc.proc_end - 1; @@ -1579,9 +1581,10 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) while (ta_data <= ta_data_end) ta_data = TaCmd(ta_data, ta_data_end); - if (ctx->rend.Overrun) + if (vd_ctx->rend.Overrun) break; + int pass = vd_rc.render_passes.used(); bool empty_pass = vd_rc.global_param_op.used() == (pass == 0 ? 0 : (int)vd_rc.render_passes.LastPtr()->op_count) && vd_rc.global_param_pt.used() == (pass == 0 ? 0 : (int)vd_rc.render_passes.LastPtr()->pt_count) && vd_rc.global_param_tr.used() == (pass == 0 ? 0 : (int)vd_rc.render_passes.LastPtr()->tr_count); @@ -1607,10 +1610,11 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) render_pass->autosort = UsingAutoSort(pass); render_pass->z_clear = ClearZBeforePass(pass); } + childCtx = childCtx->nextContext; } rv = !empty_context; - bool overrun = ctx->rend.Overrun; + bool overrun = vd_ctx->rend.Overrun; if (overrun) WARN_LOG(PVR, "ERROR: TA context overrun"); else if (config::RenderResolution > 480) @@ -1776,10 +1780,11 @@ N2LightModel *ta_add_light(const N2LightModel& light) return ta_ctx->rend.lightModels.LastPtr(); } -void ta_add_ta_data(u32 *data, u32 size) +void ta_add_ta_data(int listType, u32 *data, u32 size) { vd_ctx = ta_ctx; fetchTextures = false; + forcedListType = listType; //TODO if (bgraColors) // TAParserDX.vdec_init(); //else @@ -1789,8 +1794,14 @@ void ta_add_ta_data(u32 *data, u32 size) Ta_Dma *ta_data_end = (Ta_Dma *)(data + size / 4) - 1; while (ta_data <= ta_data_end) ta_data = TaCmd(ta_data, ta_data_end); + Ta_Dma eol{}; + eol.pcw.ParaType = ParamType_End_Of_List; + eol.pcw.ListType = listType; + TaCmd(&eol, &eol); + vd_ctx = nullptr; fetchTextures = true; + forcedListType = ListType_None; } //decode a vertex in the native pvr format @@ -2065,7 +2076,7 @@ static bool ClearZBeforePass(int pass_number) return !tile.NoZClear; } -u32 getTAContextAddress() +int getTAContextAddresses(u32 *addresses) { u32 addr = REGION_BASE; const bool type1_tile = ((FPU_PARAM_CFG >> 21) & 1) == 0; @@ -2085,11 +2096,19 @@ u32 getTAContextAddress() if (type1_tile && tile.PreSort) // Windows CE weirdness tile_size = 6 * 4; + u32 x = tile.X; + u32 y = tile.Y; + u32 count = 0; + do { + tile.full = pvr_read32p(addr); + if (tile.X != x || tile.Y != y) + break; + u32 opbAddr = pvr_read32p(addr + 4); + addresses[count++] = pvr_read32p(opbAddr); + addr += tile_size; + } while (!tile.LastRegion && count < MAX_PASSES); - u32 opbAddr = pvr_read32p(addr + 4); - u32 ta_ol_base = pvr_read32p(opbAddr); - - return ta_ol_base; + return count; } void rend_context::newRenderPass() diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h index bf8402d1d..54dc0e25e 100755 --- a/core/rend/gl4/gl4.h +++ b/core/rend/gl4/gl4.h @@ -45,18 +45,23 @@ struct gl4PipelineShader GLint fog_clamp_min, fog_clamp_max; GLint ndcMat; GLint palette_index; + // Naomi2 GLint mvMat; GLint normalMat; GLint projMat; GLint glossCoef0; + GLint envMapping; + GLint bumpMapping; + GLint constantColor; + GLint modelDiffuse; + GLint modelSpecular; + GLint lightCount; GLint ambientBase; GLint ambientOffset; GLint ambientMaterial; GLint useBaseOver; - GLint envMapping; - GLint bumpMapping; struct { GLint color; GLint direction; @@ -73,6 +78,7 @@ struct gl4PipelineShader GLint attnAngleA; GLint attnAngleB; } lights[elan::MAX_LIGHTS]; + float *lastMvMat; float *lastNormalMat; float *lastProjMat; diff --git a/core/rend/gl4/gl4naomi2.cpp b/core/rend/gl4/gl4naomi2.cpp index ef248bb40..bc116f172 100644 --- a/core/rend/gl4/gl4naomi2.cpp +++ b/core/rend/gl4/gl4naomi2.cpp @@ -31,11 +31,12 @@ static const char *gouraudSource = R"( #define NOPERSPECTIVE noperspective )"; -N2Vertex4Source::N2Vertex4Source(bool gouraud, bool geometryOnly) : OpenGl4Source() +N2Vertex4Source::N2Vertex4Source(bool gouraud, bool geometryOnly, bool texture) : OpenGl4Source() { addConstant("pp_Gouraud", gouraud); addConstant("GEOM_ONLY", geometryOnly); addConstant("TWO_VOLUMES", 1); + addConstant("pp_Texture", (int)texture); addSource(gouraudSource); if (!geometryOnly) diff --git a/core/rend/gl4/gl4naomi2.h b/core/rend/gl4/gl4naomi2.h index 5775a4c9d..f376e10d7 100644 --- a/core/rend/gl4/gl4naomi2.h +++ b/core/rend/gl4/gl4naomi2.h @@ -23,7 +23,7 @@ class N2Vertex4Source : public OpenGl4Source { public: - N2Vertex4Source(bool gouraud, bool geometryOnly = false); + N2Vertex4Source(bool gouraud, bool geometryOnly, bool texture); }; class N2Geometry4Shader : public OpenGl4Source diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp index 918675199..a72db1d78 100644 --- a/core/rend/gl4/gldraw.cpp +++ b/core/rend/gl4/gldraw.cpp @@ -381,7 +381,8 @@ static void DrawModVols(int first, int count) if (param.isNaomi2()) { glcache.UseProgram(gl4.n2ModVolShader.program); - glUniformMatrix4fv(gl4.n2ModVolShader.mvMat, 1, GL_FALSE, param.mvMatrix); + if (param.mvMatrix != nullptr) + glUniformMatrix4fv(gl4.n2ModVolShader.mvMat, 1, GL_FALSE, param.mvMatrix); glUniformMatrix4fv(gl4.n2ModVolShader.projMat, 1, GL_FALSE, param.projMatrix); } else diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index 02d5cd5fe..a661b7997 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -469,7 +469,7 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source std::string geometrySource; if (s->naomi2) { - vertexSource = N2Vertex4Source(s->pp_Gouraud).generate(); + vertexSource = N2Vertex4Source(s->pp_Gouraud, false, s->pp_Texture).generate(); geometrySource = N2Geometry4Shader(s->pp_Gouraud).generate(); } else @@ -593,7 +593,7 @@ static void create_modvol_shader() gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); gl4.modvol_shader.ndcMat = glGetUniformLocation(gl4.modvol_shader.program, "ndcMat"); - N2Vertex4Source n2VertexShader(false, true); + N2Vertex4Source n2VertexShader(false, true, false); N2Geometry4Shader geometryShader(false, true); gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str()); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 3f01a90f1..d57d54d6e 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -576,7 +576,8 @@ void DrawModVols(int first, int count) if (param.mvMatrix != curMVMat) { curMVMat = param.mvMatrix; - glUniformMatrix4fv(gl.n2ModVolShader.mvMat, 1, GL_FALSE, curMVMat); + if (curMVMat != nullptr) + glUniformMatrix4fv(gl.n2ModVolShader.mvMat, 1, GL_FALSE, curMVMat); } if (param.projMatrix != curProjMat) { diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 19cfc0a7e..622c85895 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -728,7 +728,7 @@ bool CompilePipelineShader(PipelineShader* s) { std::string vertexShader; if (s->naomi2) - vertexShader = N2VertexSource(s->pp_Gouraud).generate(); + vertexShader = N2VertexSource(s->pp_Gouraud, false, s->pp_Texture).generate(); else vertexShader = VertexSource(s->pp_Gouraud).generate(); FragmentShaderSource fragmentSource(s); @@ -896,7 +896,7 @@ static void create_modvol_shader() gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); - N2VertexSource n2vertexShader(false, true); + N2VertexSource n2vertexShader(false, true, false); N2GeometryShader geometryShader(false, true); gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str()); gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat"); diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 901e3bc1b..f4fccdd8a 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -52,18 +52,23 @@ struct PipelineShader GLint fog_clamp_min, fog_clamp_max; GLint ndcMat; GLint palette_index; + // Naomi2 GLint mvMat; GLint normalMat; GLint projMat; GLint glossCoef0; + GLint envMapping; + GLint bumpMapping; + GLint constantColor; + GLint modelDiffuse; + GLint modelSpecular; + GLint lightCount; GLint ambientBase; GLint ambientOffset; GLint ambientMaterial; GLint useBaseOver; - GLint envMapping; - GLint bumpMapping; struct { GLint color; GLint direction; @@ -80,6 +85,7 @@ struct PipelineShader GLint attnAngleA; GLint attnAngleB; } lights[elan::MAX_LIGHTS]; + float *lastMvMat; float *lastNormalMat; float *lastProjMat; diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp index 7c2461836..bdd4718cb 100644 --- a/core/rend/gles/naomi2.cpp +++ b/core/rend/gles/naomi2.cpp @@ -66,7 +66,12 @@ void main() #endif vec4 vnorm = normalize(normalMat * vec4(in_normal, 0.0)); if (bumpMapping == 0) + { computeColors(vs_base, vs_offs, vpos.xyz, vnorm.xyz); +#if pp_Texture == 0 + vs_base += vs_offs; +#endif + } vs_uv.xy = in_uv; if (envMapping == 1) computeEnvMap(vs_uv.xy, vpos.xyz, vnorm.xyz); @@ -136,9 +141,14 @@ uniform int useBaseOver; // model attributes uniform float glossCoef0; uniform float glossCoef1; +uniform int constantColor; +uniform int modelDiffuse; +uniform int modelSpecular; void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in vec3 position, in vec3 normal) { + if (constantColor == 1) + return; vec3 diffuse = vec3(0.0); vec3 specular = vec3(0.0); float diffuseAlpha = 0.0; @@ -223,12 +233,16 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in vec3 position, i diffuse += ambientBase.rgb; specular += ambientOffset.rgb; } - baseCol.rgb *= diffuse; - offsetCol.rgb *= specular; + if (modelDiffuse == 1) + baseCol.rgb *= diffuse; + if (modelSpecular == 1) + offsetCol.rgb *= specular; if (ambientMaterial == 0) { - baseCol.rgb += ambientBase.rgb; - offsetCol.rgb += ambientOffset.rgb; + if (modelDiffuse == 1) + baseCol.rgb += ambientBase.rgb; + if (modelSpecular == 1) + offsetCol.rgb += ambientOffset.rgb; } baseCol.a = max(0.0, baseCol.a + diffuseAlpha); offsetCol.a = max(0.0, offsetCol.a + specularAlpha); @@ -503,11 +517,12 @@ void main() )"; -N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly) : OpenGlSource() +N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly, bool texture) : OpenGlSource() { addConstant("pp_Gouraud", gouraud); addConstant("GEOM_ONLY", geometryOnly); addConstant("TWO_VOLUMES", 0); + addConstant("pp_Texture", (int)texture); addSource(VertexCompatShader); addSource(GouraudSource); diff --git a/core/rend/gles/naomi2.h b/core/rend/gles/naomi2.h index 9ae0eb97d..20931fb9e 100644 --- a/core/rend/gles/naomi2.h +++ b/core/rend/gles/naomi2.h @@ -27,7 +27,7 @@ class N2VertexSource : public OpenGlSource { public: - N2VertexSource(bool gouraud, bool geometryOnly = false); + N2VertexSource(bool gouraud, bool geometryOnly, bool texture); }; class N2GeometryShader : public OpenGlSource @@ -53,6 +53,10 @@ void initN2Uniforms(ShaderType *shader) shader->glossCoef0 = glGetUniformLocation(shader->program, "glossCoef0"); shader->envMapping = glGetUniformLocation(shader->program, "envMapping"); shader->bumpMapping = glGetUniformLocation(shader->program, "bumpMapping"); + shader->constantColor = glGetUniformLocation(shader->program, "constantColor"); + shader->modelDiffuse = glGetUniformLocation(shader->program, "modelDiffuse"); + shader->modelSpecular = glGetUniformLocation(shader->program, "modelSpecular"); + // Lights shader->lightCount = glGetUniformLocation(shader->program, "lightCount"); shader->ambientBase = glGetUniformLocation(shader->program, "ambientBase"); @@ -100,12 +104,14 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader) if (pp->mvMatrix != shader->lastMvMat) { shader->lastMvMat = pp->mvMatrix; - glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, pp->mvMatrix); + if (pp->mvMatrix != nullptr) + glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, pp->mvMatrix); } if (pp->normalMatrix != shader->lastNormalMat) { shader->lastNormalMat = pp->normalMatrix; - glUniformMatrix4fv(shader->normalMat, 1, GL_FALSE, pp->normalMatrix); + if (pp->normalMatrix != nullptr) + glUniformMatrix4fv(shader->normalMat, 1, GL_FALSE, pp->normalMatrix); } if (pp->projMatrix != shader->lastProjMat) { @@ -113,6 +119,9 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader) glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, pp->projMatrix); } glUniform1f(shader->glossCoef0, pp->glossCoef0); + glUniform1i(shader->constantColor, (int)pp->constantColor); + glUniform1i(shader->modelDiffuse, (int)pp->diffuseColor); + glUniform1i(shader->modelSpecular, (int)pp->specularColor); N2LightModel *const lightModel = pp->lightModel; if (lightModel != shader->lastLightModel) diff --git a/core/serialize.h b/core/serialize.h index a453f4b9a..b7d7c91a0 100644 --- a/core/serialize.h +++ b/core/serialize.h @@ -61,7 +61,8 @@ public: V23 = 818, V24 = 819, V25 = 820, - Current = V25, + V26 = 821, + Current = V26, Next = Current + 1, };