diff --git a/CMakeLists.txt b/CMakeLists.txt
index 39038bbb7..d1d5d6a2c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1079,6 +1079,8 @@ if(WIN32)
core/rend/dx11/dx11context_lr.cpp
core/rend/dx11/dx11context_lr.h
core/rend/dx11/dx11_driver.h
+ core/rend/dx11/dx11_naomi2.cpp
+ core/rend/dx11/dx11_naomi2.h
core/rend/dx11/oit/dx11_oitbuffers.h
core/rend/dx11/oit/dx11_oitrenderer.cpp
core/rend/dx11/oit/dx11_oitshaders.cpp
diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp
index d43e8a113..1714a63fa 100644
--- a/core/hw/pvr/elan.cpp
+++ b/core/hw/pvr/elan.cpp
@@ -1,1804 +1,1811 @@
-/*
- Copyright 2022 flyinghead
-
- This file is part of Flycast.
-
- Flycast is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 2 of the License, or
- (at your option) any later version.
-
- Flycast is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with Flycast. If not, see .
- */
-/*
- * VideoLogic custom transformation & lighting (T&L) chip (codenamed: ELAN)
- * 32 MB RAM
- * Clock: 100 MHz
- * 16 light sources per polygon
- * ambient, parallel, point or spot (Fog lights and alpha lights also exist)
- * Perspective conversion
- * Near, far and side clipping, offscreen and backface culling
- * bump mapping, environmental mapping
- * dynamic & static model processing
- * model cache system
- *
- * Each PVR2 chip renders half the screen (rectangular, stripes, and checker board options)
- * so textures have to be duplicated in each vram
- *
- * Area 0:
- * 005f6800 - 005f7cff asic A regs
- * 005f8000 - 005f9fff CLXA regs
- * 025f6800 - 025f7cff asic B regs
- * 025f8000 - 025f9fff CLXB regs
- *
- * Area 1:
- * 05000000 - 06ffffff CLXA vram
- * 07000000 - 08ffffff CLXB vram
- *
- * Area 2:
- * 085f6800 - 085f7cff write both asic regs
- * 085f8000 - 085f9fff write both PVR regs
- * 08800000 - 088000ff? elan regs
- * 09000000 - ? elan command buffer
- * 0A000000 - 0bfffffff elan RAM
- */
-#include "elan.h"
-#include "hw/mem/_vmem.h"
-#include "pvr_mem.h"
-#include "ta.h"
-#include "ta_ctx.h"
-#include "hw/holly/holly_intc.h"
-#include "hw/holly/sb.h"
-#include "hw/pvr/Renderer_if.h"
-#include "hw/sh4/sh4_sched.h"
-#include "hw/sh4/sh4_mem.h"
-#include "emulator.h"
-#include "serialize.h"
-#include "elan_struct.h"
-#include "network/ggpo.h"
-#include
-#include
-#include
-#include
-
-namespace elan {
-
-static _vmem_handler elanRegHandler;
-static _vmem_handler elanCmdHandler;
-static _vmem_handler elanRamHandler;
-
-u8 *RAM;
-
-static u32 reg10;
-static u32 reg74;
-static u32 reg30 = 0x31;
-
-static u32 elanCmd[32 / 4];
-
-template
-T DYNACALL read_elanreg(u32 paddr)
-{
- //verify(sizeof(T) == 4);
- u32 addr = paddr & 0x01ffffff;
- switch (addr >> 16)
- {
- case 0x5F:
- if (addr >= 0x005F6800 && addr <= 0x005F7CFF)
- {
- // 5F6908: Tests for errors 4, 8, 10, 2 and 1 (render isp buf ovf, render hazard, ISP param ovf, ob list ptr ovf, ta ill param)
- // 5f6900: then int 4 and 40 (EoR TSP, EoT YUV)
- return (T)sb_ReadMem(paddr, sizeof(T));
- }
- else if (addr >= 0x005F8000 && addr <= 0x005F9FFF)
- {
- if (sizeof(T) != 4)
- // House of the Dead 2
- return 0;
- return (T)pvr_ReadReg(paddr);
- }
- else
- {
- INFO_LOG(MEMORY, "Read from area2 not implemented [Unassigned], addr=%x", addr);
- return 0;
- }
-
- default:
-// if ((addr & 0xFF) != 0x74)
- DEBUG_LOG(PVR, "ELAN read(%d) %08x [pc %08x]", (u32)sizeof(T), addr, p_sh4rcb->cntx.pc);
- switch (addr & 0xFF)
- {
- case 0: // magic number
- return (T)0xe1ad0000;
- case 4: // revision
- return 0x10; // 1 or x10
- case 0xc:
- // command queue size
- // loops until < 2 (v1) or 3 (v10)
- return 1;
- case 0x10: // sh4 if control?
- // b0 broadcast on cs1
- // b1 elan channel 2
- // b2 enable pvr #2
- // rewritten by bios as reg10 & ~1
- return reg10;
- case 0x14: // SDRAM refresh (never read?)
- return (T)0x2029; //default 0x1429
- case 0x1c: // SDRAM CFG
- return (T)0x87320961;
- case 0x30: // Macro tiler config
- // 0 0 l l l l l l t t t t 0 0 r r r r r r b b b b 0 0 V H 0 0 0 T
- // lllll: left tile
- // tttt: top tile
- // rrrrrr: right tile
- // bbbb: bottom tile
- // V: tile vertically
- // H: tile horizontally
- // T: tiler enabled
- return reg30;
- case 0x74:
- // b0 dma completed
- // b1 cmd completed
- // b2-b3 geometry timeouts
- // b4-b6 errors?
- return reg74;
- case 0x78: // IRQ MASK
- // 6 bits?
- return 0;
- default:
- return (T)0;
- }
- }
-}
-
-template
-void DYNACALL write_elanreg(u32 paddr, T data)
-{
- //verify(sizeof(T) == 4);
- u32 addr = paddr & 0x01ffffff;
- switch (addr >> 16)
- {
- case 0x5F:
- if (addr>= 0x005F6800 && addr <= 0x005F7CFF)
- sb_WriteMem(paddr, data, sizeof(T));
- else if (addr >= 0x005F8000 && addr <= 0x005F9FFF)
- {
- if (addr == 0x5F8040 && data == 0xFF00FF)
- {
- ERROR_LOG(PVR, "ELAN SCREWED pr %x pc %x", p_sh4rcb->cntx.pr, p_sh4rcb->cntx.pc);
- throw FlycastException("Boot aborted");
- }
- else if ((addr & 0x1fff) == SOFTRESET_addr && data == 0)
- reg74 &= 3;
- else if ((addr & 0x1fff) == STARTRENDER_addr)
- reg74 &= 3;
-
- //if ((paddr & 0x1c000000) == 0x08000000 && (addr & 0x1fff) == SOFTRESET_addr && data == 0)
- // reg74 |= 2;
- pvr_WriteReg(paddr, data);
- }
- else
- INFO_LOG(COMMON, "Write to area2 not implemented [Unassigned], addr=%x,data=%x,size=%d", addr, data, (u32)sizeof(T));
- break;
- default:
-// if ((addr & 0xFF) != 0x74)
- DEBUG_LOG(PVR, "ELAN write(%d) %08x = %x", (u32)sizeof(T), addr, data);
- switch (addr & 0xFF)
- {
- case 0x0:
- // 0 multiple times (_kmtlifAbortDisplayListProcessing)
- break;
- // 0x4: _kmtlifAbortDisplayListProcessing: 0
- case 0x8: // write-only. reset ?
- // 1 then 0
- // bios: 5
- // _kmtlifAbortDisplayListProcessing: 5 then 0
- // _kmtlifHandleDMATimeout: 1, 0, 4, 0...
- if (data == 0)
- reg74 = 0;
- break;
- case 0xc:
- // 0
- break;
- case 0x10: // sh4 if control?
- reg10 = data;
- break;;
- case 0x14: // SDRAM refresh
- // x2029
- break;
- case 0x1c: // SDRAM CFG
- break;
- case 0x30:
- reg30 = data;
- break;
- case 0x74: // IRQ STAT
- reg74 &= ~data;
- break;
- // _kmtlifSetupElanInts:
- // 78 = 3f
- // 7C = 0
- // 80 = 17
- // 84 = 2b
- // 88 = 0
- case 0xd0: // _kmtlifSetCullingRegister
- // 6
- break;;
- default:
- break;
- }
- }
-}
-
-template
-T DYNACALL read_elancmd(u32 addr)
-{
- DEBUG_LOG(PVR, "ELAN cmd READ! (%d) %08x", (u32)sizeof(T), addr);
- return 0;
-}
-
-static glm::vec4 unpackColorBGRA(u32 color)
-{
- return glm::vec4((float)((color >> 16) & 0xff) / 255.f,
- (float)((color >> 8) & 0xff) / 255.f,
- (float)(color & 0xff) / 255.f,
- (float)(color >> 24) / 255.f);
-}
-
-static glm::vec4 unpackColorRGBA(u32 color)
-{
- return glm::vec4((float)(color & 0xff) / 255.f,
- (float)((color >> 8) & 0xff) / 255.f,
- (float)((color >> 16) & 0xff) / 255.f,
- (float)(color >> 24) / 255.f);
-}
-
-static glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0)
-{
- return glm::vec4((float)red / 255.f, (float)green / 255.f, (float)blue / 255.f, (float)alpha / 255.f);
-}
-
-static u32 packColor(const glm::vec4& color)
-{
- return (int)(std::min(1.f, color.a) * 255.f) << 24
- | (int)(std::min(1.f, color.r) * 255.f) << 16
- | (int)(std::min(1.f, color.g) * 255.f) << 8
- | (int)(std::min(1.f, color.b) * 255.f);
-}
-
-static GMP *curGmp;
-static glm::mat4x4 curMatrix;
-static float *taMVMatrix;
-static float *taNormalMatrix;
-static glm::mat4 projectionMatrix;
-static float *taProjMatrix;
-static LightModel *curLightModel;
-static ElanBase *curLights[MAX_LIGHTS];
-static float nearPlane = 0.001f;
-static float farPlane = 100000.f;
-static bool envMapping;
-static bool cullingReversed;
-static bool openModifierVolume;
-static bool shadowedVolume;
-static TSP modelTSP;
-static glm::vec4 gmpDiffuseColor0;
-static glm::vec4 gmpSpecularColor0;
-static glm::vec4 gmpDiffuseColor1;
-static glm::vec4 gmpSpecularColor1;
-
-struct State
-{
- static constexpr u32 Null = 0xffffffff;
-
- int listType = -1;
- u32 gmp = Null;
- u32 instance = Null;
- u32 projMatrix = Null;
- u32 tileclip = 0;
- u32 lightModel = Null;
- u32 lights[MAX_LIGHTS] = {
- Null, Null, Null, Null, Null, Null, Null, Null,
- Null, Null, Null, Null, Null, Null, Null, Null
- };
- bool lightModelUpdated = false;
- float envMapUOffset = 0.f;
- float envMapVOffset = 0.f;
-
- void reset()
- {
- listType = -1;
- gmp = Null;
- instance = Null;
- projMatrix = Null;
- tileclip = 0;
- lightModel = Null;
- for (auto& light : lights)
- light = Null;
- update();
- }
- void setMatrix(InstanceMatrix *pinstance)
- {
- instance = elanRamAddress(pinstance);
- updateMatrix();
- }
-
- void updateMatrix()
- {
- if (instance == Null)
- {
- taMVMatrix = nullptr;
- taNormalMatrix = nullptr;
- envMapUOffset = 0.f;
- envMapVOffset = 0.f;
- return;
- }
- InstanceMatrix *mat = (InstanceMatrix *)&RAM[instance];
- DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f\n %f %f %f",
- -mat->tm00, -mat->tm10, -mat->tm20, -mat->tm30,
- mat->tm01, mat->tm11, mat->tm21, mat->tm31,
- -mat->tm02, -mat->tm12, -mat->tm22, -mat->tm32,
- mat->lm00, mat->lm10, mat->lm20,
- mat->lm01, mat->lm11, mat->lm21,
- mat->lm02, mat->lm12, mat->lm22);
-
- curMatrix = glm::mat4x4{
- -mat->tm00, mat->tm01, -mat->tm02, 0.f,
- -mat->tm10, mat->tm11, -mat->tm12, 0.f,
- -mat->tm20, mat->tm21, -mat->tm22, 0.f,
- -mat->tm30, mat->tm31, -mat->tm32, 1.f
- };
- glm::mat4x4 normalMatrix = glm::mat4x4{
- mat->lm00, mat->lm01, mat->lm02, 0.f,
- mat->lm10, mat->lm11, mat->lm12, 0.f,
- mat->lm20, mat->lm21, mat->lm22, 0.f,
- -mat->tm30, mat->tm31, -mat->tm32, 1.f
- };
- nearPlane = mat->_near;
- farPlane = mat->_far;
- envMapUOffset = mat->envMapU;
- envMapVOffset = mat->envMapV;
- taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix));
- if (normalMatrix != curMatrix)
- taNormalMatrix = ta_add_matrix(glm::value_ptr(normalMatrix));
- else
- taNormalMatrix = taMVMatrix;
- }
-
- void setProjectionMatrix(void *p)
- {
- projMatrix = elanRamAddress(p);
- updateProjectionMatrix();
- }
-
- void updateProjectionMatrix()
- {
- if (projMatrix == Null)
- {
- taProjMatrix = nullptr;
- return;
- }
- ProjMatrix *pm = (ProjMatrix *)&RAM[projMatrix];
- DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane);
- // fx = -m00 * w/2
- // tx = -m20 * w/2 + left + w/2
- // fy = -m11 * h/2
- // ty = -m21 * h/2 + top + h/2
- projectionMatrix = glm::mat4(
- -pm->fx, 0, 0, 0,
- 0, pm->fy, 0, 0,
- -pm->tx, -pm->ty, -1, -1,
- 0, 0, 0, 0
- );
- taProjMatrix = ta_add_matrix(glm::value_ptr(projectionMatrix));
- }
-
- void setGMP(void *p)
- {
- gmp = elanRamAddress(p);
- updateGMP();
- }
-
- void updateGMP()
- {
- if (gmp == Null)
- {
- curGmp = nullptr;
- gmpDiffuseColor0 = glm::vec4(0);
- gmpSpecularColor0 = glm::vec4(0);
- gmpDiffuseColor1 = glm::vec4(0);
- gmpSpecularColor1 = glm::vec4(0);
- }
- else
- {
- curGmp = (GMP *)&RAM[gmp];
- DEBUG_LOG(PVR, "GMP paramSelect %x", curGmp->paramSelect.full);
- if (curGmp->paramSelect.d0)
- gmpDiffuseColor0 = unpackColorRGBA(curGmp->diffuse0);
- else
- gmpDiffuseColor0 = glm::vec4(0);
- if (curGmp->paramSelect.s0)
- gmpSpecularColor0 = unpackColorRGBA(curGmp->specular0);
- else
- gmpSpecularColor0 = glm::vec4(0);
- if (curGmp->paramSelect.d1)
- gmpDiffuseColor1 = unpackColorRGBA(curGmp->diffuse1);
- else
- gmpDiffuseColor1 = glm::vec4(0);
- if (curGmp->paramSelect.s1)
- gmpSpecularColor1 = unpackColorRGBA(curGmp->specular1);
- else
- gmpSpecularColor1 = glm::vec4(0);
- }
- }
-
- void setLightModel(void *p)
- {
- lightModel = elanRamAddress(p);
- updateLightModel();
- }
-
- void updateLightModel()
- {
- lightModelUpdated = true;
- if (lightModel == Null)
- curLightModel = nullptr;
- else
- {
- curLightModel = (LightModel *)&RAM[lightModel];
- DEBUG_LOG(PVR, "Light model mask: diffuse %04x specular %04x, ambient base %08x offset %08x", curLightModel->diffuseMask0, curLightModel->specularMask0,
- curLightModel->ambientBase0, curLightModel->ambientOffset0);
- }
- }
-
- void setLight(int lightId, void *p)
- {
- lights[lightId] = elanRamAddress(p);
- updateLight(lightId);
- }
-
- void updateLight(int lightId)
- {
- lightModelUpdated = true;
- if (lights[lightId] == Null)
- {
- elan::curLights[lightId] = nullptr;
- return;
- }
- PointLight *plight = (PointLight *)&RAM[lights[lightId]];
- if (plight->pcw.parallelLight)
- {
- ParallelLight *light = (ParallelLight *)plight;
- DEBUG_LOG(PVR, " Parallel light %d: [%x] routing %d dmode %d col %d %d %d dir %f %f %f", light->lightId, plight->pcw.full,
- light->routing, light->dmode,
- light->red, light->green, light->blue,
- light->getDirX(), light->getDirY(), light->getDirZ());
- }
- else
- {
- DEBUG_LOG(PVR, " Point light %d: [%x] routing %d dmode %d smode %d col %d %d %d dir %f %f %f pos %f %f %f dist %f %f angle %f %f",
- plight->lightId, plight->pcw.full, plight->routing, plight->dmode, plight->smode,
- plight->red, plight->green, plight->blue,
- plight->getDirX(), plight->getDirY(), plight->getDirZ(),
- plight->posX, plight->posY, plight->posZ,
- plight->distA(), plight->distB(),
- plight->angleA(), plight->angleB());
- }
- elan::curLights[lightId] = plight;
- }
-
- void setClipMode(PCW pcw)
- {
- tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28);
- }
-
- void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax)
- {
- u32 t = tileclip & 0xF0000000;
- t |= xmin & 0x3f; // 6 bits
- t |= (xmax & 0x3f) << 6; // 6 bits
- t |= (ymin & 0x1f) << 12; // 5 bits
- t |= (ymax & 0x1f) << 17; // 5 bits
- tileclip = t;
- }
-
- void update()
- {
- updateMatrix();
- updateProjectionMatrix();
- updateGMP();
- updateLightModel();
- for (u32 i = 0; i < MAX_LIGHTS; i++)
- updateLight(i);
- }
-
- static u32 elanRamAddress(void *p)
- {
- if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE)
- return Null;
- else
- return (u32)((u8 *)p - RAM);
- }
-
- void serialize(Serializer& ser)
- {
- ser << listType;
- ser << gmp;
- ser << instance;
- ser << projMatrix;
- ser << tileclip;
- ser << lightModel;
- ser << lights;
- }
-
- void deserialize(Deserializer& deser)
- {
- if (deser.version() < Deserializer::V24)
- {
- reset();
- return;
- }
- deser >> listType;
- deser >> gmp;
- deser >> instance;
- deser >> projMatrix;
- deser >> tileclip;
- deser >> lightModel;
- deser >> lights;
- update();
- }
-};
-
-static State state;
-
-static void setCoords(Vertex& vtx, float x, float y, float z)
-{
- vtx.x = x;
- vtx.y = y;
- vtx.z = z;
-}
-
-template
-static void setUV(const Ts& vs, Vertex& vd)
-{
- if (envMapping)
- {
- vd.u = state.envMapUOffset;
- vd.v = state.envMapVOffset;
- vd.u1 = state.envMapUOffset;
- vd.v1 = state.envMapVOffset;
- }
- else
- {
- vd.u = vs.uv.u;
- vd.v = vs.uv.v;
- vd.u1 = vs.uv.u;
- vd.v1 = vs.uv.v;
- }
-}
-
-static void SetEnvMapUV(Vertex& vtx)
-{
- if (envMapping)
- {
- vtx.u = state.envMapUOffset;
- vtx.v = state.envMapVOffset;
- vtx.u1 = state.envMapUOffset;
- vtx.v1 = state.envMapVOffset;
- }
-}
-
-template
-glm::vec3 getNormal(const T& vtx)
-{
- return { (int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f };
-}
-
-template<>
-glm::vec3 getNormal(const N2_VERTEX_VNU& vtx)
-{
- return { vtx.normal.nx, vtx.normal.ny, vtx.normal.nz };
-}
-
-template
-void setNormal(Vertex& vd, const T& vs)
-{
- glm::vec3 normal = getNormal(vs);
- vd.nx = normal.x;
- vd.ny = normal.y;
- vd.nz = normal.z;
-}
-
-static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1)
-{
- baseCol0 += gmpDiffuseColor0;
- offsetCol0 += gmpSpecularColor0;
- baseCol1 += gmpDiffuseColor1;
- offsetCol1 += gmpSpecularColor1;
-}
-
-template
-static void convertVertex(const T& vs, Vertex& vd);
-
-template<>
-void convertVertex(const N2_VERTEX& vs, Vertex& vd)
-{
- setCoords(vd, vs.x, vs.y, vs.z);
- setNormal(vd, vs);
- SetEnvMapUV(vd);
- glm::vec4 baseCol0(0);
- glm::vec4 offsetCol0(0);
- glm::vec4 baseCol1(0);
- glm::vec4 offsetCol1(0);
- addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1);
-
- *(u32 *)vd.col = packColor(baseCol0);
- *(u32 *)vd.spc = packColor(offsetCol0);
- *(u32 *)vd.col1 = packColor(baseCol1);
- *(u32 *)vd.spc1 = packColor(offsetCol1);
-}
-
-template<>
-void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd)
-{
- setCoords(vd, vs.x, vs.y, vs.z);
- setNormal(vd, vs);
- SetEnvMapUV(vd);
- glm::vec4 baseCol0 = unpackColorRGBA(vs.rgb.argb0);
- glm::vec4 offsetCol0(0);
- glm::vec4 baseCol1 = unpackColorRGBA(vs.rgb.argb1);
- glm::vec4 offsetCol1(0);
- addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1);
- *(u32 *)vd.col = packColor(baseCol0);
- *(u32 *)vd.spc = packColor(offsetCol0);
- *(u32 *)vd.col1 = packColor(baseCol1);
- *(u32 *)vd.spc1 = packColor(offsetCol1);
-}
-
-template<>
-void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd)
-{
- setCoords(vd, vs.x, vs.y, vs.z);
- setNormal(vd, vs);
- setUV(vs, vd);
- glm::vec4 baseCol0(0);
- glm::vec4 offsetCol0(0);
- glm::vec4 baseCol1(0);
- glm::vec4 offsetCol1(0);
- addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1);
- *(u32 *)vd.col = packColor(baseCol0);
- *(u32 *)vd.spc = packColor(offsetCol0);
- *(u32 *)vd.col1 = packColor(baseCol1);
- *(u32 *)vd.spc1 = packColor(offsetCol1);
-}
-
-template<>
-void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd)
-{
- setCoords(vd, vs.x, vs.y, vs.z);
- setNormal(vd, vs);
- setUV(vs, vd);
- glm::vec4 baseCol0 = unpackColorRGBA(vs.rgb.argb0);
- glm::vec4 offsetCol0(0);
- glm::vec4 baseCol1 = unpackColorRGBA(vs.rgb.argb1);
- glm::vec4 offsetCol1(0);
- addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1);
- *(u32 *)vd.col = packColor(baseCol0);
- *(u32 *)vd.spc = packColor(offsetCol0);
- *(u32 *)vd.col1 = packColor(baseCol1);
- *(u32 *)vd.spc1 = packColor(offsetCol1);
-}
-
-template<>
-void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd)
-{
- setCoords(vd, vs.x, vs.y, vs.z);
- setNormal(vd, vs);
- setUV(vs, vd);
- glm::vec4 baseCol0(0);
- glm::vec4 offsetCol0(0);
- glm::vec4 baseCol1(0);
- glm::vec4 offsetCol1(0);
- addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1);
- *(u32 *)vd.col = packColor(baseCol0);
- *(u32 *)vd.col1 = packColor(baseCol1);
- // Stuff the bump map normals and parameters in the specular colors
- vd.spc[0] = vs.bump.tangent.x;
- vd.spc[1] = vs.bump.tangent.y;
- vd.spc[2] = vs.bump.tangent.z;
- vd.spc1[0] = vs.bump.bitangent.x;
- vd.spc1[1] = vs.bump.bitangent.y;
- vd.spc1[2] = vs.bump.bitangent.z;
- vd.spc[3] = vs.bump.scaleFactor.bumpDegree; // always 255?
- vd.spc1[3] = vs.bump.scaleFactor.fixedOffset; // always 0?
-// int nx = (int8_t)vs.header.nx;
-// int ny = (int8_t)vs.header.ny;
-// int nz = (int8_t)vs.header.nz;
-// printf("BumpMap vtx deg %d off %d normal %d %d %d tangent %d %d %d bitangent %d %d %d dot %d %d %d\n", vs.bump.scaleFactor.bumpDegree, vs.bump.scaleFactor.fixedOffset,
-// nx, ny, nz,
-// vs.bump.tangent.x, vs.bump.tangent.y, vs.bump.tangent.z, vs.bump.bitangent.x, vs.bump.bitangent.y, vs.bump.bitangent.z,
-// nx * vs.bump.tangent.x + ny * vs.bump.tangent.y + nz * vs.bump.tangent.z,
-// nx * vs.bump.bitangent.x + ny * vs.bump.bitangent.y + nz * vs.bump.bitangent.z,
-// vs.bump.tangent.x * vs.bump.bitangent.x + vs.bump.tangent.y * vs.bump.bitangent.y + vs.bump.tangent.z * vs.bump.bitangent.z
-// );
-}
-
-template
-static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& max)
-{
- min = { 1e38f, 1e38f, 1e38f };
- max = { -1e38f, -1e38f, -1e38f };
- for (u32 i = 0; i < count; i++)
- {
- glm::vec3 pos{ vertices[i].x, vertices[i].y, vertices[i].z };
- min = glm::min(min, pos);
- max = glm::max(max, pos);
- }
- glm::vec4 center((min + max) / 2.f, 1);
- glm::vec4 extents(max - glm::vec3(center), 0);
- // transform
- center = curMatrix * center;
- glm::vec3 extentX = curMatrix * glm::vec4(extents.x, 0, 0, 0);
- glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0);
- glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0);
- // new AA extents
- glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ);
-
- min = glm::vec3(center) - newExtent;
- max = glm::vec3(center) + newExtent;
-}
-
-template
-static bool isBetweenNearAndFar(const T* vertices, u32 count, bool& needNearClipping)
-{
- glm::vec3 min;
- glm::vec3 max;
- boundingBox(vertices, count, min, max);
- if (min.z > -nearPlane || max.z < -farPlane)
- return false;
-
- glm::vec4 pmin = projectionMatrix * glm::vec4(min, 1);
- glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1);
- if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y))
- return false;
-
- needNearClipping = max.z > -nearPlane;
-
- return true;
-}
-
-class TriangleStripClipper
-{
-public:
- TriangleStripClipper(bool enabled) : enabled(enabled) {}
-
- void add(const Vertex& vtx)
- {
- if (enabled)
- {
- float z = vtx.x * curMatrix[0][2] + vtx.y * curMatrix[1][2] + vtx.z * curMatrix[2][2] + curMatrix[3][2];
- float dist = -z - nearPlane;
- clip(vtx, dist);
- count++;
- }
- else
- {
- ta_add_vertex(vtx);
- }
- }
-
-private:
- void sendVertex(const Vertex& r)
- {
- if (dupeNext)
- ta_add_vertex(r);
- dupeNext = false;
- ta_add_vertex(r);
- }
-
- // Three-Dimensional Homogeneous Clipping of Triangle Strips
- // Patrick-Gilles Maillot. Graphics Gems II - 1991
- void clip(const Vertex& r, float rDist)
- {
- clipCode >>= 1;
- clipCode |= (int)(rDist < 0) << 2;
- if (count == 1)
- {
- switch (clipCode >> 1) {
- case 0: // Q and R inside
- sendVertex(q);
- sendVertex(r);
- break;
- case 1: // Q outside, R inside
- sendVertex(interpolate(q, qDist, r, rDist));
- sendVertex(r);
- break;
- case 2: // Q inside, R outside
- sendVertex(q);
- sendVertex(interpolate(q, qDist, r, rDist));
- break;
- case 3: // Q and R outside
- break;
- }
- }
- else if (count >= 2)
- {
- switch (clipCode)
- {
- case 0: // all inside
- sendVertex(r);
- break;
- case 1: // P outside, Q and R inside
- sendVertex(interpolate(r, rDist, p, pDist));
- sendVertex(q);
- sendVertex(r);
- break;
- case 2: // P inside, Q outside and R inside
- sendVertex(r);
- sendVertex(interpolate(q, qDist, r, rDist));
- sendVertex(r);
- break;
- case 3: // P and Q outside, R inside
- {
- Vertex tmp = interpolate(r, rDist, p, pDist);
- sendVertex(tmp);
- sendVertex(tmp);
- sendVertex(tmp); // One more to preserve strip swap order
- sendVertex(interpolate(q, qDist, r, rDist));
- sendVertex(r);
- }
- break;
- case 4: // P and Q inside, R outside
- sendVertex(interpolate(r, rDist, p, pDist));
- sendVertex(q);
- sendVertex(interpolate(q, qDist, r, rDist));
- break;
- case 5: // P outside, Q inside, R outside
- sendVertex(interpolate(q, qDist, r, rDist));
- break;
- case 6: // P inside, Q and R outside
- {
- Vertex tmp = interpolate(r, rDist, p, pDist);
- sendVertex(tmp);
- sendVertex(tmp);
- sendVertex(tmp); // One more to preserve strip swap order
- }
- break;
- case 7: // P, Q and R outside
- dupeNext = !dupeNext;
- break;
- }
- }
- p = q;
- pDist = qDist;
- q = r;
- qDist = rDist;
- }
-
- Vertex interpolate(const Vertex& v1, float f1, const Vertex& v2, float f2)
- {
- Vertex v;
- float a2 = std::abs(f1) / (std::abs(f1) + std::abs(f2));
- float a1 = 1 - a2;
- v.x = v1.x * a1 + v2.x * a2;
- v.y = v1.y * a1 + v2.y * a2;
- v.z = v1.z * a1 + v2.z * a2;
-
- v.u = v1.u * a1 + v2.u * a2;
- v.v = v1.v * a1 + v2.v * a2;
- v.u1 = v1.u1 * a1 + v2.u1 * a2;
- v.v1 = v1.v1 * a1 + v2.v1 * a2;
-
- for (size_t i = 0; i < ARRAY_SIZE(v1.col); i++)
- {
- v.col[i] = (u8)std::round(v1.col[i] * a1 + v2.col[i] * a2);
- v.spc[i] = (u8)std::round(v1.spc[i] * a1 + v2.spc[i] * a2);
- v.col1[i] = (u8)std::round(v1.col1[i] * a1 + v2.col1[i] * a2);
- v.spc1[i] = (u8)std::round(v1.spc1[i] * a1 + v2.spc1[i] * a2);
- }
- v.nx = v1.nx * a1 + v2.nx * a2;
- v.ny = v1.ny * a1 + v2.ny * a2;
- v.nz = v1.nz * a1 + v2.nz * a2;
-
- return v;
- }
-
- bool enabled;
- int count = 0;
- int clipCode = 0;
- Vertex p;
- float pDist = 0;
- Vertex q;
- float qDist = 0;
- bool dupeNext = false;
-};
-
-template
-static void sendVertices(const ICHList *list, const T* vtx, bool needClipping)
-{
- Vertex taVtx;
- verify(list->vertexSize() > 0);
-
- Vertex fanCenterVtx{};
- Vertex fanLastVtx{};
- bool stripStart = true;
- int outStripIndex = 0;
- TriangleStripClipper clipper(needClipping);
-
- for (u32 i = 0; i < list->vtxCount; i++)
- {
- convertVertex(*vtx, taVtx);
-
- if (stripStart)
- {
- // Center vertex if triangle fan
- //verify(vtx->header.isFirstOrSecond()); This fails for some strips: strip=1 fan=0 (soul surfer)
- fanCenterVtx = taVtx;
- if (outStripIndex > 0)
- {
- // use degenerate triangles to link strips
- clipper.add(fanLastVtx);
- clipper.add(taVtx);
- outStripIndex += 2;
- if (outStripIndex & 1)
- {
- clipper.add(taVtx);
- outStripIndex++;
- }
- }
- stripStart = false;
- }
- else if (vtx->header.isFan())
- {
- // use degenerate triangles to link strips
- clipper.add(fanLastVtx);
- clipper.add(fanCenterVtx);
- outStripIndex += 2;
- if (outStripIndex & 1)
- {
- clipper.add(fanCenterVtx);
- outStripIndex++;
- }
- // Triangle fan
- clipper.add(fanCenterVtx);
- clipper.add(fanLastVtx);
- outStripIndex += 2;
- }
- clipper.add(taVtx);
- outStripIndex++;
- fanLastVtx = taVtx;
- if (vtx->header.endOfStrip)
- stripStart = true;
-
- vtx++;
- }
-}
-
-class ModifierVolumeClipper
-{
-public:
- ModifierVolumeClipper(bool enabled) : enabled(enabled) {}
-
- void add(ModTriangle& tri)
- {
- if (enabled)
- {
- glm::vec3 dist{
- tri.x0 * curMatrix[0][2] + tri.y0 * curMatrix[1][2] + tri.z0 * curMatrix[2][2] + curMatrix[3][2],
- tri.x1 * curMatrix[0][2] + tri.y1 * curMatrix[1][2] + tri.z1 * curMatrix[2][2] + curMatrix[3][2],
- tri.x2 * curMatrix[0][2] + tri.y2 * curMatrix[1][2] + tri.z2 * curMatrix[2][2] + curMatrix[3][2]
- };
- dist = -dist - nearPlane;
- ModTriangle newTri;
- int n = sutherlandHodgmanClip(dist, tri, newTri);
- switch (n)
- {
- case 0:
- // fully clipped
- break;
- case 3:
- ta_add_triangle(tri);
- break;
- case 4:
- ta_add_triangle(tri);
- ta_add_triangle(newTri);
- break;
- }
- }
- else
- {
- ta_add_triangle(tri);
- }
- }
-
-private:
- //
- // Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
- // Journal of Graphics GPU and Game Tools - November 2011
- //
- glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist)
- {
- return (A * std::abs(Bdist) + B * std::abs(Adist)) / (std::abs(Adist) + std::abs(Bdist));
- }
-
- // Clip the triangle 'trig' with respect to the provided distances to the clipping plane.
- int sutherlandHodgmanClip(glm::vec3& dist, ModTriangle& trig, ModTriangle& newTrig)
- {
- constexpr float clipEpsilon = 0.f; //0.00001;
- constexpr float clipEpsilon2 = 0.f; //0.01;
-
- if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2))))
- // all clipped
- return 0;
- if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon))))
- // none clipped
- return 3;
-
- // There are either 1 or 2 vertices above the clipping plane.
- glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f));
- bool nextIsAbove;
- glm::vec3 v0(trig.x0, trig.y0, trig.z0);
- glm::vec3 v1(trig.x1, trig.y1, trig.z1);
- glm::vec3 v2(trig.x2, trig.y2, trig.z2);
- glm::vec3 v3;
- // Find the CCW-most vertex above the plane.
- if (above[1] && !above[0])
- {
- // Cycle once CCW. Use v3 as a temp
- nextIsAbove = above[2];
- v3 = v0;
- v0 = v1;
- v1 = v2;
- v2 = v3;
- dist = glm::vec3(dist.y, dist.z, dist.x);
- }
- else if (above[2] && !above[1])
- {
- // Cycle once CW. Use v3 as a temp.
- nextIsAbove = above[0];
- v3 = v2;
- v2 = v1;
- v1 = v0;
- v0 = v3;
- dist = glm::vec3(dist.z, dist.x, dist.y);
- }
- else
- nextIsAbove = above[1];
- trig.x0 = v0.x;
- trig.y0 = v0.y;
- trig.z0 = v0.z;
- // We always need to clip v2-v0.
- v3 = intersect(v0, dist[0], v2, dist[2]);
- if (nextIsAbove)
- {
- v2 = intersect(v1, dist[1], v2, dist[2]);
- trig.x1 = v1.x;
- trig.y1 = v1.y;
- trig.z1 = v1.z;
- trig.x2 = v2.x;
- trig.y2 = v2.y;
- trig.z2 = v2.z;
- newTrig.x0 = v0.x;
- newTrig.y0 = v0.y;
- newTrig.z0 = v0.z;
- newTrig.x1 = v2.x;
- newTrig.y1 = v2.y;
- newTrig.z1 = v2.z;
- newTrig.x2 = v3.x;
- newTrig.y2 = v3.y;
- newTrig.z2 = v3.z;
-
- return 4;
- }
- else
- {
- v1 = intersect(v0, dist[0], v1, dist[1]);
- trig.x1 = v1.x;
- trig.y1 = v1.y;
- trig.z1 = v1.z;
- trig.x2 = v3.x;
- trig.y2 = v3.y;
- trig.z2 = v3.z;
-
- return 3;
- }
- }
-
- bool enabled;
-};
-
-template
-static void sendMVPolygon(ICHList *list, const T *vtx, bool needClipping)
-{
- ModifierVolumeParam mvp{};
- mvp.isp.full = list->isp.full;
- if (!openModifierVolume)
- mvp.isp.CullMode = 0;
- mvp.isp.VolumeLast = list->pcw.volume;
- mvp.isp.DepthMode &= 3;
- mvp.mvMatrix = taMVMatrix;
- mvp.projMatrix = taProjMatrix;
- ta_add_poly(state.listType, mvp);
-
- ModifierVolumeClipper clipper(needClipping);
- glm::vec3 vtx0{};
- glm::vec3 vtx1{};
- u32 stripStart = 0;
-
- for (u32 i = 0; i < list->vtxCount; i++)
- {
- glm::vec3 v(vtx->x, vtx->y, vtx->z);
- u32 triIdx = i - stripStart;
- if (triIdx >= 2)
- {
- ModTriangle tri;
-
- if (triIdx & 1)
- {
- tri.x1 = vtx0.x;
- tri.y1 = vtx0.y;
- tri.z1 = vtx0.z;
-
- tri.x0 = vtx1.x;
- tri.y0 = vtx1.y;
- tri.z0 = vtx1.z;
- }
- else
- {
- tri.x0 = vtx0.x;
- tri.y0 = vtx0.y;
- tri.z0 = vtx0.z;
-
- tri.x1 = vtx1.x;
- tri.y1 = vtx1.y;
- tri.z1 = vtx1.z;
- }
- tri.x2 = v.x;
- tri.y2 = v.y;
- tri.z2 = v.z;
-
- clipper.add(tri);
- }
- if (vtx->header.endOfStrip)
- stripStart = i + 1;
- vtx0 = vtx1;
- vtx1 = v;
- vtx++;
- }
-}
-
-static N2LightModel *taLightModel;
-
-static void sendLights()
-{
- if (!state.lightModelUpdated)
- return;
-
- state.lightModelUpdated = false;
- N2LightModel model;
- model.lightCount = 0;
- if (curLightModel == nullptr)
- {
- model.useBaseOver = false;
- for (int i = 0; i < 2; i++)
- {
- model.ambientMaterialBase[i] = false;
- model.ambientMaterialOffset[i] = false;
- model.ambientBase[i][0] = model.ambientBase[i][1] = model.ambientBase[i][2] = model.ambientBase[i][3] = 1.f;
- }
- memset(model.ambientOffset, 0, sizeof(model.ambientOffset));
- taLightModel = nullptr;
- return;
- }
- model.ambientMaterialBase[0] = curLightModel->useAmbientBase0;
- model.ambientMaterialBase[1] = curLightModel->useAmbientBase1;
- model.ambientMaterialOffset[0] = curLightModel->useAmbientOffset0;
- model.ambientMaterialOffset[1] = curLightModel->useAmbientOffset1;
- model.useBaseOver = curLightModel->useBaseOver;
- model.bumpId1 = -1;
- model.bumpId2 = -1;
- memcpy(model.ambientBase[0], glm::value_ptr(unpackColorBGRA(curLightModel->ambientBase0)), sizeof(model.ambientBase[0]));
- memcpy(model.ambientBase[1], glm::value_ptr(unpackColorBGRA(curLightModel->ambientBase1)), sizeof(model.ambientBase[1]));
- memcpy(model.ambientOffset[0], glm::value_ptr(unpackColorBGRA(curLightModel->ambientOffset0)), sizeof(model.ambientOffset[0]));
- memcpy(model.ambientOffset[1], glm::value_ptr(unpackColorBGRA(curLightModel->ambientOffset1)), sizeof(model.ambientOffset[1]));
- for (u32 i = 0; i < MAX_LIGHTS; i++)
- {
- N2Light& light = model.lights[model.lightCount];
- for (int vol = 0; vol < 2; vol++)
- {
- light.diffuse[vol] = curLightModel->isDiffuse(i, vol);
- light.specular[vol] = curLightModel->isSpecular(i, vol);
- }
- if (!light.diffuse[0] && !light.specular[0]
- && !light.diffuse[1] && !light.specular[1])
- continue;
- if (curLights[i] == nullptr)
- {
- INFO_LOG(PVR, "Light %d is referenced but undefined", i);
- continue;
- }
- if (i == curLightModel->bumpId1)
- model.bumpId1 = model.lightCount;
- if (i == curLightModel->bumpId2)
- model.bumpId2 = model.lightCount;
- light.parallel = curLights[i]->pcw.parallelLight;
- if (light.parallel)
- {
- ParallelLight *plight = (ParallelLight *)curLights[i];
- memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color));
- light.routing = plight->routing;
- light.dmode = plight->dmode;
- light.smode = N2_LMETHOD_SINGLE_SIDED;
- memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)),
- sizeof(light.direction));
- }
- else
- {
- PointLight *plight = (PointLight *)curLights[i];
- memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color));
- light.routing = plight->routing;
- light.dmode = plight->dmode;
- light.smode = plight->smode;
- if (plight->posX == 0 && plight->posY == 0 && plight->posZ == 0
- && plight->_distA == 0 && plight->_distB == 0
- && plight->_angleA == 0 && plight->_angleB == 0)
- {
- // Lights not using distance or angle attenuation are converted into parallel lights on the CPU side?
- DEBUG_LOG(PVR, "Point -> parallel light[%d] dir %d %d %d", i, -(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ);
- light.parallel = true;
- memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)),
- sizeof(light.direction));
- }
- else
- {
- memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)),
- sizeof(light.direction));
- memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position));
- light.distAttnMode = plight->dattenmode;
- light.attnDistA = plight->distA();
- light.attnDistB = plight->distB();
- light.attnAngleA = plight->angleA();
- light.attnAngleB = plight->angleB();
- }
- }
- model.lightCount++;
- }
- taLightModel = ta_add_light(model);
-}
-
-static void setStateParams(PolyParam& pp, const ICHList *list)
-{
- sendLights();
- pp.tileclip = state.tileclip;
- pp.mvMatrix = taMVMatrix;
- pp.normalMatrix = taNormalMatrix;
- pp.projMatrix = taProjMatrix;
- pp.lightModel = taLightModel;
- pp.envMapping[0] = false;
- pp.envMapping[1] = false;
- if (curGmp != nullptr)
- {
- pp.glossCoef[0] = curGmp->gloss.getCoef0();
- pp.glossCoef[1] = curGmp->gloss.getCoef1();
- pp.constantColor[0] = curGmp->paramSelect.b0;
- pp.diffuseColor[0] = curGmp->paramSelect.d0;
- pp.specularColor[0] = curGmp->paramSelect.s0;
- pp.constantColor[1] = curGmp->paramSelect.b1;
- pp.diffuseColor[1] = curGmp->paramSelect.d1;
- pp.specularColor[1] = curGmp->paramSelect.s1;
-
- // Environment mapping
- if (curGmp->paramSelect.e0)
- {
- pp.pcw.Texture = 1;
- pp.pcw.Offset = 0;
- pp.tsp.UseAlpha = 1;
- pp.tsp.IgnoreTexA = 0;
- pp.envMapping[0] = true;
- pp.tcw = list->tcw0;
- envMapping = true;
- }
- if (curGmp->paramSelect.e1)
- {
- pp.pcw.Texture = 1;
- pp.pcw.Offset = 0;
- pp.tsp1.UseAlpha = 1;
- pp.tsp1.IgnoreTexA = 0;
- pp.envMapping[1] = true;
- pp.tcw1 = list->tcw1;
- envMapping = true;
- }
- }
- pp.tsp.full ^= modelTSP.full;
- pp.tsp1.full ^= modelTSP.full;
-
- // projFlip is for left-handed projection matrices (initd rear view mirror)
- bool projFlip = taProjMatrix != nullptr && std::signbit(taProjMatrix[0]) == std::signbit(taProjMatrix[5]);
- pp.isp.CullMode ^= (u32)cullingReversed ^ (u32)projFlip;
- pp.pcw.Shadow ^= shadowedVolume;
- if (pp.pcw.Shadow == 0 || pp.pcw.Volume == 0)
- {
- pp.tsp1.full = -1;
- pp.tcw1.full = -1;
- pp.glossCoef[1] = 0;
- pp.constantColor[1] = false;
- pp.diffuseColor[1] = false;
- pp.specularColor[1] = false;
- }
-// else if (pp.pcw.Volume == 1)
-// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", state.listType, list->flags, curGmp->paramSelect.full,
-// pp.tcw.full ^ pp.tcw1.full, pp.tsp.full ^ pp.tsp1.full);
-}
-
-static void sendPolygon(ICHList *list)
-{
- bool needClipping;
-
- switch (list->flags)
- {
- case ICHList::VTX_TYPE_V:
- {
- N2_VERTEX *vtx = (N2_VERTEX *)((u8 *)list + sizeof(ICHList));
- if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
- break;
- if (state.listType & 1)
- sendMVPolygon(list, vtx, needClipping);
- else
- {
- PolyParam pp{};
- pp.pcw.Shadow = list->pcw.shadow;
- pp.pcw.Texture = list->pcw.texture;
- pp.pcw.Offset = list->pcw.offset;
- pp.pcw.Gouraud = list->pcw.gouraud;
- pp.pcw.Volume = list->pcw.volume;
- pp.isp = list->isp;
- pp.tsp = list->tsp0;
- pp.tsp1 = list->tsp1;
- setStateParams(pp, list);
- ta_add_poly(state.listType, pp);
-
- sendVertices(list, vtx, needClipping);
- }
- }
- break;
-
- case ICHList::VTX_TYPE_VU:
- {
- N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)((u8 *)list + sizeof(ICHList));
- if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
- break;
- if (state.listType & 1)
- sendMVPolygon(list, vtx, needClipping);
- else
- {
- PolyParam pp{};
- pp.pcw.Shadow = list->pcw.shadow;
- pp.pcw.Texture = list->pcw.texture;
- pp.pcw.Offset = list->pcw.offset;
- pp.pcw.Gouraud = list->pcw.gouraud;
- pp.pcw.Volume = list->pcw.volume;
- pp.isp = list->isp;
- pp.tsp = list->tsp0;
- pp.tcw = list->tcw0;
- pp.tsp1 = list->tsp1;
- pp.tcw1 = list->tcw1;
- setStateParams(pp, list);
- ta_add_poly(state.listType, pp);
-
- sendVertices(list, vtx, needClipping);
- }
- }
- break;
-
- case ICHList::VTX_TYPE_VUR:
- {
- verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0);
- N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)((u8 *)list + sizeof(ICHList));
- if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
- break;
- PolyParam pp{};
- pp.pcw.Shadow = list->pcw.shadow;
- pp.pcw.Texture = list->pcw.texture;
- pp.pcw.Offset = list->pcw.offset;
- pp.pcw.Gouraud = list->pcw.gouraud;
- pp.pcw.Volume = list->pcw.volume;
- pp.isp = list->isp;
- pp.tsp = list->tsp0;
- pp.tcw = list->tcw0;
- pp.tsp1 = list->tsp1;
- pp.tcw1 = list->tcw1;
- setStateParams(pp, list);
- ta_add_poly(state.listType, pp);
-
- sendVertices(list, vtx, needClipping);
- }
- break;
-
- case ICHList::VTX_TYPE_VR:
- {
- N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)((u8 *)list + sizeof(ICHList));
- if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
- break;
- PolyParam pp{};
- pp.pcw.Shadow = list->pcw.shadow;
- pp.pcw.Texture = list->pcw.texture;
- pp.pcw.Offset = list->pcw.offset;
- pp.pcw.Gouraud = list->pcw.gouraud;
- pp.pcw.Volume = list->pcw.volume;
- pp.isp = list->isp;
- pp.tsp = list->tsp0;
- pp.tsp1 = list->tsp1;
- setStateParams(pp, list);
- ta_add_poly(state.listType, pp);
-
- sendVertices(list, vtx, needClipping);
- }
- break;
-
- case ICHList::VTX_TYPE_VUB:
- {
- // TODO
- //printf("BUMP MAP fmt %d filter %d src select %d dst %d\n", list->tcw0.PixelFmt, list->tsp0.FilterMode, list->tsp0.SrcSelect, list->tsp0.DstSelect);
- N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB *)((u8 *)list + sizeof(ICHList));
- if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
- break;
- PolyParam pp{};
- pp.pcw.Shadow = list->pcw.shadow;
- pp.pcw.Texture = 1;
- pp.pcw.Offset = 1;
- pp.pcw.Gouraud = list->pcw.gouraud;
- pp.pcw.Volume = list->pcw.volume;
- pp.isp = list->isp;
- pp.tsp = list->tsp0;
- pp.tcw = list->tcw0;
- pp.tsp1 = list->tsp1;
- pp.tcw1 = list->tcw1;
- setStateParams(pp, list);
- ta_add_poly(state.listType, pp);
-
- sendVertices(list, vtx, needClipping);
- }
- break;
-
- default:
- WARN_LOG(PVR, "Unhandled poly format %x", list->flags);
- die("Unsupported");
- break;
- }
- envMapping = false;
-}
-
-template
-static void executeCommand(u8 *data, int size)
-{
-// verify(size >= 0);
-// verify(size < (int)ELAN_RAM_SIZE);
-// if (0x2b00 == (u32)(data - RAM))
-// for (int i = 0; i < size; i += 4)
-// DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
-
- while (size >= 32)
- {
- const int oldSize = size;
- ElanBase *cmd = (ElanBase *)data;
- if (cmd->pcw.naomi2)
- {
- switch(cmd->pcw.n2Command)
- {
- case PCW::null:
- size -= 32;
- break;
-
- case PCW::projMatrix:
- if (Active)
- state.setProjectionMatrix(data);
- size -= sizeof(ProjMatrix);
- break;
-
- case PCW::matrixOrLight:
- {
- InstanceMatrix *instance = (InstanceMatrix *)data;
- if (instance->isInstanceMatrix())
- {
- //DEBUG_LOG(PVR, "Model instance");
- if (Active)
- state.setMatrix(instance);
- size -= sizeof(InstanceMatrix);
- break;
- }
- if (Active)
- {
- if (instance->id1 & 0x10)
- {
- state.setLightModel(data);
- }
- else //if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4
- {
- if (instance->pcw.parallelLight)
- {
- ParallelLight *light = (ParallelLight *)data;
- state.setLight(light->lightId, data);
- }
- else
- {
- PointLight *light = (PointLight *)data;
- state.setLight(light->lightId, data);
- }
- }
- //else
- //{
- // WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2);
- // for (int i = 0; i < 32; i += 4)
- // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
- //}
- }
- size -= sizeof(LightModel);
- }
- break;
-
- case PCW::model:
- {
- Model *model = (Model *)data;
- if (Active)
- {
- cullingReversed = model->param.cwCulling == 0;
- state.setClipMode(model->pcw);
- openModifierVolume = model->param.openVolume;
- shadowedVolume = model->pcw.shadow;
- modelTSP = model->tsp;
- DEBUG_LOG(PVR, "Model offset %x size %x pcw %08x tsp %08x", model->offset, model->size, model->pcw.full, model->tsp.full);
- }
- executeCommand(&RAM[model->offset & 0x1ffffff8], model->size);
- cullingReversed = false;
- openModifierVolume = false;
- shadowedVolume = false;
- modelTSP.full = 0;
- size -= sizeof(Model);
- }
- break;
-
- case PCW::registerWait:
- {
- RegisterWait *wait = (RegisterWait *)data;
- if (wait->offset != (u32)-1 && wait->mask != 0)
- {
- DEBUG_LOG(PVR, "Register wait %x mask %x", wait->offset, wait->mask);
- // wait for interrupt
- HollyInterruptID inter;
- switch (wait->mask)
- {
- case 0x80:
- inter = holly_OPAQUE;
- break;
- case 0x100:
- inter = holly_OPAQUEMOD;
- break;
- case 0x200:
- inter = holly_TRANS;
- break;
- case 0x400:
- inter = holly_TRANSMOD;
- break;
- case 0x200000:
- inter = holly_PUNCHTHRU;
- break;
- default:
- WARN_LOG(PVR, "Unknown interrupt mask %x", wait->mask);
- die("unexpected");
- inter = holly_OPAQUE;
- break;
- }
- asic_RaiseInterruptBothCLX(inter);
- TA_ITP_CURRENT += 32;
- if (Active)
- state.reset();
- }
- size -= sizeof(RegisterWait);
- }
- break;
-
- case PCW::link:
- {
- Link *link = (Link *)data;
- if (link->offset & 0x80000000)
- {
- // elan v10 only
- DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->_res & 0x1ffffff8, link->size);
- memcpy(&vram[link->_res & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size);
- reg74 |= 1;
- }
- else if (link->offset & 0x20000000)
- {
- // elan v10 only
- DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & 0x01fffff8, link->_res & VRAM_MASK, link->size);
- memcpy(&vram[link->_res & VRAM_MASK], &RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size);
- reg74 |= 1;
- }
- else
- {
- DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size);
- executeCommand(&RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size);
- }
- size -= sizeof(Link);
- }
- break;
-
- case PCW::gmp:
- if (Active)
- state.setGMP(data);
- size -= sizeof(GMP);
- break;
-
- case PCW::ich:
- {
- ICHList *ich = (ICHList *)data;
- if (Active)
- {
- DEBUG_LOG(PVR, "ICH flags %x, %d verts", ich->flags, ich->vtxCount);
- sendPolygon(ich);
- }
- size -= sizeof(ICHList) + ich->vertexSize() * ich->vtxCount;
- }
- break;
-
- default:
- DEBUG_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command);
- size -= 32;
- break;
- }
- }
- else
- {
- u32 pcw = *(u32 *)data;
- if ((pcw & 0xd0ffff00) == 0x808c0000) // display list
- {
- if (Active)
- {
- DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf);
- state.reset();
- state.listType = (pcw >> 24) & 0xf;
- // TODO is this the right place for this?
- SQBuffer eol{};
- ta_vtx_data32(&eol);
- }
- size -= 24 * 4;
- }
- else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping
- {
- if (Active)
- {
- state.setClipMode((PCW&)pcw);
- DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip);
- }
- size -= 0xE0;
- }
- else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked?
- {
- if (Active)
- {
- DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw);
- state.listType = (pcw >> 24) & 0xf;
- size -= 32;
- ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32);
- }
- size = 32;
- }
- else if (pcw == 0x20000000)
- {
- // User clipping
- if (Active)
- {
- u32 *tiles = (u32 *)data + 4;
- DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32,
- tiles[2] * 32, tiles[3] * 32);
- state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]);
- }
- size -= 32;
- }
- else
- {
- if (Active)
- {
- if (pcw != 0)
- INFO_LOG(PVR, "Unhandled command %x", pcw);
- for (int i = 0; i < 32; i += 4)
- DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
- }
- size -= 32;
- }
- }
- data += oldSize - size;
- }
-}
-
-template
-void DYNACALL write_elancmd(u32 addr, T data)
-{
-// DEBUG_LOG(PVR, "ELAN cmd %08x = %x", addr, data);
- addr &= 0x1f;
- *(T *)&((u8 *)elanCmd)[addr] = data;
-
- if (addr == 0x1c)
- {
- if (!ggpo::rollbacking())
- executeCommand((u8 *)elanCmd, sizeof(elanCmd));
- else
- executeCommand((u8 *)elanCmd, sizeof(elanCmd));
- if (!(reg74 & 1))
- reg74 |= 2;
- reg74 &= ~0x3c;
- }
-}
-
-template
-T DYNACALL read_elanram(u32 addr)
-{
- return *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)];
-}
-
-template
-void DYNACALL write_elanram(u32 addr, T data)
-{
- *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)] = data;
-}
-
-void init()
-{
-}
-
-void reset(bool hard)
-{
- if (hard)
- {
- memset(RAM, 0, ELAN_RAM_SIZE);
- state.reset();
- }
-}
-
-void term()
-{
-}
-
-void vmem_init()
-{
- elanRegHandler = _vmem_register_handler_Template(read_elanreg, write_elanreg);
- elanCmdHandler = _vmem_register_handler_Template(read_elancmd, write_elancmd);
- elanRamHandler = _vmem_register_handler_Template(read_elanram, write_elanram);
-}
-
-void vmem_map(u32 base)
-{
- _vmem_map_handler(elanRegHandler, base | 8, base | 8);
- _vmem_map_handler(elanCmdHandler, base | 9, base | 9);
- _vmem_map_handler(elanRamHandler, base | 0xA, base | 0xB);
- _vmem_map_block(RAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1);
-}
-
-void serialize(Serializer& ser)
-{
- if (!settings.platform.isNaomi2())
- return;
- ser << reg10;
- ser << reg74;
- ser << elanCmd;
- if (!ser.rollback())
- ser.serialize(RAM, ELAN_RAM_SIZE);
- state.serialize(ser);
-}
-
-void deserialize(Deserializer& deser)
-{
- if (!settings.platform.isNaomi2())
- return;
- deser >> reg10;
- deser >> reg74;
- deser >> elanCmd;
- if (!deser.rollback())
- deser.deserialize(RAM, ELAN_RAM_SIZE);
- state.deserialize(deser);
-}
-
-}
+/*
+ Copyright 2022 flyinghead
+
+ This file is part of Flycast.
+
+ Flycast is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ Flycast is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+ */
+/*
+ * VideoLogic custom transformation & lighting (T&L) chip (codenamed: ELAN)
+ * 32 MB RAM
+ * Clock: 100 MHz
+ * 16 light sources per polygon
+ * ambient, parallel, point or spot (Fog lights and alpha lights also exist)
+ * Perspective conversion
+ * Near, far and side clipping, offscreen and backface culling
+ * bump mapping, environmental mapping
+ * dynamic & static model processing
+ * model cache system
+ *
+ * Each PVR2 chip renders half the screen (rectangular, stripes, and checker board options)
+ * so textures have to be duplicated in each vram
+ *
+ * Area 0:
+ * 005f6800 - 005f7cff asic A regs
+ * 005f8000 - 005f9fff CLXA regs
+ * 025f6800 - 025f7cff asic B regs
+ * 025f8000 - 025f9fff CLXB regs
+ *
+ * Area 1:
+ * 05000000 - 06ffffff CLXA vram
+ * 07000000 - 08ffffff CLXB vram
+ *
+ * Area 2:
+ * 085f6800 - 085f7cff write both asic regs
+ * 085f8000 - 085f9fff write both PVR regs
+ * 08800000 - 088000ff? elan regs
+ * 09000000 - ? elan command buffer
+ * 0A000000 - 0bffffff elan RAM
+ */
+#include "elan.h"
+#include "hw/mem/_vmem.h"
+#include "pvr_mem.h"
+#include "ta.h"
+#include "ta_ctx.h"
+#include "hw/holly/holly_intc.h"
+#include "hw/holly/sb.h"
+#include "hw/pvr/Renderer_if.h"
+#include "hw/sh4/sh4_sched.h"
+#include "hw/sh4/sh4_mem.h"
+#include "emulator.h"
+#include "serialize.h"
+#include "elan_struct.h"
+#include "network/ggpo.h"
+#include
+#include
+#include
+#include
+
+namespace elan {
+
+static _vmem_handler elanRegHandler;
+static _vmem_handler elanCmdHandler;
+static _vmem_handler elanRamHandler;
+
+u8 *RAM; // base of the ELAN RAM; State keeps byte offsets relative to this pointer
+
+static u32 reg10; // register 0x10 ("sh4 if control?"): stored on write, returned on read
+static u32 reg74; // register 0x74 (IRQ STAT): status bits, cleared by writing 1s to them
+static u32 reg30 = 0x31; // register 0x30 (macro tiler config): stored on write, returned on read
+
+static u32 elanCmd[32 / 4]; // 32 bytes of command storage
+
+template<typename T>
+T DYNACALL read_elanreg(u32 paddr)
+{
+    // Register read for access width T: 0x5Fxxxx offsets are forwarded to the system bus / PVR registers, everything else is decoded as an ELAN register by its low byte. Non 32-bit reads do reach this handler (see the sizeof(T) test below), so verify(sizeof(T) == 4) stays disabled.
+    u32 addr = paddr & 0x01ffffff;
+    switch (addr >> 16)
+    {
+    case 0x5F:
+        if (addr >= 0x005F6800 && addr <= 0x005F7CFF)
+        {
+            // 5F6908: Tests for errors 4, 8, 10, 2 and 1 (render isp buf ovf, render hazard, ISP param ovf, ob list ptr ovf, ta ill param)
+            // 5f6900: then int 4 and 40 (EoR TSP, EoT YUV)
+            return (T)sb_ReadMem(paddr, sizeof(T));
+        }
+        else if (addr >= 0x005F8000 && addr <= 0x005F9FFF)
+        {
+            if (sizeof(T) != 4)
+                // House of the Dead 2
+                return 0;
+            return (T)pvr_ReadReg(paddr);
+        }
+        else
+        {
+            INFO_LOG(MEMORY, "Read from area2 not implemented [Unassigned], addr=%x", addr);
+            return 0;
+        }
+
+    default:
+//      if ((addr & 0xFF) != 0x74)
+        DEBUG_LOG(PVR, "ELAN read(%d) %08x [pc %08x]", (u32)sizeof(T), addr, p_sh4rcb->cntx.pc);
+        switch (addr & 0xFF)
+        {
+        case 0: // magic number
+            return (T)0xe1ad0000;
+        case 4: // revision
+            return 0x10; // 1 or x10
+        case 0xc:
+            // command queue size
+            // loops until < 2 (v1) or 3 (v10)
+            return 1;
+        case 0x10: // sh4 if control?
+            // b0 broadcast on cs1
+            // b1 elan channel 2
+            // b2 enable pvr #2
+            // rewritten by bios as reg10 & ~1
+            return reg10;
+        case 0x14: // SDRAM refresh (never read?)
+            return (T)0x2029; //default 0x1429
+        case 0x1c: // SDRAM CFG
+            return (T)0x87320961;
+        case 0x30: // Macro tiler config
+            // 0 0 l l l l l l t t t t 0 0 r r r r r r b b b b 0 0 V H 0 0 0 T
+            // lllll: left tile
+            // tttt: top tile
+            // rrrrrr: right tile
+            // bbbb: bottom tile
+            // V: tile vertically
+            // H: tile horizontally
+            // T: tiler enabled
+            return reg30;
+        case 0x74:
+            // b0 dma completed
+            // b1 cmd completed
+            // b2-b3 geometry timeouts
+            // b4-b6 errors?
+            return reg74;
+        case 0x78: // IRQ MASK
+            // 6 bits?
+            return 0;
+        default:
+            return (T)0;
+        }
+    }
+}
+
+template<typename T>
+void DYNACALL write_elanreg(u32 paddr, T data)
+{
+    // Register write for access width T: 0x5Fxxxx offsets are forwarded to the system bus / PVR registers, everything else is decoded as an ELAN register by its low byte. The 32-bit-only check verify(sizeof(T) == 4) is left disabled.
+    u32 addr = paddr & 0x01ffffff;
+    switch (addr >> 16)
+    {
+    case 0x5F:
+        if (addr>= 0x005F6800 && addr <= 0x005F7CFF)
+            sb_WriteMem(paddr, data, sizeof(T));
+        else if (addr >= 0x005F8000 && addr <= 0x005F9FFF)
+        {
+            if (addr == 0x5F8040 && data == 0xFF00FF)
+            {
+                ERROR_LOG(PVR, "ELAN SCREWED pr %x pc %x", p_sh4rcb->cntx.pr, p_sh4rcb->cntx.pc);
+                throw FlycastException("Boot aborted");
+            }
+            else if ((addr & 0x1fff) == SOFTRESET_addr && data == 0)
+                reg74 &= 3; // keep only the two completion bits (b0 dma, b1 cmd)
+            else if ((addr & 0x1fff) == STARTRENDER_addr)
+                reg74 &= 3;
+
+            //if ((paddr & 0x1c000000) == 0x08000000 && (addr & 0x1fff) == SOFTRESET_addr && data == 0)
+            //  reg74 |= 2;
+            pvr_WriteReg(paddr, data);
+        }
+        else
+            INFO_LOG(COMMON, "Write to area2 not implemented [Unassigned], addr=%x,data=%x,size=%d", addr, data, (u32)sizeof(T));
+        break;
+    default:
+//      if ((addr & 0xFF) != 0x74)
+        DEBUG_LOG(PVR, "ELAN write(%d) %08x = %x", (u32)sizeof(T), addr, data);
+        switch (addr & 0xFF)
+        {
+        case 0x0:
+            // 0 multiple times (_kmtlifAbortDisplayListProcessing)
+            break;
+        // 0x4: _kmtlifAbortDisplayListProcessing: 0
+        case 0x8: // write-only. reset ?
+            // 1 then 0
+            // bios: 5
+            // _kmtlifAbortDisplayListProcessing: 5 then 0
+            // _kmtlifHandleDMATimeout: 1, 0, 4, 0...
+            if (data == 0)
+                reg74 = 0;
+            break;
+        case 0xc:
+            // 0
+            break;
+        case 0x10: // sh4 if control?
+            reg10 = data;
+            break;
+        case 0x14: // SDRAM refresh
+            // x2029
+            break;
+        case 0x1c: // SDRAM CFG
+            break;
+        case 0x30:
+            reg30 = data;
+            break;
+        case 0x74: // IRQ STAT
+            reg74 &= ~data; // write 1 to clear
+            break;
+        // _kmtlifSetupElanInts:
+        // 78 = 3f
+        // 7C = 0
+        // 80 = 17
+        // 84 = 2b
+        // 88 = 0
+        case 0xd0: // _kmtlifSetCullingRegister
+            // 6
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+template<typename T>
+T DYNACALL read_elancmd(u32 addr)
+{
+    DEBUG_LOG(PVR, "ELAN cmd READ! (%d) %08x", (u32)sizeof(T), addr); // reads are not backed by any data: log the access...
+    return 0; // ...and always answer 0, whatever the width T
+}
+
+static glm::vec4 unpackColor(u32 color) // packed 0xAARRGGBB -> (r, g, b, a), each byte scaled to [0, 1]
+{
+    const float red = (float)((color >> 16) & 0xff) / 255.f;
+    const float green = (float)((color >> 8) & 0xff) / 255.f;
+    const float blue = (float)(color & 0xff) / 255.f;
+    return glm::vec4(red, green, blue, (float)(color >> 24) / 255.f);
+}
+
+static glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0)
+{
+    return { red / 255.f, green / 255.f, blue / 255.f, alpha / 255.f }; // bytes -> [0, 1] floats; alpha defaults to 0
+}
+
+static u32 packColorBGRA(const glm::vec4& color) // -> 0xAARRGGBB; each channel is saturated to [0, 1] so a negative value cannot spill into the other bytes
+{
+    return (u32)(std::max(0.f, std::min(1.f, color.a)) * 255.f) << 24
+        | (u32)(std::max(0.f, std::min(1.f, color.r)) * 255.f) << 16
+        | (u32)(std::max(0.f, std::min(1.f, color.g)) * 255.f) << 8
+        | (u32)(std::max(0.f, std::min(1.f, color.b)) * 255.f);
+}
+
+static u32 packColorRGBA(const glm::vec4& color) // -> 0xAABBGGRR; each channel is saturated to [0, 1] so a negative value cannot spill into the other bytes
+{
+    return (u32)(std::max(0.f, std::min(1.f, color.r)) * 255.f)
+        | (u32)(std::max(0.f, std::min(1.f, color.g)) * 255.f) << 8
+        | (u32)(std::max(0.f, std::min(1.f, color.b)) * 255.f) << 16
+        | (u32)(std::max(0.f, std::min(1.f, color.a)) * 255.f) << 24;
+}
+
+u32 (*packColor)(const glm::vec4& color) = packColorRGBA; // active packer; State::reset() switches it to packColorBGRA for DirectX renderers
+
+static GMP *curGmp; // current GMP block inside RAM, nullptr when none is set (State::updateGMP)
+static glm::mat4x4 curMatrix; // model-view matrix rebuilt by State::updateMatrix
+static float *taMVMatrix; // value returned by ta_add_matrix for curMatrix, or nullptr
+static float *taNormalMatrix; // value returned by ta_add_matrix for the normal matrix (may equal taMVMatrix), or nullptr
+static glm::mat4 projectionMatrix; // rebuilt by State::updateProjectionMatrix
+static float *taProjMatrix; // value returned by ta_add_matrix for projectionMatrix, or nullptr
+static LightModel *curLightModel; // current light model inside RAM, or nullptr
+static ElanBase *curLights[MAX_LIGHTS]; // per-slot light definitions inside RAM, nullptr when undefined
+static float nearPlane = 0.001f; // overwritten from the instance matrix (_near)
+static float farPlane = 100000.f; // overwritten from the instance matrix (_far)
+static bool envMapping; // when set, setUV/SetEnvMapUV emit the env-map offsets instead of vertex UVs
+static bool cullingReversed;
+static bool openModifierVolume;
+static bool shadowedVolume;
+static TSP modelTSP;
+static glm::vec4 gmpDiffuseColor0; // the four gmp* colors are added to every vertex by addModelColors;
+static glm::vec4 gmpSpecularColor0; // each one is zero unless the matching GMP paramSelect bit is set
+static glm::vec4 gmpDiffuseColor1;
+static glm::vec4 gmpSpecularColor1;
+
+struct State // current ELAN state, stored as byte offsets into RAM so it can be serialized; the file-scope pointers are re-derived from it
+{
+ static constexpr u32 Null = 0xffffffff; // sentinel offset meaning "not set"
+
+ int listType = -1;
+ u32 gmp = Null;
+ u32 instance = Null;
+ u32 projMatrix = Null;
+ u32 tileclip = 0;
+ u32 lightModel = Null;
+ u32 lights[MAX_LIGHTS] = {
+ Null, Null, Null, Null, Null, Null, Null, Null,
+ Null, Null, Null, Null, Null, Null, Null, Null
+ };
+ bool lightModelUpdated = false; // set by updateLightModel() and updateLight()
+ float envMapUOffset = 0.f;
+ float envMapVOffset = 0.f;
+
+ void reset() // clears every reference, re-derives the globals and selects the packColor variant for the configured renderer
+ {
+ listType = -1;
+ gmp = Null;
+ instance = Null;
+ projMatrix = Null;
+ tileclip = 0;
+ lightModel = Null;
+ for (auto& light : lights)
+ light = Null;
+ update();
+ if (isDirectX(config::RendererType))
+ packColor = packColorBGRA;
+ else
+ packColor = packColorRGBA;
+ }
+ void setMatrix(InstanceMatrix *pinstance)
+ {
+ instance = elanRamAddress(pinstance);
+ updateMatrix();
+ }
+
+ void updateMatrix() // rebuilds curMatrix, the normal matrix, near/far planes and env-map offsets from the instance matrix in RAM
+ {
+ if (instance == Null)
+ {
+ taMVMatrix = nullptr;
+ taNormalMatrix = nullptr;
+ envMapUOffset = 0.f;
+ envMapVOffset = 0.f;
+ return;
+ }
+ InstanceMatrix *mat = (InstanceMatrix *)&RAM[instance];
+ DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f\n %f %f %f",
+ -mat->tm00, -mat->tm10, -mat->tm20, -mat->tm30,
+ mat->tm01, mat->tm11, mat->tm21, mat->tm31,
+ -mat->tm02, -mat->tm12, -mat->tm22, -mat->tm32,
+ mat->lm00, mat->lm10, mat->lm20,
+ mat->lm01, mat->lm11, mat->lm21,
+ mat->lm02, mat->lm12, mat->lm22);
+
+ curMatrix = glm::mat4x4{
+ -mat->tm00, mat->tm01, -mat->tm02, 0.f,
+ -mat->tm10, mat->tm11, -mat->tm12, 0.f,
+ -mat->tm20, mat->tm21, -mat->tm22, 0.f,
+ -mat->tm30, mat->tm31, -mat->tm32, 1.f
+ };
+ glm::mat4x4 normalMatrix = glm::mat4x4{
+ mat->lm00, mat->lm01, mat->lm02, 0.f,
+ mat->lm10, mat->lm11, mat->lm12, 0.f,
+ mat->lm20, mat->lm21, mat->lm22, 0.f,
+ -mat->tm30, mat->tm31, -mat->tm32, 1.f
+ };
+ nearPlane = mat->_near;
+ farPlane = mat->_far;
+ envMapUOffset = mat->envMapU;
+ envMapVOffset = mat->envMapV;
+ taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix));
+ if (normalMatrix != curMatrix) // only register a second matrix when it actually differs
+ taNormalMatrix = ta_add_matrix(glm::value_ptr(normalMatrix));
+ else
+ taNormalMatrix = taMVMatrix;
+ }
+
+ void setProjectionMatrix(void *p)
+ {
+ projMatrix = elanRamAddress(p);
+ updateProjectionMatrix();
+ }
+
+ void updateProjectionMatrix() // rebuilds projectionMatrix from the ProjMatrix block in RAM
+ {
+ if (projMatrix == Null)
+ {
+ taProjMatrix = nullptr;
+ return;
+ }
+ ProjMatrix *pm = (ProjMatrix *)&RAM[projMatrix];
+ DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane);
+ // fx = -m00 * w/2
+ // tx = -m20 * w/2 + left + w/2
+ // fy = -m11 * h/2
+ // ty = -m21 * h/2 + top + h/2
+ projectionMatrix = glm::mat4(
+ -pm->fx, 0, 0, 0,
+ 0, pm->fy, 0, 0,
+ -pm->tx, -pm->ty, -1, -1,
+ 0, 0, 0, 0
+ );
+ taProjMatrix = ta_add_matrix(glm::value_ptr(projectionMatrix));
+ }
+
+ void setGMP(void *p)
+ {
+ gmp = elanRamAddress(p);
+ updateGMP();
+ }
+
+ void updateGMP() // refreshes curGmp and the four gmp* colors; a color is zero unless its paramSelect bit is set
+ {
+ if (gmp == Null)
+ {
+ curGmp = nullptr;
+ gmpDiffuseColor0 = glm::vec4(0);
+ gmpSpecularColor0 = glm::vec4(0);
+ gmpDiffuseColor1 = glm::vec4(0);
+ gmpSpecularColor1 = glm::vec4(0);
+ }
+ else
+ {
+ curGmp = (GMP *)&RAM[gmp];
+ DEBUG_LOG(PVR, "GMP paramSelect %x", curGmp->paramSelect.full);
+ if (curGmp->paramSelect.d0)
+ gmpDiffuseColor0 = unpackColor(curGmp->diffuse0);
+ else
+ gmpDiffuseColor0 = glm::vec4(0);
+ if (curGmp->paramSelect.s0)
+ gmpSpecularColor0 = unpackColor(curGmp->specular0);
+ else
+ gmpSpecularColor0 = glm::vec4(0);
+ if (curGmp->paramSelect.d1)
+ gmpDiffuseColor1 = unpackColor(curGmp->diffuse1);
+ else
+ gmpDiffuseColor1 = glm::vec4(0);
+ if (curGmp->paramSelect.s1)
+ gmpSpecularColor1 = unpackColor(curGmp->specular1);
+ else
+ gmpSpecularColor1 = glm::vec4(0);
+ }
+ }
+
+ void setLightModel(void *p)
+ {
+ lightModel = elanRamAddress(p);
+ updateLightModel();
+ }
+
+ void updateLightModel()
+ {
+ lightModelUpdated = true;
+ if (lightModel == Null)
+ curLightModel = nullptr;
+ else
+ {
+ curLightModel = (LightModel *)&RAM[lightModel];
+ DEBUG_LOG(PVR, "Light model mask: diffuse %04x specular %04x, ambient base %08x offset %08x", curLightModel->diffuseMask0, curLightModel->specularMask0,
+ curLightModel->ambientBase0, curLightModel->ambientOffset0);
+ }
+ }
+
+ void setLight(int lightId, void *p) // NOTE(review): lightId indexes lights[] unchecked - confirm callers keep it below MAX_LIGHTS
+ {
+ lights[lightId] = elanRamAddress(p);
+ updateLight(lightId);
+ }
+
+ void updateLight(int lightId) // refreshes curLights[lightId] from the stored offset; the branches below only differ in what they log
+ {
+ lightModelUpdated = true;
+ if (lights[lightId] == Null)
+ {
+ elan::curLights[lightId] = nullptr;
+ return;
+ }
+ PointLight *plight = (PointLight *)&RAM[lights[lightId]];
+ if (plight->pcw.parallelLight)
+ {
+ ParallelLight *light = (ParallelLight *)plight;
+ DEBUG_LOG(PVR, " Parallel light %d: [%x] routing %d dmode %d col %d %d %d dir %f %f %f", light->lightId, plight->pcw.full,
+ light->routing, light->dmode,
+ light->red, light->green, light->blue,
+ light->getDirX(), light->getDirY(), light->getDirZ());
+ }
+ else
+ {
+ DEBUG_LOG(PVR, " Point light %d: [%x] routing %d dmode %d smode %d col %d %d %d dir %f %f %f pos %f %f %f dist %f %f angle %f %f",
+ plight->lightId, plight->pcw.full, plight->routing, plight->dmode, plight->smode,
+ plight->red, plight->green, plight->blue,
+ plight->getDirX(), plight->getDirY(), plight->getDirZ(),
+ plight->posX, plight->posY, plight->posZ,
+ plight->distA(), plight->distB(),
+ plight->angleA(), plight->angleB());
+ }
+ elan::curLights[lightId] = plight;
+ }
+
+ void setClipMode(PCW pcw) // replaces the top 4 bits of tileclip with pcw.userClip, keeping the tile bounds
+ {
+ tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28);
+ }
+
+ void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax) // replaces the tile bounds, keeping the top 4 bits of tileclip
+ {
+ u32 t = tileclip & 0xF0000000;
+ t |= xmin & 0x3f; // 6 bits
+ t |= (xmax & 0x3f) << 6; // 6 bits
+ t |= (ymin & 0x1f) << 12; // 5 bits
+ t |= (ymax & 0x1f) << 17; // 5 bits
+ tileclip = t;
+ }
+
+ void update() // re-derives every file-scope pointer/value from the stored offsets
+ {
+ updateMatrix();
+ updateProjectionMatrix();
+ updateGMP();
+ updateLightModel();
+ for (u32 i = 0; i < MAX_LIGHTS; i++)
+ updateLight(i);
+ }
+
+ static u32 elanRamAddress(void *p) // pointer -> byte offset from RAM, or Null when p lies outside [RAM, RAM + ELAN_RAM_SIZE)
+ {
+ if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE)
+ return Null;
+ else
+ return (u32)((u8 *)p - RAM);
+ }
+
+ void serialize(Serializer& ser) // writes the offsets only; must stay in the same field order as deserialize()
+ {
+ ser << listType;
+ ser << gmp;
+ ser << instance;
+ ser << projMatrix;
+ ser << tileclip;
+ ser << lightModel;
+ ser << lights;
+ }
+
+ void deserialize(Deserializer& deser)
+ {
+ if (deser.version() < Deserializer::V24) // for versions before V24 nothing is read: the state is just reset
+ {
+ reset();
+ return;
+ }
+ deser >> listType;
+ deser >> gmp;
+ deser >> instance;
+ deser >> projMatrix;
+ deser >> tileclip;
+ deser >> lightModel;
+ deser >> lights;
+ update(); // rebuild the derived globals from the restored offsets
+ }
+};
+
+static State state;
+
+static void setCoords(Vertex& vtx, float x, float y, float z) // stores the position components unchanged
+{
+    vtx.z = z;
+    vtx.y = y;
+    vtx.x = x;
+}
+
+template<typename Ts>
+static void setUV(const Ts& vs, Vertex& vd) // fills both UV sets of vd (u/v and u1/v1) with the same pair
+{
+    if (envMapping) // environment mapping: the source UVs are ignored in favour of the current env-map offsets
+    {
+        vd.u = state.envMapUOffset;
+        vd.v = state.envMapVOffset;
+        vd.u1 = state.envMapUOffset;
+        vd.v1 = state.envMapVOffset;
+    }
+    else // regular texturing: copy the UV pair of the source vertex
+    {
+        vd.u = vs.uv.u;
+        vd.v = vs.uv.v;
+        vd.u1 = vs.uv.u;
+        vd.v1 = vs.uv.v;
+    }
+}
+
+static void SetEnvMapUV(Vertex& vtx)
+{
+    // Vertices without UVs are only touched while environment mapping is on
+    if (!envMapping)
+        return;
+    vtx.u = state.envMapUOffset;
+    vtx.u1 = state.envMapUOffset;
+    vtx.v = state.envMapVOffset;
+    vtx.v1 = state.envMapVOffset;
+}
+
+template<typename T>
+glm::vec3 getNormal(const T& vtx) // generic case: the normal is three signed bytes in the vertex header, scaled by 1/127
+{
+    return { (int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f };
+}
+
+template<>
+glm::vec3 getNormal(const N2_VERTEX_VNU& vtx) // VNU vertices: the components of vtx.normal are returned as they are
+{
+    return { vtx.normal.nx, vtx.normal.ny, vtx.normal.nz };
+}
+
+template<typename T>
+void setNormal(Vertex& vd, const T& vs) // copies the normal of source vertex vs (decoded by getNormal) into vd.nx/ny/nz
+{
+    glm::vec3 normal = getNormal(vs);
+    vd.nx = normal.x;
+    vd.ny = normal.y;
+    vd.nz = normal.z;
+}
+
+static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1)
+{
+    baseCol0 += gmpDiffuseColor0; // base colors receive the GMP diffuse colors...
+    baseCol1 += gmpDiffuseColor1;
+    offsetCol0 += gmpSpecularColor0; // ...and offset colors the GMP specular colors
+    offsetCol1 += gmpSpecularColor1;
+}
+
+template<typename T> // T is a source vertex layout (N2_VERTEX, N2_VERTEX_VR, ...); this primary template is only declared here
+static void convertVertex(const T& vs, Vertex& vd);
+
+template<>
+void convertVertex(const N2_VERTEX& vs, Vertex& vd)
+{
+    // Vertex with neither UVs nor per-vertex colors: position and normal are
+    // copied, UVs are written only for environment mapping, and all four colors
+    // start at zero before the current GMP colors are added.
+    setCoords(vd, vs.x, vs.y, vs.z);
+    setNormal(vd, vs);
+    SetEnvMapUV(vd);
+
+    glm::vec4 base0(0), specular0(0), base1(0), specular1(0);
+    addModelColors(base0, specular0, base1, specular1);
+    *(u32 *)vd.col = packColor(base0);
+    *(u32 *)vd.spc = packColor(specular0);
+    *(u32 *)vd.col1 = packColor(base1);
+    *(u32 *)vd.spc1 = packColor(specular1);
+}
+
+template<>
+void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd)
+{
+    // Vertex with per-vertex colors but no UVs: the base colors come from
+    // vs.rgb, the offset colors start at zero, then the GMP colors are added.
+    setCoords(vd, vs.x, vs.y, vs.z);
+    setNormal(vd, vs);
+    SetEnvMapUV(vd);
+    glm::vec4 base0 = unpackColor(vs.rgb.argb0), specular0(0);
+    glm::vec4 base1 = unpackColor(vs.rgb.argb1), specular1(0);
+    addModelColors(base0, specular0, base1, specular1);
+    *(u32 *)vd.col = packColor(base0);
+    *(u32 *)vd.spc = packColor(specular0);
+    *(u32 *)vd.col1 = packColor(base1);
+    *(u32 *)vd.spc1 = packColor(specular1);
+}
+
+template<>
+void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd)
+{
+    // Vertex with UVs but no per-vertex colors: UVs go through setUV (which
+    // substitutes the env-map offsets when environment mapping is on) and all
+    // four colors start at zero before the current GMP colors are added.
+    setCoords(vd, vs.x, vs.y, vs.z);
+    setNormal(vd, vs);
+    setUV(vs, vd);
+    glm::vec4 base0(0), specular0(0), base1(0), specular1(0);
+    addModelColors(base0, specular0, base1, specular1);
+    *(u32 *)vd.col = packColor(base0);
+    *(u32 *)vd.spc = packColor(specular0);
+    *(u32 *)vd.col1 = packColor(base1);
+    *(u32 *)vd.spc1 = packColor(specular1);
+}
+
+template<>
+void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd)
+{
+    // Vertex with UVs and per-vertex colors: the base colors come from vs.rgb,
+    // the offset colors start at zero, then the GMP colors are added.
+    setCoords(vd, vs.x, vs.y, vs.z);
+    setNormal(vd, vs);
+    setUV(vs, vd);
+    glm::vec4 base0 = unpackColor(vs.rgb.argb0), specular0(0);
+    glm::vec4 base1 = unpackColor(vs.rgb.argb1), specular1(0);
+    addModelColors(base0, specular0, base1, specular1);
+    *(u32 *)vd.col = packColor(base0);
+    *(u32 *)vd.spc = packColor(specular0);
+    *(u32 *)vd.col1 = packColor(base1);
+    *(u32 *)vd.spc1 = packColor(specular1);
+}
+
+// Converts an N2_VERTEX_VUB (bump-mapped vertex) into a TA Vertex.
+// Base colors are handled like the other formats, but the two specular
+// arrays do not hold colors: they carry the bump-map tangent / bitangent
+// and the two scale-factor parameters.
+template<>
+void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd)
+{
+    setCoords(vd, vs.x, vs.y, vs.z);
+    setNormal(vd, vs);
+    setUV(vs, vd);
+
+    glm::vec4 base0(0), offset0(0);
+    glm::vec4 base1(0), offset1(0);
+    addModelColors(base0, offset0, base1, offset1);
+    // The offset colors are computed but not stored for this format
+    *(u32 *)vd.col = packColor(base0);
+    *(u32 *)vd.col1 = packColor(base1);
+
+    // spc: tangent xyz + bump degree
+    vd.spc[0] = vs.bump.tangent.x;
+    vd.spc[1] = vs.bump.tangent.y;
+    vd.spc[2] = vs.bump.tangent.z;
+    vd.spc[3] = vs.bump.scaleFactor.bumpDegree;     // always 255?
+    // spc1: bitangent xyz + fixed offset
+    vd.spc1[0] = vs.bump.bitangent.x;
+    vd.spc1[1] = vs.bump.bitangent.y;
+    vd.spc1[2] = vs.bump.bitangent.z;
+    vd.spc1[3] = vs.bump.scaleFactor.fixedOffset;   // always 0?
+//  int nx = (int8_t)vs.header.nx;
+//  int ny = (int8_t)vs.header.ny;
+//  int nz = (int8_t)vs.header.nz;
+//  printf("BumpMap vtx deg %d off %d normal %d %d %d tangent %d %d %d bitangent %d %d %d dot %d %d %d\n", vs.bump.scaleFactor.bumpDegree, vs.bump.scaleFactor.fixedOffset,
+//          nx, ny, nz,
+//          vs.bump.tangent.x, vs.bump.tangent.y, vs.bump.tangent.z, vs.bump.bitangent.x, vs.bump.bitangent.y, vs.bump.bitangent.z,
+//          nx * vs.bump.tangent.x + ny * vs.bump.tangent.y + nz * vs.bump.tangent.z,
+//          nx * vs.bump.bitangent.x + ny * vs.bump.bitangent.y + nz * vs.bump.bitangent.z,
+//          vs.bump.tangent.x * vs.bump.bitangent.x + vs.bump.tangent.y * vs.bump.bitangent.y + vs.bump.tangent.z * vs.bump.bitangent.z
+//          );
+}
+
+// Computes an axis-aligned bounding box of `count` vertices after
+// transformation by curMatrix.
+//
+// The box is first built from the untransformed x/y/z of each vertex, then
+// its center and its three half-extent axes are transformed and a new
+// axis-aligned box enclosing the transformed one is returned.
+//
+// vertices  array of any vertex type exposing x, y and z
+// count     number of vertices in the array
+// min, max  receive the corners of the transformed box
+//
+// With count == 0 the initial sentinel values (+/-1e38) are used as is.
+template<typename T>
+static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& max)
+{
+    min = { 1e38f, 1e38f, 1e38f };
+    max = { -1e38f, -1e38f, -1e38f };
+    for (u32 i = 0; i < count; i++)
+    {
+        glm::vec3 pos{ vertices[i].x, vertices[i].y, vertices[i].z };
+        min = glm::min(min, pos);
+        max = glm::max(max, pos);
+    }
+    // w = 1 for the center (a point), w = 0 for the extents (directions)
+    glm::vec4 center((min + max) / 2.f, 1);
+    glm::vec4 extents(max - glm::vec3(center), 0);
+    // transform
+    center = curMatrix * center;
+    glm::vec3 extentX = curMatrix * glm::vec4(extents.x, 0, 0, 0);
+    glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0);
+    glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0);
+    // new AA extents
+    glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ);
+
+    min = glm::vec3(center) - newExtent;
+    max = glm::vec3(center) + newExtent;
+}
+
+// Decides whether a vertex array must be drawn at all and whether it needs
+// near-plane clipping, based on its transformed bounding box.
+//
+// Returns false when the box's min z is greater than -nearPlane or its max z
+// is less than -farPlane, or when projecting either box corner yields a NaN
+// x or y. Otherwise returns true and sets needNearClipping to whether the
+// box's max z is greater than -nearPlane.
+//
+// needNearClipping is only written when the function returns true.
+template<typename T>
+static bool isBetweenNearAndFar(const T* vertices, u32 count, bool& needNearClipping)
+{
+    glm::vec3 min;
+    glm::vec3 max;
+    boundingBox(vertices, count, min, max);
+    if (min.z > -nearPlane || max.z < -farPlane)
+        return false;
+
+    glm::vec4 pmin = projectionMatrix * glm::vec4(min, 1);
+    glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1);
+    if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y))
+        return false;
+
+    needNearClipping = max.z > -nearPlane;
+
+    return true;
+}
+
+class TriangleStripClipper // Near-plane clipper for a stream of triangle-strip vertices; output goes to ta_add_vertex()
+{
+public:
+ TriangleStripClipper(bool enabled) : enabled(enabled) {} // enabled == false makes add() a plain pass-through
+
+ void add(const Vertex& vtx) // Feeds the next strip vertex; may emit zero or more vertices
+ {
+ if (enabled)
+ {
+ float z = vtx.x * curMatrix[0][2] + vtx.y * curMatrix[1][2] + vtx.z * curMatrix[2][2] + curMatrix[3][2]; // z of curMatrix * (x, y, z, 1)
+ float dist = -z - nearPlane; // negative when the vertex is on the clipped side of the near plane
+ clip(vtx, dist);
+ count++;
+ }
+ else
+ {
+ ta_add_vertex(vtx);
+ }
+ }
+
+private:
+ void sendVertex(const Vertex& r) // Emits r, preceded by one extra copy of r when dupeNext is set
+ {
+ if (dupeNext)
+ ta_add_vertex(r);
+ dupeNext = false;
+ ta_add_vertex(r);
+ }
+
+ // Three-Dimensional Homogeneous Clipping of Triangle Strips
+ // Patrick-Gilles Maillot. Graphics Gems II - 1991
+ void clip(const Vertex& r, float rDist) // r is the incoming vertex; p and q are the two previous ones
+ {
+ clipCode >>= 1; // slide the window: R's old bit becomes Q's, Q's becomes P's
+ clipCode |= (int)(rDist < 0) << 2; // bit 2 = R outside, bit 1 = Q outside, bit 0 = P outside
+ if (count == 1) // second vertex received: only the Q-R edge exists, so P's bit is dropped
+ {
+ switch (clipCode >> 1) {
+ case 0: // Q and R inside
+ sendVertex(q);
+ sendVertex(r);
+ break;
+ case 1: // Q outside, R inside
+ sendVertex(interpolate(q, qDist, r, rDist));
+ sendVertex(r);
+ break;
+ case 2: // Q inside, R outside
+ sendVertex(q);
+ sendVertex(interpolate(q, qDist, r, rDist));
+ break;
+ case 3: // Q and R outside
+ break;
+ }
+ }
+ else if (count >= 2) // a full P-Q-R triangle is available
+ {
+ switch (clipCode)
+ {
+ case 0: // all inside
+ sendVertex(r);
+ break;
+ case 1: // P outside, Q and R inside
+ sendVertex(interpolate(r, rDist, p, pDist));
+ sendVertex(q);
+ sendVertex(r);
+ break;
+ case 2: // P inside, Q outside and R inside
+ sendVertex(r);
+ sendVertex(interpolate(q, qDist, r, rDist));
+ sendVertex(r);
+ break;
+ case 3: // P and Q outside, R inside
+ {
+ Vertex tmp = interpolate(r, rDist, p, pDist);
+ sendVertex(tmp);
+ sendVertex(tmp);
+ sendVertex(tmp); // One more to preserve strip swap order
+ sendVertex(interpolate(q, qDist, r, rDist));
+ sendVertex(r);
+ }
+ break;
+ case 4: // P and Q inside, R outside
+ sendVertex(interpolate(r, rDist, p, pDist));
+ sendVertex(q);
+ sendVertex(interpolate(q, qDist, r, rDist));
+ break;
+ case 5: // P outside, Q inside, R outside
+ sendVertex(interpolate(q, qDist, r, rDist));
+ break;
+ case 6: // P inside, Q and R outside
+ {
+ Vertex tmp = interpolate(r, rDist, p, pDist);
+ sendVertex(tmp);
+ sendVertex(tmp);
+ sendVertex(tmp); // One more to preserve strip swap order
+ }
+ break;
+ case 7: // P, Q and R outside
+ dupeNext = !dupeNext; // nothing emitted; toggles whether the next emitted vertex is duplicated
+ break;
+ }
+ }
+ p = q; // shift the vertex history: Q becomes P, R becomes Q
+ pDist = qDist;
+ q = r;
+ qDist = rDist;
+ }
+
+ Vertex interpolate(const Vertex& v1, float f1, const Vertex& v2, float f2) // Blends v1 and v2 by their distances f1 and f2
+ {
+ Vertex v;
+ float a2 = std::abs(f1) / (std::abs(f1) + std::abs(f2)); // weight of v2; 0/0 (NaN) if both distances are zero
+ float a1 = 1 - a2; // weight of v1; for distances of opposite sign f1 * a1 + f2 * a2 == 0
+ v.x = v1.x * a1 + v2.x * a2;
+ v.y = v1.y * a1 + v2.y * a2;
+ v.z = v1.z * a1 + v2.z * a2;
+
+ v.u = v1.u * a1 + v2.u * a2;
+ v.v = v1.v * a1 + v2.v * a2;
+ v.u1 = v1.u1 * a1 + v2.u1 * a2;
+ v.v1 = v1.v1 * a1 + v2.v1 * a2;
+
+ for (size_t i = 0; i < ARRAY_SIZE(v1.col); i++) // the four color arrays are indexed with col's element count
+ {
+ v.col[i] = (u8)std::round(v1.col[i] * a1 + v2.col[i] * a2);
+ v.spc[i] = (u8)std::round(v1.spc[i] * a1 + v2.spc[i] * a2);
+ v.col1[i] = (u8)std::round(v1.col1[i] * a1 + v2.col1[i] * a2);
+ v.spc1[i] = (u8)std::round(v1.spc1[i] * a1 + v2.spc1[i] * a2);
+ }
+ v.nx = v1.nx * a1 + v2.nx * a2; // normals are blended linearly and not renormalized here
+ v.ny = v1.ny * a1 + v2.ny * a2;
+ v.nz = v1.nz * a1 + v2.nz * a2;
+
+ return v;
+ }
+
+ bool enabled; // false: add() forwards vertices untouched
+ int count = 0; // number of vertices passed to clip() so far; never reset
+ int clipCode = 0; // 3-bit outside mask for P (bit 0), Q (bit 1) and R (bit 2)
+ Vertex p; // vertex received two calls ago
+ float pDist = 0;
+ Vertex q; // vertex received one call ago
+ float qDist = 0;
+ bool dupeNext = false; // when set, the next sendVertex() emits its vertex twice
+};
+
+// Converts the vertices of an ICH list and feeds them to the TA as a single
+// triangle strip, going through the near-plane clipper when needed.
+//
+// The list may contain several strips and fans. They are chained into one
+// output strip with degenerate triangles: two linking vertices, plus a third
+// one whenever the output index is odd at that point.
+//
+// list          list header; vtxCount gives the number of vertices
+// vtx           first vertex of the list
+// needClipping  enables near-plane clipping of the generated strip
+template<typename T>
+static void sendVertices(const ICHList *list, const T* vtx, bool needClipping)
+{
+    Vertex taVtx;
+    verify(list->vertexSize() > 0);
+
+    Vertex fanCenterVtx{};      // first vertex of the current strip/fan
+    Vertex fanLastVtx{};        // previously emitted source vertex
+    bool stripStart = true;
+    int outStripIndex = 0;      // number of vertices pushed to the clipper so far
+    TriangleStripClipper clipper(needClipping);
+
+    for (u32 i = 0; i < list->vtxCount; i++)
+    {
+        convertVertex(*vtx, taVtx);
+
+        if (stripStart)
+        {
+            // Center vertex if triangle fan
+            //verify(vtx->header.isFirstOrSecond()); This fails for some strips: strip=1 fan=0 (soul surfer)
+            fanCenterVtx = taVtx;
+            if (outStripIndex > 0)
+            {
+                // use degenerate triangles to link strips
+                clipper.add(fanLastVtx);
+                clipper.add(taVtx);
+                outStripIndex += 2;
+                if (outStripIndex & 1)
+                {
+                    clipper.add(taVtx);
+                    outStripIndex++;
+                }
+            }
+            stripStart = false;
+        }
+        else if (vtx->header.isFan())
+        {
+            // use degenerate triangles to link strips
+            clipper.add(fanLastVtx);
+            clipper.add(fanCenterVtx);
+            outStripIndex += 2;
+            if (outStripIndex & 1)
+            {
+                clipper.add(fanCenterVtx);
+                outStripIndex++;
+            }
+            // Triangle fan
+            clipper.add(fanCenterVtx);
+            clipper.add(fanLastVtx);
+            outStripIndex += 2;
+        }
+        clipper.add(taVtx);
+        outStripIndex++;
+        fanLastVtx = taVtx;
+        if (vtx->header.endOfStrip)
+            stripStart = true;
+
+        vtx++;
+    }
+}
+
+class ModifierVolumeClipper // Near-plane clipper for modifier volume triangles; output goes to ta_add_triangle()
+{
+public:
+ ModifierVolumeClipper(bool enabled) : enabled(enabled) {} // enabled == false makes add() a plain pass-through
+
+ void add(ModTriangle& tri) // Clips tri (modified in place) and emits 0, 1 or 2 triangles
+ {
+ if (enabled)
+ {
+ glm::vec3 dist{ // z of curMatrix * (x, y, z, 1) for each of the three vertices
+ tri.x0 * curMatrix[0][2] + tri.y0 * curMatrix[1][2] + tri.z0 * curMatrix[2][2] + curMatrix[3][2],
+ tri.x1 * curMatrix[0][2] + tri.y1 * curMatrix[1][2] + tri.z1 * curMatrix[2][2] + curMatrix[3][2],
+ tri.x2 * curMatrix[0][2] + tri.y2 * curMatrix[1][2] + tri.z2 * curMatrix[2][2] + curMatrix[3][2]
+ };
+ dist = -dist - nearPlane; // negative components are on the clipped side of the near plane
+ ModTriangle newTri;
+ int n = sutherlandHodgmanClip(dist, tri, newTri); // n = vertex count of the clipped polygon (0, 3 or 4)
+ switch (n)
+ {
+ case 0:
+ // fully clipped
+ break;
+ case 3:
+ ta_add_triangle(tri);
+ break;
+ case 4:
+ ta_add_triangle(tri); // a quad is emitted as two triangles
+ ta_add_triangle(newTri);
+ break;
+ }
+ }
+ else
+ {
+ ta_add_triangle(tri);
+ }
+ }
+
+private:
+ //
+ // Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
+ // Journal of Graphics GPU and Game Tools - November 2011
+ //
+ glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist) // Point of segment AB weighted by the opposite endpoint's |distance|
+ {
+ return (A * std::abs(Bdist) + B * std::abs(Adist)) / (std::abs(Adist) + std::abs(Bdist));
+ }
+
+ // Clip the triangle 'trig' with respect to the provided distances to the clipping plane.
+ int sutherlandHodgmanClip(glm::vec3& dist, ModTriangle& trig, ModTriangle& newTrig) // Returns 0, 3 or 4; newTrig is only filled when 4 is returned
+ {
+ constexpr float clipEpsilon = 0.f; //0.00001;
+ constexpr float clipEpsilon2 = 0.f; //0.01;
+
+ if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2))))
+ // all clipped
+ return 0;
+ if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon))))
+ // none clipped
+ return 3;
+
+ // There are either 1 or 2 vertices above the clipping plane.
+ glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f));
+ bool nextIsAbove;
+ glm::vec3 v0(trig.x0, trig.y0, trig.z0);
+ glm::vec3 v1(trig.x1, trig.y1, trig.z1);
+ glm::vec3 v2(trig.x2, trig.y2, trig.z2);
+ glm::vec3 v3;
+ // Find the CCW-most vertex above the plane.
+ if (above[1] && !above[0])
+ {
+ // Cycle once CCW. Use v3 as a temp
+ nextIsAbove = above[2];
+ v3 = v0;
+ v0 = v1;
+ v1 = v2;
+ v2 = v3;
+ dist = glm::vec3(dist.y, dist.z, dist.x); // keep the distances aligned with the rotated vertices
+ }
+ else if (above[2] && !above[1])
+ {
+ // Cycle once CW. Use v3 as a temp.
+ nextIsAbove = above[0];
+ v3 = v2;
+ v2 = v1;
+ v1 = v0;
+ v0 = v3;
+ dist = glm::vec3(dist.z, dist.x, dist.y); // keep the distances aligned with the rotated vertices
+ }
+ else
+ nextIsAbove = above[1];
+ trig.x0 = v0.x; // v0 is above the plane after the rotation and is kept as is
+ trig.y0 = v0.y;
+ trig.z0 = v0.z;
+ // We always need to clip v2-v0.
+ v3 = intersect(v0, dist[0], v2, dist[2]);
+ if (nextIsAbove)
+ {
+ v2 = intersect(v1, dist[1], v2, dist[2]); // two vertices above: the result is a quad v0, v1, v2', v3
+ trig.x1 = v1.x;
+ trig.y1 = v1.y;
+ trig.z1 = v1.z;
+ trig.x2 = v2.x;
+ trig.y2 = v2.y;
+ trig.z2 = v2.z;
+ newTrig.x0 = v0.x; // second half of the quad: v0, v2', v3
+ newTrig.y0 = v0.y;
+ newTrig.z0 = v0.z;
+ newTrig.x1 = v2.x;
+ newTrig.y1 = v2.y;
+ newTrig.z1 = v2.z;
+ newTrig.x2 = v3.x;
+ newTrig.y2 = v3.y;
+ newTrig.z2 = v3.z;
+
+ return 4;
+ }
+ else
+ {
+ v1 = intersect(v0, dist[0], v1, dist[1]); // one vertex above: the result is the triangle v0, v1', v3
+ trig.x1 = v1.x;
+ trig.y1 = v1.y;
+ trig.z1 = v1.z;
+ trig.x2 = v3.x;
+ trig.y2 = v3.y;
+ trig.z2 = v3.z;
+
+ return 3;
+ }
+ }
+
+ bool enabled; // false: add() forwards triangles untouched
+};
+
+// Sends an ICH list to the TA as a modifier volume.
+//
+// A ModifierVolumeParam is built from the list's ISP word and the current
+// matrices and queued with ta_add_poly(). The list's triangle strips are then
+// unrolled into individual triangles, which go through the near-plane
+// clipper.
+//
+// list          list header (isp, pcw.volume, vtxCount)
+// vtx           first vertex of the list; only x, y, z and header.endOfStrip are read
+// needClipping  enables near-plane clipping of the triangles
+template<typename T>
+static void sendMVPolygon(ICHList *list, const T *vtx, bool needClipping)
+{
+    ModifierVolumeParam mvp{};
+    mvp.isp.full = list->isp.full;
+    if (!openModifierVolume)
+        mvp.isp.CullMode = 0;
+    mvp.isp.VolumeLast = list->pcw.volume;
+    mvp.isp.DepthMode &= 3;
+    mvp.mvMatrix = taMVMatrix;
+    mvp.projMatrix = taProjMatrix;
+    ta_add_poly(state.listType, mvp);
+
+    ModifierVolumeClipper clipper(needClipping);
+    glm::vec3 vtx0{};       // vertex seen two iterations ago
+    glm::vec3 vtx1{};       // vertex seen one iteration ago
+    u32 stripStart = 0;     // index of the first vertex of the current strip
+
+    for (u32 i = 0; i < list->vtxCount; i++)
+    {
+        glm::vec3 v(vtx->x, vtx->y, vtx->z);
+        u32 triIdx = i - stripStart;
+        if (triIdx >= 2)
+        {
+            ModTriangle tri;
+
+            // Every other triangle of a strip has its first two vertices
+            // swapped
+            if (triIdx & 1)
+            {
+                tri.x1 = vtx0.x;
+                tri.y1 = vtx0.y;
+                tri.z1 = vtx0.z;
+
+                tri.x0 = vtx1.x;
+                tri.y0 = vtx1.y;
+                tri.z0 = vtx1.z;
+            }
+            else
+            {
+                tri.x0 = vtx0.x;
+                tri.y0 = vtx0.y;
+                tri.z0 = vtx0.z;
+
+                tri.x1 = vtx1.x;
+                tri.y1 = vtx1.y;
+                tri.z1 = vtx1.z;
+            }
+            tri.x2 = v.x;
+            tri.y2 = v.y;
+            tri.z2 = v.z;
+
+            clipper.add(tri);
+        }
+        if (vtx->header.endOfStrip)
+            stripStart = i + 1;
+        vtx0 = vtx1;
+        vtx1 = v;
+        vtx++;
+    }
+}
+
+static N2LightModel *taLightModel; // Light model last returned by ta_add_light() in sendLights(), or nullptr when there is no current light model
+
+// Rebuilds the TA-side light model from the current Elan light model and
+// lights, if they changed since the last call.
+//
+// Does nothing unless state.lightModelUpdated is set. When there is no
+// current light model, taLightModel is simply reset to nullptr. Otherwise an
+// N2LightModel is assembled from curLightModel and curLights, handed to
+// ta_add_light(), and the returned pointer is stored in taLightModel.
+//
+// Only lights that are diffuse or specular for at least one of the two
+// volumes and that are actually defined are included, so indices in the
+// output model (including bumpId1/bumpId2) are compacted.
+static void sendLights()
+{
+    if (!state.lightModelUpdated)
+        return;
+
+    state.lightModelUpdated = false;
+    if (curLightModel == nullptr)
+    {
+        // Nothing to build: no model is handed to the TA in this case.
+        taLightModel = nullptr;
+        return;
+    }
+
+    N2LightModel model;
+    model.lightCount = 0;
+    model.ambientMaterialBase[0] = curLightModel->useAmbientBase0;
+    model.ambientMaterialBase[1] = curLightModel->useAmbientBase1;
+    model.ambientMaterialOffset[0] = curLightModel->useAmbientOffset0;
+    model.ambientMaterialOffset[1] = curLightModel->useAmbientOffset1;
+    model.useBaseOver = curLightModel->useBaseOver;
+    model.bumpId1 = -1;
+    model.bumpId2 = -1;
+    memcpy(model.ambientBase[0], glm::value_ptr(unpackColor(curLightModel->ambientBase0)), sizeof(model.ambientBase[0]));
+    memcpy(model.ambientBase[1], glm::value_ptr(unpackColor(curLightModel->ambientBase1)), sizeof(model.ambientBase[1]));
+    memcpy(model.ambientOffset[0], glm::value_ptr(unpackColor(curLightModel->ambientOffset0)), sizeof(model.ambientOffset[0]));
+    memcpy(model.ambientOffset[1], glm::value_ptr(unpackColor(curLightModel->ambientOffset1)), sizeof(model.ambientOffset[1]));
+
+    for (u32 i = 0; i < MAX_LIGHTS; i++)
+    {
+        // The slot is only kept (lightCount incremented) if the light is used
+        N2Light& light = model.lights[model.lightCount];
+        for (int vol = 0; vol < 2; vol++)
+        {
+            light.diffuse[vol] = curLightModel->isDiffuse(i, vol);
+            light.specular[vol] = curLightModel->isSpecular(i, vol);
+        }
+        if (!light.diffuse[0] && !light.specular[0]
+                && !light.diffuse[1] && !light.specular[1])
+            continue;
+        if (curLights[i] == nullptr)
+        {
+            INFO_LOG(PVR, "Light %d is referenced but undefined", i);
+            continue;
+        }
+        // Remap the bump map light ids to their index in the compacted array
+        if (i == curLightModel->bumpId1)
+            model.bumpId1 = model.lightCount;
+        if (i == curLightModel->bumpId2)
+            model.bumpId2 = model.lightCount;
+        light.parallel = curLights[i]->pcw.parallelLight;
+        if (light.parallel)
+        {
+            ParallelLight *plight = (ParallelLight *)curLights[i];
+            memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color));
+            light.routing = plight->routing;
+            light.dmode = plight->dmode;
+            light.smode = N2_LMETHOD_SINGLE_SIDED;
+            memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)),
+                    sizeof(light.direction));
+        }
+        else
+        {
+            PointLight *plight = (PointLight *)curLights[i];
+            memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color));
+            light.routing = plight->routing;
+            light.dmode = plight->dmode;
+            light.smode = plight->smode;
+            // The (negated) direction is needed whether or not the light ends
+            // up being treated as a parallel light
+            memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)),
+                    sizeof(light.direction));
+            if (plight->posX == 0 && plight->posY == 0 && plight->posZ == 0
+                    && plight->_distA == 0 && plight->_distB == 0
+                    && plight->_angleA == 0 && plight->_angleB == 0)
+            {
+                // Lights not using distance or angle attenuation are converted into parallel lights on the CPU side?
+                DEBUG_LOG(PVR, "Point -> parallel light[%d] dir %d %d %d", i, -(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ);
+                light.parallel = true;
+            }
+            else
+            {
+                memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position));
+                light.distAttnMode = plight->dattenmode;
+                light.attnDistA = plight->distA();
+                light.attnDistB = plight->distB();
+                light.attnAngleA = plight->angleA();
+                light.attnAngleB = plight->angleB();
+            }
+        }
+        model.lightCount++;
+    }
+    taLightModel = ta_add_light(model);
+}
+
+static void setStateParams(PolyParam& pp, const ICHList *list) // Completes pp with the current Elan state: tile clip, matrices, light model, GMP and model-level flags
+{
+ sendLights(); // refreshes taLightModel if the light model changed
+ pp.tileclip = state.tileclip;
+ pp.mvMatrix = taMVMatrix;
+ pp.normalMatrix = taNormalMatrix;
+ pp.projMatrix = taProjMatrix;
+ pp.lightModel = taLightModel;
+ pp.envMapping[0] = false;
+ pp.envMapping[1] = false;
+ if (curGmp != nullptr) // without a current GMP, glossCoef and the *Color selectors keep the caller's values
+ {
+ pp.glossCoef[0] = curGmp->gloss.getCoef0();
+ pp.glossCoef[1] = curGmp->gloss.getCoef1();
+ pp.constantColor[0] = curGmp->paramSelect.b0;
+ pp.diffuseColor[0] = curGmp->paramSelect.d0;
+ pp.specularColor[0] = curGmp->paramSelect.s0;
+ pp.constantColor[1] = curGmp->paramSelect.b1;
+ pp.diffuseColor[1] = curGmp->paramSelect.d1;
+ pp.specularColor[1] = curGmp->paramSelect.s1;
+
+ // Environment mapping
+ if (curGmp->paramSelect.e0)
+ {
+ pp.pcw.Texture = 1; // texturing is forced on and the offset color off
+ pp.pcw.Offset = 0;
+ pp.tsp.UseAlpha = 1;
+ pp.tsp.IgnoreTexA = 0;
+ pp.envMapping[0] = true;
+ pp.tcw = list->tcw0; // taken from the list even for formats whose caller did not set a tcw
+ envMapping = true; // file-level flag; sendPolygon() clears it once the polygon has been sent
+ }
+ if (curGmp->paramSelect.e1)
+ {
+ pp.pcw.Texture = 1;
+ pp.pcw.Offset = 0;
+ pp.tsp1.UseAlpha = 1;
+ pp.tsp1.IgnoreTexA = 0;
+ pp.envMapping[1] = true;
+ pp.tcw1 = list->tcw1;
+ envMapping = true;
+ }
+ }
+ pp.tsp.full ^= modelTSP.full; // bits set in the model's TSP word toggle the polygon's TSP bits
+ pp.tsp1.full ^= modelTSP.full;
+
+ // projFlip is for left-handed projection matrices (initd rear view mirror)
+ bool projFlip = taProjMatrix != nullptr && std::signbit(taProjMatrix[0]) == std::signbit(taProjMatrix[5]);
+ pp.isp.CullMode ^= (u32)cullingReversed ^ (u32)projFlip; // each flag flips the low bit of CullMode
+ pp.pcw.Shadow ^= shadowedVolume;
+ if (pp.pcw.Shadow == 0 || pp.pcw.Volume == 0) // the second set of parameters is disabled unless both Shadow and Volume are set
+ {
+ pp.tsp1.full = -1;
+ pp.tcw1.full = -1;
+ pp.glossCoef[1] = 0;
+ pp.constantColor[1] = false;
+ pp.diffuseColor[1] = false;
+ pp.specularColor[1] = false;
+ }
+// else if (pp.pcw.Volume == 1)
+// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", state.listType, list->flags, curGmp->paramSelect.full,
+// pp.tcw.full ^ pp.tcw1.full, pp.tsp.full ^ pp.tsp1.full);
+}
+
+// Copies into pp the fields that every vertex format takes unchanged from the
+// list header: the PCW flags, the ISP word and both TSP words.
+static void fillListPolyParam(PolyParam& pp, const ICHList *list)
+{
+    pp.pcw.Shadow = list->pcw.shadow;
+    pp.pcw.Texture = list->pcw.texture;
+    pp.pcw.Offset = list->pcw.offset;
+    pp.pcw.Gouraud = list->pcw.gouraud;
+    pp.pcw.Volume = list->pcw.volume;
+    pp.isp = list->isp;
+    pp.tsp = list->tsp0;
+    pp.tsp1 = list->tsp1;
+}
+
+// Applies the current Elan state to pp, queues the polygon in the current
+// list and then streams its vertices.
+template<typename T>
+static void queuePolygon(PolyParam& pp, const ICHList *list, const T *vtx, bool needClipping)
+{
+    setStateParams(pp, list);
+    ta_add_poly(state.listType, pp);
+
+    sendVertices(list, vtx, needClipping);
+}
+
+// Sends one ICH list (polygon header immediately followed by its vertices) to
+// the TA.
+//
+// The vertex format is selected by list->flags. Lists whose bounding box
+// fails isBetweenNearAndFar() are dropped. For the V and VU formats, odd list
+// types (state.listType & 1) are sent as modifier volumes instead of regular
+// polygons. The file-level envMapping flag is cleared on exit.
+//
+// An unknown vertex format is logged and then reported through die().
+static void sendPolygon(ICHList *list)
+{
+    bool needClipping = false;
+    // Vertex data directly follows the list header
+    u8 *vertexData = (u8 *)list + sizeof(ICHList);
+
+    switch (list->flags)
+    {
+    case ICHList::VTX_TYPE_V:
+        {
+            N2_VERTEX *vtx = (N2_VERTEX *)vertexData;
+            if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
+                break;
+            if (state.listType & 1)
+            {
+                sendMVPolygon(list, vtx, needClipping);
+                break;
+            }
+            PolyParam pp{};
+            fillListPolyParam(pp, list);
+            queuePolygon(pp, list, vtx, needClipping);
+        }
+        break;
+
+    case ICHList::VTX_TYPE_VU:
+        {
+            N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)vertexData;
+            if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
+                break;
+            if (state.listType & 1)
+            {
+                sendMVPolygon(list, vtx, needClipping);
+                break;
+            }
+            PolyParam pp{};
+            fillListPolyParam(pp, list);
+            pp.tcw = list->tcw0;
+            pp.tcw1 = list->tcw1;
+            queuePolygon(pp, list, vtx, needClipping);
+        }
+        break;
+
+    case ICHList::VTX_TYPE_VUR:
+        {
+            verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0);
+            N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)vertexData;
+            if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
+                break;
+            PolyParam pp{};
+            fillListPolyParam(pp, list);
+            pp.tcw = list->tcw0;
+            pp.tcw1 = list->tcw1;
+            queuePolygon(pp, list, vtx, needClipping);
+        }
+        break;
+
+    case ICHList::VTX_TYPE_VR:
+        {
+            N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)vertexData;
+            if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
+                break;
+            PolyParam pp{};
+            fillListPolyParam(pp, list);
+            queuePolygon(pp, list, vtx, needClipping);
+        }
+        break;
+
+    case ICHList::VTX_TYPE_VUB:
+        {
+            // TODO
+            //printf("BUMP MAP fmt %d filter %d src select %d dst %d\n", list->tcw0.PixelFmt, list->tsp0.FilterMode, list->tsp0.SrcSelect, list->tsp0.DstSelect);
+            N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB *)vertexData;
+            if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping))
+                break;
+            PolyParam pp{};
+            fillListPolyParam(pp, list);
+            // Bump-mapped polygons are always textured and always use the
+            // offset color, whatever the list header says
+            pp.pcw.Texture = 1;
+            pp.pcw.Offset = 1;
+            pp.tcw = list->tcw0;
+            pp.tcw1 = list->tcw1;
+            queuePolygon(pp, list, vtx, needClipping);
+        }
+        break;
+
+    default:
+        WARN_LOG(PVR, "Unhandled poly format %x", list->flags);
+        die("Unsupported");
+        break;
+    }
+    envMapping = false;
+}
+
+// Parses and executes a buffer of Elan commands.
+//
+// data  start of the command buffer
+// size  size of the buffer in bytes; parsing stops when fewer than 32 bytes
+//       remain
+//
+// Active selects whether commands update the Elan state and feed the TA
+// (true), or are only walked (false). Interrupts raised by register waits
+// and the texture DMA copies are performed in both modes. Model and link
+// commands recurse into the referenced buffer with the same Active value.
+//
+// Offsets, sizes and vertex counts all come from the command stream and are
+// used without validation, except for the RAM index masking noted below.
+template<bool Active = true>
+static void executeCommand(u8 *data, int size)
+{
+//  verify(size >= 0);
+//  verify(size < (int)ELAN_RAM_SIZE);
+//  if (0x2b00 == (u32)(data - RAM))
+//      for (int i = 0; i < size; i += 4)
+//          DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
+
+    while (size >= 32)
+    {
+        // Each branch below decrements size by the length of the command it
+        // consumed; data is advanced by the same amount at the end of the loop.
+        const int oldSize = size;
+        ElanBase *cmd = (ElanBase *)data;
+        if (cmd->pcw.naomi2)
+        {
+            switch(cmd->pcw.n2Command)
+            {
+            case PCW::null:
+                size -= 32;
+                break;
+
+            case PCW::projMatrix:
+                if (Active)
+                    state.setProjectionMatrix(data);
+                size -= sizeof(ProjMatrix);
+                break;
+
+            case PCW::matrixOrLight:
+                {
+                    InstanceMatrix *instance = (InstanceMatrix *)data;
+                    if (instance->isInstanceMatrix())
+                    {
+                        //DEBUG_LOG(PVR, "Model instance");
+                        if (Active)
+                            state.setMatrix(instance);
+                        size -= sizeof(InstanceMatrix);
+                        break;
+                    }
+                    if (Active)
+                    {
+                        if (instance->id1 & 0x10)
+                        {
+                            state.setLightModel(data);
+                        }
+                        else //if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4
+                        {
+                            if (instance->pcw.parallelLight)
+                            {
+                                ParallelLight *light = (ParallelLight *)data;
+                                state.setLight(light->lightId, data);
+                            }
+                            else
+                            {
+                                PointLight *light = (PointLight *)data;
+                                state.setLight(light->lightId, data);
+                            }
+                        }
+                        //else
+                        //{
+                        //  WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2);
+                        //  for (int i = 0; i < 32; i += 4)
+                        //      INFO_LOG(PVR, "    %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
+                        //}
+                    }
+                    // Light models and lights are all skipped using the size
+                    // of a LightModel
+                    size -= sizeof(LightModel);
+                }
+                break;
+
+            case PCW::model:
+                {
+                    Model *model = (Model *)data;
+                    if (Active)
+                    {
+                        cullingReversed = model->param.cwCulling == 0;
+                        state.setClipMode(model->pcw);
+                        openModifierVolume = model->param.openVolume;
+                        shadowedVolume = model->pcw.shadow;
+                        modelTSP = model->tsp;
+                        DEBUG_LOG(PVR, "Model offset %x size %x pcw %08x tsp %08x", model->offset, model->size, model->pcw.full, model->tsp.full);
+                    }
+                    // The offset is additionally wrapped to the size of the Elan
+                    // RAM, like the link command and the RAM handlers do, so that
+                    // the index can never fall outside of RAM.
+                    executeCommand<Active>(&RAM[model->offset & 0x1ffffff8 & (ELAN_RAM_SIZE - 1)], model->size);
+                    // Model-level flags only apply to the model's own commands
+                    cullingReversed = false;
+                    openModifierVolume = false;
+                    shadowedVolume = false;
+                    modelTSP.full = 0;
+                    size -= sizeof(Model);
+                }
+                break;
+
+            case PCW::registerWait:
+                {
+                    RegisterWait *wait = (RegisterWait *)data;
+                    if (wait->offset != (u32)-1 && wait->mask != 0)
+                    {
+                        DEBUG_LOG(PVR, "Register wait %x mask %x", wait->offset, wait->mask);
+                        // wait for interrupt
+                        HollyInterruptID inter;
+                        switch (wait->mask)
+                        {
+                        case 0x80:
+                            inter = holly_OPAQUE;
+                            break;
+                        case 0x100:
+                            inter = holly_OPAQUEMOD;
+                            break;
+                        case 0x200:
+                            inter = holly_TRANS;
+                            break;
+                        case 0x400:
+                            inter = holly_TRANSMOD;
+                            break;
+                        case 0x200000:
+                            inter = holly_PUNCHTHRU;
+                            break;
+                        default:
+                            WARN_LOG(PVR, "Unknown interrupt mask %x", wait->mask);
+                            die("unexpected");
+                            inter = holly_OPAQUE;
+                            break;
+                        }
+                        // The interrupt is raised right away instead of waiting
+                        asic_RaiseInterruptBothCLX(inter);
+                        TA_ITP_CURRENT += 32;
+                        if (Active)
+                            state.reset();
+                    }
+                    size -= sizeof(RegisterWait);
+                }
+                break;
+
+            case PCW::link:
+                {
+                    Link *link = (Link *)data;
+                    // NOTE(review): link->size is not checked against the size of
+                    // the source or destination buffers in the two DMA cases.
+                    if (link->offset & 0x80000000)
+                    {
+                        // elan v10 only
+                        DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->_res & 0x1ffffff8, link->size);
+                        memcpy(&vram[link->_res & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size);
+                        reg74 |= 1;
+                    }
+                    else if (link->offset & 0x20000000)
+                    {
+                        // elan v10 only
+                        DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & 0x01fffff8, link->_res & VRAM_MASK, link->size);
+                        memcpy(&vram[link->_res & VRAM_MASK], &RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size);
+                        reg74 |= 1;
+                    }
+                    else
+                    {
+                        DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size);
+                        executeCommand<Active>(&RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size);
+                    }
+                    size -= sizeof(Link);
+                }
+                break;
+
+            case PCW::gmp:
+                if (Active)
+                    state.setGMP(data);
+                size -= sizeof(GMP);
+                break;
+
+            case PCW::ich:
+                {
+                    ICHList *ich = (ICHList *)data;
+                    if (Active)
+                    {
+                        DEBUG_LOG(PVR, "ICH flags %x, %d verts", ich->flags, ich->vtxCount);
+                        sendPolygon(ich);
+                    }
+                    // Skip the header and the vertices that follow it
+                    size -= sizeof(ICHList) + ich->vertexSize() * ich->vtxCount;
+                }
+                break;
+
+            default:
+                DEBUG_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command);
+                size -= 32;
+                break;
+            }
+        }
+        else
+        {
+            // Not flagged as a Naomi 2 command: decode the raw parameter
+            // control word
+            u32 pcw = *(u32 *)data;
+            if ((pcw & 0xd0ffff00) == 0x808c0000) // display list
+            {
+                if (Active)
+                {
+                    DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf);
+                    state.reset();
+                    state.listType = (pcw >> 24) & 0xf;
+                    // TODO is this the right place for this?
+                    SQBuffer eol{};
+                    ta_vtx_data32(&eol);
+                }
+                size -= 24 * 4;
+            }
+            else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping
+            {
+                if (Active)
+                {
+                    state.setClipMode((PCW&)pcw);
+                    DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip);
+                }
+                size -= 0xE0;
+            }
+            else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked?
+            {
+                if (Active)
+                {
+                    DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw);
+                    state.listType = (pcw >> 24) & 0xf;
+                    size -= 32;
+                    // Everything after this header except the last 32 bytes is
+                    // passed on as regular TA data
+                    ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32);
+                }
+                // Resume parsing at the last 32 bytes of the buffer.
+                // NOTE(review): if this command is the only one in a 32-byte
+                // buffer, size stays at 32 and data does not advance, so the
+                // loop would not terminate - confirm this cannot happen.
+                size = 32;
+            }
+            else if (pcw == 0x20000000)
+            {
+                // User clipping
+                if (Active)
+                {
+                    u32 *tiles = (u32 *)data + 4;
+                    DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32,
+                            tiles[2] * 32, tiles[3] * 32);
+                    state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]);
+                }
+                size -= 32;
+            }
+            else
+            {
+                if (Active)
+                {
+                    if (pcw != 0)
+                        INFO_LOG(PVR, "Unhandled command %x", pcw);
+                    for (int i = 0; i < 32; i += 4)
+                        DEBUG_LOG(PVR, "    %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]);
+                }
+                size -= 32;
+            }
+        }
+        data += oldSize - size;
+    }
+}
+
+// Write handler for the Elan command area.
+//
+// The value is stored into elanCmd at the offset given by the low 5 bits of
+// addr. A write landing at offset 0x1c triggers the execution of the whole
+// elanCmd buffer. While a GGPO rollback is in progress the buffer is executed
+// with Active = false.
+//
+// After execution, bit 1 of reg74 is set unless bit 0 is set, and bits 2-5
+// are cleared.
+template<typename T>
+void DYNACALL write_elancmd(u32 addr, T data)
+{
+//  DEBUG_LOG(PVR, "ELAN cmd %08x = %x", addr, data);
+    addr &= 0x1f;
+    *(T *)&((u8 *)elanCmd)[addr] = data;
+
+    if (addr == 0x1c)
+    {
+        if (!ggpo::rollbacking())
+            executeCommand<true>((u8 *)elanCmd, sizeof(elanCmd));
+        else
+            executeCommand<false>((u8 *)elanCmd, sizeof(elanCmd));
+        if (!(reg74 & 1))
+            reg74 |= 2;
+        reg74 &= ~0x3c;
+    }
+}
+
+// Read handler for the Elan RAM: returns the value of type T stored at addr,
+// wrapped to the size of the RAM (ELAN_RAM_SIZE - 1 is used as a mask).
+template<typename T>
+T DYNACALL read_elanram(u32 addr)
+{
+    return *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)];
+}
+
+// Write handler for the Elan RAM: stores data (of type T) at addr, wrapped to
+// the size of the RAM (ELAN_RAM_SIZE - 1 is used as a mask).
+template<typename T>
+void DYNACALL write_elanram(u32 addr, T data)
+{
+    *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)] = data;
+}
+
+void init() // Currently performs no initialization work
+{
+}
+
+// Resets the Elan chip. Only a hard reset has any effect: it zeroes the
+// Elan RAM and resets the command state.
+void reset(bool hard)
+{
+    if (!hard)
+        return;
+
+    memset(RAM, 0, ELAN_RAM_SIZE);
+    state.reset();
+}
+
+void term() // Currently performs no cleanup work
+{
+}
+
+void vmem_init() // Registers the register, command and RAM read/write handlers and keeps the returned handler ids
+{
+ elanRegHandler = _vmem_register_handler_Template(read_elanreg, write_elanreg);
+ elanCmdHandler = _vmem_register_handler_Template(read_elancmd, write_elancmd);
+ elanRamHandler = _vmem_register_handler_Template(read_elanram, write_elanram);
+}
+
+void vmem_map(u32 base) // Maps the Elan handlers and RAM into the address space relative to base
+{
+ _vmem_map_handler(elanRegHandler, base | 8, base | 8); // registers
+ _vmem_map_handler(elanCmdHandler, base | 9, base | 9); // command area
+ _vmem_map_handler(elanRamHandler, base | 0xA, base | 0xB); // RAM, through the handlers
+ _vmem_map_block(RAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1); // same range also mapped as a memory block, with ELAN_RAM_SIZE - 1 as mask
+}
+
+// Saves the Elan registers, command buffer, RAM and command state.
+// Nothing is written unless the emulated platform is a Naomi 2, and the RAM
+// is left out when the serializer is used for a rollback.
+void serialize(Serializer& ser)
+{
+    if (settings.platform.isNaomi2())
+    {
+        ser << reg10;
+        ser << reg74;
+        ser << elanCmd;
+        const bool withRam = !ser.rollback();
+        if (withRam)
+            ser.serialize(RAM, ELAN_RAM_SIZE);
+        state.serialize(ser);
+    }
+}
+
+// Restores the Elan registers, command buffer, RAM and command state, in the
+// order serialize() wrote them.
+// Nothing is read unless the emulated platform is a Naomi 2, and the RAM is
+// left untouched when the deserializer is used for a rollback.
+void deserialize(Deserializer& deser)
+{
+    if (settings.platform.isNaomi2())
+    {
+        deser >> reg10;
+        deser >> reg74;
+        deser >> elanCmd;
+        const bool withRam = !deser.rollback();
+        if (withRam)
+            deser.deserialize(RAM, ELAN_RAM_SIZE);
+        state.deserialize(deser);
+    }
+}
+
+}
diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp
index a929582c1..7af3a8d62 100644
--- a/core/hw/pvr/ta_vtx.cpp
+++ b/core/hw/pvr/ta_vtx.cpp
@@ -1561,8 +1561,10 @@ bool ta_parse_vdrc(TA_context* ctx)
bgpp->envMapping[1] = false;
}
- const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT;
- // TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT
+ const bool mergeTranslucent = !config::PerStripSorting
+ || config::RendererType == RenderType::OpenGL_OIT
+ || config::RendererType == RenderType::DirectX11_OIT;
+ // TODO || config::RendererType == RenderType::Vulkan_OIT
TA_context *childCtx = ctx;
while (childCtx != nullptr)
{
@@ -1680,8 +1682,10 @@ bool ta_parse_naomi2(TA_context* ctx)
int op_count = 0;
int pt_count = 0;
int tr_count = 0;
- const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT;
- // TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT;
+ const bool mergeTranslucent = !config::PerStripSorting
+ || config::RendererType == RenderType::OpenGL_OIT
+ || config::RendererType == RenderType::DirectX11_OIT;
+ // TODO || config::RendererType == RenderType::Vulkan_OIT
for (const RenderPass& pass : ctx->rend.render_passes)
{
make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend);
diff --git a/core/rend/dx11/dx11_naomi2.cpp b/core/rend/dx11/dx11_naomi2.cpp
new file mode 100644
index 000000000..1669aceb1
--- /dev/null
+++ b/core/rend/dx11/dx11_naomi2.cpp
@@ -0,0 +1,387 @@
+/*
+ Copyright 2022 flyinghead
+
+ This file is part of Flycast.
+
+ Flycast is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ Flycast is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include "dx11_naomi2.h"
+
+const char * const DX11N2VertexShader = R"(
+#if pp_Gouraud == 1
+#define INTERPOLATION
+#else
+#define INTERPOLATION nointerpolation
+#endif
+
+struct VertexIn // per-vertex input of the Naomi 2 vertex shader
+{
+ float4 pos : POSITION; // only xyz is read by main(); w is replaced by 1
+#if POSITION_ONLY == 0 // the attributes below are compiled out of position-only variants
+ float4 col : COLOR0; // base color, volume 0
+ float4 spec : COLOR1; // offset color, volume 0
+ float2 uv : TEXCOORD0;
+#if pp_TwoVolumes == 1
+ float4 col1 : COLOR2; // base color, volume 1
+ float4 spec1 : COLOR3; // offset color, volume 1
+ float2 uv1 : TEXCOORD1;
+#endif
+ float3 normal: NORMAL; // transformed by normalMat in main() when LIGHT_ON == 1
+ uint vertexId : SV_VertexID; // combined with polyNumber into VertexOut.index
+#endif
+};
+
+struct VertexOut // output of the Naomi 2 vertex shader
+{
+ float4 pos : SV_POSITION; // main() forces z = 0 and w = 1 before returning
+ float4 uv : TEXCOORD0; // xy = uv * pos.z, z = 0, w = pos.z (values taken after the ndcMat transform)
+#if POSITION_ONLY == 0
+ INTERPOLATION float4 col : COLOR0; // INTERPOLATION is empty when pp_Gouraud == 1, nointerpolation otherwise
+ INTERPOLATION float4 spec : COLOR1;
+#if pp_TwoVolumes == 1
+ float2 uv1 : TEXCOORD1;
+ INTERPOLATION float4 col1 : COLOR2;
+ INTERPOLATION float4 spec1 : COLOR3;
+#endif
+ nointerpolation uint index : BLENDINDICES0; // (polyNumber << 18) + vertexId
+#endif
+};
+
+cbuffer shaderConstants : register(b0) // constants read by main() that are not per-polygon
+{
+ float4x4 ndcMat; // applied last in main(), after the division by w
+ float4 leftPlane; // the four planes are the user clip planes named in [clipplanes(...)] on main()
+ float4 topPlane;
+ float4 rightPlane;
+ float4 bottomPlane;
+};
+
+cbuffer polyConstants : register(b1) // layout must match the C++ struct N2PolyConstants (256 bytes)
+{
+ float4x4 mvMat; // applied to the vertex position first
+ float4x4 normalMat; // applied to the vertex normal (and to bump map tangents)
+ float4x4 projMat; // applied after lighting, before the division by w
+ int envMapping0; // 1: computeEnvMap is applied to uv
+ int envMapping1; // 1: computeEnvMap is applied to uv1
+ int bumpMapping; // 1: regular lighting of volume 0 is skipped
+ int polyNumber; // stored in the upper bits of VertexOut.index
+
+ float4 glossCoef; // indexed by volume; exponent of the specular term
+ int4 constantColor; // indexed by volume; 1 makes computeColors return immediately
+ int4 model_diff_spec; // diffuse0, diffuse1, specular0, specular1
+};
+
+void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal);
+void computeEnvMap(inout float2 uv, in float3 normal);
+void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat);
+
+[clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)] // clip against the four planes of shaderConstants
+VertexOut main(in VertexIn vin) // transforms and lights one Naomi 2 vertex
+{
+ VertexOut vo;
+ vo.pos = mul(mvMat, float4(vin.pos.xyz, 1.f)); // position after the model-view transform; lighting uses this value
+#if POSITION_ONLY == 0
+ vo.col = vin.col;
+ vo.spec = vin.spec;
+ #if LIGHT_ON == 1
+ float4 vnorm = normalize(mul(normalMat, float4(vin.normal, 0.f))); // w = 0: treated as a direction
+ #endif
+ #if pp_TwoVolumes == 1
+ vo.col1 = vin.col1;
+ vo.spec1 = vin.spec1;
+ vo.uv1 = vin.uv1;
+ #if LIGHT_ON == 1
+ // FIXME need offset0 and offset1 for bump maps
+ if (bumpMapping == 1)
+ computeBumpMap(vo.spec, vo.spec1, vo.pos.xyz, vnorm.xyz, normalMat);
+ else
+ {
+ computeColors(vo.col1, vo.spec1, 1, vo.pos.xyz, vnorm.xyz); // light volume 1
+ #if pp_Texture == 0 // NOTE(review): pp_Texture is not among the VertexMacros entries shown in this patch; if undefined it evaluates to 0 here - confirm
+ vo.col1 += vo.spec1; // offset color is folded into the base color
+ #endif
+ }
+ if (envMapping1 == 1)
+ computeEnvMap(vo.uv1.xy, vnorm.xyz);
+ #endif
+ #endif
+ #if LIGHT_ON == 1
+ if (bumpMapping == 0) // bump-mapped polygons skip regular lighting of volume 0
+ {
+ computeColors(vo.col, vo.spec, 0, vo.pos.xyz, vnorm.xyz); // light volume 0
+ #if pp_Texture == 0 // NOTE(review): same concern as for volume 1 above - confirm pp_Texture is defined
+ vo.col += vo.spec; // offset color is folded into the base color
+ #endif
+ }
+ #endif
+ vo.uv.xy = vin.uv; // zw are filled in at the end of the function
+ #if LIGHT_ON == 1
+ if (envMapping0 == 1)
+ computeEnvMap(vo.uv.xy, vnorm.xyz);
+ #endif
+ vo.index = (uint(polyNumber) << 18) + vin.vertexId; // polygon number in the bits above bit 17
+#endif
+
+ vo.pos = mul(projMat, vo.pos);
+
+ vo.pos = float4(vo.pos.xy / vo.pos.w, 1.f / vo.pos.w, 1.f); // xy divided by w; z now holds 1/w
+ vo.pos = mul(ndcMat, vo.pos);
+#if POSITION_ONLY == 1
+ vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z); // only the depth value in w is meaningful
+#else
+#if pp_Gouraud == 1
+ vo.col *= vo.pos.z; // presumably undone per pixel using uv.w for perspective-correct colors - confirm in the pixel shader
+ vo.spec *= vo.pos.z;
+#if pp_TwoVolumes == 1
+ vo.col1 *= vo.pos.z;
+ vo.spec1 *= vo.pos.z;
+#endif
+#endif
+ vo.uv = float4(vo.uv.xy * vo.pos.z, 0.f, vo.pos.z); // texture coordinates scaled by pos.z, pos.z itself in w
+#if pp_TwoVolumes == 1
+ vo.uv1 *= vo.pos.z;
+#endif
+#endif
+ vo.pos.w = 1.f; // pos.z was already copied to uv.w above
+ vo.pos.z = 0.f; // the output position itself carries no depth
+
+ return vo;
+}
+
+)";
+
+const char * const DX11N2ColorShader = R"(
+#define PI 3.1415926f
+
+#define LMODE_SINGLE_SIDED 0
+#define LMODE_DOUBLE_SIDED 1
+#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2
+#define LMODE_SPECIAL_EFFECT 3
+#define LMODE_THIN_SURFACE 4
+#define LMODE_BUMP_MAP 5
+
+#define ROUTING_BASEDIFF_BASESPEC_ADD 0
+#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1
+#define ROUTING_OFFSDIFF_BASESPEC_ADD 2
+#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3
+#define ROUTING_ALPHADIFF_ADD 4
+#define ROUTING_ALPHAATTEN_ADD 5
+#define ROUTING_FOGDIFF_ADD 6
+#define ROUTING_FOGATTENUATION_ADD 7
+#define ROUTING_BASEDIFF_BASESPEC_SUB 8
+#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9
+#define ROUTING_OFFSDIFF_BASESPEC_SUB 10
+#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11
+#define ROUTING_ALPHADIFF_SUB 12
+#define ROUTING_ALPHAATTEN_SUB 13
+
+struct N2Light // layout must match the C++ struct DX11N2Light (112 bytes)
+{
+ float4 color; // rgb is the light color; r alone is used by ROUTING_ALPHADIFF_SUB
+ float4 direction; // light direction when parallel == 1, spot axis otherwise
+ float4 position; // only read when parallel != 1
+ int parallel; // 1: directional light, no distance or angle attenuation
+ int routing; // one of the ROUTING_* values
+ int dmode; // LMODE_* used for the diffuse term
+ int smode; // LMODE_* used for the specular term
+ int4 diffuse_specular; // diffuse0, diffuse1, specular0, specular1
+ float attnDistA; // distance attenuation = clamp(attnDistB * d + attnDistA, 0, 1)
+ float attnDistB;
+ float attnAngleA; // angle attenuation = clamp(c * attnAngleB + attnAngleA, 0, 1)
+ float attnAngleB;
+ int distAttnMode; // 0: d is 1 / distance, otherwise d is the distance
+ int3 _pad; // pads the struct to 112 bytes
+};
+
+cbuffer lightConstants : register(b2) // layout must match the C++ struct N2LightConstants (1904 bytes)
+{
+ N2Light lights[16];
+ int lightCount; // number of entries of lights[] visited by computeColors
+ float4 ambientBase[2]; // indexed by volume
+ float4 ambientOffset[2]; // indexed by volume
+ int4 ambientMaterial; // base0, base1, offset0, offset1
+ int useBaseOver; // 1: base color in excess of 1 is added to the offset color
+ int bumpId0; // same offset (1892) as N2LightConstants::bumpId1 on the C++ side
+ int bumpId1; // same offset (1896) as N2LightConstants::bumpId2 on the C++ side
+}
+
+void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal) // lights the base/offset colors of volume volIdx
+{
+ if (constantColor[volIdx] == 1) // constant color: the vertex colors are left untouched
+ return;
+ float3 diffuse = float3(0.f, 0.f, 0.f); // light sum that modulates the base color
+ float3 specular = float3(0.f, 0.f, 0.f); // light sum that modulates the offset color
+ float diffuseAlpha = 0.f;
+ float specularAlpha = 0.f;
+
+ for (int i = 0; i < lightCount; i++)
+ {
+ N2Light light = lights[i];
+ float3 lightDir; // direction to the light
+ float3 lightColor = light.color.rgb;
+ if (light.parallel == 1)
+ {
+ lightDir = normalize(light.direction.xyz);
+ }
+ else
+ {
+ lightDir = normalize(light.position.xyz - position);
+ if (light.attnDistA != 1.f || light.attnDistB != 0.f) // skipped when the coefficients are the identity (A = 1, B = 0)
+ {
+ float distance = length(light.position.xyz - position);
+ if (light.distAttnMode == 0)
+ distance = 1.f / distance; // mode 0 attenuates with the inverse distance
+ lightColor *= clamp(light.attnDistB * distance + light.attnDistA, 0.f, 1.f);
+ }
+ if (light.attnAngleA != 1.f || light.attnAngleB != 0.f) // skipped when the coefficients are the identity (A = 1, B = 0)
+ {
+ float3 spotDir = light.direction.xyz;
+ float cosAngle = 1.f - max(0.f, dot(lightDir, spotDir)); // note: holds 1 - max(0, dot), not the dot product itself
+ lightColor *= clamp(cosAngle * light.attnAngleB + light.attnAngleA, 0.f, 1.f);
+ }
+ }
+ int routing = light.routing;
+ if (light.diffuse_specular[volIdx] == 1) // If light contributes to diffuse
+ {
+ float factor;
+ switch (light.dmode)
+ {
+ case LMODE_SINGLE_SIDED:
+ factor = max(dot(normal, lightDir), 0.f);
+ break;
+ case LMODE_DOUBLE_SIDED:
+ factor = abs(dot(normal, lightDir));
+ break;
+ case LMODE_SPECIAL_EFFECT:
+ default: // every other mode applies the light unmodulated
+ factor = 1.f;
+ break;
+ }
+ if (routing == ROUTING_ALPHADIFF_SUB)
+ diffuseAlpha -= lightColor.r * factor; // the red channel is subtracted from the base alpha
+ else if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
+ diffuse += lightColor * factor;
+ if (routing == ROUTING_OFFSDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) // diffuse term routed to the offset color
+ specular += lightColor * factor;
+ }
+ if (light.diffuse_specular[2 + volIdx] == 1) // If light contributes to specular
+ {
+ float3 reflectDir = reflect(-lightDir, normal);
+ float factor;
+ switch (light.smode)
+ {
+ case LMODE_SINGLE_SIDED:
+ factor = clamp(pow(max(dot(normalize(-position), reflectDir), 0.f), glossCoef[volIdx]), 0.f, 1.f);
+ break;
+ case LMODE_DOUBLE_SIDED:
+ factor = clamp(pow(abs(dot(normalize(-position), reflectDir)), glossCoef[volIdx]), 0.f, 1.f);
+ break;
+ case LMODE_SPECIAL_EFFECT:
+ default: // every other mode applies the light unmodulated
+ factor = 1.f;
+ break;
+ }
+ if (routing == ROUTING_ALPHADIFF_SUB)
+ specularAlpha -= lightColor.r * factor; // the red channel is subtracted from the offset alpha
+ else if (routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
+ specular += lightColor * factor;
+ if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_BASESPEC_ADD) // specular term routed to the base color
+ diffuse += lightColor * factor;
+ }
+ }
+ // ambient with material
+ if (ambientMaterial[volIdx] == 1)
+ diffuse += ambientBase[volIdx].rgb;
+ if (ambientMaterial[volIdx + 2] == 1)
+ specular += ambientOffset[volIdx].rgb;
+
+ if (model_diff_spec[volIdx] == 1) // the vertex color is only modulated by the light sum when the flag is set
+ baseCol.rgb *= diffuse;
+ if (model_diff_spec[volIdx + 2] == 1)
+ offsetCol.rgb *= specular;
+
+ // ambient w/o material
+ if (ambientMaterial[volIdx] == 0 && model_diff_spec[volIdx] == 1)
+ baseCol.rgb += ambientBase[volIdx].rgb;
+ if (ambientMaterial[volIdx + 2] == 0 && model_diff_spec[volIdx + 2] == 1)
+ offsetCol.rgb += ambientOffset[volIdx].rgb;
+
+ baseCol.a = max(0.f, baseCol.a + diffuseAlpha); // the alpha terms are <= 0; the result is kept non-negative
+ offsetCol.a = max(0.f, offsetCol.a + specularAlpha);
+ if (useBaseOver == 1)
+ {
+ float4 overflow = max(float4(0.f, 0.f, 0.f, 0.f), baseCol - float4(1.f, 1.f, 1.f, 1.f)); // part of the base color above 1
+ offsetCol += overflow;
+ }
+}
+
+void computeEnvMap(inout float2 uv, in float3 normal) // offsets uv by the xy components of the normal
+{
+ // Cheap env mapping
+ uv += normal.xy / 2.f + 0.5f; // maps a normal component in [-1, 1] to an offset in [0, 1]
+ uv = clamp(uv, 0.f, 1.f);
+}
+
+void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat) // unfinished: currently returns without changing color0
+{
+ // TODO
+ //if (bumpId0 == -1)
+ return; // always taken because the guard above is commented out; everything below is unreachable for now
+ float3 tangent = color0.xyz; // color0.xyz is read as the tangent
+ if (tangent.x > 0.5f) // components above 0.5 are shifted down by 1; presumably decodes a signed value - confirm
+ tangent.x -= 1.f;
+ if (tangent.y > 0.5f)
+ tangent.y -= 1.f;
+ if (tangent.z > 0.5f)
+ tangent.z -= 1.f;
+ tangent = normalize(mul(normalMat, float4(tangent, 0.f))).xyz;
+ float3 bitangent = color1.xyz; // color1.xyz is read as the bitangent, decoded the same way
+ if (bitangent.x > 0.5f)
+ bitangent.x -= 1.f;
+ if (bitangent.y > 0.5f)
+ bitangent.y -= 1.f;
+ if (bitangent.z > 0.5f)
+ bitangent.z -= 1.f;
+ bitangent = normalize(mul(normalMat, float4(bitangent, 0.f))).xyz;
+
+ float scaleDegree = color0.w;
+ float scaleOffset = color1.w; // not used below
+
+ N2Light light = lights[bumpId0]; // would index out of range for bumpId0 == -1, hence the commented-out guard
+ float3 lightDir; // direction to the light
+ if (light.parallel == 1)
+ lightDir = normalize(light.direction.xyz);
+ else
+ lightDir = normalize(light.position.xyz - position);
+
+ float n = dot(lightDir, normal);
+ float cosQ = dot(lightDir, tangent);
+ float sinQ = dot(lightDir, bitangent);
+
+ float sinT = clamp(n, 0.f, 1.f);
+ float k1 = 1.f - scaleDegree;
+ float k2 = scaleDegree * sinT;
+ float k3 = scaleDegree * sqrt(1.f - sinT * sinT); // cos T
+
+ float q = acos(cosQ);
+ if (sinQ < 0.f)
+ q = 2.f * PI - q; // extends the acos result from [0, PI] to the full [0, 2 PI) range
+
+ color0.r = k2;
+ color0.g = k3;
+ color0.b = q / PI / 2.f; // angle normalized to [0, 1)
+ color0.a = k1;
+}
+
+)";
diff --git a/core/rend/dx11/dx11_naomi2.h b/core/rend/dx11/dx11_naomi2.h
new file mode 100644
index 000000000..a4ad6f7ec
--- /dev/null
+++ b/core/rend/dx11/dx11_naomi2.h
@@ -0,0 +1,220 @@
+/*
+ Copyright 2022 flyinghead
+
+ This file is part of Flycast.
+
+ Flycast is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ Flycast is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Flycast. If not, see <https://www.gnu.org/licenses/>.
+ */
+#pragma once
+#include
+#include
+#include "windows/comptr.h"
+#include "hw/pvr/ta_ctx.h"
+
+extern const char * const DX11N2VertexShader;
+extern const char * const DX11N2ColorShader;
+
+struct N2PolyConstants // CPU-side image of the HLSL cbuffer polyConstants (vertex shader slot 1)
+{
+ float mvMat[4][4]; // byte offset 0
+ float normalMat[4][4]; // byte offset 64
+ float projMat[4][4]; // byte offset 128
+ int envMapping[2]; // byte offset 192: envMapping0, envMapping1 in the shader
+ int bumpMapping; // byte offset 200
+ int polyNumber; // byte offset 204
+
+ float glossCoef[4]; // byte offset 208: only [0] and [1] are written by Naomi2Helper
+ int constantColor[4]; // byte offset 224: only [0] and [1] are written by Naomi2Helper
+ // int4 model_diff_spec in the shader: diffuse0, diffuse1, specular0, specular1
+ int modelDiffuse[2]; // byte offset 240
+ int modelSpecular[2]; // byte offset 248
+ // total size: 256
+};
+static_assert(sizeof(N2PolyConstants) == 256, "sizeof(N2PolyConstants) should be 256"); // guards the layout shared with the shader
+
+struct DX11N2Light // CPU-side image of the HLSL struct N2Light
+{
+ float color[4]; // byte offset 0
+ float direction[4]; // byte offset 16
+ float position[4]; // byte offset 32
+ int parallel; // byte offset 48
+ int routing; // byte offset 52
+ int dmode; // byte offset 56
+ int smode; // byte offset 60
+ // int4 diffuse_specular in the shader: diffuse0, diffuse1, specular0, specular1
+ int diffuse[2]; // byte offset 64
+ int specular[2]; // byte offset 72
+ float attnDistA; // byte offset 80
+ float attnDistB; // byte offset 84
+ float attnAngleA; // byte offset 88
+ float attnAngleB; // byte offset 92
+ int distAttnMode; // byte offset 96
+ int _pad[3]; // byte offset 100: matches int3 _pad in the shader
+ // total size: 112
+};
+static_assert(sizeof(DX11N2Light) == 112, "sizeof(DX11N2Light) should be 112"); // guards the layout shared with the shader
+
+struct N2LightConstants // CPU-side image of the HLSL cbuffer lightConstants (vertex shader slot 2)
+{
+ DX11N2Light lights[16]; // byte offset 0
+ int lightCount; // byte offset 1792
+ int _pad0[3]; // the float4 array that follows starts on a 16-byte boundary in the cbuffer
+ float ambientBase[2][4]; // byte offset 1808
+ float ambientOffset[2][4]; // byte offset 1840
+ // int4 ambientMaterial in the shader: base0, base1, offset0, offset1
+ int ambientMaterialBase[2]; // byte offset 1872
+ int ambientMaterialOffset[2]; // byte offset 1880
+ int useBaseOver; // byte offset 1888
+ int bumpId1; // byte offset 1892: named bumpId0 in the shader
+ int bumpId2; // byte offset 1896: named bumpId1 in the shader
+ int _pad3; // byte offset 1900
+ // total size: 1904
+};
+static_assert(sizeof(N2LightConstants) == 1904, "sizeof(N2LightConstants) should be 1904"); // guards the layout shared with the shader
+
+/*
+ * Uploads the vertex shader constants needed by Naomi 2 polygons.
+ *
+ * Owns two dynamic constant buffers bound to the vertex shader stage:
+ * slot 1 receives the per-polygon data (N2PolyConstants) and slot 2 the
+ * light model (N2LightConstants). The light buffer is only rewritten when
+ * the light model pointer of a polygon differs from the last one uploaded.
+ */
+class Naomi2Helper
+{
+public:
+	// Creates both constant buffers and keeps deviceContext for later uploads.
+	// A failed creation is only logged; uploads to a missing buffer are skipped.
+	void init(ComPtr<ID3D11Device>& device, ComPtr<ID3D11DeviceContext> deviceContext)
+	{
+		this->deviceContext = deviceContext;
+		D3D11_BUFFER_DESC desc{};
+		desc.ByteWidth = alignTo16(sizeof(N2PolyConstants));
+		desc.Usage = D3D11_USAGE_DYNAMIC;
+		desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+		desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+		if (FAILED(device->CreateBuffer(&desc, nullptr, &polyConstantsBuffer.get())))
+			WARN_LOG(RENDERER, "Per-polygon constants buffer creation failed");
+
+		desc.ByteWidth = alignTo16(sizeof(N2LightConstants));
+		if (FAILED(device->CreateBuffer(&desc, nullptr, &lightConstantsBuffer.get())))
+			WARN_LOG(RENDERER, "Light constants buffer creation failed");
+		resetCache();
+	}
+
+	// Releases the buffers and the device context.
+	void term()
+	{
+		polyConstantsBuffer.reset();
+		lightConstantsBuffer.reset();
+		deviceContext.reset();
+	}
+
+	// Uploads the per-polygon constants of pp and, if its light model differs
+	// from the last one uploaded, the light constants as well.
+	void setConstants(const PolyParam& pp, u32 polyNumber)
+	{
+		// Value-initialized so that the members not set below (glossCoef[2..3],
+		// constantColor[2..3]) are uploaded as zero instead of stack garbage
+		N2PolyConstants polyConstants{};
+		memcpy(polyConstants.mvMat, pp.mvMatrix, sizeof(polyConstants.mvMat));
+		memcpy(polyConstants.normalMat, pp.normalMatrix, sizeof(polyConstants.normalMat));
+		memcpy(polyConstants.projMat, pp.projMatrix, sizeof(polyConstants.projMat));
+		polyConstants.envMapping[0] = pp.envMapping[0];
+		polyConstants.envMapping[1] = pp.envMapping[1];
+		polyConstants.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap;
+		polyConstants.polyNumber = polyNumber;
+		for (size_t i = 0; i < 2; i++)
+		{
+			polyConstants.glossCoef[i] = pp.glossCoef[i];
+			polyConstants.constantColor[i] = pp.constantColor[i];
+			polyConstants.modelDiffuse[i] = pp.diffuseColor[i];
+			polyConstants.modelSpecular[i] = pp.specularColor[i];
+		}
+		setConstBuffer(polyConstantsBuffer, polyConstants);
+		deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get());
+
+		if (pp.lightModel == lastModel)
+			// Light constants already on the GPU
+			return;
+		lastModel = pp.lightModel;
+		N2LightConstants lightConstants{};
+		if (pp.lightModel != nullptr)
+		{
+			const N2LightModel& lights = *pp.lightModel;
+			// Never copy more lights than the destination array (and the shader array) can hold
+			const int maxLights = (int)(sizeof(lightConstants.lights) / sizeof(lightConstants.lights[0]));
+			const int lightCount = lights.lightCount < maxLights ? lights.lightCount : maxLights;
+			lightConstants.lightCount = lightCount;
+			for (int i = 0; i < lightCount; i++)
+			{
+				const auto& src = lights.lights[i];
+				DX11N2Light& dst = lightConstants.lights[i];
+				memcpy(dst.color, src.color, sizeof(dst.color));
+				memcpy(dst.direction, src.direction, sizeof(dst.direction));
+				memcpy(dst.position, src.position, sizeof(dst.position));
+				dst.parallel = src.parallel;
+				dst.routing = src.routing;
+				dst.dmode = src.dmode;
+				dst.smode = src.smode;
+				memcpy(dst.diffuse, src.diffuse, sizeof(dst.diffuse));
+				memcpy(dst.specular, src.specular, sizeof(dst.specular));
+				dst.attnDistA = src.attnDistA;
+				dst.attnDistB = src.attnDistB;
+				dst.attnAngleA = src.attnAngleA;
+				dst.attnAngleB = src.attnAngleB;
+				dst.distAttnMode = src.distAttnMode;
+			}
+			memcpy(lightConstants.ambientBase, lights.ambientBase, sizeof(lightConstants.ambientBase));
+			memcpy(lightConstants.ambientOffset, lights.ambientOffset, sizeof(lightConstants.ambientOffset));
+			for (int i = 0; i < 2; i++)
+			{
+				lightConstants.ambientMaterialBase[i] = lights.ambientMaterialBase[i];
+				lightConstants.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i];
+			}
+			lightConstants.useBaseOver = lights.useBaseOver;
+			lightConstants.bumpId1 = lights.bumpId1;
+			lightConstants.bumpId2 = lights.bumpId2;
+		}
+		else
+		{
+			// No light model: no lights, white base ambient, black offset ambient
+			lightConstants.lightCount = 0;
+			float white[] { 1.f, 1.f, 1.f, 1.f };
+			float black[4]{};
+			for (int vol = 0; vol < 2; vol++)
+			{
+				lightConstants.ambientMaterialBase[vol] = 0;
+				lightConstants.ambientMaterialOffset[vol] = 0;
+				memcpy(lightConstants.ambientBase[vol], white, sizeof(white));
+				memcpy(lightConstants.ambientOffset[vol], black, sizeof(black));
+			}
+			lightConstants.useBaseOver = 0;
+			lightConstants.bumpId1 = -1;
+			lightConstants.bumpId2 = -1;
+		}
+		setConstBuffer(lightConstantsBuffer, lightConstants);
+		deviceContext->VSSetConstantBuffers(2, 1, &lightConstantsBuffer.get());
+	}
+
+	// Uploads only the model-view and projection matrices (used for modifier volumes).
+	// All other per-polygon constants are uploaded as zero.
+	void setConstants(const float *mvMatrix, const float *projMatrix)
+	{
+		N2PolyConstants polyConstants{};
+		memcpy(polyConstants.mvMat, mvMatrix, sizeof(polyConstants.mvMat));
+		memcpy(polyConstants.projMat, projMatrix, sizeof(polyConstants.projMat));
+		setConstBuffer(polyConstantsBuffer, polyConstants);
+		deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get());
+	}
+
+	// Forces the next setConstants(pp, ...) call to upload the light constants again.
+	void resetCache() {
+		lastModel = invalidModel();
+	}
+
+private:
+	// Sentinel that differs from every real light model pointer and from nullptr,
+	// which is itself a valid "no light model" value
+	static const N2LightModel *invalidModel() {
+		return reinterpret_cast<const N2LightModel *>(1);
+	}
+
+	// Rounds size up to the next multiple of 16, as required for constant buffer sizes
+	static constexpr unsigned int alignTo16(unsigned int size) {
+		return (((size - 1) >> 4) + 1) << 4;
+	}
+
+	// Copies data into buffer, discarding its previous content.
+	// Does nothing when the buffer is missing; logs and skips the copy when it cannot be mapped.
+	template <typename T>
+	void setConstBuffer(const ComPtr<ID3D11Buffer>& buffer, const T& data)
+	{
+		if (!buffer)
+			// Creation failed in init(), which has already logged it
+			return;
+		D3D11_MAPPED_SUBRESOURCE mappedSubres{};
+		if (FAILED(deviceContext->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres)))
+		{
+			WARN_LOG(RENDERER, "Naomi 2 constants buffer mapping failed");
+			return;
+		}
+		memcpy(mappedSubres.pData, &data, sizeof(T));
+		deviceContext->Unmap(buffer, 0);
+	}
+
+	ComPtr<ID3D11DeviceContext> deviceContext;
+	ComPtr<ID3D11Buffer> polyConstantsBuffer;
+	ComPtr<ID3D11Buffer> lightConstantsBuffer;
+	const N2LightModel *lastModel = invalidModel();
+};
diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp
index 75d430d1f..1eb988083 100644
--- a/core/rend/dx11/dx11_renderer.cpp
+++ b/core/rend/dx11/dx11_renderer.cpp
@@ -29,6 +29,7 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[]
{ "COLOR", 0, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 1, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0 },
+ { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
const D3D11_INPUT_ELEMENT_DESC ModVolLayout[]
{
@@ -43,7 +44,7 @@ bool DX11Renderer::Init()
shaders = &theDX11Context.getShaders();
samplers = &theDX11Context.getSamplers();
- bool success = (bool)shaders->getVertexShader(true);
+ bool success = (bool)shaders->getVertexShader(true, true);
ComPtr blob = shaders->getVertexShaderBlob();
success = success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get()));
blob = shaders->getMVVertexShaderBlob();
@@ -146,6 +147,7 @@ bool DX11Renderer::Init()
quad = std::unique_ptr(new Quad());
quad->init(device, deviceContext, shaders);
+ n2Helper.init(device, deviceContext);
fog_needs_update = true;
forcePaletteUpdate();
@@ -163,6 +165,7 @@ bool DX11Renderer::Init()
void DX11Renderer::Term()
{
NOTICE_LOG(RENDERER, "DX11 renderer terminating");
+ n2Helper.term();
vtxConstants.reset();
pxlConstants.reset();
fbTex.reset();
@@ -310,7 +313,12 @@ bool DX11Renderer::Process(TA_context* ctx)
}
else
{
- if (!ta_parse_vdrc(ctx))
+ bool success;
+ if (settings.platform.isNaomi2())
+ success = ta_parse_naomi2(ctx);
+ else
+ success = ta_parse_vdrc(ctx);
+ if (!success)
return false;
}
@@ -319,7 +327,7 @@ bool DX11Renderer::Process(TA_context* ctx)
//
// Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
-// Journal of Graphics GPU and Game Tools � November 2011
+// Journal of Graphics GPU and Game Tools - November 2011
//
static glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist)
{
@@ -523,21 +531,24 @@ void DX11Renderer::uploadGeometryBuffers()
{
const ModTriangle *data = nullptr;
u32 size = 0;
-#if 1
- // clip triangles
- std::vector modVolTriangles;
- modVolTriangles.reserve(pvrrc.modtrig.used());
- clipModVols(pvrrc.global_param_mvo, modVolTriangles);
- clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles);
- if (!modVolTriangles.empty())
+ if (!settings.platform.isNaomi2()) // TODO for naomi2 as well?
{
- size = (u32)(modVolTriangles.size() * sizeof(ModTriangle));
- data = modVolTriangles.data();
+ // clip triangles
+ std::vector modVolTriangles;
+ modVolTriangles.reserve(pvrrc.modtrig.used());
+ clipModVols(pvrrc.global_param_mvo, modVolTriangles);
+ clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles);
+ if (!modVolTriangles.empty())
+ {
+ size = (u32)(modVolTriangles.size() * sizeof(ModTriangle));
+ data = modVolTriangles.data();
+ }
+ }
+ else
+ {
+ size = pvrrc.modtrig.bytes();
+ data = pvrrc.modtrig.head();
}
-#else
- size = pvrrc.modtrig.bytes();
- data = pvrrc.modtrig.head();
-#endif
if (size > 0)
{
verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size));
@@ -598,6 +609,7 @@ bool DX11Renderer::Render()
if (!pvrrc.isRenderFramebuffer)
{
+ n2Helper.resetCache();
uploadGeometryBuffers();
updateFogTexture();
@@ -745,7 +757,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
DX11Texture *texture = (DX11Texture *)gp->texture;
bool gpuPalette = texture != nullptr ? texture->gpuPalette : false;
- ComPtr vertexShader = shaders->getVertexShader(gp->pcw.Gouraud);
+ ComPtr vertexShader = shaders->getVertexShader(gp->pcw.Gouraud, gp->isNaomi2());
deviceContext->VSSetShader(vertexShader, nullptr, 0);
ComPtr pixelShader = shaders->getShader(
gp->pcw.Texture,
@@ -819,7 +831,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
zfunc = gp->isp.DepthMode;
bool zwriteEnable;
- if (SortingEnabled && !config::PerStripSorting)
+ if (SortingEnabled /* && !config::PerStripSorting */)
zwriteEnable = false;
else
{
@@ -832,6 +844,9 @@ void DX11Renderer::setRenderState(const PolyParam *gp)
}
const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0;
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, config::ModifierVolumes), stencil);
+
+ if (gp->isNaomi2())
+ n2Helper.setConstants(*gp, 0); // poly number only used in OIT
}
template
@@ -900,7 +915,7 @@ void DX11Renderer::drawSorted(bool multipass)
// Write to the depth buffer now. The next render pass might need it. (Cosmic Smash)
deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff);
- ComPtr vertexShader = shaders->getVertexShader(true);
+ ComPtr vertexShader = shaders->getVertexShader(true, settings.platform.isNaomi2());
deviceContext->VSSetShader(vertexShader, nullptr, 0);
ComPtr pixelShader = shaders->getShader(
false,
@@ -949,7 +964,6 @@ void DX11Renderer::drawModVols(int first, int count)
deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff);
- deviceContext->VSSetShader(shaders->getMVVertexShader(), nullptr, 0);
deviceContext->PSSetShader(shaders->getModVolShader(), nullptr, 0);
deviceContext->RSSetScissorRects(1, &scissorRect);
@@ -958,6 +972,8 @@ void DX11Renderer::drawModVols(int first, int count)
ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first];
int mod_base = -1;
+ const float *curMVMat = nullptr;
+ const float *curProjMat = nullptr;
for (int cmv = 0; cmv < count; cmv++)
{
@@ -968,6 +984,13 @@ void DX11Renderer::drawModVols(int first, int count)
if (mod_base == -1)
mod_base = param.first;
+ if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat))
+ {
+ curMVMat = param.mvMatrix;
+ curProjMat = param.projMatrix;
+ n2Helper.setConstants(param.mvMatrix, param.projMatrix);
+ }
+ deviceContext->VSSetShader(shaders->getMVVertexShader(param.isNaomi2()), nullptr, 0);
if (!param.isp.VolumeLast && mv_mode > 0)
// OR'ing (open volume or quad)
deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Or), 2);
@@ -1005,6 +1028,8 @@ void DX11Renderer::drawModVols(int first, int count)
deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0);
deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+ // Use the background poly as a quad
+ deviceContext->VSSetShader(shaders->getMVVertexShader(false), nullptr, 0);
deviceContext->DrawIndexed(4, 0, 0);
}
diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h
index 4f1c3f6f8..a96c7f073 100644
--- a/core/rend/dx11/dx11_renderer.h
+++ b/core/rend/dx11/dx11_renderer.h
@@ -27,6 +27,7 @@
#include "dx11_shaders.h"
#include "rend/sorter.h"
#include "dx11_renderstate.h"
+#include "dx11_naomi2.h"
struct DX11Renderer : public Renderer
{
@@ -112,6 +113,7 @@ protected:
u32 height = 0;
bool frameRendered = false;
bool frameRenderedOnce = false;
+ Naomi2Helper n2Helper;
private:
void readDCFramebuffer();
diff --git a/core/rend/dx11/dx11_shaders.cpp b/core/rend/dx11/dx11_shaders.cpp
index 8e055bc41..fe7037a28 100644
--- a/core/rend/dx11/dx11_shaders.cpp
+++ b/core/rend/dx11/dx11_shaders.cpp
@@ -19,6 +19,7 @@
#include "dx11_shaders.h"
#include "dx11context.h"
#include "stdclass.h"
+#include "dx11_naomi2.h"
#include
const char * const VertexShader = R"(
@@ -356,6 +357,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" };
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
+ { "POSITION_ONLY", "0" },
+ { "pp_TwoVolumes", "0" },
+ { "LIGHT_ON", "1" },
{ nullptr, nullptr }
};
@@ -439,24 +443,48 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp
return shader;
}
-const ComPtr& DX11Shaders::getVertexShader(bool gouraud)
+const ComPtr& DX11Shaders::getVertexShader(bool gouraud, bool naomi2)
{
- ComPtr& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader;
+ int index = (int)gouraud | ((int)naomi2 << 1);
+ ComPtr& vertexShader = vertexShaders[index];
if (!vertexShader)
{
VertexMacros[0].Definition = MacroValues[gouraud];
- vertexShader = compileVS(VertexShader, "main", VertexMacros);
+ if (!naomi2)
+ {
+ vertexShader = compileVS(VertexShader, "main", VertexMacros);
+ }
+ else
+ {
+ VertexMacros[1].Definition = MacroValues[false];
+ VertexMacros[2].Definition = MacroValues[false];
+ VertexMacros[3].Definition = MacroValues[true];
+ std::string source(DX11N2VertexShader);
+ source += std::string("\n") + DX11N2ColorShader;
+ vertexShader = compileVS(source.c_str(), "main", VertexMacros);
+ }
}
return vertexShader;
}
-const ComPtr& DX11Shaders::getMVVertexShader()
+const ComPtr& DX11Shaders::getMVVertexShader(bool naomi2)
{
- if (!modVolVertexShader)
- modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr);
+ if (!modVolVertexShaders[naomi2])
+ {
+ if (!naomi2)
+ modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr);
+ else
+ {
+ VertexMacros[0].Definition = MacroValues[false];
+ VertexMacros[1].Definition = MacroValues[true];
+ VertexMacros[2].Definition = MacroValues[false];
+ VertexMacros[3].Definition = MacroValues[false];
+ modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros);
+ }
+ }
- return modVolVertexShader;
+ return modVolVertexShaders[naomi2];
}
const ComPtr& DX11Shaders::getModVolShader()
@@ -536,13 +564,22 @@ ComPtr DX11Shaders::compilePS(const char* source, const char*
ComPtr DX11Shaders::getVertexShaderBlob()
{
- VertexMacros[0].Definition = MacroValues[0];
- return compileShader(VertexShader, "main", "vs_4_0", VertexMacros);
+ VertexMacros[0].Definition = MacroValues[true];
+ // FIXME code dup
+ VertexMacros[1].Definition = MacroValues[false];
+ VertexMacros[2].Definition = MacroValues[false];
+ std::string source(DX11N2VertexShader);
+ source += std::string("\n") + DX11N2ColorShader;
+ return compileShader(source.c_str(), "main", "vs_4_0", VertexMacros);
}
ComPtr DX11Shaders::getMVVertexShaderBlob()
{
- return compileShader(ModVolVertexShader, "main", "vs_4_0", nullptr);
+ // FIXME code dup
+ VertexMacros[0].Definition = MacroValues[false];
+ VertexMacros[1].Definition = MacroValues[true];
+ VertexMacros[2].Definition = MacroValues[false];
+ return compileShader(DX11N2VertexShader, "main", "vs_4_0", VertexMacros);
}
ComPtr DX11Shaders::getQuadVertexShaderBlob()
@@ -562,10 +599,11 @@ void DX11Shaders::term()
{
saveCache(CacheFile);
shaders.clear();
- gouraudVertexShader.reset();
- flatVertexShader.reset();
+ for (auto& shader : vertexShaders)
+ shader.reset();
modVolShader.reset();
- modVolVertexShader.reset();
+ for (auto& shader : modVolVertexShaders)
+ shader.reset();
quadVertexShader.reset();
quadRotateVertexShader.reset();
quadPixelShader.reset();
diff --git a/core/rend/dx11/dx11_shaders.h b/core/rend/dx11/dx11_shaders.h
index 08ed32510..3a91ef2ad 100644
--- a/core/rend/dx11/dx11_shaders.h
+++ b/core/rend/dx11/dx11_shaders.h
@@ -53,9 +53,9 @@ public:
const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr,
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool trilinear, bool palette, bool gouraud,
bool alphaTest, bool clipInside, bool nearestWrapFix);
- const ComPtr& getVertexShader(bool gouraud);
+ const ComPtr& getVertexShader(bool gouraud, bool naomi2);
const ComPtr& getModVolShader();
- const ComPtr& getMVVertexShader();
+ const ComPtr& getMVVertexShader(bool naomi2);
const ComPtr& getQuadPixelShader();
const ComPtr& getQuadVertexShader(bool rotate);
@@ -70,10 +70,9 @@ private:
ComPtr device;
std::unordered_map> shaders;
- ComPtr gouraudVertexShader;
- ComPtr flatVertexShader;
+ ComPtr vertexShaders[4];
ComPtr modVolShader;
- ComPtr modVolVertexShader;
+ ComPtr modVolVertexShaders[2];
ComPtr quadPixelShader;
ComPtr quadVertexShader;
ComPtr quadRotateVertexShader;
diff --git a/core/rend/dx11/oit/dx11_oitrenderer.cpp b/core/rend/dx11/oit/dx11_oitrenderer.cpp
index 06d0d8c1c..f326740ed 100644
--- a/core/rend/dx11/oit/dx11_oitrenderer.cpp
+++ b/core/rend/dx11/oit/dx11_oitrenderer.cpp
@@ -40,6 +40,8 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[]
{ "COLOR", 2, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "COLOR", 3, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
{ "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u1), D3D11_INPUT_PER_VERTEX_DATA, 0 },
+ // Naomi 2
+ { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
struct DX11OITRenderer : public DX11Renderer
@@ -51,7 +53,6 @@ struct DX11OITRenderer : public DX11Renderer
int blend_mode1[2];
float paletteIndex;
float trilinearAlpha;
- int pp_Number;
// two volume mode
int shading_instr0;
@@ -81,7 +82,19 @@ struct DX11OITRenderer : public DX11Renderer
buffers.init(device, deviceContext);
ComPtr blob = shaders.getVertexShaderBlob();
mainInputLayout.reset();
- return success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get()));
+ success = SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())) && success;
+
+ blob = shaders.getFinalVertexShaderBlob();
+ success = SUCCEEDED(device->CreateInputLayout(MainLayout, 0, blob->GetBufferPointer(), blob->GetBufferSize(), &finalInputLayout.get())) && success;
+
+ desc.ByteWidth = sizeof(int);
+ desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4;
+ desc.Usage = D3D11_USAGE_DYNAMIC;
+ desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+ desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+ success = SUCCEEDED(device->CreateBuffer(&desc, nullptr, &vtxPolyConstants.get())) && success;
+
+ return success;
}
void Resize(int w, int h) override {
@@ -110,6 +123,9 @@ struct DX11OITRenderer : public DX11Renderer
void Term() override
{
+ vtxPolyConstants.reset();
+ finalInputLayout.reset();
+ mainInputLayout.reset();
opaqueTextureView.reset();
opaqueRenderTarget.reset();
opaqueTex.reset();
@@ -121,7 +137,7 @@ struct DX11OITRenderer : public DX11Renderer
template
void setRenderState(const PolyParam *gp, int polyNumber)
{
- ComPtr<ID3D11VertexShader> vertexShader = shaders.getVertexShader(gp->pcw.Gouraud);
+ ComPtr<ID3D11VertexShader> vertexShader = shaders.getVertexShader(gp->pcw.Gouraud, gp->isNaomi2(), false, pass != DX11OITShaders::Depth);
deviceContext->VSSetShader(vertexShader, nullptr, 0);
PixelPolyConstants constants;
@@ -214,7 +230,6 @@ struct DX11OITRenderer : public DX11Renderer
constants.clipTest[3] = (float)(clip_rect[1] + clip_rect[3]);
}
}
- constants.pp_Number = polyNumber;
constants.blend_mode0[0] = gp->tsp.SrcInstr;
constants.blend_mode0[1] = gp->tsp.DstInstr;
if (two_volumes_mode)
@@ -235,6 +250,14 @@ struct DX11OITRenderer : public DX11Renderer
memcpy(mappedSubres.pData, &constants, sizeof(constants));
deviceContext->Unmap(pxlPolyConstants, 0);
+ if (!gp->isNaomi2())
+ {
+ deviceContext->Map(vtxPolyConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres);
+ memcpy(mappedSubres.pData, &polyNumber, sizeof(polyNumber));
+ deviceContext->Unmap(vtxPolyConstants, 0);
+ deviceContext->VSSetConstantBuffers(1, 1, &vtxPolyConstants.get());
+ }
+
if (pass == DX11OITShaders::Color)
{
// Apparently punch-through polys support blending, or at least some combinations
@@ -280,6 +303,9 @@ struct DX11OITRenderer : public DX11Renderer
bool needStencil = config::ModifierVolumes && pass == DX11OITShaders::Depth && Type != ListType_Translucent;
const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0;
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, needStencil), stencil);
+
+ if (gp->isNaomi2())
+ n2Helper.setConstants(*gp, polyNumber);
}
template
@@ -318,13 +344,14 @@ struct DX11OITRenderer : public DX11Renderer
unsigned int offset = 0;
deviceContext->IASetVertexBuffers(0, 1, &modvolBuffer.get(), &stride, &offset);
deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
- deviceContext->VSSetShader(shaders.getMVVertexShader(), nullptr, 0);
if (!Transparent)
deviceContext->PSSetShader(shaders.getModVolShader(), nullptr, 0);
deviceContext->RSSetScissorRects(1, &scissorRect);
ModifierVolumeParam* params = Transparent ? &pvrrc.global_param_mvo_tr.head()[first] : &pvrrc.global_param_mvo.head()[first];
int mod_base = -1;
+ const float *curMVMat = nullptr;
+ const float *curProjMat = nullptr;
for (int cmv = 0; cmv < count; cmv++)
{
@@ -337,6 +364,13 @@ struct DX11OITRenderer : public DX11Renderer
if (param.count > 0)
{
+ if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat))
+ {
+ curMVMat = param.mvMatrix;
+ curProjMat = param.projMatrix;
+ n2Helper.setConstants(param.mvMatrix, param.projMatrix);
+ }
+ deviceContext->VSSetShader(shaders.getMVVertexShader(param.isNaomi2()), nullptr, 0);
if (Transparent)
{
if (!param.isp.VolumeLast && mv_mode > 0)
@@ -393,6 +427,7 @@ struct DX11OITRenderer : public DX11Renderer
deviceContext->OMSetDepthStencilState(depthStencilStates.getState(false, false, 0, false), 0);
setCullMode(0);
+ deviceContext->IASetInputLayout(finalInputLayout);
deviceContext->VSSetShader(shaders.getFinalVertexShader(), nullptr, 0);
deviceContext->PSSetShader(shaders.getFinalShader(), nullptr, 0);
@@ -529,6 +564,7 @@ struct DX11OITRenderer : public DX11Renderer
//
renderABuffer();
deviceContext->PSSetShaderResources(0, 1, &p);
+ deviceContext->IASetInputLayout(mainInputLayout);
// Clear the stencil from this pass
deviceContext->ClearDepthStencilView(depthStencilView2, D3D11_CLEAR_STENCIL, 0.f, 0);
@@ -559,6 +595,7 @@ struct DX11OITRenderer : public DX11Renderer
if (!pvrrc.isRenderFramebuffer)
{
+ n2Helper.resetCache();
uploadGeometryBuffers();
updateFogTexture();
@@ -609,6 +646,9 @@ private:
ComPtr trPolyParamsBuffer;
u32 trPolyParamsBufferSize = 0;
ComPtr trPolyParamsBufferView;
+ ComPtr<ID3D11InputLayout> mainInputLayout; // FIXME
+ ComPtr<ID3D11InputLayout> finalInputLayout;
+ ComPtr<ID3D11Buffer> vtxPolyConstants;
};
Renderer *rend_OITDirectX11()
diff --git a/core/rend/dx11/oit/dx11_oitshaders.cpp b/core/rend/dx11/oit/dx11_oitshaders.cpp
index f4a8087cd..cdaf0ada1 100644
--- a/core/rend/dx11/oit/dx11_oitshaders.cpp
+++ b/core/rend/dx11/oit/dx11_oitshaders.cpp
@@ -18,6 +18,7 @@
*/
#include "dx11_oitshaders.h"
#include "../dx11context.h"
+#include "../dx11_naomi2.h"
const char * const VertexShader = R"(
#if pp_Gouraud == 1
@@ -35,6 +36,8 @@ struct VertexIn
float4 col1 : COLOR2;
float4 spec1 : COLOR3;
float2 uv1 : TEXCOORD1;
+ float3 normal: NORMAL; // unused
+ uint vertexId : SV_VertexID;
};
struct VertexOut
@@ -43,12 +46,13 @@ struct VertexOut
float4 uv : TEXCOORD0;
INTERPOLATION float4 col : COLOR0;
INTERPOLATION float4 spec : COLOR1;
- float4 uv1 : TEXCOORD1;
+ float2 uv1 : TEXCOORD1;
INTERPOLATION float4 col1 : COLOR2;
INTERPOLATION float4 spec1 : COLOR3;
+ nointerpolation uint index : BLENDINDICES0;
};
-cbuffer constantBuffer : register(b0)
+cbuffer shaderConstants : register(b0)
{
float4x4 transMatrix;
float4 leftPlane;
@@ -57,6 +61,11 @@ cbuffer constantBuffer : register(b0)
float4 bottomPlane;
};
+cbuffer polyConstants : register(b1)
+{
+ int polyNumber;
+};
+
[clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)]
VertexOut main(in VertexIn vin)
{
@@ -75,7 +84,8 @@ VertexOut main(in VertexIn vin)
vo.spec1 = vin.spec1;
#endif
vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z);
- vo.uv1 = float4(vin.uv1 * vo.pos.z, 0.f, 0.f);
+ vo.uv1 = vin.uv1 * vo.pos.z;
+ vo.index = (uint(polyNumber) << 18) + vin.vertexId;
vo.pos.w = 1.f;
vo.pos.z = 0.f;
@@ -195,11 +205,16 @@ bool getShadowEnable(in PolyParam pp)
return (pp.tsp_isp_pcw & 1) != 0;
}
-uint getPolyNumber(in Pixel pixel)
+uint getPolyIndex(in Pixel pixel)
{
return pixel.seq_num & 0x3FFFFFFFu;
}
+uint getPolyNumber(in Pixel pixel)
+{
+ return (pixel.seq_num & 0x3FFFFFFFu) >> 18;
+}
+
#define SHADOW_STENCIL 0x40000000u
#define SHADOW_ACC 0x80000000u
@@ -269,9 +284,10 @@ struct VertexIn
float4 uv : TEXCOORD0;
INTERPOLATION float4 col : COLOR0;
INTERPOLATION float4 spec : COLOR1;
- float4 uv1 : TEXCOORD1;
+ float2 uv1 : TEXCOORD1;
INTERPOLATION float4 col1 : COLOR2;
INTERPOLATION float4 spec1 : COLOR3;
+ nointerpolation uint index : BLENDINDICES0;
};
Texture2D texture0 : register(t0);
@@ -299,7 +315,6 @@ cbuffer polyConstantBuffer : register(b1)
int2 blend_mode1;
float paletteIndex;
float trilinearAlpha;
- int pp_Number;
// two volume mode
int shading_instr0;
@@ -406,7 +421,7 @@ PSO main(in VertexIn inpix)
float2 uv;
#if pp_TwoVolumes == 1
if (area1)
- uv = inpix.uv1.xy / inpix.uv.w;
+ uv = inpix.uv1 / inpix.uv.w;
else
#endif
uv = inpix.uv.xy / inpix.uv.w;
@@ -550,7 +565,7 @@ PSO main(in VertexIn inpix)
Pixel pixel;
pixel.color = packColors(clamp(color, 0.f, 1.f));
pixel.depth = inpix.uv.w;
- pixel.seq_num = uint(pp_Number);
+ pixel.seq_num = inpix.index;
InterlockedExchange(abufferPointers[coords], idx, pixel.next);
Pixels[idx] = pixel;
@@ -594,7 +609,7 @@ int fillAndSortFragmentArray(in uint2 coords, out uint pixel_list[MAX_PIXELS_PER
uint jIdx = pixel_list[j];
while (j >= 0
&& (Pixels[jIdx].depth > Pixels[idx].depth
- || (Pixels[jIdx].depth == Pixels[idx].depth && getPolyNumber(Pixels[jIdx]) > getPolyNumber(Pixels[idx]))))
+ || (Pixels[jIdx].depth == Pixels[idx].depth && getPolyIndex(Pixels[jIdx]) > getPolyIndex(Pixels[idx]))))
{
pixel_list[j + 1] = pixel_list[j];
j--;
@@ -819,6 +834,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" };
static D3D_SHADER_MACRO VertexMacros[]
{
{ "pp_Gouraud", "1" },
+ { "POSITION_ONLY", "0" },
+ { "pp_TwoVolumes", "0" },
+ { "LIGHT_ON", "1" },
{ nullptr, nullptr }
};
@@ -907,24 +925,53 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool
return shader;
}
-const ComPtr<ID3D11VertexShader>& DX11OITShaders::getVertexShader(bool gouraud)
+const ComPtr<ID3D11VertexShader>& DX11OITShaders::getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes)
{
- ComPtr<ID3D11VertexShader>& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader;
- if (!vertexShader)
+ const u32 hash = (int)gouraud
+ | ((int)naomi2 << 1)
+ | ((int)positionOnly << 2)
+ | ((int)lightOn << 3)
+ | ((int)twoVolumes << 4);
+ auto& shader = vertexShaders[hash];
+ if (shader == nullptr)
{
VertexMacros[0].Definition = MacroValues[gouraud];
- vertexShader = compileVS(VertexShader, "main", VertexMacros);
+ if (!naomi2)
+ {
+ shader = compileVS(VertexShader, "main", VertexMacros);
+ }
+ else
+ {
+ VertexMacros[1].Definition = MacroValues[positionOnly];
+ VertexMacros[2].Definition = MacroValues[twoVolumes];
+ VertexMacros[3].Definition = MacroValues[lightOn];
+ std::string source(DX11N2VertexShader);
+ if (!positionOnly && lightOn)
+ source += std::string("\n") + DX11N2ColorShader;
+ shader = compileVS(source.c_str(), "main", VertexMacros);
+ }
}
- return vertexShader;
+ return shader;
}
-const ComPtr<ID3D11VertexShader>& DX11OITShaders::getMVVertexShader()
+const ComPtr<ID3D11VertexShader>& DX11OITShaders::getMVVertexShader(bool naomi2)
{
- if (!modVolVertexShader)
- modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr);
+ if (!modVolVertexShaders[naomi2])
+ {
+ if (!naomi2)
+ modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr);
+ else
+ {
+ VertexMacros[0].Definition = MacroValues[false];
+ VertexMacros[1].Definition = MacroValues[true];
+ VertexMacros[2].Definition = MacroValues[false];
+ VertexMacros[3].Definition = MacroValues[false];
+ modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros);
+ }
+ }
- return modVolVertexShader;
+ return modVolVertexShaders[naomi2];
}
const ComPtr& DX11OITShaders::getModVolShader()
@@ -1021,13 +1068,27 @@ ComPtr DX11OITShaders::compilePS(const char* source, const ch
ComPtr<ID3DBlob> DX11OITShaders::getVertexShaderBlob()
{
- VertexMacros[0].Definition = MacroValues[0];
- return compileShader(VertexShader, "main", "vs_5_0", VertexMacros);
+ VertexMacros[0].Definition = MacroValues[true];
+ // FIXME: duplicates the macro setup in getVertexShader(); LIGHT_ON (VertexMacros[3]) is left at its current value
+ VertexMacros[1].Definition = MacroValues[false];
+ VertexMacros[2].Definition = MacroValues[true];
+ std::string source(DX11N2VertexShader);
+ source += std::string("\n") + DX11N2ColorShader;
+ return compileShader(source.c_str(), "main", "vs_5_0", VertexMacros);
}
ComPtr<ID3DBlob> DX11OITShaders::getMVVertexShaderBlob()
{
- return compileShader(ModVolVertexShader, "main", "vs_5_0", nullptr);
+ // FIXME: duplicates the macro setup in getMVVertexShader(); LIGHT_ON (VertexMacros[3]) is left at its current value
+ VertexMacros[0].Definition = MacroValues[false];
+ VertexMacros[1].Definition = MacroValues[true];
+ VertexMacros[2].Definition = MacroValues[false];
+ return compileShader(DX11N2VertexShader, "main", "vs_5_0", VertexMacros);
+}
+
+ComPtr<ID3DBlob> DX11OITShaders::getFinalVertexShaderBlob()
+{
+ return compileShader(OITFinalVertexShaderSource, "main", "vs_5_0", nullptr);
}
void DX11OITShaders::init(const ComPtr& device, pD3DCompile D3DCompile)
diff --git a/core/rend/dx11/oit/dx11_oitshaders.h b/core/rend/dx11/oit/dx11_oitshaders.h
index fc772a4c4..1e0497a6a 100644
--- a/core/rend/dx11/oit/dx11_oitshaders.h
+++ b/core/rend/dx11/oit/dx11_oitshaders.h
@@ -35,9 +35,9 @@ public:
const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr,
bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping,
bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass);
- const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud);
+ const ComPtr<ID3D11VertexShader>& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = true);
const ComPtr<ID3D11PixelShader>& getModVolShader();
- const ComPtr<ID3D11VertexShader>& getMVVertexShader();
+ const ComPtr<ID3D11VertexShader>& getMVVertexShader(bool naomi2);
const ComPtr& getFinalShader();
const ComPtr& getTrModVolShader(int type);
const ComPtr& getFinalVertexShader();
@@ -46,10 +46,10 @@ public:
{
saveCache(CacheFile);
shaders.clear();
- gouraudVertexShader.reset();
- flatVertexShader.reset();
+ vertexShaders.clear();
modVolShader.reset();
- modVolVertexShader.reset();
+ for (auto& shader : modVolVertexShaders)
+ shader.reset();
for (auto& shader : trModVolShaders)
shader.reset();
finalShader.reset();
@@ -59,6 +59,7 @@ public:
}
ComPtr<ID3DBlob> getVertexShaderBlob();
ComPtr<ID3DBlob> getMVVertexShaderBlob();
+ ComPtr<ID3DBlob> getFinalVertexShaderBlob();
private:
ComPtr compileShader(const char *source, const char* function, const char* profile, const D3D_SHADER_MACRO *pDefines);
@@ -67,10 +68,9 @@ private:
ComPtr device;
std::unordered_map> shaders;
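- ComPtr<ID3D11VertexShader> gouraudVertexShader;
- ComPtr<ID3D11VertexShader> flatVertexShader;
+ std::unordered_map<u32, ComPtr<ID3D11VertexShader>> vertexShaders;
ComPtr<ID3D11PixelShader> modVolShader;
- ComPtr<ID3D11VertexShader> modVolVertexShader;
+ ComPtr<ID3D11VertexShader> modVolVertexShaders[2];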
ComPtr trModVolShaders[4];
ComPtr finalShader;
diff --git a/core/rend/gl4/gl4naomi2.cpp b/core/rend/gl4/gl4naomi2.cpp
index 6fbe39603..9d36dfab8 100644
--- a/core/rend/gl4/gl4naomi2.cpp
+++ b/core/rend/gl4/gl4naomi2.cpp
@@ -34,14 +34,14 @@ N2Vertex4Source::N2Vertex4Source(const gl4PipelineShader* shader) : OpenGl4Sourc
{
if (shader == nullptr)
{
- addConstant("GEOM_ONLY", 1);
+ addConstant("POSITION_ONLY", 1);
addConstant("pp_TwoVolumes", 0);
addConstant("pp_Gouraud", 0);
addConstant("pp_Texture", 0);
}
else
{
- addConstant("GEOM_ONLY", shader->pass == Pass::Depth); // geometry only for depth pass
+ addConstant("POSITION_ONLY", shader->pass == Pass::Depth); // position only for depth pass
addConstant("pp_TwoVolumes", shader->pp_TwoVolumes || shader->pp_BumpMap);
addConstant("pp_Gouraud", shader->pp_Gouraud);
addConstant("pp_Texture", shader->pp_Texture);
diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp
index ac8374f32..c667ef557 100644
--- a/core/rend/gles/gldraw.cpp
+++ b/core/rend/gles/gldraw.cpp
@@ -233,7 +233,7 @@ __forceinline
glcache.DepthFunc(Zfunction[gp->isp.DepthMode]);
}
- if (SortingEnabled && !config::PerStripSorting)
+ if (SortingEnabled /* && !config::PerStripSorting */) // Depth writes now disabled for per-strip sorting too: still looks glitchy, but fewer graphics go missing (at the cost of wrong depth order)
glcache.DepthMask(GL_FALSE);
else
{
diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp
index a9588b22b..e81cab27e 100644
--- a/core/rend/gles/naomi2.cpp
+++ b/core/rend/gles/naomi2.cpp
@@ -30,7 +30,7 @@ uniform int pp_Number;
// Vertex input
in vec3 in_pos;
-#if GEOM_ONLY == 0
+#if POSITION_ONLY == 0
in vec4 in_base;
in vec4 in_offs;
in vec2 in_uv;
@@ -56,7 +56,7 @@ void wDivide(inout vec4 vpos)
{
vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0);
vpos = ndcMat * vpos;
-#if GEOM_ONLY == 1
+#if POSITION_ONLY == 1
vtx_uv = vec3(0.0, 0.0, vpos.z);
#else
#if pp_Gouraud == 1
@@ -79,7 +79,7 @@ void wDivide(inout vec4 vpos)
void main()
{
vec4 vpos = mvMat * vec4(in_pos, 1.0);
-#if GEOM_ONLY == 0
+#if POSITION_ONLY == 0
vtx_base = in_base;
vtx_offs = in_offs;
vec4 vnorm = normalize(normalMat * vec4(in_normal, 0.0));
@@ -203,7 +203,7 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in int volIdx, in v
else
{
lightDir = normalize(light.position.xyz - position);
- if (light.attnDistA != 1.0 && light.attnDistB != 0.0)
+ if (light.attnDistA != 1.0 || light.attnDistB != 0.0)
{
float distance = length(light.position.xyz - position);
if (light.distAttnMode == 0)
@@ -359,7 +359,7 @@ void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3
N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly, bool texture) : OpenGlSource()
{
addConstant("pp_Gouraud", gouraud);
- addConstant("GEOM_ONLY", geometryOnly);
+ addConstant("POSITION_ONLY", geometryOnly);
addConstant("pp_TwoVolumes", 0);
addConstant("pp_Texture", (int)texture);
diff --git a/core/rend/gles/naomi2.h b/core/rend/gles/naomi2.h
index a277b33e3..ba697b131 100644
--- a/core/rend/gles/naomi2.h
+++ b/core/rend/gles/naomi2.h
@@ -192,6 +192,8 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader)
}
glUniform1i(shader->useBaseOver, 0);
glUniform1i(shader->lightCount, 0);
+ glUniform1i(shader->bumpId0, -1);
+ glUniform1i(shader->bumpId1, -1);
}
}
glUniform1i(shader->bumpMapping, pp->pcw.Texture == 1 && pp->tcw.PixelFmt == PixelBumpMap);
diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp
index a92eb26dc..29172033c 100644
--- a/core/rend/sorter.cpp
+++ b/core/rend/sorter.cpp
@@ -42,10 +42,10 @@ static bool operator<(const PolyParam& left, const PolyParam& right)
return left.zvZ < right.zvZ;
}
-static float getProjectedZ(const Vertex *v, const glm::mat4& mat)
+static float getProjectedZ(const Vertex *v, const float *mat)
{
- // 1 / w
- return 1 / mat[0][3] * v->x + mat[1][3] * v->y + mat[2][3] * v->z + mat[3][3];
+ // -1 / z
+ return -1 / (mat[2] * v->x + mat[1 * 4 + 2] * v->y + mat[2 * 4 + 2] * v->z + mat[3 * 4 + 2]);
}
void SortPParams(int first, int count)
@@ -178,16 +178,12 @@ void GenSorted(int first, int count, std::vector& pidx_sort,
{
const u32 *idx = idx_base + pp->first;
u32 flip = 0;
- glm::mat4 mat;
- float z0, z1;
+ float z0 = 0, z1 = 0;
if (pp->isNaomi2())
{
- mat = glm::make_mat4(pp->projMatrix);
- if (pp->mvMatrix != nullptr)
- mat *= glm::make_mat4(pp->mvMatrix);
- z0 = getProjectedZ(vtx_base + idx[0], mat);
- z1 = getProjectedZ(vtx_base + idx[1], mat);
+ z0 = getProjectedZ(vtx_base + idx[0], pp->mvMatrix);
+ z1 = getProjectedZ(vtx_base + idx[1], pp->mvMatrix);
}
for (u32 i = 0; i < pp->count - 2; i++)
{
@@ -207,7 +203,7 @@ void GenSorted(int first, int count, std::vector& pidx_sort,
lst[pfsti].pid = ppid;
if (pp->isNaomi2())
{
- float z2 = getProjectedZ(v2, mat);
+ float z2 = getProjectedZ(v2, pp->mvMatrix);
lst[pfsti].z = std::min(z0, std::min(z1, z2));
z0 = z1;
z1 = z2;