diff --git a/CMakeLists.txt b/CMakeLists.txt index 39038bbb7..d1d5d6a2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1079,6 +1079,8 @@ if(WIN32) core/rend/dx11/dx11context_lr.cpp core/rend/dx11/dx11context_lr.h core/rend/dx11/dx11_driver.h + core/rend/dx11/dx11_naomi2.cpp + core/rend/dx11/dx11_naomi2.h core/rend/dx11/oit/dx11_oitbuffers.h core/rend/dx11/oit/dx11_oitrenderer.cpp core/rend/dx11/oit/dx11_oitshaders.cpp diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index d43e8a113..1714a63fa 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -1,1804 +1,1811 @@ -/* - Copyright 2022 flyinghead - - This file is part of Flycast. - - Flycast is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - Flycast is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Flycast. If not, see . - */ -/* - * VideoLogic custom transformation & lighting (T&L) chip (codenamed: ELAN) - * 32 MB RAM - * Clock: 100 MHz - * 16 light sources per polygon - * ambient, parallel, point or spot (Fog lights and alpha lights also exist) - * Perspective conversion - * Near, far and side clipping, offscreen and backface culling - * bump mapping, environmental mapping - * dynamic & static model processing - * model cache system - * - * Each PVR2 chip renders half the screen (rectangular, stripes, and checker board options) - * so textures have to be duplicated in each vram - * - * Area 0: - * 005f6800 - 005f7cff asic A regs - * 005f8000 - 005f9fff CLXA regs - * 025f6800 - 025f7cff asic B regs - * 025f8000 - 025f9fff CLXB regs - * - * Area 1: - * 05000000 - 06ffffff CLXA vram - * 07000000 - 08ffffff CLXB vram - * - * Area 2: - * 085f6800 - 085f7cff write both asic regs - * 085f8000 - 085f9fff write both PVR regs - * 08800000 - 088000ff? elan regs - * 09000000 - ? elan command buffer - * 0A000000 - 0bfffffff elan RAM - */ -#include "elan.h" -#include "hw/mem/_vmem.h" -#include "pvr_mem.h" -#include "ta.h" -#include "ta_ctx.h" -#include "hw/holly/holly_intc.h" -#include "hw/holly/sb.h" -#include "hw/pvr/Renderer_if.h" -#include "hw/sh4/sh4_sched.h" -#include "hw/sh4/sh4_mem.h" -#include "emulator.h" -#include "serialize.h" -#include "elan_struct.h" -#include "network/ggpo.h" -#include -#include -#include -#include - -namespace elan { - -static _vmem_handler elanRegHandler; -static _vmem_handler elanCmdHandler; -static _vmem_handler elanRamHandler; - -u8 *RAM; - -static u32 reg10; -static u32 reg74; -static u32 reg30 = 0x31; - -static u32 elanCmd[32 / 4]; - -template -T DYNACALL read_elanreg(u32 paddr) -{ - //verify(sizeof(T) == 4); - u32 addr = paddr & 0x01ffffff; - switch (addr >> 16) - { - case 0x5F: - if (addr >= 0x005F6800 && addr <= 0x005F7CFF) - { - // 5F6908: Tests for errors 4, 8, 10, 2 and 1 (render isp buf ovf, render hazard, ISP param ovf, ob list ptr ovf, ta ill param) - // 5f6900: then int 4 and 40 (EoR TSP, EoT YUV) - return (T)sb_ReadMem(paddr, sizeof(T)); - } - else if (addr >= 0x005F8000 && addr <= 0x005F9FFF) - { - if (sizeof(T) != 4) - // House of the Dead 2 - return 0; - return (T)pvr_ReadReg(paddr); - } - else - { - INFO_LOG(MEMORY, "Read from area2 not implemented [Unassigned], addr=%x", addr); - return 0; - } - - default: -// if ((addr & 0xFF) != 0x74) - DEBUG_LOG(PVR, "ELAN read(%d) %08x [pc %08x]", (u32)sizeof(T), addr, p_sh4rcb->cntx.pc); - switch (addr & 0xFF) - { - case 0: // magic number - return (T)0xe1ad0000; - case 4: // revision - return 0x10; // 1 or x10 - case 0xc: - // command queue size - // loops until < 2 (v1) or 3 (v10) - return 1; - case 0x10: // sh4 if control? - // b0 broadcast on cs1 - // b1 elan channel 2 - // b2 enable pvr #2 - // rewritten by bios as reg10 & ~1 - return reg10; - case 0x14: // SDRAM refresh (never read?) - return (T)0x2029; //default 0x1429 - case 0x1c: // SDRAM CFG - return (T)0x87320961; - case 0x30: // Macro tiler config - // 0 0 l l l l l l t t t t 0 0 r r r r r r b b b b 0 0 V H 0 0 0 T - // lllll: left tile - // tttt: top tile - // rrrrrr: right tile - // bbbb: bottom tile - // V: tile vertically - // H: tile horizontally - // T: tiler enabled - return reg30; - case 0x74: - // b0 dma completed - // b1 cmd completed - // b2-b3 geometry timeouts - // b4-b6 errors? - return reg74; - case 0x78: // IRQ MASK - // 6 bits? - return 0; - default: - return (T)0; - } - } -} - -template -void DYNACALL write_elanreg(u32 paddr, T data) -{ - //verify(sizeof(T) == 4); - u32 addr = paddr & 0x01ffffff; - switch (addr >> 16) - { - case 0x5F: - if (addr>= 0x005F6800 && addr <= 0x005F7CFF) - sb_WriteMem(paddr, data, sizeof(T)); - else if (addr >= 0x005F8000 && addr <= 0x005F9FFF) - { - if (addr == 0x5F8040 && data == 0xFF00FF) - { - ERROR_LOG(PVR, "ELAN SCREWED pr %x pc %x", p_sh4rcb->cntx.pr, p_sh4rcb->cntx.pc); - throw FlycastException("Boot aborted"); - } - else if ((addr & 0x1fff) == SOFTRESET_addr && data == 0) - reg74 &= 3; - else if ((addr & 0x1fff) == STARTRENDER_addr) - reg74 &= 3; - - //if ((paddr & 0x1c000000) == 0x08000000 && (addr & 0x1fff) == SOFTRESET_addr && data == 0) - // reg74 |= 2; - pvr_WriteReg(paddr, data); - } - else - INFO_LOG(COMMON, "Write to area2 not implemented [Unassigned], addr=%x,data=%x,size=%d", addr, data, (u32)sizeof(T)); - break; - default: -// if ((addr & 0xFF) != 0x74) - DEBUG_LOG(PVR, "ELAN write(%d) %08x = %x", (u32)sizeof(T), addr, data); - switch (addr & 0xFF) - { - case 0x0: - // 0 multiple times (_kmtlifAbortDisplayListProcessing) - break; - // 0x4: _kmtlifAbortDisplayListProcessing: 0 - case 0x8: // write-only. reset ? - // 1 then 0 - // bios: 5 - // _kmtlifAbortDisplayListProcessing: 5 then 0 - // _kmtlifHandleDMATimeout: 1, 0, 4, 0... - if (data == 0) - reg74 = 0; - break; - case 0xc: - // 0 - break; - case 0x10: // sh4 if control? - reg10 = data; - break;; - case 0x14: // SDRAM refresh - // x2029 - break; - case 0x1c: // SDRAM CFG - break; - case 0x30: - reg30 = data; - break; - case 0x74: // IRQ STAT - reg74 &= ~data; - break; - // _kmtlifSetupElanInts: - // 78 = 3f - // 7C = 0 - // 80 = 17 - // 84 = 2b - // 88 = 0 - case 0xd0: // _kmtlifSetCullingRegister - // 6 - break;; - default: - break; - } - } -} - -template -T DYNACALL read_elancmd(u32 addr) -{ - DEBUG_LOG(PVR, "ELAN cmd READ! (%d) %08x", (u32)sizeof(T), addr); - return 0; -} - -static glm::vec4 unpackColorBGRA(u32 color) -{ - return glm::vec4((float)((color >> 16) & 0xff) / 255.f, - (float)((color >> 8) & 0xff) / 255.f, - (float)(color & 0xff) / 255.f, - (float)(color >> 24) / 255.f); -} - -static glm::vec4 unpackColorRGBA(u32 color) -{ - return glm::vec4((float)(color & 0xff) / 255.f, - (float)((color >> 8) & 0xff) / 255.f, - (float)((color >> 16) & 0xff) / 255.f, - (float)(color >> 24) / 255.f); -} - -static glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0) -{ - return glm::vec4((float)red / 255.f, (float)green / 255.f, (float)blue / 255.f, (float)alpha / 255.f); -} - -static u32 packColor(const glm::vec4& color) -{ - return (int)(std::min(1.f, color.a) * 255.f) << 24 - | (int)(std::min(1.f, color.r) * 255.f) << 16 - | (int)(std::min(1.f, color.g) * 255.f) << 8 - | (int)(std::min(1.f, color.b) * 255.f); -} - -static GMP *curGmp; -static glm::mat4x4 curMatrix; -static float *taMVMatrix; -static float *taNormalMatrix; -static glm::mat4 projectionMatrix; -static float *taProjMatrix; -static LightModel *curLightModel; -static ElanBase *curLights[MAX_LIGHTS]; -static float nearPlane = 0.001f; -static float farPlane = 100000.f; -static bool envMapping; -static bool cullingReversed; -static bool openModifierVolume; -static bool shadowedVolume; -static TSP modelTSP; -static glm::vec4 gmpDiffuseColor0; -static glm::vec4 gmpSpecularColor0; -static glm::vec4 gmpDiffuseColor1; -static glm::vec4 gmpSpecularColor1; - -struct State -{ - static constexpr u32 Null = 0xffffffff; - - int listType = -1; - u32 gmp = Null; - u32 instance = Null; - u32 projMatrix = Null; - u32 tileclip = 0; - u32 lightModel = Null; - u32 lights[MAX_LIGHTS] = { - Null, Null, Null, Null, Null, Null, Null, Null, - Null, Null, Null, Null, Null, Null, Null, Null - }; - bool lightModelUpdated = false; - float envMapUOffset = 0.f; - float envMapVOffset = 0.f; - - void reset() - { - listType = -1; - gmp = Null; - instance = Null; - projMatrix = Null; - tileclip = 0; - lightModel = Null; - for (auto& light : lights) - light = Null; - update(); - } - void setMatrix(InstanceMatrix *pinstance) - { - instance = elanRamAddress(pinstance); - updateMatrix(); - } - - void updateMatrix() - { - if (instance == Null) - { - taMVMatrix = nullptr; - taNormalMatrix = nullptr; - envMapUOffset = 0.f; - envMapVOffset = 0.f; - return; - } - InstanceMatrix *mat = (InstanceMatrix *)&RAM[instance]; - DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f\n %f %f %f", - -mat->tm00, -mat->tm10, -mat->tm20, -mat->tm30, - mat->tm01, mat->tm11, mat->tm21, mat->tm31, - -mat->tm02, -mat->tm12, -mat->tm22, -mat->tm32, - mat->lm00, mat->lm10, mat->lm20, - mat->lm01, mat->lm11, mat->lm21, - mat->lm02, mat->lm12, mat->lm22); - - curMatrix = glm::mat4x4{ - -mat->tm00, mat->tm01, -mat->tm02, 0.f, - -mat->tm10, mat->tm11, -mat->tm12, 0.f, - -mat->tm20, mat->tm21, -mat->tm22, 0.f, - -mat->tm30, mat->tm31, -mat->tm32, 1.f - }; - glm::mat4x4 normalMatrix = glm::mat4x4{ - mat->lm00, mat->lm01, mat->lm02, 0.f, - mat->lm10, mat->lm11, mat->lm12, 0.f, - mat->lm20, mat->lm21, mat->lm22, 0.f, - -mat->tm30, mat->tm31, -mat->tm32, 1.f - }; - nearPlane = mat->_near; - farPlane = mat->_far; - envMapUOffset = mat->envMapU; - envMapVOffset = mat->envMapV; - taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix)); - if (normalMatrix != curMatrix) - taNormalMatrix = ta_add_matrix(glm::value_ptr(normalMatrix)); - else - taNormalMatrix = taMVMatrix; - } - - void setProjectionMatrix(void *p) - { - projMatrix = elanRamAddress(p); - updateProjectionMatrix(); - } - - void updateProjectionMatrix() - { - if (projMatrix == Null) - { - taProjMatrix = nullptr; - return; - } - ProjMatrix *pm = (ProjMatrix *)&RAM[projMatrix]; - DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane); - // fx = -m00 * w/2 - // tx = -m20 * w/2 + left + w/2 - // fy = -m11 * h/2 - // ty = -m21 * h/2 + top + h/2 - projectionMatrix = glm::mat4( - -pm->fx, 0, 0, 0, - 0, pm->fy, 0, 0, - -pm->tx, -pm->ty, -1, -1, - 0, 0, 0, 0 - ); - taProjMatrix = ta_add_matrix(glm::value_ptr(projectionMatrix)); - } - - void setGMP(void *p) - { - gmp = elanRamAddress(p); - updateGMP(); - } - - void updateGMP() - { - if (gmp == Null) - { - curGmp = nullptr; - gmpDiffuseColor0 = glm::vec4(0); - gmpSpecularColor0 = glm::vec4(0); - gmpDiffuseColor1 = glm::vec4(0); - gmpSpecularColor1 = glm::vec4(0); - } - else - { - curGmp = (GMP *)&RAM[gmp]; - DEBUG_LOG(PVR, "GMP paramSelect %x", curGmp->paramSelect.full); - if (curGmp->paramSelect.d0) - gmpDiffuseColor0 = unpackColorRGBA(curGmp->diffuse0); - else - gmpDiffuseColor0 = glm::vec4(0); - if (curGmp->paramSelect.s0) - gmpSpecularColor0 = unpackColorRGBA(curGmp->specular0); - else - gmpSpecularColor0 = glm::vec4(0); - if (curGmp->paramSelect.d1) - gmpDiffuseColor1 = unpackColorRGBA(curGmp->diffuse1); - else - gmpDiffuseColor1 = glm::vec4(0); - if (curGmp->paramSelect.s1) - gmpSpecularColor1 = unpackColorRGBA(curGmp->specular1); - else - gmpSpecularColor1 = glm::vec4(0); - } - } - - void setLightModel(void *p) - { - lightModel = elanRamAddress(p); - updateLightModel(); - } - - void updateLightModel() - { - lightModelUpdated = true; - if (lightModel == Null) - curLightModel = nullptr; - else - { - curLightModel = (LightModel *)&RAM[lightModel]; - DEBUG_LOG(PVR, "Light model mask: diffuse %04x specular %04x, ambient base %08x offset %08x", curLightModel->diffuseMask0, curLightModel->specularMask0, - curLightModel->ambientBase0, curLightModel->ambientOffset0); - } - } - - void setLight(int lightId, void *p) - { - lights[lightId] = elanRamAddress(p); - updateLight(lightId); - } - - void updateLight(int lightId) - { - lightModelUpdated = true; - if (lights[lightId] == Null) - { - elan::curLights[lightId] = nullptr; - return; - } - PointLight *plight = (PointLight *)&RAM[lights[lightId]]; - if (plight->pcw.parallelLight) - { - ParallelLight *light = (ParallelLight *)plight; - DEBUG_LOG(PVR, " Parallel light %d: [%x] routing %d dmode %d col %d %d %d dir %f %f %f", light->lightId, plight->pcw.full, - light->routing, light->dmode, - light->red, light->green, light->blue, - light->getDirX(), light->getDirY(), light->getDirZ()); - } - else - { - DEBUG_LOG(PVR, " Point light %d: [%x] routing %d dmode %d smode %d col %d %d %d dir %f %f %f pos %f %f %f dist %f %f angle %f %f", - plight->lightId, plight->pcw.full, plight->routing, plight->dmode, plight->smode, - plight->red, plight->green, plight->blue, - plight->getDirX(), plight->getDirY(), plight->getDirZ(), - plight->posX, plight->posY, plight->posZ, - plight->distA(), plight->distB(), - plight->angleA(), plight->angleB()); - } - elan::curLights[lightId] = plight; - } - - void setClipMode(PCW pcw) - { - tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28); - } - - void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax) - { - u32 t = tileclip & 0xF0000000; - t |= xmin & 0x3f; // 6 bits - t |= (xmax & 0x3f) << 6; // 6 bits - t |= (ymin & 0x1f) << 12; // 5 bits - t |= (ymax & 0x1f) << 17; // 5 bits - tileclip = t; - } - - void update() - { - updateMatrix(); - updateProjectionMatrix(); - updateGMP(); - updateLightModel(); - for (u32 i = 0; i < MAX_LIGHTS; i++) - updateLight(i); - } - - static u32 elanRamAddress(void *p) - { - if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE) - return Null; - else - return (u32)((u8 *)p - RAM); - } - - void serialize(Serializer& ser) - { - ser << listType; - ser << gmp; - ser << instance; - ser << projMatrix; - ser << tileclip; - ser << lightModel; - ser << lights; - } - - void deserialize(Deserializer& deser) - { - if (deser.version() < Deserializer::V24) - { - reset(); - return; - } - deser >> listType; - deser >> gmp; - deser >> instance; - deser >> projMatrix; - deser >> tileclip; - deser >> lightModel; - deser >> lights; - update(); - } -}; - -static State state; - -static void setCoords(Vertex& vtx, float x, float y, float z) -{ - vtx.x = x; - vtx.y = y; - vtx.z = z; -} - -template -static void setUV(const Ts& vs, Vertex& vd) -{ - if (envMapping) - { - vd.u = state.envMapUOffset; - vd.v = state.envMapVOffset; - vd.u1 = state.envMapUOffset; - vd.v1 = state.envMapVOffset; - } - else - { - vd.u = vs.uv.u; - vd.v = vs.uv.v; - vd.u1 = vs.uv.u; - vd.v1 = vs.uv.v; - } -} - -static void SetEnvMapUV(Vertex& vtx) -{ - if (envMapping) - { - vtx.u = state.envMapUOffset; - vtx.v = state.envMapVOffset; - vtx.u1 = state.envMapUOffset; - vtx.v1 = state.envMapVOffset; - } -} - -template -glm::vec3 getNormal(const T& vtx) -{ - return { (int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f }; -} - -template<> -glm::vec3 getNormal(const N2_VERTEX_VNU& vtx) -{ - return { vtx.normal.nx, vtx.normal.ny, vtx.normal.nz }; -} - -template -void setNormal(Vertex& vd, const T& vs) -{ - glm::vec3 normal = getNormal(vs); - vd.nx = normal.x; - vd.ny = normal.y; - vd.nz = normal.z; -} - -static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1) -{ - baseCol0 += gmpDiffuseColor0; - offsetCol0 += gmpSpecularColor0; - baseCol1 += gmpDiffuseColor1; - offsetCol1 += gmpSpecularColor1; -} - -template -static void convertVertex(const T& vs, Vertex& vd); - -template<> -void convertVertex(const N2_VERTEX& vs, Vertex& vd) -{ - setCoords(vd, vs.x, vs.y, vs.z); - setNormal(vd, vs); - SetEnvMapUV(vd); - glm::vec4 baseCol0(0); - glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); - glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); - - *(u32 *)vd.col = packColor(baseCol0); - *(u32 *)vd.spc = packColor(offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1); - *(u32 *)vd.spc1 = packColor(offsetCol1); -} - -template<> -void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd) -{ - setCoords(vd, vs.x, vs.y, vs.z); - setNormal(vd, vs); - SetEnvMapUV(vd); - glm::vec4 baseCol0 = unpackColorRGBA(vs.rgb.argb0); - glm::vec4 offsetCol0(0); - glm::vec4 baseCol1 = unpackColorRGBA(vs.rgb.argb1); - glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); - *(u32 *)vd.col = packColor(baseCol0); - *(u32 *)vd.spc = packColor(offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1); - *(u32 *)vd.spc1 = packColor(offsetCol1); -} - -template<> -void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd) -{ - setCoords(vd, vs.x, vs.y, vs.z); - setNormal(vd, vs); - setUV(vs, vd); - glm::vec4 baseCol0(0); - glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); - glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); - *(u32 *)vd.col = packColor(baseCol0); - *(u32 *)vd.spc = packColor(offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1); - *(u32 *)vd.spc1 = packColor(offsetCol1); -} - -template<> -void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd) -{ - setCoords(vd, vs.x, vs.y, vs.z); - setNormal(vd, vs); - setUV(vs, vd); - glm::vec4 baseCol0 = unpackColorRGBA(vs.rgb.argb0); - glm::vec4 offsetCol0(0); - glm::vec4 baseCol1 = unpackColorRGBA(vs.rgb.argb1); - glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); - *(u32 *)vd.col = packColor(baseCol0); - *(u32 *)vd.spc = packColor(offsetCol0); - *(u32 *)vd.col1 = packColor(baseCol1); - *(u32 *)vd.spc1 = packColor(offsetCol1); -} - -template<> -void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd) -{ - setCoords(vd, vs.x, vs.y, vs.z); - setNormal(vd, vs); - setUV(vs, vd); - glm::vec4 baseCol0(0); - glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); - glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); - *(u32 *)vd.col = packColor(baseCol0); - *(u32 *)vd.col1 = packColor(baseCol1); - // Stuff the bump map normals and parameters in the specular colors - vd.spc[0] = vs.bump.tangent.x; - vd.spc[1] = vs.bump.tangent.y; - vd.spc[2] = vs.bump.tangent.z; - vd.spc1[0] = vs.bump.bitangent.x; - vd.spc1[1] = vs.bump.bitangent.y; - vd.spc1[2] = vs.bump.bitangent.z; - vd.spc[3] = vs.bump.scaleFactor.bumpDegree; // always 255? - vd.spc1[3] = vs.bump.scaleFactor.fixedOffset; // always 0? -// int nx = (int8_t)vs.header.nx; -// int ny = (int8_t)vs.header.ny; -// int nz = (int8_t)vs.header.nz; -// printf("BumpMap vtx deg %d off %d normal %d %d %d tangent %d %d %d bitangent %d %d %d dot %d %d %d\n", vs.bump.scaleFactor.bumpDegree, vs.bump.scaleFactor.fixedOffset, -// nx, ny, nz, -// vs.bump.tangent.x, vs.bump.tangent.y, vs.bump.tangent.z, vs.bump.bitangent.x, vs.bump.bitangent.y, vs.bump.bitangent.z, -// nx * vs.bump.tangent.x + ny * vs.bump.tangent.y + nz * vs.bump.tangent.z, -// nx * vs.bump.bitangent.x + ny * vs.bump.bitangent.y + nz * vs.bump.bitangent.z, -// vs.bump.tangent.x * vs.bump.bitangent.x + vs.bump.tangent.y * vs.bump.bitangent.y + vs.bump.tangent.z * vs.bump.bitangent.z -// ); -} - -template -static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& max) -{ - min = { 1e38f, 1e38f, 1e38f }; - max = { -1e38f, -1e38f, -1e38f }; - for (u32 i = 0; i < count; i++) - { - glm::vec3 pos{ vertices[i].x, vertices[i].y, vertices[i].z }; - min = glm::min(min, pos); - max = glm::max(max, pos); - } - glm::vec4 center((min + max) / 2.f, 1); - glm::vec4 extents(max - glm::vec3(center), 0); - // transform - center = curMatrix * center; - glm::vec3 extentX = curMatrix * glm::vec4(extents.x, 0, 0, 0); - glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0); - glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0); - // new AA extents - glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ); - - min = glm::vec3(center) - newExtent; - max = glm::vec3(center) + newExtent; -} - -template -static bool isBetweenNearAndFar(const T* vertices, u32 count, bool& needNearClipping) -{ - glm::vec3 min; - glm::vec3 max; - boundingBox(vertices, count, min, max); - if (min.z > -nearPlane || max.z < -farPlane) - return false; - - glm::vec4 pmin = projectionMatrix * glm::vec4(min, 1); - glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1); - if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y)) - return false; - - needNearClipping = max.z > -nearPlane; - - return true; -} - -class TriangleStripClipper -{ -public: - TriangleStripClipper(bool enabled) : enabled(enabled) {} - - void add(const Vertex& vtx) - { - if (enabled) - { - float z = vtx.x * curMatrix[0][2] + vtx.y * curMatrix[1][2] + vtx.z * curMatrix[2][2] + curMatrix[3][2]; - float dist = -z - nearPlane; - clip(vtx, dist); - count++; - } - else - { - ta_add_vertex(vtx); - } - } - -private: - void sendVertex(const Vertex& r) - { - if (dupeNext) - ta_add_vertex(r); - dupeNext = false; - ta_add_vertex(r); - } - - // Three-Dimensional Homogeneous Clipping of Triangle Strips - // Patrick-Gilles Maillot. Graphics Gems II - 1991 - void clip(const Vertex& r, float rDist) - { - clipCode >>= 1; - clipCode |= (int)(rDist < 0) << 2; - if (count == 1) - { - switch (clipCode >> 1) { - case 0: // Q and R inside - sendVertex(q); - sendVertex(r); - break; - case 1: // Q outside, R inside - sendVertex(interpolate(q, qDist, r, rDist)); - sendVertex(r); - break; - case 2: // Q inside, R outside - sendVertex(q); - sendVertex(interpolate(q, qDist, r, rDist)); - break; - case 3: // Q and R outside - break; - } - } - else if (count >= 2) - { - switch (clipCode) - { - case 0: // all inside - sendVertex(r); - break; - case 1: // P outside, Q and R inside - sendVertex(interpolate(r, rDist, p, pDist)); - sendVertex(q); - sendVertex(r); - break; - case 2: // P inside, Q outside and R inside - sendVertex(r); - sendVertex(interpolate(q, qDist, r, rDist)); - sendVertex(r); - break; - case 3: // P and Q outside, R inside - { - Vertex tmp = interpolate(r, rDist, p, pDist); - sendVertex(tmp); - sendVertex(tmp); - sendVertex(tmp); // One more to preserve strip swap order - sendVertex(interpolate(q, qDist, r, rDist)); - sendVertex(r); - } - break; - case 4: // P and Q inside, R outside - sendVertex(interpolate(r, rDist, p, pDist)); - sendVertex(q); - sendVertex(interpolate(q, qDist, r, rDist)); - break; - case 5: // P outside, Q inside, R outside - sendVertex(interpolate(q, qDist, r, rDist)); - break; - case 6: // P inside, Q and R outside - { - Vertex tmp = interpolate(r, rDist, p, pDist); - sendVertex(tmp); - sendVertex(tmp); - sendVertex(tmp); // One more to preserve strip swap order - } - break; - case 7: // P, Q and R outside - dupeNext = !dupeNext; - break; - } - } - p = q; - pDist = qDist; - q = r; - qDist = rDist; - } - - Vertex interpolate(const Vertex& v1, float f1, const Vertex& v2, float f2) - { - Vertex v; - float a2 = std::abs(f1) / (std::abs(f1) + std::abs(f2)); - float a1 = 1 - a2; - v.x = v1.x * a1 + v2.x * a2; - v.y = v1.y * a1 + v2.y * a2; - v.z = v1.z * a1 + v2.z * a2; - - v.u = v1.u * a1 + v2.u * a2; - v.v = v1.v * a1 + v2.v * a2; - v.u1 = v1.u1 * a1 + v2.u1 * a2; - v.v1 = v1.v1 * a1 + v2.v1 * a2; - - for (size_t i = 0; i < ARRAY_SIZE(v1.col); i++) - { - v.col[i] = (u8)std::round(v1.col[i] * a1 + v2.col[i] * a2); - v.spc[i] = (u8)std::round(v1.spc[i] * a1 + v2.spc[i] * a2); - v.col1[i] = (u8)std::round(v1.col1[i] * a1 + v2.col1[i] * a2); - v.spc1[i] = (u8)std::round(v1.spc1[i] * a1 + v2.spc1[i] * a2); - } - v.nx = v1.nx * a1 + v2.nx * a2; - v.ny = v1.ny * a1 + v2.ny * a2; - v.nz = v1.nz * a1 + v2.nz * a2; - - return v; - } - - bool enabled; - int count = 0; - int clipCode = 0; - Vertex p; - float pDist = 0; - Vertex q; - float qDist = 0; - bool dupeNext = false; -}; - -template -static void sendVertices(const ICHList *list, const T* vtx, bool needClipping) -{ - Vertex taVtx; - verify(list->vertexSize() > 0); - - Vertex fanCenterVtx{}; - Vertex fanLastVtx{}; - bool stripStart = true; - int outStripIndex = 0; - TriangleStripClipper clipper(needClipping); - - for (u32 i = 0; i < list->vtxCount; i++) - { - convertVertex(*vtx, taVtx); - - if (stripStart) - { - // Center vertex if triangle fan - //verify(vtx->header.isFirstOrSecond()); This fails for some strips: strip=1 fan=0 (soul surfer) - fanCenterVtx = taVtx; - if (outStripIndex > 0) - { - // use degenerate triangles to link strips - clipper.add(fanLastVtx); - clipper.add(taVtx); - outStripIndex += 2; - if (outStripIndex & 1) - { - clipper.add(taVtx); - outStripIndex++; - } - } - stripStart = false; - } - else if (vtx->header.isFan()) - { - // use degenerate triangles to link strips - clipper.add(fanLastVtx); - clipper.add(fanCenterVtx); - outStripIndex += 2; - if (outStripIndex & 1) - { - clipper.add(fanCenterVtx); - outStripIndex++; - } - // Triangle fan - clipper.add(fanCenterVtx); - clipper.add(fanLastVtx); - outStripIndex += 2; - } - clipper.add(taVtx); - outStripIndex++; - fanLastVtx = taVtx; - if (vtx->header.endOfStrip) - stripStart = true; - - vtx++; - } -} - -class ModifierVolumeClipper -{ -public: - ModifierVolumeClipper(bool enabled) : enabled(enabled) {} - - void add(ModTriangle& tri) - { - if (enabled) - { - glm::vec3 dist{ - tri.x0 * curMatrix[0][2] + tri.y0 * curMatrix[1][2] + tri.z0 * curMatrix[2][2] + curMatrix[3][2], - tri.x1 * curMatrix[0][2] + tri.y1 * curMatrix[1][2] + tri.z1 * curMatrix[2][2] + curMatrix[3][2], - tri.x2 * curMatrix[0][2] + tri.y2 * curMatrix[1][2] + tri.z2 * curMatrix[2][2] + curMatrix[3][2] - }; - dist = -dist - nearPlane; - ModTriangle newTri; - int n = sutherlandHodgmanClip(dist, tri, newTri); - switch (n) - { - case 0: - // fully clipped - break; - case 3: - ta_add_triangle(tri); - break; - case 4: - ta_add_triangle(tri); - ta_add_triangle(newTri); - break; - } - } - else - { - ta_add_triangle(tri); - } - } - -private: - // - // Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire - // Journal of Graphics GPU and Game Tools - November 2011 - // - glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist) - { - return (A * std::abs(Bdist) + B * std::abs(Adist)) / (std::abs(Adist) + std::abs(Bdist)); - } - - // Clip the triangle 'trig' with respect to the provided distances to the clipping plane. - int sutherlandHodgmanClip(glm::vec3& dist, ModTriangle& trig, ModTriangle& newTrig) - { - constexpr float clipEpsilon = 0.f; //0.00001; - constexpr float clipEpsilon2 = 0.f; //0.01; - - if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2)))) - // all clipped - return 0; - if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon)))) - // none clipped - return 3; - - // There are either 1 or 2 vertices above the clipping plane. - glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f)); - bool nextIsAbove; - glm::vec3 v0(trig.x0, trig.y0, trig.z0); - glm::vec3 v1(trig.x1, trig.y1, trig.z1); - glm::vec3 v2(trig.x2, trig.y2, trig.z2); - glm::vec3 v3; - // Find the CCW-most vertex above the plane. - if (above[1] && !above[0]) - { - // Cycle once CCW. Use v3 as a temp - nextIsAbove = above[2]; - v3 = v0; - v0 = v1; - v1 = v2; - v2 = v3; - dist = glm::vec3(dist.y, dist.z, dist.x); - } - else if (above[2] && !above[1]) - { - // Cycle once CW. Use v3 as a temp. - nextIsAbove = above[0]; - v3 = v2; - v2 = v1; - v1 = v0; - v0 = v3; - dist = glm::vec3(dist.z, dist.x, dist.y); - } - else - nextIsAbove = above[1]; - trig.x0 = v0.x; - trig.y0 = v0.y; - trig.z0 = v0.z; - // We always need to clip v2-v0. - v3 = intersect(v0, dist[0], v2, dist[2]); - if (nextIsAbove) - { - v2 = intersect(v1, dist[1], v2, dist[2]); - trig.x1 = v1.x; - trig.y1 = v1.y; - trig.z1 = v1.z; - trig.x2 = v2.x; - trig.y2 = v2.y; - trig.z2 = v2.z; - newTrig.x0 = v0.x; - newTrig.y0 = v0.y; - newTrig.z0 = v0.z; - newTrig.x1 = v2.x; - newTrig.y1 = v2.y; - newTrig.z1 = v2.z; - newTrig.x2 = v3.x; - newTrig.y2 = v3.y; - newTrig.z2 = v3.z; - - return 4; - } - else - { - v1 = intersect(v0, dist[0], v1, dist[1]); - trig.x1 = v1.x; - trig.y1 = v1.y; - trig.z1 = v1.z; - trig.x2 = v3.x; - trig.y2 = v3.y; - trig.z2 = v3.z; - - return 3; - } - } - - bool enabled; -}; - -template -static void sendMVPolygon(ICHList *list, const T *vtx, bool needClipping) -{ - ModifierVolumeParam mvp{}; - mvp.isp.full = list->isp.full; - if (!openModifierVolume) - mvp.isp.CullMode = 0; - mvp.isp.VolumeLast = list->pcw.volume; - mvp.isp.DepthMode &= 3; - mvp.mvMatrix = taMVMatrix; - mvp.projMatrix = taProjMatrix; - ta_add_poly(state.listType, mvp); - - ModifierVolumeClipper clipper(needClipping); - glm::vec3 vtx0{}; - glm::vec3 vtx1{}; - u32 stripStart = 0; - - for (u32 i = 0; i < list->vtxCount; i++) - { - glm::vec3 v(vtx->x, vtx->y, vtx->z); - u32 triIdx = i - stripStart; - if (triIdx >= 2) - { - ModTriangle tri; - - if (triIdx & 1) - { - tri.x1 = vtx0.x; - tri.y1 = vtx0.y; - tri.z1 = vtx0.z; - - tri.x0 = vtx1.x; - tri.y0 = vtx1.y; - tri.z0 = vtx1.z; - } - else - { - tri.x0 = vtx0.x; - tri.y0 = vtx0.y; - tri.z0 = vtx0.z; - - tri.x1 = vtx1.x; - tri.y1 = vtx1.y; - tri.z1 = vtx1.z; - } - tri.x2 = v.x; - tri.y2 = v.y; - tri.z2 = v.z; - - clipper.add(tri); - } - if (vtx->header.endOfStrip) - stripStart = i + 1; - vtx0 = vtx1; - vtx1 = v; - vtx++; - } -} - -static N2LightModel *taLightModel; - -static void sendLights() -{ - if (!state.lightModelUpdated) - return; - - state.lightModelUpdated = false; - N2LightModel model; - model.lightCount = 0; - if (curLightModel == nullptr) - { - model.useBaseOver = false; - for (int i = 0; i < 2; i++) - { - model.ambientMaterialBase[i] = false; - model.ambientMaterialOffset[i] = false; - model.ambientBase[i][0] = model.ambientBase[i][1] = model.ambientBase[i][2] = model.ambientBase[i][3] = 1.f; - } - memset(model.ambientOffset, 0, sizeof(model.ambientOffset)); - taLightModel = nullptr; - return; - } - model.ambientMaterialBase[0] = curLightModel->useAmbientBase0; - model.ambientMaterialBase[1] = curLightModel->useAmbientBase1; - model.ambientMaterialOffset[0] = curLightModel->useAmbientOffset0; - model.ambientMaterialOffset[1] = curLightModel->useAmbientOffset1; - model.useBaseOver = curLightModel->useBaseOver; - model.bumpId1 = -1; - model.bumpId2 = -1; - memcpy(model.ambientBase[0], glm::value_ptr(unpackColorBGRA(curLightModel->ambientBase0)), sizeof(model.ambientBase[0])); - memcpy(model.ambientBase[1], glm::value_ptr(unpackColorBGRA(curLightModel->ambientBase1)), sizeof(model.ambientBase[1])); - memcpy(model.ambientOffset[0], glm::value_ptr(unpackColorBGRA(curLightModel->ambientOffset0)), sizeof(model.ambientOffset[0])); - memcpy(model.ambientOffset[1], glm::value_ptr(unpackColorBGRA(curLightModel->ambientOffset1)), sizeof(model.ambientOffset[1])); - for (u32 i = 0; i < MAX_LIGHTS; i++) - { - N2Light& light = model.lights[model.lightCount]; - for (int vol = 0; vol < 2; vol++) - { - light.diffuse[vol] = curLightModel->isDiffuse(i, vol); - light.specular[vol] = curLightModel->isSpecular(i, vol); - } - if (!light.diffuse[0] && !light.specular[0] - && !light.diffuse[1] && !light.specular[1]) - continue; - if (curLights[i] == nullptr) - { - INFO_LOG(PVR, "Light %d is referenced but undefined", i); - continue; - } - if (i == curLightModel->bumpId1) - model.bumpId1 = model.lightCount; - if (i == curLightModel->bumpId2) - model.bumpId2 = model.lightCount; - light.parallel = curLights[i]->pcw.parallelLight; - if (light.parallel) - { - ParallelLight *plight = (ParallelLight *)curLights[i]; - memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); - light.routing = plight->routing; - light.dmode = plight->dmode; - light.smode = N2_LMETHOD_SINGLE_SIDED; - memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), - sizeof(light.direction)); - } - else - { - PointLight *plight = (PointLight *)curLights[i]; - memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); - light.routing = plight->routing; - light.dmode = plight->dmode; - light.smode = plight->smode; - if (plight->posX == 0 && plight->posY == 0 && plight->posZ == 0 - && plight->_distA == 0 && plight->_distB == 0 - && plight->_angleA == 0 && plight->_angleB == 0) - { - // Lights not using distance or angle attenuation are converted into parallel lights on the CPU side? - DEBUG_LOG(PVR, "Point -> parallel light[%d] dir %d %d %d", i, -(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ); - light.parallel = true; - memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), - sizeof(light.direction)); - } - else - { - memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), - sizeof(light.direction)); - memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position)); - light.distAttnMode = plight->dattenmode; - light.attnDistA = plight->distA(); - light.attnDistB = plight->distB(); - light.attnAngleA = plight->angleA(); - light.attnAngleB = plight->angleB(); - } - } - model.lightCount++; - } - taLightModel = ta_add_light(model); -} - -static void setStateParams(PolyParam& pp, const ICHList *list) -{ - sendLights(); - pp.tileclip = state.tileclip; - pp.mvMatrix = taMVMatrix; - pp.normalMatrix = taNormalMatrix; - pp.projMatrix = taProjMatrix; - pp.lightModel = taLightModel; - pp.envMapping[0] = false; - pp.envMapping[1] = false; - if (curGmp != nullptr) - { - pp.glossCoef[0] = curGmp->gloss.getCoef0(); - pp.glossCoef[1] = curGmp->gloss.getCoef1(); - pp.constantColor[0] = curGmp->paramSelect.b0; - pp.diffuseColor[0] = curGmp->paramSelect.d0; - pp.specularColor[0] = curGmp->paramSelect.s0; - pp.constantColor[1] = curGmp->paramSelect.b1; - pp.diffuseColor[1] = curGmp->paramSelect.d1; - pp.specularColor[1] = curGmp->paramSelect.s1; - - // Environment mapping - if (curGmp->paramSelect.e0) - { - pp.pcw.Texture = 1; - pp.pcw.Offset = 0; - pp.tsp.UseAlpha = 1; - pp.tsp.IgnoreTexA = 0; - pp.envMapping[0] = true; - pp.tcw = list->tcw0; - envMapping = true; - } - if (curGmp->paramSelect.e1) - { - pp.pcw.Texture = 1; - pp.pcw.Offset = 0; - pp.tsp1.UseAlpha = 1; - pp.tsp1.IgnoreTexA = 0; - pp.envMapping[1] = true; - pp.tcw1 = list->tcw1; - envMapping = true; - } - } - pp.tsp.full ^= modelTSP.full; - pp.tsp1.full ^= modelTSP.full; - - // projFlip is for left-handed projection matrices (initd rear view mirror) - bool projFlip = taProjMatrix != nullptr && std::signbit(taProjMatrix[0]) == std::signbit(taProjMatrix[5]); - pp.isp.CullMode ^= (u32)cullingReversed ^ (u32)projFlip; - pp.pcw.Shadow ^= shadowedVolume; - if (pp.pcw.Shadow == 0 || pp.pcw.Volume == 0) - { - pp.tsp1.full = -1; - pp.tcw1.full = -1; - pp.glossCoef[1] = 0; - pp.constantColor[1] = false; - pp.diffuseColor[1] = false; - pp.specularColor[1] = false; - } -// else if (pp.pcw.Volume == 1) -// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", state.listType, list->flags, curGmp->paramSelect.full, -// pp.tcw.full ^ pp.tcw1.full, pp.tsp.full ^ pp.tsp1.full); -} - -static void sendPolygon(ICHList *list) -{ - bool needClipping; - - switch (list->flags) - { - case ICHList::VTX_TYPE_V: - { - N2_VERTEX *vtx = (N2_VERTEX *)((u8 *)list + sizeof(ICHList)); - if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) - break; - if (state.listType & 1) - sendMVPolygon(list, vtx, needClipping); - else - { - PolyParam pp{}; - pp.pcw.Shadow = list->pcw.shadow; - pp.pcw.Texture = list->pcw.texture; - pp.pcw.Offset = list->pcw.offset; - pp.pcw.Gouraud = list->pcw.gouraud; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.tsp = list->tsp0; - pp.tsp1 = list->tsp1; - setStateParams(pp, list); - ta_add_poly(state.listType, pp); - - sendVertices(list, vtx, needClipping); - } - } - break; - - case ICHList::VTX_TYPE_VU: - { - N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)((u8 *)list + sizeof(ICHList)); - if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) - break; - if (state.listType & 1) - sendMVPolygon(list, vtx, needClipping); - else - { - PolyParam pp{}; - pp.pcw.Shadow = list->pcw.shadow; - pp.pcw.Texture = list->pcw.texture; - pp.pcw.Offset = list->pcw.offset; - pp.pcw.Gouraud = list->pcw.gouraud; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.tsp = list->tsp0; - pp.tcw = list->tcw0; - pp.tsp1 = list->tsp1; - pp.tcw1 = list->tcw1; - setStateParams(pp, list); - ta_add_poly(state.listType, pp); - - sendVertices(list, vtx, needClipping); - } - } - break; - - case ICHList::VTX_TYPE_VUR: - { - verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0); - N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)((u8 *)list + sizeof(ICHList)); - if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) - break; - PolyParam pp{}; - pp.pcw.Shadow = list->pcw.shadow; - pp.pcw.Texture = list->pcw.texture; - pp.pcw.Offset = list->pcw.offset; - pp.pcw.Gouraud = list->pcw.gouraud; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.tsp = list->tsp0; - pp.tcw = list->tcw0; - pp.tsp1 = list->tsp1; - pp.tcw1 = list->tcw1; - setStateParams(pp, list); - ta_add_poly(state.listType, pp); - - sendVertices(list, vtx, needClipping); - } - break; - - case ICHList::VTX_TYPE_VR: - { - N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)((u8 *)list + sizeof(ICHList)); - if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) - break; - PolyParam pp{}; - pp.pcw.Shadow = list->pcw.shadow; - pp.pcw.Texture = list->pcw.texture; - pp.pcw.Offset = list->pcw.offset; - pp.pcw.Gouraud = list->pcw.gouraud; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.tsp = list->tsp0; - pp.tsp1 = list->tsp1; - setStateParams(pp, list); - ta_add_poly(state.listType, pp); - - sendVertices(list, vtx, needClipping); - } - break; - - case ICHList::VTX_TYPE_VUB: - { - // TODO - //printf("BUMP MAP fmt %d filter %d src select %d dst %d\n", list->tcw0.PixelFmt, list->tsp0.FilterMode, list->tsp0.SrcSelect, list->tsp0.DstSelect); - N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB *)((u8 *)list + sizeof(ICHList)); - if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) - break; - PolyParam pp{}; - pp.pcw.Shadow = list->pcw.shadow; - pp.pcw.Texture = 1; - pp.pcw.Offset = 1; - pp.pcw.Gouraud = list->pcw.gouraud; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.tsp = list->tsp0; - pp.tcw = list->tcw0; - pp.tsp1 = list->tsp1; - pp.tcw1 = list->tcw1; - setStateParams(pp, list); - ta_add_poly(state.listType, pp); - - sendVertices(list, vtx, needClipping); - } - break; - - default: - WARN_LOG(PVR, "Unhandled poly format %x", list->flags); - die("Unsupported"); - break; - } - envMapping = false; -} - -template -static void executeCommand(u8 *data, int size) -{ -// verify(size >= 0); -// verify(size < (int)ELAN_RAM_SIZE); -// if (0x2b00 == (u32)(data - RAM)) -// for (int i = 0; i < size; i += 4) -// DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); - - while (size >= 32) - { - const int oldSize = size; - ElanBase *cmd = (ElanBase *)data; - if (cmd->pcw.naomi2) - { - switch(cmd->pcw.n2Command) - { - case PCW::null: - size -= 32; - break; - - case PCW::projMatrix: - if (Active) - state.setProjectionMatrix(data); - size -= sizeof(ProjMatrix); - break; - - case PCW::matrixOrLight: - { - InstanceMatrix *instance = (InstanceMatrix *)data; - if (instance->isInstanceMatrix()) - { - //DEBUG_LOG(PVR, "Model instance"); - if (Active) - state.setMatrix(instance); - size -= sizeof(InstanceMatrix); - break; - } - if (Active) - { - if (instance->id1 & 0x10) - { - state.setLightModel(data); - } - else //if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4 - { - if (instance->pcw.parallelLight) - { - ParallelLight *light = (ParallelLight *)data; - state.setLight(light->lightId, data); - } - else - { - PointLight *light = (PointLight *)data; - state.setLight(light->lightId, data); - } - } - //else - //{ - // WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2); - // for (int i = 0; i < 32; i += 4) - // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); - //} - } - size -= sizeof(LightModel); - } - break; - - case PCW::model: - { - Model *model = (Model *)data; - if (Active) - { - cullingReversed = model->param.cwCulling == 0; - state.setClipMode(model->pcw); - openModifierVolume = model->param.openVolume; - shadowedVolume = model->pcw.shadow; - modelTSP = model->tsp; - DEBUG_LOG(PVR, "Model offset %x size %x pcw %08x tsp %08x", model->offset, model->size, model->pcw.full, model->tsp.full); - } - executeCommand(&RAM[model->offset & 0x1ffffff8], model->size); - cullingReversed = false; - openModifierVolume = false; - shadowedVolume = false; - modelTSP.full = 0; - size -= sizeof(Model); - } - break; - - case PCW::registerWait: - { - RegisterWait *wait = (RegisterWait *)data; - if (wait->offset != (u32)-1 && wait->mask != 0) - { - DEBUG_LOG(PVR, "Register wait %x mask %x", wait->offset, wait->mask); - // wait for interrupt - HollyInterruptID inter; - switch (wait->mask) - { - case 0x80: - inter = holly_OPAQUE; - break; - case 0x100: - inter = holly_OPAQUEMOD; - break; - case 0x200: - inter = holly_TRANS; - break; - case 0x400: - inter = holly_TRANSMOD; - break; - case 0x200000: - inter = holly_PUNCHTHRU; - break; - default: - WARN_LOG(PVR, "Unknown interrupt mask %x", wait->mask); - die("unexpected"); - inter = holly_OPAQUE; - break; - } - asic_RaiseInterruptBothCLX(inter); - TA_ITP_CURRENT += 32; - if (Active) - state.reset(); - } - size -= sizeof(RegisterWait); - } - break; - - case PCW::link: - { - Link *link = (Link *)data; - if (link->offset & 0x80000000) - { - // elan v10 only - DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->_res & 0x1ffffff8, link->size); - memcpy(&vram[link->_res & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size); - reg74 |= 1; - } - else if (link->offset & 0x20000000) - { - // elan v10 only - DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & 0x01fffff8, link->_res & VRAM_MASK, link->size); - memcpy(&vram[link->_res & VRAM_MASK], &RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size); - reg74 |= 1; - } - else - { - DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size); - executeCommand(&RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size); - } - size -= sizeof(Link); - } - break; - - case PCW::gmp: - if (Active) - state.setGMP(data); - size -= sizeof(GMP); - break; - - case PCW::ich: - { - ICHList *ich = (ICHList *)data; - if (Active) - { - DEBUG_LOG(PVR, "ICH flags %x, %d verts", ich->flags, ich->vtxCount); - sendPolygon(ich); - } - size -= sizeof(ICHList) + ich->vertexSize() * ich->vtxCount; - } - break; - - default: - DEBUG_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command); - size -= 32; - break; - } - } - else - { - u32 pcw = *(u32 *)data; - if ((pcw & 0xd0ffff00) == 0x808c0000) // display list - { - if (Active) - { - DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf); - state.reset(); - state.listType = (pcw >> 24) & 0xf; - // TODO is this the right place for this? - SQBuffer eol{}; - ta_vtx_data32(&eol); - } - size -= 24 * 4; - } - else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping - { - if (Active) - { - state.setClipMode((PCW&)pcw); - DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip); - } - size -= 0xE0; - } - else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked? - { - if (Active) - { - DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); - state.listType = (pcw >> 24) & 0xf; - size -= 32; - ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32); - } - size = 32; - } - else if (pcw == 0x20000000) - { - // User clipping - if (Active) - { - u32 *tiles = (u32 *)data + 4; - DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32, - tiles[2] * 32, tiles[3] * 32); - state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]); - } - size -= 32; - } - else - { - if (Active) - { - if (pcw != 0) - INFO_LOG(PVR, "Unhandled command %x", pcw); - for (int i = 0; i < 32; i += 4) - DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); - } - size -= 32; - } - } - data += oldSize - size; - } -} - -template -void DYNACALL write_elancmd(u32 addr, T data) -{ -// DEBUG_LOG(PVR, "ELAN cmd %08x = %x", addr, data); - addr &= 0x1f; - *(T *)&((u8 *)elanCmd)[addr] = data; - - if (addr == 0x1c) - { - if (!ggpo::rollbacking()) - executeCommand((u8 *)elanCmd, sizeof(elanCmd)); - else - executeCommand((u8 *)elanCmd, sizeof(elanCmd)); - if (!(reg74 & 1)) - reg74 |= 2; - reg74 &= ~0x3c; - } -} - -template -T DYNACALL read_elanram(u32 addr) -{ - return *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)]; -} - -template -void DYNACALL write_elanram(u32 addr, T data) -{ - *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)] = data; -} - -void init() -{ -} - -void reset(bool hard) -{ - if (hard) - { - memset(RAM, 0, ELAN_RAM_SIZE); - state.reset(); - } -} - -void term() -{ -} - -void vmem_init() -{ - elanRegHandler = _vmem_register_handler_Template(read_elanreg, write_elanreg); - elanCmdHandler = _vmem_register_handler_Template(read_elancmd, write_elancmd); - elanRamHandler = _vmem_register_handler_Template(read_elanram, write_elanram); -} - -void vmem_map(u32 base) -{ - _vmem_map_handler(elanRegHandler, base | 8, base | 8); - _vmem_map_handler(elanCmdHandler, base | 9, base | 9); - _vmem_map_handler(elanRamHandler, base | 0xA, base | 0xB); - _vmem_map_block(RAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1); -} - -void serialize(Serializer& ser) -{ - if (!settings.platform.isNaomi2()) - return; - ser << reg10; - ser << reg74; - ser << elanCmd; - if (!ser.rollback()) - ser.serialize(RAM, ELAN_RAM_SIZE); - state.serialize(ser); -} - -void deserialize(Deserializer& deser) -{ - if (!settings.platform.isNaomi2()) - return; - deser >> reg10; - deser >> reg74; - deser >> elanCmd; - if (!deser.rollback()) - deser.deserialize(RAM, ELAN_RAM_SIZE); - state.deserialize(deser); -} - -} +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . + */ +/* + * VideoLogic custom transformation & lighting (T&L) chip (codenamed: ELAN) + * 32 MB RAM + * Clock: 100 MHz + * 16 light sources per polygon + * ambient, parallel, point or spot (Fog lights and alpha lights also exist) + * Perspective conversion + * Near, far and side clipping, offscreen and backface culling + * bump mapping, environmental mapping + * dynamic & static model processing + * model cache system + * + * Each PVR2 chip renders half the screen (rectangular, stripes, and checker board options) + * so textures have to be duplicated in each vram + * + * Area 0: + * 005f6800 - 005f7cff asic A regs + * 005f8000 - 005f9fff CLXA regs + * 025f6800 - 025f7cff asic B regs + * 025f8000 - 025f9fff CLXB regs + * + * Area 1: + * 05000000 - 06ffffff CLXA vram + * 07000000 - 08ffffff CLXB vram + * + * Area 2: + * 085f6800 - 085f7cff write both asic regs + * 085f8000 - 085f9fff write both PVR regs + * 08800000 - 088000ff? elan regs + * 09000000 - ? elan command buffer + * 0A000000 - 0bfffffff elan RAM + */ +#include "elan.h" +#include "hw/mem/_vmem.h" +#include "pvr_mem.h" +#include "ta.h" +#include "ta_ctx.h" +#include "hw/holly/holly_intc.h" +#include "hw/holly/sb.h" +#include "hw/pvr/Renderer_if.h" +#include "hw/sh4/sh4_sched.h" +#include "hw/sh4/sh4_mem.h" +#include "emulator.h" +#include "serialize.h" +#include "elan_struct.h" +#include "network/ggpo.h" +#include +#include +#include +#include + +namespace elan { + +static _vmem_handler elanRegHandler; +static _vmem_handler elanCmdHandler; +static _vmem_handler elanRamHandler; + +u8 *RAM; + +static u32 reg10; +static u32 reg74; +static u32 reg30 = 0x31; + +static u32 elanCmd[32 / 4]; + +template +T DYNACALL read_elanreg(u32 paddr) +{ + //verify(sizeof(T) == 4); + u32 addr = paddr & 0x01ffffff; + switch (addr >> 16) + { + case 0x5F: + if (addr >= 0x005F6800 && addr <= 0x005F7CFF) + { + // 5F6908: Tests for errors 4, 8, 10, 2 and 1 (render isp buf ovf, render hazard, ISP param ovf, ob list ptr ovf, ta ill param) + // 5f6900: then int 4 and 40 (EoR TSP, EoT YUV) + return (T)sb_ReadMem(paddr, sizeof(T)); + } + else if (addr >= 0x005F8000 && addr <= 0x005F9FFF) + { + if (sizeof(T) != 4) + // House of the Dead 2 + return 0; + return (T)pvr_ReadReg(paddr); + } + else + { + INFO_LOG(MEMORY, "Read from area2 not implemented [Unassigned], addr=%x", addr); + return 0; + } + + default: +// if ((addr & 0xFF) != 0x74) + DEBUG_LOG(PVR, "ELAN read(%d) %08x [pc %08x]", (u32)sizeof(T), addr, p_sh4rcb->cntx.pc); + switch (addr & 0xFF) + { + case 0: // magic number + return (T)0xe1ad0000; + case 4: // revision + return 0x10; // 1 or x10 + case 0xc: + // command queue size + // loops until < 2 (v1) or 3 (v10) + return 1; + case 0x10: // sh4 if control? + // b0 broadcast on cs1 + // b1 elan channel 2 + // b2 enable pvr #2 + // rewritten by bios as reg10 & ~1 + return reg10; + case 0x14: // SDRAM refresh (never read?) + return (T)0x2029; //default 0x1429 + case 0x1c: // SDRAM CFG + return (T)0x87320961; + case 0x30: // Macro tiler config + // 0 0 l l l l l l t t t t 0 0 r r r r r r b b b b 0 0 V H 0 0 0 T + // lllll: left tile + // tttt: top tile + // rrrrrr: right tile + // bbbb: bottom tile + // V: tile vertically + // H: tile horizontally + // T: tiler enabled + return reg30; + case 0x74: + // b0 dma completed + // b1 cmd completed + // b2-b3 geometry timeouts + // b4-b6 errors? + return reg74; + case 0x78: // IRQ MASK + // 6 bits? + return 0; + default: + return (T)0; + } + } +} + +template +void DYNACALL write_elanreg(u32 paddr, T data) +{ + //verify(sizeof(T) == 4); + u32 addr = paddr & 0x01ffffff; + switch (addr >> 16) + { + case 0x5F: + if (addr>= 0x005F6800 && addr <= 0x005F7CFF) + sb_WriteMem(paddr, data, sizeof(T)); + else if (addr >= 0x005F8000 && addr <= 0x005F9FFF) + { + if (addr == 0x5F8040 && data == 0xFF00FF) + { + ERROR_LOG(PVR, "ELAN SCREWED pr %x pc %x", p_sh4rcb->cntx.pr, p_sh4rcb->cntx.pc); + throw FlycastException("Boot aborted"); + } + else if ((addr & 0x1fff) == SOFTRESET_addr && data == 0) + reg74 &= 3; + else if ((addr & 0x1fff) == STARTRENDER_addr) + reg74 &= 3; + + //if ((paddr & 0x1c000000) == 0x08000000 && (addr & 0x1fff) == SOFTRESET_addr && data == 0) + // reg74 |= 2; + pvr_WriteReg(paddr, data); + } + else + INFO_LOG(COMMON, "Write to area2 not implemented [Unassigned], addr=%x,data=%x,size=%d", addr, data, (u32)sizeof(T)); + break; + default: +// if ((addr & 0xFF) != 0x74) + DEBUG_LOG(PVR, "ELAN write(%d) %08x = %x", (u32)sizeof(T), addr, data); + switch (addr & 0xFF) + { + case 0x0: + // 0 multiple times (_kmtlifAbortDisplayListProcessing) + break; + // 0x4: _kmtlifAbortDisplayListProcessing: 0 + case 0x8: // write-only. reset ? + // 1 then 0 + // bios: 5 + // _kmtlifAbortDisplayListProcessing: 5 then 0 + // _kmtlifHandleDMATimeout: 1, 0, 4, 0... + if (data == 0) + reg74 = 0; + break; + case 0xc: + // 0 + break; + case 0x10: // sh4 if control? + reg10 = data; + break;; + case 0x14: // SDRAM refresh + // x2029 + break; + case 0x1c: // SDRAM CFG + break; + case 0x30: + reg30 = data; + break; + case 0x74: // IRQ STAT + reg74 &= ~data; + break; + // _kmtlifSetupElanInts: + // 78 = 3f + // 7C = 0 + // 80 = 17 + // 84 = 2b + // 88 = 0 + case 0xd0: // _kmtlifSetCullingRegister + // 6 + break;; + default: + break; + } + } +} + +template +T DYNACALL read_elancmd(u32 addr) +{ + DEBUG_LOG(PVR, "ELAN cmd READ! (%d) %08x", (u32)sizeof(T), addr); + return 0; +} + +static glm::vec4 unpackColor(u32 color) +{ + return glm::vec4((float)((color >> 16) & 0xff) / 255.f, + (float)((color >> 8) & 0xff) / 255.f, + (float)(color & 0xff) / 255.f, + (float)(color >> 24) / 255.f); +} + +static glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0) +{ + return glm::vec4((float)red / 255.f, (float)green / 255.f, (float)blue / 255.f, (float)alpha / 255.f); +} + +static u32 packColorBGRA(const glm::vec4& color) +{ + return (int)(std::min(1.f, color.a) * 255.f) << 24 + | (int)(std::min(1.f, color.r) * 255.f) << 16 + | (int)(std::min(1.f, color.g) * 255.f) << 8 + | (int)(std::min(1.f, color.b) * 255.f); +} + +static u32 packColorRGBA(const glm::vec4& color) +{ + return (int)(std::min(1.f, color.r) * 255.f) + | (int)(std::min(1.f, color.g) * 255.f) << 8 + | (int)(std::min(1.f, color.b) * 255.f) << 16 + | (int)(std::min(1.f, color.a) * 255.f) << 24; +} + +u32 (*packColor)(const glm::vec4& color) = packColorRGBA; + +static GMP *curGmp; +static glm::mat4x4 curMatrix; +static float *taMVMatrix; +static float *taNormalMatrix; +static glm::mat4 projectionMatrix; +static float *taProjMatrix; +static LightModel *curLightModel; +static ElanBase *curLights[MAX_LIGHTS]; +static float nearPlane = 0.001f; +static float farPlane = 100000.f; +static bool envMapping; +static bool cullingReversed; +static bool openModifierVolume; +static bool shadowedVolume; +static TSP modelTSP; +static glm::vec4 gmpDiffuseColor0; +static glm::vec4 gmpSpecularColor0; +static glm::vec4 gmpDiffuseColor1; +static glm::vec4 gmpSpecularColor1; + +struct State +{ + static constexpr u32 Null = 0xffffffff; + + int listType = -1; + u32 gmp = Null; + u32 instance = Null; + u32 projMatrix = Null; + u32 tileclip = 0; + u32 lightModel = Null; + u32 lights[MAX_LIGHTS] = { + Null, Null, Null, Null, Null, Null, Null, Null, + Null, Null, Null, Null, Null, Null, Null, Null + }; + bool lightModelUpdated = false; + float envMapUOffset = 0.f; + float envMapVOffset = 0.f; + + void reset() + { + listType = -1; + gmp = Null; + instance = Null; + projMatrix = Null; + tileclip = 0; + lightModel = Null; + for (auto& light : lights) + light = Null; + update(); + if (isDirectX(config::RendererType)) + packColor = packColorBGRA; + else + packColor = packColorRGBA; + } + void setMatrix(InstanceMatrix *pinstance) + { + instance = elanRamAddress(pinstance); + updateMatrix(); + } + + void updateMatrix() + { + if (instance == Null) + { + taMVMatrix = nullptr; + taNormalMatrix = nullptr; + envMapUOffset = 0.f; + envMapVOffset = 0.f; + return; + } + InstanceMatrix *mat = (InstanceMatrix *)&RAM[instance]; + DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f\n %f %f %f", + -mat->tm00, -mat->tm10, -mat->tm20, -mat->tm30, + mat->tm01, mat->tm11, mat->tm21, mat->tm31, + -mat->tm02, -mat->tm12, -mat->tm22, -mat->tm32, + mat->lm00, mat->lm10, mat->lm20, + mat->lm01, mat->lm11, mat->lm21, + mat->lm02, mat->lm12, mat->lm22); + + curMatrix = glm::mat4x4{ + -mat->tm00, mat->tm01, -mat->tm02, 0.f, + -mat->tm10, mat->tm11, -mat->tm12, 0.f, + -mat->tm20, mat->tm21, -mat->tm22, 0.f, + -mat->tm30, mat->tm31, -mat->tm32, 1.f + }; + glm::mat4x4 normalMatrix = glm::mat4x4{ + mat->lm00, mat->lm01, mat->lm02, 0.f, + mat->lm10, mat->lm11, mat->lm12, 0.f, + mat->lm20, mat->lm21, mat->lm22, 0.f, + -mat->tm30, mat->tm31, -mat->tm32, 1.f + }; + nearPlane = mat->_near; + farPlane = mat->_far; + envMapUOffset = mat->envMapU; + envMapVOffset = mat->envMapV; + taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix)); + if (normalMatrix != curMatrix) + taNormalMatrix = ta_add_matrix(glm::value_ptr(normalMatrix)); + else + taNormalMatrix = taMVMatrix; + } + + void setProjectionMatrix(void *p) + { + projMatrix = elanRamAddress(p); + updateProjectionMatrix(); + } + + void updateProjectionMatrix() + { + if (projMatrix == Null) + { + taProjMatrix = nullptr; + return; + } + ProjMatrix *pm = (ProjMatrix *)&RAM[projMatrix]; + DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane); + // fx = -m00 * w/2 + // tx = -m20 * w/2 + left + w/2 + // fy = -m11 * h/2 + // ty = -m21 * h/2 + top + h/2 + projectionMatrix = glm::mat4( + -pm->fx, 0, 0, 0, + 0, pm->fy, 0, 0, + -pm->tx, -pm->ty, -1, -1, + 0, 0, 0, 0 + ); + taProjMatrix = ta_add_matrix(glm::value_ptr(projectionMatrix)); + } + + void setGMP(void *p) + { + gmp = elanRamAddress(p); + updateGMP(); + } + + void updateGMP() + { + if (gmp == Null) + { + curGmp = nullptr; + gmpDiffuseColor0 = glm::vec4(0); + gmpSpecularColor0 = glm::vec4(0); + gmpDiffuseColor1 = glm::vec4(0); + gmpSpecularColor1 = glm::vec4(0); + } + else + { + curGmp = (GMP *)&RAM[gmp]; + DEBUG_LOG(PVR, "GMP paramSelect %x", curGmp->paramSelect.full); + if (curGmp->paramSelect.d0) + gmpDiffuseColor0 = unpackColor(curGmp->diffuse0); + else + gmpDiffuseColor0 = glm::vec4(0); + if (curGmp->paramSelect.s0) + gmpSpecularColor0 = unpackColor(curGmp->specular0); + else + gmpSpecularColor0 = glm::vec4(0); + if (curGmp->paramSelect.d1) + gmpDiffuseColor1 = unpackColor(curGmp->diffuse1); + else + gmpDiffuseColor1 = glm::vec4(0); + if (curGmp->paramSelect.s1) + gmpSpecularColor1 = unpackColor(curGmp->specular1); + else + gmpSpecularColor1 = glm::vec4(0); + } + } + + void setLightModel(void *p) + { + lightModel = elanRamAddress(p); + updateLightModel(); + } + + void updateLightModel() + { + lightModelUpdated = true; + if (lightModel == Null) + curLightModel = nullptr; + else + { + curLightModel = (LightModel *)&RAM[lightModel]; + DEBUG_LOG(PVR, "Light model mask: diffuse %04x specular %04x, ambient base %08x offset %08x", curLightModel->diffuseMask0, curLightModel->specularMask0, + curLightModel->ambientBase0, curLightModel->ambientOffset0); + } + } + + void setLight(int lightId, void *p) + { + lights[lightId] = elanRamAddress(p); + updateLight(lightId); + } + + void updateLight(int lightId) + { + lightModelUpdated = true; + if (lights[lightId] == Null) + { + elan::curLights[lightId] = nullptr; + return; + } + PointLight *plight = (PointLight *)&RAM[lights[lightId]]; + if (plight->pcw.parallelLight) + { + ParallelLight *light = (ParallelLight *)plight; + DEBUG_LOG(PVR, " Parallel light %d: [%x] routing %d dmode %d col %d %d %d dir %f %f %f", light->lightId, plight->pcw.full, + light->routing, light->dmode, + light->red, light->green, light->blue, + light->getDirX(), light->getDirY(), light->getDirZ()); + } + else + { + DEBUG_LOG(PVR, " Point light %d: [%x] routing %d dmode %d smode %d col %d %d %d dir %f %f %f pos %f %f %f dist %f %f angle %f %f", + plight->lightId, plight->pcw.full, plight->routing, plight->dmode, plight->smode, + plight->red, plight->green, plight->blue, + plight->getDirX(), plight->getDirY(), plight->getDirZ(), + plight->posX, plight->posY, plight->posZ, + plight->distA(), plight->distB(), + plight->angleA(), plight->angleB()); + } + elan::curLights[lightId] = plight; + } + + void setClipMode(PCW pcw) + { + tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28); + } + + void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax) + { + u32 t = tileclip & 0xF0000000; + t |= xmin & 0x3f; // 6 bits + t |= (xmax & 0x3f) << 6; // 6 bits + t |= (ymin & 0x1f) << 12; // 5 bits + t |= (ymax & 0x1f) << 17; // 5 bits + tileclip = t; + } + + void update() + { + updateMatrix(); + updateProjectionMatrix(); + updateGMP(); + updateLightModel(); + for (u32 i = 0; i < MAX_LIGHTS; i++) + updateLight(i); + } + + static u32 elanRamAddress(void *p) + { + if ((u8 *)p < RAM || (u8 *)p >= RAM + ELAN_RAM_SIZE) + return Null; + else + return (u32)((u8 *)p - RAM); + } + + void serialize(Serializer& ser) + { + ser << listType; + ser << gmp; + ser << instance; + ser << projMatrix; + ser << tileclip; + ser << lightModel; + ser << lights; + } + + void deserialize(Deserializer& deser) + { + if (deser.version() < Deserializer::V24) + { + reset(); + return; + } + deser >> listType; + deser >> gmp; + deser >> instance; + deser >> projMatrix; + deser >> tileclip; + deser >> lightModel; + deser >> lights; + update(); + } +}; + +static State state; + +static void setCoords(Vertex& vtx, float x, float y, float z) +{ + vtx.x = x; + vtx.y = y; + vtx.z = z; +} + +template +static void setUV(const Ts& vs, Vertex& vd) +{ + if (envMapping) + { + vd.u = state.envMapUOffset; + vd.v = state.envMapVOffset; + vd.u1 = state.envMapUOffset; + vd.v1 = state.envMapVOffset; + } + else + { + vd.u = vs.uv.u; + vd.v = vs.uv.v; + vd.u1 = vs.uv.u; + vd.v1 = vs.uv.v; + } +} + +static void SetEnvMapUV(Vertex& vtx) +{ + if (envMapping) + { + vtx.u = state.envMapUOffset; + vtx.v = state.envMapVOffset; + vtx.u1 = state.envMapUOffset; + vtx.v1 = state.envMapVOffset; + } +} + +template +glm::vec3 getNormal(const T& vtx) +{ + return { (int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f }; +} + +template<> +glm::vec3 getNormal(const N2_VERTEX_VNU& vtx) +{ + return { vtx.normal.nx, vtx.normal.ny, vtx.normal.nz }; +} + +template +void setNormal(Vertex& vd, const T& vs) +{ + glm::vec3 normal = getNormal(vs); + vd.nx = normal.x; + vd.ny = normal.y; + vd.nz = normal.z; +} + +static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1) +{ + baseCol0 += gmpDiffuseColor0; + offsetCol0 += gmpSpecularColor0; + baseCol1 += gmpDiffuseColor1; + offsetCol1 += gmpSpecularColor1; +} + +template +static void convertVertex(const T& vs, Vertex& vd); + +template<> +void convertVertex(const N2_VERTEX& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + SetEnvMapUV(vd); + glm::vec4 baseCol0(0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1(0); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); +} + +template<> +void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + SetEnvMapUV(vd); + glm::vec4 baseCol0 = unpackColor(vs.rgb.argb0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); +} + +template<> +void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); + glm::vec4 baseCol0(0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1(0); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); +} + +template<> +void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); + glm::vec4 baseCol0 = unpackColor(vs.rgb.argb0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.spc = packColor(offsetCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + *(u32 *)vd.spc1 = packColor(offsetCol1); +} + +template<> +void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); + glm::vec4 baseCol0(0); + glm::vec4 offsetCol0(0); + glm::vec4 baseCol1(0); + glm::vec4 offsetCol1(0); + addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + *(u32 *)vd.col = packColor(baseCol0); + *(u32 *)vd.col1 = packColor(baseCol1); + // Stuff the bump map normals and parameters in the specular colors + vd.spc[0] = vs.bump.tangent.x; + vd.spc[1] = vs.bump.tangent.y; + vd.spc[2] = vs.bump.tangent.z; + vd.spc1[0] = vs.bump.bitangent.x; + vd.spc1[1] = vs.bump.bitangent.y; + vd.spc1[2] = vs.bump.bitangent.z; + vd.spc[3] = vs.bump.scaleFactor.bumpDegree; // always 255? + vd.spc1[3] = vs.bump.scaleFactor.fixedOffset; // always 0? +// int nx = (int8_t)vs.header.nx; +// int ny = (int8_t)vs.header.ny; +// int nz = (int8_t)vs.header.nz; +// printf("BumpMap vtx deg %d off %d normal %d %d %d tangent %d %d %d bitangent %d %d %d dot %d %d %d\n", vs.bump.scaleFactor.bumpDegree, vs.bump.scaleFactor.fixedOffset, +// nx, ny, nz, +// vs.bump.tangent.x, vs.bump.tangent.y, vs.bump.tangent.z, vs.bump.bitangent.x, vs.bump.bitangent.y, vs.bump.bitangent.z, +// nx * vs.bump.tangent.x + ny * vs.bump.tangent.y + nz * vs.bump.tangent.z, +// nx * vs.bump.bitangent.x + ny * vs.bump.bitangent.y + nz * vs.bump.bitangent.z, +// vs.bump.tangent.x * vs.bump.bitangent.x + vs.bump.tangent.y * vs.bump.bitangent.y + vs.bump.tangent.z * vs.bump.bitangent.z +// ); +} + +template +static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& max) +{ + min = { 1e38f, 1e38f, 1e38f }; + max = { -1e38f, -1e38f, -1e38f }; + for (u32 i = 0; i < count; i++) + { + glm::vec3 pos{ vertices[i].x, vertices[i].y, vertices[i].z }; + min = glm::min(min, pos); + max = glm::max(max, pos); + } + glm::vec4 center((min + max) / 2.f, 1); + glm::vec4 extents(max - glm::vec3(center), 0); + // transform + center = curMatrix * center; + glm::vec3 extentX = curMatrix * glm::vec4(extents.x, 0, 0, 0); + glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0); + glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0); + // new AA extents + glm::vec3 newExtent = glm::abs(extentX) + glm::abs(extentY) + glm::abs(extentZ); + + min = glm::vec3(center) - newExtent; + max = glm::vec3(center) + newExtent; +} + +template +static bool isBetweenNearAndFar(const T* vertices, u32 count, bool& needNearClipping) +{ + glm::vec3 min; + glm::vec3 max; + boundingBox(vertices, count, min, max); + if (min.z > -nearPlane || max.z < -farPlane) + return false; + + glm::vec4 pmin = projectionMatrix * glm::vec4(min, 1); + glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1); + if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y)) + return false; + + needNearClipping = max.z > -nearPlane; + + return true; +} + +class TriangleStripClipper +{ +public: + TriangleStripClipper(bool enabled) : enabled(enabled) {} + + void add(const Vertex& vtx) + { + if (enabled) + { + float z = vtx.x * curMatrix[0][2] + vtx.y * curMatrix[1][2] + vtx.z * curMatrix[2][2] + curMatrix[3][2]; + float dist = -z - nearPlane; + clip(vtx, dist); + count++; + } + else + { + ta_add_vertex(vtx); + } + } + +private: + void sendVertex(const Vertex& r) + { + if (dupeNext) + ta_add_vertex(r); + dupeNext = false; + ta_add_vertex(r); + } + + // Three-Dimensional Homogeneous Clipping of Triangle Strips + // Patrick-Gilles Maillot. Graphics Gems II - 1991 + void clip(const Vertex& r, float rDist) + { + clipCode >>= 1; + clipCode |= (int)(rDist < 0) << 2; + if (count == 1) + { + switch (clipCode >> 1) { + case 0: // Q and R inside + sendVertex(q); + sendVertex(r); + break; + case 1: // Q outside, R inside + sendVertex(interpolate(q, qDist, r, rDist)); + sendVertex(r); + break; + case 2: // Q inside, R outside + sendVertex(q); + sendVertex(interpolate(q, qDist, r, rDist)); + break; + case 3: // Q and R outside + break; + } + } + else if (count >= 2) + { + switch (clipCode) + { + case 0: // all inside + sendVertex(r); + break; + case 1: // P outside, Q and R inside + sendVertex(interpolate(r, rDist, p, pDist)); + sendVertex(q); + sendVertex(r); + break; + case 2: // P inside, Q outside and R inside + sendVertex(r); + sendVertex(interpolate(q, qDist, r, rDist)); + sendVertex(r); + break; + case 3: // P and Q outside, R inside + { + Vertex tmp = interpolate(r, rDist, p, pDist); + sendVertex(tmp); + sendVertex(tmp); + sendVertex(tmp); // One more to preserve strip swap order + sendVertex(interpolate(q, qDist, r, rDist)); + sendVertex(r); + } + break; + case 4: // P and Q inside, R outside + sendVertex(interpolate(r, rDist, p, pDist)); + sendVertex(q); + sendVertex(interpolate(q, qDist, r, rDist)); + break; + case 5: // P outside, Q inside, R outside + sendVertex(interpolate(q, qDist, r, rDist)); + break; + case 6: // P inside, Q and R outside + { + Vertex tmp = interpolate(r, rDist, p, pDist); + sendVertex(tmp); + sendVertex(tmp); + sendVertex(tmp); // One more to preserve strip swap order + } + break; + case 7: // P, Q and R outside + dupeNext = !dupeNext; + break; + } + } + p = q; + pDist = qDist; + q = r; + qDist = rDist; + } + + Vertex interpolate(const Vertex& v1, float f1, const Vertex& v2, float f2) + { + Vertex v; + float a2 = std::abs(f1) / (std::abs(f1) + std::abs(f2)); + float a1 = 1 - a2; + v.x = v1.x * a1 + v2.x * a2; + v.y = v1.y * a1 + v2.y * a2; + v.z = v1.z * a1 + v2.z * a2; + + v.u = v1.u * a1 + v2.u * a2; + v.v = v1.v * a1 + v2.v * a2; + v.u1 = v1.u1 * a1 + v2.u1 * a2; + v.v1 = v1.v1 * a1 + v2.v1 * a2; + + for (size_t i = 0; i < ARRAY_SIZE(v1.col); i++) + { + v.col[i] = (u8)std::round(v1.col[i] * a1 + v2.col[i] * a2); + v.spc[i] = (u8)std::round(v1.spc[i] * a1 + v2.spc[i] * a2); + v.col1[i] = (u8)std::round(v1.col1[i] * a1 + v2.col1[i] * a2); + v.spc1[i] = (u8)std::round(v1.spc1[i] * a1 + v2.spc1[i] * a2); + } + v.nx = v1.nx * a1 + v2.nx * a2; + v.ny = v1.ny * a1 + v2.ny * a2; + v.nz = v1.nz * a1 + v2.nz * a2; + + return v; + } + + bool enabled; + int count = 0; + int clipCode = 0; + Vertex p; + float pDist = 0; + Vertex q; + float qDist = 0; + bool dupeNext = false; +}; + +template +static void sendVertices(const ICHList *list, const T* vtx, bool needClipping) +{ + Vertex taVtx; + verify(list->vertexSize() > 0); + + Vertex fanCenterVtx{}; + Vertex fanLastVtx{}; + bool stripStart = true; + int outStripIndex = 0; + TriangleStripClipper clipper(needClipping); + + for (u32 i = 0; i < list->vtxCount; i++) + { + convertVertex(*vtx, taVtx); + + if (stripStart) + { + // Center vertex if triangle fan + //verify(vtx->header.isFirstOrSecond()); This fails for some strips: strip=1 fan=0 (soul surfer) + fanCenterVtx = taVtx; + if (outStripIndex > 0) + { + // use degenerate triangles to link strips + clipper.add(fanLastVtx); + clipper.add(taVtx); + outStripIndex += 2; + if (outStripIndex & 1) + { + clipper.add(taVtx); + outStripIndex++; + } + } + stripStart = false; + } + else if (vtx->header.isFan()) + { + // use degenerate triangles to link strips + clipper.add(fanLastVtx); + clipper.add(fanCenterVtx); + outStripIndex += 2; + if (outStripIndex & 1) + { + clipper.add(fanCenterVtx); + outStripIndex++; + } + // Triangle fan + clipper.add(fanCenterVtx); + clipper.add(fanLastVtx); + outStripIndex += 2; + } + clipper.add(taVtx); + outStripIndex++; + fanLastVtx = taVtx; + if (vtx->header.endOfStrip) + stripStart = true; + + vtx++; + } +} + +class ModifierVolumeClipper +{ +public: + ModifierVolumeClipper(bool enabled) : enabled(enabled) {} + + void add(ModTriangle& tri) + { + if (enabled) + { + glm::vec3 dist{ + tri.x0 * curMatrix[0][2] + tri.y0 * curMatrix[1][2] + tri.z0 * curMatrix[2][2] + curMatrix[3][2], + tri.x1 * curMatrix[0][2] + tri.y1 * curMatrix[1][2] + tri.z1 * curMatrix[2][2] + curMatrix[3][2], + tri.x2 * curMatrix[0][2] + tri.y2 * curMatrix[1][2] + tri.z2 * curMatrix[2][2] + curMatrix[3][2] + }; + dist = -dist - nearPlane; + ModTriangle newTri; + int n = sutherlandHodgmanClip(dist, tri, newTri); + switch (n) + { + case 0: + // fully clipped + break; + case 3: + ta_add_triangle(tri); + break; + case 4: + ta_add_triangle(tri); + ta_add_triangle(newTri); + break; + } + } + else + { + ta_add_triangle(tri); + } + } + +private: + // + // Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire + // Journal of Graphics GPU and Game Tools - November 2011 + // + glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist) + { + return (A * std::abs(Bdist) + B * std::abs(Adist)) / (std::abs(Adist) + std::abs(Bdist)); + } + + // Clip the triangle 'trig' with respect to the provided distances to the clipping plane. + int sutherlandHodgmanClip(glm::vec3& dist, ModTriangle& trig, ModTriangle& newTrig) + { + constexpr float clipEpsilon = 0.f; //0.00001; + constexpr float clipEpsilon2 = 0.f; //0.01; + + if (!glm::any(glm::greaterThanEqual(dist , glm::vec3(clipEpsilon2)))) + // all clipped + return 0; + if (glm::all(glm::greaterThanEqual(dist , glm::vec3(-clipEpsilon)))) + // none clipped + return 3; + + // There are either 1 or 2 vertices above the clipping plane. + glm::bvec3 above = glm::greaterThanEqual(dist, glm::vec3(0.f)); + bool nextIsAbove; + glm::vec3 v0(trig.x0, trig.y0, trig.z0); + glm::vec3 v1(trig.x1, trig.y1, trig.z1); + glm::vec3 v2(trig.x2, trig.y2, trig.z2); + glm::vec3 v3; + // Find the CCW-most vertex above the plane. + if (above[1] && !above[0]) + { + // Cycle once CCW. Use v3 as a temp + nextIsAbove = above[2]; + v3 = v0; + v0 = v1; + v1 = v2; + v2 = v3; + dist = glm::vec3(dist.y, dist.z, dist.x); + } + else if (above[2] && !above[1]) + { + // Cycle once CW. Use v3 as a temp. + nextIsAbove = above[0]; + v3 = v2; + v2 = v1; + v1 = v0; + v0 = v3; + dist = glm::vec3(dist.z, dist.x, dist.y); + } + else + nextIsAbove = above[1]; + trig.x0 = v0.x; + trig.y0 = v0.y; + trig.z0 = v0.z; + // We always need to clip v2-v0. + v3 = intersect(v0, dist[0], v2, dist[2]); + if (nextIsAbove) + { + v2 = intersect(v1, dist[1], v2, dist[2]); + trig.x1 = v1.x; + trig.y1 = v1.y; + trig.z1 = v1.z; + trig.x2 = v2.x; + trig.y2 = v2.y; + trig.z2 = v2.z; + newTrig.x0 = v0.x; + newTrig.y0 = v0.y; + newTrig.z0 = v0.z; + newTrig.x1 = v2.x; + newTrig.y1 = v2.y; + newTrig.z1 = v2.z; + newTrig.x2 = v3.x; + newTrig.y2 = v3.y; + newTrig.z2 = v3.z; + + return 4; + } + else + { + v1 = intersect(v0, dist[0], v1, dist[1]); + trig.x1 = v1.x; + trig.y1 = v1.y; + trig.z1 = v1.z; + trig.x2 = v3.x; + trig.y2 = v3.y; + trig.z2 = v3.z; + + return 3; + } + } + + bool enabled; +}; + +template +static void sendMVPolygon(ICHList *list, const T *vtx, bool needClipping) +{ + ModifierVolumeParam mvp{}; + mvp.isp.full = list->isp.full; + if (!openModifierVolume) + mvp.isp.CullMode = 0; + mvp.isp.VolumeLast = list->pcw.volume; + mvp.isp.DepthMode &= 3; + mvp.mvMatrix = taMVMatrix; + mvp.projMatrix = taProjMatrix; + ta_add_poly(state.listType, mvp); + + ModifierVolumeClipper clipper(needClipping); + glm::vec3 vtx0{}; + glm::vec3 vtx1{}; + u32 stripStart = 0; + + for (u32 i = 0; i < list->vtxCount; i++) + { + glm::vec3 v(vtx->x, vtx->y, vtx->z); + u32 triIdx = i - stripStart; + if (triIdx >= 2) + { + ModTriangle tri; + + if (triIdx & 1) + { + tri.x1 = vtx0.x; + tri.y1 = vtx0.y; + tri.z1 = vtx0.z; + + tri.x0 = vtx1.x; + tri.y0 = vtx1.y; + tri.z0 = vtx1.z; + } + else + { + tri.x0 = vtx0.x; + tri.y0 = vtx0.y; + tri.z0 = vtx0.z; + + tri.x1 = vtx1.x; + tri.y1 = vtx1.y; + tri.z1 = vtx1.z; + } + tri.x2 = v.x; + tri.y2 = v.y; + tri.z2 = v.z; + + clipper.add(tri); + } + if (vtx->header.endOfStrip) + stripStart = i + 1; + vtx0 = vtx1; + vtx1 = v; + vtx++; + } +} + +static N2LightModel *taLightModel; + +static void sendLights() +{ + if (!state.lightModelUpdated) + return; + + state.lightModelUpdated = false; + N2LightModel model; + model.lightCount = 0; + if (curLightModel == nullptr) + { + model.useBaseOver = false; + for (int i = 0; i < 2; i++) + { + model.ambientMaterialBase[i] = false; + model.ambientMaterialOffset[i] = false; + model.ambientBase[i][0] = model.ambientBase[i][1] = model.ambientBase[i][2] = model.ambientBase[i][3] = 1.f; + } + memset(model.ambientOffset, 0, sizeof(model.ambientOffset)); + taLightModel = nullptr; + return; + } + model.ambientMaterialBase[0] = curLightModel->useAmbientBase0; + model.ambientMaterialBase[1] = curLightModel->useAmbientBase1; + model.ambientMaterialOffset[0] = curLightModel->useAmbientOffset0; + model.ambientMaterialOffset[1] = curLightModel->useAmbientOffset1; + model.useBaseOver = curLightModel->useBaseOver; + model.bumpId1 = -1; + model.bumpId2 = -1; + memcpy(model.ambientBase[0], glm::value_ptr(unpackColor(curLightModel->ambientBase0)), sizeof(model.ambientBase[0])); + memcpy(model.ambientBase[1], glm::value_ptr(unpackColor(curLightModel->ambientBase1)), sizeof(model.ambientBase[1])); + memcpy(model.ambientOffset[0], glm::value_ptr(unpackColor(curLightModel->ambientOffset0)), sizeof(model.ambientOffset[0])); + memcpy(model.ambientOffset[1], glm::value_ptr(unpackColor(curLightModel->ambientOffset1)), sizeof(model.ambientOffset[1])); + + for (u32 i = 0; i < MAX_LIGHTS; i++) + { + N2Light& light = model.lights[model.lightCount]; + for (int vol = 0; vol < 2; vol++) + { + light.diffuse[vol] = curLightModel->isDiffuse(i, vol); + light.specular[vol] = curLightModel->isSpecular(i, vol); + } + if (!light.diffuse[0] && !light.specular[0] + && !light.diffuse[1] && !light.specular[1]) + continue; + if (curLights[i] == nullptr) + { + INFO_LOG(PVR, "Light %d is referenced but undefined", i); + continue; + } + if (i == curLightModel->bumpId1) + model.bumpId1 = model.lightCount; + if (i == curLightModel->bumpId2) + model.bumpId2 = model.lightCount; + light.parallel = curLights[i]->pcw.parallelLight; + if (light.parallel) + { + ParallelLight *plight = (ParallelLight *)curLights[i]; + memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); + light.routing = plight->routing; + light.dmode = plight->dmode; + light.smode = N2_LMETHOD_SINGLE_SIDED; + memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), + sizeof(light.direction)); + } + else + { + PointLight *plight = (PointLight *)curLights[i]; + memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); + light.routing = plight->routing; + light.dmode = plight->dmode; + light.smode = plight->smode; + if (plight->posX == 0 && plight->posY == 0 && plight->posZ == 0 + && plight->_distA == 0 && plight->_distB == 0 + && plight->_angleA == 0 && plight->_angleB == 0) + { + // Lights not using distance or angle attenuation are converted into parallel lights on the CPU side? + DEBUG_LOG(PVR, "Point -> parallel light[%d] dir %d %d %d", i, -(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ); + light.parallel = true; + memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), + sizeof(light.direction)); + } + else + { + memcpy(light.direction, glm::value_ptr(-glm::vec4(plight->getDirX(), plight->getDirY(), plight->getDirZ(), 0)), + sizeof(light.direction)); + memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position)); + light.distAttnMode = plight->dattenmode; + light.attnDistA = plight->distA(); + light.attnDistB = plight->distB(); + light.attnAngleA = plight->angleA(); + light.attnAngleB = plight->angleB(); + } + } + model.lightCount++; + } + taLightModel = ta_add_light(model); +} + +static void setStateParams(PolyParam& pp, const ICHList *list) +{ + sendLights(); + pp.tileclip = state.tileclip; + pp.mvMatrix = taMVMatrix; + pp.normalMatrix = taNormalMatrix; + pp.projMatrix = taProjMatrix; + pp.lightModel = taLightModel; + pp.envMapping[0] = false; + pp.envMapping[1] = false; + if (curGmp != nullptr) + { + pp.glossCoef[0] = curGmp->gloss.getCoef0(); + pp.glossCoef[1] = curGmp->gloss.getCoef1(); + pp.constantColor[0] = curGmp->paramSelect.b0; + pp.diffuseColor[0] = curGmp->paramSelect.d0; + pp.specularColor[0] = curGmp->paramSelect.s0; + pp.constantColor[1] = curGmp->paramSelect.b1; + pp.diffuseColor[1] = curGmp->paramSelect.d1; + pp.specularColor[1] = curGmp->paramSelect.s1; + + // Environment mapping + if (curGmp->paramSelect.e0) + { + pp.pcw.Texture = 1; + pp.pcw.Offset = 0; + pp.tsp.UseAlpha = 1; + pp.tsp.IgnoreTexA = 0; + pp.envMapping[0] = true; + pp.tcw = list->tcw0; + envMapping = true; + } + if (curGmp->paramSelect.e1) + { + pp.pcw.Texture = 1; + pp.pcw.Offset = 0; + pp.tsp1.UseAlpha = 1; + pp.tsp1.IgnoreTexA = 0; + pp.envMapping[1] = true; + pp.tcw1 = list->tcw1; + envMapping = true; + } + } + pp.tsp.full ^= modelTSP.full; + pp.tsp1.full ^= modelTSP.full; + + // projFlip is for left-handed projection matrices (initd rear view mirror) + bool projFlip = taProjMatrix != nullptr && std::signbit(taProjMatrix[0]) == std::signbit(taProjMatrix[5]); + pp.isp.CullMode ^= (u32)cullingReversed ^ (u32)projFlip; + pp.pcw.Shadow ^= shadowedVolume; + if (pp.pcw.Shadow == 0 || pp.pcw.Volume == 0) + { + pp.tsp1.full = -1; + pp.tcw1.full = -1; + pp.glossCoef[1] = 0; + pp.constantColor[1] = false; + pp.diffuseColor[1] = false; + pp.specularColor[1] = false; + } +// else if (pp.pcw.Volume == 1) +// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", state.listType, list->flags, curGmp->paramSelect.full, +// pp.tcw.full ^ pp.tcw1.full, pp.tsp.full ^ pp.tsp1.full); +} + +static void sendPolygon(ICHList *list) +{ + bool needClipping; + + switch (list->flags) + { + case ICHList::VTX_TYPE_V: + { + N2_VERTEX *vtx = (N2_VERTEX *)((u8 *)list + sizeof(ICHList)); + if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) + break; + if (state.listType & 1) + sendMVPolygon(list, vtx, needClipping); + else + { + PolyParam pp{}; + pp.pcw.Shadow = list->pcw.shadow; + pp.pcw.Texture = list->pcw.texture; + pp.pcw.Offset = list->pcw.offset; + pp.pcw.Gouraud = list->pcw.gouraud; + pp.pcw.Volume = list->pcw.volume; + pp.isp = list->isp; + pp.tsp = list->tsp0; + pp.tsp1 = list->tsp1; + setStateParams(pp, list); + ta_add_poly(state.listType, pp); + + sendVertices(list, vtx, needClipping); + } + } + break; + + case ICHList::VTX_TYPE_VU: + { + N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)((u8 *)list + sizeof(ICHList)); + if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) + break; + if (state.listType & 1) + sendMVPolygon(list, vtx, needClipping); + else + { + PolyParam pp{}; + pp.pcw.Shadow = list->pcw.shadow; + pp.pcw.Texture = list->pcw.texture; + pp.pcw.Offset = list->pcw.offset; + pp.pcw.Gouraud = list->pcw.gouraud; + pp.pcw.Volume = list->pcw.volume; + pp.isp = list->isp; + pp.tsp = list->tsp0; + pp.tcw = list->tcw0; + pp.tsp1 = list->tsp1; + pp.tcw1 = list->tcw1; + setStateParams(pp, list); + ta_add_poly(state.listType, pp); + + sendVertices(list, vtx, needClipping); + } + } + break; + + case ICHList::VTX_TYPE_VUR: + { + verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0); + N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)((u8 *)list + sizeof(ICHList)); + if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) + break; + PolyParam pp{}; + pp.pcw.Shadow = list->pcw.shadow; + pp.pcw.Texture = list->pcw.texture; + pp.pcw.Offset = list->pcw.offset; + pp.pcw.Gouraud = list->pcw.gouraud; + pp.pcw.Volume = list->pcw.volume; + pp.isp = list->isp; + pp.tsp = list->tsp0; + pp.tcw = list->tcw0; + pp.tsp1 = list->tsp1; + pp.tcw1 = list->tcw1; + setStateParams(pp, list); + ta_add_poly(state.listType, pp); + + sendVertices(list, vtx, needClipping); + } + break; + + case ICHList::VTX_TYPE_VR: + { + N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)((u8 *)list + sizeof(ICHList)); + if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) + break; + PolyParam pp{}; + pp.pcw.Shadow = list->pcw.shadow; + pp.pcw.Texture = list->pcw.texture; + pp.pcw.Offset = list->pcw.offset; + pp.pcw.Gouraud = list->pcw.gouraud; + pp.pcw.Volume = list->pcw.volume; + pp.isp = list->isp; + pp.tsp = list->tsp0; + pp.tsp1 = list->tsp1; + setStateParams(pp, list); + ta_add_poly(state.listType, pp); + + sendVertices(list, vtx, needClipping); + } + break; + + case ICHList::VTX_TYPE_VUB: + { + // TODO + //printf("BUMP MAP fmt %d filter %d src select %d dst %d\n", list->tcw0.PixelFmt, list->tsp0.FilterMode, list->tsp0.SrcSelect, list->tsp0.DstSelect); + N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB *)((u8 *)list + sizeof(ICHList)); + if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) + break; + PolyParam pp{}; + pp.pcw.Shadow = list->pcw.shadow; + pp.pcw.Texture = 1; + pp.pcw.Offset = 1; + pp.pcw.Gouraud = list->pcw.gouraud; + pp.pcw.Volume = list->pcw.volume; + pp.isp = list->isp; + pp.tsp = list->tsp0; + pp.tcw = list->tcw0; + pp.tsp1 = list->tsp1; + pp.tcw1 = list->tcw1; + setStateParams(pp, list); + ta_add_poly(state.listType, pp); + + sendVertices(list, vtx, needClipping); + } + break; + + default: + WARN_LOG(PVR, "Unhandled poly format %x", list->flags); + die("Unsupported"); + break; + } + envMapping = false; +} + +template +static void executeCommand(u8 *data, int size) +{ +// verify(size >= 0); +// verify(size < (int)ELAN_RAM_SIZE); +// if (0x2b00 == (u32)(data - RAM)) +// for (int i = 0; i < size; i += 4) +// DEBUG_LOG(PVR, "Elan Parse %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); + + while (size >= 32) + { + const int oldSize = size; + ElanBase *cmd = (ElanBase *)data; + if (cmd->pcw.naomi2) + { + switch(cmd->pcw.n2Command) + { + case PCW::null: + size -= 32; + break; + + case PCW::projMatrix: + if (Active) + state.setProjectionMatrix(data); + size -= sizeof(ProjMatrix); + break; + + case PCW::matrixOrLight: + { + InstanceMatrix *instance = (InstanceMatrix *)data; + if (instance->isInstanceMatrix()) + { + //DEBUG_LOG(PVR, "Model instance"); + if (Active) + state.setMatrix(instance); + size -= sizeof(InstanceMatrix); + break; + } + if (Active) + { + if (instance->id1 & 0x10) + { + state.setLightModel(data); + } + else //if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4 + { + if (instance->pcw.parallelLight) + { + ParallelLight *light = (ParallelLight *)data; + state.setLight(light->lightId, data); + } + else + { + PointLight *light = (PointLight *)data; + state.setLight(light->lightId, data); + } + } + //else + //{ + // WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2); + // for (int i = 0; i < 32; i += 4) + // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); + //} + } + size -= sizeof(LightModel); + } + break; + + case PCW::model: + { + Model *model = (Model *)data; + if (Active) + { + cullingReversed = model->param.cwCulling == 0; + state.setClipMode(model->pcw); + openModifierVolume = model->param.openVolume; + shadowedVolume = model->pcw.shadow; + modelTSP = model->tsp; + DEBUG_LOG(PVR, "Model offset %x size %x pcw %08x tsp %08x", model->offset, model->size, model->pcw.full, model->tsp.full); + } + executeCommand(&RAM[model->offset & 0x1ffffff8], model->size); + cullingReversed = false; + openModifierVolume = false; + shadowedVolume = false; + modelTSP.full = 0; + size -= sizeof(Model); + } + break; + + case PCW::registerWait: + { + RegisterWait *wait = (RegisterWait *)data; + if (wait->offset != (u32)-1 && wait->mask != 0) + { + DEBUG_LOG(PVR, "Register wait %x mask %x", wait->offset, wait->mask); + // wait for interrupt + HollyInterruptID inter; + switch (wait->mask) + { + case 0x80: + inter = holly_OPAQUE; + break; + case 0x100: + inter = holly_OPAQUEMOD; + break; + case 0x200: + inter = holly_TRANS; + break; + case 0x400: + inter = holly_TRANSMOD; + break; + case 0x200000: + inter = holly_PUNCHTHRU; + break; + default: + WARN_LOG(PVR, "Unknown interrupt mask %x", wait->mask); + die("unexpected"); + inter = holly_OPAQUE; + break; + } + asic_RaiseInterruptBothCLX(inter); + TA_ITP_CURRENT += 32; + if (Active) + state.reset(); + } + size -= sizeof(RegisterWait); + } + break; + + case PCW::link: + { + Link *link = (Link *)data; + if (link->offset & 0x80000000) + { + // elan v10 only + DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->_res & 0x1ffffff8, link->size); + memcpy(&vram[link->_res & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size); + reg74 |= 1; + } + else if (link->offset & 0x20000000) + { + // elan v10 only + DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & 0x01fffff8, link->_res & VRAM_MASK, link->size); + memcpy(&vram[link->_res & VRAM_MASK], &RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size); + reg74 |= 1; + } + else + { + DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size); + executeCommand(&RAM[link->offset & (ELAN_RAM_SIZE - 1)], link->size); + } + size -= sizeof(Link); + } + break; + + case PCW::gmp: + if (Active) + state.setGMP(data); + size -= sizeof(GMP); + break; + + case PCW::ich: + { + ICHList *ich = (ICHList *)data; + if (Active) + { + DEBUG_LOG(PVR, "ICH flags %x, %d verts", ich->flags, ich->vtxCount); + sendPolygon(ich); + } + size -= sizeof(ICHList) + ich->vertexSize() * ich->vtxCount; + } + break; + + default: + DEBUG_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command); + size -= 32; + break; + } + } + else + { + u32 pcw = *(u32 *)data; + if ((pcw & 0xd0ffff00) == 0x808c0000) // display list + { + if (Active) + { + DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf); + state.reset(); + state.listType = (pcw >> 24) & 0xf; + // TODO is this the right place for this? + SQBuffer eol{}; + ta_vtx_data32(&eol); + } + size -= 24 * 4; + } + else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping + { + if (Active) + { + state.setClipMode((PCW&)pcw); + DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip); + } + size -= 0xE0; + } + else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked? + { + if (Active) + { + DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); + state.listType = (pcw >> 24) & 0xf; + size -= 32; + ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32); + } + size = 32; + } + else if (pcw == 0x20000000) + { + // User clipping + if (Active) + { + u32 *tiles = (u32 *)data + 4; + DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32, + tiles[2] * 32, tiles[3] * 32); + state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]); + } + size -= 32; + } + else + { + if (Active) + { + if (pcw != 0) + INFO_LOG(PVR, "Unhandled command %x", pcw); + for (int i = 0; i < 32; i += 4) + DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); + } + size -= 32; + } + } + data += oldSize - size; + } +} + +template +void DYNACALL write_elancmd(u32 addr, T data) +{ +// DEBUG_LOG(PVR, "ELAN cmd %08x = %x", addr, data); + addr &= 0x1f; + *(T *)&((u8 *)elanCmd)[addr] = data; + + if (addr == 0x1c) + { + if (!ggpo::rollbacking()) + executeCommand((u8 *)elanCmd, sizeof(elanCmd)); + else + executeCommand((u8 *)elanCmd, sizeof(elanCmd)); + if (!(reg74 & 1)) + reg74 |= 2; + reg74 &= ~0x3c; + } +} + +template +T DYNACALL read_elanram(u32 addr) +{ + return *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)]; +} + +template +void DYNACALL write_elanram(u32 addr, T data) +{ + *(T *)&RAM[addr & (ELAN_RAM_SIZE - 1)] = data; +} + +void init() +{ +} + +void reset(bool hard) +{ + if (hard) + { + memset(RAM, 0, ELAN_RAM_SIZE); + state.reset(); + } +} + +void term() +{ +} + +void vmem_init() +{ + elanRegHandler = _vmem_register_handler_Template(read_elanreg, write_elanreg); + elanCmdHandler = _vmem_register_handler_Template(read_elancmd, write_elancmd); + elanRamHandler = _vmem_register_handler_Template(read_elanram, write_elanram); +} + +void vmem_map(u32 base) +{ + _vmem_map_handler(elanRegHandler, base | 8, base | 8); + _vmem_map_handler(elanCmdHandler, base | 9, base | 9); + _vmem_map_handler(elanRamHandler, base | 0xA, base | 0xB); + _vmem_map_block(RAM, base | 0xA, base | 0xB, ELAN_RAM_SIZE - 1); +} + +void serialize(Serializer& ser) +{ + if (!settings.platform.isNaomi2()) + return; + ser << reg10; + ser << reg74; + ser << elanCmd; + if (!ser.rollback()) + ser.serialize(RAM, ELAN_RAM_SIZE); + state.serialize(ser); +} + +void deserialize(Deserializer& deser) +{ + if (!settings.platform.isNaomi2()) + return; + deser >> reg10; + deser >> reg74; + deser >> elanCmd; + if (!deser.rollback()) + deser.deserialize(RAM, ELAN_RAM_SIZE); + state.deserialize(deser); +} + +} diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index a929582c1..7af3a8d62 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -1561,8 +1561,10 @@ bool ta_parse_vdrc(TA_context* ctx) bgpp->envMapping[1] = false; } - const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT; - // TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT + const bool mergeTranslucent = !config::PerStripSorting + || config::RendererType == RenderType::OpenGL_OIT + || config::RendererType == RenderType::DirectX11_OIT; + // TODO || config::RendererType == RenderType::Vulkan_OIT TA_context *childCtx = ctx; while (childCtx != nullptr) { @@ -1680,8 +1682,10 @@ bool ta_parse_naomi2(TA_context* ctx) int op_count = 0; int pt_count = 0; int tr_count = 0; - const bool mergeTranslucent = !config::PerStripSorting || config::RendererType == RenderType::OpenGL_OIT; - // TODO || config::RendererType == RenderType::Vulkan_OIT || config::RendererType == RenderType::DirectX11_OIT; + const bool mergeTranslucent = !config::PerStripSorting + || config::RendererType == RenderType::OpenGL_OIT + || config::RendererType == RenderType::DirectX11_OIT; + // TODO || config::RendererType == RenderType::Vulkan_OIT for (const RenderPass& pass : ctx->rend.render_passes) { make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend); diff --git a/core/rend/dx11/dx11_naomi2.cpp b/core/rend/dx11/dx11_naomi2.cpp new file mode 100644 index 000000000..1669aceb1 --- /dev/null +++ b/core/rend/dx11/dx11_naomi2.cpp @@ -0,0 +1,387 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . + */ +#include "dx11_naomi2.h" + +const char * const DX11N2VertexShader = R"( +#if pp_Gouraud == 1 +#define INTERPOLATION +#else +#define INTERPOLATION nointerpolation +#endif + +struct VertexIn +{ + float4 pos : POSITION; +#if POSITION_ONLY == 0 + float4 col : COLOR0; + float4 spec : COLOR1; + float2 uv : TEXCOORD0; +#if pp_TwoVolumes == 1 + float4 col1 : COLOR2; + float4 spec1 : COLOR3; + float2 uv1 : TEXCOORD1; +#endif + float3 normal: NORMAL; + uint vertexId : SV_VertexID; +#endif +}; + +struct VertexOut +{ + float4 pos : SV_POSITION; + float4 uv : TEXCOORD0; +#if POSITION_ONLY == 0 + INTERPOLATION float4 col : COLOR0; + INTERPOLATION float4 spec : COLOR1; +#if pp_TwoVolumes == 1 + float2 uv1 : TEXCOORD1; + INTERPOLATION float4 col1 : COLOR2; + INTERPOLATION float4 spec1 : COLOR3; +#endif + nointerpolation uint index : BLENDINDICES0; +#endif +}; + +cbuffer shaderConstants : register(b0) +{ + float4x4 ndcMat; + float4 leftPlane; + float4 topPlane; + float4 rightPlane; + float4 bottomPlane; +}; + +cbuffer polyConstants : register(b1) +{ + float4x4 mvMat; + float4x4 normalMat; + float4x4 projMat; + int envMapping0; + int envMapping1; + int bumpMapping; + int polyNumber; + + float4 glossCoef; + int4 constantColor; + int4 model_diff_spec; // diffuse0, diffuse1, specular0, specular1 +}; + +void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal); +void computeEnvMap(inout float2 uv, in float3 normal); +void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat); + +[clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)] +VertexOut main(in VertexIn vin) +{ + VertexOut vo; + vo.pos = mul(mvMat, float4(vin.pos.xyz, 1.f)); +#if POSITION_ONLY == 0 + vo.col = vin.col; + vo.spec = vin.spec; + #if LIGHT_ON == 1 + float4 vnorm = normalize(mul(normalMat, float4(vin.normal, 0.f))); + #endif + #if pp_TwoVolumes == 1 + vo.col1 = vin.col1; + vo.spec1 = vin.spec1; + vo.uv1 = vin.uv1; + #if LIGHT_ON == 1 + // FIXME need offset0 and offset1 for bump maps + if (bumpMapping == 1) + computeBumpMap(vo.spec, vo.spec1, vo.pos.xyz, vnorm.xyz, normalMat); + else + { + computeColors(vo.col1, vo.spec1, 1, vo.pos.xyz, vnorm.xyz); + #if pp_Texture == 0 + vo.col1 += vo.spec1; + #endif + } + if (envMapping1 == 1) + computeEnvMap(vo.uv1.xy, vnorm.xyz); + #endif + #endif + #if LIGHT_ON == 1 + if (bumpMapping == 0) + { + computeColors(vo.col, vo.spec, 0, vo.pos.xyz, vnorm.xyz); + #if pp_Texture == 0 + vo.col += vo.spec; + #endif + } + #endif + vo.uv.xy = vin.uv; + #if LIGHT_ON == 1 + if (envMapping0 == 1) + computeEnvMap(vo.uv.xy, vnorm.xyz); + #endif + vo.index = (uint(polyNumber) << 18) + vin.vertexId; +#endif + + vo.pos = mul(projMat, vo.pos); + + vo.pos = float4(vo.pos.xy / vo.pos.w, 1.f / vo.pos.w, 1.f); + vo.pos = mul(ndcMat, vo.pos); +#if POSITION_ONLY == 1 + vo.uv = float4(0.f, 0.f, 0.f, vo.pos.z); +#else +#if pp_Gouraud == 1 + vo.col *= vo.pos.z; + vo.spec *= vo.pos.z; +#if pp_TwoVolumes == 1 + vo.col1 *= vo.pos.z; + vo.spec1 *= vo.pos.z; +#endif +#endif + vo.uv = float4(vo.uv.xy * vo.pos.z, 0.f, vo.pos.z); +#if pp_TwoVolumes == 1 + vo.uv1 *= vo.pos.z; +#endif +#endif + vo.pos.w = 1.f; + vo.pos.z = 0.f; + + return vo; +} + +)"; + +const char * const DX11N2ColorShader = R"( +#define PI 3.1415926f + +#define LMODE_SINGLE_SIDED 0 +#define LMODE_DOUBLE_SIDED 1 +#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2 +#define LMODE_SPECIAL_EFFECT 3 +#define LMODE_THIN_SURFACE 4 +#define LMODE_BUMP_MAP 5 + +#define ROUTING_BASEDIFF_BASESPEC_ADD 0 +#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1 +#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 +#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 +#define ROUTING_ALPHADIFF_ADD 4 +#define ROUTING_ALPHAATTEN_ADD 5 +#define ROUTING_FOGDIFF_ADD 6 +#define ROUTING_FOGATTENUATION_ADD 7 +#define ROUTING_BASEDIFF_BASESPEC_SUB 8 +#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 +#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 +#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 +#define ROUTING_ALPHADIFF_SUB 12 +#define ROUTING_ALPHAATTEN_SUB 13 + +struct N2Light +{ + float4 color; + float4 direction; + float4 position; + int parallel; + int routing; + int dmode; + int smode; + int4 diffuse_specular; // diffuse0, diffuse1, specular0, specular1 + float attnDistA; + float attnDistB; + float attnAngleA; + float attnAngleB; + int distAttnMode; + int3 _pad; +}; + +cbuffer lightConstants : register(b2) +{ + N2Light lights[16]; + int lightCount; + float4 ambientBase[2]; + float4 ambientOffset[2]; + int4 ambientMaterial; // base0, base1, offset0, offset1 + int useBaseOver; + int bumpId0; + int bumpId1; +} + +void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, in float3 position, in float3 normal) +{ + if (constantColor[volIdx] == 1) + return; + float3 diffuse = float3(0.f, 0.f, 0.f); + float3 specular = float3(0.f, 0.f, 0.f); + float diffuseAlpha = 0.f; + float specularAlpha = 0.f; + + for (int i = 0; i < lightCount; i++) + { + N2Light light = lights[i]; + float3 lightDir; // direction to the light + float3 lightColor = light.color.rgb; + if (light.parallel == 1) + { + lightDir = normalize(light.direction.xyz); + } + else + { + lightDir = normalize(light.position.xyz - position); + if (light.attnDistA != 1.f || light.attnDistB != 0.f) + { + float distance = length(light.position.xyz - position); + if (light.distAttnMode == 0) + distance = 1.f / distance; + lightColor *= clamp(light.attnDistB * distance + light.attnDistA, 0.f, 1.f); + } + if (light.attnAngleA != 1.f || light.attnAngleB != 0.f) + { + float3 spotDir = light.direction.xyz; + float cosAngle = 1.f - max(0.f, dot(lightDir, spotDir)); + lightColor *= clamp(cosAngle * light.attnAngleB + light.attnAngleA, 0.f, 1.f); + } + } + int routing = light.routing; + if (light.diffuse_specular[volIdx] == 1) // If light contributes to diffuse + { + float factor; + switch (light.dmode) + { + case LMODE_SINGLE_SIDED: + factor = max(dot(normal, lightDir), 0.f); + break; + case LMODE_DOUBLE_SIDED: + factor = abs(dot(normal, lightDir)); + break; + case LMODE_SPECIAL_EFFECT: + default: + factor = 1.f; + break; + } + if (routing == ROUTING_ALPHADIFF_SUB) + diffuseAlpha -= lightColor.r * factor; + else if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + diffuse += lightColor * factor; + if (routing == ROUTING_OFFSDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + } + if (light.diffuse_specular[2 + volIdx] == 1) // If light contributes to specular + { + float3 reflectDir = reflect(-lightDir, normal); + float factor; + switch (light.smode) + { + case LMODE_SINGLE_SIDED: + factor = clamp(pow(max(dot(normalize(-position), reflectDir), 0.f), glossCoef[volIdx]), 0.f, 1.f); + break; + case LMODE_DOUBLE_SIDED: + factor = clamp(pow(abs(dot(normalize(-position), reflectDir)), glossCoef[volIdx]), 0.f, 1.f); + break; + case LMODE_SPECIAL_EFFECT: + default: + factor = 1.f; + break; + } + if (routing == ROUTING_ALPHADIFF_SUB) + specularAlpha -= lightColor.r * factor; + else if (routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_BASESPEC_ADD) + diffuse += lightColor * factor; + } + } + // ambient with material + if (ambientMaterial[volIdx] == 1) + diffuse += ambientBase[volIdx].rgb; + if (ambientMaterial[volIdx + 2] == 1) + specular += ambientOffset[volIdx].rgb; + + if (model_diff_spec[volIdx] == 1) + baseCol.rgb *= diffuse; + if (model_diff_spec[volIdx + 2] == 1) + offsetCol.rgb *= specular; + + // ambient w/o material + if (ambientMaterial[volIdx] == 0 && model_diff_spec[volIdx] == 1) + baseCol.rgb += ambientBase[volIdx].rgb; + if (ambientMaterial[volIdx + 2] == 0 && model_diff_spec[volIdx + 2] == 1) + offsetCol.rgb += ambientOffset[volIdx].rgb; + + baseCol.a = max(0.f, baseCol.a + diffuseAlpha); + offsetCol.a = max(0.f, offsetCol.a + specularAlpha); + if (useBaseOver == 1) + { + float4 overflow = max(float4(0.f, 0.f, 0.f, 0.f), baseCol - float4(1.f, 1.f, 1.f, 1.f)); + offsetCol += overflow; + } +} + +void computeEnvMap(inout float2 uv, in float3 normal) +{ + // Cheap env mapping + uv += normal.xy / 2.f + 0.5f; + uv = clamp(uv, 0.f, 1.f); +} + +void computeBumpMap(inout float4 color0, in float4 color1, in float3 position, in float3 normal, in float4x4 normalMat) +{ + // TODO + //if (bumpId0 == -1) + return; + float3 tangent = color0.xyz; + if (tangent.x > 0.5f) + tangent.x -= 1.f; + if (tangent.y > 0.5f) + tangent.y -= 1.f; + if (tangent.z > 0.5f) + tangent.z -= 1.f; + tangent = normalize(mul(normalMat, float4(tangent, 0.f))).xyz; + float3 bitangent = color1.xyz; + if (bitangent.x > 0.5f) + bitangent.x -= 1.f; + if (bitangent.y > 0.5f) + bitangent.y -= 1.f; + if (bitangent.z > 0.5f) + bitangent.z -= 1.f; + bitangent = normalize(mul(normalMat, float4(bitangent, 0.f))).xyz; + + float scaleDegree = color0.w; + float scaleOffset = color1.w; + + N2Light light = lights[bumpId0]; + float3 lightDir; // direction to the light + if (light.parallel == 1) + lightDir = normalize(light.direction.xyz); + else + lightDir = normalize(light.position.xyz - position); + + float n = dot(lightDir, normal); + float cosQ = dot(lightDir, tangent); + float sinQ = dot(lightDir, bitangent); + + float sinT = clamp(n, 0.f, 1.f); + float k1 = 1.f - scaleDegree; + float k2 = scaleDegree * sinT; + float k3 = scaleDegree * sqrt(1.f - sinT * sinT); // cos T + + float q = acos(cosQ); + if (sinQ < 0.f) + q = 2.f * PI - q; + + color0.r = k2; + color0.g = k3; + color0.b = q / PI / 2.f; + color0.a = k1; +} + +)"; diff --git a/core/rend/dx11/dx11_naomi2.h b/core/rend/dx11/dx11_naomi2.h new file mode 100644 index 000000000..a4ad6f7ec --- /dev/null +++ b/core/rend/dx11/dx11_naomi2.h @@ -0,0 +1,220 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . + */ +#pragma once +#include +#include +#include "windows/comptr.h" +#include "hw/pvr/ta_ctx.h" + +extern const char * const DX11N2VertexShader; +extern const char * const DX11N2ColorShader; + +struct N2PolyConstants +{ + float mvMat[4][4]; // 0 + float normalMat[4][4]; // 64 + float projMat[4][4]; // 128 + int envMapping[2]; // 192 + int bumpMapping; // 200 + int polyNumber; // 204 + + float glossCoef[4]; // 208 + int constantColor[4]; // 224 + // int4 model_diff_spec + int modelDiffuse[2]; // 240 + int modelSpecular[2]; // 248 + // 256 +}; +static_assert(sizeof(N2PolyConstants) == 256, "sizeof(N2PolyConstants) should be 256"); + +struct DX11N2Light +{ + float color[4]; // 0 + float direction[4]; // 16 + float position[4]; // 32 + int parallel; // 48 + int routing; // 52 + int dmode; // 56 + int smode; // 60 + // int4 diffuse_specular + int diffuse[2]; // 64 + int specular[2]; // 72 + float attnDistA; // 80 + float attnDistB; // 84 + float attnAngleA; // 88 + float attnAngleB; // 92 + int distAttnMode; // 96 + int _pad[3]; + // 112 +}; +static_assert(sizeof(DX11N2Light) == 112, "sizeof(DX11N2Light) should be 112"); + +struct N2LightConstants +{ + DX11N2Light lights[16]; // 0 + int lightCount; // 1792 + int _pad0[3]; + float ambientBase[2][4]; // 1808 + float ambientOffset[2][4]; // 1840 + // int4 ambientMaterial + int ambientMaterialBase[2]; // 1872 + int ambientMaterialOffset[2]; // 1880 + int useBaseOver; // 1888 + int bumpId1; // 1892 + int bumpId2; // 1896 + int _pad3; // 1900 + // 1904 +}; +static_assert(sizeof(N2LightConstants) == 1904, "sizeof(N2LightConstants) should be 1904"); + +class Naomi2Helper +{ +public: + void init(ComPtr& device, ComPtr deviceContext) + { + this->deviceContext = deviceContext; + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(N2PolyConstants); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + if (FAILED(device->CreateBuffer(&desc, nullptr, &polyConstantsBuffer.get()))) + WARN_LOG(RENDERER, "Per-polygon constants buffer creation failed"); + + desc.ByteWidth = sizeof(N2LightConstants); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + if (FAILED(device->CreateBuffer(&desc, nullptr, &lightConstantsBuffer.get()))) + WARN_LOG(RENDERER, "Light constants buffer creation failed"); + resetCache(); + } + + void term() + { + polyConstantsBuffer.reset(); + lightConstantsBuffer.reset(); + deviceContext.reset(); + } + + void setConstants(const PolyParam& pp, u32 polyNumber) + { + N2PolyConstants polyConstants; + memcpy(polyConstants.mvMat, pp.mvMatrix, sizeof(polyConstants.mvMat)); + memcpy(polyConstants.normalMat, pp.normalMatrix, sizeof(polyConstants.normalMat)); + memcpy(polyConstants.projMat, pp.projMatrix, sizeof(polyConstants.projMat)); + polyConstants.envMapping[0] = pp.envMapping[0]; + polyConstants.envMapping[1] = pp.envMapping[1]; + polyConstants.bumpMapping = pp.pcw.Texture == 1 && pp.tcw.PixelFmt == PixelBumpMap; + polyConstants.polyNumber = polyNumber; + for (size_t i = 0; i < 2; i++) + { + polyConstants.glossCoef[i] = pp.glossCoef[i]; + polyConstants.constantColor[i] = pp.constantColor[i]; + polyConstants.modelDiffuse[i] = pp.diffuseColor[i]; + polyConstants.modelSpecular[i] = pp.specularColor[i]; + } + setConstBuffer(polyConstantsBuffer, polyConstants); + deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get()); + + if (pp.lightModel != lastModel) + { + lastModel = pp.lightModel; + N2LightConstants lightConstants{}; + if (pp.lightModel != nullptr) + { + const N2LightModel& lights = *pp.lightModel; + lightConstants.lightCount = lights.lightCount; + for (int i = 0; i < lights.lightCount; i++) + { + DX11N2Light& light = lightConstants.lights[i]; + memcpy(light.color, lights.lights[i].color, sizeof(light.color)); + memcpy(light.direction, lights.lights[i].direction, sizeof(light.direction)); + memcpy(light.position, lights.lights[i].position, sizeof(light.position)); + light.parallel = lights.lights[i].parallel; + light.routing = lights.lights[i].routing; + light.dmode = lights.lights[i].dmode; + light.smode = lights.lights[i].smode; + memcpy(light.diffuse, lights.lights[i].diffuse, sizeof(light.diffuse)); + memcpy(light.specular, lights.lights[i].specular, sizeof(light.specular)); + light.attnDistA = lights.lights[i].attnDistA; + light.attnDistB = lights.lights[i].attnDistB; + light.attnAngleA = lights.lights[i].attnAngleA; + light.attnAngleB = lights.lights[i].attnAngleB; + light.distAttnMode = lights.lights[i].distAttnMode; + } + memcpy(lightConstants.ambientBase, lights.ambientBase, sizeof(lightConstants.ambientBase)); + memcpy(lightConstants.ambientOffset, lights.ambientOffset, sizeof(lightConstants.ambientOffset)); + for (int i = 0; i < 2; i++) + { + lightConstants.ambientMaterialBase[i] = lights.ambientMaterialBase[i]; + lightConstants.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i]; + } + lightConstants.useBaseOver = lights.useBaseOver; + lightConstants.bumpId1 = lights.bumpId1; + lightConstants.bumpId2 = lights.bumpId2; + } + else + { + lightConstants.lightCount = 0; + float white[] { 1.f, 1.f, 1.f, 1.f }; + float black[4]{}; + for (int vol = 0; vol < 2; vol++) + { + lightConstants.ambientMaterialBase[vol] = 0; + lightConstants.ambientMaterialOffset[vol] = 0; + memcpy(lightConstants.ambientBase[vol], white, sizeof(white)); + memcpy(lightConstants.ambientOffset[vol], black, sizeof(black)); + } + lightConstants.useBaseOver = 0; + lightConstants.bumpId1 = -1; + lightConstants.bumpId2 = -1; + } + setConstBuffer(lightConstantsBuffer, lightConstants); + deviceContext->VSSetConstantBuffers(2, 1, &lightConstantsBuffer.get()); + } + } + + void setConstants(const float *mvMatrix, const float *projMatrix) + { + N2PolyConstants polyConstants; + memcpy(polyConstants.mvMat, mvMatrix, sizeof(polyConstants.mvMat)); + memcpy(polyConstants.projMat, projMatrix, sizeof(polyConstants.projMat)); + setConstBuffer(polyConstantsBuffer, polyConstants); + deviceContext->VSSetConstantBuffers(1, 1, &polyConstantsBuffer.get()); + } + + void resetCache() { + lastModel = (N2LightModel *)1; + } + +private: + template + void setConstBuffer(const ComPtr& buffer, const T& data) + { + D3D11_MAPPED_SUBRESOURCE mappedSubres; + deviceContext->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &data, sizeof(T)); + deviceContext->Unmap(buffer, 0); + } + + ComPtr deviceContext; + ComPtr polyConstantsBuffer; + ComPtr lightConstantsBuffer; + const N2LightModel *lastModel; +}; diff --git a/core/rend/dx11/dx11_renderer.cpp b/core/rend/dx11/dx11_renderer.cpp index 75d430d1f..1eb988083 100644 --- a/core/rend/dx11/dx11_renderer.cpp +++ b/core/rend/dx11/dx11_renderer.cpp @@ -29,6 +29,7 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[] { "COLOR", 0, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "COLOR", 1, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 }, }; const D3D11_INPUT_ELEMENT_DESC ModVolLayout[] { @@ -43,7 +44,7 @@ bool DX11Renderer::Init() shaders = &theDX11Context.getShaders(); samplers = &theDX11Context.getSamplers(); - bool success = (bool)shaders->getVertexShader(true); + bool success = (bool)shaders->getVertexShader(true, true); ComPtr blob = shaders->getVertexShaderBlob(); success = success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())); blob = shaders->getMVVertexShaderBlob(); @@ -146,6 +147,7 @@ bool DX11Renderer::Init() quad = std::unique_ptr(new Quad()); quad->init(device, deviceContext, shaders); + n2Helper.init(device, deviceContext); fog_needs_update = true; forcePaletteUpdate(); @@ -163,6 +165,7 @@ bool DX11Renderer::Init() void DX11Renderer::Term() { NOTICE_LOG(RENDERER, "DX11 renderer terminating"); + n2Helper.term(); vtxConstants.reset(); pxlConstants.reset(); fbTex.reset(); @@ -310,7 +313,12 @@ bool DX11Renderer::Process(TA_context* ctx) } else { - if (!ta_parse_vdrc(ctx)) + bool success; + if (settings.platform.isNaomi2()) + success = ta_parse_naomi2(ctx); + else + success = ta_parse_vdrc(ctx); + if (!success) return false; } @@ -319,7 +327,7 @@ bool DX11Renderer::Process(TA_context* ctx) // // Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire -// Journal of Graphics GPU and Game Tools � November 2011 +// Journal of Graphics GPU and Game Tools - November 2011 // static glm::vec3 intersect(const glm::vec3& A, float Adist , const glm::vec3& B, float Bdist) { @@ -523,21 +531,24 @@ void DX11Renderer::uploadGeometryBuffers() { const ModTriangle *data = nullptr; u32 size = 0; -#if 1 - // clip triangles - std::vector modVolTriangles; - modVolTriangles.reserve(pvrrc.modtrig.used()); - clipModVols(pvrrc.global_param_mvo, modVolTriangles); - clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles); - if (!modVolTriangles.empty()) + if (!settings.platform.isNaomi2()) // TODO for naomi2 as well? { - size = (u32)(modVolTriangles.size() * sizeof(ModTriangle)); - data = modVolTriangles.data(); + // clip triangles + std::vector modVolTriangles; + modVolTriangles.reserve(pvrrc.modtrig.used()); + clipModVols(pvrrc.global_param_mvo, modVolTriangles); + clipModVols(pvrrc.global_param_mvo_tr, modVolTriangles); + if (!modVolTriangles.empty()) + { + size = (u32)(modVolTriangles.size() * sizeof(ModTriangle)); + data = modVolTriangles.data(); + } + } + else + { + size = pvrrc.modtrig.bytes(); + data = pvrrc.modtrig.head(); } -#else - size = pvrrc.modtrig.bytes(); - data = pvrrc.modtrig.head(); -#endif if (size > 0) { verify(ensureBufferSize(modvolBuffer, D3D11_BIND_VERTEX_BUFFER, modvolBufferSize, size)); @@ -598,6 +609,7 @@ bool DX11Renderer::Render() if (!pvrrc.isRenderFramebuffer) { + n2Helper.resetCache(); uploadGeometryBuffers(); updateFogTexture(); @@ -745,7 +757,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp) DX11Texture *texture = (DX11Texture *)gp->texture; bool gpuPalette = texture != nullptr ? texture->gpuPalette : false; - ComPtr vertexShader = shaders->getVertexShader(gp->pcw.Gouraud); + ComPtr vertexShader = shaders->getVertexShader(gp->pcw.Gouraud, gp->isNaomi2()); deviceContext->VSSetShader(vertexShader, nullptr, 0); ComPtr pixelShader = shaders->getShader( gp->pcw.Texture, @@ -819,7 +831,7 @@ void DX11Renderer::setRenderState(const PolyParam *gp) zfunc = gp->isp.DepthMode; bool zwriteEnable; - if (SortingEnabled && !config::PerStripSorting) + if (SortingEnabled /* && !config::PerStripSorting */) zwriteEnable = false; else { @@ -832,6 +844,9 @@ void DX11Renderer::setRenderState(const PolyParam *gp) } const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0; deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, config::ModifierVolumes), stencil); + + if (gp->isNaomi2()) + n2Helper.setConstants(*gp, 0); // poly number only used in OIT } template @@ -900,7 +915,7 @@ void DX11Renderer::drawSorted(bool multipass) // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff); - ComPtr vertexShader = shaders->getVertexShader(true); + ComPtr vertexShader = shaders->getVertexShader(true, settings.platform.isNaomi2()); deviceContext->VSSetShader(vertexShader, nullptr, 0); ComPtr pixelShader = shaders->getShader( false, @@ -949,7 +964,6 @@ void DX11Renderer::drawModVols(int first, int count) deviceContext->OMSetBlendState(blendStates.getState(false, 0, 0, true), nullptr, 0xffffffff); - deviceContext->VSSetShader(shaders->getMVVertexShader(), nullptr, 0); deviceContext->PSSetShader(shaders->getModVolShader(), nullptr, 0); deviceContext->RSSetScissorRects(1, &scissorRect); @@ -958,6 +972,8 @@ void DX11Renderer::drawModVols(int first, int count) ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first]; int mod_base = -1; + const float *curMVMat = nullptr; + const float *curProjMat = nullptr; for (int cmv = 0; cmv < count; cmv++) { @@ -968,6 +984,13 @@ void DX11Renderer::drawModVols(int first, int count) if (mod_base == -1) mod_base = param.first; + if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat)) + { + curMVMat = param.mvMatrix; + curProjMat = param.projMatrix; + n2Helper.setConstants(param.mvMatrix, param.projMatrix); + } + deviceContext->VSSetShader(shaders->getMVVertexShader(param.isNaomi2()), nullptr, 0); if (!param.isp.VolumeLast && mv_mode > 0) // OR'ing (open volume or quad) deviceContext->OMSetDepthStencilState(depthStencilStates.getMVState(DepthStencilStates::Or), 2); @@ -1005,6 +1028,8 @@ void DX11Renderer::drawModVols(int first, int count) deviceContext->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R32_UINT, 0); deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + // Use the background poly as a quad + deviceContext->VSSetShader(shaders->getMVVertexShader(false), nullptr, 0); deviceContext->DrawIndexed(4, 0, 0); } diff --git a/core/rend/dx11/dx11_renderer.h b/core/rend/dx11/dx11_renderer.h index 4f1c3f6f8..a96c7f073 100644 --- a/core/rend/dx11/dx11_renderer.h +++ b/core/rend/dx11/dx11_renderer.h @@ -27,6 +27,7 @@ #include "dx11_shaders.h" #include "rend/sorter.h" #include "dx11_renderstate.h" +#include "dx11_naomi2.h" struct DX11Renderer : public Renderer { @@ -112,6 +113,7 @@ protected: u32 height = 0; bool frameRendered = false; bool frameRenderedOnce = false; + Naomi2Helper n2Helper; private: void readDCFramebuffer(); diff --git a/core/rend/dx11/dx11_shaders.cpp b/core/rend/dx11/dx11_shaders.cpp index 8e055bc41..fe7037a28 100644 --- a/core/rend/dx11/dx11_shaders.cpp +++ b/core/rend/dx11/dx11_shaders.cpp @@ -19,6 +19,7 @@ #include "dx11_shaders.h" #include "dx11context.h" #include "stdclass.h" +#include "dx11_naomi2.h" #include const char * const VertexShader = R"( @@ -356,6 +357,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" }; static D3D_SHADER_MACRO VertexMacros[] { { "pp_Gouraud", "1" }, + { "POSITION_ONLY", "0" }, + { "pp_TwoVolumes", "0" }, + { "LIGHT_ON", "1" }, { nullptr, nullptr } }; @@ -439,24 +443,48 @@ const ComPtr& DX11Shaders::getShader(bool pp_Texture, bool pp return shader; } -const ComPtr& DX11Shaders::getVertexShader(bool gouraud) +const ComPtr& DX11Shaders::getVertexShader(bool gouraud, bool naomi2) { - ComPtr& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader; + int index = (int)gouraud | ((int)naomi2 << 1); + ComPtr& vertexShader = vertexShaders[index]; if (!vertexShader) { VertexMacros[0].Definition = MacroValues[gouraud]; - vertexShader = compileVS(VertexShader, "main", VertexMacros); + if (!naomi2) + { + vertexShader = compileVS(VertexShader, "main", VertexMacros); + } + else + { + VertexMacros[1].Definition = MacroValues[false]; + VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[3].Definition = MacroValues[true]; + std::string source(DX11N2VertexShader); + source += std::string("\n") + DX11N2ColorShader; + vertexShader = compileVS(source.c_str(), "main", VertexMacros); + } } return vertexShader; } -const ComPtr& DX11Shaders::getMVVertexShader() +const ComPtr& DX11Shaders::getMVVertexShader(bool naomi2) { - if (!modVolVertexShader) - modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr); + if (!modVolVertexShaders[naomi2]) + { + if (!naomi2) + modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr); + else + { + VertexMacros[0].Definition = MacroValues[false]; + VertexMacros[1].Definition = MacroValues[true]; + VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[3].Definition = MacroValues[false]; + modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros); + } + } - return modVolVertexShader; + return modVolVertexShaders[naomi2]; } const ComPtr& DX11Shaders::getModVolShader() @@ -536,13 +564,22 @@ ComPtr DX11Shaders::compilePS(const char* source, const char* ComPtr DX11Shaders::getVertexShaderBlob() { - VertexMacros[0].Definition = MacroValues[0]; - return compileShader(VertexShader, "main", "vs_4_0", VertexMacros); + VertexMacros[0].Definition = MacroValues[true]; + // FIXME code dup + VertexMacros[1].Definition = MacroValues[false]; + VertexMacros[2].Definition = MacroValues[false]; + std::string source(DX11N2VertexShader); + source += std::string("\n") + DX11N2ColorShader; + return compileShader(source.c_str(), "main", "vs_4_0", VertexMacros); } ComPtr DX11Shaders::getMVVertexShaderBlob() { - return compileShader(ModVolVertexShader, "main", "vs_4_0", nullptr); + // FIXME code dup + VertexMacros[0].Definition = MacroValues[false]; + VertexMacros[1].Definition = MacroValues[true]; + VertexMacros[2].Definition = MacroValues[false]; + return compileShader(DX11N2VertexShader, "main", "vs_4_0", VertexMacros); } ComPtr DX11Shaders::getQuadVertexShaderBlob() @@ -562,10 +599,11 @@ void DX11Shaders::term() { saveCache(CacheFile); shaders.clear(); - gouraudVertexShader.reset(); - flatVertexShader.reset(); + for (auto& shader : vertexShaders) + shader.reset(); modVolShader.reset(); - modVolVertexShader.reset(); + for (auto& shader : modVolVertexShaders) + shader.reset(); quadVertexShader.reset(); quadRotateVertexShader.reset(); quadPixelShader.reset(); diff --git a/core/rend/dx11/dx11_shaders.h b/core/rend/dx11/dx11_shaders.h index 08ed32510..3a91ef2ad 100644 --- a/core/rend/dx11/dx11_shaders.h +++ b/core/rend/dx11/dx11_shaders.h @@ -53,9 +53,9 @@ public: const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool trilinear, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix); - const ComPtr& getVertexShader(bool gouraud); + const ComPtr& getVertexShader(bool gouraud, bool naomi2); const ComPtr& getModVolShader(); - const ComPtr& getMVVertexShader(); + const ComPtr& getMVVertexShader(bool naomi2); const ComPtr& getQuadPixelShader(); const ComPtr& getQuadVertexShader(bool rotate); @@ -70,10 +70,9 @@ private: ComPtr device; std::unordered_map> shaders; - ComPtr gouraudVertexShader; - ComPtr flatVertexShader; + ComPtr vertexShaders[4]; ComPtr modVolShader; - ComPtr modVolVertexShader; + ComPtr modVolVertexShaders[2]; ComPtr quadPixelShader; ComPtr quadVertexShader; ComPtr quadRotateVertexShader; diff --git a/core/rend/dx11/oit/dx11_oitrenderer.cpp b/core/rend/dx11/oit/dx11_oitrenderer.cpp index 06d0d8c1c..f326740ed 100644 --- a/core/rend/dx11/oit/dx11_oitrenderer.cpp +++ b/core/rend/dx11/oit/dx11_oitrenderer.cpp @@ -40,6 +40,8 @@ const D3D11_INPUT_ELEMENT_DESC MainLayout[] { "COLOR", 2, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, col1), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "COLOR", 3, DXGI_FORMAT_B8G8R8A8_UNORM, 0, (UINT)offsetof(Vertex, spc1), D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, (UINT)offsetof(Vertex, u1), D3D11_INPUT_PER_VERTEX_DATA, 0 }, + // Naomi 2 + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, (UINT)offsetof(Vertex, nx), D3D11_INPUT_PER_VERTEX_DATA, 0 }, }; struct DX11OITRenderer : public DX11Renderer @@ -51,7 +53,6 @@ struct DX11OITRenderer : public DX11Renderer int blend_mode1[2]; float paletteIndex; float trilinearAlpha; - int pp_Number; // two volume mode int shading_instr0; @@ -81,7 +82,19 @@ struct DX11OITRenderer : public DX11Renderer buffers.init(device, deviceContext); ComPtr blob = shaders.getVertexShaderBlob(); mainInputLayout.reset(); - return success && SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())); + success = SUCCEEDED(device->CreateInputLayout(MainLayout, ARRAY_SIZE(MainLayout), blob->GetBufferPointer(), blob->GetBufferSize(), &mainInputLayout.get())) && success; + + blob = shaders.getFinalVertexShaderBlob(); + success = SUCCEEDED(device->CreateInputLayout(MainLayout, 0, blob->GetBufferPointer(), blob->GetBufferSize(), &finalInputLayout.get())) && success; + + desc.ByteWidth = sizeof(int); + desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + success = SUCCEEDED(device->CreateBuffer(&desc, nullptr, &vtxPolyConstants.get())) && success; + + return success; } void Resize(int w, int h) override { @@ -110,6 +123,9 @@ struct DX11OITRenderer : public DX11Renderer void Term() override { + vtxPolyConstants.reset(); + finalInputLayout.reset(); + mainInputLayout.reset(); opaqueTextureView.reset(); opaqueRenderTarget.reset(); opaqueTex.reset(); @@ -121,7 +137,7 @@ struct DX11OITRenderer : public DX11Renderer template void setRenderState(const PolyParam *gp, int polyNumber) { - ComPtr vertexShader = shaders.getVertexShader(gp->pcw.Gouraud); + ComPtr vertexShader = shaders.getVertexShader(gp->pcw.Gouraud, gp->isNaomi2(), false, pass != DX11OITShaders::Depth); deviceContext->VSSetShader(vertexShader, nullptr, 0); PixelPolyConstants constants; @@ -214,7 +230,6 @@ struct DX11OITRenderer : public DX11Renderer constants.clipTest[3] = (float)(clip_rect[1] + clip_rect[3]); } } - constants.pp_Number = polyNumber; constants.blend_mode0[0] = gp->tsp.SrcInstr; constants.blend_mode0[1] = gp->tsp.DstInstr; if (two_volumes_mode) @@ -235,6 +250,14 @@ struct DX11OITRenderer : public DX11Renderer memcpy(mappedSubres.pData, &constants, sizeof(constants)); deviceContext->Unmap(pxlPolyConstants, 0); + if (!gp->isNaomi2()) + { + deviceContext->Map(vtxPolyConstants, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedSubres); + memcpy(mappedSubres.pData, &polyNumber, sizeof(polyNumber)); + deviceContext->Unmap(vtxPolyConstants, 0); + deviceContext->VSSetConstantBuffers(1, 1, &vtxPolyConstants.get()); + } + if (pass == DX11OITShaders::Color) { // Apparently punch-through polys support blending, or at least some combinations @@ -280,6 +303,9 @@ struct DX11OITRenderer : public DX11Renderer bool needStencil = config::ModifierVolumes && pass == DX11OITShaders::Depth && Type != ListType_Translucent; const u32 stencil = (gp->pcw.Shadow != 0) ? 0x80 : 0; deviceContext->OMSetDepthStencilState(depthStencilStates.getState(true, zwriteEnable, zfunc, needStencil), stencil); + + if (gp->isNaomi2()) + n2Helper.setConstants(*gp, polyNumber); } template @@ -318,13 +344,14 @@ struct DX11OITRenderer : public DX11Renderer unsigned int offset = 0; deviceContext->IASetVertexBuffers(0, 1, &modvolBuffer.get(), &stride, &offset); deviceContext->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - deviceContext->VSSetShader(shaders.getMVVertexShader(), nullptr, 0); if (!Transparent) deviceContext->PSSetShader(shaders.getModVolShader(), nullptr, 0); deviceContext->RSSetScissorRects(1, &scissorRect); ModifierVolumeParam* params = Transparent ? &pvrrc.global_param_mvo_tr.head()[first] : &pvrrc.global_param_mvo.head()[first]; int mod_base = -1; + const float *curMVMat = nullptr; + const float *curProjMat = nullptr; for (int cmv = 0; cmv < count; cmv++) { @@ -337,6 +364,13 @@ struct DX11OITRenderer : public DX11Renderer if (param.count > 0) { + if (param.isNaomi2() && (param.mvMatrix != curMVMat || param.projMatrix != curProjMat)) + { + curMVMat = param.mvMatrix; + curProjMat = param.projMatrix; + n2Helper.setConstants(param.mvMatrix, param.projMatrix); + } + deviceContext->VSSetShader(shaders.getMVVertexShader(param.isNaomi2()), nullptr, 0); if (Transparent) { if (!param.isp.VolumeLast && mv_mode > 0) @@ -393,6 +427,7 @@ struct DX11OITRenderer : public DX11Renderer deviceContext->OMSetDepthStencilState(depthStencilStates.getState(false, false, 0, false), 0); setCullMode(0); + deviceContext->IASetInputLayout(finalInputLayout); deviceContext->VSSetShader(shaders.getFinalVertexShader(), nullptr, 0); deviceContext->PSSetShader(shaders.getFinalShader(), nullptr, 0); @@ -529,6 +564,7 @@ struct DX11OITRenderer : public DX11Renderer // renderABuffer(); deviceContext->PSSetShaderResources(0, 1, &p); + deviceContext->IASetInputLayout(mainInputLayout); // Clear the stencil from this pass deviceContext->ClearDepthStencilView(depthStencilView2, D3D11_CLEAR_STENCIL, 0.f, 0); @@ -559,6 +595,7 @@ struct DX11OITRenderer : public DX11Renderer if (!pvrrc.isRenderFramebuffer) { + n2Helper.resetCache(); uploadGeometryBuffers(); updateFogTexture(); @@ -609,6 +646,9 @@ private: ComPtr trPolyParamsBuffer; u32 trPolyParamsBufferSize = 0; ComPtr trPolyParamsBufferView; + ComPtr mainInputLayout; // FIXME + ComPtr finalInputLayout; + ComPtr vtxPolyConstants; }; Renderer *rend_OITDirectX11() diff --git a/core/rend/dx11/oit/dx11_oitshaders.cpp b/core/rend/dx11/oit/dx11_oitshaders.cpp index f4a8087cd..cdaf0ada1 100644 --- a/core/rend/dx11/oit/dx11_oitshaders.cpp +++ b/core/rend/dx11/oit/dx11_oitshaders.cpp @@ -18,6 +18,7 @@ */ #include "dx11_oitshaders.h" #include "../dx11context.h" +#include "../dx11_naomi2.h" const char * const VertexShader = R"( #if pp_Gouraud == 1 @@ -35,6 +36,8 @@ struct VertexIn float4 col1 : COLOR2; float4 spec1 : COLOR3; float2 uv1 : TEXCOORD1; + float3 normal: NORMAL; // unused + uint vertexId : SV_VertexID; }; struct VertexOut @@ -43,12 +46,13 @@ struct VertexOut float4 uv : TEXCOORD0; INTERPOLATION float4 col : COLOR0; INTERPOLATION float4 spec : COLOR1; - float4 uv1 : TEXCOORD1; + float2 uv1 : TEXCOORD1; INTERPOLATION float4 col1 : COLOR2; INTERPOLATION float4 spec1 : COLOR3; + nointerpolation uint index : BLENDINDICES0; }; -cbuffer constantBuffer : register(b0) +cbuffer shaderConstants : register(b0) { float4x4 transMatrix; float4 leftPlane; @@ -57,6 +61,11 @@ cbuffer constantBuffer : register(b0) float4 bottomPlane; }; +cbuffer polyConstants : register(b1) +{ + int polyNumber; +}; + [clipplanes(leftPlane, topPlane, rightPlane, bottomPlane)] VertexOut main(in VertexIn vin) { @@ -75,7 +84,8 @@ VertexOut main(in VertexIn vin) vo.spec1 = vin.spec1; #endif vo.uv = float4(vin.uv * vo.pos.z, 0.f, vo.pos.z); - vo.uv1 = float4(vin.uv1 * vo.pos.z, 0.f, 0.f); + vo.uv1 = vin.uv1 * vo.pos.z; + vo.index = (uint(polyNumber) << 18) + vin.vertexId; vo.pos.w = 1.f; vo.pos.z = 0.f; @@ -195,11 +205,16 @@ bool getShadowEnable(in PolyParam pp) return (pp.tsp_isp_pcw & 1) != 0; } -uint getPolyNumber(in Pixel pixel) +uint getPolyIndex(in Pixel pixel) { return pixel.seq_num & 0x3FFFFFFFu; } +uint getPolyNumber(in Pixel pixel) +{ + return (pixel.seq_num & 0x3FFFFFFFu) >> 18; +} + #define SHADOW_STENCIL 0x40000000u #define SHADOW_ACC 0x80000000u @@ -269,9 +284,10 @@ struct VertexIn float4 uv : TEXCOORD0; INTERPOLATION float4 col : COLOR0; INTERPOLATION float4 spec : COLOR1; - float4 uv1 : TEXCOORD1; + float2 uv1 : TEXCOORD1; INTERPOLATION float4 col1 : COLOR2; INTERPOLATION float4 spec1 : COLOR3; + nointerpolation uint index : BLENDINDICES0; }; Texture2D texture0 : register(t0); @@ -299,7 +315,6 @@ cbuffer polyConstantBuffer : register(b1) int2 blend_mode1; float paletteIndex; float trilinearAlpha; - int pp_Number; // two volume mode int shading_instr0; @@ -406,7 +421,7 @@ PSO main(in VertexIn inpix) float2 uv; #if pp_TwoVolumes == 1 if (area1) - uv = inpix.uv1.xy / inpix.uv.w; + uv = inpix.uv1 / inpix.uv.w; else #endif uv = inpix.uv.xy / inpix.uv.w; @@ -550,7 +565,7 @@ PSO main(in VertexIn inpix) Pixel pixel; pixel.color = packColors(clamp(color, 0.f, 1.f)); pixel.depth = inpix.uv.w; - pixel.seq_num = uint(pp_Number); + pixel.seq_num = inpix.index; InterlockedExchange(abufferPointers[coords], idx, pixel.next); Pixels[idx] = pixel; @@ -594,7 +609,7 @@ int fillAndSortFragmentArray(in uint2 coords, out uint pixel_list[MAX_PIXELS_PER uint jIdx = pixel_list[j]; while (j >= 0 && (Pixels[jIdx].depth > Pixels[idx].depth - || (Pixels[jIdx].depth == Pixels[idx].depth && getPolyNumber(Pixels[jIdx]) > getPolyNumber(Pixels[idx])))) + || (Pixels[jIdx].depth == Pixels[idx].depth && getPolyIndex(Pixels[jIdx]) > getPolyIndex(Pixels[idx])))) { pixel_list[j + 1] = pixel_list[j]; j--; @@ -819,6 +834,9 @@ const char * const MacroValues[] { "0", "1", "2", "3" }; static D3D_SHADER_MACRO VertexMacros[] { { "pp_Gouraud", "1" }, + { "POSITION_ONLY", "0" }, + { "pp_TwoVolumes", "0" }, + { "LIGHT_ON", "1" }, { nullptr, nullptr } }; @@ -907,24 +925,53 @@ const ComPtr& DX11OITShaders::getShader(bool pp_Texture, bool return shader; } -const ComPtr& DX11OITShaders::getVertexShader(bool gouraud) +const ComPtr& DX11OITShaders::getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes) { - ComPtr& vertexShader = gouraud ? gouraudVertexShader : flatVertexShader; - if (!vertexShader) + const u32 hash = (int)gouraud + | ((int)naomi2 << 1) + | ((int)positionOnly << 2) + | ((int)lightOn << 3) + | ((int)twoVolumes << 4); + auto& shader = vertexShaders[hash]; + if (shader == nullptr) { VertexMacros[0].Definition = MacroValues[gouraud]; - vertexShader = compileVS(VertexShader, "main", VertexMacros); + if (!naomi2) + { + shader = compileVS(VertexShader, "main", VertexMacros); + } + else + { + VertexMacros[1].Definition = MacroValues[positionOnly]; + VertexMacros[2].Definition = MacroValues[twoVolumes]; + VertexMacros[3].Definition = MacroValues[lightOn]; + std::string source(DX11N2VertexShader); + if (!positionOnly && lightOn) + source += std::string("\n") + DX11N2ColorShader; + shader = compileVS(source.c_str(), "main", VertexMacros); + } } - return vertexShader; + return shader; } -const ComPtr& DX11OITShaders::getMVVertexShader() +const ComPtr& DX11OITShaders::getMVVertexShader(bool naomi2) { - if (!modVolVertexShader) - modVolVertexShader = compileVS(ModVolVertexShader, "main", nullptr); + if (!modVolVertexShaders[naomi2]) + { + if (!naomi2) + modVolVertexShaders[0] = compileVS(ModVolVertexShader, "main", nullptr); + else + { + VertexMacros[0].Definition = MacroValues[false]; + VertexMacros[1].Definition = MacroValues[true]; + VertexMacros[2].Definition = MacroValues[false]; + VertexMacros[3].Definition = MacroValues[false]; + modVolVertexShaders[1] = compileVS(DX11N2VertexShader, "main", VertexMacros); + } + } - return modVolVertexShader; + return modVolVertexShaders[naomi2]; } const ComPtr& DX11OITShaders::getModVolShader() @@ -1021,13 +1068,27 @@ ComPtr DX11OITShaders::compilePS(const char* source, const ch ComPtr DX11OITShaders::getVertexShaderBlob() { - VertexMacros[0].Definition = MacroValues[0]; - return compileShader(VertexShader, "main", "vs_5_0", VertexMacros); + VertexMacros[0].Definition = MacroValues[true]; + // FIXME code dup + VertexMacros[1].Definition = MacroValues[false]; + VertexMacros[2].Definition = MacroValues[true]; + std::string source(DX11N2VertexShader); + source += std::string("\n") + DX11N2ColorShader; + return compileShader(source.c_str(), "main", "vs_5_0", VertexMacros); } ComPtr DX11OITShaders::getMVVertexShaderBlob() { - return compileShader(ModVolVertexShader, "main", "vs_5_0", nullptr); + // FIXME code dup + VertexMacros[0].Definition = MacroValues[false]; + VertexMacros[1].Definition = MacroValues[true]; + VertexMacros[2].Definition = MacroValues[false]; + return compileShader(DX11N2VertexShader, "main", "vs_5_0", VertexMacros); +} + +ComPtr DX11OITShaders::getFinalVertexShaderBlob() +{ + return compileShader(OITFinalVertexShaderSource, "main", "vs_5_0", nullptr); } void DX11OITShaders::init(const ComPtr& device, pD3DCompile D3DCompile) diff --git a/core/rend/dx11/oit/dx11_oitshaders.h b/core/rend/dx11/oit/dx11_oitshaders.h index fc772a4c4..1e0497a6a 100644 --- a/core/rend/dx11/oit/dx11_oitshaders.h +++ b/core/rend/dx11/oit/dx11_oitshaders.h @@ -35,9 +35,9 @@ public: const ComPtr& getShader(bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_BumpMap, bool fog_clamping, bool palette, bool gouraud, bool alphaTest, bool clipInside, bool nearestWrapFix, bool twoVolumes, Pass pass); - const ComPtr& getVertexShader(bool gouraud); + const ComPtr& getVertexShader(bool gouraud, bool naomi2, bool positionOnly, bool lightOn, bool twoVolumes = true); const ComPtr& getModVolShader(); - const ComPtr& getMVVertexShader(); + const ComPtr& getMVVertexShader(bool naomi2); const ComPtr& getFinalShader(); const ComPtr& getTrModVolShader(int type); const ComPtr& getFinalVertexShader(); @@ -46,10 +46,10 @@ public: { saveCache(CacheFile); shaders.clear(); - gouraudVertexShader.reset(); - flatVertexShader.reset(); + vertexShaders.clear(); modVolShader.reset(); - modVolVertexShader.reset(); + for (auto& shader : modVolVertexShaders) + shader.reset(); for (auto& shader : trModVolShaders) shader.reset(); finalShader.reset(); @@ -59,6 +59,7 @@ public: } ComPtr getVertexShaderBlob(); ComPtr getMVVertexShaderBlob(); + ComPtr getFinalVertexShaderBlob(); private: ComPtr compileShader(const char *source, const char* function, const char* profile, const D3D_SHADER_MACRO *pDefines); @@ -67,10 +68,9 @@ private: ComPtr device; std::unordered_map> shaders; - ComPtr gouraudVertexShader; - ComPtr flatVertexShader; + std::unordered_map> vertexShaders; ComPtr modVolShader; - ComPtr modVolVertexShader; + ComPtr modVolVertexShaders[2]; ComPtr trModVolShaders[4]; ComPtr finalShader; diff --git a/core/rend/gl4/gl4naomi2.cpp b/core/rend/gl4/gl4naomi2.cpp index 6fbe39603..9d36dfab8 100644 --- a/core/rend/gl4/gl4naomi2.cpp +++ b/core/rend/gl4/gl4naomi2.cpp @@ -34,14 +34,14 @@ N2Vertex4Source::N2Vertex4Source(const gl4PipelineShader* shader) : OpenGl4Sourc { if (shader == nullptr) { - addConstant("GEOM_ONLY", 1); + addConstant("POSITION_ONLY", 1); addConstant("pp_TwoVolumes", 0); addConstant("pp_Gouraud", 0); addConstant("pp_Texture", 0); } else { - addConstant("GEOM_ONLY", shader->pass == Pass::Depth); // geometry only for depth pass + addConstant("POSITION_ONLY", shader->pass == Pass::Depth); // geometry only for depth pass addConstant("pp_TwoVolumes", shader->pp_TwoVolumes || shader->pp_BumpMap); addConstant("pp_Gouraud", shader->pp_Gouraud); addConstant("pp_Texture", shader->pp_Texture); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index ac8374f32..c667ef557 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -233,7 +233,7 @@ __forceinline glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } - if (SortingEnabled && !config::PerStripSorting) + if (SortingEnabled /* && !config::PerStripSorting */) // Looks glitchy too but less missing graphics (but wrong depth order...) glcache.DepthMask(GL_FALSE); else { diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp index a9588b22b..e81cab27e 100644 --- a/core/rend/gles/naomi2.cpp +++ b/core/rend/gles/naomi2.cpp @@ -30,7 +30,7 @@ uniform int pp_Number; // Vertex input in vec3 in_pos; -#if GEOM_ONLY == 0 +#if POSITION_ONLY == 0 in vec4 in_base; in vec4 in_offs; in vec2 in_uv; @@ -56,7 +56,7 @@ void wDivide(inout vec4 vpos) { vpos = vec4(vpos.xy / vpos.w, 1.0 / vpos.w, 1.0); vpos = ndcMat * vpos; -#if GEOM_ONLY == 1 +#if POSITION_ONLY == 1 vtx_uv = vec3(0.0, 0.0, vpos.z); #else #if pp_Gouraud == 1 @@ -79,7 +79,7 @@ void wDivide(inout vec4 vpos) void main() { vec4 vpos = mvMat * vec4(in_pos, 1.0); -#if GEOM_ONLY == 0 +#if POSITION_ONLY == 0 vtx_base = in_base; vtx_offs = in_offs; vec4 vnorm = normalize(normalMat * vec4(in_normal, 0.0)); @@ -203,7 +203,7 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in int volIdx, in v else { lightDir = normalize(light.position.xyz - position); - if (light.attnDistA != 1.0 && light.attnDistB != 0.0) + if (light.attnDistA != 1.0 || light.attnDistB != 0.0) { float distance = length(light.position.xyz - position); if (light.distAttnMode == 0) @@ -359,7 +359,7 @@ void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly, bool texture) : OpenGlSource() { addConstant("pp_Gouraud", gouraud); - addConstant("GEOM_ONLY", geometryOnly); + addConstant("POSITION_ONLY", geometryOnly); addConstant("pp_TwoVolumes", 0); addConstant("pp_Texture", (int)texture); diff --git a/core/rend/gles/naomi2.h b/core/rend/gles/naomi2.h index a277b33e3..ba697b131 100644 --- a/core/rend/gles/naomi2.h +++ b/core/rend/gles/naomi2.h @@ -192,6 +192,8 @@ void setN2Uniforms(const PolyParam *pp, ShaderType *shader) } glUniform1i(shader->useBaseOver, 0); glUniform1i(shader->lightCount, 0); + glUniform1i(shader->bumpId0, -1); + glUniform1i(shader->bumpId1, -1); } } glUniform1i(shader->bumpMapping, pp->pcw.Texture == 1 && pp->tcw.PixelFmt == PixelBumpMap); diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp index a92eb26dc..29172033c 100644 --- a/core/rend/sorter.cpp +++ b/core/rend/sorter.cpp @@ -42,10 +42,10 @@ static bool operator<(const PolyParam& left, const PolyParam& right) return left.zvZ < right.zvZ; } -static float getProjectedZ(const Vertex *v, const glm::mat4& mat) +static float getProjectedZ(const Vertex *v, const float *mat) { - // 1 / w - return 1 / mat[0][3] * v->x + mat[1][3] * v->y + mat[2][3] * v->z + mat[3][3]; + // -1 / z + return -1 / (mat[2] * v->x + mat[1 * 4 + 2] * v->y + mat[2 * 4 + 2] * v->z + mat[3 * 4 + 2]); } void SortPParams(int first, int count) @@ -178,16 +178,12 @@ void GenSorted(int first, int count, std::vector& pidx_sort, { const u32 *idx = idx_base + pp->first; u32 flip = 0; - glm::mat4 mat; - float z0, z1; + float z0 = 0, z1 = 0; if (pp->isNaomi2()) { - mat = glm::make_mat4(pp->projMatrix); - if (pp->mvMatrix != nullptr) - mat *= glm::make_mat4(pp->mvMatrix); - z0 = getProjectedZ(vtx_base + idx[0], mat); - z1 = getProjectedZ(vtx_base + idx[1], mat); + z0 = getProjectedZ(vtx_base + idx[0], pp->mvMatrix); + z1 = getProjectedZ(vtx_base + idx[1], pp->mvMatrix); } for (u32 i = 0; i < pp->count - 2; i++) { @@ -207,7 +203,7 @@ void GenSorted(int first, int count, std::vector& pidx_sort, lst[pfsti].pid = ppid; if (pp->isNaomi2()) { - float z2 = getProjectedZ(v2, mat); + float z2 = getProjectedZ(v2, pp->mvMatrix); lst[pfsti].z = std::min(z0, std::min(z1, z2)); z0 = z1; z1 = z2;