From 56768f2d3df5036fa3e7e3f36e989345566cf687 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 16 Mar 2022 17:28:45 +0100 Subject: [PATCH] naomi2: use TA parser. Use N2light directly with vk, dx11 Use TA parser to handle list type and clipping for Naomi 2 polys. Pass all pass-through TA data to TA parser but stop on naomi2 command. Set default projection matrix instead of identity (fixes Area conquered screen invisible in initd). Change N2Light and N2LightModel so they are usable in GLSL (vulkan) and HLSL (dx11). TA parser throws exception for unhandled param. ta_vtx.cpp refactoring. Support for subtractive lights (diffuse+specular). Used in some clubk circuits. Fix vulkan crash when direct FB render. --- core/emulator.cpp | 5 +- core/hw/pvr/elan.cpp | 223 ++++---- core/hw/pvr/elan_struct.h | 3 +- core/hw/pvr/pvr.cpp | 1 + core/hw/pvr/ta.cpp | 202 ++++++- core/hw/pvr/ta.h | 5 + core/hw/pvr/ta_ctx.h | 35 +- core/hw/pvr/ta_vtx.cpp | 855 ++++++++++------------------ core/rend/dx11/dx11_naomi2.cpp | 153 ++--- core/rend/gles/gles.cpp | 17 +- core/rend/gles/naomi2.cpp | 67 +-- core/rend/vulkan/drawer.cpp | 3 +- core/rend/vulkan/drawer.h | 50 +- core/rend/vulkan/oit/oit_drawer.cpp | 3 +- core/rend/vulkan/oit/oit_pipeline.h | 4 +- core/rend/vulkan/pipeline.h | 4 +- core/rend/vulkan/shaders.cpp | 59 +- core/rend/vulkan/shaders.h | 38 -- core/rend/vulkan/vulkan_renderer.h | 17 +- 19 files changed, 789 insertions(+), 955 deletions(-) diff --git a/core/emulator.cpp b/core/emulator.cpp index 8348edf90..59f2c8e0e 100644 --- a/core/emulator.cpp +++ b/core/emulator.cpp @@ -458,10 +458,13 @@ void Emulator::loadGame(const char *path, LoadProgress *progress) mem_map_default(); config::Settings::instance().reset(); - dc_reset(true); config::Settings::instance().load(false); + dc_reset(true); memset(&settings.network.md5, 0, sizeof(settings.network.md5)); + if (settings.platform.isNaomi2() && config::RendererType == RenderType::DirectX9) + throw FlycastException("DirectX 9 doesn't 
support Naomi 2 games. Select a different graphics API"); + if (settings.platform.isConsole()) { if (settings.content.path.empty()) diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index eba65d43b..d5ba43e47 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -270,11 +270,9 @@ struct State { static constexpr u32 Null = 0xffffffff; - int listType = -1; u32 gmp = Null; u32 instance = Null; u32 projMatrix = Null; - u32 tileclip = 0; u32 lightModel = Null; u32 lights[MAX_LIGHTS] = { Null, Null, Null, Null, Null, Null, Null, Null, @@ -286,11 +284,9 @@ struct State void reset() { - listType = -1; gmp = Null; instance = Null; projMatrix = Null; - tileclip = 0; lightModel = Null; for (auto& light : lights) light = Null; @@ -470,21 +466,6 @@ struct State elan::curLights[lightId] = plight; } - void setClipMode(PCW pcw) - { - tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28); - } - - void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax) - { - u32 t = tileclip & 0xF0000000; - t |= xmin & 0x3f; // 6 bits - t |= (xmax & 0x3f) << 6; // 6 bits - t |= (ymin & 0x1f) << 12; // 5 bits - t |= (ymax & 0x1f) << 17; // 5 bits - tileclip = t; - } - void update() { updateMatrix(); @@ -505,11 +486,11 @@ struct State void serialize(Serializer& ser) { - ser << listType; + ser << ta_get_list_type(); ser << gmp; ser << instance; ser << projMatrix; - ser << tileclip; + ser << ta_get_tileclip(); ser << lightModel; ser << lights; } @@ -521,11 +502,15 @@ struct State reset(); return; } + u32 listType; deser >> listType; + ta_set_list_type(listType); deser >> gmp; deser >> instance; deser >> projMatrix; + u32 tileclip; deser >> tileclip; + ta_set_tileclip(tileclip); deser >> lightModel; deser >> lights; update(); @@ -592,12 +577,18 @@ static void setNormal(Vertex& vd, const T& vs) vd.nz = normal.z; } -static void addModelColors(glm::vec4& baseCol0, glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1) +static void setModelColors(glm::vec4& baseCol0, 
glm::vec4& offsetCol0, glm::vec4& baseCol1, glm::vec4& offsetCol1) { - baseCol0 += gmpDiffuseColor0; - offsetCol0 += gmpSpecularColor0; - baseCol1 += gmpDiffuseColor1; - offsetCol1 += gmpSpecularColor1; + if (curGmp == nullptr) + return; + if (curGmp->paramSelect.d0) + baseCol0 = gmpDiffuseColor0; + if (curGmp->paramSelect.s0) + offsetCol0 = gmpSpecularColor0; + if (curGmp->paramSelect.d1) + baseCol1 = gmpDiffuseColor1; + if (curGmp->paramSelect.s1) + offsetCol1 = gmpSpecularColor1; } template @@ -609,11 +600,11 @@ void convertVertex(const N2_VERTEX& vs, Vertex& vd) setCoords(vd, vs.x, vs.y, vs.z); setNormal(vd, vs); SetEnvMapUV(vd); - glm::vec4 baseCol0(0); + glm::vec4 baseCol0(1); glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); + glm::vec4 baseCol1(1); glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + setModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); @@ -631,7 +622,7 @@ void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd) glm::vec4 offsetCol0(0); glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + setModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); *(u32 *)vd.col1 = packColor(baseCol1); @@ -644,11 +635,11 @@ void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd) setCoords(vd, vs.x, vs.y, vs.z); setNormal(vd, vs); setUV(vs, vd); - glm::vec4 baseCol0(0); + glm::vec4 baseCol0(1); glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); + glm::vec4 baseCol1(1); glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + setModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); *(u32 *)vd.col1 = packColor(baseCol1); @@ -665,7 +656,7 @@ void convertVertex(const 
N2_VERTEX_VUR& vs, Vertex& vd) glm::vec4 offsetCol0(0); glm::vec4 baseCol1 = unpackColor(vs.rgb.argb1); glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + setModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.spc = packColor(offsetCol0); *(u32 *)vd.col1 = packColor(baseCol1); @@ -678,11 +669,11 @@ void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd) setCoords(vd, vs.x, vs.y, vs.z); setNormal(vd, vs); setUV(vs, vd); - glm::vec4 baseCol0(0); + glm::vec4 baseCol0(1); glm::vec4 offsetCol0(0); - glm::vec4 baseCol1(0); + glm::vec4 baseCol1(1); glm::vec4 offsetCol1(0); - addModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); + setModelColors(baseCol0, offsetCol0, baseCol1, offsetCol1); *(u32 *)vd.col = packColor(baseCol0); *(u32 *)vd.col1 = packColor(baseCol1); // Stuff the bump map normals and parameters in the specular colors @@ -1101,7 +1092,7 @@ static void sendMVPolygon(ICHList *list, const T *vtx, bool needClipping) mvp.isp.DepthMode &= 3; mvp.mvMatrix = taMVMatrix; mvp.projMatrix = taProjMatrix; - ta_add_poly(state.listType, mvp); + ta_add_poly(list->pcw.listType, mvp); ModifierVolumeClipper clipper(needClipping); glm::vec3 vtx0{}; @@ -1162,11 +1153,11 @@ static void sendLights() model.lightCount = 0; if (curLightModel == nullptr) { - model.useBaseOver = false; + model.useBaseOver = 0; for (int i = 0; i < 2; i++) { - model.ambientMaterialBase[i] = false; - model.ambientMaterialOffset[i] = false; + model.ambientMaterialBase[i] = 0; + model.ambientMaterialOffset[i] = 0; model.ambientBase[i][0] = model.ambientBase[i][1] = model.ambientBase[i][2] = model.ambientBase[i][3] = 1.f; } memset(model.ambientOffset, 0, sizeof(model.ambientOffset)); @@ -1253,7 +1244,6 @@ static void sendLights() static void setStateParams(PolyParam& pp, const ICHList *list) { sendLights(); - pp.tileclip = state.tileclip; pp.mvMatrix = taMVMatrix; pp.normalMatrix = taNormalMatrix; pp.projMatrix = 
taProjMatrix; @@ -1310,7 +1300,7 @@ static void setStateParams(PolyParam& pp, const ICHList *list) pp.specularColor[1] = false; } // else if (pp.pcw.Volume == 1) -// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", state.listType, list->flags, curGmp->paramSelect.full, +// printf("2-Volume poly listType %d vtxtype %x gmp params %x diff tcw %08x tsp %08x\n", ta_get_list_type(), list->flags, curGmp->paramSelect.full, // pp.tcw.full ^ pp.tcw1.full, pp.tsp.full ^ pp.tsp1.full); } @@ -1325,7 +1315,10 @@ static void sendPolygon(ICHList *list) N2_VERTEX *vtx = (N2_VERTEX *)((u8 *)list + sizeof(ICHList)); if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) break; - if (state.listType & 1) + int listType = ta_get_list_type(); + if (listType == -1) + listType = list->pcw.listType; + if (listType & 1) sendMVPolygon(list, vtx, needClipping); else { @@ -1339,7 +1332,7 @@ static void sendPolygon(ICHList *list) pp.tsp = list->tsp0; pp.tsp1 = list->tsp1; setStateParams(pp, list); - ta_add_poly(state.listType, pp); + ta_add_poly(pp); sendVertices(list, vtx, needClipping); } @@ -1351,7 +1344,10 @@ static void sendPolygon(ICHList *list) N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)((u8 *)list + sizeof(ICHList)); if (!isBetweenNearAndFar(vtx, list->vtxCount, needClipping)) break; - if (state.listType & 1) + int listType = ta_get_list_type(); + if (listType == -1) + listType = list->pcw.listType; + if (listType & 1) sendMVPolygon(list, vtx, needClipping); else { @@ -1367,7 +1363,7 @@ static void sendPolygon(ICHList *list) pp.tsp1 = list->tsp1; pp.tcw1 = list->tcw1; setStateParams(pp, list); - ta_add_poly(state.listType, pp); + ta_add_poly(pp); sendVertices(list, vtx, needClipping); } @@ -1391,7 +1387,7 @@ static void sendPolygon(ICHList *list) pp.tsp1 = list->tsp1; pp.tcw1 = list->tcw1; setStateParams(pp, list); - ta_add_poly(state.listType, pp); + ta_add_poly(pp); sendVertices(list, vtx, needClipping); } @@ -1412,7 +1408,7 @@ static void 
sendPolygon(ICHList *list) pp.tsp = list->tsp0; pp.tsp1 = list->tsp1; setStateParams(pp, list); - ta_add_poly(state.listType, pp); + ta_add_poly(pp); sendVertices(list, vtx, needClipping); } @@ -1437,7 +1433,7 @@ static void sendPolygon(ICHList *list) pp.tsp1 = list->tsp1; pp.tcw1 = list->tcw1; setStateParams(pp, list); - ta_add_poly(state.listType, pp); + ta_add_poly(pp); sendVertices(list, vtx, needClipping); } @@ -1526,7 +1522,7 @@ static void executeCommand(u8 *data, int size) if (Active) { cullingReversed = model->param.cwCulling == 0; - state.setClipMode(model->pcw); + ta_set_tileclip((model->pcw.userClip << 28) | (ta_get_tileclip() & 0x0fffffff)); openModifierVolume = model->param.openVolume; shadowedVolume = model->pcw.shadow; modelTSP = model->tsp; @@ -1591,20 +1587,20 @@ static void executeCommand(u8 *data, int size) if (link->offset & 0x80000000) { // elan v10 only - DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->_res & 0x1ffffff8, link->size); - memcpy(&vram[link->_res & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size); + DEBUG_LOG(PVR, "Texture DMA from %x to %x (%x)", DMAC_SAR(2), link->vramAddress & 0x1ffffff8, link->size); + memcpy(&vram[link->vramAddress & VRAM_MASK], &mem_b[DMAC_SAR(2) & RAM_MASK], link->size); reg74 |= 1; } else if (link->offset & 0x20000000) { // elan v10 only - DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & 0x01fffff8, link->_res & VRAM_MASK, link->size); - memcpy(&vram[link->_res & VRAM_MASK], &RAM[link->offset & ELAN_RAM_MASK], link->size); + DEBUG_LOG(PVR, "Texture DMA from eram %x -> %x (%x)", link->offset & ELAN_RAM_MASK, link->vramAddress & VRAM_MASK, link->size); + memcpy(&vram[link->vramAddress & VRAM_MASK], &RAM[link->offset & ELAN_RAM_MASK], link->size); reg74 |= 1; } else { - DEBUG_LOG(PVR, "Link to %x (%x)", link->offset & 0x1ffffff8, link->size); + DEBUG_LOG(PVR, "Link to %8x (%x)", link->offset, link->size); executeCommand(&RAM[link->offset & ELAN_RAM_MASK], 
link->size); } size -= sizeof(Link); @@ -1630,69 +1626,84 @@ static void executeCommand(u8 *data, int size) break; default: - DEBUG_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command); + WARN_LOG(PVR, "Unhandled Elan command %x", cmd->pcw.n2Command); size -= 32; break; } } else { - u32 pcw = *(u32 *)data; - if ((pcw & 0xd0ffff00) == 0x808c0000) // display list + if (Active) { - if (Active) - { - DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf); - state.reset(); - state.listType = (pcw >> 24) & 0xf; - // TODO is this the right place for this? - SQBuffer eol{}; - ta_vtx_data32(&eol); + u32 pcw = *(u32 *)data; + DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); + try { + size -= ta_add_ta_data((u32 *)data, size); + } catch (const TAParserException& e) { + size = 0; } - size -= 24 * 4; - } - else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping - { - if (Active) - { - state.setClipMode((PCW&)pcw); - DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip); - } - size -= 0xE0; - } - else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked? 
- { - if (Active) - { - DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); - state.listType = (pcw >> 24) & 0xf; - size -= 32; - ta_add_ta_data(state.listType, (u32 *)(data + 32), size - 32); - } - size = 32; - } - else if (pcw == 0x20000000) - { - // User clipping - if (Active) - { - u32 *tiles = (u32 *)data + 4; - DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32, - tiles[2] * 32, tiles[3] * 32); - state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]); - } - size -= 32; } else { - if (Active) + u32 vertexSize = 32; + int listType = ta_get_list_type(); + int i = 0; + while (i < size) { - if (pcw != 0) - INFO_LOG(PVR, "Unhandled command %x", pcw); - for (int i = 0; i < 32; i += 4) - DEBUG_LOG(PVR, " %08x: %08x", (u32)(&data[i] - RAM), *(u32 *)&data[i]); + PCW pcw = *(PCW *)&data[i]; + if (pcw.naomi2 == 1) + break; + switch (pcw.paraType) + { + case ParamType_End_Of_List: + listType = -1; + i += 32; + break; + case ParamType_Object_List_Set: + case ParamType_User_Tile_Clip: + i += 32; + break; + case ParamType_Polygon_or_Modifier_Volume: + { + static const u32 * const PolyTypeLut = TaTypeLut::instance().table; + + if (listType == -1) + listType = pcw.listType; + if (listType & 1) + { + // modifier volumes + vertexSize = 64; + i += 32; + } + else + { + u32 polyId = PolyTypeLut[pcw.objectControl]; + u32 polySize = polyId >> 30; + u32 vertexType = (u8)polyId; + if (vertexType == 5 || vertexType == 6 || (vertexType >= 11 && vertexType <= 14)) + vertexSize = 64; + else + vertexSize = 32; + i += polySize == SZ64 ? 
64 : 32; + } + } + break; + case ParamType_Sprite: + if (listType == -1) + listType = pcw.listType; + vertexSize = 64; + i += 32; + break; + case ParamType_Vertex_Parameter: + i += vertexSize; + break; + default: + WARN_LOG(PVR, "Invalid param type %d", pcw.paraType); + i = size; + break; + } } - size -= 32; + size -= i; } } data += oldSize - size; diff --git a/core/hw/pvr/elan_struct.h b/core/hw/pvr/elan_struct.h index 784ed98a4..98486e3f7 100644 --- a/core/hw/pvr/elan_struct.h +++ b/core/hw/pvr/elan_struct.h @@ -65,6 +65,7 @@ union PCW u32 endOfStrip : 1; u32 paraType : 3; }; + u8 objectControl; u32 full; }; @@ -382,7 +383,7 @@ struct Link : public ElanBase { // 08000f00 u32 offset; - u32 _res; // 09000000 + u32 vramAddress; // for texture DMA xfers, otherwise 09000000 u32 size; u32 _res0[4]; }; diff --git a/core/hw/pvr/pvr.cpp b/core/hw/pvr/pvr.cpp index 49884be1d..18a0c3048 100644 --- a/core/hw/pvr/pvr.cpp +++ b/core/hw/pvr/pvr.cpp @@ -45,6 +45,7 @@ void reset(bool hard) rend_reset(); tactx_Term(); elan::reset(hard); + ta_parse_reset(); } void init() diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index df932bbf9..4f542a29b 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -263,6 +263,203 @@ static NOINLINE void DYNACALL ta_handle_cmd(u32 trans) static OnLoad ol_fillfsm(&fill_fsm); +/* +Volume,Col_Type,Texture,Offset,Gouraud,16bit_UV + +0 0 0 (0) x invalid Polygon Type 0 Polygon Type 0 +0 0 1 x x 0 Polygon Type 0 Polygon Type 3 +0 0 1 x x 1 Polygon Type 0 Polygon Type 4 + +0 1 0 (0) x invalid Polygon Type 0 Polygon Type 1 +0 1 1 x x 0 Polygon Type 0 Polygon Type 5 +0 1 1 x x 1 Polygon Type 0 Polygon Type 6 + +0 2 0 (0) x invalid Polygon Type 1 Polygon Type 2 +0 2 1 0 x 0 Polygon Type 1 Polygon Type 7 +0 2 1 0 x 1 Polygon Type 1 Polygon Type 8 +0 2 1 1 x 0 Polygon Type 2 Polygon Type 7 +0 2 1 1 x 1 Polygon Type 2 Polygon Type 8 + +0 3 0 (0) x invalid Polygon Type 0 Polygon Type 2 +0 3 1 x x 0 Polygon Type 0 Polygon Type 7 +0 3 1 x x 1 Polygon Type 
0 Polygon Type 8 + +1 0 0 (0) x invalid Polygon Type 3 Polygon Type 9 +1 0 1 x x 0 Polygon Type 3 Polygon Type 11 +1 0 1 x x 1 Polygon Type 3 Polygon Type 12 + +1 2 0 (0) x invalid Polygon Type 4 Polygon Type 10 +1 2 1 x x 0 Polygon Type 4 Polygon Type 13 +1 2 1 x x 1 Polygon Type 4 Polygon Type 14 + +1 3 0 (0) x invalid Polygon Type 3 Polygon Type 10 +1 3 1 x x 0 Polygon Type 3 Polygon Type 13 +1 3 1 x x 1 Polygon Type 3 Polygon Type 14 + +Sprites : +(0) (0) 0 (0) (0) invalid Sprite Sprite Type 0 +(0) (0) 1 x (0) (1) Sprite Sprite Type 1 + +*/ +//helpers 0-14 +u32 TaTypeLut::poly_data_type_id(PCW pcw) +{ + if (pcw.Texture) + { + //textured + if (pcw.Volume==0) + { //single volume + if (pcw.Col_Type==0) + { + if (pcw.UV_16bit==0) + return 3; //(Textured, Packed Color , 32b uv) + else + return 4; //(Textured, Packed Color , 16b uv) + } + else if (pcw.Col_Type==1) + { + if (pcw.UV_16bit==0) + return 5; //(Textured, Floating Color , 32b uv) + else + return 6; //(Textured, Floating Color , 16b uv) + } + else + { + if (pcw.UV_16bit==0) + return 7; //(Textured, Intensity , 32b uv) + else + return 8; //(Textured, Intensity , 16b uv) + } + } + else + { + //two volumes + if (pcw.Col_Type==0) + { + if (pcw.UV_16bit==0) + return 11; //(Textured, Packed Color, with Two Volumes) + + else + return 12; //(Textured, Packed Color, 16bit UV, with Two Volumes) + + } + else if (pcw.Col_Type==1) + { + //die ("invalid"); + return 0xFFFFFFFF; + } + else + { + if (pcw.UV_16bit==0) + return 13; //(Textured, Intensity, with Two Volumes) + + else + return 14; //(Textured, Intensity, 16bit UV, with Two Volumes) + } + } + } + else + { + //non textured + if (pcw.Volume==0) + { //single volume + if (pcw.Col_Type==0) + return 0; //(Non-Textured, Packed Color) + else if (pcw.Col_Type==1) + return 1; //(Non-Textured, Floating Color) + else + return 2; //(Non-Textured, Intensity) + } + else + { + //two volumes + if (pcw.Col_Type==0) + return 9; //(Non-Textured, Packed Color, with Two Volumes) + else 
if (pcw.Col_Type==1) + { + //die ("invalid"); + return 0xFFFFFFFF; + } + else + { + return 10; //(Non-Textured, Intensity, with Two Volumes) + } + } + } +} +//0-4 | 0x80 +u32 TaTypeLut::poly_header_type_size(PCW pcw) +{ + if (pcw.Volume == 0) + { + if ( pcw.Col_Type<2 ) //0,1 + { + return 0 | 0; //Polygon Type 0 -- SZ32 + } + else if ( pcw.Col_Type == 2 ) + { + if (pcw.Texture) + { + if (pcw.Offset) + { + return 2 | 0x80; //Polygon Type 2 -- SZ64 + } + else + { + return 1 | 0; //Polygon Type 1 -- SZ32 + } + } + else + { + return 1 | 0; //Polygon Type 1 -- SZ32 + } + } + else //col_type ==3 + { + return 0 | 0; //Polygon Type 0 -- SZ32 + } + } + else + { + if ( pcw.Col_Type==0 ) //0 + { + return 3 | 0; //Polygon Type 3 -- SZ32 + } + else if ( pcw.Col_Type==2 ) //2 + { + return 4 | 0x80; //Polygon Type 4 -- SZ64 + } + else if ( pcw.Col_Type==3 ) //3 + { + return 3 | 0; //Polygon Type 3 -- SZ32 + } + else + { + return 0xFFDDEEAA;//die ("data->pcw.Col_Type==1 && volume ==1"); + } + } +} + +TaTypeLut::TaTypeLut() +{ + for (int i = 0; i < 256; i++) + { + PCW pcw; + pcw.obj_ctrl = i; + u32 rv = poly_data_type_id(pcw); + u32 type = poly_header_type_size(pcw); + + if (type & 0x80) + rv |= SZ64 << 30; + else + rv |= SZ32 << 30; + + rv |= (type & 0x7F) << 8; + + table[i] = rv; + } +} + static u32 opbSize(int n) { return n == 0 ? 
0 : 16 << n; @@ -305,10 +502,13 @@ void ta_vtx_ListInit() ta_cur_state = TAS_NS; ta_fsm_cl = 7; + if (settings.platform.isNaomi2()) + ta_parse_reset(); } + void ta_vtx_SoftReset() { - ta_cur_state=TAS_NS; + ta_cur_state = TAS_NS; } static INLINE diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 4aee0064f..48d95ea0c 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -3,6 +3,9 @@ #include "ta_ctx.h" #include "hw/sh4/sh4_if.h" +constexpr u32 SZ32 = 1; +constexpr u32 SZ64 = 2; + struct TA_context; void ta_vtx_ListInit(); @@ -25,4 +28,6 @@ public: private: TaTypeLut(); + u32 poly_data_type_id(PCW pcw); + u32 poly_header_type_size(PCW pcw); }; diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 2ae9f998a..9562dce21 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -195,30 +195,34 @@ struct N2Light float color[4]; float direction[4]; // For parallel/spot float position[4]; // For spot/point + int parallel; - int diffuse[2]; - int specular[2]; int routing; int dmode; int smode; - int distAttnMode; // For spot/point + int diffuse[2]; + int specular[2]; + float attnDistA; float attnDistB; float attnAngleA; // For spot float attnAngleB; + int distAttnMode; // For spot/point + int _pad[3]; }; struct N2LightModel { N2Light lights[16]; - int lightCount; float ambientBase[2][4]; // base ambient colors float ambientOffset[2][4]; // offset ambient colors - bool ambientMaterialBase[2]; // base ambient light is multiplied by model material/color - bool ambientMaterialOffset[2]; // offset ambient light is multiplied by model material/color - bool useBaseOver; // base color overflows into offset color + int ambientMaterialBase[2]; // base ambient light is multiplied by model material/color + int ambientMaterialOffset[2];// offset ambient light is multiplied by model material/color + + int lightCount; + int useBaseOver; // base color overflows into offset color int bumpId1; // Light index for vol0 bump mapping int bumpId2; // Light index for vol1 bump 
mapping }; @@ -391,11 +395,22 @@ bool rend_framePending(); void SerializeTAContext(Serializer& ser); void DeserializeTAContext(Deserializer& deser); -void ta_add_poly(int type, const PolyParam& pp); -void ta_add_poly(int type, const ModifierVolumeParam& mvp); +void ta_add_poly(const PolyParam& pp); +void ta_add_poly(int listType, const ModifierVolumeParam& mvp); void ta_add_vertex(const Vertex& vtx); void ta_add_triangle(const ModTriangle& tri); float* ta_add_matrix(const float *matrix); N2LightModel *ta_add_light(const N2LightModel& light); -void ta_add_ta_data(int listType, u32 *data, u32 size); +u32 ta_add_ta_data(u32 *data, u32 size); int getTAContextAddresses(u32 *addresses); +u32 ta_get_tileclip(); +void ta_set_tileclip(u32 tileclip); +u32 ta_get_list_type(); +void ta_set_list_type(u32 listType); +void ta_parse_reset(); + +class TAParserException : public FlycastException +{ +public: + TAParserException() : FlycastException("") {} +}; diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 2d4473e66..68cd4095f 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -19,69 +19,16 @@ #define verify(x) #endif -//cache state vars -static u32 tileclip_val = 0; - static u8 f32_su8_tbl[65536]; -#define float_to_satu8(val) f32_su8_tbl[((u32&)val)>>16] -#ifndef NDEBUG -/* - This uses just 1k of lookup, but does more calcs - The full 64k table will be much faster -- as only a small sub-part of it will be used anyway (the same 1k) -*/ -static u8 float_to_satu8_2(float val) -{ - s32 vl=(s32&)val>>16; - u32 m1=(vl-0x3b80)>>31; //1 if smaller 0x3b80 or negative - u32 m2=(vl-0x3f80)>>31; //1 if smaller 0x3f80 or negative - u32 vo=vl-0x3b80; - vo &= (~m1>>22); - - return f32_su8_tbl[0x3b80+vo] | (~m2>>24); -} -#endif - -#define saturate01(x) (((s32&)x)<0?0:(s32&)x>0x3f800000?1:x) -static u8 float_to_satu8_math(float val) -{ - return u8(saturate01(val)*255); +static u8 float_to_satu8(float val) { + return f32_su8_tbl[(u32&)val >> 16]; } static 
TA_context *vd_ctx; #define vd_rc (vd_ctx->rend) -//vdec state variables -static ModTriangle* lmr; - -static PolyParam* CurrentPP; -static List* CurrentPPlist; - -//TA state vars -alignas(4) static u8 FaceBaseColor[4]; -alignas(4) static u8 FaceOffsColor[4]; -alignas(4) static u8 FaceBaseColor1[4]; -alignas(4) static u8 FaceOffsColor1[4]; -static u32 SFaceBaseColor; -static u32 SFaceOffsColor; - -//misc ones -const u32 ListType_None = -1; -const u32 SZ32 = 1; -const u32 SZ64 = 2; -static bool fetchTextures = true; -static u32 forcedListType = ListType_None; - -#include "ta_structs.h" - -typedef Ta_Dma* DYNACALL TaListFP(Ta_Dma* data,Ta_Dma* data_end); -typedef void TACALL TaPolyParamFP(void* ptr); - -static TaListFP* TaCmd; - -static u32 CurrentList; -static TaListFP* VertexDataFP; -static bool ListIsFinished[5]; +constexpr u32 ListType_None = -1; static f32 f16(u16 v) { @@ -89,30 +36,141 @@ static f32 f16(u16 v) return *(f32*)&z; } -template -class FifoSplitter +class BaseTAParser { - static const u32 *ta_type_lut; - - static void ta_list_start(u32 new_list) - { - verify(CurrentList==ListType_None); - //verify(ListIsFinished[new_list]==false); - //printf("Starting list %d\n",new_list); - CurrentList = forcedListType != ListType_None ? 
forcedListType : new_list; - StartList(CurrentList); - } - - static Ta_Dma* DYNACALL NullVertexData(Ta_Dma* data,Ta_Dma* data_end) + static Ta_Dma *DYNACALL NullVertexData(Ta_Dma *data, Ta_Dma *data_end) { INFO_LOG(PVR, "TA: Invalid state, ignoring VTX data"); - return data+SZ32; + return data + SZ32; } +public: + static void startList(u32 listType) + { + if (CurrentList != ListType_None) + return; + CurrentList = listType; + if (listType == ListType_Opaque) + CurrentPPlist = &vd_rc.global_param_op; + else if (listType == ListType_Punch_Through) + CurrentPPlist = &vd_rc.global_param_pt; + else if (listType == ListType_Translucent) + CurrentPPlist = &vd_rc.global_param_tr; + + CurrentPP = nullptr; + } + + static void endList() + { + if (CurrentList == ListType_None) + return; + if (CurrentPP != nullptr && CurrentPP->count == 0) + CurrentPPlist->PopLast(); + CurrentPP = nullptr; + CurrentPPlist = nullptr; + + if (CurrentList == ListType_Opaque_Modifier_Volume + || CurrentList == ListType_Translucent_Modifier_Volume) + endModVol(); + CurrentList = ListType_None; + VertexDataFP = NullVertexData; + } + + static int getCurrentList() { + return CurrentList; + } + + static u32 getTileClip() { + return tileclip_val; + } + + static void setTileClip(u32 tileclip) { + tileclip_val = tileclip; + } + +protected: + typedef Ta_Dma* DYNACALL TaListFP(Ta_Dma* data, Ta_Dma* data_end); + typedef void TACALL TaPolyParamFP(void* ptr); + + static void endModVol() + { + List *list = nullptr; + if (CurrentList == ListType_Opaque_Modifier_Volume) + list = &vd_rc.global_param_mvo; + else if (CurrentList == ListType_Translucent_Modifier_Volume) + list = &vd_rc.global_param_mvo_tr; + else + return; + if (list->used() > 0) + { + ModifierVolumeParam *p = list->LastPtr(); + p->count = vd_rc.modtrig.used() - p->first; + if (p->count == 0) + list->PopLast(); + } + } + + static void reset() + { + memset(FaceBaseColor, 0, sizeof(FaceBaseColor)); + memset(FaceOffsColor, 0, sizeof(FaceOffsColor)); + 
memset(FaceBaseColor1, 0, sizeof(FaceBaseColor1)); + memset(FaceOffsColor1, 0, sizeof(FaceOffsColor1)); + SFaceBaseColor = 0; + SFaceOffsColor = 0; + lmr = nullptr; + CurrentList = ListType_None; + CurrentPP = nullptr; + CurrentPPlist = nullptr; + VertexDataFP = NullVertexData; + } + + static const u32 *ta_type_lut; + + //cache state vars + static u32 tileclip_val; + + //TA state vars + static u8 FaceBaseColor[4]; + static u8 FaceOffsColor[4]; + static u8 FaceBaseColor1[4]; + static u8 FaceOffsColor1[4]; + static u32 SFaceBaseColor; + static u32 SFaceOffsColor; + //vdec state variables + static ModTriangle* lmr; + + static u32 CurrentList; + static PolyParam* CurrentPP; + static TaListFP *VertexDataFP; +public: + static List* CurrentPPlist; + static TaListFP* TaCmd; + static bool fetchTextures; +}; + +const u32 *BaseTAParser::ta_type_lut = TaTypeLut::instance().table; +u32 BaseTAParser::tileclip_val; +alignas(4) u8 BaseTAParser::FaceBaseColor[4]; +alignas(4) u8 BaseTAParser::FaceOffsColor[4]; +alignas(4) u8 BaseTAParser::FaceBaseColor1[4]; +alignas(4) u8 BaseTAParser::FaceOffsColor1[4]; +u32 BaseTAParser::SFaceBaseColor; +u32 BaseTAParser::SFaceOffsColor; +ModTriangle* BaseTAParser::lmr; +u32 BaseTAParser::CurrentList; +PolyParam* BaseTAParser::CurrentPP; +List* BaseTAParser::CurrentPPlist; +BaseTAParser::TaListFP *BaseTAParser::TaCmd; +BaseTAParser::TaListFP *BaseTAParser::VertexDataFP; +bool BaseTAParser::fetchTextures = true; + +template +class TAParserTempl : public BaseTAParser +{ //part : 0 fill all data , 1 fill upper 32B , 2 fill lower 32B //Poly decoder , will be moved to pvr code template - __forceinline static Ta_Dma* TACALL ta_handle_poly(Ta_Dma* data,Ta_Dma* data_end) { TA_VertexParam* vp=(TA_VertexParam*)data; @@ -181,13 +239,7 @@ case num : {\ }; //Code Splitter/routers - - //helper function for dummy dma's.Handles 32B and then switches to ta_main for next data - static Ta_Dma* TACALL ta_dummy_32(Ta_Dma* data,Ta_Dma* data_end) - { - TaCmd=ta_main; - 
return data+SZ32; - } + static Ta_Dma* TACALL ta_modvolB_32(Ta_Dma* data,Ta_Dma* data_end) { AppendModVolVertexB((TA_ModVolB*)data); @@ -313,36 +365,22 @@ strip_end: return data+SZ32; } - //Group_En bit seems ignored, thanks p1pkin -#define group_EN() /*if (data->pcw.Group_En) */{ TileClipMode(data->pcw.User_Clip); } - static Ta_Dma* TACALL ta_main(Ta_Dma* data,Ta_Dma* data_end) + static Ta_Dma* TACALL ta_main(Ta_Dma* data, Ta_Dma* data_end) { - do + while (data < data_end) { + if (settings.platform.isNaomi2() && (data->pcw.full & 0x08000000) != 0) + { + DEBUG_LOG(PVR, "Naomi 2 command detected"); + break; + } switch (data->pcw.ParaType) { //Control parameter //32Bw3 case ParamType_End_Of_List: - { - - if (CurrentList==ListType_None) - { - CurrentList=data->pcw.ListType; - //printf("End_Of_List : list error\n"); - } - else - { - //end of list should be all 0's ... - EndList(CurrentList);//end a list olny if it was realy started - } - - //printf("End list %X\n",CurrentList); - ListIsFinished[CurrentList]=true; - CurrentList=ListType_None; - VertexDataFP = NullVertexData; - data+=SZ32; - } + endList(); + data += SZ32; break; //32B @@ -363,11 +401,11 @@ strip_end: //PolyType :32B/64B case ParamType_Polygon_or_Modifier_Volume: { - group_EN(); + TileClipMode(data->pcw.User_Clip); //Yep , C++ IS lame & limited #include "ta_const_df.h" if (CurrentList==ListType_None) - ta_list_start(data->pcw.ListType); //start a list ;) + startList(data->pcw.ListType); if (IsModVolList(CurrentList)) { @@ -412,9 +450,9 @@ strip_end: case ParamType_Sprite: { - group_EN(); + TileClipMode(data->pcw.User_Clip); if (CurrentList==ListType_None) - ta_list_start(data->pcw.ListType); //start a list ;) + startList(data->pcw.ListType); VertexDataFP = ta_sprite_data; AppendSpriteParam((TA_SpriteParam*)data); @@ -431,221 +469,28 @@ strip_end: //Assumed to be 32B case 3: case 6: - { - die("Unhandled parameter"); - data+=SZ32; - } + WARN_LOG(PVR, "Unhandled param type pcw %08x", data->pcw.full); + throw 
TAParserException(); + //die("Unhandled parameter"); + //data += SZ32; break; } } - while (data <= data_end); + return data; } + TAParserTempl(); + public: - //Fill in lookup table - FifoSplitter() - { - VertexDataFP = NullVertexData; - ta_type_lut = TaTypeLut::instance().table; - } - /* - Volume,Col_Type,Texture,Offset,Gouraud,16bit_UV - - 0 0 0 (0) x invalid Polygon Type 0 Polygon Type 0 - 0 0 1 x x 0 Polygon Type 0 Polygon Type 3 - 0 0 1 x x 1 Polygon Type 0 Polygon Type 4 - - 0 1 0 (0) x invalid Polygon Type 0 Polygon Type 1 - 0 1 1 x x 0 Polygon Type 0 Polygon Type 5 - 0 1 1 x x 1 Polygon Type 0 Polygon Type 6 - - 0 2 0 (0) x invalid Polygon Type 1 Polygon Type 2 - 0 2 1 0 x 0 Polygon Type 1 Polygon Type 7 - 0 2 1 0 x 1 Polygon Type 1 Polygon Type 8 - 0 2 1 1 x 0 Polygon Type 2 Polygon Type 7 - 0 2 1 1 x 1 Polygon Type 2 Polygon Type 8 - - 0 3 0 (0) x invalid Polygon Type 0 Polygon Type 2 - 0 3 1 x x 0 Polygon Type 0 Polygon Type 7 - 0 3 1 x x 1 Polygon Type 0 Polygon Type 8 - - 1 0 0 (0) x invalid Polygon Type 3 Polygon Type 9 - 1 0 1 x x 0 Polygon Type 3 Polygon Type 11 - 1 0 1 x x 1 Polygon Type 3 Polygon Type 12 - - 1 2 0 (0) x invalid Polygon Type 4 Polygon Type 10 - 1 2 1 x x 0 Polygon Type 4 Polygon Type 13 - 1 2 1 x x 1 Polygon Type 4 Polygon Type 14 - - 1 3 0 (0) x invalid Polygon Type 3 Polygon Type 10 - 1 3 1 x x 0 Polygon Type 3 Polygon Type 13 - 1 3 1 x x 1 Polygon Type 3 Polygon Type 14 - - Sprites : - (0) (0) 0 (0) (0) invalid Sprite Sprite Type 0 - (0) (0) 1 x (0) (1) Sprite Sprite Type 1 - - */ - //helpers 0-14 - static u32 poly_data_type_id(PCW pcw) - { - if (pcw.Texture) - { - //textured - if (pcw.Volume==0) - { //single volume - if (pcw.Col_Type==0) - { - if (pcw.UV_16bit==0) - return 3; //(Textured, Packed Color , 32b uv) - else - return 4; //(Textured, Packed Color , 16b uv) - } - else if (pcw.Col_Type==1) - { - if (pcw.UV_16bit==0) - return 5; //(Textured, Floating Color , 32b uv) - else - return 6; //(Textured, Floating Color , 16b uv) 
- } - else - { - if (pcw.UV_16bit==0) - return 7; //(Textured, Intensity , 32b uv) - else - return 8; //(Textured, Intensity , 16b uv) - } - } - else - { - //two volumes - if (pcw.Col_Type==0) - { - if (pcw.UV_16bit==0) - return 11; //(Textured, Packed Color, with Two Volumes) - - else - return 12; //(Textured, Packed Color, 16bit UV, with Two Volumes) - - } - else if (pcw.Col_Type==1) - { - //die ("invalid"); - return 0xFFFFFFFF; - } - else - { - if (pcw.UV_16bit==0) - return 13; //(Textured, Intensity, with Two Volumes) - - else - return 14; //(Textured, Intensity, 16bit UV, with Two Volumes) - } - } - } - else - { - //non textured - if (pcw.Volume==0) - { //single volume - if (pcw.Col_Type==0) - return 0; //(Non-Textured, Packed Color) - else if (pcw.Col_Type==1) - return 1; //(Non-Textured, Floating Color) - else - return 2; //(Non-Textured, Intensity) - } - else - { - //two volumes - if (pcw.Col_Type==0) - return 9; //(Non-Textured, Packed Color, with Two Volumes) - else if (pcw.Col_Type==1) - { - //die ("invalid"); - return 0xFFFFFFFF; - } - else - { - return 10; //(Non-Textured, Intensity, with Two Volumes) - } - } - } - } - //0-4 | 0x80 - static u32 poly_header_type_size(PCW pcw) - { - if (pcw.Volume == 0) - { - if ( pcw.Col_Type<2 ) //0,1 - { - return 0 | 0; //Polygon Type 0 -- SZ32 - } - else if ( pcw.Col_Type == 2 ) - { - if (pcw.Texture) - { - if (pcw.Offset) - { - return 2 | 0x80; //Polygon Type 2 -- SZ64 - } - else - { - return 1 | 0; //Polygon Type 1 -- SZ32 - } - } - else - { - return 1 | 0; //Polygon Type 1 -- SZ32 - } - } - else //col_type ==3 - { - return 0 | 0; //Polygon Type 0 -- SZ32 - } - } - else - { - if ( pcw.Col_Type==0 ) //0 - { - return 3 | 0; //Polygon Type 3 -- SZ32 - } - else if ( pcw.Col_Type==2 ) //2 - { - return 4 | 0x80; //Polygon Type 4 -- SZ64 - } - else if ( pcw.Col_Type==3 ) //3 - { - return 3 | 0; //Polygon Type 3 -- SZ32 - } - else - { - return 0xFFDDEEAA;//die ("data->pcw.Col_Type==1 && volume ==1"); - } - } - } - - void 
vdec_init() + static void reset() { TaCmd = ta_main; - CurrentList = ListType_None; - ListIsFinished[0] = ListIsFinished[1] = ListIsFinished[2] = ListIsFinished[3] = ListIsFinished[4] = false; - VertexDataFP = NullVertexData; - memset(FaceBaseColor, 0xff, sizeof(FaceBaseColor)); - memset(FaceOffsColor, 0xff, sizeof(FaceOffsColor)); - memset(FaceBaseColor1, 0xff, sizeof(FaceBaseColor1)); - memset(FaceOffsColor1, 0xff, sizeof(FaceOffsColor1)); - SFaceBaseColor = 0; - SFaceOffsColor = 0; - lmr = NULL; - CurrentPP = NULL; - CurrentPPlist = NULL; + BaseTAParser::reset(); } - + private: - __forceinline - static void SetTileClip(u32 xmin,u32 ymin,u32 xmax,u32 ymax) + static void SetTileClip(u32 xmin,u32 ymin,u32 xmax,u32 ymax) { u32 rv=tileclip_val & 0xF0000000; rv|=xmin; //6 bits @@ -655,39 +500,12 @@ private: tileclip_val=rv; } - __forceinline - static void TileClipMode(u32 mode) + static void TileClipMode(u32 mode) { + //Group_En bit seems ignored, thanks p1pkin tileclip_val=(tileclip_val&(~0xF0000000)) | (mode<<28); } - //list handling - __forceinline - static void StartList(u32 ListType) - { - if (ListType==ListType_Opaque) - CurrentPPlist=&vd_rc.global_param_op; - else if (ListType==ListType_Punch_Through) - CurrentPPlist=&vd_rc.global_param_pt; - else if (ListType==ListType_Translucent) - CurrentPPlist=&vd_rc.global_param_tr; - - CurrentPP = NULL; - } - - __forceinline - static void EndList(u32 ListType) - { - if (CurrentPP != NULL && CurrentPP->count == 0) - CurrentPPlist->PopLast(); - CurrentPP = NULL; - CurrentPPlist = NULL; - - if (ListType == ListType_Opaque_Modifier_Volume - || ListType == ListType_Translucent_Modifier_Volume) - EndModVol(); - } - //Polys -- update code on sprites if that gets updated too -- template static void glob_param_bdc_(T* pp) @@ -726,8 +544,7 @@ private: // Poly param handling // Packed/Floating Color - __forceinline - static void TACALL AppendPolyParam0(void* vpp) + static void TACALL AppendPolyParam0(void* vpp) { TA_PolyParam0* 
pp=(TA_PolyParam0*)vpp; @@ -735,8 +552,7 @@ private: } // Intensity, no Offset Color - __forceinline - static void TACALL AppendPolyParam1(void* vpp) + static void TACALL AppendPolyParam1(void* vpp) { TA_PolyParam1* pp=(TA_PolyParam1*)vpp; @@ -745,16 +561,14 @@ private: } // Intensity, use Offset Color - __forceinline - static void TACALL AppendPolyParam2A(void* vpp) + static void TACALL AppendPolyParam2A(void* vpp) { TA_PolyParam2A* pp=(TA_PolyParam2A*)vpp; glob_param_bdc(pp); } - __forceinline - static void TACALL AppendPolyParam2B(void* vpp) + static void TACALL AppendPolyParam2B(void* vpp) { TA_PolyParam2B* pp=(TA_PolyParam2B*)vpp; @@ -763,8 +577,7 @@ private: } // Packed Color, with Two Volumes - __forceinline - static void TACALL AppendPolyParam3(void* vpp) + static void TACALL AppendPolyParam3(void* vpp) { TA_PolyParam3* pp=(TA_PolyParam3*)vpp; @@ -777,8 +590,7 @@ private: } // Intensity, with Two Volumes - __forceinline - static void TACALL AppendPolyParam4A(void* vpp) + static void TACALL AppendPolyParam4A(void* vpp) { TA_PolyParam4A* pp=(TA_PolyParam4A*)vpp; @@ -790,8 +602,7 @@ private: CurrentPP->texture1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } - __forceinline - static void TACALL AppendPolyParam4B(void* vpp) + static void TACALL AppendPolyParam4B(void* vpp) { TA_PolyParam4B* pp=(TA_PolyParam4B*)vpp; @@ -800,8 +611,7 @@ private: } //Poly Strip handling - __forceinline - static void EndPolyStrip() + static void EndPolyStrip() { CurrentPP->count = vd_rc.verts.used() - CurrentPP->first; @@ -814,8 +624,6 @@ private: d_pp->count = 0; } } - - static inline void update_fz(float z) { @@ -919,8 +727,7 @@ private: //(Non-Textured, Packed Color) - __forceinline - static void AppendPolyVertex0(TA_Vertex0* vtx) + static void AppendPolyVertex0(TA_Vertex0* vtx) { vert_cvt_base; @@ -928,8 +735,7 @@ private: } //(Non-Textured, Floating Color) - __forceinline - static void AppendPolyVertex1(TA_Vertex1* vtx) + static void AppendPolyVertex1(TA_Vertex1* vtx) { 
vert_cvt_base; @@ -937,8 +743,7 @@ private: } //(Non-Textured, Intensity) - __forceinline - static void AppendPolyVertex2(TA_Vertex2* vtx) + static void AppendPolyVertex2(TA_Vertex2* vtx) { vert_cvt_base; @@ -946,8 +751,7 @@ private: } //(Textured, Packed Color) - __forceinline - static void AppendPolyVertex3(TA_Vertex3* vtx) + static void AppendPolyVertex3(TA_Vertex3* vtx) { vert_cvt_base; @@ -958,8 +762,7 @@ private: } //(Textured, Packed Color, 16bit UV) - __forceinline - static void AppendPolyVertex4(TA_Vertex4* vtx) + static void AppendPolyVertex4(TA_Vertex4* vtx) { vert_cvt_base; @@ -970,8 +773,7 @@ private: } //(Textured, Floating Color) - __forceinline - static void AppendPolyVertex5A(TA_Vertex5A* vtx) + static void AppendPolyVertex5A(TA_Vertex5A* vtx) { vert_cvt_base; @@ -980,8 +782,7 @@ private: vert_uv_32(u,v); } - __forceinline - static void AppendPolyVertex5B(TA_Vertex5B* vtx) + static void AppendPolyVertex5B(TA_Vertex5B* vtx) { vert_res_base; @@ -990,8 +791,7 @@ private: } //(Textured, Floating Color, 16bit UV) - __forceinline - static void AppendPolyVertex6A(TA_Vertex6A* vtx) + static void AppendPolyVertex6A(TA_Vertex6A* vtx) { vert_cvt_base; @@ -999,8 +799,8 @@ private: vert_uv_16(u,v); } - __forceinline - static void AppendPolyVertex6B(TA_Vertex6B* vtx) + + static void AppendPolyVertex6B(TA_Vertex6B* vtx) { vert_res_base; @@ -1009,8 +809,7 @@ private: } //(Textured, Intensity) - __forceinline - static void AppendPolyVertex7(TA_Vertex7* vtx) + static void AppendPolyVertex7(TA_Vertex7* vtx) { vert_cvt_base; @@ -1021,8 +820,7 @@ private: } //(Textured, Intensity, 16bit UV) - __forceinline - static void AppendPolyVertex8(TA_Vertex8* vtx) + static void AppendPolyVertex8(TA_Vertex8* vtx) { vert_cvt_base; @@ -1034,8 +832,7 @@ private: } //(Non-Textured, Packed Color, with Two Volumes) - __forceinline - static void AppendPolyVertex9(TA_Vertex9* vtx) + static void AppendPolyVertex9(TA_Vertex9* vtx) { vert_cvt_base; @@ -1044,8 +841,7 @@ private: } 
//(Non-Textured, Intensity, with Two Volumes) - __forceinline - static void AppendPolyVertex10(TA_Vertex10* vtx) + static void AppendPolyVertex10(TA_Vertex10* vtx) { vert_cvt_base; @@ -1054,8 +850,7 @@ private: } //(Textured, Packed Color, with Two Volumes) - __forceinline - static void AppendPolyVertex11A(TA_Vertex11A* vtx) + static void AppendPolyVertex11A(TA_Vertex11A* vtx) { vert_cvt_base; @@ -1064,8 +859,8 @@ private: vert_uv_32(u0,v0); } - __forceinline - static void AppendPolyVertex11B(TA_Vertex11B* vtx) + + static void AppendPolyVertex11B(TA_Vertex11B* vtx) { vert_res_base; @@ -1076,8 +871,7 @@ private: } //(Textured, Packed Color, 16bit UV, with Two Volumes) - __forceinline - static void AppendPolyVertex12A(TA_Vertex12A* vtx) + static void AppendPolyVertex12A(TA_Vertex12A* vtx) { vert_cvt_base; @@ -1086,8 +880,8 @@ private: vert_uv_16(u0,v0); } - __forceinline - static void AppendPolyVertex12B(TA_Vertex12B* vtx) + + static void AppendPolyVertex12B(TA_Vertex12B* vtx) { vert_res_base; @@ -1098,8 +892,7 @@ private: } //(Textured, Intensity, with Two Volumes) - __forceinline - static void AppendPolyVertex13A(TA_Vertex13A* vtx) + static void AppendPolyVertex13A(TA_Vertex13A* vtx) { vert_cvt_base; @@ -1108,8 +901,8 @@ private: vert_uv_32(u0,v0); } - __forceinline - static void AppendPolyVertex13B(TA_Vertex13B* vtx) + + static void AppendPolyVertex13B(TA_Vertex13B* vtx) { vert_res_base; @@ -1120,8 +913,7 @@ private: } //(Textured, Intensity, 16bit UV, with Two Volumes) - __forceinline - static void AppendPolyVertex14A(TA_Vertex14A* vtx) + static void AppendPolyVertex14A(TA_Vertex14A* vtx) { vert_cvt_base; @@ -1130,8 +922,8 @@ private: vert_uv_16(u0,v0); } - __forceinline - static void AppendPolyVertex14B(TA_Vertex14B* vtx) + + static void AppendPolyVertex14B(TA_Vertex14B* vtx) { vert_res_base; @@ -1142,8 +934,7 @@ private: } //Sprites - __forceinline - static void AppendSpriteParam(TA_SpriteParam* spr) + static void AppendSpriteParam(TA_SpriteParam* spr) { 
//printf("Sprite\n"); PolyParam* d_pp = CurrentPP; @@ -1180,8 +971,7 @@ private: cv[indx].v = f16(sv->v_name); //Sprite Vertex Handlers - __forceinline - static void AppendSpriteVertexA(TA_Sprite1A* sv) + static void AppendSpriteVertexA(TA_Sprite1A* sv) { CurrentPP->count = 4; @@ -1205,6 +995,7 @@ private: cv[1].x=sv->x2; } + static void CaclulateSpritePlane(Vertex* base) { const Vertex& A=base[2]; @@ -1253,8 +1044,8 @@ private: P.u = A_u + k1 * AB_u + k2 * AC_u; P.v = A_v + k1 * AB_v + k2 * AC_v; } - __forceinline - static void AppendSpriteVertexB(TA_Sprite1B* sv) + + static void AppendSpriteVertexB(TA_Sprite1B* sv) { vert_res_base; cv-=3; @@ -1285,29 +1076,9 @@ private: // Modifier Volumes Vertex handlers - static void EndModVol() - { - List *list = NULL; - if (CurrentList == ListType_Opaque_Modifier_Volume) - list = &vd_rc.global_param_mvo; - else if (CurrentList == ListType_Translucent_Modifier_Volume) - list = &vd_rc.global_param_mvo_tr; - else - return; - if (list->used() > 0) - { - ModifierVolumeParam *p = list->LastPtr(); - p->count = vd_rc.modtrig.used() - p->first; - if (p->count == 0) - list->PopLast(); - - } - } - - //Mod Volume Vertex handlers static void StartModVol(TA_ModVolParam* param) { - EndModVol(); + endModVol(); ModifierVolumeParam *p = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) @@ -1322,8 +1093,7 @@ private: p->first = vd_rc.modtrig.used(); } - __forceinline - static void AppendModVolVertexA(TA_ModVolA* mvv) + static void AppendModVolVertexA(TA_ModVolA* mvv) { if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; @@ -1342,8 +1112,7 @@ private: lmr->x2=mvv->x2; } - __forceinline - static void AppendModVolVertexB(TA_ModVolB* mvv) + static void AppendModVolVertexB(TA_ModVolB* mvv) { if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; @@ -1353,36 +1122,10 @@ private: } }; -template -const u32 
*FifoSplitter::ta_type_lut; - -TaTypeLut::TaTypeLut() -{ - for (int i = 0; i < 256; i++) - { - PCW pcw; - pcw.obj_ctrl = i; - u32 rv = FifoSplitter<>::poly_data_type_id(pcw); - u32 type = FifoSplitter<>::poly_header_type_size(pcw); - - if (type & 0x80) - rv |= SZ64 << 30; - else - rv |= SZ32 << 30; - - rv |= (type & 0x7F) << 8; - - table[i] = rv; - } -} - static bool ClearZBeforePass(int pass_number); static bool UsingAutoSort(int pass_number); static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& ymax); -FifoSplitter<> TAParser; -FifoSplitter<2, 1, 0, 3> TAParserDX; - // // Check if a vertex has huge x,y,z values or negative z // @@ -1539,10 +1282,7 @@ bool ta_parse_vdrc(TA_context* ctx) verify(vd_ctx == nullptr); vd_ctx = ctx; - if (isDirectX(config::RendererType)) - TAParserDX.vdec_init(); - else - TAParser.vdec_init(); + ta_parse_reset(); bool empty_context = true; int op_poly_count = 0; @@ -1554,11 +1294,6 @@ bool ta_parse_vdrc(TA_context* ctx) { bgpp->texture = renderer->GetTexture(bgpp->tsp, bgpp->tcw); empty_context = false; - bgpp->mvMatrix = nullptr; - bgpp->projMatrix = nullptr; - bgpp->lightModel = nullptr; - bgpp->envMapping[0] = false; - bgpp->envMapping[1] = false; } const bool mergeTranslucent = !config::PerStripSorting @@ -1573,10 +1308,14 @@ bool ta_parse_vdrc(TA_context* ctx) vd_rc.proc_end = childCtx->rend.proc_end; Ta_Dma* ta_data = (Ta_Dma *)vd_rc.proc_start; - Ta_Dma* ta_data_end = (Ta_Dma *)vd_rc.proc_end - 1; + Ta_Dma* ta_data_end = (Ta_Dma *)vd_rc.proc_end; - while (ta_data <= ta_data_end) - ta_data = TaCmd(ta_data, ta_data_end); + while (ta_data < ta_data_end) + try { + ta_data = BaseTAParser::TaCmd(ta_data, ta_data_end); + } catch (const TAParserException& e) { + break; + } if (vd_ctx->rend.Overrun) break; @@ -1642,13 +1381,6 @@ bool ta_parse_naomi2(TA_context* ctx) { ctx->rend_inuse.lock(); - PolyParam &bgpp = ctx->rend.global_param_op.head()[0]; - bgpp.mvMatrix = nullptr; - bgpp.projMatrix = nullptr; - bgpp.lightModel 
= nullptr; - bgpp.envMapping[0] = false; - bgpp.envMapping[1] = false; - for (PolyParam& pp : ctx->rend.global_param_op) { if (pp.pcw.Texture) @@ -1718,41 +1450,42 @@ const float identityMat[] { 0.f, 0.f, 0.f, 1.f }; -void ta_add_poly(int type, const PolyParam& pp) +const float defaultProjMat[] { + 579.411194f, 0.f, 0.f, 0.f, + 0.f, -579.411194f, 0.f, 0.f, + -320.f, -240.f, -1.f, -1.f, + 0.f, 0.f, 0.f, 0.f +}; + +void ta_add_poly(const PolyParam& pp) { verify(ta_ctx != nullptr); - switch (type) - { - case ListType_Opaque: - *ta_ctx->rend.global_param_op.Append() = pp; - n2CurrentPP = ta_ctx->rend.global_param_op.LastPtr(); - break; - case ListType_Translucent: - *ta_ctx->rend.global_param_tr.Append() = pp; - n2CurrentPP = ta_ctx->rend.global_param_tr.LastPtr(); - break; - case ListType_Punch_Through: - *ta_ctx->rend.global_param_pt.Append() = pp; - n2CurrentPP = ta_ctx->rend.global_param_pt.LastPtr(); - break; - default: - die("wrong list type"); - break; - } + verify(vd_ctx == nullptr); + vd_ctx = ta_ctx; + BaseTAParser::startList(pp.pcw.ListType); + + *BaseTAParser::CurrentPPlist->Append() = pp; + n2CurrentPP = BaseTAParser::CurrentPPlist->LastPtr(); n2CurrentPP->first = ta_ctx->rend.verts.used(); n2CurrentPP->count = 0; + n2CurrentPP->tileclip = BaseTAParser::getTileClip(); if (n2CurrentPP->mvMatrix == nullptr) n2CurrentPP->mvMatrix = identityMat; if (n2CurrentPP->normalMatrix == nullptr) n2CurrentPP->normalMatrix = identityMat; if (n2CurrentPP->projMatrix == nullptr) - n2CurrentPP->projMatrix = identityMat; + n2CurrentPP->projMatrix = defaultProjMat; + vd_ctx = nullptr; } -void ta_add_poly(int type, const ModifierVolumeParam& mvp) +void ta_add_poly(int listType, const ModifierVolumeParam& mvp) { verify(ta_ctx != nullptr); - switch (type) + verify(vd_ctx == nullptr); + vd_ctx = ta_ctx; + BaseTAParser::startList(listType); + + switch (BaseTAParser::getCurrentList()) { case ListType_Opaque_Modifier_Volume: *ta_ctx->rend.global_param_mvo.Append() = mvp; @@ -1771,7 
+1504,8 @@ void ta_add_poly(int type, const ModifierVolumeParam& mvp) if (n2CurrentMVP->mvMatrix == nullptr) n2CurrentMVP->mvMatrix = identityMat; if (n2CurrentMVP->projMatrix == nullptr) - n2CurrentMVP->projMatrix = identityMat; + n2CurrentMVP->projMatrix = defaultProjMat; + vd_ctx = nullptr; } void ta_add_vertex(const Vertex& vtx) @@ -1799,28 +1533,59 @@ N2LightModel *ta_add_light(const N2LightModel& light) return ta_ctx->rend.lightModels.LastPtr(); } -void ta_add_ta_data(int listType, u32 *data, u32 size) +u32 ta_add_ta_data(u32 *data, u32 size) { + verify(vd_ctx == nullptr); vd_ctx = ta_ctx; - fetchTextures = false; - forcedListType = listType; - if (isDirectX(config::RendererType)) - TAParserDX.vdec_init(); - else - TAParser.vdec_init(); + BaseTAParser::fetchTextures = false; Ta_Dma *ta_data = (Ta_Dma *)data; - Ta_Dma *ta_data_end = (Ta_Dma *)(data + size / 4) - 1; - while (ta_data <= ta_data_end) - ta_data = TaCmd(ta_data, ta_data_end); - Ta_Dma eol{}; - eol.pcw.ParaType = ParamType_End_Of_List; - eol.pcw.ListType = listType; - TaCmd(&eol, &eol); + Ta_Dma *ta_data_end = (Ta_Dma *)(data + size / 4); + try { + ta_data = BaseTAParser::TaCmd(ta_data, ta_data_end); + } catch (const FlycastException& e) { + vd_ctx = nullptr; + BaseTAParser::fetchTextures = true; + throw; + } vd_ctx = nullptr; - fetchTextures = true; - forcedListType = ListType_None; + BaseTAParser::fetchTextures = true; + + return (u8 *)ta_data - (u8 *)data; +} + +u32 ta_get_tileclip() { + return BaseTAParser::getTileClip(); +} + +void ta_set_tileclip(u32 tileclip) { + BaseTAParser::setTileClip(tileclip); +} + +u32 ta_get_list_type() { + return BaseTAParser::getCurrentList(); +} + +void ta_set_list_type(u32 listType) +{ + verify(vd_ctx == nullptr); + vd_ctx = ta_ctx; + BaseTAParser::endList(); + if (listType != ListType_None) + BaseTAParser::startList(listType); + vd_ctx = nullptr; +} + +void ta_parse_reset() +{ + using TAParser = TAParserTempl<>; + using TAParserDX = TAParserTempl<2, 1, 0, 3>; + + 
if (isDirectX(config::RendererType)) + TAParserDX::reset(); + else + TAParser::reset(); } //decode a vertex in the native pvr format @@ -1878,37 +1643,28 @@ void decode_pvr_vertex(u32 base, u32 ptr, Vertex* cv) } } -void vtxdec_init() +static u8 float_to_satu8_math(float val) +{ + return (u8)(std::min(1.f, std::max(0.f, val)) * 255.f); +} + +static void vtxdec_init() { /* 0x3b80 ~ 0x3f80 -> actual useful range. Rest is clamping to 0 or 255 ~ */ - for (u32 i=0;i<65536;i++) + for (u32 i = 0; i < ARRAY_SIZE(f32_su8_tbl); i++) { - u32 fr=i<<16; + u32 fr = i << 16; - f32_su8_tbl[i]=float_to_satu8_math((f32&)fr); + f32_su8_tbl[i] = float_to_satu8_math((f32&)fr); } - -#ifndef NDEBUG - for (u32 i=0;i<65536;i++) - { - u32 fr=i<<16; - f32 ff=(f32&)fr; - - verify(float_to_satu8_math(ff)==float_to_satu8_2(ff)); - verify(float_to_satu8_math(ff)==float_to_satu8(ff)); - } -#endif } - - static OnLoad ol_vtxdec(&vtxdec_init); void FillBGP(TA_context* ctx) { - //Render pre-code //--BG poly u32 param_base=PARAM_BASE & 0xF00000; @@ -1939,17 +1695,12 @@ void FillBGP(TA_context* ctx) u32 vertex_ptr=strip_vert_num*strip_vs+strip_base +3*4; //now , all the info is ready :p - bgpp->texture = nullptr; - bgpp->isp.full = pvr_read32p(strip_base); bgpp->tsp.full = pvr_read32p(strip_base + 4); bgpp->tcw.full = pvr_read32p(strip_base + 8); - bgpp->tcw1.full = -1; - bgpp->tsp1.full = -1; - bgpp->texture1 = nullptr; - bgpp->count=4; - bgpp->first=0; - bgpp->tileclip=0;//disabled ! HA ~ + bgpp->count = 4; + bgpp->first = 0; + bgpp->tileclip = 0;//disabled ! HA ~ bgpp->isp.DepthMode=7;// -> this makes things AWFULLY slow .. sometimes bgpp->isp.CullMode=0;// -> so that its not culled, or somehow else hidden ! @@ -1959,7 +1710,6 @@ void FillBGP(TA_context* ctx) bgpp->pcw.Offset=bgpp->isp.Offset; bgpp->pcw.Texture = bgpp->isp.Texture; bgpp->pcw.Shadow = ISP_BACKGND_T.shadow; - bgpp->projMatrix = nullptr; float scale_x= (SCALER_CTL.hscale) ? 
2.f:1.f; //if AA hack the hacked pos value hacks for (int i=0;i<3;i++) @@ -2149,18 +1899,13 @@ int getTAContextAddresses(u32 *addresses) void rend_context::newRenderPass() { - if (global_param_op.used() > 0 - || global_param_tr.used() > 0 - || global_param_pt.used() > 0) - { - RenderPass pass; - pass.op_count = global_param_op.used(); - pass.tr_count = global_param_tr.used(); - pass.pt_count = global_param_pt.used(); - pass.mvo_count = global_param_mvo.used(); - pass.mvo_tr_count = global_param_mvo_tr.used(); - pass.autosort = UsingAutoSort(render_passes.used()); - pass.z_clear = ClearZBeforePass(render_passes.used()); - *render_passes.Append() = pass; - } + RenderPass pass; + pass.op_count = global_param_op.used(); + pass.tr_count = global_param_tr.used(); + pass.pt_count = global_param_pt.used(); + pass.mvo_count = global_param_mvo.used(); + pass.mvo_tr_count = global_param_mvo_tr.used(); + pass.autosort = UsingAutoSort(render_passes.used()); + pass.z_clear = ClearZBeforePass(render_passes.used()); + *render_passes.Append() = pass; } diff --git a/core/rend/dx11/dx11_naomi2.cpp b/core/rend/dx11/dx11_naomi2.cpp index 5df133910..d170fd8ea 100644 --- a/core/rend/dx11/dx11_naomi2.cpp +++ b/core/rend/dx11/dx11_naomi2.cpp @@ -171,31 +171,26 @@ const char * const DX11N2ColorShader = R"( #define LMODE_THIN_SURFACE 4 #define LMODE_BUMP_MAP 5 -#define ROUTING_BASEDIFF_BASESPEC_ADD 0 -#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1 -#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 -#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 -#define ROUTING_ALPHADIFF_ADD 4 -#define ROUTING_ALPHAATTEN_ADD 5 -#define ROUTING_FOGDIFF_ADD 6 -#define ROUTING_FOGATTENUATION_ADD 7 -#define ROUTING_BASEDIFF_BASESPEC_SUB 8 -#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 -#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 -#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 -#define ROUTING_ALPHADIFF_SUB 12 -#define ROUTING_ALPHAATTEN_SUB 13 +#define ROUTING_SPEC_TO_OFFSET 1 +#define ROUTING_DIFF_TO_OFFSET 2 +#define ROUTING_ATTENUATION 1 // not 
handled +#define ROUTING_FOG 2 // not handled +#define ROUTING_ALPHA 4 +#define ROUTING_SUB 8 struct N2Light { float4 color; float4 direction; float4 position; + int parallel; int routing; int dmode; int smode; + int4 diffuse_specular; // diffuse0, diffuse1, specular0, specular1 + float attnDistA; float attnDistB; float attnAngleA; @@ -207,10 +202,12 @@ struct N2Light cbuffer lightConstants : register(b2) { N2Light lights[16]; - int lightCount; + float4 ambientBase[2]; float4 ambientOffset[2]; int4 ambientMaterial; // base0, base1, offset0, offset1 + + int lightCount; int useBaseOver; int bumpId0; int bumpId1; @@ -256,7 +253,7 @@ void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, int routing = light.routing; if (light.diffuse_specular[volIdx] == 1) // If light contributes to diffuse { - float factor = BASE_FACTOR; + float factor = (routing & ROUTING_SUB) != 0 ? -BASE_FACTOR : BASE_FACTOR; switch (light.dmode) { case LMODE_SINGLE_SIDED: @@ -266,16 +263,19 @@ void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, factor *= abs(dot(normal, lightDir)); break; } - if (routing == ROUTING_ALPHADIFF_SUB) - diffuseAlpha -= lightColor.r * factor; - else if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - diffuse += lightColor * factor; - if (routing == ROUTING_OFFSDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; + if ((routing & ROUTING_ALPHA) != 0) + diffuseAlpha += lightColor.r * factor; + else + { + if ((routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } if (light.diffuse_specular[2 + volIdx] == 1) // If light contributes to specular { - float factor = BASE_FACTOR; + float factor = (routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; switch (light.smode) { case LMODE_SINGLE_SIDED: @@ -285,12 +285,15 @@ void computeColors(inout float4 baseCol, inout float4 offsetCol, in int volIdx, factor *= saturate(pow(abs(dot(lightDir, reflectDir)), glossCoef[volIdx])); break; } - if (routing == ROUTING_ALPHADIFF_SUB) - specularAlpha -= lightColor.r * factor; - else if (routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; - if (routing == ROUTING_BASEDIFF_BASESPEC_ADD || routing == ROUTING_OFFSDIFF_BASESPEC_ADD) - diffuse += lightColor * factor; + if ((routing & ROUTING_ALPHA) != 0) + specularAlpha += lightColor.r * factor; + else + { + if ((routing & ROUTING_SPEC_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } } // ambient with material @@ -399,46 +402,6 @@ struct N2PolyConstants }; static_assert(sizeof(N2PolyConstants) == 256, "sizeof(N2PolyConstants) should be 256"); -struct DX11N2Light -{ - float color[4]; // 0 - float direction[4]; // 16 - float position[4]; // 32 - int parallel; // 48 - int routing; // 52 - int dmode; // 56 - int smode; // 60 - // int4 diffuse_specular - int diffuse[2]; // 64 - int specular[2]; // 72 - float attnDistA; // 80 - float attnDistB; // 84 - float attnAngleA; // 88 - float attnAngleB; // 92 - int distAttnMode; // 96 - int _pad[3]; - // 112 -}; -static_assert(sizeof(DX11N2Light) == 112, "sizeof(DX11N2Light) should be 112"); - -struct N2LightConstants -{ - DX11N2Light lights[16]; // 0 - int lightCount; // 1792 - int _pad0[3]; - float ambientBase[2][4]; // 1808 - float ambientOffset[2][4]; // 1840 - // int4 ambientMaterial - int ambientMaterialBase[2]; // 1872 - int ambientMaterialOffset[2]; // 1880 - int useBaseOver; // 1888 - int bumpId1; // 1892 - int bumpId2; // 1896 - int _pad3; // 1900 - // 1904 -}; -static_assert(sizeof(N2LightConstants) == 1904, "sizeof(N2LightConstants) should be 1904"); - void Naomi2Helper::init(ComPtr& 
device, ComPtr deviceContext) { this->deviceContext = deviceContext; @@ -451,7 +414,7 @@ void Naomi2Helper::init(ComPtr& device, ComPtrCreateBuffer(&desc, nullptr, &polyConstantsBuffer.get()))) WARN_LOG(RENDERER, "Per-polygon constants buffer creation failed"); - desc.ByteWidth = sizeof(N2LightConstants); + desc.ByteWidth = sizeof(N2LightModel); desc.ByteWidth = (((desc.ByteWidth - 1) >> 4) + 1) << 4; if (FAILED(device->CreateBuffer(&desc, nullptr, &lightConstantsBuffer.get()))) WARN_LOG(RENDERER, "Light constants buffer creation failed"); @@ -481,57 +444,13 @@ void Naomi2Helper::setConstants(const PolyParam& pp, u32 polyNumber) if (pp.lightModel != lastModel) { lastModel = pp.lightModel; - N2LightConstants lightConstants{}; if (pp.lightModel != nullptr) - { - const N2LightModel& lights = *pp.lightModel; - lightConstants.lightCount = lights.lightCount; - for (int i = 0; i < lights.lightCount; i++) - { - DX11N2Light& light = lightConstants.lights[i]; - memcpy(light.color, lights.lights[i].color, sizeof(light.color)); - memcpy(light.direction, lights.lights[i].direction, sizeof(light.direction)); - memcpy(light.position, lights.lights[i].position, sizeof(light.position)); - light.parallel = lights.lights[i].parallel; - light.routing = lights.lights[i].routing; - light.dmode = lights.lights[i].dmode; - light.smode = lights.lights[i].smode; - memcpy(light.diffuse, lights.lights[i].diffuse, sizeof(light.diffuse)); - memcpy(light.specular, lights.lights[i].specular, sizeof(light.specular)); - light.attnDistA = lights.lights[i].attnDistA; - light.attnDistB = lights.lights[i].attnDistB; - light.attnAngleA = lights.lights[i].attnAngleA; - light.attnAngleB = lights.lights[i].attnAngleB; - light.distAttnMode = lights.lights[i].distAttnMode; - } - memcpy(lightConstants.ambientBase, lights.ambientBase, sizeof(lightConstants.ambientBase)); - memcpy(lightConstants.ambientOffset, lights.ambientOffset, sizeof(lightConstants.ambientOffset)); - for (int i = 0; i < 2; i++) - { - 
lightConstants.ambientMaterialBase[i] = lights.ambientMaterialBase[i]; - lightConstants.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i]; - } - lightConstants.useBaseOver = lights.useBaseOver; - lightConstants.bumpId1 = lights.bumpId1; - lightConstants.bumpId2 = lights.bumpId2; - } + setConstBuffer(lightConstantsBuffer, *pp.lightModel); else { - lightConstants.lightCount = 0; - float white[] { 1.f, 1.f, 1.f, 1.f }; - float black[4]{}; - for (int vol = 0; vol < 2; vol++) - { - lightConstants.ambientMaterialBase[vol] = 0; - lightConstants.ambientMaterialOffset[vol] = 0; - memcpy(lightConstants.ambientBase[vol], white, sizeof(white)); - memcpy(lightConstants.ambientOffset[vol], black, sizeof(black)); - } - lightConstants.useBaseOver = 0; - lightConstants.bumpId1 = -1; - lightConstants.bumpId2 = -1; + N2LightModel lightModel{}; + setConstBuffer(lightConstantsBuffer, lightModel); } - setConstBuffer(lightConstantsBuffer, lightConstants); deviceContext->VSSetConstantBuffers(2, 1, &lightConstantsBuffer.get()); } } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index f8388f73e..6d791a5c5 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -880,13 +880,16 @@ static void create_modvol_shader() gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); - N2VertexSource n2vertexShader(false, true, false); - gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str()); - gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat"); - gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor"); - gl.n2ModVolShader.depth_scale = glGetUniformLocation(gl.n2ModVolShader.program, "depth_scale"); - gl.n2ModVolShader.mvMat = glGetUniformLocation(gl.n2ModVolShader.program, "mvMat"); - 
gl.n2ModVolShader.projMat = glGetUniformLocation(gl.n2ModVolShader.program, "projMat"); + if (gl.gl_major >= 3) + { + N2VertexSource n2vertexShader(false, true, false); + gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str()); + gl.n2ModVolShader.ndcMat = glGetUniformLocation(gl.n2ModVolShader.program, "ndcMat"); + gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor"); + gl.n2ModVolShader.depth_scale = glGetUniformLocation(gl.n2ModVolShader.program, "depth_scale"); + gl.n2ModVolShader.mvMat = glGetUniformLocation(gl.n2ModVolShader.program, "mvMat"); + gl.n2ModVolShader.projMat = glGetUniformLocation(gl.n2ModVolShader.program, "projMat"); + } } bool gl_create_resources() diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp index c449132da..02c13a5b9 100644 --- a/core/rend/gles/naomi2.cpp +++ b/core/rend/gles/naomi2.cpp @@ -94,7 +94,7 @@ void main() #if LIGHT_ON == 1 // FIXME need offset0 and offset1 for bump maps if (bumpMapping == 1) - computeBumpMap(vtx_offs, vtx_offs1, vpos.xyz, vnorm, normalMat); + computeBumpMap(vtx_offs, vtx_offs1, vpos.xyz, in_normal, normalMat); else { computeColors(vtx_base1, vtx_offs1, 1, vpos.xyz, vnorm); @@ -144,20 +144,12 @@ const char* N2ColorShader = R"( #define LMODE_THIN_SURFACE 4 #define LMODE_BUMP_MAP 5 -#define ROUTING_BASEDIFF_BASESPEC_ADD 0 -#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1 -#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 -#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 -#define ROUTING_ALPHADIFF_ADD 4 -#define ROUTING_ALPHAATTEN_ADD 5 -#define ROUTING_FOGDIFF_ADD 6 -#define ROUTING_FOGATTENUATION_ADD 7 -#define ROUTING_BASEDIFF_BASESPEC_SUB 8 -#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 -#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 -#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 -#define ROUTING_ALPHADIFF_SUB 12 -#define ROUTING_ALPHAATTEN_SUB 13 +#define ROUTING_SPEC_TO_OFFSET 1 +#define ROUTING_DIFF_TO_OFFSET 2 +#define 
ROUTING_ATTENUATION 1 // not handled +#define ROUTING_FOG 2 // not handled +#define ROUTING_ALPHA 4 +#define ROUTING_SUB 8 struct N2Light { @@ -232,33 +224,39 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, int volIdx, vec3 po } if (lights[i].diffuse[volIdx] == 1) { - float factor = BASE_FACTOR; + float factor = (lights[i].routing & ROUTING_SUB) != 0 ? -BASE_FACTOR : BASE_FACTOR; if (lights[i].dmode == LMODE_SINGLE_SIDED) factor *= max(dot(normal, lightDir), 0.0); else if (lights[i].dmode == LMODE_DOUBLE_SIDED) factor *= abs(dot(normal, lightDir)); - if (lights[i].routing == ROUTING_ALPHADIFF_SUB) - diffuseAlpha -= lightColor.r * factor; - else if (lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - diffuse += lightColor * factor; - if (lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD || lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; + if ((lights[i].routing & ROUTING_ALPHA) != 0) + diffuseAlpha += lightColor.r * factor; + else + { + if ((lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } if (lights[i].specular[volIdx] == 1) { - float factor = BASE_FACTOR; + float factor = (lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; if (lights[i].smode == LMODE_SINGLE_SIDED) factor *= clamp(pow(max(dot(lightDir, reflectDir), 0.0), glossCoef[volIdx]), 0.0, 1.0); else if (lights[i].smode == LMODE_DOUBLE_SIDED) factor *= clamp(pow(abs(dot(lightDir, reflectDir)), glossCoef[volIdx]), 0.0, 1.0); - if (lights[i].routing == ROUTING_ALPHADIFF_SUB) - specularAlpha -= lightColor.r * factor; - else if (lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; - if (lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD) - diffuse += lightColor * factor; + if ((lights[i].routing & ROUTING_ALPHA) != 0) + specularAlpha += lightColor.r * factor; + else + { + if ((lights[i].routing & ROUTING_SPEC_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } } // ambient with material @@ -306,6 +304,7 @@ void computeBumpMap(inout vec4 color0, vec4 color1, vec3 position, vec3 normal, // TODO //if (bumpId0 == -1) return; + normal = normalize(normal); vec3 tangent = color0.xyz; if (tangent.x > 0.5) tangent.x -= 1.0; @@ -313,7 +312,7 @@ void computeBumpMap(inout vec4 color0, vec4 color1, vec3 position, vec3 normal, tangent.y -= 1.0; if (tangent.z > 0.5) tangent.z -= 1.0; - tangent = normalize(normalMat * vec4(tangent, 0.0)).xyz; + tangent = normalize(tangent); vec3 bitangent = color1.xyz; if (bitangent.x > 0.5) bitangent.x -= 1.0; @@ -321,16 +320,17 @@ void computeBumpMap(inout vec4 color0, vec4 color1, vec3 position, vec3 normal, bitangent.y -= 1.0; if (bitangent.z > 0.5) bitangent.z -= 1.0; - bitangent = normalize(normalMat * vec4(bitangent, 0.0)).xyz; + bitangent = normalize(bitangent); float scaleDegree = color0.w; float scaleOffset = color1.w; vec3 lightDir; // direction to the light if (lights[bumpId0].parallel == 1) - lightDir = normalize(lights[bumpId0].direction.xyz); + lightDir = lights[bumpId0].direction.xyz; 
else - lightDir = normalize(lights[bumpId0].position.xyz - position); + lightDir = lights[bumpId0].position.xyz - position; + lightDir = normalize(lightDir * mat3(normalMat)); float n = dot(lightDir, normal); float cosQ = dot(lightDir, tangent); @@ -349,6 +349,7 @@ void computeBumpMap(inout vec4 color0, vec4 color1, vec3 position, vec3 normal, color0.g = k3; color0.b = q / PI / 2.0; color0.a = k1; + color0 = clamp(color0, 0.0, 1.0); } )"; diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index e72d04222..c19e3ed80 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -278,11 +278,10 @@ void Drawer::UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const offsets.fragmentUniformOffset = packer.addUniform(&fragmentUniforms, sizeof(fragmentUniforms)); std::vector n2uniforms; - std::vector n2lights; if (settings.platform.isNaomi2()) { packNaomi2Uniforms(packer, offsets, n2uniforms, false); - offsets.lightsOffset = packNaomi2Lights(packer, n2lights); + offsets.lightsOffset = packNaomi2Lights(packer); } BufferData *buffer = GetMainBuffer(packer.size()); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index 6a160c562..ddadb2729 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -139,48 +139,20 @@ protected: offsets.naomi2TrModVolOffset = offsets.naomi2OpaqueOffset + trMvOffset; } - vk::DeviceSize packNaomi2Lights(BufferPacker& packer, std::vector& n2lights) + vk::DeviceSize packNaomi2Lights(BufferPacker& packer) { - size_t n2LightSize = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), GetContext()->GetUniformBufferAlignment()); - n2lights.resize(pvrrc.lightModels.used() * n2LightSize); - size_t bufIdx = 0; - for (const N2LightModel& lights : pvrrc.lightModels) - { - VkN2LightConstants& vkLights = *(VkN2LightConstants *)&n2lights[bufIdx]; - vkLights.lightCount = lights.lightCount; - for (int i = 0; i < lights.lightCount; i++) - { - VkN2Light& vkLight = 
vkLights.lights[i]; - const N2Light& light = lights.lights[i]; - memcpy(vkLight.color, light.color, sizeof(vkLight.color)); - memcpy(vkLight.direction, light.direction, sizeof(vkLight.direction)); - memcpy(vkLight.position, light.position, sizeof(vkLight.position)); - vkLight.parallel = light.parallel; - vkLight.routing = light.routing; - vkLight.dmode = light.dmode; - vkLight.smode = light.smode; - memcpy(vkLight.diffuse, light.diffuse, sizeof(vkLight.diffuse)); - memcpy(vkLight.specular, light.specular, sizeof(vkLight.specular)); - vkLight.attnDistA = light.attnDistA; - vkLight.attnDistB = light.attnDistB; - vkLight.attnAngleA = light.attnAngleA; - vkLight.attnAngleB = light.attnAngleB; - vkLight.distAttnMode = light.distAttnMode; - } - memcpy(vkLights.ambientBase, lights.ambientBase, sizeof(vkLights.ambientBase)); - memcpy(vkLights.ambientOffset, lights.ambientOffset, sizeof(vkLights.ambientOffset)); - for (int i = 0; i < 2; i++) - { - vkLights.ambientMaterialBase[i] = lights.ambientMaterialBase[i]; - vkLights.ambientMaterialOffset[i] = lights.ambientMaterialOffset[i]; - } - vkLights.useBaseOver = lights.useBaseOver; - vkLights.bumpId1 = lights.bumpId1; - vkLights.bumpId2 = lights.bumpId2; + size_t n2LightSize = sizeof(N2LightModel) + align(sizeof(N2LightModel), GetContext()->GetUniformBufferAlignment()); + if (n2LightSize == sizeof(N2LightModel)) + return packer.addUniform(pvrrc.lightModels.head(), pvrrc.lightModels.bytes()); - bufIdx += n2LightSize; + vk::DeviceSize offset = (vk::DeviceSize)-1; + for (const N2LightModel& model : pvrrc.lightModels) + { + vk::DeviceSize o = packer.addUniform(&model, sizeof(N2LightModel)); + if (offset == (vk::DeviceSize)-1) + offset = o; } - return packer.addUniform(n2lights.data(), bufIdx); + return offset; } vk::Rect2D baseScissor; diff --git a/core/rend/vulkan/oit/oit_drawer.cpp b/core/rend/vulkan/oit/oit_drawer.cpp index e5d36c199..fb9a4044a 100644 --- a/core/rend/vulkan/oit/oit_drawer.cpp +++ 
b/core/rend/vulkan/oit/oit_drawer.cpp @@ -238,11 +238,10 @@ void OITDrawer::UploadMainBuffer(const OITDescriptorSets::VertexShaderUniforms& offsets.polyParamsOffset = packer.addStorage(trPolyParams.data(), offsets.polyParamsSize); std::vector n2uniforms; - std::vector n2lights; if (settings.platform.isNaomi2()) { packNaomi2Uniforms(packer, offsets, n2uniforms, true); - offsets.lightsOffset = packNaomi2Lights(packer, n2lights); + offsets.lightsOffset = packNaomi2Lights(packer); } BufferData *buffer = GetMainBuffer(packer.size()); diff --git a/core/rend/vulkan/oit/oit_pipeline.h b/core/rend/vulkan/oit/oit_pipeline.h index 88b182bba..9a2d32306 100644 --- a/core/rend/vulkan/oit/oit_pipeline.h +++ b/core/rend/vulkan/oit/oit_pipeline.h @@ -188,8 +188,8 @@ public: if (poly.lightModel != nullptr) { - size = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), uniformAlignment); - lightBufferInfo = vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(VkN2LightConstants) }; + size = sizeof(N2LightModel) + align(sizeof(N2LightModel), uniformAlignment); + lightBufferInfo = vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(N2LightModel) }; writeDescriptorSets.emplace_back(perPolyDescSet, 3, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &lightBufferInfo, nullptr); } // TODO no light diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index 136b13501..e0853652e 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -105,8 +105,8 @@ public: if (poly.lightModel != nullptr) { - size = sizeof(VkN2LightConstants) + align(sizeof(VkN2LightConstants), uniformAlignment); - lightBufferInfo = vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(VkN2LightConstants) }; + size = sizeof(N2LightModel) + align(sizeof(N2LightModel), uniformAlignment); + lightBufferInfo = 
vk::DescriptorBufferInfo{ buffer, lightOffset + (poly.lightModel - pvrrc.lightModels.head()) * size, sizeof(N2LightModel) }; writeDescriptorSets.emplace_back(perPolyDescSet, 3, 0, 1, vk::DescriptorType::eUniformBuffer, nullptr, &lightBufferInfo, nullptr); } // TODO no light diff --git a/core/rend/vulkan/shaders.cpp b/core/rend/vulkan/shaders.cpp index db6d3c4fe..277e77771 100644 --- a/core/rend/vulkan/shaders.cpp +++ b/core/rend/vulkan/shaders.cpp @@ -351,20 +351,12 @@ layout (std140, set = 1, binding = 2) uniform N2VertexShaderUniforms #define LMODE_THIN_SURFACE 4 #define LMODE_BUMP_MAP 5 -#define ROUTING_BASEDIFF_BASESPEC_ADD 0 -#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1 -#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 -#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 -#define ROUTING_ALPHADIFF_ADD 4 -#define ROUTING_ALPHAATTEN_ADD 5 -#define ROUTING_FOGDIFF_ADD 6 -#define ROUTING_FOGATTENUATION_ADD 7 -#define ROUTING_BASEDIFF_BASESPEC_SUB 8 -#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 -#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 -#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 -#define ROUTING_ALPHADIFF_SUB 12 -#define ROUTING_ALPHAATTEN_SUB 13 +#define ROUTING_SPEC_TO_OFFSET 1 +#define ROUTING_DIFF_TO_OFFSET 2 +#define ROUTING_ATTENUATION 1 // not handled +#define ROUTING_FOG 2 // not handled +#define ROUTING_ALPHA 4 +#define ROUTING_SUB 8 struct N2Light { @@ -442,33 +434,39 @@ void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in int volIdx, in v } if (n2Lights.lights[i].diffuse[volIdx] == 1) { - float factor = BASE_FACTOR; + float factor = (n2Lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; if (n2Lights.lights[i].dmode == LMODE_SINGLE_SIDED) factor *= max(dot(normal, lightDir), 0.0); else if (n2Lights.lights[i].dmode == LMODE_DOUBLE_SIDED) factor *= abs(dot(normal, lightDir)); - if (n2Lights.lights[i].routing == ROUTING_ALPHADIFF_SUB) - diffuseAlpha -= lightColor.r * factor; - else if (n2Lights.lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - diffuse += lightColor * factor; - if (n2Lights.lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; + if ((n2Lights.lights[i].routing & ROUTING_ALPHA) != 0) + diffuseAlpha += lightColor.r * factor; + else + { + if ((n2Lights.lights[i].routing & ROUTING_DIFF_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } if (n2Lights.lights[i].specular[volIdx] == 1) { - float factor = BASE_FACTOR; + float factor = (n2Lights.lights[i].routing & ROUTING_SUB) != 0 ? 
-BASE_FACTOR : BASE_FACTOR; if (n2Lights.lights[i].smode == LMODE_SINGLE_SIDED) factor *= clamp(pow(max(dot(lightDir, reflectDir), 0.0), n2Uniform.glossCoef[volIdx]), 0.0, 1.0); else if (n2Lights.lights[i].smode == LMODE_DOUBLE_SIDED) factor *= clamp(pow(abs(dot(lightDir, reflectDir)), n2Uniform.glossCoef[volIdx]), 0.0, 1.0); - if (n2Lights.lights[i].routing == ROUTING_ALPHADIFF_SUB) - specularAlpha -= lightColor.r * factor; - else if (n2Lights.lights[i].routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || n2Lights.lights[i].routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) - specular += lightColor * factor; - if (n2Lights.lights[i].routing == ROUTING_BASEDIFF_BASESPEC_ADD || n2Lights.lights[i].routing == ROUTING_OFFSDIFF_BASESPEC_ADD) - diffuse += lightColor * factor; + if ((n2Lights.lights[i].routing & ROUTING_ALPHA) != 0) + specularAlpha += lightColor.r * factor; + else + { + if ((n2Lights.lights[i].routing & ROUTING_SPEC_TO_OFFSET) == 0) + diffuse += lightColor * factor; + else + specular += lightColor * factor; + } } } // ambient with material @@ -514,7 +512,7 @@ void computeEnvMap(inout vec2 uv, in vec3 position, in vec3 normal) void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 normal, in mat4 normalMat) { // TODO - if (n2Lights.bumpId0 == -1) + //if (n2Lights.bumpId0 == -1) return; normal = normalize(normal); vec3 tangent = color0.xyz; @@ -524,7 +522,6 @@ void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 tangent.y -= 1.0; if (tangent.z > 0.5) tangent.z -= 1.0; - //tangent = normalize(normalMat * vec4(tangent, 0.0)).xyz; tangent = normalize(tangent); vec3 bitangent = color1.xyz; if (bitangent.x > 0.5) @@ -533,7 +530,6 @@ void computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 bitangent.y -= 1.0; if (bitangent.z > 0.5) bitangent.z -= 1.0; - //bitangent = normalize(normalMat * vec4(bitangent, 0.0)).xyz; bitangent = normalize(bitangent); float scaleDegree = color0.w; @@ -563,6 +559,7 @@ void 
computeBumpMap(inout vec4 color0, in vec4 color1, in vec3 position, in vec3 color0.g = k3; color0.b = q / PI / 2.0; color0.a = k1; + color0 = clamp(color0, 0.0, 1.0); } )"; diff --git a/core/rend/vulkan/shaders.h b/core/rend/vulkan/shaders.h index d75ce37c9..d993659e1 100644 --- a/core/rend/vulkan/shaders.h +++ b/core/rend/vulkan/shaders.h @@ -93,44 +93,6 @@ struct N2VertexShaderUniforms int modelSpecular[2]; }; -// std140 alignment required -struct VkN2Light -{ - float color[4]; - float direction[4]; - float position[4]; - - int parallel; - int routing; - int dmode; - int smode; - - int diffuse[2]; - int specular[2]; - - float attnDistA; - float attnDistB; - float attnAngleA; - float attnAngleB; - - int distAttnMode; - int _pad[3]; -}; - -// std140 alignment required -struct VkN2LightConstants -{ - VkN2Light lights[16]; - float ambientBase[2][4]; - float ambientOffset[2][4]; - int ambientMaterialBase[2]; - int ambientMaterialOffset[2]; - int lightCount; - int useBaseOver; - int bumpId1; - int bumpId2; -}; - class ShaderManager { public: diff --git a/core/rend/vulkan/vulkan_renderer.h b/core/rend/vulkan/vulkan_renderer.h index 5242464c9..0090c3c09 100644 --- a/core/rend/vulkan/vulkan_renderer.h +++ b/core/rend/vulkan/vulkan_renderer.h @@ -258,7 +258,8 @@ protected: curTexture->UploadToGPU(width, height, (u8*)pb.data(), false); curTexture->SetCommandBuffer(nullptr); - Vertex *vtx = ctx->rend.verts.Append(4); + // Use background poly vtx and param + Vertex *vtx = ctx->rend.verts.head(); vtx[0].x = 0.f; vtx[0].y = 0.f; vtx[0].z = 0.1f; @@ -280,13 +281,13 @@ protected: vtx[3].v = 1.f; u32 *idx = ctx->rend.idx.Append(4); - idx[0] = ctx->rend.verts.used() - 4; - idx[1] = idx[0] + 1; - idx[2] = idx[1] + 1; - idx[3] = idx[2] + 1; + idx[0] = 0; + idx[1] = 1; + idx[2] = 2; + idx[3] = 3; - PolyParam *pp = ctx->rend.global_param_op.Append(1); - pp->first = ctx->rend.idx.used() - 4; + PolyParam *pp = ctx->rend.global_param_op.head(); + pp->first = 0; pp->count = 4; pp->isp.full 
= 0; @@ -314,7 +315,7 @@ protected: pass->autosort = false; pass->mvo_count = 0; pass->mvo_tr_count = 0; - pass->op_count = ctx->rend.global_param_op.used(); + pass->op_count = 1; pass->pt_count = 0; pass->tr_count = 0;