diff --git a/CMakeLists.txt b/CMakeLists.txt index eaddf0e71..a5dfefdc4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -919,7 +919,9 @@ if(USE_OPENGL) core/rend/gles/gltex.cpp core/rend/gles/quad.cpp core/rend/gles/postprocess.cpp - core/rend/gles/postprocess.h) + core/rend/gles/postprocess.h + core/rend/gles/naomi2.cpp + core/rend/gles/naomi2.h) if(NOT LIBRETRO) target_sources(${PROJECT_NAME} PRIVATE @@ -934,7 +936,9 @@ if(USE_OPENGL) core/rend/gl4/abuffer.cpp core/rend/gl4/gl4.h core/rend/gl4/gldraw.cpp - core/rend/gl4/gles.cpp) + core/rend/gl4/gles.cpp + core/rend/gl4/naomi2.cpp + core/rend/gl4/naomi2.h) endif() endif() diff --git a/core/hw/pvr/elan.cpp b/core/hw/pvr/elan.cpp index dca363f76..e6f23167c 100644 --- a/core/hw/pvr/elan.cpp +++ b/core/hw/pvr/elan.cpp @@ -63,6 +63,7 @@ #include #include #include +#include namespace elan { @@ -242,12 +243,16 @@ T DYNACALL read_elancmd(u32 addr) static GMP *curGmp; static glm::mat4x4 curMatrix; +static float *taMVMatrix; static glm::mat4x4 lightMatrix; static glm::mat4 projectionMatrix; +static float *taProjMatrix; static LightModel *curLightModel; static ElanBase *curLights[MAX_LIGHTS]; -static float near = 0.001f; -static float far = 100000.f; +static float nearPlane = 0.001f; +static float farPlane = 100000.f; +static bool envMapping; +static bool cullingReversed; struct State { @@ -255,46 +260,53 @@ struct State int listType = -1; u32 gmp = Null; - u32 matrix = Null; + u32 instance = Null; u32 projMatrix = Null; - int userClip = 0; + u32 tileclip = 0; u32 lightModel = Null; u32 lights[MAX_LIGHTS] = { Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null, Null }; + bool lightModelUpdated = false; + float envMapUOffset = 0.f; + float envMapVOffset = 0.f; void reset() { listType = -1; gmp = Null; - matrix = Null; + instance = Null; projMatrix = Null; - userClip = 0; + tileclip = 0; lightModel = Null; for (auto& light : lights) light = Null; update(); } - void setMatrix(void *p) + 
void setMatrix(InstanceMatrix *pinstance) { - matrix = elanRamAddress(p); + instance = elanRamAddress(pinstance); updateMatrix(); } void updateMatrix() { - if (matrix == Null) + if (instance == Null) + { + taMVMatrix = nullptr; + envMapUOffset = 0.f; + envMapVOffset = 0.f; return; - Matrix *mat = (Matrix *)&elanRAM[matrix]; + } + InstanceMatrix *mat = (InstanceMatrix *)&elanRAM[instance]; DEBUG_LOG(PVR, "Matrix %f %f %f %f\n %f %f %f %f\n %f %f %f %f\nLight: %f %f %f\n %f %f %f", -mat->tm00, -mat->tm01, -mat->tm02, -mat->mat03, mat->tm10, mat->tm11, mat->tm12, mat->mat13, mat->tm20, mat->tm21, mat->tm22, -mat->mat23, mat->lm00, mat->lm01, mat->lm02, mat->lm10, mat->lm11, mat->lm12); -// DEBUG_LOG(PVR, "Matrix proj4 %f %f %f %f %f", -// mat->proj4, mat->proj5, mat->mproj6, mat->proj7, mat->proj8); + curMatrix = glm::mat4x4{ -mat->tm00, mat->tm10, mat->tm20, 0, -mat->tm01, mat->tm11, mat->tm21, 0, @@ -307,8 +319,11 @@ struct State -mat->lm02, mat->lm12, mat->tm22, 0, -mat->mat03, mat->mat13, -mat->mat23, 1 }; - near = mat->proj4; - far = mat->proj5; + nearPlane = mat->_near; + farPlane = mat->_far; + envMapUOffset = mat->envMapU; + envMapVOffset = mat->envMapV; + taMVMatrix = ta_add_matrix(glm::value_ptr(curMatrix)); } void setProjectionMatrix(void *p) @@ -320,15 +335,20 @@ struct State void updateProjectionMatrix() { if (projMatrix == Null) + { + taProjMatrix = nullptr; return; + } ProjMatrix *pm = (ProjMatrix *)&elanRAM[projMatrix]; - DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f", pm->fx, pm->tx, pm->fy, pm->ty); + DEBUG_LOG(PVR, "Proj matrix x: %f %f y: %f %f near %f far %f", pm->fx, pm->tx, pm->fy, pm->ty, nearPlane, farPlane); + projectionMatrix = glm::mat4( -pm->fx, 0, 0, 0, - 0, pm->fy, 0, 0, + 0, pm->fy, 0, 0, -pm->tx, -pm->ty, -1, -1, - 0, 0, 0, 0 + 0, 0, 0, 0 ); + taProjMatrix = ta_add_matrix(glm::value_ptr(projectionMatrix)); } void setGMP(void *p) @@ -353,7 +373,9 @@ struct State updateLightModel(); } - void updateLightModel() { + void updateLightModel() 
+ { + lightModelUpdated = true; if (lightModel == Null) curLightModel = nullptr; else @@ -372,30 +394,45 @@ struct State void updateLight(int lightId) { + lightModelUpdated = true; if (lights[lightId] == Null) { elan::curLights[lightId] = nullptr; return; } - Instance *instance = (Instance *)&elanRAM[lights[lightId]]; - if (instance->pcw.parallelLight) + PointLight *plight = (PointLight *)&elanRAM[lights[lightId]]; + if (plight->pcw.parallelLight) { - ParallelLight *light = (ParallelLight *)instance; + ParallelLight *light = (ParallelLight *)plight; DEBUG_LOG(PVR, " Parallel light %d: col %d %d %d dir %d %d %d", light->lightId, light->red, light->green, light->blue, light->dirX, light->dirY, light->dirZ); } else { - PointLight *light = (PointLight *)instance; DEBUG_LOG(PVR, " Point light %d: dattenmode %d col %d %d %d dir %d %d %d pos %f %f %f routing %d dist %f %f angle %f %f", - light->lightId, light->dattenmode, - light->red, light->green, light->blue, - light->dirX, light->dirY, light->dirZ, - light->posX, light->posY, light->posZ, - light->routing, light->attnMinDistance(), light->attnMaxDistance(), - light->attnMinAngle(), light->attnMaxAngle()); + plight->lightId, plight->dattenmode, + plight->red, plight->green, plight->blue, + plight->dirX, plight->dirY, plight->dirZ, + plight->posX, plight->posY, plight->posZ, + plight->routing, plight->attnMinDistance(), plight->attnMaxDistance(), + plight->attnMinAngle(), plight->attnMaxAngle()); } - elan::curLights[lightId] = instance; + elan::curLights[lightId] = plight; + } + + void setClipMode(PCW pcw) + { + tileclip = (tileclip & ~0xF0000000) | (pcw.userClip << 28); + } + + void setClipTiles(u32 xmin, u32 ymin, u32 xmax, u32 ymax) + { + u32 t = tileclip & 0xF0000000; + t |= xmin & 0x3f; // 6 bits + t |= (xmax & 0x3f) << 6; // 6 bits + t |= (ymin & 0x1f) << 12; // 5 bits + t |= (ymax & 0x1f) << 17; // 5 bits + tileclip = t; } void update() @@ -419,26 +456,38 @@ struct State static State state; -template -static void 
setCoords(T& vtx, float x, float y, float z) +static void setCoords(Vertex& vtx, float x, float y, float z) { - glm::vec4 v(x, y, z, 1); - v = projectionMatrix * curMatrix * v; - v.x /= v.w; - v.y /= v.w; - vtx.xyz[0] = v.x; - vtx.xyz[1] = v.y; - vtx.xyz[2] = 1 / v.w; + vtx.x = x; + vtx.y = y; + vtx.z = z; } -template -static void setUV(const Ts& vs, Td& vd) +template +static void setUV(const Ts& vs, Vertex& vd) { - vd.u = vs.uv.u; - vd.v = vs.uv.v; + if (envMapping) + { + vd.u = state.envMapUOffset; + vd.v = state.envMapVOffset; + } + else + { + vd.u = vs.uv.u; + vd.v = vs.uv.v; + } } -glm::vec4 unpackColor(u32 color) +static void SetEnvMapUV(Vertex& vtx) +{ + if (envMapping) + { + vtx.u = state.envMapUOffset; + vtx.v = state.envMapVOffset; + } +} + +static glm::vec4 unpackColor(u32 color) { return glm::vec4((float)((color >> 16) & 0xff) / 255.f, (float)((color >> 8) & 0xff) / 255.f, @@ -446,12 +495,12 @@ glm::vec4 unpackColor(u32 color) (float)(color >> 24) / 255.f); } -glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0) +static glm::vec4 unpackColor(u8 red, u8 green, u8 blue, u8 alpha = 0) { return glm::vec4((float)red / 255.f, (float)green / 255.f, (float)blue / 255.f, (float)alpha / 255.f); } -u32 packColor(const glm::vec4& color) +static u32 packColor(const glm::vec4& color) { return (int)(std::max(0.f, std::min(1.f, color.a)) * 255.f) << 24 | (int)(std::max(0.f, std::min(1.f, color.r)) * 255.f) << 16 @@ -494,7 +543,8 @@ static void computeColors(glm::vec4& baseCol, glm::vec4& offsetCol, const glm::v { PointLight *light = (PointLight *)base; glm::vec4 lightPos(light->posX, light->posY, light->posZ, 1); - lightDir = glm::normalize(lightPos - pos); + lightDir = glm::normalize(lightPos - pos); // FIXME normalizing 4D vec + lightColor = unpackColor(light->red, light->green, light->blue); routing = light->routing; @@ -597,22 +647,79 @@ static void computeColors(glm::vec4& baseCol, glm::vec4& offsetCol, const glm::v template glm::vec4 getNormal(const T& 
vtx) { - return glm::normalize(lightMatrix * glm::vec4((int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f, 0)); + return glm::vec4((int8_t)vtx.header.nx / 127.f, (int8_t)vtx.header.ny / 127.f, (int8_t)vtx.header.nz / 127.f, 0); } template<> glm::vec4 getNormal(const N2_VERTEX_VNU& vtx) { - return glm::normalize(lightMatrix * glm::vec4(vtx.normal.nx, vtx.normal.ny, vtx.normal.nz, 0)); + return glm::vec4(vtx.normal.nx, vtx.normal.ny, vtx.normal.nz, 0); +} + +template +void setNormal(Vertex& vd, const T& vs) +{ + glm::vec4 normal = getNormal(vs); + vd.nx = normal.x; + vd.ny = normal.y; + vd.nz = normal.z; } template -static void convertVertex(const T& vs, TA_VertexParam& vd); +static void convertVertex(const T& vs, Vertex& vd); template<> -void convertVertex(const Vertex& vs, TA_VertexParam& vd) +void convertVertex(const N2_VERTEX& vs, Vertex& vd) { - setCoords(vd.vtx0, vs.x, vs.y, vs.z); + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + SetEnvMapUV(vd); + glm::vec4 baseCol; + glm::vec4 offsetCol; + if (curGmp != nullptr) + { + baseCol = unpackColor(curGmp->diffuse0); + offsetCol = unpackColor(curGmp->specular0); + if (state.listType == 2) + { + // FIXME + baseCol.a = 0; + offsetCol.a = 1; + } + computeColors(baseCol, offsetCol, curMatrix * glm::vec4(vs.x, vs.y, vs.z, 1), getNormal(vs)); + } + else + { + baseCol = glm::vec4(0); + offsetCol = glm::vec4(0); + } + *(u32 *)vd.col = packColor(baseCol + offsetCol); +} + +template<> +void convertVertex(const N2_VERTEX_VR& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + SetEnvMapUV(vd); + glm::vec4 baseCol = unpackColor(vs.rgb.argb0); + glm::vec4 offsetCol = baseCol; + if (curGmp != nullptr) + { + // Not sure about offset but vf4 needs base addition + baseCol += unpackColor(curGmp->diffuse0); + offsetCol += unpackColor(curGmp->specular0); + computeColors(baseCol, offsetCol, curMatrix * glm::vec4(vs.x, vs.y, vs.z, 1), getNormal(vs)); + } + *(u32 
*)vd.col = packColor(baseCol + offsetCol); +} + +template<> +void convertVertex(const N2_VERTEX_VU& vs, Vertex& vd) +{ + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); glm::vec4 baseCol; glm::vec4 offsetCol; if (curGmp != nullptr) @@ -626,13 +733,16 @@ void convertVertex(const Vertex& vs, TA_VertexParam& vd) baseCol = glm::vec4(0); offsetCol = glm::vec4(0); } - vd.vtx0.BaseCol = packColor(baseCol + offsetCol); + *(u32 *)vd.col = packColor(baseCol); + *(u32 *)vd.spc = packColor(offsetCol); } template<> -void convertVertex(const N2_VERTEX_VR& vs, TA_VertexParam& vd) +void convertVertex(const N2_VERTEX_VUR& vs, Vertex& vd) { - setCoords(vd.vtx0, vs.x, vs.y, vs.z); + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); glm::vec4 baseCol = unpackColor(vs.rgb.argb0); glm::vec4 offsetCol = baseCol; if (curGmp != nullptr) @@ -642,55 +752,17 @@ void convertVertex(const N2_VERTEX_VR& vs, TA_VertexParam& vd) offsetCol += unpackColor(curGmp->specular0); computeColors(baseCol, offsetCol, curMatrix * glm::vec4(vs.x, vs.y, vs.z, 1), getNormal(vs)); } - vd.vtx0.BaseCol = packColor(baseCol + offsetCol); + *(u32 *)vd.col = packColor(baseCol); + *(u32 *)vd.spc = packColor(offsetCol); } template<> -void convertVertex(const N2_VERTEX_VU& vs, TA_VertexParam& vd) -{ - setCoords(vd.vtx3, vs.x, vs.y, vs.z); - setUV(vs, vd.vtx3); - glm::vec4 baseCol; - glm::vec4 offsetCol; - if (curGmp != nullptr) - { - baseCol = unpackColor(curGmp->diffuse0); - offsetCol = unpackColor(curGmp->specular0); - computeColors(baseCol, offsetCol, curMatrix * glm::vec4(vs.x, vs.y, vs.z, 1), getNormal(vs)); - } - else - { - baseCol = glm::vec4(0); - offsetCol = glm::vec4(0); - } - vd.vtx3.BaseCol = packColor(baseCol); - vd.vtx3.OffsCol = packColor(offsetCol); -} - -template<> -void convertVertex(const N2_VERTEX_VUR& vs, TA_VertexParam& vd) -{ - setCoords(vd.vtx3, vs.x, vs.y, vs.z); - setUV(vs, vd.vtx3); - glm::vec4 baseCol = unpackColor(vs.rgb.argb0); - glm::vec4 offsetCol 
= baseCol; - if (curGmp != nullptr) - { - // Not sure about offset but vf4 needs base addition - baseCol += unpackColor(curGmp->diffuse0); - offsetCol += unpackColor(curGmp->specular0); - computeColors(baseCol, offsetCol, curMatrix * glm::vec4(vs.x, vs.y, vs.z, 1), getNormal(vs)); - } - vd.vtx3.BaseCol = packColor(baseCol); - vd.vtx3.OffsCol = packColor(offsetCol); -} - -template<> -void convertVertex(const N2_VERTEX_VUB& vs, TA_VertexParam& vd) +void convertVertex(const N2_VERTEX_VUB& vs, Vertex& vd) { // TODO - setCoords(vd.vtx3, vs.x, vs.y, vs.z); - setUV(vs, vd.vtx3); + setCoords(vd, vs.x, vs.y, vs.z); + setNormal(vd, vs); + setUV(vs, vd); glm::vec4 baseCol; glm::vec4 offsetCol; if (curGmp != nullptr) @@ -704,49 +776,131 @@ void convertVertex(const N2_VERTEX_VUB& vs, TA_VertexParam& vd) baseCol = glm::vec4(0); offsetCol = glm::vec4(0); } - vd.vtx3.BaseCol = packColor(baseCol); - vd.vtx3.OffsCol = packColor(offsetCol); + *(u32 *)vd.col = packColor(baseCol); + *(u32 *)vd.spc = packColor(offsetCol); +} + +template +static void boundingBox(const T* vertices, u32 count, glm::vec3& min, glm::vec3& max) +{ + min = { 1e38f, 1e38f, 1e38f }; + max = { -1e38f, -1e38f, -1e38f }; + for (u32 i = 0; i < count; i++) + { + glm::vec3 pos{ vertices[i].x, vertices[i].y, vertices[i].z }; + min = glm::min(min, pos); + max = glm::max(max, pos); + } + glm::vec4 center((min + max) / 2.f, 1); + glm::vec4 extents(max - glm::vec3(center), 0); + // transform + center = curMatrix * center; + glm::vec3 extentX = curMatrix * glm::vec4(extents.x, 0, 0, 0); + glm::vec3 extentY = curMatrix * glm::vec4(0, extents.y, 0, 0); + glm::vec3 extentZ = curMatrix * glm::vec4(0, 0, extents.z, 0); + // new AA extents + const float newX = std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentX)) + + std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentZ)); + + const float newY = std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentX)) + 
std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentZ)); + + const float newZ = std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentX)) + + std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentZ)); + + min = glm::vec3(center) - glm::vec3(newX, newY, newZ); + max = glm::vec3(center) + glm::vec3(newX, newY, newZ); +} + +template +static bool isInFrustum(const T* vertices, u32 count) +{ + glm::vec3 min; + glm::vec3 max; + boundingBox(vertices, count, min, max); + if (min.z > -nearPlane || max.z < -farPlane) + return false; + + glm::vec4 pmin = projectionMatrix * glm::vec4(min, 1); + glm::vec4 pmax = projectionMatrix * glm::vec4(max, 1); + if (std::isnan(pmin.x) || std::isnan(pmin.y) || std::isnan(pmax.x) || std::isnan(pmax.y)) + return false; + float w; + // Check the farthest side + if (std::abs(pmin.w) < std::abs(pmax.w)) + w = pmax.w; + else + w = pmin.w; + glm::vec4 t = glm::min(pmin / w, pmax / w); + pmax = glm::max(pmin / w, pmax / w); + pmin = t; + if (pmax.x <= -214 || pmin.x >= 854 // FIXME viewport dimensions + || pmax.y < 0 || pmin.y >= 480) + return false; + + //printf("AABB %f %f - %f %f\n", pmin.x, pmin.y, pmax.x, pmax.y); + + return true; } template static void sendVertices(const ICHList *list, const T* vtx) { - alignas(32) TA_VertexParam taVtx; - taVtx.pcw.ParaType = 7; + Vertex taVtx; verify(list->vertexSize() > 0); - alignas(32) TA_VertexParam fanCenterVtx{}; - alignas(32) TA_VertexParam fanLastVtx{}; + Vertex fanCenterVtx{}; + Vertex fanLastVtx{}; + bool stripStart = true; + int outStripIndex = 0; for (u32 i = 0; i < list->vtxCount; i++) { - taVtx.pcw.EndOfStrip = vtx->header.endOfStrip; - convertVertex(*vtx, taVtx); - if (fanCenterVtx.pcw.ParaType == 0) + convertVertex(*vtx, taVtx); // populate taVtx before any branch below copies or emits it + if (stripStart) { // Center vertex if triangle fan //verify(vtx->header.isFirstOrSecond()); This fails for some strips: strip=1 fan=0 (soul surfer) - memcpy(&fanCenterVtx, &taVtx,
sizeof(SQBuffer)); - } - else if (vtx->header.isThird()) - { - // End of strip if triangle fan - if (i + 1 < list->vtxCount && vtx[1].header.isFan()) - taVtx.pcw.EndOfStrip = 1; + fanCenterVtx = taVtx; + if (outStripIndex > 0) + { + // use degenerate triangles to link strips + ta_add_vertex(fanLastVtx); + ta_add_vertex(taVtx); + outStripIndex += 2; + if (outStripIndex & 1) + { + ta_add_vertex(taVtx); + outStripIndex++; + } + } + stripStart = false; } else if (vtx->header.isFan()) { + // use degenerate triangles to link strips + ta_add_vertex(fanLastVtx); + ta_add_vertex(fanCenterVtx); + outStripIndex += 2; + if (outStripIndex & 1) + { + ta_add_vertex(fanCenterVtx); + outStripIndex++; + } // Triangle fan - ta_vtx_data32((SQBuffer *)&fanCenterVtx); - ta_vtx_data32((SQBuffer *)&fanLastVtx); - taVtx.pcw.EndOfStrip = 1; + ta_add_vertex(fanCenterVtx); + ta_add_vertex(fanLastVtx); + outStripIndex += 2; } - ta_vtx_data32((SQBuffer *)&taVtx); - memcpy(&fanLastVtx, &taVtx, sizeof(SQBuffer)); - fanLastVtx.pcw.EndOfStrip = 0; + ta_add_vertex(taVtx); + outStripIndex++; + fanLastVtx = taVtx; if (vtx->header.endOfStrip) - fanCenterVtx.pcw.ParaType = 0; + stripStart = true; vtx++; } @@ -755,52 +908,46 @@ static void sendVertices(const ICHList *list, const T* vtx) template static void sendMVVertices(const ICHList *list, const T* vtx) { - SQBuffer sqb[2]{}; - TA_VertexParam& taVtx = *(TA_VertexParam *)&sqb[0]; - taVtx.mvolA.pcw.ParaType = 7; - taVtx.mvolA.pcw.EndOfStrip = 1; verify(list->vertexSize() > 0); - glm::vec4 vtx0{}; - glm::vec4 vtx1{}; + glm::vec3 vtx0{}; + glm::vec3 vtx1{}; u32 stripStart = 0; for (u32 i = 0; i < list->vtxCount; i++) { - glm::vec4 v(vtx->x, vtx->y, vtx->z, 1); - v = projectionMatrix * curMatrix * v; - v.x /= v.w; - v.y /= v.w; + glm::vec3 v(vtx->x, vtx->y, vtx->z); // printf("MV %f %f %f - strip %d fan %d eos %d _res %x\n", v.x, v.y, 1 / v.w, vtx->header.strip, vtx->header.fan, vtx->header.endOfStrip, vtx->header._res); u32 triIdx = i - stripStart; if 
(triIdx >= 2) { + ModTriangle tri; + if (triIdx & 1) { - taVtx.mvolA.x1 = vtx0.x; - taVtx.mvolA.y1 = vtx0.y; - taVtx.mvolA.z1 = 1 / vtx0.w; + tri.x1 = vtx0.x; + tri.y1 = vtx0.y; + tri.z1 = vtx0.z; - taVtx.mvolA.x0 = vtx1.x; - taVtx.mvolA.y0 = vtx1.y; - taVtx.mvolA.z0 = 1 / vtx1.w; + tri.x0 = vtx1.x; + tri.y0 = vtx1.y; + tri.z0 = vtx1.z; } else { - taVtx.mvolA.x0 = vtx0.x; - taVtx.mvolA.y0 = vtx0.y; - taVtx.mvolA.z0 = 1 / vtx0.w; + tri.x0 = vtx0.x; + tri.y0 = vtx0.y; + tri.z0 = vtx0.z; - taVtx.mvolA.x1 = vtx1.x; - taVtx.mvolA.y1 = vtx1.y; - taVtx.mvolA.z1 = 1 / vtx1.w; + tri.x1 = vtx1.x; + tri.y1 = vtx1.y; + tri.z1 = vtx1.z; } - taVtx.mvolA.x2 = v.x; - taVtx.mvolB.y2 = v.y; - taVtx.mvolB.z2 = 1 / v.w; + tri.x2 = v.x; + tri.y2 = v.y; + tri.z2 = v.z; - ta_vtx_data32(&sqb[0]); - ta_vtx_data32(&sqb[1]); + ta_add_triangle(tri); } if (vtx->header.endOfStrip) stripStart = i + 1; @@ -810,24 +957,127 @@ static void sendMVVertices(const ICHList *list, const T* vtx) } } +static N2LightModel *taLightModel; +static bool usingAlphaLight; + +static void sendLights() +{ + if (!state.lightModelUpdated) + return; + + state.lightModelUpdated = false; + usingAlphaLight = false; + N2LightModel model; + model.lightCount = 0; + if (curLightModel == nullptr) + { + model.ambientMaterial = false; + model.useBaseOver = false; + model.ambientBase[0] = model.ambientBase[1] = model.ambientBase[2] = model.ambientBase[3] = 1.f; + memset(model.ambientOffset, 0, sizeof(model.ambientOffset)); + taLightModel = ta_add_light(model); // publish the defaults; otherwise taLightModel keeps pointing at the previous light set + return; + } + model.ambientMaterial = curLightModel->useAmbientBase0; + // TODO model.ambientMaterialForSpec = curLightModel->useAmbientOffset0; + model.useBaseOver = curLightModel->useBaseOver; + memcpy(model.ambientBase, glm::value_ptr(unpackColor(curLightModel->ambientBase0)), sizeof(model.ambientBase)); + memcpy(model.ambientOffset, glm::value_ptr(unpackColor(curLightModel->ambientOffset0)), sizeof(model.ambientOffset)); + for (u32 i = 0; i < MAX_LIGHTS; i++) + { + bool diffuse = 
curLightModel->isDiffuse(i); + bool specular = curLightModel->isSpecular(i); + if (!diffuse && !specular) + continue; + if (curLights[i] == nullptr) + { + INFO_LOG(PVR, "Light %d is referenced but undefined", i); + continue; + } + N2Light& light = model.lights[model.lightCount]; + light.diffuse = diffuse; + light.specular = specular; + light.parallel = curLights[i]->pcw.parallelLight; + if (light.parallel != 0) + { + ParallelLight *plight = (ParallelLight *)curLights[i]; + memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); + light.routing = plight->routing; + light.dmode = plight->dmode; + light.smode = N2_LMETHOD_SINGLE_SIDED; + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4(-(int8_t)plight->dirX, -(int8_t)plight->dirY, -(int8_t)plight->dirZ, 0))), + sizeof(light.direction)); + } + else + { + PointLight *plight = (PointLight *)curLights[i]; + memcpy(light.color, glm::value_ptr(unpackColor(plight->red, plight->green, plight->blue)), sizeof(light.color)); + light.routing = plight->routing; + light.dmode = plight->dmode; + light.smode = plight->smode; + memcpy(light.position, glm::value_ptr(glm::vec4(plight->posX, plight->posY, plight->posZ, 1)), sizeof(light.position)); + memcpy(light.direction, glm::value_ptr(glm::normalize(glm::vec4((int8_t)plight->dirX, (int8_t)plight->dirY, (int8_t)plight->dirZ, 0))), + sizeof(light.direction)); + light.distAttnMode = plight->dattenmode; + light.attnDistA = plight->distA(); + light.attnDistB = plight->distB(); + light.attnAngleA = plight->angleA(); + light.attnAngleB = plight->angleB(); + } + usingAlphaLight = usingAlphaLight || light.routing == N2_LFUNC_ALPHADIFF_SUB; + model.lightCount++; + } + taLightModel = ta_add_light(model); +} + +static void setStateParams(PolyParam& pp) +{ + sendLights(); + pp.tileclip = state.tileclip; + pp.mvMatrix = taMVMatrix; + pp.projMatrix = taProjMatrix; + pp.lightModel = taLightModel; + pp.envMapping = false; + if 
(curGmp != nullptr) + { + pp.glossCoef0 = curGmp->gloss.getCoef0(); + pp.glossCoef1 = curGmp->gloss.getCoef1(); + } + // FIXME hack SrcInstr condition fixes lens flares in vf4 + if (state.listType == 2 && usingAlphaLight && pp.tsp.SrcInstr == 1) + { + //printf("gmp pselect %x\n", curGmp->paramSelect.full); // ff ... not relevant + pp.tsp.UseAlpha = 1; // TODO alpha light volumes need manual settings of which params? + pp.tsp.ShadInstr = 3; + pp.tsp.SrcInstr = 4; + pp.tsp.DstInstr = 5; + } + // projFlip is for left-handed projection matrices (initd rear view mirror); taProjMatrix is null until a projection matrix is set, so guard the read + bool projFlip = taProjMatrix != nullptr && std::signbit(taProjMatrix[0]) == std::signbit(taProjMatrix[5]); + pp.isp.CullMode ^= (u32)cullingReversed ^ (u32)projFlip; +} + static void sendPolygon(ICHList *list) { switch (list->flags) { case ICHList::VTX_TYPE_V: { - Vertex *vtx = (Vertex *)((u8 *)list + sizeof(ICHList)); + N2_VERTEX *vtx = (N2_VERTEX *)((u8 *)list + sizeof(ICHList)); + if (!isInFrustum(vtx, list->vtxCount)) + break; if (state.listType & 1) { - TA_ModVolParam pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.isp.CullMode = 0; // FIXME required for closed volumes and not set properly - pp.isp.DepthMode &= 3; - ta_vtx_data32((const SQBuffer *)&pp); + ModifierVolumeParam mvp{}; + mvp.isp.full = list->isp.full; + mvp.isp.CullMode = 0; // FIXME required for closed volumes and not set properly + if (mvp.isp.DepthMode >= 3) + INFO_LOG(PVR, "MV mode %d", mvp.isp.DepthMode); + mvp.isp.VolumeLast = list->pcw.volume; + mvp.isp.DepthMode &= 3; + mvp.mvMatrix = taMVMatrix; + mvp.projMatrix = taProjMatrix; + ta_add_poly(state.listType, mvp); //for (int i = 0; i < list->vtxCount; i++) // printf("MV %f %f %f strip %d fan %d eos %d _res %x\n", vtx[i].x, vtx[i].y, vtx[i].z, vtx[i].header.strip, vtx[i].header.fan, vtx[i].header.endOfStrip, vtx[i].header._res); @@ -835,18 +1084,27 @@ static void sendPolygon(ICHList 
*list) } else { - // poly 0, vtx 0 - TA_PolyParam0 pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; + PolyParam pp{}; pp.pcw.Shadow = list->pcw.shadow; pp.pcw.Gouraud = list->pcw.gouraud; pp.isp = list->isp; pp.tsp = list->tsp0; - ta_vtx_data32((const SQBuffer *)&pp); + setStateParams(pp); + if (curGmp != nullptr && curGmp->paramSelect.e0) + { + // Environment mapping + pp.pcw.Texture = 1; + pp.pcw.Offset = 0; + pp.tsp.UseAlpha = 1; + pp.tsp.IgnoreTexA = 0; + pp.envMapping = true; + pp.tcw = list->tcw0; + envMapping = true; + } + ta_add_poly(state.listType, pp); sendVertices(list, vtx); + envMapping = false; } } break; @@ -854,17 +1112,20 @@ static void sendPolygon(ICHList *list) case ICHList::VTX_TYPE_VU: { N2_VERTEX_VU *vtx = (N2_VERTEX_VU *)((u8 *)list + sizeof(ICHList)); + if (!isInFrustum(vtx, list->vtxCount)) + break; if (state.listType & 1) { - TA_ModVolParam pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; - pp.pcw.Volume = list->pcw.volume; - pp.isp = list->isp; - pp.isp.CullMode = 0; // FIXME required for closed volumes and not set properly - pp.isp.DepthMode &= 3; - ta_vtx_data32((const SQBuffer *)&pp); + ModifierVolumeParam mvp{}; + mvp.isp.full = list->isp.full; + mvp.isp.CullMode = 0; // FIXME required for closed volumes and not set properly + if (mvp.isp.DepthMode >= 3) + INFO_LOG(PVR, "MV mode %d", mvp.isp.DepthMode); + mvp.isp.VolumeLast = list->pcw.volume; + mvp.isp.DepthMode &= 3; + mvp.mvMatrix = taMVMatrix; + mvp.projMatrix = taProjMatrix; + ta_add_poly(state.listType, mvp); //for (int i = 0; i < list->vtxCount; i++) // printf("MV %f %f %f strip %d fan %d eos %d _res %x\n", vtx[i].x, vtx[i].y, vtx[i].z, vtx[i].header.strip, vtx[i].header.fan, vtx[i].header.endOfStrip, vtx[i].header._res); @@ -872,10 +1133,8 @@ static void sendPolygon(ICHList *list) } else { - TA_PolyParam0 pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; 
- pp.pcw.User_Clip = state.userClip; + verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0); + PolyParam pp{}; pp.pcw.Shadow = list->pcw.shadow; pp.pcw.Texture = 1; pp.pcw.Offset = list->pcw.offset; @@ -883,15 +1142,8 @@ static void sendPolygon(ICHList *list) pp.isp = list->isp; pp.tsp = list->tsp0; pp.tcw = list->tcw0; - if (state.listType == 2) - pp.tsp.UseAlpha = 1; // FIXME alpha light volumes need manual settings of params? -// pp.tsp.ShadInstr = 3; // FIXME -// if (state.listType == 2) // FIXME -// { -// pp.tsp.SrcInstr = 4; -// pp.tsp.DstInstr = 5; -// } - ta_vtx_data32((const SQBuffer *)&pp); + setStateParams(pp); + ta_add_poly(state.listType, pp); sendVertices(list, vtx); } @@ -900,10 +1152,11 @@ static void sendPolygon(ICHList *list) case ICHList::VTX_TYPE_VUR: { - TA_PolyParam0 pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; + verify(curGmp == nullptr || curGmp->paramSelect.e0 == 0); + N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)((u8 *)list + sizeof(ICHList)); + if (!isInFrustum(vtx, list->vtxCount)) + break; + PolyParam pp{}; pp.pcw.Shadow = list->pcw.shadow; pp.pcw.Texture = 1; pp.pcw.Offset = list->pcw.offset; @@ -911,48 +1164,50 @@ static void sendPolygon(ICHList *list) pp.isp = list->isp; pp.tsp = list->tsp0; pp.tcw = list->tcw0; - if (state.listType == 2) - pp.tsp.UseAlpha = 1; // FIXME alpha light volumes need manual settings of params? 
-// pp.tsp.ShadInstr = 3; // FIXME -// if (state.listType == 2) // FIXME -// { -// pp.tsp.SrcInstr = 4; -// pp.tsp.DstInstr = 5; -// } - ta_vtx_data32((const SQBuffer *)&pp); + setStateParams(pp); + ta_add_poly(state.listType, pp); - N2_VERTEX_VUR *vtx = (N2_VERTEX_VUR *)((u8 *)list + sizeof(ICHList)); sendVertices(list, vtx); } break; case ICHList::VTX_TYPE_VR: { - // poly 0, vtx 0 - TA_PolyParam0 pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; + N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)((u8 *)list + sizeof(ICHList)); + if (!isInFrustum(vtx, list->vtxCount)) + break; + PolyParam pp{}; pp.pcw.Shadow = list->pcw.shadow; pp.pcw.Gouraud = list->pcw.gouraud; pp.isp = list->isp; pp.tsp = list->tsp0; - if (state.listType == 2) - pp.tsp.UseAlpha = 1; // FIXME alpha light volumes need manual settings of params? - ta_vtx_data32((const SQBuffer *)&pp); + setStateParams(pp); + if (curGmp != nullptr && curGmp->paramSelect.e0) + { + // FIXME doesn't seem to work + // Environment mapping + pp.pcw.Texture = 1; + pp.pcw.Offset = 0; + pp.tsp.UseAlpha = 1; + pp.tsp.IgnoreTexA = 0; + pp.envMapping = true; + pp.tcw = list->tcw0; + envMapping = true; + } + ta_add_poly(state.listType, pp); - N2_VERTEX_VR *vtx = (N2_VERTEX_VR *)((u8 *)list + sizeof(ICHList)); sendVertices(list, vtx); + envMapping = false; } break; case ICHList::VTX_TYPE_VUB: { // TODO - TA_PolyParam0 pp{}; - pp.pcw.ParaType = 4; - pp.pcw.ListType = state.listType ; - pp.pcw.User_Clip = state.userClip; + N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB *)((u8 *)list + sizeof(ICHList)); + if (!isInFrustum(vtx, list->vtxCount)) + break; + PolyParam pp{}; pp.pcw.Shadow = list->pcw.shadow; pp.pcw.Texture = 1; pp.pcw.Offset = 1; @@ -960,9 +1215,9 @@ static void sendPolygon(ICHList *list) pp.isp = list->isp; pp.tsp = list->tsp0; pp.tcw = list->tcw0; - //ta_vtx_data32((const SQBuffer *)&pp); + setStateParams(pp); + //ta_add_poly(state.listType, pp); - //N2_VERTEX_VUB *vtx = (N2_VERTEX_VUB 
*)((u8 *)list + sizeof(ICHList)); //sendVertices(list, vtx); INFO_LOG(PVR, "Unhandled poly format VTX_TYPE_VUB"); } @@ -995,29 +1250,26 @@ static void executeCommand(u8 *data, int size) size -= 32; break; - case PCW::matrix: - state.setMatrix(data); - size -= sizeof(Matrix); - break; - case PCW::projMatrix: state.setProjectionMatrix(data); size -= sizeof(ProjMatrix); break; - case PCW::instance: + case PCW::matrixOrLight: { - Instance *instance = (Instance *)data; - if (instance->isModelInstance()) + InstanceMatrix *instance = (InstanceMatrix *)data; + if (instance->isInstanceMatrix()) { - DEBUG_LOG(PVR, "Model instance offset %x size %x", instance->offset & 0x1ffffff8, instance->size); -//FIXME instance? model? executeCommand(&elanRAM[instance->offset & 0x1ffffff8], instance->size); + //DEBUG_LOG(PVR, "Model instance"); + state.setMatrix(instance); + size -= sizeof(InstanceMatrix); + break; } else if (instance->id1 & 0x10) { state.setLightModel(data); } - else if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4 + else //if ((instance->id2 & 0x40000000) || (instance->id1 & 0xffffff00)) // FIXME what are these lights without id2|0x40000000? vf4 { if (instance->pcw.parallelLight) { @@ -1030,25 +1282,24 @@ static void executeCommand(u8 *data, int size) state.setLight(light->lightId, data); } } - else - { - INFO_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2); - for (int i = 0; i < 32; i += 4) - INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - elanRAM), *(u32 *)&data[i]); - } - size -= sizeof(Instance); + //else + //{ + // WARN_LOG(PVR, "Other instance %08x %08x", instance->id1, instance->id2); + // for (int i = 0; i < 32; i += 4) + // INFO_LOG(PVR, " %08x: %08x", (u32)(&data[i] - elanRAM), *(u32 *)&data[i]); + //} + size -= sizeof(LightModel); } break; case PCW::model: { - // FIXME instance and model are switched? this is used for nl_set_light_instance() - // or static vs. 
dynamic? Model *model = (Model *)data; - // TODO fails rt66 start verify(model->id1 == 0x18000000 || model->id1 == 0x10000000); - state.userClip = model->pcw.userClip; + cullingReversed = (model->id1 & 0x08000000) == 0; + state.setClipMode(model->pcw); DEBUG_LOG(PVR, "Model offset %x size %x clip %d", model->offset, model->size, model->pcw.userClip); executeCommand(&elanRAM[model->offset & 0x1ffffff8], model->size); + cullingReversed = false; size -= sizeof(Model); } break; @@ -1084,7 +1335,6 @@ static void executeCommand(u8 *data, int size) inter = holly_OPAQUE; break; } -//bad reg74 |= 0x3c; asic_RaiseInterrupt(inter); TA_ITP_CURRENT += 32; state.reset(); @@ -1128,6 +1378,7 @@ static void executeCommand(u8 *data, int size) if ((pcw & 0xd0ffff00) == 0x808c0000) // display list { DEBUG_LOG(PVR, "Display list type %d", (pcw >> 24) & 0xf); + state.reset(); state.listType = (pcw >> 24) & 0xf; // TODO is this the right place for this? SQBuffer eol{}; @@ -1136,8 +1387,8 @@ static void executeCommand(u8 *data, int size) } else if ((pcw & 0xd0fcff00) == 0x80800000) // User clipping { - state.userClip = ((PCW&)pcw).userClip; - DEBUG_LOG(PVR, "User clip type %d", state.userClip); + state.setClipMode((PCW&)pcw); + DEBUG_LOG(PVR, "User clip type %d", ((PCW&)pcw).userClip); size -= 0xE0; } else if ((pcw & 0xd0ffff00) == 0x80000000) // geometry follows or linked? 
@@ -1146,21 +1397,16 @@ static void executeCommand(u8 *data, int size) // no possible disambiguation since 80000000 is a valid OP poly pcw (poly type 0 / vtx 0) DEBUG_LOG(PVR, "Geometry type %d - %08x", (pcw >> 24) & 0xf, pcw); size -= 32; - SQBuffer *sqb = (SQBuffer *)data + 1; - while (size > 32) - { - DEBUG_LOG(PVR, "vtx data %p", sqb); - ta_vtx_data32(sqb); - sqb++; - size -= 32; - } + ta_add_ta_data((u32 *)(data + 32), size - 32); + size = 32; } else if (pcw == 0x20000000) { // User clipping - DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", ((u32 *)data)[4] * 32, ((u32 *)data)[5] * 32, - ((u32 *)data)[6] * 32, ((u32 *)data)[7] * 32); - ta_vtx_data32((SQBuffer *)data); + u32 *tiles = (u32 *)data + 4; + DEBUG_LOG(PVR, "User clipping %d,%d - %d,%d", tiles[0] * 32, tiles[1] * 32, + tiles[2] * 32, tiles[3] * 32); + state.setClipTiles(tiles[0], tiles[1], tiles[2], tiles[3]); size -= 32; } else diff --git a/core/hw/pvr/elan_struct.h b/core/hw/pvr/elan_struct.h index 7af0f29c7..7d7af0758 100644 --- a/core/hw/pvr/elan_struct.h +++ b/core/hw/pvr/elan_struct.h @@ -25,11 +25,11 @@ namespace elan union PCW { enum Command { - null = 0, - unk_1, // instance matrix continuation? 
- matrix = 2, + null = 0, + _matrix2 = 1, + _matrix1 = 2, projMatrix = 3, - instance = 4, + matrixOrLight = 4, gmp = 5, ich = 7, model = 8, @@ -86,27 +86,15 @@ struct Model : public ElanBase }; static_assert(sizeof(Model) % 32 == 0, "Invalid size for Model"); -struct Instance : public ElanBase +struct InstanceMatrix : public ElanBase { // 08000400 u32 id1; // f u32 id2; // 7f - u32 _res; - u32 offset; - u32 one; // 1 - u32 size; - u32 _res0; + u32 _res[5]; - bool isModelInstance() const { - return id1 == 0xf && id2 == 0x7f && one == 1; - } -}; -static_assert(sizeof(Instance) % 32 == 0, "Invalid size for Instance"); - -struct Matrix : public ElanBase -{ - // 08000200 - float proj7; // env map U offset + u32 _res1; // 08000200 + float envMapU; // env map U offset float lm00; float lm01; float lm02; @@ -116,10 +104,11 @@ struct Matrix : public ElanBase float tm20; float tm21; float tm22; - float proj8; // env map V offset - float _res[4]; - u32 contCmd; - float proj4; // near? + float envMapV; // env map V offset + float _res2[4]; + + u32 _res3; // 08000100 + float _near; float tm00; float tm10; float mfr2; @@ -132,10 +121,14 @@ struct Matrix : public ElanBase float mat03; float mat13; float mat23; - float proj5; // far? + float _far; float mproj6; + + bool isInstanceMatrix() const { + return id1 == 0xf && id2 == 0x7f; + } }; -static_assert(sizeof(Matrix) % 32 == 0, "Invalid size for Matrix"); +static_assert(sizeof(InstanceMatrix) % 32 == 0, "Invalid size for InstanceMatrix"); struct ProjMatrix : public ElanBase { @@ -192,11 +185,12 @@ struct GMP : public ElanBase // ee110 1111 1111 constant // 00000 1111 1111 bump shading? - // seen: 00110 1111 1111 (b0 and b1 set) - // seen: 11000 1111 1111 (e0 and e1 set, followed by vtx type2 (vtx only)) - // seen: 11110 1111 1111 (everything! except v1uv0, rt66, vtx type2 (vtx only)) - // seen: 00110 0000 0000 (b0 and b1, vf4) - // seen: 00000 1010 1010 specular and fog? 
soul surfer + // seen: + // 00110 1111 1111 (b0 and b1 set) + // 11000 1111 1111 (e0 and e1 set, followed by vtx type2 (vtx only)) + // 11110 1111 1111 (everything! except v1uv0, rt66, vtx type2 (vtx only)) + // 00110 0000 0000 (b0 and b1, vf4) + // 00000 1010 1010 specular and fog? soul surfer u32 diffuse0; u32 specular0; @@ -234,7 +228,7 @@ union HeaderAndNormal bool isStrip() const { return strip == 1 && fan == 0; } }; -struct Vertex +struct N2_VERTEX { HeaderAndNormal header; float x; @@ -294,7 +288,7 @@ struct BumpMap // // textured, 1 or 2 para // -struct N2_VERTEX_VU : public Vertex +struct N2_VERTEX_VU : public N2_VERTEX { UnpackedUV uv; }; @@ -302,7 +296,7 @@ struct N2_VERTEX_VU : public Vertex // // textured, 1 or 2 para with unpacked normal // -struct N2_VERTEX_VNU : public Vertex +struct N2_VERTEX_VNU : public N2_VERTEX { Normal normal; UnpackedUV uv; @@ -311,7 +305,7 @@ struct N2_VERTEX_VNU : public Vertex // // for colored vertex, 1 para // -struct N2_VERTEX_VUR : public Vertex +struct N2_VERTEX_VUR : public N2_VERTEX { UnpackedUV uv; PackedRGB rgb; @@ -320,13 +314,13 @@ struct N2_VERTEX_VUR : public Vertex // // for bumpmapped, 1 para // -struct N2_VERTEX_VUB : public Vertex +struct N2_VERTEX_VUB : public N2_VERTEX { UnpackedUV uv; BumpMap bump; }; -struct N2_VERTEX_VR : public Vertex +struct N2_VERTEX_VR : public N2_VERTEX { PackedRGB rgb; }; @@ -358,7 +352,7 @@ struct ICHList : public ElanBase { switch (flags) { - case VTX_TYPE_V: return sizeof(Vertex); + case VTX_TYPE_V: return sizeof(N2_VERTEX); case VTX_TYPE_VU: return sizeof(N2_VERTEX_VU); case VTX_TYPE_VNU: return sizeof(N2_VERTEX_VNU); case VTX_TYPE_VR: return sizeof(N2_VERTEX_VR); @@ -505,70 +499,57 @@ struct PointLight : public ElanBase float posX; float posY; float posZ; - u16 distA; - u16 distB; - u16 angleA; - u16 angleB; + u16 _distA; + u16 _distB; + u16 _angleA; + u16 _angleB; + + static float f16tof32(u16 v) + { + u32 z = v << 16; + return (float&)z; + } + + float distA() const { 
return f16tof32(_distA); } + float distB() const { return f16tof32(_distB); } + float angleA() const { return f16tof32(_angleA); } + float angleB() const { return f16tof32(_angleB); } float attnMinDistance() const { - float a = 0; - *((u16 *)&a + 1) = distA; - float b = 0; - *((u16 *)&b + 1) = distB; - return -b / (a - 1); + return -distB() / (distA() - 1); } float attnMaxDistance() const { - float a = 0; - *((u16 *)&a + 1) = distA; - float b = 0; - *((u16 *)&b + 1) = distB; - return -b / a; + return -distB() / distA(); } float attnDist(float dist) const { - float a = 0; - *((u16 *)&a + 1) = distA; - float b = 0; - *((u16 *)&b + 1) = distB; float rv; if (dattenmode) - rv = b * dist + a; + rv = distB() * dist + distA(); else - rv = b / dist + a; + rv = distB() / dist + distA(); return std::max(0.f, std::min(1.f, rv)); } bool isAttnDist() const { - return distA != 1 && distB != 0; + return distA() != 1 && distB() != 0; } float attnMinAngle() const { - float a = 0; - *((u16 *)&a + 1) = angleA; - float b = 0; - *((u16 *)&b + 1) = angleB; - return acosf((1 - a) / b); + return acosf((1 - angleA()) / angleB()); } float attnMaxAngle() const { - float a = 0; - *((u16 *)&a + 1) = angleA; - float b = 0; - *((u16 *)&b + 1) = angleB; - return acosf(-a / b); + return acosf(-angleA() / angleB()); } float attnAngle(float angleCos) const { - float a = 0; - *((u16 *)&a + 1) = angleA; - float b = 0; - *((u16 *)&b + 1) = angleB; - return std::max(0.f, std::min(1.f, angleCos * b + a)); + return std::max(0.f, std::min(1.f, angleCos * angleB() + angleA())); } bool isAttnAngle() const { - return angleA != 1 && angleB != 0; + return angleA() != 1 && angleB() != 0; } }; diff --git a/core/hw/pvr/ta.cpp b/core/hw/pvr/ta.cpp index 8dfd81312..af4ff3287 100644 --- a/core/hw/pvr/ta.cpp +++ b/core/hw/pvr/ta.cpp @@ -263,6 +263,7 @@ void ta_vtx_ListCont() { SetCurrentTARC(TA_CURRENT_CTX); ta_tad.Continue(); + ta_ctx->rend.newRenderPass(); ta_cur_state=TAS_NS; ta_fsm_cl = 7; diff --git 
a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 62007764f..bfa2d2353 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -13,6 +13,7 @@ void DYNACALL ta_vtx_data32(const SQBuffer *data); void ta_vtx_data(const SQBuffer *data, u32 size); bool ta_parse_vdrc(TA_context *ctx, bool bgraColors = false); +bool ta_parse_naomi2(TA_context* ctx); class TaTypeLut { diff --git a/core/hw/pvr/ta_ctx.cpp b/core/hw/pvr/ta_ctx.cpp index b1c681568..1df789bda 100644 --- a/core/hw/pvr/ta_ctx.cpp +++ b/core/hw/pvr/ta_ctx.cpp @@ -11,9 +11,6 @@ static int RenderCount; TA_context* ta_ctx; tad_context ta_tad; -TA_context* vd_ctx; -rend_context vd_rc; - void SetCurrentTARC(u32 addr) { if (addr != TACTX_NONE) diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 777b205cf..e0101a377 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -9,6 +9,7 @@ #include class BaseTextureCacheData; +struct N2LightModel; //Vertex storage types struct Vertex @@ -25,6 +26,9 @@ struct Vertex u8 spc1[4]; float u1,v1; + + // Naomi2 normal + float nx,ny,nz; }; struct PolyParam @@ -33,9 +37,6 @@ struct PolyParam u32 count; BaseTextureCacheData *texture; -#if !defined(HOST_64BIT_CPU) - u32 _pad0; -#endif TSP tsp; TCW tcw; @@ -47,9 +48,13 @@ struct PolyParam TSP tsp1; TCW tcw1; BaseTextureCacheData *texture1; -#if !defined(HOST_64BIT_CPU) - u32 _pad1; -#endif + + float *mvMatrix; + float *projMatrix; + float glossCoef0; + float glossCoef1; + N2LightModel *lightModel; + bool envMapping; }; struct ModifierVolumeParam @@ -57,6 +62,9 @@ struct ModifierVolumeParam u32 first; u32 count; ISP_Modvol isp; + + float *mvMatrix; + float *projMatrix; }; struct ModTriangle @@ -98,10 +106,9 @@ struct tad_context void Reset(u8* ptr) { - thd_data = thd_root = thd_old_data = ptr; - render_pass_count = 0; + thd_root = ptr; + Clear(); } - }; struct RenderPass { @@ -114,6 +121,41 @@ struct RenderPass { u32 mvo_tr_count; }; +struct N2Matrix +{ + float mat[16]; +}; + +struct N2Light +{ + float color[4]; + float 
direction[4]; // For parallel/spot + float position[4]; // For spot/point + int parallel; + int diffuse; + int specular; + int routing; + int dmode; + int smode; + + int distAttnMode; // For spot/point + float attnDistA; + float attnDistB; + float attnAngleA; // For spot + float attnAngleB; +}; + +struct N2LightModel +{ + N2Light lights[16]; + int lightCount; + + float ambientBase[4]; // base ambient color + float ambientOffset[4]; // offset ambient color + bool ambientMaterial; // ambient light is multiplied by model material/color + bool useBaseOver; // base color overflows into offset color +}; + struct rend_context { u8* proc_start; @@ -143,6 +185,10 @@ struct rend_context List global_param_tr; List render_passes; + List matrices; + List lightModels; + bool init = false; + void Clear() { verts.Clear(); @@ -155,11 +201,19 @@ struct rend_context global_param_mvo_tr.Clear(); render_passes.Clear(); - Overrun=false; - fZ_min= 1000000.0f; - fZ_max= 1.0f; + // Reserve space for background poly + global_param_op.Append(); + verts.Append(4); + + Overrun = false; + fZ_min = 1000000.0f; + fZ_max = 1.0f; isRenderFramebuffer = false; + matrices.Clear(); + lightModels.Clear(); } + + void newRenderPass(); }; #define TA_DATA_SIZE (8 * 1024 * 1024) @@ -205,17 +259,20 @@ struct TA_context { tad.Reset((u8*)allocAligned(32, TA_DATA_SIZE)); - rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame - rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead ) - rend.global_param_op.Init(16384, &rend.Overrun, "global_param_op"); + rend.verts.InitBytes(16 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame + rend.idx.Init(512 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead ) + rend.global_param_op.Init(32768, &rend.Overrun, "global_param_op"); rend.global_param_pt.Init(5120, &rend.Overrun, "global_param_pt"); 
rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo"); - rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr"); + rend.global_param_tr.Init(32768, &rend.Overrun, "global_param_tr"); rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr"); rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes + rend.matrices.Init(1000, &rend.Overrun, "matrices"); + rend.lightModels.Init(100, &rend.Overrun, "lightModels"); + rend.init = true; Reset(); } @@ -243,6 +300,8 @@ struct TA_context rend.global_param_mvo.Free(); rend.global_param_mvo_tr.Free(); rend.render_passes.Free(); + rend.matrices.Free(); + rend.lightModels.Free(); } }; @@ -250,9 +309,6 @@ struct TA_context extern TA_context* ta_ctx; extern tad_context ta_tad; -extern TA_context* vd_ctx; -extern rend_context vd_rc; - TA_context* tactx_Find(u32 addr, bool allocnew=false); TA_context* tactx_Pop(u32 addr); @@ -274,7 +330,14 @@ void FinishRender(TA_context* ctx); //must be moved to proper header void FillBGP(TA_context* ctx); -bool UsingAutoSort(int pass_number); bool rend_framePending(); void SerializeTAContext(Serializer& ser); void DeserializeTAContext(Deserializer& deser); + +void ta_add_poly(int type, const PolyParam& pp); +void ta_add_poly(int type, const ModifierVolumeParam& mvp); +void ta_add_vertex(const Vertex& vtx); +void ta_add_triangle(const ModTriangle& tri); +float* ta_add_matrix(const float *matrix); +N2LightModel *ta_add_light(const N2LightModel& light); +void ta_add_ta_data(u32 *data, u32 size); diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index dbb676828..0a88214d7 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -48,6 +48,9 @@ static u8 float_to_satu8_math(float val) return u8(saturate01(val)*255); } +static TA_context *vd_ctx; +#define vd_rc (vd_ctx->rend) + //vdec state variables static ModTriangle* lmr; @@ -66,6 +69,7 @@ 
static u32 SFaceOffsColor; const u32 ListType_None = -1; const u32 SZ32 = 1; const u32 SZ64 = 2; +static bool fetchTextures = true; #include "ta_structs.h" @@ -84,8 +88,6 @@ static f32 f16(u16 v) return *(f32*)&z; } -#define vdrc vd_rc - template class FifoSplitter { @@ -625,7 +627,6 @@ public: void vdec_init() { - VDECInit(); TaCmd = ta_main; CurrentList = ListType_None; ListIsFinished[0] = ListIsFinished[1] = ListIsFinished[2] = ListIsFinished[3] = ListIsFinished[4] = false; @@ -664,11 +665,11 @@ private: static void StartList(u32 ListType) { if (ListType==ListType_Opaque) - CurrentPPlist=&vdrc.global_param_op; + CurrentPPlist=&vd_rc.global_param_op; else if (ListType==ListType_Punch_Through) - CurrentPPlist=&vdrc.global_param_pt; + CurrentPPlist=&vd_rc.global_param_pt; else if (ListType==ListType_Translucent) - CurrentPPlist=&vdrc.global_param_tr; + CurrentPPlist=&vd_rc.global_param_tr; CurrentPP = NULL; } @@ -696,7 +697,7 @@ private: d_pp = CurrentPPlist->Append(); CurrentPP = d_pp; } - d_pp->first = vdrc.verts.used(); + d_pp->first = vd_rc.verts.used(); d_pp->count = 0; d_pp->isp = pp->isp; @@ -705,7 +706,7 @@ private: d_pp->pcw = pp->pcw; d_pp->tileclip = tileclip_val; - if (d_pp->pcw.Texture) + if (d_pp->pcw.Texture && fetchTextures) d_pp->texture = renderer->GetTexture(d_pp->tsp, d_pp->tcw); else d_pp->texture = nullptr; @@ -713,6 +714,10 @@ private: d_pp->tsp1.full = -1; d_pp->tcw1.full = -1; d_pp->texture1 = nullptr; + d_pp->mvMatrix = nullptr; + d_pp->projMatrix = nullptr; + d_pp->lightModel = nullptr; + d_pp->envMapping = false; } #define glob_param_bdc(pp) glob_param_bdc_( (TA_PolyParam0*)pp) @@ -776,7 +781,7 @@ private: CurrentPP->tsp1.full = pp->tsp1.full; CurrentPP->tcw1.full = pp->tcw1.full; - if (pp->pcw.Texture) + if (pp->pcw.Texture && fetchTextures) CurrentPP->texture1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } @@ -790,7 +795,7 @@ private: CurrentPP->tsp1.full = pp->tsp1.full; CurrentPP->tcw1.full = pp->tcw1.full; - if (pp->pcw.Texture) + if 
(pp->pcw.Texture && fetchTextures) CurrentPP->texture1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } @@ -807,14 +812,14 @@ private: __forceinline static void EndPolyStrip() { - CurrentPP->count = vdrc.verts.used() - CurrentPP->first; + CurrentPP->count = vd_rc.verts.used() - CurrentPP->first; if (CurrentPP->count > 0) { PolyParam* d_pp = CurrentPPlist->Append(); *d_pp = *CurrentPP; CurrentPP = d_pp; - d_pp->first = vdrc.verts.used(); + d_pp->first = vd_rc.verts.used(); d_pp->count = 0; } } @@ -823,8 +828,8 @@ private: static inline void update_fz(float z) { - if ((s32&)vdrc.fZ_max<(s32&)z && (s32&)z<0x49800000) - vdrc.fZ_max=z; + if ((s32&)vd_rc.fZ_max<(s32&)z && (s32&)z<0x49800000) + vd_rc.fZ_max=z; } //Poly Vertex handlers @@ -833,7 +838,7 @@ private: static Vertex* vert_cvt_base_(T* vtx) { f32 invW=vtx->xyz[2]; - Vertex* cv=vdrc.verts.Append(); + Vertex* cv=vd_rc.verts.Append(); cv->x=vtx->xyz[0]; cv->y=vtx->xyz[1]; cv->z=invW; @@ -845,7 +850,7 @@ private: //Resume vertex base (for B part) #define vert_res_base \ - Vertex* cv=vdrc.verts.LastPtr(); + Vertex* cv=vd_rc.verts.LastPtr(); //uv 16/32 #define vert_uv_32(u_name,v_name) \ @@ -1159,7 +1164,7 @@ private: CurrentPP=d_pp; } - d_pp->first = vdrc.verts.used(); + d_pp->first = vd_rc.verts.used(); d_pp->count=0; d_pp->isp=spr->isp; d_pp->tsp=spr->tsp; @@ -1167,7 +1172,7 @@ private: d_pp->pcw=spr->pcw; d_pp->tileclip=tileclip_val; - if (d_pp->pcw.Texture) + if (d_pp->pcw.Texture && fetchTextures) d_pp->texture = renderer->GetTexture(d_pp->tsp, d_pp->tcw); else d_pp->texture = nullptr; @@ -1175,6 +1180,10 @@ private: d_pp->tcw1.full = -1; d_pp->tsp1.full = -1; d_pp->texture1 = nullptr; + d_pp->mvMatrix = nullptr; + d_pp->projMatrix = nullptr; + d_pp->lightModel = nullptr; + d_pp->envMapping = false; SFaceBaseColor=spr->BaseCol; SFaceOffsColor=spr->OffsCol; @@ -1196,7 +1205,7 @@ private: { CurrentPP->count = 4; - Vertex* cv = vdrc.verts.Append(4); + Vertex* cv = vd_rc.verts.Append(4); //Fill static stuff 
append_sprite(0); @@ -1290,7 +1299,7 @@ private: PolyParam* d_pp = CurrentPPlist->Append(); *d_pp = *CurrentPP; CurrentPP = d_pp; - d_pp->first = vdrc.verts.used(); + d_pp->first = vd_rc.verts.used(); d_pp->count = 0; } @@ -1300,15 +1309,15 @@ private: { List *list = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) - list = &vdrc.global_param_mvo; + list = &vd_rc.global_param_mvo; else if (CurrentList == ListType_Translucent_Modifier_Volume) - list = &vdrc.global_param_mvo_tr; + list = &vd_rc.global_param_mvo_tr; else return; if (list->used() > 0) { ModifierVolumeParam *p = list->LastPtr(); - p->count = vdrc.modtrig.used() - p->first; + p->count = vd_rc.modtrig.used() - p->first; if (p->count == 0) list->PopLast(); @@ -1322,21 +1331,23 @@ private: ModifierVolumeParam *p = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) - p = vdrc.global_param_mvo.Append(); + p = vd_rc.global_param_mvo.Append(); else if (CurrentList == ListType_Translucent_Modifier_Volume) - p = vdrc.global_param_mvo_tr.Append(); + p = vd_rc.global_param_mvo_tr.Append(); else return; p->isp.full = param->isp.full; p->isp.VolumeLast = param->pcw.Volume != 0; - p->first = vdrc.modtrig.used(); + p->first = vd_rc.modtrig.used(); + p->mvMatrix = nullptr; + p->projMatrix = nullptr; } __forceinline static void AppendModVolVertexA(TA_ModVolA* mvv) { if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; - lmr=vdrc.modtrig.Append(); + lmr=vd_rc.modtrig.Append(); lmr->x0=mvv->x0; lmr->y0=mvv->y0; @@ -1360,15 +1371,6 @@ private: lmr->z2=mvv->z2; //update_fz(mvv->z2); } - - static void VDECInit() - { - vd_rc.Clear(); - - //allocate storage for BG poly - vd_rc.global_param_op.Append(); - vd_rc.verts.Append(4); - } }; template @@ -1395,6 +1397,7 @@ TaTypeLut::TaTypeLut() } static bool ClearZBeforePass(int pass_number); +static bool UsingAutoSort(int pass_number); static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& 
ymax); FifoSplitter<> TAParser; @@ -1426,11 +1429,16 @@ static void make_index(const List *polys, int first, int end, bool me bool dupe_next_vtx = false; if (merge && last_poly != nullptr + && last_poly->count != 0 && poly->pcw.full == last_poly->pcw.full && poly->tcw.full == last_poly->tcw.full && poly->tsp.full == last_poly->tsp.full && poly->isp.full == last_poly->isp.full && poly->tileclip == last_poly->tileclip + && poly->mvMatrix == last_poly->mvMatrix + && poly->projMatrix == last_poly->projMatrix + && poly->lightModel == last_poly->lightModel + && poly->envMapping == last_poly->envMapping // FIXME tcw1, tsp1? ) { @@ -1448,7 +1456,7 @@ static void make_index(const List *polys, int first, int end, bool me for (u32 i = 0; i < poly->count; i++) { const Vertex& vtx = vertices[poly->first + i]; - if (is_vertex_inf(vtx)) + if (poly->projMatrix == nullptr && is_vertex_inf(vtx)) { while (i < poly->count - 1) { @@ -1544,14 +1552,14 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) { ctx->rend_inuse.lock(); bool rv=false; - verify(vd_ctx == 0); + verify(vd_ctx == nullptr); vd_ctx = ctx; - vd_rc = vd_ctx->rend; if (bgraColors) TAParserDX.vdec_init(); else TAParser.vdec_init(); + vd_rc.Clear(); bool empty_context = true; int op_poly_count = 0; @@ -1563,6 +1571,10 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) { bgpp->texture = renderer->GetTexture(bgpp->tsp, bgpp->tcw); empty_context = false; + bgpp->mvMatrix = nullptr; + bgpp->projMatrix = nullptr; + bgpp->lightModel = nullptr; + bgpp->envMapping = false; } for (u32 pass = 0; pass <= ctx->tad.render_pass_count; pass++) @@ -1627,8 +1639,7 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) vd_rc.fb_Y_CLIP.max = std::min(vd_rc.fb_Y_CLIP.max, ymax + 31); } - vd_ctx->rend = vd_rc; - vd_ctx = 0; + vd_ctx = nullptr; ctx->rend_inuse.unlock(); ctx->rend.Overrun = overrun; @@ -1636,6 +1647,149 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors) return rv && !overrun; } +bool ta_parse_naomi2(TA_context* 
ctx) // TODO BGRA colors +{ + ctx->rend_inuse.lock(); + + PolyParam &bgpp = ctx->rend.global_param_op.head()[0]; + bgpp.mvMatrix = nullptr; + bgpp.projMatrix = nullptr; + bgpp.lightModel = nullptr; + bgpp.envMapping = false; + + for (PolyParam& pp : ctx->rend.global_param_op) + if (pp.pcw.Texture) + pp.texture = renderer->GetTexture(pp.tsp, pp.tcw); + for (PolyParam& pp : ctx->rend.global_param_pt) + if (pp.pcw.Texture) + pp.texture = renderer->GetTexture(pp.tsp, pp.tcw); + for (PolyParam& pp : ctx->rend.global_param_tr) + if (pp.pcw.Texture) + pp.texture = renderer->GetTexture(pp.tsp, pp.tcw); + + bool overrun = ctx->rend.Overrun; + if (overrun) + { + WARN_LOG(PVR, "ERROR: TA context overrun"); + } + else + { + ctx->rend.newRenderPass(); + int op_count = 0; + int pt_count = 0; + int tr_count = 0; + for (const RenderPass& pass : ctx->rend.render_passes) + { + make_index(&ctx->rend.global_param_op, op_count, pass.op_count, true, &ctx->rend); + make_index(&ctx->rend.global_param_pt, pt_count, pass.pt_count, true, &ctx->rend); + make_index(&ctx->rend.global_param_tr, tr_count, pass.tr_count, false, &ctx->rend); + op_count = pass.op_count; + pt_count = pass.pt_count; + tr_count = pass.tr_count; + } + + u32 xmin, xmax, ymin, ymax; + getRegionTileClipping(xmin, xmax, ymin, ymax); + ctx->rend.fb_X_CLIP.min = std::max(ctx->rend.fb_X_CLIP.min, xmin); + ctx->rend.fb_X_CLIP.max = std::min(ctx->rend.fb_X_CLIP.max, xmax + 31); + ctx->rend.fb_Y_CLIP.min = std::max(ctx->rend.fb_Y_CLIP.min, ymin); + ctx->rend.fb_Y_CLIP.max = std::min(ctx->rend.fb_Y_CLIP.max, ymax + 31); + } + ctx->rend_inuse.unlock(); + + return !overrun; +} + +static PolyParam *n2CurrentPP; +static ModifierVolumeParam *n2CurrentMVP; + +void ta_add_poly(int type, const PolyParam& pp) +{ + verify(ta_ctx != nullptr); + switch (type) + { + case ListType_Opaque: + *ta_ctx->rend.global_param_op.Append() = pp; + n2CurrentPP = ta_ctx->rend.global_param_op.LastPtr(); + break; + case ListType_Translucent: + 
*ta_ctx->rend.global_param_tr.Append() = pp; + n2CurrentPP = ta_ctx->rend.global_param_tr.LastPtr(); + break; + case ListType_Punch_Through: + *ta_ctx->rend.global_param_pt.Append() = pp; + n2CurrentPP = ta_ctx->rend.global_param_pt.LastPtr(); + break; + default: + die("wrong list type"); + break; + } + n2CurrentPP->first = ta_ctx->rend.verts.used(); + n2CurrentPP->count = 0; +} + +void ta_add_poly(int type, const ModifierVolumeParam& mvp) +{ + verify(ta_ctx != nullptr); + switch (type) + { + case ListType_Opaque_Modifier_Volume: + *ta_ctx->rend.global_param_mvo.Append() = mvp; + n2CurrentMVP = ta_ctx->rend.global_param_mvo.LastPtr(); + break; + case ListType_Translucent_Modifier_Volume: + *ta_ctx->rend.global_param_mvo_tr.Append() = mvp; + n2CurrentMVP = ta_ctx->rend.global_param_mvo_tr.LastPtr(); + break; + default: + die("wrong list type"); + break; + } + n2CurrentMVP->first = ta_ctx->rend.modtrig.used(); + n2CurrentMVP->count = 0; +} + +void ta_add_vertex(const Vertex& vtx) +{ + *ta_ctx->rend.verts.Append() = vtx; + n2CurrentPP->count++; +} + +void ta_add_triangle(const ModTriangle& tri) +{ + *ta_ctx->rend.modtrig.Append() = tri; + n2CurrentMVP->count++; +} + +float *ta_add_matrix(const float *matrix) +{ + N2Matrix *n2mat = ta_ctx->rend.matrices.Append(); + memcpy(n2mat->mat, matrix, sizeof(N2Matrix::mat)); + return n2mat->mat; +} + +N2LightModel *ta_add_light(const N2LightModel& light) +{ + *ta_ctx->rend.lightModels.Append() = light; + return ta_ctx->rend.lightModels.LastPtr(); +} + +void ta_add_ta_data(u32 *data, u32 size) +{ + vd_ctx = ta_ctx; + fetchTextures = false; + //TODO if (bgraColors) + // TAParserDX.vdec_init(); + //else + TAParser.vdec_init(); + + Ta_Dma *ta_data = (Ta_Dma *)data; + Ta_Dma *ta_data_end = (Ta_Dma *)(data + size / 4) - 1; + while (ta_data <= ta_data_end) + ta_data = TaCmd(ta_data, ta_data_end); + vd_ctx = nullptr; + fetchTextures = true; +} //decode a vertex in the native pvr format //used for bg poly @@ -1773,6 +1927,7 @@ void 
FillBGP(TA_context* ctx) bgpp->pcw.Offset=bgpp->isp.Offset; bgpp->pcw.Texture = bgpp->isp.Texture; bgpp->pcw.Shadow = ISP_BACKGND_T.shadow; + bgpp->projMatrix = nullptr; float scale_x= (SCALER_CTL.hscale) ? 2.f:1.f; //if AA hack the hacked pos value hacks for (int i=0;i<3;i++) @@ -1887,7 +2042,7 @@ static RegionArrayTile getRegionTile(int pass_number) return tile; } -bool UsingAutoSort(int pass_number) +static bool UsingAutoSort(int pass_number) { if (((FPU_PARAM_CFG >> 21) & 1) == 0) // Type 1 region header type @@ -1907,3 +2062,22 @@ static bool ClearZBeforePass(int pass_number) return !tile.NoZClear; } + +void rend_context::newRenderPass() +{ + verify(init); + if (global_param_op.used() > 0 + || global_param_tr.used() > 0 + || global_param_pt.used() > 0) + { + RenderPass pass; + pass.op_count = global_param_op.used(); + pass.tr_count = global_param_tr.used(); + pass.pt_count = global_param_pt.used(); + pass.mvo_count = global_param_mvo.used(); + pass.mvo_tr_count = global_param_mvo_tr.used(); + pass.autosort = UsingAutoSort(render_passes.used()); + pass.z_clear = ClearZBeforePass(render_passes.used()); + *render_passes.Append() = pass; + } +} diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h index f7081e68a..524d27045 100755 --- a/core/rend/gl4/gl4.h +++ b/core/rend/gl4/gl4.h @@ -44,6 +44,16 @@ struct gl4PipelineShader GLint fog_clamp_min, fog_clamp_max; GLint normal_matrix; GLint palette_index; + // Naomi2 + GLint mvMat; + GLint projMat; + GLint glossCoef0; + GLint lightCount; + GLint ambientBase; + GLint ambientOffset; + GLint ambientMaterial; + GLint useBaseOver; + GLint envMapping; bool cp_AlphaTest; bool pp_InsideClipping; @@ -59,6 +69,7 @@ struct gl4PipelineShader bool pp_BumpMap; bool fog_clamping; bool palette; + bool naomi2; }; @@ -71,6 +82,15 @@ struct gl4_ctx GLuint normal_matrix; } modvol_shader; + struct + { + GLuint program; + + GLuint normal_matrix; + GLint mvMat; + GLint projMat; + } n2ModVolShader; + std::unordered_map shaders; struct @@ 
-88,7 +108,8 @@ extern int max_image_width; extern int max_image_height; extern const char *gl4PixelPipelineShader; -bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *pixel_source = nullptr, const char *vertex_source = nullptr); +bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *pixel_source = nullptr, const char *vertex_source = nullptr, + const char *geom_source = nullptr); void initABuffer(); void termABuffer(); diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp index 613cdf0ce..67ecbd9dc 100644 --- a/core/rend/gl4/gldraw.cpp +++ b/core/rend/gl4/gldraw.cpp @@ -20,6 +20,7 @@ #include "rend/gles/glcache.h" #include "rend/tileclip.h" #include "rend/osd.h" +#include "naomi2.h" static gl4PipelineShader* CurrentShader; extern u32 gcflip; @@ -34,7 +35,7 @@ GLuint depthSaveTexId; static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, - bool palette, Pass pass) + bool palette, bool naomi2, Pass pass) { u32 rv=0; @@ -51,6 +52,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin rv <<= 1; rv |= (int)pp_BumpMap; rv <<= 1; rv |= (int)fog_clamping; rv <<= 1; rv |= (int)palette; + rv <<= 1; rv |= (int)naomi2; rv <<= 2; rv |= (int)pass; gl4PipelineShader *shader = &gl4.shaders[rv]; @@ -69,6 +71,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin shader->pp_BumpMap = pp_BumpMap; shader->fog_clamping = fog_clamping; shader->palette = palette; + shader->naomi2 = naomi2; shader->pass = pass; gl4CompilePipelineShader(shader); } @@ -128,6 +131,7 @@ static void SetGPState(const PolyParam* gp) false, false, false, + gp->projMatrix != nullptr, pass); } else @@ -152,6 +156,7 @@ static void SetGPState(const PolyParam* gp) gp->tcw.PixelFmt == PixelBumpMap, color_clamp, gpuPalette, 
+ gp->projMatrix != nullptr, pass); } glcache.UseProgram(CurrentShader->program); @@ -241,8 +246,6 @@ static void SetGPState(const PolyParam* gp) glActiveTexture(GL_TEXTURE0); } - //set cull mode ! - //cflip is required when exploding triangles for triangle sorting //gcflip is global clip flip, needed for when rendering to texture due to mirrored Y direction SetCull(gp->isp.CullMode ^ gcflip); @@ -267,6 +270,8 @@ static void SetGPState(const PolyParam* gp) } else glcache.DepthMask(GL_FALSE); + if (gp->projMatrix != nullptr) + setN2Uniforms(gp, CurrentShader); } template @@ -330,6 +335,9 @@ void gl4SetupMainVBO() glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck(); glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_NORM_ARRAY); + glVertexAttribPointer(VERTEX_NORM_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, nx)); } void gl4SetupModvolVBO() @@ -354,8 +362,6 @@ static void DrawModVols(int first, int count) glcache.Disable(GL_BLEND); SetBaseClipping(); - glcache.UseProgram(gl4.modvol_shader.program); - glcache.Enable(GL_DEPTH_TEST); glcache.DepthMask(GL_FALSE); glcache.DepthFunc(GL_GREATER); @@ -372,6 +378,14 @@ static void DrawModVols(int first, int count) if (param.count == 0) continue; + if (param.projMatrix != nullptr) + { + glcache.UseProgram(gl4.n2ModVolShader.program); + glUniformMatrix4fv(gl4.n2ModVolShader.mvMat, 1, GL_FALSE, param.mvMatrix); + glUniformMatrix4fv(gl4.n2ModVolShader.projMat, 1, GL_FALSE, param.projMatrix); + } + else + glcache.UseProgram(gl4.modvol_shader.program); u32 mv_mode = param.isp.DepthMode; diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index d23c41aec..211673e99 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -21,6 +21,7 @@ #include "rend/transform_matrix.h" #include "rend/osd.h" #include "glsl.h" +#include "naomi2.h" //Fragment and vertex shaders code @@ -461,13 
+462,23 @@ struct gl4ShaderUniforms_t gl4ShaderUniforms; int max_image_width; int max_image_height; -bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source /* = nullptr */, const char *vertex_source /* = nullptr */) +bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source /* = nullptr */, + const char *vertex_source /* = nullptr */, const char *geom_source /* = nullptr */) { - Vertex4Source vertexSource(s->pp_Gouraud); + std::string vertexSource; + std::string geometrySource; + if (s->naomi2) + { + vertexSource = N2Vertex4Source(s->pp_Gouraud).generate(); + geometrySource = N2Geometry4Shader(s->pp_Gouraud).generate(); + } + else + vertexSource = Vertex4Source(s->pp_Gouraud).generate(); Fragment4ShaderSource fragmentSource(s); - s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.generate().c_str(), - fragment_source != nullptr ? fragment_source : fragmentSource.generate().c_str()); + s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.c_str(), + fragment_source != nullptr ? fragment_source : fragmentSource.generate().c_str(), + geometrySource.empty() ? 
nullptr : geometrySource.c_str()); //setup texture 0 as the input for the shader GLint gu = glGetUniformLocation(s->program, "tex0"); @@ -537,6 +548,18 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source glUniform1i(gu, 6); // GL_TEXTURE6 s->palette_index = glGetUniformLocation(s->program, "palette_index"); + // Naomi2 + s->mvMat = glGetUniformLocation(s->program, "mvMat"); + s->projMat = glGetUniformLocation(s->program, "projMat"); + s->glossCoef0 = glGetUniformLocation(s->program, "glossCoef0"); + s->envMapping = glGetUniformLocation(s->program, "envMapping"); + // Lights + s->lightCount = glGetUniformLocation(s->program, "lightCount"); + s->ambientBase = glGetUniformLocation(s->program, "ambientBase"); + s->ambientOffset = glGetUniformLocation(s->program, "ambientOffset"); + s->ambientMaterial = glGetUniformLocation(s->program, "ambientMaterial"); + s->useBaseOver = glGetUniformLocation(s->program, "useBaseOver"); + return glIsProgram(s->program)==GL_TRUE; } @@ -550,6 +573,8 @@ static void gl4_delete_shaders() gl4.shaders.clear(); glcache.DeleteProgram(gl4.modvol_shader.program); gl4.modvol_shader.program = 0; + glcache.DeleteProgram(gl4.n2ModVolShader.program); + gl4.n2ModVolShader.program = 0; } static void gl4_term() @@ -576,6 +601,14 @@ static void create_modvol_shader() gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); gl4.modvol_shader.normal_matrix = glGetUniformLocation(gl4.modvol_shader.program, "normal_matrix"); + + N2Vertex4Source n2VertexShader(false, true); + N2Geometry4Shader geometryShader(false, true); + gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str(), + geometryShader.generate().c_str()); + gl4.n2ModVolShader.normal_matrix = glGetUniformLocation(gl4.n2ModVolShader.program, "normal_matrix"); + gl4.n2ModVolShader.mvMat = glGetUniformLocation(gl4.n2ModVolShader.program, "mvMat"); + 
gl4.n2ModVolShader.projMat = glGetUniformLocation(gl4.n2ModVolShader.program, "projMat"); } static bool gl_create_resources() @@ -734,9 +767,14 @@ static bool RenderFrame(int width, int height) pvrrc.fog_clamp_min.getRGBAColor(gl4ShaderUniforms.fog_clamp_min); pvrrc.fog_clamp_max.getRGBAColor(gl4ShaderUniforms.fog_clamp_max); - glcache.UseProgram(gl4.modvol_shader.program); + if (config::Fog) + { + glcache.UseProgram(gl4.modvol_shader.program); + glUniformMatrix4fv(gl4.modvol_shader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]); - glUniformMatrix4fv(gl4.modvol_shader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]); + glcache.UseProgram(gl4.n2ModVolShader.program); + glUniformMatrix4fv(gl4.n2ModVolShader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]); + } gl4ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; diff --git a/core/rend/gl4/naomi2.cpp b/core/rend/gl4/naomi2.cpp new file mode 100644 index 000000000..a3969fc76 --- /dev/null +++ b/core/rend/gl4/naomi2.cpp @@ -0,0 +1,127 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+ */ +#include "naomi2.h" + +extern const char *N2VertexShader; +extern const char *N2ColorShader; +extern const char *GeometryClippingShader; + +static const char *gouraudSource = R"( +#if pp_Gouraud == 0 +#define INTERPOLATION flat +#else +#define INTERPOLATION noperspective +#endif +#define NOPERSPECTIVE noperspective +)"; + +N2Vertex4Source::N2Vertex4Source(bool gouraud, bool geometryOnly) : OpenGl4Source() +{ + addConstant("pp_Gouraud", gouraud); + addConstant("GEOM_ONLY", geometryOnly); + addConstant("TWO_VOLUMES", 1); + + addSource(gouraudSource); + if (!geometryOnly) + addSource(N2ColorShader); + addSource(N2VertexShader); +} + +N2Geometry4Shader::N2Geometry4Shader(bool gouraud, bool geometryOnly) : OpenGl4Source() +{ + addConstant("pp_Gouraud", gouraud); + addConstant("GEOM_ONLY", geometryOnly); + addConstant("TWO_VOLUMES", 1); + + addSource(gouraudSource); + addSource(GeometryClippingShader); +} + +static void setLightUniform(const gl4PipelineShader *shader, int lightId, const char *name, int v) +{ + char s[128]; + sprintf(s, "lights[%d].%s", lightId, name); + GLint loc = glGetUniformLocation(shader->program, s); + glUniform1i(loc, v); +} + +static void setLightUniform(const gl4PipelineShader *shader, int lightId, const char *name, float v) +{ + char s[128]; + sprintf(s, "lights[%d].%s", lightId, name); + GLint loc = glGetUniformLocation(shader->program, s); + glUniform1f(loc, v); +} + +static void setLightUniform4f(const gl4PipelineShader *shader, int lightId, const char *name, const float *v) +{ + char s[128]; + sprintf(s, "lights[%d].%s", lightId, name); + GLint loc = glGetUniformLocation(shader->program, s); + glUniform4fv(loc, 1, v); +} + +void setN2Uniforms(const PolyParam *pp, const gl4PipelineShader *shader) +{ + glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, &pp->mvMatrix[0]); + glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, &pp->projMatrix[0]); + glUniform1f(shader->glossCoef0, pp->glossCoef0); + N2LightModel *const lightModel = 
pp->lightModel; + if (lightModel != nullptr) + { + glUniform1i(shader->ambientMaterial, lightModel->ambientMaterial); + glUniform4fv(shader->ambientBase, 1, lightModel->ambientBase); + glUniform4fv(shader->ambientOffset, 1, lightModel->ambientOffset); + glUniform1i(shader->useBaseOver, lightModel->useBaseOver); + glUniform1i(shader->lightCount, lightModel->lightCount); + for (int i = 0; i < lightModel->lightCount; i++) + { + const N2Light& light = lightModel->lights[i]; + setLightUniform(shader, i, "parallel", light.parallel); + + setLightUniform4f(shader, i, "color", light.color); + setLightUniform4f(shader, i, "direction", light.direction); + setLightUniform4f(shader, i, "position", light.position); + + setLightUniform(shader, i, "diffuse", light.diffuse); + setLightUniform(shader, i, "specular", light.specular); + setLightUniform(shader, i, "routing", light.routing); + setLightUniform(shader, i, "dmode", light.dmode); + setLightUniform(shader, i, "smode", light.smode); + setLightUniform(shader, i, "distAttnMode", light.distAttnMode); + + setLightUniform(shader, i, "attnDistA", light.attnDistA); + setLightUniform(shader, i, "attnDistB", light.attnDistB); + setLightUniform(shader, i, "attnAngleA", light.attnAngleA); + setLightUniform(shader, i, "attnAngleB", light.attnAngleB); + } + } + else + { + float white[] { 1.f, 1.f, 1.f, 1.f }; + float black[4]{}; + glUniform1i(shader->ambientMaterial, 0); + glUniform4fv(shader->ambientBase, 1, white); + glUniform4fv(shader->ambientOffset, 1, black); + glUniform1i(shader->useBaseOver, 0); + glUniform1i(shader->lightCount, 0); + } + glUniform1i(shader->envMapping, pp->envMapping); + glEnable(GL_CLIP_DISTANCE0); +} diff --git a/core/rend/gl4/naomi2.h b/core/rend/gl4/naomi2.h new file mode 100644 index 000000000..c8fd7a513 --- /dev/null +++ b/core/rend/gl4/naomi2.h @@ -0,0 +1,34 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. 
+ + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . + */ +#pragma once +#include "gl4.h" + +void setN2Uniforms(const PolyParam *pp, const gl4PipelineShader *shader); + +class N2Vertex4Source : public OpenGl4Source +{ +public: + N2Vertex4Source(bool gouraud, bool geometryOnly = false); +}; + +class N2Geometry4Shader : public OpenGl4Source +{ +public: + N2Geometry4Shader(bool gouraud, bool geometryOnly = false); +}; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 53c2a4ac7..678cf8f6c 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -3,6 +3,7 @@ #include "rend/sorter.h" #include "rend/tileclip.h" #include "rend/osd.h" +#include "naomi2.h" /* @@ -133,7 +134,8 @@ __forceinline gp->tcw.PixelFmt == PixelBumpMap, color_clamp, ShaderUniforms.trilinear_alpha != 1.f, - gpuPalette); + gpuPalette, + gp->projMatrix != nullptr); glcache.UseProgram(CurrentShader->program); if (CurrentShader->trilinear_alpha != -1) @@ -224,7 +226,7 @@ __forceinline //set Z mode, only if required if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled)) { - glcache.DepthFunc(GL_GEQUAL); + glcache.DepthFunc(Zfunction[6]); // >= } else { @@ -242,6 +244,8 @@ __forceinline else glcache.DepthMask(!gp->isp.ZWriteDis); } + if (CurrentShader->naomi2) + setN2Uniforms(gp, CurrentShader); } template @@ -502,6 +506,10 @@ void SetupMainVBO() glEnableVertexAttribArray(VERTEX_UV_ARRAY); 
glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); + + glEnableVertexAttribArray(VERTEX_NORM_ARRAY); + glVertexAttribPointer(VERTEX_NORM_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, nx)); + glCheck(); } @@ -544,9 +552,6 @@ void DrawModVols(int first, int count) glcache.Disable(GL_BLEND); SetBaseClipping(); - glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); - glcache.Enable(GL_DEPTH_TEST); glcache.DepthMask(GL_FALSE); glcache.DepthFunc(GL_GREATER); @@ -556,6 +561,8 @@ void DrawModVols(int first, int count) ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first]; int mod_base = -1; + float *curMVMat = nullptr; + float *curProjMat = nullptr; for (int cmv = 0; cmv < count; cmv++) { @@ -563,6 +570,24 @@ void DrawModVols(int first, int count) if (param.count == 0) continue; + if (param.projMatrix != nullptr) + { + glcache.UseProgram(gl.n2ModVolShader.program); + if (param.mvMatrix != curMVMat) + { + curMVMat = param.mvMatrix; + glUniformMatrix4fv(gl.n2ModVolShader.mvMat, 1, GL_FALSE, curMVMat); + } + if (param.projMatrix != curProjMat) + { + curProjMat = param.projMatrix; + glUniformMatrix4fv(gl.n2ModVolShader.projMat, 1, GL_FALSE, curProjMat); + } + } + else + { + glcache.UseProgram(gl.modvol_shader.program); + } u32 mv_mode = param.isp.DepthMode; @@ -789,7 +814,7 @@ void DrawVmuTexture(u8 vmu_screen_number) glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); SetupMainVBO(); - PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false); + PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false, false); glcache.UseProgram(shader->program); { @@ -883,7 +908,7 @@ void DrawGunCrosshair(u8 port) glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE); SetupMainVBO(); - PipelineShader *shader = 
GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false); + PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false, false); glcache.UseProgram(shader->program); { diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index f1de503ab..9acc6e962 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -13,6 +13,7 @@ #include "rend/transform_matrix.h" #include "wsi/gl_context.h" #include "emulator.h" +#include "naomi2.h" #include @@ -68,7 +69,7 @@ const char *PixelCompatShader = R"( #endif )"; -static const char* GouraudSource = R"( +const char* GouraudSource = R"( #if TARGET_GL == GL3 || defined(GL_NV_shader_noperspective_interpolation) #define NOPERSPECTIVE noperspective #if pp_Gouraud == 0 @@ -400,6 +401,8 @@ static void gl_delete_shaders() gl.shaders.clear(); glcache.DeleteProgram(gl.modvol_shader.program); gl.modvol_shader.program = 0; + glcache.DeleteProgram(gl.n2ModVolShader.program); + gl.n2ModVolShader.program = 0; } void termGLCommon() @@ -491,7 +494,7 @@ void findGLVersion() #if defined(__APPLE__) gl.glsl_version_header = "#version 150"; #else - gl.glsl_version_header = "#version 130"; + gl.glsl_version_header = "#version 150"; // FIXME GLSL 1.5 / Open GL 3.2 needed for geometry shader #endif gl.single_channel_format = GL_RED; } @@ -565,16 +568,22 @@ GLuint gl_CompileShader(const char* shader,GLuint type) return rv; } -GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) +GLuint gl_CompileAndLink(const char *vertexShader, const char *fragmentShader, const char *geometryShader) { //create shaders - GLuint vs=gl_CompileShader(VertexShader ,GL_VERTEX_SHADER); - GLuint ps=gl_CompileShader(FragmentShader ,GL_FRAGMENT_SHADER); + GLuint vs = gl_CompileShader(vertexShader, GL_VERTEX_SHADER); + GLuint ps = gl_CompileShader(fragmentShader, GL_FRAGMENT_SHADER); + GLuint gs = 0; + if (geometryShader != nullptr) + gs = 
gl_CompileShader(geometryShader, GL_GEOMETRY_SHADER); GLuint program = glCreateProgram(); glAttachShader(program, vs); glAttachShader(program, ps); + if (gs != 0) + glAttachShader(program, gs); + //bind vertex attribute to vbo inputs glBindAttribLocation(program, VERTEX_POS_ARRAY, "in_pos"); glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base"); @@ -583,6 +592,8 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1"); glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1"); glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1"); + // Naomi 2 + glBindAttribLocation(program, VERTEX_NORM_ARRAY, "in_normal"); #ifndef GLES if (!gl.is_gles && gl.gl_major >= 3) @@ -610,13 +621,17 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) free(compile_log); // Dump the shaders source for troubleshooting - INFO_LOG(RENDERER, "// VERTEX SHADER\n%s\n// END", VertexShader); - INFO_LOG(RENDERER, "// FRAGMENT SHADER\n%s\n// END", FragmentShader); + INFO_LOG(RENDERER, "// VERTEX SHADER\n%s\n// END", vertexShader); + if (geometryShader != nullptr) + INFO_LOG(RENDERER, "// GEOMETRY SHADER\n%s\n// END", geometryShader); + INFO_LOG(RENDERER, "// FRAGMENT SHADER\n%s\n// END", fragmentShader); die("shader compile fail\n"); } glDeleteShader(vs); glDeleteShader(ps); + if (gs != 0) + glDeleteShader(gs); glcache.UseProgram(program); @@ -628,7 +643,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear, - bool palette) + bool palette, bool naomi2) { u32 rv=0; @@ -645,6 +660,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, rv<<=1; rv|=fog_clamping; rv<<=1; 
rv|=trilinear; rv<<=1; rv|=palette; + rv<<=1; rv|=naomi2; PipelineShader *shader = &gl.shaders[rv]; if (shader->program == 0) @@ -662,6 +678,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, shader->fog_clamping = fog_clamping; shader->trilinear = trilinear; shader->palette = palette; + shader->naomi2 = naomi2; CompilePipelineShader(shader); } @@ -707,10 +724,17 @@ public: bool CompilePipelineShader(PipelineShader* s) { - VertexSource vertexSource(s->pp_Gouraud); + std::string vertexShader; + if (s->naomi2) + vertexShader = N2VertexSource(s->pp_Gouraud).generate(); + else + vertexShader = VertexSource(s->pp_Gouraud).generate(); FragmentShaderSource fragmentSource(s); + std::string geometryShader; + if (s->naomi2) + geometryShader = N2GeometryShader(s->pp_Gouraud).generate(); - s->program = gl_CompileAndLink(vertexSource.generate().c_str(), fragmentSource.generate().c_str()); + s->program = gl_CompileAndLink(vertexShader.c_str(), fragmentSource.generate().c_str(), s->naomi2 ? 
geometryShader.c_str() : nullptr); //setup texture 0 as the input for the shader GLint gu = glGetUniformLocation(s->program, "tex"); @@ -763,6 +787,18 @@ bool CompilePipelineShader(PipelineShader* s) } s->normal_matrix = glGetUniformLocation(s->program, "normal_matrix"); + // Naomi2 + s->mvMat = glGetUniformLocation(s->program, "mvMat"); + s->projMat = glGetUniformLocation(s->program, "projMat"); + s->glossCoef0 = glGetUniformLocation(s->program, "glossCoef0"); + s->envMapping = glGetUniformLocation(s->program, "envMapping"); + // Lights + s->lightCount = glGetUniformLocation(s->program, "lightCount"); + s->ambientBase = glGetUniformLocation(s->program, "ambientBase"); + s->ambientOffset = glGetUniformLocation(s->program, "ambientOffset"); + s->ambientMaterial = glGetUniformLocation(s->program, "ambientMaterial"); + s->useBaseOver = glGetUniformLocation(s->program, "useBaseOver"); + ShaderUniforms.Set(s); return glIsProgram(s->program)==GL_TRUE; @@ -863,9 +899,18 @@ static void create_modvol_shader() .addSource(ModifierVolumeShader); gl.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str()); - gl.modvol_shader.normal_matrix = glGetUniformLocation(gl.modvol_shader.program, "normal_matrix"); + gl.modvol_shader.normal_matrix = glGetUniformLocation(gl.modvol_shader.program, "normal_matrix"); gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); - gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); + gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); + + N2VertexSource n2vertexShader(false, true); + N2GeometryShader geometryShader(false, true); + gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str()); + gl.n2ModVolShader.normal_matrix = glGetUniformLocation(gl.n2ModVolShader.program, 
"normal_matrix"); + gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor"); + gl.n2ModVolShader.depth_scale = glGetUniformLocation(gl.n2ModVolShader.program, "depth_scale"); + gl.n2ModVolShader.mvMat = glGetUniformLocation(gl.n2ModVolShader.program, "mvMat"); + gl.n2ModVolShader.projMat = glGetUniformLocation(gl.n2ModVolShader.program, "projMat"); } bool gl_create_resources() @@ -1123,7 +1168,12 @@ bool OpenGLRenderer::Process(TA_context* ctx) palette_updated = false; } - if (!ta_parse_vdrc(ctx)) + bool success; + if (settings.platform.system == DC_PLATFORM_NAOMI2) + success = ta_parse_naomi2(ctx); + else + success = ta_parse_vdrc(ctx); + if (!success) return false; } @@ -1190,10 +1240,16 @@ bool RenderFrame(int width, int height) pvrrc.fog_clamp_max.getRGBAColor(ShaderUniforms.fog_clamp_max); glcache.UseProgram(gl.modvol_shader.program); - if (gl.modvol_shader.depth_scale != -1) glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); + glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); + + glcache.UseProgram(gl.n2ModVolShader.program); + if (gl.n2ModVolShader.depth_scale != -1) + glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs); + glUniformMatrix4fv(gl.n2ModVolShader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); + glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 0dc0ab924..84a59852d 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -27,6 +27,8 @@ #define VERTEX_COL_BASE1_ARRAY 4 #define VERTEX_COL_OFFS1_ARRAY 5 #define VERTEX_UV1_ARRAY 6 +// Naomi2 +#define VERTEX_NORM_ARRAY 7 //vertex types extern u32 gcflip; @@ -49,6 +51,16 @@ struct PipelineShader GLint 
fog_clamp_min, fog_clamp_max; GLint normal_matrix; GLint palette_index; + // Naomi2 + GLint mvMat; + GLint projMat; + GLint glossCoef0; + GLint lightCount; + GLint ambientBase; + GLint ambientOffset; + GLint ambientMaterial; + GLint useBaseOver; + GLint envMapping; // bool cp_AlphaTest; @@ -64,6 +76,7 @@ struct PipelineShader bool fog_clamping; bool trilinear; bool palette; + bool naomi2; }; @@ -76,9 +89,20 @@ struct gl_ctx GLint depth_scale; GLint sp_ShaderColor; GLint normal_matrix; - } modvol_shader; + struct + { + GLuint program; + + GLint depth_scale; + GLint sp_ShaderColor; + GLint normal_matrix; + + GLint mvMat; + GLint projMat; + } n2ModVolShader; + std::unordered_map shaders; struct @@ -175,11 +199,12 @@ void OSD_DRAW(bool clear_screen); PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear, - bool palette); + bool palette, bool naomi2); GLuint gl_CompileShader(const char* shader, GLuint type); -GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader); +GLuint gl_CompileAndLink(const char *vertexShader, const char *fragmentShader, const char *geometryShader = nullptr); bool CompilePipelineShader(PipelineShader* s); +extern const char* GouraudSource; extern struct ShaderUniforms_t { diff --git a/core/rend/gles/naomi2.cpp b/core/rend/gles/naomi2.cpp new file mode 100644 index 000000000..6c7a8f06b --- /dev/null +++ b/core/rend/gles/naomi2.cpp @@ -0,0 +1,571 @@ +/* + Copyright 2022 flyinghead + + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . + */ +#include "naomi2.h" + +// FIXME GLES +#ifndef GL_CLIP_DISTANCE0 +#define GL_CLIP_DISTANCE0 0x3000 +#endif + +const char* N2VertexShader = R"( +uniform vec4 depth_scale; +uniform mat4 normal_matrix; +uniform float sp_FOG_DENSITY; + +uniform mat4 mvMat; +uniform mat4 projMat; +uniform int envMapping; + +// Vertex input +in vec3 in_pos; +#if GEOM_ONLY == 0 +in vec4 in_base; +in vec4 in_offs; +in vec2 in_uv; +in vec3 in_normal; +#if TWO_VOLUMES == 1 +in vec4 in_base1; +in vec4 in_offs1; +in vec2 in_uv1; +#endif +// output +INTERPOLATION out highp vec4 vs_base; +INTERPOLATION out highp vec4 vs_offs; +NOPERSPECTIVE out highp vec3 vs_uv; +#if TWO_VOLUMES == 1 +INTERPOLATION out vec4 vs_base1; +INTERPOLATION out vec4 vs_offs1; +noperspective out vec2 vs_uv1; +#endif +#endif +out float gl_ClipDistance[1]; + +void main() +{ + vec4 vpos = mvMat * vec4(in_pos, 1.0); +#if GEOM_ONLY == 0 + vs_base = in_base; + vs_offs = in_offs; +#if TWO_VOLUMES == 1 + vs_base1 = in_base1; + vs_offs1 = in_offs1; + vs_uv1 = in_uv1; +#endif + vec4 vnorm = normalize(mvMat * vec4(in_normal, 0.0)); + computeColors(vs_base, vs_offs, vpos.xyz, vnorm.xyz); + vs_uv.xy = in_uv; + if (envMapping == 1) + computeEnvMap(vs_uv.xy, vpos.xyz, vnorm.xyz); +#endif + + vpos = projMat * vpos; + + gl_ClipDistance[0] = vpos.w - 0.001; // near FIXME + + gl_Position = vpos; +} + +)"; + +const char* N2ColorShader = R"( +#define LMODE_SINGLE_SIDED 0 +#define LMODE_DOUBLE_SIDED 1 +#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2 +#define LMODE_SPECIAL_EFFECT 3 +#define LMODE_THIN_SURFACE 4 +#define LMODE_BUMP_MAP 5 + +#define ROUTING_BASEDIFF_BASESPEC_ADD 0 +#define 
ROUTING_BASEDIFF_OFFSSPEC_ADD 1 +#define ROUTING_OFFSDIFF_BASESPEC_ADD 2 +#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3 +#define ROUTING_ALPHADIFF_ADD 4 +#define ROUTING_ALPHAATTEN_ADD 5 +#define ROUTING_FOGDIFF_ADD 6 +#define ROUTING_FOGATTENUATION_ADD 7 +#define ROUTING_BASEDIFF_BASESPEC_SUB 8 +#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9 +#define ROUTING_OFFSDIFF_BASESPEC_SUB 10 +#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11 +#define ROUTING_ALPHADIFF_SUB 12 +#define ROUTING_ALPHAATTEN_SUB 13 + +struct N2Light +{ + vec4 color; + vec4 direction; // For parallel/spot + vec4 position; // For spot/point + int parallel; + int diffuse; + int specular; + int routing; + int dmode; + int smode; + + int distAttnMode; // For spot/point + float attnDistA; + float attnDistB; + float attnAngleA; // For spot + float attnAngleB; +}; +uniform N2Light lights[16]; +uniform int lightCount; + +uniform vec4 ambientBase; +uniform vec4 ambientOffset; +uniform int ambientMaterial; +uniform int useBaseOver; + +// model attributes +uniform float glossCoef0; +uniform float glossCoef1; + +void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in vec3 position, in vec3 normal) +{ + vec3 diffuse = vec3(0.0); + vec3 specular = vec3(0.0); + float diffuseAlpha = 0.0; + float specularAlpha = 0.0; + + for (int i = 0; i < lightCount; i++) + { + N2Light light = lights[i]; + vec3 lightDir; // direction to the light + vec3 lightColor = light.color.rgb; + if (light.parallel == 1) + { + lightDir = normalize(light.direction.xyz); + } + else + { + lightDir = normalize(light.position.xyz - position); + if (light.attnDistA != 1.0 && light.attnDistB != 0.0) + { + float distance = length(light.position.xyz - position); + if (light.distAttnMode == 0) + distance = 1.0 / distance; + lightColor *= clamp(light.attnDistB * distance + light.attnDistA, 0.0, 1.0); + } + if (light.attnAngleA != 1.0 && light.attnAngleB != 0.0) + { + vec3 spotDir = normalize(light.direction.xyz); + float cosAngle = max(0.0, dot(-lightDir, 
spotDir)); + lightColor *= clamp(cosAngle * light.attnAngleB + light.attnAngleA, 0.0, 1.0); + } + } + if (light.diffuse == 1) + { + float factor; + switch (light.dmode) + { + case LMODE_SINGLE_SIDED: + factor = max(dot(normal, lightDir), 0.0); + break; + case LMODE_DOUBLE_SIDED: + factor = abs(dot(normal, lightDir)); + break; + case LMODE_SPECIAL_EFFECT: + default: + factor = 1.0; + break; + } + if (light.routing == ROUTING_ALPHADIFF_SUB) + diffuseAlpha -= lightColor.r * factor; + else if (light.routing == ROUTING_BASEDIFF_BASESPEC_ADD || light.routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + diffuse += lightColor * factor; + if (light.routing == ROUTING_OFFSDIFF_BASESPEC_ADD || light.routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + } + if (light.specular == 1) + { + vec3 reflectDir = reflect(-lightDir, normal); + float factor; + switch (light.smode) + { + case LMODE_SINGLE_SIDED: + factor = clamp(pow(max(dot(normalize(-position), reflectDir), 0.0), glossCoef0), 0.0, 1.0); + break; + case LMODE_DOUBLE_SIDED: + factor = clamp(pow(abs(dot(normalize(-position), reflectDir)), glossCoef0), 0.0, 1.0); + break; + case LMODE_SPECIAL_EFFECT: + default: + factor = 1.0; + break; + } + if (light.routing == ROUTING_ALPHADIFF_SUB) + specularAlpha -= lightColor.r * factor; + else if (light.routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || light.routing == ROUTING_BASEDIFF_OFFSSPEC_ADD) + specular += lightColor * factor; + if (light.routing == ROUTING_BASEDIFF_BASESPEC_ADD || light.routing == ROUTING_OFFSDIFF_BASESPEC_ADD) + diffuse += lightColor * factor; + } + } + if (ambientMaterial == 1) + { + diffuse += ambientBase.rgb; + specular += ambientOffset.rgb; + } + baseCol.rgb *= diffuse; + offsetCol.rgb *= specular; + if (ambientMaterial == 0) + { + baseCol.rgb += ambientBase.rgb; + offsetCol.rgb += ambientOffset.rgb; + } + baseCol.a = max(0.0, baseCol.a + diffuseAlpha); + offsetCol.a = max(0.0, offsetCol.a + specularAlpha); + if (useBaseOver == 1) + { + vec4 
overflow = max(vec4(0.0), baseCol - vec4(1.0)); + offsetCol += overflow; + } +} + +void computeEnvMap(inout vec2 uv, in vec3 position, in vec3 normal) +{ + // Spherical mapping + //vec3 r = reflect(normalize(position), normal); + //float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0)); + //uv += r.xy / m + 0.5; + + // Cheap env mapping + uv += normal.xy / 2.0 + 0.5; + uv = clamp(uv, 0.0, 1.0); +} + +)"; + +const char *GeometryClippingShader = R"( +layout (triangles) in; +layout (triangle_strip, max_vertices = 6) out; + +uniform mat4 normal_matrix; + +#if GEOM_ONLY == 0 +INTERPOLATION in highp vec4 vs_base[3]; +INTERPOLATION in highp vec4 vs_offs[3]; +NOPERSPECTIVE in highp vec3 vs_uv[3]; +#if TWO_VOLUMES == 1 +INTERPOLATION in highp vec4 vs_base1[3]; +INTERPOLATION in highp vec4 vs_offs1[3]; +NOPERSPECTIVE in highp vec2 vs_uv1[3]; +#endif + +INTERPOLATION out highp vec4 vtx_base; +INTERPOLATION out highp vec4 vtx_offs; +#if TWO_VOLUMES == 1 +INTERPOLATION out highp vec4 vtx_base1; +INTERPOLATION out highp vec4 vtx_offs1; +NOPERSPECTIVE out highp vec2 vtx_uv1; +#endif +#endif +NOPERSPECTIVE out highp vec3 vtx_uv; // For depth + +struct Vertex +{ + vec4 pos; + vec4 base; + vec4 offs; + vec3 uv; +#if TWO_VOLUMES == 1 + vec4 base1; + vec4 offs1; + vec2 uv1; +#endif + float clipDist; +}; + +Vertex interpolate(in Vertex v0, in Vertex v1, in float d0, in float d1) +{ + Vertex v; + float f = d0 / (d0 - d1); + v.pos = mix(v0.pos, v1.pos, f); +#if GEOM_ONLY == 0 + v.base = mix(v0.base, v1.base, f); + v.offs = mix(v0.offs, v1.offs, f); + v.uv = mix(v0.uv, v1.uv, f); +#if TWO_VOLUMES == 1 + v.base1 = mix(v0.base1, v1.base1, f); + v.offs1 = mix(v0.offs1, v1.offs1, f); + v.uv1 = mix(v0.uv1, v1.uv1, f); +#endif +#endif + v.clipDist = mix(v0.clipDist, v1.clipDist, f); + + return v; +} + +// +// Efficient Triangle and Quadrilateral Clipping within Shaders. M. 
McGuire
+// Journal of Graphics GPU and Game Tools, November 2011
+//
+const float clipEpsilon = 0.00001;	// slack used by the "none clipped" test in clip3()
+const float clipEpsilon2 = 0.0; // 0.01;
+
+/**
+ Computes the intersection of triangle v0-v1-v2 with the half-space (x,y,z) * n > 0.
+ The result is a convex polygon in v0-v1-v2-v3. Vertex v3 may be degenerate
+ and equal to the first vertex.
+
+ \return number of vertices; 0, 3, or 4
+*/
+int clip3(in vec3 dist, inout Vertex v0, inout Vertex v1, inout Vertex v2, out Vertex v3)
+{
+	if (!any(greaterThanEqual(dist, vec3(clipEpsilon2))))
+		// All clipped
+		return 0;
+
+	if (all(greaterThanEqual(dist, vec3(-clipEpsilon)))) {
+		// None clipped (original triangle vertices are unmodified)
+		v3 = v0;
+		return 3;
+	}
+
+	bvec3 above = greaterThanEqual(dist, vec3(0.0));
+
+	// There are either 1 or 2 vertices above the clipping plane.
+	bool nextIsAbove;
+
+	// Find the CCW-most vertex above the plane by cycling
+	// the vertices in place. There are three cases.
+	if (above[1] && !above[0]) {
+		nextIsAbove = above[2];
+		// Cycle once CCW. Use v3 as a temp
+		v3 = v0; v0 = v1; v1 = v2; v2 = v3;
+		dist = dist.yzx;
+	}
+	else if (above[2] && !above[1]) {
+		// Cycle once CW. Use v3 as a temp.
+		nextIsAbove = above[0];
+		v3 = v2; v2 = v1; v1 = v0; v0 = v3;
+		dist = dist.zxy;
+	}
+	else {
+		nextIsAbove = above[1];
+	}
+
+	// We always need to clip v2-v0.
+	v3 = interpolate(v0, v2, dist[0], dist[2]);
+
+	if (nextIsAbove) {
+		v2 = interpolate(v1, v2, dist[1], dist[2]);
+		return 4;
+	} else {
+		v1 = interpolate(v0, v1, dist[0], dist[1]);
+		v2 = v3;
+		v3 = v0;
+		return 3;
+	}
+}
+// Divides xy by w, stores 1/w in z, applies normal_matrix, then scales colors/uv by the resulting pos.z.
+void wDivide(inout Vertex v)
+{
+	v.pos = vec4(v.pos.xy / v.pos.w, 1.0 / v.pos.w, 1.0);
+	v.pos = normal_matrix * v.pos;
+#if GEOM_ONLY == 1
+	v.uv = vec3(0.0, 0.0, v.pos.z);
+#else
+#if pp_Gouraud == 1
+	v.base *= v.pos.z;
+	v.offs *= v.pos.z;
+#if TWO_VOLUMES == 1
+	v.base1 *= v.pos.z;
+	v.offs1 *= v.pos.z;
+#endif
+#endif
+	v.uv = vec3(v.uv.xy * v.pos.z, v.pos.z);	// uv.z carries the same scale factor
+#if TWO_VOLUMES == 1
+	v.uv1 *= v.pos.z;
+#endif
+#endif
+	v.pos.w = 1.0;	// the emitted position always ends up with w = 1 and z = 0
+	v.pos.z = 0.0;
+}
+// Runs wDivide() on its own copy of v, writes the result to the stage outputs and emits the vertex.
+void emitVertex(in Vertex v)
+{
+	wDivide(v);
+#if GEOM_ONLY == 0
+	vtx_base = v.base;
+	vtx_offs = v.offs;
+#if TWO_VOLUMES == 1
+	vtx_base1 = v.base1;
+	vtx_offs1 = v.offs1;
+	vtx_uv1 = v.uv1;
+#endif
+#endif
+	vtx_uv = v.uv;
+	gl_Position = v.pos;
+	EmitVertex();
+}
+// Clips the incoming triangle against gl_ClipDistance[0] and emits zero, one or two triangles.
+void main()
+{
+	Vertex vtx[6];	// 3 input vertices + room for the second triangle of a clipped quad
+	vtx[0].pos = gl_in[0].gl_Position;
+	vtx[1].pos = gl_in[1].gl_Position;
+	vtx[2].pos = gl_in[2].gl_Position;
+#if GEOM_ONLY == 0
+	vtx[0].base = vs_base[0];
+	vtx[0].offs = vs_offs[0];
+	vtx[0].uv = vs_uv[0];
+	vtx[1].base = vs_base[1];
+	vtx[1].offs = vs_offs[1];
+	vtx[1].uv = vs_uv[1];
+	vtx[2].base = vs_base[2];
+	vtx[2].offs = vs_offs[2];
+	vtx[2].uv = vs_uv[2];
+#if TWO_VOLUMES == 1
+	vtx[0].base1 = vs_base1[0];
+	vtx[0].offs1 = vs_offs1[0];
+	vtx[0].uv1 = vs_uv1[0];
+	vtx[1].base1 = vs_base1[1];
+	vtx[1].offs1 = vs_offs1[1];
+	vtx[1].uv1 = vs_uv1[1];
+	vtx[2].base1 = vs_base1[2];
+	vtx[2].offs1 = vs_offs1[2];
+	vtx[2].uv1 = vs_uv1[2];
+#endif
+#endif
+	int vtxCount = 3;
+	vtx[0].clipDist = gl_in[0].gl_ClipDistance[0];
+	vtx[1].clipDist = gl_in[1].gl_ClipDistance[0];
+	vtx[2].clipDist = gl_in[2].gl_ClipDistance[0];
+
+	// near-plane only
+	vec3 dist = vec3(vtx[0].clipDist, vtx[1].clipDist, vtx[2].clipDist);
+	Vertex v3;
+	int size = clip3(dist, vtx[0], vtx[1], vtx[2], v3);
+	if (size == 0)
+		vtxCount = 0;
+	else if (size == 4)
+	{
+		// clip3() produced a quad: add its second triangle (v0, v2, v3)
+		vtx[3] = vtx[0];
+		vtx[4] = vtx[2];
+		vtx[5] = v3;
+		vtxCount = 6;
+	}
+
+	for (int i = 0; i + 2 < vtxCount; i += 3)
+	{
+		emitVertex(vtx[i]);
+		emitVertex(vtx[i + 1]);
+		emitVertex(vtx[i + 2]);
+		EndPrimitive();
+	}
+}
+
+)";
+// Builds the N2 vertex shader source; N2ColorShader is left out when geometryOnly is set.
+N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly) : OpenGlSource()
+{
+	addConstant("pp_Gouraud", gouraud);
+	addConstant("GEOM_ONLY", geometryOnly);
+	addConstant("TWO_VOLUMES", 0);
+
+	addSource(VertexCompatShader);
+	addSource(GouraudSource);
+	if (!geometryOnly)
+		addSource(N2ColorShader);
+	addSource(N2VertexShader);
+}
+// Builds the geometry shader source: GouraudSource followed by GeometryClippingShader.
+N2GeometryShader::N2GeometryShader(bool gouraud, bool geometryOnly) : OpenGlSource()
+{
+	addConstant("pp_Gouraud", gouraud);
+	addConstant("GEOM_ONLY", geometryOnly);
+	addConstant("TWO_VOLUMES", 0);
+	addSource(GouraudSource);
+	addSource(GeometryClippingShader);
+}
+// Uploads the int v to the uniform "lights[lightId].name" of shader->program.
+static void setLightUniform(const PipelineShader *shader, int lightId, const char *name, int v)
+{
+	char s[128];
+	snprintf(s, sizeof(s), "lights[%d].%s", lightId, name);	// bounded: truncates instead of overrunning s
+	GLint loc = glGetUniformLocation(shader->program, s);	// NOTE(review): looked up by name on every call
+	glUniform1i(loc, v);
+}
+// Uploads the float v to the uniform "lights[lightId].name" of shader->program.
+static void setLightUniform(const PipelineShader *shader, int lightId, const char *name, float v)
+{
+	char s[128];
+	snprintf(s, sizeof(s), "lights[%d].%s", lightId, name);	// bounded: truncates instead of overrunning s
+	GLint loc = glGetUniformLocation(shader->program, s);
+	glUniform1f(loc, v);
+}
+// Uploads the 4 floats at v to the uniform "lights[lightId].name" of shader->program.
+static void setLightUniform4f(const PipelineShader *shader, int lightId, const char *name, const float *v)
+{
+	char s[128];
+	snprintf(s, sizeof(s), "lights[%d].%s", lightId, name);	// bounded: truncates instead of overrunning s
+	GLint loc = glGetUniformLocation(shader->program, s);
+	glUniform4fv(loc, 1, v);
+}
+// Uploads pp's matrices, gloss coefficient, light model (or defaults) and env-mapping flag; enables GL_CLIP_DISTANCE0.
+void setN2Uniforms(const PolyParam *pp, const PipelineShader *shader)
+{
+	glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, &pp->mvMatrix[0]);
+	glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, &pp->projMatrix[0]);
+	glUniform1f(shader->glossCoef0, pp->glossCoef0);
+	N2LightModel *const lightModel = pp->lightModel;
+	if (lightModel != nullptr)
+	{
+		glUniform1i(shader->ambientMaterial, lightModel->ambientMaterial);
+		glUniform4fv(shader->ambientBase, 1, lightModel->ambientBase);
+		glUniform4fv(shader->ambientOffset, 1, lightModel->ambientOffset);
+		glUniform1i(shader->useBaseOver, lightModel->useBaseOver);
+		glUniform1i(shader->lightCount, lightModel->lightCount);
+		for (int i = 0; i < lightModel->lightCount; i++)
+		{
+			const N2Light& light = lightModel->lights[i];
+			setLightUniform(shader, i, "parallel", light.parallel);
+
+			setLightUniform4f(shader, i, "color", light.color);
+			setLightUniform4f(shader, i, "direction", light.direction);
+			setLightUniform4f(shader, i, "position", light.position);
+
+			setLightUniform(shader, i, "diffuse", light.diffuse);
+			setLightUniform(shader, i, "specular", light.specular);
+			setLightUniform(shader, i, "routing", light.routing);
+			setLightUniform(shader, i, "dmode", light.dmode);
+			setLightUniform(shader, i, "smode", light.smode);
+			setLightUniform(shader, i, "distAttnMode", light.distAttnMode);
+
+			setLightUniform(shader, i, "attnDistA", light.attnDistA);
+			setLightUniform(shader, i, "attnDistB", light.attnDistB);
+			setLightUniform(shader, i, "attnAngleA", light.attnAngleA);
+			setLightUniform(shader, i, "attnAngleB", light.attnAngleB);
+		}
+	}
+	else	// no light model: white ambient base, black ambient offset, zero lights
+	{
+		float white[] { 1.f, 1.f, 1.f, 1.f };
+		float black[4]{};
+		glUniform1i(shader->ambientMaterial, 0);
+		glUniform4fv(shader->ambientBase, 1, white);
+		glUniform4fv(shader->ambientOffset, 1, black);
+		glUniform1i(shader->useBaseOver, 0);
+		glUniform1i(shader->lightCount, 0);
+	}
+	glUniform1i(shader->envMapping, pp->envMapping);
+
+	glEnable(GL_CLIP_DISTANCE0);	// clip distance 0 is what GeometryClippingShader's main() reads
+}
diff --git a/core/rend/gles/naomi2.h b/core/rend/gles/naomi2.h
new file mode 100644
index 000000000..0a49cbec2
--- /dev/null
+++ b/core/rend/gles/naomi2.h
@@ -0,0 +1,34 @@
+/*
+    Copyright 2022 flyinghead
+
+    This file is part of Flycast.
+ + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see <https://www.gnu.org/licenses/>. + */ +#pragma once +#include "gles.h" + +void setN2Uniforms(const PolyParam *pp, const PipelineShader *shader); + +class N2VertexSource : public OpenGlSource +{ +public: + N2VertexSource(bool gouraud, bool geometryOnly = false); +}; + +class N2GeometryShader : public OpenGlSource +{ +public: + N2GeometryShader(bool gouraud, bool geometryOnly = false); +}; diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp index 4cabe014b..16d1428de 100644 --- a/core/rend/sorter.cpp +++ b/core/rend/sorter.cpp @@ -17,6 +17,8 @@ #include "sorter.h" #include "hw/pvr/Renderer_if.h" #include +#include +#include struct IndexTrig { @@ -25,18 +27,6 @@ struct IndexTrig f32 z; }; -#if 0 -static float min3(float v0, float v1, float v2) -{ - return std::min(std::min(v0, v1), v2); -} - -static float max3(float v0, float v1, float v2) -{ - return std::max(std::max(v0, v1), v2); -} -#endif - static float minZ(const Vertex *v, const u32 *mod) { return std::min(std::min(v[mod[0]].z, v[mod[1]].z), v[mod[2]].z); @@ -54,6 +44,12 @@ static bool operator<(const PolyParam& left, const PolyParam& right) //return left.zMinx + mat[1][3] * v->y + mat[2][3] * v->z + mat[3][3]; +} + void SortPParams(int first, int count) { if (pvrrc.verts.used() == 0 || count <= 1) @@ -78,14 +74,58 @@ void SortPParams(int first, int count) Vertex* vtx=vtx_base+idx[0]; Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; - u32 zv=0xFFFFFFFF; - 
while(vtx!=vtx_end) + if (pp->projMatrix != nullptr) { - zv = std::min(zv, (u32&)vtx->z); - vtx++; - } + glm::mat4 mvMat = glm::make_mat4(pp->mvMatrix); + glm::mat4 projMat = glm::make_mat4(pp->projMatrix); + glm::vec4 min{ 1e38f, 1e38f, 1e38f, 0.f }; + glm::vec4 max{ -1e38f, -1e38f, -1e38f, 0.f }; + while (vtx != vtx_end) + { + glm::vec4 pos{ vtx->x, vtx->y, vtx->z, 0.f }; + min = glm::min(min, pos); + max = glm::max(max, pos); + vtx++; + } + glm::vec4 center = (min + max) / 2.f; + center.w = 1; + glm::vec4 extents = max - center; + // transform + center = mvMat * center; + glm::vec3 extentX = mvMat * glm::vec4(extents.x, 0, 0, 0); + glm::vec3 extentY = mvMat * glm::vec4(0, extents.y, 0, 0); + glm::vec3 extentZ = mvMat * glm::vec4(0, 0, extents.z, 0); + // new AA extents + const float newX = std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentX)) + + std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentZ)); - pp->zvZ=(f32&)zv; + const float newY = std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentX)) + + std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentZ)); + + const float newZ = std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentX)) + + std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentY)) + + std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentZ)); + min = center - glm::vec4(newX, newY, newZ, 0); + max = center + glm::vec4(newX, newY, newZ, 0); + // project + glm::vec4 a = projMat * min; + glm::vec4 b = projMat * max; + + pp->zvZ = 1 / std::max(a.w, b.w); + } + else + { + u32 zv=0xFFFFFFFF; + while(vtx!=vtx_end) + { + zv = std::min(zv, (u32&)vtx->z); + vtx++; + } + + pp->zvZ=(f32&)zv; + } } pp++; } @@ -193,7 +233,9 @@ bool Intersect(const IndexTrig &left, const IndexTrig &right) static bool PP_EQ(const PolyParam *pp0, const PolyParam *pp1) { return (pp0->pcw.full & PCW_DRAW_MASK) == (pp1->pcw.full & PCW_DRAW_MASK) && pp0->isp.full == 
pp1->isp.full - && pp0->tcw.full == pp1->tcw.full && pp0->tsp.full == pp1->tsp.full && pp0->tileclip == pp1->tileclip; + && pp0->tcw.full == pp1->tcw.full && pp0->tsp.full == pp1->tsp.full && pp0->tileclip == pp1->tileclip + && pp0->mvMatrix == pp1->mvMatrix && pp0->projMatrix == pp1->projMatrix + && pp0->lightModel == pp1->lightModel && pp0->envMapping == pp1->envMapping; } static void fill_id(u32 *d, const Vertex *v0, const Vertex *v1, const Vertex *v2, const Vertex *vb) @@ -246,15 +288,23 @@ void GenSorted(int first, int count, std::vector& pidx_sort, int pfsti=0; - while(pp!=pp_end) + while (pp != pp_end) { u32 ppid = (u32)(pp - pp_base); - if (pp->count>2) + if (pp->count > 2) { const u32 *idx = idx_base + pp->first; u32 flip = 0; + glm::mat4 mat; + float z0, z1; + if (pp->projMatrix != nullptr) + { + mat = glm::make_mat4(pp->projMatrix) * glm::make_mat4(pp->mvMatrix); + z0 = getProjectedZ(vtx_base + idx[0], mat); + z1 = getProjectedZ(vtx_base + idx[1], mat); + } for (u32 i = 0; i < pp->count - 2; i++) { const Vertex *v0, *v1; @@ -269,88 +319,20 @@ void GenSorted(int first, int count, std::vector& pidx_sort, v1 = vtx_base + idx[i + 1]; } const Vertex *v2 = vtx_base + idx[i + 2]; -#if 0 - const Vertex *v3, *v4, *v5; - if (settings.pvr.subdivide_transp) + fill_id(lst[pfsti].id, v0, v1, v2, vtx_base); + lst[pfsti].pid = ppid; + if (pp->projMatrix != nullptr) { - u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32; - u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32; - - if (tess_x==1) tess_x=0; - if (tess_y==1) tess_y=0; - - //bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2; - - if (tess_x + tess_y) - { - v3=pvrrc.verts.Append(3); - v4=v3+1; - v5=v4+1; - - //xyz - for (int i=0;i<3;i++) - { - ((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - ((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f; - ((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - } - - //*TODO* Make it 
perspective correct - - //uv - for (int i=0;i<2;i++) - { - ((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - ((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f; - ((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - } - - //color - for (int i=0;i<4;i++) - { - v3->col[i]=v0->col[i]/2+v2->col[i]/2; - v4->col[i]=v0->col[i]/2+v1->col[i]/2; - v5->col[i]=v1->col[i]/2+v2->col[i]/2; - } - - fill_id(lst[pfsti].id,v0,v3,v4,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v2,v3,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v3,v4,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v5,v4,v1,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - tess_gen+=3; - } - else - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } + float z2 = getProjectedZ(v2, mat); + lst[pfsti].z = std::min(z0, std::min(z1, z2)); + z0 = z1; + z1 = z2; } else -#endif { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; + lst[pfsti].z = minZ(vtx_base, lst[pfsti].id); } + pfsti++; flip ^= 1; }