naomi2: do T&L on the gpu. bypass TA format. env mapping.

fix model/instance/matrix parsing
handle culling reversed model flag
geometry shader for near plane clipping
This commit is contained in:
Flyinghead 2022-01-25 15:37:04 +01:00
parent b048921c11
commit cb99e7d920
19 changed files with 1921 additions and 527 deletions

View File

@ -919,7 +919,9 @@ if(USE_OPENGL)
core/rend/gles/gltex.cpp
core/rend/gles/quad.cpp
core/rend/gles/postprocess.cpp
core/rend/gles/postprocess.h)
core/rend/gles/postprocess.h
core/rend/gles/naomi2.cpp
core/rend/gles/naomi2.h)
if(NOT LIBRETRO)
target_sources(${PROJECT_NAME} PRIVATE
@ -934,7 +936,9 @@ if(USE_OPENGL)
core/rend/gl4/abuffer.cpp
core/rend/gl4/gl4.h
core/rend/gl4/gldraw.cpp
core/rend/gl4/gles.cpp)
core/rend/gl4/gles.cpp
core/rend/gl4/naomi2.cpp
core/rend/gl4/naomi2.h)
endif()
endif()

File diff suppressed because it is too large Load Diff

View File

@ -25,11 +25,11 @@ namespace elan
union PCW
{
enum Command {
null = 0,
unk_1, // instance matrix continuation?
matrix = 2,
null = 0,
_matrix2 = 1,
_matrix1 = 2,
projMatrix = 3,
instance = 4,
matrixOrLight = 4,
gmp = 5,
ich = 7,
model = 8,
@ -86,27 +86,15 @@ struct Model : public ElanBase
};
static_assert(sizeof(Model) % 32 == 0, "Invalid size for Model");
struct Instance : public ElanBase
struct InstanceMatrix : public ElanBase
{
// 08000400
u32 id1; // f
u32 id2; // 7f
u32 _res;
u32 offset;
u32 one; // 1
u32 size;
u32 _res0;
u32 _res[5];
bool isModelInstance() const {
return id1 == 0xf && id2 == 0x7f && one == 1;
}
};
static_assert(sizeof(Instance) % 32 == 0, "Invalid size for Instance");
struct Matrix : public ElanBase
{
// 08000200
float proj7; // env map U offset
u32 _res1; // 08000200
float envMapU; // env map U offset
float lm00;
float lm01;
float lm02;
@ -116,10 +104,11 @@ struct Matrix : public ElanBase
float tm20;
float tm21;
float tm22;
float proj8; // env map V offset
float _res[4];
u32 contCmd;
float proj4; // near?
float envMapV; // env map V offset
float _res2[4];
u32 _res3; // 08000100
float _near;
float tm00;
float tm10;
float mfr2;
@ -132,10 +121,14 @@ struct Matrix : public ElanBase
float mat03;
float mat13;
float mat23;
float proj5; // far?
float _far;
float mproj6;
bool isInstanceMatrix() const {
return id1 == 0xf && id2 == 0x7f;
}
};
static_assert(sizeof(Matrix) % 32 == 0, "Invalid size for Matrix");
static_assert(sizeof(InstanceMatrix) % 32 == 0, "Invalid size for InstanceMatrix");
struct ProjMatrix : public ElanBase
{
@ -192,11 +185,12 @@ struct GMP : public ElanBase
// ee110 1111 1111 constant
// 00000 1111 1111 bump shading?
// seen: 00110 1111 1111 (b0 and b1 set)
// seen: 11000 1111 1111 (e0 and e1 set, followed by vtx type2 (vtx only))
// seen: 11110 1111 1111 (everything! except v1uv0, rt66, vtx type2 (vtx only))
// seen: 00110 0000 0000 (b0 and b1, vf4)
// seen: 00000 1010 1010 specular and fog? soul surfer
// seen:
// 00110 1111 1111 (b0 and b1 set)
// 11000 1111 1111 (e0 and e1 set, followed by vtx type2 (vtx only))
// 11110 1111 1111 (everything! except v1uv0, rt66, vtx type2 (vtx only))
// 00110 0000 0000 (b0 and b1, vf4)
// 00000 1010 1010 specular and fog? soul surfer
u32 diffuse0;
u32 specular0;
@ -234,7 +228,7 @@ union HeaderAndNormal
bool isStrip() const { return strip == 1 && fan == 0; }
};
struct Vertex
struct N2_VERTEX
{
HeaderAndNormal header;
float x;
@ -294,7 +288,7 @@ struct BumpMap
//
// textured, 1 or 2 para
//
struct N2_VERTEX_VU : public Vertex
struct N2_VERTEX_VU : public N2_VERTEX
{
UnpackedUV uv;
};
@ -302,7 +296,7 @@ struct N2_VERTEX_VU : public Vertex
//
// textured, 1 or 2 para with unpacked normal
//
struct N2_VERTEX_VNU : public Vertex
struct N2_VERTEX_VNU : public N2_VERTEX
{
Normal normal;
UnpackedUV uv;
@ -311,7 +305,7 @@ struct N2_VERTEX_VNU : public Vertex
//
// for colored vertex, 1 para
//
struct N2_VERTEX_VUR : public Vertex
struct N2_VERTEX_VUR : public N2_VERTEX
{
UnpackedUV uv;
PackedRGB rgb;
@ -320,13 +314,13 @@ struct N2_VERTEX_VUR : public Vertex
//
// for bumpmapped, 1 para
//
struct N2_VERTEX_VUB : public Vertex
struct N2_VERTEX_VUB : public N2_VERTEX
{
UnpackedUV uv;
BumpMap bump;
};
struct N2_VERTEX_VR : public Vertex
struct N2_VERTEX_VR : public N2_VERTEX
{
PackedRGB rgb;
};
@ -358,7 +352,7 @@ struct ICHList : public ElanBase
{
switch (flags)
{
case VTX_TYPE_V: return sizeof(Vertex);
case VTX_TYPE_V: return sizeof(N2_VERTEX);
case VTX_TYPE_VU: return sizeof(N2_VERTEX_VU);
case VTX_TYPE_VNU: return sizeof(N2_VERTEX_VNU);
case VTX_TYPE_VR: return sizeof(N2_VERTEX_VR);
@ -505,70 +499,57 @@ struct PointLight : public ElanBase
float posX;
float posY;
float posZ;
u16 distA;
u16 distB;
u16 angleA;
u16 angleB;
u16 _distA;
u16 _distB;
u16 _angleA;
u16 _angleB;
// Converts a 16-bit stored value to a float: the 16 bits are moved to bits 16-31
// of a u32 (lower 16 bits zero) and that bit pattern is read back as a float.
static float f16tof32(u16 v)
{
	u32 bits = (u32)v << 16;
	return reinterpret_cast<float&>(bits);
}
// Accessors returning the raw 16-bit attenuation fields converted to float by f16tof32().
float distA() const { return f16tof32(_distA); }
float distB() const { return f16tof32(_distB); }
float angleA() const { return f16tof32(_angleA); }
float angleB() const { return f16tof32(_angleB); }
float attnMinDistance() const {
float a = 0;
*((u16 *)&a + 1) = distA;
float b = 0;
*((u16 *)&b + 1) = distB;
return -b / (a - 1);
return -distB() / (distA() - 1);
}
float attnMaxDistance() const {
float a = 0;
*((u16 *)&a + 1) = distA;
float b = 0;
*((u16 *)&b + 1) = distB;
return -b / a;
return -distB() / distA();
}
float attnDist(float dist) const {
float a = 0;
*((u16 *)&a + 1) = distA;
float b = 0;
*((u16 *)&b + 1) = distB;
float rv;
if (dattenmode)
rv = b * dist + a;
rv = distB() * dist + distA();
else
rv = b / dist + a;
rv = distB() / dist + distA();
return std::max(0.f, std::min(1.f, rv));
}
bool isAttnDist() const {
return distA != 1 && distB != 0;
return distA() != 1 && distB() != 0;
}
float attnMinAngle() const {
float a = 0;
*((u16 *)&a + 1) = angleA;
float b = 0;
*((u16 *)&b + 1) = angleB;
return acosf((1 - a) / b);
return acosf((1 - angleA()) / angleB());
}
float attnMaxAngle() const {
float a = 0;
*((u16 *)&a + 1) = angleA;
float b = 0;
*((u16 *)&b + 1) = angleB;
return acosf(-a / b);
return acosf(-angleA() / angleB());
}
float attnAngle(float angleCos) const {
float a = 0;
*((u16 *)&a + 1) = angleA;
float b = 0;
*((u16 *)&b + 1) = angleB;
return std::max(0.f, std::min(1.f, angleCos * b + a));
return std::max(0.f, std::min(1.f, angleCos * angleB() + angleA()));
}
bool isAttnAngle() const {
return angleA != 1 && angleB != 0;
return angleA() != 1 && angleB() != 0;
}
};

View File

@ -263,6 +263,7 @@ void ta_vtx_ListCont()
{
SetCurrentTARC(TA_CURRENT_CTX);
ta_tad.Continue();
ta_ctx->rend.newRenderPass();
ta_cur_state=TAS_NS;
ta_fsm_cl = 7;

View File

@ -13,6 +13,7 @@ void DYNACALL ta_vtx_data32(const SQBuffer *data);
void ta_vtx_data(const SQBuffer *data, u32 size);
bool ta_parse_vdrc(TA_context *ctx, bool bgraColors = false);
bool ta_parse_naomi2(TA_context* ctx);
class TaTypeLut
{

View File

@ -11,9 +11,6 @@ static int RenderCount;
TA_context* ta_ctx;
tad_context ta_tad;
TA_context* vd_ctx;
rend_context vd_rc;
void SetCurrentTARC(u32 addr)
{
if (addr != TACTX_NONE)

View File

@ -9,6 +9,7 @@
#include <mutex>
class BaseTextureCacheData;
struct N2LightModel;
//Vertex storage types
struct Vertex
@ -25,6 +26,9 @@ struct Vertex
u8 spc1[4];
float u1,v1;
// Naomi2 normal
float nx,ny,nz;
};
struct PolyParam
@ -33,9 +37,6 @@ struct PolyParam
u32 count;
BaseTextureCacheData *texture;
#if !defined(HOST_64BIT_CPU)
u32 _pad0;
#endif
TSP tsp;
TCW tcw;
@ -47,9 +48,13 @@ struct PolyParam
TSP tsp1;
TCW tcw1;
BaseTextureCacheData *texture1;
#if !defined(HOST_64BIT_CPU)
u32 _pad1;
#endif
float *mvMatrix;
float *projMatrix;
float glossCoef0;
float glossCoef1;
N2LightModel *lightModel;
bool envMapping;
};
struct ModifierVolumeParam
@ -57,6 +62,9 @@ struct ModifierVolumeParam
u32 first;
u32 count;
ISP_Modvol isp;
float *mvMatrix;
float *projMatrix;
};
struct ModTriangle
@ -98,10 +106,9 @@ struct tad_context
void Reset(u8* ptr)
{
thd_data = thd_root = thd_old_data = ptr;
render_pass_count = 0;
thd_root = ptr;
Clear();
}
};
struct RenderPass {
@ -114,6 +121,41 @@ struct RenderPass {
u32 mvo_tr_count;
};
// A matrix of 16 floats. Instances are stored in rend_context::matrices;
// ta_add_matrix() copies a caller-supplied matrix into a new entry and returns a pointer to 'mat'.
struct N2Matrix
{
float mat[16];
};
// Parameters of a single Naomi 2 light, as stored in N2LightModel::lights.
// NOTE(review): the int members look like flags/mode selectors filled in by the Naomi 2 light
// parsing code (not visible here) - confirm their exact meaning and value ranges there.
struct N2Light
{
float color[4];
float direction[4]; // For parallel/spot
float position[4]; // For spot/point
int parallel;
int diffuse;
int specular;
int routing;
int dmode;
int smode;
int distAttnMode; // For spot/point
// Distance attenuation coefficients
float attnDistA;
float attnDistB;
// Angle attenuation coefficients
float attnAngleA; // For spot
float attnAngleB;
};
// A set of lights plus ambient settings. Instances are stored in rend_context::lightModels
// (see ta_add_light()) and referenced by PolyParam::lightModel.
struct N2LightModel
{
N2Light lights[16];
int lightCount; // presumably the number of entries of 'lights' in use - confirm against the code filling it
float ambientBase[4]; // base ambient color
float ambientOffset[4]; // offset ambient color
bool ambientMaterial; // ambient light is multiplied by model material/color
bool useBaseOver; // base color overflows into offset color
};
struct rend_context
{
u8* proc_start;
@ -143,6 +185,10 @@ struct rend_context
List<PolyParam> global_param_tr;
List<RenderPass> render_passes;
List<N2Matrix> matrices;
List<N2LightModel> lightModels;
bool init = false;
void Clear()
{
verts.Clear();
@ -155,11 +201,19 @@ struct rend_context
global_param_mvo_tr.Clear();
render_passes.Clear();
Overrun=false;
fZ_min= 1000000.0f;
fZ_max= 1.0f;
// Reserve space for background poly
global_param_op.Append();
verts.Append(4);
Overrun = false;
fZ_min = 1000000.0f;
fZ_max = 1.0f;
isRenderFramebuffer = false;
matrices.Clear();
lightModels.Clear();
}
void newRenderPass();
};
#define TA_DATA_SIZE (8 * 1024 * 1024)
@ -205,17 +259,20 @@ struct TA_context
{
tad.Reset((u8*)allocAligned(32, TA_DATA_SIZE));
rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame
rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead )
rend.global_param_op.Init(16384, &rend.Overrun, "global_param_op");
rend.verts.InitBytes(16 * 1024 * 1024, &rend.Overrun, "verts"); //up to 16 MB of vtx data/frame
rend.idx.Init(512 * 1024, &rend.Overrun, "idx"); //up to 512K indexes ( idx have stripification overhead )
rend.global_param_op.Init(32768, &rend.Overrun, "global_param_op");
rend.global_param_pt.Init(5120, &rend.Overrun, "global_param_pt");
rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo");
rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr");
rend.global_param_tr.Init(32768, &rend.Overrun, "global_param_tr");
rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr");
rend.modtrig.Init(16384, &rend.Overrun, "modtrig");
rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes
rend.matrices.Init(1000, &rend.Overrun, "matrices");
rend.lightModels.Init(100, &rend.Overrun, "lightModels");
rend.init = true;
Reset();
}
@ -243,6 +300,8 @@ struct TA_context
rend.global_param_mvo.Free();
rend.global_param_mvo_tr.Free();
rend.render_passes.Free();
rend.matrices.Free();
rend.lightModels.Free();
}
};
@ -250,9 +309,6 @@ struct TA_context
extern TA_context* ta_ctx;
extern tad_context ta_tad;
extern TA_context* vd_ctx;
extern rend_context vd_rc;
TA_context* tactx_Find(u32 addr, bool allocnew=false);
TA_context* tactx_Pop(u32 addr);
@ -274,7 +330,14 @@ void FinishRender(TA_context* ctx);
//must be moved to proper header
void FillBGP(TA_context* ctx);
bool UsingAutoSort(int pass_number);
bool rend_framePending();
void SerializeTAContext(Serializer& ser);
void DeserializeTAContext(Deserializer& deser);
void ta_add_poly(int type, const PolyParam& pp);
void ta_add_poly(int type, const ModifierVolumeParam& mvp);
void ta_add_vertex(const Vertex& vtx);
void ta_add_triangle(const ModTriangle& tri);
float* ta_add_matrix(const float *matrix);
N2LightModel *ta_add_light(const N2LightModel& light);
void ta_add_ta_data(u32 *data, u32 size);

View File

@ -48,6 +48,9 @@ static u8 float_to_satu8_math(float val)
return u8(saturate01(val)*255);
}
static TA_context *vd_ctx;
#define vd_rc (vd_ctx->rend)
//vdec state variables
static ModTriangle* lmr;
@ -66,6 +69,7 @@ static u32 SFaceOffsColor;
const u32 ListType_None = -1;
const u32 SZ32 = 1;
const u32 SZ64 = 2;
static bool fetchTextures = true;
#include "ta_structs.h"
@ -84,8 +88,6 @@ static f32 f16(u16 v)
return *(f32*)&z;
}
#define vdrc vd_rc
template<int Red = 0, int Green = 1, int Blue = 2, int Alpha = 3>
class FifoSplitter
{
@ -625,7 +627,6 @@ public:
void vdec_init()
{
VDECInit();
TaCmd = ta_main;
CurrentList = ListType_None;
ListIsFinished[0] = ListIsFinished[1] = ListIsFinished[2] = ListIsFinished[3] = ListIsFinished[4] = false;
@ -664,11 +665,11 @@ private:
static void StartList(u32 ListType)
{
if (ListType==ListType_Opaque)
CurrentPPlist=&vdrc.global_param_op;
CurrentPPlist=&vd_rc.global_param_op;
else if (ListType==ListType_Punch_Through)
CurrentPPlist=&vdrc.global_param_pt;
CurrentPPlist=&vd_rc.global_param_pt;
else if (ListType==ListType_Translucent)
CurrentPPlist=&vdrc.global_param_tr;
CurrentPPlist=&vd_rc.global_param_tr;
CurrentPP = NULL;
}
@ -696,7 +697,7 @@ private:
d_pp = CurrentPPlist->Append();
CurrentPP = d_pp;
}
d_pp->first = vdrc.verts.used();
d_pp->first = vd_rc.verts.used();
d_pp->count = 0;
d_pp->isp = pp->isp;
@ -705,7 +706,7 @@ private:
d_pp->pcw = pp->pcw;
d_pp->tileclip = tileclip_val;
if (d_pp->pcw.Texture)
if (d_pp->pcw.Texture && fetchTextures)
d_pp->texture = renderer->GetTexture(d_pp->tsp, d_pp->tcw);
else
d_pp->texture = nullptr;
@ -713,6 +714,10 @@ private:
d_pp->tsp1.full = -1;
d_pp->tcw1.full = -1;
d_pp->texture1 = nullptr;
d_pp->mvMatrix = nullptr;
d_pp->projMatrix = nullptr;
d_pp->lightModel = nullptr;
d_pp->envMapping = false;
}
#define glob_param_bdc(pp) glob_param_bdc_( (TA_PolyParam0*)pp)
@ -776,7 +781,7 @@ private:
CurrentPP->tsp1.full = pp->tsp1.full;
CurrentPP->tcw1.full = pp->tcw1.full;
if (pp->pcw.Texture)
if (pp->pcw.Texture && fetchTextures)
CurrentPP->texture1 = renderer->GetTexture(pp->tsp1, pp->tcw1);
}
@ -790,7 +795,7 @@ private:
CurrentPP->tsp1.full = pp->tsp1.full;
CurrentPP->tcw1.full = pp->tcw1.full;
if (pp->pcw.Texture)
if (pp->pcw.Texture && fetchTextures)
CurrentPP->texture1 = renderer->GetTexture(pp->tsp1, pp->tcw1);
}
@ -807,14 +812,14 @@ private:
__forceinline
static void EndPolyStrip()
{
CurrentPP->count = vdrc.verts.used() - CurrentPP->first;
CurrentPP->count = vd_rc.verts.used() - CurrentPP->first;
if (CurrentPP->count > 0)
{
PolyParam* d_pp = CurrentPPlist->Append();
*d_pp = *CurrentPP;
CurrentPP = d_pp;
d_pp->first = vdrc.verts.used();
d_pp->first = vd_rc.verts.used();
d_pp->count = 0;
}
}
@ -823,8 +828,8 @@ private:
static inline void update_fz(float z)
{
if ((s32&)vdrc.fZ_max<(s32&)z && (s32&)z<0x49800000)
vdrc.fZ_max=z;
if ((s32&)vd_rc.fZ_max<(s32&)z && (s32&)z<0x49800000)
vd_rc.fZ_max=z;
}
//Poly Vertex handlers
@ -833,7 +838,7 @@ private:
static Vertex* vert_cvt_base_(T* vtx)
{
f32 invW=vtx->xyz[2];
Vertex* cv=vdrc.verts.Append();
Vertex* cv=vd_rc.verts.Append();
cv->x=vtx->xyz[0];
cv->y=vtx->xyz[1];
cv->z=invW;
@ -845,7 +850,7 @@ private:
//Resume vertex base (for B part)
#define vert_res_base \
Vertex* cv=vdrc.verts.LastPtr();
Vertex* cv=vd_rc.verts.LastPtr();
//uv 16/32
#define vert_uv_32(u_name,v_name) \
@ -1159,7 +1164,7 @@ private:
CurrentPP=d_pp;
}
d_pp->first = vdrc.verts.used();
d_pp->first = vd_rc.verts.used();
d_pp->count=0;
d_pp->isp=spr->isp;
d_pp->tsp=spr->tsp;
@ -1167,7 +1172,7 @@ private:
d_pp->pcw=spr->pcw;
d_pp->tileclip=tileclip_val;
if (d_pp->pcw.Texture)
if (d_pp->pcw.Texture && fetchTextures)
d_pp->texture = renderer->GetTexture(d_pp->tsp, d_pp->tcw);
else
d_pp->texture = nullptr;
@ -1175,6 +1180,10 @@ private:
d_pp->tcw1.full = -1;
d_pp->tsp1.full = -1;
d_pp->texture1 = nullptr;
d_pp->mvMatrix = nullptr;
d_pp->projMatrix = nullptr;
d_pp->lightModel = nullptr;
d_pp->envMapping = false;
SFaceBaseColor=spr->BaseCol;
SFaceOffsColor=spr->OffsCol;
@ -1196,7 +1205,7 @@ private:
{
CurrentPP->count = 4;
Vertex* cv = vdrc.verts.Append(4);
Vertex* cv = vd_rc.verts.Append(4);
//Fill static stuff
append_sprite(0);
@ -1290,7 +1299,7 @@ private:
PolyParam* d_pp = CurrentPPlist->Append();
*d_pp = *CurrentPP;
CurrentPP = d_pp;
d_pp->first = vdrc.verts.used();
d_pp->first = vd_rc.verts.used();
d_pp->count = 0;
}
@ -1300,15 +1309,15 @@ private:
{
List<ModifierVolumeParam> *list = NULL;
if (CurrentList == ListType_Opaque_Modifier_Volume)
list = &vdrc.global_param_mvo;
list = &vd_rc.global_param_mvo;
else if (CurrentList == ListType_Translucent_Modifier_Volume)
list = &vdrc.global_param_mvo_tr;
list = &vd_rc.global_param_mvo_tr;
else
return;
if (list->used() > 0)
{
ModifierVolumeParam *p = list->LastPtr();
p->count = vdrc.modtrig.used() - p->first;
p->count = vd_rc.modtrig.used() - p->first;
if (p->count == 0)
list->PopLast();
@ -1322,21 +1331,23 @@ private:
ModifierVolumeParam *p = NULL;
if (CurrentList == ListType_Opaque_Modifier_Volume)
p = vdrc.global_param_mvo.Append();
p = vd_rc.global_param_mvo.Append();
else if (CurrentList == ListType_Translucent_Modifier_Volume)
p = vdrc.global_param_mvo_tr.Append();
p = vd_rc.global_param_mvo_tr.Append();
else
return;
p->isp.full = param->isp.full;
p->isp.VolumeLast = param->pcw.Volume != 0;
p->first = vdrc.modtrig.used();
p->first = vd_rc.modtrig.used();
p->mvMatrix = nullptr;
p->projMatrix = nullptr;
}
__forceinline
static void AppendModVolVertexA(TA_ModVolA* mvv)
{
if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume)
return;
lmr=vdrc.modtrig.Append();
lmr=vd_rc.modtrig.Append();
lmr->x0=mvv->x0;
lmr->y0=mvv->y0;
@ -1360,15 +1371,6 @@ private:
lmr->z2=mvv->z2;
//update_fz(mvv->z2);
}
static void VDECInit()
{
vd_rc.Clear();
//allocate storage for BG poly
vd_rc.global_param_op.Append();
vd_rc.verts.Append(4);
}
};
template<int Red, int Green, int Blue, int Alpha>
@ -1395,6 +1397,7 @@ TaTypeLut::TaTypeLut()
}
static bool ClearZBeforePass(int pass_number);
static bool UsingAutoSort(int pass_number);
static void getRegionTileClipping(u32& xmin, u32& xmax, u32& ymin, u32& ymax);
FifoSplitter<> TAParser;
@ -1426,11 +1429,16 @@ static void make_index(const List<PolyParam> *polys, int first, int end, bool me
bool dupe_next_vtx = false;
if (merge
&& last_poly != nullptr
&& last_poly->count != 0
&& poly->pcw.full == last_poly->pcw.full
&& poly->tcw.full == last_poly->tcw.full
&& poly->tsp.full == last_poly->tsp.full
&& poly->isp.full == last_poly->isp.full
&& poly->tileclip == last_poly->tileclip
&& poly->mvMatrix == last_poly->mvMatrix
&& poly->projMatrix == last_poly->projMatrix
&& poly->lightModel == last_poly->lightModel
&& poly->envMapping == last_poly->envMapping
// FIXME tcw1, tsp1?
)
{
@ -1448,7 +1456,7 @@ static void make_index(const List<PolyParam> *polys, int first, int end, bool me
for (u32 i = 0; i < poly->count; i++)
{
const Vertex& vtx = vertices[poly->first + i];
if (is_vertex_inf(vtx))
if (poly->projMatrix == nullptr && is_vertex_inf(vtx))
{
while (i < poly->count - 1)
{
@ -1544,14 +1552,14 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors)
{
ctx->rend_inuse.lock();
bool rv=false;
verify(vd_ctx == 0);
verify(vd_ctx == nullptr);
vd_ctx = ctx;
vd_rc = vd_ctx->rend;
if (bgraColors)
TAParserDX.vdec_init();
else
TAParser.vdec_init();
vd_rc.Clear();
bool empty_context = true;
int op_poly_count = 0;
@ -1563,6 +1571,10 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors)
{
bgpp->texture = renderer->GetTexture(bgpp->tsp, bgpp->tcw);
empty_context = false;
bgpp->mvMatrix = nullptr;
bgpp->projMatrix = nullptr;
bgpp->lightModel = nullptr;
bgpp->envMapping = false;
}
for (u32 pass = 0; pass <= ctx->tad.render_pass_count; pass++)
@ -1627,8 +1639,7 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors)
vd_rc.fb_Y_CLIP.max = std::min(vd_rc.fb_Y_CLIP.max, ymax + 31);
}
vd_ctx->rend = vd_rc;
vd_ctx = 0;
vd_ctx = nullptr;
ctx->rend_inuse.unlock();
ctx->rend.Overrun = overrun;
@ -1636,6 +1647,149 @@ bool ta_parse_vdrc(TA_context* ctx, bool bgraColors)
return rv && !overrun;
}
// Finalizes a TA context for rendering without running the TA parser over it:
// - clears the Naomi 2 specific fields of the background polygon (first opaque entry)
// - resolves the texture of every textured opaque, punch-through and translucent polygon
// - closes the current render pass and builds the index list of each pass
// - narrows the framebuffer clipping rectangle to the region tile clipping
// The context's rend_inuse lock is held for the duration of the call.
// Returns false, after logging a warning, if the context was flagged as overrun; true otherwise.
// NOTE(review): only PolyParam::texture is resolved here, texture1 is left untouched -
// confirm whether two-volume polygons need their second texture fetched as well.
bool ta_parse_naomi2(TA_context* ctx)	// TODO BGRA colors
{
	ctx->rend_inuse.lock();
	rend_context& rc = ctx->rend;

	PolyParam *background = rc.global_param_op.head();
	background->mvMatrix = nullptr;
	background->projMatrix = nullptr;
	background->lightModel = nullptr;
	background->envMapping = false;

	const auto resolveTextures = [](List<PolyParam>& polys) {
		for (PolyParam& poly : polys)
		{
			if (!poly.pcw.Texture)
				continue;
			poly.texture = renderer->GetTexture(poly.tsp, poly.tcw);
		}
	};
	resolveTextures(rc.global_param_op);
	resolveTextures(rc.global_param_pt);
	resolveTextures(rc.global_param_tr);

	if (rc.Overrun)
	{
		WARN_LOG(PVR, "ERROR: TA context overrun");
		ctx->rend_inuse.unlock();
		return false;
	}

	rc.newRenderPass();

	// Each pass records cumulative list sizes, so a pass covers [previous end, own end)
	int opFirst = 0;
	int ptFirst = 0;
	int trFirst = 0;
	for (const RenderPass& pass : rc.render_passes)
	{
		make_index(&rc.global_param_op, opFirst, pass.op_count, true, &rc);
		make_index(&rc.global_param_pt, ptFirst, pass.pt_count, true, &rc);
		make_index(&rc.global_param_tr, trFirst, pass.tr_count, false, &rc);
		opFirst = pass.op_count;
		ptFirst = pass.pt_count;
		trFirst = pass.tr_count;
	}

	u32 xmin, xmax, ymin, ymax;
	getRegionTileClipping(xmin, xmax, ymin, ymax);
	rc.fb_X_CLIP.min = std::max(rc.fb_X_CLIP.min, xmin);
	rc.fb_X_CLIP.max = std::min(rc.fb_X_CLIP.max, xmax + 31);
	rc.fb_Y_CLIP.min = std::max(rc.fb_Y_CLIP.min, ymin);
	rc.fb_Y_CLIP.max = std::min(rc.fb_Y_CLIP.max, ymax + 31);

	ctx->rend_inuse.unlock();
	return true;
}
static PolyParam *n2CurrentPP;
static ModifierVolumeParam *n2CurrentMVP;
void ta_add_poly(int type, const PolyParam& pp)
{
verify(ta_ctx != nullptr);
switch (type)
{
case ListType_Opaque:
*ta_ctx->rend.global_param_op.Append() = pp;
n2CurrentPP = ta_ctx->rend.global_param_op.LastPtr();
break;
case ListType_Translucent:
*ta_ctx->rend.global_param_tr.Append() = pp;
n2CurrentPP = ta_ctx->rend.global_param_tr.LastPtr();
break;
case ListType_Punch_Through:
*ta_ctx->rend.global_param_pt.Append() = pp;
n2CurrentPP = ta_ctx->rend.global_param_pt.LastPtr();
break;
default:
die("wrong list type");
break;
}
n2CurrentPP->first = ta_ctx->rend.verts.used();
n2CurrentPP->count = 0;
}
void ta_add_poly(int type, const ModifierVolumeParam& mvp)
{
verify(ta_ctx != nullptr);
switch (type)
{
case ListType_Opaque_Modifier_Volume:
*ta_ctx->rend.global_param_mvo.Append() = mvp;
n2CurrentMVP = ta_ctx->rend.global_param_mvo.LastPtr();
break;
case ListType_Translucent_Modifier_Volume:
*ta_ctx->rend.global_param_mvo_tr.Append() = mvp;
n2CurrentMVP = ta_ctx->rend.global_param_mvo_tr.LastPtr();
break;
default:
die("wrong list type");
break;
}
n2CurrentMVP->first = ta_ctx->rend.modtrig.used();
n2CurrentMVP->count = 0;
}
void ta_add_vertex(const Vertex& vtx)
{
*ta_ctx->rend.verts.Append() = vtx;
n2CurrentPP->count++;
}
void ta_add_triangle(const ModTriangle& tri)
{
*ta_ctx->rend.modtrig.Append() = tri;
n2CurrentMVP->count++;
}
float *ta_add_matrix(const float *matrix)
{
N2Matrix *n2mat = ta_ctx->rend.matrices.Append();
memcpy(n2mat->mat, matrix, sizeof(N2Matrix::mat));
return n2mat->mat;
}
// Stores a copy of 'light' in the current TA context's light model list and returns
// a pointer to the last entry of that list.
N2LightModel *ta_add_light(const N2LightModel& light)
{
	List<N2LightModel>& models = ta_ctx->rend.lightModels;
	*models.Append() = light;
	return models.LastPtr();
}
// Runs a buffer of TA data through the TA parser with the current TA context (ta_ctx) as the
// parser's target context. Texture lookups are disabled while parsing (fetchTextures is cleared
// and restored afterwards), and the parser's target context is reset to null on exit.
// 'size' is divided by 4 to advance the u32 pointer, i.e. it is expected in bytes.
void ta_add_ta_data(u32 *data, u32 size)
{
	vd_ctx = ta_ctx;
	fetchTextures = false;
	//TODO if (bgraColors)
	//	TAParserDX.vdec_init();
	//else
	TAParser.vdec_init();

	// Last entry that may still be handed to the command handler
	Ta_Dma * const last = (Ta_Dma *)(data + size / 4) - 1;
	for (Ta_Dma *cur = (Ta_Dma *)data; cur <= last; )
		cur = TaCmd(cur, last);

	vd_ctx = nullptr;
	fetchTextures = true;
}
//decode a vertex in the native pvr format
//used for bg poly
@ -1773,6 +1927,7 @@ void FillBGP(TA_context* ctx)
bgpp->pcw.Offset=bgpp->isp.Offset;
bgpp->pcw.Texture = bgpp->isp.Texture;
bgpp->pcw.Shadow = ISP_BACKGND_T.shadow;
bgpp->projMatrix = nullptr;
float scale_x= (SCALER_CTL.hscale) ? 2.f:1.f; //if AA hack the hacked pos value hacks
for (int i=0;i<3;i++)
@ -1887,7 +2042,7 @@ static RegionArrayTile getRegionTile(int pass_number)
return tile;
}
bool UsingAutoSort(int pass_number)
static bool UsingAutoSort(int pass_number)
{
if (((FPU_PARAM_CFG >> 21) & 1) == 0)
// Type 1 region header type
@ -1907,3 +2062,22 @@ static bool ClearZBeforePass(int pass_number)
return !tile.NoZClear;
}
void rend_context::newRenderPass()
{
verify(init);
if (global_param_op.used() > 0
|| global_param_tr.used() > 0
|| global_param_pt.used() > 0)
{
RenderPass pass;
pass.op_count = global_param_op.used();
pass.tr_count = global_param_tr.used();
pass.pt_count = global_param_pt.used();
pass.mvo_count = global_param_mvo.used();
pass.mvo_tr_count = global_param_mvo_tr.used();
pass.autosort = UsingAutoSort(render_passes.used());
pass.z_clear = ClearZBeforePass(render_passes.used());
*render_passes.Append() = pass;
}
}

View File

@ -44,6 +44,16 @@ struct gl4PipelineShader
GLint fog_clamp_min, fog_clamp_max;
GLint normal_matrix;
GLint palette_index;
// Naomi2
GLint mvMat;
GLint projMat;
GLint glossCoef0;
GLint lightCount;
GLint ambientBase;
GLint ambientOffset;
GLint ambientMaterial;
GLint useBaseOver;
GLint envMapping;
bool cp_AlphaTest;
bool pp_InsideClipping;
@ -59,6 +69,7 @@ struct gl4PipelineShader
bool pp_BumpMap;
bool fog_clamping;
bool palette;
bool naomi2;
};
@ -71,6 +82,15 @@ struct gl4_ctx
GLuint normal_matrix;
} modvol_shader;
struct
{
GLuint program;
GLuint normal_matrix;
GLint mvMat;
GLint projMat;
} n2ModVolShader;
std::unordered_map<u32, gl4PipelineShader> shaders;
struct
@ -88,7 +108,8 @@ extern int max_image_width;
extern int max_image_height;
extern const char *gl4PixelPipelineShader;
bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *pixel_source = nullptr, const char *vertex_source = nullptr);
bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *pixel_source = nullptr, const char *vertex_source = nullptr,
const char *geom_source = nullptr);
void initABuffer();
void termABuffer();

View File

@ -20,6 +20,7 @@
#include "rend/gles/glcache.h"
#include "rend/tileclip.h"
#include "rend/osd.h"
#include "naomi2.h"
static gl4PipelineShader* CurrentShader;
extern u32 gcflip;
@ -34,7 +35,7 @@ GLuint depthSaveTexId;
static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping,
bool palette, Pass pass)
bool palette, bool naomi2, Pass pass)
{
u32 rv=0;
@ -51,6 +52,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
rv <<= 1; rv |= (int)pp_BumpMap;
rv <<= 1; rv |= (int)fog_clamping;
rv <<= 1; rv |= (int)palette;
rv <<= 1; rv |= (int)naomi2;
rv <<= 2; rv |= (int)pass;
gl4PipelineShader *shader = &gl4.shaders[rv];
@ -69,6 +71,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
shader->pp_BumpMap = pp_BumpMap;
shader->fog_clamping = fog_clamping;
shader->palette = palette;
shader->naomi2 = naomi2;
shader->pass = pass;
gl4CompilePipelineShader(shader);
}
@ -128,6 +131,7 @@ static void SetGPState(const PolyParam* gp)
false,
false,
false,
gp->projMatrix != nullptr,
pass);
}
else
@ -152,6 +156,7 @@ static void SetGPState(const PolyParam* gp)
gp->tcw.PixelFmt == PixelBumpMap,
color_clamp,
gpuPalette,
gp->projMatrix != nullptr,
pass);
}
glcache.UseProgram(CurrentShader->program);
@ -241,8 +246,6 @@ static void SetGPState(const PolyParam* gp)
glActiveTexture(GL_TEXTURE0);
}
//set cull mode !
//cflip is required when exploding triangles for triangle sorting
//gcflip is global clip flip, needed for when rendering to texture due to mirrored Y direction
SetCull(gp->isp.CullMode ^ gcflip);
@ -267,6 +270,8 @@ static void SetGPState(const PolyParam* gp)
}
else
glcache.DepthMask(GL_FALSE);
if (gp->projMatrix != nullptr)
setN2Uniforms(gp, CurrentShader);
}
template <u32 Type, bool SortingEnabled, Pass pass>
@ -330,6 +335,9 @@ void gl4SetupMainVBO()
glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck();
glEnableVertexAttribArray(VERTEX_NORM_ARRAY);
glVertexAttribPointer(VERTEX_NORM_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, nx));
}
void gl4SetupModvolVBO()
@ -354,8 +362,6 @@ static void DrawModVols(int first, int count)
glcache.Disable(GL_BLEND);
SetBaseClipping();
glcache.UseProgram(gl4.modvol_shader.program);
glcache.Enable(GL_DEPTH_TEST);
glcache.DepthMask(GL_FALSE);
glcache.DepthFunc(GL_GREATER);
@ -372,6 +378,14 @@ static void DrawModVols(int first, int count)
if (param.count == 0)
continue;
if (param.projMatrix != nullptr)
{
glcache.UseProgram(gl4.n2ModVolShader.program);
glUniformMatrix4fv(gl4.n2ModVolShader.mvMat, 1, GL_FALSE, param.mvMatrix);
glUniformMatrix4fv(gl4.n2ModVolShader.projMat, 1, GL_FALSE, param.projMatrix);
}
else
glcache.UseProgram(gl4.modvol_shader.program);
u32 mv_mode = param.isp.DepthMode;

View File

@ -21,6 +21,7 @@
#include "rend/transform_matrix.h"
#include "rend/osd.h"
#include "glsl.h"
#include "naomi2.h"
//Fragment and vertex shaders code
@ -461,13 +462,23 @@ struct gl4ShaderUniforms_t gl4ShaderUniforms;
int max_image_width;
int max_image_height;
bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source /* = nullptr */, const char *vertex_source /* = nullptr */)
bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source /* = nullptr */,
const char *vertex_source /* = nullptr */, const char *geom_source /* = nullptr */)
{
Vertex4Source vertexSource(s->pp_Gouraud);
std::string vertexSource;
std::string geometrySource;
if (s->naomi2)
{
vertexSource = N2Vertex4Source(s->pp_Gouraud).generate();
geometrySource = N2Geometry4Shader(s->pp_Gouraud).generate();
}
else
vertexSource = Vertex4Source(s->pp_Gouraud).generate();
Fragment4ShaderSource fragmentSource(s);
s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.generate().c_str(),
fragment_source != nullptr ? fragment_source : fragmentSource.generate().c_str());
s->program = gl_CompileAndLink(vertex_source != nullptr ? vertex_source : vertexSource.c_str(),
fragment_source != nullptr ? fragment_source : fragmentSource.generate().c_str(),
geometrySource.empty() ? nullptr : geometrySource.c_str());
//setup texture 0 as the input for the shader
GLint gu = glGetUniformLocation(s->program, "tex0");
@ -537,6 +548,18 @@ bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *fragment_source
glUniform1i(gu, 6); // GL_TEXTURE6
s->palette_index = glGetUniformLocation(s->program, "palette_index");
// Naomi2
s->mvMat = glGetUniformLocation(s->program, "mvMat");
s->projMat = glGetUniformLocation(s->program, "projMat");
s->glossCoef0 = glGetUniformLocation(s->program, "glossCoef0");
s->envMapping = glGetUniformLocation(s->program, "envMapping");
// Lights
s->lightCount = glGetUniformLocation(s->program, "lightCount");
s->ambientBase = glGetUniformLocation(s->program, "ambientBase");
s->ambientOffset = glGetUniformLocation(s->program, "ambientOffset");
s->ambientMaterial = glGetUniformLocation(s->program, "ambientMaterial");
s->useBaseOver = glGetUniformLocation(s->program, "useBaseOver");
return glIsProgram(s->program)==GL_TRUE;
}
@ -550,6 +573,8 @@ static void gl4_delete_shaders()
gl4.shaders.clear();
glcache.DeleteProgram(gl4.modvol_shader.program);
gl4.modvol_shader.program = 0;
glcache.DeleteProgram(gl4.n2ModVolShader.program);
gl4.n2ModVolShader.program = 0;
}
static void gl4_term()
@ -576,6 +601,14 @@ static void create_modvol_shader()
gl4.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str());
gl4.modvol_shader.normal_matrix = glGetUniformLocation(gl4.modvol_shader.program, "normal_matrix");
N2Vertex4Source n2VertexShader(false, true);
N2Geometry4Shader geometryShader(false, true);
gl4.n2ModVolShader.program = gl_CompileAndLink(n2VertexShader.generate().c_str(), fragmentShader.generate().c_str(),
geometryShader.generate().c_str());
gl4.n2ModVolShader.normal_matrix = glGetUniformLocation(gl4.n2ModVolShader.program, "normal_matrix");
gl4.n2ModVolShader.mvMat = glGetUniformLocation(gl4.n2ModVolShader.program, "mvMat");
gl4.n2ModVolShader.projMat = glGetUniformLocation(gl4.n2ModVolShader.program, "projMat");
}
static bool gl_create_resources()
@ -734,9 +767,14 @@ static bool RenderFrame(int width, int height)
pvrrc.fog_clamp_min.getRGBAColor(gl4ShaderUniforms.fog_clamp_min);
pvrrc.fog_clamp_max.getRGBAColor(gl4ShaderUniforms.fog_clamp_max);
glcache.UseProgram(gl4.modvol_shader.program);
if (config::Fog)
{
glcache.UseProgram(gl4.modvol_shader.program);
glUniformMatrix4fv(gl4.modvol_shader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]);
glUniformMatrix4fv(gl4.modvol_shader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]);
glcache.UseProgram(gl4.n2ModVolShader.program);
glUniformMatrix4fv(gl4.n2ModVolShader.normal_matrix, 1, GL_FALSE, &gl4ShaderUniforms.normal_mat[0][0]);
}
gl4ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f;

127
core/rend/gl4/naomi2.cpp Normal file
View File

@ -0,0 +1,127 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "naomi2.h"
extern const char *N2VertexShader;
extern const char *N2ColorShader;
extern const char *GeometryClippingShader;
// GLSL preamble added to the GL4 Naomi 2 vertex and geometry shader sources.
// INTERPOLATION expands to `flat` when pp_Gouraud is 0 and to `noperspective` otherwise;
// NOPERSPECTIVE always expands to `noperspective`.
static const char *gouraudSource = R"(
#if pp_Gouraud == 0
#define INTERPOLATION flat
#else
#define INTERPOLATION noperspective
#endif
#define NOPERSPECTIVE noperspective
)";
// Assembles the GL4 variant of the Naomi 2 vertex shader source.
// gouraud:      value given to the pp_Gouraud shader constant.
// geometryOnly: value given to the GEOM_ONLY shader constant; when set, the
//               lighting code (N2ColorShader) is left out of the source.
N2Vertex4Source::N2Vertex4Source(bool gouraud, bool geometryOnly) : OpenGl4Source()
{
// Constants referenced by the #if directives of the GLSL sources added below
addConstant("pp_Gouraud", gouraud);
addConstant("GEOM_ONLY", geometryOnly);
// The GL4 variant always sets TWO_VOLUMES to 1
addConstant("TWO_VOLUMES", 1);
addSource(gouraudSource);
// N2VertexShader only calls the color/env-map functions when GEOM_ONLY is 0
if (!geometryOnly)
addSource(N2ColorShader);
addSource(N2VertexShader);
}
// Assembles the GL4 variant of the near-plane clipping geometry shader source.
// gouraud:      value given to the pp_Gouraud shader constant.
// geometryOnly: value given to the GEOM_ONLY shader constant.
// The constants mirror the ones set by N2Vertex4Source.
N2Geometry4Shader::N2Geometry4Shader(bool gouraud, bool geometryOnly) : OpenGl4Source()
{
addConstant("pp_Gouraud", gouraud);
addConstant("GEOM_ONLY", geometryOnly);
// The GL4 variant always sets TWO_VOLUMES to 1
addConstant("TWO_VOLUMES", 1);
addSource(gouraudSource);
addSource(GeometryClippingShader);
}
static void setLightUniform(const gl4PipelineShader *shader, int lightId, const char *name, int v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform1i(loc, v);
}
static void setLightUniform(const gl4PipelineShader *shader, int lightId, const char *name, float v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform1f(loc, v);
}
static void setLightUniform4f(const gl4PipelineShader *shader, int lightId, const char *name, const float *v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform4fv(loc, 1, v);
}
void setN2Uniforms(const PolyParam *pp, const gl4PipelineShader *shader)
{
glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, &pp->mvMatrix[0]);
glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, &pp->projMatrix[0]);
glUniform1f(shader->glossCoef0, pp->glossCoef0);
N2LightModel *const lightModel = pp->lightModel;
if (lightModel != nullptr)
{
glUniform1i(shader->ambientMaterial, lightModel->ambientMaterial);
glUniform4fv(shader->ambientBase, 1, lightModel->ambientBase);
glUniform4fv(shader->ambientOffset, 1, lightModel->ambientOffset);
glUniform1i(shader->useBaseOver, lightModel->useBaseOver);
glUniform1i(shader->lightCount, lightModel->lightCount);
for (int i = 0; i < lightModel->lightCount; i++)
{
const N2Light& light = lightModel->lights[i];
setLightUniform(shader, i, "parallel", light.parallel);
setLightUniform4f(shader, i, "color", light.color);
setLightUniform4f(shader, i, "direction", light.direction);
setLightUniform4f(shader, i, "position", light.position);
setLightUniform(shader, i, "diffuse", light.diffuse);
setLightUniform(shader, i, "specular", light.specular);
setLightUniform(shader, i, "routing", light.routing);
setLightUniform(shader, i, "dmode", light.dmode);
setLightUniform(shader, i, "smode", light.smode);
setLightUniform(shader, i, "distAttnMode", light.distAttnMode);
setLightUniform(shader, i, "attnDistA", light.attnDistA);
setLightUniform(shader, i, "attnDistB", light.attnDistB);
setLightUniform(shader, i, "attnAngleA", light.attnAngleA);
setLightUniform(shader, i, "attnAngleB", light.attnAngleB);
}
}
else
{
float white[] { 1.f, 1.f, 1.f, 1.f };
float black[4]{};
glUniform1i(shader->ambientMaterial, 0);
glUniform4fv(shader->ambientBase, 1, white);
glUniform4fv(shader->ambientOffset, 1, black);
glUniform1i(shader->useBaseOver, 0);
glUniform1i(shader->lightCount, 0);
}
glUniform1i(shader->envMapping, pp->envMapping);
glEnable(GL_CLIP_DISTANCE0);
}

34
core/rend/gl4/naomi2.h Normal file
View File

@ -0,0 +1,34 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include "gl4.h"
// Uploads the Naomi 2 uniforms (matrices, gloss coefficient, lighting model,
// environment mapping flag) of polygon `pp` to `shader`.
void setN2Uniforms(const PolyParam *pp, const gl4PipelineShader *shader);
// Source builder for the GL4 Naomi 2 vertex shader.
// gouraud and geometryOnly are passed to the shader as the pp_Gouraud and
// GEOM_ONLY constants.
class N2Vertex4Source : public OpenGl4Source
{
public:
N2Vertex4Source(bool gouraud, bool geometryOnly = false);
};
// Source builder for the GL4 near-plane clipping geometry shader.
// gouraud and geometryOnly are passed to the shader as the pp_Gouraud and
// GEOM_ONLY constants.
class N2Geometry4Shader : public OpenGl4Source
{
public:
N2Geometry4Shader(bool gouraud, bool geometryOnly = false);
};

View File

@ -3,6 +3,7 @@
#include "rend/sorter.h"
#include "rend/tileclip.h"
#include "rend/osd.h"
#include "naomi2.h"
/*
@ -133,7 +134,8 @@ __forceinline
gp->tcw.PixelFmt == PixelBumpMap,
color_clamp,
ShaderUniforms.trilinear_alpha != 1.f,
gpuPalette);
gpuPalette,
gp->projMatrix != nullptr);
glcache.UseProgram(CurrentShader->program);
if (CurrentShader->trilinear_alpha != -1)
@ -224,7 +226,7 @@ __forceinline
//set Z mode, only if required
if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled))
{
glcache.DepthFunc(GL_GEQUAL);
glcache.DepthFunc(Zfunction[6]); // >=
}
else
{
@ -242,6 +244,8 @@ __forceinline
else
glcache.DepthMask(!gp->isp.ZWriteDis);
}
if (CurrentShader->naomi2)
setN2Uniforms(gp, CurrentShader);
}
template <u32 Type, bool SortingEnabled>
@ -502,6 +506,10 @@ void SetupMainVBO()
glEnableVertexAttribArray(VERTEX_UV_ARRAY);
glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u));
glEnableVertexAttribArray(VERTEX_NORM_ARRAY);
glVertexAttribPointer(VERTEX_NORM_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, nx));
glCheck();
}
@ -544,9 +552,6 @@ void DrawModVols(int first, int count)
glcache.Disable(GL_BLEND);
SetBaseClipping();
glcache.UseProgram(gl.modvol_shader.program);
glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
glcache.Enable(GL_DEPTH_TEST);
glcache.DepthMask(GL_FALSE);
glcache.DepthFunc(GL_GREATER);
@ -556,6 +561,8 @@ void DrawModVols(int first, int count)
ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first];
int mod_base = -1;
float *curMVMat = nullptr;
float *curProjMat = nullptr;
for (int cmv = 0; cmv < count; cmv++)
{
@ -563,6 +570,24 @@ void DrawModVols(int first, int count)
if (param.count == 0)
continue;
if (param.projMatrix != nullptr)
{
glcache.UseProgram(gl.n2ModVolShader.program);
if (param.mvMatrix != curMVMat)
{
curMVMat = param.mvMatrix;
glUniformMatrix4fv(gl.n2ModVolShader.mvMat, 1, GL_FALSE, curMVMat);
}
if (param.projMatrix != curProjMat)
{
curProjMat = param.projMatrix;
glUniformMatrix4fv(gl.n2ModVolShader.projMat, 1, GL_FALSE, curProjMat);
}
}
else
{
glcache.UseProgram(gl.modvol_shader.program);
}
u32 mv_mode = param.isp.DepthMode;
@ -789,7 +814,7 @@ void DrawVmuTexture(u8 vmu_screen_number)
glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
SetupMainVBO();
PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false);
PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false, false);
glcache.UseProgram(shader->program);
{
@ -883,7 +908,7 @@ void DrawGunCrosshair(u8 port)
glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE);
SetupMainVBO();
PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false);
PipelineShader *shader = GetProgram(false, false, true, true, false, 0, false, 2, false, false, false, false, false, false);
glcache.UseProgram(shader->program);
{

View File

@ -13,6 +13,7 @@
#include "rend/transform_matrix.h"
#include "wsi/gl_context.h"
#include "emulator.h"
#include "naomi2.h"
#include <cmath>
@ -68,7 +69,7 @@ const char *PixelCompatShader = R"(
#endif
)";
static const char* GouraudSource = R"(
const char* GouraudSource = R"(
#if TARGET_GL == GL3 || defined(GL_NV_shader_noperspective_interpolation)
#define NOPERSPECTIVE noperspective
#if pp_Gouraud == 0
@ -400,6 +401,8 @@ static void gl_delete_shaders()
gl.shaders.clear();
glcache.DeleteProgram(gl.modvol_shader.program);
gl.modvol_shader.program = 0;
glcache.DeleteProgram(gl.n2ModVolShader.program);
gl.n2ModVolShader.program = 0;
}
void termGLCommon()
@ -491,7 +494,7 @@ void findGLVersion()
#if defined(__APPLE__)
gl.glsl_version_header = "#version 150";
#else
gl.glsl_version_header = "#version 130";
gl.glsl_version_header = "#version 150"; // FIXME GLSL 1.5 / Open GL 3.2 needed for geometry shader
#endif
gl.single_channel_format = GL_RED;
}
@ -565,16 +568,22 @@ GLuint gl_CompileShader(const char* shader,GLuint type)
return rv;
}
GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
GLuint gl_CompileAndLink(const char *vertexShader, const char *fragmentShader, const char *geometryShader)
{
//create shaders
GLuint vs=gl_CompileShader(VertexShader ,GL_VERTEX_SHADER);
GLuint ps=gl_CompileShader(FragmentShader ,GL_FRAGMENT_SHADER);
GLuint vs = gl_CompileShader(vertexShader, GL_VERTEX_SHADER);
GLuint ps = gl_CompileShader(fragmentShader, GL_FRAGMENT_SHADER);
GLuint gs = 0;
if (geometryShader != nullptr)
gs = gl_CompileShader(geometryShader, GL_GEOMETRY_SHADER);
GLuint program = glCreateProgram();
glAttachShader(program, vs);
glAttachShader(program, ps);
if (gs != 0)
glAttachShader(program, gs);
//bind vertex attribute to vbo inputs
glBindAttribLocation(program, VERTEX_POS_ARRAY, "in_pos");
glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base");
@ -583,6 +592,8 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1");
glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1");
glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1");
// Naomi 2
glBindAttribLocation(program, VERTEX_NORM_ARRAY, "in_normal");
#ifndef GLES
if (!gl.is_gles && gl.gl_major >= 3)
@ -610,13 +621,17 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
free(compile_log);
// Dump the shaders source for troubleshooting
INFO_LOG(RENDERER, "// VERTEX SHADER\n%s\n// END", VertexShader);
INFO_LOG(RENDERER, "// FRAGMENT SHADER\n%s\n// END", FragmentShader);
INFO_LOG(RENDERER, "// VERTEX SHADER\n%s\n// END", vertexShader);
if (geometryShader != nullptr)
INFO_LOG(RENDERER, "// GEOMETRY SHADER\n%s\n// END", geometryShader);
INFO_LOG(RENDERER, "// FRAGMENT SHADER\n%s\n// END", fragmentShader);
die("shader compile fail\n");
}
glDeleteShader(vs);
glDeleteShader(ps);
if (gs != 0)
glDeleteShader(gs);
glcache.UseProgram(program);
@ -628,7 +643,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear,
bool palette)
bool palette, bool naomi2)
{
u32 rv=0;
@ -645,6 +660,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
rv<<=1; rv|=fog_clamping;
rv<<=1; rv|=trilinear;
rv<<=1; rv|=palette;
rv<<=1; rv|=naomi2;
PipelineShader *shader = &gl.shaders[rv];
if (shader->program == 0)
@ -662,6 +678,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
shader->fog_clamping = fog_clamping;
shader->trilinear = trilinear;
shader->palette = palette;
shader->naomi2 = naomi2;
CompilePipelineShader(shader);
}
@ -707,10 +724,17 @@ public:
bool CompilePipelineShader(PipelineShader* s)
{
VertexSource vertexSource(s->pp_Gouraud);
std::string vertexShader;
if (s->naomi2)
vertexShader = N2VertexSource(s->pp_Gouraud).generate();
else
vertexShader = VertexSource(s->pp_Gouraud).generate();
FragmentShaderSource fragmentSource(s);
std::string geometryShader;
if (s->naomi2)
geometryShader = N2GeometryShader(s->pp_Gouraud).generate();
s->program = gl_CompileAndLink(vertexSource.generate().c_str(), fragmentSource.generate().c_str());
s->program = gl_CompileAndLink(vertexShader.c_str(), fragmentSource.generate().c_str(), s->naomi2 ? geometryShader.c_str() : nullptr);
//setup texture 0 as the input for the shader
GLint gu = glGetUniformLocation(s->program, "tex");
@ -763,6 +787,18 @@ bool CompilePipelineShader(PipelineShader* s)
}
s->normal_matrix = glGetUniformLocation(s->program, "normal_matrix");
// Naomi2
s->mvMat = glGetUniformLocation(s->program, "mvMat");
s->projMat = glGetUniformLocation(s->program, "projMat");
s->glossCoef0 = glGetUniformLocation(s->program, "glossCoef0");
s->envMapping = glGetUniformLocation(s->program, "envMapping");
// Lights
s->lightCount = glGetUniformLocation(s->program, "lightCount");
s->ambientBase = glGetUniformLocation(s->program, "ambientBase");
s->ambientOffset = glGetUniformLocation(s->program, "ambientOffset");
s->ambientMaterial = glGetUniformLocation(s->program, "ambientMaterial");
s->useBaseOver = glGetUniformLocation(s->program, "useBaseOver");
ShaderUniforms.Set(s);
return glIsProgram(s->program)==GL_TRUE;
@ -863,9 +899,18 @@ static void create_modvol_shader()
.addSource(ModifierVolumeShader);
gl.modvol_shader.program = gl_CompileAndLink(vertexShader.generate().c_str(), fragmentShader.generate().c_str());
gl.modvol_shader.normal_matrix = glGetUniformLocation(gl.modvol_shader.program, "normal_matrix");
gl.modvol_shader.normal_matrix = glGetUniformLocation(gl.modvol_shader.program, "normal_matrix");
gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor");
gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale");
gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale");
N2VertexSource n2vertexShader(false, true);
N2GeometryShader geometryShader(false, true);
gl.n2ModVolShader.program = gl_CompileAndLink(n2vertexShader.generate().c_str(), fragmentShader.generate().c_str(), geometryShader.generate().c_str());
gl.n2ModVolShader.normal_matrix = glGetUniformLocation(gl.n2ModVolShader.program, "normal_matrix");
gl.n2ModVolShader.sp_ShaderColor = glGetUniformLocation(gl.n2ModVolShader.program, "sp_ShaderColor");
gl.n2ModVolShader.depth_scale = glGetUniformLocation(gl.n2ModVolShader.program, "depth_scale");
gl.n2ModVolShader.mvMat = glGetUniformLocation(gl.n2ModVolShader.program, "mvMat");
gl.n2ModVolShader.projMat = glGetUniformLocation(gl.n2ModVolShader.program, "projMat");
}
bool gl_create_resources()
@ -1123,7 +1168,12 @@ bool OpenGLRenderer::Process(TA_context* ctx)
palette_updated = false;
}
if (!ta_parse_vdrc(ctx))
bool success;
if (settings.platform.system == DC_PLATFORM_NAOMI2)
success = ta_parse_naomi2(ctx);
else
success = ta_parse_vdrc(ctx);
if (!success)
return false;
}
@ -1190,10 +1240,16 @@ bool RenderFrame(int width, int height)
pvrrc.fog_clamp_max.getRGBAColor(ShaderUniforms.fog_clamp_max);
glcache.UseProgram(gl.modvol_shader.program);
if (gl.modvol_shader.depth_scale != -1)
glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]);
glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
glcache.UseProgram(gl.n2ModVolShader.program);
if (gl.n2ModVolShader.depth_scale != -1)
glUniform4fv(gl.n2ModVolShader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.n2ModVolShader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]);
glUniform1f(gl.n2ModVolShader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f);
ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f;

View File

@ -27,6 +27,8 @@
#define VERTEX_COL_BASE1_ARRAY 4
#define VERTEX_COL_OFFS1_ARRAY 5
#define VERTEX_UV1_ARRAY 6
// Naomi2
#define VERTEX_NORM_ARRAY 7
//vertex types
extern u32 gcflip;
@ -49,6 +51,16 @@ struct PipelineShader
GLint fog_clamp_min, fog_clamp_max;
GLint normal_matrix;
GLint palette_index;
// Naomi2
GLint mvMat;
GLint projMat;
GLint glossCoef0;
GLint lightCount;
GLint ambientBase;
GLint ambientOffset;
GLint ambientMaterial;
GLint useBaseOver;
GLint envMapping;
//
bool cp_AlphaTest;
@ -64,6 +76,7 @@ struct PipelineShader
bool fog_clamping;
bool trilinear;
bool palette;
bool naomi2;
};
@ -76,9 +89,20 @@ struct gl_ctx
GLint depth_scale;
GLint sp_ShaderColor;
GLint normal_matrix;
} modvol_shader;
struct
{
GLuint program;
GLint depth_scale;
GLint sp_ShaderColor;
GLint normal_matrix;
GLint mvMat;
GLint projMat;
} n2ModVolShader;
std::unordered_map<u32, PipelineShader> shaders;
struct
@ -175,11 +199,12 @@ void OSD_DRAW(bool clear_screen);
PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear,
bool palette);
bool palette, bool naomi2);
GLuint gl_CompileShader(const char* shader, GLuint type);
GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader);
GLuint gl_CompileAndLink(const char *vertexShader, const char *fragmentShader, const char *geometryShader = nullptr);
bool CompilePipelineShader(PipelineShader* s);
extern const char* GouraudSource;
extern struct ShaderUniforms_t
{

571
core/rend/gles/naomi2.cpp Normal file
View File

@ -0,0 +1,571 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#include "naomi2.h"
// FIXME GLES
#ifndef GL_CLIP_DISTANCE0
#define GL_CLIP_DISTANCE0 0x3000
#endif
// GLSL source of the Naomi 2 vertex shader (transform and lighting done on the GPU).
// It relies on the GEOM_ONLY and TWO_VOLUMES constants and on the INTERPOLATION /
// NOPERSPECTIVE macros being defined by the sources placed before it.
// The vertex position is transformed by mvMat, then by projMat. Unless GEOM_ONLY is set,
// colors go through computeColors() and, when envMapping is 1, UVs go through
// computeEnvMap(); both functions are defined in N2ColorShader.
// gl_ClipDistance[0] is written for the near plane and read back by the geometry shader.
const char* N2VertexShader = R"(
uniform vec4 depth_scale;
uniform mat4 normal_matrix;
uniform float sp_FOG_DENSITY;
uniform mat4 mvMat;
uniform mat4 projMat;
uniform int envMapping;
// Vertex input
in vec3 in_pos;
#if GEOM_ONLY == 0
in vec4 in_base;
in vec4 in_offs;
in vec2 in_uv;
in vec3 in_normal;
#if TWO_VOLUMES == 1
in vec4 in_base1;
in vec4 in_offs1;
in vec2 in_uv1;
#endif
// output
INTERPOLATION out highp vec4 vs_base;
INTERPOLATION out highp vec4 vs_offs;
NOPERSPECTIVE out highp vec3 vs_uv;
#if TWO_VOLUMES == 1
INTERPOLATION out vec4 vs_base1;
INTERPOLATION out vec4 vs_offs1;
noperspective out vec2 vs_uv1;
#endif
#endif
out float gl_ClipDistance[1];
void main()
{
vec4 vpos = mvMat * vec4(in_pos, 1.0);
#if GEOM_ONLY == 0
vs_base = in_base;
vs_offs = in_offs;
#if TWO_VOLUMES == 1
vs_base1 = in_base1;
vs_offs1 = in_offs1;
vs_uv1 = in_uv1;
#endif
vec4 vnorm = normalize(mvMat * vec4(in_normal, 0.0));
computeColors(vs_base, vs_offs, vpos.xyz, vnorm.xyz);
vs_uv.xy = in_uv;
if (envMapping == 1)
computeEnvMap(vs_uv.xy, vpos.xyz, vnorm.xyz);
#endif
vpos = projMat * vpos;
gl_ClipDistance[0] = vpos.w - 0.001; // near FIXME
gl_Position = vpos;
}
)";
// GLSL source of the Naomi 2 lighting and environment mapping functions used by
// N2VertexShader:
//  - computeColors() accumulates the diffuse and specular contribution of each of the
//    `lightCount` lights, routes them to the base and/or offset color according to
//    light.routing, then applies the ambient colors.
//  - computeEnvMap() offsets the UVs using the x and y components of the normal.
// Distance and angle attenuation are applied unless their coefficients describe the
// identity (A == 1.0 and B == 0.0).
// NOTE(review): this assumes lights without attenuation are given A = 1.0, B = 0.0
// by the code filling N2Light - confirm against the parser.
const char* N2ColorShader = R"(
#define LMODE_SINGLE_SIDED 0
#define LMODE_DOUBLE_SIDED 1
#define LMODE_DOUBLE_SIDED_WITH_TOLERANCE 2
#define LMODE_SPECIAL_EFFECT 3
#define LMODE_THIN_SURFACE 4
#define LMODE_BUMP_MAP 5
#define ROUTING_BASEDIFF_BASESPEC_ADD 0
#define ROUTING_BASEDIFF_OFFSSPEC_ADD 1
#define ROUTING_OFFSDIFF_BASESPEC_ADD 2
#define ROUTING_OFFSDIFF_OFFSSPEC_ADD 3
#define ROUTING_ALPHADIFF_ADD 4
#define ROUTING_ALPHAATTEN_ADD 5
#define ROUTING_FOGDIFF_ADD 6
#define ROUTING_FOGATTENUATION_ADD 7
#define ROUTING_BASEDIFF_BASESPEC_SUB 8
#define ROUTING_BASEDIFF_OFFSSPEC_SUB 9
#define ROUTING_OFFSDIFF_BASESPEC_SUB 10
#define ROUTING_OFFSDIFF_OFFSSPEC_SUB 11
#define ROUTING_ALPHADIFF_SUB 12
#define ROUTING_ALPHAATTEN_SUB 13
struct N2Light
{
	vec4 color;
	vec4 direction; // For parallel/spot
	vec4 position; // For spot/point
	int parallel;
	int diffuse;
	int specular;
	int routing;
	int dmode;
	int smode;
	int distAttnMode; // For spot/point
	float attnDistA;
	float attnDistB;
	float attnAngleA; // For spot
	float attnAngleB;
};
uniform N2Light lights[16];
uniform int lightCount;
uniform vec4 ambientBase;
uniform vec4 ambientOffset;
uniform int ambientMaterial;
uniform int useBaseOver;
// model attributes
uniform float glossCoef0;
uniform float glossCoef1;
void computeColors(inout vec4 baseCol, inout vec4 offsetCol, in vec3 position, in vec3 normal)
{
	vec3 diffuse = vec3(0.0);
	vec3 specular = vec3(0.0);
	float diffuseAlpha = 0.0;
	float specularAlpha = 0.0;
	for (int i = 0; i < lightCount; i++)
	{
		N2Light light = lights[i];
		vec3 lightDir; // direction to the light
		vec3 lightColor = light.color.rgb;
		if (light.parallel == 1)
		{
			lightDir = normalize(light.direction.xyz);
		}
		else
		{
			lightDir = normalize(light.position.xyz - position);
			// Attenuate unless the coefficients are the identity (A == 1 and B == 0)
			if (light.attnDistA != 1.0 || light.attnDistB != 0.0)
			{
				float distance = length(light.position.xyz - position);
				if (light.distAttnMode == 0)
					distance = 1.0 / distance;
				lightColor *= clamp(light.attnDistB * distance + light.attnDistA, 0.0, 1.0);
			}
			// Attenuate unless the coefficients are the identity (A == 1 and B == 0)
			if (light.attnAngleA != 1.0 || light.attnAngleB != 0.0)
			{
				vec3 spotDir = normalize(light.direction.xyz);
				float cosAngle = max(0.0, dot(-lightDir, spotDir));
				lightColor *= clamp(cosAngle * light.attnAngleB + light.attnAngleA, 0.0, 1.0);
			}
		}
		if (light.diffuse == 1)
		{
			float factor;
			switch (light.dmode)
			{
			case LMODE_SINGLE_SIDED:
				factor = max(dot(normal, lightDir), 0.0);
				break;
			case LMODE_DOUBLE_SIDED:
				factor = abs(dot(normal, lightDir));
				break;
			case LMODE_SPECIAL_EFFECT:
			default:
				factor = 1.0;
				break;
			}
			if (light.routing == ROUTING_ALPHADIFF_SUB)
				diffuseAlpha -= lightColor.r * factor;
			else if (light.routing == ROUTING_BASEDIFF_BASESPEC_ADD || light.routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
				diffuse += lightColor * factor;
			if (light.routing == ROUTING_OFFSDIFF_BASESPEC_ADD || light.routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD)
				specular += lightColor * factor;
		}
		if (light.specular == 1)
		{
			vec3 reflectDir = reflect(-lightDir, normal);
			float factor;
			switch (light.smode)
			{
			case LMODE_SINGLE_SIDED:
				factor = clamp(pow(max(dot(normalize(-position), reflectDir), 0.0), glossCoef0), 0.0, 1.0);
				break;
			case LMODE_DOUBLE_SIDED:
				factor = clamp(pow(abs(dot(normalize(-position), reflectDir)), glossCoef0), 0.0, 1.0);
				break;
			case LMODE_SPECIAL_EFFECT:
			default:
				factor = 1.0;
				break;
			}
			if (light.routing == ROUTING_ALPHADIFF_SUB)
				specularAlpha -= lightColor.r * factor;
			else if (light.routing == ROUTING_OFFSDIFF_OFFSSPEC_ADD || light.routing == ROUTING_BASEDIFF_OFFSSPEC_ADD)
				specular += lightColor * factor;
			if (light.routing == ROUTING_BASEDIFF_BASESPEC_ADD || light.routing == ROUTING_OFFSDIFF_BASESPEC_ADD)
				diffuse += lightColor * factor;
		}
	}
	if (ambientMaterial == 1)
	{
		diffuse += ambientBase.rgb;
		specular += ambientOffset.rgb;
	}
	baseCol.rgb *= diffuse;
	offsetCol.rgb *= specular;
	if (ambientMaterial == 0)
	{
		baseCol.rgb += ambientBase.rgb;
		offsetCol.rgb += ambientOffset.rgb;
	}
	baseCol.a = max(0.0, baseCol.a + diffuseAlpha);
	offsetCol.a = max(0.0, offsetCol.a + specularAlpha);
	if (useBaseOver == 1)
	{
		vec4 overflow = max(vec4(0.0), baseCol - vec4(1.0));
		offsetCol += overflow;
	}
}
void computeEnvMap(inout vec2 uv, in vec3 position, in vec3 normal)
{
	// Spherical mapping
	//vec3 r = reflect(normalize(position), normal);
	//float m = 2.0 * sqrt(r.x * r.x + r.y * r.y + (r.z + 1.0) * (r.z + 1.0));
	//uv += r.xy / m + 0.5;
	// Cheap env mapping
	uv += normal.xy / 2.0 + 0.5;
	uv = clamp(uv, 0.0, 1.0);
}
)";
// GLSL source of the geometry shader that clips each input triangle against the near plane.
// main() gathers the three input vertices and their gl_ClipDistance[0] values and passes
// them to clip3(), which returns 0, 3 or 4 vertices; a 4-vertex result is emitted as two
// triangles (hence max_vertices = 6).
// Every emitted vertex goes through wDivide(): x and y are divided by w, 1/w is placed
// in z, normal_matrix is applied, the attributes are scaled by the resulting z (colors
// only when pp_Gouraud is 1), that z is stored in vtx_uv.z and the position is finally
// output with z = 0 and w = 1.
// It relies on the GEOM_ONLY, TWO_VOLUMES and pp_Gouraud constants and on the
// INTERPOLATION / NOPERSPECTIVE macros being defined by the sources placed before it.
const char *GeometryClippingShader = R"(
layout (triangles) in;
layout (triangle_strip, max_vertices = 6) out;
uniform mat4 normal_matrix;
#if GEOM_ONLY == 0
INTERPOLATION in highp vec4 vs_base[3];
INTERPOLATION in highp vec4 vs_offs[3];
NOPERSPECTIVE in highp vec3 vs_uv[3];
#if TWO_VOLUMES == 1
INTERPOLATION in highp vec4 vs_base1[3];
INTERPOLATION in highp vec4 vs_offs1[3];
NOPERSPECTIVE in highp vec2 vs_uv1[3];
#endif
INTERPOLATION out highp vec4 vtx_base;
INTERPOLATION out highp vec4 vtx_offs;
#if TWO_VOLUMES == 1
INTERPOLATION out highp vec4 vtx_base1;
INTERPOLATION out highp vec4 vtx_offs1;
NOPERSPECTIVE out highp vec2 vtx_uv1;
#endif
#endif
NOPERSPECTIVE out highp vec3 vtx_uv; // For depth
struct Vertex
{
vec4 pos;
vec4 base;
vec4 offs;
vec3 uv;
#if TWO_VOLUMES == 1
vec4 base1;
vec4 offs1;
vec2 uv1;
#endif
float clipDist;
};
Vertex interpolate(in Vertex v0, in Vertex v1, in float d0, in float d1)
{
Vertex v;
float f = d0 / (d0 - d1);
v.pos = mix(v0.pos, v1.pos, f);
#if GEOM_ONLY == 0
v.base = mix(v0.base, v1.base, f);
v.offs = mix(v0.offs, v1.offs, f);
v.uv = mix(v0.uv, v1.uv, f);
#if TWO_VOLUMES == 1
v.base1 = mix(v0.base1, v1.base1, f);
v.offs1 = mix(v0.offs1, v1.offs1, f);
v.uv1 = mix(v0.uv1, v1.uv1, f);
#endif
#endif
v.clipDist = mix(v0.clipDist, v1.clipDist, f);
return v;
}
//
// Efficient Triangle and Quadrilateral Clipping within Shaders. M. McGuire
// Journal of Graphics GPU and Game Tools, November 2011
//
const float clipEpsilon = 0.00001;
const float clipEpsilon2 = 0.0; // 0.01;
/**
Computes the intersection of triangle v0-v1-v2 with the half-space (x,y,z) * n > 0.
The result is a convex polygon in v0-v1-v2-v3. Vertex v3 may be degenerate
and equal to the first vertex.
\return number of vertices; 0, 3, or 4
*/
int clip3(in vec3 dist, inout Vertex v0, inout Vertex v1, inout Vertex v2, out Vertex v3)
{
if (!any(greaterThanEqual(dist, vec3(clipEpsilon2))))
// All clipped
return 0;
if (all(greaterThanEqual(dist, vec3(-clipEpsilon)))) {
// None clipped (original triangle vertices are unmodified)
v3 = v0;
return 3;
}
bvec3 above = greaterThanEqual(dist, vec3(0.0));
// There are either 1 or 2 vertices above the clipping plane.
bool nextIsAbove;
// Find the CCW-most vertex above the plane by cycling
// the vertices in place. There are three cases.
if (above[1] && !above[0]) {
nextIsAbove = above[2];
// Cycle once CCW. Use v3 as a temp
v3 = v0; v0 = v1; v1 = v2; v2 = v3;
dist = dist.yzx;
}
else if (above[2] && !above[1]) {
// Cycle once CW. Use v3 as a temp.
nextIsAbove = above[0];
v3 = v2; v2 = v1; v1 = v0; v0 = v3;
dist = dist.zxy;
}
else {
nextIsAbove = above[1];
}
// We always need to clip v2-v0.
v3 = interpolate(v0, v2, dist[0], dist[2]);
if (nextIsAbove) {
v2 = interpolate(v1, v2, dist[1], dist[2]);
return 4;
} else {
v1 = interpolate(v0, v1, dist[0], dist[1]);
v2 = v3;
v3 = v0;
return 3;
}
}
void wDivide(inout Vertex v)
{
v.pos = vec4(v.pos.xy / v.pos.w, 1.0 / v.pos.w, 1.0);
v.pos = normal_matrix * v.pos;
#if GEOM_ONLY == 1
v.uv = vec3(0.0, 0.0, v.pos.z);
#else
#if pp_Gouraud == 1
v.base *= v.pos.z;
v.offs *= v.pos.z;
#if TWO_VOLUMES == 1
v.base1 *= v.pos.z;
v.offs1 *= v.pos.z;
#endif
#endif
v.uv = vec3(v.uv.xy * v.pos.z, v.pos.z);
#if TWO_VOLUMES == 1
v.uv1 *= v.pos.z;
#endif
#endif
v.pos.w = 1.0;
v.pos.z = 0.0;
}
void emitVertex(in Vertex v)
{
wDivide(v);
#if GEOM_ONLY == 0
vtx_base = v.base;
vtx_offs = v.offs;
#if TWO_VOLUMES == 1
vtx_base1 = v.base1;
vtx_offs1 = v.offs1;
vtx_uv1 = v.uv1;
#endif
#endif
vtx_uv = v.uv;
gl_Position = v.pos;
EmitVertex();
}
void main()
{
Vertex vtx[6];
vtx[0].pos = gl_in[0].gl_Position;
vtx[1].pos = gl_in[1].gl_Position;
vtx[2].pos = gl_in[2].gl_Position;
#if GEOM_ONLY == 0
vtx[0].base = vs_base[0];
vtx[0].offs = vs_offs[0];
vtx[0].uv = vs_uv[0];
vtx[1].base = vs_base[1];
vtx[1].offs = vs_offs[1];
vtx[1].uv = vs_uv[1];
vtx[2].base = vs_base[2];
vtx[2].offs = vs_offs[2];
vtx[2].uv = vs_uv[2];
#if TWO_VOLUMES == 1
vtx[0].base1 = vs_base1[0];
vtx[0].offs1 = vs_offs1[0];
vtx[0].uv1 = vs_uv1[0];
vtx[1].base1 = vs_base1[1];
vtx[1].offs1 = vs_offs1[1];
vtx[1].uv1 = vs_uv1[1];
vtx[2].base1 = vs_base1[2];
vtx[2].offs1 = vs_offs1[2];
vtx[2].uv1 = vs_uv1[2];
#endif
#endif
int vtxCount = 3;
vtx[0].clipDist = gl_in[0].gl_ClipDistance[0];
vtx[1].clipDist = gl_in[1].gl_ClipDistance[0];
vtx[2].clipDist = gl_in[2].gl_ClipDistance[0];
// near-plane only
vec3 dist = vec3(vtx[0].clipDist, vtx[1].clipDist, vtx[2].clipDist);
Vertex v3;
int size = clip3(dist, vtx[0], vtx[1], vtx[2], v3);
if (size == 0)
vtxCount = 0;
else if (size == 4)
{
vtx[3] = vtx[0];
vtx[4] = vtx[2];
vtx[5] = v3;
vtxCount = 6;
}
for (int i = 0; i + 2 < vtxCount; i += 3)
{
emitVertex(vtx[i]);
emitVertex(vtx[i + 1]);
emitVertex(vtx[i + 2]);
EndPrimitive();
}
}
)";
// Assembles the Naomi 2 vertex shader source for the regular OpenGL renderer.
// gouraud:      value given to the pp_Gouraud shader constant.
// geometryOnly: value given to the GEOM_ONLY shader constant; when set, the
//               lighting code (N2ColorShader) is left out of the source.
N2VertexSource::N2VertexSource(bool gouraud, bool geometryOnly) : OpenGlSource()
{
// Constants referenced by the #if directives of the GLSL sources added below
addConstant("pp_Gouraud", gouraud);
addConstant("GEOM_ONLY", geometryOnly);
// This variant always sets TWO_VOLUMES to 0
addConstant("TWO_VOLUMES", 0);
addSource(VertexCompatShader);
addSource(GouraudSource);
// N2VertexShader only calls the color/env-map functions when GEOM_ONLY is 0
if (!geometryOnly)
addSource(N2ColorShader);
addSource(N2VertexShader);
}
// Assembles the near-plane clipping geometry shader source for the regular OpenGL renderer.
// gouraud:      value given to the pp_Gouraud shader constant.
// geometryOnly: value given to the GEOM_ONLY shader constant.
// The constants mirror the ones set by N2VertexSource.
N2GeometryShader::N2GeometryShader(bool gouraud, bool geometryOnly) : OpenGlSource()
{
addConstant("pp_Gouraud", gouraud);
addConstant("GEOM_ONLY", geometryOnly);
// This variant always sets TWO_VOLUMES to 0
addConstant("TWO_VOLUMES", 0);
addSource(GouraudSource);
addSource(GeometryClippingShader);
}
static void setLightUniform(const PipelineShader *shader, int lightId, const char *name, int v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform1i(loc, v);
}
static void setLightUniform(const PipelineShader *shader, int lightId, const char *name, float v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform1f(loc, v);
}
static void setLightUniform4f(const PipelineShader *shader, int lightId, const char *name, const float *v)
{
char s[128];
sprintf(s, "lights[%d].%s", lightId, name);
GLint loc = glGetUniformLocation(shader->program, s);
glUniform4fv(loc, 1, v);
}
void setN2Uniforms(const PolyParam *pp, const PipelineShader *shader)
{
glUniformMatrix4fv(shader->mvMat, 1, GL_FALSE, &pp->mvMatrix[0]);
glUniformMatrix4fv(shader->projMat, 1, GL_FALSE, &pp->projMatrix[0]);
glUniform1f(shader->glossCoef0, pp->glossCoef0);
N2LightModel *const lightModel = pp->lightModel;
if (lightModel != nullptr)
{
glUniform1i(shader->ambientMaterial, lightModel->ambientMaterial);
glUniform4fv(shader->ambientBase, 1, lightModel->ambientBase);
glUniform4fv(shader->ambientOffset, 1, lightModel->ambientOffset);
glUniform1i(shader->useBaseOver, lightModel->useBaseOver);
glUniform1i(shader->lightCount, lightModel->lightCount);
for (int i = 0; i < lightModel->lightCount; i++)
{
const N2Light& light = lightModel->lights[i];
setLightUniform(shader, i, "parallel", light.parallel);
setLightUniform4f(shader, i, "color", light.color);
setLightUniform4f(shader, i, "direction", light.direction);
setLightUniform4f(shader, i, "position", light.position);
setLightUniform(shader, i, "diffuse", light.diffuse);
setLightUniform(shader, i, "specular", light.specular);
setLightUniform(shader, i, "routing", light.routing);
setLightUniform(shader, i, "dmode", light.dmode);
setLightUniform(shader, i, "smode", light.smode);
setLightUniform(shader, i, "distAttnMode", light.distAttnMode);
setLightUniform(shader, i, "attnDistA", light.attnDistA);
setLightUniform(shader, i, "attnDistB", light.attnDistB);
setLightUniform(shader, i, "attnAngleA", light.attnAngleA);
setLightUniform(shader, i, "attnAngleB", light.attnAngleB);
}
}
else
{
float white[] { 1.f, 1.f, 1.f, 1.f };
float black[4]{};
glUniform1i(shader->ambientMaterial, 0);
glUniform4fv(shader->ambientBase, 1, white);
glUniform4fv(shader->ambientOffset, 1, black);
glUniform1i(shader->useBaseOver, 0);
glUniform1i(shader->lightCount, 0);
}
glUniform1i(shader->envMapping, pp->envMapping);
glEnable(GL_CLIP_DISTANCE0);
}

34
core/rend/gles/naomi2.h Normal file
View File

@ -0,0 +1,34 @@
/*
Copyright 2022 flyinghead
This file is part of Flycast.
Flycast is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
Flycast is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Flycast. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include "gles.h"
// Uploads the Naomi 2 uniforms (model-view/projection matrices, gloss coefficient,
// light model and environment mapping flag) of polygon 'pp' to 'shader'
// and enables GL_CLIP_DISTANCE0.
void setN2Uniforms(const PolyParam *pp, const PipelineShader *shader);
// OpenGL shader source for the Naomi 2 vertex shader.
class N2VertexSource : public OpenGlSource
{
public:
// gouraud: value of the pp_Gouraud shader constant.
// geometryOnly: value of the GEOM_ONLY shader constant; when true the color snippet is not included.
N2VertexSource(bool gouraud, bool geometryOnly = false);
};
// OpenGL shader source for the Naomi 2 geometry shader (built from the geometry clipping snippet).
class N2GeometryShader : public OpenGlSource
{
public:
// gouraud: value of the pp_Gouraud shader constant.
// geometryOnly: value of the GEOM_ONLY shader constant.
N2GeometryShader(bool gouraud, bool geometryOnly = false);
};

View File

@ -17,6 +17,8 @@
#include "sorter.h"
#include "hw/pvr/Renderer_if.h"
#include <algorithm>
#include <glm/glm.hpp>
#include <glm/gtc/type_ptr.hpp>
struct IndexTrig
{
@ -25,18 +27,6 @@ struct IndexTrig
f32 z;
};
#if 0
// Returns the smallest of the three given values.
static float min3(float v0, float v1, float v2)
{
	const float lowest01 = std::min(v0, v1);
	return std::min(lowest01, v2);
}
// Returns the largest of the three given values.
static float max3(float v0, float v1, float v2)
{
	const float highest01 = std::max(v0, v1);
	return std::max(highest01, v2);
}
#endif
static float minZ(const Vertex *v, const u32 *mod)
{
return std::min(std::min(v[mod[0]].z, v[mod[1]].z), v[mod[2]].z);
@ -54,6 +44,12 @@ static bool operator<(const PolyParam& left, const PolyParam& right)
//return left.zMin<right.zMax;
}
// Returns 1 / w for vertex 'v' transformed by 'mat', where w is the 4th
// component of mat * (x, y, z, 1).
// v: vertex whose x, y, z are read.
// mat: 4x4 transformation matrix (indexed as mat[column][row]).
static float getProjectedZ(const Vertex *v, const glm::mat4& mat)
{
	// The whole sum is the divisor: without the grouping only mat[0][3] was
	// inverted and the remaining terms were added to the quotient.
	const float w = mat[0][3] * v->x + mat[1][3] * v->y + mat[2][3] * v->z + mat[3][3];
	// 1 / w
	return 1 / w;
}
void SortPParams(int first, int count)
{
if (pvrrc.verts.used() == 0 || count <= 1)
@ -78,14 +74,58 @@ void SortPParams(int first, int count)
Vertex* vtx=vtx_base+idx[0];
Vertex* vtx_end=vtx_base + idx[pp->count-1]+1;
u32 zv=0xFFFFFFFF;
while(vtx!=vtx_end)
if (pp->projMatrix != nullptr)
{
zv = std::min(zv, (u32&)vtx->z);
vtx++;
}
glm::mat4 mvMat = glm::make_mat4(pp->mvMatrix);
glm::mat4 projMat = glm::make_mat4(pp->projMatrix);
glm::vec4 min{ 1e38f, 1e38f, 1e38f, 0.f };
glm::vec4 max{ -1e38f, -1e38f, -1e38f, 0.f };
while (vtx != vtx_end)
{
glm::vec4 pos{ vtx->x, vtx->y, vtx->z, 0.f };
min = glm::min(min, pos);
max = glm::max(max, pos);
vtx++;
}
glm::vec4 center = (min + max) / 2.f;
center.w = 1;
glm::vec4 extents = max - center;
// transform
center = mvMat * center;
glm::vec3 extentX = mvMat * glm::vec4(extents.x, 0, 0, 0);
glm::vec3 extentY = mvMat * glm::vec4(0, extents.y, 0, 0);
glm::vec3 extentZ = mvMat * glm::vec4(0, 0, extents.z, 0);
// new AA extents
const float newX = std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentX)) +
std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentY)) +
std::abs(glm::dot(glm::vec3{ 1.f, 0.f, 0.f }, extentZ));
pp->zvZ=(f32&)zv;
const float newY = std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentX)) +
std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentY)) +
std::abs(glm::dot(glm::vec3{ 0.f, 1.f, 0.f }, extentZ));
const float newZ = std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentX)) +
std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentY)) +
std::abs(glm::dot(glm::vec3{ 0.f, 0.f, 1.f }, extentZ));
min = center - glm::vec4(newX, newY, newZ, 0);
max = center + glm::vec4(newX, newY, newZ, 0);
// project
glm::vec4 a = projMat * min;
glm::vec4 b = projMat * max;
pp->zvZ = 1 / std::max(a.w, b.w);
}
else
{
u32 zv=0xFFFFFFFF;
while(vtx!=vtx_end)
{
zv = std::min(zv, (u32&)vtx->z);
vtx++;
}
pp->zvZ=(f32&)zv;
}
}
pp++;
}
@ -193,7 +233,9 @@ bool Intersect(const IndexTrig &left, const IndexTrig &right)
static bool PP_EQ(const PolyParam *pp0, const PolyParam *pp1)
{
return (pp0->pcw.full & PCW_DRAW_MASK) == (pp1->pcw.full & PCW_DRAW_MASK) && pp0->isp.full == pp1->isp.full
&& pp0->tcw.full == pp1->tcw.full && pp0->tsp.full == pp1->tsp.full && pp0->tileclip == pp1->tileclip;
&& pp0->tcw.full == pp1->tcw.full && pp0->tsp.full == pp1->tsp.full && pp0->tileclip == pp1->tileclip
&& pp0->mvMatrix == pp1->mvMatrix && pp0->projMatrix == pp1->projMatrix
&& pp0->lightModel == pp1->lightModel && pp0->envMapping == pp1->envMapping;
}
static void fill_id(u32 *d, const Vertex *v0, const Vertex *v1, const Vertex *v2, const Vertex *vb)
@ -246,15 +288,23 @@ void GenSorted(int first, int count, std::vector<SortTrigDrawParam>& pidx_sort,
int pfsti=0;
while(pp!=pp_end)
while (pp != pp_end)
{
u32 ppid = (u32)(pp - pp_base);
if (pp->count>2)
if (pp->count > 2)
{
const u32 *idx = idx_base + pp->first;
u32 flip = 0;
glm::mat4 mat;
float z0, z1;
if (pp->projMatrix != nullptr)
{
mat = glm::make_mat4(pp->projMatrix) * glm::make_mat4(pp->mvMatrix);
z0 = getProjectedZ(vtx_base + idx[0], mat);
z1 = getProjectedZ(vtx_base + idx[1], mat);
}
for (u32 i = 0; i < pp->count - 2; i++)
{
const Vertex *v0, *v1;
@ -269,88 +319,20 @@ void GenSorted(int first, int count, std::vector<SortTrigDrawParam>& pidx_sort,
v1 = vtx_base + idx[i + 1];
}
const Vertex *v2 = vtx_base + idx[i + 2];
#if 0
const Vertex *v3, *v4, *v5;
if (settings.pvr.subdivide_transp)
fill_id(lst[pfsti].id, v0, v1, v2, vtx_base);
lst[pfsti].pid = ppid;
if (pp->projMatrix != nullptr)
{
u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32;
u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32;
if (tess_x==1) tess_x=0;
if (tess_y==1) tess_y=0;
//bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2;
if (tess_x + tess_y)
{
v3=pvrrc.verts.Append(3);
v4=v3+1;
v5=v4+1;
//xyz
for (int i=0;i<3;i++)
{
((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f;
((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
}
//*TODO* Make it perspective correct
//uv
for (int i=0;i<2;i++)
{
((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f;
((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
}
//color
for (int i=0;i<4;i++)
{
v3->col[i]=v0->col[i]/2+v2->col[i]/2;
v4->col[i]=v0->col[i]/2+v1->col[i]/2;
v5->col[i]=v1->col[i]/2+v2->col[i]/2;
}
fill_id(lst[pfsti].id,v0,v3,v4,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
fill_id(lst[pfsti].id,v2,v3,v5,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
fill_id(lst[pfsti].id,v3,v4,v5,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
fill_id(lst[pfsti].id,v5,v4,v1,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
tess_gen+=3;
}
else
{
fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
}
float z2 = getProjectedZ(v2, mat);
lst[pfsti].z = std::min(z0, std::min(z1, z2));
z0 = z1;
z1 = z2;
}
else
#endif
{
fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
lst[pfsti].pid= ppid ;
lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
pfsti++;
lst[pfsti].z = minZ(vtx_base, lst[pfsti].id);
}
pfsti++;
flip ^= 1;
}