Merge OIT renderer into master

This commit is contained in:
Flyinghead 2018-10-04 10:29:23 +02:00
parent ba8e9dcab0
commit 3f791d6f04
17 changed files with 3176 additions and 72 deletions

View File

@ -44,6 +44,9 @@ endif
ifndef NO_REND
RZDCY_MODULES += rend/gles/
ifndef USE_GLES
RZDCY_MODULES += rend/gl4/
endif
else
RZDCY_MODULES += rend/norend/
endif
@ -78,8 +81,7 @@ RZDCY_FILES += $(foreach dir,$(addprefix $(RZDCY_SRC_DIR)/,$(RZDCY_MODULES)),$(w
ifdef FOR_PANDORA
RZDCY_CFLAGS := \
$(CFLAGS) -c -O3 -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \
-I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules \
$(CFLAGS) -c -O3 \
-DRELEASE -DPANDORA\
-march=armv7-a -mtune=cortex-a8 -mfpu=neon -mfloat-abi=softfp \
-frename-registers -fsingle-precision-constant -ffast-math \
@ -89,8 +91,7 @@ RZDCY_CFLAGS := \
else
ifdef FOR_ANDROID
RZDCY_CFLAGS := \
$(CFLAGS) -c -O3 -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \
-I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules \
$(CFLAGS) -c -O3 \
-D_ANDROID -DRELEASE \
-frename-registers -fsingle-precision-constant -ffast-math \
-ftree-vectorize -fomit-frame-pointer
@ -106,12 +107,13 @@ RZDCY_CFLAGS := \
endif
endif
else
RZDCY_CFLAGS := \
-I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \
-I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules
RZDCY_CFLAGS :=
endif
endif
RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/rend/gles -I$(RZDCY_SRC_DIR)/deps \
-I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules
ifdef NO_REC
RZDCY_CFLAGS += -DTARGET_NO_REC
endif

View File

@ -103,7 +103,7 @@ void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref = NULL
u32 bytes = ctx->tad.End() - ctx->tad.thd_root;
fwrite("TAFRAME3", 1, 8, fw);
fwrite("TAFRAME4", 1, 8, fw);
fwrite(&ctx->rend.isRTT, 1, sizeof(ctx->rend.isRTT), fw);
u32 zero = 0;
@ -168,10 +168,17 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) {
fread(id0, 1, 8, fw);
if (memcmp(id0, "TAFRAME3", 8) != 0) {
if (memcmp(id0, "TAFRAME", 7) != 0 || (id0[7] != '3' && id0[7] != '4')) {
fclose(fw);
return 0;
}
int sizeofPolyParam = sizeof(PolyParam);
int sizeofVertex = sizeof(Vertex);
if (id0[7] == '3')
{
sizeofPolyParam -= 12;
sizeofVertex -= 16;
}
TA_context* ctx = tactx_Alloc();
@ -184,8 +191,10 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) {
fread(&ctx->rend.fb_X_CLIP.full, 1, sizeof(ctx->rend.fb_X_CLIP.full), fw);
fread(&ctx->rend.fb_Y_CLIP.full, 1, sizeof(ctx->rend.fb_Y_CLIP.full), fw);
fread(ctx->rend.global_param_op.Append(), 1, sizeof(PolyParam), fw);
fread(ctx->rend.verts.Append(4), 1, 4 * sizeof(Vertex), fw);
fread(ctx->rend.global_param_op.Append(), 1, sizeofPolyParam, fw);
Vertex *vtx = ctx->rend.verts.Append(4);
for (int i = 0; i < 4; i++)
fread(vtx + i, 1, sizeofVertex, fw);
fread(&t, 1, sizeof(t), fw);
verify(t == VRAM_SIZE);
@ -500,6 +509,11 @@ bool rend_init()
case 2:
renderer = rend_softrend();
break;
#endif
#if !defined(GLES) && HOST_OS != OS_DARWIN
case 3:
renderer = rend_GL4();
break;
#endif
}

View File

@ -51,9 +51,10 @@ struct Renderer
extern Renderer* renderer;
Renderer* rend_D3D11();
Renderer* rend_GLES2();
#if !defined(GLES) && HOST_OS != OS_DARWIN
Renderer* rend_GL4();
#endif
Renderer* rend_norend();
Renderer* rend_softrend();

View File

@ -8,6 +8,7 @@ struct List
int size;
bool* overrun;
const char *list_name;
__forceinline int used() const { return size-avail; }
__forceinline int bytes() const { return used()* sizeof(T); }
@ -17,6 +18,8 @@ struct List
{
*overrun |= true;
Clear();
if (list_name != NULL)
printf("List overrun for list %s\n", list_name);
return daty;
}
@ -45,7 +48,7 @@ struct List
T* head() const { return daty-used(); }
void InitBytes(int maxbytes,bool* ovrn)
void InitBytes(int maxbytes,bool* ovrn, const char *name)
{
maxbytes-=maxbytes%sizeof(T);
@ -58,11 +61,12 @@ struct List
overrun=ovrn;
Clear();
list_name = name;
}
void Init(int maxsize,bool* ovrn)
void Init(int maxsize,bool* ovrn, const char *name)
{
InitBytes(maxsize*sizeof(T),ovrn);
InitBytes(maxsize*sizeof(T),ovrn, name);
}
void Clear()
@ -76,4 +80,4 @@ struct List
Clear();
free(daty);
}
};
};

View File

@ -21,12 +21,4 @@ void ta_vtx_data(u32* data, u32 size);
bool ta_parse_vdrc(TA_context* ctx);
#define STRIPS_AS_PPARAMS 1
#define TRIG_SORT 1
#if TRIG_SORT
#undef STRIPS_AS_PPARAMS
#define STRIPS_AS_PPARAMS 1
#endif

View File

@ -17,6 +17,12 @@ struct Vertex
u8 spc[4];
float u,v;
// Two volumes format
u8 col1[4];
u8 spc1[4];
float u1,v1;
};
struct PolyParam
@ -35,6 +41,9 @@ struct PolyParam
float zvZ;
u32 tileclip;
//float zMin,zMax;
TSP tsp1;
TCW tcw1;
u32 texid1;
};
struct ModifierVolumeParam
@ -98,6 +107,7 @@ struct RenderPass {
u32 mvo_count;
u32 pt_count;
u32 tr_count;
u32 mvo_tr_count;
};
struct rend_context
@ -124,6 +134,7 @@ struct rend_context
List<u16> idx;
List<ModTriangle> modtrig;
List<ModifierVolumeParam> global_param_mvo;
List<ModifierVolumeParam> global_param_mvo_tr;
List<PolyParam> global_param_op;
List<PolyParam> global_param_pt;
@ -139,6 +150,7 @@ struct rend_context
global_param_tr.Clear();
modtrig.Clear();
global_param_mvo.Clear();
global_param_mvo_tr.Clear();
render_passes.Clear();
Overrun=false;
@ -189,16 +201,17 @@ struct TA_context
{
tad.Reset((u8*)OS_aligned_malloc(32, 8*1024*1024));
rend.verts.InitBytes(2*1024*1024,&rend.Overrun); //up to 2 MB of vtx data/frame = ~ 75k vtx/frame
rend.idx.Init(120*1024,&rend.Overrun); //up to 120K indexes ( idx have stripification overhead )
rend.global_param_op.Init(4096,&rend.Overrun);
rend.global_param_pt.Init(4096,&rend.Overrun);
rend.global_param_mvo.Init(4096,&rend.Overrun);
rend.global_param_tr.Init(8192,&rend.Overrun);
rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame
rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead )
rend.global_param_op.Init(4096, &rend.Overrun, "global_param_op");
rend.global_param_pt.Init(4096, &rend.Overrun, "global_param_pt");
rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo");
rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr");
rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr");
rend.modtrig.Init(8192,&rend.Overrun);
rend.modtrig.Init(16384, &rend.Overrun, "modtrig");
rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun); // 10 render passes
rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes
Reset();
}
@ -222,6 +235,7 @@ struct TA_context
rend.global_param_tr.Free();
rend.modtrig.Free();
rend.global_param_mvo.Free();
rend.global_param_mvo_tr.Free();
rend.render_passes.Free();
}
};

View File

@ -85,6 +85,8 @@ List<PolyParam>* CurrentPPlist;
//TA state vars
DECL_ALIGN(4) u8 FaceBaseColor[4];
DECL_ALIGN(4) u8 FaceOffsColor[4];
DECL_ALIGN(4) u8 FaceBaseColor1[4];
DECL_ALIGN(4) u8 FaceOffsColor1[4];
DECL_ALIGN(4) u32 SFaceBaseColor;
DECL_ALIGN(4) u32 SFaceOffsColor;
@ -769,7 +771,7 @@ public:
CurrentPP=&nullPP;
CurrentPPlist=0;
if (ListType == ListType_Opaque_Modifier_Volume)
if (ListType == ListType_Opaque_Modifier_Volume || ListType == ListType_Translucent_Modifier_Volume)
EndModVol();
}
@ -808,6 +810,9 @@ public:
if (d_pp->pcw.Texture) {
d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw);
}
d_pp->tsp1.full = -1;
d_pp->tcw1.full = -1;
d_pp->texid1 = -1;
}
}
@ -860,6 +865,11 @@ public:
TA_PolyParam3* pp=(TA_PolyParam3*)vpp;
glob_param_bdc(pp);
CurrentPP->tsp1.full = pp->tsp1.full;
CurrentPP->tcw1.full = pp->tcw1.full;
if (pp->pcw.Texture)
CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1);
}
__forceinline
static void TACALL AppendPolyParam4A(void* vpp)
@ -867,13 +877,19 @@ public:
TA_PolyParam4A* pp=(TA_PolyParam4A*)vpp;
glob_param_bdc(pp);
CurrentPP->tsp1.full = pp->tsp1.full;
CurrentPP->tcw1.full = pp->tcw1.full;
if (pp->pcw.Texture)
CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1);
}
__forceinline
static void TACALL AppendPolyParam4B(void* vpp)
{
TA_PolyParam4B* pp=(TA_PolyParam4B*)vpp;
poly_float_color(FaceBaseColor,FaceColor0);
poly_float_color(FaceBaseColor, FaceColor0);
poly_float_color(FaceBaseColor1, FaceColor1);
}
//Poly Strip handling
@ -884,14 +900,6 @@ public:
{
CurrentPP->count=vdrc.idx.used() - CurrentPP->first;
int vbase=vdrc.verts.used();
*vdrc.idx.Append()=vbase-1;
*vdrc.idx.Append()=vbase;
if (CurrentPP->count&1)
*vdrc.idx.Append()=vbase;
#if STRIPS_AS_PPARAMS
if (CurrentPPlist==&vdrc.global_param_tr)
{
PolyParam* d_pp =CurrentPPlist->Append();
@ -900,7 +908,16 @@ public:
d_pp->first=vdrc.idx.used();
d_pp->count=0;
}
#endif
else
{
int vbase=vdrc.verts.used();
*vdrc.idx.Append()=vbase-1;
*vdrc.idx.Append()=vbase;
if (CurrentPP->count&1)
*vdrc.idx.Append()=vbase;
}
}
@ -941,6 +958,14 @@ public:
cv->u = f16(vtx->u_name);\
cv->v = f16(vtx->v_name);
#define vert_uv1_32(u_name,v_name) \
cv->u1 = (vtx->u_name);\
cv->v1 = (vtx->v_name);
#define vert_uv1_16(u_name,v_name) \
cv->u1 = f16(vtx->u_name);\
cv->v1 = f16(vtx->v_name);
//Color conversions
#define vert_packed_color_(to,src) \
{ \
@ -984,6 +1009,20 @@ public:
cv->spc[2] = FaceOffsColor[2]*satint/256; \
cv->spc[3] = FaceOffsColor[3]; }
#define vert_face_base_color1(baseint) \
{ u32 satint=float_to_satu8(vtx->baseint); \
cv->col1[0] = FaceBaseColor1[0]*satint/256; \
cv->col1[1] = FaceBaseColor1[1]*satint/256; \
cv->col1[2] = FaceBaseColor1[2]*satint/256; \
cv->col1[3] = FaceBaseColor1[3]; }
#define vert_face_offs_color1(offsint) \
{ u32 satint=float_to_satu8(vtx->offsint); \
cv->spc1[0] = FaceOffsColor1[0]*satint/256; \
cv->spc1[1] = FaceOffsColor1[1]*satint/256; \
cv->spc1[2] = FaceOffsColor1[2]*satint/256; \
cv->spc1[3] = FaceOffsColor1[3]; }
//vert_float_color_(cv->spc,FaceOffsColor[3],FaceOffsColor[0]*satint/256,FaceOffsColor[1]*satint/256,FaceOffsColor[2]*satint/256); }
@ -1109,6 +1148,7 @@ public:
vert_cvt_base;
vert_packed_color(col,BaseCol0);
vert_packed_color(col1, BaseCol1);
}
//(Non-Textured, Intensity, with Two Volumes)
@ -1118,6 +1158,7 @@ public:
vert_cvt_base;
vert_face_base_color(BaseInt0);
vert_face_base_color1(BaseInt1);
}
//(Textured, Packed Color, with Two Volumes)
@ -1136,6 +1177,10 @@ public:
{
vert_res_base;
vert_packed_color(col1, BaseCol1);
vert_packed_color(spc1, OffsCol1);
vert_uv1_32(u1, v1);
}
//(Textured, Packed Color, 16bit UV, with Two Volumes)
@ -1154,6 +1199,10 @@ public:
{
vert_res_base;
vert_packed_color(col1, BaseCol1);
vert_packed_color(spc1, OffsCol1);
vert_uv1_16(u1, v1);
}
//(Textured, Intensity, with Two Volumes)
@ -1172,6 +1221,10 @@ public:
{
vert_res_base;
vert_face_base_color1(BaseInt1);
vert_face_offs_color1(OffsInt1);
vert_uv1_32(u1,v1);
}
//(Textured, Intensity, 16bit UV, with Two Volumes)
@ -1190,6 +1243,10 @@ public:
{
vert_res_base;
vert_face_base_color1(BaseInt1);
vert_face_offs_color1(OffsInt1);
vert_uv1_16(u1, v1);
}
//Sprites
@ -1217,6 +1274,9 @@ public:
if (d_pp->pcw.Texture) {
d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw);
}
d_pp->tcw1.full = -1;
d_pp->tsp1.full = -1;
d_pp->texid1 = -1;
SFaceBaseColor=spr->BaseCol;
SFaceOffsColor=spr->OffsCol;
@ -1355,7 +1415,7 @@ public:
vert[-1].z=vert[0].z;
CurrentPP->count+=2;
}*/
#if STRIPS_AS_PPARAMS
if (CurrentPPlist==&vdrc.global_param_tr)
{
PolyParam* d_pp =CurrentPPlist->Append();
@ -1364,7 +1424,6 @@ public:
d_pp->first=vdrc.idx.used();
d_pp->count=0;
}
#endif
}
// Modifier Volumes Vertex handlers
@ -1374,6 +1433,8 @@ public:
List<ModifierVolumeParam> *list = NULL;
if (CurrentList == ListType_Opaque_Modifier_Volume)
list = &vdrc.global_param_mvo;
else if (CurrentList == ListType_Translucent_Modifier_Volume)
list = &vdrc.global_param_mvo_tr;
else
return;
if (list->used() > 0)
@ -1390,6 +1451,8 @@ public:
ModifierVolumeParam *p = NULL;
if (CurrentList == ListType_Opaque_Modifier_Volume)
p = vdrc.global_param_mvo.Append();
else if (CurrentList == ListType_Translucent_Modifier_Volume)
p = vdrc.global_param_mvo_tr.Append();
else
return;
p->isp.full = param->isp.full;
@ -1399,7 +1462,7 @@ public:
__forceinline
static void AppendModVolVertexA(TA_ModVolA* mvv)
{
if (CurrentList!=ListType_Opaque_Modifier_Volume)
if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume)
return;
lmr=vdrc.modtrig.Append();
@ -1419,7 +1482,7 @@ public:
__forceinline
static void AppendModVolVertexB(TA_ModVolB* mvv)
{
if (CurrentList!=ListType_Opaque_Modifier_Volume)
if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume)
return;
lmr->y2=mvv->y2;
lmr->z2=mvv->z2;
@ -1486,6 +1549,7 @@ bool ta_parse_vdrc(TA_context* ctx)
render_pass->mvo_count = vd_rc.global_param_mvo.used();
render_pass->pt_count = vd_rc.global_param_pt.used();
render_pass->tr_count = vd_rc.global_param_tr.used();
render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used();
render_pass->autosort = UsingAutoSort(pass);
render_pass->z_clear = ClearZBeforePass(pass);
}
@ -1637,6 +1701,9 @@ void FillBGP(TA_context* ctx)
bgpp->isp.full=vri(strip_base);
bgpp->tsp.full=vri(strip_base+4);
bgpp->tcw.full=vri(strip_base+8);
bgpp->tcw1.full = -1;
bgpp->tsp1.full = -1;
bgpp->texid1 = -1;
bgpp->count=4;
bgpp->first=0;
bgpp->tileclip=0;//disabled ! HA ~

View File

@ -210,8 +210,8 @@
}
}
if (code < 0)
printf("WARNING: %s/%s not configured!\n", section.c_str(), dc_key.c_str());
//if (code < 0)
// printf("WARNING: %s/%s not configured!\n", section.c_str(), dc_key.c_str());
return code;
}

View File

@ -704,20 +704,30 @@ void x11_window_create()
verify(glXCreateContextAttribsARB != 0);
int context_attribs[] =
{
GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
GLX_CONTEXT_MINOR_VERSION_ARB, 1,
GLX_CONTEXT_MAJOR_VERSION_ARB, 4,
GLX_CONTEXT_MINOR_VERSION_ARB, 3,
#ifndef RELEASE
GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB,
#endif
GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB,
None
};
x11_glc = glXCreateContextAttribsARB(x11Display, bestFbc, 0, True, context_attribs);
XSync(x11Display, False);
if (!x11_glc)
{
die("Failed to create GL3.1 context\n");
printf("Open GL 4.3 not supported\n");
// Try GL 3.1
context_attribs[1] = 3;
context_attribs[3] = 1;
x11_glc = glXCreateContextAttribsARB(x11Display, bestFbc, 0, True, context_attribs);
if (!x11_glc)
{
die("Open GL 3.1 not supported\n");
}
}
XSync(x11Display, False);
#endif
XFlush(x11Display);

546
core/rend/gl4/abuffer.cpp Normal file
View File

@ -0,0 +1,546 @@
/*
* abuffer.cpp
*
* Created on: May 26, 2018
* Author: raph
*/
#include "gl4.h"
#include "rend/gles/glcache.h"
// Shader storage buffer holding the per-fragment Pixel records; created in
// initABuffer() with pixel_buffer_size bytes and attached to SSBO binding 0.
GLuint pixels_buffer;
// R32UI 2D texture bound as image unit 4 (abufferPointerImg in the shaders);
// re-created by reshapeABuffer() when the output size changes.
GLuint pixels_pointers;
// 4-byte atomic counter buffer attached to atomic counter binding 0.
GLuint atomic_buffer;
// Final resolve pass compiled with DEPTH_SORTED = 1.
gl4PipelineShader g_abuffer_final_shader;
// Final resolve pass compiled with DEPTH_SORTED = 0.
gl4PipelineShader g_abuffer_final_nosort_shader;
// Pass that resets every entry of the pointer image to EOL.
gl4PipelineShader g_abuffer_clear_shader;
// One translucent modifier-volume shader per mode; entry i is compiled with MV_MODE = i.
gl4PipelineShader g_abuffer_tr_modvol_shaders[ModeCount];
// Vertex buffer and vertex array object used by DrawQuad().
static GLuint g_quadBuffer = 0;
static GLuint g_quadVertexArray = 0;
// Dimensions of the pointer image, copied from screen_width/screen_height in initABuffer().
static int g_imageWidth = 0;
static int g_imageHeight = 0;
GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 512 MB
// Kept as a string literal because it is pasted into the GLSL sources below.
#define MAX_PIXELS_PER_FRAGMENT "32"
// GLSL fragment shader for the final A-buffer resolve pass.
// The single %d is replaced (printf-style, see initABuffer) by the value of
// DEPTH_SORTED: 1 sorts each pixel's fragment list by depth then polygon number,
// 0 sorts by polygon number only and applies the per-polygon depth test while blending.
// The text must not contain any other '%' since it is used as a format string.
// The insertion sort only dereferences pixel_list[j] after checking j >= 0, so it
// never indexes the local array with -1.
// NOTE(review): the source-coefficient OTHER_COLOR case reads finalColor while
// INVERSE_OTHER_COLOR reads dstColor; confirm whether that asymmetry is intended
// when the secondary accumulation buffer is selected as destination.
static const char *final_shader_source = SHADER_HEADER "\
#define DEPTH_SORTED %d \n\
#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\
\n\
layout(binding = 0) uniform sampler2D tex; \n\
uniform highp float shade_scale_factor; \n\
\n\
out vec4 FragColor; \n\
\n\
uint pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\
\n\
\n\
int fillAndSortFragmentArray(ivec2 coords) \n\
{ \n\
// Load fragments into a local memory array for sorting \n\
uint idx = imageLoad(abufferPointerImg, coords).x; \n\
int count = 0; \n\
for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++) \n\
{ \n\
const Pixel p = pixels[idx]; \n\
int j = count - 1; \n\
// Insertion sort: pixel_list[j] is only read while j is a valid index \n\
while (j >= 0) \n\
{ \n\
const Pixel jp = pixels[pixel_list[j]]; \n\
#if DEPTH_SORTED == 1 \n\
if (!(jp.depth < p.depth \n\
|| (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p)))) \n\
break; \n\
#else \n\
if (!(getPolyNumber(jp) > getPolyNumber(p))) \n\
break; \n\
#endif \n\
pixel_list[j + 1] = pixel_list[j]; \n\
j--; \n\
} \n\
pixel_list[j + 1] = idx; \n\
idx = p.next; \n\
} \n\
return count; \n\
} \n\
\n\
// Blend fragments back-to-front \n\
vec4 resolveAlphaBlend(ivec2 coords) { \n\
\n\
// Copy and sort fragments into a local array \n\
int num_frag = fillAndSortFragmentArray(coords); \n\
\n\
vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\
vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer \n\
float depth = 1.0; \n\
\n\
for (int i = 0; i < num_frag; i++) \n\
{ \n\
const Pixel pixel = pixels[pixel_list[i]]; \n\
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\
#if DEPTH_SORTED != 1 \n\
const float frag_depth = pixel.depth; \n\
switch (getDepthFunc(pp)) \n\
{ \n\
case 0: // Never \n\
continue; \n\
case 1: // Greater \n\
if (frag_depth <= depth) \n\
continue; \n\
break; \n\
case 2: // Equal \n\
if (frag_depth != depth) \n\
continue; \n\
break; \n\
case 3: // Greater or equal \n\
if (frag_depth < depth) \n\
continue; \n\
break; \n\
case 4: // Less \n\
if (frag_depth >= depth) \n\
continue; \n\
break; \n\
case 5: // Not equal \n\
if (frag_depth == depth) \n\
continue; \n\
break; \n\
case 6: // Less or equal \n\
if (frag_depth > depth) \n\
continue; \n\
break; \n\
case 7: // Always \n\
break; \n\
} \n\
\n\
if (getDepthMask(pp)) \n\
depth = frag_depth; \n\
#endif \n\
bool area1 = false; \n\
bool shadowed = false; \n\
if (isShadowed(pixel)) \n\
{ \n\
if (isTwoVolumes(pp)) \n\
area1 = true; \n\
else \n\
shadowed = true; \n\
} \n\
vec4 srcColor; \n\
if (getSrcSelect(pp, area1)) \n\
srcColor = secondaryBuffer; \n\
else \n\
{ \n\
srcColor = pixel.color; \n\
if (shadowed) \n\
srcColor.rgb *= shade_scale_factor; \n\
} \n\
vec4 dstColor = getDstSelect(pp, area1) ? secondaryBuffer : finalColor; \n\
vec4 srcCoef; \n\
vec4 dstCoef; \n\
\n\
int srcBlend = getSrcBlendFunc(pp, area1); \n\
switch (srcBlend) \n\
{ \n\
case ZERO: \n\
srcCoef = vec4(0.0); \n\
break; \n\
case ONE: \n\
srcCoef = vec4(1.0); \n\
break; \n\
case OTHER_COLOR: \n\
srcCoef = finalColor; \n\
break; \n\
case INVERSE_OTHER_COLOR: \n\
srcCoef = vec4(1.0) - dstColor; \n\
break; \n\
case SRC_ALPHA: \n\
srcCoef = vec4(srcColor.a); \n\
break; \n\
case INVERSE_SRC_ALPHA: \n\
srcCoef = vec4(1.0 - srcColor.a); \n\
break; \n\
case DST_ALPHA: \n\
srcCoef = vec4(dstColor.a); \n\
break; \n\
case INVERSE_DST_ALPHA: \n\
srcCoef = vec4(1.0 - dstColor.a); \n\
break; \n\
} \n\
int dstBlend = getDstBlendFunc(pp, area1); \n\
switch (dstBlend) \n\
{ \n\
case ZERO: \n\
dstCoef = vec4(0.0); \n\
break; \n\
case ONE: \n\
dstCoef = vec4(1.0); \n\
break; \n\
case OTHER_COLOR: \n\
dstCoef = srcColor; \n\
break; \n\
case INVERSE_OTHER_COLOR: \n\
dstCoef = vec4(1.0) - srcColor; \n\
break; \n\
case SRC_ALPHA: \n\
dstCoef = vec4(srcColor.a); \n\
break; \n\
case INVERSE_SRC_ALPHA: \n\
dstCoef = vec4(1.0 - srcColor.a); \n\
break; \n\
case DST_ALPHA: \n\
dstCoef = vec4(dstColor.a); \n\
break; \n\
case INVERSE_DST_ALPHA: \n\
dstCoef = vec4(1.0 - dstColor.a); \n\
break; \n\
} \n\
const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\
if (getDstSelect(pp, area1)) \n\
secondaryBuffer = result; \n\
else \n\
finalColor = result; \n\
} \n\
\n\
return finalColor; \n\
\n\
} \n\
\n\
void main(void) \n\
{ \n\
ivec2 coords = ivec2(gl_FragCoord.xy); \n\
// Compute and output final color for the frame buffer \n\
// Visualize the number of layers in use \n\
//FragColor = vec4(float(fillFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT, 0, 0, 1); \n\
FragColor = resolveAlphaBlend(coords); \n\
} \n\
";
// GLSL fragment shader that resets the A-buffer: for every covered pixel it stores
// EOL into abufferPointerImg and discards the fragment so the framebuffer is untouched.
// Compiled as-is (no format placeholders) by initABuffer().
static const char *clear_shader_source = SHADER_HEADER "\
\n\
void main(void) \n\
{ \n\
ivec2 coords = ivec2(gl_FragCoord.xy); \n\
\n\
// Reset pointers \n\
imageStore(abufferPointerImg, coords, uvec4(EOL)); \n\
\n\
// Discard fragment so nothing is written to the framebuffer \n\
discard; \n\
} \n\
";
// GLSL fragment shader drawn over translucent modifier-volume geometry.
// The single %d is replaced (printf-style, see initABuffer) by the modifier volume
// mode, so the text must not contain any other '%'.
// For each covered pixel it walks at most MAX_PIXELS_PER_FRAGMENT entries of the
// fragment list and, for polygons with shadow enabled, updates bits of
// pixels[idx].seq_num according to MV_MODE. Nothing is written to the framebuffer.
// NOTE(review): SHADOW_STENCIL and SHADOW_ACC come from SHADER_HEADER and are not
// visible here; the INCLUSION/EXCLUSION branches store a value derived from
// pixel.seq_num as read before the atomicAnd -- confirm this is intended.
static const char *tr_modvol_shader_source = SHADER_HEADER "\
#define MV_MODE %d \n\
#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\
\n\
// Must match ModifierVolumeMode enum values \n\
#define MV_XOR 0 \n\
#define MV_OR 1 \n\
#define MV_INCLUSION 2 \n\
#define MV_EXCLUSION 3 \n\
\n\
void main(void) \n\
{ \n\
#if MV_MODE == MV_XOR || MV_MODE == MV_OR \n\
setFragDepth(); \n\
#endif \n\
ivec2 coords = ivec2(gl_FragCoord.xy); \n\
\n\
uint idx = imageLoad(abufferPointerImg, coords).x; \n\
int list_len = 0; \n\
while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT) \n\
{ \n\
const Pixel pixel = pixels[idx]; \n\
const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\
if (getShadowEnable(pp)) \n\
{ \n\
#if MV_MODE == MV_XOR \n\
if (gl_FragDepth <= pixel.depth) \n\
atomicXor(pixels[idx].seq_num, SHADOW_STENCIL); \n\
#elif MV_MODE == MV_OR \n\
if (gl_FragDepth <= pixel.depth) \n\
atomicOr(pixels[idx].seq_num, SHADOW_STENCIL); \n\
#elif MV_MODE == MV_INCLUSION \n\
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL) \n\
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\
#elif MV_MODE == MV_EXCLUSION \n\
uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\
if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC) \n\
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\
#endif \n\
} \n\
idx = pixel.next; \n\
list_len++; \n\
} \n\
\n\
discard; \n\
} \n\
";
void DrawQuad();
// Creates whatever A-buffer GL objects do not exist yet, then clears the
// per-pixel pointer image.
//
// - The pointer image (R32UI texture, image unit 4) is (re)allocated at the current
//   screen_width x screen_height whenever both are positive.
// - The pixel storage buffer, the atomic counter buffer, the shaders and the quad
//   VAO/VBO are created only the first time (their handle is still 0).
//
// Safe to call repeatedly; reshapeABuffer() calls it after deleting the pointer texture.
void initABuffer()
{
	g_imageWidth = screen_width;
	g_imageHeight = screen_height;

	if (g_imageWidth > 0 && g_imageHeight > 0)
	{
		if (pixels_pointers == 0)
			pixels_pointers = glcache.GenTexture();
		glActiveTexture(GL_TEXTURE4);
		glBindTexture(GL_TEXTURE_2D, pixels_pointers);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, g_imageWidth, g_imageHeight, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0);
		glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI);
		glCheck();
	}

	if (pixels_buffer == 0)
	{
		// Create the buffer
		glGenBuffers(1, &pixels_buffer);
		// Bind it
		glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
		// Declare storage
		glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
		glCheck();
	}

	if (atomic_buffer == 0)
	{
		// Create the buffer
		glGenBuffers(1, &atomic_buffer);
		// Bind it
		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer);
		// Declare storage
		glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY);
		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer);
		// Start the counter at zero. This must target the atomic counter buffer that
		// was just bound: writing through GL_SHADER_STORAGE_BUFFER would leave the
		// counter undefined and clobber the first bytes of whatever SSBO is bound.
		GLint zero = 0;
		glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLint), &zero);
		glCheck();
	}

	// The shader templates contain a single %d that selects the variant to build.
	// snprintf bounds the write to the local buffer.
	if (g_abuffer_final_shader.program == 0)
	{
		char source[16384];
		snprintf(source, sizeof(source), final_shader_source, 1);
		gl4CompilePipelineShader(&g_abuffer_final_shader, source);
	}
	if (g_abuffer_final_nosort_shader.program == 0)
	{
		char source[16384];
		snprintf(source, sizeof(source), final_shader_source, 0);
		gl4CompilePipelineShader(&g_abuffer_final_nosort_shader, source);
	}
	if (g_abuffer_clear_shader.program == 0)
		gl4CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source);
	if (g_abuffer_tr_modvol_shaders[0].program == 0)
	{
		char source[16384];
		for (int mode = 0; mode < ModeCount; mode++)
		{
			snprintf(source, sizeof(source), tr_modvol_shader_source, mode);
			gl4CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], source);
		}
	}

	if (g_quadVertexArray == 0)
		glGenVertexArrays(1, &g_quadVertexArray);
	if (g_quadBuffer == 0)
		glGenBuffers(1, &g_quadBuffer);
	glCheck();

	// Clear A-buffer pointers
	glcache.UseProgram(g_abuffer_clear_shader.program);
	gl4ShaderUniforms.Set(&g_abuffer_clear_shader);

	DrawQuad();
	glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
	glCheck();
}
// Rebuilds the A-buffer pointer image when the requested size differs from the
// one it was last initialised with; does nothing if the size is unchanged.
void reshapeABuffer(int w, int h)
{
	const bool same_size = (w == g_imageWidth && h == g_imageHeight);
	if (same_size)
		return;

	// Drop the old pointer texture so initABuffer() generates a fresh one
	if (pixels_pointers != 0)
	{
		glcache.DeleteTextures(1, &pixels_pointers);
		pixels_pointers = 0;
	}
	initABuffer();
}
void DrawQuad()
{
glBindVertexArray(g_quadVertexArray);
float xmin = (gl4ShaderUniforms.scale_coefs[2] - 1) / gl4ShaderUniforms.scale_coefs[0];
float xmax = (gl4ShaderUniforms.scale_coefs[2] + 1) / gl4ShaderUniforms.scale_coefs[0];
float ymin = (gl4ShaderUniforms.scale_coefs[3] - 1) / gl4ShaderUniforms.scale_coefs[1];
float ymax = (gl4ShaderUniforms.scale_coefs[3] + 1) / gl4ShaderUniforms.scale_coefs[1];
if (ymin > ymax)
{
float t = ymin;
ymin = ymax;
ymax = t;
}
struct Vertex vertices[] = {
{ xmin, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 },
{ xmin, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 },
{ xmax, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 },
{ xmax, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 },
};
GLushort indices[] = { 0, 1, 2, 1, 3 };
glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck();
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck();
glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck();
glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck();
glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck();
glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck();
glDisableVertexAttribArray(VERTEX_UV1_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY);
glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck();
}
void DrawTranslucentModVols(int first, int count)
{
if (count == 0 || pvrrc.modtrig.used() == 0)
return;
gl4SetupModvolVBO();
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_STENCIL_TEST);
glCheck();
ModifierVolumeParam* params = &pvrrc.global_param_mvo_tr.head()[first];
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
int mod_base = -1;
for (u32 cmv = 0; cmv < count; cmv++)
{
ModifierVolumeParam& param = params[cmv];
if (param.count == 0)
continue;
u32 mv_mode = param.isp.DepthMode;
verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used());
if (mod_base == -1)
mod_base = param.first;
gl4PipelineShader *shader;
if (!param.isp.VolumeLast && mv_mode > 0)
shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad)
else
shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume)
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
SetCull(param.isp.CullMode); glCheck();
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck();
if (mv_mode == 1 || mv_mode == 2)
{
//Sum the area
shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? Inclusion : Exclusion];
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck();
mod_base = -1;
}
}
}
// Resets the A-buffer atomic counter to zero by writing to the buffer currently
// bound to GL_ATOMIC_COUNTER_BUFFER.
// The overflow detection / buffer growth logic suggested by the name is present
// only as commented-out code below; as written the function never reads the
// counter back and never resizes pixels_buffer.
void checkOverflowAndReset()
{
// Using atomic counter
GLuint max_pixel_index = 0;
// Disabled: read back the counter and double pixel_buffer_size (up to
// GL_MAX_SHADER_STORAGE_BLOCK_SIZE) when the pixel buffer overflowed.
// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index);
//// printf("ABUFFER %d pixels used\n", max_pixel_index);
// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size)
// {
// GLint64 size;
// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size);
// if (pixel_buffer_size == size)
// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index);
// else
// {
// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size);
//
// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024);
//
// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer);
// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY);
// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer);
// glCheck();
// }
// }
// Reset counter
// (the re-assignment only matters if the read-back above is re-enabled)
max_pixel_index = 0;
glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index);
}
void renderABuffer(bool sortFragments)
{
// Render to output FBO
glcache.UseProgram(sortFragments ? g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_final_shader);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_CULL_FACE);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
DrawQuad();
glCheck();
// Clear A-buffer pointers
glcache.UseProgram(g_abuffer_clear_shader.program);
gl4ShaderUniforms.Set(&g_abuffer_clear_shader);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
DrawQuad();
glActiveTexture(GL_TEXTURE0);
glCheck();
}

314
core/rend/gl4/gl4.h Executable file
View File

@ -0,0 +1,314 @@
#pragma once
#include "rend/gles/gles.h"
#include <map>
extern float gl4_scale_x, gl4_scale_y;
void gl4DrawStrips(GLuint output_fbo);
// One compiled variant of the GL4 pixel pipeline (or of an A-buffer pass).
// The GLuint members after `program` are presumably uniform handles looked up
// when the program is linked -- confirm in gl4CompilePipelineShader.
// Member order is unchanged; only the declarations were split one per line.
struct gl4PipelineShader
{
	GLuint program;

	GLuint scale;
	GLuint extra_depth_scale;
	GLuint pp_ClipTest;
	GLuint cp_AlphaTestValue;
	GLuint sp_FOG_COL_RAM;
	GLuint sp_FOG_COL_VERT;
	GLuint sp_FOG_DENSITY;
	GLuint shade_scale_factor;
	GLuint pp_Number;
	GLuint blend_mode;
	GLuint use_alpha;
	GLuint ignore_tex_alpha;
	GLuint shading_instr;
	GLuint fog_control;
	GLuint trilinear_alpha;
	GLuint fog_clamp_min;
	GLuint fog_clamp_max;

	// Variant parameters (same names as the arguments of gl4GetProgramID)
	u32 cp_AlphaTest;
	s32 pp_ClipTestMode;
	u32 pp_Texture;
	u32 pp_UseAlpha;
	u32 pp_IgnoreTexA;
	u32 pp_ShadInstr;
	u32 pp_Offset;
	u32 pp_FogCtrl;
	u32 pp_DepthFunc;
	int pass;
	bool pp_TwoVolumes;
	bool pp_Gouraud;
	bool pp_BumpMap;
	bool fog_clamping;
};
// Aggregates the GL objects owned by the GL4 renderer: the modifier volume and OSD
// programs, the cache of pipeline shader variants and the vertex/index buffers.
struct gl4_ctx
{
	struct
	{
		GLuint program;
		GLuint scale;
		GLuint extra_depth_scale;
	} modvol_shader;

	// Pipeline shader variants keyed by program id; entries are created on demand
	// by getShader() and are not freed by this struct.
	std::map<int, gl4PipelineShader *> shaders;

	struct
	{
		GLuint program;
		GLuint scale;
		GLuint extra_depth_scale;
	} OSD_SHADER;

	struct
	{
		GLuint geometry;
		GLuint modvols;
		GLuint idxs;
		GLuint idxs2;
		GLuint vao;
		GLuint tr_poly_params;
	} vbo;

	// Returns the shader record for programId, allocating a zero-initialised one
	// with program set to -1 the first time the id is requested.
	gl4PipelineShader *getShader(int programId) {
		gl4PipelineShader *&slot = shaders[programId];
		if (slot != NULL)
			return slot;

		slot = new gl4PipelineShader();
		slot->program = -1;
		return slot;
	}
};
extern gl4_ctx gl4;
extern int screen_width;
extern int screen_height;
GLuint gl4BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt);
void gl4DrawFramebuffer(float w, float h);
int gl4GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,
u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset,
u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, int pass);
extern const char *gl4PixelPipelineShader;
bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *source = gl4PixelPipelineShader);
extern GLuint stencilTexId;
extern GLuint depthTexId;
extern GLuint opaqueTexId;
extern GLuint depthSaveTexId;
// Common GLSL 4.30 preamble meant to be prepended to the OIT fragment shaders.
// It declares:
//  - abufferPointerImg, a r32ui image, and the PixelBuffer storage buffer of Pixel
//    records chained through their 'next' index (EOL terminates a chain)
//    NOTE(review): presumably the image holds the per-pixel list heads - confirm in the shaders using it
//  - the atomic counter used by getNextPixelIndex() to allocate Pixel records
//  - the blend factor constants (ZERO .. INVERSE_DST_ALPHA)
//  - setFragDepth(), which writes a logarithmic depth derived from gl_FragCoord.w
//  - the PolyParam storage buffer (tr_poly_params) and accessors decoding its tsp/isp/pcw words
//  - helpers extracting the polygon number and shadow flags from Pixel.seq_num
#define SHADER_HEADER "#version 430 \n\
\n\
layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\
struct Pixel { \n\
highp vec4 color; \n\
highp float depth; \n\
uint seq_num; \n\
uint next; \n\
}; \n\
#define EOL 0xFFFFFFFFu \n\
layout (binding = 0, std430) coherent restrict buffer PixelBuffer { \n\
Pixel pixels[]; \n\
}; \n\
layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\
\n\
#define ZERO 0 \n\
#define ONE 1 \n\
#define OTHER_COLOR 2 \n\
#define INVERSE_OTHER_COLOR 3 \n\
#define SRC_ALPHA 4 \n\
#define INVERSE_SRC_ALPHA 5 \n\
#define DST_ALPHA 6 \n\
#define INVERSE_DST_ALPHA 7 \n\
\n\
uint getNextPixelIndex() \n\
{ \n\
uint index = atomicCounterIncrement(buffer_index); \n\
if (index >= pixels.length()) \n\
// Buffer overflow \n\
discard; \n\
\n\
return index; \n\
} \n\
\n\
void setFragDepth(void) \n\
{ \n\
highp float w = 100000.0 * gl_FragCoord.w; \n\
gl_FragDepth = 1.0 - log2(1.0 + w) / 34.0; \n\
} \n\
struct PolyParam { \n\
int first; \n\
int count; \n\
int texid; \n\
int tsp; \n\
int tcw; \n\
int pcw; \n\
int isp; \n\
float zvZ; \n\
int tileclip; \n\
int tsp1; \n\
int tcw1; \n\
int texid1; \n\
}; \n\
layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\
PolyParam tr_poly_params[]; \n\
}; \n\
\n\
#define GET_TSP_FOR_AREA int tsp; if (area1) tsp = pp.tsp1; else tsp = pp.tsp; \n\
\n\
int getSrcBlendFunc(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return (tsp >> 29) & 7; \n\
} \n\
\n\
int getDstBlendFunc(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return (tsp >> 26) & 7; \n\
} \n\
\n\
bool getSrcSelect(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return ((tsp >> 25) & 1) != 0; \n\
} \n\
\n\
bool getDstSelect(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return ((tsp >> 24) & 1) != 0; \n\
} \n\
\n\
int getFogControl(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return (tsp >> 22) & 3; \n\
} \n\
\n\
bool getUseAlpha(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return ((tsp >> 20) & 1) != 0; \n\
} \n\
\n\
bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return ((tsp >> 19) & 1) != 0; \n\
} \n\
\n\
int getShadingInstruction(const PolyParam pp, bool area1) \n\
{ \n\
GET_TSP_FOR_AREA \n\
return (tsp >> 6) & 3; \n\
} \n\
\n\
int getDepthFunc(const PolyParam pp) \n\
{ \n\
return (pp.isp >> 29) & 7; \n\
} \n\
\n\
bool getDepthMask(const PolyParam pp) \n\
{ \n\
return ((pp.isp >> 26) & 1) != 1; \n\
} \n\
\n\
bool getShadowEnable(const PolyParam pp) \n\
{ \n\
return ((pp.pcw >> 7) & 1) != 0; \n\
} \n\
\n\
uint getPolyNumber(const Pixel pixel) \n\
{ \n\
return pixel.seq_num & 0x3FFFFFFFu; \n\
} \n\
\n\
#define SHADOW_STENCIL 0x40000000u \n\
#define SHADOW_ACC 0x80000000u \n\
\n\
bool isShadowed(const Pixel pixel) \n\
{ \n\
return (pixel.seq_num & SHADOW_ACC) == SHADOW_ACC; \n\
} \n\
\n\
bool isTwoVolumes(const PolyParam pp) \n\
{ \n\
return pp.tsp1 != -1 || pp.tcw1 != -1; \n\
} \n\
\n\
"
// Binds the modifier volume vertex buffer and leaves only the position attribute enabled
void gl4SetupModvolVBO();
extern struct gl4ShaderUniforms_t
{
float PT_ALPHA;
float scale_coefs[4];
float extra_depth_scale;
float fog_den_float;
float ps_FOG_COL_RAM[3];
float ps_FOG_COL_VERT[3];
int poly_number;
float trilinear_alpha;
TSP tsp0;
TSP tsp1;
TCW tcw0;
TCW tcw1;
float fog_clamp_min[4];
float fog_clamp_max[4];
void setUniformArray(GLuint location, int v0, int v1)
{
int array[] = { v0, v1 };
glUniform1iv(location, 2, array);
}
void Set(gl4PipelineShader* s)
{
if (s->cp_AlphaTestValue!=-1)
glUniform1f(s->cp_AlphaTestValue,PT_ALPHA);
if (s->scale!=-1)
glUniform4fv( s->scale, 1, scale_coefs);
if (s->extra_depth_scale != -1)
glUniform1f(s->extra_depth_scale, extra_depth_scale);
if (s->sp_FOG_DENSITY!=-1)
glUniform1f( s->sp_FOG_DENSITY,fog_den_float);
if (s->sp_FOG_COL_RAM!=-1)
glUniform3fv( s->sp_FOG_COL_RAM, 1, ps_FOG_COL_RAM);
if (s->sp_FOG_COL_VERT!=-1)
glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT);
if (s->shade_scale_factor != -1)
glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f);
if (s->blend_mode != -1) {
u32 blend_mode[] = { tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr };
glUniform2iv(s->blend_mode, 2, (GLint *)blend_mode);
}
if (s->use_alpha != -1)
setUniformArray(s->use_alpha, tsp0.UseAlpha, tsp1.UseAlpha);
if (s->ignore_tex_alpha != -1)
setUniformArray(s->ignore_tex_alpha, tsp0.IgnoreTexA, tsp1.IgnoreTexA);
if (s->shading_instr != -1)
setUniformArray(s->shading_instr, tsp0.ShadInstr, tsp1.ShadInstr);
if (s->fog_control != -1)
setUniformArray(s->fog_control, tsp0.FogCtrl, tsp1.FogCtrl);
if (s->pp_Number != -1)
glUniform1i(s->pp_Number, poly_number);
if (s->trilinear_alpha != -1)
glUniform1f(s->trilinear_alpha, trilinear_alpha);
if (s->fog_clamp_min != -1)
glUniform4fv(s->fog_clamp_min, 1, fog_clamp_min);
if (s->fog_clamp_max != -1)
glUniform4fv(s->fog_clamp_max, 1, fog_clamp_max);
}
} gl4ShaderUniforms;

921
core/rend/gl4/gldraw.cpp Normal file
View File

@ -0,0 +1,921 @@
#include "gl4.h"
#include "rend/gles/glcache.h"
#include "rend/rend.h"
/*
Drawing and related state management
Takes vertex, textures and renders to the currently set up target
*/
// When defined, the table below maps each "less" comparison to its "greater"
// counterpart and vice versa.
// NOTE(review): presumably needed because setFragDepth() in SHADER_HEADER writes
// an inverted depth (1.0 - log2(...)) - confirm
#define INVERT_DEPTH_FUNC
// GL depth comparison to use for each depth mode value (0..7).
// The trailing comment on each entry is the index and the name of the emulated
// depth function, NOT the GL constant actually stored.
const static u32 Zfunction[]=
{
GL_NEVER, //GL_NEVER, //0 Never
#ifndef INVERT_DEPTH_FUNC
GL_LESS, //GL_LESS/*EQUAL*/, //1 Less
GL_EQUAL, //GL_EQUAL, //2 Equal
GL_LEQUAL, //GL_LEQUAL, //3 Less Or Equal
GL_GREATER, //GL_GREATER/*EQUAL*/, //4 Greater
GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal
GL_GEQUAL, //GL_GEQUAL, //6 Greater Or Equal
#else
GL_GREATER, //1 Less
GL_EQUAL, //2 Equal
GL_GEQUAL, //3 Less Or Equal
GL_LESS, //4 Greater
GL_NOTEQUAL, //5 Not Equal
GL_LEQUAL, //6 Greater Or Equal
#endif
GL_ALWAYS, //GL_ALWAYS, //7 Always
};
/*
0 Zero (0, 0, 0, 0)
1 One (1, 1, 1, 1)
2 Other Color (OR, OG, OB, OA)
3 Inverse Other Color (1-OR, 1-OG, 1-OB, 1-OA)
4 SRC Alpha (SA, SA, SA, SA)
5 Inverse SRC Alpha (1-SA, 1-SA, 1-SA, 1-SA)
6 DST Alpha (DA, DA, DA, DA)
7 Inverse DST Alpha (1-DA, 1-DA, 1-DA, 1-DA)
*/
// GL blend factor for each destination blend instruction (0..7, see the table above).
// For the destination factor the "other" color is the source color.
// NOTE(review): not referenced anywhere in this file
const static u32 DstBlendGL[] =
{
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
GL_SRC_ALPHA,
GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA
};
// GL blend factor for each source blend instruction (0..7, see the table above).
// For the source factor the "other" color is the destination color.
// NOTE(review): not referenced anywhere in this file
const static u32 SrcBlendGL[] =
{
GL_ZERO,
GL_ONE,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
GL_SRC_ALPHA,
GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA
};
// Pipeline shader selected by the last call to SetGPState()
static gl4PipelineShader* CurrentShader;
// Global cull flip, xor'ed into the cull mode of every polygon
extern u32 gcflip;
// Framebuffer object the geometry is rendered into (created on first use by gl4DrawStrips)
static GLuint geom_fbo;
// Depth/stencil texture attached to geom_fbo, sampled as stencil index
GLuint stencilTexId;
// Color texture attached to geom_fbo
GLuint opaqueTexId;
// Texture view on stencilTexId's storage, sampled as depth
GLuint depthTexId;
// Sampler objects for texture units 0 and 1
static GLuint texSamplers[2];
// Framebuffer object and texture holding a copy of the depth buffer between render passes
static GLuint depth_fbo;
GLuint depthSaveTexId;
/*
	Decodes a tile clip word and optionally uploads the clip rectangle.

	val  tile clip word: bits 31-28 clip mode, bits 5-0 / 11-6 first / last tile column,
	     bits 16-12 / 21-17 first / last tile row (tiles are 32x32 pixels)
	set  when true and clipping applies, the rectangle is uploaded to the
	     pp_ClipTest uniform of CurrentShader

	Returns 0 when no clipping applies, 1 to render inside the rectangle only,
	-1 to render outside of it only.
*/
static s32 SetTileClip(u32 val, bool set)
{
	if (!settings.rend.Clipping)
		return 0;

	const u32 mode_bits = val >> 28;
	if (mode_bits < 2)
		return 0;	// always passes
	// odd mode: render stuff outside the region, even mode: inside the region
	const s32 clip_mode = (mode_bits & 1) ? -1 : 1;

	// Rectangle in pixels; the end tile is inclusive, hence the extra 32
	float x_min = (float)(val & 63) * 32;
	float x_max = (float)((val >> 6) & 63) * 32 + 32;
	float y_min = (float)((val >> 12) & 31) * 32;
	float y_max = (float)((val >> 17) & 31) * 32 + 32;

	// A rectangle covering the whole 640x480 frame doesn't clip anything
	if (x_min <= 0 && y_min <= 0 && x_max >= 640 && y_max >= 480)
		return 0;

	if (!set)
		return clip_mode;

	if (pvrrc.isRTT)
	{
		x_min *= settings.rend.RenderToTextureUpscale;
		y_min *= settings.rend.RenderToTextureUpscale;
		x_max *= settings.rend.RenderToTextureUpscale;
		y_max *= settings.rend.RenderToTextureUpscale;
	}
	else
	{
		x_min /= gl4_scale_x;
		y_min /= gl4_scale_y;
		x_max /= gl4_scale_x;
		y_max /= gl4_scale_y;

		// Flip vertically within the 480 lines of the frame
		const float flipped_y_min = 480 - y_max;
		y_max = 480 - y_min;
		y_min = flipped_y_min;

		// Scale to the window height and center horizontally
		const float dc2s_scale_h = screen_height / 480.0f;
		const float ds2s_offs_x = (screen_width - dc2s_scale_h * 640) / 2;
		x_min = x_min * dc2s_scale_h + ds2s_offs_x;
		x_max = x_max * dc2s_scale_h + ds2s_offs_x;
		y_min = y_min * dc2s_scale_h;
		y_max = y_max * dc2s_scale_h;
	}
	glUniform4f(CurrentShader->pp_ClipTest, x_min, y_min, x_max, y_max);

	return clip_mode;
}
static void SetTextureRepeatMode(int index, GLuint dir, u32 clamp, u32 mirror)
{
if (clamp)
glSamplerParameteri(texSamplers[index], dir, GL_CLAMP_TO_EDGE);
else
glSamplerParameteri(texSamplers[index], dir, mirror ? GL_MIRRORED_REPEAT : GL_REPEAT);
}
template <u32 Type, bool SortingEnabled>
static void SetGPState(const PolyParam* gp, int pass, u32 cflip=0)
{
if (gp->pcw.Texture && gp->tsp.FilterMode > 1)
{
gl4ShaderUniforms.trilinear_alpha = 0.25 * (gp->tsp.MipMapD & 0x3);
if (gp->tsp.FilterMode == 2)
// Trilinear pass A
gl4ShaderUniforms.trilinear_alpha = 1.0 - gl4ShaderUniforms.trilinear_alpha;
}
else
gl4ShaderUniforms.trilinear_alpha = 1.0;
s32 clipping = SetTileClip(gp->tileclip, false);
int shaderId;
if (pass == 0)
{
shaderId = gl4GetProgramID(Type == ListType_Punch_Through ? 1 : 0,
clipping + 1,
Type == ListType_Punch_Through ? gp->pcw.Texture : 0,
1,
gp->tsp.IgnoreTexA,
0,
0,
2,
false, // TODO Can PT have two different textures for area 0 and 1 ??
0,
false,
false,
false,
pass);
CurrentShader = gl4.getShader(shaderId);
if (CurrentShader->program == -1) {
CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0;
CurrentShader->pp_ClipTestMode = clipping;
CurrentShader->pp_Texture = Type == ListType_Punch_Through ? gp->pcw.Texture : 0;
CurrentShader->pp_UseAlpha = 1;
CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA;
CurrentShader->pp_ShadInstr = 0;
CurrentShader->pp_Offset = 0;
CurrentShader->pp_FogCtrl = 2;
CurrentShader->pp_TwoVolumes = false;
CurrentShader->pp_DepthFunc = 0;
CurrentShader->pp_Gouraud = false;
CurrentShader->pp_BumpMap = false;
CurrentShader->fog_clamping = false;
CurrentShader->pass = pass;
gl4CompilePipelineShader(CurrentShader);
}
}
else
{
// Two volumes mode only supported for OP and PT
bool two_volumes_mode = (gp->tsp1.full != -1) && Type != ListType_Translucent;
bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff);
int depth_func = 0;
if (Type == ListType_Translucent)
{
if (SortingEnabled)
depth_func = 6; // GEQUAL
else
depth_func = gp->isp.DepthMode;
}
shaderId = gl4GetProgramID(Type == ListType_Punch_Through ? 1 : 0,
clipping + 1,
gp->pcw.Texture,
gp->tsp.UseAlpha,
gp->tsp.IgnoreTexA,
gp->tsp.ShadInstr,
gp->pcw.Offset,
gp->tsp.FogCtrl,
two_volumes_mode,
depth_func,
gp->pcw.Gouraud,
gp->tcw.PixelFmt == PixelBumpMap,
color_clamp,
pass);
CurrentShader = gl4.getShader(shaderId);
if (CurrentShader->program == -1) {
CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0;
CurrentShader->pp_ClipTestMode = clipping;
CurrentShader->pp_Texture = gp->pcw.Texture;
CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha;
CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA;
CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr;
CurrentShader->pp_Offset = gp->pcw.Offset;
CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl;
CurrentShader->pp_TwoVolumes = two_volumes_mode;
CurrentShader->pp_DepthFunc = depth_func;
CurrentShader->pp_Gouraud = gp->pcw.Gouraud;
CurrentShader->pp_BumpMap = gp->tcw.PixelFmt == 4;
CurrentShader->fog_clamping = color_clamp;
CurrentShader->pass = pass;
gl4CompilePipelineShader(CurrentShader);
}
}
glcache.UseProgram(CurrentShader->program);
gl4ShaderUniforms.tsp0 = gp->tsp;
gl4ShaderUniforms.tsp1 = gp->tsp1;
gl4ShaderUniforms.tcw0 = gp->tcw;
gl4ShaderUniforms.tcw1 = gp->tcw1;
if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a >0 and <1 alpha?
{
gl4ShaderUniforms.tsp0.SrcInstr = 1;
gl4ShaderUniforms.tsp0.DstInstr = 0;
gl4ShaderUniforms.tsp1.SrcInstr = 1;
gl4ShaderUniforms.tsp1.DstInstr = 0;
}
gl4ShaderUniforms.Set(CurrentShader);
SetTileClip(gp->tileclip,true);
//This bit control which pixels are affected
//by modvols
const u32 stencil=(gp->pcw.Shadow!=0)?0x80:0x0;
glcache.StencilFunc(GL_ALWAYS,stencil,stencil);
if (CurrentShader->pp_Texture)
{
for (int i = 0; i < 2; i++)
{
glActiveTexture(GL_TEXTURE0 + i);
GLuint texid = i == 0 ? gp->texid : gp->texid1;
glBindTexture(GL_TEXTURE_2D, texid == -1 ? 0 : texid);
if (texid != -1)
{
TSP tsp = i == 0 ? gp->tsp : gp->tsp1;
TCW tcw = i == 0 ? gp->tcw : gp->tcw1;
glBindSampler(i, texSamplers[i]);
SetTextureRepeatMode(i, GL_TEXTURE_WRAP_S, tsp.ClampU, tsp.FlipU);
SetTextureRepeatMode(i, GL_TEXTURE_WRAP_T, tsp.ClampV, tsp.FlipV);
//set texture filter mode
if (tsp.FilterMode == 0)
{
//disable filtering, mipmaps
glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_NEAREST);
}
else
{
//bilinear filtering
//PowerVR supports also trilinear via two passes, but we ignore that for now
glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, (tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR);
glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}
}
}
glActiveTexture(GL_TEXTURE0);
}
//set cull mode !
//cflip is required when exploding triangles for triangle sorting
//gcflip is global clip flip, needed for when rendering to texture due to mirrored Y direction
SetCull(gp->isp.CullMode^cflip^gcflip);
//set Z mode, only if required
if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled))
{
glcache.DepthFunc(Zfunction[6]); // Greater or equal
}
else
{
glcache.DepthFunc(Zfunction[gp->isp.DepthMode]);
}
// Depth buffer is updated in pass 0 (and also in pass 1 for OP PT)
if (pass < 2)
glcache.DepthMask(!gp->isp.ZWriteDis);
else
glcache.DepthMask(GL_FALSE);
}
template <u32 Type, bool SortingEnabled>
static void DrawList(const List<PolyParam>& gply, int first, int count, int pass)
{
PolyParam* params = &gply.head()[first];
if (count==0)
return;
//we want at least 1 PParam
while(count-->0)
{
if (params->count>2) //this actually happens for some games. No idea why ..
{
if (pass != 0)
{
// No need to draw this one
if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1)
{
params++;
continue;
}
}
gl4ShaderUniforms.poly_number = params - gply.head();
SetGPState<Type,SortingEnabled>(params, pass);
glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck();
}
params++;
}
}
//All pixels are in area 0 by default.
//If inside an 'in' volume, they are in area 1
//if inside an 'out' volume, they are in area 0
/*
Stencil bits:
bit 7: mv affected (must be preserved)
bit 1: current volume state
bit 0: summary result (starts off as 0)
Lower 2 bits:
IN volume (logical OR):
00 -> 00
01 -> 01
10 -> 01
11 -> 01
Out volume (logical AND):
00 -> 00
01 -> 00
10 -> 00
11 -> 01
*/
/*
	Sets the depth test, stencil and cull state for one step of modifier volume rendering.

	Xor / Or              draw the volume triangles into stencil bit 1
	Inclusion / Exclusion merge bit 1 into the summary bit 0 and clear bit 1
*/
static void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc)
{
	if (mv_mode == Xor || mv_mode == Or)
	{
		const bool xor_mode = mv_mode == Xor;

		// set states
		glcache.Enable(GL_DEPTH_TEST);
		// write only bit 1
		glcache.StencilMask(2);
		// no stencil testing. The reference value (2) only matters for Or, where it is written by GL_REPLACE
		glcache.StencilFunc(GL_ALWAYS, xor_mode ? 0 : 2, 2);
		// Xor: count the number of pixels in front of the Z buffer (xor zpass)
		// Or: Or'ing of all triangles
		glcache.StencilOp(GL_KEEP, GL_KEEP, xor_mode ? GL_INVERT : GL_REPLACE);
		// Cull mode needs to be set
		SetCull(ispc.CullMode);
		return;
	}

	// Inclusion or Exclusion volume

	// no depth test
	glcache.Disable(GL_DEPTH_TEST);
	// write bits 1:0
	glcache.StencilMask(3);

	if (mv_mode == Inclusion)
	{
		// Inclusion volume
		//res : old : final
		//0   : 0   : 00
		//0   : 1   : 01
		//1   : 0   : 01
		//1   : 1   : 01

		// if (1<=st) st=1; else st=0;
		glcache.StencilFunc(GL_LEQUAL,1,3);
		glcache.StencilOp(GL_ZERO, GL_ZERO, GL_REPLACE);
	}
	else
	{
		// Exclusion volume
		/*
			I've only seen a single game use it, so i guess it doesn't matter ? (Zombie revenge)
			(actually, i think there was also another, racing game)
		*/
		// The initial value for exclusion volumes is 1 so we need to invert the result before and'ing.
		//res : old : final
		//0   : 0   : 00
		//0   : 1   : 01
		//1   : 0   : 00
		//1   : 1   : 00

		// if (1 == st) st = 1; else st = 0;
		glcache.StencilFunc(GL_EQUAL, 1, 3);
		glcache.StencilOp(GL_ZERO, GL_ZERO, GL_KEEP);
	}
}
// Binds the vertex array object, the geometry vertex buffer and the index buffer,
// then enables and describes all the Vertex attributes: position, base and offset
// colors and UVs, followed by their second set (col1, spc1, u1).
static void SetupMainVBO()
{
glBindVertexArray(gl4.vbo.vao);
glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.geometry); glCheck();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl4.vbo.idxs); glCheck();
//setup vertex buffers attrib pointers
glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck();
// Colors are 4 unsigned bytes, normalized
glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck();
glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck();
glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck();
// Second set of colors and UVs
glEnableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_BASE1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, col1)); glCheck();
glEnableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_COL_OFFS1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, spc1)); glCheck();
glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck();
}
// Binds the modifier volume vertex buffer. Its vertices are 3 tightly packed floats,
// so only the position attribute is enabled; all color and UV attributes are disabled.
// The element array buffer binding is left untouched.
void gl4SetupModvolVBO()
{
glBindVertexArray(gl4.vbo.vao);
glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.modvols); glCheck();
//setup vertex buffers attrib pointers
glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck();
glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(float)*3, (void*)0); glCheck();
glDisableVertexAttribArray(VERTEX_UV_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_OFFS_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_BASE_ARRAY);
glDisableVertexAttribArray(VERTEX_UV1_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY);
glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY);
}
/*
	Renders 'count' modifier volume parameters of pvrrc.global_param_mvo, starting
	at index 'first', into the two lower bits of the stencil buffer (see SetMVS_Mode).

	Color writes are disabled by this function and NOT re-enabled: the caller is
	expected to set the color mask it needs afterwards. The main vertex buffer
	setup, the depth test and depth writes are restored before returning.
*/
static void DrawModVols(int first, int count)
{
	// count is signed: reject negative values too, instead of letting them wrap
	// around in an unsigned comparison
	if (count <= 0 || pvrrc.modtrig.used() == 0)
		return;

	gl4SetupModvolVBO();

	glcache.UseProgram(gl4.modvol_shader.program);

	glcache.DepthMask(GL_FALSE);
	glcache.DepthFunc(Zfunction[4]);

	if(0)
	{
		//simply draw the volumes -- for debugging
		SetCull(0);
		glDrawArrays(GL_TRIANGLES, first, count * 3);
		SetupMainVBO();
	}
	else
	{
		//Full emulation

		glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);

		ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first];

		// First triangle of the volume currently being accumulated, -1 if none
		int mod_base = -1;

		for (int cmv = 0; cmv < count; cmv++)
		{
			ModifierVolumeParam& param = params[cmv];

			if (param.count == 0)
				continue;

			u32 mv_mode = param.isp.DepthMode;

			if (mod_base == -1)
				mod_base = param.first;

			if (!param.isp.VolumeLast && mv_mode > 0)
				SetMVS_Mode(Or, param.isp);		// OR'ing (open volume or quad)
			else
				SetMVS_Mode(Xor, param.isp);	// XOR'ing (closed volume)

			glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3);

			if (mv_mode == 1 || mv_mode == 2)
			{
				// Sum the area: redraw every triangle of the volume to merge the result into the summary bit
				SetMVS_Mode(mv_mode == 1 ? Inclusion : Exclusion, param.isp);
				glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3);
				mod_base = -1;
			}
		}
		SetupMainVBO();
	}

	//restore states
	glcache.Enable(GL_DEPTH_TEST);
	glcache.DepthMask(GL_TRUE);
}
// Defined outside of this file; used by gl4DrawStrips below
void renderABuffer(bool sortFragments);
void DrawTranslucentModVols(int first, int count);
void checkOverflowAndReset();
// Creates a screen-sized RGBA texture with nearest filtering and attaches it as
// color attachment 0 of the currently bound framebuffer. Returns the texture id.
// The texture is left bound to GL_TEXTURE_2D.
static GLuint CreateColorFBOTexture()
{
GLuint texId = glcache.GenTexture();
glcache.BindTexture(GL_TEXTURE_2D, texId);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texId, 0);
glCheck();
return texId;
}
// Creates the textures backing the currently bound framebuffer:
//  - stencilTexId: screen-sized depth/stencil texture, attached as depth-stencil
//    attachment and sampled as stencil index
//  - opaqueTexId: color texture (see CreateColorFBOTexture)
//  - depthTexId: texture view sharing stencilTexId's storage, sampled as depth
static void CreateTextures()
{
stencilTexId = glcache.GenTexture();
glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck();
glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); // OpenGL >= 4.3
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Using glTexStorage2D instead of glTexImage2D to satisfy requirement GL_TEXTURE_IMMUTABLE_FORMAT=true, needed for glTextureView below
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH32F_STENCIL8, screen_width, screen_height);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck();
glCheck();
opaqueTexId = CreateColorFBOTexture();
depthTexId = glcache.GenTexture();
// Must be called before depthTexId is bound for the first time
glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1);
glCheck();
glcache.BindTexture(GL_TEXTURE_2D, depthTexId);
glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glCheck();
}
/*
Renders all the render passes of the current frame (pvrrc.render_passes).
The geometry is drawn into geom_fbo; the final composition is drawn into output_fbo.

For each render pass that has something to draw:
PASS 1   opaque (OP) and punch-through (PT) polygons and modifier volumes are drawn
with color writes disabled to build the depth and stencil buffers
PASS 2   OP and PT polygons are drawn again, with color writes enabled and the stencil
texture bound to unit 3
PASS 3   translucent (TR) polygons are drawn with color writes and depth test disabled
and the depth texture bound to unit 2
PASS 3b/3c (all passes but the last one) TR polygons update the depth buffer, then
renderABuffer() draws into a new color texture that replaces opaqueTexId
Finally (PASS 4) renderABuffer() draws to output_fbo.
*/
void gl4DrawStrips(GLuint output_fbo)
{
checkOverflowAndReset();
// Create the geometry FBO and its textures on first use
if (geom_fbo == 0)
{
glGenFramebuffers(1, &geom_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
CreateTextures();
GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
verify(uStatus == GL_FRAMEBUFFER_COMPLETE);
}
else
{
glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
// NOTE(review): stencilTexId is presumably reset to 0 elsewhere (e.g. on resize) to force re-creation - confirm
if (stencilTexId == 0)
CreateTextures();
}
if (texSamplers[0] == 0)
glGenSamplers(2, texSamplers);
glcache.ClearColor(0, 0, 0, 0);
// Make sure all the buffers are writable before clearing them
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glcache.DepthMask(GL_TRUE);
glStencilMask(0xFF);
glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck();
SetupMainVBO();
//Draw the strips !
//We use sampler 0
glActiveTexture(GL_TEXTURE0);
glcache.Disable(GL_BLEND);
glProvokingVertex(GL_LAST_VERTEX_CONVENTION);
// The counts in a RenderPass are cumulative: a pass covers [previous count, current count)
RenderPass previous_pass = {0};
int render_pass_count = pvrrc.render_passes.used();
for (int render_pass = 0; render_pass < render_pass_count; render_pass++)
{
const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass];
// Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash)
bool skip_op_pt = true;
bool skip_tr = true;
for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++)
{
if (pvrrc.global_param_op.head()[j].count > 2)
skip_op_pt = false;
}
for (int j = previous_pass.pt_count; skip_op_pt && j < current_pass.pt_count; j++)
{
if (pvrrc.global_param_pt.head()[j].count > 2)
skip_op_pt = false;
}
for (int j = previous_pass.tr_count; skip_tr && j < current_pass.tr_count; j++)
{
if (pvrrc.global_param_tr.head()[j].count > 2)
skip_tr = false;
}
if (skip_op_pt && skip_tr)
{
previous_pass = current_pass;
continue;
}
if (!skip_op_pt)
{
//
// PASS 1: Geometry pass to update depth and stencil
//
if (render_pass > 0)
{
// Make a copy of the depth buffer that will be reused in pass 2
if (depth_fbo == 0)
glGenFramebuffers(1, &depth_fbo);
glBindFramebuffer(GL_FRAMEBUFFER, depth_fbo);
if (depthSaveTexId == 0)
{
depthSaveTexId = glcache.GenTexture();
glcache.BindTexture(GL_TEXTURE_2D, depthSaveTexId);
glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); glCheck();
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthSaveTexId, 0); glCheck();
}
GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
verify(uStatus == GL_FRAMEBUFFER_COMPLETE);
// Blit the depth buffer from geom_fbo (read) to depth_fbo (draw)
glBindFramebuffer(GL_READ_FRAMEBUFFER, geom_fbo);
glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
glCheck();
glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
}
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glcache.Enable(GL_DEPTH_TEST);
glcache.DepthMask(GL_TRUE);
glcache.Enable(GL_STENCIL_TEST);
glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
DrawList<ListType_Opaque, false>(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0);
DrawList<ListType_Punch_Through, false>(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0);
// Modifier volumes
if (settings.rend.ModifierVolumes)
DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count);
//
// PASS 2: Render OP and PT to fbo
//
if (render_pass == 0)
{
glcache.DepthMask(GL_TRUE);
glClear(GL_DEPTH_BUFFER_BIT);
}
else
{
// Restore the depth buffer from the last render pass
// FIXME This is pretty slow apparently (CS)
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, geom_fbo);
glBindFramebuffer(GL_READ_FRAMEBUFFER, depth_fbo);
glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
glCheck();
glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
}
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glcache.Disable(GL_STENCIL_TEST);
// Bind stencil buffer for the fragment shader (shadowing)
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, stencilTexId);
glActiveTexture(GL_TEXTURE0);
glCheck();
//Opaque
DrawList<ListType_Opaque, false>(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1);
//Alpha tested
DrawList<ListType_Punch_Through, false>(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1);
// Unbind stencil
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
}
if (!skip_tr)
{
//
// PASS 3: Render TR to a-buffers
//
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
glcache.Disable(GL_DEPTH_TEST);
// Bind the depth buffer for the fragment shader
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, depthTexId);
glActiveTexture(GL_TEXTURE0);
//Alpha blended
if (current_pass.autosort)
DrawList<ListType_Translucent, true>(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more
else
DrawList<ListType_Translucent, false>(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more
glCheck();
// Translucent modifier volumes
if (settings.rend.ModifierVolumes)
DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count);
if (render_pass < render_pass_count - 1)
{
//
// PASS 3b: Geometry pass with TR to update the depth for the next TA render pass
//
// Unbind depth texture
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glcache.Enable(GL_DEPTH_TEST);
if (current_pass.autosort)
DrawList<ListType_Translucent, true>(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0);
else
DrawList<ListType_Translucent, false>(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0);
//
// PASS 3c: Render a-buffer to temporary texture
//
// The new texture becomes the color attachment of geom_fbo; the previous one
// is bound to unit 0 as input, then deleted
GLuint texId = CreateColorFBOTexture();
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glActiveTexture(GL_TEXTURE0);
glBindSampler(0, 0);
glBindTexture(GL_TEXTURE_2D, opaqueTexId);
renderABuffer(current_pass.autosort);
SetupMainVBO();
glcache.DeleteTextures(1, &opaqueTexId);
opaqueTexId = texId;
glCheck();
}
}
if (!skip_op_pt && render_pass < render_pass_count - 1)
{
// Clear the stencil from this pass
glStencilMask(0xFF);
glClear(GL_STENCIL_BUFFER_BIT);
}
previous_pass = current_pass;
}
//
// PASS 4: Render a-buffers to screen
//
glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glCheck();
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glActiveTexture(GL_TEXTURE0);
glBindSampler(0, 0);
glBindTexture(GL_TEXTURE_2D, opaqueTexId);
// previous_pass is now the last render pass (or all zeroes if there was none)
renderABuffer(previous_pass.autosort);
SetupMainVBO();
}
// Draws the texture fbTextureId as a quad covering (0,0)-(w,h), with scissor, depth
// and stencil tests, culling and blending disabled. The texture is deleted afterwards
// and the main vertex and index buffers are re-filled with the pvrrc geometry.
void gl4DrawFramebuffer(float w, float h)
{
// Quad corners: position, white base color, zero offset color, UV
struct Vertex vertices[] = {
{ 0, h, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 },
{ 0, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 },
{ w, h, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 },
{ w, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 },
};
// As a strip: triangles (0,1,2), (1,2,1) which is degenerate, and (2,1,3)
GLushort indices[] = { 0, 1, 2, 1, 3 };
glcache.Disable(GL_SCISSOR_TEST);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_STENCIL_TEST);
glcache.Disable(GL_CULL_FACE);
glcache.Disable(GL_BLEND);
gl4ShaderUniforms.trilinear_alpha = 1.0;
// Pass 1 shader with texture, ignoring texture alpha. The arguments must match
// the fields assigned below when the shader is first created
int shaderId = gl4GetProgramID(0,
1,
1,
0,
1,
0,
0,
2,
false,
0,
false,
false,
false,
1);
gl4PipelineShader *shader = gl4.getShader(shaderId);
if (shader->program == -1)
{
shader->cp_AlphaTest = 0;
shader->pp_ClipTestMode = 0;
shader->pp_Texture = 1;
shader->pp_UseAlpha = 0;
shader->pp_IgnoreTexA = 1;
shader->pp_ShadInstr = 0;
shader->pp_Offset = 0;
shader->pp_FogCtrl = 2;
shader->pp_TwoVolumes = false;
shader->pp_DepthFunc = 0;
shader->pp_Gouraud = false;
shader->pp_BumpMap = false;
shader->fog_clamping = false;
shader->pass = 1;
gl4CompilePipelineShader(shader);
}
glcache.UseProgram(shader->program);
gl4ShaderUniforms.Set(shader);
glActiveTexture(GL_TEXTURE0);
glcache.BindTexture(GL_TEXTURE_2D, fbTextureId);
SetupMainVBO();
// Temporarily replace the content of the main buffers with the quad
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STREAM_DRAW);
glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, (void *)0);
glcache.DeleteTextures(1, &fbTextureId);
fbTextureId = 0;
// Upload the frame geometry again
glBufferData(GL_ARRAY_BUFFER, pvrrc.verts.bytes(), pvrrc.verts.head(), GL_STREAM_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, pvrrc.idx.bytes(), pvrrc.idx.head(), GL_STREAM_DRAW);
}

1139
core/rend/gl4/gles.cpp Normal file

File diff suppressed because it is too large Load Diff

52
core/rend/gl4/gltex.cpp Normal file
View File

@ -0,0 +1,52 @@
#include "gl4.h"
#include "glcache.h"
/*
	Creates the render-to-texture target: a power-of-two texture large enough for
	a fbw x fbh viewport and a framebuffer object with that texture as color
	attachment. Any previous target stored in fb_rtt is deleted first.

	addy           address of the texture; stored in fb_rtt divided by 8
	fbw, fbh       viewport size in pixels, before upscaling
	channels, fmt  format and type passed to glTexImage2D (channels is also used as internal format)

	The new framebuffer is left bound and the viewport is set to the (upscaled)
	fbw x fbh area. Returns the framebuffer object id.
*/
GLuint gl4BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
{
	FBT& target = fb_rtt;

	// Release the previous render target, if any
	if (target.fbo)
		glDeleteFramebuffers(1, &target.fbo);
	if (target.tex)
		glcache.DeleteTextures(1, &target.tex);

	target.TexAddr = addy >> 3;

	// Find the smallest power of two texture that fits the viewport
	int tex_height = 2;
	for (; tex_height < fbh; tex_height *= 2)
		;
	int tex_width = 2;
	for (; tex_width < fbw; tex_width *= 2)
		;

	if (settings.rend.RenderToTextureUpscale > 1 && !settings.rend.RenderToTextureBuffer)
	{
		fbw *= settings.rend.RenderToTextureUpscale;
		fbh *= settings.rend.RenderToTextureUpscale;
		tex_width *= settings.rend.RenderToTextureUpscale;
		tex_height *= settings.rend.RenderToTextureUpscale;
	}

	// Get the currently bound frame buffer object. On most platforms this just gives 0.
	//glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo);

	// Create a texture for rendering to
	target.tex = glcache.GenTexture();
	glcache.BindTexture(GL_TEXTURE_2D, target.tex);
	glTexImage2D(GL_TEXTURE_2D, 0, channels, tex_width, tex_height, 0, channels, fmt, 0);

	// Create the object that will allow us to render to the aforementioned texture
	glGenFramebuffers(1, &target.fbo);
	glBindFramebuffer(GL_FRAMEBUFFER, target.fbo);

	// Attach the texture to the FBO
	glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, target.tex, 0);

	// Check that our FBO creation was successful
	const GLuint fbo_status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
	verify(fbo_status == GL_FRAMEBUFFER_COMPLETE);

	glViewport(0, 0, fbw, fbh);		// TODO CLIP_X/Y min?

	return target.fbo;
}

View File

@ -641,16 +641,25 @@ GLuint fogTextureId;
}
int attribs[] =
{
WGL_CONTEXT_MAJOR_VERSION_ARB, 3,
WGL_CONTEXT_MINOR_VERSION_ARB, 1,
{
WGL_CONTEXT_MAJOR_VERSION_ARB, 4,
WGL_CONTEXT_MINOR_VERSION_ARB, 3,
WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB,
0
};
};
HGLRC m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs);
if (!m_hrc)
{
printf("Open GL 4.3 not supported\n");
// Try Gl 3.1
attribs[1] = 3;
attribs[3] = 1;
m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs);
}
if (m_hrc)
wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc);
else
@ -811,6 +820,9 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base");
glBindAttribLocation(program, VERTEX_COL_OFFS_ARRAY, "in_offs");
glBindAttribLocation(program, VERTEX_UV_ARRAY, "in_uv");
glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1");
glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1");
glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1");
#ifndef GLES
glBindFragDataLocation(program, 0, "FragColor");
@ -1060,8 +1072,6 @@ bool gl_create_resources()
return true;
}
bool gl_init(void* wind, void* disp);
//swap buffers
void gl_swap();
//destroy the gles context and free resources
@ -1076,7 +1086,6 @@ bool gl_create_resources();
bool gles_init()
{
if (!gl_init((void*)libPvr_GetRenderTarget(),
(void*)libPvr_GetRenderSurface()))
return false;
@ -1092,6 +1101,11 @@ bool gles_init()
#endif
#endif
// glEnable(GL_DEBUG_OUTPUT);
// glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
// glDebugMessageCallback(gl_DebugOutput, NULL);
// glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE);
//clean up the buffer
glcache.ClearColor(0.f, 0.f, 0.f, 0.f);
glClear(GL_COLOR_BUFFER_BIT);
@ -1354,7 +1368,7 @@ static float LastFPSTime;
static int lastFrameCount = 0;
static float fps = -1;
static void OSD_HOOK()
void OSD_HOOK()
{
osd_base=pvrrc.verts.used();
osd_count=0;

View File

@ -42,6 +42,10 @@
// Vertex attribute locations. gl_CompileAndLink binds these to the shader
// inputs "in_base", "in_offs" and "in_uv" with glBindAttribLocation.
#define VERTEX_COL_BASE_ARRAY 1
#define VERTEX_COL_OFFS_ARRAY 2
#define VERTEX_UV_ARRAY 3
// OIT only
// Second set of vertex attributes, bound by gl_CompileAndLink to the shader
// inputs "in_base1", "in_offs1" and "in_uv1".
#define VERTEX_COL_BASE1_ARRAY 4
#define VERTEX_COL_OFFS1_ARRAY 5
#define VERTEX_UV1_ARRAY 6
#ifndef GL_UNSIGNED_INT_8_8_8_8
#define GL_UNSIGNED_INT_8_8_8_8 0x8035
@ -121,6 +125,7 @@ struct gl_ctx
extern gl_ctx gl;
extern GLuint fbTextureId;
extern float fb_scale_x, fb_scale_y;
GLuint gl_GetTexture(TSP tsp,TCW tcw);
struct text_info {
@ -130,20 +135,27 @@ struct text_info {
u32 textype; // 0 565, 1 1555, 2 4444
};
// Renderer entry points that are declared here so that they are visible to
// every source file that includes this header.

// Called by gles_init() with the libPvr render target and render surface;
// gles_init() returns false when this returns false.
bool gl_init(void* wind, void* disp);
// Swap buffers.
void gl_swap();
text_info raw_GetTexture(TSP tsp, TCW tcw);
void CollectCleanup();
void DoCleanup();
void SortPParams(int first, int count);
void SetCull(u32 CullMode);
// NOTE(review): parameter meaning is not stated in this header — presumably
// addy is the target address, fbw/fbh the requested size and channels/fmt the
// GL format and type of the backing texture; confirm against the definition.
void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt);
void ReadRTTBuffer();
void RenderFramebuffer();
void DrawFramebuffer(float w, float h);
// Defined without `static`, so it can be called from other source files.
void OSD_HOOK();
int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,
u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping);
GLuint gl_CompileShader(const char* shader, GLuint type);
// Binds the vertex attribute names ("in_base", "in_offs", "in_uv", "in_base1",
// "in_offs1", "in_uv1") to the VERTEX_*_ARRAY locations of the program.
GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader);
bool CompilePipelineShader(PipelineShader* s);
// Presumably the value loadPNG returns when loading fails — TODO confirm.
#define TEXTURE_LOAD_ERROR 0
GLuint loadPNG(const string& subpath, int &width, int &height);
@ -197,3 +209,13 @@ extern struct ShaderUniforms_t
} ShaderUniforms;
// Render to texture
// GL objects and bookkeeping for the render-to-texture target.
struct FBT
{
u32 TexAddr; // BindRTT appears to store (addy >> 3) here — confirm that `rv` refers to fb_rtt
GLuint depthb,stencilb; // presumably depth / stencil buffer object names — TODO confirm
GLuint tex; // texture that is attached to `fbo` as GL_COLOR_ATTACHMENT0
GLuint fbo; // framebuffer object used to render into `tex`
};
// Declaration only; the object itself is defined in one source file.
extern FBT fb_rtt;

View File

@ -573,14 +573,6 @@ typedef map<u64,TextureCacheData>::iterator TexCacheIter;
TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw);
// GL objects and bookkeeping for the render-to-texture target used by BindRTT.
struct FBT
{
u32 TexAddr; // BindRTT appears to store (addy >> 3) here — confirm that `rv` refers to fb_rtt
GLuint depthb,stencilb; // presumably depth / stencil buffer object names — TODO confirm
GLuint tex; // texture that is attached to `fbo` as GL_COLOR_ATTACHMENT0
GLuint fbo; // framebuffer object used to render into `tex`
};
// The single render-to-texture state object.
FBT fb_rtt;
void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
@ -594,7 +586,7 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
rv.TexAddr=addy>>3;
// Find the largest square power of two texture that fits into the viewport
// Find the smallest power of two texture that fits into the viewport
int fbh2 = 2;
while (fbh2 < fbh)
fbh2 *= 2;