diff --git a/core/core.mk b/core/core.mk index cc8eb9423..8d2832211 100755 --- a/core/core.mk +++ b/core/core.mk @@ -44,6 +44,9 @@ endif ifndef NO_REND RZDCY_MODULES += rend/gles/ + ifndef USE_GLES + RZDCY_MODULES += rend/gl4/ + endif else RZDCY_MODULES += rend/norend/ endif @@ -78,8 +81,7 @@ RZDCY_FILES += $(foreach dir,$(addprefix $(RZDCY_SRC_DIR)/,$(RZDCY_MODULES)),$(w ifdef FOR_PANDORA RZDCY_CFLAGS := \ - $(CFLAGS) -c -O3 -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \ - -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules \ + $(CFLAGS) -c -O3 \ -DRELEASE -DPANDORA\ -march=armv7-a -mtune=cortex-a8 -mfpu=neon -mfloat-abi=softfp \ -frename-registers -fsingle-precision-constant -ffast-math \ @@ -89,8 +91,7 @@ RZDCY_CFLAGS := \ else ifdef FOR_ANDROID RZDCY_CFLAGS := \ - $(CFLAGS) -c -O3 -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \ - -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules \ + $(CFLAGS) -c -O3 \ -D_ANDROID -DRELEASE \ -frename-registers -fsingle-precision-constant -ffast-math \ -ftree-vectorize -fomit-frame-pointer @@ -106,12 +107,13 @@ RZDCY_CFLAGS := \ endif endif else -RZDCY_CFLAGS := \ - -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps \ - -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules +RZDCY_CFLAGS := endif endif +RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/rend/gles -I$(RZDCY_SRC_DIR)/deps \ + -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules + ifdef NO_REC RZDCY_CFLAGS += -DTARGET_NO_REC endif diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index e8aef73f0..bb6c26995 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -103,7 +103,7 @@ void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref = NULL u32 bytes = ctx->tad.End() - ctx->tad.thd_root; - fwrite("TAFRAME3", 1, 8, fw); + fwrite("TAFRAME4", 1, 8, fw); fwrite(&ctx->rend.isRTT, 1, sizeof(ctx->rend.isRTT), fw); 
u32 zero = 0; @@ -168,10 +168,17 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(id0, 1, 8, fw); - if (memcmp(id0, "TAFRAME3", 8) != 0) { + if (memcmp(id0, "TAFRAME", 7) != 0 || (id0[7] != '3' && id0[7] != '4')) { fclose(fw); return 0; } + int sizeofPolyParam = sizeof(PolyParam); + int sizeofVertex = sizeof(Vertex); + if (id0[7] == '3') + { + sizeofPolyParam -= 12; + sizeofVertex -= 16; + } TA_context* ctx = tactx_Alloc(); @@ -184,8 +191,10 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(&ctx->rend.fb_X_CLIP.full, 1, sizeof(ctx->rend.fb_X_CLIP.full), fw); fread(&ctx->rend.fb_Y_CLIP.full, 1, sizeof(ctx->rend.fb_Y_CLIP.full), fw); - fread(ctx->rend.global_param_op.Append(), 1, sizeof(PolyParam), fw); - fread(ctx->rend.verts.Append(4), 1, 4 * sizeof(Vertex), fw); + fread(ctx->rend.global_param_op.Append(), 1, sizeofPolyParam, fw); + Vertex *vtx = ctx->rend.verts.Append(4); + for (int i = 0; i < 4; i++) + fread(vtx + i, 1, sizeofVertex, fw); fread(&t, 1, sizeof(t), fw); verify(t == VRAM_SIZE); @@ -500,6 +509,11 @@ bool rend_init() case 2: renderer = rend_softrend(); break; +#endif +#if !defined(GLES) && HOST_OS != OS_DARWIN + case 3: + renderer = rend_GL4(); + break; #endif } diff --git a/core/hw/pvr/Renderer_if.h b/core/hw/pvr/Renderer_if.h index 2c8d037b2..4777d7ef5 100644 --- a/core/hw/pvr/Renderer_if.h +++ b/core/hw/pvr/Renderer_if.h @@ -51,9 +51,10 @@ struct Renderer extern Renderer* renderer; - -Renderer* rend_D3D11(); Renderer* rend_GLES2(); +#if !defined(GLES) && HOST_OS != OS_DARWIN +Renderer* rend_GL4(); +#endif Renderer* rend_norend(); Renderer* rend_softrend(); diff --git a/core/hw/pvr/helper_classes.h b/core/hw/pvr/helper_classes.h index d52153d5b..2e1f01d63 100644 --- a/core/hw/pvr/helper_classes.h +++ b/core/hw/pvr/helper_classes.h @@ -8,6 +8,7 @@ struct List int size; bool* overrun; + const char *list_name; __forceinline int used() const { return size-avail; } __forceinline int bytes() const { return 
used()* sizeof(T); } @@ -17,6 +18,8 @@ struct List { *overrun |= true; Clear(); + if (list_name != NULL) + printf("List overrun for list %s\n", list_name); return daty; } @@ -45,7 +48,7 @@ struct List T* head() const { return daty-used(); } - void InitBytes(int maxbytes,bool* ovrn) + void InitBytes(int maxbytes,bool* ovrn, const char *name) { maxbytes-=maxbytes%sizeof(T); @@ -58,11 +61,12 @@ struct List overrun=ovrn; Clear(); + list_name = name; } - void Init(int maxsize,bool* ovrn) + void Init(int maxsize,bool* ovrn, const char *name) { - InitBytes(maxsize*sizeof(T),ovrn); + InitBytes(maxsize*sizeof(T),ovrn, name); } void Clear() @@ -76,4 +80,4 @@ struct List Clear(); free(daty); } -}; \ No newline at end of file +}; diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 29b7c6d39..eb8505bf6 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -21,12 +21,4 @@ void ta_vtx_data(u32* data, u32 size); bool ta_parse_vdrc(TA_context* ctx); - -#define STRIPS_AS_PPARAMS 1 #define TRIG_SORT 1 - - -#if TRIG_SORT -#undef STRIPS_AS_PPARAMS -#define STRIPS_AS_PPARAMS 1 -#endif \ No newline at end of file diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index a99348c67..a476b97c7 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -17,6 +17,12 @@ struct Vertex u8 spc[4]; float u,v; + + // Two volumes format + u8 col1[4]; + u8 spc1[4]; + + float u1,v1; }; struct PolyParam @@ -35,6 +41,9 @@ struct PolyParam float zvZ; u32 tileclip; //float zMin,zMax; + TSP tsp1; + TCW tcw1; + u32 texid1; }; struct ModifierVolumeParam @@ -98,6 +107,7 @@ struct RenderPass { u32 mvo_count; u32 pt_count; u32 tr_count; + u32 mvo_tr_count; }; struct rend_context @@ -124,6 +134,7 @@ struct rend_context List idx; List modtrig; List global_param_mvo; + List global_param_mvo_tr; List global_param_op; List global_param_pt; @@ -139,6 +150,7 @@ struct rend_context global_param_tr.Clear(); modtrig.Clear(); global_param_mvo.Clear(); + global_param_mvo_tr.Clear(); render_passes.Clear(); 
Overrun=false; @@ -189,16 +201,17 @@ struct TA_context { tad.Reset((u8*)OS_aligned_malloc(32, 8*1024*1024)); - rend.verts.InitBytes(2*1024*1024,&rend.Overrun); //up to 2 MB of vtx data/frame = ~ 75k vtx/frame - rend.idx.Init(120*1024,&rend.Overrun); //up to 120K indexes ( idx have stripification overhead ) - rend.global_param_op.Init(4096,&rend.Overrun); - rend.global_param_pt.Init(4096,&rend.Overrun); - rend.global_param_mvo.Init(4096,&rend.Overrun); - rend.global_param_tr.Init(8192,&rend.Overrun); + rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame + rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead ) + rend.global_param_op.Init(4096, &rend.Overrun, "global_param_op"); + rend.global_param_pt.Init(4096, &rend.Overrun, "global_param_pt"); + rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo"); + rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr"); + rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr"); - rend.modtrig.Init(8192,&rend.Overrun); + rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); - rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun); // 10 render passes + rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes Reset(); } @@ -222,6 +235,7 @@ struct TA_context rend.global_param_tr.Free(); rend.modtrig.Free(); rend.global_param_mvo.Free(); + rend.global_param_mvo_tr.Free(); rend.render_passes.Free(); } }; diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 3b15230c2..e7268d269 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -85,6 +85,8 @@ List* CurrentPPlist; //TA state vars DECL_ALIGN(4) u8 FaceBaseColor[4]; DECL_ALIGN(4) u8 FaceOffsColor[4]; +DECL_ALIGN(4) u8 FaceBaseColor1[4]; +DECL_ALIGN(4) u8 FaceOffsColor1[4]; DECL_ALIGN(4) u32 SFaceBaseColor; DECL_ALIGN(4) u32 SFaceOffsColor; @@ -769,7 
+771,7 @@ public: CurrentPP=&nullPP; CurrentPPlist=0; - if (ListType == ListType_Opaque_Modifier_Volume) + if (ListType == ListType_Opaque_Modifier_Volume || ListType == ListType_Translucent_Modifier_Volume) EndModVol(); } @@ -808,6 +810,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tsp1.full = -1; + d_pp->tcw1.full = -1; + d_pp->texid1 = -1; } } @@ -860,6 +865,11 @@ public: TA_PolyParam3* pp=(TA_PolyParam3*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4A(void* vpp) @@ -867,13 +877,19 @@ public: TA_PolyParam4A* pp=(TA_PolyParam4A*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4B(void* vpp) { TA_PolyParam4B* pp=(TA_PolyParam4B*)vpp; - poly_float_color(FaceBaseColor,FaceColor0); + poly_float_color(FaceBaseColor, FaceColor0); + poly_float_color(FaceBaseColor1, FaceColor1); } //Poly Strip handling @@ -884,14 +900,6 @@ public: { CurrentPP->count=vdrc.idx.used() - CurrentPP->first; - int vbase=vdrc.verts.used(); - - *vdrc.idx.Append()=vbase-1; - *vdrc.idx.Append()=vbase; - - if (CurrentPP->count&1) - *vdrc.idx.Append()=vbase; -#if STRIPS_AS_PPARAMS if (CurrentPPlist==&vdrc.global_param_tr) { PolyParam* d_pp =CurrentPPlist->Append(); @@ -900,7 +908,16 @@ public: d_pp->first=vdrc.idx.used(); d_pp->count=0; } -#endif + else + { + int vbase=vdrc.verts.used(); + + *vdrc.idx.Append()=vbase-1; + *vdrc.idx.Append()=vbase; + + if (CurrentPP->count&1) + *vdrc.idx.Append()=vbase; + } } @@ -941,6 +958,14 @@ public: cv->u = f16(vtx->u_name);\ cv->v = f16(vtx->v_name); + #define vert_uv1_32(u_name,v_name) \ + cv->u1 = (vtx->u_name);\ + 
cv->v1 = (vtx->v_name); + + #define vert_uv1_16(u_name,v_name) \ + cv->u1 = f16(vtx->u_name);\ + cv->v1 = f16(vtx->v_name); + //Color conversions #define vert_packed_color_(to,src) \ { \ @@ -984,6 +1009,20 @@ public: cv->spc[2] = FaceOffsColor[2]*satint/256; \ cv->spc[3] = FaceOffsColor[3]; } + #define vert_face_base_color1(baseint) \ + { u32 satint=float_to_satu8(vtx->baseint); \ + cv->col1[0] = FaceBaseColor1[0]*satint/256; \ + cv->col1[1] = FaceBaseColor1[1]*satint/256; \ + cv->col1[2] = FaceBaseColor1[2]*satint/256; \ + cv->col1[3] = FaceBaseColor1[3]; } + + #define vert_face_offs_color1(offsint) \ + { u32 satint=float_to_satu8(vtx->offsint); \ + cv->spc1[0] = FaceOffsColor1[0]*satint/256; \ + cv->spc1[1] = FaceOffsColor1[1]*satint/256; \ + cv->spc1[2] = FaceOffsColor1[2]*satint/256; \ + cv->spc1[3] = FaceOffsColor1[3]; } + //vert_float_color_(cv->spc,FaceOffsColor[3],FaceOffsColor[0]*satint/256,FaceOffsColor[1]*satint/256,FaceOffsColor[2]*satint/256); } @@ -1109,6 +1148,7 @@ public: vert_cvt_base; vert_packed_color(col,BaseCol0); + vert_packed_color(col1, BaseCol1); } //(Non-Textured, Intensity, with Two Volumes) @@ -1118,6 +1158,7 @@ public: vert_cvt_base; vert_face_base_color(BaseInt0); + vert_face_base_color1(BaseInt1); } //(Textured, Packed Color, with Two Volumes) @@ -1136,6 +1177,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_32(u1, v1); } //(Textured, Packed Color, 16bit UV, with Two Volumes) @@ -1154,6 +1199,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_16(u1, v1); } //(Textured, Intensity, with Two Volumes) @@ -1172,6 +1221,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + vert_face_offs_color1(OffsInt1); + + vert_uv1_32(u1,v1); } //(Textured, Intensity, 16bit UV, with Two Volumes) @@ -1190,6 +1243,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + 
vert_face_offs_color1(OffsInt1); + + vert_uv1_16(u1, v1); } //Sprites @@ -1217,6 +1274,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tcw1.full = -1; + d_pp->tsp1.full = -1; + d_pp->texid1 = -1; SFaceBaseColor=spr->BaseCol; SFaceOffsColor=spr->OffsCol; @@ -1355,7 +1415,7 @@ public: vert[-1].z=vert[0].z; CurrentPP->count+=2; }*/ -#if STRIPS_AS_PPARAMS + if (CurrentPPlist==&vdrc.global_param_tr) { PolyParam* d_pp =CurrentPPlist->Append(); @@ -1364,7 +1424,6 @@ public: d_pp->first=vdrc.idx.used(); d_pp->count=0; } -#endif } // Modifier Volumes Vertex handlers @@ -1374,6 +1433,8 @@ public: List *list = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) list = &vdrc.global_param_mvo; + else if (CurrentList == ListType_Translucent_Modifier_Volume) + list = &vdrc.global_param_mvo_tr; else return; if (list->used() > 0) @@ -1390,6 +1451,8 @@ public: ModifierVolumeParam *p = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) p = vdrc.global_param_mvo.Append(); + else if (CurrentList == ListType_Translucent_Modifier_Volume) + p = vdrc.global_param_mvo_tr.Append(); else return; p->isp.full = param->isp.full; @@ -1399,7 +1462,7 @@ public: __forceinline static void AppendModVolVertexA(TA_ModVolA* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr=vdrc.modtrig.Append(); @@ -1419,7 +1482,7 @@ public: __forceinline static void AppendModVolVertexB(TA_ModVolB* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr->y2=mvv->y2; lmr->z2=mvv->z2; @@ -1486,6 +1549,7 @@ bool ta_parse_vdrc(TA_context* ctx) render_pass->mvo_count = vd_rc.global_param_mvo.used(); render_pass->pt_count = vd_rc.global_param_pt.used(); render_pass->tr_count = vd_rc.global_param_tr.used(); + 
render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used(); render_pass->autosort = UsingAutoSort(pass); render_pass->z_clear = ClearZBeforePass(pass); } @@ -1637,6 +1701,9 @@ void FillBGP(TA_context* ctx) bgpp->isp.full=vri(strip_base); bgpp->tsp.full=vri(strip_base+4); bgpp->tcw.full=vri(strip_base+8); + bgpp->tcw1.full = -1; + bgpp->tsp1.full = -1; + bgpp->texid1 = -1; bgpp->count=4; bgpp->first=0; bgpp->tileclip=0;//disabled ! HA ~ diff --git a/core/linux-dist/evdev.cpp b/core/linux-dist/evdev.cpp index 2f9d77f9f..3e9f14a5b 100644 --- a/core/linux-dist/evdev.cpp +++ b/core/linux-dist/evdev.cpp @@ -210,8 +210,8 @@ } } - if (code < 0) - printf("WARNING: %s/%s not configured!\n", section.c_str(), dc_key.c_str()); + //if (code < 0) + // printf("WARNING: %s/%s not configured!\n", section.c_str(), dc_key.c_str()); return code; } diff --git a/core/linux-dist/x11.cpp b/core/linux-dist/x11.cpp index 4e44f761d..7b299452f 100644 --- a/core/linux-dist/x11.cpp +++ b/core/linux-dist/x11.cpp @@ -704,20 +704,30 @@ void x11_window_create() verify(glXCreateContextAttribsARB != 0); int context_attribs[] = { - GLX_CONTEXT_MAJOR_VERSION_ARB, 3, - GLX_CONTEXT_MINOR_VERSION_ARB, 1, + GLX_CONTEXT_MAJOR_VERSION_ARB, 4, + GLX_CONTEXT_MINOR_VERSION_ARB, 3, +#ifndef RELEASE GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB, +#endif GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, None }; x11_glc = glXCreateContextAttribsARB(x11Display, bestFbc, 0, True, context_attribs); - XSync(x11Display, False); - if (!x11_glc) { - die("Failed to create GL3.1 context\n"); + printf("Open GL 4.3 not supported\n"); + // Try GL 3.1 + context_attribs[1] = 3; + context_attribs[3] = 1; + x11_glc = glXCreateContextAttribsARB(x11Display, bestFbc, 0, True, context_attribs); + if (!x11_glc) + { + die("Open GL 3.1 not supported\n"); + } } + XSync(x11Display, False); + #endif XFlush(x11Display); diff --git a/core/rend/gl4/abuffer.cpp b/core/rend/gl4/abuffer.cpp new file mode 100644 index 
000000000..b181a9f4c --- /dev/null +++ b/core/rend/gl4/abuffer.cpp @@ -0,0 +1,546 @@ +/* + * abuffer.cpp + * + * Created on: May 26, 2018 + * Author: raph + */ +#include "gl4.h" +#include "rend/gles/glcache.h" + +GLuint pixels_buffer; +GLuint pixels_pointers; +GLuint atomic_buffer; +gl4PipelineShader g_abuffer_final_shader; +gl4PipelineShader g_abuffer_final_nosort_shader; +gl4PipelineShader g_abuffer_clear_shader; +gl4PipelineShader g_abuffer_tr_modvol_shaders[ModeCount]; +static GLuint g_quadBuffer = 0; +static GLuint g_quadVertexArray = 0; + +static int g_imageWidth = 0; +static int g_imageHeight = 0; + +GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 512 MB + +#define MAX_PIXELS_PER_FRAGMENT "32" + +static const char *final_shader_source = SHADER_HEADER "\ +#define DEPTH_SORTED %d \n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ +layout(binding = 0) uniform sampler2D tex; \n\ +uniform highp float shade_scale_factor; \n\ + \n\ +out vec4 FragColor; \n\ + \n\ +uint pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ + \n\ + \n\ +int fillAndSortFragmentArray(ivec2 coords) \n\ +{ \n\ + // Load fragments into a local memory array for sorting \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int count = 0; \n\ + for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++) \n\ + { \n\ + const Pixel p = pixels[idx]; \n\ + int j = count - 1; \n\ + Pixel jp = pixels[pixel_list[j]]; \n\ +#if DEPTH_SORTED == 1 \n\ + while (j >= 0 \n\ + && (jp.depth < p.depth \n\ + || (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p)))) \n\ +#else \n\ + while (j >= 0 && getPolyNumber(jp) > getPolyNumber(p)) \n\ +#endif \n\ + { \n\ + pixel_list[j + 1] = pixel_list[j]; \n\ + j--; \n\ + jp = pixels[pixel_list[j]]; \n\ + } \n\ + pixel_list[j + 1] = idx; \n\ + idx = p.next; \n\ + } \n\ + return count; \n\ +} \n\ + \n\ +// Blend fragments back-to-front \n\ +vec4 resolveAlphaBlend(ivec2 coords) { \n\ + \n\ + // Copy and sort fragments into a 
local array \n\ + int num_frag = fillAndSortFragmentArray(coords); \n\ + \n\ + vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ + vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer \n\ + float depth = 1.0; \n\ + \n\ + for (int i = 0; i < num_frag; i++) \n\ + { \n\ + const Pixel pixel = pixels[pixel_list[i]]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ +#if DEPTH_SORTED != 1 \n\ + const float frag_depth = pixel.depth; \n\ + switch (getDepthFunc(pp)) \n\ + { \n\ + case 0: // Never \n\ + continue; \n\ + case 1: // Greater \n\ + if (frag_depth <= depth) \n\ + continue; \n\ + break; \n\ + case 2: // Equal \n\ + if (frag_depth != depth) \n\ + continue; \n\ + break; \n\ + case 3: // Greater or equal \n\ + if (frag_depth < depth) \n\ + continue; \n\ + break; \n\ + case 4: // Less \n\ + if (frag_depth >= depth) \n\ + continue; \n\ + break; \n\ + case 5: // Not equal \n\ + if (frag_depth == depth) \n\ + continue; \n\ + break; \n\ + case 6: // Less or equal \n\ + if (frag_depth > depth) \n\ + continue; \n\ + break; \n\ + case 7: // Always \n\ + break; \n\ + } \n\ + \n\ + if (getDepthMask(pp)) \n\ + depth = frag_depth; \n\ +#endif \n\ + bool area1 = false; \n\ + bool shadowed = false; \n\ + if (isShadowed(pixel)) \n\ + { \n\ + if (isTwoVolumes(pp)) \n\ + area1 = true; \n\ + else \n\ + shadowed = true; \n\ + } \n\ + vec4 srcColor; \n\ + if (getSrcSelect(pp, area1)) \n\ + srcColor = secondaryBuffer; \n\ + else \n\ + { \n\ + srcColor = pixel.color; \n\ + if (shadowed) \n\ + srcColor.rgb *= shade_scale_factor; \n\ + } \n\ + vec4 dstColor = getDstSelect(pp, area1) ? 
secondaryBuffer : finalColor; \n\ + vec4 srcCoef; \n\ + vec4 dstCoef; \n\ + \n\ + int srcBlend = getSrcBlendFunc(pp, area1); \n\ + switch (srcBlend) \n\ + { \n\ + case ZERO: \n\ + srcCoef = vec4(0.0); \n\ + break; \n\ + case ONE: \n\ + srcCoef = vec4(1.0); \n\ + break; \n\ + case OTHER_COLOR: \n\ + srcCoef = finalColor; \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + srcCoef = vec4(1.0) - dstColor; \n\ + break; \n\ + case SRC_ALPHA: \n\ + srcCoef = vec4(srcColor.a); \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + srcCoef = vec4(1.0 - srcColor.a); \n\ + break; \n\ + case DST_ALPHA: \n\ + srcCoef = vec4(dstColor.a); \n\ + break; \n\ + case INVERSE_DST_ALPHA: \n\ + srcCoef = vec4(1.0 - dstColor.a); \n\ + break; \n\ + } \n\ + int dstBlend = getDstBlendFunc(pp, area1); \n\ + switch (dstBlend) \n\ + { \n\ + case ZERO: \n\ + dstCoef = vec4(0.0); \n\ + break; \n\ + case ONE: \n\ + dstCoef = vec4(1.0); \n\ + break; \n\ + case OTHER_COLOR: \n\ + dstCoef = srcColor; \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + dstCoef = vec4(1.0) - srcColor; \n\ + break; \n\ + case SRC_ALPHA: \n\ + dstCoef = vec4(srcColor.a); \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + dstCoef = vec4(1.0 - srcColor.a); \n\ + break; \n\ + case DST_ALPHA: \n\ + dstCoef = vec4(dstColor.a); \n\ + break; \n\ + case INVERSE_DST_ALPHA: \n\ + dstCoef = vec4(1.0 - dstColor.a); \n\ + break; \n\ + } \n\ + const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ + if (getDstSelect(pp, area1)) \n\ + secondaryBuffer = result; \n\ + else \n\ + finalColor = result; \n\ + } \n\ + \n\ + return finalColor; \n\ + \n\ +} \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + // Compute and output final color for the frame buffer \n\ + // Visualize the number of layers in use \n\ + //FragColor = vec4(float(fillFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT, 0, 0, 1); \n\ + FragColor = resolveAlphaBlend(coords); \n\ +} \n\ +"; + +static const char 
*clear_shader_source = SHADER_HEADER "\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + // Reset pointers \n\ + imageStore(abufferPointerImg, coords, uvec4(EOL)); \n\ + \n\ + // Discard fragment so nothing is written to the framebuffer \n\ + discard; \n\ +} \n\ +"; + +static const char *tr_modvol_shader_source = SHADER_HEADER "\ +#define MV_MODE %d \n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ +// Must match ModifierVolumeMode enum values \n\ +#define MV_XOR 0 \n\ +#define MV_OR 1 \n\ +#define MV_INCLUSION 2 \n\ +#define MV_EXCLUSION 3 \n\ + \n\ +void main(void) \n\ +{ \n\ +#if MV_MODE == MV_XOR || MV_MODE == MV_OR \n\ + setFragDepth(); \n\ +#endif \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int list_len = 0; \n\ + while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT) \n\ + { \n\ + const Pixel pixel = pixels[idx]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ + if (getShadowEnable(pp)) \n\ + { \n\ +#if MV_MODE == MV_XOR \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicXor(pixels[idx].seq_num, SHADOW_STENCIL); \n\ +#elif MV_MODE == MV_OR \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicOr(pixels[idx].seq_num, SHADOW_STENCIL); \n\ +#elif MV_MODE == MV_INCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ +#elif MV_MODE == MV_EXCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ +#endif \n\ + } \n\ + idx = pixel.next; \n\ + list_len++; \n\ + } \n\ + \n\ + discard; \n\ +} \n\ +"; + +void DrawQuad(); + +void initABuffer() +{ + g_imageWidth = screen_width; + 
g_imageHeight = screen_height; + + if (g_imageWidth > 0 && g_imageHeight > 0) + { + if (pixels_pointers == 0) + pixels_pointers = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, g_imageWidth, g_imageHeight, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); + glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glCheck(); + } + + if (pixels_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &pixels_buffer); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); + // Declare storage + glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); + glCheck(); + } + + if (atomic_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &atomic_buffer); + // Bind it + glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); + // Declare storage + glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + GLint zero = 0; + glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), &zero); + glCheck(); + } + + if (g_abuffer_final_shader.program == 0) + { + char source[16384]; + sprintf(source, final_shader_source, 1); + gl4CompilePipelineShader(&g_abuffer_final_shader, source); + } + if (g_abuffer_final_nosort_shader.program == 0) + { + char source[16384]; + sprintf(source, final_shader_source, 0); + gl4CompilePipelineShader(&g_abuffer_final_nosort_shader, source); + } + if (g_abuffer_clear_shader.program == 0) + gl4CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); + if (g_abuffer_tr_modvol_shaders[0].program == 0) + { + char source[16384]; + for (int mode = 0; mode < ModeCount; mode++) + { + sprintf(source, 
tr_modvol_shader_source, mode); + gl4CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], source); + } + } + + if (g_quadVertexArray == 0) + glGenVertexArrays(1, &g_quadVertexArray); + if (g_quadBuffer == 0) + glGenBuffers(1, &g_quadBuffer); + + glCheck(); + + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + gl4ShaderUniforms.Set(&g_abuffer_clear_shader); + + DrawQuad(); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + + glCheck(); +} + +void reshapeABuffer(int w, int h) +{ + if (w != g_imageWidth || h != g_imageHeight) { + if (pixels_pointers != 0) + { + glcache.DeleteTextures(1, &pixels_pointers); + pixels_pointers = 0; + } + + initABuffer(); + } +} + +void DrawQuad() +{ + glBindVertexArray(g_quadVertexArray); + + float xmin = (gl4ShaderUniforms.scale_coefs[2] - 1) / gl4ShaderUniforms.scale_coefs[0]; + float xmax = (gl4ShaderUniforms.scale_coefs[2] + 1) / gl4ShaderUniforms.scale_coefs[0]; + float ymin = (gl4ShaderUniforms.scale_coefs[3] - 1) / gl4ShaderUniforms.scale_coefs[1]; + float ymax = (gl4ShaderUniforms.scale_coefs[3] + 1) / gl4ShaderUniforms.scale_coefs[1]; + if (ymin > ymax) + { + float t = ymin; + ymin = ymax; + ymax = t; + } + struct Vertex vertices[] = { + { xmin, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { xmin, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { xmax, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { xmax, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + }; + GLushort indices[] = { 0, 1, 2, 1, 3 }; + + glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck(); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck(); + + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); + 
glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); + + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); +} + +void DrawTranslucentModVols(int first, int count) +{ + if (count == 0 || pvrrc.modtrig.used() == 0) + return; + gl4SetupModvolVBO(); + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_STENCIL_TEST); + + glCheck(); + + ModifierVolumeParam* params = &pvrrc.global_param_mvo_tr.head()[first]; + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + + int mod_base = -1; + + for (u32 cmv = 0; cmv < count; cmv++) + { + ModifierVolumeParam& param = params[cmv]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used()); + + if (mod_base == -1) + mod_base = param.first; + + gl4PipelineShader *shader; + if (!param.isp.VolumeLast && mv_mode > 0) + shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad) + else + shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume) + 
glcache.UseProgram(shader->program); + gl4ShaderUniforms.Set(shader); + + SetCull(param.isp.CullMode); glCheck(); + + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck(); + + if (mv_mode == 1 || mv_mode == 2) + { + //Sum the area + shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? Inclusion : Exclusion]; + glcache.UseProgram(shader->program); + gl4ShaderUniforms.Set(shader); + + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck(); + mod_base = -1; + } + } +} + +void checkOverflowAndReset() +{ + // Using atomic counter + GLuint max_pixel_index = 0; +// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index); +//// printf("ABUFFER %d pixels used\n", max_pixel_index); +// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) +// { +// GLint64 size; +// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size); +// if (pixel_buffer_size == size) +// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index); +// else +// { +// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size); +// +// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024); +// +// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); +// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); +// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); +// glCheck(); +// } +// } + // Reset counter + max_pixel_index = 0; + glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index); +} + +void renderABuffer(bool sortFragments) +{ + // Render to output FBO + glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); + gl4ShaderUniforms.Set(&g_abuffer_final_shader); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_CULL_FACE); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + + DrawQuad(); + + glCheck(); + + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + gl4ShaderUniforms.Set(&g_abuffer_clear_shader); + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); + + glActiveTexture(GL_TEXTURE0); + + glCheck(); +} diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h new file mode 100755 index 000000000..66e042daa --- /dev/null +++ b/core/rend/gl4/gl4.h @@ -0,0 +1,314 @@ +#pragma once +#include "rend/gles/gles.h" +#include + +extern float gl4_scale_x, gl4_scale_y; + +void gl4DrawStrips(GLuint output_fbo); + +struct gl4PipelineShader +{ + GLuint program; + + GLuint scale; + GLuint extra_depth_scale; + GLuint pp_ClipTest,cp_AlphaTestValue; + GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY; + GLuint shade_scale_factor; + GLuint pp_Number; + GLuint blend_mode; + GLuint use_alpha; + GLuint ignore_tex_alpha; + GLuint shading_instr; + GLuint fog_control; + GLuint trilinear_alpha; + GLuint fog_clamp_min, fog_clamp_max; + + // + u32 cp_AlphaTest; s32 pp_ClipTestMode; + u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; + u32 pp_DepthFunc; + int pass; + bool pp_TwoVolumes; + bool pp_Gouraud; + bool pp_BumpMap; + bool fog_clamping; +}; + + +struct gl4_ctx +{ + struct + { + GLuint program; + + GLuint scale; + GLuint extra_depth_scale; + } modvol_shader; + + std::map shaders; + struct + { + GLuint program,scale; + GLuint extra_depth_scale; + } OSD_SHADER; + + struct + { + GLuint geometry,modvols,idxs,idxs2; + GLuint vao; + GLuint tr_poly_params; + } vbo; + + gl4PipelineShader *getShader(int programId) { + gl4PipelineShader *shader = shaders[programId]; + if (shader == NULL) { + shader = new 
gl4PipelineShader(); + shaders[programId] = shader; + shader->program = -1; + } + return shader; + } +}; + +extern gl4_ctx gl4; + +extern int screen_width; +extern int screen_height; + +GLuint gl4BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); +void gl4DrawFramebuffer(float w, float h); + +int gl4GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, + u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, + u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, int pass); + +extern const char *gl4PixelPipelineShader; +bool gl4CompilePipelineShader(gl4PipelineShader* s, const char *source = gl4PixelPipelineShader); + +extern GLuint stencilTexId; +extern GLuint depthTexId; +extern GLuint opaqueTexId; +extern GLuint depthSaveTexId; + +#define SHADER_HEADER "#version 430 \n\ +\n\ +layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ +struct Pixel { \n\ + highp vec4 color; \n\ + highp float depth; \n\ + uint seq_num; \n\ + uint next; \n\ +}; \n\ +#define EOL 0xFFFFFFFFu \n\ +layout (binding = 0, std430) coherent restrict buffer PixelBuffer { \n\ + Pixel pixels[]; \n\ +}; \n\ +layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ +\n\ +#define ZERO 0 \n\ +#define ONE 1 \n\ +#define OTHER_COLOR 2 \n\ +#define INVERSE_OTHER_COLOR 3 \n\ +#define SRC_ALPHA 4 \n\ +#define INVERSE_SRC_ALPHA 5 \n\ +#define DST_ALPHA 6 \n\ +#define INVERSE_DST_ALPHA 7 \n\ + \n\ +uint getNextPixelIndex() \n\ +{ \n\ + uint index = atomicCounterIncrement(buffer_index); \n\ + if (index >= pixels.length()) \n\ + // Buffer overflow \n\ + discard; \n\ + \n\ + return index; \n\ +} \n\ +\n\ +void setFragDepth(void) \n\ +{ \n\ + highp float w = 100000.0 * gl_FragCoord.w; \n\ + gl_FragDepth = 1.0 - log2(1.0 + w) / 34.0; \n\ +} \n\ +struct PolyParam { \n\ + int first; \n\ + int count; \n\ + int texid; \n\ + int tsp; \n\ + int tcw; \n\ + int pcw; \n\ + int isp; \n\ + float 
zvZ; \n\ + int tileclip; \n\ + int tsp1; \n\ + int tcw1; \n\ + int texid1; \n\ +}; \n\ +layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ + PolyParam tr_poly_params[]; \n\ +}; \n\ + \n\ +#define GET_TSP_FOR_AREA int tsp; if (area1) tsp = pp.tsp1; else tsp = pp.tsp; \n\ + \n\ +int getSrcBlendFunc(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 29) & 7; \n\ +} \n\ +\n\ +int getDstBlendFunc(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 26) & 7; \n\ +} \n\ +\n\ +bool getSrcSelect(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 25) & 1) != 0; \n\ +} \n\ +\n\ +bool getDstSelect(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 24) & 1) != 0; \n\ +} \n\ +\n\ +int getFogControl(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 22) & 3; \n\ +} \n\ +\n\ +bool getUseAlpha(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 20) & 1) != 0; \n\ +} \n\ +\n\ +bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 19) & 1) != 0; \n\ +} \n\ +\n\ +int getShadingInstruction(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 6) & 3; \n\ +} \n\ +\n\ +int getDepthFunc(const PolyParam pp) \n\ +{ \n\ + return (pp.isp >> 29) & 7; \n\ +} \n\ +\n\ +bool getDepthMask(const PolyParam pp) \n\ +{ \n\ + return ((pp.isp >> 26) & 1) != 1; \n\ +} \n\ +\n\ +bool getShadowEnable(const PolyParam pp) \n\ +{ \n\ + return ((pp.pcw >> 7) & 1) != 0; \n\ +} \n\ +\n\ +uint getPolyNumber(const Pixel pixel) \n\ +{ \n\ + return pixel.seq_num & 0x3FFFFFFFu; \n\ +} \n\ +\n\ +#define SHADOW_STENCIL 0x40000000u \n\ +#define SHADOW_ACC 0x80000000u \n\ +\n\ +bool isShadowed(const Pixel pixel) \n\ +{ \n\ + return (pixel.seq_num & SHADOW_ACC) == SHADOW_ACC; \n\ +} \n\ +\n\ +bool isTwoVolumes(const PolyParam pp) \n\ 
+{ \n\ + return pp.tsp1 != -1 || pp.tcw1 != -1; \n\ +} \n\ + \n\ +" + +void gl4SetupModvolVBO(); + +extern struct gl4ShaderUniforms_t +{ + float PT_ALPHA; + float scale_coefs[4]; + float extra_depth_scale; + float fog_den_float; + float ps_FOG_COL_RAM[3]; + float ps_FOG_COL_VERT[3]; + int poly_number; + float trilinear_alpha; + TSP tsp0; + TSP tsp1; + TCW tcw0; + TCW tcw1; + float fog_clamp_min[4]; + float fog_clamp_max[4]; + + void setUniformArray(GLuint location, int v0, int v1) + { + int array[] = { v0, v1 }; + glUniform1iv(location, 2, array); + } + + void Set(gl4PipelineShader* s) + { + if (s->cp_AlphaTestValue!=-1) + glUniform1f(s->cp_AlphaTestValue,PT_ALPHA); + + if (s->scale!=-1) + glUniform4fv( s->scale, 1, scale_coefs); + + if (s->extra_depth_scale != -1) + glUniform1f(s->extra_depth_scale, extra_depth_scale); + + if (s->sp_FOG_DENSITY!=-1) + glUniform1f( s->sp_FOG_DENSITY,fog_den_float); + + if (s->sp_FOG_COL_RAM!=-1) + glUniform3fv( s->sp_FOG_COL_RAM, 1, ps_FOG_COL_RAM); + + if (s->sp_FOG_COL_VERT!=-1) + glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); + + if (s->shade_scale_factor != -1) + glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); + + if (s->blend_mode != -1) { + u32 blend_mode[] = { tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr }; + glUniform2iv(s->blend_mode, 2, (GLint *)blend_mode); + } + + if (s->use_alpha != -1) + setUniformArray(s->use_alpha, tsp0.UseAlpha, tsp1.UseAlpha); + + if (s->ignore_tex_alpha != -1) + setUniformArray(s->ignore_tex_alpha, tsp0.IgnoreTexA, tsp1.IgnoreTexA); + + if (s->shading_instr != -1) + setUniformArray(s->shading_instr, tsp0.ShadInstr, tsp1.ShadInstr); + + if (s->fog_control != -1) + setUniformArray(s->fog_control, tsp0.FogCtrl, tsp1.FogCtrl); + + if (s->pp_Number != -1) + glUniform1i(s->pp_Number, poly_number); + + if (s->trilinear_alpha != -1) + glUniform1f(s->trilinear_alpha, trilinear_alpha); + + if (s->fog_clamp_min != -1) + glUniform4fv(s->fog_clamp_min, 1, 
fog_clamp_min); + if (s->fog_clamp_max != -1) + glUniform4fv(s->fog_clamp_max, 1, fog_clamp_max); + } + +} gl4ShaderUniforms; + diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp new file mode 100644 index 000000000..15f8dc876 --- /dev/null +++ b/core/rend/gl4/gldraw.cpp @@ -0,0 +1,921 @@ +#include "gl4.h" +#include "rend/gles/glcache.h" +#include "rend/rend.h" + +/* + +Drawing and related state management +Takes vertex, textures and renders to the currently set up target + + + + +*/ + +#define INVERT_DEPTH_FUNC +const static u32 Zfunction[]= +{ + GL_NEVER, //GL_NEVER, //0 Never +#ifndef INVERT_DEPTH_FUNC + GL_LESS, //GL_LESS/*EQUAL*/, //1 Less + GL_EQUAL, //GL_EQUAL, //2 Equal + GL_LEQUAL, //GL_LEQUAL, //3 Less Or Equal + GL_GREATER, //GL_GREATER/*EQUAL*/, //4 Greater + GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal + GL_GEQUAL, //GL_GEQUAL, //6 Greater Or Equal +#else + GL_GREATER, //1 Less + GL_EQUAL, //2 Equal + GL_GEQUAL, //3 Less Or Equal + GL_LESS, //4 Greater + GL_NOTEQUAL, //5 Not Equal + GL_LEQUAL, //6 Greater Or Equal +#endif + GL_ALWAYS, //GL_ALWAYS, //7 Always +}; + +/* +0 Zero (0, 0, 0, 0) +1 One (1, 1, 1, 1) +2 Other Color (OR, OG, OB, OA) +3 Inverse Other Color (1-OR, 1-OG, 1-OB, 1-OA) +4 SRC Alpha (SA, SA, SA, SA) +5 Inverse SRC Alpha (1-SA, 1-SA, 1-SA, 1-SA) +6 DST Alpha (DA, DA, DA, DA) +7 Inverse DST Alpha (1-DA, 1-DA, 1-DA, 1-DA) +*/ + +const static u32 DstBlendGL[] = +{ + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA +}; + +const static u32 SrcBlendGL[] = +{ + GL_ZERO, + GL_ONE, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA +}; + +static gl4PipelineShader* CurrentShader; +extern u32 gcflip; +static GLuint geom_fbo; +GLuint stencilTexId; +GLuint opaqueTexId; +GLuint depthTexId; +static GLuint texSamplers[2]; +static GLuint depth_fbo; +GLuint depthSaveTexId; + 
+static s32 SetTileClip(u32 val, bool set) +{ + if (!settings.rend.Clipping) + return 0; + + u32 clipmode=val>>28; + s32 clip_mode; + if (clipmode<2) + { + clip_mode=0; //always passes + } + else if (clipmode&1) + clip_mode=-1; //render stuff outside the region + else + clip_mode=1; //render stuff inside the region + + float csx=0,csy=0,cex=0,cey=0; + + + csx=(float)(val&63); + cex=(float)((val>>6)&63); + csy=(float)((val>>12)&31); + cey=(float)((val>>17)&31); + csx=csx*32; + cex=cex*32 +32; + csy=csy*32; + cey=cey*32 +32; + + if (csx <= 0 && csy <= 0 && cex >= 640 && cey >= 480) + return 0; + + if (set && clip_mode) + { + if (!pvrrc.isRTT) + { + csx /= gl4_scale_x; + csy /= gl4_scale_y; + cex /= gl4_scale_x; + cey /= gl4_scale_y; + float t = cey; + cey = 480 - csy; + csy = 480 - t; + float dc2s_scale_h = screen_height / 480.0f; + float ds2s_offs_x = (screen_width - dc2s_scale_h * 640) / 2; + csx = csx * dc2s_scale_h + ds2s_offs_x; + cex = cex * dc2s_scale_h + ds2s_offs_x; + csy = csy * dc2s_scale_h; + cey = cey * dc2s_scale_h; + } + else + { + csx *= settings.rend.RenderToTextureUpscale; + csy *= settings.rend.RenderToTextureUpscale; + cex *= settings.rend.RenderToTextureUpscale; + cey *= settings.rend.RenderToTextureUpscale; + } + glUniform4f(CurrentShader->pp_ClipTest, csx, csy, cex, cey); + } + + return clip_mode; +} + +static void SetTextureRepeatMode(int index, GLuint dir, u32 clamp, u32 mirror) +{ + if (clamp) + glSamplerParameteri(texSamplers[index], dir, GL_CLAMP_TO_EDGE); + else + glSamplerParameteri(texSamplers[index], dir, mirror ? 
/*
 * Selects (compiling on first use) the pipeline shader for one polygon and
 * sets up the GL state needed to draw it: program, uniforms, tile clip,
 * stencil reference, textures/samplers, culling, depth function and mask.
 *
 * gp:    polygon parameters to configure for.
 * pass:  render pass index. Pass 0 uses a reduced shader variant with fixed
 *        arguments; other passes use the polygon's full shading parameters.
 *        Depth writes are only enabled when pass < 2.
 * cflip: extra cull-mode flip, xor'ed with the polygon cull mode and gcflip.
 *
 * NOTE(review): the template parameter list is missing in this copy of the
 * source. The body uses Type and SortingEnabled as compile-time values, so
 * they presumably were the template parameters -- restore before building.
 */
template
 static void SetGPState(const PolyParam* gp, int pass, u32 cflip=0)
{
	// Trilinear blending weight: 1.0 unless the polygon is textured with
	// FilterMode > 1, in which case it is derived from the low 2 bits of MipMapD
	if (gp->pcw.Texture && gp->tsp.FilterMode > 1)
	{
		gl4ShaderUniforms.trilinear_alpha = 0.25 * (gp->tsp.MipMapD & 0x3);
		if (gp->tsp.FilterMode == 2)
			// Trilinear pass A
			gl4ShaderUniforms.trilinear_alpha = 1.0 - gl4ShaderUniforms.trilinear_alpha;
	}
	else
		gl4ShaderUniforms.trilinear_alpha = 1.0;

	// Query the clip mode only (set == false); the uniform is uploaded further
	// down once the right program is bound
	s32 clipping = SetTileClip(gp->tileclip, false);
	int shaderId;

	if (pass == 0)
	{
		// Pass 0: reduced variant. Texturing is only requested for punch-through.
		shaderId = gl4GetProgramID(Type == ListType_Punch_Through ? 1 : 0,
				clipping + 1,
				Type == ListType_Punch_Through ? gp->pcw.Texture : 0,
				1,
				gp->tsp.IgnoreTexA,
				0,
				0,
				2,
				false,	// TODO Can PT have two different textures for area 0 and 1 ??
				0,
				false,
				false,
				false,
				pass);
		CurrentShader = gl4.getShader(shaderId);
		// program == -1 marks a shader entry that has not been compiled yet;
		// the fields below mirror the arguments passed to gl4GetProgramID
		if (CurrentShader->program == -1) {
			CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0;
			CurrentShader->pp_ClipTestMode = clipping;
			CurrentShader->pp_Texture = Type == ListType_Punch_Through ? gp->pcw.Texture : 0;
			CurrentShader->pp_UseAlpha = 1;
			CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA;
			CurrentShader->pp_ShadInstr = 0;
			CurrentShader->pp_Offset = 0;
			CurrentShader->pp_FogCtrl = 2;
			CurrentShader->pp_TwoVolumes = false;
			CurrentShader->pp_DepthFunc = 0;
			CurrentShader->pp_Gouraud = false;
			CurrentShader->pp_BumpMap = false;
			CurrentShader->fog_clamping = false;
			CurrentShader->pass = pass;
			gl4CompilePipelineShader(CurrentShader);
		}
	}
	else
	{
		// Two volumes mode only supported for OP and PT
		bool two_volumes_mode = (gp->tsp1.full != -1) && Type != ListType_Translucent;
		// Clamping is only needed when the clamp range is not the full range
		bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff);

		// The depth function is only part of the shader key for translucent polygons
		int depth_func = 0;
		if (Type == ListType_Translucent)
		{
			if (SortingEnabled)
				depth_func = 6; // GEQUAL
			else
				depth_func = gp->isp.DepthMode;
		}

		shaderId = gl4GetProgramID(Type == ListType_Punch_Through ? 1 : 0,
				clipping + 1,
				gp->pcw.Texture,
				gp->tsp.UseAlpha,
				gp->tsp.IgnoreTexA,
				gp->tsp.ShadInstr,
				gp->pcw.Offset,
				gp->tsp.FogCtrl,
				two_volumes_mode,
				depth_func,
				gp->pcw.Gouraud,
				gp->tcw.PixelFmt == PixelBumpMap,
				color_clamp,
				pass);
		CurrentShader = gl4.getShader(shaderId);
		if (CurrentShader->program == -1) {
			CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0;
			CurrentShader->pp_ClipTestMode = clipping;
			CurrentShader->pp_Texture = gp->pcw.Texture;
			CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha;
			CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA;
			CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr;
			CurrentShader->pp_Offset = gp->pcw.Offset;
			CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl;
			CurrentShader->pp_TwoVolumes = two_volumes_mode;
			CurrentShader->pp_DepthFunc = depth_func;
			CurrentShader->pp_Gouraud = gp->pcw.Gouraud;
			// NOTE(review): the program ID above tests PixelFmt == PixelBumpMap
			// while this tests == 4; presumably the same value -- confirm
			CurrentShader->pp_BumpMap = gp->tcw.PixelFmt == 4;
			CurrentShader->fog_clamping = color_clamp;
			CurrentShader->pass = pass;
			gl4CompilePipelineShader(CurrentShader);
		}
	}

	glcache.UseProgram(CurrentShader->program);

	gl4ShaderUniforms.tsp0 = gp->tsp;
	gl4ShaderUniforms.tsp1 = gp->tsp1;
	gl4ShaderUniforms.tcw0 = gp->tcw;
	gl4ShaderUniforms.tcw1 = gp->tcw1;

	// Opaque and punch-through lists override the blend instructions with
	// SrcInstr = 1 and DstInstr = 0 for both areas
	if (Type == ListType_Opaque || Type == ListType_Punch_Through)	// TODO Can PT have a >0 and <1 alpha?
	{
		gl4ShaderUniforms.tsp0.SrcInstr = 1;
		gl4ShaderUniforms.tsp0.DstInstr = 0;
		gl4ShaderUniforms.tsp1.SrcInstr = 1;
		gl4ShaderUniforms.tsp1.DstInstr = 0;
	}
	gl4ShaderUniforms.Set(CurrentShader);

	// Second call uploads the clip rectangle to the program bound above
	SetTileClip(gp->tileclip,true);

	//This bit control which pixels are affected
	//by modvols
	const u32 stencil=(gp->pcw.Shadow!=0)?0x80:0x0;

	glcache.StencilFunc(GL_ALWAYS,stencil,stencil);

	if (CurrentShader->pp_Texture)
	{
		// Texture unit 0 takes texid/tsp/tcw, unit 1 takes texid1/tsp1/tcw1
		for (int i = 0; i < 2; i++)
		{
			glActiveTexture(GL_TEXTURE0 + i);
			GLuint texid = i == 0 ? gp->texid : gp->texid1;

			// A texid of -1 means "no texture": bind 0 and skip sampler setup
			glBindTexture(GL_TEXTURE_2D, texid == -1 ? 0 : texid);

			if (texid != -1)
			{
				TSP tsp = i == 0 ? gp->tsp : gp->tsp1;
				TCW tcw = i == 0 ? gp->tcw : gp->tcw1;

				glBindSampler(i, texSamplers[i]);
				SetTextureRepeatMode(i, GL_TEXTURE_WRAP_S, tsp.ClampU, tsp.FlipU);
				SetTextureRepeatMode(i, GL_TEXTURE_WRAP_T, tsp.ClampV, tsp.FlipV);

				//set texture filter mode
				if (tsp.FilterMode == 0)
				{
					//disable filtering, mipmaps
					glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_NEAREST);
				}
				else
				{
					//bilinear filtering
					//PowerVR supports also trilinear via two passes, but we ignore that for now
					glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, (tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR);
					glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_LINEAR);
				}
			}
		}
		glActiveTexture(GL_TEXTURE0);
	}

	//set cull mode !
	//cflip is required when exploding triangles for triangle sorting
	//gcflip is global clip flip, needed for when rendering to texture due to mirrored Y direction
	SetCull(gp->isp.CullMode^cflip^gcflip);

	//set Z mode, only if required
	if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled))
	{
		glcache.DepthFunc(Zfunction[6]);	// Greater or equal
	}
	else
	{
		glcache.DepthFunc(Zfunction[gp->isp.DepthMode]);
	}

	// Depth buffer is updated in pass 0 (and also in pass 1 for OP PT)
	if (pass < 2)
		glcache.DepthMask(!gp->isp.ZWriteDis);
	else
		glcache.DepthMask(GL_FALSE);
}
+ { + if (pass != 0) + { + // No need to draw this one + if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) + { + params++; + continue; + } + } + gl4ShaderUniforms.poly_number = params - gply.head(); + SetGPState(params, pass); + glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); + } + + params++; + } +} + +//All pixels are in area 0 by default. +//If inside an 'in' volume, they are in area 1 +//if inside an 'out' volume, they are in area 0 +/* + Stencil bits: + bit 7: mv affected (must be preserved) + bit 1: current volume state + but 0: summary result (starts off as 0) + + Lower 2 bits: + + IN volume (logical OR): + 00 -> 00 + 01 -> 01 + 10 -> 01 + 11 -> 01 + + Out volume (logical AND): + 00 -> 00 + 01 -> 00 + 10 -> 00 + 11 -> 01 +*/ +static void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc) +{ + if (mv_mode == Xor) + { + // set states + glcache.Enable(GL_DEPTH_TEST); + // write only bit 1 + glcache.StencilMask(2); + // no stencil testing + glcache.StencilFunc(GL_ALWAYS, 0, 2); + // count the number of pixels in front of the Z buffer (xor zpass) + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_INVERT); + + // Cull mode needs to be set + SetCull(ispc.CullMode); + } + else if (mv_mode == Or) + { + // set states + glcache.Enable(GL_DEPTH_TEST); + // write only bit 1 + glcache.StencilMask(2); + // no stencil testing + glcache.StencilFunc(GL_ALWAYS, 2, 2); + // Or'ing of all triangles + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + + // Cull mode needs to be set + SetCull(ispc.CullMode); + } + else + { + // Inclusion or Exclusion volume + + // no depth test + glcache.Disable(GL_DEPTH_TEST); + // write bits 1:0 + glcache.StencilMask(3); + + if (mv_mode == Inclusion) + { + // Inclusion volume + //res : old : final + //0 : 0 : 00 + //0 : 1 : 01 + //1 : 0 : 01 + //1 : 1 : 01 + + // if (1<=st) st=1; else st=0; + glcache.StencilFunc(GL_LEQUAL,1,3); + 
glcache.StencilOp(GL_ZERO, GL_ZERO, GL_REPLACE); + } + else + { + // Exclusion volume + /* + I've only seen a single game use it, so i guess it doesn't matter ? (Zombie revenge) + (actually, i think there was also another, racing game) + */ + // The initial value for exclusion volumes is 1 so we need to invert the result before and'ing. + //res : old : final + //0 : 0 : 00 + //0 : 1 : 01 + //1 : 0 : 00 + //1 : 1 : 00 + + // if (1 == st) st = 1; else st = 0; + glcache.StencilFunc(GL_EQUAL, 1, 3); + glcache.StencilOp(GL_ZERO, GL_ZERO, GL_KEEP); + } + } +} + + +static void SetupMainVBO() +{ + glBindVertexArray(gl4.vbo.vao); + + glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.geometry); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl4.vbo.idxs); glCheck(); + + //setup vertex buffers attrib pointers + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, col1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, spc1)); glCheck(); + + 
glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck(); +} + +void gl4SetupModvolVBO() +{ + glBindVertexArray(gl4.vbo.vao); + + glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.modvols); glCheck(); + + //setup vertex buffers attrib pointers + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(float)*3, (void*)0); glCheck(); + + glDisableVertexAttribArray(VERTEX_UV_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE_ARRAY); + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); +} + +static void DrawModVols(int first, int count) +{ + if (count == 0 || pvrrc.modtrig.used() == 0) + return; + + gl4SetupModvolVBO(); + + glcache.UseProgram(gl4.modvol_shader.program); + + glcache.DepthMask(GL_FALSE); + glcache.DepthFunc(Zfunction[4]); + + if(0) + { + //simply draw the volumes -- for debugging + SetCull(0); + glDrawArrays(GL_TRIANGLES, first, count * 3); + SetupMainVBO(); + } + else + { + //Full emulation + + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first]; + + int mod_base = -1; + + for (u32 cmv = 0; cmv < count; cmv++) + { + ModifierVolumeParam& param = params[cmv]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + if (mod_base == -1) + mod_base = param.first; + + if (!param.isp.VolumeLast && mv_mode > 0) + SetMVS_Mode(Or, param.isp); // OR'ing (open volume or quad) + else + SetMVS_Mode(Xor, param.isp); // XOR'ing (closed volume) + + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); + + if (mv_mode == 1 || mv_mode == 2) + { + // Sum the area + SetMVS_Mode(mv_mode == 1 ? 
Inclusion : Exclusion, param.isp); + glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); + mod_base = -1; + } + } + + SetupMainVBO(); + } + + //restore states + glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(GL_TRUE); +} + +void renderABuffer(bool sortFragments); +void DrawTranslucentModVols(int first, int count); +void checkOverflowAndReset(); + +static GLuint CreateColorFBOTexture() +{ + GLuint texId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texId, 0); + glCheck(); + + return texId; +} + +static void CreateTextures() +{ + stencilTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); // OpenGL >= 4.3 + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Using glTexStorage2D instead of glTexImage2D to satisfy requirement GL_TEXTURE_IMMUTABLE_FORMAT=true, needed for glTextureView below + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH32F_STENCIL8, screen_width, screen_height); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + glCheck(); + + opaqueTexId = CreateColorFBOTexture(); + + depthTexId = glcache.GenTexture(); + glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1); + glCheck(); + glcache.BindTexture(GL_TEXTURE_2D, depthTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + 
/*
 * Renders the current frame's polygon lists, one TA render pass at a time,
 * into an intermediate framebuffer (geom_fbo) and finally resolves the
 * A-buffer into output_fbo.
 *
 * Per render pass:
 *   PASS 1  - depth/stencil-only geometry pass for OP and PT lists, plus
 *             modifier volumes when enabled in settings
 *   PASS 2  - colour pass for OP and PT lists
 *   PASS 3  - TR list rendered with colour writes and depth test disabled
 *             ("to a-buffers"), plus translucent modifier volumes
 *   PASS 3b/3c (all but the last pass) - TR depth update, then the A-buffer
 *             is rendered into a fresh colour texture that replaces opaqueTexId
 * After the loop:
 *   PASS 4  - A-buffer rendered into output_fbo
 *
 * output_fbo: framebuffer object that receives the final image.
 *
 * The framebuffer, textures and samplers are created lazily on first use.
 *
 * NOTE(review): DrawList is called without template arguments in this copy
 * of the source; presumably they were lost -- restore before building.
 */
void gl4DrawStrips(GLuint output_fbo)
{
	checkOverflowAndReset();

	// Lazily create the geometry FBO and its attachments
	if (geom_fbo == 0)
	{
		glGenFramebuffers(1, &geom_fbo);
		glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);

		CreateTextures();

		GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);

		verify(uStatus == GL_FRAMEBUFFER_COMPLETE);
	}
	else
	{
		glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
		// Attachments are recreated when stencilTexId has been reset to 0
		if (stencilTexId == 0)
			CreateTextures();
	}
	if (texSamplers[0] == 0)
		glGenSamplers(2, texSamplers);

	// All write masks must be enabled for glClear to affect every buffer
	glcache.ClearColor(0, 0, 0, 0);
	glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
	glcache.DepthMask(GL_TRUE);
	glStencilMask(0xFF);
	glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck();

	SetupMainVBO();
	//Draw the strips !

	//We use sampler 0
	glActiveTexture(GL_TEXTURE0);
	glcache.Disable(GL_BLEND);
	glProvokingVertex(GL_LAST_VERTEX_CONVENTION);

	// Each pass stores cumulative list counts; the range drawn in a pass is
	// [previous_pass.x_count, current_pass.x_count)
	RenderPass previous_pass = {0};
	int render_pass_count = pvrrc.render_passes.used();

	for (int render_pass = 0; render_pass < render_pass_count; render_pass++)
	{
		const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass];

		// Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash)
		// (a polygon only counts as drawn when it has more than 2 indices)
		bool skip_op_pt = true;
		bool skip_tr = true;
		for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++)
		{
			if (pvrrc.global_param_op.head()[j].count > 2)
				skip_op_pt = false;
		}
		for (int j = previous_pass.pt_count; skip_op_pt && j < current_pass.pt_count; j++)
		{
			if (pvrrc.global_param_pt.head()[j].count > 2)
				skip_op_pt = false;
		}
		for (int j = previous_pass.tr_count; skip_tr && j < current_pass.tr_count; j++)
		{
			if (pvrrc.global_param_tr.head()[j].count > 2)
				skip_tr = false;
		}
		if (skip_op_pt && skip_tr)
		{
			previous_pass = current_pass;
			continue;
		}

		if (!skip_op_pt)
		{
			//
			// PASS 1: Geometry pass to update depth and stencil
			//
			if (render_pass > 0)
			{
				// Make a copy of the depth buffer that will be reused in pass 2
				if (depth_fbo == 0)
					glGenFramebuffers(1, &depth_fbo);
				glBindFramebuffer(GL_FRAMEBUFFER, depth_fbo);
				if (depthSaveTexId == 0)
				{
					depthSaveTexId = glcache.GenTexture();
					glcache.BindTexture(GL_TEXTURE_2D, depthSaveTexId);
					glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
					glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); glCheck();
					glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthSaveTexId, 0); glCheck();
				}
				GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
				verify(uStatus == GL_FRAMEBUFFER_COMPLETE);

				// depth_fbo is the draw target here; read from geom_fbo
				glBindFramebuffer(GL_READ_FRAMEBUFFER, geom_fbo);
				glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
				glCheck();

				glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
			}
			// Depth and stencil only: colour writes off
			glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
			glcache.Enable(GL_DEPTH_TEST);
			glcache.DepthMask(GL_TRUE);
			glcache.Enable(GL_STENCIL_TEST);
			glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);

			DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0);
			DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0);

			// Modifier volumes
			if (settings.rend.ModifierVolumes)
				DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count);

			//
			// PASS 2: Render OP and PT to fbo
			//
			if (render_pass == 0)
			{
				glcache.DepthMask(GL_TRUE);
				glClear(GL_DEPTH_BUFFER_BIT);
			}
			else
			{
				// Restore the depth buffer from the last render pass
				// FIXME This is pretty slow apparently (CS)
				glBindFramebuffer(GL_DRAW_FRAMEBUFFER, geom_fbo);
				glBindFramebuffer(GL_READ_FRAMEBUFFER, depth_fbo);
				glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
				glCheck();
				glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo);
			}

			glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
			glcache.Disable(GL_STENCIL_TEST);

			// Bind stencil buffer for the fragment shader (shadowing)
			glActiveTexture(GL_TEXTURE3);
			glBindTexture(GL_TEXTURE_2D, stencilTexId);
			glActiveTexture(GL_TEXTURE0);
			glCheck();

			//Opaque
			DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1);

			//Alpha tested
			DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1);

			// Unbind stencil
			glActiveTexture(GL_TEXTURE3);
			glBindTexture(GL_TEXTURE_2D, 0);
			glActiveTexture(GL_TEXTURE0);
		}

		if (!skip_tr)
		{
			//
			// PASS 3: Render TR to a-buffers
			//
			glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
			glcache.Disable(GL_DEPTH_TEST);

			// Depth view of the geometry buffer is bound on texture unit 2
			glActiveTexture(GL_TEXTURE2);
			glBindTexture(GL_TEXTURE_2D, depthTexId);
			glActiveTexture(GL_TEXTURE0);

			//Alpha blended
			// NOTE(review): both branches are textually identical in this copy;
			// presumably they differed by template arguments -- confirm
			if (current_pass.autosort)
				DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more
			else
				DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more
			glCheck();

			// Translucent modifier volumes
			if (settings.rend.ModifierVolumes)
				DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count);

			if (render_pass < render_pass_count - 1)
			{
				//
				// PASS 3b: Geometry pass with TR to update the depth for the next TA render pass
				//
				// Unbind depth texture
				glActiveTexture(GL_TEXTURE2);
				glBindTexture(GL_TEXTURE_2D, 0);
				glActiveTexture(GL_TEXTURE0);

				glcache.Enable(GL_DEPTH_TEST);
				// NOTE(review): identical branches here as well -- see above
				if (current_pass.autosort)
					DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0);
				else
					DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0);

				//
				// PASS 3c: Render a-buffer to temporary texture
				//
				// The new texture becomes the colour attachment of the bound FBO
				GLuint texId = CreateColorFBOTexture();

				glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

				glActiveTexture(GL_TEXTURE0);
				glBindSampler(0, 0);
				glBindTexture(GL_TEXTURE_2D, opaqueTexId);

				renderABuffer(current_pass.autosort);
				SetupMainVBO();

				// The previous colour texture is no longer needed; the freshly
				// rendered one takes its place for the next pass
				glcache.DeleteTextures(1, &opaqueTexId);
				opaqueTexId = texId;

				glCheck();
			}
		}

		if (!skip_op_pt && render_pass < render_pass_count - 1)
		{
			// Clear the stencil from this pass
			glStencilMask(0xFF);
			glClear(GL_STENCIL_BUFFER_BIT);
		}

		previous_pass = current_pass;
	}

	//
	// PASS 4: Render a-buffers to screen
	//
	glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glCheck();
	glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);

	glActiveTexture(GL_TEXTURE0);
	glBindSampler(0, 0);
	glBindTexture(GL_TEXTURE_2D, opaqueTexId);
	// previous_pass now holds the last pass processed by the loop
	renderABuffer(previous_pass.autosort);
	SetupMainVBO();
}
gl4GetProgramID(0, + 1, + 1, + 0, + 1, + 0, + 0, + 2, + false, + 0, + false, + false, + false, + 1); + gl4PipelineShader *shader = gl4.getShader(shaderId); + if (shader->program == -1) + { + shader->cp_AlphaTest = 0; + shader->pp_ClipTestMode = 0; + shader->pp_Texture = 1; + shader->pp_UseAlpha = 0; + shader->pp_IgnoreTexA = 1; + shader->pp_ShadInstr = 0; + shader->pp_Offset = 0; + shader->pp_FogCtrl = 2; + shader->pp_TwoVolumes = false; + shader->pp_DepthFunc = 0; + shader->pp_Gouraud = false; + shader->pp_BumpMap = false; + shader->fog_clamping = false; + shader->pass = 1; + gl4CompilePipelineShader(shader); + } + glcache.UseProgram(shader->program); + gl4ShaderUniforms.Set(shader); + + glActiveTexture(GL_TEXTURE0); + glcache.BindTexture(GL_TEXTURE_2D, fbTextureId); + + SetupMainVBO(); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STREAM_DRAW); + + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, (void *)0); + + glcache.DeleteTextures(1, &fbTextureId); + fbTextureId = 0; + + glBufferData(GL_ARRAY_BUFFER, pvrrc.verts.bytes(), pvrrc.verts.head(), GL_STREAM_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, pvrrc.idx.bytes(), pvrrc.idx.head(), GL_STREAM_DRAW); +} diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp new file mode 100644 index 000000000..0a05ac5aa --- /dev/null +++ b/core/rend/gl4/gles.cpp @@ -0,0 +1,1139 @@ +#include +#include "gl4.h" +#include "rend/gles/glcache.h" +#include "rend/TexCache.h" +#include "cfg/cfg.h" + +#include "oslib/oslib.h" +#include "rend/rend.h" +#include "hw/pvr/Renderer_if.h" + +float gl4_scale_x, gl4_scale_y; + +//Fragment and vertex shaders code + +static const char* VertexShaderSource = +"\ +#version 140 \n\ +#define pp_Gouraud %d \n\ + \n\ +#if pp_Gouraud == 0 \n\ +#define INTERPOLATION flat \n\ +#else \n\ +#define INTERPOLATION smooth \n\ +#endif \n\ + \n\ +/* Vertex constants*/ \n\ +uniform highp vec4 scale; \n\ 
+uniform highp float extra_depth_scale; \n\ +/* Vertex input */ \n\ +in highp vec4 in_pos; \n\ +in lowp vec4 in_base; \n\ +in lowp vec4 in_offs; \n\ +in mediump vec2 in_uv; \n\ +in lowp vec4 in_base1; \n\ +in lowp vec4 in_offs1; \n\ +in mediump vec2 in_uv1; \n\ +/* output */ \n\ +INTERPOLATION out lowp vec4 vtx_base; \n\ +INTERPOLATION out lowp vec4 vtx_offs; \n\ + out mediump vec2 vtx_uv; \n\ +INTERPOLATION out lowp vec4 vtx_base1; \n\ +INTERPOLATION out lowp vec4 vtx_offs1; \n\ + out mediump vec2 vtx_uv1; \n\ +void main() \n\ +{ \n\ + vtx_base=in_base; \n\ + vtx_offs=in_offs; \n\ + vtx_uv=in_uv; \n\ + vtx_base1 = in_base1; \n\ + vtx_offs1 = in_offs1; \n\ + vtx_uv1 = in_uv1; \n\ + vec4 vpos=in_pos; \n\ + if (isinf(vpos.z)) \n\ + vpos.w = 1.18e-38; \n\ + else \n\ + vpos.w = extra_depth_scale / vpos.z; \n\ + if (vpos.w < 0.0) { \n\ + gl_Position = vec4(0.0, 0.0, 0.0, vpos.w); \n\ + return; \n\ + } \n\ + vpos.z = vpos.w; \n\ + vpos.xy=vpos.xy*scale.xy-scale.zw; \n\ + vpos.xy*=vpos.w; \n\ + gl_Position = vpos; \n\ +}"; + +const char* gl4PixelPipelineShader = SHADER_HEADER +"\ +#define cp_AlphaTest %d \n\ +#define pp_ClipTestMode %d \n\ +#define pp_UseAlpha %d \n\ +#define pp_Texture %d \n\ +#define pp_IgnoreTexA %d \n\ +#define pp_ShadInstr %d \n\ +#define pp_Offset %d \n\ +#define pp_FogCtrl %d \n\ +#define pp_TwoVolumes %d \n\ +#define pp_DepthFunc %d \n\ +#define pp_Gouraud %d \n\ +#define pp_BumpMap %d \n\ +#define FogClamping %d \n\ +#define PASS %d \n\ +#define PI 3.1415926 \n\ + \n\ +#if PASS <= 1 \n\ +out vec4 FragColor; \n\ +#endif \n\ + \n\ +#if pp_TwoVolumes == 1 \n\ +#define IF(x) if (x) \n\ +#else \n\ +#define IF(x) \n\ +#endif \n\ + \n\ +#if pp_Gouraud == 0 \n\ +#define INTERPOLATION flat \n\ +#else \n\ +#define INTERPOLATION smooth \n\ +#endif \n\ + \n\ +/* Shader program params*/ \n\ +uniform lowp float cp_AlphaTestValue; \n\ +uniform lowp vec4 pp_ClipTest; \n\ +uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ +uniform highp float sp_FOG_DENSITY; 
\n\ +uniform highp float shade_scale_factor; \n\ +uniform sampler2D tex0, tex1; \n\ +layout(binding = 5) uniform sampler2D fog_table; \n\ +uniform int pp_Number; \n\ +uniform usampler2D shadow_stencil; \n\ +uniform sampler2D DepthTex; \n\ +uniform lowp float trilinear_alpha; \n\ +uniform lowp vec4 fog_clamp_min; \n\ +uniform lowp vec4 fog_clamp_max; \n\ + \n\ +uniform ivec2 blend_mode[2]; \n\ +#if pp_TwoVolumes == 1 \n\ +uniform bool use_alpha[2]; \n\ +uniform bool ignore_tex_alpha[2]; \n\ +uniform int shading_instr[2]; \n\ +uniform int fog_control[2]; \n\ +#endif \n\ + \n\ +uniform highp float extra_depth_scale; \n\ +/* Vertex input*/ \n\ +INTERPOLATION in lowp vec4 vtx_base; \n\ +INTERPOLATION in lowp vec4 vtx_offs; \n\ + in mediump vec2 vtx_uv; \n\ +INTERPOLATION in lowp vec4 vtx_base1; \n\ +INTERPOLATION in lowp vec4 vtx_offs1; \n\ + in mediump vec2 vtx_uv1; \n\ + \n\ +lowp float fog_mode2(highp float w) \n\ +{ \n\ + highp float z = clamp(w * extra_depth_scale * sp_FOG_DENSITY, 1.0, 255.9999); \n\ + highp float exp = floor(log2(z)); \n\ + highp float m = z * 16.0 / pow(2.0, exp) - 16.0; \n\ + float idx = floor(m) + exp * 16.0 + 0.5; \n\ + vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\ + return fog_coef.r; \n\ +} \n\ + \n\ +highp vec4 fog_clamp(highp vec4 col) \n\ +{ \n\ +#if FogClamping == 1 \n\ + return clamp(col, fog_clamp_min, fog_clamp_max); \n\ +#else \n\ + return col; \n\ +#endif \n\ +} \n\ + \n\ +void main() \n\ +{ \n\ + setFragDepth(); \n\ + \n\ + #if PASS == 3 \n\ + // Manual depth testing \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; \n\ + #if pp_DepthFunc == 0 // Never \n\ + discard; \n\ + #elif pp_DepthFunc == 1 // Greater \n\ + if (gl_FragDepth <= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 2 // Equal \n\ + if (gl_FragDepth != frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 3 // Greater or equal \n\ + if (gl_FragDepth < frontDepth) \n\ + 
discard; \n\ + #elif pp_DepthFunc == 4 // Less \n\ + if (gl_FragDepth >= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 5 // Not equal \n\ + if (gl_FragDepth == frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 6 // Less or equal \n\ + if (gl_FragDepth > frontDepth) \n\ + discard; \n\ + #endif \n\ + #endif \n\ + \n\ + // Clip outside the box \n\ + #if pp_ClipTestMode==1 \n\ + if (gl_FragCoord.x < pp_ClipTest.x || gl_FragCoord.x > pp_ClipTest.z \n\ + || gl_FragCoord.y < pp_ClipTest.y || gl_FragCoord.y > pp_ClipTest.w) \n\ + discard; \n\ + #endif \n\ + // Clip inside the box \n\ + #if pp_ClipTestMode==-1 \n\ + if (gl_FragCoord.x >= pp_ClipTest.x && gl_FragCoord.x <= pp_ClipTest.z \n\ + && gl_FragCoord.y >= pp_ClipTest.y && gl_FragCoord.y <= pp_ClipTest.w) \n\ + discard; \n\ + #endif \n\ + \n\ + highp vec4 color = vtx_base; \n\ + lowp vec4 offset = vtx_offs; \n\ + mediump vec2 uv = vtx_uv; \n\ + bool area1 = false; \n\ + ivec2 cur_blend_mode = blend_mode[0]; \n\ + \n\ + #if pp_TwoVolumes == 1 \n\ + bool cur_use_alpha = use_alpha[0]; \n\ + bool cur_ignore_tex_alpha = ignore_tex_alpha[0]; \n\ + int cur_shading_instr = shading_instr[0]; \n\ + int cur_fog_control = fog_control[0]; \n\ + #if PASS == 1 \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ + if (stencil.r == 0x81u) { \n\ + color = vtx_base1; \n\ + offset = vtx_offs1; \n\ + uv = vtx_uv1; \n\ + area1 = true; \n\ + cur_blend_mode = blend_mode[1]; \n\ + cur_use_alpha = use_alpha[1]; \n\ + cur_ignore_tex_alpha = ignore_tex_alpha[1]; \n\ + cur_shading_instr = shading_instr[1]; \n\ + cur_fog_control = fog_control[1]; \n\ + } \n\ + #endif\n\ + #endif\n\ + \n\ + #if pp_UseAlpha==0 || pp_TwoVolumes == 1 \n\ + IF(!cur_use_alpha) \n\ + color.a=1.0; \n\ + #endif\n\ + #if pp_FogCtrl==3 || pp_TwoVolumes == 1 // LUT Mode 2 \n\ + IF(cur_fog_control == 3) \n\ + color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + #endif\n\ + #if pp_Texture==1 \n\ + { \n\ + 
highp vec4 texcol; \n\ + if (area1) \n\ + texcol = texture(tex1, uv); \n\ + else \n\ + texcol = texture(tex0, uv); \n\ + #if pp_BumpMap == 1 \n\ + highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; \n\ + highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; \n\ + texcol.a = clamp(vtx_offs.a + vtx_offs.r * sin(s) + vtx_offs.g * cos(s) * cos(r - 2.0 * PI * vtx_offs.b), 0.0, 1.0); \n\ + texcol.rgb = vec3(1.0, 1.0, 1.0); \n\ + #else\n\ + #if pp_IgnoreTexA==1 || pp_TwoVolumes == 1 \n\ + IF(cur_ignore_tex_alpha) \n\ + texcol.a=1.0; \n\ + #endif\n\ + \n\ + #if cp_AlphaTest == 1 \n\ + if (cp_AlphaTestValue>texcol.a) discard;\n\ + #endif \n\ + #endif\n\ + #if pp_ShadInstr==0 || pp_TwoVolumes == 1 // DECAL \n\ + IF(cur_shading_instr == 0) \n\ + { \n\ + color=texcol; \n\ + } \n\ + #endif\n\ + #if pp_ShadInstr==1 || pp_TwoVolumes == 1 // MODULATE \n\ + IF(cur_shading_instr == 1) \n\ + { \n\ + color.rgb*=texcol.rgb; \n\ + color.a=texcol.a; \n\ + } \n\ + #endif\n\ + #if pp_ShadInstr==2 || pp_TwoVolumes == 1 // DECAL ALPHA \n\ + IF(cur_shading_instr == 2) \n\ + { \n\ + color.rgb=mix(color.rgb,texcol.rgb,texcol.a); \n\ + } \n\ + #endif\n\ + #if pp_ShadInstr==3 || pp_TwoVolumes == 1 // MODULATE ALPHA \n\ + IF(cur_shading_instr == 3) \n\ + { \n\ + color*=texcol; \n\ + } \n\ + #endif\n\ + \n\ + #if pp_Offset==1 && pp_BumpMap == 0 \n\ + { \n\ + color.rgb += offset.rgb; \n\ + } \n\ + #endif\n\ + } \n\ + #endif\n\ + #if PASS == 1 && pp_TwoVolumes == 0 \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ + if (stencil.r == 0x81u) \n\ + color.rgb *= shade_scale_factor; \n\ + #endif \n\ + \n\ + color = fog_clamp(color); \n\ + \n\ + #if pp_FogCtrl==0 || pp_TwoVolumes == 1 // LUT \n\ + IF(cur_fog_control == 0) \n\ + { \n\ + color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + } \n\ + #endif\n\ + #if pp_Offset==1 && pp_BumpMap == 0 && (pp_FogCtrl == 1 || pp_TwoVolumes 
== 1) // Per vertex \n\ + IF(cur_fog_control == 1) \n\ + { \n\ + color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ + } \n\ + #endif\n\ + \n\ + color *= trilinear_alpha; \n\ + \n\ + #if cp_AlphaTest == 1 \n\ + color.a=1.0; \n\ + #endif \n\ + \n\ + //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0); \n\ + \n\ + #if PASS == 1 \n\ + FragColor = color; \n\ + #elif PASS > 1 \n\ + // Discard as many pixels as possible \n\ + switch (cur_blend_mode.y) // DST \n\ + { \n\ + case ONE: \n\ + switch (cur_blend_mode.x) // SRC \n\ + { \n\ + case ZERO: \n\ + discard; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + if (color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + case SRC_ALPHA: \n\ + if (color.a == 0.0 || color.rgb == vec3(0.0)) \n\ + discard; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + if (color.a == 1.0 || color.rgb == vec3(0.0)) \n\ + discard; \n\ + break; \n\ + } \n\ + break; \n\ + case OTHER_COLOR: \n\ + if (cur_blend_mode.x == ZERO && color == vec4(1.0)) \n\ + discard; \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + if (cur_blend_mode.x <= SRC_ALPHA && color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + case SRC_ALPHA: \n\ + if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ + discard; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + switch (cur_blend_mode.x) // SRC \n\ + { \n\ + case ZERO: \n\ + case SRC_ALPHA: \n\ + if (color.a == 0.0) \n\ + discard; \n\ + break; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + if (color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + } \n\ + break; \n\ + } \n\ + \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + uint idx = getNextPixelIndex(); \n\ + \n\ + Pixel pixel; \n\ + pixel.color = color; \n\ + pixel.depth = gl_FragDepth; \n\ + pixel.seq_num = uint(pp_Number); \n\ + pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ + pixels[idx] = pixel; \n\ + \n\ + discard; \n\ + \n\ + #endif 
\n\ +}"; + +static const char* ModifierVolumeShader = SHADER_HEADER +" \ +/* Vertex input*/ \n\ +void main() \n\ +{ \n\ + setFragDepth(); \n\ + \n\ +}"; + +static const char* OSD_Shader = +" \ +#version 140 \n\ +out vec4 FragColor; \n\ + \n\ +smooth in lowp vec4 vtx_base; \n\ + in mediump vec2 vtx_uv; \n\ +/* Vertex input*/ \n\ +uniform sampler2D tex; \n\ +void main() \n\ +{ \n\ + mediump vec2 uv = vtx_uv; \n\ + uv.y = 1.0 - uv.y; \n\ + FragColor = vtx_base * texture(tex, uv.st); \n\n\ +}"; + +gl4_ctx gl4; + +static GLuint fogTextureId; + +struct gl4ShaderUniforms_t gl4ShaderUniforms; + +int gl4GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, + u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, + u32 pp_FogCtrl, bool pp_TwoVolumes, u32 pp_DepthFunc, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, int pass) +{ + u32 rv=0; + + rv|=pp_ClipTestMode; + rv<<=1; rv|=cp_AlphaTest; + rv<<=1; rv|=pp_Texture; + rv<<=1; rv|=pp_UseAlpha; + rv<<=1; rv|=pp_IgnoreTexA; + rv<<=2; rv|=pp_ShadInstr; + rv<<=1; rv|=pp_Offset; + rv<<=2; rv|=pp_FogCtrl; + rv <<= 1; rv |= (int)pp_TwoVolumes; + rv <<= 3; rv |= pp_DepthFunc; + rv <<= 1; rv |= (int)pp_Gouraud; + rv <<= 1; rv |= pp_BumpMap; + rv <<= 1; rv |= fog_clamping; + rv <<= 2; rv |= pass; + + return rv; +} + +bool gl4CompilePipelineShader( gl4PipelineShader* s, const char *source /* = PixelPipelineShader */) +{ + char vshader[16384]; + + sprintf(vshader, VertexShaderSource, s->pp_Gouraud); + + char pshader[16384]; + + sprintf(pshader, source, + s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pp_DepthFunc, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, s->pass); + + s->program = gl_CompileAndLink(vshader, pshader); + + //setup texture 0 as the input for the shader + GLint gu = glGetUniformLocation(s->program, "tex0"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 0); + // Setup texture 
1 as the input for area 1 in two volume mode + gu = glGetUniformLocation(s->program, "tex1"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 1); + + //get the uniform locations + s->scale = glGetUniformLocation(s->program, "scale"); + s->extra_depth_scale = glGetUniformLocation(s->program, "extra_depth_scale"); + + s->pp_ClipTest = glGetUniformLocation(s->program, "pp_ClipTest"); + + s->sp_FOG_DENSITY = glGetUniformLocation(s->program, "sp_FOG_DENSITY"); + + s->cp_AlphaTestValue= glGetUniformLocation(s->program, "cp_AlphaTestValue"); + + //FOG_COL_RAM,FOG_COL_VERT,FOG_DENSITY; + if (s->pp_FogCtrl==1 && s->pp_Texture==1) + s->sp_FOG_COL_VERT=glGetUniformLocation(s->program, "sp_FOG_COL_VERT"); + else + s->sp_FOG_COL_VERT=-1; + if (s->pp_FogCtrl==0 || s->pp_FogCtrl==3) + { + s->sp_FOG_COL_RAM=glGetUniformLocation(s->program, "sp_FOG_COL_RAM"); + } + else + { + s->sp_FOG_COL_RAM=-1; + } + s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); + + // Use texture 1 for depth texture + gu = glGetUniformLocation(s->program, "DepthTex"); + if (gu != -1) + glUniform1i(gu, 2); // GL_TEXTURE2 + + s->trilinear_alpha = glGetUniformLocation(s->program, "trilinear_alpha"); + + if (s->fog_clamping) + { + s->fog_clamp_min = glGetUniformLocation(s->program, "fog_clamp_min"); + s->fog_clamp_max = glGetUniformLocation(s->program, "fog_clamp_max"); + } + else + { + s->fog_clamp_min = -1; + s->fog_clamp_max = -1; + } + + // Shadow stencil for OP/PT rendering pass + gu = glGetUniformLocation(s->program, "shadow_stencil"); + if (gu != -1) + glUniform1i(gu, 3); // GL_TEXTURE3 + + s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); + + s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); + s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); + s->ignore_tex_alpha = glGetUniformLocation(s->program, "ignore_tex_alpha"); + s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); + s->fog_control = 
glGetUniformLocation(s->program, "fog_control"); + + return glIsProgram(s->program)==GL_TRUE; +} + +extern GLuint osd_tex; +extern GLuint osd_font; + +static bool gl_create_resources() +{ + if (gl4.vbo.geometry != 0) + // Assume the resources have already been created + return true; + + //create vao + //This is really not "proper", vaos are supposed to be defined once + //i keep updating the same one to make the es2 code work in 3.1 context + glGenVertexArrays(1, &gl4.vbo.vao); + + //create vbos + glGenBuffers(1, &gl4.vbo.geometry); + glGenBuffers(1, &gl4.vbo.modvols); + glGenBuffers(1, &gl4.vbo.idxs); + glGenBuffers(1, &gl4.vbo.idxs2); + + char vshader[16384]; + sprintf(vshader, VertexShaderSource, 1); + + gl4.modvol_shader.program=gl_CompileAndLink(vshader, ModifierVolumeShader); + gl4.modvol_shader.scale = glGetUniformLocation(gl4.modvol_shader.program, "scale"); + gl4.modvol_shader.extra_depth_scale = glGetUniformLocation(gl4.modvol_shader.program, "extra_depth_scale"); + + + gl4.OSD_SHADER.program=gl_CompileAndLink(vshader, OSD_Shader); + gl4.OSD_SHADER.scale=glGetUniformLocation(gl4.OSD_SHADER.program, "scale"); + gl4.OSD_SHADER.extra_depth_scale = glGetUniformLocation(gl4.OSD_SHADER.program, "extra_depth_scale"); + glUniform1i(glGetUniformLocation(gl4.OSD_SHADER.program, "tex"),0); //bind osd texture to slot 0 + + int w, h; + osd_tex=loadPNG(get_readonly_data_path("/data/buttons.png"),w,h); + osd_font = loadPNG(get_readonly_data_path("/pixmaps/font.png"), w, h); + if (osd_font == 0) + osd_font = loadPNG(get_readonly_data_path("/font.png"), w, h); + + // Create the buffer for Translucent poly params + glGenBuffers(1, &gl4.vbo.tr_poly_params); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl4.vbo.tr_poly_params); + // Declare storage + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, gl4.vbo.tr_poly_params); + glCheck(); + + return true; +} + +//setup +extern void initABuffer(); + +// FIXME dup code if not gl_create_res +static bool gles_init() +{ + + if 
(!gl_init((void*)libPvr_GetRenderTarget(), + (void*)libPvr_GetRenderSurface())) + return false; + + if (!gl_create_resources()) + return false; + +// glEnable(GL_DEBUG_OUTPUT); +// glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); +// glDebugMessageCallback(gl_DebugOutput, NULL); +// glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + + + //clean up the buffer + glcache.ClearColor(0.f, 0.f, 0.f, 0.f); + glClear(GL_COLOR_BUFFER_BIT); + gl_swap(); + + initABuffer(); + + if (settings.rend.TextureUpscale > 1) + { + // Trick to preload the tables used by xBRZ + u32 src[] { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }; + u32 dst[16]; + UpscalexBRZ(2, src, dst, 2, 2, false); + } + + return true; +} + + +static void UpdateFogTexture(u8 *fog_table) +{ + glActiveTexture(GL_TEXTURE5); + if (fogTextureId == 0) + { + fogTextureId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, fogTextureId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + else + glcache.BindTexture(GL_TEXTURE_2D, fogTextureId); + + u8 temp_tex_buffer[256]; + for (int i = 0; i < 128; i++) + { + temp_tex_buffer[i] = fog_table[i * 4]; + temp_tex_buffer[i + 128] = fog_table[i * 4 + 1]; + } + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, 128, 2, 0, GL_RED, GL_UNSIGNED_BYTE, temp_tex_buffer); + glCheck(); + + glActiveTexture(GL_TEXTURE0); +} + +extern u32 osd_base; +extern u32 osd_count; + +#if defined(_ANDROID) +extern float vjoy_pos[14][8]; +#else + +static float vjoy_pos[14][8]= +{ + {24+0,24+64,64,64}, //LEFT + {24+64,24+0,64,64}, //UP + {24+128,24+64,64,64}, //RIGHT + {24+64,24+128,64,64}, //DOWN + + {440+0,280+64,64,64}, //X + {440+64,280+0,64,64}, //Y + {440+128,280+64,64,64}, //B + 
{440+64,280+128,64,64}, //A + + {320-32,360+32,64,64}, //Start + + {440,200,90,64}, //RT + {542,200,90,64}, //LT + + {-24,128+224,128,128}, //ANALOG_RING + {96,320,64,64}, //ANALOG_POINT + {1} +}; +#endif // !_ANDROID + +static float vjoy_sz[2][14] = { + { 64,64,64,64, 64,64,64,64, 64, 90,90, 128, 64 }, + { 64,64,64,64, 64,64,64,64, 64, 64,64, 128, 64 }, +}; + +#define OSD_TEX_W 512 +#define OSD_TEX_H 256 + +static void OSD_DRAW() +{ + #ifndef TARGET_PANDORA + if (osd_tex) + { + float u=0; + float v=0; + + for (int i=0;i<13;i++) + { + //umin,vmin,umax,vmax + vjoy_pos[i][4]=(u+1)/OSD_TEX_W; + vjoy_pos[i][5]=(v+1)/OSD_TEX_H; + + vjoy_pos[i][6]=((u+vjoy_sz[0][i]-1))/OSD_TEX_W; + vjoy_pos[i][7]=((v+vjoy_sz[1][i]-1))/OSD_TEX_H; + + u+=vjoy_sz[0][i]; + if (u>=OSD_TEX_W) + { + u-=OSD_TEX_W; + v+=vjoy_sz[1][i]; + } + //v+=vjoy_pos[i][3]; + } + + verify(glIsProgram(gl4.OSD_SHADER.program)); + + glcache.BindTexture(GL_TEXTURE_2D, osd_tex); + glcache.UseProgram(gl4.OSD_SHADER.program); + + //reset rendering scale +/* + float dc_width=640; + float dc_height=480; + + float dc2s_scale_h=screen_height/480.0f; + float ds2s_offs_x=(screen_width-dc2s_scale_h*640)/2; + + //-1 -> too much to left + gl4ShaderUniforms.scale_coefs[0]=2.0f/(screen_width/dc2s_scale_h); + gl4ShaderUniforms.scale_coefs[1]=-2/dc_height; + gl4ShaderUniforms.scale_coefs[2]=1-2*ds2s_offs_x/(screen_width); + gl4ShaderUniforms.scale_coefs[3]=-1; + + glUniform4fv( gl4.OSD_SHADER.scale, 1, gl4ShaderUniforms.scale_coefs); +*/ + + glcache.Enable(GL_BLEND); + glcache.Disable(GL_DEPTH_TEST); + glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + glcache.DepthMask(false); + glcache.DepthFunc(GL_ALWAYS); + + glcache.Disable(GL_CULL_FACE); + glcache.Disable(GL_SCISSOR_TEST); + + int dfa=osd_count/4; + + for (int i=0;irend_inuse.Lock(); + + if (KillTex) + { + void killtex(); + killtex(); + printf("Texture cache cleared\n"); + } + + if (ctx->rend.isRenderFramebuffer) + { + RenderFramebuffer(); + 
ctx->rend_inuse.Unlock(); + } + else + { + if (!ta_parse_vdrc(ctx)) + return false; + } + CollectCleanup(); + + if (ctx->rend.Overrun) + printf("ERROR: TA context overrun\n"); + + return !ctx->rend.Overrun; +} + +static bool RenderFrame() +{ + static int old_screen_width, old_screen_height; + if (screen_width != old_screen_width || screen_height != old_screen_height) { + rend_resize(screen_width, screen_height); + old_screen_width = screen_width; + old_screen_height = screen_height; + } + DoCleanup(); + + bool is_rtt=pvrrc.isRTT; + + //if (FrameCount&7) return; + + //these should be adjusted based on the current PVR scaling etc params + float dc_width=640; + float dc_height=480; + + if (!is_rtt) + { + gcflip=0; + } + else + { + gcflip=1; + + //For some reason this produces wrong results + //so for now its hacked based like on the d3d code + /* + u32 pvr_stride=(FB_W_LINESTRIDE.stride)*8; + */ + dc_width = pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1; + dc_height = pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1; + } + + gl4_scale_x = 1; + gl4_scale_y = 1; + + float scissoring_scale_x = 1; + + if (!is_rtt && !pvrrc.isRenderFramebuffer) + { + gl4_scale_x=fb_scale_x; + gl4_scale_y=fb_scale_y; + + //work out scaling parameters ! + //Pixel doubling is on VO, so it does not affect any pixel operations + //A second scaling is used here for scissoring + if (VO_CONTROL.pixel_double) + { + scissoring_scale_x = 0.5f; + gl4_scale_x *= 0.5f; + } + + if (SCALER_CTL.hscale) + { + scissoring_scale_x /= 2; + gl4_scale_x*=2; + } + } + + dc_width *= gl4_scale_x; + dc_height *= gl4_scale_y; + + /* + Handle Dc to screen scaling + */ + float dc2s_scale_h = is_rtt ? (screen_width / dc_width) : (screen_height / 480.0); + float ds2s_offs_x = is_rtt ? 0 : ((screen_width - dc2s_scale_h * 640.0) / 2); + + //-1 -> too much to left + gl4ShaderUniforms.scale_coefs[0]=2.0f/(screen_width/dc2s_scale_h*gl4_scale_x); + gl4ShaderUniforms.scale_coefs[1]=(is_rtt ? 
2 : -2) / dc_height; // FIXME CT2 needs 480 here instead of dc_height=512 + gl4ShaderUniforms.scale_coefs[2]=1-2*ds2s_offs_x/(screen_width); + gl4ShaderUniforms.scale_coefs[3]=(is_rtt?1:-1); + + gl4ShaderUniforms.extra_depth_scale = settings.rend.ExtraDepthScale; + + //printf("scale: %f, %f, %f, %f\n",gl4ShaderUniforms.scale_coefs[0],gl4ShaderUniforms.scale_coefs[1],gl4ShaderUniforms.scale_coefs[2],gl4ShaderUniforms.scale_coefs[3]); + + if (!is_rtt) + OSD_HOOK(); + + //VERT and RAM fog color constants + u8* fog_colvert_bgra=(u8*)&FOG_COL_VERT; + u8* fog_colram_bgra=(u8*)&FOG_COL_RAM; + gl4ShaderUniforms.ps_FOG_COL_VERT[0]=fog_colvert_bgra[2]/255.0f; + gl4ShaderUniforms.ps_FOG_COL_VERT[1]=fog_colvert_bgra[1]/255.0f; + gl4ShaderUniforms.ps_FOG_COL_VERT[2]=fog_colvert_bgra[0]/255.0f; + + gl4ShaderUniforms.ps_FOG_COL_RAM[0]=fog_colram_bgra [2]/255.0f; + gl4ShaderUniforms.ps_FOG_COL_RAM[1]=fog_colram_bgra [1]/255.0f; + gl4ShaderUniforms.ps_FOG_COL_RAM[2]=fog_colram_bgra [0]/255.0f; + + //Fog density constant + u8* fog_density=(u8*)&FOG_DENSITY; + float fog_den_mant=fog_density[1]/128.0f; //bit 7 -> x. 
bit, so [6:0] -> fraction -> /128 + s32 fog_den_exp=(s8)fog_density[0]; + gl4ShaderUniforms.fog_den_float=fog_den_mant*powf(2.0f,fog_den_exp); + + gl4ShaderUniforms.fog_clamp_min[0] = ((pvrrc.fog_clamp_min >> 16) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_min[1] = ((pvrrc.fog_clamp_min >> 8) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_min[2] = ((pvrrc.fog_clamp_min >> 0) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_min[3] = ((pvrrc.fog_clamp_min >> 24) & 0xFF) / 255.0f; + + gl4ShaderUniforms.fog_clamp_max[0] = ((pvrrc.fog_clamp_max >> 16) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_max[1] = ((pvrrc.fog_clamp_max >> 8) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_max[2] = ((pvrrc.fog_clamp_max >> 0) & 0xFF) / 255.0f; + gl4ShaderUniforms.fog_clamp_max[3] = ((pvrrc.fog_clamp_max >> 24) & 0xFF) / 255.0f; + + if (fog_needs_update) + { + fog_needs_update = false; + UpdateFogTexture((u8 *)FOG_TABLE); + } + + glcache.UseProgram(gl4.modvol_shader.program); + + glUniform4fv( gl4.modvol_shader.scale, 1, gl4ShaderUniforms.scale_coefs); + + glUniform1f(gl4.modvol_shader.extra_depth_scale, gl4ShaderUniforms.extra_depth_scale); + + GLfloat td[4]={0.5,0,0,0}; + + glcache.UseProgram(gl4.OSD_SHADER.program); + glUniform4fv( gl4.OSD_SHADER.scale, 1, gl4ShaderUniforms.scale_coefs); + glUniform1f(gl4.OSD_SHADER.extra_depth_scale, 1.0f); + + gl4ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; + + GLuint output_fbo; + + //setup render target first + if (is_rtt) + { + GLuint channels,format; + switch(FB_W_CTRL.fb_packmode) + { + case 0: //0x0 0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7]. + channels=GL_RGBA; + format=GL_UNSIGNED_BYTE; + break; + + case 1: //0x1 565 RGB 16 bit + channels=GL_RGB; + format=GL_UNSIGNED_SHORT_5_6_5; + break; + + case 2: //0x2 4444 ARGB 16 bit + channels=GL_RGBA; + format=GL_UNSIGNED_BYTE; + break; + + case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold. 
+ channels=GL_RGBA; + format=GL_UNSIGNED_BYTE; + break; + + case 4: //0x4 888 RGB 24 bit packed + case 5: //0x5 0888 KRGB 32 bit K is the value of fk_kval. + case 6: //0x6 8888 ARGB 32 bit + fprintf(stderr, "Unsupported render to texture format: %d\n", FB_W_CTRL.fb_packmode); + return false; + + case 7: //7 invalid + die("7 is not valid"); + break; + } + //printf("RTT packmode=%d stride=%d - %d,%d -> %d,%d\n", FB_W_CTRL.fb_packmode, FB_W_LINESTRIDE.stride * 8, + // FB_X_CLIP.min, FB_Y_CLIP.min, FB_X_CLIP.max, FB_Y_CLIP.max); + output_fbo = gl4BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); + } + else + { +#if HOST_OS != OS_DARWIN + //Fix this in a proper way + glBindFramebuffer(GL_FRAMEBUFFER,0); +#endif + glViewport(0, 0, screen_width, screen_height); + output_fbo = 0; + } + + bool wide_screen_on = !is_rtt && settings.rend.WideScreen + && pvrrc.fb_X_CLIP.min == 0 + && (pvrrc.fb_X_CLIP.max + 1) / gl4_scale_x == 640 + && pvrrc.fb_Y_CLIP.min == 0 + && (pvrrc.fb_Y_CLIP.max + 1) / gl4_scale_y == 480; + + //Color is cleared by the background plane + + glcache.Disable(GL_SCISSOR_TEST); + + //move vertex to gpu + + if (!pvrrc.isRenderFramebuffer) + { + //Main VBO + glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.geometry); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl4.vbo.idxs); glCheck(); + + glBufferData(GL_ARRAY_BUFFER,pvrrc.verts.bytes(),pvrrc.verts.head(),GL_STREAM_DRAW); glCheck(); + + glBufferData(GL_ELEMENT_ARRAY_BUFFER,pvrrc.idx.bytes(),pvrrc.idx.head(),GL_STREAM_DRAW); + + //Modvol VBO + if (pvrrc.modtrig.used()) + { + glBindBuffer(GL_ARRAY_BUFFER, gl4.vbo.modvols); glCheck(); + glBufferData(GL_ARRAY_BUFFER,pvrrc.modtrig.bytes(),pvrrc.modtrig.head(),GL_STREAM_DRAW); glCheck(); + } + + // TR PolyParam data + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl4.vbo.tr_poly_params); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(struct PolyParam) * pvrrc.global_param_tr.used(), pvrrc.global_param_tr.head(), GL_STATIC_DRAW); + glCheck(); + + int 
offs_x=ds2s_offs_x+0.5f; + //this needs to be scaled + + //not all scaling affects pixel operations, scale to adjust for that + gl4_scale_x *= scissoring_scale_x; + + #if 0 + //handy to debug really stupid render-not-working issues ... + printf("SS: %dx%d\n", screen_width, screen_height); + printf("SCI: %d, %f\n", pvrrc.fb_X_CLIP.max, dc2s_scale_h); + printf("SCI: %f, %f, %f, %f\n", offs_x+pvrrc.fb_X_CLIP.min/gl4_scale_x,(pvrrc.fb_Y_CLIP.min/gl4_scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/gl4_scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/gl4_scale_y*dc2s_scale_h); + #endif + + if (!wide_screen_on) + { + float width = (pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1) / gl4_scale_x; + float height = (pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1) / gl4_scale_y; + float min_x = pvrrc.fb_X_CLIP.min / gl4_scale_x; + float min_y = pvrrc.fb_Y_CLIP.min / gl4_scale_y; + if (!is_rtt) + { + // Add x offset for aspect ratio > 4/3 + min_x = min_x * dc2s_scale_h + offs_x; + // Invert y coordinates when rendering to screen + min_y = screen_height - (min_y + height) * dc2s_scale_h; + width *= dc2s_scale_h; + height *= dc2s_scale_h; + + if (ds2s_offs_x > 0) + { + glcache.ClearColor(0.f, 0.f, 0.f, 0.f); + glcache.Enable(GL_SCISSOR_TEST); + glScissor(0, 0, ds2s_offs_x, screen_height); + glClear(GL_COLOR_BUFFER_BIT); + glScissor(screen_width - ds2s_offs_x, 0, ds2s_offs_x, screen_height); + glClear(GL_COLOR_BUFFER_BIT); + } + } + else if (settings.rend.RenderToTextureUpscale > 1 && !settings.rend.RenderToTextureBuffer) + { + min_x *= settings.rend.RenderToTextureUpscale; + min_y *= settings.rend.RenderToTextureUpscale; + width *= settings.rend.RenderToTextureUpscale; + height *= settings.rend.RenderToTextureUpscale; + } + + glScissor(min_x, min_y, width, height); + glcache.Enable(GL_SCISSOR_TEST); + } + + //restore scale_x + gl4_scale_x /= scissoring_scale_x; + gl4DrawStrips(output_fbo); + } + else + { + glBindFramebuffer(GL_FRAMEBUFFER, 
output_fbo); + + glcache.ClearColor(0.f, 0.f, 0.f, 0.f); + glClear(GL_COLOR_BUFFER_BIT); + + DrawFramebuffer(dc_width, dc_height); + } + #if HOST_OS==OS_WINDOWS + //Sleep(40); //to test MT stability + #endif + + eglCheck(); + + KillTex=false; + + if (is_rtt) + ReadRTTBuffer(); + + return !is_rtt; +} + +#if !defined(_ANDROID) && !defined(TARGET_NACL32) +#if HOST_OS==OS_LINUX +#define SET_AFNT 1 +#endif +#endif + +void reshapeABuffer(int w, int h); + +struct gl4rend : Renderer +{ + bool Init() { return gles_init(); } + void Resize(int w, int h) + { + screen_width=w; + screen_height=h; + if (stencilTexId != 0) + { + glcache.DeleteTextures(1, &stencilTexId); + stencilTexId = 0; + } + if (depthTexId != 0) + { + glcache.DeleteTextures(1, &depthTexId); + depthTexId = 0; + } + if (opaqueTexId != 0) + { + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = 0; + } + if (depthSaveTexId != 0) + { + glcache.DeleteTextures(1, &depthSaveTexId); + depthSaveTexId = 0; + } + reshapeABuffer(w, h); + } + void Term() { } + + bool Process(TA_context* ctx) { return ProcessFrame(ctx); } + bool Render() { return RenderFrame(); } + + void Present() { gl_swap(); } + + void DrawOSD() { OSD_DRAW(); } + + virtual u32 GetTexture(TSP tsp, TCW tcw) { + return gl_GetTexture(tsp, tcw); + } +}; + +Renderer* rend_GL4() { return new gl4rend(); } diff --git a/core/rend/gl4/gltex.cpp b/core/rend/gl4/gltex.cpp new file mode 100644 index 000000000..40f986e14 --- /dev/null +++ b/core/rend/gl4/gltex.cpp @@ -0,0 +1,52 @@ +#include "gl4.h" +#include "glcache.h" + +GLuint gl4BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) +{ + FBT& rv=fb_rtt; + + if (rv.fbo) glDeleteFramebuffers(1,&rv.fbo); + if (rv.tex) glcache.DeleteTextures(1,&rv.tex); + + rv.TexAddr=addy>>3; + + // Find the smallest power of two texture that fits the viewport + int fbh2 = 2; + while (fbh2 < fbh) + fbh2 *= 2; + int fbw2 = 2; + while (fbw2 < fbw) + fbw2 *= 2; + + if (settings.rend.RenderToTextureUpscale > 1 && 
!settings.rend.RenderToTextureBuffer) + { + fbw *= settings.rend.RenderToTextureUpscale; + fbh *= settings.rend.RenderToTextureUpscale; + fbw2 *= settings.rend.RenderToTextureUpscale; + fbh2 *= settings.rend.RenderToTextureUpscale; + } + // Get the currently bound frame buffer object. On most platforms this just gives 0. + //glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo); + + // Create a texture for rendering to + rv.tex = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, rv.tex); + + glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw2, fbh2, 0, channels, fmt, 0); + + // Create the object that will allow us to render to the aforementioned texture + glGenFramebuffers(1, &rv.fbo); + glBindFramebuffer(GL_FRAMEBUFFER, rv.fbo); + + // Attach the texture to the FBO + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rv.tex, 0); + + // Check that our FBO creation was successful + GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + + glViewport(0, 0, fbw, fbh); // TODO CLIP_X/Y min? 
+ + return rv.fbo; +} diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index d0c96f040..f6feec0b8 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -641,16 +641,25 @@ GLuint fogTextureId; } int attribs[] = - { - WGL_CONTEXT_MAJOR_VERSION_ARB, 3, - WGL_CONTEXT_MINOR_VERSION_ARB, 1, + { + WGL_CONTEXT_MAJOR_VERSION_ARB, 4, + WGL_CONTEXT_MINOR_VERSION_ARB, 3, WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, 0 - }; + }; HGLRC m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); + if (!m_hrc) + { + printf("Open GL 4.3 not supported\n"); + // Try Gl 3.1 + attribs[1] = 3; + attribs[3] = 1; + m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); + } + if (m_hrc) wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc); else @@ -811,6 +820,9 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base"); glBindAttribLocation(program, VERTEX_COL_OFFS_ARRAY, "in_offs"); glBindAttribLocation(program, VERTEX_UV_ARRAY, "in_uv"); + glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1"); + glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1"); + glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1"); #ifndef GLES glBindFragDataLocation(program, 0, "FragColor"); @@ -1060,8 +1072,6 @@ bool gl_create_resources() return true; } -bool gl_init(void* wind, void* disp); - //swap buffers void gl_swap(); //destroy the gles context and free resources @@ -1076,7 +1086,6 @@ bool gl_create_resources(); bool gles_init() { - if (!gl_init((void*)libPvr_GetRenderTarget(), (void*)libPvr_GetRenderSurface())) return false; @@ -1092,6 +1101,11 @@ bool gles_init() #endif #endif + // glEnable(GL_DEBUG_OUTPUT); + // glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + // glDebugMessageCallback(gl_DebugOutput, NULL); + // 
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + //clean up the buffer glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT); @@ -1354,7 +1368,7 @@ static float LastFPSTime; static int lastFrameCount = 0; static float fps = -1; -static void OSD_HOOK() +void OSD_HOOK() { osd_base=pvrrc.verts.used(); osd_count=0; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 22a53f6aa..d549e73d3 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -42,6 +42,10 @@ #define VERTEX_COL_BASE_ARRAY 1 #define VERTEX_COL_OFFS_ARRAY 2 #define VERTEX_UV_ARRAY 3 +// OIT only +#define VERTEX_COL_BASE1_ARRAY 4 +#define VERTEX_COL_OFFS1_ARRAY 5 +#define VERTEX_UV1_ARRAY 6 #ifndef GL_UNSIGNED_INT_8_8_8_8 #define GL_UNSIGNED_INT_8_8_8_8 0x8035 @@ -121,6 +125,7 @@ struct gl_ctx extern gl_ctx gl; extern GLuint fbTextureId; +extern float fb_scale_x, fb_scale_y; GLuint gl_GetTexture(TSP tsp,TCW tcw); struct text_info { @@ -130,20 +135,27 @@ struct text_info { u32 textype; // 0 565, 1 1555, 2 4444 }; +bool gl_init(void* wind, void* disp); +void gl_swap(); + text_info raw_GetTexture(TSP tsp, TCW tcw); void CollectCleanup(); void DoCleanup(); void SortPParams(int first, int count); +void SetCull(u32 CullMode); void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); void RenderFramebuffer(); void DrawFramebuffer(float w, float h); +void OSD_HOOK(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping); +GLuint gl_CompileShader(const char* shader, GLuint type); +GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader); bool CompilePipelineShader(PipelineShader* s); #define TEXTURE_LOAD_ERROR 0 GLuint loadPNG(const string& subpath, int &width, int &height); @@ -197,3 +209,13 @@ extern struct ShaderUniforms_t } 
ShaderUniforms; +// Render to texture +struct FBT +{ + u32 TexAddr; + GLuint depthb,stencilb; + GLuint tex; + GLuint fbo; +}; + +extern FBT fb_rtt; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 40d309e77..02cdebdcf 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -573,14 +573,6 @@ typedef map::iterator TexCacheIter; TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw); -struct FBT -{ - u32 TexAddr; - GLuint depthb,stencilb; - GLuint tex; - GLuint fbo; -}; - FBT fb_rtt; void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) @@ -594,7 +586,7 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) rv.TexAddr=addy>>3; - // Find the largest square power of two texture that fits into the viewport + // Find the smallest power of two texture that fits into the viewport int fbh2 = 2; while (fbh2 < fbh) fbh2 *= 2;