diff --git a/Makefile.wiiu b/Makefile.wiiu index 789b6c9fcc..453d22a9be 100644 --- a/Makefile.wiiu +++ b/Makefile.wiiu @@ -6,7 +6,6 @@ DEBUG = 0 GRIFFIN_BUILD = 0 SALAMANDER_BUILD = 0 WHOLE_ARCHIVE_LINK = 0 -HAVE_HID = 0 WIIU_HID = 0 BUILD_DIR = objs/wiiu PC_DEVELOPMENT_IP_ADDRESS ?= @@ -15,6 +14,8 @@ PC_DEVELOPMENT_TCP_PORT ?= ifeq ($(SALAMANDER_BUILD),1) BUILD_DIR := $(BUILD_DIR)-salamander TARGET := $(TARGET)_salamander +else ifeq ($(GRIFFIN_BUILD),1) + BUILD_DIR := $(BUILD_DIR)-griffin endif ifeq ($(DEBUG),1) @@ -70,12 +71,14 @@ else DEFINES += -DHAVE_UPDATE_ASSETS DEFINES += -DHAVE_FILTERS_BUILTIN DEFINES += -DHAVE_SLANG + DEFINES += -DHAVE_SHADERPIPELINE OBJ += wiiu/system/missing_libc_functions.o OBJ += wiiu/shader_utils.o OBJ += wiiu/tex_shader.o OBJ += wiiu/sprite_shader.o OBJ += wiiu/frame_shader.o + OBJ += wiiu/ribbon_shader.o OBJ += gfx/drivers_shader/slang_preprocess.o OBJ += gfx/drivers_shader/glslang_util.o @@ -87,6 +90,7 @@ else # DEFINES += -DWANT_IFADDRS # DEFINES += -DHAVE_FREETYPE DEFINES += -DHAVE_XMB -DHAVE_MATERIALUI +# DEFINES += -DHAVE_HID else HAVE_MENU_COMMON = 1 HAVE_RTGA = 1 @@ -112,6 +116,7 @@ else HAVE_OVERLAY = 1 HAVE_STATIC_VIDEO_FILTERS = 1 HAVE_STATIC_AUDIO_FILTERS = 1 +# HAVE_HID = 1 WANT_LIBFAT = 1 WANT_IOSUHAX = 1 diff --git a/gfx/common/gx2_common.h b/gfx/common/gx2_common.h index f8a222bab7..6412870379 100644 --- a/gfx/common/gx2_common.h +++ b/gfx/common/gx2_common.h @@ -1,7 +1,11 @@ +#pragma once + #include #include "wiiu/frame_shader.h" +#include "wiiu/tex_shader.h" #include "wiiu/sprite_shader.h" +#include "wiiu/ribbon_shader.h" #include "gfx/video_shader_parse.h" #undef _X @@ -70,12 +74,21 @@ typedef struct int width; int height; + float* menu_display_coord_array; + ribbon_uniform_t* ribbon_ubo; + struct { sprite_vertex_t* v; int size; int current; } vertex_cache; + struct + { + tex_shader_vertex_t* v; + int size; + int current; + } vertex_cache_tex; void* drc_scan_buffer; void* tv_scan_buffer; diff --git 
a/gfx/drivers/wiiu_gfx.c b/gfx/drivers/wiiu_gfx.c index 04c886f087..0f0c3d3389 100644 --- a/gfx/drivers/wiiu_gfx.c +++ b/gfx/drivers/wiiu_gfx.c @@ -284,7 +284,9 @@ static void *wiiu_gfx_init(const video_info_t *video, GX2SetCullOnlyControl(GX2_FRONT_FACE_CCW, GX2_DISABLE, GX2_DISABLE); GX2InitShader(&frame_shader); + GX2InitShader(&tex_shader); GX2InitShader(&sprite_shader); + GX2InitShader(&ribbon_shader); GX2SetShader(&frame_shader); wiiu->ubo_vp = MEM1_alloc(sizeof(*wiiu->ubo_vp), GX2_UNIFORM_BLOCK_ALIGNMENT); @@ -361,6 +363,11 @@ static void *wiiu_gfx_init(const video_info_t *video, wiiu->vertex_cache.v = MEM2_alloc(wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v), GX2_VERTEX_BUFFER_ALIGNMENT); + wiiu->vertex_cache_tex.size = 0x1000; + wiiu->vertex_cache_tex.current = 0; + wiiu->vertex_cache_tex.v = MEM2_alloc(wiiu->vertex_cache_tex.size + * sizeof(*wiiu->vertex_cache_tex.v), GX2_VERTEX_BUFFER_ALIGNMENT); + /* Initialize samplers */ for (int i = 0; i < RARCH_WRAP_MAX; i++) { @@ -656,7 +663,9 @@ static void wiiu_gfx_free(void *data) GX2SetDRCEnable(GX2_DISABLE); GX2DestroyShader(&frame_shader); + GX2DestroyShader(&tex_shader); GX2DestroyShader(&sprite_shader); + GX2DestroyShader(&ribbon_shader); wiiu_free_shader_preset(wiiu); #ifdef HAVE_OVERLAY @@ -670,6 +679,9 @@ static void wiiu_gfx_free(void *data) MEM2_free(wiiu->v); MEM2_free(wiiu->menu.v); MEM2_free(wiiu->vertex_cache.v); + MEM2_free(wiiu->vertex_cache_tex.v); + MEM2_free(wiiu->menu_display_coord_array); + MEM2_free(wiiu->ribbon_ubo); MEM1_free(wiiu->color_buffer.surface.image); MEM1_free(wiiu->ubo_vp); @@ -1293,6 +1305,7 @@ static bool wiiu_gfx_frame(void *data, const void *frame, } wiiu->vertex_cache.current = 0; + wiiu->vertex_cache_tex.current = 0; GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v), sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v); GX2SetPixelSampler(&wiiu->sampler_linear[RARCH_WRAP_EDGE], @@ -1310,6 +1323,8 @@ static bool wiiu_gfx_frame(void 
*data, const void *frame, GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->vertex_cache.v, wiiu->vertex_cache.current * sizeof(*wiiu->vertex_cache.v)); + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, + wiiu->vertex_cache_tex.v, wiiu->vertex_cache_tex.current * sizeof(*wiiu->vertex_cache_tex.v)); if (wiiu->menu.enable) GX2DrawDone(); diff --git a/menu/drivers_display/menu_display_wiiu.c b/menu/drivers_display/menu_display_wiiu.c index 514ce091f0..c62c7ba430 100644 --- a/menu/drivers_display/menu_display_wiiu.c +++ b/menu/drivers_display/menu_display_wiiu.c @@ -26,6 +26,7 @@ #include "gfx/font_driver.h" #include "gfx/video_driver.h" #include "gfx/common/gx2_common.h" +#include "wiiu/system/memory.h" #include "wiiu/wiiu_dbg.h" static const float *menu_display_wiiu_get_default_vertices(void) @@ -61,7 +62,6 @@ static void menu_display_wiiu_viewport(void *data) static void menu_display_wiiu_draw(void *data) { - GX2Texture *texture = NULL; wiiu_video_t *wiiu = (wiiu_video_t*)video_driver_get_ptr(false); menu_display_ctx_draw_t *draw = (menu_display_ctx_draw_t*)data; @@ -69,69 +69,173 @@ static void menu_display_wiiu_draw(void *data) if (!wiiu || !draw) return; - texture = (GX2Texture*)draw->texture; - - if (!texture) - return; - - if (wiiu->vertex_cache.current + 4 > wiiu->vertex_cache.size) - return; - - sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; - - if(draw->coords->vertex && draw->coords->vertices == 4) + if(draw->pipeline.id) { - v->pos.x = MIN(MIN(MIN(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]); - v->pos.y = 1.0 - MAX(MAX(MAX(draw->coords->vertex[1], draw->coords->vertex[3]), draw->coords->vertex[5]), draw->coords->vertex[7]); - v->pos.width = MAX(MAX(MAX(draw->coords->vertex[0], draw->coords->vertex[2]), draw->coords->vertex[4]), draw->coords->vertex[6]) - v->pos.x; - v->pos.height = 1.0 - MIN(MIN(MIN(draw->coords->vertex[1], draw->coords->vertex[3]), 
draw->coords->vertex[5]), draw->coords->vertex[7]) - v->pos.y; - v->pos.x *= wiiu->color_buffer.surface.width; - v->pos.y *= wiiu->color_buffer.surface.height; - v->pos.width *= wiiu->color_buffer.surface.width; - v->pos.height *= wiiu->color_buffer.surface.height; + if(draw->pipeline.id != VIDEO_SHADER_MENU) + return; + + GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK); + GX2SetShader(&ribbon_shader); + GX2SetVertexUniformBlock(ribbon_shader.vs.uniformBlocks[0].offset, + ribbon_shader.vs.uniformBlocks[0].size, + wiiu->ribbon_ubo); + GX2SetAttribBuffer(0, draw->coords->vertices * 2 * sizeof(float), 2 * sizeof(float), wiiu->menu_display_coord_array); + GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_ONE, + GX2_BLEND_COMBINE_MODE_ADD, GX2_DISABLE, 0, 0, 0); + + GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, draw->coords->vertices, 0, 1); + + GX2SetBlendControl(GX2_RENDER_TARGET_0, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, + GX2_BLEND_COMBINE_MODE_ADD, + GX2_ENABLE, GX2_BLEND_MODE_SRC_ALPHA, GX2_BLEND_MODE_INV_SRC_ALPHA, + GX2_BLEND_COMBINE_MODE_ADD); + } + else if(draw->coords->vertex || draw->coords->color[0] != draw->coords->color[12]) + { + if (wiiu->vertex_cache_tex.current + 4 > wiiu->vertex_cache_tex.size) + return; + + + tex_shader_vertex_t* v = wiiu->vertex_cache_tex.v + wiiu->vertex_cache_tex.current; + + + GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK); + GX2SetShader(&tex_shader); + GX2SetVertexUniformBlock(tex_shader.vs.uniformBlocks[0].offset, + tex_shader.vs.uniformBlocks[0].size, + wiiu->ubo_mvp); + GX2SetAttribBuffer(0, wiiu->vertex_cache_tex.size * sizeof(*wiiu->vertex_cache_tex.v), + sizeof(*wiiu->vertex_cache_tex.v), wiiu->vertex_cache_tex.v); + + if(!draw->coords->vertex) + { + v[0].pos.x = 0.0f; + v[0].pos.y = 1.0f; + v[1].pos.x = 1.0f; + v[1].pos.y = 1.0f; + v[2].pos.x = 0.0f; + v[2].pos.y = 0.0f; + v[3].pos.x = 1.0f; + v[3].pos.y = 0.0f; + } + else + { + v[0].pos.x = draw->coords->vertex[0]; + 
v[0].pos.y = 1.0 - draw->coords->vertex[1]; + v[1].pos.x = draw->coords->vertex[2]; + v[1].pos.y = 1.0 - draw->coords->vertex[3]; + v[2].pos.x = draw->coords->vertex[4]; + v[2].pos.y = 1.0 - draw->coords->vertex[5]; + v[3].pos.x = draw->coords->vertex[6]; + v[3].pos.y = 1.0 - draw->coords->vertex[7]; + } + + if(!draw->coords->tex_coord) + { + v[0].coord.u = 0.0f; + v[0].coord.v = 1.0f; + v[1].coord.u = 1.0f; + v[1].coord.v = 1.0f; + v[2].coord.u = 0.0f; + v[2].coord.v = 0.0f; + v[3].coord.u = 1.0f; + v[3].coord.v = 0.0f; + } + else + { + v[0].coord.u = draw->coords->tex_coord[0]; + v[0].coord.v = draw->coords->tex_coord[1]; + v[1].coord.u = draw->coords->tex_coord[2]; + v[1].coord.v = draw->coords->tex_coord[3]; + v[2].coord.u = draw->coords->tex_coord[4]; + v[2].coord.v = draw->coords->tex_coord[5]; + v[3].coord.u = draw->coords->tex_coord[6]; + v[3].coord.v = draw->coords->tex_coord[7]; + } + + for(int i = 0; i < 4; i++) + { + v[i].color.r = draw->coords->color[(i << 2) + 0]; + v[i].color.g = draw->coords->color[(i << 2) + 1]; + v[i].color.b = draw->coords->color[(i << 2) + 2]; + v[i].color.a = draw->coords->color[(i << 2) + 3]; + } + + + if(draw->texture) + GX2SetPixelTexture((GX2Texture*)draw->texture, tex_shader.ps.samplerVars[0].location); + + GX2DrawEx(GX2_PRIMITIVE_MODE_TRIANGLE_STRIP, 4, wiiu->vertex_cache_tex.current, 1); + wiiu->vertex_cache_tex.current += 4; } else { + if (wiiu->vertex_cache.current + 1 > wiiu->vertex_cache.size) + return; + + sprite_vertex_t* v = wiiu->vertex_cache.v + wiiu->vertex_cache.current; v->pos.x = draw->x; v->pos.y = wiiu->color_buffer.surface.height - draw->y - draw->height; v->pos.width = draw->width; v->pos.height = draw->height; - } - if(draw->coords->tex_coord && draw->coords->vertices == 4) - { - v->coord.u = MIN(MIN(MIN(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]); - v->coord.v = MIN(MIN(MIN(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), 
draw->coords->tex_coord[5]), draw->coords->tex_coord[7]); - v->coord.width = MAX(MAX(MAX(draw->coords->tex_coord[0], draw->coords->tex_coord[2]), draw->coords->tex_coord[4]), draw->coords->tex_coord[6]) - v->coord.u; - v->coord.height = MAX(MAX(MAX(draw->coords->tex_coord[1], draw->coords->tex_coord[3]), draw->coords->tex_coord[5]), draw->coords->tex_coord[7]) - v->coord.v; - } - else - { v->coord.u = 0.0f; v->coord.v = 0.0f; v->coord.width = 1.0f; v->coord.height = 1.0f; + + v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], + 0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]); + + if(draw->texture) + GX2SetPixelTexture((GX2Texture*)draw->texture, sprite_shader.ps.samplerVars[0].location); + + GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1); + wiiu->vertex_cache.current ++; + return; } - v->color = COLOR_RGBA(0xFF * draw->coords->color[0], 0xFF * draw->coords->color[1], - 0xFF * draw->coords->color[2], 0xFF * draw->coords->color[3]); - GX2SetPixelTexture(texture, sprite_shader.ps.samplerVars[0].location); - - GX2DrawEx(GX2_PRIMITIVE_MODE_POINTS, 1, wiiu->vertex_cache.current, 1); - -#if 0 - printf("(%i,%i,%i,%i) , (%i,%i)\n", (int)draw->x, - (int)draw->y, (int)draw->width, (int)draw->height, - texture->surface.width, texture->surface.height); -#endif - - wiiu->vertex_cache.current ++; + GX2SetShaderMode(GX2_SHADER_MODE_GEOMETRY_SHADER); + GX2SetShader(&sprite_shader); +// GX2SetGeometryShaderInputRingBuffer(wiiu->input_ring_buffer, wiiu->input_ring_buffer_size); +// GX2SetGeometryShaderOutputRingBuffer(wiiu->output_ring_buffer, wiiu->output_ring_buffer_size); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[0].offset, + sprite_shader.vs.uniformBlocks[0].size, + wiiu->ubo_vp); + GX2SetVertexUniformBlock(sprite_shader.vs.uniformBlocks[1].offset, + sprite_shader.vs.uniformBlocks[1].size, + wiiu->ubo_tex); + GX2SetAttribBuffer(0, wiiu->vertex_cache.size * sizeof(*wiiu->vertex_cache.v), + 
sizeof(*wiiu->vertex_cache.v), wiiu->vertex_cache.v); } static void menu_display_wiiu_draw_pipeline(void *data) { + menu_display_ctx_draw_t *draw = (menu_display_ctx_draw_t*)data; + wiiu_video_t *wiiu = (wiiu_video_t*)video_driver_get_ptr(false); + + video_coord_array_t *ca = NULL; + + if (!wiiu || !draw || draw->pipeline.id != VIDEO_SHADER_MENU) + return; + + ca = menu_display_get_coords_array(); + if(!wiiu->menu_display_coord_array) + { + wiiu->menu_display_coord_array = MEM2_alloc(ca->coords.vertices * 2 * sizeof(float), GX2_VERTEX_BUFFER_ALIGNMENT); + memcpy(wiiu->menu_display_coord_array, ca->coords.vertex, ca->coords.vertices * 2 * sizeof(float)); + wiiu->ribbon_ubo = MEM2_alloc(sizeof(*wiiu->ribbon_ubo), GX2_UNIFORM_BLOCK_ALIGNMENT); + wiiu->ribbon_ubo->time = 0.0f; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_ATTRIBUTE_BUFFER, wiiu->menu_display_coord_array, ca->coords.vertices * 2 * sizeof(float)); + } + + draw->coords->vertex = wiiu->menu_display_coord_array; + draw->coords->vertices = ca->coords.vertices; + + wiiu->ribbon_ubo->time += 0.01; + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_UNIFORM_BLOCK, wiiu->ribbon_ubo, sizeof(*wiiu->ribbon_ubo)); } static void menu_display_wiiu_restore_clear_color(void) diff --git a/wiiu/gx2_shader_inl.h b/wiiu/gx2_shader_inl.h index 08cf3c2c88..8fce718ab9 100644 --- a/wiiu/gx2_shader_inl.h +++ b/wiiu/gx2_shader_inl.h @@ -111,6 +111,7 @@ #define _w 3 #define _0 4 #define _1 5 +#define _m 7 /*mask*/ #define _xyzw 0b1111 #define _xy__ 0b0011 @@ -118,6 +119,10 @@ #define GX2_COMP_SEL(c0, c1, c2, c3) (((c0) << 24) | ((c1) << 16) | ((c2) << 8) | (c3)) #define ALU_LITERAL(v) to_QWORD(to_LE(v), 0) +#define ALU_LITERAL2(v0,v1) to_QWORD(to_LE(v0), to_LE(v1)) +#define ALU_LITERAL3(v0,v1,v2) ALU_LITERAL2(v0,v1),ALU_LITERAL(v2) +#define ALU_LITERAL4(v0,v1,v2,v3) ALU_LITERAL2(v0,v1),ALU_LITERAL2(v2,v3) +#define ALU_LITERAL5(v0,v1,v2,v3,v4) ALU_LITERAL4(v0,v1,v2,v3),ALU_LITERAL(v4) /* SRCx_SEL special constants */ #define ALU_SRC_1_DBL_L 0xF4 @@
-195,11 +200,17 @@ #define CF_INST_EMIT_VERTEX 0x15 #define CF_INST_MEM_RING 0x26 /* ALU */ -#define OP2_INST_ADD 0x0 -#define OP2_INST_MUL 0x1 -#define OP2_INST_MUL_IEEE 0x2 -#define OP2_INST_MOV 0x19 -#define OP2_INST_RECIP_IEEE 0x66 +#define OP2_INST_ADD 0x0 +#define OP2_INST_MUL 0x1 +#define OP2_INST_MUL_IEEE 0x2 +#define OP2_INST_FRACT 0x10 +#define OP2_INST_FLOOR 0x14 +#define OP2_INST_MOV 0x19 +#define OP2_INST_DOT4_IEEE 0x51 +#define OP2_INST_RECIP_IEEE 0x66 +#define OP2_INST_RECIPSQRT_IEEE 0x69 +#define OP2_INST_SIN 0x6E +#define OP2_INST_COS 0x6F #define OP3_INST_MULADD 0x10 /* EXP */ @@ -207,7 +218,9 @@ #define CF_INST_EXP_DONE 0x28 /* TEX */ -#define TEX_INST_SAMPLE 0x10 +#define TEX_INST_GET_GRADIENTS_H 0x07 +#define TEX_INST_GET_GRADIENTS_V 0x08 +#define TEX_INST_SAMPLE 0x10 /* VTX */ #define VTX_INST_FETCH 0x0 @@ -298,35 +311,67 @@ to_QWORD(ALU_WORD0(src0Sel, 0x0, src0Chan, 0x0, src1Sel, 0x0, src1Chan, 0x0, 0x0, 0x0), \ ALU_WORD1_OP3(src2Sel, 0x0, src2Chan, 0x0, inst, 0x0, dstGpr, 0x0, dstChan, 0x0)) -#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \ - ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) - -#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \ - ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2) - -#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ - ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) - -#define ALU_MULADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \ - ALU_OP3(OP3_INST_MULADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) - -#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ - ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) - #define ALU_ADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, 
src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) #define ALU_ADD_x2(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ ALU_OP2(OP2_INST_ADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_M2) +#define ALU_MUL(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_MUL, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + +#define ALU_MUL_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_MUL_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + +#define ALU_FRACT(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_FRACT, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_FLOOR(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_FLOOR, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_MOV(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_MOV_x2(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M2) + +#define ALU_MOV_x4(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_MOV, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_M4) + +#define ALU_DOT4_IEEE(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan) \ + ALU_OP2(OP2_INST_DOT4_IEEE, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, ALU_OMOD_OFF) + #define ALU_RECIP_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \ ALU_OP2(OP2_INST_RECIP_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) +#define ALU_RECIPSQRT_IEEE(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_RECIPSQRT_IEEE, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_SIN(dstGpr, dstChan, src0Sel, src0Chan) \ + ALU_OP2(OP2_INST_SIN, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_COS(dstGpr, dstChan, src0Sel, 
src0Chan) \ + ALU_OP2(OP2_INST_COS, dstGpr, dstChan, src0Sel, src0Chan, ALU_SRC_0, 0x0, ALU_OMOD_OFF) + +#define ALU_MULADD(dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) \ + ALU_OP3(OP3_INST_MULADD, dstGpr, dstChan, src0Sel, src0Chan, src1Sel, src1Chan, src2Sel, src2Chan) + #define TEX_SAMPLE(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ to_QWORD(TEX_WORD0(TEX_INST_SAMPLE, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \ to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _0, _x), 0x00000000) +#define TEX_GET_GRADIENTS_H(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ + to_QWORD(TEX_WORD0(TEX_INST_GET_GRADIENTS_H, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ + TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \ + to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), 0x00000000) + +#define TEX_GET_GRADIENTS_V(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, srcSelY, srcSelZ, srcSelW, resourceID, samplerID)\ + to_QWORD(TEX_WORD0(TEX_INST_GET_GRADIENTS_V, 0x0, 0x0, resourceID, srcReg, 0x0, 0x0), \ + TEX_WORD1(dstReg, 0x0, dstSelX, dstSelY, dstSelZ, dstSelW, 0x0, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED, TEX_NORMALIZED)), \ + to_QWORD(TEX_WORD2(0x0, 0x0, 0x0, samplerID, _x, _y, _z, _x), 0x00000000) + + #define VTX_FETCH(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW, srcReg, srcSelX, buffer_id, type, mega, offset) \ to_QWORD(VTX_WORD0(VTX_INST_FETCH, type, buffer_id, srcReg, srcSelX, mega), VTX_WORD1(dstReg, dstSelX, dstSelY, dstSelZ, dstSelW)) , \ to_QWORD(VTX_WORD2(offset, (mega >> 31)), 0x00000000) diff --git a/wiiu/ribbon_shader.c b/wiiu/ribbon_shader.c new file mode 100644 index 
0000000000..a5d768f481 --- /dev/null +++ b/wiiu/ribbon_shader.c @@ -0,0 +1,389 @@ +/* RetroArch - A frontend for libretro. + * Copyright (C) 2014-2018 - Ali Bouhlel + * + * RetroArch is free software: you can redistribute it and/or modify it under the terms + * of the GNU General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with RetroArch. + * If not, see . + */ + +#include +#include +#include +#include +#include "frame_shader.h" +#include "gx2_shader_inl.h" + +__attribute__((aligned(GX2_SHADER_ALIGNMENT))) +static struct +{ + u64 cf[32]; + u64 alu[123 + 51]; +} vs_program = +{ + { + CALL_FS NO_BARRIER, + ALU(32, 123) KCACHE0(CB1, _0_15), + ALU(155, 51), + EXP_DONE(POS0, _R7, _x, _y, _z, _w), + EXP_DONE(PARAM0, _R7, _x, _y, _z, _w) NO_BARRIER + END_OF_PROGRAM + END_OF_PROGRAM + }, + { + ALU_MOV(_R7,_x, _R1,_x), + ALU_MUL_IEEE(__,_y, KC0(0),_x, ALU_SRC_LITERAL,_x), + ALU_MUL_IEEE(__,_z, KC0(0),_x, ALU_SRC_LITERAL,_y), + ALU_MUL_IEEE(__,_w, KC0(0),_x, ALU_SRC_LITERAL,_z), + ALU_MUL_IEEE(_R127,_w, KC0(0),_x, ALU_SRC_0_5,_x) + ALU_LAST, + ALU_LITERAL3(0x3E4CCCCD,0x3C23D70A,0x3DCCCCCD), + ALU_ADD(__,_x, ALU_SRC_PV _NEG,_z, ALU_SRC_0, _x), + ALU_ADD(__,_y, _R1,_y, ALU_SRC_PV _NEG ,_w), + ALU_MOV_x2(__,_z, ALU_SRC_PV,_x), + ALU_ADD(__,_w, ALU_SRC_PV,_x, ALU_SRC_PV _NEG,_y), + ALU_ADD(__,__, _R1,_y, ALU_SRC_PV,_w) + ALU_LAST, + ALU_MUL(_R127,_x, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_x), + ALU_MUL(__,_y, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_x), + ALU_MUL_IEEE(__,_z, ALU_SRC_PV,_w, ALU_SRC_LITERAL,_y), + ALU_ADD(_R126,_w, _R127 _NEG,_w, ALU_SRC_PV,_z), + ALU_ADD(_R127,_z, 
_R7,_x, ALU_SRC_PS,_x) + ALU_LAST, + ALU_LITERAL2(0x40E00000,0x3E800000), + ALU_FLOOR(__,_x, ALU_SRC_PV,_y), + ALU_FLOOR(__,_y, ALU_SRC_PV,_x), + ALU_MUL(__,_z, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_x), + ALU_FRACT(_R127,_w, ALU_SRC_PV,_y), + ALU_MOV_x4(_R124,_x, _R1,_y) + ALU_LAST, + ALU_LITERAL(0x40E00000), + ALU_MUL(__,_x, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_x), + ALU_MUL(_R127,_y, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_y), + ALU_FRACT(__,_z, ALU_SRC_PV,_z), + ALU_FLOOR(__,_w, ALU_SRC_PV,_z), + ALU_MOV_x2(__,__, ALU_SRC_PV,_w) + ALU_LAST, + ALU_LITERAL2(0x42640000,0x42E20000), + ALU_MUL(_R125,_x, ALU_SRC_PV,_z, ALU_SRC_PV,_z), + ALU_MOV_x2(__,_y, ALU_SRC_PV,_z), + ALU_FRACT(_R124,_z, _R127,_x), + ALU_ADD(__,_w, ALU_SRC_PV,_w, ALU_SRC_PV,_x), + ALU_ADD(_R124,_y, ALU_SRC_PS _NEG,_x, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL(0x40400000), + ALU_ADD(_R126,_x, ALU_SRC_PV _NEG,_y, ALU_SRC_LITERAL,_x), + ALU_MUL(_R0,_y, _R127,_w, _R127,_w), + ALU_ADD(_R127,_z, _R127,_y, ALU_SRC_PV,_w), + ALU_MULADD(_R0,_w, _R126,_w, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x) VEC_120, + ALU_MULADD(_R2,_y, _R127,_z, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x) + ALU_LAST, + ALU_LITERAL2(0x40400000,0x3E22F983), + ALU_ADD(__,_x, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_x), + ALU_ADD(_R127,_y, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_y), + ALU_ADD(__,_z, ALU_SRC_PV,_z, ALU_SRC_1,_x), + ALU_ADD(_R127,_w, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_z), + ALU_ADD(_R127,_x, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_w) + ALU_LAST, + ALU_LITERAL4(0x42640000,0x42680000,0x42E20000,0x42E40000), + ALU_ADD(__,_x, _R127,_z, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R126,_y, _R127,_z, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x), + ALU_ADD(__,_z, _R127,_z, ALU_SRC_LITERAL,_z), + ALU_MULADD(_R126,_w, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x), + ALU_MULADD(_R125,_w, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x) + ALU_LAST, + ALU_LITERAL3(0x432B0000,0x3E22F983,0x432A0000), + ALU_MULADD(_R123,_x, _R127,_w, ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x), + ALU_MULADD(_R127,_y, _R127,_x, 
ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x), + ALU_MULADD(_R127,_z, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x), + ALU_MULADD(_R123,_w, _R127,_y, ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x), + ALU_MULADD(_R125,_y, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x) + ALU_LAST, + ALU_LITERAL(0x3E22F983), + ALU_FRACT(__,_x, _R126,_y), + ALU_FRACT(__,_y, _R126,_w), + ALU_FRACT(_R126,_z, _R125,_w) VEC_120, + ALU_FRACT(_R126,_w, ALU_SRC_PV,_w), + ALU_FRACT(_R125,_z, ALU_SRC_PV,_x) + ALU_LAST, + ALU_FRACT(__,_x, _R127,_z), + ALU_FRACT(_R127,_y, _R125,_y), + ALU_FRACT(__,_z, _R127,_y) VEC_120, + ALU_MULADD(_R125,_w, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R127,_w, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL2(0xC0490FDB,0x40C90FDB), + ALU_MULADD(_R123,_x, _R126,_w, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R123,_y, _R126,_z, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R126,_z, _R125,_z, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x) VEC_120, + ALU_MULADD(_R126,_w, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R124,_w, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL2(0xC0490FDB,0x40C90FDB), + ALU_MUL(_R127,_x, _R127,_w, ALU_SRC_LITERAL,_x), + ALU_MUL(_R127,_y, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_x), + ALU_MUL(__,_z, _R125,_w, ALU_SRC_LITERAL,_x) VEC_120, + ALU_MULADD(_R123,_w, _R127,_y, ALU_SRC_LITERAL,_z, ALU_SRC_LITERAL,_y), + ALU_MUL(_R0,_x, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL3(0x3E22F983,0xC0490FDB,0x40C90FDB), + ALU_MUL(_R2,_x, _R126,_z, ALU_SRC_LITERAL,_x), + ALU_MUL(_R3,_y, _R126,_w, ALU_SRC_LITERAL,_x), + ALU_MUL(_R126,_z, _R124,_w, ALU_SRC_LITERAL,_x) VEC_120, + ALU_MUL(_R126,_w, ALU_SRC_PV,_w, ALU_SRC_LITERAL,_x), + ALU_SIN(__,__, ALU_SRC_PV,_z) SCL_210 + ALU_LAST, + ALU_LITERAL(0x3E22F983), + ALU_MUL(__,_x, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_MUL(_R6,_y, _R126,_x, _R125,_x), + ALU_MOV_x2(_R0,_z, _R124,_z), + 
ALU_MUL(_R2,_w, _R124,_y, _R0,_y), + ALU_SIN(__,__, _R127,_x) SCL_210 + ALU_LAST, + ALU_LITERAL(0x472AEE8C), + ALU_MUL(__,_x, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R123,_y, _R124,_x, ALU_SRC_LITERAL,_y, ALU_SRC_0_5,_x), + ALU_FRACT(_R125,_z, _R0,_w), + ALU_FRACT(_R124,_w, ALU_SRC_PV,_x), + ALU_SIN(__,__, _R127,_y) SCL_210 + ALU_LAST, + ALU_LITERAL2(0x472AEE8C,0x3E22F983), + ALU_FRACT(_R3,_x, _R2,_y), + ALU_FRACT(_R0,_y, ALU_SRC_PV,_y), + ALU_MUL(__,_z, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_FRACT(__,_w, ALU_SRC_PV,_x), + ALU_SIN(__,__, _R0,_x) SCL_210 + ALU_LAST, + ALU_LITERAL(0x472AEE8C), + ALU_ADD(__,_x, _R124 _NEG,_w, ALU_SRC_PV,_w), + ALU_FRACT(_R5,_y, ALU_SRC_PV,_z), + ALU_MUL(__,_z, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R1,_w, _R125,_z, ALU_SRC_LITERAL,_z,ALU_SRC_LITERAL,_y), + ALU_SIN(__,__, _R126,_z) SCL_210 + ALU_LAST, + ALU_LITERAL3(0x472AEE8C,0xC0490FDB,0x40C90FDB), + ALU_MUL(_R0,_x, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_FRACT(_R4,_y, ALU_SRC_PV,_z), + ALU_MULADD(_R1,_z, ALU_SRC_PV,_x, _R6,_y, _R124,_w) VEC_021, + ALU_MUL(_R0,_w, _R124,_z, _R124,_z), + ALU_SIN(_R2,_y, _R126,_w) SCL_210 + ALU_LAST, + ALU_LITERAL(0x472AEE8C), + ALU_MUL(__,_x, _R2,_y, ALU_SRC_LITERAL,_x), + ALU_ADD(__,_y, _R0 _NEG,_z, ALU_SRC_LITERAL,_y), + ALU_MULADD(_R124,_z, _R0,_y, ALU_SRC_LITERAL,_w, ALU_SRC_LITERAL,_z) VEC_120, + ALU_FRACT(_R126,_w, _R0,_x), + ALU_SIN(__,__, _R3,_y) SCL_210 + ALU_LAST, + ALU_LITERAL4(0x472AEE8C,0x40400000,0xC0490FDB,0x40C90FDB), + ALU_ADD(__,_x, _R5 _NEG,_y, _R4,_y), + ALU_MUL(_R125,_y, ALU_SRC_PV,_y, _R0,_w), + ALU_MUL(__,_z, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_FRACT(__,_w, ALU_SRC_PV,_x), + ALU_SIN(__,__, _R2,_x) SCL_210 + ALU_LAST, + ALU_LITERAL(0x472AEE8C), + ALU_ADD(__,_x, _R126 _NEG,_w, ALU_SRC_PV,_w), + ALU_FRACT(_R127,_y, ALU_SRC_PV,_z), + ALU_MUL(__,_z, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_x), + ALU_MULADD(_R123,_w, ALU_SRC_PV,_x, _R6,_y, _R5,_y), + ALU_MUL(__,__, _R1,_w, ALU_SRC_LITERAL,_y) + ALU_LAST, + 
ALU_LITERAL2(0x472AEE8C,0x3E22F983), + ALU_MULADD(_R124,_x, ALU_SRC_PV,_x, _R6,_y, _R126,_w), + ALU_FRACT(_R124,_y, ALU_SRC_PV,_z), + ALU_ADD(__,_z, _R1 _NEG,_z, ALU_SRC_PV,_w), + ALU_MULADD(_R123,_w, _R3,_x, ALU_SRC_LITERAL,_y, ALU_SRC_LITERAL,_x), + ALU_COS(__,__, ALU_SRC_PS,_x) SCL_210 + ALU_LAST, + ALU_LITERAL2(0xC0490FDB,0x40C90FDB), + ALU_ADD(__,_x, ALU_SRC_PV _NEG,_y, _R127,_y), + ALU_MULADD(_R127,_y, ALU_SRC_PV,_z, _R2,_w, _R1,_z), + ALU_MUL(_R124,_z, _R124,_z, ALU_SRC_LITERAL,_x), + ALU_MUL(__,_w, ALU_SRC_PV,_w, ALU_SRC_LITERAL,_x), + ALU_MUL_IEEE(_R124,_w, ALU_SRC_PS,_x, ALU_SRC_LITERAL,_y) + ALU_LAST, + ALU_LITERAL2(0x3E22F983,0x3E4CCCCD), + ALU_MULADD(_R126,_w, ALU_SRC_PV,_x, _R6,_y, _R124,_y), + ALU_COS(_R124,_y, ALU_SRC_PV,_w) SCL_210 + ALU_LAST, + ALU_ADD(__,_x, ALU_SRC_PV _NEG,_w, _R124,_x), + ALU_MOV(_R7,_w, ALU_SRC_LITERAL,_x), + ALU_COS(__,__, _R124,_z) SCL_210 + ALU_LAST, + ALU_LITERAL(0x3F800000), + ALU_MUL(__,_z, _R124,_y, ALU_SRC_PS,_x), + ALU_MULADD(_R123,_w, ALU_SRC_PV,_x, _R2,_w, _R126,_w) + ALU_LAST, + ALU_MUL_IEEE(_R124,_y, ALU_SRC_PV,_z, ALU_SRC_LITERAL,_x), + ALU_ADD(__,_z, _R127 _NEG,_y, ALU_SRC_PV,_w) + ALU_LAST, + ALU_LITERAL(0x3E000000), + ALU_MULADD(_R123,_y, ALU_SRC_PV,_z, _R125,_y, _R127,_y) + ALU_LAST, + ALU_MUL_IEEE(__,_x, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL(0x3D888889), + ALU_ADD(__,_y, ALU_SRC_PV,_x, _R124,_w), + ALU_ADD(_R7,_z, _R1,_y, ALU_SRC_PV _NEG,_x) + ALU_LAST, + ALU_ADD(__,_z, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL(0xBE99999A), + ALU_ADD(__,_x, _R124,_y, ALU_SRC_PV _NEG,_z) + ALU_LAST, + ALU_MOV(_R7,_y, ALU_SRC_PV _NEG,_x) + ALU_LAST, + } +}; + +__attribute__((aligned(GX2_SHADER_ALIGNMENT))) +static struct +{ + u64 cf[32]; + u64 alu[64-32]; + u64 tex[2 * 2]; +} +ps_program = +{ + { + TEX(64, 2) VALID_PIX, + ALU(32, 27), + EXP_DONE(PIX0, _R2, _z, _z, _z, _w) + END_OF_PROGRAM + + }, + { + ALU_MUL(__,_x, _R1,_z, _R0,_x), + ALU_MUL(__,_y, _R1,_y, _R0,_z), + ALU_MOV(_R2,_z, 
ALU_SRC_LITERAL,_x), + ALU_MUL(__,_w, _R1,_x, _R0,_y) + ALU_LAST, + ALU_LITERAL(0x3F800000), + ALU_MULADD(_R123,_x, _R0 _NEG,_y, _R1,_z, ALU_SRC_PV,_y), + ALU_MULADD(_R123,_y, _R0 _NEG,_z, _R1,_x, ALU_SRC_PV,_x), + ALU_MULADD(_R127,_z, _R0 _NEG,_x, _R1,_y, ALU_SRC_PV,_w) + ALU_LAST, + ALU_DOT4_IEEE(__,_x, ALU_SRC_PV,_x, ALU_SRC_PV,_x), + ALU_DOT4_IEEE(__,_y, ALU_SRC_PV,_y, ALU_SRC_PV,_y), + ALU_DOT4_IEEE(__,_z, ALU_SRC_PV,_z, ALU_SRC_PV,_z), + ALU_DOT4_IEEE(__,_w, ALU_SRC_LITERAL,_x, ALU_SRC_0,_x) + ALU_LAST, + ALU_LITERAL(0x80000000), + ALU_RECIPSQRT_IEEE(__,__, ALU_SRC_PV,_x) SCL_210 + ALU_LAST, + ALU_MULADD(_R123,_w, _R127 _NEG,_z, ALU_SRC_PS,_x, ALU_SRC_1,_x) + ALU_LAST, + ALU_MUL(__,_x, ALU_SRC_PV,_w, ALU_SRC_PV,_w) + ALU_LAST, + ALU_MULADD(_R123,_z, ALU_SRC_PV,_x, ALU_SRC_LITERAL,_x, ALU_SRC_0_5,_x) + ALU_LAST, + ALU_LITERAL(0x3E22F983), + ALU_FRACT(__,_y, ALU_SRC_PV,_z) + ALU_LAST, + ALU_MULADD(_R123,_w, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_y,ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL2(0xC0490FDB,0x40C90FDB), + ALU_MUL(__,_x, ALU_SRC_PV,_w, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL(0x3E22F983), + ALU_COS(__,__, ALU_SRC_PV,_x) SCL_210 + ALU_LAST, + ALU_ADD(__,_y, ALU_SRC_PS _NEG,_x, ALU_SRC_1,_x) + ALU_LAST, + ALU_MUL_IEEE(_R2,_w, ALU_SRC_PV,_y, ALU_SRC_LITERAL,_x) + ALU_LAST, + ALU_LITERAL(0x3D4CCCCD), + }, + { + TEX_GET_GRADIENTS_H(_R1,_x,_y,_z,_m, _R0,_x,_y,_z,_x, _t0, _s0), + TEX_GET_GRADIENTS_V(_R0,_x,_y,_z,_m, _R0,_x,_y,_z,_x, _t0, _s0) + } +}; + +static GX2AttribVar attributes[] = +{ + { "VertexCoord", GX2_SHADER_VAR_TYPE_FLOAT3, 0, 0}, +}; + +static GX2AttribStream attribute_stream[] = +{ + {0, 0, 0, GX2_ATTRIB_FORMAT_FLOAT_32_32_32, + GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT} +}; + +static GX2SamplerVar samplers[] = +{ + { "Source", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, +}; + +static GX2UniformBlock uniform_blocks[] = { + {"UBO", 1, 16} +}; + +static GX2UniformVar uniform_vars[] = { + {"constants.time", 
GX2_SHADER_VAR_TYPE_FLOAT, 1, 0, 0}, +}; + + +GX2Shader ribbon_shader = +{ + { + { + .sq_pgm_resources_vs.num_gprs = 8, + .sq_pgm_resources_vs.stack_size = 1, + .spi_vs_out_config.vs_export_count = 0, + .num_spi_vs_out_id = 1, + { + {.semantic_0 = 0x00, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, + }, + .sq_vtx_semantic_clear = ~0x1, + .num_sq_vtx_semantic = 1, + { + 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }, + .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, + .vgt_hos_reuse_depth.reuse_depth = 0x10, + }, /* regs */ + .size = sizeof(vs_program), + .program = (uint8_t*)&vs_program, + .mode = GX2_SHADER_MODE_UNIFORM_BLOCK, + .uniformBlockCount = countof(uniform_blocks), uniform_blocks, + .uniformVarCount = countof(uniform_vars), uniform_vars, + .attribVarCount = countof(attributes), attributes, + }, + { + { + .sq_pgm_resources_ps.num_gprs = 3, + .sq_pgm_exports_ps.export_mode = 0x2, + .spi_ps_in_control_0.num_interp = 1, + .spi_ps_in_control_0.persp_gradient_ena = 1, + .spi_ps_in_control_0.baryc_sample_cntl = spi_baryc_cntl_centers_only, + 
.num_spi_ps_input_cntl = 1, {{.semantic = 0, .default_val = 1}}, /* one pixel-stage input, semantic 0: the same semantic id the vertex stage lists first in its output table */ + .cb_shader_mask.output0_enable = 0xF, /* all four low bits set; presumably one per colour channel of output 0 (TODO confirm) */ + .cb_shader_control.rt0_enable = TRUE, + .db_shader_control.z_order = db_z_order_early_z_then_late_z, + }, /* regs */ + .size = sizeof(ps_program), + .program = (uint8_t*)&ps_program, + .mode = GX2_SHADER_MODE_UNIFORM_BLOCK, + .samplerVarCount = countof(samplers), samplers, /* count, then the samplers table as a positional initializer for the next member */ + }, + .attribute_stream = attribute_stream, +}; diff --git a/wiiu/ribbon_shader.h b/wiiu/ribbon_shader.h new file mode 100644 index 0000000000..c83fa9d1f4 --- /dev/null +++ b/wiiu/ribbon_shader.h @@ -0,0 +1,46 @@ +/* RetroArch - A frontend for libretro. + * Copyright (C) 2014-2018 - Ali Bouhlel + * + * RetroArch is free software: you can redistribute it and/or modify it under the terms + * of the GNU General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with RetroArch. + * If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include + /* NOTE(review): the #include above has no operand in this copy; the header name (it must supply GX2Shader, used below) appears to have been dropped. Restore it from the upstream file. */ +#ifdef __cplusplus +extern "C" { +#endif + /* One ribbon vertex: a single pos member holding three floats (x, y, z). */ +typedef struct +{ + struct + { + float x; + float y; + float z; + }pos; +}ribbon_vertex_t; + /* Contents of the ribbon uniform block: one float, time. The type is declared with little-endian scalar storage order and 16-byte alignment. */ +typedef struct +__attribute__((scalar_storage_order ("little-endian"))) +__attribute__((aligned (16))) +{ + float time; +}ribbon_uniform_t; + + +extern GX2Shader ribbon_shader; + +#ifdef __cplusplus +} +#endif diff --git a/wiiu/shaders/ribbon.frag b/wiiu/shaders/ribbon.frag new file mode 100644 index 0000000000..8c7aac21c1 --- /dev/null +++ b/wiiu/shaders/ribbon.frag @@ -0,0 +1,21 @@ +#version 150 + +uniform UBO +{ + float time; +} constants; + +layout(location = 0) in vec3 vEC; +layout(location = 0) out vec4 FragColor; + /* Builds a face normal from the screen-space derivatives of vEC and writes white with alpha c, where d = 1 - dot(normal, up) and c = (1 - cos(d * d)) / 20. The constants block is declared but not read here. */ +void main() +{ + const vec3 up = vec3(0.0, 0.0, 1.0); + vec3 x = dFdx(vEC); + vec3 y = dFdy(vEC); + vec3 normal = normalize(cross(x, y)); + float c = 1.0 - dot(normal, up); + c = (1.0 - cos(c * c)) / 20.0; +// FragColor = vec4(c, c, c, 1.0); + FragColor = vec4(1.0, 1.0, 1.0, c); +} diff --git a/wiiu/shaders/ribbon.vert b/wiiu/shaders/ribbon.vert new file mode 100644 index 0000000000..f2bb7fb042 --- /dev/null +++ b/wiiu/shaders/ribbon.vert @@ -0,0 +1,53 @@ +#version 150 + +layout(location = 0) in vec3 VertexCoord; +layout(location = 0) out vec3 vEC; + +uniform UBO +{ + float time; +} constants; + /* Hash: fractional part of sin(n) * 43758.5453. */ +float iqhash(float n) +{ + return fract(sin(n) * 43758.5453); +} + /* Value noise: hashes the 8 corners of the lattice cell containing x (cell index linearised as p.x + 57 * p.y + 113 * p.z) and blends them with mix() along x, then y, then z, using the weights f * f * (3 - 2 * f). */ +float noise(vec3 x) +{ + vec3 p = floor(x); + vec3 f = fract(x); + f = f * f * (3.0 - 2.0 * f); + float n = p.x + p.y * 57.0 + 113.0 * p.z; + return mix(mix(mix(iqhash(n), iqhash(n + 1.0), f.x), + mix(iqhash(n + 57.0), iqhash(n + 58.0), f.x), f.y), + mix(mix(iqhash(n + 113.0), iqhash(n + 114.0), f.x), + mix(iqhash(n + 170.0), iqhash(n + 171.0), f.x), f.y), f.z); +} + /* Time-animated product of two cosines of the x and z components; x.y is not used. */ +float xmb_noise2(vec3 x) +{ + return cos(x.z * 4.0) * cos(x.z + constants.time / 10.0 + x.x); +} + /* Places the input on the XZ plane (VertexCoord.x goes to v.x, VertexCoord.y to v.z), then displaces it with xmb_noise2 and noise terms driven by constants.time. */ +void main() +{ + vec3 v = vec3(VertexCoord.x, 0.0, VertexCoord.y); + vec3 v2 = v; + vec3 v3 = v; + + v.y = xmb_noise2(v2) / 8.0; + + v3.x -= 
constants.time / 5.0; + v3.x /= 4.0; + /* v3 is a time-shifted copy of the position that is only used as the noise() input below */ + v3.z -= constants.time / 10.0; + v3.y -= constants.time / 100.0; + + v.z -= noise(v3 * 7.0) / 15.0; + v.y -= noise(v3 * 7.0) / 15.0 + cos(v.x * 2.0 - constants.time / 2.0) / 5.0 - 0.3; + v.y = -v.y; + + vEC = v; /* the displaced position is also handed to the fragment shader, which differentiates it to get a normal */ + gl_Position = vec4(v, 1.0); +} diff --git a/wiiu/tex_shader.c b/wiiu/tex_shader.c index 152acfc511..248559d48e 100644 --- a/wiiu/tex_shader.c +++ b/wiiu/tex_shader.c @@ -23,30 +23,64 @@ __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { - u64 cf[16]; + u64 cf[32]; + u64 alu[16]; /* addressed by ALU(32, 16) below: 32 is the number of u64 slots taken by cf, 16 the number of slots here; presumably (offset, count) in u64 units */ } vs_program = { { CALL_FS NO_BARRIER, + ALU(32, 16) KCACHE0(CB1, _0_15), EXP_DONE(POS0, _R1, _x, _y, _0, _1), - EXP_DONE(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER + EXP(PARAM0, _R2, _x, _y, _0, _0) NO_BARRIER, + EXP_DONE(PARAM1, _R3, _x, _y, _z, _w) NO_BARRIER END_OF_PROGRAM + }, + { /* Net effect: _R1 = _R1.x * KC0(0) + _R1.y * KC0(1) + _R1.z * KC0(2) + _R1.w * KC0(3). The x/y and z/w lanes are swapped in the three intermediate steps and swapped back by the last one. The KC0 vectors are presumably the "global.MVP" matrix listed in uniform_vars (confirm). */ + ALU_MUL(__,_x, _R1,_w, KC0(3),_y), + ALU_MUL(__,_y, _R1,_w, KC0(3),_x), + ALU_MUL(__,_z, _R1,_w, KC0(3),_w), + ALU_MUL(__,_w, _R1,_w, KC0(3),_z) + ALU_LAST, + ALU_MULADD(_R123,_x, _R1,_z, KC0(2),_y, ALU_SRC_PV,_x), + ALU_MULADD(_R123,_y, _R1,_z, KC0(2),_x, ALU_SRC_PV,_y), + ALU_MULADD(_R123,_z, _R1,_z, KC0(2),_w, ALU_SRC_PV,_z), + ALU_MULADD(_R123,_w, _R1,_z, KC0(2),_z, ALU_SRC_PV,_w) + ALU_LAST, + ALU_MULADD(_R123,_x, _R1,_y, KC0(1),_y, ALU_SRC_PV,_x), + ALU_MULADD(_R123,_y, _R1,_y, KC0(1),_x, ALU_SRC_PV,_y), + ALU_MULADD(_R123,_z, _R1,_y, KC0(1),_w, ALU_SRC_PV,_z), + ALU_MULADD(_R123,_w, _R1,_y, KC0(1),_z, ALU_SRC_PV,_w) + ALU_LAST, + ALU_MULADD(_R1,_x, _R1,_x, KC0(0),_x, ALU_SRC_PV,_y), + ALU_MULADD(_R1,_y, _R1,_x, KC0(0),_y, ALU_SRC_PV,_x), + ALU_MULADD(_R1,_z, _R1,_x, KC0(0),_z, ALU_SRC_PV,_w), + ALU_MULADD(_R1,_w, _R1,_x, KC0(0),_w, ALU_SRC_PV,_z) + ALU_LAST, } }; __attribute__((aligned(GX2_SHADER_ALIGNMENT))) static struct { - u64 cf[16]; + u64 cf[32]; + u64 alu[16]; u64 tex[1 * 2]; } ps_program = { { - TEX(16, 1) VALID_PIX, + TEX(48, 1) VALID_PIX, /* 48 is the 32 slots of cf plus the 16 of alu: the tex array now follows both */ + ALU(32, 4), EXP_DONE(PIX0, _R0, _x, _y, _z, _w) END_OF_PROGRAM }, + { + ALU_MUL(_R0,_x, 
_R0,_x, _R1,_x), + ALU_MUL(_R0,_y, _R0,_y, _R1,_y), + ALU_MUL(_R0,_z, _R0,_z, _R1,_z), + ALU_MUL(_R0,_w, _R0,_w, _R1,_w) + ALU_LAST /* _R0 is multiplied by _R1 per component: the texel written by TEX_SAMPLE scaled by _R1, presumably the interpolated vertex colour (confirm the pixel-stage input mapping) */ + }, { TEX_SAMPLE(_R0,_x,_y,_z,_w, _R0,_x,_y,_0,_0, _t0, _s0) } @@ -56,6 +90,7 @@ static GX2AttribVar attributes[] = { { "position", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 0}, { "tex_coord", GX2_SHADER_VAR_TYPE_FLOAT2, 0, 1}, + { "color", GX2_SHADER_VAR_TYPE_FLOAT4, 0, 2}, /* third attribute; its trailing 2 matches the first field of the stream entry added below */ }; static GX2AttribStream attribute_stream[] = @@ -64,6 +99,8 @@ static GX2AttribStream attribute_stream[] = GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT}, {1, 0, offsetof(tex_shader_vertex_t, coord), GX2_ATTRIB_FORMAT_FLOAT_32_32, GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _0, _1), GX2_ENDIAN_SWAP_DEFAULT}, + {2, 0, offsetof(tex_shader_vertex_t, color), GX2_ATTRIB_FORMAT_FLOAT_32_32_32_32, + GX2_ATTRIB_INDEX_PER_VERTEX, 0, GX2_COMP_SEL(_x, _y, _z, _w), GX2_ENDIAN_SWAP_DEFAULT}, /* four floats read at the offset of tex_shader_vertex_t.color, with all four components selected in order */ }; static GX2SamplerVar samplers[] = @@ -71,12 +108,19 @@ static GX2SamplerVar samplers[] = { "s", GX2_SAMPLER_VAR_TYPE_SAMPLER_2D, 0 }, }; +static GX2UniformBlock uniform_blocks[] = { + {"UBO", 1, 64} /* NOTE(review): 64 is presumably the block size in bytes, i.e. one 4x4 float matrix; confirm against the GX2UniformBlock field order */ +}; + /* The only uniform variable listed: a 4x4 matrix named "global.MVP". */ +static GX2UniformVar uniform_vars[] = { + {"global.MVP", GX2_SHADER_VAR_TYPE_MATRIX4X4, 1, 0, 0}, +}; GX2Shader tex_shader = { { { - .sq_pgm_resources_vs.num_gprs = 3, + .sq_pgm_resources_vs.num_gprs = 4, /* raised from 3: the vertex program now also exports _R3 (PARAM1) */ .sq_pgm_resources_vs.stack_size = 1, .spi_vs_out_config.vs_export_count = 1, .num_spi_vs_out_id = 1, @@ -92,10 +136,10 @@ GX2Shader tex_shader = {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, {.semantic_0 = 0xFF, .semantic_1 = 0xFF, .semantic_2 = 0xFF, .semantic_3 = 0xFF}, }, - .sq_vtx_semantic_clear = ~0x3, - .num_sq_vtx_semantic = 2, + .sq_vtx_semantic_clear = ~0x7, + .num_sq_vtx_semantic = 3, /* three vertex inputs now: the low three mask bits are left clear and the table below gains a third entry (2) */ { - 0, 1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0, 1, 2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, .vgt_vertex_reuse_block_cntl.vtx_reuse_depth = 0xE, @@ -103,12 +147,14 @@ GX2Shader tex_shader = }, /* regs */ .size = sizeof(vs_program), .program = (uint8_t*)&vs_program, - .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, + .mode = GX2_SHADER_MODE_UNIFORM_BLOCK, /* switched to block mode together with the uniform_blocks / uniform_vars tables attached on the next two lines */ + .uniformBlockCount = countof(uniform_blocks), uniform_blocks, + .uniformVarCount = countof(uniform_vars), uniform_vars, .attribVarCount = countof(attributes), attributes, }, { { - .sq_pgm_resources_ps.num_gprs = 1, + .sq_pgm_resources_ps.num_gprs = 2, /* raised from 1: the pixel program now reads _R1 in addition to _R0 */ .sq_pgm_exports_ps.export_mode = 0x2, .spi_ps_in_control_0.num_interp = 2, .spi_ps_in_control_0.persp_gradient_ena = 1, @@ -120,7 +166,7 @@ GX2Shader tex_shader = }, /* regs */ .size = sizeof(ps_program), .program = (uint8_t*)&ps_program, - .mode = GX2_SHADER_MODE_UNIFORM_REGISTER, + .mode = GX2_SHADER_MODE_UNIFORM_BLOCK, .samplerVarCount = countof(samplers), samplers, }, .attribute_stream = attribute_stream, diff --git a/wiiu/tex_shader.h b/wiiu/tex_shader.h index b7271eace8..766ec5b76d 100644 --- a/wiiu/tex_shader.h +++ b/wiiu/tex_shader.h @@ -35,6 +35,14 @@ typedef struct float u; float v; }coord; + /* Per-vertex colour as four floats (r, g, b, a); tex_shader.c locates it through offsetof(tex_shader_vertex_t, color) in its attribute stream. */ + struct + { + float r; + float g; + float b; + float a; + }color; }tex_shader_vertex_t; extern GX2Shader tex_shader;