From 14ac13f70b6b178e56ae6616005b31dd07778bd6 Mon Sep 17 00:00:00 2001 From: espes Date: Thu, 14 Nov 2013 07:56:49 +1100 Subject: [PATCH] terrible fragment shader generation --- hw/xbox/Makefile.objs | 2 +- hw/xbox/nv2a.c | 187 ++++++++-- hw/xbox/nv2a_psh.c | 700 +++++++++++++++++++++++++++++++++++++ hw/xbox/nv2a_psh.h | 36 ++ include/qapi/qmp/qstring.h | 4 + qobject/qstring.c | 45 +++ 6 files changed, 949 insertions(+), 25 deletions(-) create mode 100644 hw/xbox/nv2a_psh.c create mode 100644 hw/xbox/nv2a_psh.h diff --git a/hw/xbox/Makefile.objs b/hw/xbox/Makefile.objs index 4b97cc161e..72d8b187f0 100644 --- a/hw/xbox/Makefile.objs +++ b/hw/xbox/Makefile.objs @@ -2,7 +2,7 @@ obj-y += xbox.o chihiro.o obj-y += xbox_pci.o acpi_xbox.o obj-y += amd_smbus.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o obj-y += nvnet.o -obj-y += nv2a.o nv2a_vsh.o +obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o obj-y += mcpx_apu.o mcpx_aci.o obj-y += lpc47m157.o obj-y += xid.o diff --git a/hw/xbox/nv2a.c b/hw/xbox/nv2a.c index 254aa0b688..f44e00e21f 100644 --- a/hw/xbox/nv2a.c +++ b/hw/xbox/nv2a.c @@ -29,6 +29,7 @@ #include "hw/xbox/u_format_r11g11b10f.h" #include "hw/xbox/nv2a_vsh.h" +#include "hw/xbox/nv2a_psh.h" #include "hw/xbox/nv2a.h" @@ -259,6 +260,19 @@ # define NV_PGRAPH_CLEARRECTY_YMIN 0x00000FFF # define NV_PGRAPH_CLEARRECTY_YMAX 0x0FFF0000 #define NV_PGRAPH_COLORCLEARVALUE 0x0000186C +#define NV_PGRAPH_COMBINEFACTOR0 0x00001880 +#define NV_PGRAPH_COMBINEFACTOR1 0x000018A0 +#define NV_PGRAPH_COMBINEALPHAI0 0x000018C0 +#define NV_PGRAPH_COMBINEALPHAO0 0x000018E0 +#define NV_PGRAPH_COMBINECOLORI0 0x00001900 +#define NV_PGRAPH_COMBINECOLORO0 0x00001920 +#define NV_PGRAPH_COMBINECTL 0x00001940 +#define NV_PGRAPH_COMBINESPECFOG0 0x00001944 +#define NV_PGRAPH_COMBINESPECFOG1 0x00001948 +#define NV_PGRAPH_SHADERCTL 0x00001998 +#define NV_PGRAPH_SHADERPROG 0x0000199C +#define NV_PGRAPH_SPECFOGFACTOR0 0x000019AC +#define NV_PGRAPH_SPECFOGFACTOR1 0x000019B0 #define 
NV_PGRAPH_ZSTENCILCLEARVALUE 0x00001A88 #define NV_PCRTC_INTR_0 0x00000100 @@ -409,8 +423,15 @@ # define NV097_SET_SURFACE_PITCH_ZETA 0xFFFF0000 # define NV097_SET_SURFACE_COLOR_OFFSET 0x00970210 # define NV097_SET_SURFACE_ZETA_OFFSET 0x00970214 +# define NV097_SET_COMBINER_ALPHA_ICW 0x00970260 +# define NV097_SET_COMBINER_SPECULAR_FOG_CW0 0x00970288 +# define NV097_SET_COMBINER_SPECULAR_FOG_CW1 0x0097028C # define NV097_SET_COLOR_MASK 0x00970358 # define NV097_SET_VIEWPORT_OFFSET 0x00970A20 +# define NV097_SET_COMBINER_FACTOR0 0x00970A60 +# define NV097_SET_COMBINER_FACTOR1 0x00970A80 +# define NV097_SET_COMBINER_ALPHA_OCW 0x00970AA0 +# define NV097_SET_COMBINER_COLOR_ICW 0x00970AC0 # define NV097_SET_VIEWPORT_SCALE 0x00970AF0 # define NV097_SET_TRANSFORM_PROGRAM 0x00970B00 # define NV097_SET_TRANSFORM_CONSTANT 0x00970B80 @@ -486,6 +507,10 @@ # define NV097_CLEAR_SURFACE_A (1 << 7) # define NV097_SET_CLEAR_RECT_HORIZONTAL 0x00971D98 # define NV097_SET_CLEAR_RECT_VERTICAL 0x00971D9C +# define NV097_SET_COMBINER_COLOR_OCW 0x00971E40 +# define NV097_SET_COMBINER_CONTROL 0x00971E60 +# define NV097_SET_SHADER_STAGE_PROGRAM 0x00971E70 +# define NV097_SET_SHADER_OTHER_STAGE_INPUT 0x00971E78 # define NV097_SET_TRANSFORM_EXECUTION_MODE 0x00971E94 # define NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN 0x00971E98 # define NV097_SET_TRANSFORM_PROGRAM_LOAD 0x00971E9C @@ -1239,7 +1264,7 @@ static void kelvin_bind_vertexshader(KelvinState *kelvin) shader->program_length); const char* shader_code_str = qstring_get_str(shader_code); - NV2A_DPRINTF("bind shader %d, code:\n%s\n", + NV2A_DPRINTF("bind vertex program %d, code:\n%s\n", kelvin->vertexshader_start_slot, shader_code_str); @@ -1252,7 +1277,7 @@ static void kelvin_bind_vertexshader(KelvinState *kelvin) GLint pos; glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); if (pos != -1) { - fprintf(stderr, "nv2a: Vertex shader compilation failed:\n" + fprintf(stderr, "nv2a: vertex shader compilation failed:\n" " pos %d, %s\n", pos, 
glGetString(GL_PROGRAM_ERROR_STRING_ARB)); fprintf(stderr, "ucode:\n"); @@ -1402,15 +1427,53 @@ static void pgraph_bind_textures(NV2AState *d) static void pgraph_bind_fragment_shader(PGRAPHState *pg) { - const char *shader_code = "\n" -"uniform sampler2DRect texSamp0;\n" -"void main() {\n" -" gl_FragColor = texture2DRect(texSamp0, gl_TexCoord[0].st);\n" -//" gl_FragColor = vec4(1, 0, 0, 1);\n" -"}\n"; + int i; if (pg->fragment_shader_dirty) { - glShaderSource(pg->gl_fragment_shader, 1, &shader_code, 0); + uint32_t combiner_control = pg->regs[NV_PGRAPH_COMBINECTL]; + uint32_t shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG]; + uint32_t other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL]; + uint32_t final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0]; + uint32_t final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1]; + uint32_t final_constant_0 = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; + uint32_t final_constant_1 = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; + + uint32_t rgb_inputs[8], rgb_outputs[8], + alpha_inputs[8], alpha_outputs[8], + constant_0[8], constant_1[8]; + + for (i = 0; i < 8; i++) { + rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; + rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; + alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; + alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; + constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; + constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; + } + + bool rect_tex[4]; + for (i = 0; i < 4; i++) { + rect_tex[i] = false; + if (pg->textures[i].enabled + && kelvin_color_format_map[pg->textures[i].color_format].linear) { + rect_tex[i] = true; + } + } + + QString *shader_code = psh_translate(combiner_control, shader_stage_program, + other_stage_input, + rgb_inputs, rgb_outputs, + alpha_inputs, alpha_outputs, + /* constant_0, constant_1, (psh_translate does not take combiner constants yet) */ + final_inputs_0, final_inputs_1, + /* final_constant_0, final_constant_1, (psh_translate does not take combiner constants yet) */ + rect_tex); + + const char *shader_code_str = 
qstring_get_str(shader_code); + + NV2A_DPRINTF("bind pixel shader, code:\n%s\n", shader_code_str); + + glShaderSource(pg->gl_fragment_shader, 1, &shader_code_str, 0); glCompileShader(pg->gl_fragment_shader); /* Check it compiled */ @@ -1419,7 +1482,7 @@ static void pgraph_bind_fragment_shader(PGRAPHState *pg) if (!compiled) { GLchar log[1024]; glGetShaderInfoLog(pg->gl_fragment_shader, 1024, NULL, log); - fprintf(stderr, "nv2a: Fragment shader compilation failed: %s\n", log); + fprintf(stderr, "nv2a: fragment shader compilation failed: %s\n", log); abort(); } @@ -1430,26 +1493,33 @@ static void pgraph_bind_fragment_shader(PGRAPHState *pg) if(!linked) { GLchar log[1024]; glGetProgramInfoLog(pg->gl_program, 1024, NULL, log); - fprintf(stderr, "nv2a: Fragment shader linking failed: %s\n", log); - abort(); - } - - glValidateProgram(pg->gl_program); - GLint valid = 0; - glGetProgramiv(pg->gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(pg->gl_program, 1024, NULL, log); - fprintf(stderr, "nv2a: Fragment shader validation failed: %s\n", log); + fprintf(stderr, "nv2a: fragment shader linking failed: %s\n", log); abort(); } glUseProgram(pg->gl_program); - GLint texSamp0Loc = glGetUniformLocation(pg->gl_program, "texSamp0"); - glUniform1i(texSamp0Loc, 0); + /* set texture samplers */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + char samplerName[16]; + snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); + GLint texSampLoc = glGetUniformLocation(pg->gl_program, samplerName); + if (texSampLoc >= 0) { + glUniform1i(texSampLoc, i); + } + } + /*glValidateProgram(pg->gl_program); + GLint valid = 0; + glGetProgramiv(pg->gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(pg->gl_program, 1024, NULL, log); + fprintf(stderr, "nv2a: fragment shader validation failed: %s\n", log); + abort(); + }*/ + QDECREF(shader_code); pg->fragment_shader_dirty = false; } else { 
glUseProgram(pg->gl_program); @@ -1513,7 +1583,7 @@ static void pgraph_update_surface(NV2AState *d, bool upload) assert(d->pgraph.surface_color.pitch % bytes_per_pixel == 0); - glDisable(GL_FRAGMENT_PROGRAM_ARB); + //glDisable(GL_FRAGMENT_PROGRAM_ARB); glUseProgram(0); int rl, pa; @@ -1919,6 +1989,24 @@ static void pgraph_method(NV2AState *d, pg->surface_zeta.offset = parameter; break; + + case NV097_SET_COMBINER_ALPHA_ICW ... + NV097_SET_COMBINER_ALPHA_ICW + 28: + slot = (class_method - NV097_SET_COMBINER_ALPHA_ICW) / 4; + pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_SPECULAR_FOG_CW0: + pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_SPECULAR_FOG_CW1: + pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter; + pg->fragment_shader_dirty = true; + break; + case NV097_SET_COLOR_MASK: pg->color_mask = parameter; break; @@ -1932,6 +2020,35 @@ static void pgraph_method(NV2AState *d, kelvin->constants[59].data[slot] = parameter; kelvin->constants[59].dirty = true; break; + + case NV097_SET_COMBINER_FACTOR0 ... + NV097_SET_COMBINER_FACTOR0 + 28: + slot = (class_method - NV097_SET_COMBINER_FACTOR0) / 4; + pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_FACTOR1 ... + NV097_SET_COMBINER_FACTOR1 + 28: + slot = (class_method - NV097_SET_COMBINER_FACTOR1) / 4; + pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_ALPHA_OCW ... + NV097_SET_COMBINER_ALPHA_OCW + 28: + slot = (class_method - NV097_SET_COMBINER_ALPHA_OCW) / 4; + pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_COLOR_ICW ... 
+ NV097_SET_COMBINER_COLOR_ICW + 28: + slot = (class_method - NV097_SET_COMBINER_COLOR_ICW) / 4; + pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + case NV097_SET_VIEWPORT_SCALE ... NV097_SET_VIEWPORT_SCALE + 12: @@ -2276,6 +2393,28 @@ static void pgraph_method(NV2AState *d, pg->regs[NV_PGRAPH_CLEARRECTY] = parameter; break; + case NV097_SET_COMBINER_COLOR_OCW ... + NV097_SET_COMBINER_COLOR_OCW + 28: + slot = (class_method - NV097_SET_COMBINER_COLOR_OCW) / 4; + pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_COMBINER_CONTROL: + pg->regs[NV_PGRAPH_COMBINECTL] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_SHADER_STAGE_PROGRAM: + pg->regs[NV_PGRAPH_SHADERPROG] = parameter; + pg->fragment_shader_dirty = true; + break; + + case NV097_SET_SHADER_OTHER_STAGE_INPUT: + pg->regs[NV_PGRAPH_SHADERCTL] = parameter; + pg->fragment_shader_dirty = true; + break; + case NV097_SET_TRANSFORM_EXECUTION_MODE: kelvin->use_vertex_program = (parameter & 3) == 2; break; diff --git a/hw/xbox/nv2a_psh.c b/hw/xbox/nv2a_psh.c new file mode 100644 index 0000000000..33e8ae5134 --- /dev/null +++ b/hw/xbox/nv2a_psh.c @@ -0,0 +1,700 @@ +/* + * QEMU Geforce NV2A pixel shader translation + * + * Copyright (c) 2013 espes + * + * Based on: + * Cxbx, PixelShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Xeon, XBD3DPixelShader.cpp + * Copyright (c) 2003 _SF_ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "qapi/qmp/qstring.h" + +#include "hw/xbox/nv2a_psh.h" + + +enum PS_TEXTUREMODES +{ // valid in stage 0 1 2 3 + PS_TEXTUREMODES_NONE= 0x00L, // * * * * + PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * + PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * + PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * + PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * * + PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * + PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * + PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * + PS_TEXTUREMODES_BRDF= 0x08L, // - - * * + PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * + PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * * + PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - + PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * + PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * + PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * + PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * + PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * + PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - + PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * + // 0x13-0x1f reserved +}; + +enum PS_INPUTMAPPING +{ + PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) OK for final combiner + PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) OK for final combiner + PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2*max(0,x) - 1 invalid for final combiner + PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2*max(0,x) invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) invalid for final combiner + PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x invalid for final combiner + PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x invalid for final combiner +}; + +enum PS_REGISTER +{ + 
PS_REGISTER_ZERO= 0x00L, // r + PS_REGISTER_DISCARD= 0x00L, // w + PS_REGISTER_C0= 0x01L, // r + PS_REGISTER_C1= 0x02L, // r + PS_REGISTER_FOG= 0x03L, // r + PS_REGISTER_V0= 0x04L, // r/w + PS_REGISTER_V1= 0x05L, // r/w + PS_REGISTER_T0= 0x08L, // r/w + PS_REGISTER_T1= 0x09L, // r/w + PS_REGISTER_T2= 0x0aL, // r/w + PS_REGISTER_T3= 0x0bL, // r/w + PS_REGISTER_R0= 0x0cL, // r/w + PS_REGISTER_R1= 0x0dL, // r/w + PS_REGISTER_V1R0_SUM= 0x0eL, // r + PS_REGISTER_EF_PROD= 0x0fL, // r + + PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // OK for final combiner + PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // invalid for final combiner + PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // invalid for final combiner + PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // invalid for final combiner +}; + +enum PS_COMBINERCOUNTFLAGS +{ + PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb + PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb + + PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage + PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage + + PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage + PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage +}; + +enum PS_COMBINEROUTPUT +{ + PS_COMBINEROUTPUT_IDENTITY= 0x00L, // y = x + PS_COMBINEROUTPUT_BIAS= 0x08L, // y = x - 0.5 + PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // y = x*2 + PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5)*2 + PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // y = x*4 + PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // y = x/2 + + PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only + + PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only + + PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only + + PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only + + 
PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD + PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a +}; + +enum PS_CHANNEL +{ + PS_CHANNEL_RGB= 0x00, // used as RGB source + PS_CHANNEL_BLUE= 0x00, // used as ALPHA source + PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source +}; + + +enum PS_FINALCOMBINERSETTING +{ + PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] + + PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping + + PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping +}; + + + +// Structures to describe the PS definition + +struct InputInfo { + int reg, mod, chan; + bool invert; +}; + +struct InputVarInfo { + struct InputInfo a, b, c, d; +}; + +struct FCInputInfo { + struct InputInfo a, b, c, d, e, f, g; + int c0, c1; + //uint32_t c0_value, c1_value; + bool c0_used, c1_used; + bool v1r0_sum, clamp_sum, inv_v1, inv_r0, enabled; +}; + +struct OutputInfo { + int ab, cd, muxsum, flags, ab_op, cd_op, muxsum_op, + mapping, ab_alphablue, cd_alphablue; +}; + +struct PSStageInfo { + struct InputVarInfo rgb_input, alpha_input; + struct OutputInfo rgb_output, alpha_output; + int c0, c1; + //uint32_t c0_value, c1_value; + bool c0_used, c1_used; +}; + +struct PixelShader { + int num_stages, flags; + struct PSStageInfo stage[8]; + struct FCInputInfo final_input; + int tex_modes[4], input_tex[4]; + + //uint32_t compare_mode, dot_mapping, input_texture; + + bool rect_tex[4]; + + QString *varE, *varF; + QString *code; + int cur_stage; + + int num_var_refs; + char var_refs[32][32]; /* written by add_var_ref, so must not be const */ + int num_const_refs; + char const_refs[32][32]; /* written by add_const_ref, so must not be const */ +}; + +static void add_var_ref(struct PixelShader *ps, const char *var) +{ + int i; + for (i=0; i<ps->num_var_refs; i++) { + if (strcmp(ps->var_refs[i], var) == 0) return; + } + strcpy(ps->var_refs[ps->num_var_refs++], var); +} + +static void add_const_ref(struct PixelShader *ps, const char *var) +{ + int i; + for (i=0; 
i<ps->num_const_refs; i++) { + if (strcmp((char*)ps->const_refs[i], var) == 0) return; + } + strcpy((char*)ps->const_refs[ps->num_const_refs++], var); +} + +// Get the code for a variable used in the program +static QString* get_var(struct PixelShader *ps, int reg, bool is_dest) +{ + switch (reg) { + case PS_REGISTER_DISCARD: + if (is_dest) { + return qstring_from_str(""); + } else { + return qstring_from_str("0.0"); + } + break; + case PS_REGISTER_C0: + if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C0) { + QString *reg = qstring_from_fmt("c_%d_%d", ps->cur_stage, 0); + add_const_ref(ps, qstring_get_str(reg)); + if (ps->cur_stage == 8) { + ps->final_input.c0_used = true; + } else { + ps->stage[ps->cur_stage].c0_used = true; + } + return reg; + } else { // Same c0 + add_const_ref(ps, "c_0_0"); + ps->stage[0].c0_used = true; + return qstring_from_str("c_0_0"); + } + break; + case PS_REGISTER_C1: + if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C1) { + QString *reg = qstring_from_fmt("c_%d_%d", ps->cur_stage, 1); + add_const_ref(ps, qstring_get_str(reg)); + if (ps->cur_stage == 8) { + ps->final_input.c1_used = true; + } else { + ps->stage[ps->cur_stage].c1_used = true; + } + return reg; + } else { // Same c1 + add_const_ref(ps, "c_0_1"); + ps->stage[0].c1_used = true; + return qstring_from_str("c_0_1"); + } + break; + case PS_REGISTER_FOG: // TODO + assert(false); + break; + case PS_REGISTER_V0: + return qstring_from_str("v0"); + case PS_REGISTER_V1: + return qstring_from_str("v1"); + case PS_REGISTER_T0: + return qstring_from_str("t0"); + case PS_REGISTER_T1: + return qstring_from_str("t1"); + case PS_REGISTER_T2: + return qstring_from_str("t2"); + case PS_REGISTER_T3: + return qstring_from_str("t3"); + case PS_REGISTER_R0: + add_var_ref(ps, "r0"); + return qstring_from_str("r0"); + case PS_REGISTER_R1: + add_var_ref(ps, "r1"); + return qstring_from_str("r1"); + case PS_REGISTER_V1R0_SUM: + add_var_ref(ps, "r0"); + return qstring_from_str("(v1 + r0)"); + case PS_REGISTER_EF_PROD: + 
return qstring_from_fmt("(%s * %s)", qstring_get_str(ps->varE), + qstring_get_str(ps->varF)); + default: + assert(false); + break; + } +} + +// Get input variable code +static QString* get_input_var(struct PixelShader *ps, struct InputInfo in, bool is_alpha) +{ + QString *reg = get_var(ps, in.reg, false); + + if (strcmp(qstring_get_str(reg), "0.0") != 0 + && (in.reg != PS_REGISTER_EF_PROD + || strstr(qstring_get_str(reg), ".a") == NULL)) { + switch (in.chan) { + case PS_CHANNEL_RGB: + if (is_alpha) { + qstring_append(reg, ".b"); + } else { + qstring_append(reg, ".rgb"); + } + break; + case PS_CHANNEL_ALPHA: + qstring_append(reg, ".a"); + break; + default: + assert(false); + break; + } + } + + QString *res; + switch (in.mod) { + case PS_INPUTMAPPING_SIGNED_IDENTITY: + case PS_INPUTMAPPING_UNSIGNED_IDENTITY: + QINCREF(reg); + res = reg; + break; + case PS_INPUTMAPPING_UNSIGNED_INVERT: + res = qstring_from_fmt("(1.0 - %s)", qstring_get_str(reg)); + break; + case PS_INPUTMAPPING_EXPAND_NORMAL: // TODO: Change to max(0, x)?? 
+ res = qstring_from_fmt("(2.0 * %s - 1.0)", qstring_get_str(reg)); + break; + case PS_INPUTMAPPING_EXPAND_NEGATE: + res = qstring_from_fmt("(1.0 - 2.0 * %s)", qstring_get_str(reg)); + break; + case PS_INPUTMAPPING_HALFBIAS_NORMAL: + res = qstring_from_fmt("(%s - 0.5)", qstring_get_str(reg)); + break; + case PS_INPUTMAPPING_HALFBIAS_NEGATE: + res = qstring_from_fmt("(0.5 - %s)", qstring_get_str(reg)); + break; + case PS_INPUTMAPPING_SIGNED_NEGATE: + res = qstring_from_fmt("-%s", qstring_get_str(reg)); + break; + default: + assert(false); + break; + } + + QDECREF(reg); + return res; +} + +// Get code for the output mapping of a stage +static QString* get_output(QString *reg, int mapping) +{ + QString *res; + switch (mapping) { + case PS_COMBINEROUTPUT_IDENTITY: + QINCREF(reg); + res = reg; + break; + case PS_COMBINEROUTPUT_BIAS: + res = qstring_from_fmt("(%s - 0.5)", qstring_get_str(reg)); + break; + case PS_COMBINEROUTPUT_SHIFTLEFT_1: + res = qstring_from_fmt("(%s * 2.0)", qstring_get_str(reg)); + break; + case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS: + res = qstring_from_fmt("((%s - 0.5) * 2.0)", qstring_get_str(reg)); + break; + case PS_COMBINEROUTPUT_SHIFTLEFT_2: + res = qstring_from_fmt("(%s * 4.0)", qstring_get_str(reg)); + break; + case PS_COMBINEROUTPUT_SHIFTRIGHT_1: + res = qstring_from_fmt("(%s / 2.0)", qstring_get_str(reg)); + break; + default: + assert(false); + break; + } + return res; +} + +// Add the HLSL code for a stage +static void add_stage_code(struct PixelShader *ps, + struct InputVarInfo input, struct OutputInfo output, + const char *write_mask, bool is_alpha) +{ + QString *a = get_input_var(ps, input.a, is_alpha); + QString *b = get_input_var(ps, input.b, is_alpha); + QString *c = get_input_var(ps, input.c, is_alpha); + QString *d = get_input_var(ps, input.d, is_alpha); + + const char *caster = ""; + if (strlen(write_mask) == 3) { + caster = "vec3"; + } + + QString *ab; + if (output.ab_op == PS_COMBINEROUTPUT_AB_DOT_PRODUCT) { + ab = 
qstring_from_fmt("dot(%s, %s)", + qstring_get_str(a), qstring_get_str(b)); + } else { + ab = qstring_from_fmt("(%s * %s)", + qstring_get_str(a), qstring_get_str(b)); + } + + QString *cd; + if (output.cd_op == PS_COMBINEROUTPUT_CD_DOT_PRODUCT) { + cd = qstring_from_fmt("dot(%s, %s)", + qstring_get_str(c), qstring_get_str(d)); + } else { + cd = qstring_from_fmt("(%s * %s)", + qstring_get_str(c), qstring_get_str(d)); + } + + QString *ab_mapping = get_output(ab, output.mapping); + QString *cd_mapping = get_output(cd, output.mapping); + QString *ab_dest = get_var(ps, output.ab, true); + QString *cd_dest = get_var(ps, output.cd, true); + QString *sum_dest = get_var(ps, output.muxsum, true); + + if (qstring_get_length(ab_dest)) { + qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n", + qstring_get_str(ab_dest), write_mask, caster, qstring_get_str(ab_mapping)); + } else { + QINCREF(ab_mapping); + ab_dest = ab_mapping; + } + + if (qstring_get_length(cd_dest)) { + qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n", + qstring_get_str(cd_dest), write_mask, caster, qstring_get_str(cd_mapping)); + } else { + QINCREF(cd_mapping); + cd_dest = cd_mapping; + } + + if (!is_alpha && output.flags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) { + qstring_append_fmt(ps->code, "%s.a = %s.b;\n", + qstring_get_str(ab_dest), qstring_get_str(ab_dest)); + } + if (!is_alpha && output.flags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) { + qstring_append_fmt(ps->code, "%s.a = %s.b;\n", + qstring_get_str(cd_dest), qstring_get_str(cd_dest)); + } + + QString *sum; + if (output.muxsum_op == PS_COMBINEROUTPUT_AB_CD_SUM) { + sum = qstring_from_fmt("(%s + %s)", qstring_get_str(ab), qstring_get_str(cd)); + } else { + sum = qstring_from_fmt("((r0.a >= 0.5) ? 
%s : %s)", + qstring_get_str(cd), qstring_get_str(ab)); + } + + QString *sum_mapping = get_output(sum, output.mapping); + if (qstring_get_length(sum_dest)) { + qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n", + qstring_get_str(sum_dest), write_mask, caster, qstring_get_str(sum_mapping)); + } + + QDECREF(a); + QDECREF(b); + QDECREF(c); + QDECREF(d); + QDECREF(ab); + QDECREF(cd); + QDECREF(ab_mapping); + QDECREF(cd_mapping); + QDECREF(ab_dest); + QDECREF(cd_dest); + QDECREF(sum_dest); + QDECREF(sum); + QDECREF(sum_mapping); +} + +// Add code for the final combiner stage +static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo final) +{ + ps->varE = get_input_var(ps, final.e, false); + ps->varF = get_input_var(ps, final.f, false); + + QString *a = get_input_var(ps, final.a, false); + QString *b = get_input_var(ps, final.b, false); + QString *c = get_input_var(ps, final.c, false); + QString *d = get_input_var(ps, final.d, false); + QString *g = get_input_var(ps, final.g, false); + + add_var_ref(ps, "r0"); + qstring_append_fmt(ps->code, "r0.rgb = (%s * %s) + ((1.0 - %s) * %s) + %s;\n", + qstring_get_str(a), qstring_get_str(b), + qstring_get_str(a), qstring_get_str(c), qstring_get_str(d)); + qstring_append_fmt(ps->code, "r0.a = %s;\n", qstring_get_str(g)); + + QDECREF(a); + QDECREF(b); + QDECREF(c); + QDECREF(d); + QDECREF(g); + + QDECREF(ps->varE); + QDECREF(ps->varF); + ps->varE = ps->varF = NULL; +} + + + +static QString* psh_convert(struct PixelShader *ps) +{ + int i; + + QString *preflight = qstring_new(); + QString *vars = qstring_new(); + + qstring_append(vars, "vec4 v0 = gl_Color;\n"); + qstring_append(vars, "vec4 v1 = gl_SecondaryColor;\n"); + qstring_append(vars, "float fog = gl_FogFragCoord;\n"); + + for (i = 0; i < 4; i++) { + if (ps->tex_modes[i] == PS_TEXTUREMODES_NONE) continue; + + const char *sampler_type; + const char *sampler_function; + + switch (ps->tex_modes[i]) { + case PS_TEXTUREMODES_PROJECT2D: + if (ps->rect_tex[i]) { + 
sampler_type = "sampler2DRect"; + sampler_function = "texture2DRect"; + } else { + sampler_type = "sampler2D"; + sampler_function = "texture2D"; + } + qstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, gl_TexCoord[%d].xy);\n", + i, sampler_function, i, i); + break; + case PS_TEXTUREMODES_PROJECT3D: + sampler_type = "sampler3D"; + qstring_append_fmt(vars, "vec4 t%d = texture3D(texSamp%d, gl_TexCoord[%d].xyz);\n", + i, i, i); + break; + case PS_TEXTUREMODES_CUBEMAP: + sampler_type = "samplerCube"; + qstring_append_fmt(vars, "vec4 t%d = textureCube(texSamp%d, gl_TexCoord[%d].xyz);\n", + i, i, i); + break; + case PS_TEXTUREMODES_PASSTHRU: + qstring_append_fmt(vars, "vec4 t%d;\n", i); + break; + default: + printf("%x\n", ps->tex_modes[i]); + assert(false); + break; + } + + if (ps->tex_modes[i] != PS_TEXTUREMODES_PASSTHRU) { + qstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i); + } + } + + ps->code = qstring_new(); + for (i = 0; i < ps->num_stages; i++) { + ps->cur_stage = i; + qstring_append_fmt(ps->code, "// Stage %d\n", i); + add_stage_code(ps, ps->stage[i].rgb_input, ps->stage[i].rgb_output, "rgb", false); + add_stage_code(ps, ps->stage[i].alpha_input, ps->stage[i].alpha_output, "a", true); /* alpha pass: is_alpha selects scalar .b/.a input swizzles */ + } + + if (ps->final_input.enabled) { + ps->cur_stage = 8; + qstring_append(ps->code, "// Final Combiner\n"); + add_final_stage_code(ps, ps->final_input); + } + + for (i = 0; i < ps->num_var_refs; i++) { + qstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]); + if (strcmp(ps->var_refs[i], "r0") == 0) { + if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) { + qstring_append(vars, "r0.a = t0.a;\n"); + } else { + qstring_append(vars, "r0.a = 1.0;\n"); + } + } + } + for (i = 0; i < ps->num_const_refs; i++) { + qstring_append_fmt(vars, "vec4 %s;\n", ps->const_refs[i]); + } + + + + QString *final = qstring_new(); + qstring_append(final, qstring_get_str(preflight)); + qstring_append(final, "void main() {\n"); + qstring_append(final, qstring_get_str(vars)); + 
qstring_append(final, qstring_get_str(ps->code)); + qstring_append(final, "gl_FragColor = r0;\n"); + qstring_append(final, "}\n"); + + QDECREF(preflight); + QDECREF(vars); + QDECREF(ps->code); + + return final; +} + +static void parse_input(struct InputInfo *var, int value) +{ + var->reg = value & 0xF; + var->chan = value & 0x10; + var->mod = value & 0xE0; +} + +static void parse_combiner_inputs(uint32_t value, + struct InputInfo *a, struct InputInfo *b, + struct InputInfo *c, struct InputInfo *d) +{ + parse_input(d, value & 0xFF); + parse_input(c, (value >> 8) & 0xFF); + parse_input(b, (value >> 16) & 0xFF); + parse_input(a, (value >> 24) & 0xFF); +} + +static void parse_combiner_output(uint32_t value, struct OutputInfo *out) +{ + out->cd = value & 0xF; + out->ab = (value >> 4) & 0xF; + out->muxsum = (value >> 8) & 0xF; + int flags = value >> 12; + out->flags = flags; + out->cd_op = flags & 1; + out->ab_op = flags & 2; + out->muxsum_op = flags & 4; + out->mapping = flags & 0x38; + out->ab_alphablue = flags & 0x80; + out->cd_alphablue = flags & 0x40; +} + +QString *psh_translate(uint32_t combiner_control, uint32_t shader_stage_program, + uint32_t other_stage_input, + uint32_t rgb_inputs[8], uint32_t rgb_outputs[8], + uint32_t alpha_inputs[8], uint32_t alpha_outputs[8], + /*uint32_t constant_0[8], uint32_t constant_1[8],*/ + uint32_t final_inputs_0, uint32_t final_inputs_1, + /*uint32_t final_constant_0, uint32_t final_constant_1,*/ + bool rect_tex[4]) +{ + int i; + struct PixelShader ps; + memset(&ps, 0, sizeof(ps)); + + ps.num_stages = combiner_control & 0xFF; + ps.flags = combiner_control >> 8; + for (i = 0; i < 4; i++) { + ps.tex_modes[i] = (shader_stage_program >> (i * 5)) & 0x1F; + ps.rect_tex[i] = rect_tex[i]; + } + + ps.input_tex[0] = -1; + ps.input_tex[1] = 0; + ps.input_tex[2] = (other_stage_input >> 16) & 0xF; + ps.input_tex[3] = (other_stage_input >> 20) & 0xF; + for (i = 0; i < ps.num_stages; i++) { + parse_combiner_inputs(rgb_inputs[i], 
&ps.stage[i].rgb_input.a, + &ps.stage[i].rgb_input.b, &ps.stage[i].rgb_input.c, &ps.stage[i].rgb_input.d); + parse_combiner_inputs(alpha_inputs[i], &ps.stage[i].alpha_input.a, + &ps.stage[i].alpha_input.b, &ps.stage[i].alpha_input.c, &ps.stage[i].alpha_input.d); + + parse_combiner_output(rgb_outputs[i], &ps.stage[i].rgb_output); + parse_combiner_output(alpha_outputs[i], &ps.stage[i].alpha_output); + //ps.stage[i].c0 = (pDef->PSC0Mapping >> (i * 4)) & 0xF; + //ps.stage[i].c1 = (pDef->PSC1Mapping >> (i * 4)) & 0xF; + //ps.stage[i].c0_value = constant_0[i]; + //ps.stage[i].c1_value = constant_1[i]; + } + + struct InputInfo blank; + ps.final_input.enabled = final_inputs_0 || final_inputs_1; + if (ps.final_input.enabled) { + parse_combiner_inputs(final_inputs_0, &ps.final_input.a, &ps.final_input.b, /* A, B, C, D packed high byte to low byte */ + &ps.final_input.c, &ps.final_input.d); + parse_combiner_inputs(final_inputs_1, &ps.final_input.e, &ps.final_input.f, + &ps.final_input.g, &blank); + int flags = final_inputs_1 & 0xFF; + ps.final_input.clamp_sum = flags & PS_FINALCOMBINERSETTING_CLAMP_SUM; + ps.final_input.inv_v1 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1; + ps.final_input.inv_r0 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0; + //ps.final_input.c0 = (pDef->PSFinalCombinerConstants >> 0) & 0xF; + //ps.final_input.c1 = (pDef->PSFinalCombinerConstants >> 4) & 0xF; + //ps.final_input.c0_value = final_constant_0; + //ps.final_input.c1_value = final_constant_1; + } + + + + return psh_convert(&ps); +} \ No newline at end of file diff --git a/hw/xbox/nv2a_psh.h b/hw/xbox/nv2a_psh.h new file mode 100644 index 0000000000..61c59851b0 --- /dev/null +++ b/hw/xbox/nv2a_psh.h @@ -0,0 +1,36 @@ +/* + * QEMU Geforce NV2A pixel shader translation + * + * Copyright (c) 2013 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2 as published by the Free Software Foundation. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef HW_NV2A_PSH_H +#define HW_NV2A_PSH_H + +#include "qapi/qmp/qstring.h" + +QString *psh_translate(uint32_t combiner_control, uint32_t shader_stage_program, + uint32_t other_stage_input, + uint32_t rgb_inputs[8], uint32_t rgb_outputs[8], + uint32_t alpha_inputs[8], uint32_t alpha_outputs[8], + /*uint32_t constant_0[8], uint32_t constant_1[8],*/ + uint32_t final_inputs_0, uint32_t final_inputs_1, + /*uint32_t final_constant_0, uint32_t final_constant_1,*/ + bool rect_tex[4]); + +#endif \ No newline at end of file diff --git a/include/qapi/qmp/qstring.h b/include/qapi/qmp/qstring.h index 1bc3666107..460ea8aa0b 100644 --- a/include/qapi/qmp/qstring.h +++ b/include/qapi/qmp/qstring.h @@ -14,6 +14,7 @@ #define QSTRING_H #include +#include #include "qapi/qmp/qobject.h" typedef struct QString { @@ -26,11 +27,14 @@ typedef struct QString { QString *qstring_new(void); QString *qstring_from_str(const char *str); QString *qstring_from_substr(const char *str, int start, int end); +QString *qstring_from_fmt(const char *fmt, ...); size_t qstring_get_length(const QString *qstring); const char *qstring_get_str(const QString *qstring); void qstring_append_int(QString *qstring, int64_t value); void qstring_append(QString *qstring, const char *str); void qstring_append_chr(QString *qstring, int c); +void qstring_append_fmt(QString *qstring, const char *fmt, ...); +void qstring_append_va(QString *qstring, const char *fmt, va_list va); QString 
*qobject_to_qstring(const QObject *obj); #endif /* QSTRING_H */ diff --git a/qobject/qstring.c b/qobject/qstring.c index 607b7a142c..d3bbcd13e2 100644 --- a/qobject/qstring.c +++ b/qobject/qstring.c @@ -72,6 +72,17 @@ QString *qstring_from_str(const char *str) return qstring_from_substr(str, 0, strlen(str) - 1); } +QString *qstring_from_fmt(const char *fmt, ...) +{ + QString *ret = qstring_new(); + va_list ap; + va_start(ap, fmt); + qstring_append_va(ret, fmt, ap); + va_end(ap); + + return ret; +} + static void capacity_increase(QString *qstring, size_t len) { if (qstring->capacity < (qstring->length + len)) { @@ -112,6 +123,40 @@ void qstring_append_chr(QString *qstring, int c) qstring->string[qstring->length] = 0; } +void qstring_append_fmt(QString *qstring, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + qstring_append_va(qstring, fmt, ap); + va_end(ap); +} + +void qstring_append_va(QString *qstring, const char *fmt, va_list va) +{ + char scratch[256]; + + va_list ap; + va_copy(ap, va); + const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); + va_end(ap); + + if (len <= 0) { /* nothing to append, or vsnprintf reported an error */ + return; + } else if ((size_t)len < sizeof(scratch)) { + qstring_append(qstring, scratch); + return; + } + + /* overflowed our scratch buffer, alloc and try again */ + char *buf = g_malloc(len + 1); + va_copy(ap, va); + vsnprintf(buf, len + 1, fmt, ap); + va_end(ap); + + qstring_append(qstring, buf); + g_free(buf); +} + /** * qobject_to_qstring(): Convert a QObject to a QString */