terrible fragment shader generation

This commit is contained in:
espes 2013-11-14 07:56:49 +11:00
parent 8b3628cb39
commit 14ac13f70b
6 changed files with 949 additions and 25 deletions

View File

@ -2,7 +2,7 @@ obj-y += xbox.o chihiro.o
obj-y += xbox_pci.o acpi_xbox.o
obj-y += amd_smbus.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o
obj-y += nvnet.o
obj-y += nv2a.o nv2a_vsh.o
obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o
obj-y += mcpx_apu.o mcpx_aci.o
obj-y += lpc47m157.o
obj-y += xid.o

View File

@ -29,6 +29,7 @@
#include "hw/xbox/u_format_r11g11b10f.h"
#include "hw/xbox/nv2a_vsh.h"
#include "hw/xbox/nv2a_psh.h"
#include "hw/xbox/nv2a.h"
@ -259,6 +260,19 @@
# define NV_PGRAPH_CLEARRECTY_YMIN 0x00000FFF
# define NV_PGRAPH_CLEARRECTY_YMAX 0x0FFF0000
#define NV_PGRAPH_COLORCLEARVALUE 0x0000186C
#define NV_PGRAPH_COMBINEFACTOR0 0x00001880
#define NV_PGRAPH_COMBINEFACTOR1 0x000018A0
#define NV_PGRAPH_COMBINEALPHAI0 0x000018C0
#define NV_PGRAPH_COMBINEALPHAO0 0x000018E0
#define NV_PGRAPH_COMBINECOLORI0 0x00001900
#define NV_PGRAPH_COMBINECOLORO0 0x00001920
#define NV_PGRAPH_COMBINECTL 0x00001940
#define NV_PGRAPH_COMBINESPECFOG0 0x00001944
#define NV_PGRAPH_COMBINESPECFOG1 0x00001948
#define NV_PGRAPH_SHADERCTL 0x00001998
#define NV_PGRAPH_SHADERPROG 0x0000199C
#define NV_PGRAPH_SPECFOGFACTOR0 0x000019AC
#define NV_PGRAPH_SPECFOGFACTOR1 0x000019B0
#define NV_PGRAPH_ZSTENCILCLEARVALUE 0x00001A88
#define NV_PCRTC_INTR_0 0x00000100
@ -409,8 +423,15 @@
# define NV097_SET_SURFACE_PITCH_ZETA 0xFFFF0000
# define NV097_SET_SURFACE_COLOR_OFFSET 0x00970210
# define NV097_SET_SURFACE_ZETA_OFFSET 0x00970214
# define NV097_SET_COMBINER_ALPHA_ICW 0x00970260
# define NV097_SET_COMBINER_SPECULAR_FOG_CW0 0x00970288
# define NV097_SET_COMBINER_SPECULAR_FOG_CW1 0x0097028C
# define NV097_SET_COLOR_MASK 0x00970358
# define NV097_SET_VIEWPORT_OFFSET 0x00970A20
# define NV097_SET_COMBINER_FACTOR0 0x00970A60
# define NV097_SET_COMBINER_FACTOR1 0x00970A80
# define NV097_SET_COMBINER_ALPHA_OCW 0x00970AA0
# define NV097_SET_COMBINER_COLOR_ICW 0x00970AC0
# define NV097_SET_VIEWPORT_SCALE 0x00970AF0
# define NV097_SET_TRANSFORM_PROGRAM 0x00970B00
# define NV097_SET_TRANSFORM_CONSTANT 0x00970B80
@ -486,6 +507,10 @@
# define NV097_CLEAR_SURFACE_A (1 << 7)
# define NV097_SET_CLEAR_RECT_HORIZONTAL 0x00971D98
# define NV097_SET_CLEAR_RECT_VERTICAL 0x00971D9C
# define NV097_SET_COMBINER_COLOR_OCW 0x00971E40
# define NV097_SET_COMBINER_CONTROL 0x00971E60
# define NV097_SET_SHADER_STAGE_PROGRAM 0x00971E70
# define NV097_SET_SHADER_OTHER_STAGE_INPUT 0x00971E78
# define NV097_SET_TRANSFORM_EXECUTION_MODE 0x00971E94
# define NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN 0x00971E98
# define NV097_SET_TRANSFORM_PROGRAM_LOAD 0x00971E9C
@ -1239,7 +1264,7 @@ static void kelvin_bind_vertexshader(KelvinState *kelvin)
shader->program_length);
const char* shader_code_str = qstring_get_str(shader_code);
NV2A_DPRINTF("bind shader %d, code:\n%s\n",
NV2A_DPRINTF("bind vertex program %d, code:\n%s\n",
kelvin->vertexshader_start_slot,
shader_code_str);
@ -1252,7 +1277,7 @@ static void kelvin_bind_vertexshader(KelvinState *kelvin)
GLint pos;
glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
if (pos != -1) {
fprintf(stderr, "nv2a: Vertex shader compilation failed:\n"
fprintf(stderr, "nv2a: vertex shader compilation failed:\n"
" pos %d, %s\n",
pos, glGetString(GL_PROGRAM_ERROR_STRING_ARB));
fprintf(stderr, "ucode:\n");
@ -1402,15 +1427,53 @@ static void pgraph_bind_textures(NV2AState *d)
static void pgraph_bind_fragment_shader(PGRAPHState *pg)
{
const char *shader_code = "\n"
"uniform sampler2DRect texSamp0;\n"
"void main() {\n"
" gl_FragColor = texture2DRect(texSamp0, gl_TexCoord[0].st);\n"
//" gl_FragColor = vec4(1, 0, 0, 1);\n"
"}\n";
int i;
if (pg->fragment_shader_dirty) {
glShaderSource(pg->gl_fragment_shader, 1, &shader_code, 0);
uint32_t combiner_control = pg->regs[NV_PGRAPH_COMBINECTL];
uint32_t shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG];
uint32_t other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL];
uint32_t final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0];
uint32_t final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1];
uint32_t final_constant_0 = pg->regs[NV_PGRAPH_SPECFOGFACTOR0];
uint32_t final_constant_1 = pg->regs[NV_PGRAPH_SPECFOGFACTOR1];
uint32_t rgb_inputs[8], rgb_outputs[8],
alpha_inputs[8], alpha_outputs[8],
constant_0[8], constant_1[8];
for (i = 0; i < 8; i++) {
rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4];
rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4];
alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4];
alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4];
constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4];
constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4];
}
bool rect_tex[4];
for (i = 0; i < 4; i++) {
rect_tex[i] = false;
if (pg->textures[i].enabled
&& kelvin_color_format_map[pg->textures[i].color_format].linear) {
rect_tex[i] = true;
}
}
QString *shader_code = psh_translate(combiner_control, shader_stage_program,
other_stage_input,
rgb_inputs, rgb_outputs,
alpha_inputs, alpha_outputs,
constant_0, constant_1,
final_inputs_0, final_inputs_1,
final_constant_0, final_constant_1,
rect_tex);
const char *shader_code_str = qstring_get_str(shader_code);
NV2A_DPRINTF("bind pixel shader, code:\n%s\n", shader_code_str);
glShaderSource(pg->gl_fragment_shader, 1, &shader_code_str, 0);
glCompileShader(pg->gl_fragment_shader);
/* Check it compiled */
@ -1419,7 +1482,7 @@ static void pgraph_bind_fragment_shader(PGRAPHState *pg)
if (!compiled) {
GLchar log[1024];
glGetShaderInfoLog(pg->gl_fragment_shader, 1024, NULL, log);
fprintf(stderr, "nv2a: Fragment shader compilation failed: %s\n", log);
fprintf(stderr, "nv2a: fragment shader compilation failed: %s\n", log);
abort();
}
@ -1430,26 +1493,33 @@ static void pgraph_bind_fragment_shader(PGRAPHState *pg)
if(!linked) {
GLchar log[1024];
glGetProgramInfoLog(pg->gl_program, 1024, NULL, log);
fprintf(stderr, "nv2a: Fragment shader linking failed: %s\n", log);
abort();
}
glValidateProgram(pg->gl_program);
GLint valid = 0;
glGetProgramiv(pg->gl_program, GL_VALIDATE_STATUS, &valid);
if (!valid) {
GLchar log[1024];
glGetProgramInfoLog(pg->gl_program, 1024, NULL, log);
fprintf(stderr, "nv2a: Fragment shader validation failed: %s\n", log);
fprintf(stderr, "nv2a: fragment shader linking failed: %s\n", log);
abort();
}
glUseProgram(pg->gl_program);
GLint texSamp0Loc = glGetUniformLocation(pg->gl_program, "texSamp0");
glUniform1i(texSamp0Loc, 0);
/* set texture samplers */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
char samplerName[16];
snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
GLint texSampLoc = glGetUniformLocation(pg->gl_program, samplerName);
if (texSampLoc >= 0) {
glUniform1i(texSampLoc, i);
}
}
/*glValidateProgram(pg->gl_program);
GLint valid = 0;
glGetProgramiv(pg->gl_program, GL_VALIDATE_STATUS, &valid);
if (!valid) {
GLchar log[1024];
glGetProgramInfoLog(pg->gl_program, 1024, NULL, log);
fprintf(stderr, "nv2a: fragment shader validation failed: %s\n", log);
abort();
}*/
QDECREF(shader_code);
pg->fragment_shader_dirty = false;
} else {
glUseProgram(pg->gl_program);
@ -1513,7 +1583,7 @@ static void pgraph_update_surface(NV2AState *d, bool upload)
assert(d->pgraph.surface_color.pitch % bytes_per_pixel == 0);
glDisable(GL_FRAGMENT_PROGRAM_ARB);
//glDisable(GL_FRAGMENT_PROGRAM_ARB);
glUseProgram(0);
int rl, pa;
@ -1919,6 +1989,24 @@ static void pgraph_method(NV2AState *d,
pg->surface_zeta.offset = parameter;
break;
case NV097_SET_COMBINER_ALPHA_ICW ...
NV097_SET_COMBINER_ALPHA_ICW + 28:
slot = (class_method - NV097_SET_COMBINER_ALPHA_ICW) / 4;
pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_SPECULAR_FOG_CW0:
pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_SPECULAR_FOG_CW1:
pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COLOR_MASK:
pg->color_mask = parameter;
break;
@ -1932,6 +2020,35 @@ static void pgraph_method(NV2AState *d,
kelvin->constants[59].data[slot] = parameter;
kelvin->constants[59].dirty = true;
break;
case NV097_SET_COMBINER_FACTOR0 ...
NV097_SET_COMBINER_FACTOR0 + 28:
slot = (class_method - NV097_SET_COMBINER_FACTOR0) / 4;
pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_FACTOR1 ...
NV097_SET_COMBINER_FACTOR1 + 28:
slot = (class_method - NV097_SET_COMBINER_FACTOR1) / 4;
pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_ALPHA_OCW ...
NV097_SET_COMBINER_ALPHA_OCW + 28:
slot = (class_method - NV097_SET_COMBINER_ALPHA_OCW) / 4;
pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_COLOR_ICW ...
NV097_SET_COMBINER_COLOR_ICW + 28:
slot = (class_method - NV097_SET_COMBINER_COLOR_ICW) / 4;
pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_VIEWPORT_SCALE ...
NV097_SET_VIEWPORT_SCALE + 12:
@ -2276,6 +2393,28 @@ static void pgraph_method(NV2AState *d,
pg->regs[NV_PGRAPH_CLEARRECTY] = parameter;
break;
case NV097_SET_COMBINER_COLOR_OCW ...
NV097_SET_COMBINER_COLOR_OCW + 28:
slot = (class_method - NV097_SET_COMBINER_COLOR_OCW) / 4;
pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_COMBINER_CONTROL:
pg->regs[NV_PGRAPH_COMBINECTL] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_SHADER_STAGE_PROGRAM:
pg->regs[NV_PGRAPH_SHADERPROG] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_SHADER_OTHER_STAGE_INPUT:
pg->regs[NV_PGRAPH_SHADERCTL] = parameter;
pg->fragment_shader_dirty = true;
break;
case NV097_SET_TRANSFORM_EXECUTION_MODE:
kelvin->use_vertex_program = (parameter & 3) == 2;
break;

700
hw/xbox/nv2a_psh.c Normal file
View File

@ -0,0 +1,700 @@
/*
* QEMU Geforce NV2A pixel shader translation
*
* Copyright (c) 2013 espes
*
* Based on:
* Cxbx, PixelShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Xeon, XBD3DPixelShader.cpp
* Copyright (c) 2003 _SF_
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include "qapi/qmp/qstring.h"
#include "hw/xbox/nv2a_psh.h"
/*
 * Texture addressing mode of a single texture stage.  The trailing
 * columns show which of the four stages accept the mode
 * ('*' = valid, '-' = not valid).
 */
enum PS_TEXTUREMODES {
                                                 /* stage: 0 1 2 3 */
    PS_TEXTUREMODES_NONE                 = 0x00, /*        * * * * */
    PS_TEXTUREMODES_PROJECT2D            = 0x01, /*        * * * * */
    PS_TEXTUREMODES_PROJECT3D            = 0x02, /*        * * * * */
    PS_TEXTUREMODES_CUBEMAP              = 0x03, /*        * * * * */
    PS_TEXTUREMODES_PASSTHRU             = 0x04, /*        * * * * */
    PS_TEXTUREMODES_CLIPPLANE            = 0x05, /*        * * * * */
    PS_TEXTUREMODES_BUMPENVMAP           = 0x06, /*        - * * * */
    PS_TEXTUREMODES_BUMPENVMAP_LUM       = 0x07, /*        - * * * */
    PS_TEXTUREMODES_BRDF                 = 0x08, /*        - - * * */
    PS_TEXTUREMODES_DOT_ST               = 0x09, /*        - - * * */
    PS_TEXTUREMODES_DOT_ZW               = 0x0a, /*        - - * * */
    PS_TEXTUREMODES_DOT_RFLCT_DIFF       = 0x0b, /*        - - * - */
    PS_TEXTUREMODES_DOT_RFLCT_SPEC       = 0x0c, /*        - - - * */
    PS_TEXTUREMODES_DOT_STR_3D           = 0x0d, /*        - - - * */
    PS_TEXTUREMODES_DOT_STR_CUBE         = 0x0e, /*        - - - * */
    PS_TEXTUREMODES_DPNDNT_AR            = 0x0f, /*        - * * * */
    PS_TEXTUREMODES_DPNDNT_GB            = 0x10, /*        - * * * */
    PS_TEXTUREMODES_DOTPRODUCT           = 0x11, /*        - * * - */
    PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST = 0x12, /*        - - - * */
    /* 0x13 - 0x1f are reserved */
};
/*
 * Mapping applied to a combiner input before it is used.  Only the two
 * unsigned mappings are accepted by the final combiner; every other
 * mapping is invalid there.
 */
enum PS_INPUTMAPPING {
    PS_INPUTMAPPING_UNSIGNED_IDENTITY = 0x00, /* max(0,x)         (final ok) */
    PS_INPUTMAPPING_UNSIGNED_INVERT   = 0x20, /* 1 - max(0,x)     (final ok) */
    PS_INPUTMAPPING_EXPAND_NORMAL     = 0x40, /* 2*max(0,x) - 1              */
    PS_INPUTMAPPING_EXPAND_NEGATE     = 0x60, /* 1 - 2*max(0,x)              */
    PS_INPUTMAPPING_HALFBIAS_NORMAL   = 0x80, /* max(0,x) - 1/2              */
    PS_INPUTMAPPING_HALFBIAS_NEGATE   = 0xa0, /* 1/2 - max(0,x)              */
    PS_INPUTMAPPING_SIGNED_IDENTITY   = 0xc0, /* x                           */
    PS_INPUTMAPPING_SIGNED_NEGATE     = 0xe0, /* -x                          */
};
/*
 * Register selectors used by combiner inputs and outputs.  The access
 * column tells whether a register may be read (r), written (w) or both.
 * The last four entries are constants obtained by applying an input
 * mapping to the ZERO register.
 */
enum PS_REGISTER {
    PS_REGISTER_ZERO     = 0x00, /* r   */
    PS_REGISTER_DISCARD  = 0x00, /* w   */
    PS_REGISTER_C0       = 0x01, /* r   */
    PS_REGISTER_C1       = 0x02, /* r   */
    PS_REGISTER_FOG      = 0x03, /* r   */
    PS_REGISTER_V0       = 0x04, /* r/w */
    PS_REGISTER_V1       = 0x05, /* r/w */
    PS_REGISTER_T0       = 0x08, /* r/w */
    PS_REGISTER_T1       = 0x09, /* r/w */
    PS_REGISTER_T2       = 0x0a, /* r/w */
    PS_REGISTER_T3       = 0x0b, /* r/w */
    PS_REGISTER_R0       = 0x0c, /* r/w */
    PS_REGISTER_R1       = 0x0d, /* r/w */
    PS_REGISTER_V1R0_SUM = 0x0e, /* r   */
    PS_REGISTER_EF_PROD  = 0x0f, /* r   */

    /* OK for the final combiner */
    PS_REGISTER_ONE =
        PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT,
    /* the remaining three are invalid for the final combiner */
    PS_REGISTER_NEGATIVE_ONE =
        PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL,
    PS_REGISTER_ONE_HALF =
        PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE,
    PS_REGISTER_NEGATIVE_ONE_HALF =
        PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL,
};
/*
 * Flag bits that accompany the combiner stage count: which bit of r0.a
 * drives the MUX operation, and whether the c0/c1 constants are shared
 * by all stages or unique per stage.
 */
enum PS_COMBINERCOUNTFLAGS {
    PS_COMBINERCOUNT_MUX_LSB   = 0x0000, /* mux on r0.a lsb */
    PS_COMBINERCOUNT_MUX_MSB   = 0x0001, /* mux on r0.a msb */

    PS_COMBINERCOUNT_SAME_C0   = 0x0000, /* c0 same in each stage */
    PS_COMBINERCOUNT_UNIQUE_C0 = 0x0010, /* c0 unique in each stage */

    PS_COMBINERCOUNT_SAME_C1   = 0x0000, /* c1 same in each stage */
    PS_COMBINERCOUNT_UNIQUE_C1 = 0x0100  /* c1 unique in each stage */
};
/*
 * Flag bits of a combiner output word: output scale/bias mapping,
 * blue-to-alpha copies, the AB / CD operation and the operation used
 * for the third (sum/mux) output.
 */
enum PS_COMBINEROUTPUT {
    /* output mapping */
    PS_COMBINEROUTPUT_IDENTITY         = 0x00, /* y = x */
    PS_COMBINEROUTPUT_BIAS             = 0x08, /* y = x - 0.5 */
    PS_COMBINEROUTPUT_SHIFTLEFT_1      = 0x10, /* y = x*2 */
    PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS = 0x18, /* y = (x - 0.5)*2 */
    PS_COMBINEROUTPUT_SHIFTLEFT_2      = 0x20, /* y = x*4 */
    PS_COMBINEROUTPUT_SHIFTRIGHT_1     = 0x30, /* y = x/2 */

    /* blue-to-alpha copies, RGB only */
    PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA = 0x80,
    PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA = 0x40,

    /* AB operation (dot product is RGB only) */
    PS_COMBINEROUTPUT_AB_MULTIPLY      = 0x00,
    PS_COMBINEROUTPUT_AB_DOT_PRODUCT   = 0x02,

    /* CD operation (dot product is RGB only) */
    PS_COMBINEROUTPUT_CD_MULTIPLY      = 0x00,
    PS_COMBINEROUTPUT_CD_DOT_PRODUCT   = 0x01,

    /* third output */
    PS_COMBINEROUTPUT_AB_CD_SUM        = 0x00, /* AB+CD */
    PS_COMBINEROUTPUT_AB_CD_MUX        = 0x04, /* MUX(AB,CD) based on R0.a */
};
/*
 * Channel selector of a combiner input.  The value 0x00 means "RGB"
 * when feeding an RGB combiner and "blue" when feeding an alpha one.
 */
enum PS_CHANNEL {
    PS_CHANNEL_RGB   = 0x00, /* used as RGB source */
    PS_CHANNEL_BLUE  = 0x00, /* used as ALPHA source */
    PS_CHANNEL_ALPHA = 0x10, /* used as RGB or ALPHA source */
};
/* Setting bits carried in the low byte of the second final-combiner word. */
enum PS_FINALCOMBINERSETTING {
    /* V1+R0 sum clamped to [0,1] */
    PS_FINALCOMBINERSETTING_CLAMP_SUM     = 0x80,
    /* unsigned invert mapping applied to V1 */
    PS_FINALCOMBINERSETTING_COMPLEMENT_V1 = 0x40,
    /* unsigned invert mapping applied to R0 */
    PS_FINALCOMBINERSETTING_COMPLEMENT_R0 = 0x20,
};
// Structures describing the decoded pixel shader definition

// One decoded combiner input byte.
struct InputInfo {
    int reg;     // register selector (PS_REGISTER_*)
    int mod;     // input mapping (PS_INPUTMAPPING_*)
    int chan;    // channel selector (PS_CHANNEL_*)
    bool invert;
};
// The four inputs (A, B, C, D) of one general combiner half.
struct InputVarInfo {
    struct InputInfo a;
    struct InputInfo b;
    struct InputInfo c;
    struct InputInfo d;
};
// Inputs and settings of the final combiner.
struct FCInputInfo {
    struct InputInfo a;
    struct InputInfo b;
    struct InputInfo c;
    struct InputInfo d;
    struct InputInfo e;
    struct InputInfo f;
    struct InputInfo g;
    int c0;
    int c1;
    // constant values (c0_value / c1_value) are not carried yet
    bool c0_used;
    bool c1_used;
    bool v1r0_sum;
    bool clamp_sum;
    bool inv_v1;
    bool inv_r0;
    bool enabled;
};
// Decoded combiner output word.
struct OutputInfo {
    int ab;           // destination register of the AB result
    int cd;           // destination register of the CD result
    int muxsum;       // destination register of the sum / mux result
    int flags;        // raw flag bits (PS_COMBINEROUTPUT_*)
    int ab_op;        // flags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT bit
    int cd_op;        // flags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT bit
    int muxsum_op;    // flags & PS_COMBINEROUTPUT_AB_CD_MUX bit
    int mapping;      // output mapping bits
    int ab_alphablue; // AB blue-to-alpha bit
    int cd_alphablue; // CD blue-to-alpha bit
};
// One general combiner stage: RGB and alpha halves plus constant usage.
struct PSStageInfo {
    struct InputVarInfo rgb_input;
    struct InputVarInfo alpha_input;

    struct OutputInfo rgb_output;
    struct OutputInfo alpha_output;

    int c0;
    int c1;
    // constant values (c0_value / c1_value) are not carried yet
    bool c0_used;
    bool c1_used;
};
// Complete translation state: the decoded shader definition plus the
// scratch data used while GLSL code is being generated.
struct PixelShader {
    int num_stages, flags;
    struct PSStageInfo stage[8];
    struct FCInputInfo final_input;
    int tex_modes[4], input_tex[4];

    //uint32_t compare_mode, dot_mapping, input_texture;

    bool rect_tex[4];

    // E and F final combiner inputs, valid only while the final
    // combiner code is being generated
    QString *varE, *varF;
    // body of main() generated so far
    QString *code;
    // stage currently being translated; 8 denotes the final combiner
    int cur_stage;

    // Names of temporaries and constants referenced by the generated
    // code.  These tables are filled in at run time, so they must not be
    // const-qualified: writing to a const-qualified object through a
    // cast is undefined behaviour.
    int num_var_refs;
    char var_refs[32][32];
    int num_const_refs;
    char const_refs[32][32];
};
// Remember that the generated code references the temporary `var`.
// Each name is stored once; duplicates are ignored.  Names that do not
// fit a slot, or references beyond the table capacity, are rejected
// instead of being written out of bounds.
static void add_var_ref(struct PixelShader *ps, const char *var)
{
    const int capacity = sizeof(ps->var_refs) / sizeof(ps->var_refs[0]);
    int i;

    for (i = 0; i < ps->num_var_refs; i++) {
        if (strcmp(ps->var_refs[i], var) == 0) {
            return;
        }
    }

    assert(ps->num_var_refs < capacity);
    assert(strlen(var) < sizeof(ps->var_refs[0]));
    if (ps->num_var_refs >= capacity
        || strlen(var) >= sizeof(ps->var_refs[0])) {
        return;
    }

    // the cast drops any const qualification of the table's element type
    strcpy((char *)ps->var_refs[ps->num_var_refs++], var);
}
// Remember that the generated code references the constant `var`.
// Each name is stored once; duplicates are ignored.  Names that do not
// fit a slot, or references beyond the table capacity, are rejected
// instead of being written out of bounds.
static void add_const_ref(struct PixelShader *ps, const char *var)
{
    const int capacity = sizeof(ps->const_refs) / sizeof(ps->const_refs[0]);
    int i;

    for (i = 0; i < ps->num_const_refs; i++) {
        if (strcmp(ps->const_refs[i], var) == 0) {
            return;
        }
    }

    assert(ps->num_const_refs < capacity);
    assert(strlen(var) < sizeof(ps->const_refs[0]));
    if (ps->num_const_refs >= capacity
        || strlen(var) >= sizeof(ps->const_refs[0])) {
        return;
    }

    // the cast drops any const qualification of the table's element type
    strcpy((char *)ps->const_refs[ps->num_const_refs++], var);
}
// Get the GLSL expression for a register used in the program.
//
// ps      - translation state; reference tables and "constant used"
//           flags are updated as a side effect
// reg     - register selector (PS_REGISTER_*)
// is_dest - true when the register is written; the DISCARD register then
//           yields an empty string, whereas a read of ZERO yields "0.0"
//
// Returns a new QString that the caller must release with QDECREF.
// Unsupported registers trip an assertion; NULL is returned if assertions
// are compiled out so that the function never falls off its end.
static QString* get_var(struct PixelShader *ps, int reg, bool is_dest)
{
    switch (reg) {
    case PS_REGISTER_DISCARD:
        return qstring_from_str(is_dest ? "" : "0.0");

    case PS_REGISTER_C0:
        if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C0) {
            // one constant per stage, named after the current stage
            QString *name = qstring_from_fmt("c_%d_%d", ps->cur_stage, 0);
            add_const_ref(ps, qstring_get_str(name));
            if (ps->cur_stage == 8) { // final combiner
                ps->final_input.c0_used = true;
            } else {
                ps->stage[ps->cur_stage].c0_used = true;
            }
            return name;
        }
        // Same c0 in every stage
        add_const_ref(ps, "c_0_0");
        ps->stage[0].c0_used = true;
        return qstring_from_str("c_0_0");

    case PS_REGISTER_C1:
        if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C1) {
            QString *name = qstring_from_fmt("c_%d_%d", ps->cur_stage, 1);
            add_const_ref(ps, qstring_get_str(name));
            if (ps->cur_stage == 8) { // final combiner
                ps->final_input.c1_used = true;
            } else {
                ps->stage[ps->cur_stage].c1_used = true;
            }
            return name;
        }
        // Same c1 in every stage
        add_const_ref(ps, "c_0_1");
        ps->stage[0].c1_used = true;
        return qstring_from_str("c_0_1");

    case PS_REGISTER_FOG: // TODO
        assert(false);
        break;

    case PS_REGISTER_V0:
        return qstring_from_str("v0");
    case PS_REGISTER_V1:
        return qstring_from_str("v1");
    case PS_REGISTER_T0:
        return qstring_from_str("t0");
    case PS_REGISTER_T1:
        return qstring_from_str("t1");
    case PS_REGISTER_T2:
        return qstring_from_str("t2");
    case PS_REGISTER_T3:
        return qstring_from_str("t3");

    case PS_REGISTER_R0:
        add_var_ref(ps, "r0");
        return qstring_from_str("r0");
    case PS_REGISTER_R1:
        add_var_ref(ps, "r1");
        return qstring_from_str("r1");

    case PS_REGISTER_V1R0_SUM:
        add_var_ref(ps, "r0");
        return qstring_from_str("(v1 + r0)");
    case PS_REGISTER_EF_PROD:
        return qstring_from_fmt("(%s * %s)", qstring_get_str(ps->varE),
                                qstring_get_str(ps->varF));

    default:
        assert(false);
        break;
    }

    // only reachable when assertions are disabled
    return NULL;
}
// Get input variable code: the register expression with its channel
// swizzle and input mapping applied.
//
// in       - decoded combiner input
// is_alpha - true when the input feeds an alpha combiner; channel value
//            0x00 then selects ".b" instead of ".rgb"
//
// Returns a new QString that the caller must release with QDECREF.
static QString* get_input_var(struct PixelShader *ps, struct InputInfo in, bool is_alpha)
{
    QString *reg = get_var(ps, in.reg, false);
    // stays NULL only on the (asserted) unknown-mapping path
    QString *res = NULL;

    // The literal zero takes no swizzle, and neither does an E*F product
    // that already contains an alpha swizzle.
    bool is_zero = strcmp(qstring_get_str(reg), "0.0") == 0;
    bool ef_with_alpha = in.reg == PS_REGISTER_EF_PROD
                         && strstr(qstring_get_str(reg), ".a") != NULL;

    if (!is_zero && !ef_with_alpha) {
        switch (in.chan) {
        case PS_CHANNEL_RGB:
            qstring_append(reg, is_alpha ? ".b" : ".rgb");
            break;
        case PS_CHANNEL_ALPHA:
            qstring_append(reg, ".a");
            break;
        default:
            assert(false);
            break;
        }
    }

    switch (in.mod) {
    case PS_INPUTMAPPING_SIGNED_IDENTITY:
    case PS_INPUTMAPPING_UNSIGNED_IDENTITY:
        // hand back the register itself; the extra reference balances
        // the QDECREF below
        QINCREF(reg);
        res = reg;
        break;
    case PS_INPUTMAPPING_UNSIGNED_INVERT:
        res = qstring_from_fmt("(1.0 - %s)", qstring_get_str(reg));
        break;
    case PS_INPUTMAPPING_EXPAND_NORMAL: // TODO: Change to max(0, x)??
        res = qstring_from_fmt("(2.0 * %s - 1.0)", qstring_get_str(reg));
        break;
    case PS_INPUTMAPPING_EXPAND_NEGATE:
        res = qstring_from_fmt("(1.0 - 2.0 * %s)", qstring_get_str(reg));
        break;
    case PS_INPUTMAPPING_HALFBIAS_NORMAL:
        res = qstring_from_fmt("(%s - 0.5)", qstring_get_str(reg));
        break;
    case PS_INPUTMAPPING_HALFBIAS_NEGATE:
        res = qstring_from_fmt("(0.5 - %s)", qstring_get_str(reg));
        break;
    case PS_INPUTMAPPING_SIGNED_NEGATE:
        res = qstring_from_fmt("-%s", qstring_get_str(reg));
        break;
    default:
        assert(false);
        break;
    }

    QDECREF(reg);
    return res;
}
// Get code for the output mapping of a stage.
//
// reg     - expression to be mapped (not consumed)
// mapping - output mapping bits (PS_COMBINEROUTPUT_IDENTITY ... SHIFTRIGHT_1)
//
// Returns a QString the caller must release with QDECREF.  For the
// identity mapping this is `reg` itself with an extra reference.
static QString* get_output(QString *reg, int mapping)
{
    // stays NULL only on the (asserted) unknown-mapping path
    QString *res = NULL;

    switch (mapping) {
    case PS_COMBINEROUTPUT_IDENTITY:
        QINCREF(reg);
        res = reg;
        break;
    case PS_COMBINEROUTPUT_BIAS:
        res = qstring_from_fmt("(%s - 0.5)", qstring_get_str(reg));
        break;
    case PS_COMBINEROUTPUT_SHIFTLEFT_1:
        res = qstring_from_fmt("(%s * 2.0)", qstring_get_str(reg));
        break;
    case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS:
        res = qstring_from_fmt("((%s - 0.5) * 2.0)", qstring_get_str(reg));
        break;
    case PS_COMBINEROUTPUT_SHIFTLEFT_2:
        res = qstring_from_fmt("(%s * 4.0)", qstring_get_str(reg));
        break;
    case PS_COMBINEROUTPUT_SHIFTRIGHT_1:
        res = qstring_from_fmt("(%s / 2.0)", qstring_get_str(reg));
        break;
    default:
        assert(false);
        break;
    }

    return res;
}
// Add the HLSL code for a stage
static void add_stage_code(struct PixelShader *ps,
struct InputVarInfo input, struct OutputInfo output,
const char *write_mask, bool is_alpha)
{
QString *a = get_input_var(ps, input.a, is_alpha);
QString *b = get_input_var(ps, input.b, is_alpha);
QString *c = get_input_var(ps, input.c, is_alpha);
QString *d = get_input_var(ps, input.d, is_alpha);
const char *caster = "";
if (strlen(write_mask) == 3) {
caster = "vec3";
}
QString *ab;
if (output.ab_op == PS_COMBINEROUTPUT_AB_DOT_PRODUCT) {
ab = qstring_from_fmt("dot(%s, %s)",
qstring_get_str(a), qstring_get_str(b));
} else {
ab = qstring_from_fmt("(%s * %s)",
qstring_get_str(a), qstring_get_str(b));
}
QString *cd;
if (output.cd_op == PS_COMBINEROUTPUT_CD_DOT_PRODUCT) {
cd = qstring_from_fmt("dot(%s, %s)",
qstring_get_str(c), qstring_get_str(d));
} else {
cd = qstring_from_fmt("(%s * %s)",
qstring_get_str(c), qstring_get_str(d));
}
QString *ab_mapping = get_output(ab, output.mapping);
QString *cd_mapping = get_output(cd, output.mapping);
QString *ab_dest = get_var(ps, output.ab, true);
QString *cd_dest = get_var(ps, output.cd, true);
QString *sum_dest = get_var(ps, output.muxsum, true);
if (qstring_get_length(ab_dest)) {
qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
qstring_get_str(ab_dest), write_mask, caster, qstring_get_str(ab_mapping));
} else {
QINCREF(ab_mapping);
ab_dest = ab_mapping;
}
if (qstring_get_length(cd_dest)) {
qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
qstring_get_str(cd_dest), write_mask, caster, qstring_get_str(cd_mapping));
} else {
QINCREF(cd_mapping);
cd_dest = cd_mapping;
}
if (!is_alpha && output.flags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) {
qstring_append_fmt(ps->code, "%s.a = %s.b;\n",
qstring_get_str(ab_dest), qstring_get_str(ab_dest));
}
if (!is_alpha && output.flags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) {
qstring_append_fmt(ps->code, "%s.a = %s.b;\n",
qstring_get_str(cd_dest), qstring_get_str(cd_dest));
}
QString *sum;
if (output.muxsum_op == PS_COMBINEROUTPUT_AB_CD_SUM) {
sum = qstring_from_fmt("(%s + %s)", qstring_get_str(ab), qstring_get_str(cd));
} else {
sum = qstring_from_fmt("((r0.a >= 0.5) ? %s : %s)",
qstring_get_str(cd), qstring_get_str(ab));
}
QString *sum_mapping = get_output(sum, output.mapping);
if (qstring_get_length(sum_dest)) {
qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
qstring_get_str(sum_dest), write_mask, caster, qstring_get_str(sum_mapping));
}
QDECREF(a);
QDECREF(b);
QDECREF(c);
QDECREF(d);
QDECREF(ab);
QDECREF(cd);
QDECREF(ab_mapping);
QDECREF(cd_mapping);
QDECREF(ab_dest);
QDECREF(cd_dest);
QDECREF(sum_dest);
QDECREF(sum);
QDECREF(sum_mapping);
}
// Add code for the final combiner stage to ps->code:
//   r0.rgb = A*B + (1-A)*C + D
//   r0.a   = G
// E and F are evaluated first because the other inputs may reference
// their product through PS_REGISTER_EF_PROD.
static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo final)
{
    ps->varE = get_input_var(ps, final.e, false);
    ps->varF = get_input_var(ps, final.f, false);

    QString *a = get_input_var(ps, final.a, false);
    QString *b = get_input_var(ps, final.b, false);
    QString *c = get_input_var(ps, final.c, false);
    QString *d = get_input_var(ps, final.d, false);
    // G only feeds the alpha output, so it is read as an alpha operand
    // (".b" or ".a"); an ".rgb" swizzle could not be assigned to r0.a
    QString *g = get_input_var(ps, final.g, true);

    add_var_ref(ps, "r0");
    // vec3() keeps the assignment valid when every operand is scalar,
    // matching the cast used for the RGB half of the general stages
    qstring_append_fmt(ps->code,
                       "r0.rgb = vec3((%s * %s) + ((1.0 - %s) * %s) + %s);\n",
                       qstring_get_str(a), qstring_get_str(b),
                       qstring_get_str(a), qstring_get_str(c),
                       qstring_get_str(d));
    qstring_append_fmt(ps->code, "r0.a = %s;\n", qstring_get_str(g));

    QDECREF(a);
    QDECREF(b);
    QDECREF(c);
    QDECREF(d);
    QDECREF(g);

    QDECREF(ps->varE);
    QDECREF(ps->varF);
    ps->varE = ps->varF = NULL;
}
// Generate the complete GLSL fragment shader for a decoded pixel shader.
//
// The result consists of the sampler uniform declarations, followed by
// main() holding the variable declarations, the code of every general
// combiner stage, the optional final combiner and the write of r0 to
// gl_FragColor.
//
// Returns a new QString that the caller must release with QDECREF.
static QString* psh_convert(struct PixelShader *ps)
{
    int i;

    QString *preflight = qstring_new(); // declarations outside main()
    QString *vars = qstring_new();      // declarations inside main()

    qstring_append(vars, "vec4 v0 = gl_Color;\n");
    qstring_append(vars, "vec4 v1 = gl_SecondaryColor;\n");
    qstring_append(vars, "float fog = gl_FogFragCoord;\n");

    // texture registers t0..t3 and their samplers
    for (i = 0; i < 4; i++) {
        if (ps->tex_modes[i] == PS_TEXTUREMODES_NONE) {
            continue;
        }

        const char *sampler_type = NULL;
        const char *sampler_function;

        switch (ps->tex_modes[i]) {
        case PS_TEXTUREMODES_PROJECT2D:
            if (ps->rect_tex[i]) {
                sampler_type = "sampler2DRect";
                sampler_function = "texture2DRect";
            } else {
                sampler_type = "sampler2D";
                sampler_function = "texture2D";
            }
            qstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, gl_TexCoord[%d].xy);\n",
                               i, sampler_function, i, i);
            break;
        case PS_TEXTUREMODES_PROJECT3D:
            sampler_type = "sampler3D";
            qstring_append_fmt(vars, "vec4 t%d = texture3D(texSamp%d, gl_TexCoord[%d].xyz);\n",
                               i, i, i);
            break;
        case PS_TEXTUREMODES_CUBEMAP:
            sampler_type = "samplerCube";
            qstring_append_fmt(vars, "vec4 t%d = textureCube(texSamp%d, gl_TexCoord[%d].xyz);\n",
                               i, i, i);
            break;
        case PS_TEXTUREMODES_PASSTHRU:
            // no sampler: the register is only declared
            qstring_append_fmt(vars, "vec4 t%d;\n", i);
            break;
        default:
            printf("%x\n", ps->tex_modes[i]);
            assert(false);
            break;
        }

        if (ps->tex_modes[i] != PS_TEXTUREMODES_PASSTHRU) {
            qstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
        }
    }

    // general combiner stages
    ps->code = qstring_new();
    for (i = 0; i < ps->num_stages; i++) {
        ps->cur_stage = i;
        qstring_append_fmt(ps->code, "// Stage %d\n", i);
        add_stage_code(ps, ps->stage[i].rgb_input, ps->stage[i].rgb_output, "rgb", false);
        // The alpha half has to be generated as alpha so that its inputs
        // resolve to scalar channels matching the ".a" write mask.
        add_stage_code(ps, ps->stage[i].alpha_input, ps->stage[i].alpha_output, "a", true);
    }

    if (ps->final_input.enabled) {
        ps->cur_stage = 8; // 8 denotes the final combiner
        qstring_append(ps->code, "// Final Combiner\n");
        add_final_stage_code(ps, ps->final_input);
    }

    // Temporaries are declared only now, because the set of referenced
    // names is collected while the stage code is generated.
    for (i = 0; i < ps->num_var_refs; i++) {
        qstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]);
        if (strcmp(ps->var_refs[i], "r0") == 0) {
            // r0.a starts out as t0.a when texture stage 0 is active
            if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) {
                qstring_append(vars, "r0.a = t0.a;\n");
            } else {
                qstring_append(vars, "r0.a = 1.0;\n");
            }
        }
    }
    for (i = 0; i < ps->num_const_refs; i++) {
        qstring_append_fmt(vars, "vec4 %s;\n", ps->const_refs[i]);
    }

    QString *final = qstring_new();
    qstring_append(final, qstring_get_str(preflight));
    qstring_append(final, "void main() {\n");
    qstring_append(final, qstring_get_str(vars));
    qstring_append(final, qstring_get_str(ps->code));
    qstring_append(final, "gl_FragColor = r0;\n");
    qstring_append(final, "}\n");

    QDECREF(preflight);
    QDECREF(vars);
    QDECREF(ps->code);
    ps->code = NULL; // do not leave a dangling pointer in the state

    return final;
}
// Split one combiner input byte into its fields: register selector
// (bits 0-3), channel flag (bit 4) and input mapping (bits 5-7).  The
// channel and mapping are stored unshifted, i.e. as the raw masked bits.
static void parse_input(struct InputInfo *var, int value)
{
    const int reg_bits     = value & 0xF;
    const int channel_bits = value & 0x10;
    const int mapping_bits = value & 0xE0;

    var->reg  = reg_bits;
    var->chan = channel_bits;
    var->mod  = mapping_bits;
}
// Decode a 32-bit combiner input word.  The word holds four input bytes:
// A in bits 24-31, B in bits 16-23, C in bits 8-15 and D in bits 0-7.
// The outputs are written in the order d, c, b, a.
static void parse_combiner_inputs(uint32_t value,
                                  struct InputInfo *a, struct InputInfo *b,
                                  struct InputInfo *c, struct InputInfo *d)
{
    const int byte_d = value & 0xFF;
    const int byte_c = (value >> 8) & 0xFF;
    const int byte_b = (value >> 16) & 0xFF;
    const int byte_a = (value >> 24) & 0xFF;

    // keep this write order: it decides the result if two of the
    // output pointers refer to the same object
    parse_input(d, byte_d);
    parse_input(c, byte_c);
    parse_input(b, byte_b);
    parse_input(a, byte_a);
}
// Decode a combiner output word: three 4-bit destination registers
// (CD in bits 0-3, AB in bits 4-7, sum/mux in bits 8-11) followed by the
// flag bits from bit 12 upwards.  The individual flag fields are stored
// as masked, unshifted values.
static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
{
    const int flag_bits = value >> 12;

    // destination registers
    out->cd     = value & 0xF;
    out->ab     = (value >> 4) & 0xF;
    out->muxsum = (value >> 8) & 0xF;

    // flag word and the fields derived from it
    out->flags        = flag_bits;
    out->cd_op        = flag_bits & 1;
    out->ab_op        = flag_bits & 2;
    out->muxsum_op    = flag_bits & 4;
    out->mapping      = flag_bits & 0x38;
    out->ab_alphablue = flag_bits & 0x80;
    out->cd_alphablue = flag_bits & 0x40;
}
QString *psh_translate(uint32_t combiner_control, uint32_t shader_stage_program,
uint32_t other_stage_input,
uint32_t rgb_inputs[8], uint32_t rgb_outputs[8],
uint32_t alpha_inputs[8], uint32_t alpha_outputs[8],
/*uint32_t constant_0[8], uint32_t constant_1[8],*/
uint32_t final_inputs_0, uint32_t final_inputs_1,
/*uint32_t final_constant_0, uint32_t final_constant_1,*/
bool rect_tex[4])
{
int i;
struct PixelShader ps;
memset(&ps, 0, sizeof(ps));
ps.num_stages = combiner_control & 0xFF;
ps.flags = combiner_control >> 8;
for (i = 0; i < 4; i++) {
ps.tex_modes[i] = (shader_stage_program >> (i * 5)) & 0x1F;
ps.rect_tex[i] = rect_tex[i];
}
ps.input_tex[0] = -1;
ps.input_tex[1] = 0;
ps.input_tex[2] = (other_stage_input >> 16) & 0xF;
ps.input_tex[3] = (other_stage_input >> 20) & 0xF;
for (i = 0; i < ps.num_stages; i++) {
parse_combiner_inputs(rgb_inputs[i], &ps.stage[i].rgb_input.a,
&ps.stage[i].rgb_input.b, &ps.stage[i].rgb_input.c, &ps.stage[i].rgb_input.d);
parse_combiner_inputs(alpha_inputs[i], &ps.stage[i].alpha_input.a,
&ps.stage[i].alpha_input.b, &ps.stage[i].alpha_input.c, &ps.stage[i].alpha_input.d);
parse_combiner_output(rgb_outputs[i], &ps.stage[i].rgb_output);
parse_combiner_output(alpha_outputs[i], &ps.stage[i].alpha_output);
//ps.stage[i].c0 = (pDef->PSC0Mapping >> (i * 4)) & 0xF;
//ps.stage[i].c1 = (pDef->PSC1Mapping >> (i * 4)) & 0xF;
//ps.stage[i].c0_value = constant_0[i];
//ps.stage[i].c1_value = constant_1[i];
}
struct InputInfo blank;
ps.final_input.enabled = final_inputs_0 || final_inputs_1;
if (ps.final_input.enabled) {
parse_combiner_inputs(final_inputs_0, &ps.final_input.a, &ps.final_input.a,
&ps.final_input.c, &ps.final_input.d);
parse_combiner_inputs(final_inputs_1, &ps.final_input.e, &ps.final_input.f,
&ps.final_input.g, &blank);
int flags = final_inputs_1 & 0xFF;
ps.final_input.clamp_sum = flags & PS_FINALCOMBINERSETTING_CLAMP_SUM;
ps.final_input.inv_v1 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1;
ps.final_input.inv_r0 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0;
//ps.final_input.c0 = (pDef->PSFinalCombinerConstants >> 0) & 0xF;
//ps.final_input.c1 = (pDef->PSFinalCombinerConstants >> 4) & 0xF;
//ps.final_input.c0_value = final_constant_0;
//ps.final_input.c1_value = final_constant_1;
}
return psh_convert(&ps);
}

36
hw/xbox/nv2a_psh.h Normal file
View File

@ -0,0 +1,36 @@
/*
* QEMU Geforce NV2A pixel shader translation
*
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2 as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>
*
* Contributions after 2012-01-13 are licensed under the terms of the
* GNU GPL, version 2 or (at your option) any later version.
*/
#ifndef HW_NV2A_PSH_H
#define HW_NV2A_PSH_H
#include "qapi/qmp/qstring.h"
/*
 * Translate NV2A register combiner state into fragment shader source.
 *
 * The four per-stage arrays hold one combiner word per general combiner
 * stage (eight entries each); rect_tex has one entry per texture stage.
 * The combiner constant parameters are currently commented out of the
 * signature.
 *
 * Returns the generated source as a QString.
 */
QString *psh_translate(uint32_t combiner_control, uint32_t shader_stage_program,
uint32_t other_stage_input,
uint32_t rgb_inputs[8], uint32_t rgb_outputs[8],
uint32_t alpha_inputs[8], uint32_t alpha_outputs[8],
/*uint32_t constant_0[8], uint32_t constant_1[8],*/
uint32_t final_inputs_0, uint32_t final_inputs_1,
/*uint32_t final_constant_0, uint32_t final_constant_1,*/
bool rect_tex[4]);
#endif

View File

@ -14,6 +14,7 @@
#define QSTRING_H
#include <stdint.h>
#include <stdarg.h>
#include "qapi/qmp/qobject.h"
typedef struct QString {
@ -26,11 +27,14 @@ typedef struct QString {
QString *qstring_new(void);
QString *qstring_from_str(const char *str);
QString *qstring_from_substr(const char *str, int start, int end);
QString *qstring_from_fmt(const char *fmt, ...);
size_t qstring_get_length(const QString *qstring);
const char *qstring_get_str(const QString *qstring);
void qstring_append_int(QString *qstring, int64_t value);
void qstring_append(QString *qstring, const char *str);
void qstring_append_chr(QString *qstring, int c);
void qstring_append_fmt(QString *qstring, const char *fmt, ...);
void qstring_append_va(QString *qstring, const char *fmt, va_list va);
QString *qobject_to_qstring(const QObject *obj);
#endif /* QSTRING_H */

View File

@ -72,6 +72,17 @@ QString *qstring_from_str(const char *str)
return qstring_from_substr(str, 0, strlen(str) - 1);
}
/**
 * qstring_from_fmt(): Create a new QString from a printf-style format
 *
 * Return strong reference.
 */
QString *qstring_from_fmt(const char *fmt, ...)
{
    va_list args;
    QString *str = qstring_new();

    va_start(args, fmt);
    qstring_append_va(str, fmt, args);
    va_end(args);

    return str;
}
static void capacity_increase(QString *qstring, size_t len)
{
if (qstring->capacity < (qstring->length + len)) {
@ -112,6 +123,40 @@ void qstring_append_chr(QString *qstring, int c)
qstring->string[qstring->length] = 0;
}
/**
 * qstring_append_fmt(): Append printf-style formatted text to a QString
 */
void qstring_append_fmt(QString *qstring, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    qstring_append_va(qstring, fmt, args);
    va_end(args);
}
/**
 * qstring_append_va(): Append formatted text to a QString
 *
 * The text is first formatted into a small stack buffer; if it does not
 * fit, a heap buffer of the exact size is used for a second pass.  The
 * caller's va_list is left untouched (a copy is consumed on each pass).
 * Nothing is appended when the formatted text is empty or when
 * vsnprintf() reports an error.
 */
void qstring_append_va(QString *qstring, const char *fmt, va_list va)
{
    char scratch[256];
    va_list ap;
    int len;

    va_copy(ap, va);
    len = vsnprintf(scratch, sizeof(scratch), fmt, ap);
    va_end(ap);

    /*
     * A negative length signals an output error.  It must be rejected
     * here: compared against sizeof() it would be converted to a huge
     * unsigned value and reach the allocation path below.
     */
    if (len <= 0) {
        return;
    }

    if ((size_t)len < sizeof(scratch)) {
        qstring_append(qstring, scratch);
        return;
    }

    /* overflowed our scratch buffer, alloc and try again */
    char *buf = g_malloc((size_t)len + 1);

    va_copy(ap, va);
    vsnprintf(buf, (size_t)len + 1, fmt, ap);
    va_end(ap);

    qstring_append(qstring, buf);
    g_free(buf);
}
/**
* qobject_to_qstring(): Convert a QObject to a QString
*/