xemu/hw/xbox/nv2a/pgraph/glsl/psh.c

1376 lines
53 KiB
C

/*
* QEMU Geforce NV2A pixel shader translation
*
* Copyright (c) 2013 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* Based on:
* Cxbx, PixelShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Xeon, XBD3DPixelShader.cpp
* Copyright (c) 2003 _SF_
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include "common.h"
#include "hw/xbox/nv2a/debug.h"
#include "hw/xbox/nv2a/pgraph/psh.h"
#include "psh.h"
/*
* This implements translation of register combiners into glsl
* fragment shaders, but all terminology is in terms of Xbox DirectX
* pixel shaders, since I wanted to be lazy while referencing existing
* work / stealing code.
*
* For some background, see the OpenGL extension:
* https://www.opengl.org/registry/specs/NV/register_combiners.txt
*/
enum PS_TEXTUREMODES
{ // valid in stage 0 1 2 3
PS_TEXTUREMODES_NONE= 0x00L, // * * * *
PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * *
PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * *
PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * *
PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * *
PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * *
PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * *
PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * *
PS_TEXTUREMODES_BRDF= 0x08L, // - - * *
PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * *
PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * *
PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * -
PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - *
PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - *
PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - *
PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * *
PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * *
PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * -
PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - *
// 0x13-0x1f reserved
};
enum PS_INPUTMAPPING
{
PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) OK for final combiner
PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) OK for final combiner
PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2*max(0,x) - 1 invalid for final combiner
PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2*max(0,x) invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) invalid for final combiner
PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x invalid for final combiner
PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x invalid for final combiner
};
enum PS_REGISTER
{
PS_REGISTER_ZERO= 0x00L, // r
PS_REGISTER_DISCARD= 0x00L, // w
PS_REGISTER_C0= 0x01L, // r
PS_REGISTER_C1= 0x02L, // r
PS_REGISTER_FOG= 0x03L, // r
PS_REGISTER_V0= 0x04L, // r/w
PS_REGISTER_V1= 0x05L, // r/w
PS_REGISTER_T0= 0x08L, // r/w
PS_REGISTER_T1= 0x09L, // r/w
PS_REGISTER_T2= 0x0aL, // r/w
PS_REGISTER_T3= 0x0bL, // r/w
PS_REGISTER_R0= 0x0cL, // r/w
PS_REGISTER_R1= 0x0dL, // r/w
PS_REGISTER_V1R0_SUM= 0x0eL, // r
PS_REGISTER_EF_PROD= 0x0fL, // r
PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // OK for final combiner
PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // invalid for final combiner
PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // invalid for final combiner
PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // invalid for final combiner
};
enum PS_COMBINERCOUNTFLAGS
{
PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb
PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb
PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage
PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage
PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage
PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage
};
enum PS_COMBINEROUTPUT
{
PS_COMBINEROUTPUT_IDENTITY= 0x00L, // y = x
PS_COMBINEROUTPUT_BIAS= 0x08L, // y = x - 0.5
PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // y = x*2
PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5)*2
PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // y = x*4
PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // y = x/2
PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only
PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only
PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L,
PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only
PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L,
PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only
PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD
PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a
};
enum PS_CHANNEL
{
PS_CHANNEL_RGB= 0x00, // used as RGB source
PS_CHANNEL_BLUE= 0x00, // used as ALPHA source
PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source
};
enum PS_FINALCOMBINERSETTING
{
PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1]
PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping
PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping
};
enum PS_DOTMAPPING
{ // valid in stage 0 1 2 3
PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * *
PS_DOTMAPPING_HILO_1= 0x04L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * *
};
// Structures to describe the PS definition
struct InputInfo {
int reg, mod, chan;
};
struct InputVarInfo {
struct InputInfo a, b, c, d;
};
struct FCInputInfo {
struct InputInfo a, b, c, d, e, f, g;
bool v1r0_sum, clamp_sum, inv_v1, inv_r0, enabled;
};
struct OutputInfo {
int ab, cd, muxsum, flags, ab_op, cd_op, muxsum_op,
mapping, ab_alphablue, cd_alphablue;
};
struct PSStageInfo {
struct InputVarInfo rgb_input, alpha_input;
struct OutputInfo rgb_output, alpha_output;
int c0, c1;
};
struct PixelShader {
PshState state;
int num_stages, flags;
struct PSStageInfo stage[8];
struct FCInputInfo final_input;
int tex_modes[4], input_tex[4], dot_map[4];
MString *varE, *varF;
MString *code;
int cur_stage;
int num_var_refs;
char var_refs[32][32];
int num_const_refs;
char const_refs[32][32];
};
static void add_var_ref(struct PixelShader *ps, const char *var)
{
int i;
for (i=0; i<ps->num_var_refs; i++) {
if (strcmp((char*)ps->var_refs[i], var) == 0) return;
}
strcpy((char*)ps->var_refs[ps->num_var_refs++], var);
}
static void add_const_ref(struct PixelShader *ps, const char *var)
{
int i;
for (i=0; i<ps->num_const_refs; i++) {
if (strcmp((char*)ps->const_refs[i], var) == 0) return;
}
strcpy((char*)ps->const_refs[ps->num_const_refs++], var);
}
// Get the code for a variable used in the program
static MString* get_var(struct PixelShader *ps, int reg, bool is_dest)
{
switch (reg) {
case PS_REGISTER_DISCARD:
if (is_dest) {
return mstring_from_str("");
} else {
return mstring_from_str("vec4(0.0)");
}
break;
case PS_REGISTER_C0:
if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C0 || ps->cur_stage == 8) {
MString *reg_name = mstring_from_fmt("c0_%d", ps->cur_stage);
add_const_ref(ps, mstring_get_str(reg_name));
return reg_name;
} else { // Same c0
add_const_ref(ps, "c0_0");
return mstring_from_str("c0_0");
}
break;
case PS_REGISTER_C1:
if (ps->flags & PS_COMBINERCOUNT_UNIQUE_C1 || ps->cur_stage == 8) {
MString *reg_name = mstring_from_fmt("c1_%d", ps->cur_stage);
add_const_ref(ps, mstring_get_str(reg_name));
return reg_name;
} else { // Same c1
add_const_ref(ps, "c1_0");
return mstring_from_str("c1_0");
}
break;
case PS_REGISTER_FOG:
return mstring_from_str("pFog");
case PS_REGISTER_V0:
return mstring_from_str("v0");
case PS_REGISTER_V1:
return mstring_from_str("v1");
case PS_REGISTER_T0:
return mstring_from_str("t0");
case PS_REGISTER_T1:
return mstring_from_str("t1");
case PS_REGISTER_T2:
return mstring_from_str("t2");
case PS_REGISTER_T3:
return mstring_from_str("t3");
case PS_REGISTER_R0:
add_var_ref(ps, "r0");
return mstring_from_str("r0");
case PS_REGISTER_R1:
add_var_ref(ps, "r1");
return mstring_from_str("r1");
case PS_REGISTER_V1R0_SUM:
add_var_ref(ps, "r0");
if (ps->final_input.clamp_sum) {
return mstring_from_fmt(
"clamp(vec4(%s.rgb + %s.rgb, 0.0), 0.0, 1.0)",
ps->final_input.inv_v1 ? "(1.0 - v1)" : "v1",
ps->final_input.inv_r0 ? "(1.0 - r0)" : "r0");
} else {
return mstring_from_fmt(
"vec4(%s.rgb + %s.rgb, 0.0)",
ps->final_input.inv_v1 ? "(1.0 - v1)" : "v1",
ps->final_input.inv_r0 ? "(1.0 - r0)" : "r0");
}
case PS_REGISTER_EF_PROD:
return mstring_from_fmt("vec4(%s * %s, 0.0)",
mstring_get_str(ps->varE),
mstring_get_str(ps->varF));
default:
assert(false);
return NULL;
}
}
// Get input variable code
static MString* get_input_var(struct PixelShader *ps, struct InputInfo in, bool is_alpha)
{
MString *reg = get_var(ps, in.reg, false);
if (!is_alpha) {
switch (in.chan) {
case PS_CHANNEL_RGB:
mstring_append(reg, ".rgb");
break;
case PS_CHANNEL_ALPHA:
mstring_append(reg, ".aaa");
break;
default:
assert(false);
break;
}
} else {
switch (in.chan) {
case PS_CHANNEL_BLUE:
mstring_append(reg, ".b");
break;
case PS_CHANNEL_ALPHA:
mstring_append(reg, ".a");
break;
default:
assert(false);
break;
}
}
MString *res;
switch (in.mod) {
case PS_INPUTMAPPING_UNSIGNED_IDENTITY:
res = mstring_from_fmt("max(%s, 0.0)", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_UNSIGNED_INVERT:
res = mstring_from_fmt("(1.0 - clamp(%s, 0.0, 1.0))", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_EXPAND_NORMAL:
res = mstring_from_fmt("(2.0 * max(%s, 0.0) - 1.0)", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_EXPAND_NEGATE:
res = mstring_from_fmt("(-2.0 * max(%s, 0.0) + 1.0)", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_HALFBIAS_NORMAL:
res = mstring_from_fmt("(max(%s, 0.0) - 0.5)", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_HALFBIAS_NEGATE:
res = mstring_from_fmt("(-max(%s, 0.0) + 0.5)", mstring_get_str(reg));
break;
case PS_INPUTMAPPING_SIGNED_IDENTITY:
mstring_ref(reg);
res = reg;
break;
case PS_INPUTMAPPING_SIGNED_NEGATE:
res = mstring_from_fmt("-%s", mstring_get_str(reg));
break;
default:
assert(false);
break;
}
mstring_unref(reg);
return res;
}
// Get code for the output mapping of a stage
static MString* get_output(MString *reg, int mapping)
{
MString *res;
switch (mapping) {
case PS_COMBINEROUTPUT_IDENTITY:
mstring_ref(reg);
res = reg;
break;
case PS_COMBINEROUTPUT_BIAS:
res = mstring_from_fmt("(%s - 0.5)", mstring_get_str(reg));
break;
case PS_COMBINEROUTPUT_SHIFTLEFT_1:
res = mstring_from_fmt("(%s * 2.0)", mstring_get_str(reg));
break;
case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS:
res = mstring_from_fmt("((%s - 0.5) * 2.0)", mstring_get_str(reg));
break;
case PS_COMBINEROUTPUT_SHIFTLEFT_2:
res = mstring_from_fmt("(%s * 4.0)", mstring_get_str(reg));
break;
case PS_COMBINEROUTPUT_SHIFTRIGHT_1:
res = mstring_from_fmt("(%s / 2.0)", mstring_get_str(reg));
break;
default:
assert(false);
break;
}
return res;
}
// Add the GLSL code for a stage
static MString* add_stage_code(struct PixelShader *ps,
struct InputVarInfo input,
struct OutputInfo output,
const char *write_mask, bool is_alpha)
{
MString *ret = mstring_new();
MString *a = get_input_var(ps, input.a, is_alpha);
MString *b = get_input_var(ps, input.b, is_alpha);
MString *c = get_input_var(ps, input.c, is_alpha);
MString *d = get_input_var(ps, input.d, is_alpha);
const char *caster = "";
if (strlen(write_mask) == 3) {
caster = "vec3";
}
MString *ab;
if (output.ab_op == PS_COMBINEROUTPUT_AB_DOT_PRODUCT) {
ab = mstring_from_fmt("dot(%s, %s)",
mstring_get_str(a), mstring_get_str(b));
} else {
ab = mstring_from_fmt("(%s * %s)",
mstring_get_str(a), mstring_get_str(b));
}
MString *cd;
if (output.cd_op == PS_COMBINEROUTPUT_CD_DOT_PRODUCT) {
cd = mstring_from_fmt("dot(%s, %s)",
mstring_get_str(c), mstring_get_str(d));
} else {
cd = mstring_from_fmt("(%s * %s)",
mstring_get_str(c), mstring_get_str(d));
}
MString *ab_mapping = get_output(ab, output.mapping);
MString *cd_mapping = get_output(cd, output.mapping);
MString *ab_dest = get_var(ps, output.ab, true);
MString *cd_dest = get_var(ps, output.cd, true);
MString *muxsum_dest = get_var(ps, output.muxsum, true);
bool assign_ab = false;
bool assign_cd = false;
bool assign_muxsum = false;
if (mstring_get_length(ab_dest)) {
mstring_append_fmt(ps->code, "ab.%s = clamp(%s(%s), -1.0, 1.0);\n",
write_mask, caster, mstring_get_str(ab_mapping));
assign_ab = true;
} else {
mstring_unref(ab_dest);
mstring_ref(ab_mapping);
ab_dest = ab_mapping;
}
if (mstring_get_length(cd_dest)) {
mstring_append_fmt(ps->code, "cd.%s = clamp(%s(%s), -1.0, 1.0);\n",
write_mask, caster, mstring_get_str(cd_mapping));
assign_cd = true;
} else {
mstring_unref(cd_dest);
mstring_ref(cd_mapping);
cd_dest = cd_mapping;
}
MString *muxsum;
if (output.muxsum_op == PS_COMBINEROUTPUT_AB_CD_SUM) {
muxsum = mstring_from_fmt("(%s + %s)", mstring_get_str(ab),
mstring_get_str(cd));
} else {
muxsum = mstring_from_fmt("((%s) ? %s(%s) : %s(%s))",
(ps->flags & PS_COMBINERCOUNT_MUX_MSB) ?
"r0.a >= 0.5" :
"(uint(r0.a * 255.0) & 1u) == 1u",
caster, mstring_get_str(cd), caster,
mstring_get_str(ab));
}
MString *muxsum_mapping = get_output(muxsum, output.mapping);
if (mstring_get_length(muxsum_dest)) {
mstring_append_fmt(ps->code, "mux_sum.%s = clamp(%s(%s), -1.0, 1.0);\n",
write_mask, caster, mstring_get_str(muxsum_mapping));
assign_muxsum = true;
}
if (assign_ab) {
mstring_append_fmt(ret, "%s.%s = ab.%s;\n",
mstring_get_str(ab_dest), write_mask, write_mask);
if (!is_alpha && output.flags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) {
mstring_append_fmt(ret, "%s.a = ab.b;\n",
mstring_get_str(ab_dest));
}
}
if (assign_cd) {
mstring_append_fmt(ret, "%s.%s = cd.%s;\n",
mstring_get_str(cd_dest), write_mask, write_mask);
if (!is_alpha && output.flags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) {
mstring_append_fmt(ret, "%s.a = cd.b;\n",
mstring_get_str(cd_dest));
}
}
if (assign_muxsum) {
mstring_append_fmt(ret, "%s.%s = mux_sum.%s;\n",
mstring_get_str(muxsum_dest), write_mask, write_mask);
}
mstring_unref(a);
mstring_unref(b);
mstring_unref(c);
mstring_unref(d);
mstring_unref(ab);
mstring_unref(cd);
mstring_unref(ab_mapping);
mstring_unref(cd_mapping);
mstring_unref(ab_dest);
mstring_unref(cd_dest);
mstring_unref(muxsum_dest);
mstring_unref(muxsum);
mstring_unref(muxsum_mapping);
return ret;
}
// Add code for the final combiner stage
static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo final)
{
ps->varE = get_input_var(ps, final.e, false);
ps->varF = get_input_var(ps, final.f, false);
MString *a = get_input_var(ps, final.a, false);
MString *b = get_input_var(ps, final.b, false);
MString *c = get_input_var(ps, final.c, false);
MString *d = get_input_var(ps, final.d, false);
MString *g = get_input_var(ps, final.g, true);
mstring_append_fmt(ps->code, "fragColor.rgb = %s + mix(vec3(%s), vec3(%s), vec3(%s));\n",
mstring_get_str(d), mstring_get_str(c),
mstring_get_str(b), mstring_get_str(a));
mstring_append_fmt(ps->code, "fragColor.a = %s;\n", mstring_get_str(g));
mstring_unref(a);
mstring_unref(b);
mstring_unref(c);
mstring_unref(d);
mstring_unref(g);
mstring_unref(ps->varE);
mstring_unref(ps->varF);
ps->varE = ps->varF = NULL;
}
static const char *get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i)
{
const char *sampler2D = "sampler2D";
const char *sampler3D = "sampler3D";
const char *samplerCube = "samplerCube";
int dim = state->dim_tex[i];
// FIXME: Cleanup
switch (mode) {
default:
case PS_TEXTUREMODES_NONE:
return NULL;
case PS_TEXTUREMODES_PROJECT2D:
if (state->dim_tex[i] == 2) {
if (state->tex_x8y24[i] && state->vulkan) {
return "usampler2D";
}
return sampler2D;
}
if (state->dim_tex[i] == 3) return sampler3D;
assert(!"Unhandled texture dimensions");
return NULL;
case PS_TEXTUREMODES_BUMPENVMAP:
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
case PS_TEXTUREMODES_DOT_ST:
if (state->shadow_map[i]) {
fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
assert(!"Shadow map support not implemented for this mode");
}
if (state->dim_tex[i] == 2) return sampler2D;
if (state->dim_tex[i] == 3 && mode != PS_TEXTUREMODES_DOT_ST) return sampler3D;
assert(!"Unhandled texture dimensions");
return NULL;
case PS_TEXTUREMODES_PROJECT3D:
case PS_TEXTUREMODES_DOT_STR_3D:
if (state->tex_x8y24[i] && state->vulkan) {
return "usampler2D";
}
if (state->shadow_map[i]) {
return sampler2D;
}
return dim == 2 ? sampler2D : sampler3D;
case PS_TEXTUREMODES_CUBEMAP:
case PS_TEXTUREMODES_DOT_RFLCT_DIFF:
case PS_TEXTUREMODES_DOT_RFLCT_SPEC:
case PS_TEXTUREMODES_DOT_STR_CUBE:
if (state->shadow_map[i]) {
fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
assert(!"Shadow map support not implemented for this mode");
}
assert(state->dim_tex[i] == 2);
return samplerCube;
case PS_TEXTUREMODES_DPNDNT_AR:
case PS_TEXTUREMODES_DPNDNT_GB:
if (state->shadow_map[i]) {
fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
assert(!"Shadow map support not implemented for this mode");
}
assert(state->dim_tex[i] == 2);
return sampler2D;
}
}
static const char *shadow_comparison_map[] = {
[SHADOW_DEPTH_FUNC_LESS] = "<",
[SHADOW_DEPTH_FUNC_EQUAL] = "==",
[SHADOW_DEPTH_FUNC_LEQUAL] = "<=",
[SHADOW_DEPTH_FUNC_GREATER] = ">",
[SHADOW_DEPTH_FUNC_NOTEQUAL] = "!=",
[SHADOW_DEPTH_FUNC_GEQUAL] = ">=",
};
static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compare_z, MString *vars)
{
if (ps->state.shadow_depth_func == SHADOW_DEPTH_FUNC_NEVER) {
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0);\n", i);
return;
}
if (ps->state.shadow_depth_func == SHADOW_DEPTH_FUNC_ALWAYS) {
mstring_append_fmt(vars, "vec4 t%d = vec4(1.0);\n", i);
return;
}
g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i);
const char *tex_remap = ps->state.rect_tex[i] ? normalize_tex_coords : "";
const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func];
bool extract_msb_24b = ps->state.tex_x8y24[i] && ps->state.vulkan;
mstring_append_fmt(
vars, "%svec4 t%d_depth%s = textureProj(texSamp%d, %s(pT%d.xyw));\n",
extract_msb_24b ? "u" : "", i, extract_msb_24b ? "_raw" : "", i,
tex_remap, i);
if (extract_msb_24b) {
mstring_append_fmt(vars,
"vec4 t%d_depth = vec4(float(t%d_depth_raw.x >> 8) "
"/ 0xFFFFFF, 1.0, 0.0, 0.0);\n",
i, i);
}
// Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float.
if (compare_z) {
mstring_append_fmt(
vars,
"float t%d_max_depth;\n"
"if (t%d_depth.y > 0) {\n"
" t%d_max_depth = 0xFFFFFF;\n"
"} else {\n"
" t%d_max_depth = t%d_depth.z > 0 ? 511.9375 : 0xFFFF;\n"
"}\n"
"t%d_depth.x *= t%d_max_depth;\n"
"pT%d.z = clamp(pT%d.z / pT%d.w, 0, t%d_max_depth);\n"
"vec4 t%d = vec4(t%d_depth.x %s pT%d.z ? 1.0 : 0.0);\n",
i, i, i, i, i,
i, i, i, i, i, i,
i, i, comparison, i);
} else {
mstring_append_fmt(
vars,
"vec4 t%d = vec4(t%d_depth.x %s 0.0 ? 1.0 : 0.0);\n",
i, i, comparison);
}
}
// Adjust the s, t coordinates in the given VAR to account for the 4 texel
// border supported by the hardware.
static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, int tex_index, const char *var_template)
{
int i = tex_index;
if (ps->state.border_logical_size[i][0] == 0.0f) {
return;
}
char var_name[32] = {0};
snprintf(var_name, sizeof(var_name), var_template, i);
mstring_append_fmt(
vars,
"vec3 t%dLogicalSize = vec3(%f, %f, %f);\n"
"%s.xyz = (%s.xyz * t%dLogicalSize + vec3(4, 4, 4)) * vec3(%f, %f, %f);\n",
i, ps->state.border_logical_size[i][0], ps->state.border_logical_size[i][1], ps->state.border_logical_size[i][2],
var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]);
}
static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex)
{
assert(ps->state.dim_tex[tex] == 2);
// FIXME: Quincunx
g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", tex);
const char *tex_remap = ps->state.rect_tex[tex] ? normalize_tex_coords : "";
mstring_append_fmt(vars,
"vec4 t%d = vec4(0.0);\n"
"for (int i = 0; i < 9; i++) {\n"
" vec3 texCoordDelta = vec3(convolution3x3[i], 0);\n"
" texCoordDelta.xy /= textureSize(texSamp%d, 0);\n"
" t%d += textureProj(texSamp%d, %s(pT%d.xyw) + texCoordDelta) * gaussian3x3[i];\n"
"}\n", tex, tex, tex, tex, tex_remap, tex);
}
static MString* psh_convert(struct PixelShader *ps)
{
const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove
MString *preflight = mstring_new();
pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan,
ps->state.smooth_shading, true, false, false);
if (ps->state.vulkan) {
mstring_append_fmt(preflight,
"layout(location = 0) out vec4 fragColor;\n"
"layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING);
} else {
mstring_append_fmt(preflight,
"layout(location = 0) out vec4 fragColor;\n");
}
mstring_append_fmt(preflight, "%sint alphaRef;\n"
"%svec4 fogColor;\n"
"%sivec4 clipRegion[8];\n"
"%svec4 clipRange;\n"
"%sfloat depthOffset;\n",
u, u, u, u, u);
for (int i = 0; i < 4; i++) {
mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n"
"%sfloat bumpScale%d;\n"
"%sfloat bumpOffset%d;\n"
"%sfloat texScale%d;\n",
u, i, u, i, u, i, u, i);
}
for (int i = 0; i < 9; i++) {
for (int j = 0; j < 2; j++) {
mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i);
}
}
if (ps->state.vulkan) {
mstring_append(preflight, "};\n");
}
const char *dotmap_funcs[] = {
"dotmap_zero_to_one",
"dotmap_minus1_to_1_d3d",
"dotmap_minus1_to_1_gl",
"dotmap_minus1_to_1",
"dotmap_hilo_1",
"dotmap_hilo_hemisphere_d3d",
"dotmap_hilo_hemisphere_gl",
"dotmap_hilo_hemisphere",
};
mstring_append_fmt(preflight,
"float sign1(float x) {\n"
" x *= 255.0;\n"
" return (x-128.0)/127.0;\n"
"}\n"
"float sign2(float x) {\n"
" x *= 255.0;\n"
" if (x >= 128.0) return (x-255.5)/127.5;\n"
" else return (x+0.5)/127.5;\n"
"}\n"
"float sign3(float x) {\n"
" x *= 255.0;\n"
" if (x >= 128.0) return (x-256.0)/127.0;\n"
" else return (x)/127.0;\n"
"}\n"
"float sign3_to_0_to_1(float x) {\n"
" if (x >= 0) return x/2;\n"
" else return 1+x/2;\n"
"}\n"
"vec3 dotmap_zero_to_one(vec4 col) {\n"
" return col.rgb;\n"
"}\n"
"vec3 dotmap_minus1_to_1_d3d(vec4 col) {\n"
" return vec3(sign1(col.r),sign1(col.g),sign1(col.b));\n"
"}\n"
"vec3 dotmap_minus1_to_1_gl(vec4 col) {\n"
" return vec3(sign2(col.r),sign2(col.g),sign2(col.b));\n"
"}\n"
"vec3 dotmap_minus1_to_1(vec4 col) {\n"
" return vec3(sign3(col.r),sign3(col.g),sign3(col.b));\n"
"}\n"
"vec3 dotmap_hilo_1(vec4 col) {\n"
" uint hi_i = uint(col.a * float(0xff)) << 8\n"
" | uint(col.r * float(0xff));\n"
" uint lo_i = uint(col.g * float(0xff)) << 8\n"
" | uint(col.b * float(0xff));\n"
" float hi_f = float(hi_i) / float(0xffff);\n"
" float lo_f = float(lo_i) / float(0xffff);\n"
" return vec3(hi_f, lo_f, 1.0);\n"
"}\n"
"vec3 dotmap_hilo_hemisphere_d3d(vec4 col) {\n"
" return col.rgb;\n" // FIXME
"}\n"
"vec3 dotmap_hilo_hemisphere_gl(vec4 col) {\n"
" return col.rgb;\n" // FIXME
"}\n"
"vec3 dotmap_hilo_hemisphere(vec4 col) {\n"
" return col.rgb;\n" // FIXME
"}\n"
"const float[9] gaussian3x3 = float[9](\n"
" 1.0/16.0, 2.0/16.0, 1.0/16.0,\n"
" 2.0/16.0, 4.0/16.0, 2.0/16.0,\n"
" 1.0/16.0, 2.0/16.0, 1.0/16.0);\n"
"const vec2[9] convolution3x3 = vec2[9](\n"
" vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n"
" vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n"
" vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n"
);
/* Window Clipping */
MString *clip = mstring_new();
mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n",
ps->state.window_clip_exclusive ? "Exc" : "Inc");
if (!ps->state.window_clip_exclusive) {
mstring_append(clip, "bool clipContained = false;\n");
}
mstring_append(clip, "vec2 coord = gl_FragCoord.xy - 0.5;\n"
"for (int i = 0; i < 8; i++) {\n"
" bool outside = any(bvec4(\n"
" lessThan(coord, vec2(clipRegion[i].xy)),\n"
" greaterThanEqual(coord, vec2(clipRegion[i].zw))));\n"
" if (!outside) {\n");
if (ps->state.window_clip_exclusive) {
mstring_append(clip, " discard;\n");
} else {
mstring_append(clip, " clipContained = true;\n"
" break;\n");
}
mstring_append(clip, " }\n"
"}\n");
if (!ps->state.window_clip_exclusive) {
mstring_append(clip, "if (!clipContained) {\n"
" discard;\n"
"}\n");
}
/* Depth clipping */
if (ps->state.depth_clipping) {
if (ps->state.z_perspective) {
mstring_append(
clip, "float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n"
"if (zvalue < clipRange.z || clipRange.w < zvalue) {\n"
" discard;\n"
"}\n");
} else {
/* Take care of floating point precision problems. MS dashboard
* outputs exactly 0.0 z-coordinates and then our fixed function
* vertex shader outputs -w as the z-coordinate when OpenGL is
* used. Since -w/w = -1, this should give us exactly 0.0 as
* gl_FragCoord.z here. Unfortunately, with AMD Radeon RX 6600 the
* result is slightly greater than 0. MS dashboard sets the clip
* range to [0.0, 0.0] and so the imprecision causes unwanted
* clipping. Note that since Vulkan uses NDC range [0,1] it
* doesn't suffer from this problem with Radeon. Also, despite the
* imprecision OpenGL Radeon writes the correct value 0 to the depth
* buffer (if writing is enabled.) Radeon appears to write floored
* values. To compare, Intel integrated UHD 770 has gl_FragCoord.z
* exactly 0 (and writes rounded to closest integer values to the
* depth buffer.) Radeon OpenGL problem could also be fixed by using
* glClipControl(), but it requires OpenGL 4.5.
* Above is based on experiments with Linux and Mesa.
*/
if (ps->state.vulkan) {
mstring_append(
clip, "if (gl_FragCoord.z*clipRange.y < clipRange.z ||\n"
" gl_FragCoord.z*clipRange.y > clipRange.w) {\n"
" discard;\n"
"}\n");
} else {
mstring_append(
clip, "if ((gl_FragCoord.z + 1.0f/16777216.0f)*clipRange.y < clipRange.z ||\n"
" (gl_FragCoord.z - 1.0f/16777216.0f)*clipRange.y > clipRange.w) {\n"
" discard;\n"
"}\n");
}
}
}
MString *vars = mstring_new();
mstring_append(vars, "vec4 pD0 = vtxD0;\n");
mstring_append(vars, "vec4 pD1 = vtxD1;\n");
mstring_append(vars, "vec4 pB0 = vtxB0;\n");
mstring_append(vars, "vec4 pB1 = vtxB1;\n");
mstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtxFog, 0.0, 1.0));\n");
mstring_append(vars, "vec4 pT0 = vtxT0;\n");
mstring_append(vars, "vec4 pT1 = vtxT1;\n");
mstring_append(vars, "vec4 pT2 = vtxT2;\n");
if (ps->state.point_sprite) {
assert(!ps->state.rect_tex[3]);
mstring_append(vars, "vec4 pT3 = vec4(gl_PointCoord, 1.0, 1.0);\n");
} else {
mstring_append(vars, "vec4 pT3 = vtxT3;\n");
}
mstring_append(vars, "\n");
mstring_append(vars, "vec4 v0 = pD0;\n");
mstring_append(vars, "vec4 v1 = pD1;\n");
mstring_append(vars, "vec4 ab;\n");
mstring_append(vars, "vec4 cd;\n");
mstring_append(vars, "vec4 mux_sum;\n");
ps->code = mstring_new();
for (int i = 0; i < 4; i++) {
const char *sampler_type = get_sampler_type(ps->tex_modes[i], &ps->state, i);
g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i);
const char *tex_remap = ps->state.rect_tex[i] ? normalize_tex_coords : "";
assert(ps->dot_map[i] < 8);
const char *dotmap_func = dotmap_funcs[ps->dot_map[i]];
if (ps->dot_map[i] > 3) {
NV2A_UNIMPLEMENTED("Dot Mapping mode %s", dotmap_func);
}
switch (ps->tex_modes[i]) {
case PS_TEXTUREMODES_NONE:
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0, 0.0, 0.0, 1.0); /* PS_TEXTUREMODES_NONE */\n",
i);
break;
case PS_TEXTUREMODES_PROJECT2D: {
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, false, vars);
} else {
apply_border_adjustment(ps, vars, i, "pT%d");
if (((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) ||
(ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX))) {
apply_convolution_filter(ps, vars, i);
} else {
if (ps->state.dim_tex[i] == 2) {
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyw));\n",
i, i, tex_remap, i);
} else if (ps->state.dim_tex[i] == 3) {
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, vec4(pT%d.xy, 0.0, pT%d.w));\n",
i, i, i, i);
} else {
assert(!"Unhandled texture dimensions");
}
}
}
break;
}
case PS_TEXTUREMODES_PROJECT3D:
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, true, vars);
} else {
apply_border_adjustment(ps, vars, i, "pT%d");
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyzw));\n",
i, i, tex_remap, i);
}
break;
case PS_TEXTUREMODES_CUBEMAP:
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, pT%d.xyz);\n",
i, i, i);
break;
case PS_TEXTUREMODES_PASSTHRU:
assert(ps->state.border_logical_size[i][0] == 0.0f && "Unexpected border texture on passthru");
mstring_append_fmt(vars, "vec4 t%d = pT%d;\n", i, i);
break;
case PS_TEXTUREMODES_CLIPPLANE: {
int j;
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_CLIPPLANE */\n",
i);
for (j = 0; j < 4; j++) {
mstring_append_fmt(vars, " if(pT%d.%c %s 0.0) { discard; };\n",
i, "xyzw"[j],
ps->state.compare_mode[i][j] ? ">=" : "<");
}
break;
}
case PS_TEXTUREMODES_BUMPENVMAP:
assert(i >= 1);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
mstring_append_fmt(vars, "vec2 dsdt%d = t%d.bg;\n",
i, ps->input_tex[i]);
} else {
/* Convert to signed (FIXME: loss of accuracy due to filtering/interpolation) */
mstring_append_fmt(vars, "vec2 dsdt%d = vec2(sign3(t%d.b), sign3(t%d.g));\n",
i, ps->input_tex[i], ps->input_tex[i]);
}
mstring_append_fmt(vars, "dsdt%d = bumpMat%d * dsdt%d;\n", i, i, i);
if (ps->state.dim_tex[i] == 2) {
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdt%d));\n",
i, i, tex_remap, i, i);
} else if (ps->state.dim_tex[i] == 3) {
// FIXME: Does hardware pass through the r/z coordinate or is it 0?
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(pT%d.xy + dsdt%d, pT%d.z));\n",
i, i, i, i, i);
} else {
assert(!"Unhandled texture dimensions");
}
break;
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
assert(i >= 1);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
mstring_append_fmt(vars, "vec3 dsdtl%d = vec3(t%d.bg, sign3_to_0_to_1(t%d.r));\n",
i, ps->input_tex[i], ps->input_tex[i]);
} else {
/* Convert to signed (FIXME: loss of accuracy due to filtering/interpolation) */
mstring_append_fmt(vars, "vec3 dsdtl%d = vec3(sign3(t%d.b), sign3(t%d.g), t%d.r);\n",
i, ps->input_tex[i], ps->input_tex[i], ps->input_tex[i]);
}
mstring_append_fmt(vars, "dsdtl%d.st = bumpMat%d * dsdtl%d.st;\n",
i, i, i);
if (ps->state.dim_tex[i] == 2) {
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdtl%d.st));\n",
i, i, tex_remap, i, i);
} else if (ps->state.dim_tex[i] == 3) {
// FIXME: Does hardware pass through the r/z coordinate or is it 0?
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, vec3(pT%d.xy + dsdtl%d.st, pT%d.z));\n",
i, i, i, i, i);
} else {
assert(!"Unhandled texture dimensions");
}
mstring_append_fmt(vars, "t%d = t%d * (bumpScale%d * dsdtl%d.p + bumpOffset%d);\n",
i, i, i, i, i);
break;
case PS_TEXTUREMODES_BRDF:
assert(i >= 2);
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_BRDF */\n",
i);
NV2A_UNIMPLEMENTED("PS_TEXTUREMODES_BRDF");
break;
case PS_TEXTUREMODES_DOT_ST:
assert(i >= 2);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_ST */\n");
mstring_append_fmt(vars,
"float dot%d = dot(pT%d.xyz, %s(t%d));\n"
"vec2 dotST%d = vec2(dot%d, dot%d);\n",
i, i, dotmap_func, ps->input_tex[i], i, i-1, i);
apply_border_adjustment(ps, vars, i, "dotST%d");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(dotST%d));\n",
i, i, tex_remap, i);
break;
case PS_TEXTUREMODES_DOT_ZW:
assert(i >= 2);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_ZW */\n");
mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d));\n",
i, i, dotmap_func, ps->input_tex[i]);
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0);\n", i);
// FIXME: mstring_append_fmt(vars, "gl_FragDepth = t%d.x;\n", i);
break;
case PS_TEXTUREMODES_DOT_RFLCT_DIFF:
assert(i == 2);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_RFLCT_DIFF */\n");
mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d));\n",
i, i, dotmap_func, ps->input_tex[i]);
assert(ps->dot_map[i+1] < 8);
mstring_append_fmt(vars, "float dot%d_n = dot(pT%d.xyz, %s(t%d));\n",
i, i+1, dotmap_funcs[ps->dot_map[i+1]], ps->input_tex[i+1]);
mstring_append_fmt(vars, "vec3 n_%d = vec3(dot%d, dot%d, dot%d_n);\n",
i, i-1, i, i);
apply_border_adjustment(ps, vars, i, "n_%d");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, n_%d);\n",
i, i, i);
break;
case PS_TEXTUREMODES_DOT_RFLCT_SPEC:
assert(i == 3);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_RFLCT_SPEC */\n");
mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d));\n",
i, i, dotmap_func, ps->input_tex[i]);
mstring_append_fmt(vars, "vec3 n_%d = vec3(dot%d, dot%d, dot%d);\n",
i, i-2, i-1, i);
mstring_append_fmt(vars, "vec3 e_%d = vec3(pT%d.w, pT%d.w, pT%d.w);\n",
i, i-2, i-1, i);
mstring_append_fmt(vars, "vec3 rv_%d = 2*n_%d*dot(n_%d,e_%d)/dot(n_%d,n_%d) - e_%d;\n",
i, i, i, i, i, i, i);
apply_border_adjustment(ps, vars, i, "rv_%d");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, rv_%d);\n",
i, i, i);
break;
case PS_TEXTUREMODES_DOT_STR_3D:
assert(i == 3);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_STR_3D */\n");
mstring_append_fmt(vars,
"float dot%d = dot(pT%d.xyz, %s(t%d));\n"
"vec3 dotSTR%d = vec3(dot%d, dot%d, dot%d);\n",
i, i, dotmap_func, ps->input_tex[i],
i, i-2, i-1, i);
apply_border_adjustment(ps, vars, i, "dotSTR%d");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotSTR%d);\n",
i, i, i);
break;
case PS_TEXTUREMODES_DOT_STR_CUBE:
assert(i == 3);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOT_STR_CUBE */\n");
mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d));\n",
i, i, dotmap_func, ps->input_tex[i]);
mstring_append_fmt(vars, "vec3 dotSTR%dCube = vec3(dot%d, dot%d, dot%d);\n",
i, i-2, i-1, i);
apply_border_adjustment(ps, vars, i, "dotSTR%dCube");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, dotSTR%dCube);\n",
i, i, i);
break;
case PS_TEXTUREMODES_DPNDNT_AR:
assert(i >= 1);
assert(!ps->state.rect_tex[i]);
mstring_append_fmt(vars, "vec2 t%dAR = t%d.ar;\n", i, ps->input_tex[i]);
apply_border_adjustment(ps, vars, i, "t%dAR");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dAR));\n",
i, i, tex_remap, i);
break;
case PS_TEXTUREMODES_DPNDNT_GB:
assert(i >= 1);
assert(!ps->state.rect_tex[i]);
mstring_append_fmt(vars, "vec2 t%dGB = t%d.gb;\n", i, ps->input_tex[i]);
apply_border_adjustment(ps, vars, i, "t%dGB");
mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dGB));\n",
i, i, tex_remap, i);
break;
case PS_TEXTUREMODES_DOTPRODUCT:
assert(i == 1 || i == 2);
mstring_append_fmt(vars, "/* PS_TEXTUREMODES_DOTPRODUCT */\n");
mstring_append_fmt(vars, "float dot%d = dot(pT%d.xyz, %s(t%d));\n",
i, i, dotmap_func, ps->input_tex[i]);
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0);\n", i);
break;
case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST:
assert(i == 3);
mstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST */\n",
i);
NV2A_UNIMPLEMENTED("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST");
break;
default:
fprintf(stderr, "Unknown ps tex mode: 0x%x\n", ps->tex_modes[i]);
assert(false);
break;
}
if (sampler_type != NULL) {
if (ps->state.vulkan) {
mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i);
}
mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
/* As this means a texture fetch does happen, do alphakill */
if (ps->state.alphakill[i]) {
mstring_append_fmt(vars, "if (t%d.a == 0.0) { discard; };\n",
i);
}
if (ps->state.rect_tex[i]) {
mstring_append_fmt(preflight,
"vec2 norm%d(vec2 coord) {\n"
" return coord / (textureSize(texSamp%d, 0) / texScale%d);\n"
"}\n",
i, i, i);
mstring_append_fmt(preflight,
"vec3 norm%d(vec3 coord) {\n"
" return vec3(norm%d(coord.xy), coord.z);\n"
"}\n",
i, i);
mstring_append_fmt(preflight,
"vec4 norm%d(vec4 coord) {\n"
" return vec4(norm%d(coord.xy), 0, coord.w);\n"
"}\n",
i, i);
}
}
}
for (int i = 0; i < ps->num_stages; i++) {
ps->cur_stage = i;
mstring_append_fmt(ps->code, "// Stage %d\n", i);
MString* color = add_stage_code(ps, ps->stage[i].rgb_input, ps->stage[i].rgb_output, "rgb", false);
MString* alpha = add_stage_code(ps, ps->stage[i].alpha_input, ps->stage[i].alpha_output, "a", true);
mstring_append(ps->code, mstring_get_str(color));
mstring_append(ps->code, mstring_get_str(alpha));
mstring_unref(color);
mstring_unref(alpha);
}
if (ps->final_input.enabled) {
ps->cur_stage = 8;
mstring_append(ps->code, "// Final Combiner\n");
add_final_stage_code(ps, ps->final_input);
}
if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) {
if (ps->state.alpha_func == ALPHA_FUNC_NEVER) {
mstring_append(ps->code, "discard;\n");
} else {
const char* alpha_op;
switch (ps->state.alpha_func) {
case ALPHA_FUNC_LESS: alpha_op = "<"; break;
case ALPHA_FUNC_EQUAL: alpha_op = "=="; break;
case ALPHA_FUNC_LEQUAL: alpha_op = "<="; break;
case ALPHA_FUNC_GREATER: alpha_op = ">"; break;
case ALPHA_FUNC_NOTEQUAL: alpha_op = "!="; break;
case ALPHA_FUNC_GEQUAL: alpha_op = ">="; break;
default:
assert(false);
break;
}
mstring_append_fmt(ps->code,
"int fragAlpha = int(round(fragColor.a * 255.0));\n"
"if (!(fragAlpha %s alphaRef)) discard;\n",
alpha_op);
}
}
for (int i = 0; i < ps->num_var_refs; i++) {
mstring_append_fmt(vars, "vec4 %s = vec4(0);\n", ps->var_refs[i]);
if (strcmp(ps->var_refs[i], "r0") == 0) {
if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) {
mstring_append(vars, "r0.a = t0.a;\n");
} else {
mstring_append(vars, "r0.a = 1.0;\n");
}
}
}
if (ps->state.z_perspective) {
if (!ps->state.depth_clipping) {
mstring_append(ps->code,
"float zvalue = 1.0/gl_FragCoord.w + depthOffset;\n");
}
/* TODO: With integer depth buffers Xbox hardware floors values and so
* does Radeon, but Intel UHD 770 rounds to nearest. Should probably
* floor here explicitly (in some way that doesn't also cause
* imprecision issues due to division by clipRange.y)
*/
mstring_append(ps->code,
"gl_FragDepth = clamp(zvalue, clipRange.z, clipRange.w)/clipRange.y;\n");
} else if (!ps->state.depth_clipping) {
mstring_append(ps->code,
"gl_FragDepth = clamp(gl_FragCoord.z, clipRange.z/clipRange.y, clipRange.w/clipRange.y);\n");
}
MString *final = mstring_new();
mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400);
mstring_append(final, mstring_get_str(preflight));
mstring_append(final, "void main() {\n");
mstring_append(final, mstring_get_str(clip));
mstring_append(final, mstring_get_str(vars));
mstring_append(final, mstring_get_str(ps->code));
mstring_append(final, "}\n");
mstring_unref(preflight);
mstring_unref(vars);
mstring_unref(ps->code);
return final;
}
static void parse_input(struct InputInfo *var, int value)
{
var->reg = value & 0xF;
var->chan = value & 0x10;
var->mod = value & 0xE0;
}
static void parse_combiner_inputs(uint32_t value,
struct InputInfo *a, struct InputInfo *b,
struct InputInfo *c, struct InputInfo *d)
{
parse_input(d, value & 0xFF);
parse_input(c, (value >> 8) & 0xFF);
parse_input(b, (value >> 16) & 0xFF);
parse_input(a, (value >> 24) & 0xFF);
}
static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
{
out->cd = value & 0xF;
out->ab = (value >> 4) & 0xF;
out->muxsum = (value >> 8) & 0xF;
int flags = value >> 12;
out->flags = flags;
out->cd_op = flags & 1;
out->ab_op = flags & 2;
out->muxsum_op = flags & 4;
out->mapping = flags & 0x38;
out->ab_alphablue = flags & 0x80;
out->cd_alphablue = flags & 0x40;
}
MString *pgraph_gen_psh_glsl(const PshState state)
{
int i;
struct PixelShader ps;
memset(&ps, 0, sizeof(ps));
ps.state = state;
ps.num_stages = state.combiner_control & 0xFF;
ps.flags = state.combiner_control >> 8;
for (i = 0; i < 4; i++) {
ps.tex_modes[i] = (state.shader_stage_program >> (i * 5)) & 0x1F;
}
ps.dot_map[0] = 0;
ps.dot_map[1] = (state.other_stage_input >> 0) & 0xf;
ps.dot_map[2] = (state.other_stage_input >> 4) & 0xf;
ps.dot_map[3] = (state.other_stage_input >> 8) & 0xf;
ps.input_tex[0] = -1;
ps.input_tex[1] = 0;
ps.input_tex[2] = (state.other_stage_input >> 16) & 0xF;
ps.input_tex[3] = (state.other_stage_input >> 20) & 0xF;
for (i = 0; i < ps.num_stages; i++) {
parse_combiner_inputs(state.rgb_inputs[i],
&ps.stage[i].rgb_input.a, &ps.stage[i].rgb_input.b,
&ps.stage[i].rgb_input.c, &ps.stage[i].rgb_input.d);
parse_combiner_inputs(state.alpha_inputs[i],
&ps.stage[i].alpha_input.a, &ps.stage[i].alpha_input.b,
&ps.stage[i].alpha_input.c, &ps.stage[i].alpha_input.d);
parse_combiner_output(state.rgb_outputs[i], &ps.stage[i].rgb_output);
parse_combiner_output(state.alpha_outputs[i], &ps.stage[i].alpha_output);
}
struct InputInfo blank;
ps.final_input.enabled = state.final_inputs_0 || state.final_inputs_1;
if (ps.final_input.enabled) {
parse_combiner_inputs(state.final_inputs_0,
&ps.final_input.a, &ps.final_input.b,
&ps.final_input.c, &ps.final_input.d);
parse_combiner_inputs(state.final_inputs_1,
&ps.final_input.e, &ps.final_input.f,
&ps.final_input.g, &blank);
int flags = state.final_inputs_1 & 0xFF;
ps.final_input.clamp_sum = flags & PS_FINALCOMBINERSETTING_CLAMP_SUM;
ps.final_input.inv_v1 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1;
ps.final_input.inv_r0 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0;
}
return psh_convert(&ps);
}