xbox: broken vertex shader translation, and other shit that doesn't work.

2012-11-13 01:18:46 +11:00 · 2012-11-13 01:18:46 +11:00 · 2d53c2dd7d
parent cdcbc81ad8
commit 2d53c2dd7d
4 changed files with 1373 additions and 177 deletions
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@ -12,6 +12,6 @@ obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_msi.o
 obj-y += kvm/
 obj-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o

-obj-$(CONFIG_XBOX) += xbox.o xbox_pci.o acpi_xbox.o amd_smbus.o nv2a.o mcpx.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o
+obj-$(CONFIG_XBOX) += xbox.o xbox_pci.o acpi_xbox.o amd_smbus.o nv2a.o nv2a_vsh.o mcpx.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o

 obj-y := $(addprefix ../,$(obj-y))
--- a/hw/nv2a.c
+++ b/hw/nv2a.c
--- a/hw/nv2a_vsh.c
+++ b/hw/nv2a_vsh.c
@ -0,0 +1,682 @@
+/*
+ * QEMU Geforce NV2A vertex shader translation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * Based on:
+ * Cxbx, VertexShader.cpp
+ * Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
+ *                    Kingofc <kingofc@freenet.de>
+ * Dxbx, uPushBuffer.pas
+ * Copyright (c) 2007 Shadow_tj, PatrickvL
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "nv2a_vsh.h"
+
+
+#define VSH_TOKEN_SIZE 4 /* 32-bit words per shader instruction (vsh_translate steps by this) */
+
+
+/* Names of the bit fields packed into one shader instruction.
+ * Each value is used as an index into field_mapping[] by vsh_get_field(). */
+typedef enum {
+    FLD_ILU = 0,
+    FLD_MAC,
+    FLD_CONST,
+    FLD_V,
+    // Input A (decode_swizzle steps SWZ_X..SWZ_W with ++, and decode_opcode_input uses NEG+1 for SWZ_X, so keep these consecutive)
+    FLD_A_NEG,
+    FLD_A_SWZ_X,
+    FLD_A_SWZ_Y,
+    FLD_A_SWZ_Z,
+    FLD_A_SWZ_W,
+    FLD_A_R,
+    FLD_A_MUX,
+    // Input B (same layout constraint as input A)
+    FLD_B_NEG,
+    FLD_B_SWZ_X,
+    FLD_B_SWZ_Y,
+    FLD_B_SWZ_Z,
+    FLD_B_SWZ_W,
+    FLD_B_R,
+    FLD_B_MUX,
+    // Input C (same layout constraint; the R index is split into a high and a low part)
+    FLD_C_NEG,
+    FLD_C_SWZ_X,
+    FLD_C_SWZ_Y,
+    FLD_C_SWZ_Z,
+    FLD_C_SWZ_W,
+    FLD_C_R_HIGH,
+    FLD_C_R_LOW,
+    FLD_C_MUX,
+    // Output
+    FLD_OUT_MAC_MASK,
+    FLD_OUT_R,
+    FLD_OUT_ILU_MASK,
+    FLD_OUT_O_MASK,
+    FLD_OUT_ORB,
+    FLD_OUT_ADDRESS,
+    FLD_OUT_MUX,
+    // Relative addressing (non-zero makes constant operands print with an A0 offset)
+    FLD_A0X,
+    // Final instruction (vsh_translate stops after an instruction with this bit set)
+    FLD_FINAL
+} VshFieldName;
+
+
+/* Register file of an opcode input, as read from the FLD_x_MUX fields. */
+typedef enum {
+    PARAM_UNKNOWN = 0, /* not handled by decode_opcode_input (hits its assert) */
+    PARAM_R,           /* printed as "R<n>" */
+    PARAM_V,           /* printed as "v<n>", n taken from FLD_V */
+    PARAM_C            /* printed as "c[<n>]", n taken from FLD_CONST */
+} VshParameterType;
+
+/* Values of FLD_OUT_ORB: which kind of register the output write targets. */
+typedef enum {
+    OUTPUT_C = 0, /* decode_opcode prints " c<n>" */
+    OUTPUT_O      /* decode_opcode prints a name from out_reg_name[] */
+} VshOutputType;
+
+/* Values of FLD_OUT_MUX: which unit's opcode also gets the output-register write. */
+typedef enum {
+    OMUX_MAC = 0,
+    OMUX_ILU
+} VshOutputMux;
+
+/* Values of FLD_ILU; used as index into ilu_opcode[], ilu_opcode_params[]
+ * and ilu_force_scalar[]. */
+typedef enum {
+    ILU_NOP = 0,
+    ILU_MOV,
+    ILU_RCP,
+    ILU_RCC, /* currently emitted as "RCP", see ilu_opcode[] */
+    ILU_RSQ,
+    ILU_EXP,
+    ILU_LOG,
+    ILU_LIT
+} VshILU;
+
+/* Values of FLD_MAC; used as index into mac_opcode[] and mac_opcode_params[].
+ * The operand notes mirror mac_opcode_params[]. */
+typedef enum {
+    MAC_NOP = 0, /* no operands */
+    MAC_MOV,     /* A */
+    MAC_MUL,     /* A, B */
+    MAC_ADD,     /* A, C */
+    MAC_MAD,     /* A, B, C */
+    MAC_DP3,     /* A, B */
+    MAC_DPH,     /* A, B */
+    MAC_DP4,     /* A, B */
+    MAC_DST,     /* A, B */
+    MAC_MIN,     /* A, B */
+    MAC_MAX,     /* A, B */
+    MAC_SLT,     /* A, B */
+    MAC_SGE,     /* A, B */
+    MAC_ARL      /* A */
+} VshMAC;
+
+/* Values of the 2-bit swizzle fields; decode_swizzle uses them to index "xyzw". */
+typedef enum {
+    SWIZZLE_X = 0,
+    SWIZZLE_Y,
+    SWIZZLE_Z,
+    SWIZZLE_W
+} VshSwizzle;
+
+
+/* Location of one bit field inside a shader instruction. */
+typedef struct VshFieldMapping {
+    VshFieldName field_name; /* field this entry describes */
+    uint8_t subtoken;        /* index of the 32-bit word within the instruction */
+    uint8_t start_bit;       /* position of the field's least significant bit */
+    uint8_t bit_length;      /* width of the field in bits */
+} VshFieldMapping;
+
+/* Bit layout of every instruction field, looked up by VshFieldName.
+ * Columns: field name, 32-bit word index, first bit, width in bits. */
+static const VshFieldMapping field_mapping[] = {
+    [FLD_ILU]          = { FLD_ILU,          1, 25, 3 },
+    [FLD_MAC]          = { FLD_MAC,          1, 21, 4 },
+    [FLD_CONST]        = { FLD_CONST,        1, 13, 8 },
+    [FLD_V]            = { FLD_V,            1,  9, 4 },
+    /* Input A */
+    [FLD_A_NEG]        = { FLD_A_NEG,        1,  8, 1 },
+    [FLD_A_SWZ_X]      = { FLD_A_SWZ_X,      1,  6, 2 },
+    [FLD_A_SWZ_Y]      = { FLD_A_SWZ_Y,      1,  4, 2 },
+    [FLD_A_SWZ_Z]      = { FLD_A_SWZ_Z,      1,  2, 2 },
+    [FLD_A_SWZ_W]      = { FLD_A_SWZ_W,      1,  0, 2 },
+    [FLD_A_R]          = { FLD_A_R,          2, 28, 4 },
+    [FLD_A_MUX]        = { FLD_A_MUX,        2, 26, 2 },
+    /* Input B */
+    [FLD_B_NEG]        = { FLD_B_NEG,        2, 25, 1 },
+    [FLD_B_SWZ_X]      = { FLD_B_SWZ_X,      2, 23, 2 },
+    [FLD_B_SWZ_Y]      = { FLD_B_SWZ_Y,      2, 21, 2 },
+    [FLD_B_SWZ_Z]      = { FLD_B_SWZ_Z,      2, 19, 2 },
+    [FLD_B_SWZ_W]      = { FLD_B_SWZ_W,      2, 17, 2 },
+    [FLD_B_R]          = { FLD_B_R,          2, 13, 4 },
+    [FLD_B_MUX]        = { FLD_B_MUX,        2, 11, 2 },
+    /* Input C */
+    [FLD_C_NEG]        = { FLD_C_NEG,        2, 10, 1 },
+    [FLD_C_SWZ_X]      = { FLD_C_SWZ_X,      2,  8, 2 },
+    [FLD_C_SWZ_Y]      = { FLD_C_SWZ_Y,      2,  6, 2 },
+    [FLD_C_SWZ_Z]      = { FLD_C_SWZ_Z,      2,  4, 2 },
+    [FLD_C_SWZ_W]      = { FLD_C_SWZ_W,      2,  2, 2 },
+    [FLD_C_R_HIGH]     = { FLD_C_R_HIGH,     2,  0, 2 },
+    [FLD_C_R_LOW]      = { FLD_C_R_LOW,      3, 30, 2 },
+    [FLD_C_MUX]        = { FLD_C_MUX,        3, 28, 2 },
+    /* Output */
+    [FLD_OUT_MAC_MASK] = { FLD_OUT_MAC_MASK, 3, 24, 4 },
+    [FLD_OUT_R]        = { FLD_OUT_R,        3, 20, 4 },
+    [FLD_OUT_ILU_MASK] = { FLD_OUT_ILU_MASK, 3, 16, 4 },
+    [FLD_OUT_O_MASK]   = { FLD_OUT_O_MASK,   3, 12, 4 },
+    [FLD_OUT_ORB]      = { FLD_OUT_ORB,      3, 11, 1 },
+    [FLD_OUT_ADDRESS]  = { FLD_OUT_ADDRESS,  3,  3, 8 },
+    [FLD_OUT_MUX]      = { FLD_OUT_MUX,      3,  2, 1 },
+    /* Other */
+    [FLD_A0X]          = { FLD_A0X,          3,  1, 1 },
+    [FLD_FINAL]        = { FLD_FINAL,        3,  0, 1 },
+};
+
+
+/* Which of the three instruction inputs an opcode takes as operands. */
+typedef struct VshOpcodeParams {
+    bool A; /* opcode reads input A */
+    bool B; /* opcode reads input B */
+    bool C; /* opcode reads input C */
+} VshOpcodeParams;
+
+/* Inputs read by each ILU opcode, as { A, B, C }: every real ILU opcode
+ * takes input C only. */
+static const VshOpcodeParams ilu_opcode_params[] = {
+    [ILU_NOP] = { false, false, false }, /* Dxbx note : Unused */
+    [ILU_MOV] = { false, false, true  },
+    [ILU_RCP] = { false, false, true  },
+    [ILU_RCC] = { false, false, true  },
+    [ILU_RSQ] = { false, false, true  },
+    [ILU_EXP] = { false, false, true  },
+    [ILU_LOG] = { false, false, true  },
+    [ILU_LIT] = { false, false, true  },
+};
+
+/* Inputs read by each MAC opcode, as { A, B, C }. Note that ADD takes
+ * inputs A and C, not A and B. */
+static const VshOpcodeParams mac_opcode_params[] = {
+    [MAC_NOP] = { false, false, false }, /* Dxbx note : Unused */
+    [MAC_MOV] = { true,  false, false },
+    [MAC_MUL] = { true,  true,  false },
+    [MAC_ADD] = { true,  false, true  },
+    [MAC_MAD] = { true,  true,  true  },
+    [MAC_DP3] = { true,  true,  false },
+    [MAC_DPH] = { true,  true,  false },
+    [MAC_DP4] = { true,  true,  false },
+    [MAC_DST] = { true,  true,  false },
+    [MAC_MIN] = { true,  true,  false },
+    [MAC_MAX] = { true,  true,  false },
+    [MAC_SLT] = { true,  true,  false },
+    [MAC_SGE] = { true,  true,  false },
+    [MAC_ARL] = { true,  false, false },
+};
+
+
+
+/* Write-mask suffix for each 4-bit mask value (bit 3 = x ... bit 0 = w).
+ * A full mask prints nothing, and so does an empty one. */
+static const char* mask_str[] = {
+    [0x0] = "",      /* ____ */
+    [0x1] = ".w",    /* ___w */
+    [0x2] = ".z",    /* __z_ */
+    [0x3] = ".zw",   /* __zw */
+    [0x4] = ".y",    /* _y__ */
+    [0x5] = ".yw",   /* _y_w */
+    [0x6] = ".yz",   /* _yz_ */
+    [0x7] = ".yzw",  /* _yzw */
+    [0x8] = ".x",    /* x___ */
+    [0x9] = ".xw",   /* x__w */
+    [0xA] = ".xz",   /* x_z_ */
+    [0xB] = ".xzw",  /* x_zw */
+    [0xC] = ".xy",   /* xy__ */
+    [0xD] = ".xyw",  /* xy_w */
+    [0xE] = ".xyz",  /* xyz_ */
+    [0xF] = "",      /* xyzw, i.e. ".xyzw" left implicit */
+};
+
+/* Mnemonic emitted for each MAC opcode.
+ * Note: OpenGL seems to be case-sensitive, and requires upper-case opcodes! */
+static const char* mac_opcode[] = {
+    [MAC_NOP] = "NOP",
+    [MAC_MOV] = "MOV",
+    [MAC_MUL] = "MUL",
+    [MAC_ADD] = "ADD",
+    [MAC_MAD] = "MAD",
+    [MAC_DP3] = "DP3",
+    [MAC_DPH] = "DPH",
+    [MAC_DP4] = "DP4",
+    [MAC_DST] = "DST",
+    [MAC_MIN] = "MIN",
+    [MAC_MAX] = "MAX",
+    [MAC_SLT] = "SLT",
+    [MAC_SGE] = "SGE",
+    /* Destination is baked into the mnemonic; decode_opcode relies on this.
+     * Dxbx note : Alias for "mov a0.x" */
+    [MAC_ARL] = "ARL A0.x",
+};
+
+/* Mnemonic emitted for each ILU opcode. */
+static const char* ilu_opcode[] = {
+    [ILU_NOP] = "NOP",
+    [ILU_MOV] = "MOV",
+    [ILU_RCP] = "RCP",
+    [ILU_RCC] = "RCP", /* Was RCC */
+    [ILU_RSQ] = "RSQ",
+    [ILU_EXP] = "EXP",
+    [ILU_LOG] = "LOG",
+    [ILU_LIT] = "LIT",
+};
+
+/* Whether an ILU opcode forces its input C to a scalar: when true,
+ * decode_swizzle replicates the X swizzle selector to all four components.
+ * The table is never written, so it is const. */
+static const bool ilu_force_scalar[] = {
+    [ILU_NOP] = false,
+    [ILU_MOV] = false,
+    [ILU_RCP] = true,
+    [ILU_RCC] = true,
+    [ILU_RSQ] = true,
+    [ILU_EXP] = true,
+    [ILU_LOG] = true,
+    [ILU_LIT] = false,
+};
+
+/* Output register name for each 4-bit output address. Position goes to the
+ * temporary R12 so vsh_translate can post-process it before writing oPos. */
+static const char* out_reg_name[] = {
+    [0]  = "R12", /* "oPos" */
+    [1]  = "???",
+    [2]  = "???",
+    [3]  = "oD0",
+    [4]  = "oD1",
+    [5]  = "oFog",
+    [6]  = "oPts",
+    [7]  = "oB0",
+    [8]  = "oB1",
+    [9]  = "oT0",
+    [10] = "oT1",
+    [11] = "oT2",
+    [12] = "oT3",
+    [13] = "???",
+    [14] = "???",
+    [15] = "A0.x",
+};
+
+
+
+/*
+ * Extract a bit field from one 32-bit word of a shader instruction.
+ *
+ * shader_token: first word of the instruction
+ * subtoken:     index of the word to read
+ * start_bit:    position of the field's least significant bit
+ * bit_length:   width of the field in bits
+ *
+ * Returns the field value, right-aligned. A zero-width field or a start
+ * position beyond the word yields 0; a width of 32 or more returns all bits
+ * from start_bit upwards. Both cases are handled explicitly because shifting
+ * a 32-bit value by 32 or more is undefined behaviour in C.
+ */
+static int vsh_get_from_token(const uint32_t *shader_token,
+                              uint8_t subtoken,
+                              uint8_t start_bit,
+                              uint8_t bit_length)
+{
+    uint32_t value;
+
+    if (start_bit >= 32 || bit_length == 0) {
+        return 0;
+    }
+
+    value = shader_token[subtoken] >> start_bit;
+    if (bit_length < 32) {
+        value &= (UINT32_C(1) << bit_length) - 1;
+    }
+    return (int)value;
+}
+/* Read the named field of an instruction, using field_mapping[] to find
+ * its word, bit position and width. The result is truncated to 8 bits. */
+static uint8_t vsh_get_field(uint32_t *shader_token, VshFieldName field_name)
+{
+    const VshFieldMapping *where = &field_mapping[field_name];
+    const int raw = vsh_get_from_token(shader_token,
+                                       where->subtoken,
+                                       where->start_bit,
+                                       where->bit_length);
+
+    return (uint8_t)raw;
+}
+
+
+/* Converts the C register address to disassembly format: bits 5..7 select
+ * a bank of 32 registers (offset by -3 banks), bits 0..4 the register in
+ * that bank; VSH_D3DSCM_CORRECTION then maps -96..95 to 0..191. */
+static int16_t convert_c_register(const int16_t c_reg)
+{
+    const int16_t bank = (c_reg >> 5) & 7;
+    const int16_t index_in_bank = c_reg & 31;
+    const int16_t d3d_index = ((bank - 3) * 32) + index_in_bank;
+
+    return d3d_index + VSH_D3DSCM_CORRECTION;
+}
+
+
+
+/*
+ * Build the swizzle suffix for one opcode input.
+ *
+ * shader_token:  first word of the instruction
+ * swizzle_field: the input's SWZ_X field; the Y, Z and W selectors are the
+ *                three fields that follow it in VshFieldName
+ *
+ * Returns a new QString: "" for the identity swizzle, ".c" when all four
+ * selectors are equal, and the full ".cccc" form otherwise. Two- and
+ * three-component suffixes are never produced: they are not accepted by
+ * the ARB vertex program grammar, and the former ".xy"-style shortcut did
+ * not represent the decoded swizzle (x,x,z,z was printed as ".xx").
+ */
+static QString* decode_swizzle(uint32_t *shader_token,
+                               VshFieldName swizzle_field)
+{
+    static const char component_name[] = "xyzw";
+    VshSwizzle x, y, z, w;
+    char suffix[6];
+
+    /* some microcode instructions force a scalar value */
+    if (swizzle_field == FLD_C_SWZ_X
+        && ilu_force_scalar[vsh_get_field(shader_token, FLD_ILU)]) {
+        /* Assign x once, then copy: chaining "x = ... = x = f()" stores to
+         * x twice without sequencing, which is undefined in C. */
+        x = vsh_get_field(shader_token, swizzle_field);
+        y = z = w = x;
+    } else {
+        x = vsh_get_field(shader_token, swizzle_field);
+        y = vsh_get_field(shader_token, swizzle_field + 1);
+        z = vsh_get_field(shader_token, swizzle_field + 2);
+        w = vsh_get_field(shader_token, swizzle_field + 3);
+    }
+
+    if (x == SWIZZLE_X && y == SWIZZLE_Y
+        && z == SWIZZLE_Z && w == SWIZZLE_W) {
+        /* Don't print the swizzle if it's .xyzw */
+        return qstring_from_str("");
+    }
+
+    if (x == y && y == z && z == w) {
+        /* All four selectors agree: the one-component form is enough */
+        snprintf(suffix, sizeof(suffix), ".%c", component_name[x]);
+    } else {
+        snprintf(suffix, sizeof(suffix), ".%c%c%c%c",
+                 component_name[x], component_name[y],
+                 component_name[z], component_name[w]);
+    }
+    return qstring_from_str(suffix);
+}
+
+/*
+ * Decode one opcode input (A, B or C) into its textual operand:
+ * optional '-', the register, and the swizzle suffix.
+ *
+ * shader_token: first word of the instruction
+ * param:        register file of the input (value of its MUX field)
+ * neg_field:    the input's NEG field; its swizzle fields follow directly
+ * reg_num:      R-register index, used for PARAM_R only; for PARAM_V and
+ *               PARAM_C the index is read from FLD_V / FLD_CONST instead
+ *
+ * Returns a new QString holding the operand text.
+ */
+static QString* decode_opcode_input(uint32_t *shader_token,
+                                    VshParameterType param,
+                                    VshFieldName neg_field,
+                                    int reg_num)
+{
+    QString *ret_str = qstring_new();
+    /* Start out empty: if assert() is compiled out, an unexpected mux value
+     * must not make us append uninitialised stack contents. */
+    char tmp[40] = "";
+
+    if (vsh_get_field(shader_token, neg_field) > 0) {
+        qstring_append_chr(ret_str, '-');
+    }
+
+    switch (param) {
+    case PARAM_R:
+        snprintf(tmp, sizeof(tmp), "R%d", reg_num);
+        break;
+    case PARAM_V:
+        reg_num = vsh_get_field(shader_token, FLD_V);
+        snprintf(tmp, sizeof(tmp), "v%d", reg_num);
+        break;
+    case PARAM_C:
+        reg_num = convert_c_register(vsh_get_field(shader_token, FLD_CONST));
+        if (vsh_get_field(shader_token, FLD_A0X) > 0) {
+            /* ARB relative addressing must name the address register
+             * component: "c[A0.x+n]", not "c[A0+n]". */
+            snprintf(tmp, sizeof(tmp), "c[A0.x+%d]", reg_num);
+        } else {
+            snprintf(tmp, sizeof(tmp), "c[%d]", reg_num);
+        }
+        break;
+    default:
+        assert(false);
+        break;
+    }
+    qstring_append(ret_str, tmp);
+
+    {
+        /* swizzle bits are next to the neg bit */
+        QString *swizzle_str = decode_swizzle(shader_token, neg_field + 1);
+        qstring_append(ret_str, qstring_get_str(swizzle_str));
+        QDECREF(swizzle_str);
+    }
+
+    return ret_str;
+}
+
+
+/*
+ * Emit the text for one opcode (MAC or ILU) of an instruction.
+ *
+ * Up to two statements are produced: a write to the temporary register
+ * (when `mask` is non-zero) and a write to the output register (when
+ * FLD_OUT_MUX selects this unit and FLD_OUT_O_MASK is non-zero).
+ *
+ * shader_token: first word of the instruction
+ * out_mux:      unit this opcode belongs to
+ * mask:         write mask for the temporary-register destination
+ * opcode:       mnemonic to emit
+ * inputs:       operand list, already prefixed with ", "
+ *
+ * Returns a new QString, possibly empty.
+ */
+static QString* decode_opcode(uint32_t *shader_token,
+                              VshOutputMux out_mux,
+                              uint32_t mask,
+                              const char* opcode,
+                              QString *inputs)
+{
+    const char *operands = qstring_get_str(inputs);
+    const uint8_t out_mask = vsh_get_field(shader_token, FLD_OUT_O_MASK);
+    QString *text = qstring_new();
+    int dest_reg = vsh_get_field(shader_token, FLD_OUT_R);
+
+    /* Paired opcodes (both MAC and ILU present) share the R destination */
+    switch (out_mux) {
+    case OMUX_MAC:
+        /* Ignore paired MAC opcodes that write to R1 */
+        if (dest_reg == 1
+            && vsh_get_field(shader_token, FLD_ILU) != ILU_NOP) {
+            mask = 0;
+        }
+        break;
+    case OMUX_ILU:
+        /* Paired ILU opcodes can only write to R1 */
+        if (vsh_get_field(shader_token, FLD_MAC) != MAC_NOP) {
+            dest_reg = 1;
+        }
+        break;
+    default:
+        break;
+    }
+
+    /* Temporary-register write */
+    if (mask != 0) {
+        qstring_append(text, opcode);
+        /* The ARL mnemonic already carries its destination */
+        if (strcmp(opcode, mac_opcode[MAC_ARL]) != 0) {
+            qstring_append(text, " R");
+            qstring_append_int(text, dest_reg);
+            qstring_append(text, mask_str[mask]);
+        }
+        qstring_append(text, operands);
+        qstring_append(text, ";\n");
+    }
+
+    /* Output-register write, only if muxed to this unit and not masked away */
+    if (out_mask != 0
+        && vsh_get_field(shader_token, FLD_OUT_MUX) == out_mux) {
+        const uint8_t address = vsh_get_field(shader_token, FLD_OUT_ADDRESS);
+
+        qstring_append(text, opcode);
+        if (vsh_get_field(shader_token, FLD_OUT_ORB) == OUTPUT_C) {
+            /* TODO : Emulate writeable const registers */
+            qstring_append(text, " c");
+            qstring_append_int(text, convert_c_register(address));
+        } else {
+            qstring_append_chr(text, ' ');
+            qstring_append(text, out_reg_name[address & 0xF]);
+        }
+        qstring_append(text, mask_str[out_mask]);
+        qstring_append(text, operands);
+        qstring_append(text, ";\n");
+    }
+
+    return text;
+}
+
+
+/*
+ * Translate one shader instruction into text.
+ *
+ * An instruction can carry a MAC opcode, an ILU opcode, or both; the MAC
+ * statement(s) come first in the output.
+ *
+ * shader_token: first word of the instruction
+ *
+ * Returns a new QString (empty if both opcodes are NOP).
+ */
+static QString* decode_token(uint32_t *shader_token)
+{
+    QString *ret;
+
+    /* Since it's potentially used twice, decode input C once: */
+    QString *input_c =
+        decode_opcode_input(shader_token,
+                            vsh_get_field(shader_token, FLD_C_MUX),
+                            FLD_C_NEG,
+                            (vsh_get_field(shader_token, FLD_C_R_HIGH) << 2)
+                                | vsh_get_field(shader_token, FLD_C_R_LOW));
+
+    /* See what MAC opcode is written to (if not masked away).
+     * FLD_MAC is 4 bits wide but only 14 opcodes are defined, so values
+     * past the end of the tables are skipped instead of indexing them. */
+    unsigned int mac = vsh_get_field(shader_token, FLD_MAC);
+    if (mac != MAC_NOP
+        && mac < sizeof(mac_opcode) / sizeof(mac_opcode[0])
+        && mac < sizeof(mac_opcode_params) / sizeof(mac_opcode_params[0])) {
+        QString *inputs_mac = qstring_new();
+        if (mac_opcode_params[mac].A) {
+            QString *input_a =
+                decode_opcode_input(shader_token,
+                                    vsh_get_field(shader_token, FLD_A_MUX),
+                                    FLD_A_NEG,
+                                    vsh_get_field(shader_token, FLD_A_R));
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_a));
+            QDECREF(input_a);
+        }
+        if (mac_opcode_params[mac].B) {
+            QString *input_b =
+                decode_opcode_input(shader_token,
+                                    vsh_get_field(shader_token, FLD_B_MUX),
+                                    FLD_B_NEG,
+                                    vsh_get_field(shader_token, FLD_B_R));
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_b));
+            QDECREF(input_b);
+        }
+        if (mac_opcode_params[mac].C) {
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_c));
+        }
+
+        /* Then prepend these inputs with the actual opcode, mask, and input : */
+        ret = decode_opcode(shader_token,
+                            OMUX_MAC,
+                            vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
+                            mac_opcode[mac],
+                            inputs_mac);
+
+        /* decode_opcode copied the text; drop our reference */
+        QDECREF(inputs_mac);
+    } else {
+        ret = qstring_new();
+    }
+
+    /* See if a ILU opcode is present too (3-bit field, 8 table entries): */
+    VshILU ilu = vsh_get_field(shader_token, FLD_ILU);
+    if (ilu != ILU_NOP) {
+        QString *inputs_c = qstring_from_str(", ");
+        qstring_append(inputs_c, qstring_get_str(input_c));
+
+        /* Append the ILU opcode, mask and (the already determined) input C: */
+        QString *ilu_op =
+            decode_opcode(shader_token,
+                          OMUX_ILU,
+                          vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
+                          ilu_opcode[ilu],
+                          inputs_c);
+
+        qstring_append(ret, qstring_get_str(ilu_op));
+
+        QDECREF(inputs_c);
+        QDECREF(ilu_op);
+    }
+
+    QDECREF(input_c);
+
+    return ret;
+}
+
+/* Vertex shader header, mapping Xbox1 registers to the ARB syntax (original
+ * version by KingOfC). It declares the temporaries R0..R12, the address
+ * register A0, the inputs v0..v15, the outputs, and the constant array c[].
+ *
+ * Note about the 'conventional' attributes: the disabled (#if 0) variant
+ * aliases v0..v11 to conventional inputs (position, normal, colors, ...)
+ * as much as possible, so that one shader could serve both immediate and
+ * deferred mode rendering as well as the OpenGL fixed-function pipeline.
+ * The variant that is actually compiled maps every input vN to the generic
+ * attribute vertex.attrib[N].
+ */
+static const char* vsh_header =
+    "!!ARBvp1.0\n"
+    "TEMP R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12;\n"
+    "ADDRESS A0;\n"
+#if 0
+    "ATTRIB v0 = vertex.position;" // (See "conventional" note above)
+    "ATTRIB v1 = vertex.%s;" // Note : We replace this with "weight" or "attrib[1]" depending on GL_ARB_vertex_blend
+    "ATTRIB v2 = vertex.normal;"
+    "ATTRIB v3 = vertex.color.primary;"
+    "ATTRIB v4 = vertex.color.secondary;"
+    "ATTRIB v5 = vertex.fogcoord;"
+    "ATTRIB v6 = vertex.attrib[6];"
+    "ATTRIB v7 = vertex.attrib[7];"
+    "ATTRIB v8 = vertex.texcoord[0];"
+    "ATTRIB v9 = vertex.texcoord[1];"
+    "ATTRIB v10 = vertex.texcoord[2];"
+    "ATTRIB v11 = vertex.texcoord[3];"
+#else
+    "ATTRIB v0 = vertex.attrib[0];\n"
+    "ATTRIB v1 = vertex.attrib[1];\n"
+    "ATTRIB v2 = vertex.attrib[2];\n"
+    "ATTRIB v3 = vertex.attrib[3];\n"
+    "ATTRIB v4 = vertex.attrib[4];\n"
+    "ATTRIB v5 = vertex.attrib[5];\n"
+    "ATTRIB v6 = vertex.attrib[6];\n"
+    "ATTRIB v7 = vertex.attrib[7];\n"
+    "ATTRIB v8 = vertex.attrib[8];\n"
+    "ATTRIB v9 = vertex.attrib[9];\n"
+    "ATTRIB v10 = vertex.attrib[10];\n"
+    "ATTRIB v11 = vertex.attrib[11];\n"
+#endif
+    "ATTRIB v12 = vertex.attrib[12];\n"
+    "ATTRIB v13 = vertex.attrib[13];\n"
+    "ATTRIB v14 = vertex.attrib[14];\n"
+    "ATTRIB v15 = vertex.attrib[15];\n"
+    "OUTPUT oPos = result.position;\n"
+    "OUTPUT oD0 = result.color.front.primary;\n"
+    "OUTPUT oD1 = result.color.front.secondary;\n"
+    "OUTPUT oB0 = result.color.back.primary;\n"
+    "OUTPUT oB1 = result.color.back.secondary;\n"
+    "OUTPUT oPts = result.pointsize;\n"
+    "OUTPUT oFog = result.fogcoord;\n"
+    "OUTPUT oT0 = result.texcoord[0];\n"
+    "OUTPUT oT1 = result.texcoord[1];\n"
+    "OUTPUT oT2 = result.texcoord[2];\n"
+    "OUTPUT oT3 = result.texcoord[3];\n"
+    /* All 192 constants in one array declaration (requires NV_gpu_program4?) */
+    "PARAM c[] = { program.env[0..191] };\n"
+    "PARAM mvp[4] = { state.matrix.mvp };\n";
+
+
+/*
+ * Translate an Xbox vertex shader into an ARB vertex program.
+ *
+ * version:       shader version; currently not consulted
+ * tokens:        shader microcode, VSH_TOKEN_SIZE 32-bit words per instruction
+ * tokens_length: number of 32-bit words available in `tokens`
+ *
+ * Decoding stops after the first instruction with FLD_FINAL set, or when
+ * fewer than VSH_TOKEN_SIZE words remain, so a truncated trailing
+ * instruction is never read.
+ *
+ * Returns a new QString with the complete program text.
+ */
+QString* vsh_translate(uint16_t version,
+                       uint32_t *tokens, unsigned int tokens_length)
+{
+    QString *ret = qstring_from_str(vsh_header);
+    unsigned int offset = 0;
+
+    (void)version;
+
+    /* offset never exceeds tokens_length, so the subtraction cannot wrap */
+    while (tokens_length - offset >= VSH_TOKEN_SIZE) {
+        uint32_t *cur_token = tokens + offset;
+        QString *token_str = decode_token(cur_token);
+        qstring_append(ret, qstring_get_str(token_str));
+        QDECREF(token_str);
+
+        if (vsh_get_field(cur_token, FLD_FINAL)) {
+            break;
+        }
+        offset += VSH_TOKEN_SIZE;
+    }
+
+    /* Note : Since we replaced oPos with r12 in the above decoding,
+     * we have to assign oPos at the end; This can be done in two ways;
+     * 1) When the shader is complete (including transformations),
+     *    we could just do a 'MOV oPos, R12;' and be done with it.
+     * 2) In case of D3DFVF_XYZRHW, it seems the NV2A applies the mvp
+     *    (model/view/projection) matrix transformation AFTER executing
+     *    the shader (but OpenGL expects *the*shader* to handle this
+     *    transformation).
+     * We cannot discern these two situations yet. The matrix transformation
+     * of case 2 is currently disabled (see the commented-out code below),
+     * so only the Z mapping and the final MOV are emitted.
+     * TODO : What should we do about normals, eye-space lighting and all that?
+     */
+    qstring_append(ret,
+/*
+    '# Dxbx addition : Transform the vertex to clip coordinates :'
+    "DP4 R0.x, mvp[0], R12;"
+    "DP4 R0.y, mvp[1], R12;"
+    "DP4 R0.z, mvp[2], R12;"
+    "DP4 R0.w, mvp[3], R12;"
+    "MOV R12, R0;"
+*/
+
+    /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection
+     * in state.c
+     *
+     * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However,
+     * shaders are run before the homogeneous divide, so we have to take the w
+     * into account: z = ((z / w) * 2 - 1) * w, which is the same as
+     * z = z * 2 - w.
+     */
+        "# Apply Z coord mapping\n"
+        "ADD R12.z, R12.z, R12.z;\n"
+        "ADD R12.z, R12.z, -R12.w;\n"
+
+        "# End of shader:\n"
+        "MOV oPos, R12;\n"
+        "END"
+    );
+    return ret;
+}
--- a/hw/nv2a_vsh.h
+++ b/hw/nv2a_vsh.h
@ -0,0 +1,46 @@
+/*
+ * QEMU Geforce NV2A vertex shader translation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_NV2A_VSH_H
+#define HW_NV2A_VSH_H
+
+/* uint16_t / uint32_t are used in the prototype below; include the header
+ * that defines them so this file does not depend on what qstring.h pulls in. */
+#include <stdint.h>
+
+#include "qstring.h"
+
+/* Shader version identifiers */
+
+// vs.1.1, not an official value
+#define VSH_VERSION_VS                     0xF078
+
+// Xbox vertex shader
+#define VSH_VERSION_XVS                    0x2078
+
+// Xbox vertex state shader
+#define VSH_VERSION_XVSS                   0x7378
+
+// Xbox vertex read/write shader
+#define VSH_VERSION_XVSW                   0x7778
+
+
+/* Added to a decoded constant register index to map -96..95 to 0..191 */
+#define VSH_D3DSCM_CORRECTION 96
+
+/*
+ * Translate Xbox vertex shader microcode into ARB vertex program text.
+ *
+ * version:       one of the VSH_VERSION_* values
+ * tokens:        shader microcode, four 32-bit words per instruction
+ * tokens_length: number of 32-bit words in `tokens`
+ *
+ * Returns a newly created QString holding the program text; presumably the
+ * caller releases it with QDECREF (confirm against the call site).
+ */
+QString* vsh_translate(uint16_t version,
+                       uint32_t *tokens, unsigned int tokens_length);
+
+
+#endif